diff -Nru mesa-18.3.3/Android.common.mk mesa-19.0.1/Android.common.mk --- mesa-18.3.3/Android.common.mk 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/Android.common.mk 2019-03-31 23:16:37.000000000 +0000 @@ -37,7 +37,6 @@ -Wno-missing-field-initializers \ -Wno-initializer-overrides \ -Wno-mismatched-tags \ - -DVERSION=\"$(MESA_VERSION)\" \ -DPACKAGE_VERSION=\"$(MESA_VERSION)\" \ -DPACKAGE_BUGREPORT=\"https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa\" diff -Nru mesa-18.3.3/Android.mk mesa-19.0.1/Android.mk --- mesa-18.3.3/Android.mk 2017-11-14 18:46:21.000000000 +0000 +++ mesa-19.0.1/Android.mk 2019-03-31 23:16:37.000000000 +0000 @@ -24,7 +24,7 @@ # BOARD_GPU_DRIVERS should be defined. The valid values are # # classic drivers: i915 i965 -# gallium drivers: swrast freedreno i915g nouveau pl111 r300g r600g radeonsi vc4 virgl vmwgfx etnaviv imx +# gallium drivers: swrast freedreno i915g nouveau kmsro r300g r600g radeonsi vc4 virgl vmwgfx etnaviv # # The main target is libGLES_mesa. For each classic driver enabled, a DRI # module will also be built. DRI modules will be loaded by libGLES_mesa. 
@@ -52,15 +52,14 @@ freedreno.HAVE_GALLIUM_FREEDRENO \ i915g.HAVE_GALLIUM_I915 \ nouveau.HAVE_GALLIUM_NOUVEAU \ - pl111.HAVE_GALLIUM_PL111 \ + kmsro.HAVE_GALLIUM_KMSRO \ r300g.HAVE_GALLIUM_R300 \ r600g.HAVE_GALLIUM_R600 \ radeonsi.HAVE_GALLIUM_RADEONSI \ vmwgfx.HAVE_GALLIUM_VMWGFX \ vc4.HAVE_GALLIUM_VC4 \ virgl.HAVE_GALLIUM_VIRGL \ - etnaviv.HAVE_GALLIUM_ETNAVIV \ - imx.HAVE_GALLIUM_IMX + etnaviv.HAVE_GALLIUM_ETNAVIV ifeq ($(BOARD_GPU_DRIVERS),all) MESA_BUILD_CLASSIC := $(filter HAVE_%, $(subst ., , $(classic_drivers))) diff -Nru mesa-18.3.3/bin/.cherry-ignore mesa-19.0.1/bin/.cherry-ignore --- mesa-18.3.3/bin/.cherry-ignore 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/bin/.cherry-ignore 2019-03-31 23:16:37.000000000 +0000 @@ -1,16 +1,17 @@ -# fixes: Commit was squashed into the respective offenders -c02390f8fcd367c7350db568feabb2f062efca14 egl/wayland: rather obvious build fix -# fixes: The commit addresses b4476138d5ad3f8d30c14ee61f2f375edfdbab2a -ff6f1dd0d3c6b4c15ca51b478b2884d14f6a1e06 meson: libfreedreno depends upon libdrm (for fence support) +# Both of these were already merged with different shas +da48cba61ef6fefb799bf96e6364b70dbf4ec712 +c812c740e60c14060eb89db66039111881a0f42f -# fixes: This commit requires commits aeaf8dbd097 and 7484bc894b9 which did not -# land in branch. -f67dea5e19ef14187be0e8d0f61b1f764c7ccb4f radv: Fix multiview depth clears +# The commit these fix was reverted from 19.0, but fixed for 19.1 due +# to the number of fixes required to make that commit work +8d8f80af3a17354508f2ec9d6559c915d5be351d +0c0c69729b6d72a5297122856c8fe48510e90764 +0881e90c09965818b02e359474a6f7446b41d647 +b031c643491a92a5574c7a4bd659df33f2d89bb6 -# stable The commits aren't suitable in their present form. 
-bfe31c5e461a1330d6f606bf5310685eff1198dd nir/builder: Add nir_i2i and nir_u2u helpers which take a bit size -abfe674c54bee6f8fdcae411b07db89c10b9d530 spirv: Handle arbitrary bit sizes for deref array indices +# These were manually rebased by Jason, thanks! +8ab95b849e66f3221d80a67eef2ec6e3730901a8 +5c30fffeec1732c21d600c036f95f8cdb1bb5487 -# warn The commits refer stale sha, yet don't fix anything in particular. -98984b7cdd79c15cc7331c791f8be61e873b8bbd Revert "mapi/new: sort by slot number" -9f86f1da7c68b5b900cd6f60925610ff1225a72d egl: add glvnd entrypoints for EGL_MESA_query_driver +# This doesn't actually appliy to 19.0 +29179f58c6ba8099859ea25900214dbbd3814a92 \ No newline at end of file diff -Nru mesa-18.3.3/bin/get-pick-list.sh mesa-19.0.1/bin/get-pick-list.sh --- mesa-18.3.3/bin/get-pick-list.sh 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/bin/get-pick-list.sh 2019-03-31 23:16:37.000000000 +0000 @@ -13,12 +13,12 @@ is_stable_nomination() { - git show --summary "$1" | grep -q -i -o "CC:.*mesa-stable" + git show --pretty=medium --summary "$1" | grep -q -i -o "CC:.*mesa-stable" } is_typod_nomination() { - git show --summary "$1" | grep -q -i -o "CC:.*mesa-dev" + git show --pretty=medium --summary "$1" | grep -q -i -o "CC:.*mesa-dev" } fixes= diff -Nru mesa-18.3.3/bin/install_megadrivers.py mesa-19.0.1/bin/install_megadrivers.py --- mesa-18.3.3/bin/install_megadrivers.py 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/bin/install_megadrivers.py 2019-03-31 23:16:37.000000000 +0000 @@ -35,7 +35,11 @@ args = parser.parse_args() if os.path.isabs(args.libdir): - to = os.path.join(os.environ.get('DESTDIR', '/'), args.libdir[1:]) + destdir = os.environ.get('DESTDIR') + if destdir: + to = os.path.join(destdir, args.libdir[1:]) + else: + to = args.libdir else: to = os.path.join(os.environ['MESON_INSTALL_DESTDIR_PREFIX'], args.libdir) diff -Nru mesa-18.3.3/bin/meson-cmd-extract.py mesa-19.0.1/bin/meson-cmd-extract.py --- mesa-18.3.3/bin/meson-cmd-extract.py 
1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/bin/meson-cmd-extract.py 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +# Copyright © 2019 Intel Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""This script reads a meson build directory and gives back the command line it +was configured with. + +This only works for meson 0.49.0 and newer. 
+""" + +import argparse +import ast +import configparser +import pathlib +import sys + + +def parse_args() -> argparse.Namespace: + """Parse arguments.""" + parser = argparse.ArgumentParser() + parser.add_argument( + 'build_dir', + help='Path the meson build directory') + args = parser.parse_args() + return args + + +def load_config(path: pathlib.Path) -> configparser.ConfigParser: + """Load config file.""" + conf = configparser.ConfigParser() + with path.open() as f: + conf.read_file(f) + return conf + + +def build_cmd(conf: configparser.ConfigParser) -> str: + """Rebuild the command line.""" + args = [] + for k, v in conf['options'].items(): + if ' ' in v: + args.append(f'-D{k}="{v}"') + else: + args.append(f'-D{k}={v}') + + cf = conf['properties'].get('cross_file') + if cf: + args.append('--cross-file={}'.format(cf)) + nf = conf['properties'].get('native_file') + if nf: + # this will be in the form "['str', 'str']", so use ast.literal_eval to + # convert it to a list of strings. + nf = ast.literal_eval(nf) + args.extend(['--native-file={}'.format(f) for f in nf]) + return ' '.join(args) + + +def main(): + args = parse_args() + path = pathlib.Path(args.build_dir, 'meson-private', 'cmd_line.txt') + if not path.exists(): + print('Cannot find the necessary file to rebuild command line. 
' + 'Is your meson version >= 0.49.0?', file=sys.stderr) + sys.exit(1) + + conf = load_config(path) + cmd = build_cmd(conf) + print(cmd) + + +if __name__ == '__main__': + main() diff -Nru mesa-18.3.3/configure.ac mesa-19.0.1/configure.ac --- mesa-18.3.3/configure.ac 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/configure.ac 2019-03-31 23:16:37.000000000 +0000 @@ -52,6 +52,19 @@ ;; esac +AC_ARG_ENABLE(autotools, + [AS_HELP_STRING([--enable-autotools], + [Enable the use of this autotools based build configuration])], + [enable_autotools=$enableval], [enable_autotools=no]) + +if test "x$enable_autotools" != "xyes" ; then + AC_MSG_ERROR([the autotools build system has been deprecated in favour of + meson and will be removed eventually. For instructions on how to use meson + see https://www.mesa3d.org/meson.html. + If you still want to use the autotools build, then add --enable-autotools + to the configure command line.]) +fi + # Support silent build rules, requires at least automake-1.11. Disable # by either passing --disable-silent-rules to configure or passing V=1 # to make @@ -74,7 +87,7 @@ # in the first entry. LIBDRM_REQUIRED=2.4.75 LIBDRM_RADEON_REQUIRED=2.4.71 -LIBDRM_AMDGPU_REQUIRED=2.4.95 +LIBDRM_AMDGPU_REQUIRED=2.4.97 LIBDRM_INTEL_REQUIRED=2.4.75 LIBDRM_NVVIEUX_REQUIRED=2.4.66 LIBDRM_NOUVEAU_REQUIRED=2.4.66 @@ -107,9 +120,9 @@ LLVM_REQUIRED_GALLIUM=3.3.0 LLVM_REQUIRED_OPENCL=3.9.0 LLVM_REQUIRED_R600=3.9.0 -LLVM_REQUIRED_RADEONSI=6.0.0 -LLVM_REQUIRED_RADV=6.0.0 -LLVM_REQUIRED_SWR=6.0.0 +LLVM_REQUIRED_RADEONSI=7.0.0 +LLVM_REQUIRED_RADV=7.0.0 +LLVM_REQUIRED_SWR=7.0.0 dnl Check for progs AC_PROG_CPP @@ -1395,7 +1408,7 @@ AC_ARG_WITH([gallium-drivers], [AS_HELP_STRING([--with-gallium-drivers@<:@=DIRS...@:>@], [comma delimited Gallium drivers list, e.g. 
- "i915,nouveau,r300,r600,radeonsi,freedreno,pl111,svga,swrast,swr,tegra,v3d,vc4,virgl,etnaviv,imx" + "i915,nouveau,r300,r600,radeonsi,freedreno,kmsro,svga,swrast,swr,tegra,v3d,vc4,virgl,etnaviv" @<:@default=r300,r600,svga,swrast@:>@])], [with_gallium_drivers="$withval"], [with_gallium_drivers="$GALLIUM_DRIVERS_DEFAULT"]) @@ -1909,7 +1922,7 @@ dri3_modifier_modules="xcb-dri3 >= $XCBDRI3_MODIFIERS_REQUIRED xcb-present >= $XCBPRESENT_MODIFIERS_REQUIRED" PKG_CHECK_MODULES([XCB_DRI3_MODIFIERS], [$dri3_modifier_modules], [have_dri3_modifiers=yes], [have_dri3_modifiers=no]) - if test "x$have_dri3_modifiers" == xyes; then + if test "x$have_dri3_modifiers" = xyes; then DEFINES="$DEFINES -DHAVE_DRI3_MODIFIERS" fi fi @@ -2728,9 +2741,6 @@ PKG_CHECK_MODULES([ETNAVIV], [libdrm >= $LIBDRM_ETNAVIV_REQUIRED libdrm_etnaviv >= $LIBDRM_ETNAVIV_REQUIRED]) require_libdrm "etnaviv" ;; - ximx) - HAVE_GALLIUM_IMX=yes - ;; xtegra) HAVE_GALLIUM_TEGRA=yes require_libdrm "tegra" @@ -2817,8 +2827,8 @@ DEFINES="$DEFINES -DUSE_V3D_SIMULATOR"], [USE_V3D_SIMULATOR=no]) ;; - xpl111) - HAVE_GALLIUM_PL111=yes + xkmsro) + HAVE_GALLIUM_KMSRO=yes ;; xvirgl) HAVE_GALLIUM_VIRGL=yes @@ -2835,8 +2845,8 @@ fi # XXX: Keep in sync with LLVM_REQUIRED_SWR -AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != x6.0.0 -a \ - "x$LLVM_VERSION" != x6.0.1) +AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != x7.0.0 -a \ + "x$LLVM_VERSION" != x7.0.1) if test "x$enable_llvm" = "xyes" -a "$with_gallium_drivers"; then llvm_require_version $LLVM_REQUIRED_GALLIUM "gallium" @@ -2851,12 +2861,8 @@ dnl We need to validate some needed dependencies for renderonly drivers. 
-if test "x$HAVE_GALLIUM_ETNAVIV" != xyes -a "x$HAVE_GALLIUM_IMX" = xyes ; then - AC_MSG_ERROR([Building with imx requires etnaviv]) -fi - -if test "x$HAVE_GALLIUM_VC4" != xyes -a "x$HAVE_GALLIUM_PL111" = xyes ; then - AC_MSG_ERROR([Building with pl111 requires vc4]) +if test "x$HAVE_GALLIUM_VC4" != xyes -a "x$HAVE_GALLIUM_KMSRO" = xyes ; then + AC_MSG_ERROR([Building with kmsro requires vc4]) fi if test "x$HAVE_GALLIUM_NOUVEAU" != xyes -a "x$HAVE_GALLIUM_TEGRA" = xyes; then @@ -2904,6 +2910,7 @@ LLVM_LDFLAGS=`$LLVM_CONFIG --ldflags` LLVM_CFLAGS=$LLVM_CPPFLAGS # CPPFLAGS seem to be sufficient LLVM_CXXFLAGS=`strip_unwanted_llvm_flags "$LLVM_CONFIG --cxxflags"` + LLVM_CXXFLAGS="$CXX11_CXXFLAGS $LLVM_CXXFLAGS" dnl Set LLVM_LIBS - This is done after the driver configuration so dnl that drivers can add additional components to LLVM_COMPONENTS. @@ -2938,11 +2945,11 @@ fi dnl The gallium-xlib GLX and gallium OSMesa targets directly embed the - dnl swr/llvmpipe driver into the final binary. Adding LLVM_LIBS results in + dnl swr/llvmpipe driver into the final binary. Adding LLVM_LIBS results in dnl the LLVM library propagated in the Libs.private of the respective .pc dnl file which ensures complete dependency information when statically dnl linking. 
- if test "x$enable_glx" == xgallium-xlib; then + if test "x$enable_glx" = xgallium-xlib; then GL_PC_LIB_PRIV="$GL_PC_LIB_PRIV $LLVM_LIBS" fi if test "x$enable_gallium_osmesa" = xyes; then @@ -2952,14 +2959,13 @@ AM_CONDITIONAL(HAVE_GALLIUM_SVGA, test "x$HAVE_GALLIUM_SVGA" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_I915, test "x$HAVE_GALLIUM_I915" = xyes) -AM_CONDITIONAL(HAVE_GALLIUM_PL111, test "x$HAVE_GALLIUM_PL111" = xyes) +AM_CONDITIONAL(HAVE_GALLIUM_KMSRO, test "x$HAVE_GALLIUM_KMSRO" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_R300, test "x$HAVE_GALLIUM_R300" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_R600, test "x$HAVE_GALLIUM_R600" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_RADEONSI, test "x$HAVE_GALLIUM_RADEONSI" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_NOUVEAU, test "x$HAVE_GALLIUM_NOUVEAU" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_FREEDRENO, test "x$HAVE_GALLIUM_FREEDRENO" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_ETNAVIV, test "x$HAVE_GALLIUM_ETNAVIV" = xyes) -AM_CONDITIONAL(HAVE_GALLIUM_IMX, test "x$HAVE_GALLIUM_IMX" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_TEGRA, test "x$HAVE_GALLIUM_TEGRA" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_SOFTPIPE, test "x$HAVE_GALLIUM_SOFTPIPE" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_LLVMPIPE, test "x$HAVE_GALLIUM_LLVMPIPE" = xyes) @@ -2998,6 +3004,7 @@ AM_CONDITIONAL(HAVE_BROADCOM_DRIVERS, test "x$HAVE_GALLIUM_VC4" = xyes -o \ "x$HAVE_GALLIUM_V3D" = xyes) +AM_CONDITIONAL(HAVE_FREEDRENO_DRIVERS, test "x$HAVE_GALLIUM_FREEDRENO" = xyes) AM_CONDITIONAL(HAVE_INTEL_DRIVERS, test "x$HAVE_INTEL_VULKAN" = xyes -o \ "x$HAVE_I965_DRI" = xyes) @@ -3044,7 +3051,7 @@ AC_SUBST([XVMC_MINOR], 0) AC_SUBST([XA_MAJOR], 2) -AC_SUBST([XA_MINOR], 4) +AC_SUBST([XA_MINOR], 5) AC_SUBST([XA_PATCH], 0) AC_SUBST([XA_VERSION], "$XA_MAJOR.$XA_MINOR.$XA_PATCH") @@ -3090,6 +3097,7 @@ src/amd/vulkan/Makefile src/broadcom/Makefile src/compiler/Makefile + src/freedreno/Makefile src/egl/Makefile src/egl/main/egl.pc src/egl/wayland/wayland-drm/Makefile @@ -3100,7 +3108,7 @@ src/gallium/drivers/i915/Makefile 
src/gallium/drivers/llvmpipe/Makefile src/gallium/drivers/nouveau/Makefile - src/gallium/drivers/pl111/Makefile + src/gallium/drivers/kmsro/Makefile src/gallium/drivers/r300/Makefile src/gallium/drivers/r600/Makefile src/gallium/drivers/radeonsi/Makefile @@ -3109,7 +3117,6 @@ src/gallium/drivers/swr/Makefile src/gallium/drivers/tegra/Makefile src/gallium/drivers/etnaviv/Makefile - src/gallium/drivers/imx/Makefile src/gallium/drivers/v3d/Makefile src/gallium/drivers/vc4/Makefile src/gallium/drivers/virgl/Makefile @@ -3144,11 +3151,10 @@ src/gallium/tests/trivial/Makefile src/gallium/tests/unit/Makefile src/gallium/winsys/etnaviv/drm/Makefile - src/gallium/winsys/imx/drm/Makefile src/gallium/winsys/freedreno/drm/Makefile src/gallium/winsys/i915/drm/Makefile src/gallium/winsys/nouveau/drm/Makefile - src/gallium/winsys/pl111/drm/Makefile + src/gallium/winsys/kmsro/drm/Makefile src/gallium/winsys/radeon/drm/Makefile src/gallium/winsys/amdgpu/drm/Makefile src/gallium/winsys/svga/drm/Makefile diff -Nru mesa-18.3.3/debian/changelog mesa-19.0.1/debian/changelog --- mesa-18.3.3/debian/changelog 2019-02-01 12:03:52.000000000 +0000 +++ mesa-19.0.1/debian/changelog 2019-03-31 23:20:01.000000000 +0000 @@ -1,3 +1,10 @@ +mesa (19.0.1-0~c~padoka0) cosmic; urgency=high + + * backport from stable branch 19.0.1 + * compiled with llvm 8.x stable + + -- Paulo Dias Mon, 01 Apr 2019 00:20:01 +0100 + mesa (18.3.3-1~c~padoka0) cosmic; urgency=high * backport from stable branch 18.3.3 diff -Nru mesa-18.3.3/debian/control mesa-19.0.1/debian/control --- mesa-18.3.3/debian/control 2019-01-13 21:24:53.000000000 +0000 +++ mesa-19.0.1/debian/control 2019-03-31 23:17:04.000000000 +0000 @@ -39,11 +39,11 @@ python-mako, flex, bison, - llvm-7-dev (>= 1:7~) [amd64 arm64 armel armhf i386 kfreebsd-amd64 kfreebsd-i386 mips mips64el mipsel powerpc ppc64 sparc64 ppc64el], + llvm-8-dev (>= 1:8~) [amd64 arm64 armel armhf i386 kfreebsd-amd64 kfreebsd-i386 mips mips64el mipsel powerpc ppc64 sparc64 ppc64el], 
libelf-dev [amd64 arm64 armel armhf i386 kfreebsd-amd64 kfreebsd-i386 mips mips64el mipsel powerpc ppc64 sparc64 ppc64el], libwayland-dev (>= 1.15.0) [linux-any], libwayland-egl-backend-dev (>= 1.15.0) [linux-any], - libclang-7-dev (>= 1:7~) [amd64 arm64 armel armhf i386 kfreebsd-amd64 kfreebsd-i386 mips mips64el mipsel powerpc ppc64 sparc64 ppc64el], + libclang-8-dev (>= 1:8~) [amd64 arm64 armel armhf i386 kfreebsd-amd64 kfreebsd-i386 mips mips64el mipsel powerpc ppc64 sparc64 ppc64el], libclc-dev (>= 0.2.0+git20180917-1~) [amd64 arm64 armel armhf i386 kfreebsd-amd64 kfreebsd-i386 mips mips64el mipsel powerpc ppc64 sparc64 ppc64el], wayland-protocols (>= 1.9), zlib1g-dev, diff -Nru mesa-18.3.3/debian/patches/version mesa-19.0.1/debian/patches/version --- mesa-18.3.3/debian/patches/version 2019-02-01 12:03:52.000000000 +0000 +++ mesa-19.0.1/debian/patches/version 2019-03-31 23:19:25.000000000 +0000 @@ -1,5 +1,5 @@ --- a/VERSION +++ b/VERSION @@ -1 +1 @@ --18.3.3 -+18.3.3 - padoka PPA +-19.0.1 ++19.0.1 - padoka PPA diff -Nru mesa-18.3.3/debian/rules mesa-19.0.1/debian/rules --- mesa-18.3.3/debian/rules 2019-01-13 21:25:16.000000000 +0000 +++ mesa-19.0.1/debian/rules 2019-03-31 23:17:49.000000000 +0000 @@ -99,7 +99,7 @@ GALLIUM_DRIVERS += radeonsi confflags_GALLIUM += --enable-llvm confflags_GALLIUM += --enable-opencl --enable-opencl-icd - confflags_GALLIUM += ac_cv_path_LLVM_CONFIG=llvm-config-7 + confflags_GALLIUM += ac_cv_path_LLVM_CONFIG=llvm-config-8 GALLIUM_DRIVERS += swrast # nine makes sense only on archs that build wine @@ -148,6 +148,7 @@ --enable-shared-glapi \ --disable-xvmc \ --disable-omx-bellagio \ + --enable-autotools \ $(confflags_DIRECT_RENDERING) \ $(confflags_GBM) \ $(confflags_DRI3) \ diff -Nru mesa-18.3.3/docs/autoconf.html mesa-19.0.1/docs/autoconf.html --- mesa-18.3.3/docs/autoconf.html 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/docs/autoconf.html 2019-03-31 23:16:37.000000000 +0000 @@ -26,6 +26,12 @@ +

ATTENTION:

+

+ The autotools build is being replaced by the meson + build system. If you haven't yet now is a good time to try using meson and + report any issues you run into. +

1. Basic Usage

diff -Nru mesa-18.3.3/docs/features.txt mesa-19.0.1/docs/features.txt --- mesa-18.3.3/docs/features.txt 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/docs/features.txt 2019-03-31 23:16:37.000000000 +0000 @@ -319,7 +319,7 @@ GL_EXT_memory_object DONE (radeonsi) GL_EXT_memory_object_fd DONE (radeonsi) GL_EXT_memory_object_win32 not started - GL_EXT_render_snorm DONE (i965) + GL_EXT_render_snorm DONE (i965, radeonsi) GL_EXT_semaphore DONE (radeonsi) GL_EXT_semaphore_fd DONE (radeonsi) GL_EXT_semaphore_win32 not started @@ -338,7 +338,7 @@ GL_OES_texture_float_linear DONE (freedreno, i965, r300, r600, radeonsi, nv30, nv50, nvc0, softpipe, llvmpipe) GL_OES_texture_half_float DONE (freedreno, i965, r300, r600, radeonsi, nv30, nv50, nvc0, softpipe, llvmpipe) GL_OES_texture_half_float_linear DONE (freedreno, i965, r300, r600, radeonsi, nv30, nv50, nvc0, softpipe, llvmpipe) - GL_OES_texture_view DONE (i965/gen8+) + GL_OES_texture_view DONE (freedreno, i965/gen8+, r600, radeonsi, nv50, nvc0, softpipe, llvmpipe, swr) GL_OES_viewport_array DONE (i965, nvc0, radeonsi) GLX_ARB_context_flush_control not started GLX_ARB_robustness_application_isolation not started diff -Nru mesa-18.3.3/docs/index.html mesa-19.0.1/docs/index.html --- mesa-18.3.3/docs/index.html 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/docs/index.html 2019-03-31 23:16:37.000000000 +0000 @@ -15,6 +15,53 @@

News

+

January 17, 2019

+

+Mesa 18.3.2 is released. +This is a bug-fix release. +

+ +

December 27, 2018

+

+Mesa 18.2.8 is released. +This is a bug-fix release. +
+NOTE: It is anticipated that 18.2.8 will be the final release in the +18.2 series. Users of 18.2 are encouraged to migrate to the 18.3 +series in order to obtain future fixes. +

+ +

December 13, 2018

+

+Mesa 18.2.7 is released. +This is a bug-fix release. +

+ +

December 11, 2018

+

+Mesa 18.3.1 is released. +This is a bug-fix release. +

+ +

December 7, 2018

+

+Mesa 18.3.0 is released. This is a +new development release. See the release notes for more information +about the release. +

+ +

November 28, 2018

+

+Mesa 18.2.6 is released. +This is a bug-fix release. +

+ +

November 15, 2018

+

+Mesa 18.2.5 is released. +This is a bug-fix release. +

+

October 31, 2018

Mesa 18.2.4 is released. diff -Nru mesa-18.3.3/docs/install.html mesa-19.0.1/docs/install.html --- mesa-18.3.3/docs/install.html 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/docs/install.html 2019-03-31 23:16:37.000000000 +0000 @@ -22,6 +22,7 @@

  • General prerequisites
  • For DRI and hardware acceleration +
  • Building with meson
  • Building with autoconf (Linux/Unix/X11)
  • Building with SCons (Windows/Linux)
  • Building with AOSP (Android) @@ -39,9 +40,10 @@

      -
    • Autoconf is required when building on *nix platforms. +
    • meson is recommended when building on *nix platforms. +
    • Autoconf is another option when building on *nix platforms.
    • SCons is required for building on -Windows and optional for Linux (it's an alternative to autoconf/automake.) +Windows and optional for Linux (it's an alternative to autoconf/automake or meson.)
    • Android Build system when building as native Android component. Autoconf is used when when building ARC. @@ -72,7 +74,9 @@
      • Python - Python is required. -Version 2.7 or later should work. +When building with scons 2.7 is required. +When building with meson 3.5 or newer is required. +When building with autotools 2.7, or 3.5 or later are required.
      • Python Mako module - Python Mako module is required. Version 0.8.0 or later should work. @@ -111,11 +115,31 @@ ... # others +

        2. Building with meson

        -

        2. Building with autoconf (Linux/Unix/X11)

        +

        +Meson is the latest build system in mesa, it is currently able to build for +*nix systems like Linux and BSD, and will be able to build for windows as well. +

        + +

        +The general approach is: +

        +
        +  meson builddir/
        +  ninja -C builddir/
        +  sudo ninja -C builddir/ install
        +
        +

        +Please read the detailed meson instructions +for more information +

        + +

        3. Building with autoconf (Linux/Unix/X11)

        -The primary method to build Mesa on Unix systems is with autoconf. +Although meson is recommended, another supported way to build on *nix systems +is with autoconf.

        @@ -133,7 +157,7 @@ -

        3. Building with SCons (Windows/Linux)

        +

        4. Building with SCons (Windows/Linux)

        To build Mesa with SCons on Linux or Windows do @@ -169,7 +193,7 @@ -

        4. Building with AOSP (Android)

        +

        5. Building with AOSP (Android)

        Currently one can build Mesa for Android as part of the AOSP project, yet @@ -188,7 +212,7 @@

        -

        5. Library Information

        +

        6. Library Information

        When compilation has finished, look in the top-level lib/ @@ -226,7 +250,7 @@

        -

        6. Building OpenGL programs with pkg-config

        +

        7. Building OpenGL programs with pkg-config

        Running make install will install package configuration files diff -Nru mesa-18.3.3/docs/mesa.css mesa-19.0.1/docs/mesa.css --- mesa-18.3.3/docs/mesa.css 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/docs/mesa.css 2019-03-31 23:16:37.000000000 +0000 @@ -29,6 +29,9 @@ /*font-family: monospace;*/ font-size: 10pt; /*color: black;*/ + background-color: #eee; + margin-left: 2em; + padding: .5em; } iframe { diff -Nru mesa-18.3.3/docs/meson.html mesa-19.0.1/docs/meson.html --- mesa-18.3.3/docs/meson.html 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/docs/meson.html 2019-03-31 23:16:37.000000000 +0000 @@ -16,6 +16,11 @@

        Compilation and Installation using Meson

        + +

        1. Basic Usage

        The Meson build system is generally considered stable and ready @@ -48,9 +53,13 @@ along with a build directory to view the selected options for. This will show your meson global arguments and project arguments, along with their defaults and your local settings. +

        +

        Meson does not currently support listing options before configure a build directory, but this feature is being discussed upstream. +For now, the only way to see what options exist is to look at the +meson_options.txt file at the root of the project.

        @@ -105,14 +114,14 @@
         
        Environment Variables

        Meson supports the standard CC and CXX environment variables for -changing the default compiler, and CFLAGS, CXXFLAGS, and LDFLAGS for setting -options to the compiler and linker during the initial configuration. +changing the default compiler. Meson does support CFLAGS, CXXFLAGS, etc. But +their use is discouraged because of the many caveats in using them. Instead it +is recomended to use -D${lang}_args and +-D${lang}_link_args instead. Among the benefits of these options +is that they are guaranteed to persist across rebuilds and reconfigurations. -These arguments are consumed and stored by meson when it is initialized. To -change these flags after the build is initialized (or when doing a first -initialization), consider using -D${lang}_args and --D${lang}_link_args instead. Meson will never change compiler in a -configured build directory. +Meson does not allow changing compiler in a configured builddir, you will need +to create a new build dir for a different compiler.

        @@ -135,11 +144,56 @@
         
         
        LLVM

        Meson includes upstream logic to wrap llvm-config using its standard -dependency interface. It will search $PATH (or %PATH% on windows) for -llvm-config (and llvm-config$version and llvm-config-$version), so using an -LLVM from a non-standard path is as easy as -PATH=/path/with/llvm-config:$PATH meson build. +dependency interface. +

        + +

        +As of meson 0.49.0 meson also has the concept of a +"native file", +these files provide information about the native build environment (as opposed +to a cross build environment). They are ini formatted and can override where to +find llvm-config: + +custom-llvm.ini +

        +    [binaries]
        +    llvm-config = '/usr/local/bin/llvm/llvm-config'
        +
        + +Then configure meson: + +
        +    meson builddir/ --native-file custom-llvm.ini
        +

        + +

        +For selecting llvm-config for cross compiling a +"cross file" +should be used. It uses the same format as the native file above: + +cross-llvm.ini +

        +    [binaries]
        +    ...
        +    llvm-config = '/usr/lib/llvm-config-32'
        +
        + +Then configure meson: + +
        +    meson builddir/ --cross-file cross-llvm.ini
        +
        + +See the Cross Compilation section for more information. +

        + +

        +For older versions of meson $PATH (or %PATH% on +windows) will be searched for llvm-config (and llvm-config$version and +llvm-config-$version), you can override this environment variable to control +the search: PATH=/path/with/llvm-config:$PATH meson build. +

        @@ -190,6 +244,93 @@
        +

        2. Cross-compilation and 32-bit builds

        + +

        Meson supports +cross-compilation by specifying a number of binary paths and +settings in a file and passing this file to meson or +meson configure with the --cross-file +parameter.

        + +

        This file can live at any location, but you can use the bare filename +(without the folder path) if you put it in $XDG_DATA_HOME/meson/cross or +~/.local/share/meson/cross

        + +

        Below are a few example of cross files, but keep in mind that you +will likely have to alter them for your system.

        + +

        +Those running on ArchLinux can use the AUR-maintained packages for some +of those, as they'll have the right values for your system: +

        +

        + +

        +32-bit build on x86 linux: +

        +[binaries]
        +c = '/usr/bin/gcc'
        +cpp = '/usr/bin/g++'
        +ar = '/usr/bin/gcc-ar'
        +strip = '/usr/bin/strip'
        +pkgconfig = '/usr/bin/pkg-config-32'
        +llvm-config = '/usr/bin/llvm-config32'
        +
        +[properties]
        +c_args = ['-m32']
        +c_link_args = ['-m32']
        +cpp_args = ['-m32']
        +cpp_link_args = ['-m32']
        +
        +[host_machine]
        +system = 'linux'
        +cpu_family = 'x86'
        +cpu = 'i686'
        +endian = 'little'
        +
        +

        + +

        +64-bit build on ARM linux: +

        +[binaries]
        +c = '/usr/bin/aarch64-linux-gnu-gcc'
        +cpp = '/usr/bin/aarch64-linux-gnu-g++'
        +ar = '/usr/bin/aarch64-linux-gnu-gcc-ar'
        +strip = '/usr/bin/aarch64-linux-gnu-strip'
        +pkgconfig = '/usr/bin/aarch64-linux-gnu-pkg-config'
        +exe_wrapper = '/usr/bin/qemu-aarch64-static'
        +
        +[host_machine]
        +system = 'linux'
        +cpu_family = 'aarch64'
        +cpu = 'aarch64'
        +endian = 'little'
        +
        +

        + +

        +64-bit build on x86 windows: +

        +[binaries]
        +c = '/usr/bin/x86_64-w64-mingw32-gcc'
        +cpp = '/usr/bin/x86_64-w64-mingw32-g++'
        +ar = '/usr/bin/x86_64-w64-mingw32-ar'
        +strip = '/usr/bin/x86_64-w64-mingw32-strip'
        +pkgconfig = '/usr/bin/x86_64-w64-mingw32-pkg-config'
        +exe_wrapper = 'wine'
        +
        +[host_machine]
        +system = 'windows'
        +cpu_family = 'x86_64'
        +cpu = 'i686'
        +endian = 'little'
        +
        +

        +
  • diff -Nru mesa-18.3.3/docs/release-calendar.html mesa-19.0.1/docs/release-calendar.html --- mesa-18.3.3/docs/release-calendar.html 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/docs/release-calendar.html 2019-03-31 23:16:37.000000000 +0000 @@ -23,6 +23,16 @@ The table below lists the date and release manager that is expected to do the specific release.
    +Regular updates will ensure that the schedule for the current and the +next two feature releases are shown in the table. +
    +In order to keep the whole releasing team up to date with the tools +used, best practices and other details, the member in charge of the +next feature release will be in constant rotation. +
    +The way the release schedule works is +explained here. +
    Take a look here if you'd like to nominate a patch in the next stable release.

    @@ -39,47 +49,129 @@ Notes -18.2 -2018-11-14 -18.2.5 -Juan A. Suarez - +18.3 +2019-01-30 +18.3.3 +Emil Velikov + -2018-11-28 -18.2.6 -Juan A. Suarez - +2019-02-13 +18.3.4 +Emil Velikov + -2018-12-12 -18.2.7 -Juan A. Suarez -Last planned 18.2.x release +2019-02-27 +18.3.5 +Emil Velikov + -18.3 -2018-10-31 -18.3.0-rc1 + +2019-03-13 +18.3.6 Emil Velikov - +Last planned 18.3.x release + + +19.0 +2019-01-29 +19.0.0-rc1 +Dylan Baker + + + +2019-02-05 +19.0.0-rc2 +Dylan Baker + + + +2019-02-12 +19.0.0-rc3 +Dylan Baker + + + +2019-02-19 +19.0.0-rc4 +Dylan Baker +Last planned RC/Final release + + +19.1 +2019-04-30 +19.1.0-rc1 +Andres Gomez + -2018-11-07 -18.3.0-rc2 +2019-05-07 +19.1.0-rc2 +Andres Gomez + + + +2019-05-14 +19.1.0-rc3 +Andres Gomez + + + +2019-05-21 +19.1.0-rc4 +Andres Gomez +Last planned RC/Final release + + +19.2 +2019-08-06 +19.2.0-rc1 Emil Velikov - + -2018-11-14 -18.3.0-rc3 +2019-08-13 +19.2.0-rc2 Emil Velikov - + -2018-11-21 -18.3.0-rc4 +2019-08-20 +19.2.0-rc3 Emil Velikov -Last planned RC/final release + + + +2019-08-27 +19.2.0-rc4 +Emil Velikov +Last planned RC/Final release + + +19.3 +2019-10-15 +19.3.0-rc1 +Juan A. Suarez + + + +2019-10-22 +19.3.0-rc2 +Juan A. Suarez + + + +2019-10-29 +19.3.0-rc3 +Juan A. Suarez + + + +2019-11-05 +19.3.0-rc4 +Juan A. Suarez +Last planned RC/Final release diff -Nru mesa-18.3.3/docs/releasing.html mesa-19.0.1/docs/releasing.html --- mesa-18.3.3/docs/releasing.html 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/docs/releasing.html 2019-03-31 23:16:37.000000000 +0000 @@ -56,9 +56,10 @@

    Releases should happen on Wednesdays. Delays can occur although those -should be keep to a minimum. +should be kept to a minimum.
    -See our calendar for the +See our calendar +for information about how the release schedule is planned, and the date and other details for individual releases.

    @@ -67,6 +68,9 @@
  • Available approximately every three months.
  • Initial timeplan available 2-4 weeks before the planned branchpoint (rc1) on the mesa-announce@ mailing list. +
  • Typically, the final release will happen after 4 +candidates. Additional ones may be needed in order to resolve blocking +regressions, though.
  • A pre-release announcement should be available approximately 24 hours before the final (non-rc) release. @@ -84,6 +88,12 @@
    The final release from the 12.0 series Mesa 12.0.5 will be out around the same time (or shortly after) 13.0.1 is out. +
    +This also involves that, as a final release may be delayed due to the +need of additional candidates to solve some blocking regression(s), +the release manager might have to update +the calendar with +additional bug fix releases of the current stable branch.

    @@ -112,18 +122,21 @@

    Done continuously up-to the pre-release announcement.

    -As an exception, patches can be applied up-to the last ~1h before the actual -release. This is made only with explicit permission/request, -and the patch must be very well contained. Thus it cannot -affect more than one driver/subsystem. +Developers can request, as an exception, patches to be applied up-to +the last one hour before the actual release. This is made only +with explicit permission/request, and the patch must be very +well contained. Thus it cannot affect more than one driver/subsystem.

    -

    -Currently Ilia Mirkin and AMD devs have requested "permanent" exception. -

    +

    Following developers have requested permanent exception

    +
      +
    • Ilia Mirkin +
    • AMD team +
    +

    The following must pass:

      -
    • make distcheck, scons and scons check must pass +
    • make distcheck, scons and scons check
    • Testing with different version of system components - LLVM and others is also performed where possible.
    • As a general rule, testing with various combinations of configure @@ -131,9 +144,9 @@

    -Achieved by combination of local ad-hoc scripts, mingw-w64 cross -compilation and AppVeyor plus Travis-CI, the latter as part of their -Github integration. +These are achieved by combination of local testing, +which includes mingw-w64 cross compilation and AppVeyor plus Travis-CI, the +latter two as part of their Github integration.

    @@ -225,7 +238,7 @@ Notes:

      -
    • People are encouraged to test the branch and report regressions.
    • +
    • People are encouraged to test the staging branch and report regressions.
    • The branch history is not stable and it will be rebased,
    @@ -445,7 +458,7 @@ relevant branch.

    -

    Perform basic testing

    +

    Perform basic testing

    Most of the testing should already be done during the diff -Nru mesa-18.3.3/docs/relnotes/18.2.5.html mesa-19.0.1/docs/relnotes/18.2.5.html --- mesa-18.3.3/docs/relnotes/18.2.5.html 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/docs/relnotes/18.2.5.html 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,172 @@ + + + + + Mesa Release Notes + + + + +

    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 18.2.5 Release Notes / November 15, 2018

    + +

    +Mesa 18.2.5 is a bug fix release which fixes bugs found since the 18.2.4 release. +

    +

    +Mesa 18.2.5 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

    + + +

    SHA256 checksums

    +
    +dddc28928b6f4083a0d5120b58c1c8e2dc189ab5c14299c08a386607fdbbdce7  mesa-18.2.5.tar.gz
    +b12c32872832e5353155e1e8026e1f1ab75bba9dc5b178d712045684d26c2b73  mesa-18.2.5.tar.xz
    +
    + + +

    New features

    +

    None

    + + +

    Bug fixes

    + +
      + +
    • Bug 105731 - linker error "fragment shader input ... has no matching output in the previous stage" when previous stage's output declaration in a separate shader object
    • + +
    • Bug 107511 - KHR/khrplatform.h not always installed when needed
    • + +
    • Bug 107626 - [SNB] The graphical corruption and GPU hang occur sometimes on the piglit test "arb_texture_multisample-large-float-texture" with parameter --fp16
    • + +
    • Bug 108082 - warning: unknown warning option '-Wno-format-truncation' [-Wunknown-warning-option]
    • + +
    • Bug 108560 - Mesa 32 is built without sse
    • + +
    + + +

    Changes

    + +

    Andre Heider (1):

    +
      +
    • st/nine: fix stack corruption due to ABI mismatch
    • +
    + +

    Andrii Simiklit (1):

    +
      +
    • i965/batch: don't ignore the 'brw_new_batch' call for a 'new batch'
    • +
    + +

    Dylan Baker (2):

    +
      +
    • meson: link gallium nine with pthreads
    • +
    • meson: fix libatomic tests
    • +
    + +

    Emil Velikov (2):

    +
      +
    • egl/glvnd: correctly report errors when vendor cannot be found
    • +
    • m4: add Werror when checking for compiler flags
    • +
    + +

    Eric Engestrom (6):

    +
      +
    • svga: add missing meson build dependency
    • +
    • clover: add missing meson build dependency
    • +
    • wsi/wayland: use proper VkResult type
    • +
    • wsi/wayland: only finish() a successfully init()ed display
    • +
    • configure: install KHR/khrplatform.h when needed
    • +
    • meson: install KHR/khrplatform.h when needed
    • +
    + +

    Gert Wollny (1):

    +
      +
    • virgl/vtest-winsys: Use virgl version of bind flags
    • +
    + +

    Jonathan Gray (1):

    +
      +
    • intel/tools: include stdarg.h in error2aub
    • +
    + +

    Juan A. Suarez Romero (4):

    +
      +
    • docs: add sha256 checksums for 18.2.4
    • +
    • cherry-ignore: add explicit 18.3 only nominations
    • +
    • cherry-ignore: i965/batch: avoid reverting batch buffer if saved state is an empty
    • +
    • Update version to 18.2.5
    • +
    + +

    Lionel Landwerlin (1):

    +
      +
    • anv/android: mark gralloc allocated BOs as external
    • +
    + +

    Marek Olšák (3):

    +
      +
    • ac: fix ac_build_fdiv for f64
    • +
    • st/va: fix incorrect use of resource_destroy
    • +
    • include: update GL & GLES headers (v2)
    • +
    + +

    Matt Turner (2):

    +
      +
    • util/ralloc: Switch from DEBUG to NDEBUG
    • +
    • util/ralloc: Make sizeof(linear_header) a multiple of 8
    • +
    + +

    Olivier Fourdan (1):

    +
      +
    • wayland/egl: Resize EGL surface on update buffer for swrast
    • +
    + +

    Rhys Perry (1):

    +
      +
    • glsl_to_tgsi: don't create 64-bit integer MAD/FMA
    • +
    + +

    Samuel Pitoiset (2):

    +
      +
    • radv: disable conditional rendering for vkCmdCopyQueryPoolResults()
    • +
    • radv: only expose VK_SUBGROUP_FEATURE_ARITHMETIC_BIT for VI+
    • +
    + +

    Sergii Romantsov (1):

    +
      +
    • autotools: library-dependency when no sse and 32-bit
    • +
    + +

    Timothy Arceri (4):

    +
      +
    • st/mesa: calculate buffer size correctly for packed uniforms
    • +
    • st/glsl_to_nir: fix next_stage gathering
    • +
    • nir: add glsl_type_is_integer() helper
    • +
    • nir: don't pack varyings ints with floats unless flat
    • +
    + +

    Vadym Shovkoplias (1):

    +
      +
    • glsl/linker: Fix out variables linking during single stage
    • +
    + +

    Vinson Lee (1):

    +
      +
    • r600/sb: Fix constant logical operand in assert.
    • +
    + + +
    + + diff -Nru mesa-18.3.3/docs/relnotes/18.2.6.html mesa-19.0.1/docs/relnotes/18.2.6.html --- mesa-18.3.3/docs/relnotes/18.2.6.html 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/docs/relnotes/18.2.6.html 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,179 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 18.2.6 Release Notes / November 28, 2018

    + +

    +Mesa 18.2.6 is a bug fix release which fixes bugs found since the 18.2.5 release. +

    +

    +Mesa 18.2.6 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

    + + +

    SHA256 checksums

    +
    +e0ea1236dbc6c412b02e1b5d7f838072525971a6630246fa82ae4466a6d8a587  mesa-18.2.6.tar.gz
    +9ebafa4f8249df0c718e93b9ca155e3593a1239af303aa2a8b0f2056a7efdc12  mesa-18.2.6.tar.xz
    +
    + + +

    New features

    +

    None

    + + +

    Bug fixes

    + +
      + +
    • Bug 107626 - [SNB] The graphical corruption and GPU hang occur sometimes on the piglit test "arb_texture_multisample-large-float-texture" with parameter --fp16
    • + +
    • Bug 107856 - i965 incorrectly calculates the number of layers for texture views (assert)
    • + +
    • Bug 108630 - [G965] piglit.spec.!opengl 1_2.tex3d-maxsize spins forever
    • + +
    • Bug 108713 - Gallium: use after free with transform feedback
    • + +
    • Bug 108829 - [meson] libglapi exports internal API
    • + +
    + + +

    Changes

    + +

    Andrii Simiklit (1):

    +
      +
    • i965/batch: avoid reverting batch buffer if saved state is an empty
    • +
    + +

    Bas Nieuwenhuizen (1):

    +
      +
    • radv: Fix opaque metadata descriptor last layer.
    • +
    + +

    Brian Paul (1):

    +
      +
    • scons/svga: remove opt from the list of valid build types
    • +
    + +

    Danylo Piliaiev (1):

    +
      +
    • i965: Fix calculation of layers array length for isl_view
    • +
    + +

    Dylan Baker (2):

    +
      +
    • meson: Don't set -Wall
    • +
    • meson: Don't force libva to required from auto
    • +
    + +

    Emil Velikov (13):

    +
      +
    • bin/get-pick-list.sh: simplify git oneline printing
    • +
    • bin/get-pick-list.sh: prefix output with "[stable] "
    • +
    • bin/get-pick-list.sh: handle "typod" usecase.
    • +
    • bin/get-pick-list.sh: handle the fixes tag
    • +
    • bin/get-pick-list.sh: tweak the commit sha matching pattern
    • +
    • bin/get-pick-list.sh: flesh out is_sha_nomination
    • +
    • bin/get-pick-list.sh: handle fixes tag with missing colon
    • +
    • bin/get-pick-list.sh: handle unofficial "broken by" tag
    • +
    • bin/get-pick-list.sh: use test instead of [ ]
    • +
    • bin/get-pick-list.sh: handle reverts prior to the branchpoint
    • +
    • travis: drop unneeded x11proto-xf86vidmode-dev
    • +
    • glx: make xf86vidmode mandatory for direct rendering
    • +
    • travis: adding missing x11-xcb for meson+vulkan
    • +
    + +

    Eric Anholt (1):

    +
      +
    • vc4: Make sure we make ro scanout resources for create_with_modifiers.
    • +
    + +

    Eric Engestrom (5):

    +
      +
    • meson: only run vulkan's meson.build when building vulkan
    • +
    • gbm: remove unnecessary meson include
    • +
    • meson: fix wayland-less builds
    • +
    • egl: add missing glvnd entrypoint for EGL_ANDROID_blob_cache
    • +
    • glapi: add missing visibility args
    • +
    + +

    Erik Faye-Lund (1):

    +
      +
    • mesa/main: remove bogus error for zero-sized images
    • +
    + +

    Gert Wollny (3):

    +
      +
    • mesa: Reference count shaders that are used by transform feedback objects
    • +
    • r600: clean up the GS ring buffers when the context is destroyed
    • +
    • glsl: free or reuse memory allocated for TF varying
    • +
    + +

    Jason Ekstrand (2):

    +
      +
    • nir/lower_alu_to_scalar: Don't try to lower unpack_32_2x16
    • +
    • anv: Put robust buffer access in the pipeline hash
    • +
    + +

    Juan A. Suarez Romero (6):

    +
      +
    • cherry-ignore: add explicit 18.3 only nominations
    • +
    • cherry-ignore: intel/aub_viewer: fix dynamic state printing
    • +
    • cherry-ignore: intel/aub_viewer: Print blend states properly
    • +
    • cherry-ignore: mesa/main: fix incorrect depth-error
    • +
    • docs: add sha256 checksums for 18.2.5
    • +
    • Update version to 18.2.6
    • +
    + +

    Karol Herbst (1):

    +
      +
    • nir/spirv: cast shift operand to u32
    • +
    + +

    Kenneth Graunke (1):

    +
      +
    • i965: Add PCI IDs for new Amberlake parts that are Coffeelake based
    • +
    + +

    Lionel Landwerlin (1):

    +
      +
    • egl/dri: fix error value with unknown drm format
    • +
    + +

    Marek Olšák (2):

    +
      +
    • winsys/amdgpu: fix a buffer leak in amdgpu_bo_from_handle
    • +
    • winsys/amdgpu: fix a device handle leak in amdgpu_winsys_create
    • +
    + +

    Rodrigo Vivi (4):

    +
      +
    • i965: Add a new CFL PCI ID.
    • +
    • intel: aubinator: Adding missed platforms to the error message.
    • +
    • intel: Introducing Amber Lake platform
    • +
    • intel: Introducing Whiskey Lake platform
    • +
    + + +
    + + diff -Nru mesa-18.3.3/docs/relnotes/18.2.7.html mesa-19.0.1/docs/relnotes/18.2.7.html --- mesa-18.3.3/docs/relnotes/18.2.7.html 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/docs/relnotes/18.2.7.html 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,167 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 18.2.7 Release Notes / December 13, 2018

    + +

    +Mesa 18.2.7 is a bug fix release which fixes bugs found since the 18.2.6 release. +

    +

    +Mesa 18.2.7 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

    + + +

    SHA256 checksums

    +
    +092351cfbcd430ec595fbd3a3d8d253fd62c29074e1740d7198b00289ab400f8  mesa-18.2.7.tar.gz
    +9c7b02560d89d77ca279cd21f36ea9a49e9ffc5611f6fe35099357d744d07ae6  mesa-18.2.7.tar.xz
    +
    + + +

    New features

    +

    None

    + + +

    Bug fixes

    + +
      + +
    • Bug 106577 - broken rendering with nine and nouveau (GM107)
    • + +
    • Bug 108245 - RADV/Vega: Low mip levels of large BCn textures get corrupted by vkCmdCopyBufferToImage
    • + +
    • Bug 108311 - Query buffer object support is broken on r600.
    • + +
    • Bug 108894 - [anv] vkCmdCopyBuffer() and vkCmdCopyQueryPoolResults() write-after-write hazard
    • + +
    • Bug 108909 - Vkd3d test failure test_resolve_non_issued_query_data()
    • + +
    • Bug 108914 - blocky shadow artifacts in The Forest with DXVK, RADV_DEBUG=nohiz fixes this
    • + +
    • Bug 108925 - vkCmdCopyQueryPoolResults(VK_QUERY_RESULT_WAIT_BIT) for timestamps with large query count hangs
    • + +
    + + +

    Changes

    + +

    Alex Smith (1):

    +
      +
    • radv: Flush before vkCmdWriteTimestamp() if needed
    • +
    + +

    Bas Nieuwenhuizen (4):

    +
      +
    • radv: Align large buffers to the fragment size.
    • +
    • radv: Clamp gfx9 image view extents to the allocated image extents.
    • +
    • radv/android: Mark android WSI image as shareable.
    • +
    • radv/android: Use buffer metadata to determine scanout compat.
    • +
    + +

    Dave Airlie (2):

    +
      +
    • r600: make suballocator 256-bytes align
    • +
    • radv: use 3d shader for gfx9 copies if dst is 3d
    • +
    + +

    Emil Velikov (2):

    +
      +
    • egl/wayland: bail out when drmGetMagic fails
    • +
    • egl/wayland: plug memory leak in drm_handle_device()
    • +
    + +

    Eric Anholt (3):

    +
      +
    • v3d: Fix a leak of the transfer helper on screen destroy.
    • +
    • vc4: Fix a leak of the transfer helper on screen destroy.
    • +
    • v3d: Fix a leak of the disassembled instruction string during debug dumps.
    • +
    + +

    Eric Engestrom (3):

    +
      +
    • anv: correctly use vulkan 1.0 by default
    • +
    • wsi/display: fix mem leak when freeing swapchains
    • +
    • vulkan/wsi: fix s/,/;/ typo
    • +
    + +

    Gurchetan Singh (3):

    +
      +
    • virgl: quadruple command buffer size
    • +
    • virgl: avoid large inline transfers
    • +
    • virgl: don't mark buffers as unclean after a write
    • +
    + +

    Juan A. Suarez Romero (4):

    +
      +
    • docs: add sha256 checksums for 18.2.6
    • +
    • cherry-ignore: freedreno: Fix autotools build.
    • +
    • cherry-ignore: mesa: Revert INTEL_fragment_shader_ordering support
    • +
    • Update version to 18.2.7
    • +
    + +

    Karol Herbst (1):

    +
      +
    • nv50,nvc0: Fix gallium nine regression regarding sampler bindings
    • +
    + +

    Lionel Landwerlin (2):

    +
      +
    • anv: flush pipeline before query result copies
    • +
    • anv/query: flush render target before copying results
    • +
    + +

    Michal Srb (2):

    +
      +
    • gallium: Constify drisw_loader_funcs struct
    • +
    • drisw: Use separate drisw_loader_funcs for shm
    • +
    + +

    Nicolai Hähnle (2):

    +
      +
    • egl/wayland: rather obvious build fix
    • +
    • meson: link LLVM 'native' component when LLVM is available
    • +
    + +

    Samuel Pitoiset (1):

    +
      +
    • radv: rework the TC-compat HTILE hardware bug with COND_EXEC
    • +
    + +

    Thomas Hellstrom (2):

    +
      +
    • st/xa: Fix a memory leak
    • +
    • winsys/svga: Fix a memory leak
    • +
    + +

    Tobias Klausmann (1):

    +
      +
    • amd/vulkan: meson build - use radv_deps for libvulkan_radeon
    • +
    + +

    Vinson Lee (1):

    +
      +
    • st/xvmc: Add X11 include path.
    • +
    + + +
    + + diff -Nru mesa-18.3.3/docs/relnotes/18.2.8.html mesa-19.0.1/docs/relnotes/18.2.8.html --- mesa-18.3.3/docs/relnotes/18.2.8.html 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/docs/relnotes/18.2.8.html 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,183 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 18.2.8 Release Notes / December 27, 2018

    + +

    +Mesa 18.2.8 is a bug fix release which fixes bugs found since the 18.2.7 release. +

    +

    +Mesa 18.2.8 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

    + + +

    SHA256 checksums

    +
    +77512edc0a84e19c7131a0e2e5ebf1beaf1494dc4b71508fcc92d06d65f9f4f5  mesa-18.2.8.tar.gz
    +1d2ed9fd435d86d95b7215b287258d3e6b1180293a36f688e5a2efc18298d863  mesa-18.2.8.tar.xz
    +
    + + +

    New features

    +

    None

    + + +

    Bug fixes

    + +
      + +
    • Bug 108114 - [vulkancts] new VK_KHR_16bit_storage tests fail.
    • + +
    • Bug 108116 - [vulkancts] stencil partial clear tests fail.
    • + +
    • Bug 108910 - Vkd3d test failure test_multisample_array_texture()
    • + +
    • Bug 108911 - Vkd3d test failure test_clear_render_target_view()
    • + +
    • Bug 109081 - [bisected] [HSW] Regression in clipping.user_defined.clip_* vulkancts tests
    • + +
    + + +

    Changes

    + +

    Alex Deucher (3):

    +
      +
    • pci_ids: add new vega10 pci ids
    • +
    • pci_ids: add new vega20 pci id
    • +
    • pci_ids: add new VegaM pci id
    • +
    + +

    Axel Davy (3):

    +
      +
    • st/nine: Fix volumetexture dtor on ctor failure
    • +
    • st/nine: Bind src not dst in nine_context_box_upload
    • +
    • st/nine: Add src reference to nine_context_range_upload
    • +
    + +

    Caio Marcelo de Oliveira Filho (1):

    +
      +
    • nir: properly clear the entry sources in copy_prop_vars
    • +
    + +

    Dylan Baker (1):

    +
      +
    • meson: Fix ppc64 little endian detection
    • +
    + +

    Emil Velikov (9):

    +
      +
    • glx: mandate xf86vidmode only for "drm" dri platforms
    • +
    • bin/get-pick-list.sh: rework handing of sha nominations
    • +
    • bin/get-pick-list.sh: warn when commit lists invalid sha
    • +
    • meson: don't require glx/egl/gbm with gallium drivers
    • +
    • pipe-loader: meson: reference correct library
    • +
    • TODO: glx: meson: build dri based glx tests, only with -Dglx=dri
    • +
    • glx: meson: drop includes from a link-only library
    • +
    • glx: meson: wire up the dispatch-index-check test
    • +
    • glx/test: meson: assorted include fixes
    • +
    + +

    Eric Anholt (2):

    +
      +
    • v3d: Make sure that a thrsw doesn't split a multop from its umul24.
    • +
    • v3d: Add missing flagging of SYNCB as a TSY op.
    • +
    + +

    Erik Faye-Lund (2):

    +
      +
    • virgl: wrap vertex element state in a struct
    • +
    • virgl: work around bad assumptions in virglrenderer
    • +
    + +

    Iago Toral Quiroga (1):

    +
      +
    • intel/compiler: do not copy-propagate strided regions to ddx/ddy arguments
    • +
    + +

    Ian Romanick (2):

    +
      +
    • i965/vec4/dce: Don't narrow the write mask if the flags are used
    • +
    • Revert "nir/lower_indirect: Bail early if modes == 0"
    • +
    + +

    Jan Vesely (1):

    +
      +
    • clover: Fix build after clang r348827
    • +
    + +

    Jason Ekstrand (1):

    +
      +
    • nir/constant_folding: Fix source bit size logic
    • +
    + +

    Jon Turney (1):

    +
      +
    • glx: Fix compilation with GLX_USE_WINDOWSGL
    • +
    + +

    Juan A. Suarez Romero (7):

    +
      +
    • docs: add sha256 checksums for 18.2.7
    • +
    • cherry-ignore: add explicit 18.3 only nominations
    • +
    • cherry-ignore: meson: libfreedreno depends upon libdrm (for fence support)
    • +
    • cherry-ignore: radv: Fix multiview depth clears
    • +
    • cherry-ignore: nir: properly find the entry to keep in copy_prop_vars
    • +
    • cherry-ignore: intel/compiler: move nir_lower_bool_to_int32 before nir_lower_locals_to_regs
    • +
    • Update version to 18.2.8
    • +
    + +

    Kirill Burtsev (1):

    +
      +
    • loader: free error state, when checking the drawable type
    • +
    + +

    Lionel Landwerlin (1):

    +
      +
    • anv: don't do partial resolve on layer > 0
    • +
    + +

    Rhys Perry (2):

    +
      +
    • radv: don't set surf_index for stencil-only images
    • +
    • ac: split 16-bit ssbo loads that may not be dword aligned
    • +
    + +

    Rob Clark (1):

    +
      +
    • mesa/st/nir: fix missing nir_compact_varyings
    • +
    + +

    Samuel Pitoiset (1):

    +
      +
    • radv: switch on EOP when primitive restart is enabled with triangle strips
    • +
    + +

    Vinson Lee (2):

    +
      +
    • meson: Fix typo.
    • +
    • meson: Fix libsensors detection.
    • +
    + + +
    + + diff -Nru mesa-18.3.3/docs/relnotes/18.3.3.html mesa-19.0.1/docs/relnotes/18.3.3.html --- mesa-18.3.3/docs/relnotes/18.3.3.html 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/docs/relnotes/18.3.3.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,207 +0,0 @@ - - - - - Mesa Release Notes - - - - -
    -

    The Mesa 3D Graphics Library

    -
    - - -
    - -

    Mesa 18.3.3 Release Notes / January 31, 2019

    - -

    -Mesa 18.3.3 is a bug fix release which fixes bugs found since the 18.3.2 release. -

    -

    -Mesa 18.3.3 implements the OpenGL 4.5 API, but the version reported by -glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / -glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. -Some drivers don't support all the features required in OpenGL 4.5. OpenGL -4.5 is only available if requested at context creation. -Compatibility contexts may report a lower version depending on each driver. -

    - - -

    SHA256 checksums

    -
    -TBD
    -
    - - -

    New features

    -

    None

    - - -

    Bug fixes

    - -
      - -
    • Bug 108877 - OpenGL CTS gl43 test cases were interrupted due to segment fault
    • - -
    • Bug 109023 - error: inlining failed in call to always_inline ‘__m512 _mm512_and_ps(__m512, __m512)’: target specific option mismatch
    • - -
    • Bug 109129 - format_types.h:1220: undefined reference to `_mm256_cvtps_ph'
    • - -
    • Bug 109229 - glLinkProgram locks up for ~30 seconds
    • - -
    • Bug 109242 - [RADV] The Witcher 3 system freeze
    • - -
    • Bug 109488 - Mesa 18.3.2 crash on a specific fragment shader (assert triggered) / already fixed on the master branch.
    • - -
    - - -

    Changes

    - -

    Andres Gomez (2):

    -
      -
    • bin/get-pick-list.sh: fix the oneline printing
    • -
    • bin/get-pick-list.sh: fix redirection in sh
    • -
    - -

    Axel Davy (1):

    -
      -
    • st/nine: Immediately upload user provided textures
    • -
    - -

    Bas Nieuwenhuizen (3):

    -
      -
    • radv: Only use 32 KiB per threadgroup on Stoney.
    • -
    • radv: Set partial_vs_wave for pipelines with just GS, not tess.
    • -
    • nir: Account for atomics in copy propagation.
    • -
    - -

    Bruce Cherniak (1):

    -
      -
    • gallium/swr: Fix multi-context sync fence deadlock.
    • -
    - -

    Carsten Haitzler (Rasterman) (2):

    -
      -
    • vc4: Use named parameters for the NEON inline asm.
    • -
    • vc4: Declare the cpu pointers as being modified in NEON asm.
    • -
    - -

    Danylo Piliaiev (1):

    -
      -
    • glsl: Fix copying function's out to temp if dereferenced by array
    • -
    - -

    Dave Airlie (3):

    -
      -
    • dri_interface: add put shm image2 (v2)
    • -
    • glx: add support for putimageshm2 path (v2)
    • -
    • gallium: use put image shm2 path (v2)
    • -
    - -

    Dylan Baker (4):

    -
      -
    • meson: allow building dri driver without window system if osmesa is classic
    • -
    • meson: fix swr KNL build
    • -
    • meson: Fix compiler checks for SWR with ICC
    • -
    • meson: Add warnings and errors when using ICC
    • -
    - -

    Emil Velikov (4):

    -
      -
    • docs: add sha256 checksums for 18.3.2
    • -
    • cherry-ignore: radv: Fix multiview depth clears
    • -
    • cherry-ignore: spirv: Handle arbitrary bit sizes for deref array indices
    • -
    • cherry-ignore: WARNING: Commit XXX lists invalid sha
    • -
    - -

    Eric Anholt (2):

    -
      -
    • vc4: Don't leak the GPU fd for renderonly usage.
    • -
    • vc4: Enable NEON asm on meson cross-builds.
    • -
    - -

    Eric Engestrom (2):

    -
      -
    • configure: EGL requirements only apply if EGL is built
    • -
    • meson/vdpau: add missing soversion
    • -
    - -

    Iago Toral Quiroga (1):

    -
      -
    • anv/device: fix maximum number of images supported
    • -
    - -

    Jason Ekstrand (3):

    -
      -
    • anv/nir: Rework arguments to apply_pipeline_layout
    • -
    • anv: Only parse pImmutableSamplers if the descriptor has samplers
    • -
    • nir/xfb: Fix offset accounting for dvec3/4
    • -
    - -

    Karol Herbst (2):

    -
      -
    • nv50/ir: disable tryCollapseChainedMULs in ConstantFolding for precise instructions
    • -
    • glsl/lower_output_reads: set invariant and precise flags on temporaries
    • -
    - -

    Lionel Landwerlin (1):

    -
      -
    • anv: fix invalid binding table index computation
    • -
    - -

    Marek Olšák (4):

    -
      -
    • radeonsi: also apply the GS hang workaround to draws without tessellation
    • -
    • radeonsi: fix a u_blitter crash after a shader with FBFETCH
    • -
    • radeonsi: fix rendering to tiny viewports where the viewport center is > 8K
    • -
    • st/mesa: purge framebuffers when unbinding a context
    • -
    - -

    Niklas Haas (1):

    -
      -
    • radv: correctly use vulkan 1.0 by default
    • -
    - -

    Pierre Moreau (1):

    -
      -
    • meson: Fix with_gallium_icd to with_opencl_icd
    • -
    - -

    Rob Clark (1):

    -
      -
    • loader: fix the no-modifiers case
    • -
    - -

    Samuel Pitoiset (1):

    -
      -
    • radv: clean up setting partial_es_wave for distributed tess on VI
    • -
    - -

    Timothy Arceri (5):

    -
      -
    • ac/nir_to_llvm: fix interpolateAt* for arrays
    • -
    • ac/nir_to_llvm: fix clamp shadow reference for more hardware
    • -
    • radv/ac: fix some fp16 handling
    • -
    • glsl: use remap location when serialising uniform program resource data
    • -
    • glsl: Copy function out to temp if we don't directly ref a variable
    • -
    - -

    Tomeu Vizoso (1):

    -
      -
    • etnaviv: Consolidate buffer references from framebuffers
    • -
    - -

    Vinson Lee (1):

    -
      -
    • meson: Fix typo.
    • -
    - - - -
    - - - diff -Nru mesa-18.3.3/docs/relnotes/19.0.0.html mesa-19.0.1/docs/relnotes/19.0.0.html --- mesa-18.3.3/docs/relnotes/19.0.0.html 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/docs/relnotes/19.0.0.html 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,2475 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 19.0.0 Release Notes / TBD

    + +

    +Mesa 19.0.0 is a new development release. People who are concerned +with stability and reliability should stick with a previous release or +wait for Mesa 19.0.1. +

    +

    +Mesa 19.0.0 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

    + +

    SHA256 checksums

    +
    +  4c5b9c5227d37c1f6bdc786a6fa7ee7fbce40b2e8a87340c7d3234534ece3304  mesa-19.0.0.tar.gz
    +  5a549dfb40ec31e5c36c47aadac04554cb2e2a8d144a046a378fc16da57e38f8  mesa-19.0.0.tar.xz
    +
    + + +

    New features

    + +
      +
    • GL_AMD_texture_texture4 on all GL 4.0 drivers.
    • +
    • GL_EXT_shader_implicit_conversions on all drivers (ES extension).
    • +
    • GL_EXT_texture_compression_bptc on all GL 4.0 drivers (ES extension).
    • +
    • GL_EXT_texture_compression_rgtc on all GL 3.0 drivers (ES extension).
    • +
    • GL_EXT_render_snorm on gallium drivers (ES extension).
    • +
    • GL_EXT_texture_view on drivers supporting texture views (ES extension).
    • +
    • GL_OES_texture_view on drivers supporting texture views (ES extension).
    • +
    • GL_NV_shader_atomic_float on nvc0 (Fermi/Kepler only).
    • +
    • Shader-based software implementations of GL_ARB_gpu_shader_fp64, GL_ARB_gpu_shader_int64, GL_ARB_vertex_attrib_64bit, and GL_ARB_shader_ballot on i965.
    • +
    • VK_ANDROID_external_memory_android_hardware_buffer on Intel
    • +
    • Fixed and re-exposed VK_EXT_pci_bus_info on Intel and RADV
    • +
    • VK_EXT_scalar_block_layout on Intel and RADV
    • +
    • VK_KHR_depth_stencil_resolve on Intel
    • +
    • VK_KHR_draw_indirect_count on Intel
    • +
    • VK_EXT_conditional_rendering on Intel
    • +
    • VK_EXT_memory_budget on RADV
    • +
    + +

    Bug fixes

    + +
      + +
    • Bug 32211 - [GLSL] lower_jumps with continue-statements in for-loops prevents loop unrolling
    • + +
    • Bug 102349 - nv4x crashing with plasmashell - gdb log included
    • + +
    • Bug 102597 - [Regression] mpv, high rendering times (two to three times higher)
    • + +
    • Bug 104297 - [i965] Downward causes GPU hangs and misrendering on Haswell
    • + +
    • Bug 104602 - [apitrace] Graphical artifacts in Civilization VI on RX Vega
    • + +
    • Bug 105301 - The big SKQP bug
    • + +
    • Bug 106577 - broken rendering with nine and nouveau (GM107)
    • + +
    • Bug 106595 - [RADV] Rendering distortions only when MSAA is enabled
    • + +
    • Bug 107052 - [Regression][bisected]. Crookz - The Big Heist Demo can't be launched despite the "true" flag in "drirc"
    • + +
    • Bug 107510 - [GEN8+] up to 10% perf drop on several 3D benchmarks
    • + +
    • Bug 107626 - [SNB] The graphical corruption and GPU hang occur sometimes on the piglit test "arb_texture_multisample-large-float-texture" with parameter --fp16
    • + +
    • Bug 107728 - Wrong background in Sascha Willem's Multisampling Demo
    • + +
    • Bug 107842 - "invariant" qualifier on outputs of GLSL ES fragment shader causes compilation error.
    • + +
    • Bug 107856 - i965 incorrectly calculates the number of layers for texture views (assert)
    • + +
    • Bug 108114 - [vulkancts] new VK_KHR_16bit_storage tests fail.
    • + +
    • Bug 108116 - [vulkancts] stencil partial clear tests fail.
    • + +
    • Bug 108245 - RADV/Vega: Low mip levels of large BCn textures get corrupted by vkCmdCopyBufferToImage
    • + +
    • Bug 108311 - Query buffer object support is broken on r600.
    • + +
    • Bug 108457 - [OpenGL CTS] KHR-GL46.tessellation_shader.single.xfb_captures_data_from_correct_stage fails
    • + +
    • Bug 108560 - Mesa 32 is built without sse
    • + +
    • Bug 108624 - [regression][bisected] "nir: Copy propagation between blocks" regression
    • + +
    • Bug 108630 - [G965] piglit.spec.!opengl 1_2.tex3d-maxsize spins forever
    • + +
    • Bug 108635 - Mesa master commit 68dc591af16ebb36814e4c187e4998948103c99c causes XWayland to segfault
    • + +
    • Bug 108636 - test_optpass has use after free bug, failing with memory testing tools like address sanitizer
    • + +
    • Bug 108713 - Gallium: use after free with transform feedback
    • + +
    • Bug 108734 - Regression: [bisected] dEQP-GLES31.functional.tessellation.invariance.* start failing on r600
    • + +
    • Bug 108805 - i965 regressions from EXT_texture_sRGB_R8
    • + +
    • Bug 108829 - [meson] libglapi exports internal API
    • + +
    • Bug 108868 - [BYT IVB] Tesselation test regressions
    • + +
    • Bug 108877 - OpenGL CTS gl43 test cases were interrupted due to segment fault
    • + +
    • Bug 108894 - [anv] vkCmdCopyBuffer() and vkCmdCopyQueryPoolResults() write-after-write hazard
    • + +
    • Bug 108909 - Vkd3d test failure test_resolve_non_issued_query_data()
    • + +
    • Bug 108910 - Vkd3d test failure test_multisample_array_texture()
    • + +
    • Bug 108911 - Vkd3d test failure test_clear_render_target_view()
    • + +
    • Bug 108914 - blocky shadow artifacts in The Forest with DXVK, RADV_DEBUG=nohiz fixes this
    • + +
    • Bug 108925 - vkCmdCopyQueryPoolResults(VK_QUERY_RESULT_WAIT_BIT) for timestamps with large query count hangs
    • + +
    • Bug 108936 - [ILK,G45,G965] Regressions from texture-format enums rework
    • + +
    • Bug 108943 - Build fails on ppc64le with meson
    • + +
    • Bug 108961 - make check test_replace_src_bitsize failure
    • + +
    • Bug 108974 - make check DispatchSanity_test regression
    • + +
    • Bug 108999 - Calculating the scissors fields when the y is flipped (0 on top) can generate negative numbers that will cause assertion failure later on.
    • + +
    • Bug 109023 - error: inlining failed in call to always_inline ‘__m512 _mm512_and_ps(__m512, __m512)’: target specific option mismatch
    • + +
    • Bug 109072 - GPU hang in blender 2.80
    • + +
    • Bug 109075 - radv: New D3D boolean optimizations cause GPU hang in Witcher 3
    • + +
    • Bug 109081 - [bisected] [HSW] Regression in clipping.user_defined.clip_* vulkancts tests
    • + +
    • Bug 109086 - Crash software mesa with gl_select render mode
    • + +
    • Bug 109107 - gallium/st/va: change va max_profiles when using Radeon VCN Hardware
    • + +
    • Bug 109129 - format_types.h:1220: undefined reference to `_mm256_cvtps_ph'
    • + +
    • Bug 109151 - [KBL-G][vulkan] dEQP-VK.texture.explicit_lod.2d.sizes.31x55_nearest_linear_mipmap_nearest_repeat failed verification.
    • + +
    • Bug 109190 - virgl: buffer flushing error with some dEQP tests [bisected]
    • + +
    • Bug 109202 - nv50_ir.cpp:749:19: error: cannot use typeid with -fno-rtti
    • + +
    • Bug 109204 - [regression, bisected] retroarch's crt-royale shader crash radv
    • + +
    • Bug 109229 - glLinkProgram locks up for ~30 seconds
    • + +
    • Bug 109231 - [nir] src/compiler/nir/nir_loop_analyze.c uninitialized variable
    • + +
    • Bug 109242 - [RADV] The Witcher 3 system freeze
    • + +
    • Bug 109304 - GfxBench AztecRuins Vulkan version Segfault
    • + +
    • Bug 109325 - mesa: Need ability to retrieve command line of Meson configuration
    • + +
    • Bug 109328 - [BSW BXT GLK] dEQP-VK.subgroups.arithmetic.subgroup regressions
    • + +
    • Bug 109353 - [regression][bisected] "nir: Switch to using 1-bit Booleans for almost everything" regression with shared bools
    • + +
    • Bug 109401 - [DXVK] Project Cars rendering problems
    • + +
    • Bug 109404 - [ANV] The Witcher 3 shadows flickering
    • + +
    • Bug 109442 - "make check" test anv_block_pool_no_free fails intermittently
    • + +
    • Bug 109443 - Build failure with MSVC when using Scons >= 3.0.2
    • + +
    • Bug 109449 - [snb] quakespasm triggers a segmentation fault.
    • + +
    • Bug 109451 - [IVB,SNB] LINE_STRIPs following a TRIANGLE_FAN fail to use primitive restart
    • + +
    • Bug 109543 - After upgrade mesa to 19.0.0~rc1 all vulkan based application stop working ["vulkan-cube" received SIGSEGV in radv_pipeline_init_blend_state at ../src/amd/vulkan/radv_pipeline.c:699]
    • + +
    • Bug 109561 - [regression, bisected] code re-factor causing games to stutter or lock-up system
    • + +
    • Bug 109573 - dEQP-VK.spirv_assembly.instruction.graphics.module.same_module
    • + +
    • Bug 109575 - Mesa-19.0.0-rc1 : Computer Crashes trying to run anything Vulkan
    • + +
    • Bug 109581 - [BISECTED] Nothing is Rendered on Sascha Willem's "subpasses" demo
    • + +
    • Bug 109594 - totem assert failure: totem: src/intel/genxml/gen9_pack.h:72: __gen_uint: La declaración `v <= max' no se cumple.
    • + +
    • Bug 109597 - wreckfest issues with transparent objects & skybox
    • + +
    • Bug 109601 - [Regression] RuneLite GPU rendering broken on 18.3.x
    • + +
    • Bug 109603 - nir_instr_as_deref: Assertion `parent && parent->type == nir_instr_type_deref' failed.
    • + +
    • Bug 109698 - dri.pc contents invalid when built with meson
    • + +
    • Bug 109717 - [regression] Cull distance tests asserting
    • + +
    • Bug 109735 - [Regression] broken font with mesa_vulkan_overlay
    • + +
    • Bug 109759 - [BISECTED][REGRESSION][IVB, HSW] Font rendering problem in OpenGL
    • + +
    + +

    Changes

    + +
      + +

      Adam Jackson (4):

      +
        +
      • glx: Demand success from CreateContext requests (v2)
      • +
      • specs: Remove GLES profile interaction text from GLX_MESA_query_renderer
      • +
      • specs: Remove GLX_RENDERER_ID_MESA from GLX_MESA_query_renderer
      • +
      • specs: Bump GLX_MESA_query_renderer to version 9
      • +
      + +

      Aditya Swarup (1):

      +
        +
      • i965: Lift restriction in external textures for EGLImage support
      • +
      + +

      Alejandro Piñeiro (3):

      +
        +
      • nir: remove unused variable
      • +
      • nir/xfb: don't assert when xfb_buffer/stride is present but not xfb_offset
      • +
      • nir/xfb: distinguish array of structs vs array of blocks
      • +
      + +

      Alex Deucher (3):

      +
        +
      • pci_ids: add new vega10 pci ids
      • +
      • pci_ids: add new vega20 pci id
      • +
      • pci_ids: add new VegaM pci id
      • +
      + +

      Alex Smith (1):

      +
        +
      • radv: Flush before vkCmdWriteTimestamp() if needed
      • +
      + +

      Alexander von Gluck IV (1):

      +
        +
      • egl/haiku: Fix reference to disp vs dpy
      • +
      + +

      Alok Hota (8):

      +
        +
      • swr/rast: Use gfxptr_t value in JitGatherVertices
      • +
      • swr/rast: Add annotator to interleave isa text
      • +
      • swr/rast: partial support for Tiled Resources
      • +
      • swr/rast: Unaligned and translations in gathers
      • +
      • swr/rast: Scope MEM_CLIENT enum for mem usages
      • +
      • swr/rast: New execution engine per JIT
      • +
      • swr/rast: Store cached files in multiple subdirs
      • +
      • swr/rast: bypass size limit for non-sampled textures
      • +
      + +

      Alyssa Rosenzweig (1):

      +
        +
      • util: Fix warning in u_cpu_detect on non-x86
      • +
      + +

      Andre Heider (4):

      +
        +
      • st/nine: fix stack corruption due to ABI mismatch
      • +
      • st/nine: plug thread related leaks
      • +
      • st/nine: clean up thead shutdown sequence a bit
      • +
      • d3dadapter9: use snprintf(..., "%s", ...) instead of strncpy
      • +
      + +

      Andres Gomez (8):

      +
        +
      • glsl/linker: complete documentation for assign_attribute_or_color_locations
      • +
      • docs: update 18.3 and add 19.x cycles for the release calendar
      • +
      • glsl: correct typo in GLSL compilation error message
      • +
      • editorconfig: Add max_line_length property
      • +
      • glsl/linker: specify proper direction in location aliasing error
      • +
      • docs: complete the calendar and release schedule documentation
      • +
      • bin/get-pick-list.sh: fix the oneline printing
      • +
      • bin/get-pick-list.sh: fix redirection in sh
      • +
      + +

      Andrii Simiklit (9):

      +
        +
      • intel/tools: avoid 'unused variable' warnings
      • +
      • compiler: avoid 'unused variable' warnings
      • +
      • i965: avoid 'unused variable' warnings
      • +
      • i965/batch: avoid reverting batch buffer if saved state is an empty
      • +
      • intel/tools: make sure the binary file is properly read
      • +
      • anv/pipeline: remove unnecessary null-pointer check
      • +
      • intel/batch-decoder: fix vertex buffer size calculation for gen<8
      • +
      • intel/batch-decoder: fix a vb end address calculation
      • +
      • i965: re-emit index buffer state on a reset option change.
      • +
      + +

      Anuj Phogat (7):

      +
        +
      • i965/icl: Set Error Detection Behavior Control Bit in L3CNTLREG
      • +
      • anv/icl: Set Error Detection Behavior Control Bit in L3CNTLREG
      • +
      • anv/icl: Disable prefetching of sampler state entries
      • +
      • i965/icl: Fix L3 configurations
      • +
      • i965/icl: Set use full ways in L3CNTLREG
      • +
      • intel/icl: Set way_size_per_bank to 4
      • +
      • anv/icl: Set use full ways in L3CNTLREG
      • +
      + +

      Axel Davy (12):

      +
        +
      • st/nine: Allow 'triple buffering' with thread_submit
      • +
      • st/nine: Remove thread_submit warning
      • +
      • st/nine: Use helper to release swapchain buffers later
      • +
      • st/nine: Switch to presentation buffer if resize is detected
      • +
      • st/nine: Fix volumetexture dtor on ctor failure
      • +
      • st/nine: Bind src not dst in nine_context_box_upload
      • +
      • st/nine: Add src reference to nine_context_range_upload
      • +
      • st/nine: Increase the limit of cached ff shaders
      • +
      • st/nine: Immediately upload user provided textures
      • +
      • st/nine: Enable debug info if NDEBUG is not set
      • +
      • st/nine: Ignore window size if error
      • +
      • st/nine: Ignore multisample quality level if no ms
      • +
      + +

      Bart Oldeman (1):

      +
        +
      • gallium-xlib: query MIT-SHM before using it.
      • +
      + +

      Bas Nieuwenhuizen (41):

      +
        +
      • radv: Use structured intrinsics instead of indexing workaround for GFX9.
      • +
      • vulkan: Allow storage images in the WSI.
      • +
      • radv: Fix opaque metadata descriptor last layer.
      • +
      • radv: Clamp gfx9 image view extents to the allocated image extents.
      • +
      • radv: Align large buffers to the fragment size.
      • +
      • radv/android: Mark android WSI image as shareable.
      • +
      • radv/android: Use buffer metadata to determine scanout compat.
      • +
      • radv: Check for shareable images in central place.
      • +
      • radv: Remove redundant format check.
      • +
      • radv: Fix multiview depth clears
      • +
      • radv: Work around non-renderable 128bpp compressed 3d textures on GFX9.
      • +
      • radv: Fix wrongly positioned paren.
      • +
      • radv: Do a cache flush if needed before reading predicates.
      • +
      • radv: Implement buffer stores with less than 4 components.
      • +
      • anv/android: Do not reject storage images.
      • +
      • radv: Remove device path.
      • +
      • radv: Remove unused variable.
      • +
      • amd/common: Add some parentheses to silence warning.
      • +
      • radv: Fix rasterization precision bits.
      • +
      • spirv: Fix matrix parameters in function calls.
      • +
      • freedreno: Move register constant files to src/freedreno.
      • +
      • radv: Only use 32 KiB per threadgroup on Stoney.
      • +
      • radv: Set partial_vs_wave for pipelines with just GS, not tess.
      • +
      • nir: Account for atomics in copy propagation.
      • +
      • radv: Remove unused variable.
      • +
      • radv/winsys: Set winsys bo priority on creation.
      • +
      • radv/winsys: Add priority handling during submit.
      • +
      • radv: Enable VK_EXT_memory_priority.
      • +
      • radv: Fix the shader info pass for not having the variable.
      • +
      • amd/common: Fix stores to derefs with unknown variable.
      • +
      • amd/common: Add gep helper for pointer increment.
      • +
      • amd/common: Handle nir_deref_type_ptr_as_array for shared memory.
      • +
      • amd/common: handle nir_deref_cast for shared memory from integers.
      • +
      • radv: Only look at pImmutableSamples if the descriptor has a sampler.
      • +
      • amd/common: Use correct writemask for shared memory stores.
      • +
      • radv: Sync ETC2 whitelisted devices.
      • +
      • radv: Fix float16 interpolation set up.
      • +
      • radv: Allow interpolation on non-float types.
      • +
      • radv: Handle clip+cull distances more generally as compact arrays.
      • +
      • radv: Fix rebase issue in 19.0 for float16 fix.
      • +
      • radv: Interpolate less aggressively.
      • +
      + +

      Boyan Ding (3):

      +
        +
      • gk110/ir: Add rcp f64 implementation
      • +
      • gk110/ir: Add rsq f64 implementation
      • +
      • gk110/ir: Use the new rcp/rsq in library
      • +
      + +

      Brian Paul (3):

      +
        +
      • svga: add new gallium formats to the format conversion table
      • +
      • mesa: fix display list corner case assertion
      • +
      • svga: remove SVGA_RELOC_READ flag in SVGA3D_BindGBSurface()
      • +
      + +

      Bruce Cherniak (1):

      +
        +
      • gallium/swr: Fix multi-context sync fence deadlock.
      • +
      + +

      Caio Marcelo de Oliveira Filho (10):

      +
        +
      • nir: properly clear the entry sources in copy_prop_vars
      • +
      • nir: properly find the entry to keep in copy_prop_vars
      • +
      • nir: add a way to print the deref chain
      • +
      • nir: remove dead code from copy_prop_vars
      • +
      • nir: fix warning in nir_lower_io.c
      • +
      • util: Helper to create sets and hashes with pointer keys
      • +
      • src/compiler: use new hash table and set creation helpers
      • +
      • src/intel: use new hash table and set creation helpers
      • +
      • nir: check NIR_SKIP to skip passes by name
      • +
      • gallium: Add PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS
      • +
      + +

      Carlos Garnacho (1):

      +
        +
      • wayland/egl: Ensure EGL surface is resized on DRI update_buffers()
      • +
      + +

      Carsten Haitzler (Rasterman) (2):

      +
        +
      • vc4: Use named parameters for the NEON inline asm.
      • +
      • vc4: Declare the cpu pointers as being modified in NEON asm.
      • +
      + +

      Chad Versace (1):

      +
        +
      • i965: Fix -Wswitch on INTEL_COPY_STREAMING_LOAD
      • +
      + +

      Chia-I Wu (2):

      +
        +
      • meson: fix EGL/X11 build without GLX
      • +
      • freedreno/drm: sync uapi again
      • +
      + +

      Christian Gmeiner (6):

      +
        +
      • nir: add lowering for ffloor
      • +
      • etnaviv: drop redundant ctx function parameter
      • +
      • meson: add etnaviv to the tools option
      • +
      • etnaviv: extend etna_resource with an addressing mode
      • +
      • etnaviv: update headers from rnndb
      • +
      • etnaviv: add linear sampling support
      • +
      + +

      Connor Abbott (4):

      +
        +
      • Revert "radv: disable VK_SUBGROUP_FEATURE_VOTE_BIT"
      • +
      • nir/algebraic: Rewrite bit-size inference
      • +
      • nir/algebraic: Add unit tests for bitsize validation
      • +
      • nir: Fixup algebraic test for variable-sized conversions
      • +
      + +

      Daniel Stone (1):

      +
        +
      • gbm: Clarify acceptable formats for gbm_bo
      • +
      + +

      Danylo Piliaiev (9):

      +
        +
      • i965: Fix calculation of layers array length for isl_view
      • +
      • nir: add if opt opt_if_loop_last_continue()
      • +
      • glsl/linker: Fix unmatched TCS outputs being reduced to local variable
      • +
      • glsl: Make invariant outputs in ES fragment shader not to cause error
      • +
      • glsl: Fix copying function's out to temp if dereferenced by array
      • +
      • anv: Implement VK_KHR_draw_indirect_count for gen 7+
      • +
      • anv: Implement VK_EXT_conditional_rendering for gen 7.5+
      • +
      • anv: Fix VK_EXT_transform_feedback working with varyings packed in PSIZ
      • +
      • anv: Fix destroying descriptor sets when pool gets reset
      • +
      + +

      Dave Airlie (19):

      +
        +
      • radv: apply xfb buffer offset at buffer binding time not later. (v2)
      • +
      • radv: fix begin/end transform feedback with 0 counter buffers.
      • +
      • virgl: fix vtest regression since fencing changes.
      • +
      • spirv/vtn: handle variable pointers without offset lowering
      • +
      • nir: move getting deref from var after we check deref type.
      • +
      • nir: handle shared pointers in lowering indirect derefs.
      • +
      • ac: avoid casting pointers on bcsel and stores
      • +
      • radv: handle loading from shared pointers
      • +
      • ac: handle cast derefs
      • +
      • r600: make suballocator 256-bytes align
      • +
      • virgl: fix undefined shift to use unsigned.
      • +
      • virgl: fix const warning on debug flags.
      • +
      • radv: use 3d shader for gfx9 copies if dst is 3d
      • +
      • radv/xfb: fix counter buffer bounds checks.
      • +
      • virgl/vtest: fix front buffer flush with protocol version 0.
      • +
      • virgl: use primconvert provoking vertex properly
      • +
      • dri_interface: add put shm image2 (v2)
      • +
      • glx: add support for putimageshm2 path (v2)
      • +
      • gallium: use put image shm2 path (v2)
      • +
      + +

      David Shao (1):

      +
        +
      • meson: ensure that xmlpool_options.h is generated for gallium targets that need it
      • +
      + +

      Dieter Nützel (1):

      +
        +
      • docs/features: Delete double nv50 entry and wrong enumeration
      • +
      + +

      Dylan Baker (48):

      +
        +
      • meson: link gallium nine with pthreads
      • +
      • meson: Don't set -Wall
      • +
      • meson: fix libatomic tests
      • +
      • meson: Add tests to suites
      • +
      • util: promote u_memory to src/util
      • +
      • meson: Add nir_algebraic_parser_test to suites
      • +
      • meson: Fix ppc64 little endian detection
      • +
      • meson: remove duplicate definition
      • +
      • meson: Add support for gnu hurd
      • +
      • meson: Add toggle for glx-direct
      • +
      • docs/meson: Recommend not using CFLAGS and friends
      • +
      • travis: meson: use native files to override llvm-config
      • +
      • travis: Don't try to read libdrm out of configure.ac
      • +
      • travis: meson: enable unit tests
      • +
      • docs: add note about using backticks for rbs in gitlab
      • +
      • docs/install: Add meson to the main install page
      • +
      • docs/meson: Update LLVM section with information about native files
      • +
      • docs/install: Update python dependency section
      • +
      • docs/autoconf: Mark autoconf as being replaced
      • +
      • meson: Override C++ standard to gnu++11 when building with altivec on ppc64
      • +
      • meson: Error out if building nouveau and using LLVM without rtti
      • +
      • autotools: Remove tegra vdpau driver
      • +
      • meson: Add a script to extract the cmd line used for meson
      • +
      • meson: allow building dri driver without window system if osmesa is classic
      • +
      • bin/meson-cmd-extract: Also handle cross and native files
      • +
      • meson: fix swr KNL build
      • +
      • meson: Fix compiler checks for SWR with ICC
      • +
      • meson: Add warnings and errors when using ICC
      • +
      • automake: Fix path to generated source
      • +
      • automake: Add float64.glsl to dist tarball
      • +
      • automake: Add include dir for nir src directory
      • +
      • configure: Bump SWR LLVM requirement to 7
      • +
      • automake: Add --enable-autotools to distcheck flags
      • +
      • android,autotools,i965: Fix location of float64_glsl.h
      • +
      • VERSION: bump to 19.0.0-rc1
      • +
      • Version: Bump for rc2
      • +
      • cherry-ignore: Add some patches
      • +
      • Revert "intel/compiler: More peephole_select for pre-Gen6"
      • +
      • Revert "nir/opt_peephole_select: Don't peephole_select expensive math instructions"
      • +
      • Revert "intel/compiler: More peephole select"
      • +
      • Bump version for 19.0-rc3
      • +
      • version: bump for 19.0-rc4
      • +
      • get-pick-list: Add --pretty=medium to the arguments for Cc patches
      • +
      • meson: Add dependency on genxml to anvil
      • +
      • Version: update to 19.0-rc5
      • +
      • Bump version for rc6
      • +
      • VERSION: bump version for rc7
      • +
      • cherry-ignore: Update the cherry-ignore file
      • +
      + +

      Eduardo Lima Mitev (2):

      +
        +
      • freedreno/ir3: Make imageStore use num components from image format
      • +
      • freedreno/ir3: Handle GL_NONE in get_num_components_for_glformat()
      • +
      + +

      Eleni Maria Stea (1):

      +
        +
      • i965: fixed clamping in set_scissor_bits when the y is flipped
      • +
      + +

      Elie Tournier (17):

      +
        +
      • glsl: Add "built-in" function to do abs(fp64)
      • +
      • glsl: Add "built-in" functions to do neg(fp64)
      • +
      • glsl: Add "built-in" function to do sign(fp64)
      • +
      • glsl: Add "built-in" functions to do eq/ne(fp64, fp64)
      • +
      • glsl: Add utility function to extract 64-bit sign
      • +
      • glsl: Add "built-in" functions to do lt(fp64, fp64)
      • +
      • glsl: Add "built-in" functions to do add(fp64, fp64)
      • +
      • glsl: Add "built-in" functions to do mul(fp64, fp64)
      • +
      • glsl: Add "built-in" functions to do fp64_to_uint(fp64)
      • +
      • glsl: Add "built-in" functions to do uint_to_fp64(uint)
      • +
      • glsl: Add "built-in" functions to do fp64_to_int(fp64)
      • +
      • glsl: Add "built-in" functions to do int_to_fp64(int)
      • +
      • glsl: Add "built-in" functions to do fp64_to_fp32(fp64)
      • +
      • glsl: Add "built-in" functions to do fp32_to_fp64(fp32)
      • +
      • glsl: Add "built-in" functions to do sqrt(fp64)
      • +
      • glsl: Add "built-in" functions to do trunc(fp64)
      • +
      • glsl: Add "built-in" functions to do round(fp64)
      • +
      + +

      Emil Velikov (81):

      +
        +
      • mesa: bump version to 19.1.0-devel
      • +
      • docs: add 19.0.0-devel release notes template
      • +
      • docs: mention EXT_shader_implicit_conversions
      • +
      • egl: add EGL_EXT_device_base entrypoints
      • +
      • egl/glvnd: correctly report errors when vendor cannot be found
      • +
      • docs/releasing.html: polish cherry-picking/testing text
      • +
      • docs/submittingpatches.html: correctly handle the <p> tag
      • +
      • docs: document the staging branch and add reference to it
      • +
      • bin/get-pick-list.sh: simplify git oneline printing
      • +
      • bin/get-pick-list.sh: prefix output with "[stable] "
      • +
      • bin/get-pick-list.sh: handle "typod" usecase.
      • +
      • bin/get-pick-list.sh: handle the fixes tag
      • +
      • bin/get-pick-list.sh: tweak the commit sha matching pattern
      • +
      • bin/get-pick-list.sh: flesh out is_sha_nomination
      • +
      • bin/get-pick-list.sh: handle fixes tag with missing colon
      • +
      • bin/get-pick-list.sh: handle unofficial "broken by" tag
      • +
      • bin/get-pick-list.sh: use test instead of [ ]
      • +
      • bin/get-pick-list.sh: handle reverts prior to the branchpoint
      • +
      • travis: drop unneeded x11proto-xf86vidmode-dev
      • +
      • glx: make xf86vidmode mandatory for direct rendering
      • +
      • travis: adding missing x11-xcb for meson+vulkan
      • +
      • egl/wayland: bail out when drmGetMagic fails
      • +
      • egl/wayland: plug memory leak in drm_handle_device()
      • +
      • docs: update 18.3.0 release notes
      • +
      • docs: add sha256 checksums for 18.3.0
      • +
      • docs: update calendar, add news item and link release notes for 18.3.0
      • +
      • freedreno: drop duplicate MKDIR_GEN declaration
      • +
      • freedreno: add the missing _la in libfreedreno_ir3_la
      • +
      • amd/addrlib: drop si_ci_vi_merged_enum.h from the list
      • +
      • docs: add release notes for 18.3.1
      • +
      • docs: add sha256 checksums for 18.3.1
      • +
      • docs: update calendar, add news item and link release notes for 18.3.1
      • +
      • glx: mandate xf86vidmode only for "drm" dri platforms
      • +
      • bin/get-pick-list.sh: rework handing of sha nominations
      • +
      • bin/get-pick-list.sh: warn when commit lists invalid sha
      • +
      • meson: don't require glx/egl/gbm with gallium drivers
      • +
      • pipe-loader: meson: reference correct library
      • +
      • TODO: glx: meson: build dri based glx tests, only with -Dglx=dri
      • +
      • glx: meson: drop includes from a link-only library
      • +
      • glx: meson: wire up the dispatch-index-check test
      • +
      • glx/test: meson: assorted include fixes
      • +
      • configure: add CXX11_CXXFLAGS to LLVM_CXXFLAGS
      • +
      • travis: flip to distro xenial, drop sudo false
      • +
      • travis: meson: print the configured state
      • +
      • travis: printout llvm-config --version
      • +
      • travis: meson: use FOO_DRIVERS directly
      • +
      • travis: meson: add unwind handling
      • +
      • travis: meson: explicitly control the DRI loaders
      • +
      • travis: meson: add explicit handling to gallium ST
      • +
      • travis: meson: port gallium build combinations over
      • +
      • docs: add release notes for 18.3.2
      • +
      • docs: add sha256 checksums for 18.3.2
      • +
      • docs: update calendar, add news item and link release notes for 18.3.2
      • +
      • freedreno: automake: ship ir3_nir_trig.py in the tarball
      • +
      • mesa: correctly use os.path.join in our python scripts
      • +
      • Revert "mesa/main: remove ARB suffix from glGetnTexImage"
      • +
      • mapi: sort static entrypoints numerically
      • +
      • mapi: add all _glapi_table entrypoints to static_data.py
      • +
      • genCommon.py: Fix typo in _LIBRARY_FEATURE_NAMES.
      • +
      • mapi: move genCommon.py to src/mapi/new
      • +
      • mapi/new: import mapi scripts from glvnd
      • +
      • mapi/new: sort by slot number
      • +
      • mapi/new: use the static_data offsets in the new generator
      • +
      • mapi/new: reinstate _NO_HIDDEN suffixes in the new generator
      • +
      • mapi/new: split out public_entries handling
      • +
      • mapi/new: don't print info we don't need for ES1/ES2
      • +
      • mapi/new: fixup the GLDEBUGPROCKHR typedef to the non KHR one
      • +
      • mapi/new: remove duplicate GLvoid/void substitution
      • +
      • autotools: wire the new generator for es1 and es2
      • +
      • meson: wire the new generator for es1 and es2
      • +
      • scons: wire the new generator for es1 and es2
      • +
      • Revert "mapi/new: sort by slot number"
      • +
      • mapi/es*api: remove GL_OES_EGL_image entrypoints
      • +
      • mapi/es*api: remove GL_EXT_multi_draw_arrays entrypoints
      • +
      • mapi/es2api: remove no longer present entrypoints
      • +
      • mapi: remove old, unused ES* generator code
      • +
      • mapi: remove machinery handling CSV files
      • +
      • mapi: print function declarations for shared glapi
      • +
      • vc4: Declare the last cpu pointer as being modified in NEON asm.
      • +
      • anv: wire up the state_pool_padding test
      • +
      • meson: egl: correctly manage loader/xmlconfig
      • +
      + +

      Eric Anholt (171):

      +
        +
      • v3d: Fix a copy-and-paste comment in the simulator code.
      • +
      • v3d: Fix a typo in a comment in job handling.
      • +
      • v3d: Drop #if 0-ed out v3d_dump_to_file().
      • +
      • v3d: Respect user-passed strides for BO imports.
      • +
      • v3d: Take advantage of _mesa_hash_table_remove_key() in the simulator.
      • +
      • v3d: Use the TLB R/B swapping instead of recompiles when available.
      • +
      • v3d: Update the TLB config for depth writes on V3D 4.2.
      • +
      • vc4: Drop the winsys_stride relayout in the simluator
      • +
      • v3d: Maintain a mapping of the GEM buffer in the simulator.
      • +
      • v3d: Remove the special path for simulaton of the submit ioctl.
      • +
      • vc4: Take advantage of _mesa_hash_table_remove_key() in the simulator.
      • +
      • vc4: Maintain a separate GEM mapping of BOs in the simulator.
      • +
      • vc4: Use the normal simulator ioctl path for CL submit as well.
      • +
      • gbm: Move gbm_format_canonicalize() to the core.
      • +
      • gbm: Introduce a helper function for printing GBM format names.
      • +
      • egl: Improve the debugging of gbm format matching in DRI configs.
      • +
      • v3d: Fix double-swapping of R/B on V3D 4.1
      • +
      • v3d: Don't try to set PF flags on a LDTMU operation
      • +
      • vc4: Make sure we make ro scanout resources for create_with_modifiers.
      • +
      • vc4: Don't return a vc4 BO handle on a renderonly screen.
      • +
      • glx: Remove an old DEFAULT_DRIVER_DIR default.
      • +
      • glx: Move DRI extensions pointer loading to driOpenDriver().
      • +
      • egl: Move loader_set_logger() up to egl_dri2.c.
      • +
      • loader: Stop using a local definition for an in-tree header
      • +
      • loader: Factor out the common driver opening logic from each loader.
      • +
      • egl: Print the actual message to the console from _eglError().
      • +
      • gallium: Fix uninitialized variable warning in compute test.
      • +
      • gallium: Remove unused variable in u_tests.
      • +
      • v3d: Add renderonly support.
      • +
      • v3d: Add support for RGBA_SRGB along with BGRA_SRGB.
      • +
      • v3d: Add missing OES_half_float_linear support.
      • +
      • v3d: Use combined input/output segments.
      • +
      • v3d: Add the V3D TFU submit interface to the simulator.
      • +
      • v3d: Use the TFU to do generatemipmap.
      • +
      • v3d: Update simulator cache flushing code to match the kernel better.
      • +
      • v3d: Create a state uploader for packing our shaders together.
      • +
      • v3d: Put default vertex attribute values into the state uploader as well.
      • +
      • v3d: Re-use the wrap mode uniform on V3D 3.3.
      • +
      • v3d: Make an array for frag/vert texture state in the context.
      • +
      • v3d: Don't forget to flush writes to UBOs.
      • +
      • v3d: Convert to using nir_src_as_uint() from const_value derefs.
      • +
      • v3d: Fix a comment typo
      • +
      • v3d: Return the right gl_SampleMaskIn[] value.
      • +
      • v3d: Fix handling of texture first_layer offsets for 3D textures.
      • +
      • v3d: Avoid confusing auto-indenting in TEXTURE_SHADER_STATE packing
      • +
      • v3d: Split most of TEXTURE_SHADER_STATE setup out of sampler views.
      • +
      • v3d: Garbage collect unused uniforms code.
      • +
      • v3d: Simplify VIR uniform dumping using a temporary.
      • +
      • v3d: Add VIR dumping of TMU config p0/p1.
      • +
      • v3d: Fix a leak of the transfer helper on screen destroy.
      • +
      • vc4: Fix a leak of the transfer helper on screen destroy.
      • +
      • v3d: Fix a leak of the disassembled instruction string during debug dumps.
      • +
      • tfu
      • +
      • shader-packing
      • +
      • nir: Add some more consts to the nir_format_convert.h helpers.
      • +
      • nir: Pull some of intel's image load/store format conversion to nir_format.h
      • +
      • intel: Simplify the half-float packing in image load/store lowering.
      • +
      • mesa/st: Expose compute shaders when NIR support is advertised.
      • +
      • nir: Print the format of image variables.
      • +
      • Revert "intel: Simplify the half-float packing in image load/store lowering."
      • +
      • nir: Move intel's half-float image store lowering to to nir_format.h.
      • +
      • v3d: Don't forget to wait for our TFU job before rendering from it.
      • +
      • v3d: Set up the right stride for raster TFU.
      • +
      • v3d: Don't forget to bump the number of writes when doing TFU ops.
      • +
      • v3d: Add support for using the TFU to do some blits.
      • +
      • v3d: Add support for texturing from linear.
      • +
      • v3d: Add safety checks for resource_create().
      • +
      • v3d: Make sure that a thrsw doesn't split a multop from its umul24.
      • +
      • v3d: Add missing flagging of SYNCB as a TSY op.
      • +
      • v3d: Add support for draw indirect for GLES3.1.
      • +
      • v3d: Avoid assertion failures when removing end-of-shader instructions.
      • +
      • v3d: Move uinfo->data[] dereference to the top of v3d_write_uniforms().
      • +
      • v3d: Move uniform pretty-printing to its own helper function.
      • +
      • v3d: Use the uniform pretty-printer in v3d_write_uniforms()'s debug code.
      • +
      • v3d: Do uniform pretty-printing in the QPU dump.
      • +
      • v3d: Drop in a bunch of notes about performance improvement opportunities.
      • +
      • vc4: Use the original bit size when scalarizing uniform loads.
      • +
      • v3d: Use the original bit size when scalarizing uniform loads.
      • +
      • vc4: Reuse nir_format_convert.h in our blend lowering.
      • +
      • v3d: Fix the argument type for vir_BRANCH().
      • +
      • nir: Fix clamping of uints for image store lowering.
      • +
      • v3d: Put the dst bo first in the list of BOs for TFU calls.
      • +
      • v3d: Fix check for TFU job completion in the simulator.
      • +
      • v3d: Don't try to create shadow tiled temporaries for 1D textures.
      • +
      • v3d: Remove dead prototypes for load/store utile functions.
      • +
      • v3d: Implement texture_subdata to reduce teximage upload copies.
      • +
      • vc4: Move the utile load/store functions to a header for reuse by v3d.
      • +
      • v3d: Add a fallthrough path for utile load/store of 32 byte lines.
      • +
      • v3d: Load and store aligned utiles all at once.
      • +
      • docs: Add a note that MRs should still include any r-b or a-b tags.
      • +
      • docs: Add an encouraging note about providing reviews and acks.
      • +
      • v3d: Fix simulator mode on i915 render nodes.
      • +
      • v3d: Drop shadow comparison state from shader variant key.
      • +
      • v3d: Hook up perf_debug() output to GL_ARB_debug output as well.
      • +
      • vc4: Hook up perf_debug() output to GL_ARB_debug_output as well.
      • +
      • gallium/ttn: Fix setup of outputs_written.
      • +
      • v3d: Fix uniform pretty printing assertion failure with branches.
      • +
      • v3d: Add a "precompile" debug flag for shader-db.
      • +
      • v3d: Hook up some shader-db output to GL_ARB_debug_output.
      • +
      • v3d: Drop unused count_nir_instrs() helper.
      • +
      • v3d: Drop incorrect dependency for flpop.
      • +
      • v3d: Move "does this instruction have flags" from sched to generic helpers.
      • +
      • v3d: Don't generate temps for comparisons.
      • +
      • v3d: Dead-code eliminate unused flags updates.
      • +
      • v3d: Add a note for a potential performance win on multop/umul24.
      • +
      • v3d: Force sampling from base level for tg4.
      • +
      • v3d: Add support for non-constant texture offsets.
      • +
      • v3d: Add support for requesting the sample offsets.
      • +
      • v3d: Add support for textureSize() on MSAA textures.
      • +
      • v3d: Add support for gl_HelperInvocation.
      • +
      • v3d: Fix segfault when failing to compile a program.
      • +
      • v3d: Don't forget to include RT writes in precompiles.
      • +
      • v3d: Simplify the emission of comparisons for the bcsel optimization.
      • +
      • v3d: Move the "Find the ALU instruction generating our bool" out of bcsel.
      • +
      • v3d: Don't try to fold non-SSA-src comparisons into bcsels.
      • +
      • v3d: Fold comparisons for IF conditions into the flags for the IF.
      • +
      • v3d: Handle dynamically uniform IF statements with uniform control flow.
      • +
      • v3d: Refactor compiler entrypoints.
      • +
      • v3d: Reinstate the new shader-db output after v3d_compile() refactor.
      • +
      • v3d: Fix up VS output setup during precompiles.
      • +
      • v3d: Remove dead switch cases and comments from v3d_nir_lower_io.
      • +
      • v3d: Do UBO loads a vector at a time.
      • +
      • v3d: Stop scalarizing our uniform loads.
      • +
      • nir: Allow nir_format_unpack_int/sint to unpack larger values.
      • +
      • nir: Add nir_lower_tex options to lower sampler return formats.
      • +
      • v3d: Use the core tex lowering.
      • +
      • nir: Add nir_lower_tex support for Broadcom's swizzled TG4 results.
      • +
      • v3d: Enable GL_ARB_texture_gather on V3D 4.x.
      • +
      • nir: Make nir_deref_instr_build/get_const_offset actually use size_align.
      • +
      • glsl: Fix buffer overflow with an atomic buffer binding out of range.
      • +
      • v3d: Add support for flushing dirty TMU data at job end.
      • +
      • v3d: Add support for the early_fragment_tests flag.
      • +
      • v3d: Add support for GL_ARB_framebuffer_no_attachments.
      • +
      • v3d: Fix txf_ms 2D_ARRAY array index.
      • +
      • v3d: Add an isr to the simulator to catch GMP violations.
      • +
      • v3d: Add support for matrix inputs to the FS.
      • +
      • v3d: Drop the GLSL version level.
      • +
      • v3d: Add SSBO/atomic counters support.
      • +
      • v3d: Add support for shader_image_load_store.
      • +
      • v3d: Add support for CS workgroup/invocation id intrinsics.
      • +
      • v3d: Add support for CS shared variable load/store/atomics.
      • +
      • v3d: Add support for CS barrier() intrinsics.
      • +
      • v3d: SHARED but not necessarily SCANOUT buffers on RO must be linear.
      • +
      • v3d: If the modifier is not known on BO import, default to linear for RO.
      • +
      • v3d: Restructure RO allocations using resource_from_handle.
      • +
      • v3d: Don't leak the GPU fd for renderonly usage.
      • +
      • vc4: Don't leak the GPU fd for renderonly usage.
      • +
      • gallium: Enable unit tests as actual meson unit tests.
      • +
      • gallium: Fix comment about possible colorspaces.
      • +
      • gallium: Make sure we return is_unorm/is_snorm for compressed formats.
      • +
      • v3d: Rename gallium-local limits defines from VC5 to V3D.
      • +
      • v3d: Fix overly-large vattr_sizes structs.
      • +
      • v3d: Avoid duplicating limits defines between gallium and v3d core.
      • +
      • v3d: Drop maximum number of texture units down to 16.
      • +
      • v3d: Fix BO stats accounting for imported buffers.
      • +
      • v3d: Flush blit jobs immediately after generating them.
      • +
      • v3d: Fix release-build warning about utile_h.
      • +
      • v3d: Fix stencil sampling from packed depth/stencil.
      • +
      • v3d: Fix stencil sampling from a separate-stencil buffer.
      • +
      • v3d: Use the symbolic names for wrap modes from the XML.
      • +
      • v3d: Move the sampler state to the long-lived state uploader.
      • +
      • v3d: Create separate sampler states for the various blend formats.
      • +
      • pl111: Rename the pl111 driver to "kmsro".
      • +
      • kmsro: Extend to include hx8357d.
      • +
      • vc4: Enable NEON asm on meson cross-builds.
      • +
      • v3d: Fix the autotools build.
      • +
      • mesa: Skip partial InvalidateFramebuffer of packed depth/stencil.
      • +
      • v3d: Fix image_load_store clamping of signed integer stores.
      • +
      • v3d: Use the early_fragment_tests flag for the shader's disable-EZ field.
      • +
      • v3d: Fix the check for "is the last thrsw inside control flow"
      • +
      • st/dri: Set the PIPE_BIND_SHARED flag on create_image_with_modifiers.
      • +
      + +

      Eric Engestrom (47):

      +
        +
      • wsi/wayland: use proper VkResult type
      • +
      • wsi/wayland: only finish() a successfully init()ed display
      • +
      • REVIEWERS: add include path for EGL
      • +
      • REVIEWERS: add Emil as EGL reviewer
      • +
      • REVIEWERS: add Vulkan reviewer group
      • +
      • xmlpool: update translation po files
      • +
      • meson: only run vulkan's meson.build when building vulkan
      • +
      • gbm: remove unnecessary meson include
      • +
      • meson: fix wayland-less builds
      • +
      • gbm: add new entrypoint to symbols check
      • +
      • egl: add missing glvnd entrypoint for EGL_ANDROID_blob_cache
      • +
      • egl: fix bad rebase
      • +
      • gbm: add missing comma between strings
      • +
      • glapi: add missing visibility args
      • +
      • anv: correctly use vulkan 1.0 by default
      • +
      • vulkan/utils: s/VERSION/PACKAGE_VERSION/
      • +
      • build: stop defining unused VERSION
      • +
      • wsi/display: fix mem leak when freeing swapchains
      • +
      • vulkan/wsi: fix s/,/;/ typo
      • +
      • meson: skip asm check when asm is disabled
      • +
      • anv: add unreachable() for VK_EXT_fragment_density_map
      • +
      • mesa: drop unused & deprecated lib
      • +
      • loader: deduplicate logger function declaration
      • +
      • docs: add meson cross compilation instructions
      • +
      • docs: format code blocks a bit nicely
      • +
      • docs: fix the meson aarch64 cross-file
      • +
      • docs: advertise distro-provided meson cross-files
      • +
      • anv: drop unneeded KHR suffix
      • +
      • wsi: drop unneeded KHR suffix
      • +
      • radv: remove a few more unnecessary KHR suffixes
      • +
      • egl: add missing includes
      • +
      • egl: remove unused include
      • +
      • travis: avoid using unset llvm-config
      • +
      • egl: fix python lib deprecation warning
      • +
      • docs: explain how to see what meson options exist
      • +
      • travis: fix autotools build after --enable-autotools switch addition
      • +
      • configure: EGL requirements only apply if EGL is built
      • +
      • egl: finalize EGL_MESA_query_driver
      • +
      • egl: update headers from Khronos
      • +
      • egl: add glvnd entrypoints for EGL_MESA_query_driver
      • +
      • travis: bump libdrm to 2.4.97
      • +
      • egl/glvnd: sync egl.xml from Khronos
      • +
      • anv: drop always-successful VkResult
      • +
      • meson/vdpau: add missing soversion
      • +
      • xvmc: fix string comparison
      • +
      • xvmc: fix string comparison
      • +
      • egl: fix libdrm-less builds
      • +
      + +

      Erik Faye-Lund (70):

      +
        +
      • glsl: add has_implicit_conversions()-helper
      • +
      • glsl: add has_implicit_uint_to_int_conversion()-helper
      • +
      • glsl: fall back to inexact function-match
      • +
      • mesa/glsl: add support for EXT_shader_implicit_conversions
      • +
      • glsl: do not allow implicit casts of unsized array initializers
      • +
      • mesa: expose NV_conditional_render on GLES
      • +
      • mesa/main: fixup make check after NV_conditional_render for gles
      • +
      • Revert "mesa/main: fixup make check after NV_conditional_render for gles"
      • +
      • Revert "mesa: expose NV_conditional_render on GLES"
      • +
      • mesa/main: correct requirement for EXT_occlusion_query_boolean
      • +
      • mesa/main: correct year for EXT_occlusion_query_boolean
      • +
      • mesa/main: use non-prefixed enums for consistency
      • +
      • mesa/main: simplify pipeline-statistics query validation
      • +
      • mesa/main: fix validation of GL_SAMPLES_PASSED
      • +
      • mesa/main: fix validation of GL_ANY_SAMPLES_PASSED
      • +
      • mesa/main: fix validation of GL_ANY_SAMPLES_PASSED_CONSERVATIVE
      • +
      • mesa/main: fix validation of GL_TIME_ELAPSED
      • +
      • mesa/main: fix validation of transform-feedback queries
      • +
      • mesa/main: fix validation of transform-feedback overflow queries
      • +
      • mesa/main: fix validation of ARB_query_buffer_object
      • +
      • mesa/main: fix validation of GL_TIMESTAMP
      • +
      • mesa/main: remove overly strict query-validation
      • +
      • mesa/main: remove ARB suffix from glGetnTexImage
      • +
      • mesa/main: remove bogus error for zero-sized images
      • +
      • mesa/main: factor out tex-image error-checking
      • +
      • mesa/main: factor out common error-checking
      • +
      • mesa/main: check cube-completeness in common code
      • +
      • mesa/main: fix incorrect depth-error
      • +
      • mesa/main: fixup requirements for GL_PRIMITIVES_GENERATED
      • +
      • mesa/main: make _mesa_has_tessellation return bool
      • +
      • mesa/main: rename format-check function
      • +
      • mesa/main: clean up S3_s3tc check
      • +
      • mesa/main: clean up OES_texture_float_linear check
      • +
      • mesa/main: clean up ES2_compatibility check
      • +
      • mesa/main: clean up integer texture check
      • +
      • mesa/main: use _mesa_has_FOO_bar for compressed format checks
      • +
      • mesa/main: do not allow s3tc enums on gles1
      • +
      • mesa/main: do not allow etc2 enums on gles1
      • +
      • mesa/main: do not allow astc enums on gles1
      • +
      • mesa/main: do not allow depth-texture enums on gles1
      • +
      • mesa/main: do not allow stencil-texture enums on gles1
      • +
      • mesa/main: do not allow ARB_texture_rgb10_a2ui enums before gles3
      • +
      • mesa/main: do not allow integer-texture enums before gles3
      • +
      • mesa/main: do not allow ARB_depth_buffer_float enums before gles3
      • +
      • mesa/main: do not allow EXT_packed_float enums before gles3
      • +
      • mesa/main: do not allow rg-textures enums before gles3
      • +
      • mesa/main: do not allow EXT_texture_shared_exponent enums before gles3
      • +
      • mesa/main: do not allow MESA_ycbcr_texture enums on gles
      • +
      • mesa/main: do not allow type_2_10_10_10_REV enums before gles3
      • +
      • mesa/main: do not allow floating-point texture enums on gles1
      • +
      • mesa/main: do not allow snorm-texture enums before gles3
      • +
      • mesa/main: do not allow sRGB texture enums before gles3
      • +
      • mesa/main: do not allow EXT_texture_sRGB_R8 enums before gles3
      • +
      • mesa/main: split float-texture support checking in two
      • +
      • mesa/main: require EXT_texture_type_2_10_10_10_REV for gles3
      • +
      • mesa/main: require EXT_texture_sRGB for gles3
      • +
      • mesa/st: do not probe for the same texture-formats twice
      • +
      • mesa/main: do not require float-texture filtering for es3
      • +
      • mesa/main: correct validation for GL_RGB565
      • +
      • mesa/main: fix up _mesa_has_rg_textures for gles2
      • +
      • virgl: force linear texturing support
      • +
      • virgl: simplify virgl_hw_set_vertex_buffers
      • +
      • virgl: simplify virgl_hw_set_index_buffer
      • +
      • virgl: wrap vertex element state in a struct
      • +
      • virgl: work around bad assumptions in virglrenderer
      • +
      • anv/meson: make sure tests link with -msse2
      • +
      • anv/autotools: make sure tests link with -msse2
      • +
      • docs: add note about sending merge-requests from forks
      • +
      • mapi: drop unneeded gl_dispatch_stub declarations
      • +
      • virgl: remove unused variable
      • +
      + +

      Ernestas Kulik (2):

      +
        +
      • vc4: Fix leak in HW queries error path
      • +
      • v3d: Fix leak in resource setup error path
      • +
      + +

      Francisco Jerez (14):

      +
        +
      • intel/fs: Prevent emission of IR instructions not aligned to their own execution size.
      • +
      • intel/fs: Handle source modifiers in lower_integer_multiplication().
      • +
      • intel/fs: Implement quad swizzles on ICL+.
      • +
      • intel/fs: Fix bug in lower_simd_width while splitting an instruction which was already split.
      • +
      • intel/eu/gen7: Fix brw_MOV() with DF destination and strided source.
      • +
      • intel/fs: Respect CHV/BXT regioning restrictions in copy propagation pass.
      • +
      • intel/fs: Constify fs_inst::can_do_source_mods().
      • +
      • intel/fs: Introduce regioning lowering pass.
      • +
      • intel/fs: Remove existing lower_conversions pass.
      • +
      • intel/fs: Remove nasty open-coded CHV/BXT 64-bit workarounds.
      • +
      • intel/fs: Remove FS_OPCODE_UNPACK_HALF_2x16_SPLIT opcodes.
      • +
      • intel/fs: Promote execution type to 32-bit when any half-float conversion is needed.
      • +
      • intel/fs: Exclude control sources from execution type and region alignment calculations.
      • +
      • intel/fs: Implement extended strides greater than 4 for IR source regions.
      • +
      + +

      Fritz Koenig (2):

      +
        +
      • freedreno: drm_fourcc.h header include
      • +
      • freedreno: add query for dmabuf modifiers
      • +
      + +

      Gert Wollny (30):

      +
        +
      • mesa/core: Add definitions and translations for EXT_texture_sRGB_R8
      • +
      • Gallium: Add format PIPE_FORMAT_R8_SRGB
      • +
      • mesa/st: Add support for EXT_texture_sRGB_R8
      • +
      • virgl/vtest-winsys: Use virgl version of bind flags
      • +
      • r600: Add support for EXT_texture_sRGB_R8
      • +
      • mesa: Reference count shaders that are used by transform feedback objects
      • +
      • virgl: Add command and flags to initiate debugging on the host (v2)
      • +
      • nir: Allow to skip integer ops in nir_lower_to_source_mods
      • +
      • i965: Correct L8_UNORM_SRGB table entry
      • +
      • i965: be more specific about FBO completeness errors
      • +
      • i965: Force zero swizzles for unused components in GL_RED and GL_RG
      • +
      • i965: Add support for and expose EXT_texture_sRGB_R8
      • +
      • virgl: Use file descriptor instead of un-allocated object
      • +
      • i965:use FRAMEBUFFER_UNSUPPORTED instead of FRAMEBUFFER_INCOMPLETE_DIMENSIONS
      • +
      • r600: Only set context streamout strides info from the shader that has outputs
      • +
      • r600: clean up the GS ring buffers when the context is destroyed
      • +
      • glsl: free or reuse memory allocated for TF varying
      • +
      • virgl,vtest: Initialize return value
      • +
      • virgl: Don't try handling server fences when they are not supported
      • +
      • i965: Explicitely handle swizzles for MESA_FORMAT_R_SRGB8
      • +
      • i965: Set the FBO error state INCOMPLETE_ATTACHMENT only for SRGB_R8
      • +
      • autotools: Deprecate the use of autotools
      • +
      • Gallium: Add new CAPS to indicate whether a driver can switch SRGB write
      • +
      • virgl: Set sRGB write control CAP based on host capabilities
      • +
      • mesa:main: Add flag for EXT_sRGB to gl_extensions
      • +
      • i965: Set flag for EXT_sRGB
      • +
      • mesa/st: rework support for sRGB framebuffer attachements
      • +
      • mesa/main: Use flag for EXT_sRGB instead of EXT_framebuffer_sRGB where possible
      • +
      • mesa/main/version: Lower the requirements for GLES 3.0
      • +
      • mesa/main: Expose EXT_sRGB_write_control
      • +
      + +

      Guido Günther (2):

      +
        +
      • etnaviv: Make sure rs alignment checks match
      • +
      • etnaviv: fix typo in cflush_all description
      • +
      + +

      Gurchetan Singh (18):

      +
        +
      • egl: add missing #include <stddef.h> in egldevice.h
      • +
      • virgl: quadruple command buffer size
      • +
      • virgl: avoid large inline transfers
      • +
      • virgl: don't mark buffers as unclean after a write
      • +
      • virgl: texture_transfer_pool --> transfer_pool
      • +
      • virgl: remove unnessecary code
      • +
      • virgl: move texture metadata to common code
      • +
      • virgl: move virgl_resource_layout to common code
      • +
      • virgl: move vrend_get_tex_image_offset to common code
      • +
      • virgl: store layer_stride in metadata
      • +
      • virgl: consolidate transfer code
      • +
      • virgl: make transfer code with PIPE_BUFFER targets
      • +
      • virgl: make virgl_buffers use resource helpers
      • +
      • virgl: modify how we handle GL_MAP_FLUSH_EXPLICIT_BIT
      • +
      • virgl: move resource metadata into base resource
      • +
      • virgl: move resource creation / import / destruction to common code
      • +
      • virgl: don't flush an empty range
      • +
      • virgl: remove empty file
      • +
      + +

      Hanno Böck (1):

      +
        +
      • glsl/test: Fix use after free in test_optpass.
      • +
      + +

      Hyunjun Ko (1):

      +
        +
      • freedreno: implements get_sample_position
      • +
      + +

      Iago Toral Quiroga (22):

      +
        +
      • intel/compiler: fix node interference of simd16 instructions
      • +
      • nir/constant_folding: fix incorrect bit-size check
      • +
      • nir/from_ssa: fix bit-size of temporary register
      • +
      • Revert "nir/builder: Assert that intN_t immediates fit"
      • +
      • intel/compiler: fix indentation style in opt_algebraic()
      • +
      • intel/compiler: fix register allocation in opt_peephole_sel
      • +
      • intel/compiler: do not copy-propagate strided regions to ddx/ddy arguments
      • +
      • intel/compiler: move nir_lower_bool_to_int32 before nir_lower_locals_to_regs
      • +
      • compiler/nir: add a nir_b2f() helper
      • +
      • compiler/nir: add nir_fadd_imm() and nir_fmul_imm() helpers
      • +
      • compiler/spirv: handle 16-bit float in radians() and degrees()
      • +
      • compiler/spirv: implement 16-bit asin
      • +
      • compiler/spirv: implement 16-bit acos
      • +
      • compiler/spirv: implement 16-bit atan
      • +
      • compiler/spirv: implement 16-bit atan2
      • +
      • compiler/spirv: implement 16-bit exp and log
      • +
      • compiler/spirv: implement 16-bit hyperbolic trigonometric functions
      • +
      • compiler/spirv: implement 16-bit frexp
      • +
      • compiler/spirv: use 32-bit polynomial approximation for 16-bit asin()
      • +
      • anv/pipeline_cache: fix incorrect guards for NIR cache
      • +
      • anv/pipeline_cache: free NIR shader cache
      • +
      • anv/device: fix maximum number of images supported
      • +
      + +

      Ian Romanick (28):

      +
        +
      • glsl: Add warning tests for identifiers with __
      • +
      • glsl: Add pragma to disable all warnings
      • +
      • glsl: prevent qualifiers modification of predeclared variables
      • +
      • glsl: Omit redundant qualifier checks on redeclarations
      • +
      • glsl: Refactor type checking for redeclarations
      • +
      • nir: Add a saturated unsigned integer add opcode
      • +
      • i965/fs: Implement nir_op_uadd_sat
      • +
      • nir/phi_builder: Internal users should use nir_phi_builder_value_set_block_def too
      • +
      • util/slab: Rename slab_mempool typed parameters to mempool
      • +
      • util/hash_table: Add _mesa_hash_table_init function
      • +
      • nir/phi_builder: Use per-value hash table to store [block] -> def mapping
      • +
      • nir: Fix holes in nir_instr
      • +
      • nir: Release per-block metadata in nir_sweep
      • +
      • i965/vec4: Silence unused parameter warnings in vec4 compiler tests
      • +
      • i965/vec4/dce: Don't narrow the write mask if the flags are used
      • +
      • i965/fs: Eliminate unary op on operand of compare-with-zero
      • +
      • i965/vec4: Propagate conditional modifiers from more compares to other compares
      • +
      • nir/opt_peephole_select: Don't try to remove flow control around indirect loads
      • +
      • intel/compiler: More peephole select
      • +
      • nir/opt_peephole_select: Don't peephole_select expensive math instructions
      • +
      • intel/compiler: More peephole_select for pre-Gen6
      • +
      • Revert "nir/lower_indirect: Bail early if modes == 0"
      • +
      • nir/algebraic: Don't put quotes around floating point literals
      • +
      • glsl: Add utility to convert text files to C strings
      • +
      • nir: Silence zillions of unused parameter warnings in release builds
      • +
      • spirv: Add missing break
      • +
      • intel/fs: nir_op_extract_i8 extracts a byte, not a word
      • +
      • intel/fs: Fix extract_u8 of an odd byte from a 64-bit integer
      • +
      + +

      Ilia Mirkin (37):

      +
        +
      • nv50/ir: delete MINMAX instruction that is no longer in the BB
      • +
      • nv50/ir/ra: improve condition for short regs, unify with cond for 16-bit
      • +
      • nv50/ir/ra: enforce max register requirement, and change spill order
      • +
      • nv50/ir: remove dnz flag when converting MAD to ADD due to optimizations
      • +
      • nv50: always keep TSC slot 0 bound
      • +
      • nv50,nvc0: add explicit handling of PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET
      • +
      • nouveau: set texture upload budget
      • +
      • nvc0: replace use of explicit default_tsc with entry 0
      • +
      • nvc0: always keep TSC slot 0 bound to fix TXF
      • +
      • st/mesa: remove sampler associated with buffer texture in pbo logic
      • +
      • st/mesa: allow glDrawElements to work with GL_SELECT feedback
      • +
      • tgsi: add ATOMFADD operation
      • +
      • gallium: add PIPE_CAP_TGSI_ATOMFADD to indicate support
      • +
      • st/mesa: select ATOMFADD when source type is float
      • +
      • st/mesa: expose GL_NV_shader_atomic_float when ATOMFADD is supported
      • +
      • nv50/ir: add support for converting ATOMFADD to proper ir
      • +
      • nvc0: enable GL_NV_shader_atomic_float on pre-Maxwell
      • +
      • nv50,nvc0: add missing CAPs for unsupported features
      • +
      • nv30: avoid setting user_priv without setting cur_ctx
      • +
      • nv30: fix rare issue with fp unbinding not finding the bufctx
      • +
      • nv30: add support for multi-layer transfers
      • +
      • nv30: use correct helper to get blocks in y direction
      • +
      • nv30: fix some s3tc layout issues
      • +
      • nv30: disable rendering to 3D textures
      • +
      • docs: fix gallium screen cap docs
      • +
      • nv50,nvc0: mark textures dirty on fb update
      • +
      • nvc0: don't put text segment into bufctx
      • +
      • nvc0/ir: fix second tex argument after levelZero optimization
      • +
      • nv50,nvc0: add explicit settings for recent caps
      • +
      • nvc0: add support for handling indirect draws with attrib conversion
      • +
      • nvc0/ir: always use CG mode for loads from atomic-only buffers
      • +
      • nvc0: fix 3d images on kepler
      • +
      • nv50,nvc0: use condition for occlusion queries when already complete
      • +
      • nvc0: stick zero values for the compute invocation counts
      • +
      • nvc0: we have 16k-sized framebuffers, fix default scissors
      • +
      • swr: set PIPE_CAP_MAX_VARYINGS correctly
      • +
      • glsl: fix recording of variables for XFB in TCS shaders
      • +
      + +

      Indrajit Das (1):

      +
        +
      • st/va: Return correct status from vlVaQuerySurfaceStatus
      • +
      + +

      Jakob Bornecrantz (1):

      +
        +
      • virgl/vtest: Use default socket name from protocol header
      • +
      + +

      Jan Vesely (2):

      • amd: Make vgpr-spilling depend on llvm version
      • clover: Fix build after clang r348827

      Jason Ekstrand (207):

      +
        +
      • vulkan: Update the XML and headers to 1.1.91
      • +
      • intel/fs,vec4: Clean up a repeated pattern with SSBOs
      • +
      • intel/fs: Use the new nir_src_is_const and friends
      • +
      • nir: Add a read_mask helper for ALU instructions
      • +
      • intel/vec4: Use the new nir_src_is_const and friends
      • +
      • intel/analyze_ubo_ranges: Use nir_src_is_const and friends
      • +
      • anv: Use nir_src_is_const and friends in lowering code
      • +
      • intel/fs: Add an assert to optimize_frontfacing_ternary
      • +
      • nir/lower_alu_to_scalar: Don't try to lower unpack_32_2x16
      • +
      • nir/builder: Assert that intN_t immediates fit
      • +
      • nir/builder: Add iadd_imm and imul_imm helpers
      • +
      • nir/builder: Add a nir_pack/unpack/bitcast helpers
      • +
      • nir/spirv: Force 32-bit for UBO and SSBO Booleans
      • +
      • nir/glsl: Force 32-bit for UBO and SSBO Booleans
      • +
      • nir/lower_io: Add shared to get_io_offset_src
      • +
      • nir: Add alignment parameters to SSBO, UBO, and shared access
      • +
      • intel/compiler: Lower SSBO and shared loads/stores in NIR
      • +
      • intel,nir: Move gl_LocalInvocationID lowering to nir_lower_system_values
      • +
      • intel/fs,vec4: Fix a compiler warning
      • +
      • vulkan: Update the XML and headers to 1.1.93
      • +
      • anv: Expose VK_EXT_scalar_block_layout
      • +
      • anv: Put robust buffer access in the pipeline hash
      • +
      • anv/nir: Rework arguments to apply_pipeline_layout
      • +
      • nir/derefs: Add a nir_derefs_do_not_alias enum value
      • +
      • vulkan: Update the XML and headers to 1.1.95
      • +
      • nir/opcodes: Pull in the type helpers from constant_expressions
      • +
      • nir/opcodes: Rename tbool to tbool32
      • +
      • nir/algebraic: Clean up some __str__ cruft
      • +
      • nir/algebraic: Refactor codegen a bit
      • +
      • nir/algebraic: Add support for unsized conversion opcodes
      • +
      • nir/opt_algebraic: Simplify an optimization using the new search ops
      • +
      • nir/opt_algebraic: Drop bit-size suffixes from conversions
      • +
      • nir/opt_algebraic: Add 32-bit specifiers to a bunch of booleans
      • +
      • nir: Make boolean conversions sized just like the others
      • +
      • anv,radv: Disable VK_EXT_pci_bus_info
      • +
      • intel/ir: Don't allow allocating zero registers
      • +
      • spirv: Add support for MinLod
      • +
      • nir/lower_tex: Simplify lower_gradient logic
      • +
      • nir/lower_tex: Modify txd instructions instead of replacing them
      • +
      • nir/lower_tex: Add lowering for some min_lod cases
      • +
      • intel/fs: Support min_lod parameters on texture instructions
      • +
      • anv: Advertise support for MinLod on Skylake+
      • +
      • anv/pipeline: Set the correct binding count for compute shaders
      • +
      • intel/blorp: Assert that we don't re-layout a compressed surface
      • +
      • nir: Document the function inlining process
      • +
      • nir: Allow [iu]mul_high on non-32-bit types
      • +
      • nir/lower_int64: Add support for [iu]mul_high
      • +
      • nir: Add a pass for lowering integer division by constants
      • +
      • i965/vec4: Implement nir_op_uadd_sat
      • +
      • i965: Enable nir_opt_idiv_const for 32 and 64-bit integers
      • +
      • nir/lower_idiv: Use ilt instead of bit twiddling
      • +
      • nir/tgsi: Use nir_bany in ttn_kill_if
      • +
      • nir/constant_folding: Fix source bit size logic
      • +
      • nir/algebraic: Optimize x2b(xneg(a)) -> a
      • +
      • nir: Drop support for lower_b2f
      • +
      • nir/algebraic: Make an optimization more specific
      • +
      • nir: Rename Boolean-related opcodes to include 32 in the name
      • +
      • nir/constant_expressions: Rework Boolean handling
      • +
      • nir: Add support for 1-bit data types
      • +
      • nir/large_constants: Properly handle 1-bit bools
      • +
      • nir/algebraic: Generalize an optimization
      • +
      • nir: Add 1-bit Boolean opcodes
      • +
      • nir: Add a bool to int32 lowering pass
      • +
      • nir: Switch to using 1-bit Booleans for almost everything
      • +
      • nir/algebraic: Optimize 1-bit Booleans
      • +
      • nir/algebraic: Add some optimizations for D3D-style Booleans
      • +
      • radv: Fix a stupid if in gather_intrinsic_info
      • +
      • st/nir: Use nir_src_as_uint for tokens
      • +
      • vulkan: Update the XML and headers to 1.1.96
      • +
      • anv,radv: Re-enable VK_EXT_pci_bus_info
      • +
      • anv: Bump the patch version to 96
      • +
      • nir/propagate_invariant: Skip unknown vars
      • +
      • nir/linking_helpers: Look at derefs for modes
      • +
      • nir/lower_io_arrays_to_elements: Look at derefs for modes
      • +
      • nir/lower_io_to_scalar: Look at derefs for modes
      • +
      • nir/lower_wpos_center: Look at derefs for modes
      • +
      • nir/copy_prop_vars: Get modes directly from derefs
      • +
      • nir/dead_write_vars: Get modes directly from derefs
      • +
      • radv/query: Add a nir_test_flag helper
      • +
      • radv/query: Use 1-bit booleans in query shaders
      • +
      • intel/blorp: Be more conservative about copying clear colors
      • +
      • vulkan: Update the XML and headers to 1.1.97
      • +
      • glsl_type: Support serializing 8 and 16-bit types
      • +
      • spirv: Handle any bit size in vector_insert/extract
      • +
      • anv/apply_pipeline_layout: Set the cursor in lower_res_reindex_intrinsic
      • +
      • spirv: Sign-extend array indices
      • +
      • spirv: Emit NIR deref instructions on-the-fly
      • +
      • nir/builder: Add nir_i2i and nir_u2u helpers which take a bit size
      • +
      • spirv: Handle arbitrary bit sizes for deref array indices
      • +
      • nir/validate: Require array indices to match the deref bit size
      • +
      • nir: Allow storing to shader_storage
      • +
      • nir: Distinguish between normal uniforms and UBOs
      • +
      • glsl_type: Drop the glsl_get_array_instance C helper
      • +
      • glsl_type: Add a C wrapper to get struct field offsets
      • +
      • glsl_type: Simplify glsl_channel_type
      • +
      • glsl_type: Add support for explicitly laid out matrices and arrays
      • +
      • spirv: Propagate layout decorations to created glsl_types
      • +
      • nir: Move propagation of cast derefs to a new nir_opt_deref pass
      • +
      • nir: Add a ptr_as_array deref type
      • +
      • nir/validate: Don't allow derefs in if conditions
      • +
      • nir/opt_deref: Properly optimize ptr_as_array derefs
      • +
      • nir/deref: Support casts and ptr_as_array in comparisons
      • +
      • nir/deref: Skip over casts in fixup_deref_modes
      • +
      • nir/remove_dead_variables: Properly handle deref casts
      • +
      • nir/validate: Allow derefs in phi nodes
      • +
      • nir/intrinsics: Allow deref sources to consume anything
      • +
      • nir/intrinsics: Add access flags to load/store_deref
      • +
      • nir/validate: Allow array derefs on vectors in more modes
      • +
      • nir/lower_io: Add "explicit" IO lowering
      • +
      • nir/vulkan: Add a descriptor type to vulkan resource intrinsics
      • +
      • spirv: Add error checking for Block and BufferBlock decorations
      • +
      • spirv: Choose atomic deref type with pointer_uses_ssa_offset
      • +
      • spirv: Add explicit pointer types
      • +
      • spirv: Make better use of vtn_pointer_uses_ssa_offset
      • +
      • spirv: Add support for using derefs for UBO/SSBO access
      • +
      • anv: Enable the new deref-based UBO/SSBO path
      • +
      • spirv: Sort supported capabilities
      • +
      • anv: Sort properties and features switch statements
      • +
      • nir: Add some more int64 lowering helpers
      • +
      • anv/pipeline: Constant fold after apply_pipeline_layout
      • +
      • anv/pipeline: Move wpos and input attachment lowering to lower_nir
      • +
      • compiler/types: Serialize/deserialize subpass input types correctly
      • +
      • anv/pipeline: Hash shader modules and spec constants separately
      • +
      • anv/pipeline_cache: Add support for caching NIR
      • +
      • anv/pipeline: Cache the pre-lowered NIR
      • +
      • intel/peephole_ffma: Fix swizzle propagation
      • +
      • spirv: Whack sampler/image pointers to uniform
      • +
      • spirv: Contain the GLSLang issue #179 workaround to old GLSLang
      • +
      • intel/nir: Call nir_opt_deref in brw_nir_optimize
      • +
      • nir/gcm: Support deref instructions
      • +
      • spirv: Emit switch conditions on-the-fly
      • +
      • intel/blorp: Add two more filter modes
      • +
      • anv: Rename has_resolve to has_color_resolve
      • +
      • anv/blorp: Refactor MSAA resolves into an exportable helper function
      • +
      • anv: Move resolve_subpass to genX_cmd_buffer.c
      • +
      • anv: Implement VK_KHR_depth_stencil_resolve
      • +
      • nir: Add a bool to float32 lowering pass
      • +
      • intel/eu: Stop overriding exec sizes in send_indirect_message
      • +
      • intel/fs: Don't touch accumulator destination while applying regioning alignment rule
      • +
      • anv: Re-sort the extensions list
      • +
      • anv: Only parse pImmutableSamplers if the descriptor has samplers
      • +
      • relnotes: Add newly added Vulkan extensions
      • +
      • anv/pipeline: Add a pdevice helper variable
      • +
      • nir: Mark deref UBO and SSBO access as non-scalar
      • +
      • spirv: Update the JSON and headers from Khronos master
      • +
      • anv: Always emit at least one vertex element
      • +
      • spirv: Initialize struct member offsets to -1
      • +
      • spirv: Only split blocks
      • +
      • spirv: Only set interface_type on blocks
      • +
      • nir: Preserve offsets in lower_io_to_scalar_early
      • +
      • nir/xfb: Fix offset accounting for dvec3/4
      • +
      • nir/xfb: Properly handle arrays of blocks
      • +
      • anv: Add but do not enable VK_EXT_transform_feedback
      • +
      • anv: Add pipeline cache support for xfb_info
      • +
      • anv: Implement the basic form of VK_EXT_transform_feedback
      • +
      • anv: Implement vkCmdDrawIndirectByteCountEXT
      • +
      • anv: Implement CmdBegin/EndQueryIndexed
      • +
      • genxml: Add SO_PRIM_STORAGE_NEEDED and SO_NUM_PRIMS_WRITTEN
      • +
      • anv: Implement transform feedback queries
      • +
      • nir: Add load/store/atomic global intrinsics
      • +
      • nir/lower_io: Add a 32 and 64-bit global address formats
      • +
      • nir/lower_io: Add support for nir_var_mem_global
      • +
      • nir/validate: Allow array derefs of vectors for nir_var_mem_global
      • +
      • nir: Allow SSBOs and global to alias
      • +
      • spirv: Drop a bogus assert
      • +
      • spirv: Handle OpTypeForwardPointer
      • +
      • spirv: Implement OpConvertPtrToU and OpConvertUToPtr
      • +
      • spirv: Add support for SPV_EXT_physical_storage_buffer
      • +
      • intel/fs: Get rid of fs_inst::equals
      • +
      • intel/defines: Explicitly cast to uint32_t in SET_FIELD and SET_BITS
      • +
      • intel/fs: Handle IMAGE_SIZE in size_read() and is_send_from_grf()
      • +
      • intel/fs: Take an explicit exec size in brw_surface_payload_size()
      • +
      • intel/eu: Add has_simd4x2 bools to surface_write functions
      • +
      • intel/eu: Rework surface descriptor helpers
      • +
      • intel/fs: Add a generic SEND opcode
      • +
      • intel/fs: Use SHADER_OPCODE_SEND for surface messages
      • +
      • intel/fs: Use a logical opcode for IMAGE_SIZE
      • +
      • intel/fs: Use SHADER_OPCODE_SEND for texturing on gen7+
      • +
      • intel/fs: Use SHADER_OPCODE_SEND for varying UBO pulls on gen7+
      • +
      • intel/eu: Use GET_BITS in brw_inst_set_send_ex_desc
      • +
      • intel/eu/validate: SEND restrictions also apply to SENDC
      • +
      • intel/eu: Add more message descriptor helpers
      • +
      • intel/disasm: Rework SEND decoding to use descriptors
      • +
      • intel/inst: Fix the ia16_addr_imm helpers
      • +
      • intel/inst: Indent some code
      • +
      • intel/eu: Add support for the SENDS[C] messages
      • +
      • intel/disasm: Properly disassemble split sends
      • +
      • intel/fs: Support SENDS in SHADER_OPCODE_SEND
      • +
      • intel/fs: Add interference between SENDS sources
      • +
      • intel/fs: Use split sends for surface writes on gen9+
      • +
      • intel/fs: Do the grf127 hack on SIMD8 instructions in SIMD16 mode
      • +
      • nir/deref: Rematerialize parents in rematerialize_derefs_in_use_blocks
      • +
      • intel/fs: Bail in optimize_extract_to_float if we have modifiers
      • +
      • compiler/types: Add a contains_64bit helper
      • +
      • nir/xfb: Properly align 64-bit values
      • +
      • nir: Rewrite lower_clip_cull_distance_arrays to do a lot less lowering
      • +
      • nir/xfb: Work in terms of components rather than slots
      • +
      • nir/xfb: Handle compact arrays in gather_xfb_info
      • +
      • nir/lower_clip_cull: Fix an incorrect assert
      • +
      • anv: Count surfaces for non-YCbCr images in GetDescriptorSetLayoutSupport
      • +
      • spirv: OpImageQueryLod requires a sampler
      • +
      • intel,nir: Lower TXD with min_lod when the sampler index is not < 16
      • +
      • spirv: Pull offset/stride from the pointer for OpArrayLength
      • +
      • anv: Refactor descriptor pushing a bit
      • +
      • anv: Take references to push descriptor set layouts
      • +
      • nir: Add a pass for lowering IO back to vector when possible
      • +
      • intel/nir: Vectorize all IO
      • +
      + +

      Jiang, Sonny (1):

      • radeonsi: add compute_last_block to configure the partial block fields

      Jon Turney (3):

      • glx: Fix compilation with GLX_USE_WINDOWSGL
      • appveyor: put build steps in a script, rather than inline in appveyor.yml
      • appveyor: Add a Cygwin build script

      Jonathan Marek (42):

      +
        +
      • nir: add fceil lowering
      • +
      • freedreno: a2xx: fd2_draw update
      • +
      • freedreno/a2xx: fix POINT_MINMAX_MAX overflow
      • +
      • freedreno: add missing a20x ids
      • +
      • freedreno/a2xx: set VIZ_QUERY_ID on a20x
      • +
      • freedreno/a2xx: Compute depth base in gmem correctly
      • +
      • freedreno: a2xx texture update
      • +
      • freedreno: use GENERIC instead of TEXCOORD for blit program
      • +
      • freedreno: use MSM_BO_SCANOUT with scanout buffers
      • +
      • glsl/nir: int constants as float for native_integers=false
      • +
      • glsl/nir: ftrunc for native_integers=false float to int cast
      • +
      • glsl/nir: keep bool types when native_integers=false
      • +
      • freedreno: a2xx: cleanup init_shader_const
      • +
      • freedreno: a2xx: cleanup REG_A2XX_PA_CL_VTE_CNTL
      • +
      • freedreno: a2xx: fix gmem2mem viewport
      • +
      • freedreno: a2xx: fix VERTEX_REUSE/DEALLOC on a20x
      • +
      • freedreno: a2xx: fix non-zero texture base offsets
      • +
      • freedreno: a2xx: sysmem rendering
      • +
      • freedreno: a2xx: NIR backend
      • +
      • freedreno: a2xx: insert scalar MOV to allow 2 source scalar
      • +
      • freedreno: a2xx: add ir2 copy propagation
      • +
      • freedreno: a2xx: add partial lower_scalar pass for ir2
      • +
      • freedreno: add renderonly scanout
      • +
      • freedreno: a2xx: ir2 cleanup
      • +
      • freedreno: a2xx: enable early-Z testing
      • +
      • freedreno: update a2xx registers
      • +
      • freedreno: a2xx: a20x hw binning
      • +
      • freedreno: a2xx: clear fixes and fast clear path
      • +
      • freedreno: a2xx: minor solid_vertexbuf fixups
      • +
      • freedreno: a2xx: add perfcntrs
      • +
      • kmsro: Add freedreno renderonly support
      • +
      • st/dri: invalidate_resource depth/stencil before flush_resource
      • +
      • mesa/st: wire up DiscardFramebuffer
      • +
      • freedreno: fix invalidate logic
      • +
      • freedreno: fix depth usage logic
      • +
      • freedreno: fix sysmem rendering being used when clear is used
      • +
      • freedreno: a2xx: fix fast clear
      • +
      • freedreno: a2xx: don't write 4th vertex in mem2gmem
      • +
      • freedreno: a2xx: add use_hw_binning function
      • +
      • freedreno: a2xx: fix fast clear for some gmem configurations
      • +
      • freedreno: a2xx: fix mipmapping for NPOT textures
      • +
      • freedreno: use renderonly path for buffers allocated with modifiers
      • +
      + +

      Jordan Justen (3):

      • docs: Document GitLab merge request process (email alternative)
      • i965/genX_state: Add register access functions
      • i965/compute: Emit GPGPU_WALKER in genX_state_upload

      Jose Maria Casanova Crespo (1):

      • glsl: TCS outputs can not be transform feedback candidates on GLES

      José Fonseca (2):

      • appveyor: Revert commits adding Cygwin support.
      • scons: Workaround failures with MSVC when using SCons 3.0.[2-4].

      Juan A. Suarez Romero (17):

      +
        +
      • docs: add release notes for 18.2.5
      • +
      • docs: add sha256 checksums for 18.2.5
      • +
      • docs: update calendar, add news item and link release notes for 18.2.5
      • +
      • docs: add release notes for 18.2.6
      • +
      • docs: add sha256 checksums for 18.2.6
      • +
      • docs: update calendar, add news item and link release notes for 18.2.6
      • +
      • docs: extends 18.2 lifecycle
      • +
      • docs: add release notes for 18.2.7
      • +
      • docs: add sha256 checksums for 18.2.7
      • +
      • docs: update calendar, add news item and link release notes for 18.2.7
      • +
      • docs: add release notes for 18.2.8
      • +
      • docs: add sha256 checksums for 18.2.8
      • +
      • docs: update calendar, add news item and link release notes for 18.2.8
      • +
      • anv/cmd_buffer: check for NULL framebuffer
      • +
      • genxml: add missing field values for 3DSTATE_SF
      • +
      • anv: advertise 8 subpixel precision bits
      • +
      • anv: destroy descriptor sets when pool gets reset
      • +
      + +

      Józef Kucia (1):

      • nir: Fix assert in print_intrinsic_instr().

      Karol Herbst (35):

      +
        +
      • nv50/ir: print color masks of tex instructions
      • +
      • nv50/ra: add condenseDef overloads for partial condenses
      • +
      • nv50/ir: add scalar field to TexInstructions
      • +
      • gm107/ir: use scalar tex instructions where possible
      • +
      • gm107/ir: fix compile time warning in getTEXSMask
      • +
      • nir: add const_index parameters to system value builder function
      • +
      • nir: replace nir_load_system_value calls with appropiate builder functions
      • +
      • nir/spirv: cast shift operand to u32
      • +
      • nv50,nvc0: Fix gallium nine regression regarding sampler bindings
      • +
      • nv50/ir: initialize relDegree staticly
      • +
      • nouveau: use atomic operations for driver statistics
      • +
      • nv50/ir: fix use-after-free in ConstantFolding::visit
      • +
      • nir: rename global/local to private/function memory
      • +
      • nv50/ir: disable tryCollapseChainedMULs in ConstantFolding for precise instructions
      • +
      • gm107/ir: disable TEXS for tex with derivAll set
      • +
      • nir: rename nir_var_private to nir_var_shader_temp
      • +
      • nir: rename nir_var_function to nir_var_function_temp
      • +
      • nir: rename nir_var_ubo to nir_var_mem_ubo
      • +
      • nir: rename nir_var_ssbo to nir_var_mem_ssbo
      • +
      • nir: rename nir_var_shared to nir_var_mem_shared
      • +
      • nir/spirv: handle SpvStorageClassCrossWorkgroup
      • +
      • glsl/lower_output_reads: set invariant and precise flags on temporaries
      • +
      • nir: replace more nir_load_system_value calls with builder functions
      • +
      • nir/validate: allow to check against a bitmask of bit_sizes
      • +
      • nir: add legal bit_sizes to intrinsics
      • +
      • nir: add bit_size parameter to system values with multiple allowed bit sizes
      • +
      • mesa: add MESA_SHADER_KERNEL
      • +
      • vtn: handle SpvExecutionModelKernel
      • +
      • nir/spirv: handle ContractionOff execution mode
      • +
      • gk104/ir: Use the new rcp/rsq in library
      • +
      • gm107/ir: add fp64 rcp
      • +
      • gm107/ir: add fp64 rsq
      • +
      • gallium: add PIPE_CAP_MAX_VARYINGS
      • +
      • st/mesa: require RGBA2, RGB4, and RGBA4 to be renderable
      • +
      • nir/opt_if: don't mark progress if nothing changes
      • +
      + +

      Kenneth Graunke (41):

      +
        +
      • intel: Use a URB start offset of 0 for disabled stages.
      • +
      • st/mesa: Pull nir_lower_wpos_ytransform work into a helper function.
      • +
      • st/nir: Drop unused parameter from st_nir_assign_uniform_locations().
      • +
      • st/mesa: Don't record garbage streamout information in the non-SSO case.
      • +
      • i915: Delete swizzling detection logic.
      • +
      • nir: Use nir_shader_get_entrypoint in nir_lower_clip_vs().
      • +
      • nir: Inline lower_clip_vs() into nir_lower_clip_vs().
      • +
      • nir: Save nir_variable pointers in nir_lower_clip_vs rather than locs.
      • +
      • nir: Make nir_lower_clip_vs optionally work with variables.
      • +
      • i965: Allow only one slot of clip distances to be set on Gen4-5.
      • +
      • i965: Use a 'nir' temporary rather than poking at brw_program
      • +
      • i965: Do NIR shader cloning in the caller.
      • +
      • intel/compiler: Use nir's info when checking uses_streams.
      • +
      • intel/blorp: Expand blorp_address::offset to be 64 bits.
      • +
      • i965: Delete dead brw_meta_resolve_color prototype.
      • +
      • i965: Flip arguments to load_register_reg helpers.
      • +
      • genxml: Consistently use a numeric "MOCS" field
      • +
      • i965: Don't override subslice count to 4 on Gen11.
      • +
      • st/mesa: Drop dead 'passthrough_fs' field.
      • +
      • st/mesa: Drop !passColor optimization in drawpixels shaders.
      • +
      • st/mesa: Don't open code the drawpixels vertex shader.
      • +
      • st/mesa: Combine the DrawPixels and Bitmap passthrough VS programs.
      • +
      • st/nir: Gather info after applying lowering FS variant features
      • +
      • st/nir: Drop unused gl_program parameter in VS input handling helper.
      • +
      • nir: Fix gl_nir_lower_samplers_as_deref's structure type handling.
      • +
      • nir: Make gl_nir_lower_samplers use gl_nir_lower_samplers_as_deref
      • +
      • blorp: Add blorp_get_surface_address to the driver interface.
      • +
      • blorp: Pass the batch to lookup/upload_shader instead of context
      • +
      • nir: Allow a non-existent sampler deref in nir_lower_samplers_as_deref
      • +
      • st/nir: Lower TES gl_PatchVerticesIn to a constant if linked with a TCS.
      • +
      • i965: Drop mark_surface_used mechanism.
      • +
      • st/mesa: Make an enum for pipeline statistics query result indices.
      • +
      • st/mesa: Rearrange PIPE_QUERY_PIPELINE_STATISTICS result fetching.
      • +
      • gallium: Add the ability to query a single pipeline statistics counter
      • +
      • st/mesa: Optionally override RGB/RGBX dst alpha blend factors
      • +
      • gallium: Add forgotten docs for PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS.
      • +
      • st/mesa: Limit GL_MAX_[NATIVE_]PROGRAM_PARAMETERS_ARB to 2048
      • +
      • anv: Put MOCS in the correct location
      • +
      • nir: Don't reassociate add/mul chains containing only constants
      • +
      • compiler: Mark clip/cull distance arrays as compact before lowering.
      • +
      • spirv: Eliminate dead input/output variables after translation.
      • +
      + +

      Kirill Burtsev (1):

      • loader: free error state, when checking the drawable type

      Kristian H. Kristensen (14):

      +
        +
      • freedreno/a6xx: Clear z32 and separate stencil with blitter
      • +
      • freedreno/a6xx: Move restore blits to IB
      • +
      • freedreno/a6xx: Move resolve blits to an IB
      • +
      • freedreno/a6xx: Clear gmem buffers at flush time
      • +
      • gallium: Android build fixes
      • +
      • mesa: Add core support for EXT_multisampled_render_to_texture{,2}
      • +
      • gallium: Add new PIPE_CAP_SURFACE_SAMPLE_COUNT
      • +
      • st/mesa: Add support for EXT_multisampled_render_to_texture
      • +
      • freedreno: Add support for EXT_multisampled_render_to_texture
      • +
      • freedreno: Fix the Makefile.am fix
      • +
      • glapi: fixup EXT_multisampled_render_to_texture dispatch
      • +
      • freedreno: Synchronize batch and flush for staging resource
      • +
      • freedreno/a6xx: Turn on texture tiling by default
      • +
      • freedreno/a6xx: Emit blitter dst with OUT_RELOCW
      • +
      + +

      Leo Liu (2):

      • st/va: fix the incorrect max profiles report
      • st/va/vp9: set max reference as default of VP9 reference number

      Lionel Landwerlin (47):

      +
        +
      • intel/dump_gpu: add missing gdb option
      • +
      • intel/sanitize_gpu: add help/gdb options to wrapper
      • +
      • intel/sanitize_gpu: deal with non page multiple buffer sizes
      • +
      • intel/sanitize_gpu: add debug message on mmap fail
      • +
      • intel/decoders: fix instruction base address parsing
      • +
      • anv: stub internal android code
      • +
      • anv/android: mark gralloc allocated BOs as external
      • +
      • intel/dump_gpu: move output option together
      • +
      • intel/dump_gpu: add platform option
      • +
      • intel/aub_read: remove useless breaks
      • +
      • nir/lower_tex: add alpha channel parameter for yuv lowering
      • +
      • nir/lower_tex: Add AYUV lowering support
      • +
      • dri: add AYUV format
      • +
      • i965: add support for sampling from AYUV
      • +
      • anv: simplify internal address offset
      • +
      • anv/image: remove unused parameter
      • +
      • anv/lower_ycbcr: make sure to set 0s on all components
      • +
      • anv: associate vulkan formats with aspects
      • +
      • anv: use image aspects rather than computed ones
      • +
      • anv: move helper function internally
      • +
      • egl/dri: fix error value with unknown drm format
      • +
      • intel/decoders: read ring buffer length
      • +
      • intel/aubinator: fix ring buffer pointer
      • +
      • intel/aub_viewer: fix dynamic state printing
      • +
      • intel/aub_viewer: Print blend states properly
      • +
      • anv: flush pipeline before query result copies
      • +
      • anv/query: flush render target before copying results
      • +
      • anv: don't do partial resolve on layer > 0
      • +
      • intel/aub_viewer: fix shader get_bo
      • +
      • intel/aub_viewer: fixup 0x address prefix
      • +
      • intel/aub_viewer: print address of missing shader
      • +
      • intel/aub_viewer: fix shader view
      • +
      • intel/aub_viewer: fold binding/sampler table items
      • +
      • intel/aub_viewer: highlight true booleans
      • +
      • i965: limit VF caching workaround to gen8/9/10
      • +
      • intel/blorp: emit VF caching workaround before 3DSTATE_VERTEX_BUFFERS
      • +
      • i965: include draw_params/derived_draw_params for VF cache workaround
      • +
      • i965: add CS stall on VF invalidation workaround
      • +
      • anv: explictly specify format for blorp ccs/mcs op
      • +
      • anv: flush fast clear colors into compressed surfaces
      • +
      • anv: fix invalid binding table index computation
      • +
      • anv: narrow flushing of the render target to buffer writes
      • +
      • anv: document cache flushes & invalidations
      • +
      • intel/genxml: add missing MI_PREDICATE compare operations
      • +
      • vulkan: make generated enum to strings helpers available from c++
      • +
      • intel: fix urb size for CFL GT1
      • +
      • intel/compiler: use correct swizzle for replacement
      • +
      + +

      Lucas Stach (6):

      +
        +
      • etnaviv: use dummy RT buffer when rendering without color buffer
      • +
      • etnaviv: use surface format directly
      • +
      • st/dri: allow both render and sampler compatible dma-buf formats
      • +
      • st/dri: replace format conversion functions with single mapping table
      • +
      • etnaviv: enable full overwrite in a few more cases
      • +
      • etnaviv: annotate variables only used in debug build
      • +
      + +

      Marek Olšák (94):

      +
        +
      • st/va: fix incorrect use of resource_destroy
      • +
      • ac/surface: remove the overallocation workaround for Vega12
      • +
      • radeonsi: use better DCC clear codes
      • +
      • radeonsi: don't set the CB clear color registers for 0/1 clear colors on Raven2
      • +
      • gallium: add PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET
      • +
      • radeonsi: stop command submission with PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET only
      • +
      • st/mesa: disable L3 thread pinning
      • +
      • mesa: mark GL_SR8_EXT non-renderable on GLES
      • +
      • radeonsi: fix video APIs on Raven2
      • +
      • gallium/u_tests: add a compute shader test that clears an image
      • +
      • gallium/u_tests: fix MSVC build by using old-style zero initializers
      • +
      • mesa/glthread: pass the function name to _mesa_glthread_restore_dispatch
      • +
      • mesa/glthread: enable immediate mode
      • +
      • drirc: enable glthread for Talos Principle
      • +
      • st/mesa: regularly re-pin driver threads to the CCX where the app thread is
      • +
      • st/mesa: pin driver threads to a fixed CCX when glthread is enabled
      • +
      • radeonsi: don't send data after write-confirm with BOTTOM_OF_PIPE_TS
      • +
      • radeonsi: go back to using bottom-of-pipe for beginning of TIME_ELAPSED
      • +
      • winsys/amdgpu: fix a buffer leak in amdgpu_bo_from_handle
      • +
      • winsys/amdgpu: fix a device handle leak in amdgpu_winsys_create
      • +
      • radeonsi: clean up primitive binning enablement
      • +
      • radeonsi: use structured buffer intrinsics for image views
      • +
      • radeonsi: fix is_oneway_access_only for image stores
      • +
      • radeonsi: small cleanup for memory opcodes
      • +
      • tgsi/scan: add more information about bindless usage
      • +
      • radeonsi/nir: parse more information about bindless usage
      • +
      • radeonsi: fix is_oneway_access_only for bindless images
      • +
      • winsys/amdgpu: always reclaim/release slabs if there is not enough memory
      • +
      • radeonsi: generalize the slab allocator code to allow layered slab allocators
      • +
      • winsys/amdgpu: optimize slab allocation for 2 MB amdgpu page tables
      • +
      • winsys/amdgpu: clean up code around BO VM alignment
      • +
      • winsys/amdgpu: use >= instead of > for VM address alignment
      • +
      • winsys/amdgpu: increase the VM alignment to the MSB of the size for Gfx9
      • +
      • winsys/amdgpu: overallocate buffers for faster address translation on Gfx9
      • +
      • winsys/amdgpu,radeon: pass vm_alignment to buffer_from_handle
      • +
      • winsys/amdgpu: use optimal VM alignment for imported buffers
      • +
      • winsys/amdgpu: use optimal VM alignment for CPU allocations
      • +
      • radeonsi: allow si_cp_dma_clear_buffer to clear GDS from any IB
      • +
      • winsys/amdgpu: add support for allocating GDS and OA resources
      • +
      • radeonsi: add memory management stress tests for GDS
      • +
      • Revert "winsys/amdgpu: overallocate buffers for faster address translation on Gfx9"
      • +
      • st/mesa: expose GL_OES_texture_view
      • +
      • mesa: expose GL_EXT_texture_view as an alias of GL_OES_texture_view
      • +
      • mesa: expose EXT_texture_compression_rgtc on GLES
      • +
      • mesa: expose EXT_texture_compression_bptc in GLES
      • +
      • mesa: expose AMD_texture_texture4
      • +
      • st/mesa: expose EXT_render_snorm on GLES
      • +
      • radeonsi: don't emit redundant PKT3_NUM_INSTANCES packets
      • +
      • radeonsi: call si_fix_resource_usage for the GS copy shader as well
      • +
      • radeonsi: make si_cp_wait_mem more configurable
      • +
      • radeonsi: use u_decomposed_prims_for_vertices instead of u_prims_for_vertices
      • +
      • radeonsi: remove unused variables in si_insert_input_ptr
      • +
      • radeonsi: always unmap texture CPU mappings on 32-bit CPU architectures
      • +
      • ac: remove unused variable from ac_build_ddxy
      • +
      • st/mesa: unify window-system renderbuffer initialization
      • +
      • st/mesa: don't reference pipe_surface locally in PBO code
      • +
      • st/mesa: don't leak pipe_surface if pipe_context is not current
      • +
      • st/dri: fix dri2_format_table for argb1555 and rgb565
      • +
      • radeonsi: also apply the GS hang workaround to draws without tessellation
      • +
      • winsys/amdgpu: fix whitespace
      • +
      • winsys/amdgpu: use the new BO list API
      • +
      • radeonsi: fix a u_blitter crash after a shader with FBFETCH
      • +
      • radeonsi: fix rendering to tiny viewports where the viewport center is > 8K
      • +
      • radeonsi: use buffer_store_format_x & xy
      • +
      • radeonsi: remove redundant call to emit_cache_flush in compute clear/copy
      • +
      • radeonsi: compile clear and copy buffer compute shaders on demand
      • +
      • radeonsi: correct WRITE_DATA.DST_SEL definitions
      • +
      • radeonsi: fix the top-of-pipe fence on SI
      • +
      • radeonsi: don't use WRITE_DATA.DST_SEL == MEM_GRBM on >= CIK
      • +
      • radeonsi: move PKT3_WRITE_DATA generation into a helper function
      • +
      • gallium: add SINT formats to have exact counterparts to SNORM formats
      • +
      • gallium/util: add util_format_snorm8_to_sint8 (from radeonsi)
      • +
      • radeonsi: disable render cond & pipeline stats for internal compute dispatches
      • +
      • radeonsi: rename rscreen -> sscreen
      • +
      • radeonsi: rename rview -> sview
      • +
      • winsys/amdgpu: rename rfence, rsrc, rdst -> afence, asrc, adst
      • +
      • radeonsi: remove r600 from comments
      • +
      • radeonsi: rename r600_resource -> si_resource
      • +
      • radeonsi: rename rquery -> squery
      • +
      • radeonsi: rename rsrc -> ssrc, rdst -> sdst
      • +
      • radeonsi: rename rbo, rbuffer to buf or buffer
      • +
      • radeonsi: rename rfence -> sfence
      • +
      • st/mesa: purge framebuffers when unbinding a context
      • +
      • st/mesa: fix PRIMITIVES_GENERATED query after the "pipeline stat single" changes
      • +
      • ac: use the correct LLVM processor name on Raven2
      • +
      • radeonsi: fix crashing performance counters (division by zero)
      • +
      • meson: drop the xcb-xrandr version requirement
      • +
      • gallium/u_threaded: fix EXPLICIT_FLUSH for flush offsets > 0
      • +
      • radeonsi: fix EXPLICIT_FLUSH for flush offsets > 0
      • +
      • winsys/amdgpu: don't drop manually added fence dependencies
      • +
      • radeonsi: add driconf option radeonsi_enable_nir
      • +
      • radeonsi: always enable NIR for Civilization 6 to fix corruption
      • +
      • driconf: add Civ6Sub executable for Civilization 6
      • +
      • tgsi: don't set tgsi_info::uses_bindless_images for constbufs and hw atomics
      • +
      + +

      Mario Kleiner (4):

      • radeonsi: Fix use of 1- or 2- component GL_DOUBLE vbo's.
      • egl/wayland: Allow client->server format conversion for PRIME offload. (v2)
      • egl/wayland-drm: Only announce formats via wl_drm which the driver supports.
      • drirc: Add sddm-greeter to adaptive_sync blacklist.

      Mark Janes (3):

      • Revert "i965/batch: avoid reverting batch buffer if saved state is an empty"
      • Revert "Implementation of egl dri2 drivers for MESA_query_driver"
      • Revert "Implement EGL API for MESA_query_driver"

      Mathias Fröhlich (17):

      • mesa: Remove needless indirection in some draw functions.
      • mesa: Rename gl_vertex_array_object::_Enabled -> Enabled.
      • mesa: Use the gl_vertex_array_object::Enabled bitfield.
      • mesa: Use gl_vertex_array_object::Enabled for glGet.
      • mesa: Remove gl_array_attributes::Enabled.
      • mesa: Work with bitmasks when en/dis-abling VAO arrays.
      • mesa: Unify glEdgeFlagPointer data type.
      • nouveau: Use gl_array_attribute::_ElementSize.
      • tnl: Use gl_array_attribute::_ElementSize.
      • mesa: Factor out struct gl_vertex_format.
      • mesa: Remove unneeded bitfield widths from the VAO.
      • mesa/st: Only care about the uploader if it was used.
      • mesa/st: Only unmap the uploader that was actually used.
      • mesa/st: Factor out array and buffer setup from st_atom_array.c.
      • mesa/st: Avoid extra references in the feedback draw function scope.
      • mesa/st: Use binding information from the VAO in feedback rendering.
      • mesa/st: Make st_pipe_vertex_format static.

      Matt Turner (41):

      +
        +
      • util/ralloc: Switch from DEBUG to NDEBUG
      • +
      • util/ralloc: Make sizeof(linear_header) a multiple of 8
      • +
      • nir: Call fflush() at the end of nir_print_shader()
      • +
      • glsl: Remove unused member variable
      • +
      • gallivm: Use nextafterf(0.5, 0.0) as rounding constant
      • +
      • mesa: Revert INTEL_fragment_shader_ordering support
      • +
      • Revert "st/mesa: silenced unhanded enum warning in st_glsl_to_tgsi.cpp"
      • +
      • i965/fs: Handle V/UV immediates in dump_instructions()
      • +
      • glsl: Add function support to glsl_to_nir
      • +
      • glsl: Create file to contain software fp64 functions
      • +
      • glsl: Add "built-in" functions to do ffma(fp64)
      • +
      • glsl: Add "built-in" functions to do fmin/fmax(fp64)
      • +
      • glsl: Add "built-in" function to do ffloor(fp64)
      • +
      • glsl: Add "built-in" functions to do ffract(fp64)
      • +
      • glsl: Add "built-in" functions to convert bool to double
      • +
      • nir: Rework nir_lower_constant_initializers() to handle functions
      • +
      • nir: Tag entrypoint for easy recognition by nir_shader_get_entrypoint()
      • +
      • nir: Wire up int64 lowering functions
      • +
      • nir: Implement lowering of 64-bit shift operations
      • +
      • nir: Add and set info::uses_64bit
      • +
      • nir: Create nir_builder in nir_lower_doubles_impl()
      • +
      • nir: Add lowering support for 64-bit operations to software
      • +
      • nir: Unset metadata debug bit if no progress made
      • +
      • intel/compiler: Lower 64-bit MOV/SEL operations
      • +
      • intel/compiler: Split 64-bit MOV-indirects if needed
      • +
      • intel/compiler: Avoid false positive assertions
      • +
      • intel/compiler: Rearrange code to avoid future problems
      • +
      • intel/compiler: Prevent warnings in the following patch
      • +
      • intel/compiler: Expand size of the 'nr' field
      • +
      • intel/compiler: Heap-allocate temporary storage
      • +
      • i965: Compile fp64 software routines and lower double-ops
      • +
      • i965: Enable 64-bit GLSL extensions
      • +
      • i965: Compile fp64 funcs only if we do not have 64-bit hardware support
      • +
      • intel/compiler: Reset default flag register in brw_find_live_channel()
      • +
      • gallium: Enable ASIMD/NEON on aarch64.
      • +
      • gallivm: Return true from arch_rounding_available() if NEON is available
      • +
      • intel/compiler: Add a file-level description of brw_eu_validate.c
      • +
      • i965: Always compile fp64 funcs when needed
      • +
      • nir: Optimize double-precision lower_round_even()
      • +
      • intel/compiler: Avoid propagating inequality cmods if types are different
      • +
      • intel/compiler/test: Add unit test for mismatched signedness comparison
      • +
      + +

      Mauro Rossi (6):

      • android: gallium/auxiliary: add include to get u_debug.h header
      • android: radv: add libmesa_git_sha1 static dependency
      • android: amd/addrlib: update Mesa's copy of addrlib
      • android: st/mesa: fix building error due to sched_getcpu()
      • android: anv: fix generated files depedencies (v2)
      • android: anv: fix libexpat shared dependency

      Maya Rashish (2):

      • radeon: fix printf format specifier.
      • configure: fix test portability

      Michal Srb (2):

      • gallium: Constify drisw_loader_funcs struct
      • drisw: Use separate drisw_loader_funcs for shm

      Michel Dänzer (4):

      • winsys/amdgpu: Stop using amdgpu_bo_handle_type_kms_noimport
      • winsys/amdgpu: Pull in LLVM CFLAGS
      • amd/common: Restore v4i32 suffix for llvm.SI.load.const intrinsic
      • loader/dri3: Use strlen instead of sizeof for creating VRR property atom

      Neha Bhende (1):

      • st/mesa: Fix topogun-1.06-orc-84k-resize.trace crash

      Neil Roberts (4):

      • freedreno: Add .dir-locals to the common directory
      • spirv/nir: handle location decorations on block interface members
      • glsl_types: Rename parameter of glsl_count_attribute_slots
      • spirv: Don't use special semantics when counting vertex attribute size

      Nicholas Kazlauskas (5):

      • util: Get program name based on path when possible
      • util: Add adaptive_sync driconf option
      • drirc: Initial blacklist for adaptive sync
      • loader/dri3: Enable adaptive_sync via _VARIABLE_REFRESH property
      • radeonsi: Enable adaptive_sync by default for radeon

      Nicolai Hähnle (37):

      +
        +
      • radv: include LLVM IR in the VK_AMD_shader_info "disassembly"
      • +
      • radeonsi: fix an out-of-bounds read reported by ASAN
      • +
      • winsys/amdgpu: add amdgpu_winsys_bo::lock
      • +
      • winsys/amdgpu: explicitly declare whether buffer_map is permanent or not
      • +
      • egl/wayland: rather obvious build fix
      • +
      • radv: remove dependency on addrlib gfx9_enum.h
      • +
      • ac/surface/gfx9: let addrlib choose the preferred swizzle kind
      • +
      • amd/addrlib: update Mesa's copy of addrlib
      • +
      • meson: link LLVM 'native' component when LLVM is available
      • +
      • ddebug: simplify watchdog loop and fix crash in the no-timeout case
      • +
      • ddebug: always flush when requested, even when hang detection is disabled
      • +
      • r600: remove redundant semicolon
      • +
      • amd/sid_tables: add additional python3 compatibility imports
      • +
      • amd/common: whitespace fixes
      • +
      • amd/common: add ac_build_ifcc
      • +
      • amd/common: scan/reduce across waves of a workgroup
      • +
      • amd/common: add i1 special case to ac_build_{inclusive,exclusive}_scan
      • +
      • ac/surface: 3D and cube surfaces are never displayable
      • +
      • radeonsi: move SI_FORCE_FAMILY functionality to winsys
      • +
      • radeonsi: extract declare_vs_blit_inputs
      • +
      • radeonsi: add si_init_draw_functions and make some functions static
      • +
      • radeonsi/gfx9: use SET_UCONFIG_REG_INDEX packets when available
      • +
      • radeonsi: don't set RAW_WAIT for CP DMA clears
      • +
      • radeonsi: rename SI_RESOURCE_FLAG_FORCE_TILING to clarify its purpose
      • +
      • radeonsi: const-ify si_set_tesseval_regs
      • +
      • radeonsi: show the fixed function TCS in debug dumps
      • +
      • radeonsi: avoid using hard-coded SI_NUM_RW_BUFFERS
      • +
      • radeonsi: add an si_set_rw_shader_buffer convenience function
      • +
      • radeonsi: use si_set_rw_shader_buffer for setting streamout buffers
      • +
      • radeonsi: track constant buffer bind history in si_pipe_set_constant_buffer
      • +
      • radeonsi: move remaining perfcounter code into si_perfcounter.c
      • +
      • radeonsi: move query suspend logic into the top-level si_query struct
      • +
      • radeonsi: factor si_query_buffer logic out of si_query_hw
      • +
      • radeonsi: split perfcounter queries from si_query_hw
      • +
      • radeonsi: const-ify the si_query_ops
      • +
      • amd/common: use llvm.amdgcn.s.buffer.load for LLVM 8.0
      • +
      • amd/common/vi+: enable SMEM loads with GLC=1
      • +
      + +

      Niklas Haas (3):

      • glsl: fix block member alignment validation for vec3
      • radv: correctly use vulkan 1.0 by default
      • radv: add device->instance extension dependencies

      Olivier Fourdan (1):

      • wayland/egl: Resize EGL surface on update buffer for swrast

      Oscar Blumberg (1):

      • radeonsi: Fix guardband computation for large render targets

      Pierre Moreau (2):

      • clover/meson: Ignore 'svn' suffix when computing CLANG_RESOURCE_DIR
      • meson: Fix with_gallium_icd to with_opencl_icd

      Plamena Manolova (1):

      • nir: Don't lower the local work group size if it's variable.

      Rafael Antognolli (24):

      • intel/genxml: Add register for object preemption.
      • i965/gen10+: Enable object level preemption.
      • i965/gen9: Add workarounds for object preemption.
      • anv/tests: Fix block_pool_no_free test.
      • anv/allocator: Add anv_state_table.
      • anv/allocator: Add getter for anv_block_pool.
      • anv/allocator: Add helper to push states back to the state table.
      • anv/allocator: Use anv_state_table on anv_state_pool_alloc.
      • anv/allocator: Use anv_state_table on back_alloc too.
      • anv/allocator: Remove anv_free_list.
      • anv/allocator: Rename anv_free_list2 to anv_free_list.
      • anv/allocator: Remove pool->map.
      • anv: Update usage of block_pool->bo.
      • anv/allocator: Add support for a list of BOs in block pool.
      • anv: Split code to add BO dependencies to execbuf.
      • anv: Validate the list of BOs from the block pool.
      • anv: Remove some asserts.
      • anv/allocator: Rework chunk return to the state pool.
      • anv/allocator: Add padding information.
      • anv/allocator: Enable snooping on block pool and anv_bo_pool BOs.
      • anv: Remove state flush.
      • anv/allocator: Add support for non-userptr.
      • anv/tests: Adding test for the state_pool padding.
      • anv/allocator: Avoid race condition in anv_block_pool_map.

      Ray Zhang (1):

      • glx: fix shared memory leak in X11

      Rhys Kidd (5):

      • travis: radeonsi and radv require LLVM 7.0
      • meson: libfreedreno depends upon libdrm (for fence support)
      • v3d: Wire up core pipe_debug_callback
      • vc4: Wire up core pipe_debug_callback
      • nv50,nvc0: add missing CAPs for unsupported features

      Rhys Perry (14):

      • nir: fix constness in nir_intrinsic_align()
      • ac: refactor visit_load_buffer
      • ac: split 16-bit ssbo loads that may not be dword aligned
      • radv: don't set surf_index for stencil-only images
      • radv: switch from nir_bcsel to nir_b32csel
      • ac/nir,radv,radeonsi/nir: use correct indices for interpolation intrinsics
      • nir: fix copy-paste error in nir_lower_constant_initializers
      • radv: use dithered alpha-to-coverage
      • radv: pass radv_draw_info to radv_emit_draw_registers()
      • radv: add missed situations for scissor bug workaround
      • radv: avoid context rolls when binding graphics pipelines
      • radv: prevent dirtying of dynamic state when it does not change
      • radv: bitcast 16-bit outputs to integers
      • radv: ensure export arguments are always float

      Rob Clark (79):

      +
        +
      • freedreno: update generated headers
      • +
      • freedreno/a6xx: fix VSC bug with larger # of tiles
      • +
      • freedreno/drm: fix unused 'entry' warnings
      • +
      • freedreno/drm: remove dependency on gallium driver
      • +
      • freedreno: move drm to common location
      • +
      • freedreno/ir3: standalone compiler updates
      • +
      • freedreno: shader_t -> gl_shader_stage
      • +
      • freedreno: remove shader_stage_name()
      • +
      • freedreno: FD_SHADER_DEBUG -> IR3_SHADER_DEBUG
      • +
      • freedreno/ir3: move disasm and optmsgs debug flags
      • +
      • util: env_var_as_unsigned() helper
      • +
      • freedreno/ir3: use env_var_as_unsigned()
      • +
      • freedreno/ir3: some header file cleanup
      • +
      • freedreno/ir3: remove pipe_stream_output_info dependency
      • +
      • freedreno/ir3: split up ir3_shader
      • +
      • freedreno/ir3: remove u_inlines usage
      • +
      • freedreno: move ir3 to common location
      • +
      • mesa/st: swap order of clear() and clear_with_quad()
      • +
      • mesa/st: better colormask check for clear fallback
      • +
      • freedreno/a6xx: disable LRZ for z32
      • +
      • freedreno/a6xx: set guardband clip
      • +
      • freedreno: update generated headers
      • +
      • freedreno/a3xx: also set FSSUPERTHREADENABLE
      • +
      • freedreno/a6xx: MSAA
      • +
      • freedreno: remove unused fd_surface fields
      • +
      • gallium: fix typo
      • +
      • freedreno/a5xx+a6xx: remove unused fs/vs pvt mem
      • +
      • freedreno/drm: fix relocs in nested stateobjs
      • +
      • freedreno: update generated headers
      • +
      • freedreno/a6xx: blitter fixes
      • +
      • freedreno/ir3: don't fetch unused tex components
      • +
      • freedreno/ir3: sync instr/disasm
      • +
      • freedreno/ir3: code-motion
      • +
      • freedreno/ir3: track max flow control depth for a5xx/a6xx
      • +
      • freedreno/drm: fix memory leak
      • +
      • nir: fix spelling typo
      • +
      • mesa/st/nir: fix missing nir_compact_varyings
      • +
      • freedreno/drm: sync uapi and enable softpin
      • +
      • freedreno: debug GEM obj names
      • +
      • freedreno: also set DUMP flag on shaders
      • +
      • freedreno/ir3: fix crash
      • +
      • freedreno/ir3: don't remove unused input components
      • +
      • freedreno/a6xx: fix blitter crash
      • +
      • gallium/aux: add is_unorm() helper
      • +
      • freedreno: update generated headers
      • +
      • freedreno/a6xx: more blitter fixes
      • +
      • freedreno: move fd_resource_copy_region()
      • +
      • freedreno/a6xx: fix resource_copy_region()
      • +
      • freedreno/a6xx: fix corrupted uniforms
      • +
      • freedreno/ir3: fix fallout of extra assert
      • +
      • freedreno/ir3: don't treat all inputs/outputs as vec4
      • +
      • freedreno: combine fd_resource_layer_offset()/fd_resource_offset()
      • +
      • freedreno/a6xx: simplify special case for 3d layout
      • +
      • freedreno/a6xx: improve setup_slices() debug msgs
      • +
      • freedreno: update generated headers
      • +
      • freedreno/a6xx: fix 3d texture layout
      • +
      • freedreno: skip depth resolve if not written
      • +
      • freedreno: rework blit API
      • +
      • freedreno: try blitter for fd_resource_copy_region()
      • +
      • freedreno/a6xx: rework blitter API
      • +
      • freedreno: remove blit_via_copy_region()
      • +
      • freedreno: fix staging resource size for arrays
      • +
      • freedreno: make cmdstream bo's read-only to GPU
      • +
      • freedreno/a6xx: separate stencil restore/resolve fixes
      • +
      • freedreno/a6xx: move tile_mode to sampler-view CSO
      • +
      • freedreno/a6xx: fix 3d+tiled layout
      • +
      • nir/vtn: add caps for some cl related capabilities
      • +
      • loader: fix the no-modifiers case
      • +
      • freedreno: core buffer modifier support
      • +
      • freedreno: set modifier when exporting buffer
      • +
      • freedreno: limit tiling to PIPE_BIND_SAMPLER_VIEW
      • +
      • freedreno/a2xx: fix unused variable warning
      • +
      • freedreno/a5xx: fix blitter nr_samples check
      • +
      • freedreno/a6xx: fix blitter nr_samples check
      • +
      • freedreno: stop frob'ing pipe_resource::nr_samples
      • +
      • freedreno: minor cleanups
      • +
      • mesa: wire up InvalidateFramebuffer
      • +
      • freedreno: fix release tarball
      • +
      • freedreno: more fixing release tarball
      • +
      + +

      Rob Herring (3):

      • pipe-loader: Fallback to kmsro driver when no matching driver name found
      • kmsro: Add etnaviv renderonly support
      • Switch imx to kmsro and remove the imx winsys

      Robert Foss (3):

      • virgl: native fence fd support
      • virgl: Clean up fences commit
      • virgl: add assert and missing function parameter

      Rodrigo Vivi (1):

      • intel: Add more PCI Device IDs for Coffee Lake and Ice Lake.

      Roland Scheidegger (7):

      • gallivm: fix improper clamping of vertex index when fetching gs inputs
      • draw: fix infinite loop in line stippling
      • gallivm: remove unused float coord wrapping for aos sampling
      • gallivm: use llvm jit code for decoding s3tc
      • gallivm: don't use pavg.b intrinsic on llvm >= 6.0
      • gallivm: abort when trying to use non-existing intrinsic
      • Revert "llvmpipe: Always return some fence in flush (v2)"

      Sagar Ghuge (14):

      • intel/compiler: Disassemble GEN6_SFID_DATAPORT_SAMPLER_CACHE as dp_sampler
      • intel/compiler: Set swizzle to BRW_SWIZZLE_XXXX for scalar region
      • intel/compiler: Always print flag subregister number
      • nir: Add a new lowering option to lower 3D surfaces from txd to txl.
      • glsl: Add "built-in" functions to do uint64_to_fp64(uint64_t)
      • glsl: Add "built-in" functions to do int64_to_fp64(int64_t)
      • glsl: Add "built-in" functions to do uint64_to_fp32(uint64_t)
      • glsl: Add "built-in" functions to do int64_to_fp32(int64_t)
      • glsl: Add utility function to round and pack uint64_t value
      • glsl: Add "built-in" functions to do fp64_to_uint64(fp64)
      • glsl: Add utility function to round and pack int64_t value
      • glsl: Add "built-in" functions to do fp64_to_int64(fp64)
      • glsl: Add "built-in" functions to do fp32_to_uint64(fp32)
      • glsl: Add "built-in" functions to do fp32_to_int64(fp32)

      Samuel Pitoiset (103):

      +
        +
      • radv: remove useless sync after copying query results with compute
      • +
      • radv: add missing TFB queries support to CmdCopyQueryPoolsResults()
      • +
      • radv: replace si_emit_wait_fence() with radv_cp_wait_mem()
      • +
      • radv: more use of radv_cp_wait_mem()
      • +
      • radv: allocate enough space in CS when copying query results with compute
      • +
      • radv: disable conditional rendering for vkCmdCopyQueryPoolResults()
      • +
      • radv: only expose VK_SUBGROUP_FEATURE_ARITHMETIC_BIT for VI+
      • +
      • radv: use LOAD_CONTEXT_REG when loading fast clear values
      • +
      • radv: fix GPU hangs when loading depth/stencil clear values on SI/CIK
      • +
      • radv: cleanup and document a Hawaii bug with offchip buffers
      • +
      • radv: clean up setting partial_es_wave for distributed tess on VI
      • +
      • radv: make use of num_good_cu_per_sh in si_emit_graphics() too
      • +
      • radv: binding streamout buffers doesn't change context regs
      • +
      • radv: set PA.SC_CONSERVATIVE_RASTERIZATION.NULL_SQUAD_AA_MASK_ENABLE
      • +
      • radv: set optimal OVERWRITE_COMBINER_WATERMARK on GFX9
      • +
      • radv: add a debug option for disabling primitive binning
      • +
      • radv: enable primitive binning by default
      • +
      • radv: tidy up radv_set_dcc_need_cmask_elim_pred()
      • +
      • radv: always clear the FCE predicate after DCC/FMASK/CMASK decompressions
      • +
      • radv/winsys: remove the max IBs per submit limit for the fallback path
      • +
      • radv/winsys: remove the max IBs per submit limit for the sysmem path
      • +
      • radv: remove unnecessary goto in the fast clear paths
      • +
      • radv: add radv_get_htile_fast_clear_value() helper
      • +
      • radv: add radv_is_fast_clear_{depth,stencil}_allowed() helpers
      • +
      • radv: check allowed fast HTILE clears a bit earlier
      • +
      • radv: rewrite the condition that checks allowed depth/stencil values
      • +
      • radv: implement fast HTILE clears for depth or stencil only on GFX9
      • +
      • ac/nir: fix intrinsic name string size in visit_image_atomic()
      • +
      • radv: ignore subpass self-dependencies
      • +
      • radv: only sync CP DMA for transfer operations or bottom pipe
      • +
      • radv: remove useless sync after CmdClear{Color,DepthStencil}Image()
      • +
      • radv: remove useless sync before CmdClear{Color,DepthStencil}Image()
      • +
      • radv: ignore subpass self-dependencies for CreateRenderPass() too
      • +
      • radv: remove useless check in emit_fast_color_clear()
      • +
      • radv: add radv_image_can_fast_clear() helper
      • +
      • radv: add radv_image_view_can_fast_clear() helper
      • +
      • radv: add radv_can_fast_clear_{color,depth}() helpers
      • +
      • radv: simplify a check in emit_fast_color_clear()
      • +
      • radv: refactor the fast clear path for better re-use
      • +
      • radv: optimize CmdClear{Color,DepthStencil}Image() for layered textures
      • +
      • radv: remove unused pending_clears param in the transition path
      • +
      • radv: drop few useless state changes when doing color/depth decompressions
      • +
      • radv: rework the TC-compat HTILE hardware bug with COND_EXEC
      • +
      • radv: reset pending_reset_query when flushing caches
      • +
      • radv: wait on the high 32 bits of timestamp queries
      • +
      • spirv: add SpvCapabilityInt64Atomics
      • +
      • radv: expose VK_EXT_scalar_block_layout
      • +
      • amd: remove support for LLVM 6.0
      • +
      • gallium: add missing PIPE_CAP_SURFACE_SAMPLE_COUNT default value
      • +
      • radv: bump reported version to 1.1.90
      • +
      • radv: add a predicate for reflecting DCC decompression state
      • +
      • radv: allow to skip DCC decompressions with the new predicate
      • +
      • radv: switch on EOP when primitive restart is enabled with triangle strips
      • +
      • radv: check if addrlib enabled HTILE in radv_image_can_enable_htile()
      • +
      • radv: don't check if format is depth in radv_image_can_enable_hile()
      • +
      • radv: report Vulkan version 1.1.90 for real
      • +
      • ac/nir: remove the bitfield_extract workaround for LLVM 8
      • +
      • radv: drop the amdgpu-skip-threshold=1 workaround for LLVM 8
      • +
      • radv: fix subpass image transitions with multiviews
      • +
      • radv: compute optimal VM alignment for imported buffers
      • +
      • spirv: add support for SpvCapabilityStorageImageMultisample
      • +
      • ac/nir: restrict fmask lookup to image load intrinsics
      • +
      • radv: initialize FMASK for images in fully expanded mode
      • +
      • radv: add support for FMASK expand
      • +
      • radv: enable shaderStorageImageMultisample feature on GFX8+
      • +
      • radv: get rid of bunch of KHR suffixes
      • +
      • radv: enable variable pointers
      • +
      • radv: skip draws with instance_count == 0
      • +
      • ac/nir: add get_cache_policy() helper and use it
      • +
      • ac/nir: set cache policy when loading/storing buffer images
      • +
      • ac: add missing 16-bit types to glsl_base_to_llvm_type()
      • +
      • radv: remove unnecessary returns in GetPhysicalDevice*Properties()
      • +
      • radv: add two small helpers for getting VRAM and visible VRAM sizes
      • +
      • radv: add support for VK_EXT_memory_budget
      • +
      • ac/nir: don't trash L1 caches for store operations with writeonly memory
      • +
      • radv: drop unused code related to 16 sample locations
      • +
      • radv: reduce size of the per-queue descriptor BO
      • +
      • radv: do not write unused descriptors to the per-queue BO
      • +
      • radv: initialize the per-queue descriptor BO only once
      • +
      • nir: do not remove varyings used for transform feedback
      • +
      • nir: fix lowering arrays to elements for XFB outputs
      • +
      • radv: improve gathering of load_push_constants with dynamic bindings
      • +
      • radv: remove old_fence parameter from si_cs_emit_write_event_eop()
      • +
      • radv: only allocate the GFX9 fence and EOP BOs for the gfx queue
      • +
      • radv: compute the GFX9 fence VA at allocation time
      • +
      • radv: always pass the GFX9 fence data to si_cs_emit_cache_flush()
      • +
      • radv: fix computing number of user SGPRs for streamout buffers
      • +
      • radv: remove radv_userdata_info::indirect field
      • +
      • radv: simplify allocating user SGPRS for descriptor sets
      • +
      • radv: set noalias/dereferenceable LLVM attributes based on param types
      • +
      • radv: re-enable fast depth clears for 16-bit surfaces on VI
      • +
      • radv/winsys: fix hash when adding internal buffers
      • +
      • radv: fix compiler issues with GCC 9
      • +
      • radv: fix using LOAD_CONTEXT_REG with old GFX ME firmwares on GFX8
      • +
      • radv/winsys: fix BO list creation when RADV_DEBUG=allbos is set
      • +
      • radv: always export gl_SampleMask when the fragment shader uses it
      • +
      • radv: write the alpha channel of MRT0 when alpha coverage is enabled
      • +
      • radv: fix writing the alpha channel of MRT0 when alpha coverage is enabled
      • +
      • radv: fix out-of-bounds access when copying descriptors BO list
      • +
      • radv: don't copy buffer descriptors list for samplers
      • +
      • radv: fix clearing attachments in secondary command buffers
      • +
      • radv: properly align the fence and EOP bug VA on GFX9
      • +
      • radv: fix pointSizeRange limits
      • +
      + +

      Sergii Romantsov (4):

      +
        +
      • autotools: library-dependency when no sse and 32-bit
      • +
      • i965/batch/debug: Allow log be dumped before assert
      • +
      • nir: Length of boolean vtn_value now is 1
      • +
      • dri: meson: do not prefix user provided dri-drivers-path
      • +
      + +

      Sonny Jiang (1):

      +
        +
      • radeonsi: use compute for resource_copy_region when possible
      • +
      + +

      Tapani Pälli (27):

      +
        +
      • anv: allow exporting an imported SYNC_FD semaphore type
      • +
      • anv: add create_flags as part of anv_image
      • +
      • anv: refactor make_surface to use data from anv_image
      • +
      • anv: make anv_get_image_format_features public
      • +
      • anv: add from/to helpers with android and vulkan formats
      • +
      • anv/android: add GetAndroidHardwareBufferPropertiesANDROID
      • +
      • anv: add anv_ahw_usage_from_vk_usage helper function
      • +
      • anv: refactor, remove else block in AllocateMemory
      • +
      • anv/android: support import/export of AHardwareBuffer objects
      • +
      • anv/android: add ahardwarebuffer external memory properties
      • +
      • anv/android: support creating images from external format
      • +
      • anv: support VkExternalFormatANDROID in vkCreateSamplerYcbcrConversion
      • +
      • anv: add VkFormat field as part of anv_format
      • +
      • anv: support VkSamplerYcbcrConversionInfo in vkCreateImageView
      • +
      • anv: ignore VkSamplerYcbcrConversion on non-yuv formats
      • +
      • anv/android: turn on VK_ANDROID_external_memory_android_hardware_buffer
      • +
      • dri3: initialize adaptive_sync as false before configQueryb
      • +
      • intel/isl: move tiled_memcpy static libs from i965 to isl
      • +
      • anv: do not advertise AHW support if extension not enabled
      • +
      • nir: cleanup glsl_get_struct_field_offset, glsl_get_explicit_stride
      • +
      • android: fix build issues with libmesa_anv_gen* libraries
      • +
      • mesa: return NULL if we exceed MaxColorAttachments in get_fb_attachment
      • +
      • nir: initialize value in copy_prop_vars_block
      • +
      • anv: retain the is_array state in create_plane_tex_instr_implicit
      • +
      • anv: release memory allocated by glsl types during spirv_to_nir
      • +
      • anv: revert "anv: release memory allocated by glsl types during spirv_to_nir"
      • +
      • anv: destroy descriptor sets when pool gets destroyed
      • +
      + +

      Thomas Hellstrom (9):

      +
        +
      • st/xa: Render update. Better support for solid pictures
      • +
      • st/xa: Support higher color precision for solid pictures
      • +
      • st/xa: Support a couple of new formats
      • +
      • st/xa: Fix transformations when we have both source and mask samplers
      • +
      • st/xa: Minor renderer cleanups
      • +
      • st/xa: Support Component Alpha with trivial blending
      • +
      • st/xa: Bump minor
      • +
      • st/xa: Fix a memory leak
      • +
      • winsys/svga: Fix a memory leak
      • +
      + +

      Timothy Arceri (56):

      +
        +
      • nir: allow propagation of if evaluation for bcsel
      • +
      • nir: fix condition propagation when src has a swizzle
      • +
      • ac/nir_to_llvm: fix b2f for f64
      • +
      • nir: add new linking opt nir_link_constant_varyings()
      • +
      • st/mesa: make use of nir_link_constant_varyings()
      • +
      • nir: add glsl_type_is_integer() helper
      • +
      • nir: don't pack varyings ints with floats unless flat
      • +
      • anv/i965: make use of nir_link_constant_varyings()
      • +
      • nir: add support for removing redundant stores to copy prop var
      • +
      • radv: make use of nir_move_out_const_to_consumer()
      • +
      • nir: small tidy ups for nir_loop_analyze()
      • +
      • nir: clarify some nit_loop_info member names
      • +
      • nir: add a new nir_cf_list_clone_and_reinsert() helper
      • +
      • nir: make use of new nir_cf_list_clone_and_reinsert() helper
      • +
      • nir: factor out some of the complex loop unroll code to a helper
      • +
      • nir: rework force_unroll_array_access()
      • +
      • nir: in loop analysis track actual control flow type
      • +
      • nir: reword code comment
      • +
      • nir: detect more induction variables
      • +
      • nir: fix opt_if_loop_last_continue()
      • +
      • tgsi/scan: fix loop exit point in tgsi_scan_tess_ctrl()
      • +
      • tgsi/scan: correctly walk instructions in tgsi_scan_tess_ctrl()
      • +
      • radeonsi: remove unrequired param in si_nir_scan_tess_ctrl()
      • +
      • ac/nir_to_llvm: add ac_are_tessfactors_def_in_all_invocs()
      • +
      • radeonsi: make use of ac_are_tessfactors_def_in_all_invocs()
      • +
      • st/glsl_to_nir: call nir_lower_load_const_to_scalar() in the st
      • +
      • nir: rename nir_link_constant_varyings() nir_link_opt_varyings()
      • +
      • nir: add can_replace_varying() helper
      • +
      • nir: rework nir_link_opt_varyings()
      • +
      • nir: link time opt duplicate varyings
      • +
      • nir: make nir_opt_remove_phis_impl() static
      • +
      • nir: make use of does_varying_match() helper
      • +
      • nir: simplify does_varying_match()
      • +
      • nir: add rewrite_phi_predecessor_blocks() helper
      • +
      • nir: merge some basic consecutive ifs
      • +
      • st/glsl: refactor st_link_nir()
      • +
      • nir: avoid uninitialized variable warning
      • +
      • glsl: Copy function out to temp if we don't directly ref a variable
      • +
      • ac/nir_to_llvm: fix type handling in image code
      • +
      • radeonsi/nir: get correct type for images inside structs
      • +
      • ac/nir_to_llvm: fix regression in bindless support
      • +
      • ac/nir_to_llvm: add support for structs to get_sampler_desc()
      • +
      • glsl: don't skip GLSL IR opts on first-time compiles
      • +
      • glsl: be much more aggressive when skipping shader compilation
      • +
      • Revert "glsl: be much more aggressive when skipping shader compilation"
      • +
      • ac/nir_to_llvm: fix interpolateAt* for arrays
      • +
      • glsl: be much more aggressive when skipping shader compilation
      • +
      • radeonsi/nir: add missing piece for bindless image support
      • +
      • ac/nir_to_llvm: add bindless support for uniform handles
      • +
      • ac/nir_to_llvm: fix interpolateAt* for structs
      • +
      • ac/nir_to_llvm: fix clamp shadow reference for more hardware
      • +
      • tgsi: remove culldist semantic from docs
      • +
      • radv/ac: fix some fp16 handling
      • +
      • glsl: use remap location when serialising uniform program resource data
      • +
      • radeonsi: fix query buffer allocation
      • +
      • glsl: fix shader cache for packed param list
      • +
      + +

      Tobias Klausmann (1):

      +
        +
      • amd/vulkan: meson build - use radv_deps for libvulkan_radeon
      • +
      + +

      Tomasz Figa (1):

      +
        +
      • llvmpipe: Always return some fence in flush (v2)
      • +
      + +

      Tomeu Vizoso (1):

      +
        +
      • etnaviv: Consolidate buffer references from framebuffers
      • +
      + +

      Toni Lönnberg (14):

      +
        +
      • intel/decoder: Engine parameter for instructions
      • +
      • intel/decoder: tools: gen_engine to drm_i915_gem_engine_class
      • +
      • intel/decoder: tools: Use engine for decoding batch instructions
      • +
      • intel/genxml: Add engine definition to render engine instructions (gen4)
      • +
      • intel/genxml: Add engine definition to render engine instructions (gen45)
      • +
      • intel/genxml: Add engine definition to render engine instructions (gen5)
      • +
      • intel/genxml: Add engine definition to render engine instructions (gen6)
      • +
      • intel/genxml: Add engine definition to render engine instructions (gen7)
      • +
      • intel/genxml: Add engine definition to render engine instructions (gen75)
      • +
      • intel/genxml: Add engine definition to render engine instructions (gen8)
      • +
      • intel/genxml: Add engine definition to render engine instructions (gen9)
      • +
      • intel/genxml: Add engine definition to render engine instructions (gen10)
      • +
      • intel/genxml: Add engine definition to render engine instructions (gen11)
      • +
      • intel/aubinator_error_decode: Get rid of warning for missing switch case
      • +
      + +

      Topi Pohjolainen (1):

      +
        +
      • i965/icl: Disable prefetching of sampler state entries
      • +
      + +

      Veluri Mithun (5):

      +
        +
      • Add extension doc for MESA_query_driver
      • +
      • Implement EGL API for MESA_query_driver
      • +
      • Implementation of egl dri2 drivers for MESA_query_driver
      • +
      • egl: Implement EGL API for MESA_query_driver
      • +
      • egl: Implementation of egl dri2 drivers for MESA_query_driver
      • +
      + +

      Vinson Lee (7):

      +
        +
      • r600/sb: Fix constant logical operand in assert.
      • +
      • freedreno: Fix autotools build.
      • +
      • st/xvmc: Add X11 include path.
      • +
      • nir/algebraic: Make algebraic_parser_test.sh executable.
      • +
      • meson: Fix typo.
      • +
      • meson: Fix libsensors detection.
      • +
      • meson: Fix typo.
      • +
      + +

      Yevhenii Kolesnikov (1):

      +
        +
      • i965: Fix allow_higher_compat_version workaround limited by OpenGL 3.0
      • +
      + +

      pal1000 (1):

      +
        +
      • scons: Compatibility with Scons development version string
      • +
      + +
    + +
    + + diff -Nru mesa-18.3.3/docs/relnotes/19.0.1.html mesa-19.0.1/docs/relnotes/19.0.1.html --- mesa-18.3.3/docs/relnotes/19.0.1.html 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/docs/relnotes/19.0.1.html 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,158 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 19.0.1 Release Notes / March 27, 2019

    + +

    +Mesa 19.0.1 is a bug fix release which fixes bugs found since the 19.0.0 release. +

    +

    +Mesa 19.0.1 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

    + + +

    SHA256 checksums

    +
    +  TBD
    +
    + + +

    New features

    +

    None

    + + +

    Bug fixes

    + +
      + +
    • Bug 100316 - Linking GLSL 1.30 shaders with invariant and deprecated variables triggers an 'mismatching invariant qualifiers' error
    • + +
    • Bug 107563 - [RADV] Broken rendering in Unity demos
    • + +
    • Bug 109698 - dri.pc contents invalid when built with meson
    • + +
    • Bug 109980 - [i915 CI][HSW] spec@arb_fragment_shader_interlock@arb_fragment_shader_interlock-image-load-store - fail
    • + +
    • Bug 110211 - If DESTDIR is set to an empty string, the dri drivers are not installed
    • + +
    • Bug 110221 - build error with meson
    • + +
    + +

    Changes

    + +

    Andres Gomez (4):

    +
      +
    • glsl: correctly validate component layout qualifier for dvec{3,4}
    • +
    • glsl/linker: don't fail non static used inputs without matching outputs
    • +
    • glsl/linker: simplify xfb_offset vs xfb_stride overflow check
    • +
    • Revert "glsl: relax input->output validation for SSO programs"
    • +
    + +

    Bas Nieuwenhuizen (2):

    +
      +
    • radv: Use correct image view comparison for fast clears.
    • +
    • ac/nir: Return frag_coord as integer.
    • +
    + +

    Danylo Piliaiev (2):

    +
      +
    • anv: Treat zero size XFB buffer as disabled
    • +
    • glsl: Cross validate variable's invariance by explicit invariance only
    • +
    + +

    Dave Airlie (1):

    +
      +
    • softpipe: fix texture view crashes
    • +
    + +

    Dylan Baker (5):

    +
      +
    • docs: Add SHA256 sums for 19.0.0
    • +
    • cherry-ignore: Add commit that doesn't apply
    • +
    • bin/install_megadrivers.py: Correctly handle DESTDIR=''
    • +
    • bin/install_megadrivers.py: Fix regression for set DESTDIR
    • +
    • bump version for 19.0.1
    • +
    + +

    Eric Anholt (1):

    +
      +
    • v3d: Fix leak of the renderonly struct on screen destruction.
    • +
    + +

    Jason Ekstrand (6):

    +
      +
    • glsl/lower_vector_derefs: Don't use a temporary for TCS outputs
    • +
    • glsl/list: Add a list variant of insert_after
    • +
    • anv/pass: Flag the need for a RT flush for resolve attachments
    • +
    • nir/builder: Add a vector extract helper
    • +
    • nir: Add a new pass to lower array dereferences on vectors
    • +
    • intel/nir: Lower array-deref-of-vector UBO and SSBO loads
    • +
    + +

    Józef Kucia (2):

    +
      +
    • radv: Fix driverUUID
    • +
    • mesa: Fix GL_NUM_DEVICE_UUIDS_EXT
    • +
    + +

    Kenneth Graunke (1):

    +
      +
    • intel/fs: Fix opt_peephole_csel to not throw away saturates.
    • +
    + +

    Kevin Strasser (1):

    +
      +
    • egl/dri: Avoid out of bounds array access
    • +
    + +

    Mark Janes (1):

    +
      +
    • mesa: properly report the length of truncated log messages
    • +
    + +

    Plamena Manolova (1):

    +
      +
    • i965: Disable ARB_fragment_shader_interlock for platforms prior to GEN9
    • +
    + +

    Samuel Pitoiset (3):

    +
      +
    • radv: set the maximum number of IBs per submit to 192
    • +
    • radv: always initialize HTILE when the src layout is UNDEFINED
    • +
    • radv: fix binding transform feedback buffers
    • +
    + +

    Sergii Romantsov (1):

    +
      +
    • d3d: meson: do not prefix user provided d3d-drivers-path
    • +
    + +

    Tapani Pälli (2):

    +
      +
    • isl: fix automake build when sse41 is not supported
    • +
    • anv/radv: release memory allocated by glsl types during spirv_to_nir
    • +
    + + +
    + + diff -Nru mesa-18.3.3/docs/relnotes.html mesa-19.0.1/docs/relnotes.html --- mesa-18.3.3/docs/relnotes.html 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/docs/relnotes.html 2019-03-31 23:16:37.000000000 +0000 @@ -21,6 +21,13 @@

      +
    • 18.3.2 release notes +
    • 18.2.8 release notes +
    • 18.2.7 release notes +
    • 18.3.1 release notes +
    • 18.3.0 release notes +
    • 18.2.6 release notes +
    • 18.2.5 release notes
    • 18.2.4 release notes
    • 18.2.3 release notes
    • 18.2.2 release notes diff -Nru mesa-18.3.3/docs/specs/EGL_MESA_query_driver.txt mesa-19.0.1/docs/specs/EGL_MESA_query_driver.txt --- mesa-18.3.3/docs/specs/EGL_MESA_query_driver.txt 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/docs/specs/EGL_MESA_query_driver.txt 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,95 @@ +Name + + MESA_query_driver + +Name Strings + + EGL_MESA_query_driver + +Contact + + Rob Clark + Nicolai Hähnle + +Contibutors + + Veluri Mithun + +Status + + Complete + +Version + + Version 3, 2019-01-24 + +Number + + EGL Extension 131 + +Dependencies + + EGL 1.0 is required. + +Overview + + When an application has to query the name of a driver and for + obtaining driver's option list (UTF-8 encoded XML) of a driver + the below functions are useful. + + XML file formally describes all available options and also + includes verbal descriptions in multiple languages. Its main purpose + is to be automatically processed by configuration GUIs. + The XML shall respect the following DTD: + + + + + + + + + + +New Procedures and Functions + + char* eglGetDisplayDriverConfig(EGLDisplay dpy); + const char* eglGetDisplayDriverName(EGLDisplay dpy); + +Description + + By passing EGLDisplay as parameter to `eglGetDisplayDriverName` one can retrieve + driverName. Similarly passing EGLDisplay to `eglGetDisplayDriverConfig` we can retrieve + driverConfig options of the driver in XML format. + + The string returned by `eglGetDisplayDriverConfig` is heap-allocated and caller + is responsible for freeing it. + + EGL_BAD_DISPLAY is generated if `disp` is not an EGL display connection. + + EGL_NOT_INITIALIZED is generated if `disp` has not been initialized. + + If the implementation does not have enough resources to allocate the XML then an + EGL_BAD_ALLOC error is generated. 
+ +New Tokens + + No new tokens + +Issues + + None + + +Revision History + + Version 1, 2018-11-05 - First draft (Veluri Mithun) + Version 2, 2019-01-23 - Final version (Veluri Mithun) + Version 3, 2019-01-24 - Mark as complete, add Khronos extension + number, fix parameter name in prototypes, + write revision history (Eric Engestrom) diff -Nru mesa-18.3.3/docs/specs/MESA_query_renderer.spec mesa-19.0.1/docs/specs/MESA_query_renderer.spec --- mesa-18.3.3/docs/specs/MESA_query_renderer.spec 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/docs/specs/MESA_query_renderer.spec 2019-03-31 23:16:37.000000000 +0000 @@ -20,11 +20,11 @@ Version - Version 8, 14-February-2014 + Version 9, 09 November 2018 Number - TBD. + OpenGL Extension #446 Dependencies @@ -32,9 +32,6 @@ GLX_ARB_create_context and GLX_ARB_create_context_profile are required. - This extension interacts with GLX_EXT_create_context_es2_profile and - GLX_EXT_create_context_es_profile. - Overview In many situations, applications want to detect characteristics of a @@ -95,18 +92,13 @@ GLX_RENDERER_VENDOR_ID_MESA GLX_RENDERER_DEVICE_ID_MESA - Accepted as an attribute name in <*attrib_list> in - glXCreateContextAttribsARB: - - GLX_RENDERER_ID_MESA 0x818E - Additions to the OpenGL / WGL Specifications None. This specification is written for GLX. Additions to the GLX 1.4 Specification - [Add the following to Section X.Y.Z of the GLX Specification] + [Add to Section 3.3.2 "GLX Versioning" of the GLX Specification] To obtain information about the available renderers for a particular display and screen, @@ -206,29 +198,6 @@ format as the string that would be returned by glGetString of GL_RENDERER. It may, however, have a different value. - - [Add to section section 3.3.7 "Rendering Contexts"] - - The attribute name GLX_RENDERER_ID_MESA specified the index of the render - against which the context should be created. The default value of - GLX_RENDERER_ID_MESA is 0. 
- - - [Add to list of errors for glXCreateContextAttribsARB in section section - 3.3.7 "Rendering Contexts"] - - * If the value of GLX_RENDERER_ID_MESA specifies a non-existent - renderer, BadMatch is generated. - -Dependencies on GLX_EXT_create_context_es_profile and -GLX_EXT_create_context_es2_profile - - If neither extension is supported, remove all mention of - GLX_RENDERER_OPENGL_ES2_PROFILE_VERSION_MESA from the spec. - - If GLX_EXT_create_context_es_profile is not supported, remove all mention of - GLX_RENDERER_OPENGL_ES_PROFILE_VERSION_MESA from the spec. - Issues 1) How should the difference between on-card and GART memory be exposed? @@ -408,3 +377,9 @@ read GLX_RENDERER_ID_MESA. The VENDOR/DEVICE_ID example given in issue #17 should be 0x5143 and 0xFFFFFFFF respectively. + + Version 9, 2018/11/09 - Remove GLX_RENDERER_ID_MESA, which has never been + implemented. Remove the unnecessary interactions + with the GLX GLES profile extensions. Note the + official GL extension number. Specify the section + of the GLX spec to modify. diff -Nru mesa-18.3.3/docs/submittingpatches.html mesa-19.0.1/docs/submittingpatches.html --- mesa-18.3.3/docs/submittingpatches.html 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/docs/submittingpatches.html 2019-03-31 23:16:37.000000000 +0000 @@ -21,7 +21,7 @@
    • Basic guidelines
    • Patch formatting
    • Testing Patches -
    • Mailing Patches +
    • Submitting Patches
    • Reviewing Patches
    • Nominating a commit for a stable branch
    • Criteria for accepting patches to the stable branch @@ -42,8 +42,10 @@ git bisect.)
    • Patches should be properly formatted.
    • Patches should be sufficiently tested before submitting. -
    • Patches should be submitted to mesa-dev -for review using git send-email. +
    • Patches should be submitted +to mesa-dev or with +a merge request +for review.
    @@ -156,18 +158,29 @@ A good way to test this is to make use of the `git rebase` command, to run your tests on each commit. Assuming your branch is based off origin/master, you can run: +

     $ git rebase --interactive --exec "make check" origin/master
     
    +

    replacing "make check" with whatever other test you want to run.

    -

    Mailing Patches

    +

    Submitting Patches

    -Patches should be sent to the mesa-dev mailing list for review: +Patches may be submitted to the Mesa project by +email or with a +GitLab merge request. To prevent +duplicate code review, only use one method to submit your changes. +

    + +

    Mailing Patches

    + +

    +Patches may be sent to the mesa-dev mailing list for review: mesa-dev@lists.freedesktop.org. When submitting a patch make sure to use @@ -201,9 +214,66 @@ your email administrator for this.)

    +

    GitLab Merge Requests

    + +

    + GitLab Merge + Requests (MR) can also be used to submit patches for Mesa. +

    + +

    + If the MR may have interest for most of the Mesa community, you can + send an email to the mesa-dev email list including a link to the MR. + Don't send the patch to mesa-dev, just the MR link. +

    +

    + Add labels to your MR to help reviewers find it. For example: +

      +
    • Mesa changes affecting all drivers: mesa +
    • Hardware vendor specific code: amd, intel, nvidia, ... +
    • Driver specific code: anvil, freedreno, i965, iris, radeonsi, + radv, vc4, ... +
    • Other tag examples: gallium, util +
    +

    +

    + If you revise your patches based on code review and push an update + to your branch, you should maintain a clean history + in your patches. There should not be "fixup" patches in the history. + The series should be buildable and functional after every commit + whenever you push the branch. +

    +

    + It is your responsibility to keep the MR alive and making progress, + as there are no guarantees that a Mesa dev will independently take + interest in it. +

    +

    + Some other notes: +

      +
    • Make changes and update your branch based on feedback +
    • Old, stale MR may be closed, but you can reopen it if you + still want to pursue the changes +
    • You should periodically check to see if your MR needs to be + rebased +
    • Make sure your MR is closed if your patches get pushed outside + of GitLab +
    • Please send MRs from a personal fork rather than from the main + Mesa repository, as it clutters it unnecessarily. +
    +

    +

    Reviewing Patches

    + To participate in code review, you should monitor the + + mesa-dev email list and the GitLab + Mesa Merge + Requests page. +

    + +

    When you've reviewed a patch on the mailing list, please be unambiguous about your review. That is, state either

    @@ -229,6 +299,29 @@ as the issues are resolved first.

    +

    +These Reviewed-by, Acked-by, and Tested-by tags should also be amended +into commits in a MR before it is merged. +

    + +

    +When providing a Reviewed-by, Acked-by, or Tested-by tag in a gitlab MR, +enclose the tag in backticks: +

    +
    +  `Reviewed-by: Joe Hacker <jhacker@example.com>`
    +

    +This is the markdown format for literal, and will prevent gitlab from hiding +the < and > symbols. +

    + +

    +Review by non-experts is encouraged. Understanding how someone else +goes about solving a problem is a great way to learn your way around +the project. The submitter is expected to evaluate whether they have +an appropriate amount of review feedback from people who also +understand the code before merging their patches. +

    Nominating a commit for a stable branch

    diff -Nru mesa-18.3.3/.editorconfig mesa-19.0.1/.editorconfig --- mesa-18.3.3/.editorconfig 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/.editorconfig 2019-03-31 23:16:37.000000000 +0000 @@ -11,6 +11,7 @@ [*.{c,h,cpp,hpp,cc,hh}] indent_style = space indent_size = 3 +max_line_length = 78 [{Makefile*,*.mk}] indent_style = tab diff -Nru mesa-18.3.3/include/drm-uapi/drm_fourcc.h mesa-19.0.1/include/drm-uapi/drm_fourcc.h --- mesa-18.3.3/include/drm-uapi/drm_fourcc.h 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/include/drm-uapi/drm_fourcc.h 2019-03-31 23:16:37.000000000 +0000 @@ -298,6 +298,19 @@ */ #define DRM_FORMAT_MOD_SAMSUNG_64_32_TILE fourcc_mod_code(SAMSUNG, 1) +/* + * Qualcomm Compressed Format + * + * Refers to a compressed variant of the base format that is compressed. + * Implementation may be platform and base-format specific. + * + * Each macrotile consists of m x n (mostly 4 x 4) tiles. + * Pixel data pitch/stride is aligned with macrotile width. + * Pixel data height is aligned with macrotile height. + * Entire pixel data buffer is aligned with 4k(bytes). 
+ */ +#define DRM_FORMAT_MOD_QCOM_COMPRESSED fourcc_mod_code(QCOM, 1) + /* Vivante framebuffer modifiers */ /* diff -Nru mesa-18.3.3/include/drm-uapi/v3d_drm.h mesa-19.0.1/include/drm-uapi/v3d_drm.h --- mesa-18.3.3/include/drm-uapi/v3d_drm.h 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/include/drm-uapi/v3d_drm.h 2019-03-31 23:16:37.000000000 +0000 @@ -36,6 +36,7 @@ #define DRM_V3D_MMAP_BO 0x03 #define DRM_V3D_GET_PARAM 0x04 #define DRM_V3D_GET_BO_OFFSET 0x05 +#define DRM_V3D_SUBMIT_TFU 0x06 #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl) #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo) @@ -43,6 +44,7 @@ #define DRM_IOCTL_V3D_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_MMAP_BO, struct drm_v3d_mmap_bo) #define DRM_IOCTL_V3D_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_PARAM, struct drm_v3d_get_param) #define DRM_IOCTL_V3D_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset) +#define DRM_IOCTL_V3D_SUBMIT_TFU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu) /** * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D @@ -58,10 +60,15 @@ * coordinate shader to determine where primitives land on the screen, * then writes out the state updates and draw calls necessary per tile * to the tile allocation BO. + * + * This BCL will block on any previous BCL submitted on the + * same FD, but not on any RCL or BCLs submitted by other + * clients -- that is left up to the submitter to control + * using in_sync_bcl if necessary. */ __u32 bcl_start; - /** End address of the BCL (first byte after the BCL) */ + /** End address of the BCL (first byte after the BCL) */ __u32 bcl_end; /* Offset of the render command list. 
@@ -69,10 +76,15 @@ * This is the second set of commands executed, which will either * execute the tiles that have been set up by the BCL, or a fixed set * of tiles (in the case of RCL-only blits). + * + * This RCL will block on this submit's BCL, and any previous + * RCL submitted on the same FD, but not on any RCL or BCLs + * submitted by other clients -- that is left up to the + * submitter to control using in_sync_rcl if necessary. */ __u32 rcl_start; - /** End address of the RCL (first byte after the RCL) */ + /** End address of the RCL (first byte after the RCL) */ __u32 rcl_end; /** An optional sync object to wait on before starting the BCL. */ @@ -169,6 +181,7 @@ DRM_V3D_PARAM_V3D_CORE0_IDENT0, DRM_V3D_PARAM_V3D_CORE0_IDENT1, DRM_V3D_PARAM_V3D_CORE0_IDENT2, + DRM_V3D_PARAM_SUPPORTS_TFU, }; struct drm_v3d_get_param { @@ -187,6 +200,28 @@ __u32 offset; }; +struct drm_v3d_submit_tfu { + __u32 icfg; + __u32 iia; + __u32 iis; + __u32 ica; + __u32 iua; + __u32 ioa; + __u32 ios; + __u32 coef[4]; + /* First handle is the output BO, following are other inputs. + * 0 for unused. + */ + __u32 bo_handles[4]; + /* sync object to block on before running the TFU job. Each TFU + * job will execute in the order submitted to its FD. Synchronization + * against rendering jobs requires using sync objects. + */ + __u32 in_sync; + /* Sync object to signal when the TFU job is done. */ + __u32 out_sync; +}; + #if defined(__cplusplus) } #endif diff -Nru mesa-18.3.3/include/EGL/eglext.h mesa-19.0.1/include/EGL/eglext.h --- mesa-18.3.3/include/EGL/eglext.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/include/EGL/eglext.h 2019-03-31 23:16:37.000000000 +0000 @@ -28,17 +28,17 @@ ** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. */ /* -** This header is generated from the Khronos OpenGL / OpenGL ES XML -** API Registry. The current version of the Registry, generator scripts +** This header is generated from the Khronos EGL XML API Registry. 
+** The current version of the Registry, generator scripts ** used to make the header, and the header can be found at ** http://www.khronos.org/registry/egl ** -** Khronos $Git commit SHA1: bae3518c48 $ on $Git commit date: 2018-05-17 10:56:57 -0700 $ +** Khronos $Git commit SHA1: 9ed2ec4c67 $ on $Git commit date: 2019-01-09 17:54:35 -0800 $ */ #include -#define EGL_EGLEXT_VERSION 20180517 +#define EGL_EGLEXT_VERSION 20190124 /* Generated C header for: * API: egl @@ -681,6 +681,7 @@ #ifndef EGL_EXT_device_drm #define EGL_EXT_device_drm 1 #define EGL_DRM_DEVICE_FILE_EXT 0x3233 +#define EGL_DRM_MASTER_FD_EXT 0x333C #endif /* EGL_EXT_device_drm */ #ifndef EGL_EXT_device_enumeration @@ -716,6 +717,11 @@ #define EGL_GL_COLORSPACE_DISPLAY_P3_LINEAR_EXT 0x3362 #endif /* EGL_EXT_gl_colorspace_display_p3_linear */ +#ifndef EGL_EXT_gl_colorspace_display_p3_passthrough +#define EGL_EXT_gl_colorspace_display_p3_passthrough 1 +#define EGL_GL_COLORSPACE_DISPLAY_P3_PASSTHROUGH_EXT 0x3490 +#endif /* EGL_EXT_gl_colorspace_display_p3_passthrough */ + #ifndef EGL_EXT_gl_colorspace_scrgb #define EGL_EXT_gl_colorspace_scrgb 1 #define EGL_GL_COLORSPACE_SCRGB_EXT 0x3351 @@ -1025,6 +1031,16 @@ #define EGL_PLATFORM_SURFACELESS_MESA 0x31DD #endif /* EGL_MESA_platform_surfaceless */ +#ifndef EGL_MESA_query_driver +#define EGL_MESA_query_driver 1 +typedef char *(EGLAPIENTRYP PFNEGLGETDISPLAYDRIVERCONFIGPROC) (EGLDisplay dpy); +typedef const char *(EGLAPIENTRYP PFNEGLGETDISPLAYDRIVERNAMEPROC) (EGLDisplay dpy); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI char *EGLAPIENTRY eglGetDisplayDriverConfig (EGLDisplay dpy); +EGLAPI const char *EGLAPIENTRY eglGetDisplayDriverName (EGLDisplay dpy); +#endif +#endif /* EGL_MESA_query_driver */ + #ifndef EGL_NOK_swap_region #define EGL_NOK_swap_region 1 typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSREGIONNOKPROC) (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects); diff -Nru mesa-18.3.3/include/EGL/egl.h mesa-19.0.1/include/EGL/egl.h --- 
mesa-18.3.3/include/EGL/egl.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/include/EGL/egl.h 2019-03-31 23:16:37.000000000 +0000 @@ -28,17 +28,17 @@ ** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. */ /* -** This header is generated from the Khronos OpenGL / OpenGL ES XML -** API Registry. The current version of the Registry, generator scripts +** This header is generated from the Khronos EGL XML API Registry. +** The current version of the Registry, generator scripts ** used to make the header, and the header can be found at ** http://www.khronos.org/registry/egl ** -** Khronos $Git commit SHA1: a732b061e7 $ on $Git commit date: 2017-06-17 23:27:53 +0100 $ +** Khronos $Git commit SHA1: 9ed2ec4c67 $ on $Git commit date: 2019-01-09 17:54:35 -0800 $ */ #include -/* Generated on date 20170627 */ +/* Generated on date 20190124 */ /* Generated C header for: * API: egl diff -Nru mesa-18.3.3/include/GL/internal/dri_interface.h mesa-19.0.1/include/GL/internal/dri_interface.h --- mesa-18.3.3/include/GL/internal/dri_interface.h 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/include/GL/internal/dri_interface.h 2019-03-31 23:16:37.000000000 +0000 @@ -1344,6 +1344,7 @@ #define __DRI_IMAGE_FOURCC_NV16 0x3631564e #define __DRI_IMAGE_FOURCC_YUYV 0x56595559 #define __DRI_IMAGE_FOURCC_UYVY 0x59565955 +#define __DRI_IMAGE_FOURCC_AYUV 0x56555941 #define __DRI_IMAGE_FOURCC_YVU410 0x39555659 #define __DRI_IMAGE_FOURCC_YVU411 0x31315659 @@ -1370,6 +1371,7 @@ #define __DRI_IMAGE_COMPONENTS_Y_UV 0x3004 #define __DRI_IMAGE_COMPONENTS_Y_XUXV 0x3005 #define __DRI_IMAGE_COMPONENTS_Y_UXVX 0x3008 +#define __DRI_IMAGE_COMPONENTS_AYUV 0x3009 #define __DRI_IMAGE_COMPONENTS_R 0x3006 #define __DRI_IMAGE_COMPONENTS_RG 0x3007 diff -Nru mesa-18.3.3/include/pci_ids/i965_pci_ids.h mesa-19.0.1/include/pci_ids/i965_pci_ids.h --- mesa-18.3.3/include/pci_ids/i965_pci_ids.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/include/pci_ids/i965_pci_ids.h 2019-03-31 23:16:37.000000000 
+0000 @@ -171,6 +171,7 @@ CHIPSET(0x3E90, cfl_gt1, "Intel(R) UHD Graphics 610 (Coffeelake 2x6 GT1)") CHIPSET(0x3E93, cfl_gt1, "Intel(R) UHD Graphics 610 (Coffeelake 2x6 GT1)") CHIPSET(0x3E99, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 2x6 GT1)") +CHIPSET(0x3E9C, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 2x6 GT1)") CHIPSET(0x3E91, cfl_gt2, "Intel(R) UHD Graphics 630 (Coffeelake 3x8 GT2)") CHIPSET(0x3E92, cfl_gt2, "Intel(R) UHD Graphics 630 (Coffeelake 3x8 GT2)") CHIPSET(0x3E96, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)") @@ -203,6 +204,10 @@ CHIPSET(0x8A50, icl_8x8, "Intel(R) HD Graphics (Ice Lake 8x8 GT2)") CHIPSET(0x8A51, icl_8x8, "Intel(R) HD Graphics (Ice Lake 8x8 GT2)") CHIPSET(0x8A52, icl_8x8, "Intel(R) HD Graphics (Ice Lake 8x8 GT2)") +CHIPSET(0x8A56, icl_4x8, "Intel(R) HD Graphics (Ice Lake 4x8 GT1)") +CHIPSET(0x8A57, icl_6x8, "Intel(R) HD Graphics (Ice Lake 6x8 GT1.5)") +CHIPSET(0x8A58, icl_4x8, "Intel(R) HD Graphics (Ice Lake 4x8 GT1)") +CHIPSET(0x8A59, icl_6x8, "Intel(R) HD Graphics (Ice Lake 6x8 GT1.5)") CHIPSET(0x8A5A, icl_6x8, "Intel(R) HD Graphics (Ice Lake 6x8 GT1.5)") CHIPSET(0x8A5B, icl_4x8, "Intel(R) HD Graphics (Ice Lake 4x8 GT1)") CHIPSET(0x8A5C, icl_6x8, "Intel(R) HD Graphics (Ice Lake 6x8 GT1.5)") diff -Nru mesa-18.3.3/include/vulkan/vulkan_android.h mesa-19.0.1/include/vulkan/vulkan_android.h --- mesa-18.3.3/include/vulkan/vulkan_android.h 2018-04-11 19:02:35.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_android.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,7 +6,7 @@ #endif /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
diff -Nru mesa-18.3.3/include/vulkan/vulkan_core.h mesa-19.0.1/include/vulkan/vulkan_core.h --- mesa-18.3.3/include/vulkan/vulkan_core.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_core.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,7 +6,7 @@ #endif /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. @@ -43,13 +43,12 @@ #define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff) #define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff) // Version of this file -#define VK_HEADER_VERSION 90 +#define VK_HEADER_VERSION 97 #define VK_NULL_HANDLE 0 - #define VK_DEFINE_HANDLE(object) typedef struct object##_T* object; @@ -62,7 +61,6 @@ #endif - typedef uint32_t VkFlags; typedef uint32_t VkBool32; typedef uint64_t VkDeviceSize; @@ -150,6 +148,7 @@ VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT = -1000158000, VK_ERROR_FRAGMENTATION_EXT = -1000161000, VK_ERROR_NOT_PERMITTED_EXT = -1000174001, + VK_ERROR_INVALID_DEVICE_ADDRESS_EXT = -1000244000, VK_ERROR_OUT_OF_POOL_MEMORY_KHR = VK_ERROR_OUT_OF_POOL_MEMORY, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR = VK_ERROR_INVALID_EXTERNAL_HANDLE, VK_RESULT_BEGIN_RANGE = VK_ERROR_FRAGMENTED_POOL, @@ -287,7 +286,6 @@ VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR = 1000004000, VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR = 1000005000, VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR = 1000006000, - VK_STRUCTURE_TYPE_MIR_SURFACE_CREATE_INFO_KHR = 1000007000, VK_STRUCTURE_TYPE_ANDROID_SURFACE_CREATE_INFO_KHR = 1000008000, VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR = 1000009000, VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT = 1000011000, @@ -330,6 +328,7 @@ VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_CONDITIONAL_RENDERING_INFO_EXT = 1000081000, 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT = 1000081001, VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT = 1000081002, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR = 1000082000, VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR = 1000084000, VK_STRUCTURE_TYPE_OBJECT_TABLE_CREATE_INFO_NVX = 1000086000, VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_CREATE_INFO_NVX = 1000086001, @@ -419,17 +418,17 @@ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADING_RATE_IMAGE_FEATURES_NV = 1000164001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADING_RATE_IMAGE_PROPERTIES_NV = 1000164002, VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_COARSE_SAMPLE_ORDER_STATE_CREATE_INFO_NV = 1000164005, - VK_STRUCTURE_TYPE_RAYTRACING_PIPELINE_CREATE_INFO_NVX = 1000165000, - VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_NVX = 1000165001, - VK_STRUCTURE_TYPE_GEOMETRY_INSTANCE_NVX = 1000165002, - VK_STRUCTURE_TYPE_GEOMETRY_NVX = 1000165003, - VK_STRUCTURE_TYPE_GEOMETRY_TRIANGLES_NVX = 1000165004, - VK_STRUCTURE_TYPE_GEOMETRY_AABB_NVX = 1000165005, - VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NVX = 1000165006, - VK_STRUCTURE_TYPE_DESCRIPTOR_ACCELERATION_STRUCTURE_INFO_NVX = 1000165007, - VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NVX = 1000165008, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAYTRACING_PROPERTIES_NVX = 1000165009, - VK_STRUCTURE_TYPE_HIT_SHADER_MODULE_CREATE_INFO_NVX = 1000165010, + VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_NV = 1000165000, + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_NV = 1000165001, + VK_STRUCTURE_TYPE_GEOMETRY_NV = 1000165003, + VK_STRUCTURE_TYPE_GEOMETRY_TRIANGLES_NV = 1000165004, + VK_STRUCTURE_TYPE_GEOMETRY_AABB_NV = 1000165005, + VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NV = 1000165006, + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_NV = 1000165007, + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV = 1000165008, + 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PROPERTIES_NV = 1000165009, + VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV = 1000165011, + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV = 1000165012, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_REPRESENTATIVE_FRAGMENT_TEST_FEATURES_NV = 1000166000, VK_STRUCTURE_TYPE_PIPELINE_REPRESENTATIVE_FRAGMENT_TEST_STATE_CREATE_INFO_NV = 1000166001, VK_STRUCTURE_TYPE_DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT = 1000174000, @@ -440,10 +439,14 @@ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR = 1000180000, VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT = 1000184000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD = 1000185000, + VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD = 1000189000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT = 1000190000, VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT = 1000190001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT = 1000190002, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR = 1000196000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR = 1000197000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES_KHR = 1000199000, + VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR = 1000199001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV = 1000201000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_NV = 1000202000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_NV = 1000202001, @@ -456,6 +459,18 @@ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR = 1000211000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT = 1000212000, VK_STRUCTURE_TYPE_IMAGEPIPE_SURFACE_CREATE_INFO_FUCHSIA = 1000214000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_FEATURES_EXT = 1000218000, + 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_PROPERTIES_EXT = 1000218001, + VK_STRUCTURE_TYPE_RENDER_PASS_FRAGMENT_DENSITY_MAP_CREATE_INFO_EXT = 1000218002, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT = 1000221000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT = 1000237000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT = 1000238000, + VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT = 1000238001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_ADDRESS_FEATURES_EXT = 1000244000, + VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_EXT = 1000244001, + VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_CREATE_INFO_EXT = 1000244002, + VK_STRUCTURE_TYPE_IMAGE_STENCIL_USAGE_CREATE_INFO_EXT = 1000246000, + VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT = 1000247000, VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT, VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES, @@ -848,7 +863,7 @@ VK_QUERY_TYPE_PIPELINE_STATISTICS = 1, VK_QUERY_TYPE_TIMESTAMP = 2, VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT = 1000028004, - VK_QUERY_TYPE_COMPACTED_SIZE_NVX = 1000165000, + VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_NV = 1000165000, VK_QUERY_TYPE_BEGIN_RANGE = VK_QUERY_TYPE_OCCLUSION, VK_QUERY_TYPE_END_RANGE = VK_QUERY_TYPE_TIMESTAMP, VK_QUERY_TYPE_RANGE_SIZE = (VK_QUERY_TYPE_TIMESTAMP - VK_QUERY_TYPE_OCCLUSION + 1), @@ -879,6 +894,7 @@ VK_IMAGE_LAYOUT_PRESENT_SRC_KHR = 1000001002, VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR = 1000111000, VK_IMAGE_LAYOUT_SHADING_RATE_OPTIMAL_NV = 1000164003, + VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT = 1000218000, VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL_KHR = VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL, 
VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL_KHR = VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_BEGIN_RANGE = VK_IMAGE_LAYOUT_UNDEFINED, @@ -1178,7 +1194,7 @@ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC = 9, VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT = 10, VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT = 1000138000, - VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_NVX = 1000165000, + VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_NV = 1000165000, VK_DESCRIPTOR_TYPE_BEGIN_RANGE = VK_DESCRIPTOR_TYPE_SAMPLER, VK_DESCRIPTOR_TYPE_END_RANGE = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, VK_DESCRIPTOR_TYPE_RANGE_SIZE = (VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT - VK_DESCRIPTOR_TYPE_SAMPLER + 1), @@ -1207,7 +1223,7 @@ typedef enum VkPipelineBindPoint { VK_PIPELINE_BIND_POINT_GRAPHICS = 0, VK_PIPELINE_BIND_POINT_COMPUTE = 1, - VK_PIPELINE_BIND_POINT_RAYTRACING_NVX = 1000165000, + VK_PIPELINE_BIND_POINT_RAY_TRACING_NV = 1000165000, VK_PIPELINE_BIND_POINT_BEGIN_RANGE = VK_PIPELINE_BIND_POINT_GRAPHICS, VK_PIPELINE_BIND_POINT_END_RANGE = VK_PIPELINE_BIND_POINT_COMPUTE, VK_PIPELINE_BIND_POINT_RANGE_SIZE = (VK_PIPELINE_BIND_POINT_COMPUTE - VK_PIPELINE_BIND_POINT_GRAPHICS + 1), @@ -1226,6 +1242,7 @@ typedef enum VkIndexType { VK_INDEX_TYPE_UINT16 = 0, VK_INDEX_TYPE_UINT32 = 1, + VK_INDEX_TYPE_NONE_NV = 1000165000, VK_INDEX_TYPE_BEGIN_RANGE = VK_INDEX_TYPE_UINT16, VK_INDEX_TYPE_END_RANGE = VK_INDEX_TYPE_UINT32, VK_INDEX_TYPE_RANGE_SIZE = (VK_INDEX_TYPE_UINT32 - VK_INDEX_TYPE_UINT16 + 1), @@ -1279,7 +1296,7 @@ VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX = 1000086001, VK_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT = 1000128000, VK_OBJECT_TYPE_VALIDATION_CACHE_EXT = 1000160000, - VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_NVX = 1000165000, + VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV = 1000165000, VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_KHR = VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE, VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_KHR = VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION, 
VK_OBJECT_TYPE_BEGIN_RANGE = VK_OBJECT_TYPE_UNKNOWN, @@ -1325,6 +1342,7 @@ VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT = 0x00800000, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_CUBIC_BIT_IMG = 0x00002000, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT_EXT = 0x00010000, + VK_FORMAT_FEATURE_FRAGMENT_DENSITY_MAP_BIT_EXT = 0x01000000, VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT, VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR = VK_FORMAT_FEATURE_TRANSFER_DST_BIT, VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT_KHR = VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT, @@ -1348,6 +1366,7 @@ VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT = 0x00000040, VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT = 0x00000080, VK_IMAGE_USAGE_SHADING_RATE_IMAGE_BIT_NV = 0x00000100, + VK_IMAGE_USAGE_FRAGMENT_DENSITY_MAP_BIT_EXT = 0x00000200, VK_IMAGE_USAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkImageUsageFlagBits; typedef VkFlags VkImageUsageFlags; @@ -1367,6 +1386,7 @@ VK_IMAGE_CREATE_DISJOINT_BIT = 0x00000200, VK_IMAGE_CREATE_CORNER_SAMPLED_BIT_NV = 0x00002000, VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT = 0x00001000, + VK_IMAGE_CREATE_SUBSAMPLED_BIT_EXT = 0x00004000, VK_IMAGE_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT_KHR = VK_IMAGE_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT, VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT_KHR = VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT, VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT_KHR = VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT, @@ -1447,9 +1467,11 @@ VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT = 0x00040000, VK_PIPELINE_STAGE_COMMAND_PROCESS_BIT_NVX = 0x00020000, VK_PIPELINE_STAGE_SHADING_RATE_IMAGE_BIT_NV = 0x00400000, - VK_PIPELINE_STAGE_RAYTRACING_BIT_NVX = 0x00200000, + VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_NV = 0x00200000, + VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_NV = 0x02000000, VK_PIPELINE_STAGE_TASK_SHADER_BIT_NV = 0x00080000, VK_PIPELINE_STAGE_MESH_SHADER_BIT_NV = 0x00100000, + 
VK_PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT_EXT = 0x00800000, VK_PIPELINE_STAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkPipelineStageFlagBits; typedef VkFlags VkPipelineStageFlags; @@ -1527,6 +1549,7 @@ VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT = 0x00000002, VK_BUFFER_CREATE_SPARSE_ALIASED_BIT = 0x00000004, VK_BUFFER_CREATE_PROTECTED_BIT = 0x00000008, + VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_EXT = 0x00000010, VK_BUFFER_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkBufferCreateFlagBits; typedef VkFlags VkBufferCreateFlags; @@ -1544,11 +1567,17 @@ VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT = 0x00000800, VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT = 0x00001000, VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT = 0x00000200, - VK_BUFFER_USAGE_RAYTRACING_BIT_NVX = 0x00000400, + VK_BUFFER_USAGE_RAY_TRACING_BIT_NV = 0x00000400, + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT = 0x00020000, VK_BUFFER_USAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkBufferUsageFlagBits; typedef VkFlags VkBufferUsageFlags; typedef VkFlags VkBufferViewCreateFlags; + +typedef enum VkImageViewCreateFlagBits { + VK_IMAGE_VIEW_CREATE_FRAGMENT_DENSITY_MAP_DYNAMIC_BIT_EXT = 0x00000001, + VK_IMAGE_VIEW_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkImageViewCreateFlagBits; typedef VkFlags VkImageViewCreateFlags; typedef VkFlags VkShaderModuleCreateFlags; typedef VkFlags VkPipelineCacheCreateFlags; @@ -1559,7 +1588,7 @@ VK_PIPELINE_CREATE_DERIVATIVE_BIT = 0x00000004, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT = 0x00000008, VK_PIPELINE_CREATE_DISPATCH_BASE = 0x00000010, - VK_PIPELINE_CREATE_DEFER_COMPILE_BIT_NVX = 0x00000020, + VK_PIPELINE_CREATE_DEFER_COMPILE_BIT_NV = 0x00000020, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT_KHR = VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT, VK_PIPELINE_CREATE_DISPATCH_BASE_KHR = VK_PIPELINE_CREATE_DISPATCH_BASE, VK_PIPELINE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF @@ -1576,12 +1605,12 @@ VK_SHADER_STAGE_COMPUTE_BIT = 0x00000020, 
VK_SHADER_STAGE_ALL_GRAPHICS = 0x0000001F, VK_SHADER_STAGE_ALL = 0x7FFFFFFF, - VK_SHADER_STAGE_RAYGEN_BIT_NVX = 0x00000100, - VK_SHADER_STAGE_ANY_HIT_BIT_NVX = 0x00000200, - VK_SHADER_STAGE_CLOSEST_HIT_BIT_NVX = 0x00000400, - VK_SHADER_STAGE_MISS_BIT_NVX = 0x00000800, - VK_SHADER_STAGE_INTERSECTION_BIT_NVX = 0x00001000, - VK_SHADER_STAGE_CALLABLE_BIT_NVX = 0x00002000, + VK_SHADER_STAGE_RAYGEN_BIT_NV = 0x00000100, + VK_SHADER_STAGE_ANY_HIT_BIT_NV = 0x00000200, + VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV = 0x00000400, + VK_SHADER_STAGE_MISS_BIT_NV = 0x00000800, + VK_SHADER_STAGE_INTERSECTION_BIT_NV = 0x00001000, + VK_SHADER_STAGE_CALLABLE_BIT_NV = 0x00002000, VK_SHADER_STAGE_TASK_BIT_NV = 0x00000040, VK_SHADER_STAGE_MESH_BIT_NV = 0x00000080, VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF @@ -1615,6 +1644,12 @@ typedef VkFlags VkPipelineDynamicStateCreateFlags; typedef VkFlags VkPipelineLayoutCreateFlags; typedef VkFlags VkShaderStageFlags; + +typedef enum VkSamplerCreateFlagBits { + VK_SAMPLER_CREATE_SUBSAMPLED_BIT_EXT = 0x00000001, + VK_SAMPLER_CREATE_SUBSAMPLED_COARSE_RECONSTRUCTION_BIT_EXT = 0x00000002, + VK_SAMPLER_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkSamplerCreateFlagBits; typedef VkFlags VkSamplerCreateFlags; typedef enum VkDescriptorSetLayoutCreateFlagBits { @@ -1673,8 +1708,9 @@ VK_ACCESS_COMMAND_PROCESS_WRITE_BIT_NVX = 0x00040000, VK_ACCESS_COLOR_ATTACHMENT_READ_NONCOHERENT_BIT_EXT = 0x00080000, VK_ACCESS_SHADING_RATE_IMAGE_READ_BIT_NV = 0x00800000, - VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_NVX = 0x00200000, - VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_NVX = 0x00400000, + VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_NV = 0x00200000, + VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_NV = 0x00400000, + VK_ACCESS_FRAGMENT_DENSITY_MAP_READ_BIT_EXT = 0x01000000, VK_ACCESS_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkAccessFlagBits; typedef VkFlags VkAccessFlags; @@ -4355,7 +4391,7 @@ typedef struct VkDescriptorUpdateTemplateCreateInfo { VkStructureType sType; - void* pNext; + 
const void* pNext; VkDescriptorUpdateTemplateCreateFlags flags; uint32_t descriptorUpdateEntryCount; const VkDescriptorUpdateTemplateEntry* pDescriptorUpdateEntries; @@ -4794,6 +4830,7 @@ typedef enum VkSwapchainCreateFlagBitsKHR { VK_SWAPCHAIN_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT_KHR = 0x00000001, VK_SWAPCHAIN_CREATE_PROTECTED_BIT_KHR = 0x00000002, + VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR = 0x00000004, VK_SWAPCHAIN_CREATE_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF } VkSwapchainCreateFlagBitsKHR; typedef VkFlags VkSwapchainCreateFlagsKHR; @@ -5470,6 +5507,19 @@ const void* pData); #endif +#define VK_KHR_shader_float16_int8 1 +#define VK_KHR_SHADER_FLOAT16_INT8_SPEC_VERSION 1 +#define VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME "VK_KHR_shader_float16_int8" + +typedef struct VkPhysicalDeviceFloat16Int8FeaturesKHR { + VkStructureType sType; + void* pNext; + VkBool32 shaderFloat16; + VkBool32 shaderInt8; +} VkPhysicalDeviceFloat16Int8FeaturesKHR; + + + #define VK_KHR_16bit_storage 1 #define VK_KHR_16BIT_STORAGE_SPEC_VERSION 1 #define VK_KHR_16BIT_STORAGE_EXTENSION_NAME "VK_KHR_16bit_storage" @@ -6101,9 +6151,10 @@ VK_DRIVER_ID_IMAGINATION_PROPRIETARY_KHR = 7, VK_DRIVER_ID_QUALCOMM_PROPRIETARY_KHR = 8, VK_DRIVER_ID_ARM_PROPRIETARY_KHR = 9, + VK_DRIVER_ID_GOOGLE_PASTEL_KHR = 10, VK_DRIVER_ID_BEGIN_RANGE_KHR = VK_DRIVER_ID_AMD_PROPRIETARY_KHR, - VK_DRIVER_ID_END_RANGE_KHR = VK_DRIVER_ID_ARM_PROPRIETARY_KHR, - VK_DRIVER_ID_RANGE_SIZE_KHR = (VK_DRIVER_ID_ARM_PROPRIETARY_KHR - VK_DRIVER_ID_AMD_PROPRIETARY_KHR + 1), + VK_DRIVER_ID_END_RANGE_KHR = VK_DRIVER_ID_GOOGLE_PASTEL_KHR, + VK_DRIVER_ID_RANGE_SIZE_KHR = (VK_DRIVER_ID_GOOGLE_PASTEL_KHR - VK_DRIVER_ID_AMD_PROPRIETARY_KHR + 1), VK_DRIVER_ID_MAX_ENUM_KHR = 0x7FFFFFFF } VkDriverIdKHR; @@ -6125,6 +6176,73 @@ +#define VK_KHR_shader_float_controls 1 +#define VK_KHR_SHADER_FLOAT_CONTROLS_SPEC_VERSION 1 +#define VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME "VK_KHR_shader_float_controls" + +typedef struct 
VkPhysicalDeviceFloatControlsPropertiesKHR { + VkStructureType sType; + void* pNext; + VkBool32 separateDenormSettings; + VkBool32 separateRoundingModeSettings; + VkBool32 shaderSignedZeroInfNanPreserveFloat16; + VkBool32 shaderSignedZeroInfNanPreserveFloat32; + VkBool32 shaderSignedZeroInfNanPreserveFloat64; + VkBool32 shaderDenormPreserveFloat16; + VkBool32 shaderDenormPreserveFloat32; + VkBool32 shaderDenormPreserveFloat64; + VkBool32 shaderDenormFlushToZeroFloat16; + VkBool32 shaderDenormFlushToZeroFloat32; + VkBool32 shaderDenormFlushToZeroFloat64; + VkBool32 shaderRoundingModeRTEFloat16; + VkBool32 shaderRoundingModeRTEFloat32; + VkBool32 shaderRoundingModeRTEFloat64; + VkBool32 shaderRoundingModeRTZFloat16; + VkBool32 shaderRoundingModeRTZFloat32; + VkBool32 shaderRoundingModeRTZFloat64; +} VkPhysicalDeviceFloatControlsPropertiesKHR; + + + +#define VK_KHR_depth_stencil_resolve 1 +#define VK_KHR_DEPTH_STENCIL_RESOLVE_SPEC_VERSION 1 +#define VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME "VK_KHR_depth_stencil_resolve" + + +typedef enum VkResolveModeFlagBitsKHR { + VK_RESOLVE_MODE_NONE_KHR = 0, + VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR = 0x00000001, + VK_RESOLVE_MODE_AVERAGE_BIT_KHR = 0x00000002, + VK_RESOLVE_MODE_MIN_BIT_KHR = 0x00000004, + VK_RESOLVE_MODE_MAX_BIT_KHR = 0x00000008, + VK_RESOLVE_MODE_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF +} VkResolveModeFlagBitsKHR; +typedef VkFlags VkResolveModeFlagsKHR; + +typedef struct VkSubpassDescriptionDepthStencilResolveKHR { + VkStructureType sType; + const void* pNext; + VkResolveModeFlagBitsKHR depthResolveMode; + VkResolveModeFlagBitsKHR stencilResolveMode; + const VkAttachmentReference2KHR* pDepthStencilResolveAttachment; +} VkSubpassDescriptionDepthStencilResolveKHR; + +typedef struct VkPhysicalDeviceDepthStencilResolvePropertiesKHR { + VkStructureType sType; + void* pNext; + VkResolveModeFlagsKHR supportedDepthResolveModes; + VkResolveModeFlagsKHR supportedStencilResolveModes; + VkBool32 independentResolveNone; + VkBool32 
independentResolve; +} VkPhysicalDeviceDepthStencilResolvePropertiesKHR; + + + +#define VK_KHR_swapchain_mutable_format 1 +#define VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_SPEC_VERSION 1 +#define VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME "VK_KHR_swapchain_mutable_format" + + #define VK_KHR_vulkan_memory_model 1 #define VK_KHR_VULKAN_MEMORY_MODEL_SPEC_VERSION 2 #define VK_KHR_VULKAN_MEMORY_MODEL_EXTENSION_NAME "VK_KHR_vulkan_memory_model" @@ -6182,7 +6300,7 @@ VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT = 33, VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_EXT = 1000156000, VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_EXT = 1000085000, - VK_DEBUG_REPORT_OBJECT_TYPE_ACCELERATION_STRUCTURE_NVX_EXT = 1000165000, + VK_DEBUG_REPORT_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV_EXT = 1000165000, VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT, VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT, VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_KHR_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_EXT, @@ -7446,11 +7564,11 @@ int32_t messageIdNumber; const char* pMessage; uint32_t queueLabelCount; - VkDebugUtilsLabelEXT* pQueueLabels; + const VkDebugUtilsLabelEXT* pQueueLabels; uint32_t cmdBufLabelCount; - VkDebugUtilsLabelEXT* pCmdBufLabels; + const VkDebugUtilsLabelEXT* pCmdBufLabels; uint32_t objectCount; - VkDebugUtilsObjectNameInfoEXT* pObjects; + const VkDebugUtilsObjectNameInfoEXT* pObjects; } VkDebugUtilsMessengerCallbackDataEXT; typedef VkBool32 (VKAPI_PTR *PFN_vkDebugUtilsMessengerCallbackEXT)( @@ -7791,8 +7909,6 @@ #define VK_EXT_image_drm_format_modifier 1 -#define VK_EXT_EXTENSION_159_SPEC_VERSION 0 -#define VK_EXT_EXTENSION_159_EXTENSION_NAME "VK_EXT_extension_159" #define VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_SPEC_VERSION 1 #define VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME "VK_EXT_image_drm_format_modifier" @@ -8113,81 
+8229,113 @@ const VkCoarseSampleOrderCustomNV* pCustomSampleOrders); #endif -#define VK_NVX_raytracing 1 -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkAccelerationStructureNVX) +#define VK_NV_ray_tracing 1 +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkAccelerationStructureNV) -#define VK_NVX_RAYTRACING_SPEC_VERSION 1 -#define VK_NVX_RAYTRACING_EXTENSION_NAME "VK_NVX_raytracing" +#define VK_NV_RAY_TRACING_SPEC_VERSION 3 +#define VK_NV_RAY_TRACING_EXTENSION_NAME "VK_NV_ray_tracing" +#define VK_SHADER_UNUSED_NV (~0U) + + +typedef enum VkRayTracingShaderGroupTypeNV { + VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV = 0, + VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV = 1, + VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_NV = 2, + VK_RAY_TRACING_SHADER_GROUP_TYPE_BEGIN_RANGE_NV = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, + VK_RAY_TRACING_SHADER_GROUP_TYPE_END_RANGE_NV = VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_NV, + VK_RAY_TRACING_SHADER_GROUP_TYPE_RANGE_SIZE_NV = (VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_NV - VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV + 1), + VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkRayTracingShaderGroupTypeNV; + +typedef enum VkGeometryTypeNV { + VK_GEOMETRY_TYPE_TRIANGLES_NV = 0, + VK_GEOMETRY_TYPE_AABBS_NV = 1, + VK_GEOMETRY_TYPE_BEGIN_RANGE_NV = VK_GEOMETRY_TYPE_TRIANGLES_NV, + VK_GEOMETRY_TYPE_END_RANGE_NV = VK_GEOMETRY_TYPE_AABBS_NV, + VK_GEOMETRY_TYPE_RANGE_SIZE_NV = (VK_GEOMETRY_TYPE_AABBS_NV - VK_GEOMETRY_TYPE_TRIANGLES_NV + 1), + VK_GEOMETRY_TYPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkGeometryTypeNV; + +typedef enum VkAccelerationStructureTypeNV { + VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV = 0, + VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV = 1, + VK_ACCELERATION_STRUCTURE_TYPE_BEGIN_RANGE_NV = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV, + VK_ACCELERATION_STRUCTURE_TYPE_END_RANGE_NV = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV, + VK_ACCELERATION_STRUCTURE_TYPE_RANGE_SIZE_NV = 
(VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV - VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV + 1), + VK_ACCELERATION_STRUCTURE_TYPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkAccelerationStructureTypeNV; + +typedef enum VkCopyAccelerationStructureModeNV { + VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_NV = 0, + VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_NV = 1, + VK_COPY_ACCELERATION_STRUCTURE_MODE_BEGIN_RANGE_NV = VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_NV, + VK_COPY_ACCELERATION_STRUCTURE_MODE_END_RANGE_NV = VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_NV, + VK_COPY_ACCELERATION_STRUCTURE_MODE_RANGE_SIZE_NV = (VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_NV - VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_NV + 1), + VK_COPY_ACCELERATION_STRUCTURE_MODE_MAX_ENUM_NV = 0x7FFFFFFF +} VkCopyAccelerationStructureModeNV; + +typedef enum VkAccelerationStructureMemoryRequirementsTypeNV { + VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_OBJECT_NV = 0, + VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_BUILD_SCRATCH_NV = 1, + VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_UPDATE_SCRATCH_NV = 2, + VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_BEGIN_RANGE_NV = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_OBJECT_NV, + VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_END_RANGE_NV = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_UPDATE_SCRATCH_NV, + VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_RANGE_SIZE_NV = (VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_UPDATE_SCRATCH_NV - VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_OBJECT_NV + 1), + VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkAccelerationStructureMemoryRequirementsTypeNV; + + +typedef enum VkGeometryFlagBitsNV { + VK_GEOMETRY_OPAQUE_BIT_NV = 0x00000001, + VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_NV = 0x00000002, + VK_GEOMETRY_FLAG_BITS_MAX_ENUM_NV = 0x7FFFFFFF +} VkGeometryFlagBitsNV; +typedef VkFlags VkGeometryFlagsNV; + +typedef enum 
VkGeometryInstanceFlagBitsNV { + VK_GEOMETRY_INSTANCE_TRIANGLE_CULL_DISABLE_BIT_NV = 0x00000001, + VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_NV = 0x00000002, + VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_NV = 0x00000004, + VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_NV = 0x00000008, + VK_GEOMETRY_INSTANCE_FLAG_BITS_MAX_ENUM_NV = 0x7FFFFFFF +} VkGeometryInstanceFlagBitsNV; +typedef VkFlags VkGeometryInstanceFlagsNV; + +typedef enum VkBuildAccelerationStructureFlagBitsNV { + VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_NV = 0x00000001, + VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_NV = 0x00000002, + VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_NV = 0x00000004, + VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_NV = 0x00000008, + VK_BUILD_ACCELERATION_STRUCTURE_LOW_MEMORY_BIT_NV = 0x00000010, + VK_BUILD_ACCELERATION_STRUCTURE_FLAG_BITS_MAX_ENUM_NV = 0x7FFFFFFF +} VkBuildAccelerationStructureFlagBitsNV; +typedef VkFlags VkBuildAccelerationStructureFlagsNV; +typedef struct VkRayTracingShaderGroupCreateInfoNV { + VkStructureType sType; + const void* pNext; + VkRayTracingShaderGroupTypeNV type; + uint32_t generalShader; + uint32_t closestHitShader; + uint32_t anyHitShader; + uint32_t intersectionShader; +} VkRayTracingShaderGroupCreateInfoNV; -typedef enum VkGeometryTypeNVX { - VK_GEOMETRY_TYPE_TRIANGLES_NVX = 0, - VK_GEOMETRY_TYPE_AABBS_NVX = 1, - VK_GEOMETRY_TYPE_BEGIN_RANGE_NVX = VK_GEOMETRY_TYPE_TRIANGLES_NVX, - VK_GEOMETRY_TYPE_END_RANGE_NVX = VK_GEOMETRY_TYPE_AABBS_NVX, - VK_GEOMETRY_TYPE_RANGE_SIZE_NVX = (VK_GEOMETRY_TYPE_AABBS_NVX - VK_GEOMETRY_TYPE_TRIANGLES_NVX + 1), - VK_GEOMETRY_TYPE_MAX_ENUM_NVX = 0x7FFFFFFF -} VkGeometryTypeNVX; - -typedef enum VkAccelerationStructureTypeNVX { - VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NVX = 0, - VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NVX = 1, - VK_ACCELERATION_STRUCTURE_TYPE_BEGIN_RANGE_NVX = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NVX, - VK_ACCELERATION_STRUCTURE_TYPE_END_RANGE_NVX = 
VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NVX, - VK_ACCELERATION_STRUCTURE_TYPE_RANGE_SIZE_NVX = (VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NVX - VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NVX + 1), - VK_ACCELERATION_STRUCTURE_TYPE_MAX_ENUM_NVX = 0x7FFFFFFF -} VkAccelerationStructureTypeNVX; - -typedef enum VkCopyAccelerationStructureModeNVX { - VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_NVX = 0, - VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_NVX = 1, - VK_COPY_ACCELERATION_STRUCTURE_MODE_BEGIN_RANGE_NVX = VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_NVX, - VK_COPY_ACCELERATION_STRUCTURE_MODE_END_RANGE_NVX = VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_NVX, - VK_COPY_ACCELERATION_STRUCTURE_MODE_RANGE_SIZE_NVX = (VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_NVX - VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_NVX + 1), - VK_COPY_ACCELERATION_STRUCTURE_MODE_MAX_ENUM_NVX = 0x7FFFFFFF -} VkCopyAccelerationStructureModeNVX; - - -typedef enum VkGeometryFlagBitsNVX { - VK_GEOMETRY_OPAQUE_BIT_NVX = 0x00000001, - VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_NVX = 0x00000002, - VK_GEOMETRY_FLAG_BITS_MAX_ENUM_NVX = 0x7FFFFFFF -} VkGeometryFlagBitsNVX; -typedef VkFlags VkGeometryFlagsNVX; - -typedef enum VkGeometryInstanceFlagBitsNVX { - VK_GEOMETRY_INSTANCE_TRIANGLE_CULL_DISABLE_BIT_NVX = 0x00000001, - VK_GEOMETRY_INSTANCE_TRIANGLE_CULL_FLIP_WINDING_BIT_NVX = 0x00000002, - VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_NVX = 0x00000004, - VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_NVX = 0x00000008, - VK_GEOMETRY_INSTANCE_FLAG_BITS_MAX_ENUM_NVX = 0x7FFFFFFF -} VkGeometryInstanceFlagBitsNVX; -typedef VkFlags VkGeometryInstanceFlagsNVX; - -typedef enum VkBuildAccelerationStructureFlagBitsNVX { - VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_NVX = 0x00000001, - VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_NVX = 0x00000002, - VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_NVX = 0x00000004, - VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_NVX = 0x00000008, - 
VK_BUILD_ACCELERATION_STRUCTURE_LOW_MEMORY_BIT_NVX = 0x00000010, - VK_BUILD_ACCELERATION_STRUCTURE_FLAG_BITS_MAX_ENUM_NVX = 0x7FFFFFFF -} VkBuildAccelerationStructureFlagBitsNVX; -typedef VkFlags VkBuildAccelerationStructureFlagsNVX; - -typedef struct VkRaytracingPipelineCreateInfoNVX { - VkStructureType sType; - const void* pNext; - VkPipelineCreateFlags flags; - uint32_t stageCount; - const VkPipelineShaderStageCreateInfo* pStages; - const uint32_t* pGroupNumbers; - uint32_t maxRecursionDepth; - VkPipelineLayout layout; - VkPipeline basePipelineHandle; - int32_t basePipelineIndex; -} VkRaytracingPipelineCreateInfoNVX; +typedef struct VkRayTracingPipelineCreateInfoNV { + VkStructureType sType; + const void* pNext; + VkPipelineCreateFlags flags; + uint32_t stageCount; + const VkPipelineShaderStageCreateInfo* pStages; + uint32_t groupCount; + const VkRayTracingShaderGroupCreateInfoNV* pGroups; + uint32_t maxRecursionDepth; + VkPipelineLayout layout; + VkPipeline basePipelineHandle; + int32_t basePipelineIndex; +} VkRayTracingPipelineCreateInfoNV; -typedef struct VkGeometryTrianglesNVX { +typedef struct VkGeometryTrianglesNV { VkStructureType sType; const void* pNext; VkBuffer vertexData; @@ -8201,136 +8349,138 @@ VkIndexType indexType; VkBuffer transformData; VkDeviceSize transformOffset; -} VkGeometryTrianglesNVX; +} VkGeometryTrianglesNV; -typedef struct VkGeometryAABBNVX { +typedef struct VkGeometryAABBNV { VkStructureType sType; const void* pNext; VkBuffer aabbData; uint32_t numAABBs; uint32_t stride; VkDeviceSize offset; -} VkGeometryAABBNVX; +} VkGeometryAABBNV; -typedef struct VkGeometryDataNVX { - VkGeometryTrianglesNVX triangles; - VkGeometryAABBNVX aabbs; -} VkGeometryDataNVX; +typedef struct VkGeometryDataNV { + VkGeometryTrianglesNV triangles; + VkGeometryAABBNV aabbs; +} VkGeometryDataNV; -typedef struct VkGeometryNVX { - VkStructureType sType; - const void* pNext; - VkGeometryTypeNVX geometryType; - VkGeometryDataNVX geometry; - VkGeometryFlagsNVX 
flags; -} VkGeometryNVX; +typedef struct VkGeometryNV { + VkStructureType sType; + const void* pNext; + VkGeometryTypeNV geometryType; + VkGeometryDataNV geometry; + VkGeometryFlagsNV flags; +} VkGeometryNV; -typedef struct VkAccelerationStructureCreateInfoNVX { - VkStructureType sType; - const void* pNext; - VkAccelerationStructureTypeNVX type; - VkBuildAccelerationStructureFlagsNVX flags; - VkDeviceSize compactedSize; - uint32_t instanceCount; - uint32_t geometryCount; - const VkGeometryNVX* pGeometries; -} VkAccelerationStructureCreateInfoNVX; +typedef struct VkAccelerationStructureInfoNV { + VkStructureType sType; + const void* pNext; + VkAccelerationStructureTypeNV type; + VkBuildAccelerationStructureFlagsNV flags; + uint32_t instanceCount; + uint32_t geometryCount; + const VkGeometryNV* pGeometries; +} VkAccelerationStructureInfoNV; -typedef struct VkBindAccelerationStructureMemoryInfoNVX { - VkStructureType sType; - const void* pNext; - VkAccelerationStructureNVX accelerationStructure; - VkDeviceMemory memory; - VkDeviceSize memoryOffset; - uint32_t deviceIndexCount; - const uint32_t* pDeviceIndices; -} VkBindAccelerationStructureMemoryInfoNVX; +typedef struct VkAccelerationStructureCreateInfoNV { + VkStructureType sType; + const void* pNext; + VkDeviceSize compactedSize; + VkAccelerationStructureInfoNV info; +} VkAccelerationStructureCreateInfoNV; -typedef struct VkDescriptorAccelerationStructureInfoNVX { - VkStructureType sType; - const void* pNext; - uint32_t accelerationStructureCount; - const VkAccelerationStructureNVX* pAccelerationStructures; -} VkDescriptorAccelerationStructureInfoNVX; +typedef struct VkBindAccelerationStructureMemoryInfoNV { + VkStructureType sType; + const void* pNext; + VkAccelerationStructureNV accelerationStructure; + VkDeviceMemory memory; + VkDeviceSize memoryOffset; + uint32_t deviceIndexCount; + const uint32_t* pDeviceIndices; +} VkBindAccelerationStructureMemoryInfoNV; -typedef struct 
VkAccelerationStructureMemoryRequirementsInfoNVX { - VkStructureType sType; - const void* pNext; - VkAccelerationStructureNVX accelerationStructure; -} VkAccelerationStructureMemoryRequirementsInfoNVX; +typedef struct VkWriteDescriptorSetAccelerationStructureNV { + VkStructureType sType; + const void* pNext; + uint32_t accelerationStructureCount; + const VkAccelerationStructureNV* pAccelerationStructures; +} VkWriteDescriptorSetAccelerationStructureNV; + +typedef struct VkAccelerationStructureMemoryRequirementsInfoNV { + VkStructureType sType; + const void* pNext; + VkAccelerationStructureMemoryRequirementsTypeNV type; + VkAccelerationStructureNV accelerationStructure; +} VkAccelerationStructureMemoryRequirementsInfoNV; -typedef struct VkPhysicalDeviceRaytracingPropertiesNVX { +typedef struct VkPhysicalDeviceRayTracingPropertiesNV { VkStructureType sType; void* pNext; - uint32_t shaderHeaderSize; + uint32_t shaderGroupHandleSize; uint32_t maxRecursionDepth; - uint32_t maxGeometryCount; -} VkPhysicalDeviceRaytracingPropertiesNVX; - - -typedef VkResult (VKAPI_PTR *PFN_vkCreateAccelerationStructureNVX)(VkDevice device, const VkAccelerationStructureCreateInfoNVX* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkAccelerationStructureNVX* pAccelerationStructure); -typedef void (VKAPI_PTR *PFN_vkDestroyAccelerationStructureNVX)(VkDevice device, VkAccelerationStructureNVX accelerationStructure, const VkAllocationCallbacks* pAllocator); -typedef void (VKAPI_PTR *PFN_vkGetAccelerationStructureMemoryRequirementsNVX)(VkDevice device, const VkAccelerationStructureMemoryRequirementsInfoNVX* pInfo, VkMemoryRequirements2KHR* pMemoryRequirements); -typedef void (VKAPI_PTR *PFN_vkGetAccelerationStructureScratchMemoryRequirementsNVX)(VkDevice device, const VkAccelerationStructureMemoryRequirementsInfoNVX* pInfo, VkMemoryRequirements2KHR* pMemoryRequirements); -typedef VkResult (VKAPI_PTR *PFN_vkBindAccelerationStructureMemoryNVX)(VkDevice device, uint32_t bindInfoCount, const 
VkBindAccelerationStructureMemoryInfoNVX* pBindInfos); -typedef void (VKAPI_PTR *PFN_vkCmdBuildAccelerationStructureNVX)(VkCommandBuffer commandBuffer, VkAccelerationStructureTypeNVX type, uint32_t instanceCount, VkBuffer instanceData, VkDeviceSize instanceOffset, uint32_t geometryCount, const VkGeometryNVX* pGeometries, VkBuildAccelerationStructureFlagsNVX flags, VkBool32 update, VkAccelerationStructureNVX dst, VkAccelerationStructureNVX src, VkBuffer scratch, VkDeviceSize scratchOffset); -typedef void (VKAPI_PTR *PFN_vkCmdCopyAccelerationStructureNVX)(VkCommandBuffer commandBuffer, VkAccelerationStructureNVX dst, VkAccelerationStructureNVX src, VkCopyAccelerationStructureModeNVX mode); -typedef void (VKAPI_PTR *PFN_vkCmdTraceRaysNVX)(VkCommandBuffer commandBuffer, VkBuffer raygenShaderBindingTableBuffer, VkDeviceSize raygenShaderBindingOffset, VkBuffer missShaderBindingTableBuffer, VkDeviceSize missShaderBindingOffset, VkDeviceSize missShaderBindingStride, VkBuffer hitShaderBindingTableBuffer, VkDeviceSize hitShaderBindingOffset, VkDeviceSize hitShaderBindingStride, uint32_t width, uint32_t height); -typedef VkResult (VKAPI_PTR *PFN_vkCreateRaytracingPipelinesNVX)(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkRaytracingPipelineCreateInfoNVX* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines); -typedef VkResult (VKAPI_PTR *PFN_vkGetRaytracingShaderHandlesNVX)(VkDevice device, VkPipeline pipeline, uint32_t firstGroup, uint32_t groupCount, size_t dataSize, void* pData); -typedef VkResult (VKAPI_PTR *PFN_vkGetAccelerationStructureHandleNVX)(VkDevice device, VkAccelerationStructureNVX accelerationStructure, size_t dataSize, void* pData); -typedef void (VKAPI_PTR *PFN_vkCmdWriteAccelerationStructurePropertiesNVX)(VkCommandBuffer commandBuffer, VkAccelerationStructureNVX accelerationStructure, VkQueryType queryType, VkQueryPool queryPool, uint32_t query); -typedef VkResult (VKAPI_PTR 
*PFN_vkCompileDeferredNVX)(VkDevice device, VkPipeline pipeline, uint32_t shader); + uint32_t maxShaderGroupStride; + uint32_t shaderGroupBaseAlignment; + uint64_t maxGeometryCount; + uint64_t maxInstanceCount; + uint64_t maxTriangleCount; + uint32_t maxDescriptorSetAccelerationStructures; +} VkPhysicalDeviceRayTracingPropertiesNV; + + +typedef VkResult (VKAPI_PTR *PFN_vkCreateAccelerationStructureNV)(VkDevice device, const VkAccelerationStructureCreateInfoNV* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkAccelerationStructureNV* pAccelerationStructure); +typedef void (VKAPI_PTR *PFN_vkDestroyAccelerationStructureNV)(VkDevice device, VkAccelerationStructureNV accelerationStructure, const VkAllocationCallbacks* pAllocator); +typedef void (VKAPI_PTR *PFN_vkGetAccelerationStructureMemoryRequirementsNV)(VkDevice device, const VkAccelerationStructureMemoryRequirementsInfoNV* pInfo, VkMemoryRequirements2KHR* pMemoryRequirements); +typedef VkResult (VKAPI_PTR *PFN_vkBindAccelerationStructureMemoryNV)(VkDevice device, uint32_t bindInfoCount, const VkBindAccelerationStructureMemoryInfoNV* pBindInfos); +typedef void (VKAPI_PTR *PFN_vkCmdBuildAccelerationStructureNV)(VkCommandBuffer commandBuffer, const VkAccelerationStructureInfoNV* pInfo, VkBuffer instanceData, VkDeviceSize instanceOffset, VkBool32 update, VkAccelerationStructureNV dst, VkAccelerationStructureNV src, VkBuffer scratch, VkDeviceSize scratchOffset); +typedef void (VKAPI_PTR *PFN_vkCmdCopyAccelerationStructureNV)(VkCommandBuffer commandBuffer, VkAccelerationStructureNV dst, VkAccelerationStructureNV src, VkCopyAccelerationStructureModeNV mode); +typedef void (VKAPI_PTR *PFN_vkCmdTraceRaysNV)(VkCommandBuffer commandBuffer, VkBuffer raygenShaderBindingTableBuffer, VkDeviceSize raygenShaderBindingOffset, VkBuffer missShaderBindingTableBuffer, VkDeviceSize missShaderBindingOffset, VkDeviceSize missShaderBindingStride, VkBuffer hitShaderBindingTableBuffer, VkDeviceSize hitShaderBindingOffset, VkDeviceSize 
hitShaderBindingStride, VkBuffer callableShaderBindingTableBuffer, VkDeviceSize callableShaderBindingOffset, VkDeviceSize callableShaderBindingStride, uint32_t width, uint32_t height, uint32_t depth); +typedef VkResult (VKAPI_PTR *PFN_vkCreateRayTracingPipelinesNV)(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkRayTracingPipelineCreateInfoNV* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines); +typedef VkResult (VKAPI_PTR *PFN_vkGetRayTracingShaderGroupHandlesNV)(VkDevice device, VkPipeline pipeline, uint32_t firstGroup, uint32_t groupCount, size_t dataSize, void* pData); +typedef VkResult (VKAPI_PTR *PFN_vkGetAccelerationStructureHandleNV)(VkDevice device, VkAccelerationStructureNV accelerationStructure, size_t dataSize, void* pData); +typedef void (VKAPI_PTR *PFN_vkCmdWriteAccelerationStructuresPropertiesNV)(VkCommandBuffer commandBuffer, uint32_t accelerationStructureCount, const VkAccelerationStructureNV* pAccelerationStructures, VkQueryType queryType, VkQueryPool queryPool, uint32_t firstQuery); +typedef VkResult (VKAPI_PTR *PFN_vkCompileDeferredNV)(VkDevice device, VkPipeline pipeline, uint32_t shader); #ifndef VK_NO_PROTOTYPES -VKAPI_ATTR VkResult VKAPI_CALL vkCreateAccelerationStructureNVX( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateAccelerationStructureNV( VkDevice device, - const VkAccelerationStructureCreateInfoNVX* pCreateInfo, + const VkAccelerationStructureCreateInfoNV* pCreateInfo, const VkAllocationCallbacks* pAllocator, - VkAccelerationStructureNVX* pAccelerationStructure); + VkAccelerationStructureNV* pAccelerationStructure); -VKAPI_ATTR void VKAPI_CALL vkDestroyAccelerationStructureNVX( +VKAPI_ATTR void VKAPI_CALL vkDestroyAccelerationStructureNV( VkDevice device, - VkAccelerationStructureNVX accelerationStructure, + VkAccelerationStructureNV accelerationStructure, const VkAllocationCallbacks* pAllocator); -VKAPI_ATTR void VKAPI_CALL vkGetAccelerationStructureMemoryRequirementsNVX( - 
VkDevice device, - const VkAccelerationStructureMemoryRequirementsInfoNVX* pInfo, - VkMemoryRequirements2KHR* pMemoryRequirements); - -VKAPI_ATTR void VKAPI_CALL vkGetAccelerationStructureScratchMemoryRequirementsNVX( +VKAPI_ATTR void VKAPI_CALL vkGetAccelerationStructureMemoryRequirementsNV( VkDevice device, - const VkAccelerationStructureMemoryRequirementsInfoNVX* pInfo, + const VkAccelerationStructureMemoryRequirementsInfoNV* pInfo, VkMemoryRequirements2KHR* pMemoryRequirements); -VKAPI_ATTR VkResult VKAPI_CALL vkBindAccelerationStructureMemoryNVX( +VKAPI_ATTR VkResult VKAPI_CALL vkBindAccelerationStructureMemoryNV( VkDevice device, uint32_t bindInfoCount, - const VkBindAccelerationStructureMemoryInfoNVX* pBindInfos); + const VkBindAccelerationStructureMemoryInfoNV* pBindInfos); -VKAPI_ATTR void VKAPI_CALL vkCmdBuildAccelerationStructureNVX( +VKAPI_ATTR void VKAPI_CALL vkCmdBuildAccelerationStructureNV( VkCommandBuffer commandBuffer, - VkAccelerationStructureTypeNVX type, - uint32_t instanceCount, + const VkAccelerationStructureInfoNV* pInfo, VkBuffer instanceData, VkDeviceSize instanceOffset, - uint32_t geometryCount, - const VkGeometryNVX* pGeometries, - VkBuildAccelerationStructureFlagsNVX flags, VkBool32 update, - VkAccelerationStructureNVX dst, - VkAccelerationStructureNVX src, + VkAccelerationStructureNV dst, + VkAccelerationStructureNV src, VkBuffer scratch, VkDeviceSize scratchOffset); -VKAPI_ATTR void VKAPI_CALL vkCmdCopyAccelerationStructureNVX( +VKAPI_ATTR void VKAPI_CALL vkCmdCopyAccelerationStructureNV( VkCommandBuffer commandBuffer, - VkAccelerationStructureNVX dst, - VkAccelerationStructureNVX src, - VkCopyAccelerationStructureModeNVX mode); + VkAccelerationStructureNV dst, + VkAccelerationStructureNV src, + VkCopyAccelerationStructureModeNV mode); -VKAPI_ATTR void VKAPI_CALL vkCmdTraceRaysNVX( +VKAPI_ATTR void VKAPI_CALL vkCmdTraceRaysNV( VkCommandBuffer commandBuffer, VkBuffer raygenShaderBindingTableBuffer, VkDeviceSize 
raygenShaderBindingOffset, @@ -8340,18 +8490,22 @@ VkBuffer hitShaderBindingTableBuffer, VkDeviceSize hitShaderBindingOffset, VkDeviceSize hitShaderBindingStride, + VkBuffer callableShaderBindingTableBuffer, + VkDeviceSize callableShaderBindingOffset, + VkDeviceSize callableShaderBindingStride, uint32_t width, - uint32_t height); + uint32_t height, + uint32_t depth); -VKAPI_ATTR VkResult VKAPI_CALL vkCreateRaytracingPipelinesNVX( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateRayTracingPipelinesNV( VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, - const VkRaytracingPipelineCreateInfoNVX* pCreateInfos, + const VkRayTracingPipelineCreateInfoNV* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines); -VKAPI_ATTR VkResult VKAPI_CALL vkGetRaytracingShaderHandlesNVX( +VKAPI_ATTR VkResult VKAPI_CALL vkGetRayTracingShaderGroupHandlesNV( VkDevice device, VkPipeline pipeline, uint32_t firstGroup, @@ -8359,20 +8513,21 @@ size_t dataSize, void* pData); -VKAPI_ATTR VkResult VKAPI_CALL vkGetAccelerationStructureHandleNVX( +VKAPI_ATTR VkResult VKAPI_CALL vkGetAccelerationStructureHandleNV( VkDevice device, - VkAccelerationStructureNVX accelerationStructure, + VkAccelerationStructureNV accelerationStructure, size_t dataSize, void* pData); -VKAPI_ATTR void VKAPI_CALL vkCmdWriteAccelerationStructurePropertiesNVX( +VKAPI_ATTR void VKAPI_CALL vkCmdWriteAccelerationStructuresPropertiesNV( VkCommandBuffer commandBuffer, - VkAccelerationStructureNVX accelerationStructure, + uint32_t accelerationStructureCount, + const VkAccelerationStructureNV* pAccelerationStructures, VkQueryType queryType, VkQueryPool queryPool, - uint32_t query); + uint32_t firstQuery); -VKAPI_ATTR VkResult VKAPI_CALL vkCompileDeferredNVX( +VKAPI_ATTR VkResult VKAPI_CALL vkCompileDeferredNV( VkDevice device, VkPipeline pipeline, uint32_t shader); @@ -8534,6 +8689,29 @@ +#define VK_AMD_memory_overallocation_behavior 1 +#define VK_AMD_MEMORY_OVERALLOCATION_BEHAVIOR_SPEC_VERSION 
1 +#define VK_AMD_MEMORY_OVERALLOCATION_BEHAVIOR_EXTENSION_NAME "VK_AMD_memory_overallocation_behavior" + + +typedef enum VkMemoryOverallocationBehaviorAMD { + VK_MEMORY_OVERALLOCATION_BEHAVIOR_DEFAULT_AMD = 0, + VK_MEMORY_OVERALLOCATION_BEHAVIOR_ALLOWED_AMD = 1, + VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD = 2, + VK_MEMORY_OVERALLOCATION_BEHAVIOR_BEGIN_RANGE_AMD = VK_MEMORY_OVERALLOCATION_BEHAVIOR_DEFAULT_AMD, + VK_MEMORY_OVERALLOCATION_BEHAVIOR_END_RANGE_AMD = VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD, + VK_MEMORY_OVERALLOCATION_BEHAVIOR_RANGE_SIZE_AMD = (VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD - VK_MEMORY_OVERALLOCATION_BEHAVIOR_DEFAULT_AMD + 1), + VK_MEMORY_OVERALLOCATION_BEHAVIOR_MAX_ENUM_AMD = 0x7FFFFFFF +} VkMemoryOverallocationBehaviorAMD; + +typedef struct VkDeviceMemoryOverallocationCreateInfoAMD { + VkStructureType sType; + const void* pNext; + VkMemoryOverallocationBehaviorAMD overallocationBehavior; +} VkDeviceMemoryOverallocationCreateInfoAMD; + + + #define VK_EXT_vertex_attribute_divisor 1 #define VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_SPEC_VERSION 3 #define VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME "VK_EXT_vertex_attribute_divisor" @@ -8730,30 +8908,187 @@ #endif #define VK_EXT_pci_bus_info 1 -#define VK_EXT_PCI_BUS_INFO_SPEC_VERSION 1 +#define VK_EXT_PCI_BUS_INFO_SPEC_VERSION 2 #define VK_EXT_PCI_BUS_INFO_EXTENSION_NAME "VK_EXT_pci_bus_info" typedef struct VkPhysicalDevicePCIBusInfoPropertiesEXT { VkStructureType sType; void* pNext; - uint16_t pciDomain; - uint8_t pciBus; - uint8_t pciDevice; - uint8_t pciFunction; + uint32_t pciDomain; + uint32_t pciBus; + uint32_t pciDevice; + uint32_t pciFunction; } VkPhysicalDevicePCIBusInfoPropertiesEXT; +#define VK_EXT_fragment_density_map 1 +#define VK_EXT_FRAGMENT_DENSITY_MAP_SPEC_VERSION 1 +#define VK_EXT_FRAGMENT_DENSITY_MAP_EXTENSION_NAME "VK_EXT_fragment_density_map" + +typedef struct VkPhysicalDeviceFragmentDensityMapFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 
fragmentDensityMap; + VkBool32 fragmentDensityMapDynamic; + VkBool32 fragmentDensityMapNonSubsampledImages; +} VkPhysicalDeviceFragmentDensityMapFeaturesEXT; + +typedef struct VkPhysicalDeviceFragmentDensityMapPropertiesEXT { + VkStructureType sType; + void* pNext; + VkExtent2D minFragmentDensityTexelSize; + VkExtent2D maxFragmentDensityTexelSize; + VkBool32 fragmentDensityInvocations; +} VkPhysicalDeviceFragmentDensityMapPropertiesEXT; + +typedef struct VkRenderPassFragmentDensityMapCreateInfoEXT { + VkStructureType sType; + const void* pNext; + VkAttachmentReference fragmentDensityMapAttachment; +} VkRenderPassFragmentDensityMapCreateInfoEXT; + + + +#define VK_EXT_scalar_block_layout 1 +#define VK_EXT_SCALAR_BLOCK_LAYOUT_SPEC_VERSION 1 +#define VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME "VK_EXT_scalar_block_layout" + +typedef struct VkPhysicalDeviceScalarBlockLayoutFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 scalarBlockLayout; +} VkPhysicalDeviceScalarBlockLayoutFeaturesEXT; + + + #define VK_GOOGLE_hlsl_functionality1 1 -#define VK_GOOGLE_HLSL_FUNCTIONALITY1_SPEC_VERSION 0 +#define VK_GOOGLE_HLSL_FUNCTIONALITY1_SPEC_VERSION 1 #define VK_GOOGLE_HLSL_FUNCTIONALITY1_EXTENSION_NAME "VK_GOOGLE_hlsl_functionality1" #define VK_GOOGLE_decorate_string 1 -#define VK_GOOGLE_DECORATE_STRING_SPEC_VERSION 0 +#define VK_GOOGLE_DECORATE_STRING_SPEC_VERSION 1 #define VK_GOOGLE_DECORATE_STRING_EXTENSION_NAME "VK_GOOGLE_decorate_string" +#define VK_EXT_memory_budget 1 +#define VK_EXT_MEMORY_BUDGET_SPEC_VERSION 1 +#define VK_EXT_MEMORY_BUDGET_EXTENSION_NAME "VK_EXT_memory_budget" + +typedef struct VkPhysicalDeviceMemoryBudgetPropertiesEXT { + VkStructureType sType; + void* pNext; + VkDeviceSize heapBudget[VK_MAX_MEMORY_HEAPS]; + VkDeviceSize heapUsage[VK_MAX_MEMORY_HEAPS]; +} VkPhysicalDeviceMemoryBudgetPropertiesEXT; + + + +#define VK_EXT_memory_priority 1 +#define VK_EXT_MEMORY_PRIORITY_SPEC_VERSION 1 +#define VK_EXT_MEMORY_PRIORITY_EXTENSION_NAME 
"VK_EXT_memory_priority" + +typedef struct VkPhysicalDeviceMemoryPriorityFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 memoryPriority; +} VkPhysicalDeviceMemoryPriorityFeaturesEXT; + +typedef struct VkMemoryPriorityAllocateInfoEXT { + VkStructureType sType; + const void* pNext; + float priority; +} VkMemoryPriorityAllocateInfoEXT; + + + +#define VK_EXT_buffer_device_address 1 +typedef uint64_t VkDeviceAddress; + +#define VK_EXT_BUFFER_DEVICE_ADDRESS_SPEC_VERSION 2 +#define VK_EXT_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME "VK_EXT_buffer_device_address" + +typedef struct VkPhysicalDeviceBufferAddressFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 bufferDeviceAddress; + VkBool32 bufferDeviceAddressCaptureReplay; + VkBool32 bufferDeviceAddressMultiDevice; +} VkPhysicalDeviceBufferAddressFeaturesEXT; + +typedef struct VkBufferDeviceAddressInfoEXT { + VkStructureType sType; + const void* pNext; + VkBuffer buffer; +} VkBufferDeviceAddressInfoEXT; + +typedef struct VkBufferDeviceAddressCreateInfoEXT { + VkStructureType sType; + const void* pNext; + VkDeviceSize deviceAddress; +} VkBufferDeviceAddressCreateInfoEXT; + + +typedef VkDeviceAddress (VKAPI_PTR *PFN_vkGetBufferDeviceAddressEXT)(VkDevice device, const VkBufferDeviceAddressInfoEXT* pInfo); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkDeviceAddress VKAPI_CALL vkGetBufferDeviceAddressEXT( + VkDevice device, + const VkBufferDeviceAddressInfoEXT* pInfo); +#endif + +#define VK_EXT_separate_stencil_usage 1 +#define VK_EXT_SEPARATE_STENCIL_USAGE_SPEC_VERSION 1 +#define VK_EXT_SEPARATE_STENCIL_USAGE_EXTENSION_NAME "VK_EXT_separate_stencil_usage" + +typedef struct VkImageStencilUsageCreateInfoEXT { + VkStructureType sType; + const void* pNext; + VkImageUsageFlags stencilUsage; +} VkImageStencilUsageCreateInfoEXT; + + + +#define VK_EXT_validation_features 1 +#define VK_EXT_VALIDATION_FEATURES_SPEC_VERSION 1 +#define VK_EXT_VALIDATION_FEATURES_EXTENSION_NAME "VK_EXT_validation_features" + + +typedef 
enum VkValidationFeatureEnableEXT { + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT = 0, + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT = 1, + VK_VALIDATION_FEATURE_ENABLE_BEGIN_RANGE_EXT = VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT, + VK_VALIDATION_FEATURE_ENABLE_END_RANGE_EXT = VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT, + VK_VALIDATION_FEATURE_ENABLE_RANGE_SIZE_EXT = (VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT - VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT + 1), + VK_VALIDATION_FEATURE_ENABLE_MAX_ENUM_EXT = 0x7FFFFFFF +} VkValidationFeatureEnableEXT; + +typedef enum VkValidationFeatureDisableEXT { + VK_VALIDATION_FEATURE_DISABLE_ALL_EXT = 0, + VK_VALIDATION_FEATURE_DISABLE_SHADERS_EXT = 1, + VK_VALIDATION_FEATURE_DISABLE_THREAD_SAFETY_EXT = 2, + VK_VALIDATION_FEATURE_DISABLE_API_PARAMETERS_EXT = 3, + VK_VALIDATION_FEATURE_DISABLE_OBJECT_LIFETIMES_EXT = 4, + VK_VALIDATION_FEATURE_DISABLE_CORE_CHECKS_EXT = 5, + VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT = 6, + VK_VALIDATION_FEATURE_DISABLE_BEGIN_RANGE_EXT = VK_VALIDATION_FEATURE_DISABLE_ALL_EXT, + VK_VALIDATION_FEATURE_DISABLE_END_RANGE_EXT = VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT, + VK_VALIDATION_FEATURE_DISABLE_RANGE_SIZE_EXT = (VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT - VK_VALIDATION_FEATURE_DISABLE_ALL_EXT + 1), + VK_VALIDATION_FEATURE_DISABLE_MAX_ENUM_EXT = 0x7FFFFFFF +} VkValidationFeatureDisableEXT; + +typedef struct VkValidationFeaturesEXT { + VkStructureType sType; + const void* pNext; + uint32_t enabledValidationFeatureCount; + const VkValidationFeatureEnableEXT* pEnabledValidationFeatures; + uint32_t disabledValidationFeatureCount; + const VkValidationFeatureDisableEXT* pDisabledValidationFeatures; +} VkValidationFeaturesEXT; + + + #ifdef __cplusplus } #endif diff -Nru mesa-18.3.3/include/vulkan/vulkan_fuchsia.h mesa-19.0.1/include/vulkan/vulkan_fuchsia.h --- mesa-18.3.3/include/vulkan/vulkan_fuchsia.h 
2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_fuchsia.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,7 +6,7 @@ #endif /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. diff -Nru mesa-18.3.3/include/vulkan/vulkan.h mesa-19.0.1/include/vulkan/vulkan.h --- mesa-18.3.3/include/vulkan/vulkan.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan.h 2019-03-31 23:16:37.000000000 +0000 @@ -2,7 +2,7 @@ #define VULKAN_H_ 1 /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. @@ -39,12 +39,6 @@ #endif -#ifdef VK_USE_PLATFORM_MIR_KHR -#include -#include "vulkan_mir.h" -#endif - - #ifdef VK_USE_PLATFORM_VI_NN #include "vulkan_vi.h" #endif diff -Nru mesa-18.3.3/include/vulkan/vulkan_ios.h mesa-19.0.1/include/vulkan/vulkan_ios.h --- mesa-18.3.3/include/vulkan/vulkan_ios.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_ios.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,7 +6,7 @@ #endif /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. diff -Nru mesa-18.3.3/include/vulkan/vulkan_macos.h mesa-19.0.1/include/vulkan/vulkan_macos.h --- mesa-18.3.3/include/vulkan/vulkan_macos.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_macos.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,7 +6,7 @@ #endif /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. 
** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. diff -Nru mesa-18.3.3/include/vulkan/vulkan_mir.h mesa-19.0.1/include/vulkan/vulkan_mir.h --- mesa-18.3.3/include/vulkan/vulkan_mir.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_mir.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,65 +0,0 @@ -#ifndef VULKAN_MIR_H_ -#define VULKAN_MIR_H_ 1 - -#ifdef __cplusplus -extern "C" { -#endif - -/* -** Copyright (c) 2015-2018 The Khronos Group Inc. -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ - -/* -** This header is generated from the Khronos Vulkan XML API Registry. 
-** -*/ - - -#define VK_KHR_mir_surface 1 -#define VK_KHR_MIR_SURFACE_SPEC_VERSION 4 -#define VK_KHR_MIR_SURFACE_EXTENSION_NAME "VK_KHR_mir_surface" - -typedef VkFlags VkMirSurfaceCreateFlagsKHR; - -typedef struct VkMirSurfaceCreateInfoKHR { - VkStructureType sType; - const void* pNext; - VkMirSurfaceCreateFlagsKHR flags; - MirConnection* connection; - MirSurface* mirSurface; -} VkMirSurfaceCreateInfoKHR; - - -typedef VkResult (VKAPI_PTR *PFN_vkCreateMirSurfaceKHR)(VkInstance instance, const VkMirSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); -typedef VkBool32 (VKAPI_PTR *PFN_vkGetPhysicalDeviceMirPresentationSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, MirConnection* connection); - -#ifndef VK_NO_PROTOTYPES -VKAPI_ATTR VkResult VKAPI_CALL vkCreateMirSurfaceKHR( - VkInstance instance, - const VkMirSurfaceCreateInfoKHR* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSurfaceKHR* pSurface); - -VKAPI_ATTR VkBool32 VKAPI_CALL vkGetPhysicalDeviceMirPresentationSupportKHR( - VkPhysicalDevice physicalDevice, - uint32_t queueFamilyIndex, - MirConnection* connection); -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff -Nru mesa-18.3.3/include/vulkan/vulkan_vi.h mesa-19.0.1/include/vulkan/vulkan_vi.h --- mesa-18.3.3/include/vulkan/vulkan_vi.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_vi.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,7 +6,7 @@ #endif /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
diff -Nru mesa-18.3.3/include/vulkan/vulkan_wayland.h mesa-19.0.1/include/vulkan/vulkan_wayland.h --- mesa-18.3.3/include/vulkan/vulkan_wayland.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_wayland.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,7 +6,7 @@ #endif /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. diff -Nru mesa-18.3.3/include/vulkan/vulkan_win32.h mesa-19.0.1/include/vulkan/vulkan_win32.h --- mesa-18.3.3/include/vulkan/vulkan_win32.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_win32.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,7 +6,7 @@ #endif /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. diff -Nru mesa-18.3.3/include/vulkan/vulkan_xcb.h mesa-19.0.1/include/vulkan/vulkan_xcb.h --- mesa-18.3.3/include/vulkan/vulkan_xcb.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_xcb.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,7 +6,7 @@ #endif /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. diff -Nru mesa-18.3.3/include/vulkan/vulkan_xlib.h mesa-19.0.1/include/vulkan/vulkan_xlib.h --- mesa-18.3.3/include/vulkan/vulkan_xlib.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_xlib.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,7 +6,7 @@ #endif /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. 
** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. diff -Nru mesa-18.3.3/include/vulkan/vulkan_xlib_xrandr.h mesa-19.0.1/include/vulkan/vulkan_xlib_xrandr.h --- mesa-18.3.3/include/vulkan/vulkan_xlib_xrandr.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_xlib_xrandr.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,7 +6,7 @@ #endif /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. diff -Nru mesa-18.3.3/Makefile.am mesa-19.0.1/Makefile.am --- mesa-18.3.3/Makefile.am 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -22,6 +22,7 @@ SUBDIRS = src AM_DISTCHECK_CONFIGURE_FLAGS = \ + --enable-autotools \ --enable-dri \ --enable-dri3 \ --enable-egl \ @@ -45,7 +46,7 @@ --enable-libunwind \ --with-platforms=x11,wayland,drm,surfaceless \ --with-dri-drivers=i915,i965,nouveau,radeon,r200,swrast \ - --with-gallium-drivers=i915,nouveau,r300,pl111,r600,radeonsi,freedreno,svga,swrast,vc4,tegra,virgl,swr,etnaviv,imx \ + --with-gallium-drivers=i915,nouveau,r300,kmsro,r600,radeonsi,freedreno,svga,swrast,vc4,tegra,virgl,swr,etnaviv \ --with-vulkan-drivers=intel,radeon ACLOCAL_AMFLAGS = -I m4 diff -Nru mesa-18.3.3/meson.build mesa-19.0.1/meson.build --- mesa-18.3.3/meson.build 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -34,8 +34,6 @@ null_dep = dependency('', required : false) -system_has_kms_drm = ['openbsd', 'netbsd', 'freebsd', 'dragonfly', 'linux'].contains(host_machine.system()) - # Arguments for the preprocessor, put these in a separate array from the C and # C++ (cpp in meson terminology) arguments since they need to be added to the # default arguments for both C and C++. 
@@ -43,8 +41,7 @@ '-D__STDC_CONSTANT_MACROS', '-D__STDC_FORMAT_MACROS', '-D__STDC_LIMIT_MACROS', - '-DVERSION="@0@"'.format(meson.project_version()), - '-DPACKAGE_VERSION=VERSION', + '-DPACKAGE_VERSION="@0@"'.format(meson.project_version()), '-DPACKAGE_BUGREPORT="https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa"', ] @@ -59,16 +56,16 @@ with_swr_arches = get_option('swr-arches') with_tools = get_option('tools') if with_tools.contains('all') - with_tools = ['freedreno', 'glsl', 'intel', 'nir', 'nouveau', 'xvmc'] + with_tools = ['etnaviv', 'freedreno', 'glsl', 'intel', 'nir', 'nouveau', 'xvmc'] endif dri_drivers_path = get_option('dri-drivers-path') if dri_drivers_path == '' - dri_drivers_path = join_paths(get_option('libdir'), 'dri') + dri_drivers_path = join_paths(get_option('prefix'), get_option('libdir'), 'dri') endif dri_search_path = get_option('dri-search-path') if dri_search_path == '' - dri_search_path = join_paths(get_option('prefix'), dri_drivers_path) + dri_search_path = dri_drivers_path endif with_gles1 = get_option('gles1') @@ -134,7 +131,7 @@ ] elif ['arm', 'aarch64'].contains(host_machine.cpu_family()) _drivers = [ - 'pl111', 'v3d', 'vc4', 'freedreno', 'etnaviv', 'imx', 'nouveau', + 'kmsro', 'v3d', 'vc4', 'freedreno', 'etnaviv', 'nouveau', 'tegra', 'virgl', 'swrast', ] else @@ -148,7 +145,7 @@ host_machine.system())) endif endif -with_gallium_pl111 = _drivers.contains('pl111') +with_gallium_kmsro = _drivers.contains('kmsro') with_gallium_radeonsi = _drivers.contains('radeonsi') with_gallium_r300 = _drivers.contains('r300') with_gallium_r600 = _drivers.contains('r600') @@ -158,7 +155,6 @@ with_gallium_vc4 = _drivers.contains('vc4') with_gallium_v3d = _drivers.contains('v3d') with_gallium_etnaviv = _drivers.contains('etnaviv') -with_gallium_imx = _drivers.contains('imx') with_gallium_tegra = _drivers.contains('tegra') with_gallium_i915 = _drivers.contains('i915') with_gallium_svga = _drivers.contains('svga') @@ -213,11 +209,8 @@ if with_dri_i915 
and with_gallium_i915 error('Only one i915 provider can be built') endif -if with_gallium_imx and not with_gallium_etnaviv - error('IMX driver requires etnaviv driver') -endif -if with_gallium_pl111 and not with_gallium_vc4 - error('pl111 driver requires vc4 driver') +if with_gallium_kmsro and not (with_gallium_vc4 or with_gallium_etnaviv or with_gallium_freedreno) + error('kmsro driver requires one or more renderonly drivers (vc4, etnaviv, freedreno)') endif if with_gallium_tegra and not with_gallium_nouveau error('tegra driver requires nouveau driver') @@ -615,7 +608,7 @@ d3d_drivers_path = get_option('d3d-drivers-path') if d3d_drivers_path == '' - d3d_drivers_path = join_paths(get_option('libdir'), 'd3d') + d3d_drivers_path = join_paths(get_option('prefix'), get_option('libdir'), 'd3d') endif with_gallium_st_nine = get_option('gallium-nine') @@ -936,7 +929,7 @@ # case of cross compiling where we can use asm, and that's x86_64 -> x86 when # host OS == build OS, since in that case the build machine can run the host's # binaries. -if meson.is_cross_build() +if with_asm and meson.is_cross_build() if build_machine.system() != host_machine.system() # TODO: It may be possible to do this with an exe_wrapper (like wine). 
message('Cross compiling from one OS to another, disabling assembly.') @@ -1120,7 +1113,7 @@ dep_libdrm_etnaviv = null_dep dep_libdrm_intel = null_dep -_drm_amdgpu_ver = '2.4.95' +_drm_amdgpu_ver = '2.4.97' _drm_radeon_ver = '2.4.71' _drm_nouveau_ver = '2.4.66' _drm_etnaviv_ver = '2.4.89' @@ -1195,7 +1188,7 @@ endif if with_amd_vk or with_gallium_radeonsi - _llvm_version = '>= 6.0.0' + _llvm_version = '>= 7.0.0' elif with_gallium_swr _llvm_version = '>= 6.0.0' elif with_gallium_opencl or with_gallium_r600 @@ -1372,7 +1365,7 @@ dep_xfixes = dependency('xfixes') dep_xcb_glx = dependency('xcb-glx', version : '>= 1.8.1') endif - if (with_any_vk or with_glx == 'dri' or + if (with_any_vk or with_glx == 'dri' or with_egl or (with_gallium_vdpau or with_gallium_xvmc or with_gallium_va or with_gallium_omx != 'disabled')) dep_xcb = dependency('xcb') @@ -1407,7 +1400,7 @@ dep_xcb_xfixes = dependency('xcb-xfixes') endif if with_xlib_lease - dep_xcb_xrandr = dependency('xcb-randr', version : '>= 1.12') + dep_xcb_xrandr = dependency('xcb-randr') dep_xlib_xrandr = dependency('xrandr', version : '>= 1.3') endif endif diff -Nru mesa-18.3.3/meson_options.txt mesa-19.0.1/meson_options.txt --- mesa-18.3.3/meson_options.txt 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/meson_options.txt 2019-03-31 23:16:37.000000000 +0000 @@ -58,8 +58,8 @@ type : 'array', value : ['auto'], choices : [ - '', 'auto', 'pl111', 'radeonsi', 'r300', 'r600', 'nouveau', 'freedreno', - 'swrast', 'v3d', 'vc4', 'etnaviv', 'imx', 'tegra', 'i915', 'svga', 'virgl', + '', 'auto', 'kmsro', 'radeonsi', 'r300', 'r600', 'nouveau', 'freedreno', + 'swrast', 'v3d', 'vc4', 'etnaviv', 'tegra', 'i915', 'svga', 'virgl', 'swr', ], description : 'List of gallium drivers to build. 
If this is set to auto all drivers applicable to the target OS/architecture will be built' @@ -301,7 +301,7 @@ 'tools', type : 'array', value : [], - choices : ['freedreno', 'glsl', 'intel', 'intel-ui', 'nir', 'nouveau', 'xvmc', 'all'], + choices : ['etnaviv', 'freedreno', 'glsl', 'intel', 'intel-ui', 'nir', 'nouveau', 'xvmc', 'all'], description : 'List of tools to build. (Note: `intel-ui` selects `intel`)', ) option( diff -Nru mesa-18.3.3/REVIEWERS mesa-19.0.1/REVIEWERS --- mesa-18.3.3/REVIEWERS 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/REVIEWERS 2019-03-31 23:16:37.000000000 +0000 @@ -72,7 +72,9 @@ EGL R: Eric Engestrom +R: Emil Velikov F: src/egl/ +F: include/EGL/ HAIKU R: Alexander von Gluck IV @@ -136,3 +138,8 @@ GLX R: Adam Jackson F: src/glx/ + +VULKAN +R: Eric Engestrom +F: src/vulkan/ +F: include/vulkan/ diff -Nru mesa-18.3.3/scons/custom.py mesa-19.0.1/scons/custom.py --- mesa-18.3.3/scons/custom.py 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/scons/custom.py 2019-03-31 23:16:37.000000000 +0000 @@ -48,7 +48,12 @@ # a path directly. We want to support both, so we need to detect the SCons version, # for which no API is provided by SCons 8-P -scons_version = tuple(map(int, SCons.__version__.split('.'))) +# Scons version string has consistently been in this format: +# MajorVersion.MinorVersion.Patch[.alpha/beta.yyyymmdd] +# so this formula should cover all versions regardless of type +# stable, alpha or beta. +# For simplicity alpha and beta flags are removed. +scons_version = tuple(map(int, SCons.__version__.split('.')[:3])) def quietCommandLines(env): # Quiet command lines diff -Nru mesa-18.3.3/scons/gallium.py mesa-19.0.1/scons/gallium.py --- mesa-18.3.3/scons/gallium.py 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/scons/gallium.py 2019-03-31 23:16:37.000000000 +0000 @@ -308,7 +308,20 @@ if env.GetOption('num_jobs') <= 1: env.SetOption('num_jobs', num_jobs()) - env.Decider('MD5-timestamp') + # Speed up dependency checking. 
See + # - https://github.com/SCons/scons/wiki/GoFastButton + # - https://bugs.freedesktop.org/show_bug.cgi?id=109443 + + # Scons version string has consistently been in this format: + # MajorVersion.MinorVersion.Patch[.alpha/beta.yyyymmdd] + # so this formula should cover all versions regardless of type + # stable, alpha or beta. + # For simplicity alpha and beta flags are removed. + + scons_version = distutils.version.StrictVersion('.'.join(SCons.__version__.split('.')[:3])) + if scons_version < distutils.version.StrictVersion('3.0.2') or \ + scons_version > distutils.version.StrictVersion('3.0.4'): + env.Decider('MD5-timestamp') env.SetOption('max_drift', 60) # C preprocessor options diff -Nru mesa-18.3.3/src/amd/addrlib/addrinterface.cpp mesa-19.0.1/src/amd/addrlib/addrinterface.cpp --- mesa-18.3.3/src/amd/addrlib/addrinterface.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/addrinterface.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,1743 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addrinterface.cpp -* @brief Contains the addrlib interface functions -**************************************************************************************************** -*/ -#include "addrinterface.h" -#include "addrlib1.h" -#include "addrlib2.h" - -#include "addrcommon.h" - -#include "util/macros.h" - -using namespace Addr; - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Create/Destroy/Config functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* AddrCreate -* -* @brief -* Create address lib object -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrCreate( - const ADDR_CREATE_INPUT* pAddrCreateIn, ///< [in] infomation for creating address lib object - ADDR_CREATE_OUTPUT* pAddrCreateOut) ///< [out] address lib handle -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - returnCode = Lib::Create(pAddrCreateIn, pAddrCreateOut); - - return returnCode; -} - - - -/** -**************************************************************************************************** -* AddrDestroy -* -* @brief -* 
Destroy address lib object -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrDestroy( - ADDR_HANDLE hLib) ///< address lib handle -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (hLib) - { - Lib* pLib = Lib::GetLib(hLib); - pLib->Destroy(); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Surface functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* AddrComputeSurfaceInfo -* -* @brief -* Calculate surface width/height/depth/alignments and suitable tiling mode -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceInfo( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] surface information - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) ///< [out] surface parameters and alignments -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeSurfaceInfo(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - - -/** -**************************************************************************************************** -* AddrComputeSurfaceAddrFromCoord -* -* @brief -* Compute surface address according to coordinates -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE 
-**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceAddrFromCoord( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] surface info and coordinates - ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] surface address -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeSurfaceAddrFromCoord(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrComputeSurfaceCoordFromAddr -* -* @brief -* Compute coordinates according to surface address -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceCoordFromAddr( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] surface info and address - ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) ///< [out] coordinates -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeSurfaceCoordFromAddr(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// HTile functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* AddrComputeHtileInfo -* -* @brief -* Compute Htile pitch, height, base alignment and size in bytes -* -* 
@return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeHtileInfo( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] Htile information - ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut) ///< [out] Htile pitch, height and size in bytes -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeHtileInfo(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrComputeHtileAddrFromCoord -* -* @brief -* Compute Htile address according to coordinates (of depth buffer) -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeHtileAddrFromCoord( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] Htile info and coordinates - ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Htile address -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeHtileAddrFromCoord(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrComputeHtileCoordFromAddr -* -* @brief -* Compute coordinates within depth buffer (1st pixel of a micro tile) according to -* Htile address -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE 
-**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeHtileCoordFromAddr( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] Htile info and address - ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] Htile coordinates -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeHtileCoordFromAddr(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// C-mask functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* AddrComputeCmaskInfo -* -* @brief -* Compute Cmask pitch, height, base alignment and size in bytes from color buffer -* info -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskInfo( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] Cmask pitch and height - ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut) ///< [out] Cmask pitch, height and size in bytes -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeCmaskInfo(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrComputeCmaskAddrFromCoord -* -* @brief -* Compute Cmask address according to coordinates (of MSAA color 
buffer) -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskAddrFromCoord( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] Cmask info and coordinates - ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Cmask address -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeCmaskAddrFromCoord(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrComputeCmaskCoordFromAddr -* -* @brief -* Compute coordinates within color buffer (1st pixel of a micro tile) according to -* Cmask address -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskCoordFromAddr( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, ///< [in] Cmask info and address - ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) ///< [out] Cmask coordinates -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeCmaskCoordFromAddr(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// F-mask functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** 
-**************************************************************************************************** -* AddrComputeFmaskInfo -* -* @brief -* Compute Fmask pitch/height/depth/alignments and size in bytes -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskInfo( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] Fmask information - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut) ///< [out] Fmask pitch and height -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeFmaskInfo(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrComputeFmaskAddrFromCoord -* -* @brief -* Compute Fmask address according to coordinates (x,y,slice,sample,plane) -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskAddrFromCoord( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] Fmask info and coordinates - ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Fmask address -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeFmaskAddrFromCoord(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrComputeFmaskCoordFromAddr -* -* @brief -* Compute coordinates 
(x,y,slice,sample,plane) according to Fmask address -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskCoordFromAddr( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] Fmask info and address - ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) ///< [out] Fmask coordinates -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeFmaskCoordFromAddr(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// DCC key functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* AddrComputeDccInfo -* -* @brief -* Compute DCC key size, base alignment based on color surface size, tile info or tile index -* -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo( - ADDR_HANDLE hLib, ///< handle of addrlib - const ADDR_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input - ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) ///< [out] output -{ - ADDR_E_RETURNCODE returnCode; - - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - if (pLib != NULL) - { - returnCode = pLib->ComputeDccInfo(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - - -/////////////////////////////////////////////////////////////////////////////// -// Below functions are element related or helper functions -/////////////////////////////////////////////////////////////////////////////// - -/** 
-**************************************************************************************************** -* AddrGetVersion -* -* @brief -* Get AddrLib version number. Client may check this return value against ADDRLIB_VERSION -* defined in addrinterface.h to see if there is a mismatch. -**************************************************************************************************** -*/ -UINT_32 ADDR_API AddrGetVersion(ADDR_HANDLE hLib) -{ - UINT_32 version = 0; - - Addr::Lib* pLib = Lib::GetLib(hLib); - - ADDR_ASSERT(pLib != NULL); - - if (pLib) - { - version = pLib->GetVersion(); - } - - return version; -} - -/** -**************************************************************************************************** -* AddrUseTileIndex -* -* @brief -* Return TRUE if tileIndex is enabled in this address library -**************************************************************************************************** -*/ -BOOL_32 ADDR_API AddrUseTileIndex(ADDR_HANDLE hLib) -{ - BOOL_32 useTileIndex = FALSE; - - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_ASSERT(pLib != NULL); - - if (pLib) - { - useTileIndex = pLib->UseTileIndex(0); - } - - return useTileIndex; -} - -/** -**************************************************************************************************** -* AddrUseCombinedSwizzle -* -* @brief -* Return TRUE if combined swizzle is enabled in this address library -**************************************************************************************************** -*/ -BOOL_32 ADDR_API AddrUseCombinedSwizzle(ADDR_HANDLE hLib) -{ - BOOL_32 useCombinedSwizzle = FALSE; - - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_ASSERT(pLib != NULL); - - if (pLib) - { - useCombinedSwizzle = pLib->UseCombinedSwizzle(); - } - - return useCombinedSwizzle; -} - -/** -**************************************************************************************************** -* AddrExtractBankPipeSwizzle -* -* @brief -* Extract Bank and Pipe swizzle from base256b -* @return -* 
ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrExtractBankPipeSwizzle( - ADDR_HANDLE hLib, ///< addrlib handle - const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, ///< [in] input structure - ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) ///< [out] output structure -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - if (pLib != NULL) - { - returnCode = pLib->ExtractBankPipeSwizzle(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrCombineBankPipeSwizzle -* -* @brief -* Combine Bank and Pipe swizzle -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrCombineBankPipeSwizzle( - ADDR_HANDLE hLib, - const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn, - ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut) -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - if (pLib != NULL) - { - returnCode = pLib->CombineBankPipeSwizzle(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrComputeSliceSwizzle -* -* @brief -* Compute a swizzle for slice from a base swizzle -* @return -* ADDR_OK if no error -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeSliceSwizzle( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, - ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - if 
(pLib != NULL) - { - returnCode = pLib->ComputeSliceTileSwizzle(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrComputeBaseSwizzle -* -* @brief -* Return a Combined Bank and Pipe swizzle base on surface based on surface type/index -* @return -* ADDR_OK if no error -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeBaseSwizzle( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, - ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - if (pLib != NULL) - { - returnCode = pLib->ComputeBaseSwizzle(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* ElemFlt32ToDepthPixel -* -* @brief -* Convert a FLT_32 value to a depth/stencil pixel value -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -* -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API ElemFlt32ToDepthPixel( - ADDR_HANDLE hLib, ///< addrlib handle - const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn, ///< [in] per-component value - ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) ///< [out] final pixel value -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - Lib* pLib = Lib::GetLib(hLib); - - if (pLib != NULL) - { - pLib->Flt32ToDepthPixel(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* ElemFlt32ToColorPixel -* -* @brief -* Convert a FLT_32 value to a red/green/blue/alpha pixel value -* -* @return -* 
ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -* -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API ElemFlt32ToColorPixel( - ADDR_HANDLE hLib, ///< addrlib handle - const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn, ///< [in] format, surface number and swap value - ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) ///< [out] final pixel value -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - Lib* pLib = Lib::GetLib(hLib); - - if (pLib != NULL) - { - pLib->Flt32ToColorPixel(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* ElemGetExportNorm -* -* @brief -* Helper function to check one format can be EXPORT_NUM, -* which is a register CB_COLOR_INFO.SURFACE_FORMAT. -* FP16 can be reported as EXPORT_NORM for rv770 in r600 -* family -* -**************************************************************************************************** -*/ -BOOL_32 ADDR_API ElemGetExportNorm( - ADDR_HANDLE hLib, ///< addrlib handle - const ELEM_GETEXPORTNORM_INPUT* pIn) ///< [in] input structure -{ - Addr::Lib* pLib = Lib::GetLib(hLib); - BOOL_32 enabled = FALSE; - - MAYBE_UNUSED ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - enabled = pLib->GetExportNorm(pIn); - } - else - { - returnCode = ADDR_ERROR; - } - - ADDR_ASSERT(returnCode == ADDR_OK); - - return enabled; -} - -/** -**************************************************************************************************** -* AddrConvertTileInfoToHW -* -* @brief -* Convert tile info from real value to hardware register value -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrConvertTileInfoToHW( - ADDR_HANDLE hLib, ///< 
address lib handle - const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] tile info with real value - ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) ///< [out] tile info with HW register value -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ConvertTileInfoToHW(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrConvertTileIndex -* -* @brief -* Convert tile index to tile mode/type/info -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_CONVERT_TILEINDEX_INPUT* pIn, ///< [in] input - tile index - ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) ///< [out] tile mode/type/info -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ConvertTileIndex(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrGetMacroModeIndex -* -* @brief -* Get macro mode index based on input parameters -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrGetMacroModeIndex( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_GET_MACROMODEINDEX_INPUT* pIn, ///< [in] input - ADDR_GET_MACROMODEINDEX_OUTPUT* pOut) ///< [out] macro mode index -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode; - - if (pLib != NULL) - { - 
returnCode = pLib->GetMacroModeIndex(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrConvertTileIndex1 -* -* @brief -* Convert tile index to tile mode/type/info -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex1( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_CONVERT_TILEINDEX1_INPUT* pIn, ///< [in] input - tile index - ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) ///< [out] tile mode/type/info -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ConvertTileIndex1(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrGetTileIndex -* -* @brief -* Get tile index from tile mode/type/info -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -* -* @note -* Only meaningful for SI (and above) -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrGetTileIndex( - ADDR_HANDLE hLib, - const ADDR_GET_TILEINDEX_INPUT* pIn, - ADDR_GET_TILEINDEX_OUTPUT* pOut) -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->GetTileIndex(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrComputePrtInfo -* -* @brief -* Interface function for ComputePrtInfo -* 
-**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputePrtInfo( - ADDR_HANDLE hLib, - const ADDR_PRT_INFO_INPUT* pIn, - ADDR_PRT_INFO_OUTPUT* pOut) -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - if (pLib != NULL) - { - returnCode = pLib->ComputePrtInfo(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrGetMaxAlignments -* -* @brief -* Convert maximum alignments -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrGetMaxAlignments( - ADDR_HANDLE hLib, ///< address lib handle - ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) ///< [out] output structure -{ - Addr::Lib* pLib = Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->GetMaxAlignments(pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrGetMaxMetaAlignments -* -* @brief -* Convert maximum alignments for metadata -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrGetMaxMetaAlignments( - ADDR_HANDLE hLib, ///< address lib handle - ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) ///< [out] output structure -{ - Addr::Lib* pLib = Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->GetMaxMetaAlignments(pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return 
returnCode; -} - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Surface functions for Addr2 -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* Addr2ComputeSurfaceInfo -* -* @brief -* Calculate surface width/height/depth/alignments and suitable tiling mode -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceInfo( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] surface information - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) ///< [out] surface parameters and alignments -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeSurfaceInfo(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - -/** -**************************************************************************************************** -* Addr2ComputeSurfaceAddrFromCoord -* -* @brief -* Compute surface address according to coordinates -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceAddrFromCoord( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] surface info and coordinates - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] surface address -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = 
pLib->ComputeSurfaceAddrFromCoord(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - -/** -**************************************************************************************************** -* Addr2ComputeSurfaceCoordFromAddr -* -* @brief -* Compute coordinates according to surface address -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceCoordFromAddr( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] surface info and address - ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) ///< [out] coordinates -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeSurfaceCoordFromAddr(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// HTile functions for Addr2 -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* Addr2ComputeHtileInfo -* -* @brief -* Compute Htile pitch, height, base alignment and size in bytes -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileInfo( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] Htile information - ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) ///< [out] Htile pitch, height and size in bytes -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - 
ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeHtileInfo(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - -/** -**************************************************************************************************** -* Addr2ComputeHtileAddrFromCoord -* -* @brief -* Compute Htile address according to coordinates (of depth buffer) -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileAddrFromCoord( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] Htile info and coordinates - ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Htile address -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeHtileAddrFromCoord(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - -/** -**************************************************************************************************** -* Addr2ComputeHtileCoordFromAddr -* -* @brief -* Compute coordinates within depth buffer (1st pixel of a micro tile) according to -* Htile address -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileCoordFromAddr( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] Htile info and address - ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] Htile coordinates -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = 
pLib->ComputeHtileCoordFromAddr(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// C-mask functions for Addr2 -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* Addr2ComputeCmaskInfo -* -* @brief -* Compute Cmask pitch, height, base alignment and size in bytes from color buffer -* info -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskInfo( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] Cmask pitch and height - ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) ///< [out] Cmask pitch, height and size in bytes -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeCmaskInfo(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - -/** -**************************************************************************************************** -* Addr2ComputeCmaskAddrFromCoord -* -* @brief -* Compute Cmask address according to coordinates (of MSAA color buffer) -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskAddrFromCoord( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] Cmask info and coordinates - ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Cmask address -{ - V2::Lib* pLib = 
V2::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeCmaskAddrFromCoord(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - -/** -**************************************************************************************************** -* Addr2ComputeCmaskCoordFromAddr -* -* @brief -* Compute coordinates within color buffer (1st pixel of a micro tile) according to -* Cmask address -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskCoordFromAddr( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, ///< [in] Cmask info and address - ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) ///< [out] Cmask coordinates -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeCmaskCoordFromAddr(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// F-mask functions for Addr2 -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* Addr2ComputeFmaskInfo -* -* @brief -* Compute Fmask pitch/height/depth/alignments and size in bytes -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskInfo( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] Fmask 
information - ADDR2_COMPUTE_FMASK_INFO_OUTPUT* pOut) ///< [out] Fmask pitch and height -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeFmaskInfo(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - -/** -**************************************************************************************************** -* Addr2ComputeFmaskAddrFromCoord -* -* @brief -* Compute Fmask address according to coordinates (x,y,slice,sample,plane) -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskAddrFromCoord( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] Fmask info and coordinates - ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Fmask address -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeFmaskAddrFromCoord(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - -/** -**************************************************************************************************** -* Addr2ComputeFmaskCoordFromAddr -* -* @brief -* Compute coordinates (x,y,slice,sample,plane) according to Fmask address -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskCoordFromAddr( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] Fmask info and address - ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) ///< [out] Fmask coordinates -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); 
- - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeFmaskCoordFromAddr(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// DCC key functions for Addr2 -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* Addr2ComputeDccInfo -* -* @brief -* Compute DCC key size, base alignment based on color surface size, tile info or tile index -* -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccInfo( - ADDR_HANDLE hLib, ///< handle of addrlib - const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input - ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) ///< [out] output -{ - ADDR_E_RETURNCODE returnCode; - - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - if (pLib != NULL) - { - returnCode = pLib->ComputeDccInfo(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Addr2ComputeDccAddrFromCoord -* -* @brief -* Compute DCC key address according to coordinates -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccAddrFromCoord( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] Dcc info and coordinates - ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Dcc address -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - 
returnCode = pLib->ComputeDccAddrFromCoord(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Addr2ComputePipeBankXor -* -* @brief -* Calculate a valid bank pipe xor value for client to use. -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputePipeBankXor( - ADDR_HANDLE hLib, ///< handle of addrlib - const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input - ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) ///< [out] output -{ - ADDR_E_RETURNCODE returnCode; - - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - if (pLib != NULL) - { - returnCode = pLib->ComputePipeBankXor(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Addr2ComputeSlicePipeBankXor -* -* @brief -* Calculate slice pipe bank xor value based on base pipe bank xor and slice id. -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeSlicePipeBankXor( - ADDR_HANDLE hLib, ///< handle of addrlib - const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input - ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) ///< [out] output -{ - ADDR_E_RETURNCODE returnCode; - - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - if (pLib != NULL) - { - returnCode = pLib->ComputeSlicePipeBankXor(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Addr2ComputeSubResourceOffsetForSwizzlePattern -* -* @brief -* Calculate sub resource offset for swizzle pattern. 
-**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeSubResourceOffsetForSwizzlePattern( - ADDR_HANDLE hLib, ///< handle of addrlib - const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input - ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) ///< [out] output -{ - ADDR_E_RETURNCODE returnCode; - - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - if (pLib != NULL) - { - returnCode = pLib->ComputeSubResourceOffsetForSwizzlePattern(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Addr2GetPreferredSurfaceSetting -* -* @brief -* Suggest a preferred setting for client driver to program HW register -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2GetPreferredSurfaceSetting( - ADDR_HANDLE hLib, ///< handle of addrlib - const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input - ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) ///< [out] output -{ - ADDR_E_RETURNCODE returnCode; - - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - if (pLib != NULL) - { - returnCode = pLib->Addr2GetPreferredSurfaceSetting(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Addr2IsValidDisplaySwizzleMode -* -* @brief -* Return whether the swizzle mode is supported by DCE / DCN. 
-**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2IsValidDisplaySwizzleMode( - ADDR_HANDLE hLib, - AddrSwizzleMode swizzleMode, - UINT_32 bpp, - bool *result) -{ - ADDR_E_RETURNCODE returnCode; - - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - if (pLib != NULL) - { - ADDR2_COMPUTE_SURFACE_INFO_INPUT in; - in.swizzleMode = swizzleMode; - in.bpp = bpp; - - *result = pLib->IsValidDisplaySwizzleMode(&in); - returnCode = ADDR_OK; - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} diff -Nru mesa-18.3.3/src/amd/addrlib/addrinterface.h mesa-19.0.1/src/amd/addrlib/addrinterface.h --- mesa-18.3.3/src/amd/addrlib/addrinterface.h 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/addrinterface.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,3717 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addrinterface.h -* @brief Contains the addrlib interfaces declaration and parameter defines -**************************************************************************************************** -*/ -#ifndef __ADDR_INTERFACE_H__ -#define __ADDR_INTERFACE_H__ - -#include "addrtypes.h" - -#if defined(__cplusplus) -extern "C" -{ -#endif - -#define ADDRLIB_VERSION_MAJOR 6 -#define ADDRLIB_VERSION_MINOR 2 -#define ADDRLIB_VERSION ((ADDRLIB_VERSION_MAJOR << 16) | ADDRLIB_VERSION_MINOR) - -/// Virtually all interface functions need ADDR_HANDLE as first parameter -typedef VOID* ADDR_HANDLE; - -/// Client handle used in callbacks -typedef VOID* ADDR_CLIENT_HANDLE; - -/** -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* // Callback functions -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* typedef VOID* (ADDR_API* ADDR_ALLOCSYSMEM)( -* const ADDR_ALLOCSYSMEM_INPUT* pInput); -* typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_FREESYSMEM)( -* VOID* pVirtAddr); -* typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_DEBUGPRINT)( -* const ADDR_DEBUGPRINT_INPUT* pInput); -* -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* // Create/Destroy/Config functions -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* AddrCreate() -* AddrDestroy() -* -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* // Surface functions -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* AddrComputeSurfaceInfo() -* 
AddrComputeSurfaceAddrFromCoord() -* AddrComputeSurfaceCoordFromAddr() -* -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* // HTile functions -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* AddrComputeHtileInfo() -* AddrComputeHtileAddrFromCoord() -* AddrComputeHtileCoordFromAddr() -* -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* // C-mask functions -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* AddrComputeCmaskInfo() -* AddrComputeCmaskAddrFromCoord() -* AddrComputeCmaskCoordFromAddr() -* -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* // F-mask functions -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* AddrComputeFmaskInfo() -* AddrComputeFmaskAddrFromCoord() -* AddrComputeFmaskCoordFromAddr() -* -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* // Element/Utility functions -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* ElemFlt32ToDepthPixel() -* ElemFlt32ToColorPixel() -* AddrExtractBankPipeSwizzle() -* AddrCombineBankPipeSwizzle() -* AddrComputeSliceSwizzle() -* AddrConvertTileInfoToHW() -* AddrConvertTileIndex() -* AddrConvertTileIndex1() -* AddrGetTileIndex() -* AddrComputeBaseSwizzle() -* AddrUseTileIndex() -* AddrUseCombinedSwizzle() -* -**/ - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Callback functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* @brief channel setting structure 
-**************************************************************************************************** -*/ -typedef union _ADDR_CHANNEL_SETTING -{ - struct - { - UINT_8 valid : 1; ///< Indicate whehter this channel setting is valid - UINT_8 channel : 2; ///< 0 for x channel, 1 for y channel, 2 for z channel - UINT_8 index : 5; ///< Channel index - }; - UINT_8 value; ///< Value -} ADDR_CHANNEL_SETTING; - -/** -**************************************************************************************************** -* @brief address equation key structure -**************************************************************************************************** -*/ -typedef union _ADDR_EQUATION_KEY -{ - struct - { - UINT_32 log2ElementBytes : 3; ///< Log2 of Bytes per pixel - UINT_32 tileMode : 5; ///< Tile mode - UINT_32 microTileType : 3; ///< Micro tile type - UINT_32 pipeConfig : 5; ///< pipe config - UINT_32 numBanksLog2 : 3; ///< Number of banks log2 - UINT_32 bankWidth : 4; ///< Bank width - UINT_32 bankHeight : 4; ///< Bank height - UINT_32 macroAspectRatio : 3; ///< Macro tile aspect ratio - UINT_32 prt : 1; ///< SI only, indicate whether this equation is for prt - UINT_32 reserved : 1; ///< Reserved bit - } fields; - UINT_32 value; -} ADDR_EQUATION_KEY; - -/** -**************************************************************************************************** -* @brief address equation structure -**************************************************************************************************** -*/ -#define ADDR_MAX_EQUATION_BIT 20u - -// Invalid equation index -#define ADDR_INVALID_EQUATION_INDEX 0xFFFFFFFF - -typedef struct _ADDR_EQUATION -{ - ADDR_CHANNEL_SETTING addr[ADDR_MAX_EQUATION_BIT]; ///< addr setting - ///< each bit is result of addr ^ xor ^ xor2 - ADDR_CHANNEL_SETTING xor1[ADDR_MAX_EQUATION_BIT]; ///< xor setting - ADDR_CHANNEL_SETTING xor2[ADDR_MAX_EQUATION_BIT]; ///< xor2 setting - UINT_32 numBits; ///< The number of bits in equation - BOOL_32 
stackedDepthSlices; ///< TRUE if depth slices are treated as being - ///< stacked vertically prior to swizzling -} ADDR_EQUATION; - - -/** -**************************************************************************************************** -* @brief Alloc system memory flags. -* @note These flags are reserved for future use and if flags are added will minimize the impact -* of the client. -**************************************************************************************************** -*/ -typedef union _ADDR_ALLOCSYSMEM_FLAGS -{ - struct - { - UINT_32 reserved : 32; ///< Reserved for future use. - } fields; - UINT_32 value; - -} ADDR_ALLOCSYSMEM_FLAGS; - -/** -**************************************************************************************************** -* @brief Alloc system memory input structure -**************************************************************************************************** -*/ -typedef struct _ADDR_ALLOCSYSMEM_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - ADDR_ALLOCSYSMEM_FLAGS flags; ///< System memory flags. - UINT_32 sizeInBytes; ///< System memory allocation size in bytes. - ADDR_CLIENT_HANDLE hClient; ///< Client handle -} ADDR_ALLOCSYSMEM_INPUT; - -/** -**************************************************************************************************** -* ADDR_ALLOCSYSMEM -* @brief -* Allocate system memory callback function. Returns valid pointer on success. 
-**************************************************************************************************** -*/ -typedef VOID* (ADDR_API* ADDR_ALLOCSYSMEM)( - const ADDR_ALLOCSYSMEM_INPUT* pInput); - -/** -**************************************************************************************************** -* @brief Free system memory input structure -**************************************************************************************************** -*/ -typedef struct _ADDR_FREESYSMEM_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - VOID* pVirtAddr; ///< Virtual address - ADDR_CLIENT_HANDLE hClient; ///< Client handle -} ADDR_FREESYSMEM_INPUT; - -/** -**************************************************************************************************** -* ADDR_FREESYSMEM -* @brief -* Free system memory callback function. -* Returns ADDR_OK on success. -**************************************************************************************************** -*/ -typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_FREESYSMEM)( - const ADDR_FREESYSMEM_INPUT* pInput); - -/** -**************************************************************************************************** -* @brief Print debug message input structure -**************************************************************************************************** -*/ -typedef struct _ADDR_DEBUGPRINT_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - CHAR* pDebugString; ///< Debug print string - va_list ap; ///< Variable argument list - ADDR_CLIENT_HANDLE hClient; ///< Client handle -} ADDR_DEBUGPRINT_INPUT; - -/** -**************************************************************************************************** -* ADDR_DEBUGPRINT -* @brief -* Print debug message callback function. -* Returns ADDR_OK on success. 
-**************************************************************************************************** -*/ -typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_DEBUGPRINT)( - const ADDR_DEBUGPRINT_INPUT* pInput); - -/** -**************************************************************************************************** -* ADDR_CALLBACKS -* -* @brief -* Address Library needs client to provide system memory alloc/free routines. -**************************************************************************************************** -*/ -typedef struct _ADDR_CALLBACKS -{ - ADDR_ALLOCSYSMEM allocSysMem; ///< Routine to allocate system memory - ADDR_FREESYSMEM freeSysMem; ///< Routine to free system memory - ADDR_DEBUGPRINT debugPrint; ///< Routine to print debug message -} ADDR_CALLBACKS; - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Create/Destroy functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* ADDR_CREATE_FLAGS -* -* @brief -* This structure is used to pass some setup in creation of AddrLib -* @note -**************************************************************************************************** -*/ -typedef union _ADDR_CREATE_FLAGS -{ - struct - { - UINT_32 noCubeMipSlicesPad : 1; ///< Turn cubemap faces padding off - UINT_32 fillSizeFields : 1; ///< If clients fill size fields in all input and - /// output structure - UINT_32 useTileIndex : 1; ///< Make tileIndex field in input valid - UINT_32 useCombinedSwizzle : 1; ///< Use combined tile swizzle - UINT_32 checkLast2DLevel : 1; ///< Check the last 2D mip sub level - UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment - UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize - UINT_32 reserved : 25; ///< Reserved bits for future use - }; - - UINT_32 
value; -} ADDR_CREATE_FLAGS; - -/** -**************************************************************************************************** -* ADDR_REGISTER_VALUE -* -* @brief -* Data from registers to setup AddrLib global data, used in AddrCreate -**************************************************************************************************** -*/ -typedef struct _ADDR_REGISTER_VALUE -{ - UINT_32 gbAddrConfig; ///< For R8xx, use GB_ADDR_CONFIG register value. - /// For R6xx/R7xx, use GB_TILING_CONFIG. - /// But they can be treated as the same. - /// if this value is 0, use chip to set default value - UINT_32 backendDisables; ///< 1 bit per backend, starting with LSB. 1=disabled,0=enabled. - /// Register value of CC_RB_BACKEND_DISABLE.BACKEND_DISABLE - - /// R800 registers----------------------------------------------- - UINT_32 noOfBanks; ///< Number of h/w ram banks - For r800: MC_ARB_RAMCFG.NOOFBANK - /// No enums for this value in h/w header files - /// 0: 4 - /// 1: 8 - /// 2: 16 - UINT_32 noOfRanks; /// MC_ARB_RAMCFG.NOOFRANK - /// 0: 1 - /// 1: 2 - /// SI (R1000) registers----------------------------------------- - const UINT_32* pTileConfig; ///< Global tile setting tables - UINT_32 noOfEntries; ///< Number of entries in pTileConfig - - ///< CI registers------------------------------------------------- - const UINT_32* pMacroTileConfig; ///< Global macro tile mode table - UINT_32 noOfMacroEntries; ///< Number of entries in pMacroTileConfig - - ///< GFX9 HW parameters - UINT_32 blockVarSizeLog2; ///< SW_VAR_* block size -} ADDR_REGISTER_VALUE; - -/** -**************************************************************************************************** -* ADDR_CREATE_INPUT -* -* @brief -* Parameters use to create an AddrLib Object. Caller must provide all fields. 
-* -**************************************************************************************************** -*/ -typedef struct _ADDR_CREATE_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 chipEngine; ///< Chip Engine - UINT_32 chipFamily; ///< Chip Family - UINT_32 chipRevision; ///< Chip Revision - ADDR_CALLBACKS callbacks; ///< Callbacks for sysmem alloc/free/print - ADDR_CREATE_FLAGS createFlags; ///< Flags to setup AddrLib - ADDR_REGISTER_VALUE regValue; ///< Data from registers to setup AddrLib global data - ADDR_CLIENT_HANDLE hClient; ///< Client handle - UINT_32 minPitchAlignPixels; ///< Minimum pitch alignment in pixels -} ADDR_CREATE_INPUT; - -/** -**************************************************************************************************** -* ADDR_CREATEINFO_OUTPUT -* -* @brief -* Return AddrLib handle to client driver -* -**************************************************************************************************** -*/ -typedef struct _ADDR_CREATE_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - ADDR_HANDLE hLib; ///< Address lib handle - - UINT_32 numEquations; ///< Number of equations in the table - const ADDR_EQUATION* pEquationTable; ///< Pointer to the equation table -} ADDR_CREATE_OUTPUT; - -/** -**************************************************************************************************** -* AddrCreate -* -* @brief -* Create AddrLib object, must be called before any interface calls -* -* @return -* ADDR_OK if successful -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrCreate( - const ADDR_CREATE_INPUT* pAddrCreateIn, - ADDR_CREATE_OUTPUT* pAddrCreateOut); - - - -/** -**************************************************************************************************** -* AddrDestroy -* -* @brief -* Destroy AddrLib object, must be called to free internally allocated resources. 
-* -* @return -* ADDR_OK if successful -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrDestroy( - ADDR_HANDLE hLib); - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Surface functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* @brief -* Bank/tiling parameters. On function input, these can be set as desired or -* left 0 for AddrLib to calculate/default. On function output, these are the actual -* parameters used. -* @note -* Valid bankWidth/bankHeight value: -* 1,2,4,8. They are factors instead of pixels or bytes. -* -* The bank number remains constant across each row of the -* macro tile as each pipe is selected, so the number of -* tiles in the x direction with the same bank number will -* be bank_width * num_pipes. -**************************************************************************************************** -*/ -typedef struct _ADDR_TILEINFO -{ - /// Any of these parameters can be set to 0 to use the HW default. - UINT_32 banks; ///< Number of banks, numerical value - UINT_32 bankWidth; ///< Number of tiles in the X direction in the same bank - UINT_32 bankHeight; ///< Number of tiles in the Y direction in the same bank - UINT_32 macroAspectRatio; ///< Macro tile aspect ratio. 1-1:1, 2-4:1, 4-16:1, 8-64:1 - UINT_32 tileSplitBytes; ///< Tile split size, in bytes - AddrPipeCfg pipeConfig; ///< Pipe Config = HW enum + 1 -} ADDR_TILEINFO; - -// Create a define to avoid client change. 
The removal of R800 is because we plan to implement SI -// within 800 HWL - An AddrPipeCfg is added in above data structure -typedef ADDR_TILEINFO ADDR_R800_TILEINFO; - -/** -**************************************************************************************************** -* @brief -* Information needed by quad buffer stereo support -**************************************************************************************************** -*/ -typedef struct _ADDR_QBSTEREOINFO -{ - UINT_32 eyeHeight; ///< Height (in pixel rows) to right eye - UINT_32 rightOffset; ///< Offset (in bytes) to right eye - UINT_32 rightSwizzle; ///< TileSwizzle for right eyes -} ADDR_QBSTEREOINFO; - -/** -**************************************************************************************************** -* ADDR_SURFACE_FLAGS -* -* @brief -* Surface flags -**************************************************************************************************** -*/ -typedef union _ADDR_SURFACE_FLAGS -{ - struct - { - UINT_32 color : 1; ///< Flag indicates this is a color buffer - UINT_32 depth : 1; ///< Flag indicates this is a depth/stencil buffer - UINT_32 stencil : 1; ///< Flag indicates this is a stencil buffer - UINT_32 texture : 1; ///< Flag indicates this is a texture - UINT_32 cube : 1; ///< Flag indicates this is a cubemap - UINT_32 volume : 1; ///< Flag indicates this is a volume texture - UINT_32 fmask : 1; ///< Flag indicates this is an fmask - UINT_32 cubeAsArray : 1; ///< Flag indicates if treat cubemap as arrays - UINT_32 compressZ : 1; ///< Flag indicates z buffer is compressed - UINT_32 overlay : 1; ///< Flag indicates this is an overlay surface - UINT_32 noStencil : 1; ///< Flag indicates this depth has no separate stencil - UINT_32 display : 1; ///< Flag indicates this should match display controller req. - UINT_32 opt4Space : 1; ///< Flag indicates this surface should be optimized for space - /// i.e. 
save some memory but may lose performance - UINT_32 prt : 1; ///< Flag for partially resident texture - UINT_32 qbStereo : 1; ///< Quad buffer stereo surface - UINT_32 pow2Pad : 1; ///< SI: Pad to pow2, must set for mipmap (include level0) - UINT_32 interleaved : 1; ///< Special flag for interleaved YUV surface padding - UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable - UINT_32 dispTileType : 1; ///< NI: force display Tiling for 128 bit shared resoruce - UINT_32 dccCompatible : 1; ///< VI: whether to make MSAA surface support dcc fast clear - UINT_32 dccPipeWorkaround : 1; ///< VI: whether to workaround the HW limit that - /// dcc can't be enabled if pipe config of tile mode - /// is different from that of ASIC, this flag - /// is address lib internal flag, client should ignore it - UINT_32 czDispCompatible : 1; ///< SI+: CZ family has a HW bug needs special alignment. - /// This flag indicates we need to follow the - /// alignment with CZ families or other ASICs under - /// PX configuration + CZ. - UINT_32 nonSplit : 1; ///< CI: depth texture should not be split - UINT_32 disableLinearOpt : 1; ///< Disable tile mode optimization to linear - UINT_32 needEquation : 1; ///< Make the surface tile setting equation compatible. - /// This flag indicates we need to override tile - /// mode to PRT_* tile mode to disable slice rotation, - /// which is needed by swizzle pattern equation. - UINT_32 skipIndicesOutput : 1; ///< Skipping indices in output. 
- UINT_32 rotateDisplay : 1; ///< Rotate micro tile type - UINT_32 minimizeAlignment : 1; ///< Minimize alignment - UINT_32 preferEquation : 1; ///< Return equation index without adjusting tile mode - UINT_32 matchStencilTileCfg : 1; ///< Select tile index of stencil as well as depth surface - /// to make sure they share same tile config parameters - UINT_32 disallowLargeThickDegrade : 1; ///< Disallow large thick tile degrade - UINT_32 reserved : 1; ///< Reserved bits - }; - - UINT_32 value; -} ADDR_SURFACE_FLAGS; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_SURFACE_INFO_INPUT -* -* @brief -* Input structure for AddrComputeSurfaceInfo -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_SURFACE_INFO_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - AddrTileMode tileMode; ///< Tile mode - AddrFormat format; ///< If format is set to valid one, bpp/width/height - /// might be overwritten - UINT_32 bpp; ///< Bits per pixel - UINT_32 numSamples; ///< Number of samples - UINT_32 width; ///< Width, in pixels - UINT_32 height; ///< Height, in pixels - UINT_32 numSlices; ///< Number of surface slices or depth - UINT_32 slice; ///< Slice index - UINT_32 mipLevel; ///< Current mipmap level - UINT_32 numMipLevels; ///< Number of mips in mip chain - ADDR_SURFACE_FLAGS flags; ///< Surface type flags - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA - /// r800 and later HWL parameters - // Needed by 2D tiling, for linear and 1D tiling, just keep them 0's - ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. 
Set to 0 to default/calculate - AddrTileType tileType; ///< Micro tiling type, not needed when tileIndex != -1 - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - UINT_32 basePitch; ///< Base level pitch in pixels, 0 means ignored, is a - /// must for mip levels from SI+. - /// Don't use pitch in blocks for compressed formats! - UINT_32 maxBaseAlign; ///< Max base alignment request from client - UINT_32 pitchAlign; ///< Pitch alignment request from client - UINT_32 heightAlign; ///< Height alignment request from client -} ADDR_COMPUTE_SURFACE_INFO_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_SURFACE_INFO_OUTPUT -* -* @brief -* Output structure for AddrComputeSurfInfo -* @note - Element: AddrLib unit for computing. e.g. BCn: 4x4 blocks; R32B32B32: 32bit with 3x pitch - Pixel: Original pixel -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_SURFACE_INFO_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 pitch; ///< Pitch in elements (in blocks for compressed formats) - UINT_32 height; ///< Height in elements (in blocks for compressed formats) - UINT_32 depth; ///< Number of slice/depth - UINT_64 surfSize; ///< Surface size in bytes - AddrTileMode tileMode; ///< Actual tile mode. May differ from that in input - UINT_32 baseAlign; ///< Base address alignment - UINT_32 pitchAlign; ///< Pitch alignment, in elements - UINT_32 heightAlign; ///< Height alignment, in elements - UINT_32 depthAlign; ///< Depth alignment, aligned to thickness, for 3d texture - UINT_32 bpp; ///< Bits per elements (e.g. 
blocks for BCn, 1/3 for 96bit) - UINT_32 pixelPitch; ///< Pitch in original pixels - UINT_32 pixelHeight; ///< Height in original pixels - UINT_32 pixelBits; ///< Original bits per pixel, passed from input - UINT_64 sliceSize; ///< Size of slice specified by input's slice - /// The result is controlled by surface flags & createFlags - /// By default this value equals to surfSize for volume - UINT_32 pitchTileMax; ///< PITCH_TILE_MAX value for h/w register - UINT_32 heightTileMax; ///< HEIGHT_TILE_MAX value for h/w register - UINT_32 sliceTileMax; ///< SLICE_TILE_MAX value for h/w register - - UINT_32 numSamples; ///< Pass the effective numSamples processed in this call - - /// r800 and later HWL parameters - ADDR_TILEINFO* pTileInfo; ///< Tile parameters used. Filled in if 0 on input - AddrTileType tileType; ///< Micro tiling type, only valid when tileIndex != -1 - INT_32 tileIndex; ///< Tile index, MAY be "downgraded" - - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - /// Output flags - struct - { - /// Special information to work around SI mipmap swizzle bug UBTS #317508 - UINT_32 last2DLevel : 1; ///< TRUE if this is the last 2D(3D) tiled - ///< Only meaningful when create flag checkLast2DLevel is set - UINT_32 tcCompatible : 1; ///< If the surface can be shader compatible - UINT_32 dccUnsupport : 1; ///< If the surface can support DCC compressed rendering - UINT_32 prtTileIndex : 1; ///< SI only, indicate the returned tile index is for PRT - ///< If address lib return true for mip 0, client should set prt flag - ///< for child mips in subsequent compute surface info calls - UINT_32 reserved :28; ///< Reserved bits - }; - - UINT_32 equationIndex; ///< Equation index in the equation table; - - UINT_32 blockWidth; ///< Width in element inside one block(1D->Micro, 2D->Macro) - UINT_32 blockHeight; ///< Height in element inside one block(1D->Micro, 2D->Macro) - UINT_32 blockSlices; ///< Slice number inside one block(1D->Micro, 
2D->Macro) - - /// Stereo info - ADDR_QBSTEREOINFO* pStereoInfo;///< Stereo information, needed when .qbStereo flag is TRUE - - INT_32 stencilTileIdx; ///< stencil tile index output when matchStencilTileCfg was set -} ADDR_COMPUTE_SURFACE_INFO_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeSurfaceInfo -* -* @brief -* Compute surface width/height/depth/alignments and suitable tiling mode -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceInfo( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT -* -* @brief -* Input structure for AddrComputeSurfaceAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Slice index - UINT_32 sample; ///< Sample index, use fragment index for EQAA - - UINT_32 bpp; ///< Bits per pixel - UINT_32 pitch; ///< Surface pitch, in pixels - UINT_32 height; ///< Surface height, in pixels - UINT_32 numSlices; ///< Surface depth - UINT_32 numSamples; ///< Number of samples - - AddrTileMode tileMode; ///< Tile mode - BOOL_32 isDepth; ///< TRUE if the surface uses depth sample ordering within - /// micro tile. 
Textures can also choose depth sample order - UINT_32 tileBase; ///< Base offset (in bits) inside micro tile which handles - /// the case that components are stored separately - UINT_32 compBits; ///< The component bits actually needed(for planar surface) - - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA - /// r800 and later HWL parameters - // Used for 1D tiling above - AddrTileType tileType; ///< See defintion of AddrTileType - struct - { - UINT_32 ignoreSE : 1; ///< TRUE if shader engines are ignored. This is texture - /// only flag. Only non-RT texture can set this to TRUE - UINT_32 reserved :31; ///< Reserved for future use. - }; - // 2D tiling needs following structure - ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Client must provide all data - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - union - { - struct - { - UINT_32 bankSwizzle; ///< Bank swizzle - UINT_32 pipeSwizzle; ///< Pipe swizzle - }; - UINT_32 tileSwizzle; ///< Combined swizzle, if useCombinedSwizzle is TRUE - }; -} ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT -* -* @brief -* Output structure for AddrComputeSurfaceAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Byte address - UINT_32 bitPosition; ///< Bit position within surfaceAddr, 0-7. - /// For surface bpp < 8, e.g. FMT_1. 
- UINT_32 prtBlockIndex; ///< Index of a PRT tile (64K block) -} ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeSurfaceAddrFromCoord -* -* @brief -* Compute surface address from a given coordinate. -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceAddrFromCoord( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT -* -* @brief -* Input structure for AddrComputeSurfaceCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Address in bytes - UINT_32 bitPosition; ///< Bit position in addr. 0-7. for surface bpp < 8, - /// e.g. FMT_1; - UINT_32 bpp; ///< Bits per pixel - UINT_32 pitch; ///< Pitch, in pixels - UINT_32 height; ///< Height in pixels - UINT_32 numSlices; ///< Surface depth - UINT_32 numSamples; ///< Number of samples - - AddrTileMode tileMode; ///< Tile mode - BOOL_32 isDepth; ///< Surface uses depth sample ordering within micro tile. - /// Note: Textures can choose depth sample order as well. 
- UINT_32 tileBase; ///< Base offset (in bits) inside micro tile which handles - /// the case that components are stored separately - UINT_32 compBits; ///< The component bits actually needed(for planar surface) - - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA - /// r800 and later HWL parameters - // Used for 1D tiling above - AddrTileType tileType; ///< See defintion of AddrTileType - struct - { - UINT_32 ignoreSE : 1; ///< TRUE if shader engines are ignored. This is texture - /// only flag. Only non-RT texture can set this to TRUE - UINT_32 reserved :31; ///< Reserved for future use. - }; - // 2D tiling needs following structure - ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Client must provide all data - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - union - { - struct - { - UINT_32 bankSwizzle; ///< Bank swizzle - UINT_32 pipeSwizzle; ///< Pipe swizzle - }; - UINT_32 tileSwizzle; ///< Combined swizzle, if useCombinedSwizzle is TRUE - }; -} ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT -* -* @brief -* Output structure for AddrComputeSurfaceCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Index of slices - UINT_32 sample; ///< Index of samples, means fragment index for EQAA -} ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT; - -/** -**************************************************************************************************** -* 
AddrComputeSurfaceCoordFromAddr -* -* @brief -* Compute coordinate from a given surface address -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceCoordFromAddr( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut); - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// HTile functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* ADDR_HTILE_FLAGS -* -* @brief -* HTILE flags -**************************************************************************************************** -*/ -typedef union _ADDR_HTILE_FLAGS -{ - struct - { - UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable - UINT_32 skipTcCompatSizeAlign : 1; ///< Flag indicates that addrLib will not align htile - /// size to 256xBankxPipe when computing tc-compatible - /// htile info. - UINT_32 reserved : 30; ///< Reserved bits - }; - - UINT_32 value; -} ADDR_HTILE_FLAGS; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_HTILE_INFO_INPUT -* -* @brief -* Input structure of AddrComputeHtileInfo -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_HTILE_INFO_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - ADDR_HTILE_FLAGS flags; ///< HTILE flags - UINT_32 pitch; ///< Surface pitch, in pixels - UINT_32 height; ///< Surface height, in pixels - UINT_32 numSlices; ///< Number of slices - BOOL_32 isLinear; ///< Linear or tiled HTILE layout - AddrHtileBlockSize blockWidth; ///< 4 or 8. 
EG above only support 8 - AddrHtileBlockSize blockHeight; ///< 4 or 8. EG above only support 8 - ADDR_TILEINFO* pTileInfo; ///< Tile info - - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid -} ADDR_COMPUTE_HTILE_INFO_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_HTILE_INFO_OUTPUT -* -* @brief -* Output structure of AddrComputeHtileInfo -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_HTILE_INFO_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 pitch; ///< Pitch in pixels of depth buffer represented in this - /// HTile buffer. This might be larger than original depth - /// buffer pitch when called with an unaligned pitch. - UINT_32 height; ///< Height in pixels, as above - UINT_64 htileBytes; ///< Size of HTILE buffer, in bytes - UINT_32 baseAlign; ///< Base alignment - UINT_32 bpp; ///< Bits per pixel for HTILE is how many bits for an 8x8 block! - UINT_32 macroWidth; ///< Macro width in pixels, actually squared cache shape - UINT_32 macroHeight; ///< Macro height in pixels - UINT_64 sliceSize; ///< Slice size, in bytes. - BOOL_32 sliceInterleaved; ///< Flag to indicate if different slice's htile is interleaved - /// Compute engine clear can't be used if htile is interleaved - BOOL_32 nextMipLevelCompressible; ///< Flag to indicate whether HTILE can be enabled in - /// next mip level, it also indicates if memory set based - /// fast clear can be used for current mip level. 
-} ADDR_COMPUTE_HTILE_INFO_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeHtileInfo -* -* @brief -* Compute Htile pitch, height, base alignment and size in bytes -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeHtileInfo( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn, - ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT -* -* @brief -* Input structure for AddrComputeHtileAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 pitch; ///< Pitch, in pixels - UINT_32 height; ///< Height in pixels - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Index of slice - UINT_32 numSlices; ///< Number of slices - BOOL_32 isLinear; ///< Linear or tiled HTILE layout - ADDR_HTILE_FLAGS flags; ///< htile flags - AddrHtileBlockSize blockWidth; ///< 4 or 8. 1 means 8, 0 means 4. EG above only support 8 - AddrHtileBlockSize blockHeight; ///< 4 or 8. 1 means 8, 0 means 4. 
EG above only support 8 - ADDR_TILEINFO* pTileInfo; ///< Tile info - - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid - UINT_32 bpp; ///< depth/stencil buffer bit per pixel size - UINT_32 zStencilAddr; ///< tcCompatible Z/Stencil surface address -} ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT -* -* @brief -* Output structure for AddrComputeHtileAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Address in bytes - UINT_32 bitPosition; ///< Bit position, 0 or 4. CMASK and HTILE shares some lib method. 
- /// So we keep bitPosition for HTILE as well -} ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeHtileAddrFromCoord -* -* @brief -* Compute Htile address according to coordinates (of depth buffer) -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeHtileAddrFromCoord( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT -* -* @brief -* Input structure for AddrComputeHtileCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Address - UINT_32 bitPosition; ///< Bit position 0 or 4. CMASK and HTILE share some methods - /// so we keep bitPosition for HTILE as well - UINT_32 pitch; ///< Pitch, in pixels - UINT_32 height; ///< Height, in pixels - UINT_32 numSlices; ///< Number of slices - BOOL_32 isLinear; ///< Linear or tiled HTILE layout - AddrHtileBlockSize blockWidth; ///< 4 or 8. 1 means 8, 0 means 4. R8xx/R9xx only support 8 - AddrHtileBlockSize blockHeight; ///< 4 or 8. 1 means 8, 0 means 4. 
R8xx/R9xx only support 8 - ADDR_TILEINFO* pTileInfo; ///< Tile info - - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid -} ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT -* -* @brief -* Output structure for AddrComputeHtileCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Slice index -} ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeHtileCoordFromAddr -* -* @brief -* Compute coordinates within depth buffer (1st pixel of a micro tile) according to -* Htile address -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeHtileCoordFromAddr( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut); - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// C-mask functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* ADDR_CMASK_FLAGS -* -* @brief -* CMASK flags -**************************************************************************************************** -*/ 
-typedef union _ADDR_CMASK_FLAGS -{ - struct - { - UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable - UINT_32 reserved :31; ///< Reserved bits - }; - - UINT_32 value; -} ADDR_CMASK_FLAGS; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_CMASK_INFO_INPUT -* -* @brief -* Input structure of AddrComputeCmaskInfo -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_CMASKINFO_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - ADDR_CMASK_FLAGS flags; ///< CMASK flags - UINT_32 pitch; ///< Pitch, in pixels, of color buffer - UINT_32 height; ///< Height, in pixels, of color buffer - UINT_32 numSlices; ///< Number of slices, of color buffer - BOOL_32 isLinear; ///< Linear or tiled layout, Only SI can be linear - ADDR_TILEINFO* pTileInfo; ///< Tile info - - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid -} ADDR_COMPUTE_CMASK_INFO_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_CMASK_INFO_OUTPUT -* -* @brief -* Output structure of AddrComputeCmaskInfo -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_CMASK_INFO_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 pitch; ///< Pitch in pixels of color buffer which - /// this Cmask matches. The size might be larger than - /// original color buffer pitch when called with - /// an unaligned pitch. 
- UINT_32 height; ///< Height in pixels, as above - UINT_64 cmaskBytes; ///< Size in bytes of CMask buffer - UINT_32 baseAlign; ///< Base alignment - UINT_32 blockMax; ///< Cmask block size. Need this to set CB_COLORn_MASK register - UINT_32 macroWidth; ///< Macro width in pixels, actually squared cache shape - UINT_32 macroHeight; ///< Macro height in pixels - UINT_64 sliceSize; ///< Slice size, in bytes. -} ADDR_COMPUTE_CMASK_INFO_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeCmaskInfo -* -* @brief -* Compute Cmask pitch, height, base alignment and size in bytes from color buffer -* info -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskInfo( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn, - ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT -* -* @brief -* Input structure for AddrComputeCmaskAddrFromCoord -* -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_64 fmaskAddr; ///< Fmask addr for tc compatible Cmask - UINT_32 slice; ///< Slice index - UINT_32 pitch; ///< Pitch in pixels, of color buffer - UINT_32 height; ///< Height in pixels, of color buffer - UINT_32 numSlices; ///< Number of slices - UINT_32 bpp; - BOOL_32 isLinear; ///< Linear or tiled layout, Only SI can be linear - ADDR_CMASK_FLAGS flags; ///< CMASK flags - ADDR_TILEINFO* pTileInfo; ///< Tile info - - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - ///< while the global useTileIndex is set to 1 - 
INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid -} ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT -* -* @brief -* Output structure for AddrComputeCmaskAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< CMASK address in bytes - UINT_32 bitPosition; ///< Bit position within addr, 0-7. CMASK is 4 bpp, - /// so the address may be located in bit 0 (0) or 4 (4) -} ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeCmaskAddrFromCoord -* -* @brief -* Compute Cmask address according to coordinates (of MSAA color buffer) -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskAddrFromCoord( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT -* -* @brief -* Input structure for AddrComputeCmaskCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< CMASK address in bytes - UINT_32 bitPosition; ///< Bit position within addr, 0-7. 
CMASK is 4 bpp, - /// so the address may be located in bit 0 (0) or 4 (4) - UINT_32 pitch; ///< Pitch, in pixels - UINT_32 height; ///< Height in pixels - UINT_32 numSlices; ///< Number of slices - BOOL_32 isLinear; ///< Linear or tiled layout, Only SI can be linear - ADDR_TILEINFO* pTileInfo; ///< Tile info - - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid -} ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT -* -* @brief -* Output structure for AddrComputeCmaskCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Slice index -} ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeCmaskCoordFromAddr -* -* @brief -* Compute coordinates within color buffer (1st pixel of a micro tile) according to -* Cmask address -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskCoordFromAddr( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut); - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// F-mask functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** 
-**************************************************************************************************** -* ADDR_COMPUTE_FMASK_INFO_INPUT -* -* @brief -* Input structure for AddrComputeFmaskInfo -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_FMASK_INFO_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - AddrTileMode tileMode; ///< Tile mode - UINT_32 pitch; ///< Surface pitch, in pixels - UINT_32 height; ///< Surface height, in pixels - UINT_32 numSlices; ///< Number of slice/depth - UINT_32 numSamples; ///< Number of samples - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA - /// r800 and later HWL parameters - struct - { - UINT_32 resolved: 1; ///< TRUE if the surface is for resolved fmask, only used - /// by H/W clients. S/W should always set it to FALSE. - UINT_32 reserved: 31; ///< Reserved for future use. - }; - ADDR_TILEINFO* pTileInfo; ///< 2D tiling parameters. 
Clients must give valid data - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 -} ADDR_COMPUTE_FMASK_INFO_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_FMASK_INFO_OUTPUT -* -* @brief -* Output structure for AddrComputeFmaskInfo -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_FMASK_INFO_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 pitch; ///< Pitch of fmask in pixels - UINT_32 height; ///< Height of fmask in pixels - UINT_32 numSlices; ///< Slices of fmask - UINT_64 fmaskBytes; ///< Size of fmask in bytes - UINT_32 baseAlign; ///< Base address alignment - UINT_32 pitchAlign; ///< Pitch alignment - UINT_32 heightAlign; ///< Height alignment - UINT_32 bpp; ///< Bits per pixel of FMASK is: number of bit planes - UINT_32 numSamples; ///< Number of samples, used for dump, export this since input - /// may be changed in 9xx and above - /// r800 and later HWL parameters - ADDR_TILEINFO* pTileInfo; ///< Tile parameters used. 
Fmask can have different - /// bank_height from color buffer - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - UINT_64 sliceSize; ///< Size of slice in bytes -} ADDR_COMPUTE_FMASK_INFO_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeFmaskInfo -* -* @brief -* Compute Fmask pitch/height/depth/alignments and size in bytes -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskInfo( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT -* -* @brief -* Input structure for AddrComputeFmaskAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Slice index - UINT_32 plane; ///< Plane number - UINT_32 sample; ///< Sample index (fragment index for EQAA) - - UINT_32 pitch; ///< Surface pitch, in pixels - UINT_32 height; ///< Surface height, in pixels - UINT_32 numSamples; ///< Number of samples - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA - - AddrTileMode tileMode; ///< Tile mode - union - { - struct - { - UINT_32 bankSwizzle; ///< Bank swizzle - UINT_32 pipeSwizzle; ///< Pipe swizzle - }; - UINT_32 tileSwizzle; ///< Combined swizzle, if useCombinedSwizzle is TRUE - }; - 
- /// r800 and later HWL parameters - struct - { - UINT_32 resolved: 1; ///< TRUE if this is a resolved fmask, used by H/W clients - UINT_32 ignoreSE: 1; ///< TRUE if shader engines are ignored. - UINT_32 reserved: 30; ///< Reserved for future use. - }; - ADDR_TILEINFO* pTileInfo; ///< 2D tiling parameters. Client must provide all data - -} ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT -* -* @brief -* Output structure for AddrComputeFmaskAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Fmask address - UINT_32 bitPosition; ///< Bit position within fmaskAddr, 0-7. -} ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeFmaskAddrFromCoord -* -* @brief -* Compute Fmask address according to coordinates (x,y,slice,sample,plane) -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskAddrFromCoord( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT -* -* @brief -* Input structure for AddrComputeFmaskCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Address - UINT_32 bitPosition; ///< Bit position 
within addr, 0-7. - - UINT_32 pitch; ///< Pitch, in pixels - UINT_32 height; ///< Height in pixels - UINT_32 numSamples; ///< Number of samples - UINT_32 numFrags; ///< Number of fragments - AddrTileMode tileMode; ///< Tile mode - union - { - struct - { - UINT_32 bankSwizzle; ///< Bank swizzle - UINT_32 pipeSwizzle; ///< Pipe swizzle - }; - UINT_32 tileSwizzle; ///< Combined swizzle, if useCombinedSwizzle is TRUE - }; - - /// r800 and later HWL parameters - struct - { - UINT_32 resolved: 1; ///< TRUE if this is a resolved fmask, used by HW components - UINT_32 ignoreSE: 1; ///< TRUE if shader engines are ignored. - UINT_32 reserved: 30; ///< Reserved for future use. - }; - ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Client must provide all data - -} ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT -* -* @brief -* Output structure for AddrComputeFmaskCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Slice index - UINT_32 plane; ///< Plane number - UINT_32 sample; ///< Sample index (fragment index for EQAA) -} ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeFmaskCoordFromAddr -* -* @brief -* Compute FMASK coordinate from an given address -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskCoordFromAddr( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut); - - - 
-//////////////////////////////////////////////////////////////////////////////////////////////////// -// Element/utility functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* AddrGetVersion -* -* @brief -* Get AddrLib version number -**************************************************************************************************** -*/ -UINT_32 ADDR_API AddrGetVersion(ADDR_HANDLE hLib); - -/** -**************************************************************************************************** -* AddrUseTileIndex -* -* @brief -* Return TRUE if tileIndex is enabled in this address library -**************************************************************************************************** -*/ -BOOL_32 ADDR_API AddrUseTileIndex(ADDR_HANDLE hLib); - -/** -**************************************************************************************************** -* AddrUseCombinedSwizzle -* -* @brief -* Return TRUE if combined swizzle is enabled in this address library -**************************************************************************************************** -*/ -BOOL_32 ADDR_API AddrUseCombinedSwizzle(ADDR_HANDLE hLib); - -/** -**************************************************************************************************** -* ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT -* -* @brief -* Input structure of AddrExtractBankPipeSwizzle -**************************************************************************************************** -*/ -typedef struct _ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 base256b; ///< Base256b value - - /// r800 and later HWL parameters - ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. 
Client must provide all data - - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid -} ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT; - -/** -**************************************************************************************************** -* ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT -* -* @brief -* Output structure of AddrExtractBankPipeSwizzle -**************************************************************************************************** -*/ -typedef struct _ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 bankSwizzle; ///< Bank swizzle - UINT_32 pipeSwizzle; ///< Pipe swizzle -} ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT; - -/** -**************************************************************************************************** -* AddrExtractBankPipeSwizzle -* -* @brief -* Extract Bank and Pipe swizzle from base256b -* @return -* ADDR_OK if no error -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrExtractBankPipeSwizzle( - ADDR_HANDLE hLib, - const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, - ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut); - - -/** -**************************************************************************************************** -* ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT -* -* @brief -* Input structure of AddrCombineBankPipeSwizzle -**************************************************************************************************** -*/ -typedef struct _ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 bankSwizzle; ///< Bank swizzle - UINT_32 pipeSwizzle; ///< Pipe swizzle - UINT_64 baseAddr; ///< Base address (leave it zero for driver clients) - - /// r800 
and later HWL parameters - ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Client must provide all data - - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid -} ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT -* -* @brief -* Output structure of AddrCombineBankPipeSwizzle -**************************************************************************************************** -*/ -typedef struct _ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 tileSwizzle; ///< Combined swizzle -} ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT; - -/** -**************************************************************************************************** -* AddrCombineBankPipeSwizzle -* -* @brief -* Combine Bank and Pipe swizzle -* @return -* ADDR_OK if no error -* @note -* baseAddr here is full MCAddress instead of base256b -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrCombineBankPipeSwizzle( - ADDR_HANDLE hLib, - const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn, - ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_COMPUTE_SLICESWIZZLE_INPUT -* -* @brief -* Input structure of AddrComputeSliceSwizzle -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_SLICESWIZZLE_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - AddrTileMode tileMode; ///< Tile Mode - UINT_32 baseSwizzle; ///< Base tile swizzle 
- UINT_32 slice; ///< Slice index - UINT_64 baseAddr; ///< Base address, driver should leave it 0 in most cases - - /// r800 and later HWL parameters - ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Actually banks needed here! - - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid -} ADDR_COMPUTE_SLICESWIZZLE_INPUT; - - - -/** -**************************************************************************************************** -* ADDR_COMPUTE_SLICESWIZZLE_OUTPUT -* -* @brief -* Output structure of AddrComputeSliceSwizzle -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_SLICESWIZZLE_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 tileSwizzle; ///< Recalculated tileSwizzle value -} ADDR_COMPUTE_SLICESWIZZLE_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeSliceSwizzle -* -* @brief -* Extract Bank and Pipe swizzle from base256b -* @return -* ADDR_OK if no error -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeSliceSwizzle( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, - ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut); - - -/** -**************************************************************************************************** -* AddrSwizzleGenOption -* -* @brief -* Which swizzle generating options: legacy or linear -**************************************************************************************************** -*/ -typedef enum _AddrSwizzleGenOption -{ - ADDR_SWIZZLE_GEN_DEFAULT = 0, ///< As is in client driver implemention for swizzle - 
ADDR_SWIZZLE_GEN_LINEAR = 1, ///< Using a linear increment of swizzle -} AddrSwizzleGenOption; - -/** -**************************************************************************************************** -* AddrSwizzleOption -* -* @brief -* Controls how swizzle is generated -**************************************************************************************************** -*/ -typedef union _ADDR_SWIZZLE_OPTION -{ - struct - { - UINT_32 genOption : 1; ///< The way swizzle is generated, see AddrSwizzleGenOption - UINT_32 reduceBankBit : 1; ///< TRUE if we need reduce swizzle bits - UINT_32 reserved :30; ///< Reserved bits - }; - - UINT_32 value; - -} ADDR_SWIZZLE_OPTION; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_BASE_SWIZZLE_INPUT -* -* @brief -* Input structure of AddrComputeBaseSwizzle -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_BASE_SWIZZLE_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - ADDR_SWIZZLE_OPTION option; ///< Swizzle option - UINT_32 surfIndex; ///< Index of this surface type - AddrTileMode tileMode; ///< Tile Mode - - /// r800 and later HWL parameters - ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Actually banks needed here! 
- - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid -} ADDR_COMPUTE_BASE_SWIZZLE_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT -* -* @brief -* Output structure of AddrComputeBaseSwizzle -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 tileSwizzle; ///< Combined swizzle -} ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeBaseSwizzle -* -* @brief -* Return a Combined Bank and Pipe swizzle base on surface based on surface type/index -* @return -* ADDR_OK if no error -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeBaseSwizzle( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, - ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ELEM_GETEXPORTNORM_INPUT -* -* @brief -* Input structure for ElemGetExportNorm -* -**************************************************************************************************** -*/ -typedef struct _ELEM_GETEXPORTNORM_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - AddrColorFormat format; ///< Color buffer format; Client should use ColorFormat - AddrSurfaceNumber num; ///< Surface number type; Client should use NumberType - AddrSurfaceSwap swap; ///< Surface swap byte swap; Client should use SurfaceSwap - UINT_32 
numSamples; ///< Number of samples -} ELEM_GETEXPORTNORM_INPUT; - -/** -**************************************************************************************************** -* ElemGetExportNorm -* -* @brief -* Helper function to check one format can be EXPORT_NUM, which is a register -* CB_COLOR_INFO.SURFACE_FORMAT. FP16 can be reported as EXPORT_NORM for rv770 in r600 -* family -* @note -* The implementation is only for r600. -* 00 - EXPORT_FULL: PS exports are 4 pixels with 4 components with 32-bits-per-component. (two -* clocks per export) -* 01 - EXPORT_NORM: PS exports are 4 pixels with 4 components with 16-bits-per-component. (one -* clock per export) -* -**************************************************************************************************** -*/ -BOOL_32 ADDR_API ElemGetExportNorm( - ADDR_HANDLE hLib, - const ELEM_GETEXPORTNORM_INPUT* pIn); - - - -/** -**************************************************************************************************** -* ELEM_FLT32TODEPTHPIXEL_INPUT -* -* @brief -* Input structure for addrFlt32ToDepthPixel -* -**************************************************************************************************** -*/ -typedef struct _ELEM_FLT32TODEPTHPIXEL_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - AddrDepthFormat format; ///< Depth buffer format - ADDR_FLT_32 comps[2]; ///< Component values (Z/stencil) -} ELEM_FLT32TODEPTHPIXEL_INPUT; - -/** -**************************************************************************************************** -* ELEM_FLT32TODEPTHPIXEL_INPUT -* -* @brief -* Output structure for ElemFlt32ToDepthPixel -* -**************************************************************************************************** -*/ -typedef struct _ELEM_FLT32TODEPTHPIXEL_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_8* pPixel; ///< Real depth value. Same data type as depth buffer. - /// Client must provide enough storage for this type. 
- UINT_32 depthBase; ///< Tile base in bits for depth bits - UINT_32 stencilBase; ///< Tile base in bits for stencil bits - UINT_32 depthBits; ///< Bits for depth - UINT_32 stencilBits; ///< Bits for stencil -} ELEM_FLT32TODEPTHPIXEL_OUTPUT; - -/** -**************************************************************************************************** -* ElemFlt32ToDepthPixel -* -* @brief -* Convert a FLT_32 value to a depth/stencil pixel value -* -* @return -* Return code -* -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API ElemFlt32ToDepthPixel( - ADDR_HANDLE hLib, - const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn, - ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ELEM_FLT32TOCOLORPIXEL_INPUT -* -* @brief -* Input structure for addrFlt32ToColorPixel -* -**************************************************************************************************** -*/ -typedef struct _ELEM_FLT32TOCOLORPIXEL_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - AddrColorFormat format; ///< Color buffer format - AddrSurfaceNumber surfNum; ///< Surface number - AddrSurfaceSwap surfSwap; ///< Surface swap - ADDR_FLT_32 comps[4]; ///< Component values (r/g/b/a) -} ELEM_FLT32TOCOLORPIXEL_INPUT; - -/** -**************************************************************************************************** -* ELEM_FLT32TOCOLORPIXEL_INPUT -* -* @brief -* Output structure for ElemFlt32ToColorPixel -* -**************************************************************************************************** -*/ -typedef struct _ELEM_FLT32TOCOLORPIXEL_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_8* pPixel; ///< Real color value. Same data type as color buffer. - /// Client must provide enough storage for this type. 
-} ELEM_FLT32TOCOLORPIXEL_OUTPUT; - -/** -**************************************************************************************************** -* ElemFlt32ToColorPixel -* -* @brief -* Convert a FLT_32 value to a red/green/blue/alpha pixel value -* -* @return -* Return code -* -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API ElemFlt32ToColorPixel( - ADDR_HANDLE hLib, - const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn, - ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut); - - -/** -**************************************************************************************************** -* ADDR_CONVERT_TILEINFOTOHW_INPUT -* -* @brief -* Input structure for AddrConvertTileInfoToHW -* @note -* When reverse is TRUE, indices are igonred -**************************************************************************************************** -*/ -typedef struct _ADDR_CONVERT_TILEINFOTOHW_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - BOOL_32 reverse; ///< Convert control flag. - /// FALSE: convert from real value to HW value; - /// TRUE: convert from HW value to real value. 
- - /// r800 and later HWL parameters - ADDR_TILEINFO* pTileInfo; ///< Tile parameters with real value - - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid - UINT_32 bpp; ///< Bits per pixel -} ADDR_CONVERT_TILEINFOTOHW_INPUT; - -/** -**************************************************************************************************** -* ADDR_CONVERT_TILEINFOTOHW_OUTPUT -* -* @brief -* Output structure for AddrConvertTileInfoToHW -**************************************************************************************************** -*/ -typedef struct _ADDR_CONVERT_TILEINFOTOHW_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - /// r800 and later HWL parameters - ADDR_TILEINFO* pTileInfo; ///< Tile parameters with hardware register value - -} ADDR_CONVERT_TILEINFOTOHW_OUTPUT; - -/** -**************************************************************************************************** -* AddrConvertTileInfoToHW -* -* @brief -* Convert tile info from real value to hardware register value -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrConvertTileInfoToHW( - ADDR_HANDLE hLib, - const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, - ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_CONVERT_TILEINDEX_INPUT -* -* @brief -* Input structure for AddrConvertTileIndex -**************************************************************************************************** -*/ -typedef struct _ADDR_CONVERT_TILEINDEX_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - INT_32 tileIndex; ///< Tile index - INT_32 macroModeIndex; ///< Index in macro tile mode 
table if there is one (CI) - UINT_32 bpp; ///< Bits per pixel - BOOL_32 tileInfoHw; ///< Set to TRUE if client wants HW enum, otherwise actual -} ADDR_CONVERT_TILEINDEX_INPUT; - -/** -**************************************************************************************************** -* ADDR_CONVERT_TILEINDEX_OUTPUT -* -* @brief -* Output structure for AddrConvertTileIndex -**************************************************************************************************** -*/ -typedef struct _ADDR_CONVERT_TILEINDEX_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - AddrTileMode tileMode; ///< Tile mode - AddrTileType tileType; ///< Tile type - ADDR_TILEINFO* pTileInfo; ///< Tile info - -} ADDR_CONVERT_TILEINDEX_OUTPUT; - -/** -**************************************************************************************************** -* AddrConvertTileIndex -* -* @brief -* Convert tile index to tile mode/type/info -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex( - ADDR_HANDLE hLib, - const ADDR_CONVERT_TILEINDEX_INPUT* pIn, - ADDR_CONVERT_TILEINDEX_OUTPUT* pOut); - -/** -**************************************************************************************************** -* ADDR_GET_MACROMODEINDEX_INPUT -* -* @brief -* Input structure for AddrGetMacroModeIndex -**************************************************************************************************** -*/ -typedef struct _ADDR_GET_MACROMODEINDEX_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - ADDR_SURFACE_FLAGS flags; ///< Surface flag - INT_32 tileIndex; ///< Tile index - UINT_32 bpp; ///< Bits per pixel - UINT_32 numFrags; ///< Number of color fragments -} ADDR_GET_MACROMODEINDEX_INPUT; - -/** -**************************************************************************************************** -* ADDR_GET_MACROMODEINDEX_OUTPUT -* -* @brief -* Output structure for 
AddrGetMacroModeIndex -**************************************************************************************************** -*/ -typedef struct _ADDR_GET_MACROMODEINDEX_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) -} ADDR_GET_MACROMODEINDEX_OUTPUT; - -/** -**************************************************************************************************** -* AddrGetMacroModeIndex -* -* @brief -* Get macro mode index based on input parameters -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrGetMacroModeIndex( - ADDR_HANDLE hLib, - const ADDR_GET_MACROMODEINDEX_INPUT* pIn, - ADDR_GET_MACROMODEINDEX_OUTPUT* pOut); - -/** -**************************************************************************************************** -* ADDR_CONVERT_TILEINDEX1_INPUT -* -* @brief -* Input structure for AddrConvertTileIndex1 (without macro mode index) -**************************************************************************************************** -*/ -typedef struct _ADDR_CONVERT_TILEINDEX1_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - INT_32 tileIndex; ///< Tile index - UINT_32 bpp; ///< Bits per pixel - UINT_32 numSamples; ///< Number of samples - BOOL_32 tileInfoHw; ///< Set to TRUE if client wants HW enum, otherwise actual -} ADDR_CONVERT_TILEINDEX1_INPUT; - -/** -**************************************************************************************************** -* AddrConvertTileIndex1 -* -* @brief -* Convert tile index to tile mode/type/info -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex1( - ADDR_HANDLE hLib, - const ADDR_CONVERT_TILEINDEX1_INPUT* pIn, - ADDR_CONVERT_TILEINDEX_OUTPUT* pOut); - - - -/** 
-**************************************************************************************************** -* ADDR_GET_TILEINDEX_INPUT -* -* @brief -* Input structure for AddrGetTileIndex -**************************************************************************************************** -*/ -typedef struct _ADDR_GET_TILEINDEX_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - AddrTileMode tileMode; ///< Tile mode - AddrTileType tileType; ///< Tile-type: disp/non-disp/... - ADDR_TILEINFO* pTileInfo; ///< Pointer to tile-info structure, can be NULL for linear/1D -} ADDR_GET_TILEINDEX_INPUT; - -/** -**************************************************************************************************** -* ADDR_GET_TILEINDEX_OUTPUT -* -* @brief -* Output structure for AddrGetTileIndex -**************************************************************************************************** -*/ -typedef struct _ADDR_GET_TILEINDEX_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - INT_32 index; ///< index in table -} ADDR_GET_TILEINDEX_OUTPUT; - -/** -**************************************************************************************************** -* AddrGetTileIndex -* -* @brief -* Get the tiling mode index in table -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrGetTileIndex( - ADDR_HANDLE hLib, - const ADDR_GET_TILEINDEX_INPUT* pIn, - ADDR_GET_TILEINDEX_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_PRT_INFO_INPUT -* -* @brief -* Input structure for AddrComputePrtInfo -**************************************************************************************************** -*/ -typedef struct _ADDR_PRT_INFO_INPUT -{ - AddrFormat format; ///< Surface format - UINT_32 baseMipWidth; ///< Base mipmap width - UINT_32 baseMipHeight; ///< Base mipmap height - UINT_32 baseMipDepth; 
///< Base mipmap depth - UINT_32 numFrags; ///< Number of fragments, -} ADDR_PRT_INFO_INPUT; - -/** -**************************************************************************************************** -* ADDR_PRT_INFO_OUTPUT -* -* @brief -* Input structure for AddrComputePrtInfo -**************************************************************************************************** -*/ -typedef struct _ADDR_PRT_INFO_OUTPUT -{ - UINT_32 prtTileWidth; - UINT_32 prtTileHeight; -} ADDR_PRT_INFO_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputePrtInfo -* -* @brief -* Compute prt surface related information -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputePrtInfo( - ADDR_HANDLE hLib, - const ADDR_PRT_INFO_INPUT* pIn, - ADDR_PRT_INFO_OUTPUT* pOut); - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// DCC key functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* _ADDR_COMPUTE_DCCINFO_INPUT -* -* @brief -* Input structure of AddrComputeDccInfo -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_DCCINFO_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - UINT_32 bpp; ///< BitPP of color surface - UINT_32 numSamples; ///< Sample number of color surface - UINT_64 colorSurfSize; ///< Size of color surface to which dcc key is bound - AddrTileMode tileMode; ///< Tile mode of color surface - ADDR_TILEINFO tileInfo; ///< Tile info of color surface - UINT_32 tileSwizzle; ///< Tile swizzle - INT_32 tileIndex; ///< Tile index of color surface, - ///< MUST be -1 if you don't want to use it - 
///< while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid -} ADDR_COMPUTE_DCCINFO_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_DCCINFO_OUTPUT -* -* @brief -* Output structure of AddrComputeDccInfo -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_DCCINFO_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - UINT_32 dccRamBaseAlign; ///< Base alignment of dcc key - UINT_64 dccRamSize; ///< Size of dcc key - UINT_64 dccFastClearSize; ///< Size of dcc key portion that can be fast cleared - BOOL_32 subLvlCompressible; ///< Whether sub resource is compressiable - BOOL_32 dccRamSizeAligned; ///< Whether the dcc key size is aligned -} ADDR_COMPUTE_DCCINFO_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeDccInfo -* -* @brief -* Compute DCC key size, base alignment -* info -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_DCCINFO_INPUT* pIn, - ADDR_COMPUTE_DCCINFO_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_GET_MAX_ALINGMENTS_OUTPUT -* -* @brief -* Output structure of AddrGetMaxAlignments -**************************************************************************************************** -*/ -typedef struct _ADDR_GET_MAX_ALINGMENTS_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - UINT_32 baseAlign; ///< Maximum base alignment in bytes -} ADDR_GET_MAX_ALINGMENTS_OUTPUT; - -/** 
-**************************************************************************************************** -* AddrGetMaxAlignments -* -* @brief -* Gets maximnum alignments -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrGetMaxAlignments( - ADDR_HANDLE hLib, - ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut); - -/** -**************************************************************************************************** -* AddrGetMaxMetaAlignments -* -* @brief -* Gets maximnum alignments for metadata -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrGetMaxMetaAlignments( - ADDR_HANDLE hLib, - ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut); - -/** -**************************************************************************************************** -* Address library interface version 2 -* available from Gfx9 hardware -**************************************************************************************************** -* Addr2ComputeSurfaceInfo() -* Addr2ComputeSurfaceAddrFromCoord() -* Addr2ComputeSurfaceCoordFromAddr() - -* Addr2ComputeHtileInfo() -* Addr2ComputeHtileAddrFromCoord() -* Addr2ComputeHtileCoordFromAddr() -* -* Addr2ComputeCmaskInfo() -* Addr2ComputeCmaskAddrFromCoord() -* Addr2ComputeCmaskCoordFromAddr() -* -* Addr2ComputeFmaskInfo() -* Addr2ComputeFmaskAddrFromCoord() -* Addr2ComputeFmaskCoordFromAddr() -* -* Addr2ComputeDccInfo() -* -**/ - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Surface functions for Gfx9 -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* ADDR2_SURFACE_FLAGS -* -* @brief -* Surface flags 
-**************************************************************************************************** -*/ -typedef union _ADDR2_SURFACE_FLAGS -{ - struct - { - UINT_32 color : 1; ///< This resource is a color buffer, can be used with RTV - UINT_32 depth : 1; ///< Thie resource is a depth buffer, can be used with DSV - UINT_32 stencil : 1; ///< Thie resource is a stencil buffer, can be used with DSV - UINT_32 fmask : 1; ///< This is an fmask surface - UINT_32 overlay : 1; ///< This is an overlay surface - UINT_32 display : 1; ///< This resource is displable, can be used with DRV - UINT_32 prt : 1; ///< This is a partially resident texture - UINT_32 qbStereo : 1; ///< This is a quad buffer stereo surface - UINT_32 interleaved : 1; ///< Special flag for interleaved YUV surface padding - UINT_32 texture : 1; ///< This resource can be used with SRV - UINT_32 unordered : 1; ///< This resource can be used with UAV - UINT_32 rotated : 1; ///< This resource is rotated and displable - UINT_32 needEquation : 1; ///< This resource needs equation to be generated if possible - UINT_32 opt4space : 1; ///< This resource should be optimized for space - UINT_32 minimizeAlign : 1; ///< This resource should use minimum alignment - UINT_32 noMetadata : 1; ///< This resource has no metadata - UINT_32 metaRbUnaligned : 1; ///< This resource has rb unaligned metadata - UINT_32 metaPipeUnaligned : 1; ///< This resource has pipe unaligned metadata - UINT_32 reserved : 14; ///< Reserved bits - }; - - UINT_32 value; -} ADDR2_SURFACE_FLAGS; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_SURFACE_INFO_INPUT -* -* @brief -* Input structure for Addr2ComputeSurfaceInfo -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_SURFACE_INFO_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - ADDR2_SURFACE_FLAGS flags; ///< Surface 
flags - AddrSwizzleMode swizzleMode; ///< Swizzle Mode for Gfx9 - AddrResourceType resourceType; ///< Surface type - AddrFormat format; ///< Surface format - UINT_32 bpp; ///< bits per pixel - UINT_32 width; ///< Width (of mip0), in pixels - UINT_32 height; ///< Height (of mip0), in pixels - UINT_32 numSlices; ///< Number surface slice/depth (of mip0), - UINT_32 numMipLevels; ///< Total mipmap levels. - UINT_32 numSamples; ///< Number of samples - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA - UINT_32 pitchInElement; ///< Pitch in elements (blocks for compressed formats) - UINT_32 sliceAlign; ///< Required slice size in bytes -} ADDR2_COMPUTE_SURFACE_INFO_INPUT; - -/** -**************************************************************************************************** -* ADDR2_MIP_INFO -* -* @brief -* Structure that contains information for mip level -* -**************************************************************************************************** -*/ -typedef struct _ADDR2_MIP_INFO -{ - UINT_32 pitch; ///< Pitch in elements - UINT_32 height; ///< Padded height in elements - UINT_32 depth; ///< Padded depth - UINT_32 pixelPitch; ///< Pitch in pixels - UINT_32 pixelHeight; ///< Padded height in pixels - UINT_32 equationIndex; ///< Equation index in the equation table - UINT_64 offset; ///< Offset in bytes from mip base, should only be used - ///< to setup vam surface descriptor, can't be used - ///< to setup swizzle pattern - UINT_64 macroBlockOffset; ///< macro block offset in bytes from mip base - UINT_32 mipTailOffset; ///< mip tail offset in bytes - UINT_32 mipTailCoordX; ///< mip tail coord x - UINT_32 mipTailCoordY; ///< mip tail coord y - UINT_32 mipTailCoordZ; ///< mip tail coord z -} ADDR2_MIP_INFO; - -/** -**************************************************************************************************** -* 
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT -* -* @brief -* Output structure for Addr2ComputeSurfInfo -* @note - Element: AddrLib unit for computing. e.g. BCn: 4x4 blocks; R32B32B32: 32bit with 3x pitch - Pixel: Original pixel -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_SURFACE_INFO_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 pitch; ///< Pitch in elements (blocks for compressed formats) - UINT_32 height; ///< Padded height (of mip0) in elements - UINT_32 numSlices; ///< Padded depth for 3d resource - ///< or padded number of slices for 2d array resource - UINT_32 mipChainPitch; ///< Pitch (of total mip chain) in elements - UINT_32 mipChainHeight; ///< Padded height (of total mip chain) in elements - UINT_32 mipChainSlice; ///< Padded depth (of total mip chain) - UINT_64 sliceSize; ///< Slice (total mip chain) size in bytes - UINT_64 surfSize; ///< Surface (total mip chain) size in bytes - UINT_32 baseAlign; ///< Base address alignment - UINT_32 bpp; ///< Bits per elements - /// (e.g. 
blocks for BCn, 1/3 for 96bit) - UINT_32 pixelMipChainPitch; ///< Mip chain pitch in original pixels - UINT_32 pixelMipChainHeight; ///< Mip chain height in original pixels - UINT_32 pixelPitch; ///< Pitch in original pixels - UINT_32 pixelHeight; ///< Height in original pixels - UINT_32 pixelBits; ///< Original bits per pixel, passed from input - - UINT_32 blockWidth; ///< Width in element inside one block - UINT_32 blockHeight; ///< Height in element inside one block - UINT_32 blockSlices; ///< Slice number inside one block - ///< Prt tile is one block, its width/height/slice - ///< equals to blcok width/height/slice - - BOOL_32 epitchIsHeight; ///< Whether to use height to program epitch register - /// Stereo info - ADDR_QBSTEREOINFO* pStereoInfo; ///< Stereo info, needed if qbStereo flag is TRUE - /// Mip info - ADDR2_MIP_INFO* pMipInfo; ///< Pointer to mip information array - /// if it is not NULL, the array is assumed to - /// contain numMipLevels entries - - UINT_32 equationIndex; ///< Equation index in the equation table of mip0 - BOOL_32 mipChainInTail; ///< If whole mipchain falls into mip tail block - UINT_32 firstMipIdInTail; ///< The id of first mip in tail, if there is no mip - /// in tail, it will be set to number of mip levels -} ADDR2_COMPUTE_SURFACE_INFO_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeSurfaceInfo -* -* @brief -* Compute surface width/height/slices/alignments and suitable tiling mode -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceInfo( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT -* -* @brief -* Input structure for 
Addr2ComputeSurfaceAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Slice index - UINT_32 sample; ///< Sample index, use fragment index for EQAA - UINT_32 mipId; ///< the mip ID in mip chain - - AddrSwizzleMode swizzleMode; ///< Swizzle mode for Gfx9 - ADDR2_SURFACE_FLAGS flags; ///< Surface flags - AddrResourceType resourceType; ///< Surface type - UINT_32 bpp; ///< Bits per pixel - UINT_32 unalignedWidth; ///< Surface original width (of mip0) - UINT_32 unalignedHeight; ///< Surface original height (of mip0) - UINT_32 numSlices; ///< Surface original slices (of mip0) - UINT_32 numMipLevels; ///< Total mipmap levels - UINT_32 numSamples; ///< Number of samples - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA - - UINT_32 pipeBankXor; ///< Combined swizzle used to do bank/pipe rotation - UINT_32 pitchInElement; ///< Pitch in elements (blocks for compressed formats) -} ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT -* -* @brief -* Output structure for Addr2ComputeSurfaceAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Byte address - UINT_32 bitPosition; ///< Bit position within surfaceAddr, 0-7. - /// For surface bpp < 8, e.g. FMT_1. 
- UINT_32 prtBlockIndex; ///< Index of a PRT tile (64K block) -} ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeSurfaceAddrFromCoord -* -* @brief -* Compute surface address from a given coordinate. -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceAddrFromCoord( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT -* -* @brief -* Input structure for Addr2ComputeSurfaceCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Address in bytes - UINT_32 bitPosition; ///< Bit position in addr. 0-7. for surface bpp < 8, - /// e.g. FMT_1; - - AddrSwizzleMode swizzleMode; ///< Swizzle mode for Gfx9 - ADDR2_SURFACE_FLAGS flags; ///< Surface flags - AddrResourceType resourceType; ///< Surface type - UINT_32 bpp; ///< Bits per pixel - UINT_32 unalignedWidth; ///< Surface original width (of mip0) - UINT_32 unalignedHeight; ///< Surface original height (of mip0) - UINT_32 numSlices; ///< Surface original slices (of mip0) - UINT_32 numMipLevels; ///< Total mipmap levels. 
- UINT_32 numSamples; ///< Number of samples - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA - - UINT_32 pipeBankXor; ///< Combined swizzle used to do bank/pipe rotation - UINT_32 pitchInElement; ///< Pitch in elements (blocks for compressed formats) -} ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT -* -* @brief -* Output structure for Addr2ComputeSurfaceCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Index of slices - UINT_32 sample; ///< Index of samples, means fragment index for EQAA - UINT_32 mipId; ///< mipmap level id -} ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeSurfaceCoordFromAddr -* -* @brief -* Compute coordinate from a given surface address -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceCoordFromAddr( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut); - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// HTile functions for Gfx9 -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* ADDR2_META_FLAGS -* -* 
@brief -* Metadata flags -**************************************************************************************************** -*/ -typedef union _ADDR2_META_FLAGS -{ - struct - { - UINT_32 pipeAligned : 1; ///< if Metadata being pipe aligned - UINT_32 rbAligned : 1; ///< if Metadata being RB aligned - UINT_32 linear : 1; ///< if Metadata linear, GFX9 does not suppord this! - UINT_32 reserved : 29; ///< Reserved bits - }; - - UINT_32 value; -} ADDR2_META_FLAGS; - -/** -**************************************************************************************************** -* ADDR2_META_MIP_INFO -* -* @brief -* Structure to store per mip metadata information -**************************************************************************************************** -*/ -typedef struct _ADDR2_META_MIP_INFO -{ - BOOL_32 inMiptail; - union - { - struct - { - UINT_32 startX; - UINT_32 startY; - UINT_32 startZ; - UINT_32 width; - UINT_32 height; - UINT_32 depth; - }; - - struct - { - UINT_32 offset; - UINT_32 sliceSize; - }; - }; -} ADDR2_META_MIP_INFO; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_HTILE_INFO_INPUT -* -* @brief -* Input structure of Addr2ComputeHtileInfo -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_HTILE_INFO_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - ADDR2_META_FLAGS hTileFlags; ///< HTILE flags - ADDR2_SURFACE_FLAGS depthFlags; ///< Depth surface flags - AddrSwizzleMode swizzleMode; ///< Depth surface swizzle mode - UINT_32 unalignedWidth; ///< Depth surface original width (of mip0) - UINT_32 unalignedHeight; ///< Depth surface original height (of mip0) - UINT_32 numSlices; ///< Number of slices of depth surface (of mip0) - UINT_32 numMipLevels; ///< Total mipmap levels of color surface - UINT_32 firstMipIdInTail; -} ADDR2_COMPUTE_HTILE_INFO_INPUT; - -/** 
-**************************************************************************************************** -* ADDR2_COMPUTE_HTILE_INFO_OUTPUT -* -* @brief -* Output structure of Addr2ComputeHtileInfo -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_HTILE_INFO_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 pitch; ///< Pitch in pixels of depth buffer represented in this - /// HTile buffer. This might be larger than original depth - /// buffer pitch when called with an unaligned pitch. - UINT_32 height; ///< Height in pixels, as above - UINT_32 baseAlign; ///< Base alignment - UINT_32 sliceSize; ///< Slice size, in bytes. - UINT_32 htileBytes; ///< Size of HTILE buffer, in bytes - UINT_32 metaBlkWidth; ///< Meta block width - UINT_32 metaBlkHeight; ///< Meta block height - UINT_32 metaBlkNumPerSlice; ///< Number of metablock within one slice - - ADDR2_META_MIP_INFO* pMipInfo; ///< HTILE mip information -} ADDR2_COMPUTE_HTILE_INFO_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeHtileInfo -* -* @brief -* Compute Htile pitch, height, base alignment and size in bytes -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileInfo( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, - ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT -* -* @brief -* Input structure for Addr2ComputeHtileAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X 
coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Index of slices - UINT_32 mipId; ///< mipmap level id - - ADDR2_META_FLAGS hTileFlags; ///< HTILE flags - ADDR2_SURFACE_FLAGS depthflags; ///< Depth surface flags - AddrSwizzleMode swizzleMode; ///< Depth surface swizzle mode - UINT_32 bpp; ///< Depth surface bits per pixel - UINT_32 unalignedWidth; ///< Depth surface original width (of mip0) - UINT_32 unalignedHeight; ///< Depth surface original height (of mip0) - UINT_32 numSlices; ///< Depth surface original depth (of mip0) - UINT_32 numMipLevels; ///< Depth surface total mipmap levels - UINT_32 numSamples; ///< Depth surface number of samples - UINT_32 pipeXor; ///< Pipe xor setting -} ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT -* -* @brief -* Output structure for Addr2ComputeHtileAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Address in bytes -} ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeHtileAddrFromCoord -* -* @brief -* Compute Htile address according to coordinates (of depth buffer) -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileAddrFromCoord( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT -* -* @brief -* Input structure for 
Addr2ComputeHtileCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Address - - ADDR2_META_FLAGS hTileFlags; ///< HTILE flags - ADDR2_SURFACE_FLAGS depthFlags; ///< Depth surface flags - AddrSwizzleMode swizzleMode; ///< Depth surface swizzle mode - UINT_32 bpp; ///< Depth surface bits per pixel - UINT_32 unalignedWidth; ///< Depth surface original width (of mip0) - UINT_32 unalignedHeight; ///< Depth surface original height (of mip0) - UINT_32 numSlices; ///< Depth surface original depth (of mip0) - UINT_32 numMipLevels; ///< Depth surface total mipmap levels - UINT_32 numSamples; ///< Depth surface number of samples - UINT_32 pipeXor; ///< Pipe xor setting -} ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT -* -* @brief -* Output structure for Addr2ComputeHtileCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Index of slices - UINT_32 mipId; ///< mipmap level id -} ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeHtileCoordFromAddr -* -* @brief -* Compute coordinates within depth buffer (1st pixel of a micro tile) according to -* Htile address -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileCoordFromAddr( - ADDR_HANDLE hLib, - const 
ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut); - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// C-mask functions for Gfx9 -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_CMASK_INFO_INPUT -* -* @brief -* Input structure of Addr2ComputeCmaskInfo -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_CMASKINFO_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - ADDR2_META_FLAGS cMaskFlags; ///< CMASK flags - ADDR2_SURFACE_FLAGS colorFlags; ///< Color surface flags - AddrResourceType resourceType; ///< Color surface type - AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode - UINT_32 unalignedWidth; ///< Color surface original width - UINT_32 unalignedHeight; ///< Color surface original height - UINT_32 numSlices; ///< Number of slices of color buffer -} ADDR2_COMPUTE_CMASK_INFO_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_CMASK_INFO_OUTPUT -* -* @brief -* Output structure of Addr2ComputeCmaskInfo -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_CMASK_INFO_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 pitch; ///< Pitch in pixels of color buffer which - /// this Cmask matches. The size might be larger than - /// original color buffer pitch when called with - /// an unaligned pitch. - UINT_32 height; ///< Height in pixels, as above - UINT_32 baseAlign; ///< Base alignment - UINT_32 sliceSize; ///< Slice size, in bytes. 
- UINT_32 cmaskBytes; ///< Size in bytes of CMask buffer - UINT_32 metaBlkWidth; ///< Meta block width - UINT_32 metaBlkHeight; ///< Meta block height - - UINT_32 metaBlkNumPerSlice; ///< Number of metablock within one slice -} ADDR2_COMPUTE_CMASK_INFO_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeCmaskInfo -* -* @brief -* Compute Cmask pitch, height, base alignment and size in bytes from color buffer -* info -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskInfo( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, - ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT -* -* @brief -* Input structure for Addr2ComputeCmaskAddrFromCoord -* -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Index of slices - - ADDR2_META_FLAGS cMaskFlags; ///< CMASK flags - ADDR2_SURFACE_FLAGS colorFlags; ///< Color surface flags - AddrResourceType resourceType; ///< Color surface type - AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode - - UINT_32 unalignedWidth; ///< Color surface original width (of mip0) - UINT_32 unalignedHeight; ///< Color surface original height (of mip0) - UINT_32 numSlices; ///< Color surface original slices (of mip0) - - UINT_32 numSamples; ///< Color surfae sample number - UINT_32 numFrags; ///< Color surface fragment number - - UINT_32 pipeXor; ///< pipe Xor setting -} ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT; - -/** 
-**************************************************************************************************** -* ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT -* -* @brief -* Output structure for Addr2ComputeCmaskAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< CMASK address in bytes - UINT_32 bitPosition; ///< Bit position within addr, 0 or 4 -} ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeCmaskAddrFromCoord -* -* @brief -* Compute Cmask address according to coordinates (of MSAA color buffer) -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskAddrFromCoord( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT -* -* @brief -* Input structure for Addr2ComputeCmaskCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< CMASK address in bytes - UINT_32 bitPosition; ///< Bit position within addr, 0 or 4 - - ADDR2_META_FLAGS cMaskFlags; ///< CMASK flags - ADDR2_SURFACE_FLAGS colorFlags; ///< Color surface flags - AddrResourceType resourceType; ///< Color surface type - AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode - - UINT_32 unalignedWidth; ///< Color surface original width (of mip0) - UINT_32 unalignedHeight; ///< Color 
surface original height (of mip0) - UINT_32 numSlices; ///< Color surface original slices (of mip0) - UINT_32 numMipLevels; ///< Color surface total mipmap levels. -} ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT -* -* @brief -* Output structure for Addr2ComputeCmaskCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Index of slices - UINT_32 mipId; ///< mipmap level id -} ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeCmaskCoordFromAddr -* -* @brief -* Compute coordinates within color buffer (1st pixel of a micro tile) according to -* Cmask address -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskCoordFromAddr( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut); - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// F-mask functions for Gfx9 -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* ADDR2_FMASK_FLAGS -* -* @brief -* FMASK flags -**************************************************************************************************** -*/ -typedef union _ADDR2_FMASK_FLAGS -{ - struct - { - UINT_32 resolved : 1; ///< TRUE if this is a resolved fmask, 
used by H/W clients - /// by H/W clients. S/W should always set it to FALSE. - UINT_32 reserved : 31; ///< Reserved for future use. - }; - - UINT_32 value; -} ADDR2_FMASK_FLAGS; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_FMASK_INFO_INPUT -* -* @brief -* Input structure for Addr2ComputeFmaskInfo -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_FMASK_INFO_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode - UINT_32 unalignedWidth; ///< Color surface original width - UINT_32 unalignedHeight; ///< Color surface original height - UINT_32 numSlices; ///< Number of slices/depth - UINT_32 numSamples; ///< Number of samples - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA - ADDR2_FMASK_FLAGS fMaskFlags; ///< FMASK flags -} ADDR2_COMPUTE_FMASK_INFO_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_FMASK_INFO_OUTPUT -* -* @brief -* Output structure for Addr2ComputeFmaskInfo -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_FMASK_INFO_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 pitch; ///< Pitch of fmask in pixels - UINT_32 height; ///< Height of fmask in pixels - UINT_32 baseAlign; ///< Base alignment - UINT_32 numSlices; ///< Slices of fmask - UINT_32 fmaskBytes; ///< Size of fmask in bytes - UINT_32 bpp; ///< Bits per pixel of FMASK is: number of bit planes - UINT_32 numSamples; ///< Number of samples - UINT_32 sliceSize; ///< Size of slice in bytes -} ADDR2_COMPUTE_FMASK_INFO_OUTPUT; - -/** 
-**************************************************************************************************** -* Addr2ComputeFmaskInfo -* -* @brief -* Compute Fmask pitch/height/slices/alignments and size in bytes -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskInfo( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn, - ADDR2_COMPUTE_FMASK_INFO_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT -* -* @brief -* Input structure for Addr2ComputeFmaskAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Slice index - UINT_32 sample; ///< Sample index (fragment index for EQAA) - UINT_32 plane; ///< Plane number - - UINT_32 unalignedWidth; ///< Color surface original width - UINT_32 unalignedHeight; ///< Color surface original height - UINT_32 numSamples; ///< Number of samples - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA - UINT_32 tileSwizzle; ///< Combined swizzle used to do bank/pipe rotation - - ADDR2_FMASK_FLAGS fMaskFlags; ///< FMASK flags -} ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT -* -* @brief -* Output structure for Addr2ComputeFmaskAddrFromCoord -**************************************************************************************************** -*/ 
-typedef struct _ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Fmask address - UINT_32 bitPosition; ///< Bit position within fmaskAddr, 0-7. -} ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeFmaskAddrFromCoord -* -* @brief -* Compute Fmask address according to coordinates (x,y,slice,sample,plane) -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskAddrFromCoord( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT -* -* @brief -* Input structure for Addr2ComputeFmaskCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Address - UINT_32 bitPosition; ///< Bit position within addr, 0-7. 
- AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode - - UINT_32 unalignedWidth; ///< Color surface original width - UINT_32 unalignedHeight; ///< Color surface original height - UINT_32 numSamples; ///< Number of samples - UINT_32 numFrags; ///< Number of fragments - - UINT_32 tileSwizzle; ///< Combined swizzle used to do bank/pipe rotation - - ADDR2_FMASK_FLAGS fMaskFlags; ///< FMASK flags -} ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT -* -* @brief -* Output structure for Addr2ComputeFmaskCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Slice index - UINT_32 sample; ///< Sample index (fragment index for EQAA) - UINT_32 plane; ///< Plane number -} ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeFmaskCoordFromAddr -* -* @brief -* Compute FMASK coordinate from an given address -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskCoordFromAddr( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut); - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// DCC key functions for Gfx9 -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* 
_ADDR2_COMPUTE_DCCINFO_INPUT -* -* @brief -* Input structure of Addr2ComputeDccInfo -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_DCCINFO_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - ADDR2_META_FLAGS dccKeyFlags; ///< DCC key flags - ADDR2_SURFACE_FLAGS colorFlags; ///< Color surface flags - AddrResourceType resourceType; ///< Color surface type - AddrSwizzleMode swizzleMode; ///< Color surface swizzle mode - UINT_32 bpp; ///< bits per pixel - UINT_32 unalignedWidth; ///< Color surface original width (of mip0) - UINT_32 unalignedHeight; ///< Color surface original height (of mip0) - UINT_32 numSlices; ///< Number of slices, of color surface (of mip0) - UINT_32 numFrags; ///< Fragment number of color surface - UINT_32 numMipLevels; ///< Total mipmap levels of color surface - UINT_32 dataSurfaceSize; ///< The padded size of all slices and mip levels - ///< useful in meta linear case - UINT_32 firstMipIdInTail; -} ADDR2_COMPUTE_DCCINFO_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_DCCINFO_OUTPUT -* -* @brief -* Output structure of Addr2ComputeDccInfo -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_DCCINFO_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 dccRamBaseAlign; ///< Base alignment of dcc key - UINT_32 dccRamSize; ///< Size of dcc key - - UINT_32 pitch; ///< DCC surface mip chain pitch - UINT_32 height; ///< DCC surface mip chain height - UINT_32 depth; ///< DCC surface mip chain depth - - UINT_32 compressBlkWidth; ///< DCC compress block width - UINT_32 compressBlkHeight; ///< DCC compress block height - UINT_32 compressBlkDepth; ///< DCC compress block depth - - UINT_32 metaBlkWidth; ///< DCC meta block width - UINT_32 metaBlkHeight; ///< DCC meta 
block height - UINT_32 metaBlkDepth; ///< DCC meta block depth - - UINT_32 metaBlkNumPerSlice; ///< Number of metablock within one slice - - union - { - UINT_32 fastClearSizePerSlice; ///< Size of DCC within a slice should be fast cleared - UINT_32 dccRamSliceSize; - }; - - ADDR2_META_MIP_INFO* pMipInfo; ///< DCC mip information -} ADDR2_COMPUTE_DCCINFO_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeDccInfo -* -* @brief -* Compute DCC key size, base alignment -* info -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccInfo( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, - ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut); - - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT -* -* @brief -* Input structure for Addr2ComputeDccAddrFromCoord -* -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Index of slices - UINT_32 sample; ///< Index of samples, means fragment index for EQAA - UINT_32 mipId; ///< mipmap level id - - ADDR2_META_FLAGS dccKeyFlags; ///< DCC flags - ADDR2_SURFACE_FLAGS colorFlags; ///< Color surface flags - AddrResourceType resourceType; ///< Color surface type - AddrSwizzleMode swizzleMode; ///< Color surface swizzle mode - UINT_32 bpp; ///< Color surface bits per pixel - UINT_32 unalignedWidth; ///< Color surface original width (of mip0) - UINT_32 unalignedHeight; ///< Color surface original height (of mip0) - UINT_32 numSlices; ///< Color surface original slices (of mip0) - UINT_32 numMipLevels; ///< Color surface mipmap 
levels - UINT_32 numFrags; ///< Color surface fragment number - - UINT_32 pipeXor; ///< pipe Xor setting -} ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT -* -* @brief -* Output structure for Addr2ComputeDccAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< DCC address in bytes -} ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeDccAddrFromCoord -* -* @brief -* Compute DCC address according to coordinates (of MSAA color buffer) -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccAddrFromCoord( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut); - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Misc functions for Gfx9 -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_PIPEBANKXOR_INPUT -* -* @brief -* Input structure of Addr2ComputePipebankXor -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_PIPEBANKXOR_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - UINT_32 surfIndex; ///< Input surface index - ADDR2_SURFACE_FLAGS flags; ///< Surface flag - AddrSwizzleMode swizzleMode; ///< Surface swizzle mode - AddrResourceType 
resourceType; ///< Surface resource type - AddrFormat format; ///< Surface format - UINT_32 numSamples; ///< Number of samples - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA -} ADDR2_COMPUTE_PIPEBANKXOR_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT -* -* @brief -* Output structure of Addr2ComputePipebankXor -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - UINT_32 pipeBankXor; ///< Pipe bank xor -} ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputePipeBankXor -* -* @brief -* Calculate a valid bank pipe xor value for client to use. 
-**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputePipeBankXor( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, - ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut); - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT -* -* @brief -* Input structure of Addr2ComputeSlicePipeBankXor -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - AddrSwizzleMode swizzleMode; ///< Surface swizzle mode - AddrResourceType resourceType; ///< Surface resource type - UINT_32 basePipeBankXor; ///< Base pipe bank xor - UINT_32 slice; ///< Slice id - UINT_32 numSamples; ///< Number of samples -} ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT -* -* @brief -* Output structure of Addr2ComputeSlicePipeBankXor -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - UINT_32 pipeBankXor; ///< Pipe bank xor -} ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeSlicePipeBankXor -* -* @brief -* Calculate slice pipe bank xor value based on base pipe bank xor and slice id. 
-**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeSlicePipeBankXor( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, - ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut); - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT -* -* @brief -* Input structure of Addr2ComputeSubResourceOffsetForSwizzlePattern -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - AddrSwizzleMode swizzleMode; ///< Surface swizzle mode - AddrResourceType resourceType; ///< Surface resource type - UINT_32 pipeBankXor; ///< Per resource xor - UINT_32 slice; ///< Slice id - UINT_64 sliceSize; ///< Slice size of a mip chain - UINT_64 macroBlockOffset; ///< Macro block offset, returned in ADDR2_MIP_INFO - UINT_32 mipTailOffset; ///< Mip tail offset, returned in ADDR2_MIP_INFO -} ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT -* -* @brief -* Output structure of Addr2ComputeSubResourceOffsetForSwizzlePattern -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - UINT_64 offset; ///< offset -} ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeSubResourceOffsetForSwizzlePattern -* -* @brief -* Calculate 
sub resource offset to support swizzle pattern. -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeSubResourceOffsetForSwizzlePattern( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, - ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut); - -/** -**************************************************************************************************** -* ADDR2_BLOCK_SET -* -* @brief -* Bit field that defines block type -**************************************************************************************************** -*/ -typedef union _ADDR2_BLOCK_SET -{ - struct - { - UINT_32 micro : 1; // 256B block for 2D resource - UINT_32 macro4KB : 1; // 4KB for 2D/3D resource - UINT_32 macro64KB : 1; // 64KB for 2D/3D resource - UINT_32 var : 1; // VAR block - UINT_32 linear : 1; // Linear block - UINT_32 reserved : 27; - }; - - UINT_32 value; -} ADDR2_BLOCK_SET; - -/** -**************************************************************************************************** -* ADDR2_SWTYPE_SET -* -* @brief -* Bit field that defines swizzle type -**************************************************************************************************** -*/ -typedef union _ADDR2_SWTYPE_SET -{ - struct - { - UINT_32 sw_Z : 1; // SW_*_Z_* - UINT_32 sw_S : 1; // SW_*_S_* - UINT_32 sw_D : 1; // SW_*_D_* - UINT_32 sw_R : 1; // SW_*_R_* - UINT_32 reserved : 28; - }; - - UINT_32 value; -} ADDR2_SWTYPE_SET; - -/** -**************************************************************************************************** -* ADDR2_GET_PREFERRED_SURF_SETTING_INPUT -* -* @brief -* Input structure of Addr2GetPreferredSurfaceSetting -**************************************************************************************************** -*/ -typedef struct _ADDR2_GET_PREFERRED_SURF_SETTING_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - 
ADDR2_SURFACE_FLAGS flags; ///< Surface flags - AddrResourceType resourceType; ///< Surface type - AddrFormat format; ///< Surface format - AddrResrouceLocation resourceLoction; ///< Surface heap choice - ADDR2_BLOCK_SET forbiddenBlock; ///< Client can use it to disable some block setting - ///< such as linear for DXTn, tiled for YUV - ADDR2_SWTYPE_SET preferredSwSet; ///< Client can use it to specify sw type(s) wanted - BOOL_32 noXor; ///< Do not use xor mode for this resource - UINT_32 bpp; ///< bits per pixel - UINT_32 width; ///< Width (of mip0), in pixels - UINT_32 height; ///< Height (of mip0), in pixels - UINT_32 numSlices; ///< Number surface slice/depth (of mip0), - UINT_32 numMipLevels; ///< Total mipmap levels. - UINT_32 numSamples; ///< Number of samples - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA - UINT_32 maxAlign; ///< maximum base/size alignment requested by client - UINT_32 minSizeAlign; ///< memory allocated for surface in client driver will - /// be padded to multiple of this value (in bytes) -} ADDR2_GET_PREFERRED_SURF_SETTING_INPUT; - -/** -**************************************************************************************************** -* ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT -* -* @brief -* Output structure of Addr2GetPreferredSurfaceSetting -**************************************************************************************************** -*/ -typedef struct _ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - AddrSwizzleMode swizzleMode; ///< Suggested swizzle mode to be used - AddrResourceType resourceType; ///< Suggested resource type to program HW - ADDR2_BLOCK_SET validBlockSet; ///< Valid block type bit conbination - BOOL_32 canXor; ///< If client can use xor on a valid macro block - /// type - ADDR2_SWTYPE_SET validSwTypeSet; ///< Valid swizzle type bit 
combination - ADDR2_SWTYPE_SET clientPreferredSwSet; ///< Client-preferred swizzle type bit combination -} ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT; - -/** -**************************************************************************************************** -* Addr2GetPreferredSurfaceSetting -* -* @brief -* Suggest a preferred setting for client driver to program HW register -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2GetPreferredSurfaceSetting( - ADDR_HANDLE hLib, - const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, - ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut); - -/** -**************************************************************************************************** -* Addr2IsValidDisplaySwizzleMode -* -* @brief -* Return whether the swizzle mode is supported by DCE / DCN. -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2IsValidDisplaySwizzleMode( - ADDR_HANDLE hLib, - AddrSwizzleMode swizzleMode, - UINT_32 bpp, - bool *result); - -#if defined(__cplusplus) -} -#endif - -#endif // __ADDR_INTERFACE_H__ diff -Nru mesa-18.3.3/src/amd/addrlib/addrtypes.h mesa-19.0.1/src/amd/addrlib/addrtypes.h --- mesa-18.3.3/src/amd/addrlib/addrtypes.h 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/addrtypes.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,749 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addrtypes.h -* @brief Contains the helper function and constants -**************************************************************************************************** -*/ -#ifndef __ADDR_TYPES_H__ -#define __ADDR_TYPES_H__ - -#if defined(__APPLE__) && !defined(HAVE_TSERVER) -// External definitions header maintained by Apple driver team, but not for diag team under Mac. 
-// Helps address compilation issues & reduces code covered by NDA -#include "addrExtDef.h" - -#else - -// Windows and/or Linux -#if !defined(VOID) -typedef void VOID; -#endif - -#if !defined(FLOAT) -typedef float FLOAT; -#endif - -#if !defined(CHAR) -typedef char CHAR; -#endif - -#if !defined(INT) -typedef int INT; -#endif - -#include // va_list...etc need this header - -#endif // defined (__APPLE__) && !defined(HAVE_TSERVER) - -/** -**************************************************************************************************** -* Calling conventions -**************************************************************************************************** -*/ -#ifndef ADDR_CDECL - #if defined(__GNUC__) - #define ADDR_CDECL __attribute__((cdecl)) - #else - #define ADDR_CDECL __cdecl - #endif -#endif - -#ifndef ADDR_STDCALL - #if defined(__GNUC__) - #if defined(__amd64__) || defined(__x86_64__) - #define ADDR_STDCALL - #else - #define ADDR_STDCALL __attribute__((stdcall)) - #endif - #else - #define ADDR_STDCALL __stdcall - #endif -#endif - -#ifndef ADDR_FASTCALL - #if defined(BRAHMA_ARM) - #define ADDR_FASTCALL - #elif defined(__GNUC__) - #if defined(__i386__) - #define ADDR_FASTCALL __attribute__((regparm(0))) - #else - #define ADDR_FASTCALL - #endif - #else - #define ADDR_FASTCALL __fastcall - #endif -#endif - -#ifndef GC_CDECL - #define GC_CDECL ADDR_CDECL -#endif - -#ifndef GC_STDCALL - #define GC_STDCALL ADDR_STDCALL -#endif - -#ifndef GC_FASTCALL - #define GC_FASTCALL ADDR_FASTCALL -#endif - - -#if defined(__GNUC__) - #define ADDR_INLINE static inline // inline needs to be static to link -#else - // win32, win64, other platforms - #define ADDR_INLINE __inline -#endif // #if defined(__GNUC__) - -#define ADDR_API ADDR_FASTCALL //default call convention is fast call - -/** -**************************************************************************************************** -* Global defines used by other modules 
-**************************************************************************************************** -*/ -#if !defined(TILEINDEX_INVALID) -#define TILEINDEX_INVALID -1 -#endif - -#if !defined(TILEINDEX_LINEAR_GENERAL) -#define TILEINDEX_LINEAR_GENERAL -2 -#endif - -#if !defined(TILEINDEX_LINEAR_ALIGNED) -#define TILEINDEX_LINEAR_ALIGNED 8 -#endif - -/** -**************************************************************************************************** -* Return codes -**************************************************************************************************** -*/ -typedef enum _ADDR_E_RETURNCODE -{ - // General Return - ADDR_OK = 0, - ADDR_ERROR = 1, - - // Specific Errors - ADDR_OUTOFMEMORY, - ADDR_INVALIDPARAMS, - ADDR_NOTSUPPORTED, - ADDR_NOTIMPLEMENTED, - ADDR_PARAMSIZEMISMATCH, - ADDR_INVALIDGBREGVALUES, - -} ADDR_E_RETURNCODE; - -/** -**************************************************************************************************** -* @brief -* Neutral enums that define tile modes for all H/W -* @note -* R600/R800 tiling mode can be cast to hw enums directly but never cast into HW enum from -* ADDR_TM_2D_TILED_XTHICK -* -**************************************************************************************************** -*/ -typedef enum _AddrTileMode -{ - ADDR_TM_LINEAR_GENERAL = 0, ///< Least restrictions, pitch: multiple of 8 if not buffer - ADDR_TM_LINEAR_ALIGNED = 1, ///< Requests pitch or slice to be multiple of 64 pixels - ADDR_TM_1D_TILED_THIN1 = 2, ///< Linear array of 8x8 tiles - ADDR_TM_1D_TILED_THICK = 3, ///< Linear array of 8x8x4 tiles - ADDR_TM_2D_TILED_THIN1 = 4, ///< A set of macro tiles consist of 8x8 tiles - ADDR_TM_2D_TILED_THIN2 = 5, ///< 600 HWL only, macro tile ratio is 1:4 - ADDR_TM_2D_TILED_THIN4 = 6, ///< 600 HWL only, macro tile ratio is 1:16 - ADDR_TM_2D_TILED_THICK = 7, ///< A set of macro tiles consist of 8x8x4 tiles - ADDR_TM_2B_TILED_THIN1 = 8, ///< 600 HWL only, with bank swap - ADDR_TM_2B_TILED_THIN2 = 9, ///< 
600 HWL only, with bank swap and ratio is 1:4 - ADDR_TM_2B_TILED_THIN4 = 10, ///< 600 HWL only, with bank swap and ratio is 1:16 - ADDR_TM_2B_TILED_THICK = 11, ///< 600 HWL only, with bank swap, consists of 8x8x4 tiles - ADDR_TM_3D_TILED_THIN1 = 12, ///< Macro tiling w/ pipe rotation between slices - ADDR_TM_3D_TILED_THICK = 13, ///< Macro tiling w/ pipe rotation bwtween slices, thick - ADDR_TM_3B_TILED_THIN1 = 14, ///< 600 HWL only, with bank swap - ADDR_TM_3B_TILED_THICK = 15, ///< 600 HWL only, with bank swap, thick - ADDR_TM_2D_TILED_XTHICK = 16, ///< Tile is 8x8x8, valid from NI - ADDR_TM_3D_TILED_XTHICK = 17, ///< Tile is 8x8x8, valid from NI - ADDR_TM_POWER_SAVE = 18, ///< Power save mode, only used by KMD on NI - ADDR_TM_PRT_TILED_THIN1 = 19, ///< No bank/pipe rotation or hashing beyond macrotile size - ADDR_TM_PRT_2D_TILED_THIN1 = 20, ///< Same as 2D_TILED_THIN1, PRT only - ADDR_TM_PRT_3D_TILED_THIN1 = 21, ///< Same as 3D_TILED_THIN1, PRT only - ADDR_TM_PRT_TILED_THICK = 22, ///< No bank/pipe rotation or hashing beyond macrotile size - ADDR_TM_PRT_2D_TILED_THICK = 23, ///< Same as 2D_TILED_THICK, PRT only - ADDR_TM_PRT_3D_TILED_THICK = 24, ///< Same as 3D_TILED_THICK, PRT only - ADDR_TM_UNKNOWN = 25, ///< Unkown tile mode, should be decided by address lib - ADDR_TM_COUNT = 26, ///< Must be the value of the last tile mode -} AddrTileMode; - -/** -**************************************************************************************************** -* @brief -* Neutral enums that define swizzle modes for Gfx9 ASIC -* @note -* -* ADDR_SW_LINEAR linear aligned addressing mode, for 1D/2D/3D resouce -* ADDR_SW_256B_* addressing block aligned size is 256B, for 2D/3D resouce -* ADDR_SW_4KB_* addressing block aligned size is 4KB, for 2D/3D resouce -* ADDR_SW_64KB_* addressing block aligned size is 64KB, for 2D/3D resouce -* ADDR_SW_VAR_* addressing block aligned size is ASIC specific, for 2D/3D resouce -* -* ADDR_SW_*_Z For 2D resouce, represents Z-order swizzle mode 
for depth/stencil/FMask - For 3D resouce, represents a swizzle mode similar to legacy thick tile mode -* ADDR_SW_*_S represents standard swizzle mode defined by MS -* ADDR_SW_*_D For 2D resouce, represents a swizzle mode for displayable resource -* For 3D resouce, represents a swizzle mode which places each slice in order & pixel - within slice is placed as 2D ADDR_SW_*_S. Don't use this combination if possible! -* ADDR_SW_*_R For 2D resouce only, represents a swizzle mode for rotated displayable resource -* -**************************************************************************************************** -*/ -typedef enum _AddrSwizzleMode -{ - ADDR_SW_LINEAR = 0, - ADDR_SW_256B_S = 1, - ADDR_SW_256B_D = 2, - ADDR_SW_256B_R = 3, - ADDR_SW_4KB_Z = 4, - ADDR_SW_4KB_S = 5, - ADDR_SW_4KB_D = 6, - ADDR_SW_4KB_R = 7, - ADDR_SW_64KB_Z = 8, - ADDR_SW_64KB_S = 9, - ADDR_SW_64KB_D = 10, - ADDR_SW_64KB_R = 11, - ADDR_SW_VAR_Z = 12, - ADDR_SW_VAR_S = 13, - ADDR_SW_VAR_D = 14, - ADDR_SW_VAR_R = 15, - ADDR_SW_64KB_Z_T = 16, - ADDR_SW_64KB_S_T = 17, - ADDR_SW_64KB_D_T = 18, - ADDR_SW_64KB_R_T = 19, - ADDR_SW_4KB_Z_X = 20, - ADDR_SW_4KB_S_X = 21, - ADDR_SW_4KB_D_X = 22, - ADDR_SW_4KB_R_X = 23, - ADDR_SW_64KB_Z_X = 24, - ADDR_SW_64KB_S_X = 25, - ADDR_SW_64KB_D_X = 26, - ADDR_SW_64KB_R_X = 27, - ADDR_SW_VAR_Z_X = 28, - ADDR_SW_VAR_S_X = 29, - ADDR_SW_VAR_D_X = 30, - ADDR_SW_VAR_R_X = 31, - ADDR_SW_LINEAR_GENERAL = 32, - ADDR_SW_MAX_TYPE = 33, - - // Used for represent block with identical size - ADDR_SW_256B = ADDR_SW_256B_S, - ADDR_SW_4KB = ADDR_SW_4KB_S_X, - ADDR_SW_64KB = ADDR_SW_64KB_S_X, - ADDR_SW_VAR = ADDR_SW_VAR_S_X, -} AddrSwizzleMode; - -/** -**************************************************************************************************** -* @brief -* Neutral enums that define image type -* @note -* this is new for address library interface version 2 -* -**************************************************************************************************** -*/ -typedef 
enum _AddrResourceType -{ - ADDR_RSRC_TEX_1D = 0, - ADDR_RSRC_TEX_2D = 1, - ADDR_RSRC_TEX_3D = 2, - ADDR_RSRC_MAX_TYPE = 3, -} AddrResourceType; - -/** -**************************************************************************************************** -* @brief -* Neutral enums that define resource heap location -* @note -* this is new for address library interface version 2 -* -**************************************************************************************************** -*/ -typedef enum _AddrResrouceLocation -{ - ADDR_RSRC_LOC_UNDEF = 0, // Resource heap is undefined/unknown - ADDR_RSRC_LOC_LOCAL = 1, // CPU visable and CPU invisable local heap - ADDR_RSRC_LOC_USWC = 2, // CPU write-combined non-cached nonlocal heap - ADDR_RSRC_LOC_CACHED = 3, // CPU cached nonlocal heap - ADDR_RSRC_LOC_INVIS = 4, // CPU invisable local heap only - ADDR_RSRC_LOC_MAX_TYPE = 5, -} AddrResrouceLocation; - -/** -**************************************************************************************************** -* @brief -* Neutral enums that define resource basic swizzle mode -* @note -* this is new for address library interface version 2 -* -**************************************************************************************************** -*/ -typedef enum _AddrSwType -{ - ADDR_SW_Z = 0, // Resource basic swizzle mode is ZOrder - ADDR_SW_S = 1, // Resource basic swizzle mode is Standard - ADDR_SW_D = 2, // Resource basic swizzle mode is Display - ADDR_SW_R = 3, // Resource basic swizzle mode is Rotated -} AddrSwType; - -/** -**************************************************************************************************** -* @brief -* Neutral enums that define mipmap major mode -* @note -* this is new for address library interface version 2 -* -**************************************************************************************************** -*/ -typedef enum _AddrMajorMode -{ - ADDR_MAJOR_X = 0, - ADDR_MAJOR_Y = 1, - ADDR_MAJOR_Z = 2, - ADDR_MAJOR_MAX_TYPE = 3, -} 
AddrMajorMode; - -/** -**************************************************************************************************** -* AddrFormat -* -* @brief -* Neutral enum for SurfaceFormat -* -**************************************************************************************************** -*/ -typedef enum _AddrFormat { - ADDR_FMT_INVALID = 0x00000000, - ADDR_FMT_8 = 0x00000001, - ADDR_FMT_4_4 = 0x00000002, - ADDR_FMT_3_3_2 = 0x00000003, - ADDR_FMT_RESERVED_4 = 0x00000004, - ADDR_FMT_16 = 0x00000005, - ADDR_FMT_16_FLOAT = 0x00000006, - ADDR_FMT_8_8 = 0x00000007, - ADDR_FMT_5_6_5 = 0x00000008, - ADDR_FMT_6_5_5 = 0x00000009, - ADDR_FMT_1_5_5_5 = 0x0000000a, - ADDR_FMT_4_4_4_4 = 0x0000000b, - ADDR_FMT_5_5_5_1 = 0x0000000c, - ADDR_FMT_32 = 0x0000000d, - ADDR_FMT_32_FLOAT = 0x0000000e, - ADDR_FMT_16_16 = 0x0000000f, - ADDR_FMT_16_16_FLOAT = 0x00000010, - ADDR_FMT_8_24 = 0x00000011, - ADDR_FMT_8_24_FLOAT = 0x00000012, - ADDR_FMT_24_8 = 0x00000013, - ADDR_FMT_24_8_FLOAT = 0x00000014, - ADDR_FMT_10_11_11 = 0x00000015, - ADDR_FMT_10_11_11_FLOAT = 0x00000016, - ADDR_FMT_11_11_10 = 0x00000017, - ADDR_FMT_11_11_10_FLOAT = 0x00000018, - ADDR_FMT_2_10_10_10 = 0x00000019, - ADDR_FMT_8_8_8_8 = 0x0000001a, - ADDR_FMT_10_10_10_2 = 0x0000001b, - ADDR_FMT_X24_8_32_FLOAT = 0x0000001c, - ADDR_FMT_32_32 = 0x0000001d, - ADDR_FMT_32_32_FLOAT = 0x0000001e, - ADDR_FMT_16_16_16_16 = 0x0000001f, - ADDR_FMT_16_16_16_16_FLOAT = 0x00000020, - ADDR_FMT_RESERVED_33 = 0x00000021, - ADDR_FMT_32_32_32_32 = 0x00000022, - ADDR_FMT_32_32_32_32_FLOAT = 0x00000023, - ADDR_FMT_RESERVED_36 = 0x00000024, - ADDR_FMT_1 = 0x00000025, - ADDR_FMT_1_REVERSED = 0x00000026, - ADDR_FMT_GB_GR = 0x00000027, - ADDR_FMT_BG_RG = 0x00000028, - ADDR_FMT_32_AS_8 = 0x00000029, - ADDR_FMT_32_AS_8_8 = 0x0000002a, - ADDR_FMT_5_9_9_9_SHAREDEXP = 0x0000002b, - ADDR_FMT_8_8_8 = 0x0000002c, - ADDR_FMT_16_16_16 = 0x0000002d, - ADDR_FMT_16_16_16_FLOAT = 0x0000002e, - ADDR_FMT_32_32_32 = 0x0000002f, - ADDR_FMT_32_32_32_FLOAT = 
0x00000030, - ADDR_FMT_BC1 = 0x00000031, - ADDR_FMT_BC2 = 0x00000032, - ADDR_FMT_BC3 = 0x00000033, - ADDR_FMT_BC4 = 0x00000034, - ADDR_FMT_BC5 = 0x00000035, - ADDR_FMT_BC6 = 0x00000036, - ADDR_FMT_BC7 = 0x00000037, - ADDR_FMT_32_AS_32_32_32_32 = 0x00000038, - ADDR_FMT_APC3 = 0x00000039, - ADDR_FMT_APC4 = 0x0000003a, - ADDR_FMT_APC5 = 0x0000003b, - ADDR_FMT_APC6 = 0x0000003c, - ADDR_FMT_APC7 = 0x0000003d, - ADDR_FMT_CTX1 = 0x0000003e, - ADDR_FMT_RESERVED_63 = 0x0000003f, - ADDR_FMT_ASTC_4x4 = 0x00000040, - ADDR_FMT_ASTC_5x4 = 0x00000041, - ADDR_FMT_ASTC_5x5 = 0x00000042, - ADDR_FMT_ASTC_6x5 = 0x00000043, - ADDR_FMT_ASTC_6x6 = 0x00000044, - ADDR_FMT_ASTC_8x5 = 0x00000045, - ADDR_FMT_ASTC_8x6 = 0x00000046, - ADDR_FMT_ASTC_8x8 = 0x00000047, - ADDR_FMT_ASTC_10x5 = 0x00000048, - ADDR_FMT_ASTC_10x6 = 0x00000049, - ADDR_FMT_ASTC_10x8 = 0x0000004a, - ADDR_FMT_ASTC_10x10 = 0x0000004b, - ADDR_FMT_ASTC_12x10 = 0x0000004c, - ADDR_FMT_ASTC_12x12 = 0x0000004d, - ADDR_FMT_ETC2_64BPP = 0x0000004e, - ADDR_FMT_ETC2_128BPP = 0x0000004f, -} AddrFormat; - -/** -**************************************************************************************************** -* AddrDepthFormat -* -* @brief -* Neutral enum for addrFlt32ToDepthPixel -* -**************************************************************************************************** -*/ -typedef enum _AddrDepthFormat -{ - ADDR_DEPTH_INVALID = 0x00000000, - ADDR_DEPTH_16 = 0x00000001, - ADDR_DEPTH_X8_24 = 0x00000002, - ADDR_DEPTH_8_24 = 0x00000003, - ADDR_DEPTH_X8_24_FLOAT = 0x00000004, - ADDR_DEPTH_8_24_FLOAT = 0x00000005, - ADDR_DEPTH_32_FLOAT = 0x00000006, - ADDR_DEPTH_X24_8_32_FLOAT = 0x00000007, - -} AddrDepthFormat; - -/** -**************************************************************************************************** -* AddrColorFormat -* -* @brief -* Neutral enum for ColorFormat -* -**************************************************************************************************** -*/ -typedef enum _AddrColorFormat -{ - 
ADDR_COLOR_INVALID = 0x00000000, - ADDR_COLOR_8 = 0x00000001, - ADDR_COLOR_4_4 = 0x00000002, - ADDR_COLOR_3_3_2 = 0x00000003, - ADDR_COLOR_RESERVED_4 = 0x00000004, - ADDR_COLOR_16 = 0x00000005, - ADDR_COLOR_16_FLOAT = 0x00000006, - ADDR_COLOR_8_8 = 0x00000007, - ADDR_COLOR_5_6_5 = 0x00000008, - ADDR_COLOR_6_5_5 = 0x00000009, - ADDR_COLOR_1_5_5_5 = 0x0000000a, - ADDR_COLOR_4_4_4_4 = 0x0000000b, - ADDR_COLOR_5_5_5_1 = 0x0000000c, - ADDR_COLOR_32 = 0x0000000d, - ADDR_COLOR_32_FLOAT = 0x0000000e, - ADDR_COLOR_16_16 = 0x0000000f, - ADDR_COLOR_16_16_FLOAT = 0x00000010, - ADDR_COLOR_8_24 = 0x00000011, - ADDR_COLOR_8_24_FLOAT = 0x00000012, - ADDR_COLOR_24_8 = 0x00000013, - ADDR_COLOR_24_8_FLOAT = 0x00000014, - ADDR_COLOR_10_11_11 = 0x00000015, - ADDR_COLOR_10_11_11_FLOAT = 0x00000016, - ADDR_COLOR_11_11_10 = 0x00000017, - ADDR_COLOR_11_11_10_FLOAT = 0x00000018, - ADDR_COLOR_2_10_10_10 = 0x00000019, - ADDR_COLOR_8_8_8_8 = 0x0000001a, - ADDR_COLOR_10_10_10_2 = 0x0000001b, - ADDR_COLOR_X24_8_32_FLOAT = 0x0000001c, - ADDR_COLOR_32_32 = 0x0000001d, - ADDR_COLOR_32_32_FLOAT = 0x0000001e, - ADDR_COLOR_16_16_16_16 = 0x0000001f, - ADDR_COLOR_16_16_16_16_FLOAT = 0x00000020, - ADDR_COLOR_RESERVED_33 = 0x00000021, - ADDR_COLOR_32_32_32_32 = 0x00000022, - ADDR_COLOR_32_32_32_32_FLOAT = 0x00000023, -} AddrColorFormat; - -/** -**************************************************************************************************** -* AddrSurfaceNumber -* -* @brief -* Neutral enum for SurfaceNumber -* -**************************************************************************************************** -*/ -typedef enum _AddrSurfaceNumber { - ADDR_NUMBER_UNORM = 0x00000000, - ADDR_NUMBER_SNORM = 0x00000001, - ADDR_NUMBER_USCALED = 0x00000002, - ADDR_NUMBER_SSCALED = 0x00000003, - ADDR_NUMBER_UINT = 0x00000004, - ADDR_NUMBER_SINT = 0x00000005, - ADDR_NUMBER_SRGB = 0x00000006, - ADDR_NUMBER_FLOAT = 0x00000007, -} AddrSurfaceNumber; - -/** 
-**************************************************************************************************** -* AddrSurfaceSwap -* -* @brief -* Neutral enum for SurfaceSwap -* -**************************************************************************************************** -*/ -typedef enum _AddrSurfaceSwap { - ADDR_SWAP_STD = 0x00000000, - ADDR_SWAP_ALT = 0x00000001, - ADDR_SWAP_STD_REV = 0x00000002, - ADDR_SWAP_ALT_REV = 0x00000003, -} AddrSurfaceSwap; - -/** -**************************************************************************************************** -* AddrHtileBlockSize -* -* @brief -* Size of HTILE blocks, valid values are 4 or 8 for now -**************************************************************************************************** -*/ -typedef enum _AddrHtileBlockSize -{ - ADDR_HTILE_BLOCKSIZE_4 = 4, - ADDR_HTILE_BLOCKSIZE_8 = 8, -} AddrHtileBlockSize; - - -/** -**************************************************************************************************** -* AddrPipeCfg -* -* @brief -* The pipe configuration field specifies both the number of pipes and -* how pipes are interleaved on the surface. -* The expression of number of pipes, the shader engine tile size, and packer tile size -* is encoded in a PIPE_CONFIG register field. -* In general the number of pipes usually matches the number of memory channels of the -* hardware configuration. -* For hw configurations w/ non-pow2 memory number of memory channels, it usually matches -* the number of ROP units(? TODO: which registers??) -* The enum value = hw enum + 1 which is to reserve 0 for requesting default. 
-**************************************************************************************************** -*/ -typedef enum _AddrPipeCfg -{ - ADDR_PIPECFG_INVALID = 0, - ADDR_PIPECFG_P2 = 1, /// 2 pipes, - ADDR_PIPECFG_P4_8x16 = 5, /// 4 pipes, - ADDR_PIPECFG_P4_16x16 = 6, - ADDR_PIPECFG_P4_16x32 = 7, - ADDR_PIPECFG_P4_32x32 = 8, - ADDR_PIPECFG_P8_16x16_8x16 = 9, /// 8 pipes - ADDR_PIPECFG_P8_16x32_8x16 = 10, - ADDR_PIPECFG_P8_32x32_8x16 = 11, - ADDR_PIPECFG_P8_16x32_16x16 = 12, - ADDR_PIPECFG_P8_32x32_16x16 = 13, - ADDR_PIPECFG_P8_32x32_16x32 = 14, - ADDR_PIPECFG_P8_32x64_32x32 = 15, - ADDR_PIPECFG_P16_32x32_8x16 = 17, /// 16 pipes - ADDR_PIPECFG_P16_32x32_16x16 = 18, - ADDR_PIPECFG_MAX = 19, -} AddrPipeCfg; - -/** -**************************************************************************************************** -* AddrTileType -* -* @brief -* Neutral enums that specifies micro tile type (MICRO_TILE_MODE) -**************************************************************************************************** -*/ -typedef enum _AddrTileType -{ - ADDR_DISPLAYABLE = 0, ///< Displayable tiling - ADDR_NON_DISPLAYABLE = 1, ///< Non-displayable tiling, a.k.a thin micro tiling - ADDR_DEPTH_SAMPLE_ORDER = 2, ///< Same as non-displayable plus depth-sample-order - ADDR_ROTATED = 3, ///< Rotated displayable tiling - ADDR_THICK = 4, ///< Thick micro-tiling, only valid for THICK and XTHICK -} AddrTileType; - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Type definitions: short system-independent names for address library types -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#if !defined(__APPLE__) || defined(HAVE_TSERVER) - -#ifndef BOOL_32 // no bool type in C -/// @brief Boolean type, since none is defined in C -/// @ingroup type -#define BOOL_32 int -#endif - -#ifndef INT_32 -#define INT_32 int -#endif - -#ifndef UINT_32 -#define UINT_32 unsigned int 
-#endif - -#ifndef INT_16 -#define INT_16 short -#endif - -#ifndef UINT_16 -#define UINT_16 unsigned short -#endif - -#ifndef INT_8 -#define INT_8 char -#endif - -#ifndef UINT_8 -#define UINT_8 unsigned char -#endif - -#ifndef NULL -#define NULL 0 -#endif - -#ifndef TRUE -#define TRUE 1 -#endif - -#ifndef FALSE -#define FALSE 0 -#endif - -// -// 64-bit integer types depend on the compiler -// -#if defined( __GNUC__ ) || defined( __WATCOMC__ ) -#define INT_64 long long -#define UINT_64 unsigned long long - -#elif defined( _WIN32 ) -#define INT_64 __int64 -#define UINT_64 unsigned __int64 - -#else -#error Unsupported compiler and/or operating system for 64-bit integers - -/// @brief 64-bit signed integer type (compiler dependent) -/// @ingroup type -/// -/// The addrlib defines a 64-bit signed integer type for either -/// Gnu/Watcom compilers (which use the first syntax) or for -/// the Windows VCC compiler (which uses the second syntax). -#define INT_64 long long OR __int64 - -/// @brief 64-bit unsigned integer type (compiler dependent) -/// @ingroup type -/// -/// The addrlib defines a 64-bit unsigned integer type for either -/// Gnu/Watcom compilers (which use the first syntax) or for -/// the Windows VCC compiler (which uses the second syntax). -/// -#define UINT_64 unsigned long long OR unsigned __int64 -#endif - -#endif // #if !defined(__APPLE__) || defined(HAVE_TSERVER) - -// ADDR64X is used to print addresses in hex form on both Windows and Linux -// -#if defined( __GNUC__ ) || defined( __WATCOMC__ ) -#define ADDR64X "llx" -#define ADDR64D "lld" - -#elif defined( _WIN32 ) -#define ADDR64X "I64x" -#define ADDR64D "I64d" - -#else -#error Unsupported compiler and/or operating system for 64-bit integers - -/// @brief Addrlib device address 64-bit printf tag (compiler dependent) -/// @ingroup type -/// -/// This allows printf to display an ADDR_64 for either the Windows VCC compiler -/// (which used this value) or the Gnu/Watcom compilers (which use "llx". 
-/// An example of use is printf("addr 0x%"ADDR64X"\n", address); -/// -#define ADDR64X "llx" OR "I64x" -#define ADDR64D "lld" OR "I64d" -#endif - - -/// @brief Union for storing a 32-bit float or 32-bit integer -/// @ingroup type -/// -/// This union provides a simple way to convert between a 32-bit float -/// and a 32-bit integer. It also prevents the compiler from producing -/// code that alters NaN values when assiging or coying floats. -/// Therefore, all address library routines that pass or return 32-bit -/// floating point data do so by passing or returning a FLT_32. -/// -typedef union { - INT_32 i; - UINT_32 u; - float f; -} ADDR_FLT_32; - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Macros for controlling linking and building on multiple systems -// -//////////////////////////////////////////////////////////////////////////////////////////////////// -#if defined(_MSC_VER) -#if defined(va_copy) -#undef va_copy //redefine va_copy to support VC2013 -#endif -#endif - -#if !defined(va_copy) -#define va_copy(dst, src) \ - ((void) memcpy(&(dst), &(src), sizeof(va_list))) -#endif - -#endif // __ADDR_TYPES_H__ - diff -Nru mesa-18.3.3/src/amd/addrlib/amdgpu_asic_addr.h mesa-19.0.1/src/amd/addrlib/amdgpu_asic_addr.h --- mesa-18.3.3/src/amd/addrlib/amdgpu_asic_addr.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/amdgpu_asic_addr.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,138 +0,0 @@ -/* - * Copyright © 2017 Advanced Micro Devices, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- */ - -#ifndef _AMDGPU_ASIC_ADDR_H -#define _AMDGPU_ASIC_ADDR_H - -#define ATI_VENDOR_ID 0x1002 -#define AMD_VENDOR_ID 0x1022 - -// AMDGPU_VENDOR_IS_AMD(vendorId) -#define AMDGPU_VENDOR_IS_AMD(v) ((v == ATI_VENDOR_ID) || (v == AMD_VENDOR_ID)) - -#define FAMILY_UNKNOWN 0x00 -#define FAMILY_TN 0x69 -#define FAMILY_SI 0x6E -#define FAMILY_CI 0x78 -#define FAMILY_KV 0x7D -#define FAMILY_VI 0x82 -#define FAMILY_POLARIS 0x82 -#define FAMILY_CZ 0x87 -#define FAMILY_AI 0x8D -#define FAMILY_RV 0x8E - -// AMDGPU_FAMILY_IS(familyId, familyName) -#define FAMILY_IS(f, fn) (f == FAMILY_##fn) -#define FAMILY_IS_TN(f) FAMILY_IS(f, TN) -#define FAMILY_IS_SI(f) FAMILY_IS(f, SI) -#define FAMILY_IS_CI(f) FAMILY_IS(f, CI) -#define FAMILY_IS_KV(f) FAMILY_IS(f, KV) -#define FAMILY_IS_VI(f) FAMILY_IS(f, VI) -#define FAMILY_IS_POLARIS(f) FAMILY_IS(f, POLARIS) -#define FAMILY_IS_CZ(f) FAMILY_IS(f, CZ) -#define FAMILY_IS_AI(f) FAMILY_IS(f, AI) -#define FAMILY_IS_RV(f) FAMILY_IS(f, RV) - -#define AMDGPU_UNKNOWN 0xFF - -#define AMDGPU_TAHITI_RANGE 0x05, 0x14 -#define AMDGPU_PITCAIRN_RANGE 0x15, 0x28 -#define AMDGPU_CAPEVERDE_RANGE 0x29, 0x3C -#define AMDGPU_OLAND_RANGE 0x3C, 0x46 -#define AMDGPU_HAINAN_RANGE 0x46, 0xFF - -#define AMDGPU_BONAIRE_RANGE 0x14, 0x28 -#define AMDGPU_HAWAII_RANGE 0x28, 0x3C - -#define AMDGPU_SPECTRE_RANGE 0x01, 0x41 -#define AMDGPU_SPOOKY_RANGE 0x41, 0x81 -#define AMDGPU_KALINDI_RANGE 0x81, 0xA1 -#define AMDGPU_GODAVARI_RANGE 0xA1, 0xFF - -#define AMDGPU_ICELAND_RANGE 0x01, 0x14 -#define AMDGPU_TONGA_RANGE 0x14, 0x28 -#define AMDGPU_FIJI_RANGE 0x3C, 0x50 - -#define AMDGPU_POLARIS10_RANGE 0x50, 0x5A -#define AMDGPU_POLARIS11_RANGE 0x5A, 0x64 -#define AMDGPU_POLARIS12_RANGE 0x64, 0x6E -#define AMDGPU_VEGAM_RANGE 0x6E, 0xFF - -#define AMDGPU_CARRIZO_RANGE 0x01, 0x21 -#define AMDGPU_BRISTOL_RANGE 0x10, 0x21 -#define AMDGPU_STONEY_RANGE 0x61, 0xFF - -#define AMDGPU_VEGA10_RANGE 0x01, 0x14 -#define AMDGPU_VEGA12_RANGE 0x14, 0x28 -#define AMDGPU_VEGA20_RANGE 0x28, 0xFF - 
-#define AMDGPU_RAVEN_RANGE 0x01, 0x81 -#define AMDGPU_RAVEN2_RANGE 0x81, 0xFF - -#define AMDGPU_EXPAND_FIX(x) x -#define AMDGPU_RANGE_HELPER(val, min, max) ((val >= min) && (val < max)) -#define AMDGPU_IN_RANGE(val, ...) AMDGPU_EXPAND_FIX(AMDGPU_RANGE_HELPER(val, __VA_ARGS__)) - - -// ASICREV_IS(eRevisionId, revisionName) -#define ASICREV_IS(r, rn) AMDGPU_IN_RANGE(r, AMDGPU_##rn##_RANGE) -#define ASICREV_IS_TAHITI_P(r) ASICREV_IS(r, TAHITI) -#define ASICREV_IS_PITCAIRN_PM(r) ASICREV_IS(r, PITCAIRN) -#define ASICREV_IS_CAPEVERDE_M(r) ASICREV_IS(r, CAPEVERDE) -#define ASICREV_IS_OLAND_M(r) ASICREV_IS(r, OLAND) -#define ASICREV_IS_HAINAN_V(r) ASICREV_IS(r, HAINAN) - -#define ASICREV_IS_BONAIRE_M(r) ASICREV_IS(r, BONAIRE) -#define ASICREV_IS_HAWAII_P(r) ASICREV_IS(r, HAWAII) - -#define ASICREV_IS_SPECTRE(r) ASICREV_IS(r, SPECTRE) -#define ASICREV_IS_SPOOKY(r) ASICREV_IS(r, SPOOKY) -#define ASICREV_IS_KALINDI(r) ASICREV_IS(r, KALINDI) -#define ASICREV_IS_KALINDI_GODAVARI(r) ASICREV_IS(r, GODAVARI) - -#define ASICREV_IS_ICELAND_M(r) ASICREV_IS(r, ICELAND) -#define ASICREV_IS_TONGA_P(r) ASICREV_IS(r, TONGA) -#define ASICREV_IS_FIJI_P(r) ASICREV_IS(r, FIJI) - -#define ASICREV_IS_POLARIS10_P(r) ASICREV_IS(r, POLARIS10) -#define ASICREV_IS_POLARIS11_M(r) ASICREV_IS(r, POLARIS11) -#define ASICREV_IS_POLARIS12_V(r) ASICREV_IS(r, POLARIS12) -#define ASICREV_IS_VEGAM_P(r) ASICREV_IS(r, VEGAM) - -#define ASICREV_IS_CARRIZO(r) ASICREV_IS(r, CARRIZO) -#define ASICREV_IS_CARRIZO_BRISTOL(r) ASICREV_IS(r, BRISTOL) -#define ASICREV_IS_STONEY(r) ASICREV_IS(r, STONEY) - -#define ASICREV_IS_VEGA10_M(r) ASICREV_IS(r, VEGA10) -#define ASICREV_IS_VEGA10_P(r) ASICREV_IS(r, VEGA10) -#define ASICREV_IS_VEGA12_P(r) ASICREV_IS(r, VEGA12) -#define ASICREV_IS_VEGA12_p(r) ASICREV_IS(r, VEGA12) -#define ASICREV_IS_VEGA20_P(r) ASICREV_IS(r, VEGA20) - -#define ASICREV_IS_RAVEN(r) ASICREV_IS(r, RAVEN) -#define ASICREV_IS_RAVEN2(r) ASICREV_IS(r, RAVEN2) - -#endif // _AMDGPU_ASIC_ADDR_H diff -Nru 
mesa-18.3.3/src/amd/addrlib/core/addrcommon.h mesa-19.0.1/src/amd/addrlib/core/addrcommon.h --- mesa-18.3.3/src/amd/addrlib/core/addrcommon.h 2017-12-02 01:35:56.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/core/addrcommon.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,924 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addrcommon.h -* @brief Contains the helper function and constants. 
-**************************************************************************************************** -*/ - -#ifndef __ADDR_COMMON_H__ -#define __ADDR_COMMON_H__ - -#include "addrinterface.h" - -#include -#include -#include - -#if !defined(DEBUG) -#ifdef NDEBUG -#define DEBUG 0 -#else -#define DEBUG 1 -#endif -#endif - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Platform specific debug break defines -//////////////////////////////////////////////////////////////////////////////////////////////////// -#if DEBUG - #if defined(__GNUC__) - #define ADDR_DBG_BREAK() assert(false) - #elif defined(__APPLE__) - #define ADDR_DBG_BREAK() { IOPanic("");} - #else - #define ADDR_DBG_BREAK() { __debugbreak(); } - #endif -#else - #define ADDR_DBG_BREAK() -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////// - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Debug assertions used in AddrLib -//////////////////////////////////////////////////////////////////////////////////////////////////// -#if defined(_WIN32) && (_MSC_VER >= 1400) - #define ADDR_ANALYSIS_ASSUME(expr) __analysis_assume(expr) -#else - #define ADDR_ANALYSIS_ASSUME(expr) do { (void)(expr); } while (0) -#endif - -#define ADDR_ASSERT(__e) assert(__e) -#define ADDR_ASSERT_ALWAYS() ADDR_DBG_BREAK() -#define ADDR_UNHANDLED_CASE() ADDR_ASSERT(!"Unhandled case") -#define ADDR_NOT_IMPLEMENTED() ADDR_ASSERT(!"Not implemented"); -//////////////////////////////////////////////////////////////////////////////////////////////////// - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Debug print macro from legacy address library -//////////////////////////////////////////////////////////////////////////////////////////////////// -#if DEBUG - -#define ADDR_PRNT(a) Object::DebugPrint a - -/// @brief Macro 
for reporting informational messages -/// @ingroup util -/// -/// This macro optionally prints an informational message to stdout. -/// The first parameter is a condition -- if it is true, nothing is done. -/// The second pararmeter MUST be a parenthesis-enclosed list of arguments, -/// starting with a string. This is passed to printf() or an equivalent -/// in order to format the informational message. For example, -/// ADDR_INFO(0, ("test %d",3) ); prints out "test 3". -/// -#define ADDR_INFO(cond, a) \ -{ if (!(cond)) { ADDR_PRNT(a); } } - - -/// @brief Macro for reporting error warning messages -/// @ingroup util -/// -/// This macro optionally prints an error warning message to stdout, -/// followed by the file name and line number where the macro was called. -/// The first parameter is a condition -- if it is true, nothing is done. -/// The second pararmeter MUST be a parenthesis-enclosed list of arguments, -/// starting with a string. This is passed to printf() or an equivalent -/// in order to format the informational message. For example, -/// ADDR_WARN(0, ("test %d",3) ); prints out "test 3" followed by -/// a second line with the file name and line number. -/// -#define ADDR_WARN(cond, a) \ -{ if (!(cond)) \ - { ADDR_PRNT(a); \ - ADDR_PRNT((" WARNING in file %s, line %d\n", __FILE__, __LINE__)); \ -} } - - -/// @brief Macro for reporting fatal error conditions -/// @ingroup util -/// -/// This macro optionally stops execution of the current routine -/// after printing an error warning message to stdout, -/// followed by the file name and line number where the macro was called. -/// The first parameter is a condition -- if it is true, nothing is done. -/// The second pararmeter MUST be a parenthesis-enclosed list of arguments, -/// starting with a string. This is passed to printf() or an equivalent -/// in order to format the informational message. 
For example, -/// ADDR_EXIT(0, ("test %d",3) ); prints out "test 3" followed by -/// a second line with the file name and line number, then stops execution. -/// -#define ADDR_EXIT(cond, a) \ -{ if (!(cond)) \ - { ADDR_PRNT(a); ADDR_DBG_BREAK();\ -} } - -#else // DEBUG - -#define ADDRDPF 1 ? (void)0 : (void) - -#define ADDR_PRNT(a) - -#define ADDR_DBG_BREAK() - -#define ADDR_INFO(cond, a) - -#define ADDR_WARN(cond, a) - -#define ADDR_EXIT(cond, a) - -#endif // DEBUG -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#define ADDR_C_ASSERT(__e) typedef char __ADDR_C_ASSERT__[(__e) ? 1 : -1] - -namespace Addr -{ - -namespace V1 -{ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Common constants -//////////////////////////////////////////////////////////////////////////////////////////////////// -static const UINT_32 MicroTileWidth = 8; ///< Micro tile width, for 1D and 2D tiling -static const UINT_32 MicroTileHeight = 8; ///< Micro tile height, for 1D and 2D tiling -static const UINT_32 ThickTileThickness = 4; ///< Micro tile thickness, for THICK modes -static const UINT_32 XThickTileThickness = 8; ///< Extra thick tiling thickness -static const UINT_32 PowerSaveTileBytes = 64; ///< Nuber of bytes per tile for power save 64 -static const UINT_32 CmaskCacheBits = 1024; ///< Number of bits for CMASK cache -static const UINT_32 CmaskElemBits = 4; ///< Number of bits for CMASK element -static const UINT_32 HtileCacheBits = 16384; ///< Number of bits for HTILE cache 512*32 - -static const UINT_32 MicroTilePixels = MicroTileWidth * MicroTileHeight; - -static const INT_32 TileIndexInvalid = TILEINDEX_INVALID; -static const INT_32 TileIndexLinearGeneral = TILEINDEX_LINEAR_GENERAL; -static const INT_32 TileIndexNoMacroIndex = -3; - -} // V1 - -namespace V2 -{ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// 
Common constants -//////////////////////////////////////////////////////////////////////////////////////////////////// -static const UINT_32 MaxSurfaceHeight = 16384; - -} // V2 - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Common macros -//////////////////////////////////////////////////////////////////////////////////////////////////// -#define BITS_PER_BYTE 8 -#define BITS_TO_BYTES(x) ( ((x) + (BITS_PER_BYTE-1)) / BITS_PER_BYTE ) -#define BYTES_TO_BITS(x) ( (x) * BITS_PER_BYTE ) - -/// Helper macros to select a single bit from an int (undefined later in section) -#define _BIT(v,b) (((v) >> (b) ) & 1) - -/** -**************************************************************************************************** -* @brief Enums to identify AddrLib type -**************************************************************************************************** -*/ -enum LibClass -{ - BASE_ADDRLIB = 0x0, - R600_ADDRLIB = 0x6, - R800_ADDRLIB = 0x8, - SI_ADDRLIB = 0xa, - CI_ADDRLIB = 0xb, - AI_ADDRLIB = 0xd, -}; - -/** -**************************************************************************************************** -* ChipFamily -* -* @brief -* Neutral enums that specifies chip family. -* -**************************************************************************************************** -*/ -enum ChipFamily -{ - ADDR_CHIP_FAMILY_IVLD, ///< Invalid family - ADDR_CHIP_FAMILY_R6XX, - ADDR_CHIP_FAMILY_R7XX, - ADDR_CHIP_FAMILY_R8XX, - ADDR_CHIP_FAMILY_NI, - ADDR_CHIP_FAMILY_SI, - ADDR_CHIP_FAMILY_CI, - ADDR_CHIP_FAMILY_VI, - ADDR_CHIP_FAMILY_AI, -}; - -/** -**************************************************************************************************** -* ConfigFlags -* -* @brief -* This structure is used to set configuration flags. 
-**************************************************************************************************** -*/ -union ConfigFlags -{ - struct - { - /// These flags are set up internally thru AddrLib::Create() based on ADDR_CREATE_FLAGS - UINT_32 optimalBankSwap : 1; ///< New bank tiling for RV770 only - UINT_32 noCubeMipSlicesPad : 1; ///< Disables faces padding for cubemap mipmaps - UINT_32 fillSizeFields : 1; ///< If clients fill size fields in all input and - /// output structure - UINT_32 ignoreTileInfo : 1; ///< Don't use tile info structure - UINT_32 useTileIndex : 1; ///< Make tileIndex field in input valid - UINT_32 useCombinedSwizzle : 1; ///< Use combined swizzle - UINT_32 checkLast2DLevel : 1; ///< Check the last 2D mip sub level - UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment - UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize - UINT_32 disableLinearOpt : 1; ///< Disallow tile modes to be optimized to linear - UINT_32 reserved : 22; ///< Reserved bits for future use - }; - - UINT_32 value; -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Misc helper functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* AddrXorReduce -* -* @brief -* Xor the right-side numberOfBits bits of x. 
-**************************************************************************************************** -*/ -static inline UINT_32 XorReduce( - UINT_32 x, - UINT_32 numberOfBits) -{ - UINT_32 i; - UINT_32 result = x & 1; - - for (i=1; i>i) & 1); - } - - return result; -} - -/** -**************************************************************************************************** -* IsPow2 -* -* @brief -* Check if the size (UINT_32) is pow 2 -**************************************************************************************************** -*/ -static inline UINT_32 IsPow2( - UINT_32 dim) ///< [in] dimension of miplevel -{ - ADDR_ASSERT(dim > 0); - return !(dim & (dim - 1)); -} - -/** -**************************************************************************************************** -* IsPow2 -* -* @brief -* Check if the size (UINT_64) is pow 2 -**************************************************************************************************** -*/ -static inline UINT_64 IsPow2( - UINT_64 dim) ///< [in] dimension of miplevel -{ - ADDR_ASSERT(dim > 0); - return !(dim & (dim - 1)); -} - -/** -**************************************************************************************************** -* ByteAlign -* -* @brief -* Align UINT_32 "x" to "align" alignment, "align" should be power of 2 -**************************************************************************************************** -*/ -static inline UINT_32 PowTwoAlign( - UINT_32 x, - UINT_32 align) -{ - // - // Assert that x is a power of two. 
- // - ADDR_ASSERT(IsPow2(align)); - return (x + (align - 1)) & (~(align - 1)); -} - -/** -**************************************************************************************************** -* ByteAlign -* -* @brief -* Align UINT_64 "x" to "align" alignment, "align" should be power of 2 -**************************************************************************************************** -*/ -static inline UINT_64 PowTwoAlign( - UINT_64 x, - UINT_64 align) -{ - // - // Assert that x is a power of two. - // - ADDR_ASSERT(IsPow2(align)); - return (x + (align - 1)) & (~(align - 1)); -} - -/** -**************************************************************************************************** -* Min -* -* @brief -* Get the min value between two unsigned values -**************************************************************************************************** -*/ -static inline UINT_32 Min( - UINT_32 value1, - UINT_32 value2) -{ - return ((value1 < (value2)) ? (value1) : value2); -} - -/** -**************************************************************************************************** -* Min -* -* @brief -* Get the min value between two signed values -**************************************************************************************************** -*/ -static inline INT_32 Min( - INT_32 value1, - INT_32 value2) -{ - return ((value1 < (value2)) ? (value1) : value2); -} - -/** -**************************************************************************************************** -* Max -* -* @brief -* Get the max value between two unsigned values -**************************************************************************************************** -*/ -static inline UINT_32 Max( - UINT_32 value1, - UINT_32 value2) -{ - return ((value1 > (value2)) ? 
(value1) : value2); -} - -/** -**************************************************************************************************** -* Max -* -* @brief -* Get the max value between two signed values -**************************************************************************************************** -*/ -static inline INT_32 Max( - INT_32 value1, - INT_32 value2) -{ - return ((value1 > (value2)) ? (value1) : value2); -} - -/** -**************************************************************************************************** -* NextPow2 -* -* @brief -* Compute the mipmap's next level dim size -**************************************************************************************************** -*/ -static inline UINT_32 NextPow2( - UINT_32 dim) ///< [in] dimension of miplevel -{ - UINT_32 newDim = 1; - - if (dim > 0x7fffffff) - { - ADDR_ASSERT_ALWAYS(); - newDim = 0x80000000; - } - else - { - while (newDim < dim) - { - newDim <<= 1; - } - } - - return newDim; -} - -/** -**************************************************************************************************** -* Log2NonPow2 -* -* @brief -* Compute log of base 2 no matter the target is power of 2 or not -**************************************************************************************************** -*/ -static inline UINT_32 Log2NonPow2( - UINT_32 x) ///< [in] the value should calculate log based 2 -{ - UINT_32 y; - - y = 0; - while (x > 1) - { - x >>= 1; - y++; - } - - return y; -} - -/** -**************************************************************************************************** -* Log2 -* -* @brief -* Compute log of base 2 -**************************************************************************************************** -*/ -static inline UINT_32 Log2( - UINT_32 x) ///< [in] the value should calculate log based 2 -{ - // Assert that x is a power of two. 
- ADDR_ASSERT(IsPow2(x)); - - return Log2NonPow2(x); -} - -/** -**************************************************************************************************** -* QLog2 -* -* @brief -* Compute log of base 2 quickly (<= 16) -**************************************************************************************************** -*/ -static inline UINT_32 QLog2( - UINT_32 x) ///< [in] the value should calculate log based 2 -{ - ADDR_ASSERT(x <= 16); - - UINT_32 y = 0; - - switch (x) - { - case 1: - y = 0; - break; - case 2: - y = 1; - break; - case 4: - y = 2; - break; - case 8: - y = 3; - break; - case 16: - y = 4; - break; - default: - ADDR_ASSERT_ALWAYS(); - } - - return y; -} - -/** -**************************************************************************************************** -* SafeAssign -* -* @brief -* NULL pointer safe assignment -**************************************************************************************************** -*/ -static inline VOID SafeAssign( - UINT_32* pLVal, ///< [in] Pointer to left val - UINT_32 rVal) ///< [in] Right value -{ - if (pLVal) - { - *pLVal = rVal; - } -} - -/** -**************************************************************************************************** -* SafeAssign -* -* @brief -* NULL pointer safe assignment for 64bit values -**************************************************************************************************** -*/ -static inline VOID SafeAssign( - UINT_64* pLVal, ///< [in] Pointer to left val - UINT_64 rVal) ///< [in] Right value -{ - if (pLVal) - { - *pLVal = rVal; - } -} - -/** -**************************************************************************************************** -* SafeAssign -* -* @brief -* NULL pointer safe assignment for AddrTileMode -**************************************************************************************************** -*/ -static inline VOID SafeAssign( - AddrTileMode* pLVal, ///< [in] Pointer to left val - AddrTileMode rVal) ///< [in] Right value -{ 
- if (pLVal) - { - *pLVal = rVal; - } -} - -/** -**************************************************************************************************** -* RoundHalf -* -* @brief -* return (x + 1) / 2 -**************************************************************************************************** -*/ -static inline UINT_32 RoundHalf( - UINT_32 x) ///< [in] input value -{ - ADDR_ASSERT(x != 0); - -#if 1 - return (x >> 1) + (x & 1); -#else - return (x + 1) >> 1; -#endif -} - -/** -**************************************************************************************************** -* SumGeo -* -* @brief -* Calculate sum of a geometric progression whose ratio is 1/2 -**************************************************************************************************** -*/ -static inline UINT_32 SumGeo( - UINT_32 base, ///< [in] First term in the geometric progression - UINT_32 num) ///< [in] Number of terms to be added into sum -{ - ADDR_ASSERT(base > 0); - - UINT_32 sum = 0; - UINT_32 i = 0; - for (; (i < num) && (base > 1); i++) - { - sum += base; - base = RoundHalf(base); - } - sum += num - i; - - return sum; -} - -/** -**************************************************************************************************** -* GetBit -* -* @brief -* Extract bit N value (0 or 1) of a UINT32 value. -**************************************************************************************************** -*/ -static inline UINT_32 GetBit( - UINT_32 u32, ///< [in] UINT32 value - UINT_32 pos) ///< [in] bit position from LSB, valid range is [0..31] -{ - ADDR_ASSERT(pos <= 31); - - return (u32 >> pos) & 0x1; -} - -/** -**************************************************************************************************** -* GetBits -* -* @brief -* Copy 'bitsNum' bits from src start from srcStartPos into destination from dstStartPos -* srcStartPos: 0~31 for UINT_32 -* bitsNum : 1~32 for UINT_32 -* srcStartPos: 0~31 for UINT_32 -* src start position -* | -* src : b[31] b[30] b[29] ... 
... ... ... ... ... ... ... b[end]..b[beg] ... b[1] b[0] -* || Bits num || copy length || Bits num || -* dst : b[31] b[30] b[29] ... b[end]..b[beg] ... ... ... ... ... ... ... ... b[1] b[0] -* | -* dst start position -**************************************************************************************************** -*/ -static inline UINT_32 GetBits( - UINT_32 src, - UINT_32 srcStartPos, - UINT_32 bitsNum, - UINT_32 dstStartPos) -{ - ADDR_ASSERT((srcStartPos < 32) && (dstStartPos < 32) && (bitsNum > 0)); - ADDR_ASSERT((bitsNum + dstStartPos <= 32) && (bitsNum + srcStartPos <= 32)); - - return ((src >> srcStartPos) << (32 - bitsNum)) >> (32 - bitsNum - dstStartPos); -} - -/** -**************************************************************************************************** -* MortonGen2d -* -* @brief -* Generate 2D Morton interleave code with num lowest bits in each channel -**************************************************************************************************** -*/ -static inline UINT_32 MortonGen2d( - UINT_32 x, ///< [in] First channel - UINT_32 y, ///< [in] Second channel - UINT_32 num) ///< [in] Number of bits extracted from each channel -{ - UINT_32 mort = 0; - - for (UINT_32 i = 0; i < num; i++) - { - mort |= (GetBit(y, i) << (2 * i)); - mort |= (GetBit(x, i) << (2 * i + 1)); - } - - return mort; -} - -/** -**************************************************************************************************** -* MortonGen3d -* -* @brief -* Generate 3D Morton interleave code with num lowest bits in each channel -**************************************************************************************************** -*/ -static inline UINT_32 MortonGen3d( - UINT_32 x, ///< [in] First channel - UINT_32 y, ///< [in] Second channel - UINT_32 z, ///< [in] Third channel - UINT_32 num) ///< [in] Number of bits extracted from each channel -{ - UINT_32 mort = 0; - - for (UINT_32 i = 0; i < num; i++) - { - mort |= (GetBit(z, i) << (3 * i)); - mort |= (GetBit(y, 
i) << (3 * i + 1)); - mort |= (GetBit(x, i) << (3 * i + 2)); - } - - return mort; -} - -/** -**************************************************************************************************** -* ReverseBitVector -* -* @brief -* Return reversed lowest num bits of v: v[0]v[1]...v[num-2]v[num-1] -**************************************************************************************************** -*/ -static inline UINT_32 ReverseBitVector( - UINT_32 v, ///< [in] Reverse operation base value - UINT_32 num) ///< [in] Number of bits used in reverse operation -{ - UINT_32 reverse = 0; - - for (UINT_32 i = 0; i < num; i++) - { - reverse |= (GetBit(v, num - 1 - i) << i); - } - - return reverse; -} - -/** -**************************************************************************************************** -* FoldXor2d -* -* @brief -* Xor bit vector v[num-1]v[num-2]...v[1]v[0] with v[num]v[num+1]...v[2*num-2]v[2*num-1] -**************************************************************************************************** -*/ -static inline UINT_32 FoldXor2d( - UINT_32 v, ///< [in] Xor operation base value - UINT_32 num) ///< [in] Number of bits used in fold xor operation -{ - return (v & ((1 << num) - 1)) ^ ReverseBitVector(v >> num, num); -} - -/** -**************************************************************************************************** -* DeMort -* -* @brief -* Return v[0] | v[2] | v[4] | v[6]... 
| v[2*num - 2] -**************************************************************************************************** -*/ -static inline UINT_32 DeMort( - UINT_32 v, ///< [in] DeMort operation base value - UINT_32 num) ///< [in] Number of bits used in fold DeMort operation -{ - UINT_32 d = 0; - - for (UINT_32 i = 0; i < num; i++) - { - d |= ((v & (1 << (i << 1))) >> i); - } - - return d; -} - -/** -**************************************************************************************************** -* FoldXor3d -* -* @brief -* v[0]...v[num-1] ^ v[3*num-1]v[3*num-3]...v[num+2]v[num] ^ v[3*num-2]...v[num+1]v[num-1] -**************************************************************************************************** -*/ -static inline UINT_32 FoldXor3d( - UINT_32 v, ///< [in] Xor operation base value - UINT_32 num) ///< [in] Number of bits used in fold xor operation -{ - UINT_32 t = v & ((1 << num) - 1); - t ^= ReverseBitVector(DeMort(v >> num, num), num); - t ^= ReverseBitVector(DeMort(v >> (num + 1), num), num); - - return t; -} - -/** -**************************************************************************************************** -* InitChannel -* -* @brief -* Set channel initialization value via a return value -**************************************************************************************************** -*/ -static inline ADDR_CHANNEL_SETTING InitChannel( - UINT_32 valid, ///< [in] valid setting - UINT_32 channel, ///< [in] channel setting - UINT_32 index) ///< [in] index setting -{ - ADDR_CHANNEL_SETTING t; - t.valid = valid; - t.channel = channel; - t.index = index; - - return t; -} - -/** -**************************************************************************************************** -* InitChannel -* -* @brief -* Set channel initialization value via channel pointer -**************************************************************************************************** -*/ -static inline VOID InitChannel( - UINT_32 valid, ///< [in] valid setting - 
UINT_32 channel, ///< [in] channel setting - UINT_32 index, ///< [in] index setting - ADDR_CHANNEL_SETTING *pChanSet) ///< [out] channel setting to be initialized -{ - pChanSet->valid = valid; - pChanSet->channel = channel; - pChanSet->index = index; -} - - -/** -**************************************************************************************************** -* InitChannel -* -* @brief -* Set channel initialization value via another channel -**************************************************************************************************** -*/ -static inline VOID InitChannel( - ADDR_CHANNEL_SETTING *pChanDst, ///< [in] channel setting to be copied from - ADDR_CHANNEL_SETTING *pChanSrc) ///< [out] channel setting to be initialized -{ - pChanDst->valid = pChanSrc->valid; - pChanDst->channel = pChanSrc->channel; - pChanDst->index = pChanSrc->index; -} - -/** -**************************************************************************************************** -* GetMaxValidChannelIndex -* -* @brief -* Get max valid index for a specific channel -**************************************************************************************************** -*/ -static inline UINT_32 GetMaxValidChannelIndex( - const ADDR_CHANNEL_SETTING *pChanSet, ///< [in] channel setting to be initialized - UINT_32 searchCount,///< [in] number of channel setting to be searched - UINT_32 channel) ///< [in] channel to be searched -{ - UINT_32 index = 0; - - for (UINT_32 i = 0; i < searchCount; i++) - { - if (pChanSet[i].valid && (pChanSet[i].channel == channel)) - { - index = Max(index, static_cast(pChanSet[i].index)); - } - } - - return index; -} - -/** -**************************************************************************************************** -* GetCoordActiveMask -* -* @brief -* Get bit mask which indicates which positions in the equation match the target coord -**************************************************************************************************** -*/ -static inline 
UINT_32 GetCoordActiveMask( - const ADDR_CHANNEL_SETTING *pChanSet, ///< [in] channel setting to be initialized - UINT_32 searchCount,///< [in] number of channel setting to be searched - UINT_32 channel, ///< [in] channel to be searched - UINT_32 index) ///< [in] index to be searched -{ - UINT_32 mask = 0; - - for (UINT_32 i = 0; i < searchCount; i++) - { - if ((pChanSet[i].valid == TRUE) && - (pChanSet[i].channel == channel) && - (pChanSet[i].index == index)) - { - mask |= (1 << i); - } - } - - return mask; -} - -} // Addr - -#endif // __ADDR_COMMON_H__ - diff -Nru mesa-18.3.3/src/amd/addrlib/core/addrelemlib.cpp mesa-19.0.1/src/amd/addrlib/core/addrelemlib.cpp --- mesa-18.3.3/src/amd/addrlib/core/addrelemlib.cpp 2017-11-14 18:46:21.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/core/addrelemlib.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,1843 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addrelemlib.cpp -* @brief Contains the class implementation for element/pixel related functions. -**************************************************************************************************** -*/ - -#include "addrelemlib.h" -#include "addrlib.h" - -namespace Addr -{ - -/** -**************************************************************************************************** -* ElemLib::ElemLib -* -* @brief -* constructor -* -* @return -* N/A -**************************************************************************************************** -*/ -ElemLib::ElemLib( - Lib* pAddrLib) ///< [in] Parent addrlib instance pointer - : - Object(pAddrLib->GetClient()), - m_pAddrLib(pAddrLib) -{ - switch (m_pAddrLib->GetChipFamily()) - { - case ADDR_CHIP_FAMILY_R6XX: - m_depthPlanarType = ADDR_DEPTH_PLANAR_R600; - m_fp16ExportNorm = 0; - break; - case ADDR_CHIP_FAMILY_R7XX: - m_depthPlanarType = ADDR_DEPTH_PLANAR_R600; - m_fp16ExportNorm = 1; - break; - case ADDR_CHIP_FAMILY_R8XX: - case ADDR_CHIP_FAMILY_NI: // Same as 8xx - m_depthPlanarType = ADDR_DEPTH_PLANAR_R800; - m_fp16ExportNorm = 1; - break; - default: - m_fp16ExportNorm = 1; - m_depthPlanarType = ADDR_DEPTH_PLANAR_R800; - } - - m_configFlags.value = 0; -} - -/** -**************************************************************************************************** -* ElemLib::~ElemLib -* -* @brief -* destructor -* -* @return -* N/A -**************************************************************************************************** -*/ -ElemLib::~ElemLib() -{ -} - -/** -**************************************************************************************************** -* ElemLib::Create -* -* @brief -* Creates and 
initializes AddrLib object. -* -* @return -* Returns point to ADDR_CREATEINFO if successful. -**************************************************************************************************** -*/ -ElemLib* ElemLib::Create( - const Lib* pAddrLib) ///< [in] Pointer of parent AddrLib instance -{ - ElemLib* pElemLib = NULL; - - if (pAddrLib) - { - VOID* pObj = Object::ClientAlloc(sizeof(ElemLib), pAddrLib->GetClient()); - if (pObj) - { - pElemLib = new(pObj) ElemLib(const_cast(pAddrLib)); - } - } - - return pElemLib; -} - -/************************************************************************************************** -* ElemLib::Flt32sToInt32s -* -* @brief -* Convert a ADDR_FLT_32 value to Int32 value -* -* @return -* N/A -**************************************************************************************************** -*/ -VOID ElemLib::Flt32sToInt32s( - ADDR_FLT_32 value, ///< [in] ADDR_FLT_32 value - UINT_32 bits, ///< [in] nubmer of bits in value - NumberType numberType, ///< [in] the type of number - UINT_32* pResult) ///< [out] Int32 value -{ - UINT_8 round = 128; //ADDR_ROUND_BY_HALF - UINT_32 uscale; - UINT_32 sign; - - //convert each component to an INT_32 - switch ( numberType ) - { - case ADDR_NO_NUMBER: //fall through - case ADDR_ZERO: //fall through - case ADDR_ONE: //fall through - case ADDR_EPSILON: //fall through - return; // these are zero-bit components, so don't set result - - case ADDR_UINT_BITS: // unsigned integer bit field, clamped to range - uscale = (1< uscale)) - { - *pResult = uscale; - } - else - { - *pResult = value.i; - } - return; - } - - // The algorithm used in the DB and TX differs at one value for 24-bit unorms - case ADDR_UNORM_R6XXDB: // unsigned repeating fraction - if ((bits==24) && (value.i == 0x33000000)) - { - *pResult = 1; - return; - } // Else treat like ADDR_UNORM_R6XX - - case ADDR_UNORM_R6XX: // unsigned repeating fraction - if (value.f <= 0) - { - *pResult = 0; // first clamp to [0..1] - } - else - { - if 
(value.f >= 1) - { - *pResult = (1<(f + (round/256.0f)); - } - #endif - else - { - ADDR_FLT_32 scaled; - ADDR_FLT_32 shifted; - UINT_64 truncated, rounded; - UINT_32 altShift; - UINT_32 mask = (1 << bits) - 1; - UINT_32 half = 1 << (bits - 1); - UINT_32 mant24 = (value.i & 0x7FFFFF) + 0x800000; - UINT_64 temp = mant24 - (mant24>>bits) - - static_cast((mant24 & mask) > half); - UINT_32 exp8 = value.i >> 23; - UINT_32 shift = 126 - exp8 + 24 - bits; - UINT_64 final; - - if (shift >= 32) // This is zero, even with maximum dither add - { - final = 0; - } - else - { - final = ((temp<<8) + (static_cast(round)<> (shift+8); - } - //ADDR_EXIT( *pResult == final, - // ("Float %x converted to %d-bit Unorm %x != bitwise %x", - // value.u, bits, (UINT_32)*pResult, (UINT_32)final) ); - if (final > mask) - { - final = mask; - } - - scaled.f = value.f * ((1<>23)&0xFF); - truncated = (altShift > 60) ? 0 : truncated >> altShift; - rounded = static_cast((round + truncated) >> 8); - //if (rounded > ((1<(rounded); //(INT_32)final; - } - } - } - - return; - - case ADDR_S8FLOAT32: // 32-bit IEEE float, passes through NaN values - *pResult = value.i; - return; - - // @@ FIX ROUNDING in this code, fix the denorm case - case ADDR_U4FLOATC: // Unsigned float, 4-bit exponent. 
bias 15, clamped [0..1] - sign = (value.i >> 31) & 1; - if ((value.i&0x7F800000) == 0x7F800000) // If NaN or INF: - { - if ((value.i&0x007FFFFF) != 0) // then if NaN - { - *pResult = 0; // return 0 - } - else - { - *pResult = (sign)?0:0xF00000; // else +INF->+1, -INF->0 - } - return; - } - if (value.f <= 0) - { - *pResult = 0; - } - else - { - if (value.f>=1) - { - *pResult = 0xF << (bits-4); - } - else - { - if ((value.i>>23) > 112 ) - { - // 24-bit float: normalized - // value.i += 1 << (22-bits+4); - // round the IEEE mantissa to mantissa size - // @@ NOTE: add code to support rounding - value.u &= 0x7FFFFFF; // mask off high 4 exponent bits - *pResult = value.i >> (23-bits+4);// shift off unused mantissa bits - } - else - { - // 24-bit float: denormalized - value.f = value.f / (1<<28) / (1<<28); - value.f = value.f / (1<<28) / (1<<28); // convert to IEEE denorm - // value.i += 1 << (22-bits+4); - // round the IEEE mantissa to mantissa size - // @@ NOTE: add code to support rounding - *pResult = value.i >> (23-bits+4); // shift off unused mantissa bits - } - } - } - - return; - - default: // invalid number mode - //ADDR_EXIT(0, ("Invalid AddrNumber %d", numberType) ); - break; - - } -} - -/** -**************************************************************************************************** -* ElemLib::Int32sToPixel -* -* @brief -* Pack 32-bit integer values into an uncompressed pixel, -* in the proper order -* -* @return -* N/A -* -* @note -* This entry point packes four 32-bit integer values into -* an uncompressed pixel. The pixel values are specifies in -* standard order, e.g. depth/stencil. This routine asserts -* if called on compressed pixel. 
-**************************************************************************************************** -*/ -VOID ElemLib::Int32sToPixel( - UINT_32 numComps, ///< [in] number of components - UINT_32* pComps, ///< [in] compnents - UINT_32* pCompBits, ///< [in] total bits in each component - UINT_32* pCompStart, ///< [in] the first bit position of each component - ComponentFlags properties, ///< [in] properties about byteAligned, exportNorm - UINT_32 resultBits, ///< [in] result bits: total bpp after decompression - UINT_8* pPixel) ///< [out] a depth/stencil pixel value -{ - UINT_32 i; - UINT_32 j; - UINT_32 start; - UINT_32 size; - UINT_32 byte; - UINT_32 value = 0; - UINT_32 compMask; - UINT_32 elemMask=0; - UINT_32 elementXor = 0; // address xor when reading bytes from elements - - - // @@ NOTE: assert if called on a compressed format! - - if (properties.byteAligned) // Components are all byte-sized - { - for (i = 0; i < numComps; i++) // Then for each component - { - // Copy the bytes of the component into the element - start = pCompStart[i] / 8; - size = pCompBits[i] / 8; - for (j = 0; j < size; j++) - { - pPixel[(j+start)^elementXor] = static_cast(pComps[i] >> (8*j)); - } - } - } - else // Element is 32-bits or less, components are bit fields - { - // First, extract each component in turn and combine it into a 32-bit value - for (i = 0; i < numComps; i++) - { - compMask = (1 << pCompBits[i]) - 1; - elemMask |= compMask << pCompStart[i]; - value |= (pComps[i] & compMask) << pCompStart[i]; - } - - // Mext, copy the masked value into the element - size = (resultBits + 7) / 8; - for (i = 0; i < size; i++) - { - byte = pPixel[i^elementXor] & ~(elemMask >> (8*i)); - pPixel[i^elementXor] = static_cast(byte | ((elemMask & value) >> (8*i))); - } - } -} - -/** -**************************************************************************************************** -* Flt32ToDepthPixel -* -* @brief -* Convert a FLT_32 value to a depth/stencil pixel value -* -* @return -* N/A 
-**************************************************************************************************** -*/ -VOID ElemLib::Flt32ToDepthPixel( - AddrDepthFormat format, ///< [in] Depth format - const ADDR_FLT_32 comps[2], ///< [in] two components of depth - UINT_8* pPixel ///< [out] depth pixel value - ) const -{ - UINT_32 i; - UINT_32 values[2]; - ComponentFlags properties; // byteAligned, exportNorm - UINT_32 resultBits = 0; // result bits: total bits per pixel after decompression - - PixelFormatInfo fmt; - - // get type for each component - PixGetDepthCompInfo(format, &fmt); - - //initialize properties - properties.byteAligned = TRUE; - properties.exportNorm = TRUE; - properties.floatComp = FALSE; - - //set properties and result bits - for (i = 0; i < 2; i++) - { - if ((fmt.compBit[i] & 7) || (fmt.compStart[i] & 7)) - { - properties.byteAligned = FALSE; - } - - if (resultBits < fmt.compStart[i] + fmt.compBit[i]) - { - resultBits = fmt.compStart[i] + fmt.compBit[i]; - } - - // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format - if (fmt.compBit[i] > 11 || fmt.numType[i] >= ADDR_USCALED) - { - properties.exportNorm = FALSE; - } - - // Mark if there are any floating point components - if ((fmt.numType[i] == ADDR_U4FLOATC) || (fmt.numType[i] >= ADDR_S8FLOAT) ) - { - properties.floatComp = TRUE; - } - } - - // Convert the two input floats to integer values - for (i = 0; i < 2; i++) - { - Flt32sToInt32s(comps[i], fmt.compBit[i], fmt.numType[i], &values[i]); - } - - // Then pack the two integer components, in the proper order - Int32sToPixel(2, values, fmt.compBit, fmt.compStart, properties, resultBits, pPixel ); - -} - -/** -**************************************************************************************************** -* Flt32ToColorPixel -* -* @brief -* Convert a FLT_32 value to a red/green/blue/alpha pixel value -* -* @return -* N/A 
-**************************************************************************************************** -*/ -VOID ElemLib::Flt32ToColorPixel( - AddrColorFormat format, ///< [in] Color format - AddrSurfaceNumber surfNum, ///< [in] Surface number - AddrSurfaceSwap surfSwap, ///< [in] Surface swap - const ADDR_FLT_32 comps[4], ///< [in] four components of color - UINT_8* pPixel ///< [out] a red/green/blue/alpha pixel value - ) const -{ - PixelFormatInfo pixelInfo; - - UINT_32 i; - UINT_32 values[4]; - ComponentFlags properties; // byteAligned, exportNorm - UINT_32 resultBits = 0; // result bits: total bits per pixel after decompression - - memset(&pixelInfo, 0, sizeof(PixelFormatInfo)); - - PixGetColorCompInfo(format, surfNum, surfSwap, &pixelInfo); - - //initialize properties - properties.byteAligned = TRUE; - properties.exportNorm = TRUE; - properties.floatComp = FALSE; - - //set properties and result bits - for (i = 0; i < 4; i++) - { - if ( (pixelInfo.compBit[i] & 7) || (pixelInfo.compStart[i] & 7) ) - { - properties.byteAligned = FALSE; - } - - if (resultBits < pixelInfo.compStart[i] + pixelInfo.compBit[i]) - { - resultBits = pixelInfo.compStart[i] + pixelInfo.compBit[i]; - } - - if (m_fp16ExportNorm) - { - // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format - // or if it's not FP and <=16 bits - if (((pixelInfo.compBit[i] > 11) || (pixelInfo.numType[i] >= ADDR_USCALED)) - && (pixelInfo.numType[i] !=ADDR_U4FLOATC)) - { - properties.exportNorm = FALSE; - } - } - else - { - // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format - if (pixelInfo.compBit[i] > 11 || pixelInfo.numType[i] >= ADDR_USCALED) - { - properties.exportNorm = FALSE; - } - } - - // Mark if there are any floating point components - if ( (pixelInfo.numType[i] == ADDR_U4FLOATC) || - (pixelInfo.numType[i] >= ADDR_S8FLOAT) ) - { - properties.floatComp = TRUE; - } - } - - // Convert the four input floats to integer values - for (i = 0; i 
< 4; i++) - { - Flt32sToInt32s(comps[i], pixelInfo.compBit[i], pixelInfo.numType[i], &values[i]); - } - - // Then pack the four integer components, in the proper order - Int32sToPixel(4, values, &pixelInfo.compBit[0], &pixelInfo.compStart[0], - properties, resultBits, pPixel); -} - -/** -**************************************************************************************************** -* ElemLib::GetCompType -* -* @brief -* Fill per component info -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID ElemLib::GetCompType( - AddrColorFormat format, ///< [in] surface format - AddrSurfaceNumber numType, ///< [in] number type - PixelFormatInfo* pInfo) ///< [in][out] per component info out -{ - BOOL_32 handled = FALSE; - - // Floating point formats override the number format - switch (format) - { - case ADDR_COLOR_16_FLOAT: // fall through for all pure floating point format - case ADDR_COLOR_16_16_FLOAT: - case ADDR_COLOR_16_16_16_16_FLOAT: - case ADDR_COLOR_32_FLOAT: - case ADDR_COLOR_32_32_FLOAT: - case ADDR_COLOR_32_32_32_32_FLOAT: - case ADDR_COLOR_10_11_11_FLOAT: - case ADDR_COLOR_11_11_10_FLOAT: - numType = ADDR_NUMBER_FLOAT; - break; - // Special handling for the depth formats - case ADDR_COLOR_8_24: // fall through for these 2 similar format - case ADDR_COLOR_24_8: - for (UINT_32 c = 0; c < 4; c++) - { - if (pInfo->compBit[c] == 8) - { - pInfo->numType[c] = ADDR_UINT_BITS; - } - else if (pInfo->compBit[c] == 24) - { - pInfo->numType[c] = ADDR_UNORM_R6XX; - } - else - { - pInfo->numType[c] = ADDR_NO_NUMBER; - } - } - handled = TRUE; - break; - case ADDR_COLOR_8_24_FLOAT: // fall through for these 3 similar format - case ADDR_COLOR_24_8_FLOAT: - case ADDR_COLOR_X24_8_32_FLOAT: - for (UINT_32 c = 0; c < 4; c++) - { - if (pInfo->compBit[c] == 8) - { - pInfo->numType[c] = ADDR_UINT_BITS; - } - else if (pInfo->compBit[c] == 24) - { - pInfo->numType[c] = ADDR_U4FLOATC; - } - else if 
(pInfo->compBit[c] == 32) - { - pInfo->numType[c] = ADDR_S8FLOAT32; - } - else - { - pInfo->numType[c] = ADDR_NO_NUMBER; - } - } - handled = TRUE; - break; - default: - break; - } - - if (!handled) - { - for (UINT_32 c = 0; c < 4; c++) - { - // Assign a number type for each component - AddrSurfaceNumber cnum; - - // First handle default component values - if (pInfo->compBit[c] == 0) - { - if (c < 3) - { - pInfo->numType[c] = ADDR_ZERO; // Default is zero for RGB - } - else if (numType == ADDR_NUMBER_UINT || numType == ADDR_NUMBER_SINT) - { - pInfo->numType[c] = ADDR_EPSILON; // Alpha INT_32 bits default is 0x01 - } - else - { - pInfo->numType[c] = ADDR_ONE; // Alpha normal default is float 1.0 - } - continue; - } - // Now handle small components - else if (pInfo->compBit[c] == 1) - { - if (numType == ADDR_NUMBER_UINT || numType == ADDR_NUMBER_SINT) - { - cnum = ADDR_NUMBER_UINT; - } - else - { - cnum = ADDR_NUMBER_UNORM; - } - } - else - { - cnum = numType; - } - - // If no default, set the number type fom num, compbits, and architecture - switch (cnum) - { - case ADDR_NUMBER_SRGB: - pInfo->numType[c] = (c < 3) ? ADDR_GAMMA8_R6XX : ADDR_UNORM_R6XX; - break; - case ADDR_NUMBER_UNORM: - pInfo->numType[c] = ADDR_UNORM_R6XX; - break; - case ADDR_NUMBER_SNORM: - pInfo->numType[c] = ADDR_SNORM_R6XX; - break; - case ADDR_NUMBER_USCALED: - pInfo->numType[c] = ADDR_USCALED; // @@ Do we need separate Pele routine? - break; - case ADDR_NUMBER_SSCALED: - pInfo->numType[c] = ADDR_SSCALED; // @@ Do we need separate Pele routine? 
- break; - case ADDR_NUMBER_FLOAT: - if (pInfo->compBit[c] == 32) - { - pInfo->numType[c] = ADDR_S8FLOAT32; - } - else if (pInfo->compBit[c] == 16) - { - pInfo->numType[c] = ADDR_S5FLOAT; - } - else if (pInfo->compBit[c] >= 10) - { - pInfo->numType[c] = ADDR_U5FLOAT; - } - else - { - ADDR_ASSERT_ALWAYS(); - } - break; - case ADDR_NUMBER_SINT: - pInfo->numType[c] = ADDR_SINT_BITS; - break; - case ADDR_NUMBER_UINT: - pInfo->numType[c] = ADDR_UINT_BITS; - break; - - default: - ADDR_ASSERT(!"Invalid number type"); - pInfo->numType[c] = ADDR_NO_NUMBER; - break; - } - } - } -} - -/** -**************************************************************************************************** -* ElemLib::GetCompSwap -* -* @brief -* Get components swapped for color surface -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID ElemLib::GetCompSwap( - AddrSurfaceSwap swap, ///< [in] swap mode - PixelFormatInfo* pInfo) ///< [in,out] output per component info -{ - switch (pInfo->comps) - { - case 4: - switch (swap) - { - case ADDR_SWAP_ALT: - SwapComps( 0, 2, pInfo ); - break; // BGRA - case ADDR_SWAP_STD_REV: - SwapComps( 0, 3, pInfo ); - SwapComps( 1, 2, pInfo ); - break; // ABGR - case ADDR_SWAP_ALT_REV: - SwapComps( 0, 3, pInfo ); - SwapComps( 0, 2, pInfo ); - SwapComps( 0, 1, pInfo ); - break; // ARGB - default: - break; - } - break; - case 3: - switch (swap) - { - case ADDR_SWAP_ALT_REV: - SwapComps( 0, 3, pInfo ); - SwapComps( 0, 2, pInfo ); - break; // AGR - case ADDR_SWAP_STD_REV: - SwapComps( 0, 2, pInfo ); - break; // BGR - case ADDR_SWAP_ALT: - SwapComps( 2, 3, pInfo ); - break; // RGA - default: - break; // RGB - } - break; - case 2: - switch (swap) - { - case ADDR_SWAP_ALT_REV: - SwapComps( 0, 1, pInfo ); - SwapComps( 1, 3, pInfo ); - break; // AR - case ADDR_SWAP_STD_REV: - SwapComps( 0, 1, pInfo ); - break; // GR - case ADDR_SWAP_ALT: - SwapComps( 1, 3, pInfo ); - break; // RA - default: 
- break; // RG - } - break; - case 1: - switch (swap) - { - case ADDR_SWAP_ALT_REV: - SwapComps( 0, 3, pInfo ); - break; // A - case ADDR_SWAP_STD_REV: - SwapComps( 0, 2, pInfo ); - break; // B - case ADDR_SWAP_ALT: - SwapComps( 0, 1, pInfo ); - break; // G - default: - break; // R - } - break; - } -} - -/** -**************************************************************************************************** -* ElemLib::GetCompSwap -* -* @brief -* Get components swapped for color surface -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID ElemLib::SwapComps( - UINT_32 c0, ///< [in] component index 0 - UINT_32 c1, ///< [in] component index 1 - PixelFormatInfo* pInfo) ///< [in,out] output per component info -{ - UINT_32 start; - UINT_32 bits; - - start = pInfo->compStart[c0]; - pInfo->compStart[c0] = pInfo->compStart[c1]; - pInfo->compStart[c1] = start; - - bits = pInfo->compBit[c0]; - pInfo->compBit[c0] = pInfo->compBit[c1]; - pInfo->compBit[c1] = bits; -} - -/** -**************************************************************************************************** -* ElemLib::PixGetColorCompInfo -* -* @brief -* Get per component info for color surface -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID ElemLib::PixGetColorCompInfo( - AddrColorFormat format, ///< [in] surface format, read from register - AddrSurfaceNumber number, ///< [in] pixel number type - AddrSurfaceSwap swap, ///< [in] component swap mode - PixelFormatInfo* pInfo ///< [out] output per component info - ) const -{ - // 1. 
Get componet bits - switch (format) - { - case ADDR_COLOR_8: - GetCompBits(8, 0, 0, 0, pInfo); - break; - case ADDR_COLOR_1_5_5_5: - GetCompBits(5, 5, 5, 1, pInfo); - break; - case ADDR_COLOR_5_6_5: - GetCompBits(8, 6, 5, 0, pInfo); - break; - case ADDR_COLOR_6_5_5: - GetCompBits(5, 5, 6, 0, pInfo); - break; - case ADDR_COLOR_8_8: - GetCompBits(8, 8, 0, 0, pInfo); - break; - case ADDR_COLOR_4_4_4_4: - GetCompBits(4, 4, 4, 4, pInfo); - break; - case ADDR_COLOR_16: - GetCompBits(16, 0, 0, 0, pInfo); - break; - case ADDR_COLOR_8_8_8_8: - GetCompBits(8, 8, 8, 8, pInfo); - break; - case ADDR_COLOR_2_10_10_10: - GetCompBits(10, 10, 10, 2, pInfo); - break; - case ADDR_COLOR_10_11_11: - GetCompBits(11, 11, 10, 0, pInfo); - break; - case ADDR_COLOR_11_11_10: - GetCompBits(10, 11, 11, 0, pInfo); - break; - case ADDR_COLOR_16_16: - GetCompBits(16, 16, 0, 0, pInfo); - break; - case ADDR_COLOR_16_16_16_16: - GetCompBits(16, 16, 16, 16, pInfo); - break; - case ADDR_COLOR_16_FLOAT: - GetCompBits(16, 0, 0, 0, pInfo); - break; - case ADDR_COLOR_16_16_FLOAT: - GetCompBits(16, 16, 0, 0, pInfo); - break; - case ADDR_COLOR_32_FLOAT: - GetCompBits(32, 0, 0, 0, pInfo); - break; - case ADDR_COLOR_32_32_FLOAT: - GetCompBits(32, 32, 0, 0, pInfo); - break; - case ADDR_COLOR_16_16_16_16_FLOAT: - GetCompBits(16, 16, 16, 16, pInfo); - break; - case ADDR_COLOR_32_32_32_32_FLOAT: - GetCompBits(32, 32, 32, 32, pInfo); - break; - - case ADDR_COLOR_32: - GetCompBits(32, 0, 0, 0, pInfo); - break; - case ADDR_COLOR_32_32: - GetCompBits(32, 32, 0, 0, pInfo); - break; - case ADDR_COLOR_32_32_32_32: - GetCompBits(32, 32, 32, 32, pInfo); - break; - case ADDR_COLOR_10_10_10_2: - GetCompBits(2, 10, 10, 10, pInfo); - break; - case ADDR_COLOR_10_11_11_FLOAT: - GetCompBits(11, 11, 10, 0, pInfo); - break; - case ADDR_COLOR_11_11_10_FLOAT: - GetCompBits(10, 11, 11, 0, pInfo); - break; - case ADDR_COLOR_5_5_5_1: - GetCompBits(1, 5, 5, 5, pInfo); - break; - case ADDR_COLOR_3_3_2: - GetCompBits(2, 3, 3, 0, pInfo); 
- break; - case ADDR_COLOR_4_4: - GetCompBits(4, 4, 0, 0, pInfo); - break; - case ADDR_COLOR_8_24: - case ADDR_COLOR_8_24_FLOAT: // same bit count, fall through - GetCompBits(24, 8, 0, 0, pInfo); - break; - case ADDR_COLOR_24_8: - case ADDR_COLOR_24_8_FLOAT: // same bit count, fall through - GetCompBits(8, 24, 0, 0, pInfo); - break; - case ADDR_COLOR_X24_8_32_FLOAT: - GetCompBits(32, 8, 0, 0, pInfo); - break; - - case ADDR_COLOR_INVALID: - GetCompBits(0, 0, 0, 0, pInfo); - break; - default: - ADDR_ASSERT(0); - GetCompBits(0, 0, 0, 0, pInfo); - break; - } - - // 2. Get component number type - - GetCompType(format, number, pInfo); - - // 3. Swap components if needed - - GetCompSwap(swap, pInfo); -} - -/** -**************************************************************************************************** -* ElemLib::PixGetDepthCompInfo -* -* @brief -* Get per component info for depth surface -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID ElemLib::PixGetDepthCompInfo( - AddrDepthFormat format, ///< [in] surface format, read from register - PixelFormatInfo* pInfo ///< [out] output per component bits and type - ) const -{ - if (m_depthPlanarType == ADDR_DEPTH_PLANAR_R800) - { - if (format == ADDR_DEPTH_8_24_FLOAT) - { - format = ADDR_DEPTH_X24_8_32_FLOAT; // Use this format to represent R800's D24FS8 - } - - if (format == ADDR_DEPTH_X8_24_FLOAT) - { - format = ADDR_DEPTH_32_FLOAT; - } - } - - switch (format) - { - case ADDR_DEPTH_16: - GetCompBits(16, 0, 0, 0, pInfo); - break; - case ADDR_DEPTH_8_24: - case ADDR_DEPTH_8_24_FLOAT: // similar format, fall through - GetCompBits(24, 8, 0, 0, pInfo); - break; - case ADDR_DEPTH_X8_24: - case ADDR_DEPTH_X8_24_FLOAT: // similar format, fall through - GetCompBits(24, 0, 0, 0, pInfo); - break; - case ADDR_DEPTH_32_FLOAT: - GetCompBits(32, 0, 0, 0, pInfo); - break; - case ADDR_DEPTH_X24_8_32_FLOAT: - GetCompBits(32, 8, 0, 0, pInfo); - break; 
- case ADDR_DEPTH_INVALID: - GetCompBits(0, 0, 0, 0, pInfo); - break; - default: - ADDR_ASSERT(0); - GetCompBits(0, 0, 0, 0, pInfo); - break; - } - - switch (format) - { - case ADDR_DEPTH_16: - pInfo->numType [0] = ADDR_UNORM_R6XX; - pInfo->numType [1] = ADDR_ZERO; - break; - case ADDR_DEPTH_8_24: - pInfo->numType [0] = ADDR_UNORM_R6XXDB; - pInfo->numType [1] = ADDR_UINT_BITS; - break; - case ADDR_DEPTH_8_24_FLOAT: - pInfo->numType [0] = ADDR_U4FLOATC; - pInfo->numType [1] = ADDR_UINT_BITS; - break; - case ADDR_DEPTH_X8_24: - pInfo->numType [0] = ADDR_UNORM_R6XXDB; - pInfo->numType [1] = ADDR_ZERO; - break; - case ADDR_DEPTH_X8_24_FLOAT: - pInfo->numType [0] = ADDR_U4FLOATC; - pInfo->numType [1] = ADDR_ZERO; - break; - case ADDR_DEPTH_32_FLOAT: - pInfo->numType [0] = ADDR_S8FLOAT32; - pInfo->numType [1] = ADDR_ZERO; - break; - case ADDR_DEPTH_X24_8_32_FLOAT: - pInfo->numType [0] = ADDR_S8FLOAT32; - pInfo->numType [1] = ADDR_UINT_BITS; - break; - default: - pInfo->numType [0] = ADDR_NO_NUMBER; - pInfo->numType [1] = ADDR_NO_NUMBER; - break; - } - - pInfo->numType [2] = ADDR_NO_NUMBER; - pInfo->numType [3] = ADDR_NO_NUMBER; -} - -/** -**************************************************************************************************** -* ElemLib::PixGetExportNorm -* -* @brief -* Check if fp16 export norm can be enabled. -* -* @return -* TRUE if this can be enabled. 
-* -**************************************************************************************************** -*/ -BOOL_32 ElemLib::PixGetExportNorm( - AddrColorFormat colorFmt, ///< [in] surface format, read from register - AddrSurfaceNumber numberFmt, ///< [in] pixel number type - AddrSurfaceSwap swap ///< [in] components swap type - ) const -{ - BOOL_32 enabled = TRUE; - - PixelFormatInfo formatInfo; - - PixGetColorCompInfo(colorFmt, numberFmt, swap, &formatInfo); - - for (UINT_32 c = 0; c < 4; c++) - { - if (m_fp16ExportNorm) - { - if (((formatInfo.compBit[c] > 11) || (formatInfo.numType[c] > ADDR_USCALED)) && - (formatInfo.numType[c] != ADDR_U4FLOATC) && - (formatInfo.numType[c] != ADDR_S5FLOAT) && - (formatInfo.numType[c] != ADDR_S5FLOATM) && - (formatInfo.numType[c] != ADDR_U5FLOAT) && - (formatInfo.numType[c] != ADDR_U3FLOATM)) - { - enabled = FALSE; - break; - } - } - else - { - if ((formatInfo.compBit[c] > 11) || (formatInfo.numType[c] > ADDR_USCALED)) - { - enabled = FALSE; - break; - } - } - } - - return enabled; -} - -/** -**************************************************************************************************** -* ElemLib::AdjustSurfaceInfo -* -* @brief -* Adjust bpp/base pitch/width/height according to elemMode and expandX/Y -* -* @return -* N/A -**************************************************************************************************** -*/ -VOID ElemLib::AdjustSurfaceInfo( - ElemMode elemMode, ///< [in] element mode - UINT_32 expandX, ///< [in] decompression expansion factor in X - UINT_32 expandY, ///< [in] decompression expansion factor in Y - UINT_32* pBpp, ///< [in,out] bpp - UINT_32* pBasePitch, ///< [in,out] base pitch - UINT_32* pWidth, ///< [in,out] width - UINT_32* pHeight) ///< [in,out] height -{ - UINT_32 packedBits; - UINT_32 basePitch; - UINT_32 width; - UINT_32 height; - UINT_32 bpp; - BOOL_32 bBCnFormat = FALSE; - - ADDR_ASSERT(pBpp != NULL); - ADDR_ASSERT(pWidth != NULL && pHeight != NULL && pBasePitch != NULL); - - if 
(pBpp) - { - bpp = *pBpp; - - switch (elemMode) - { - case ADDR_EXPANDED: - packedBits = bpp / expandX / expandY; - break; - case ADDR_PACKED_STD: // Different bit order - case ADDR_PACKED_REV: - packedBits = bpp * expandX * expandY; - break; - case ADDR_PACKED_GBGR: - case ADDR_PACKED_BGRG: - packedBits = bpp; // 32-bit packed ==> 2 32-bit result - break; - case ADDR_PACKED_BC1: // Fall through - case ADDR_PACKED_BC4: - packedBits = 64; - bBCnFormat = TRUE; - break; - case ADDR_PACKED_BC2: // Fall through - case ADDR_PACKED_BC3: // Fall through - case ADDR_PACKED_BC5: // Fall through - bBCnFormat = TRUE; - // fall through - case ADDR_PACKED_ASTC: - case ADDR_PACKED_ETC2_128BPP: - packedBits = 128; - break; - case ADDR_PACKED_ETC2_64BPP: - packedBits = 64; - break; - case ADDR_ROUND_BY_HALF: // Fall through - case ADDR_ROUND_TRUNCATE: // Fall through - case ADDR_ROUND_DITHER: // Fall through - case ADDR_UNCOMPRESSED: - packedBits = bpp; - break; - default: - packedBits = bpp; - ADDR_ASSERT_ALWAYS(); - break; - } - - *pBpp = packedBits; - } - - if (pWidth && pHeight && pBasePitch) - { - basePitch = *pBasePitch; - width = *pWidth; - height = *pHeight; - - if ((expandX > 1) || (expandY > 1)) - { - if (elemMode == ADDR_EXPANDED) - { - basePitch *= expandX; - width *= expandX; - height *= expandY; - } - else - { - // Evergreen family workaround - if (bBCnFormat && (m_pAddrLib->GetChipFamily() == ADDR_CHIP_FAMILY_R8XX)) - { - // For BCn we now pad it to POW2 at the beginning so it is safe to - // divide by 4 directly - basePitch = basePitch / expandX; - width = width / expandX; - height = height / expandY; -#if DEBUG - width = (width == 0) ? 1 : width; - height = (height == 0) ? 
1 : height; - - if ((*pWidth > PowTwoAlign(width, 8) * expandX) || - (*pHeight > PowTwoAlign(height, 8) * expandY)) // 8 is 1D tiling alignment - { - // if this assertion is hit we may have issues if app samples - // rightmost/bottommost pixels - ADDR_ASSERT_ALWAYS(); - } -#endif - } - else // Not BCn format we still keep old way (FMT_1? No real test yet) - { - basePitch = (basePitch + expandX - 1) / expandX; - width = (width + expandX - 1) / expandX; - height = (height + expandY - 1) / expandY; - } - } - - *pBasePitch = basePitch; // 0 is legal value for base pitch. - *pWidth = (width == 0) ? 1 : width; - *pHeight = (height == 0) ? 1 : height; - } //if (pWidth && pHeight && pBasePitch) - } -} - -/** -**************************************************************************************************** -* ElemLib::RestoreSurfaceInfo -* -* @brief -* Reverse operation of AdjustSurfaceInfo -* -* @return -* N/A -**************************************************************************************************** -*/ -VOID ElemLib::RestoreSurfaceInfo( - ElemMode elemMode, ///< [in] element mode - UINT_32 expandX, ///< [in] decompression expansion factor in X - UINT_32 expandY, ///< [out] decompression expansion factor in Y - UINT_32* pBpp, ///< [in,out] bpp - UINT_32* pWidth, ///< [in,out] width - UINT_32* pHeight) ///< [in,out] height -{ - UINT_32 originalBits; - UINT_32 width; - UINT_32 height; - UINT_32 bpp; - - BOOL_32 bBCnFormat = FALSE; - (void)bBCnFormat; - - ADDR_ASSERT(pBpp != NULL); - ADDR_ASSERT(pWidth != NULL && pHeight != NULL); - - if (pBpp) - { - bpp = *pBpp; - - switch (elemMode) - { - case ADDR_EXPANDED: - originalBits = bpp * expandX * expandY; - break; - case ADDR_PACKED_STD: // Different bit order - case ADDR_PACKED_REV: - originalBits = bpp / expandX / expandY; - break; - case ADDR_PACKED_GBGR: - case ADDR_PACKED_BGRG: - originalBits = bpp; // 32-bit packed ==> 2 32-bit result - break; - case ADDR_PACKED_BC1: // Fall through - case ADDR_PACKED_BC4: - 
originalBits = 64; - bBCnFormat = TRUE; - break; - case ADDR_PACKED_BC2: // Fall through - case ADDR_PACKED_BC3: // Fall through - case ADDR_PACKED_BC5: - bBCnFormat = TRUE; - // fall through - case ADDR_PACKED_ASTC: - case ADDR_PACKED_ETC2_128BPP: - originalBits = 128; - break; - case ADDR_PACKED_ETC2_64BPP: - originalBits = 64; - break; - case ADDR_ROUND_BY_HALF: // Fall through - case ADDR_ROUND_TRUNCATE: // Fall through - case ADDR_ROUND_DITHER: // Fall through - case ADDR_UNCOMPRESSED: - originalBits = bpp; - break; - default: - originalBits = bpp; - ADDR_ASSERT_ALWAYS(); - break; - } - - *pBpp = originalBits; - } - - if (pWidth && pHeight) - { - width = *pWidth; - height = *pHeight; - - if ((expandX > 1) || (expandY > 1)) - { - if (elemMode == ADDR_EXPANDED) - { - width /= expandX; - height /= expandY; - } - else - { - width *= expandX; - height *= expandY; - } - } - - *pWidth = (width == 0) ? 1 : width; - *pHeight = (height == 0) ? 1 : height; - } -} - -/** -**************************************************************************************************** -* ElemLib::GetBitsPerPixel -* -* @brief -* Compute the total bits per element according to a format -* code. For compressed formats, this is not the same as -* the number of bits per decompressed element. 
-* -* @return -* Bits per pixel -**************************************************************************************************** -*/ -UINT_32 ElemLib::GetBitsPerPixel( - AddrFormat format, ///< [in] surface format code - ElemMode* pElemMode, ///< [out] element mode - UINT_32* pExpandX, ///< [out] decompression expansion factor in X - UINT_32* pExpandY, ///< [out] decompression expansion factor in Y - UINT_32* pUnusedBits) ///< [out] bits unused -{ - UINT_32 bpp; - UINT_32 expandX = 1; - UINT_32 expandY = 1; - UINT_32 bitUnused = 0; - ElemMode elemMode = ADDR_UNCOMPRESSED; // default value - - switch (format) - { - case ADDR_FMT_8: - bpp = 8; - break; - case ADDR_FMT_1_5_5_5: - case ADDR_FMT_5_6_5: - case ADDR_FMT_6_5_5: - case ADDR_FMT_8_8: - case ADDR_FMT_4_4_4_4: - case ADDR_FMT_16: - case ADDR_FMT_16_FLOAT: - bpp = 16; - break; - case ADDR_FMT_GB_GR: // treat as FMT_8_8 - elemMode = ADDR_PACKED_GBGR; - bpp = 16; - break; - case ADDR_FMT_BG_RG: // treat as FMT_8_8 - elemMode = ADDR_PACKED_BGRG; - bpp = 16; - break; - case ADDR_FMT_8_8_8_8: - case ADDR_FMT_2_10_10_10: - case ADDR_FMT_10_11_11: - case ADDR_FMT_11_11_10: - case ADDR_FMT_16_16: - case ADDR_FMT_16_16_FLOAT: - case ADDR_FMT_32: - case ADDR_FMT_32_FLOAT: - case ADDR_FMT_24_8: - case ADDR_FMT_24_8_FLOAT: - bpp = 32; - break; - case ADDR_FMT_16_16_16_16: - case ADDR_FMT_16_16_16_16_FLOAT: - case ADDR_FMT_32_32: - case ADDR_FMT_32_32_FLOAT: - case ADDR_FMT_CTX1: - bpp = 64; - break; - case ADDR_FMT_32_32_32_32: - case ADDR_FMT_32_32_32_32_FLOAT: - bpp = 128; - break; - case ADDR_FMT_INVALID: - bpp = 0; - break; - case ADDR_FMT_1_REVERSED: - elemMode = ADDR_PACKED_REV; - expandX = 8; - bpp = 1; - break; - case ADDR_FMT_1: - elemMode = ADDR_PACKED_STD; - expandX = 8; - bpp = 1; - break; - case ADDR_FMT_4_4: - case ADDR_FMT_3_3_2: - bpp = 8; - break; - case ADDR_FMT_5_5_5_1: - bpp = 16; - break; - case ADDR_FMT_32_AS_8: - case ADDR_FMT_32_AS_8_8: - case ADDR_FMT_8_24: - case ADDR_FMT_8_24_FLOAT: - case 
ADDR_FMT_10_10_10_2: - case ADDR_FMT_10_11_11_FLOAT: - case ADDR_FMT_11_11_10_FLOAT: - case ADDR_FMT_5_9_9_9_SHAREDEXP: - bpp = 32; - break; - case ADDR_FMT_X24_8_32_FLOAT: - bpp = 64; - bitUnused = 24; - break; - case ADDR_FMT_8_8_8: - elemMode = ADDR_EXPANDED; - bpp = 24;//@@ 8; // read 3 elements per pixel - expandX = 3; - break; - case ADDR_FMT_16_16_16: - case ADDR_FMT_16_16_16_FLOAT: - elemMode = ADDR_EXPANDED; - bpp = 48;//@@ 16; // read 3 elements per pixel - expandX = 3; - break; - case ADDR_FMT_32_32_32_FLOAT: - case ADDR_FMT_32_32_32: - elemMode = ADDR_EXPANDED; - expandX = 3; - bpp = 96;//@@ 32; // read 3 elements per pixel - break; - case ADDR_FMT_BC1: - elemMode = ADDR_PACKED_BC1; - expandX = 4; - expandY = 4; - bpp = 64; - break; - case ADDR_FMT_BC4: - elemMode = ADDR_PACKED_BC4; - expandX = 4; - expandY = 4; - bpp = 64; - break; - case ADDR_FMT_BC2: - elemMode = ADDR_PACKED_BC2; - expandX = 4; - expandY = 4; - bpp = 128; - break; - case ADDR_FMT_BC3: - elemMode = ADDR_PACKED_BC3; - expandX = 4; - expandY = 4; - bpp = 128; - break; - case ADDR_FMT_BC5: - case ADDR_FMT_BC6: // reuse ADDR_PACKED_BC5 - case ADDR_FMT_BC7: // reuse ADDR_PACKED_BC5 - elemMode = ADDR_PACKED_BC5; - expandX = 4; - expandY = 4; - bpp = 128; - break; - - case ADDR_FMT_ETC2_64BPP: - elemMode = ADDR_PACKED_ETC2_64BPP; - expandX = 4; - expandY = 4; - bpp = 64; - break; - - case ADDR_FMT_ETC2_128BPP: - elemMode = ADDR_PACKED_ETC2_128BPP; - expandX = 4; - expandY = 4; - bpp = 128; - break; - - case ADDR_FMT_ASTC_4x4: - elemMode = ADDR_PACKED_ASTC; - expandX = 4; - expandY = 4; - bpp = 128; - break; - - case ADDR_FMT_ASTC_5x4: - elemMode = ADDR_PACKED_ASTC; - expandX = 5; - expandY = 4; - bpp = 128; - break; - - case ADDR_FMT_ASTC_5x5: - elemMode = ADDR_PACKED_ASTC; - expandX = 5; - expandY = 5; - bpp = 128; - break; - - case ADDR_FMT_ASTC_6x5: - elemMode = ADDR_PACKED_ASTC; - expandX = 6; - expandY = 5; - bpp = 128; - break; - - case ADDR_FMT_ASTC_6x6: - elemMode = ADDR_PACKED_ASTC; 
- expandX = 6; - expandY = 6; - bpp = 128; - break; - - case ADDR_FMT_ASTC_8x5: - elemMode = ADDR_PACKED_ASTC; - expandX = 8; - expandY = 5; - bpp = 128; - break; - - case ADDR_FMT_ASTC_8x6: - elemMode = ADDR_PACKED_ASTC; - expandX = 8; - expandY = 6; - bpp = 128; - break; - - case ADDR_FMT_ASTC_8x8: - elemMode = ADDR_PACKED_ASTC; - expandX = 8; - expandY = 8; - bpp = 128; - break; - - case ADDR_FMT_ASTC_10x5: - elemMode = ADDR_PACKED_ASTC; - expandX = 10; - expandY = 5; - bpp = 128; - break; - - case ADDR_FMT_ASTC_10x6: - elemMode = ADDR_PACKED_ASTC; - expandX = 10; - expandY = 6; - bpp = 128; - break; - - case ADDR_FMT_ASTC_10x8: - elemMode = ADDR_PACKED_ASTC; - expandX = 10; - expandY = 8; - bpp = 128; - break; - - case ADDR_FMT_ASTC_10x10: - elemMode = ADDR_PACKED_ASTC; - expandX = 10; - expandY = 10; - bpp = 128; - break; - - case ADDR_FMT_ASTC_12x10: - elemMode = ADDR_PACKED_ASTC; - expandX = 12; - expandY = 10; - bpp = 128; - break; - - case ADDR_FMT_ASTC_12x12: - elemMode = ADDR_PACKED_ASTC; - expandX = 12; - expandY = 12; - bpp = 128; - break; - - default: - bpp = 0; - ADDR_ASSERT_ALWAYS(); - break; - // @@ or should this be an error? - } - - SafeAssign(pExpandX, expandX); - SafeAssign(pExpandY, expandY); - SafeAssign(pUnusedBits, bitUnused); - SafeAssign(reinterpret_cast(pElemMode), elemMode); - - return bpp; -} - -/** -**************************************************************************************************** -* ElemLib::GetCompBits -* -* @brief -* Set each component's bit size and bit start. 
And set element mode and number type -* -* @return -* N/A -**************************************************************************************************** -*/ -VOID ElemLib::GetCompBits( - UINT_32 c0, ///< [in] bits of component 0 - UINT_32 c1, ///< [in] bits of component 1 - UINT_32 c2, ///< [in] bits of component 2 - UINT_32 c3, ///< [in] bits of component 3 - PixelFormatInfo* pInfo, ///< [out] per component info out - ElemMode elemMode) ///< [in] element mode -{ - pInfo->comps = 0; - - pInfo->compBit[0] = c0; - pInfo->compBit[1] = c1; - pInfo->compBit[2] = c2; - pInfo->compBit[3] = c3; - - pInfo->compStart[0] = 0; - pInfo->compStart[1] = c0; - pInfo->compStart[2] = c0+c1; - pInfo->compStart[3] = c0+c1+c2; - - pInfo->elemMode = elemMode; - // still needed since component swap may depend on number of components - for (INT i=0; i<4; i++) - { - if (pInfo->compBit[i] == 0) - { - pInfo->compStart[i] = 0; // all null components start at bit 0 - pInfo->numType[i] = ADDR_NO_NUMBER; // and have no number type - } - else - { - pInfo->comps++; - } - } -} - -/** -**************************************************************************************************** -* ElemLib::GetCompBits -* -* @brief -* Set the clear color (or clear depth/stencil) for a surface -* -* @note -* If clearColor is zero, a default clear value is used in place of comps[4]. 
-* If float32 is set, full precision is used, else the mantissa is reduced to 12-bits -* -* @return -* N/A -**************************************************************************************************** -*/ -VOID ElemLib::SetClearComps( - ADDR_FLT_32 comps[4], ///< [in,out] components - BOOL_32 clearColor, ///< [in] TRUE if clear color is set (CLEAR_COLOR) - BOOL_32 float32) ///< [in] TRUE if float32 component (BLEND_FLOAT32) -{ - INT_32 i; - - // Use default clearvalues if clearColor is disabled - if (clearColor == FALSE) - { - for (i=0; i<3; i++) - { - comps[i].f = 0.0; - } - comps[3].f = 1.0; - } - - // Otherwise use the (modified) clear value - else - { - for (i=0; i<4; i++) - { // If full precision, use clear value unchanged - if (float32) - { - // Do nothing - //comps[i] = comps[i]; - } - // Else if it is a NaN, use the standard NaN value - else if ((comps[i].u & 0x7FFFFFFF) > 0x7F800000) - { - comps[i].u = 0xFFC00000; - } - // Else reduce the mantissa precision - else - { - comps[i].u = comps[i].u & 0xFFFFF000; - } - } - } -} - -/** -**************************************************************************************************** -* ElemLib::IsBlockCompressed -* -* @brief -* TRUE if this is block compressed format -* -* @note -* -* @return -* BOOL_32 -**************************************************************************************************** -*/ -BOOL_32 ElemLib::IsBlockCompressed( - AddrFormat format) ///< [in] Format -{ - return (((format >= ADDR_FMT_BC1) && (format <= ADDR_FMT_BC7)) || - ((format >= ADDR_FMT_ASTC_4x4) && (format <= ADDR_FMT_ETC2_128BPP))); -} - - -/** -**************************************************************************************************** -* ElemLib::IsCompressed -* -* @brief -* TRUE if this is block compressed format or 1 bit format -* -* @note -* -* @return -* BOOL_32 -**************************************************************************************************** -*/ -BOOL_32 ElemLib::IsCompressed( - 
AddrFormat format) ///< [in] Format -{ - return IsBlockCompressed(format) || format == ADDR_FMT_BC1 || format == ADDR_FMT_BC7; -} - -/** -**************************************************************************************************** -* ElemLib::IsExpand3x -* -* @brief -* TRUE if this is 3x expand format -* -* @note -* -* @return -* BOOL_32 -**************************************************************************************************** -*/ -BOOL_32 ElemLib::IsExpand3x( - AddrFormat format) ///< [in] Format -{ - BOOL_32 is3x = FALSE; - - switch (format) - { - case ADDR_FMT_8_8_8: - case ADDR_FMT_16_16_16: - case ADDR_FMT_16_16_16_FLOAT: - case ADDR_FMT_32_32_32: - case ADDR_FMT_32_32_32_FLOAT: - is3x = TRUE; - break; - default: - break; - } - - return is3x; -} - -/** -**************************************************************************************************** -* ElemLib::IsMacroPixelPacked -* -* @brief -* TRUE if this is a macro-pixel-packed format. -* -* @note -* -* @return -* BOOL_32 -**************************************************************************************************** -*/ -BOOL_32 ElemLib::IsMacroPixelPacked( - AddrFormat format) ///< [in] Format -{ - BOOL_32 isMacroPixelPacked = FALSE; - - switch (format) - { - case ADDR_FMT_BG_RG: - case ADDR_FMT_GB_GR: - isMacroPixelPacked = TRUE; - break; - default: - break; - } - - return isMacroPixelPacked; -} - -} diff -Nru mesa-18.3.3/src/amd/addrlib/core/addrelemlib.h mesa-19.0.1/src/amd/addrlib/core/addrelemlib.h --- mesa-18.3.3/src/amd/addrlib/core/addrelemlib.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/core/addrelemlib.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,279 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addrelemlib.h -* @brief Contains the class for element/pixel related functions. 
-**************************************************************************************************** -*/ - -#ifndef __ELEM_LIB_H__ -#define __ELEM_LIB_H__ - -#include "addrinterface.h" -#include "addrobject.h" -#include "addrcommon.h" - -namespace Addr -{ - -class Lib; - -// The masks for property bits within the Properties INT_32 -union ComponentFlags -{ - struct - { - UINT_32 byteAligned : 1; ///< all components are byte aligned - UINT_32 exportNorm : 1; ///< components support R6xx NORM compression - UINT_32 floatComp : 1; ///< there is at least one floating point component - }; - - UINT_32 value; -}; - -// Copy from legacy lib's NumberType -enum NumberType -{ - // The following number types have the range [-1..1] - ADDR_NO_NUMBER, // This component doesn't exist and has no default value - ADDR_EPSILON, // Force component value to integer 0x00000001 - ADDR_ZERO, // Force component value to integer 0x00000000 - ADDR_ONE, // Force component value to floating point 1.0 - // Above values don't have any bits per component (keep ADDR_ONE the last of these) - - ADDR_UNORM, // Unsigned normalized (repeating fraction) full precision - ADDR_SNORM, // Signed normalized (repeating fraction) full precision - ADDR_GAMMA, // Gamma-corrected, full precision - - ADDR_UNORM_R5XXRB, // Unsigned normalized (repeating fraction) for r5xx RB - ADDR_SNORM_R5XXRB, // Signed normalized (repeating fraction) for r5xx RB - ADDR_GAMMA_R5XXRB, // Gamma-corrected for r5xx RB (note: unnormalized value) - ADDR_UNORM_R5XXBC, // Unsigned normalized (repeating fraction) for r5xx BC - ADDR_SNORM_R5XXBC, // Signed normalized (repeating fraction) for r5xx BC - ADDR_GAMMA_R5XXBC, // Gamma-corrected for r5xx BC (note: unnormalized value) - - ADDR_UNORM_R6XX, // Unsigned normalized (repeating fraction) for R6xx - ADDR_UNORM_R6XXDB, // Unorms for 24-bit depth: one value differs from ADDR_UNORM_R6XX - ADDR_SNORM_R6XX, // Signed normalized (repeating fraction) for R6xx - ADDR_GAMMA8_R6XX, // 
Gamma-corrected for r6xx - ADDR_GAMMA8_R7XX_TP, // Gamma-corrected for r7xx TP 12bit unorm 8.4. - - ADDR_U4FLOATC, // Unsigned float: 4-bit exponent, bias=15, no NaN, clamp [0..1] - ADDR_GAMMA_4SEG, // Gamma-corrected, four segment approximation - ADDR_U0FIXED, // Unsigned 0.N-bit fixed point - - // The following number types have large ranges (LEAVE ADDR_USCALED first or fix Finish routine) - ADDR_USCALED, // Unsigned integer converted to/from floating point - ADDR_SSCALED, // Signed integer converted to/from floating point - ADDR_USCALED_R5XXRB, // Unsigned integer to/from floating point for r5xx RB - ADDR_SSCALED_R5XXRB, // Signed integer to/from floating point for r5xx RB - ADDR_UINT_BITS, // Keep in unsigned integer form, clamped to specified range - ADDR_SINT_BITS, // Keep in signed integer form, clamped to specified range - ADDR_UINTBITS, // @@ remove Keep in unsigned integer form, use modulus to reduce bits - ADDR_SINTBITS, // @@ remove Keep in signed integer form, use modulus to reduce bits - - // The following number types and ADDR_U4FLOATC have exponents - // (LEAVE ADDR_S8FLOAT first or fix Finish routine) - ADDR_S8FLOAT, // Signed floating point with 8-bit exponent, bias=127 - ADDR_S8FLOAT32, // 32-bit IEEE float, passes through NaN values - ADDR_S5FLOAT, // Signed floating point with 5-bit exponent, bias=15 - ADDR_S5FLOATM, // Signed floating point with 5-bit exponent, bias=15, no NaN/Inf - ADDR_U5FLOAT, // Signed floating point with 5-bit exponent, bias=15 - ADDR_U3FLOATM, // Unsigned floating point with 3-bit exponent, bias=3 - - ADDR_S5FIXED, // Signed 5.N-bit fixed point, with rounding - - ADDR_END_NUMBER // Used for range comparisons -}; - -// Copy from legacy lib's AddrElement -enum ElemMode -{ - // These formats allow both packing an unpacking - ADDR_ROUND_BY_HALF, // add 1/2 and truncate when packing this element - ADDR_ROUND_TRUNCATE, // truncate toward 0 for sign/mag, else toward neg - ADDR_ROUND_DITHER, // Pack by dithering -- requires 
(x,y) position - - // These formats only allow unpacking, no packing - ADDR_UNCOMPRESSED, // Elements are not compressed: one data element per pixel/texel - ADDR_EXPANDED, // Elements are split up and stored in multiple data elements - ADDR_PACKED_STD, // Elements are compressed into ExpandX by ExpandY data elements - ADDR_PACKED_REV, // Like ADDR_PACKED, but X order of pixels is reverved - ADDR_PACKED_GBGR, // Elements are compressed 4:2:2 in G1B_G0R order (high to low) - ADDR_PACKED_BGRG, // Elements are compressed 4:2:2 in BG1_RG0 order (high to low) - ADDR_PACKED_BC1, // Each data element is uncompressed to a 4x4 pixel/texel array - ADDR_PACKED_BC2, // Each data element is uncompressed to a 4x4 pixel/texel array - ADDR_PACKED_BC3, // Each data element is uncompressed to a 4x4 pixel/texel array - ADDR_PACKED_BC4, // Each data element is uncompressed to a 4x4 pixel/texel array - ADDR_PACKED_BC5, // Each data element is uncompressed to a 4x4 pixel/texel array - ADDR_PACKED_ETC2_64BPP, // ETC2 formats that use 64bpp to represent each 4x4 block - ADDR_PACKED_ETC2_128BPP, // ETC2 formats that use 128bpp to represent each 4x4 block - ADDR_PACKED_ASTC, // Various ASTC formats, all are 128bpp with varying block sizes - - // These formats provide various kinds of compression - ADDR_ZPLANE_R5XX, // Compressed Zplane using r5xx architecture format - ADDR_ZPLANE_R6XX, // Compressed Zplane using r6xx architecture format - //@@ Fill in the compression modes - - ADDR_END_ELEMENT // Used for range comparisons -}; - -enum DepthPlanarType -{ - ADDR_DEPTH_PLANAR_NONE = 0, // No plane z/stencl - ADDR_DEPTH_PLANAR_R600 = 1, // R600 z and stencil planes are store within a tile - ADDR_DEPTH_PLANAR_R800 = 2, // R800 has separate z and stencil planes -}; - -/** -**************************************************************************************************** -* PixelFormatInfo -* -* @brief -* Per component info -* 
-**************************************************************************************************** -*/ -struct PixelFormatInfo -{ - UINT_32 compBit[4]; - NumberType numType[4]; - UINT_32 compStart[4]; - ElemMode elemMode; - UINT_32 comps; ///< Number of components -}; - -/** -**************************************************************************************************** -* @brief This class contains asic indepentent element related attributes and operations -**************************************************************************************************** -*/ -class ElemLib : public Object -{ -protected: - ElemLib(Lib* pAddrLib); - -public: - - /// Makes this class virtual - virtual ~ElemLib(); - - static ElemLib* Create( - const Lib* pAddrLib); - - /// The implementation is only for R6xx/R7xx, so make it virtual in case we need for R8xx - BOOL_32 PixGetExportNorm( - AddrColorFormat colorFmt, - AddrSurfaceNumber numberFmt, AddrSurfaceSwap swap) const; - - /// Below method are asic independent, so make them just static. - /// Remove static if we need different operation in hwl. 
- - VOID Flt32ToDepthPixel( - AddrDepthFormat format, const ADDR_FLT_32 comps[2], UINT_8 *pPixel) const; - - VOID Flt32ToColorPixel( - AddrColorFormat format, AddrSurfaceNumber surfNum, AddrSurfaceSwap surfSwap, - const ADDR_FLT_32 comps[4], UINT_8 *pPixel) const; - - static VOID Flt32sToInt32s( - ADDR_FLT_32 value, UINT_32 bits, NumberType numberType, UINT_32* pResult); - - static VOID Int32sToPixel( - UINT_32 numComps, UINT_32* pComps, UINT_32* pCompBits, UINT_32* pCompStart, - ComponentFlags properties, UINT_32 resultBits, UINT_8* pPixel); - - VOID PixGetColorCompInfo( - AddrColorFormat format, AddrSurfaceNumber number, AddrSurfaceSwap swap, - PixelFormatInfo* pInfo) const; - - VOID PixGetDepthCompInfo( - AddrDepthFormat format, PixelFormatInfo* pInfo) const; - - UINT_32 GetBitsPerPixel( - AddrFormat format, ElemMode* pElemMode = NULL, - UINT_32* pExpandX = NULL, UINT_32* pExpandY = NULL, UINT_32* pBitsUnused = NULL); - - static VOID SetClearComps( - ADDR_FLT_32 comps[4], BOOL_32 clearColor, BOOL_32 float32); - - VOID AdjustSurfaceInfo( - ElemMode elemMode, UINT_32 expandX, UINT_32 expandY, - UINT_32* pBpp, UINT_32* pBasePitch, UINT_32* pWidth, UINT_32* pHeight); - - VOID RestoreSurfaceInfo( - ElemMode elemMode, UINT_32 expandX, UINT_32 expandY, - UINT_32* pBpp, UINT_32* pWidth, UINT_32* pHeight); - - /// Checks if depth and stencil are planar inside a tile - BOOL_32 IsDepthStencilTilePlanar() - { - return (m_depthPlanarType == ADDR_DEPTH_PLANAR_R600) ? 
TRUE : FALSE; - } - - /// Sets m_configFlags, copied from AddrLib - VOID SetConfigFlags(ConfigFlags flags) - { - m_configFlags = flags; - } - - static BOOL_32 IsCompressed(AddrFormat format); - static BOOL_32 IsBlockCompressed(AddrFormat format); - static BOOL_32 IsExpand3x(AddrFormat format); - static BOOL_32 IsMacroPixelPacked(AddrFormat format); - -protected: - - static VOID GetCompBits( - UINT_32 c0, UINT_32 c1, UINT_32 c2, UINT_32 c3, - PixelFormatInfo* pInfo, - ElemMode elemMode = ADDR_ROUND_BY_HALF); - - static VOID GetCompType( - AddrColorFormat format, AddrSurfaceNumber numType, - PixelFormatInfo* pInfo); - - static VOID GetCompSwap( - AddrSurfaceSwap swap, PixelFormatInfo* pInfo); - - static VOID SwapComps( - UINT_32 c0, UINT_32 c1, PixelFormatInfo* pInfo); - -private: - - UINT_32 m_fp16ExportNorm; ///< If allow FP16 to be reported as EXPORT_NORM - DepthPlanarType m_depthPlanarType; - - ConfigFlags m_configFlags; ///< Copy of AddrLib's configFlags - Addr::Lib* const m_pAddrLib; ///< Pointer to parent addrlib instance -}; - -} //Addr - -#endif - diff -Nru mesa-18.3.3/src/amd/addrlib/core/addrlib1.cpp mesa-19.0.1/src/amd/addrlib/core/addrlib1.cpp --- mesa-18.3.3/src/amd/addrlib/core/addrlib1.cpp 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/core/addrlib1.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,4076 +0,0 @@ -/* - * Copyright © 2016 Advanced Micro Devices, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addr1lib.cpp -* @brief Contains the implementation for the Addr::V1::Lib base class. 
-**************************************************************************************************** -*/ - -#include "addrinterface.h" -#include "addrlib1.h" -#include "addrcommon.h" - -namespace Addr -{ -namespace V1 -{ - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Static Const Member -//////////////////////////////////////////////////////////////////////////////////////////////////// - -const TileModeFlags Lib::ModeFlags[ADDR_TM_COUNT] = -{// T L 1 2 3 P Pr B - {1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_TM_LINEAR_GENERAL - {1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_TM_LINEAR_ALIGNED - {1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_TM_1D_TILED_THIN1 - {4, 0, 1, 0, 0, 0, 0, 0}, // ADDR_TM_1D_TILED_THICK - {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN1 - {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN2 - {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN4 - {4, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THICK - {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN1 - {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN2 - {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN4 - {4, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THICK - {1, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_THIN1 - {4, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_THICK - {1, 0, 0, 1, 1, 0, 0, 1}, // ADDR_TM_3B_TILED_THIN1 - {4, 0, 0, 1, 1, 0, 0, 1}, // ADDR_TM_3B_TILED_THICK - {8, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_XTHICK - {8, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_XTHICK - {1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_TM_POWER_SAVE - {1, 0, 0, 1, 0, 1, 1, 0}, // ADDR_TM_PRT_TILED_THIN1 - {1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_TM_PRT_2D_TILED_THIN1 - {1, 0, 0, 1, 1, 1, 0, 0}, // ADDR_TM_PRT_3D_TILED_THIN1 - {4, 0, 0, 1, 0, 1, 1, 0}, // ADDR_TM_PRT_TILED_THICK - {4, 0, 0, 1, 0, 1, 0, 0}, // ADDR_TM_PRT_2D_TILED_THICK - {4, 0, 0, 1, 1, 1, 0, 0}, // ADDR_TM_PRT_3D_TILED_THICK - {0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_TM_UNKNOWN -}; - 
-//////////////////////////////////////////////////////////////////////////////////////////////////// -// Constructor/Destructor -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* Lib::AddrLib1 -* -* @brief -* Constructor for the AddrLib1 class -* -**************************************************************************************************** -*/ -Lib::Lib() - : - Addr::Lib() -{ -} - -/** -**************************************************************************************************** -* Lib::Lib -* -* @brief -* Constructor for the Addr::V1::Lib class with hClient as parameter -* -**************************************************************************************************** -*/ -Lib::Lib(const Client* pClient) - : - Addr::Lib(pClient) -{ -} - -/** -**************************************************************************************************** -* Lib::~AddrLib1 -* -* @brief -* Destructor for the AddrLib1 class -* -**************************************************************************************************** -*/ -Lib::~Lib() -{ -} - -/** -**************************************************************************************************** -* Lib::GetLib -* -* @brief -* Get AddrLib1 pointer -* -* @return -* An Addr::V1::Lib class pointer -**************************************************************************************************** -*/ -Lib* Lib::GetLib( - ADDR_HANDLE hLib) ///< [in] handle of ADDR_HANDLE -{ - Addr::Lib* pAddrLib = Addr::Lib::GetLib(hLib); - if ((pAddrLib != NULL) && - ((pAddrLib->GetChipFamily() == ADDR_CHIP_FAMILY_IVLD) || - (pAddrLib->GetChipFamily() > ADDR_CHIP_FAMILY_VI))) - { - // only valid and pre-VI AISC can use AddrLib1 function. 
- ADDR_ASSERT_ALWAYS(); - hLib = NULL; - } - return static_cast(hLib); -} - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Surface Methods -//////////////////////////////////////////////////////////////////////////////////////////////////// - - -/** -**************************************************************************************************** -* Lib::ComputeSurfaceInfo -* -* @brief -* Interface function stub of AddrComputeSurfaceInfo. -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - // We suggest client do sanity check but a check here is also good - if (pIn->bpp > 128) - { - returnCode = ADDR_INVALIDPARAMS; - } - - if ((pIn->tileMode == ADDR_TM_UNKNOWN) && (pIn->mipLevel > 0)) - { - returnCode = ADDR_INVALIDPARAMS; - } - - // Thick modes don't support multisample - if ((Thickness(pIn->tileMode) > 1) && (pIn->numSamples > 1)) - { - returnCode = ADDR_INVALIDPARAMS; - } - - if (returnCode == ADDR_OK) - { - // Get a local copy of input structure and only reference pIn for unadjusted values - ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn; - ADDR_TILEINFO tileInfoNull = {0}; - - if (UseTileInfo()) - { - // If the original input has a valid ADDR_TILEINFO pointer then copy its contents. - // Otherwise the default 0's in tileInfoNull are used. 
- if (pIn->pTileInfo) - { - tileInfoNull = *pIn->pTileInfo; - } - localIn.pTileInfo = &tileInfoNull; - } - - localIn.numSamples = (pIn->numSamples == 0) ? 1 : pIn->numSamples; - - // Do mipmap check first - // If format is BCn, pre-pad dimension to power-of-two according to HWL - ComputeMipLevel(&localIn); - - if (m_configFlags.checkLast2DLevel) - { - // Save this level's original height in pixels - pOut->height = pIn->height; - } - - UINT_32 expandX = 1; - UINT_32 expandY = 1; - ElemMode elemMode; - - // Save outputs that may not go through HWL - pOut->pixelBits = localIn.bpp; - pOut->numSamples = localIn.numSamples; - pOut->last2DLevel = FALSE; - pOut->tcCompatible = FALSE; - -#if !ALT_TEST - if (localIn.numSamples > 1) - { - ADDR_ASSERT(localIn.mipLevel == 0); - } -#endif - - if (localIn.format != ADDR_FMT_INVALID) // Set format to INVALID will skip this conversion - { - // Get compression/expansion factors and element mode - // (which indicates compression/expansion - localIn.bpp = GetElemLib()->GetBitsPerPixel(localIn.format, - &elemMode, - &expandX, - &expandY); - - // Special flag for 96 bit surface. 96 (or 48 if we support) bit surface's width is - // pre-multiplied by 3 and bpp is divided by 3. So pitch alignment for linear- - // aligned does not meet 64-pixel in real. We keep special handling in hwl since hw - // restrictions are different. - // Also Mip 1+ needs an element pitch of 32 bits so we do not need this workaround - // but we use this flag to skip RestoreSurfaceInfo below - - if ((elemMode == ADDR_EXPANDED) && (expandX > 1)) - { - ADDR_ASSERT(IsLinear(localIn.tileMode)); - } - - GetElemLib()->AdjustSurfaceInfo(elemMode, - expandX, - expandY, - &localIn.bpp, - &localIn.basePitch, - &localIn.width, - &localIn.height); - - // Overwrite these parameters if we have a valid format - } - else if (localIn.bpp != 0) - { - localIn.width = (localIn.width != 0) ? localIn.width : 1; - localIn.height = (localIn.height != 0) ? 
localIn.height : 1; - } - else // Rule out some invalid parameters - { - ADDR_ASSERT_ALWAYS(); - - returnCode = ADDR_INVALIDPARAMS; - } - - // Check mipmap after surface expansion - if (returnCode == ADDR_OK) - { - returnCode = PostComputeMipLevel(&localIn, pOut); - } - - if (returnCode == ADDR_OK) - { - if (UseTileIndex(localIn.tileIndex)) - { - // Make sure pTileInfo is not NULL - ADDR_ASSERT(localIn.pTileInfo); - - UINT_32 numSamples = GetNumFragments(localIn.numSamples, localIn.numFrags); - - INT_32 macroModeIndex = TileIndexNoMacroIndex; - - if (localIn.tileIndex != TileIndexLinearGeneral) - { - // Try finding a macroModeIndex - macroModeIndex = HwlComputeMacroModeIndex(localIn.tileIndex, - localIn.flags, - localIn.bpp, - numSamples, - localIn.pTileInfo, - &localIn.tileMode, - &localIn.tileType); - } - - // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info - if (macroModeIndex == TileIndexNoMacroIndex) - { - returnCode = HwlSetupTileCfg(localIn.bpp, - localIn.tileIndex, macroModeIndex, - localIn.pTileInfo, - &localIn.tileMode, &localIn.tileType); - } - // If macroModeIndex is invalid, then assert this is not macro tiled - else if (macroModeIndex == TileIndexInvalid) - { - ADDR_ASSERT(!IsMacroTiled(localIn.tileMode)); - } - - pOut->macroModeIndex = macroModeIndex; - } - } - - if (returnCode == ADDR_OK) - { - localIn.flags.dccPipeWorkaround = localIn.flags.dccCompatible; - - if (localIn.tileMode == ADDR_TM_UNKNOWN) - { - // HWL layer may override tile mode if necessary - HwlSelectTileMode(&localIn); - } - else - { - // HWL layer may override tile mode if necessary - HwlOverrideTileMode(&localIn); - - // Optimize tile mode if possible - OptimizeTileMode(&localIn); - } - } - - // Call main function to compute surface info - if (returnCode == ADDR_OK) - { - returnCode = HwlComputeSurfaceInfo(&localIn, pOut); - } - - if (returnCode == ADDR_OK) - { - // Since bpp might be changed we just pass it through - pOut->bpp = localIn.bpp; - - // Also 
original width/height/bpp - pOut->pixelPitch = pOut->pitch; - pOut->pixelHeight = pOut->height; - -#if DEBUG - if (localIn.flags.display) - { - ADDR_ASSERT((pOut->pitchAlign % 32) == 0); - } -#endif //DEBUG - - if (localIn.format != ADDR_FMT_INVALID) - { - // - // Note: For 96 bit surface, the pixelPitch returned might be an odd number, but it - // is okay to program texture pitch as HW's mip calculator would multiply 3 first, - // then do the appropriate paddings (linear alignment requirement and possible the - // nearest power-of-two for mipmaps), which results in the original pitch. - // - GetElemLib()->RestoreSurfaceInfo(elemMode, - expandX, - expandY, - &localIn.bpp, - &pOut->pixelPitch, - &pOut->pixelHeight); - } - - if (localIn.flags.qbStereo) - { - if (pOut->pStereoInfo) - { - ComputeQbStereoInfo(pOut); - } - } - - if (localIn.flags.volume) // For volume sliceSize equals to all z-slices - { - pOut->sliceSize = pOut->surfSize; - } - else // For array: sliceSize is likely to have slice-padding (the last one) - { - pOut->sliceSize = pOut->surfSize / pOut->depth; - - // array or cubemap - if (pIn->numSlices > 1) - { - // If this is the last slice then add the padding size to this slice - if (pIn->slice == (pIn->numSlices - 1)) - { - pOut->sliceSize += pOut->sliceSize * (pOut->depth - pIn->numSlices); - } - else if (m_configFlags.checkLast2DLevel) - { - // Reset last2DLevel flag if this is not the last array slice - pOut->last2DLevel = FALSE; - } - } - } - - pOut->pitchTileMax = pOut->pitch / 8 - 1; - pOut->heightTileMax = pOut->height / 8 - 1; - pOut->sliceTileMax = pOut->pitch * pOut->height / 64 - 1; - } - } - - ValidBaseAlignments(pOut->baseAlign); - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ComputeSurfaceInfo -* -* @brief -* Interface function stub of AddrComputeSurfaceInfo. 
-* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoord( - const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - input.pTileInfo = &tileInfoNull; - - const ADDR_SURFACE_FLAGS flags = {{0}}; - UINT_32 numSamples = GetNumFragments(pIn->numSamples, pIn->numFrags); - - // Try finding a macroModeIndex - INT_32 macroModeIndex = HwlComputeMacroModeIndex(input.tileIndex, - flags, - input.bpp, - numSamples, - input.pTileInfo, - &input.tileMode, - &input.tileType); - - // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info - if (macroModeIndex == TileIndexNoMacroIndex) - { - returnCode = HwlSetupTileCfg(input.bpp, input.tileIndex, macroModeIndex, - input.pTileInfo, &input.tileMode, &input.tileType); - } - // If macroModeIndex is invalid, then assert this is not macro tiled - else if (macroModeIndex == TileIndexInvalid) - { - ADDR_ASSERT(!IsMacroTiled(input.tileMode)); - } - - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - returnCode = HwlComputeSurfaceAddrFromCoord(pIn, pOut); - - if (returnCode == ADDR_OK) - { - pOut->prtBlockIndex = static_cast(pOut->addr / (64 * 1024)); - } - } - } - - return returnCode; -} - -/** 
-**************************************************************************************************** -* Lib::ComputeSurfaceCoordFromAddr -* -* @brief -* Interface function stub of ComputeSurfaceCoordFromAddr. -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddr( - const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - input.pTileInfo = &tileInfoNull; - - const ADDR_SURFACE_FLAGS flags = {{0}}; - UINT_32 numSamples = GetNumFragments(pIn->numSamples, pIn->numFrags); - - // Try finding a macroModeIndex - INT_32 macroModeIndex = HwlComputeMacroModeIndex(input.tileIndex, - flags, - input.bpp, - numSamples, - input.pTileInfo, - &input.tileMode, - &input.tileType); - - // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info - if (macroModeIndex == TileIndexNoMacroIndex) - { - returnCode = HwlSetupTileCfg(input.bpp, input.tileIndex, macroModeIndex, - input.pTileInfo, &input.tileMode, &input.tileType); - } - // If macroModeIndex is invalid, then assert this is not macro tiled - else if (macroModeIndex == TileIndexInvalid) - { - ADDR_ASSERT(!IsMacroTiled(input.tileMode)); - } - - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - returnCode = HwlComputeSurfaceCoordFromAddr(pIn, 
pOut); - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ComputeSliceTileSwizzle -* -* @brief -* Interface function stub of ComputeSliceTileSwizzle. -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeSliceTileSwizzle( - const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_SLICESWIZZLE_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_SLICESWIZZLE_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMPUTE_SLICESWIZZLE_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - input.pTileInfo = &tileInfoNull; - - returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, - input.pTileInfo, &input.tileMode); - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - returnCode = HwlComputeSliceTileSwizzle(pIn, pOut); - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ExtractBankPipeSwizzle -* -* @brief -* Interface function stub of AddrExtractBankPipeSwizzle. 
-* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ExtractBankPipeSwizzle( - const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, ///< [in] input structure - ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT)) || - (pOut->size != sizeof(ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - input.pTileInfo = &tileInfoNull; - - returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - returnCode = HwlExtractBankPipeSwizzle(pIn, pOut); - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::CombineBankPipeSwizzle -* -* @brief -* Interface function stub of AddrCombineBankPipeSwizzle. 
-* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::CombineBankPipeSwizzle( - const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn, ///< [in] input structure - ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_INFO_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - input.pTileInfo = &tileInfoNull; - - returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - returnCode = HwlCombineBankPipeSwizzle(pIn->bankSwizzle, - pIn->pipeSwizzle, - pIn->pTileInfo, - pIn->baseAddr, - &pOut->tileSwizzle); - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ComputeBaseSwizzle -* -* @brief -* Interface function stub of AddrCompueBaseSwizzle. 
-* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeBaseSwizzle( - const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, - ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMPUTE_BASE_SWIZZLE_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - input.pTileInfo = &tileInfoNull; - - returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - if (IsMacroTiled(pIn->tileMode)) - { - returnCode = HwlComputeBaseSwizzle(pIn, pOut); - } - else - { - pOut->tileSwizzle = 0; - } - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ComputeFmaskInfo -* -* @brief -* Interface function stub of ComputeFmaskInfo. 
-* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeFmaskInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut ///< [out] output structure - ) -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_INFO_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - // No thick MSAA - if (Thickness(pIn->tileMode) > 1) - { - returnCode = ADDR_INVALIDPARAMS; - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMPUTE_FMASK_INFO_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - - if (pOut->pTileInfo) - { - // Use temp tile info for calcalation - input.pTileInfo = pOut->pTileInfo; - } - else - { - input.pTileInfo = &tileInfoNull; - } - - ADDR_SURFACE_FLAGS flags = {{0}}; - flags.fmask = 1; - - // Try finding a macroModeIndex - INT_32 macroModeIndex = HwlComputeMacroModeIndex(pIn->tileIndex, - flags, - HwlComputeFmaskBits(pIn, NULL), - pIn->numSamples, - input.pTileInfo, - &input.tileMode); - - // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info - if (macroModeIndex == TileIndexNoMacroIndex) - { - returnCode = HwlSetupTileCfg(0, input.tileIndex, macroModeIndex, - input.pTileInfo, &input.tileMode); - } - - ADDR_ASSERT(macroModeIndex != TileIndexInvalid); - - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - if (pIn->numSamples > 1) - { - returnCode = HwlComputeFmaskInfo(pIn, pOut); - } - else - { - memset(pOut, 0, sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT)); - - returnCode = ADDR_INVALIDPARAMS; - } - } - } - - ValidBaseAlignments(pOut->baseAlign); - - return returnCode; -} - -/** 
-**************************************************************************************************** -* Lib::ComputeFmaskAddrFromCoord -* -* @brief -* Interface function stub of ComputeFmaskAddrFromCoord. -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeFmaskAddrFromCoord( - const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_ASSERT(pIn->numSamples > 1); - - if (pIn->numSamples > 1) - { - returnCode = HwlComputeFmaskAddrFromCoord(pIn, pOut); - } - else - { - returnCode = ADDR_INVALIDPARAMS; - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ComputeFmaskCoordFromAddr -* -* @brief -* Interface function stub of ComputeFmaskAddrFromCoord. 
-* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeFmaskCoordFromAddr( - const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_ASSERT(pIn->numSamples > 1); - - if (pIn->numSamples > 1) - { - returnCode = HwlComputeFmaskCoordFromAddr(pIn, pOut); - } - else - { - returnCode = ADDR_INVALIDPARAMS; - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ConvertTileInfoToHW -* -* @brief -* Convert tile info from real value to HW register value in HW layer -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ConvertTileInfoToHW( - const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] input structure - ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_CONVERT_TILEINFOTOHW_INPUT)) || - (pOut->size != sizeof(ADDR_CONVERT_TILEINFOTOHW_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_CONVERT_TILEINFOTOHW_INPUT input; - // if pIn->reverse is TRUE, indices are ignored - if (pIn->reverse == FALSE && UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - input.pTileInfo = &tileInfoNull; - - returnCode = 
HwlSetupTileCfg(input.bpp, input.tileIndex, - input.macroModeIndex, input.pTileInfo); - - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - returnCode = HwlConvertTileInfoToHW(pIn, pOut); - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ConvertTileIndex -* -* @brief -* Convert tile index to tile mode/type/info -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ConvertTileIndex( - const ADDR_CONVERT_TILEINDEX_INPUT* pIn, ///< [in] input structure - ADDR_CONVERT_TILEINDEX_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_CONVERT_TILEINDEX_INPUT)) || - (pOut->size != sizeof(ADDR_CONVERT_TILEINDEX_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - - returnCode = HwlSetupTileCfg(pIn->bpp, pIn->tileIndex, pIn->macroModeIndex, - pOut->pTileInfo, &pOut->tileMode, &pOut->tileType); - - if (returnCode == ADDR_OK && pIn->tileInfoHw) - { - ADDR_CONVERT_TILEINFOTOHW_INPUT hwInput = {0}; - ADDR_CONVERT_TILEINFOTOHW_OUTPUT hwOutput = {0}; - - hwInput.pTileInfo = pOut->pTileInfo; - hwInput.tileIndex = -1; - hwOutput.pTileInfo = pOut->pTileInfo; - - returnCode = HwlConvertTileInfoToHW(&hwInput, &hwOutput); - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::GetMacroModeIndex -* -* @brief -* Get macro mode index based on input info -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::GetMacroModeIndex( - const ADDR_GET_MACROMODEINDEX_INPUT* pIn, ///< [in] input structure - 
ADDR_GET_MACROMODEINDEX_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags()) - { - if ((pIn->size != sizeof(ADDR_GET_MACROMODEINDEX_INPUT)) || - (pOut->size != sizeof(ADDR_GET_MACROMODEINDEX_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfo = {0}; - pOut->macroModeIndex = HwlComputeMacroModeIndex(pIn->tileIndex, pIn->flags, pIn->bpp, - pIn->numFrags, &tileInfo); - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ConvertTileIndex1 -* -* @brief -* Convert tile index to tile mode/type/info -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ConvertTileIndex1( - const ADDR_CONVERT_TILEINDEX1_INPUT* pIn, ///< [in] input structure - ADDR_CONVERT_TILEINDEX_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_CONVERT_TILEINDEX1_INPUT)) || - (pOut->size != sizeof(ADDR_CONVERT_TILEINDEX_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_SURFACE_FLAGS flags = {{0}}; - - HwlComputeMacroModeIndex(pIn->tileIndex, flags, pIn->bpp, pIn->numSamples, - pOut->pTileInfo, &pOut->tileMode, &pOut->tileType); - - if (pIn->tileInfoHw) - { - ADDR_CONVERT_TILEINFOTOHW_INPUT hwInput = {0}; - ADDR_CONVERT_TILEINFOTOHW_OUTPUT hwOutput = {0}; - - hwInput.pTileInfo = pOut->pTileInfo; - hwInput.tileIndex = -1; - hwOutput.pTileInfo = pOut->pTileInfo; - - returnCode = HwlConvertTileInfoToHW(&hwInput, &hwOutput); - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::GetTileIndex -* -* 
@brief -* Get tile index from tile mode/type/info -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::GetTileIndex( - const ADDR_GET_TILEINDEX_INPUT* pIn, ///< [in] input structure - ADDR_GET_TILEINDEX_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_GET_TILEINDEX_INPUT)) || - (pOut->size != sizeof(ADDR_GET_TILEINDEX_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - returnCode = HwlGetTileIndex(pIn, pOut); - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::Thickness -* -* @brief -* Get tile mode thickness -* -* @return -* Tile mode thickness -**************************************************************************************************** -*/ -UINT_32 Lib::Thickness( - AddrTileMode tileMode) ///< [in] tile mode -{ - return ModeFlags[tileMode].thickness; -} - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// CMASK/HTILE -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* Lib::ComputeHtileInfo -* -* @brief -* Interface function stub of AddrComputeHtilenfo -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeHtileInfo( - const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - BOOL_32 isWidth8 = (pIn->blockWidth == 8) ? 
TRUE : FALSE; - BOOL_32 isHeight8 = (pIn->blockHeight == 8) ? TRUE : FALSE; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMPUTE_HTILE_INFO_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - input.pTileInfo = &tileInfoNull; - - returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); - - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - if (pIn->flags.tcCompatible) - { - const UINT_32 sliceSize = pIn->pitch * pIn->height * 4 / (8 * 8); - const UINT_32 align = HwlGetPipes(pIn->pTileInfo) * pIn->pTileInfo->banks * m_pipeInterleaveBytes; - - if (pIn->numSlices > 1) - { - const UINT_32 surfBytes = (sliceSize * pIn->numSlices); - - pOut->sliceSize = sliceSize; - pOut->htileBytes = pIn->flags.skipTcCompatSizeAlign ? - surfBytes : PowTwoAlign(surfBytes, align); - pOut->sliceInterleaved = ((sliceSize % align) != 0) ? TRUE : FALSE; - } - else - { - pOut->sliceSize = pIn->flags.skipTcCompatSizeAlign ? - sliceSize : PowTwoAlign(sliceSize, align); - pOut->htileBytes = pOut->sliceSize; - pOut->sliceInterleaved = FALSE; - } - - pOut->nextMipLevelCompressible = ((sliceSize % align) == 0) ? 
TRUE : FALSE; - - pOut->pitch = pIn->pitch; - pOut->height = pIn->height; - pOut->baseAlign = align; - pOut->macroWidth = 0; - pOut->macroHeight = 0; - pOut->bpp = 32; - } - else - { - pOut->bpp = ComputeHtileInfo(pIn->flags, - pIn->pitch, - pIn->height, - pIn->numSlices, - pIn->isLinear, - isWidth8, - isHeight8, - pIn->pTileInfo, - &pOut->pitch, - &pOut->height, - &pOut->htileBytes, - &pOut->macroWidth, - &pOut->macroHeight, - &pOut->sliceSize, - &pOut->baseAlign); - } - } - } - - ValidMetaBaseAlignments(pOut->baseAlign); - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ComputeCmaskInfo -* -* @brief -* Interface function stub of AddrComputeCmaskInfo -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeCmaskInfo( - const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_INFO_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_CMASK_INFO_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMPUTE_CMASK_INFO_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - input.pTileInfo = &tileInfoNull; - - returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); - - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - returnCode = ComputeCmaskInfo(pIn->flags, - pIn->pitch, - pIn->height, - pIn->numSlices, - pIn->isLinear, - pIn->pTileInfo, - &pOut->pitch, - &pOut->height, - &pOut->cmaskBytes, - &pOut->macroWidth, - &pOut->macroHeight, - 
&pOut->sliceSize, - &pOut->baseAlign, - &pOut->blockMax); - } - } - - ValidMetaBaseAlignments(pOut->baseAlign); - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ComputeDccInfo -* -* @brief -* Interface function to compute DCC key info -* -* @return -* return code of HwlComputeDccInfo -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeDccInfo( - const ADDR_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE ret = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_DCCINFO_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT))) - { - ret = ADDR_PARAMSIZEMISMATCH; - } - } - - if (ret == ADDR_OK) - { - ADDR_COMPUTE_DCCINFO_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - - ret = HwlSetupTileCfg(input.bpp, input.tileIndex, input.macroModeIndex, - &input.tileInfo, &input.tileMode); - - pIn = &input; - } - - if (ret == ADDR_OK) - { - ret = HwlComputeDccInfo(pIn, pOut); - - ValidMetaBaseAlignments(pOut->dccRamBaseAlign); - } - } - - return ret; -} - -/** -**************************************************************************************************** -* Lib::ComputeHtileAddrFromCoord -* -* @brief -* Interface function stub of AddrComputeHtileAddrFromCoord -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeHtileAddrFromCoord( - const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - BOOL_32 isWidth8 = (pIn->blockWidth == 8) ? 
TRUE : FALSE; - BOOL_32 isHeight8 = (pIn->blockHeight == 8) ? TRUE : FALSE; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - input.pTileInfo = &tileInfoNull; - - returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); - - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - if (pIn->flags.tcCompatible) - { - HwlComputeHtileAddrFromCoord(pIn, pOut); - } - else - { - pOut->addr = HwlComputeXmaskAddrFromCoord(pIn->pitch, - pIn->height, - pIn->x, - pIn->y, - pIn->slice, - pIn->numSlices, - 1, - pIn->isLinear, - isWidth8, - isHeight8, - pIn->pTileInfo, - &pOut->bitPosition); - } - } - } - - return returnCode; - -} - -/** -**************************************************************************************************** -* Lib::ComputeHtileCoordFromAddr -* -* @brief -* Interface function stub of AddrComputeHtileCoordFromAddr -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeHtileCoordFromAddr( - const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - BOOL_32 isWidth8 = (pIn->blockWidth == 8) ? TRUE : FALSE; - BOOL_32 isHeight8 = (pIn->blockHeight == 8) ? 
TRUE : FALSE; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - input.pTileInfo = &tileInfoNull; - - returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); - - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - HwlComputeXmaskCoordFromAddr(pIn->addr, - pIn->bitPosition, - pIn->pitch, - pIn->height, - pIn->numSlices, - 1, - pIn->isLinear, - isWidth8, - isHeight8, - pIn->pTileInfo, - &pOut->x, - &pOut->y, - &pOut->slice); - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ComputeCmaskAddrFromCoord -* -* @brief -* Interface function stub of AddrComputeCmaskAddrFromCoord -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeCmaskAddrFromCoord( - const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - 
input.pTileInfo = &tileInfoNull; - - returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); - - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - if (pIn->flags.tcCompatible == TRUE) - { - returnCode = HwlComputeCmaskAddrFromCoord(pIn, pOut); - } - else - { - pOut->addr = HwlComputeXmaskAddrFromCoord(pIn->pitch, - pIn->height, - pIn->x, - pIn->y, - pIn->slice, - pIn->numSlices, - 2, - pIn->isLinear, - FALSE, //this is cmask, isWidth8 is not needed - FALSE, //this is cmask, isHeight8 is not needed - pIn->pTileInfo, - &pOut->bitPosition); - } - - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ComputeCmaskCoordFromAddr -* -* @brief -* Interface function stub of AddrComputeCmaskCoordFromAddr -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeCmaskCoordFromAddr( - const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - input.pTileInfo = &tileInfoNull; - - returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); - - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - HwlComputeXmaskCoordFromAddr(pIn->addr, - pIn->bitPosition, - 
pIn->pitch, - pIn->height, - pIn->numSlices, - 2, - pIn->isLinear, - FALSE, - FALSE, - pIn->pTileInfo, - &pOut->x, - &pOut->y, - &pOut->slice); - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ComputeTileDataWidthAndHeight -* -* @brief -* Compute the squared cache shape for per-tile data (CMASK and HTILE) -* -* @return -* N/A -* -* @note -* MacroWidth and macroHeight are measured in pixels -**************************************************************************************************** -*/ -VOID Lib::ComputeTileDataWidthAndHeight( - UINT_32 bpp, ///< [in] bits per pixel - UINT_32 cacheBits, ///< [in] bits of cache - ADDR_TILEINFO* pTileInfo, ///< [in] Tile info - UINT_32* pMacroWidth, ///< [out] macro tile width - UINT_32* pMacroHeight ///< [out] macro tile height - ) const -{ - UINT_32 height = 1; - UINT_32 width = cacheBits / bpp; - UINT_32 pipes = HwlGetPipes(pTileInfo); - - // Double height until the macro-tile is close to square - // Height can only be doubled if width is even - - while ((width > height * 2 * pipes) && !(width & 1)) - { - width /= 2; - height *= 2; - } - - *pMacroWidth = 8 * width; - *pMacroHeight = 8 * height * pipes; - - // Note: The above iterative comptuation is equivalent to the following - // - //int log2_height = ((log2(cacheBits)-log2(bpp)-log2(pipes))/2); - //int macroHeight = pow2( 3+log2(pipes)+log2_height ); -} - -/** -**************************************************************************************************** -* Lib::HwlComputeTileDataWidthAndHeightLinear -* -* @brief -* Compute the squared cache shape for per-tile data (CMASK and HTILE) for linear layout -* -* @return -* N/A -* -* @note -* MacroWidth and macroHeight are measured in pixels -**************************************************************************************************** -*/ -VOID Lib::HwlComputeTileDataWidthAndHeightLinear( - UINT_32* pMacroWidth, 
///< [out] macro tile width - UINT_32* pMacroHeight, ///< [out] macro tile height - UINT_32 bpp, ///< [in] bits per pixel - ADDR_TILEINFO* pTileInfo ///< [in] tile info - ) const -{ - ADDR_ASSERT(bpp != 4); // Cmask does not support linear layout prior to SI - *pMacroWidth = 8 * 512 / bpp; // Align width to 512-bit memory accesses - *pMacroHeight = 8 * m_pipes; // Align height to number of pipes -} - -/** -**************************************************************************************************** -* Lib::ComputeHtileInfo -* -* @brief -* Compute htile pitch,width, bytes per 2D slice -* -* @return -* Htile bpp i.e. How many bits for an 8x8 tile -* Also returns by output parameters: -* *Htile pitch, height, total size in bytes, macro-tile dimensions and slice size* -**************************************************************************************************** -*/ -UINT_32 Lib::ComputeHtileInfo( - ADDR_HTILE_FLAGS flags, ///< [in] htile flags - UINT_32 pitchIn, ///< [in] pitch input - UINT_32 heightIn, ///< [in] height input - UINT_32 numSlices, ///< [in] number of slices - BOOL_32 isLinear, ///< [in] if it is linear mode - BOOL_32 isWidth8, ///< [in] if htile block width is 8 - BOOL_32 isHeight8, ///< [in] if htile block height is 8 - ADDR_TILEINFO* pTileInfo, ///< [in] Tile info - UINT_32* pPitchOut, ///< [out] pitch output - UINT_32* pHeightOut, ///< [out] height output - UINT_64* pHtileBytes, ///< [out] bytes per 2D slice - UINT_32* pMacroWidth, ///< [out] macro-tile width in pixels - UINT_32* pMacroHeight, ///< [out] macro-tile width in pixels - UINT_64* pSliceSize, ///< [out] slice size in bytes - UINT_32* pBaseAlign ///< [out] base alignment - ) const -{ - - UINT_32 macroWidth; - UINT_32 macroHeight; - UINT_32 baseAlign; - UINT_64 surfBytes; - UINT_64 sliceBytes; - - numSlices = Max(1u, numSlices); - - const UINT_32 bpp = HwlComputeHtileBpp(isWidth8, isHeight8); - const UINT_32 cacheBits = HtileCacheBits; - - if (isLinear) - { - 
HwlComputeTileDataWidthAndHeightLinear(¯oWidth, - ¯oHeight, - bpp, - pTileInfo); - } - else - { - ComputeTileDataWidthAndHeight(bpp, - cacheBits, - pTileInfo, - ¯oWidth, - ¯oHeight); - } - - *pPitchOut = PowTwoAlign(pitchIn, macroWidth); - *pHeightOut = PowTwoAlign(heightIn, macroHeight); - - baseAlign = HwlComputeHtileBaseAlign(flags.tcCompatible, isLinear, pTileInfo); - - surfBytes = HwlComputeHtileBytes(*pPitchOut, - *pHeightOut, - bpp, - isLinear, - numSlices, - &sliceBytes, - baseAlign); - - *pHtileBytes = surfBytes; - - // - // Use SafeAssign since they are optional - // - SafeAssign(pMacroWidth, macroWidth); - - SafeAssign(pMacroHeight, macroHeight); - - SafeAssign(pSliceSize, sliceBytes); - - SafeAssign(pBaseAlign, baseAlign); - - return bpp; -} - -/** -**************************************************************************************************** -* Lib::ComputeCmaskBaseAlign -* -* @brief -* Compute cmask base alignment -* -* @return -* Cmask base alignment -**************************************************************************************************** -*/ -UINT_32 Lib::ComputeCmaskBaseAlign( - ADDR_CMASK_FLAGS flags, ///< [in] Cmask flags - ADDR_TILEINFO* pTileInfo ///< [in] Tile info - ) const -{ - UINT_32 baseAlign = m_pipeInterleaveBytes * HwlGetPipes(pTileInfo); - - if (flags.tcCompatible) - { - ADDR_ASSERT(pTileInfo != NULL); - if (pTileInfo) - { - baseAlign *= pTileInfo->banks; - } - } - - return baseAlign; -} - -/** -**************************************************************************************************** -* Lib::ComputeCmaskBytes -* -* @brief -* Compute cmask size in bytes -* -* @return -* Cmask size in bytes -**************************************************************************************************** -*/ -UINT_64 Lib::ComputeCmaskBytes( - UINT_32 pitch, ///< [in] pitch - UINT_32 height, ///< [in] height - UINT_32 numSlices ///< [in] number of slices - ) const -{ - return BITS_TO_BYTES(static_cast(pitch) * height * 
numSlices * CmaskElemBits) / - MicroTilePixels; -} - -/** -**************************************************************************************************** -* Lib::ComputeCmaskInfo -* -* @brief -* Compute cmask pitch,width, bytes per 2D slice -* -* @return -* BlockMax. Also by output parameters: Cmask pitch,height, total size in bytes, -* macro-tile dimensions -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeCmaskInfo( - ADDR_CMASK_FLAGS flags, ///< [in] cmask flags - UINT_32 pitchIn, ///< [in] pitch input - UINT_32 heightIn, ///< [in] height input - UINT_32 numSlices, ///< [in] number of slices - BOOL_32 isLinear, ///< [in] is linear mode - ADDR_TILEINFO* pTileInfo, ///< [in] Tile info - UINT_32* pPitchOut, ///< [out] pitch output - UINT_32* pHeightOut, ///< [out] height output - UINT_64* pCmaskBytes, ///< [out] bytes per 2D slice - UINT_32* pMacroWidth, ///< [out] macro-tile width in pixels - UINT_32* pMacroHeight, ///< [out] macro-tile width in pixels - UINT_64* pSliceSize, ///< [out] slice size in bytes - UINT_32* pBaseAlign, ///< [out] base alignment - UINT_32* pBlockMax ///< [out] block max == slice / 128 / 128 - 1 - ) const -{ - UINT_32 macroWidth; - UINT_32 macroHeight; - UINT_32 baseAlign; - UINT_64 surfBytes; - UINT_64 sliceBytes; - - numSlices = Max(1u, numSlices); - - const UINT_32 bpp = CmaskElemBits; - const UINT_32 cacheBits = CmaskCacheBits; - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (isLinear) - { - HwlComputeTileDataWidthAndHeightLinear(¯oWidth, - ¯oHeight, - bpp, - pTileInfo); - } - else - { - ComputeTileDataWidthAndHeight(bpp, - cacheBits, - pTileInfo, - ¯oWidth, - ¯oHeight); - } - - *pPitchOut = (pitchIn + macroWidth - 1) & ~(macroWidth - 1); - *pHeightOut = (heightIn + macroHeight - 1) & ~(macroHeight - 1); - - - sliceBytes = ComputeCmaskBytes(*pPitchOut, - *pHeightOut, - 1); - - baseAlign = ComputeCmaskBaseAlign(flags, pTileInfo); - - while 
(sliceBytes % baseAlign) - { - *pHeightOut += macroHeight; - - sliceBytes = ComputeCmaskBytes(*pPitchOut, - *pHeightOut, - 1); - } - - surfBytes = sliceBytes * numSlices; - - *pCmaskBytes = surfBytes; - - // - // Use SafeAssign since they are optional - // - SafeAssign(pMacroWidth, macroWidth); - - SafeAssign(pMacroHeight, macroHeight); - - SafeAssign(pBaseAlign, baseAlign); - - SafeAssign(pSliceSize, sliceBytes); - - UINT_32 slice = (*pPitchOut) * (*pHeightOut); - UINT_32 blockMax = slice / 128 / 128 - 1; - -#if DEBUG - if (slice % (64*256) != 0) - { - ADDR_ASSERT_ALWAYS(); - } -#endif //DEBUG - - UINT_32 maxBlockMax = HwlGetMaxCmaskBlockMax(); - - if (blockMax > maxBlockMax) - { - blockMax = maxBlockMax; - returnCode = ADDR_INVALIDPARAMS; - } - - SafeAssign(pBlockMax, blockMax); - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ComputeXmaskCoordYFromPipe -* -* @brief -* Compute the Y coord from pipe number for cmask/htile -* -* @return -* Y coordinate -* -**************************************************************************************************** -*/ -UINT_32 Lib::ComputeXmaskCoordYFromPipe( - UINT_32 pipe, ///< [in] pipe number - UINT_32 x ///< [in] x coordinate - ) const -{ - UINT_32 pipeBit0; - UINT_32 pipeBit1; - UINT_32 xBit0; - UINT_32 xBit1; - UINT_32 yBit0; - UINT_32 yBit1; - - UINT_32 y = 0; - - UINT_32 numPipes = m_pipes; // SI has its implementation - // - // Convert pipe + x to y coordinate. 
- // - switch (numPipes) - { - case 1: - // - // 1 pipe - // - // p0 = 0 - // - y = 0; - break; - case 2: - // - // 2 pipes - // - // p0 = x0 ^ y0 - // - // y0 = p0 ^ x0 - // - pipeBit0 = pipe & 0x1; - - xBit0 = x & 0x1; - - yBit0 = pipeBit0 ^ xBit0; - - y = yBit0; - break; - case 4: - // - // 4 pipes - // - // p0 = x1 ^ y0 - // p1 = x0 ^ y1 - // - // y0 = p0 ^ x1 - // y1 = p1 ^ x0 - // - pipeBit0 = pipe & 0x1; - pipeBit1 = (pipe & 0x2) >> 1; - - xBit0 = x & 0x1; - xBit1 = (x & 0x2) >> 1; - - yBit0 = pipeBit0 ^ xBit1; - yBit1 = pipeBit1 ^ xBit0; - - y = (yBit0 | - (yBit1 << 1)); - break; - case 8: - // - // 8 pipes - // - // r600 and r800 have different method - // - y = HwlComputeXmaskCoordYFrom8Pipe(pipe, x); - break; - default: - break; - } - return y; -} - -/** -**************************************************************************************************** -* Lib::HwlComputeXmaskCoordFromAddr -* -* @brief -* Compute the coord from an address of a cmask/htile -* -* @return -* N/A -* -* @note -* This method is reused by htile, so rename to Xmask -**************************************************************************************************** -*/ -VOID Lib::HwlComputeXmaskCoordFromAddr( - UINT_64 addr, ///< [in] address - UINT_32 bitPosition, ///< [in] bitPosition in a byte - UINT_32 pitch, ///< [in] pitch - UINT_32 height, ///< [in] height - UINT_32 numSlices, ///< [in] number of slices - UINT_32 factor, ///< [in] factor that indicates cmask or htile - BOOL_32 isLinear, ///< [in] linear or tiled HTILE layout - BOOL_32 isWidth8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value - BOOL_32 isHeight8, ///< [in] TRUE if width is 8, FALSE means 4. 
It's register value - ADDR_TILEINFO* pTileInfo, ///< [in] Tile info - UINT_32* pX, ///< [out] x coord - UINT_32* pY, ///< [out] y coord - UINT_32* pSlice ///< [out] slice index - ) const -{ - UINT_32 pipe; - UINT_32 numPipes; - UINT_32 numGroupBits; - (void)numGroupBits; - UINT_32 numPipeBits; - UINT_32 macroTilePitch; - UINT_32 macroTileHeight; - - UINT_64 bitAddr; - - UINT_32 microTileCoordY; - - UINT_32 elemBits; - - UINT_32 pitchAligned = pitch; - UINT_32 heightAligned = height; - UINT_64 totalBytes; - - UINT_64 elemOffset; - - UINT_64 macroIndex; - UINT_32 microIndex; - - UINT_64 macroNumber; - UINT_32 microNumber; - - UINT_32 macroX; - UINT_32 macroY; - UINT_32 macroZ; - - UINT_32 microX; - UINT_32 microY; - - UINT_32 tilesPerMacro; - UINT_32 macrosPerPitch; - UINT_32 macrosPerSlice; - - // - // Extract pipe. - // - numPipes = HwlGetPipes(pTileInfo); - pipe = ComputePipeFromAddr(addr, numPipes); - - // - // Compute the number of group and pipe bits. - // - numGroupBits = Log2(m_pipeInterleaveBytes); - numPipeBits = Log2(numPipes); - - UINT_32 groupBits = 8 * m_pipeInterleaveBytes; - UINT_32 pipes = numPipes; - - - // - // Compute the micro tile size, in bits. And macro tile pitch and height. - // - if (factor == 2) //CMASK - { - ADDR_CMASK_FLAGS flags = {{0}}; - - elemBits = CmaskElemBits; - - ComputeCmaskInfo(flags, - pitch, - height, - numSlices, - isLinear, - pTileInfo, - &pitchAligned, - &heightAligned, - &totalBytes, - ¯oTilePitch, - ¯oTileHeight); - } - else //HTILE - { - ADDR_HTILE_FLAGS flags = {{0}}; - - if (factor != 1) - { - factor = 1; - } - - elemBits = HwlComputeHtileBpp(isWidth8, isHeight8); - - ComputeHtileInfo(flags, - pitch, - height, - numSlices, - isLinear, - isWidth8, - isHeight8, - pTileInfo, - &pitchAligned, - &heightAligned, - &totalBytes, - ¯oTilePitch, - ¯oTileHeight); - } - - // Should use aligned dims - // - pitch = pitchAligned; - height = heightAligned; - - - // - // Convert byte address to bit address. 
- // - bitAddr = BYTES_TO_BITS(addr) + bitPosition; - - - // - // Remove pipe bits from address. - // - - bitAddr = (bitAddr % groupBits) + ((bitAddr/groupBits/pipes)*groupBits); - - - elemOffset = bitAddr / elemBits; - - tilesPerMacro = (macroTilePitch/factor) * macroTileHeight / MicroTilePixels >> numPipeBits; - - macrosPerPitch = pitch / (macroTilePitch/factor); - macrosPerSlice = macrosPerPitch * height / macroTileHeight; - - macroIndex = elemOffset / factor / tilesPerMacro; - microIndex = static_cast(elemOffset % (tilesPerMacro * factor)); - - macroNumber = macroIndex * factor + microIndex % factor; - microNumber = microIndex / factor; - - macroX = static_cast((macroNumber % macrosPerPitch)); - macroY = static_cast((macroNumber % macrosPerSlice) / macrosPerPitch); - macroZ = static_cast((macroNumber / macrosPerSlice)); - - - microX = microNumber % (macroTilePitch / factor / MicroTileWidth); - microY = (microNumber / (macroTilePitch / factor / MicroTileHeight)); - - *pX = macroX * (macroTilePitch/factor) + microX * MicroTileWidth; - *pY = macroY * macroTileHeight + (microY * MicroTileHeight << numPipeBits); - *pSlice = macroZ; - - microTileCoordY = ComputeXmaskCoordYFromPipe(pipe, - *pX/MicroTileWidth); - - - // - // Assemble final coordinates. 
- // - *pY += microTileCoordY * MicroTileHeight; - -} - -/** -**************************************************************************************************** -* Lib::HwlComputeXmaskAddrFromCoord -* -* @brief -* Compute the address from an address of cmask (prior to si) -* -* @return -* Address in bytes -* -**************************************************************************************************** -*/ -UINT_64 Lib::HwlComputeXmaskAddrFromCoord( - UINT_32 pitch, ///< [in] pitch - UINT_32 height, ///< [in] height - UINT_32 x, ///< [in] x coord - UINT_32 y, ///< [in] y coord - UINT_32 slice, ///< [in] slice/depth index - UINT_32 numSlices, ///< [in] number of slices - UINT_32 factor, ///< [in] factor that indicates cmask(2) or htile(1) - BOOL_32 isLinear, ///< [in] linear or tiled HTILE layout - BOOL_32 isWidth8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value - BOOL_32 isHeight8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value - ADDR_TILEINFO* pTileInfo, ///< [in] Tile info - UINT_32* pBitPosition ///< [out] bit position inside a byte - ) const -{ - UINT_64 addr; - UINT_32 numGroupBits; - UINT_32 numPipeBits; - UINT_32 newPitch = 0; - UINT_32 newHeight = 0; - UINT_64 sliceBytes = 0; - UINT_64 totalBytes = 0; - UINT_64 sliceOffset; - UINT_32 pipe; - UINT_32 macroTileWidth; - UINT_32 macroTileHeight; - UINT_32 macroTilesPerRow; - UINT_32 macroTileBytes; - UINT_32 macroTileIndexX; - UINT_32 macroTileIndexY; - UINT_64 macroTileOffset; - UINT_32 pixelBytesPerRow; - UINT_32 pixelOffsetX; - UINT_32 pixelOffsetY; - UINT_32 pixelOffset; - UINT_64 totalOffset; - UINT_64 offsetLo; - UINT_64 offsetHi; - UINT_64 groupMask; - - - UINT_32 elemBits = 0; - - UINT_32 numPipes = m_pipes; // This function is accessed prior to si only - - if (factor == 2) //CMASK - { - elemBits = CmaskElemBits; - - // For asics before SI, cmask is always tiled - isLinear = FALSE; - } - else //HTILE - { - if (factor != 1) // Fix compile warning - { - factor = 
1; - } - - elemBits = HwlComputeHtileBpp(isWidth8, isHeight8); - } - - // - // Compute the number of group bits and pipe bits. - // - numGroupBits = Log2(m_pipeInterleaveBytes); - numPipeBits = Log2(numPipes); - - // - // Compute macro tile dimensions. - // - if (factor == 2) // CMASK - { - ADDR_CMASK_FLAGS flags = {{0}}; - - ComputeCmaskInfo(flags, - pitch, - height, - numSlices, - isLinear, - pTileInfo, - &newPitch, - &newHeight, - &totalBytes, - ¯oTileWidth, - ¯oTileHeight); - - sliceBytes = totalBytes / numSlices; - } - else // HTILE - { - ADDR_HTILE_FLAGS flags = {{0}}; - - ComputeHtileInfo(flags, - pitch, - height, - numSlices, - isLinear, - isWidth8, - isHeight8, - pTileInfo, - &newPitch, - &newHeight, - &totalBytes, - ¯oTileWidth, - ¯oTileHeight, - &sliceBytes); - } - - sliceOffset = slice * sliceBytes; - - // - // Get the pipe. Note that neither slice rotation nor pipe swizzling apply for CMASK. - // - pipe = ComputePipeFromCoord(x, - y, - 0, - ADDR_TM_2D_TILED_THIN1, - 0, - FALSE, - pTileInfo); - - // - // Compute the number of macro tiles per row. - // - macroTilesPerRow = newPitch / macroTileWidth; - - // - // Compute the number of bytes per macro tile. - // - macroTileBytes = BITS_TO_BYTES((macroTileWidth * macroTileHeight * elemBits) / MicroTilePixels); - - // - // Compute the offset to the macro tile containing the specified coordinate. - // - macroTileIndexX = x / macroTileWidth; - macroTileIndexY = y / macroTileHeight; - macroTileOffset = ((macroTileIndexY * macroTilesPerRow) + macroTileIndexX) * macroTileBytes; - - // - // Compute the pixel offset within the macro tile. - // - pixelBytesPerRow = BITS_TO_BYTES(macroTileWidth * elemBits) / MicroTileWidth; - - // - // The nibbles are interleaved (see below), so the part of the offset relative to the x - // coordinate repeats halfway across the row. 
(Not for HTILE) - // - if (factor == 2) - { - pixelOffsetX = (x % (macroTileWidth / 2)) / MicroTileWidth; - } - else - { - pixelOffsetX = (x % (macroTileWidth)) / MicroTileWidth * BITS_TO_BYTES(elemBits); - } - - // - // Compute the y offset within the macro tile. - // - pixelOffsetY = (((y % macroTileHeight) / MicroTileHeight) / numPipes) * pixelBytesPerRow; - - pixelOffset = pixelOffsetX + pixelOffsetY; - - // - // Combine the slice offset and macro tile offset with the pixel offset, accounting for the - // pipe bits in the middle of the address. - // - totalOffset = ((sliceOffset + macroTileOffset) >> numPipeBits) + pixelOffset; - - // - // Split the offset to put some bits below the pipe bits and some above. - // - groupMask = (1 << numGroupBits) - 1; - offsetLo = totalOffset & groupMask; - offsetHi = (totalOffset & ~groupMask) << numPipeBits; - - // - // Assemble the address from its components. - // - addr = offsetLo; - addr |= offsetHi; - // This is to remove warning with /analyze option - UINT_32 pipeBits = pipe << numGroupBits; - addr |= pipeBits; - - // - // Compute the bit position. The lower nibble is used when the x coordinate within the macro - // tile is less than half of the macro tile width, and the upper nibble is used when the x - // coordinate within the macro tile is greater than or equal to half the macro tile width. - // - *pBitPosition = ((x % macroTileWidth) < (macroTileWidth / factor)) ? 
0 : 4; - - return addr; -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Surface Addressing Shared -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* Lib::ComputeSurfaceAddrFromCoordLinear -* -* @brief -* Compute address from coord for linear surface -* -* @return -* Address in bytes -* -**************************************************************************************************** -*/ -UINT_64 Lib::ComputeSurfaceAddrFromCoordLinear( - UINT_32 x, ///< [in] x coord - UINT_32 y, ///< [in] y coord - UINT_32 slice, ///< [in] slice/depth index - UINT_32 sample, ///< [in] sample index - UINT_32 bpp, ///< [in] bits per pixel - UINT_32 pitch, ///< [in] pitch - UINT_32 height, ///< [in] height - UINT_32 numSlices, ///< [in] number of slices - UINT_32* pBitPosition ///< [out] bit position inside a byte - ) const -{ - const UINT_64 sliceSize = static_cast(pitch) * height; - - UINT_64 sliceOffset = (slice + sample * numSlices)* sliceSize; - UINT_64 rowOffset = static_cast(y) * pitch; - UINT_64 pixOffset = x; - - UINT_64 addr = (sliceOffset + rowOffset + pixOffset) * bpp; - - *pBitPosition = static_cast(addr % 8); - addr /= 8; - - return addr; -} - -/** -**************************************************************************************************** -* Lib::ComputeSurfaceCoordFromAddrLinear -* -* @brief -* Compute the coord from an address of a linear surface -* -* @return -* N/A -**************************************************************************************************** -*/ -VOID Lib::ComputeSurfaceCoordFromAddrLinear( - UINT_64 addr, ///< [in] address - UINT_32 bitPosition, ///< [in] bitPosition in a byte - UINT_32 bpp, ///< [in] bits per pixel - UINT_32 pitch, ///< [in] pitch - UINT_32 height, ///< [in] height - UINT_32 numSlices, ///< 
[in] number of slices - UINT_32* pX, ///< [out] x coord - UINT_32* pY, ///< [out] y coord - UINT_32* pSlice, ///< [out] slice/depth index - UINT_32* pSample ///< [out] sample index - ) const -{ - const UINT_64 sliceSize = static_cast(pitch) * height; - const UINT_64 linearOffset = (BYTES_TO_BITS(addr) + bitPosition) / bpp; - - *pX = static_cast((linearOffset % sliceSize) % pitch); - *pY = static_cast((linearOffset % sliceSize) / pitch % height); - *pSlice = static_cast((linearOffset / sliceSize) % numSlices); - *pSample = static_cast((linearOffset / sliceSize) / numSlices); -} - -/** -**************************************************************************************************** -* Lib::ComputeSurfaceCoordFromAddrMicroTiled -* -* @brief -* Compute the coord from an address of a micro tiled surface -* -* @return -* N/A -**************************************************************************************************** -*/ -VOID Lib::ComputeSurfaceCoordFromAddrMicroTiled( - UINT_64 addr, ///< [in] address - UINT_32 bitPosition, ///< [in] bitPosition in a byte - UINT_32 bpp, ///< [in] bits per pixel - UINT_32 pitch, ///< [in] pitch - UINT_32 height, ///< [in] height - UINT_32 numSamples, ///< [in] number of samples - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 tileBase, ///< [in] base offset within a tile - UINT_32 compBits, ///< [in] component bits actually needed(for planar surface) - UINT_32* pX, ///< [out] x coord - UINT_32* pY, ///< [out] y coord - UINT_32* pSlice, ///< [out] slice/depth index - UINT_32* pSample, ///< [out] sample index, - AddrTileType microTileType, ///< [in] micro tiling order - BOOL_32 isDepthSampleOrder ///< [in] TRUE if in depth sample order - ) const -{ - UINT_64 bitAddr; - UINT_32 microTileThickness; - UINT_32 microTileBits; - UINT_64 sliceBits; - UINT_64 rowBits; - UINT_32 sliceIndex; - UINT_32 microTileCoordX; - UINT_32 microTileCoordY; - UINT_32 pixelOffset; - UINT_32 pixelCoordX = 0; - UINT_32 pixelCoordY = 0; - UINT_32 
pixelCoordZ = 0; - UINT_32 pixelCoordS = 0; - - // - // Convert byte address to bit address. - // - bitAddr = BYTES_TO_BITS(addr) + bitPosition; - - // - // Compute the micro tile size, in bits. - // - switch (tileMode) - { - case ADDR_TM_1D_TILED_THICK: - microTileThickness = ThickTileThickness; - break; - default: - microTileThickness = 1; - break; - } - - microTileBits = MicroTilePixels * microTileThickness * bpp * numSamples; - - // - // Compute number of bits per slice and number of bits per row of micro tiles. - // - sliceBits = static_cast(pitch) * height * microTileThickness * bpp * numSamples; - - rowBits = (pitch / MicroTileWidth) * microTileBits; - - // - // Extract the slice index. - // - sliceIndex = static_cast(bitAddr / sliceBits); - bitAddr -= sliceIndex * sliceBits; - - // - // Extract the y coordinate of the micro tile. - // - microTileCoordY = static_cast(bitAddr / rowBits) * MicroTileHeight; - bitAddr -= (microTileCoordY / MicroTileHeight) * rowBits; - - // - // Extract the x coordinate of the micro tile. - // - microTileCoordX = static_cast(bitAddr / microTileBits) * MicroTileWidth; - - // - // Compute the pixel offset within the micro tile. - // - pixelOffset = static_cast(bitAddr % microTileBits); - - // - // Extract pixel coordinates from the offset. - // - HwlComputePixelCoordFromOffset(pixelOffset, - bpp, - numSamples, - tileMode, - tileBase, - compBits, - &pixelCoordX, - &pixelCoordY, - &pixelCoordZ, - &pixelCoordS, - microTileType, - isDepthSampleOrder); - - // - // Assemble final coordinates. 
- // - *pX = microTileCoordX + pixelCoordX; - *pY = microTileCoordY + pixelCoordY; - *pSlice = (sliceIndex * microTileThickness) + pixelCoordZ; - *pSample = pixelCoordS; - - if (microTileThickness > 1) - { - *pSample = 0; - } -} - -/** -**************************************************************************************************** -* Lib::ComputePipeFromAddr -* -* @brief -* Compute the pipe number from an address -* -* @return -* Pipe number -* -**************************************************************************************************** -*/ -UINT_32 Lib::ComputePipeFromAddr( - UINT_64 addr, ///< [in] address - UINT_32 numPipes ///< [in] number of banks - ) const -{ - UINT_32 pipe; - - UINT_32 groupBytes = m_pipeInterleaveBytes; //just different terms - - // R600 - // The LSBs of the address are arranged as follows: - // bank | pipe | group - // - // To get the pipe number, shift off the group bits and mask the pipe bits. - // - - // R800 - // The LSBs of the address are arranged as follows: - // bank | bankInterleave | pipe | pipeInterleave - // - // To get the pipe number, shift off the pipe interleave bits and mask the pipe bits. 
- // - - pipe = static_cast(addr >> Log2(groupBytes)) & (numPipes - 1); - - return pipe; -} - -/** -**************************************************************************************************** -* Lib::ComputeMicroTileEquation -* -* @brief -* Compute micro tile equation -* -* @return -* If equation can be computed -* -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeMicroTileEquation( - UINT_32 log2BytesPP, ///< [in] log2 of bytes per pixel - AddrTileMode tileMode, ///< [in] tile mode - AddrTileType microTileType, ///< [in] pixel order in display/non-display mode - ADDR_EQUATION* pEquation ///< [out] equation - ) const -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - for (UINT_32 i = 0; i < log2BytesPP; i++) - { - pEquation->addr[i].valid = 1; - pEquation->addr[i].channel = 0; - pEquation->addr[i].index = i; - } - - ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[log2BytesPP]; - - ADDR_CHANNEL_SETTING x0 = InitChannel(1, 0, log2BytesPP + 0); - ADDR_CHANNEL_SETTING x1 = InitChannel(1, 0, log2BytesPP + 1); - ADDR_CHANNEL_SETTING x2 = InitChannel(1, 0, log2BytesPP + 2); - ADDR_CHANNEL_SETTING y0 = InitChannel(1, 1, 0); - ADDR_CHANNEL_SETTING y1 = InitChannel(1, 1, 1); - ADDR_CHANNEL_SETTING y2 = InitChannel(1, 1, 2); - ADDR_CHANNEL_SETTING z0 = InitChannel(1, 2, 0); - ADDR_CHANNEL_SETTING z1 = InitChannel(1, 2, 1); - ADDR_CHANNEL_SETTING z2 = InitChannel(1, 2, 2); - - UINT_32 thickness = Thickness(tileMode); - UINT_32 bpp = 1 << (log2BytesPP + 3); - - if (microTileType != ADDR_THICK) - { - if (microTileType == ADDR_DISPLAYABLE) - { - switch (bpp) - { - case 8: - pixelBit[0] = x0; - pixelBit[1] = x1; - pixelBit[2] = x2; - pixelBit[3] = y1; - pixelBit[4] = y0; - pixelBit[5] = y2; - break; - case 16: - pixelBit[0] = x0; - pixelBit[1] = x1; - pixelBit[2] = x2; - pixelBit[3] = y0; - pixelBit[4] = y1; - pixelBit[5] = y2; - break; - case 32: - pixelBit[0] = x0; - pixelBit[1] = x1; - 
pixelBit[2] = y0; - pixelBit[3] = x2; - pixelBit[4] = y1; - pixelBit[5] = y2; - break; - case 64: - pixelBit[0] = x0; - pixelBit[1] = y0; - pixelBit[2] = x1; - pixelBit[3] = x2; - pixelBit[4] = y1; - pixelBit[5] = y2; - break; - case 128: - pixelBit[0] = y0; - pixelBit[1] = x0; - pixelBit[2] = x1; - pixelBit[3] = x2; - pixelBit[4] = y1; - pixelBit[5] = y2; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - } - else if (microTileType == ADDR_NON_DISPLAYABLE || microTileType == ADDR_DEPTH_SAMPLE_ORDER) - { - pixelBit[0] = x0; - pixelBit[1] = y0; - pixelBit[2] = x1; - pixelBit[3] = y1; - pixelBit[4] = x2; - pixelBit[5] = y2; - } - else if (microTileType == ADDR_ROTATED) - { - ADDR_ASSERT(thickness == 1); - - switch (bpp) - { - case 8: - pixelBit[0] = y0; - pixelBit[1] = y1; - pixelBit[2] = y2; - pixelBit[3] = x1; - pixelBit[4] = x0; - pixelBit[5] = x2; - break; - case 16: - pixelBit[0] = y0; - pixelBit[1] = y1; - pixelBit[2] = y2; - pixelBit[3] = x0; - pixelBit[4] = x1; - pixelBit[5] = x2; - break; - case 32: - pixelBit[0] = y0; - pixelBit[1] = y1; - pixelBit[2] = x0; - pixelBit[3] = y2; - pixelBit[4] = x1; - pixelBit[5] = x2; - break; - case 64: - pixelBit[0] = y0; - pixelBit[1] = x0; - pixelBit[2] = y1; - pixelBit[3] = x1; - pixelBit[4] = x2; - pixelBit[5] = y2; - break; - default: - retCode = ADDR_NOTSUPPORTED; - break; - } - } - - if (thickness > 1) - { - pixelBit[6] = z0; - pixelBit[7] = z1; - pEquation->numBits = 8 + log2BytesPP; - } - else - { - pEquation->numBits = 6 + log2BytesPP; - } - } - else // ADDR_THICK - { - ADDR_ASSERT(thickness > 1); - - switch (bpp) - { - case 8: - case 16: - pixelBit[0] = x0; - pixelBit[1] = y0; - pixelBit[2] = x1; - pixelBit[3] = y1; - pixelBit[4] = z0; - pixelBit[5] = z1; - break; - case 32: - pixelBit[0] = x0; - pixelBit[1] = y0; - pixelBit[2] = x1; - pixelBit[3] = z0; - pixelBit[4] = y1; - pixelBit[5] = z1; - break; - case 64: - case 128: - pixelBit[0] = x0; - pixelBit[1] = y0; - pixelBit[2] = z0; - pixelBit[3] = x1; - 
pixelBit[4] = y1; - pixelBit[5] = z1; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - - pixelBit[6] = x2; - pixelBit[7] = y2; - pEquation->numBits = 8 + log2BytesPP; - } - - if (thickness == 8) - { - pixelBit[8] = z2; - pEquation->numBits = 9 + log2BytesPP; - } - - // stackedDepthSlices is used for addressing mode that a tile block contains multiple slices, - // which is not supported by our address lib - pEquation->stackedDepthSlices = FALSE; - - return retCode; -} - -/** -**************************************************************************************************** -* Lib::ComputePixelIndexWithinMicroTile -* -* @brief -* Compute the pixel index inside a micro tile of surface -* -* @return -* Pixel index -* -**************************************************************************************************** -*/ -UINT_32 Lib::ComputePixelIndexWithinMicroTile( - UINT_32 x, ///< [in] x coord - UINT_32 y, ///< [in] y coord - UINT_32 z, ///< [in] slice/depth index - UINT_32 bpp, ///< [in] bits per pixel - AddrTileMode tileMode, ///< [in] tile mode - AddrTileType microTileType ///< [in] pixel order in display/non-display mode - ) const -{ - UINT_32 pixelBit0 = 0; - UINT_32 pixelBit1 = 0; - UINT_32 pixelBit2 = 0; - UINT_32 pixelBit3 = 0; - UINT_32 pixelBit4 = 0; - UINT_32 pixelBit5 = 0; - UINT_32 pixelBit6 = 0; - UINT_32 pixelBit7 = 0; - UINT_32 pixelBit8 = 0; - UINT_32 pixelNumber; - - UINT_32 x0 = _BIT(x, 0); - UINT_32 x1 = _BIT(x, 1); - UINT_32 x2 = _BIT(x, 2); - UINT_32 y0 = _BIT(y, 0); - UINT_32 y1 = _BIT(y, 1); - UINT_32 y2 = _BIT(y, 2); - UINT_32 z0 = _BIT(z, 0); - UINT_32 z1 = _BIT(z, 1); - UINT_32 z2 = _BIT(z, 2); - - UINT_32 thickness = Thickness(tileMode); - - // Compute the pixel number within the micro tile. 
- - if (microTileType != ADDR_THICK) - { - if (microTileType == ADDR_DISPLAYABLE) - { - switch (bpp) - { - case 8: - pixelBit0 = x0; - pixelBit1 = x1; - pixelBit2 = x2; - pixelBit3 = y1; - pixelBit4 = y0; - pixelBit5 = y2; - break; - case 16: - pixelBit0 = x0; - pixelBit1 = x1; - pixelBit2 = x2; - pixelBit3 = y0; - pixelBit4 = y1; - pixelBit5 = y2; - break; - case 32: - pixelBit0 = x0; - pixelBit1 = x1; - pixelBit2 = y0; - pixelBit3 = x2; - pixelBit4 = y1; - pixelBit5 = y2; - break; - case 64: - pixelBit0 = x0; - pixelBit1 = y0; - pixelBit2 = x1; - pixelBit3 = x2; - pixelBit4 = y1; - pixelBit5 = y2; - break; - case 128: - pixelBit0 = y0; - pixelBit1 = x0; - pixelBit2 = x1; - pixelBit3 = x2; - pixelBit4 = y1; - pixelBit5 = y2; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - } - else if (microTileType == ADDR_NON_DISPLAYABLE || microTileType == ADDR_DEPTH_SAMPLE_ORDER) - { - pixelBit0 = x0; - pixelBit1 = y0; - pixelBit2 = x1; - pixelBit3 = y1; - pixelBit4 = x2; - pixelBit5 = y2; - } - else if (microTileType == ADDR_ROTATED) - { - ADDR_ASSERT(thickness == 1); - - switch (bpp) - { - case 8: - pixelBit0 = y0; - pixelBit1 = y1; - pixelBit2 = y2; - pixelBit3 = x1; - pixelBit4 = x0; - pixelBit5 = x2; - break; - case 16: - pixelBit0 = y0; - pixelBit1 = y1; - pixelBit2 = y2; - pixelBit3 = x0; - pixelBit4 = x1; - pixelBit5 = x2; - break; - case 32: - pixelBit0 = y0; - pixelBit1 = y1; - pixelBit2 = x0; - pixelBit3 = y2; - pixelBit4 = x1; - pixelBit5 = x2; - break; - case 64: - pixelBit0 = y0; - pixelBit1 = x0; - pixelBit2 = y1; - pixelBit3 = x1; - pixelBit4 = x2; - pixelBit5 = y2; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - } - - if (thickness > 1) - { - pixelBit6 = z0; - pixelBit7 = z1; - } - } - else // ADDR_THICK - { - ADDR_ASSERT(thickness > 1); - - switch (bpp) - { - case 8: - case 16: - pixelBit0 = x0; - pixelBit1 = y0; - pixelBit2 = x1; - pixelBit3 = y1; - pixelBit4 = z0; - pixelBit5 = z1; - break; - case 32: - pixelBit0 = x0; - pixelBit1 = y0; 
- pixelBit2 = x1; - pixelBit3 = z0; - pixelBit4 = y1; - pixelBit5 = z1; - break; - case 64: - case 128: - pixelBit0 = x0; - pixelBit1 = y0; - pixelBit2 = z0; - pixelBit3 = x1; - pixelBit4 = y1; - pixelBit5 = z1; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - - pixelBit6 = x2; - pixelBit7 = y2; - } - - if (thickness == 8) - { - pixelBit8 = z2; - } - - pixelNumber = ((pixelBit0 ) | - (pixelBit1 << 1) | - (pixelBit2 << 2) | - (pixelBit3 << 3) | - (pixelBit4 << 4) | - (pixelBit5 << 5) | - (pixelBit6 << 6) | - (pixelBit7 << 7) | - (pixelBit8 << 8)); - - return pixelNumber; -} - -/** -**************************************************************************************************** -* Lib::AdjustPitchAlignment -* -* @brief -* Adjusts pitch alignment for flipping surface -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID Lib::AdjustPitchAlignment( - ADDR_SURFACE_FLAGS flags, ///< [in] Surface flags - UINT_32* pPitchAlign ///< [out] Pointer to pitch alignment - ) const -{ - // Display engine hardwires lower 5 bit of GRPH_PITCH to ZERO which means 32 pixel alignment - // Maybe it will be fixed in future but let's make it general for now. 
- if (flags.display || flags.overlay) - { - *pPitchAlign = PowTwoAlign(*pPitchAlign, 32); - - if(flags.display) - { - *pPitchAlign = Max(m_minPitchAlignPixels, *pPitchAlign); - } - } -} - -/** -**************************************************************************************************** -* Lib::PadDimensions -* -* @brief -* Helper function to pad dimensions -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID Lib::PadDimensions( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32 numSamples, ///< [in] number of samples - ADDR_TILEINFO* pTileInfo, ///< [in,out] bank structure. - UINT_32 padDims, ///< [in] Dimensions to pad valid value 1,2,3 - UINT_32 mipLevel, ///< [in] MipLevel - UINT_32* pPitch, ///< [in,out] pitch in pixels - UINT_32* pPitchAlign, ///< [in,out] pitch align could be changed in HwlPadDimensions - UINT_32* pHeight, ///< [in,out] height in pixels - UINT_32 heightAlign, ///< [in] height alignment - UINT_32* pSlices, ///< [in,out] number of slices - UINT_32 sliceAlign ///< [in] number of slice alignment - ) const -{ - UINT_32 pitchAlign = *pPitchAlign; - UINT_32 thickness = Thickness(tileMode); - - ADDR_ASSERT(padDims <= 3); - - // - // Override padding for mip levels - // - if (mipLevel > 0) - { - if (flags.cube) - { - // for cubemap, we only pad when client call with 6 faces as an identity - if (*pSlices > 1) - { - padDims = 3; // we should pad cubemap sub levels when we treat it as 3d texture - } - else - { - padDims = 2; - } - } - } - - // Any possibilities that padDims is 0? 
- if (padDims == 0) - { - padDims = 3; - } - - if (IsPow2(pitchAlign)) - { - *pPitch = PowTwoAlign((*pPitch), pitchAlign); - } - else // add this code to pass unit test, r600 linear mode is not align bpp to pow2 for linear - { - *pPitch += pitchAlign - 1; - *pPitch /= pitchAlign; - *pPitch *= pitchAlign; - } - - if (padDims > 1) - { - if (IsPow2(heightAlign)) - { - *pHeight = PowTwoAlign((*pHeight), heightAlign); - } - else - { - *pHeight += heightAlign - 1; - *pHeight /= heightAlign; - *pHeight *= heightAlign; - } - } - - if (padDims > 2 || thickness > 1) - { - // for cubemap single face, we do not pad slices. - // if we pad it, the slice number should be set to 6 and current mip level > 1 - if (flags.cube && (!m_configFlags.noCubeMipSlicesPad || flags.cubeAsArray)) - { - *pSlices = NextPow2(*pSlices); - } - - // normal 3D texture or arrays or cubemap has a thick mode? (Just pass unit test) - if (thickness > 1) - { - *pSlices = PowTwoAlign((*pSlices), sliceAlign); - } - - } - - HwlPadDimensions(tileMode, - bpp, - flags, - numSamples, - pTileInfo, - mipLevel, - pPitch, - pPitchAlign, - *pHeight, - heightAlign); -} - - -/** -**************************************************************************************************** -* Lib::HwlPreHandleBaseLvl3xPitch -* -* @brief -* Pre-handler of 3x pitch (96 bit) adjustment -* -* @return -* Expected pitch -**************************************************************************************************** -*/ -UINT_32 Lib::HwlPreHandleBaseLvl3xPitch( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input - UINT_32 expPitch ///< [in] pitch - ) const -{ - ADDR_ASSERT(pIn->width == expPitch); - // - // If pitch is pre-multiplied by 3, we retrieve original one here to get correct miplevel size - // - if (ElemLib::IsExpand3x(pIn->format) && - pIn->mipLevel == 0 && - pIn->tileMode == ADDR_TM_LINEAR_ALIGNED) - { - expPitch /= 3; - expPitch = NextPow2(expPitch); - } - - return expPitch; -} - -/** 
-**************************************************************************************************** -* Lib::HwlPostHandleBaseLvl3xPitch -* -* @brief -* Post-handler of 3x pitch adjustment -* -* @return -* Expected pitch -**************************************************************************************************** -*/ -UINT_32 Lib::HwlPostHandleBaseLvl3xPitch( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input - UINT_32 expPitch ///< [in] pitch - ) const -{ - // - // 96 bits surface of sub levels require element pitch of 32 bits instead - // So we just return pitch in 32 bit pixels without timing 3 - // - if (ElemLib::IsExpand3x(pIn->format) && - pIn->mipLevel == 0 && - pIn->tileMode == ADDR_TM_LINEAR_ALIGNED) - { - expPitch *= 3; - } - - return expPitch; -} - - -/** -**************************************************************************************************** -* Lib::IsMacroTiled -* -* @brief -* Check if the tile mode is macro tiled -* -* @return -* TRUE if it is macro tiled (2D/2B/3D/3B) -**************************************************************************************************** -*/ -BOOL_32 Lib::IsMacroTiled( - AddrTileMode tileMode) ///< [in] tile mode -{ - return ModeFlags[tileMode].isMacro; -} - -/** -**************************************************************************************************** -* Lib::IsMacro3dTiled -* -* @brief -* Check if the tile mode is 3D macro tiled -* -* @return -* TRUE if it is 3D macro tiled -**************************************************************************************************** -*/ -BOOL_32 Lib::IsMacro3dTiled( - AddrTileMode tileMode) ///< [in] tile mode -{ - return ModeFlags[tileMode].isMacro3d; -} - -/** -**************************************************************************************************** -* Lib::IsMicroTiled -* -* @brief -* Check if the tile mode is micro tiled -* -* @return -* TRUE if micro tiled 
-**************************************************************************************************** -*/ -BOOL_32 Lib::IsMicroTiled( - AddrTileMode tileMode) ///< [in] tile mode -{ - return ModeFlags[tileMode].isMicro; -} - -/** -**************************************************************************************************** -* Lib::IsLinear -* -* @brief -* Check if the tile mode is linear -* -* @return -* TRUE if linear -**************************************************************************************************** -*/ -BOOL_32 Lib::IsLinear( - AddrTileMode tileMode) ///< [in] tile mode -{ - return ModeFlags[tileMode].isLinear; -} - -/** -**************************************************************************************************** -* Lib::IsPrtNoRotationTileMode -* -* @brief -* Return TRUE if it is prt tile without rotation -* @note -* This function just used by CI -**************************************************************************************************** -*/ -BOOL_32 Lib::IsPrtNoRotationTileMode( - AddrTileMode tileMode) -{ - return ModeFlags[tileMode].isPrtNoRotation; -} - -/** -**************************************************************************************************** -* Lib::IsPrtTileMode -* -* @brief -* Return TRUE if it is prt tile -* @note -* This function just used by CI -**************************************************************************************************** -*/ -BOOL_32 Lib::IsPrtTileMode( - AddrTileMode tileMode) -{ - return ModeFlags[tileMode].isPrt; -} - -/** -**************************************************************************************************** -* Lib::ComputeMipLevel -* -* @brief -* Compute mipmap level width/height/slices -* @return -* N/A -**************************************************************************************************** -*/ -VOID Lib::ComputeMipLevel( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in,out] Input structure - ) const -{ - // Check if HWL has handled - 
BOOL_32 hwlHandled = FALSE; - (void)hwlHandled; - - if (ElemLib::IsBlockCompressed(pIn->format)) - { - if (pIn->mipLevel == 0) - { - // DXTn's level 0 must be multiple of 4 - // But there are exceptions: - // 1. Internal surface creation in hostblt/vsblt/etc... - // 2. Runtime doesn't reject ATI1/ATI2 whose width/height are not multiple of 4 - pIn->width = PowTwoAlign(pIn->width, 4); - pIn->height = PowTwoAlign(pIn->height, 4); - } - } - - hwlHandled = HwlComputeMipLevel(pIn); -} - -/** -**************************************************************************************************** -* Lib::DegradeTo1D -* -* @brief -* Check if surface can be degraded to 1D -* @return -* TRUE if degraded -**************************************************************************************************** -*/ -BOOL_32 Lib::DegradeTo1D( - UINT_32 width, ///< surface width - UINT_32 height, ///< surface height - UINT_32 macroTilePitchAlign, ///< macro tile pitch align - UINT_32 macroTileHeightAlign ///< macro tile height align - ) -{ - BOOL_32 degrade = ((width < macroTilePitchAlign) || (height < macroTileHeightAlign)); - - // Check whether 2D tiling still has too much footprint - if (degrade == FALSE) - { - // Only check width and height as slices are aligned to thickness - UINT_64 unalignedSize = width * height; - - UINT_32 alignedPitch = PowTwoAlign(width, macroTilePitchAlign); - UINT_32 alignedHeight = PowTwoAlign(height, macroTileHeightAlign); - UINT_64 alignedSize = alignedPitch * alignedHeight; - - // alignedSize > 1.5 * unalignedSize - if (2 * alignedSize > 3 * unalignedSize) - { - degrade = TRUE; - } - } - - return degrade; -} - -/** -**************************************************************************************************** -* Lib::OptimizeTileMode -* -* @brief -* Check if base level's tile mode can be optimized (degraded) -* @return -* N/A -**************************************************************************************************** -*/ -VOID 
Lib::OptimizeTileMode( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in, out] structure for surface info - ) const -{ - AddrTileMode tileMode = pInOut->tileMode; - - BOOL_32 doOpt = (pInOut->flags.opt4Space == TRUE) || - (pInOut->flags.minimizeAlignment == TRUE) || - (pInOut->maxBaseAlign != 0); - - BOOL_32 convertToPrt = FALSE; - - // Optimization can only be done on level 0 and samples <= 1 - if ((doOpt == TRUE) && - (pInOut->mipLevel == 0) && - (IsPrtTileMode(tileMode) == FALSE) && - (pInOut->flags.prt == FALSE)) - { - UINT_32 width = pInOut->width; - UINT_32 height = pInOut->height; - UINT_32 thickness = Thickness(tileMode); - BOOL_32 macroTiledOK = TRUE; - UINT_32 macroWidthAlign = 0; - UINT_32 macroHeightAlign = 0; - UINT_32 macroSizeAlign = 0; - - if (IsMacroTiled(tileMode)) - { - macroTiledOK = HwlGetAlignmentInfoMacroTiled(pInOut, - ¯oWidthAlign, - ¯oHeightAlign, - ¯oSizeAlign); - } - - if (macroTiledOK) - { - if ((pInOut->flags.display == FALSE) && - (pInOut->flags.opt4Space == TRUE) && - (pInOut->numSamples <= 1)) - { - // Check if linear mode is optimal - if ((pInOut->height == 1) && - (IsLinear(tileMode) == FALSE) && - (ElemLib::IsBlockCompressed(pInOut->format) == FALSE) && - (pInOut->flags.depth == FALSE) && - (pInOut->flags.stencil == FALSE) && - (m_configFlags.disableLinearOpt == FALSE) && - (pInOut->flags.disableLinearOpt == FALSE)) - { - tileMode = ADDR_TM_LINEAR_ALIGNED; - } - else if (IsMacroTiled(tileMode) && (pInOut->flags.tcCompatible == FALSE)) - { - if (DegradeTo1D(width, height, macroWidthAlign, macroHeightAlign)) - { - tileMode = (thickness == 1) ? - ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK; - } - else if ((thickness > 1) && (pInOut->flags.disallowLargeThickDegrade == 0)) - { - // As in the following HwlComputeSurfaceInfo, thick modes may be degraded to - // thinner modes, we should re-evaluate whether the corresponding - // thinner modes should be degraded. If so, we choose 1D thick mode instead. 
- tileMode = DegradeLargeThickTile(pInOut->tileMode, pInOut->bpp); - - if (tileMode != pInOut->tileMode) - { - // Get thickness again after large thick degrade - thickness = Thickness(tileMode); - - ADDR_COMPUTE_SURFACE_INFO_INPUT input = *pInOut; - input.tileMode = tileMode; - - macroTiledOK = HwlGetAlignmentInfoMacroTiled(&input, - ¯oWidthAlign, - ¯oHeightAlign, - ¯oSizeAlign); - - if (macroTiledOK && - DegradeTo1D(width, height, macroWidthAlign, macroHeightAlign)) - { - tileMode = ADDR_TM_1D_TILED_THICK; - } - } - } - } - } - - if (macroTiledOK) - { - if ((pInOut->flags.minimizeAlignment == TRUE) && - (pInOut->numSamples <= 1) && - (IsMacroTiled(tileMode) == TRUE)) - { - UINT_32 macroSize = PowTwoAlign(width, macroWidthAlign) * - PowTwoAlign(height, macroHeightAlign); - UINT_32 microSize = PowTwoAlign(width, MicroTileWidth) * - PowTwoAlign(height, MicroTileHeight); - - if (macroSize > microSize) - { - tileMode = (thickness == 1) ? - ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK; - } - } - - if ((pInOut->maxBaseAlign != 0) && - (IsMacroTiled(tileMode) == TRUE)) - { - if (macroSizeAlign > pInOut->maxBaseAlign) - { - if (pInOut->numSamples > 1) - { - ADDR_ASSERT(pInOut->maxBaseAlign >= Block64K); - - convertToPrt = TRUE; - } - else if (pInOut->maxBaseAlign < Block64K) - { - tileMode = (thickness == 1) ? 
- ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK; - } - else - { - convertToPrt = TRUE; - } - } - } - } - } - } - - if (convertToPrt) - { - if ((pInOut->flags.matchStencilTileCfg == TRUE) && (pInOut->numSamples <= 1)) - { - pInOut->tileMode = ADDR_TM_1D_TILED_THIN1; - } - else - { - HwlSetPrtTileMode(pInOut); - } - } - else if (tileMode != pInOut->tileMode) - { - pInOut->tileMode = tileMode; - } - - HwlOptimizeTileMode(pInOut); -} - -/** -**************************************************************************************************** -* Lib::DegradeLargeThickTile -* -* @brief -* Check if the thickness needs to be reduced if a tile is too large -* @return -* The degraded tile mode (unchanged if not degraded) -**************************************************************************************************** -*/ -AddrTileMode Lib::DegradeLargeThickTile( - AddrTileMode tileMode, - UINT_32 bpp) const -{ - // Override tilemode - // When tile_width (8) * tile_height (8) * thickness * element_bytes is > row_size, - // it is better to just use THIN mode in this case - UINT_32 thickness = Thickness(tileMode); - - if (thickness > 1 && m_configFlags.allowLargeThickTile == 0) - { - UINT_32 tileSize = MicroTilePixels * thickness * (bpp >> 3); - - if (tileSize > m_rowSize) - { - switch (tileMode) - { - case ADDR_TM_2D_TILED_XTHICK: - if ((tileSize >> 1) <= m_rowSize) - { - tileMode = ADDR_TM_2D_TILED_THICK; - break; - } - // else fall through - case ADDR_TM_2D_TILED_THICK: - tileMode = ADDR_TM_2D_TILED_THIN1; - break; - - case ADDR_TM_3D_TILED_XTHICK: - if ((tileSize >> 1) <= m_rowSize) - { - tileMode = ADDR_TM_3D_TILED_THICK; - break; - } - // else fall through - case ADDR_TM_3D_TILED_THICK: - tileMode = ADDR_TM_3D_TILED_THIN1; - break; - - case ADDR_TM_PRT_TILED_THICK: - tileMode = ADDR_TM_PRT_TILED_THIN1; - break; - - case ADDR_TM_PRT_2D_TILED_THICK: - tileMode = ADDR_TM_PRT_2D_TILED_THIN1; - break; - - case ADDR_TM_PRT_3D_TILED_THICK: - tileMode = 
ADDR_TM_PRT_3D_TILED_THIN1; - break; - - default: - break; - } - } - } - - return tileMode; -} - -/** -**************************************************************************************************** -* Lib::PostComputeMipLevel -* @brief -* Compute MipLevel info (including level 0) after surface adjustment -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::PostComputeMipLevel( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in,out] Input structure - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] Output structure - ) const -{ - // Mipmap including level 0 must be pow2 padded since either SI hw expects so or it is - // required by CFX for Hw Compatibility between NI and SI. Otherwise it is only needed for - // mipLevel > 0. Any h/w has different requirement should implement its own virtual function - - if (pIn->flags.pow2Pad) - { - pIn->width = NextPow2(pIn->width); - pIn->height = NextPow2(pIn->height); - pIn->numSlices = NextPow2(pIn->numSlices); - } - else if (pIn->mipLevel > 0) - { - pIn->width = NextPow2(pIn->width); - pIn->height = NextPow2(pIn->height); - - if (!pIn->flags.cube) - { - pIn->numSlices = NextPow2(pIn->numSlices); - } - - // for cubemap, we keep its value at first - } - - return ADDR_OK; -} - -/** -**************************************************************************************************** -* Lib::HwlSetupTileCfg -* -* @brief -* Map tile index to tile setting. 
-* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::HwlSetupTileCfg( - UINT_32 bpp, ///< Bits per pixel - INT_32 index, ///< [in] Tile index - INT_32 macroModeIndex, ///< [in] Index in macro tile mode table(CI) - ADDR_TILEINFO* pInfo, ///< [out] Tile Info - AddrTileMode* pMode, ///< [out] Tile mode - AddrTileType* pType ///< [out] Tile type - ) const -{ - return ADDR_NOTSUPPORTED; -} - -/** -**************************************************************************************************** -* Lib::HwlGetPipes -* -* @brief -* Get number pipes -* @return -* num pipes -**************************************************************************************************** -*/ -UINT_32 Lib::HwlGetPipes( - const ADDR_TILEINFO* pTileInfo ///< [in] Tile info - ) const -{ - //pTileInfo can be NULL when asic is 6xx and 8xx. - return m_pipes; -} - -/** -**************************************************************************************************** -* Lib::ComputeQbStereoInfo -* -* @brief -* Get quad buffer stereo information -* @return -* N/A -**************************************************************************************************** -*/ -VOID Lib::ComputeQbStereoInfo( - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] updated pOut+pStereoInfo - ) const -{ - ADDR_ASSERT(pOut->bpp >= 8); - ADDR_ASSERT((pOut->surfSize % pOut->baseAlign) == 0); - - // Save original height - pOut->pStereoInfo->eyeHeight = pOut->height; - - // Right offset - pOut->pStereoInfo->rightOffset = static_cast(pOut->surfSize); - - pOut->pStereoInfo->rightSwizzle = HwlComputeQbStereoRightSwizzle(pOut); - // Double height - pOut->height <<= 1; - pOut->pixelHeight <<= 1; - - // Double size - pOut->surfSize <<= 1; - - // Right start address meets the base align since it is guaranteed by AddrLib1 - - // 1D surface on SI may break this rule, but we can force it to meet by checking 
.qbStereo. -} - - -/** -**************************************************************************************************** -* Lib::ComputePrtInfo -* -* @brief -* Compute prt surface related info -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputePrtInfo( - const ADDR_PRT_INFO_INPUT* pIn, - ADDR_PRT_INFO_OUTPUT* pOut) const -{ - ADDR_ASSERT(pOut != NULL); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - UINT_32 expandX = 1; - UINT_32 expandY = 1; - ElemMode elemMode; - - UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, - &elemMode, - &expandX, - &expandY); - - if (bpp <8 || bpp == 24 || bpp == 48 || bpp == 96) - { - returnCode = ADDR_INVALIDPARAMS; - } - - UINT_32 numFrags = pIn->numFrags; - ADDR_ASSERT(numFrags <= 8); - - UINT_32 tileWidth = 0; - UINT_32 tileHeight = 0; - if (returnCode == ADDR_OK) - { - // 3D texture without depth or 2d texture - if (pIn->baseMipDepth > 1 || pIn->baseMipHeight > 1) - { - if (bpp == 8) - { - tileWidth = 256; - tileHeight = 256; - } - else if (bpp == 16) - { - tileWidth = 256; - tileHeight = 128; - } - else if (bpp == 32) - { - tileWidth = 128; - tileHeight = 128; - } - else if (bpp == 64) - { - // assume it is BC1/4 - tileWidth = 512; - tileHeight = 256; - - if (elemMode == ADDR_UNCOMPRESSED) - { - tileWidth = 128; - tileHeight = 64; - } - } - else if (bpp == 128) - { - // assume it is BC2/3/5/6H/7 - tileWidth = 256; - tileHeight = 256; - - if (elemMode == ADDR_UNCOMPRESSED) - { - tileWidth = 64; - tileHeight = 64; - } - } - - if (numFrags == 2) - { - tileWidth = tileWidth / 2; - } - else if (numFrags == 4) - { - tileWidth = tileWidth / 2; - tileHeight = tileHeight / 2; - } - else if (numFrags == 8) - { - tileWidth = tileWidth / 4; - tileHeight = tileHeight / 2; - } - } - else // 1d - { - tileHeight = 1; - if (bpp == 8) - { - tileWidth = 65536; - } - else if (bpp == 16) - { - tileWidth = 32768; - } - 
else if (bpp == 32) - { - tileWidth = 16384; - } - else if (bpp == 64) - { - tileWidth = 8192; - } - else if (bpp == 128) - { - tileWidth = 4096; - } - } - } - - pOut->prtTileWidth = tileWidth; - pOut->prtTileHeight = tileHeight; - - return returnCode; -} - -} // V1 -} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/core/addrlib1.h mesa-19.0.1/src/amd/addrlib/core/addrlib1.h --- mesa-18.3.3/src/amd/addrlib/core/addrlib1.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/core/addrlib1.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,545 +0,0 @@ -/* - * Copyright © 2016 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addrlib1.h -* @brief Contains the Addr::V1::Lib class definition. 
-**************************************************************************************************** -*/ - -#ifndef __ADDR_LIB1_H__ -#define __ADDR_LIB1_H__ - -#include "addrlib.h" - -namespace Addr -{ -namespace V1 -{ - -/** -**************************************************************************************************** -* @brief Neutral enums that define bank swap size -**************************************************************************************************** -*/ -enum SampleSplitSize -{ - ADDR_SAMPLESPLIT_1KB = 1024, - ADDR_SAMPLESPLIT_2KB = 2048, - ADDR_SAMPLESPLIT_4KB = 4096, - ADDR_SAMPLESPLIT_8KB = 8192, -}; - -/** -**************************************************************************************************** -* @brief Flags for AddrTileMode -**************************************************************************************************** -*/ -struct TileModeFlags -{ - UINT_32 thickness : 4; - UINT_32 isLinear : 1; - UINT_32 isMicro : 1; - UINT_32 isMacro : 1; - UINT_32 isMacro3d : 1; - UINT_32 isPrt : 1; - UINT_32 isPrtNoRotation : 1; - UINT_32 isBankSwapped : 1; -}; - -static const UINT_32 Block64K = 0x10000; -static const UINT_32 PrtTileSize = Block64K; - -/** -**************************************************************************************************** -* @brief This class contains asic independent address lib functionalities -**************************************************************************************************** -*/ -class Lib : public Addr::Lib -{ -public: - virtual ~Lib(); - - static Lib* GetLib( - ADDR_HANDLE hLib); - - /// Returns tileIndex support - BOOL_32 UseTileIndex(INT_32 index) const - { - return m_configFlags.useTileIndex && (index != TileIndexInvalid); - } - - /// Returns combined swizzle support - BOOL_32 UseCombinedSwizzle() const - { - return m_configFlags.useCombinedSwizzle; - } - - // - // Interface stubs - // - ADDR_E_RETURNCODE ComputeSurfaceInfo( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* 
pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoord( - const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddr( - const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeSliceTileSwizzle( - const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, - ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ExtractBankPipeSwizzle( - const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, - ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE CombineBankPipeSwizzle( - const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn, - ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeBaseSwizzle( - const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, - ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeFmaskInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut); - - ADDR_E_RETURNCODE ComputeFmaskAddrFromCoord( - const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeFmaskCoordFromAddr( - const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ConvertTileInfoToHW( - const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, - ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ConvertTileIndex( - const ADDR_CONVERT_TILEINDEX_INPUT* pIn, - ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE GetMacroModeIndex( - const ADDR_GET_MACROMODEINDEX_INPUT* pIn, - ADDR_GET_MACROMODEINDEX_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ConvertTileIndex1( - const ADDR_CONVERT_TILEINDEX1_INPUT* pIn, - ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE GetTileIndex( - const ADDR_GET_TILEINDEX_INPUT* 
pIn, - ADDR_GET_TILEINDEX_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeHtileInfo( - const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn, - ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeCmaskInfo( - const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn, - ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeDccInfo( - const ADDR_COMPUTE_DCCINFO_INPUT* pIn, - ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeHtileAddrFromCoord( - const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeCmaskAddrFromCoord( - const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeHtileCoordFromAddr( - const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeCmaskCoordFromAddr( - const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputePrtInfo( - const ADDR_PRT_INFO_INPUT* pIn, - ADDR_PRT_INFO_OUTPUT* pOut) const; -protected: - Lib(); // Constructor is protected - Lib(const Client* pClient); - - /// Pure Virtual function for Hwl computing surface info - virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0; - - /// Pure Virtual function for Hwl computing surface address from coord - virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoord( - const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const = 0; - - /// Pure Virtual function for Hwl computing surface coord from address - virtual ADDR_E_RETURNCODE HwlComputeSurfaceCoordFromAddr( - const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const = 0; - - /// Pure Virtual function 
for Hwl computing surface tile swizzle - virtual ADDR_E_RETURNCODE HwlComputeSliceTileSwizzle( - const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, - ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const = 0; - - /// Pure Virtual function for Hwl extracting bank/pipe swizzle from base256b - virtual ADDR_E_RETURNCODE HwlExtractBankPipeSwizzle( - const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, - ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const = 0; - - /// Pure Virtual function for Hwl combining bank/pipe swizzle - virtual ADDR_E_RETURNCODE HwlCombineBankPipeSwizzle( - UINT_32 bankSwizzle, UINT_32 pipeSwizzle, ADDR_TILEINFO* pTileInfo, - UINT_64 baseAddr, UINT_32* pTileSwizzle) const = 0; - - /// Pure Virtual function for Hwl computing base swizzle - virtual ADDR_E_RETURNCODE HwlComputeBaseSwizzle( - const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, - ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const = 0; - - /// Pure Virtual function for Hwl computing HTILE base align - virtual UINT_32 HwlComputeHtileBaseAlign( - BOOL_32 isTcCompatible, BOOL_32 isLinear, ADDR_TILEINFO* pTileInfo) const = 0; - - /// Pure Virtual function for Hwl computing HTILE bpp - virtual UINT_32 HwlComputeHtileBpp( - BOOL_32 isWidth8, BOOL_32 isHeight8) const = 0; - - /// Pure Virtual function for Hwl computing HTILE bytes - virtual UINT_64 HwlComputeHtileBytes( - UINT_32 pitch, UINT_32 height, UINT_32 bpp, - BOOL_32 isLinear, UINT_32 numSlices, UINT_64* pSliceBytes, UINT_32 baseAlign) const = 0; - - /// Pure Virtual function for Hwl computing FMASK info - virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut) = 0; - - /// Pure Virtual function for Hwl FMASK address from coord - virtual ADDR_E_RETURNCODE HwlComputeFmaskAddrFromCoord( - const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const = 0; - - /// Pure Virtual function for Hwl FMASK coord from address - virtual ADDR_E_RETURNCODE 
HwlComputeFmaskCoordFromAddr( - const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const = 0; - - /// Pure Virtual function for Hwl convert tile info from real value to HW value - virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW( - const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, - ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const = 0; - - /// Pure Virtual function for Hwl compute mipmap info - virtual BOOL_32 HwlComputeMipLevel( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const = 0; - - /// Pure Virtual function for Hwl compute max cmask blockMax value - virtual BOOL_32 HwlGetMaxCmaskBlockMax() const = 0; - - /// Pure Virtual function for Hwl compute fmask bits - virtual UINT_32 HwlComputeFmaskBits( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, - UINT_32* pNumSamples) const = 0; - - /// Virtual function to get index (not pure then no need to implement this in all hwls - virtual ADDR_E_RETURNCODE HwlGetTileIndex( - const ADDR_GET_TILEINDEX_INPUT* pIn, - ADDR_GET_TILEINDEX_OUTPUT* pOut) const - { - return ADDR_NOTSUPPORTED; - } - - /// Virtual function for Hwl to compute Dcc info - virtual ADDR_E_RETURNCODE HwlComputeDccInfo( - const ADDR_COMPUTE_DCCINFO_INPUT* pIn, - ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const - { - return ADDR_NOTSUPPORTED; - } - - /// Virtual function to get cmask address for tc compatible cmask - virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord( - const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const - { - return ADDR_NOTSUPPORTED; - } - - /// Virtual function to get htile address for tc compatible htile - virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord( - const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const - { - return ADDR_NOTSUPPORTED; - } - - // Compute attributes - - // HTILE - UINT_32 ComputeHtileInfo( - ADDR_HTILE_FLAGS flags, - UINT_32 pitchIn, UINT_32 heightIn, UINT_32 numSlices, - BOOL_32 
isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8, - ADDR_TILEINFO* pTileInfo, - UINT_32* pPitchOut, UINT_32* pHeightOut, UINT_64* pHtileBytes, - UINT_32* pMacroWidth = NULL, UINT_32* pMacroHeight = NULL, - UINT_64* pSliceSize = NULL, UINT_32* pBaseAlign = NULL) const; - - // CMASK - ADDR_E_RETURNCODE ComputeCmaskInfo( - ADDR_CMASK_FLAGS flags, - UINT_32 pitchIn, UINT_32 heightIn, UINT_32 numSlices, BOOL_32 isLinear, - ADDR_TILEINFO* pTileInfo, UINT_32* pPitchOut, UINT_32* pHeightOut, UINT_64* pCmaskBytes, - UINT_32* pMacroWidth, UINT_32* pMacroHeight, UINT_64* pSliceSize = NULL, - UINT_32* pBaseAlign = NULL, UINT_32* pBlockMax = NULL) const; - - virtual VOID HwlComputeTileDataWidthAndHeightLinear( - UINT_32* pMacroWidth, UINT_32* pMacroHeight, - UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const; - - // CMASK & HTILE addressing - virtual UINT_64 HwlComputeXmaskAddrFromCoord( - UINT_32 pitch, UINT_32 height, UINT_32 x, UINT_32 y, UINT_32 slice, - UINT_32 numSlices, UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, - BOOL_32 isHeight8, ADDR_TILEINFO* pTileInfo, - UINT_32* bitPosition) const; - - virtual VOID HwlComputeXmaskCoordFromAddr( - UINT_64 addr, UINT_32 bitPosition, UINT_32 pitch, UINT_32 height, UINT_32 numSlices, - UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8, - ADDR_TILEINFO* pTileInfo, UINT_32* pX, UINT_32* pY, UINT_32* pSlice) const; - - // Surface mipmap - VOID ComputeMipLevel( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const; - - /// Pure Virtual function for Hwl to get macro tiled alignment info - virtual BOOL_32 HwlGetAlignmentInfoMacroTiled( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - UINT_32* pPitchAlign, UINT_32* pHeightAlign, UINT_32* pSizeAlign) const = 0; - - - virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const - { - // not supported in hwl layer - } - - virtual VOID HwlOptimizeTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const - { - // not supported in hwl layer - } - - virtual VOID 
HwlSelectTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const - { - // not supported in hwl layer - } - - AddrTileMode DegradeLargeThickTile(AddrTileMode tileMode, UINT_32 bpp) const; - - VOID PadDimensions( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, - UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 padDims, UINT_32 mipLevel, - UINT_32* pPitch, UINT_32* pPitchAlign, UINT_32* pHeight, UINT_32 heightAlign, - UINT_32* pSlices, UINT_32 sliceAlign) const; - - virtual VOID HwlPadDimensions( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, - UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 mipLevel, - UINT_32* pPitch, UINT_32* pPitchAlign, UINT_32 height, UINT_32 heightAlign) const - { - } - - // - // Addressing shared for linear/1D tiling - // - UINT_64 ComputeSurfaceAddrFromCoordLinear( - UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, - UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSlices, - UINT_32* pBitPosition) const; - - VOID ComputeSurfaceCoordFromAddrLinear( - UINT_64 addr, UINT_32 bitPosition, UINT_32 bpp, - UINT_32 pitch, UINT_32 height, UINT_32 numSlices, - UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample) const; - - VOID ComputeSurfaceCoordFromAddrMicroTiled( - UINT_64 addr, UINT_32 bitPosition, - UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, - AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits, - UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, - AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const; - - ADDR_E_RETURNCODE ComputeMicroTileEquation( - UINT_32 bpp, AddrTileMode tileMode, - AddrTileType microTileType, ADDR_EQUATION* pEquation) const; - - UINT_32 ComputePixelIndexWithinMicroTile( - UINT_32 x, UINT_32 y, UINT_32 z, - UINT_32 bpp, AddrTileMode tileMode, AddrTileType microTileType) const; - - /// Pure Virtual function for Hwl computing coord from offset inside micro tile - virtual VOID HwlComputePixelCoordFromOffset( - UINT_32 
offset, UINT_32 bpp, UINT_32 numSamples, - AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits, - UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, - AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const = 0; - - // - // Addressing shared by all - // - virtual UINT_32 HwlGetPipes( - const ADDR_TILEINFO* pTileInfo) const; - - UINT_32 ComputePipeFromAddr( - UINT_64 addr, UINT_32 numPipes) const; - - virtual ADDR_E_RETURNCODE ComputePipeEquation( - UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const - { - return ADDR_NOTSUPPORTED; - } - - /// Pure Virtual function for Hwl computing pipe from coord - virtual UINT_32 ComputePipeFromCoord( - UINT_32 x, UINT_32 y, UINT_32 slice, AddrTileMode tileMode, - UINT_32 pipeSwizzle, BOOL_32 flags, ADDR_TILEINFO* pTileInfo) const = 0; - - /// Pure Virtual function for Hwl computing coord Y for 8 pipe cmask/htile - virtual UINT_32 HwlComputeXmaskCoordYFrom8Pipe( - UINT_32 pipe, UINT_32 x) const = 0; - - // - // Misc helper - // - static const TileModeFlags ModeFlags[ADDR_TM_COUNT]; - - static UINT_32 Thickness( - AddrTileMode tileMode); - - // Checking tile mode - static BOOL_32 IsMacroTiled(AddrTileMode tileMode); - static BOOL_32 IsMacro3dTiled(AddrTileMode tileMode); - static BOOL_32 IsLinear(AddrTileMode tileMode); - static BOOL_32 IsMicroTiled(AddrTileMode tileMode); - static BOOL_32 IsPrtTileMode(AddrTileMode tileMode); - static BOOL_32 IsPrtNoRotationTileMode(AddrTileMode tileMode); - - /// Return TRUE if tile info is needed - BOOL_32 UseTileInfo() const - { - return !m_configFlags.ignoreTileInfo; - } - - /// Adjusts pitch alignment for flipping surface - VOID AdjustPitchAlignment( - ADDR_SURFACE_FLAGS flags, UINT_32* pPitchAlign) const; - - /// Overwrite tile config according to tile index - virtual ADDR_E_RETURNCODE HwlSetupTileCfg( - UINT_32 bpp, INT_32 index, INT_32 macroModeIndex, - ADDR_TILEINFO* pInfo, AddrTileMode* mode = NULL, 
AddrTileType* type = NULL) const; - - /// Overwrite macro tile config according to tile index - virtual INT_32 HwlComputeMacroModeIndex( - INT_32 index, ADDR_SURFACE_FLAGS flags, UINT_32 bpp, UINT_32 numSamples, - ADDR_TILEINFO* pTileInfo, AddrTileMode *pTileMode = NULL, AddrTileType *pTileType = NULL - ) const - { - return TileIndexNoMacroIndex; - } - - /// Pre-handler of 3x pitch (96 bit) adjustment - virtual UINT_32 HwlPreHandleBaseLvl3xPitch( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const; - /// Post-handler of 3x pitch adjustment - virtual UINT_32 HwlPostHandleBaseLvl3xPitch( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const; - /// Check miplevel after surface adjustment - ADDR_E_RETURNCODE PostComputeMipLevel( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - /// Quad buffer stereo support, has its implementation in ind. layer - VOID ComputeQbStereoInfo( - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - /// Pure virutual function to compute stereo bank swizzle for right eye - virtual UINT_32 HwlComputeQbStereoRightSwizzle( - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0; - - VOID OptimizeTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; - - /// Overwrite tile setting to PRT - virtual VOID HwlSetPrtTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const - { - } - - static BOOL_32 DegradeTo1D( - UINT_32 width, UINT_32 height, - UINT_32 macroTilePitchAlign, UINT_32 macroTileHeightAlign); - -private: - // Disallow the copy constructor - Lib(const Lib& a); - - // Disallow the assignment operator - Lib& operator=(const Lib& a); - - UINT_32 ComputeCmaskBaseAlign( - ADDR_CMASK_FLAGS flags, ADDR_TILEINFO* pTileInfo) const; - - UINT_64 ComputeCmaskBytes( - UINT_32 pitch, UINT_32 height, UINT_32 numSlices) const; - - // - // CMASK/HTILE shared methods - // - VOID ComputeTileDataWidthAndHeight( - UINT_32 bpp, UINT_32 cacheBits, ADDR_TILEINFO* pTileInfo, - UINT_32* 
pMacroWidth, UINT_32* pMacroHeight) const; - - UINT_32 ComputeXmaskCoordYFromPipe( - UINT_32 pipe, UINT_32 x) const; -}; - -} // V1 -} // Addr - -#endif - diff -Nru mesa-18.3.3/src/amd/addrlib/core/addrlib2.cpp mesa-19.0.1/src/amd/addrlib/core/addrlib2.cpp --- mesa-18.3.3/src/amd/addrlib/core/addrlib2.cpp 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/core/addrlib2.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,1891 +0,0 @@ -/* - * Copyright © 2017 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -************************************************************************************************************************ -* @file addrlib2.cpp -* @brief Contains the implementation for the AddrLib2 base class. 
-************************************************************************************************************************ -*/ - -#include "addrinterface.h" -#include "addrlib2.h" -#include "addrcommon.h" - -namespace Addr -{ -namespace V2 -{ - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Static Const Member -//////////////////////////////////////////////////////////////////////////////////////////////////// - -const Dim2d Lib::Block256_2d[] = {{16, 16}, {16, 8}, {8, 8}, {8, 4}, {4, 4}}; - -const Dim3d Lib::Block1K_3d[] = {{16, 8, 8}, {8, 8, 8}, {8, 8, 4}, {8, 4, 4}, {4, 4, 4}}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Constructor/Destructor -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -************************************************************************************************************************ -* Lib::Lib -* -* @brief -* Constructor for the Addr::V2::Lib class -* -************************************************************************************************************************ -*/ -Lib::Lib() - : - Addr::Lib() -{ -} - -/** -************************************************************************************************************************ -* Lib::Lib -* -* @brief -* Constructor for the AddrLib2 class with hClient as parameter -* -************************************************************************************************************************ -*/ -Lib::Lib(const Client* pClient) - : - Addr::Lib(pClient) -{ -} - -/** -************************************************************************************************************************ -* Lib::~Lib -* -* @brief -* Destructor for the AddrLib2 class -* -************************************************************************************************************************ -*/ -Lib::~Lib() -{ -} - -/** 
-************************************************************************************************************************ -* Lib::GetLib -* -* @brief -* Get Addr::V2::Lib pointer -* -* @return -* An Addr::V2::Lib class pointer -************************************************************************************************************************ -*/ -Lib* Lib::GetLib( - ADDR_HANDLE hLib) ///< [in] handle of ADDR_HANDLE -{ - Addr::Lib* pAddrLib = Addr::Lib::GetLib(hLib); - if ((pAddrLib != NULL) && - (pAddrLib->GetChipFamily() <= ADDR_CHIP_FAMILY_VI)) - { - // only valid and GFX9+ AISC can use AddrLib2 function. - ADDR_ASSERT_ALWAYS(); - hLib = NULL; - } - return static_cast(hLib); -} - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Surface Methods -//////////////////////////////////////////////////////////////////////////////////////////////////// - - -/** -************************************************************************************************************************ -* Lib::ComputeSurfaceInfo -* -* @brief -* Interface function stub of AddrComputeSurfaceInfo. -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - // Adjust coming parameters. 
- ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn; - localIn.width = Max(pIn->width, 1u); - localIn.height = Max(pIn->height, 1u); - localIn.numMipLevels = Max(pIn->numMipLevels, 1u); - localIn.numSlices = Max(pIn->numSlices, 1u); - localIn.numSamples = Max(pIn->numSamples, 1u); - localIn.numFrags = (localIn.numFrags == 0) ? localIn.numSamples : pIn->numFrags; - - UINT_32 expandX = 1; - UINT_32 expandY = 1; - ElemMode elemMode = ADDR_UNCOMPRESSED; - - if (returnCode == ADDR_OK) - { - // Set format to INVALID will skip this conversion - if (localIn.format != ADDR_FMT_INVALID) - { - // Get compression/expansion factors and element mode which indicates compression/expansion - localIn.bpp = GetElemLib()->GetBitsPerPixel(localIn.format, - &elemMode, - &expandX, - &expandY); - - // Special flag for 96 bit surface. 96 (or 48 if we support) bit surface's width is - // pre-multiplied by 3 and bpp is divided by 3. So pitch alignment for linear- - // aligned does not meet 64-pixel in real. We keep special handling in hwl since hw - // restrictions are different. 
- // Also Mip 1+ needs an element pitch of 32 bits so we do not need this workaround - // but we use this flag to skip RestoreSurfaceInfo below - - if ((elemMode == ADDR_EXPANDED) && (expandX > 1)) - { - ADDR_ASSERT(IsLinear(localIn.swizzleMode)); - } - - UINT_32 basePitch = 0; - GetElemLib()->AdjustSurfaceInfo(elemMode, - expandX, - expandY, - &localIn.bpp, - &basePitch, - &localIn.width, - &localIn.height); - - // Overwrite these parameters if we have a valid format - } - - if (localIn.bpp != 0) - { - localIn.width = Max(localIn.width, 1u); - localIn.height = Max(localIn.height, 1u); - } - else // Rule out some invalid parameters - { - ADDR_ASSERT_ALWAYS(); - - returnCode = ADDR_INVALIDPARAMS; - } - } - - if (returnCode == ADDR_OK) - { - returnCode = ComputeSurfaceInfoSanityCheck(&localIn); - } - - if (returnCode == ADDR_OK) - { - VerifyMipLevelInfo(pIn); - - if (IsLinear(pIn->swizzleMode)) - { - // linear mode - returnCode = ComputeSurfaceInfoLinear(&localIn, pOut); - } - else - { - // tiled mode - returnCode = ComputeSurfaceInfoTiled(&localIn, pOut); - } - - if (returnCode == ADDR_OK) - { - pOut->bpp = localIn.bpp; - pOut->pixelPitch = pOut->pitch; - pOut->pixelHeight = pOut->height; - pOut->pixelMipChainPitch = pOut->mipChainPitch; - pOut->pixelMipChainHeight = pOut->mipChainHeight; - pOut->pixelBits = localIn.bpp; - - if (localIn.format != ADDR_FMT_INVALID) - { - UINT_32 pixelBits = pOut->pixelBits; - - GetElemLib()->RestoreSurfaceInfo(elemMode, - expandX, - expandY, - &pOut->pixelBits, - &pOut->pixelPitch, - &pOut->pixelHeight); - - GetElemLib()->RestoreSurfaceInfo(elemMode, - expandX, - expandY, - &pixelBits, - &pOut->pixelMipChainPitch, - &pOut->pixelMipChainHeight); - - if ((localIn.numMipLevels > 1) && (pOut->pMipInfo != NULL)) - { - for (UINT_32 i = 0; i < localIn.numMipLevels; i++) - { - pOut->pMipInfo[i].pixelPitch = pOut->pMipInfo[i].pitch; - pOut->pMipInfo[i].pixelHeight = pOut->pMipInfo[i].height; - - GetElemLib()->RestoreSurfaceInfo(elemMode, - 
expandX, - expandY, - &pixelBits, - &pOut->pMipInfo[i].pixelPitch, - &pOut->pMipInfo[i].pixelHeight); - } - } - } - - if (localIn.flags.needEquation && (Log2(localIn.numFrags) == 0)) - { - pOut->equationIndex = GetEquationIndex(&localIn, pOut); - } - - if (localIn.flags.qbStereo) - { - if (pOut->pStereoInfo != NULL) - { - ComputeQbStereoInfo(pOut); - } - } - } - } - - ADDR_ASSERT(pOut->surfSize != 0); - - ValidBaseAlignments(pOut->baseAlign); - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeSurfaceInfo -* -* @brief -* Interface function stub of AddrComputeSurfaceInfo. -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoord( - const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT localIn = *pIn; - localIn.unalignedWidth = Max(pIn->unalignedWidth, 1u); - localIn.unalignedHeight = Max(pIn->unalignedHeight, 1u); - localIn.numMipLevels = Max(pIn->numMipLevels, 1u); - localIn.numSlices = Max(pIn->numSlices, 1u); - localIn.numSamples = Max(pIn->numSamples, 1u); - localIn.numFrags = Max(pIn->numFrags, 1u); - - if ((localIn.bpp < 8) || - (localIn.bpp > 128) || - ((localIn.bpp % 8) != 0) || - (localIn.sample >= localIn.numSamples) || - (localIn.slice >= localIn.numSlices) || - (localIn.mipId >= localIn.numMipLevels) || - (IsTex3d(localIn.resourceType) && - 
(Valid3DMipSliceIdConstraint(localIn.numSlices, localIn.mipId, localIn.slice) == FALSE))) - { - returnCode = ADDR_INVALIDPARAMS; - } - - if (returnCode == ADDR_OK) - { - if (IsLinear(localIn.swizzleMode)) - { - returnCode = ComputeSurfaceAddrFromCoordLinear(&localIn, pOut); - } - else - { - returnCode = ComputeSurfaceAddrFromCoordTiled(&localIn, pOut); - } - - if (returnCode == ADDR_OK) - { - pOut->prtBlockIndex = static_cast(pOut->addr / (64 * 1024)); - } - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeSurfaceCoordFromAddr -* -* @brief -* Interface function stub of ComputeSurfaceCoordFromAddr. -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddr( - const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if ((pIn->bpp < 8) || - (pIn->bpp > 128) || - ((pIn->bpp % 8) != 0) || - (pIn->bitPosition >= 8)) - { - returnCode = ADDR_INVALIDPARAMS; - } - - if (returnCode == ADDR_OK) - { - if (IsLinear(pIn->swizzleMode)) - { - returnCode = ComputeSurfaceCoordFromAddrLinear(pIn, pOut); - } - else - { - returnCode = ComputeSurfaceCoordFromAddrTiled(pIn, pOut); - } - } - - return returnCode; -} - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// CMASK/HTILE 
-//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -************************************************************************************************************************ -* Lib::ComputeHtileInfo -* -* @brief -* Interface function stub of AddrComputeHtilenfo -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeHtileInfo( - const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - ((pIn->size != sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT)))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - returnCode = HwlComputeHtileInfo(pIn, pOut); - - ValidMetaBaseAlignments(pOut->baseAlign); - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeHtileAddrFromCoord -* -* @brief -* Interface function stub of AddrComputeHtileAddrFromCoord -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeHtileAddrFromCoord( - const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - ((pIn->size != sizeof(ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT)))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - returnCode = HwlComputeHtileAddrFromCoord(pIn, pOut); - } - - return 
returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeHtileCoordFromAddr -* -* @brief -* Interface function stub of AddrComputeHtileCoordFromAddr -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeHtileCoordFromAddr( - const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - ((pIn->size != sizeof(ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT)))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - returnCode = HwlComputeHtileCoordFromAddr(pIn, pOut); - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeCmaskInfo -* -* @brief -* Interface function stub of AddrComputeCmaskInfo -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeCmaskInfo( - const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - ((pIn->size != sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT)))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else if (pIn->cMaskFlags.linear) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - returnCode = HwlComputeCmaskInfo(pIn, pOut); - - ValidMetaBaseAlignments(pOut->baseAlign); - } - - return 
returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeCmaskAddrFromCoord -* -* @brief -* Interface function stub of AddrComputeCmaskAddrFromCoord -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeCmaskAddrFromCoord( - const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - ((pIn->size != sizeof(ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT)))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - returnCode = HwlComputeCmaskAddrFromCoord(pIn, pOut); - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeCmaskCoordFromAddr -* -* @brief -* Interface function stub of AddrComputeCmaskCoordFromAddr -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeCmaskCoordFromAddr( - const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_NOTIMPLEMENTED; - - ADDR_NOT_IMPLEMENTED(); - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeFmaskInfo -* -* @brief -* Interface function stub of ComputeFmaskInfo. 
-* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeFmaskInfo( - const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_FMASK_INFO_OUTPUT* pOut ///< [out] output structure - ) -{ - ADDR_E_RETURNCODE returnCode; - - BOOL_32 valid = (IsZOrderSwizzle(pIn->swizzleMode) == TRUE) && - ((pIn->numSamples > 0) || (pIn->numFrags > 0)); - - if (GetFillSizeFieldsFlags()) - { - if ((pIn->size != sizeof(ADDR2_COMPUTE_FMASK_INFO_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT))) - { - valid = FALSE; - } - } - - if (valid == FALSE) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0}; - - localIn.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT); - localOut.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT); - - localIn.swizzleMode = pIn->swizzleMode; - localIn.numSlices = Max(pIn->numSlices, 1u); - localIn.width = Max(pIn->unalignedWidth, 1u); - localIn.height = Max(pIn->unalignedHeight, 1u); - localIn.bpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags); - localIn.flags.fmask = 1; - localIn.numFrags = 1; - localIn.numSamples = 1; - localIn.resourceType = ADDR_RSRC_TEX_2D; - - if (localIn.bpp == 8) - { - localIn.format = ADDR_FMT_8; - } - else if (localIn.bpp == 16) - { - localIn.format = ADDR_FMT_16; - } - else if (localIn.bpp == 32) - { - localIn.format = ADDR_FMT_32; - } - else - { - localIn.format = ADDR_FMT_32_32; - } - - returnCode = ComputeSurfaceInfo(&localIn, &localOut); - - if (returnCode == ADDR_OK) - { - pOut->pitch = localOut.pitch; - pOut->height = localOut.height; - pOut->baseAlign = localOut.baseAlign; - pOut->numSlices = localOut.numSlices; - pOut->fmaskBytes = static_cast(localOut.surfSize); - pOut->sliceSize = static_cast(localOut.sliceSize); - pOut->bpp = localIn.bpp; - 
pOut->numSamples = 1; - } - } - - ValidBaseAlignments(pOut->baseAlign); - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeFmaskAddrFromCoord -* -* @brief -* Interface function stub of ComputeFmaskAddrFromCoord. -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeFmaskAddrFromCoord( - const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_NOTIMPLEMENTED; - - ADDR_NOT_IMPLEMENTED(); - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeFmaskCoordFromAddr -* -* @brief -* Interface function stub of ComputeFmaskAddrFromCoord. 
-* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeFmaskCoordFromAddr( - const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_NOTIMPLEMENTED; - - ADDR_NOT_IMPLEMENTED(); - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeDccInfo -* -* @brief -* Interface function to compute DCC key info -* -* @return -* return code of HwlComputeDccInfo -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeDccInfo( - const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - ((pIn->size != sizeof(ADDR2_COMPUTE_DCCINFO_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT)))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - returnCode = HwlComputeDccInfo(pIn, pOut); - - ValidMetaBaseAlignments(pOut->dccRamBaseAlign); - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeDccAddrFromCoord -* -* @brief -* Interface function stub of ComputeDccAddrFromCoord -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeDccAddrFromCoord( - const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) 
///< [out] output structure -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - ((pIn->size != sizeof(ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT)))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - returnCode = HwlComputeDccAddrFromCoord(pIn, pOut); - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputePipeBankXor -* -* @brief -* Interface function stub of Addr2ComputePipeBankXor. -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputePipeBankXor( - const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, - ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - ((pIn->size != sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT)))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else if (IsXor(pIn->swizzleMode) == FALSE) - { - returnCode = ADDR_NOTSUPPORTED; - } - else - { - returnCode = HwlComputePipeBankXor(pIn, pOut); - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeSlicePipeBankXor -* -* @brief -* Interface function stub of Addr2ComputeSlicePipeBankXor. 
-* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSlicePipeBankXor( - const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, - ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - ((pIn->size != sizeof(ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT)))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else if ((IsThin(pIn->resourceType, pIn->swizzleMode) == FALSE) || - (IsNonPrtXor(pIn->swizzleMode) == FALSE) || - (pIn->numSamples > 1)) - { - returnCode = ADDR_NOTSUPPORTED; - } - else - { - returnCode = HwlComputeSlicePipeBankXor(pIn, pOut); - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeSubResourceOffsetForSwizzlePattern -* -* @brief -* Interface function stub of Addr2ComputeSubResourceOffsetForSwizzlePattern. 
-* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSubResourceOffsetForSwizzlePattern( - const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, - ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - ((pIn->size != sizeof(ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT)))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - returnCode = HwlComputeSubResourceOffsetForSwizzlePattern(pIn, pOut); - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ExtractPipeBankXor -* -* @brief -* Internal function to extract bank and pipe xor bits from combined xor bits. 
-* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ExtractPipeBankXor( - UINT_32 pipeBankXor, - UINT_32 bankBits, - UINT_32 pipeBits, - UINT_32* pBankX, - UINT_32* pPipeX) -{ - ADDR_E_RETURNCODE returnCode; - - if (pipeBankXor < (1u << (pipeBits + bankBits))) - { - *pPipeX = pipeBankXor % (1 << pipeBits); - *pBankX = pipeBankXor >> pipeBits; - returnCode = ADDR_OK; - } - else - { - ADDR_ASSERT_ALWAYS(); - returnCode = ADDR_INVALIDPARAMS; - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeSurfaceInfoSanityCheck -* -* @brief -* Internal function to do basic sanity check before compute surface info -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceInfoSanityCheck( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure - ) const -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - (pIn->size != sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - returnCode = HwlComputeSurfaceInfoSanityCheck(pIn); - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ApplyCustomizedPitchHeight -* -* @brief -* Helper function to override hw required row pitch/slice pitch by customrized one -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ApplyCustomizedPitchHeight( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - UINT_32 
elementBytes, ///< [in] element bytes per element - UINT_32 pitchAlignInElement, ///< [in] pitch alignment in element - UINT_32* pPitch, ///< [in/out] pitch - UINT_32* pHeight ///< [in/out] height - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pIn->numMipLevels <= 1) - { - if (pIn->pitchInElement > 0) - { - if ((pIn->pitchInElement % pitchAlignInElement) != 0) - { - returnCode = ADDR_INVALIDPARAMS; - } - else if (pIn->pitchInElement < (*pPitch)) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - *pPitch = pIn->pitchInElement; - } - } - - if (returnCode == ADDR_OK) - { - if (pIn->sliceAlign > 0) - { - UINT_32 customizedHeight = pIn->sliceAlign / elementBytes / (*pPitch); - - if (customizedHeight * elementBytes * (*pPitch) != pIn->sliceAlign) - { - returnCode = ADDR_INVALIDPARAMS; - } - else if ((pIn->numSlices > 1) && ((*pHeight) != customizedHeight)) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - *pHeight = customizedHeight; - } - } - } - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeSurfaceInfoLinear -* -* @brief -* Internal function to calculate alignment for linear swizzle surface -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceInfoLinear( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - return HwlComputeSurfaceInfoLinear(pIn, pOut); -} - -/** -************************************************************************************************************************ -* Lib::ComputeSurfaceInfoTiled -* -* @brief -* Internal function to calculate alignment for tiled swizzle surface -* -* @return -* ADDR_E_RETURNCODE 
-************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceInfoTiled( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - return HwlComputeSurfaceInfoTiled(pIn, pOut); -} - -/** -************************************************************************************************************************ -* Lib::ComputeSurfaceAddrFromCoordLinear -* -* @brief -* Internal function to calculate address from coord for linear swizzle surface -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoordLinear( - const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - BOOL_32 valid = (pIn->numSamples <= 1) && (pIn->numFrags <= 1) && (pIn->pipeBankXor == 0); - - if (valid) - { - if (IsTex1d(pIn->resourceType)) - { - valid = (pIn->y == 0); - } - } - - if (valid) - { - ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0}; - ADDR2_MIP_INFO mipInfo[MaxMipLevels]; - - localIn.bpp = pIn->bpp; - localIn.flags = pIn->flags; - localIn.width = Max(pIn->unalignedWidth, 1u); - localIn.height = Max(pIn->unalignedHeight, 1u); - localIn.numSlices = Max(pIn->numSlices, 1u); - localIn.numMipLevels = Max(pIn->numMipLevels, 1u); - localIn.resourceType = pIn->resourceType; - - if (localIn.numMipLevels <= 1) - { - localIn.pitchInElement = pIn->pitchInElement; - } - - localOut.pMipInfo = mipInfo; - - returnCode = ComputeSurfaceInfoLinear(&localIn, &localOut); - - if (returnCode == ADDR_OK) - { - pOut->addr = (localOut.sliceSize * pIn->slice) + 
- mipInfo[pIn->mipId].offset + - (pIn->y * mipInfo[pIn->mipId].pitch + pIn->x) * (pIn->bpp >> 3); - pOut->bitPosition = 0; - } - else - { - valid = FALSE; - } - } - - if (valid == FALSE) - { - returnCode = ADDR_INVALIDPARAMS; - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeSurfaceAddrFromCoordTiled -* -* @brief -* Internal function to calculate address from coord for tiled swizzle surface -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoordTiled( - const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - return HwlComputeSurfaceAddrFromCoordTiled(pIn, pOut); -} - -/** -************************************************************************************************************************ -* Lib::ComputeSurfaceCoordFromAddrLinear -* -* @brief -* Internal function to calculate coord from address for linear swizzle surface -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddrLinear( - const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - BOOL_32 valid = (pIn->numSamples <= 1) && (pIn->numFrags <= 1); - - if (valid) - { - if (IsTex1d(pIn->resourceType)) - { - valid = (pIn->unalignedHeight == 1); - } - } - - if (valid) - { - ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0}; - localIn.bpp = pIn->bpp; - 
localIn.flags = pIn->flags; - localIn.width = Max(pIn->unalignedWidth, 1u); - localIn.height = Max(pIn->unalignedHeight, 1u); - localIn.numSlices = Max(pIn->numSlices, 1u); - localIn.numMipLevels = Max(pIn->numMipLevels, 1u); - localIn.resourceType = pIn->resourceType; - if (localIn.numMipLevels <= 1) - { - localIn.pitchInElement = pIn->pitchInElement; - } - returnCode = ComputeSurfaceInfoLinear(&localIn, &localOut); - - if (returnCode == ADDR_OK) - { - pOut->slice = static_cast(pIn->addr / localOut.sliceSize); - pOut->sample = 0; - - UINT_32 offsetInSlice = static_cast(pIn->addr % localOut.sliceSize); - UINT_32 elementBytes = pIn->bpp >> 3; - UINT_32 mipOffsetInSlice = 0; - UINT_32 mipSize = 0; - UINT_32 mipId = 0; - for (; mipId < pIn->numMipLevels ; mipId++) - { - if (IsTex1d(pIn->resourceType)) - { - mipSize = localOut.pitch * elementBytes; - } - else - { - UINT_32 currentMipHeight = (PowTwoAlign(localIn.height, (1 << mipId))) >> mipId; - mipSize = currentMipHeight * localOut.pitch * elementBytes; - } - - if (mipSize == 0) - { - valid = FALSE; - break; - } - else if ((mipSize + mipOffsetInSlice) > offsetInSlice) - { - break; - } - else - { - mipOffsetInSlice += mipSize; - if ((mipId == (pIn->numMipLevels - 1)) || - (mipOffsetInSlice >= localOut.sliceSize)) - { - valid = FALSE; - } - } - } - - if (valid) - { - pOut->mipId = mipId; - - UINT_32 elemOffsetInMip = (offsetInSlice - mipOffsetInSlice) / elementBytes; - if (IsTex1d(pIn->resourceType)) - { - if (elemOffsetInMip < localOut.pitch) - { - pOut->x = elemOffsetInMip; - pOut->y = 0; - } - else - { - valid = FALSE; - } - } - else - { - pOut->y = elemOffsetInMip / localOut.pitch; - pOut->x = elemOffsetInMip % localOut.pitch; - } - - if ((pOut->slice >= pIn->numSlices) || - (pOut->mipId >= pIn->numMipLevels) || - (pOut->x >= Max((pIn->unalignedWidth >> pOut->mipId), 1u)) || - (pOut->y >= Max((pIn->unalignedHeight >> pOut->mipId), 1u)) || - (IsTex3d(pIn->resourceType) && - (FALSE == 
Valid3DMipSliceIdConstraint(pIn->numSlices, - pOut->mipId, - pOut->slice)))) - { - valid = FALSE; - } - } - } - else - { - valid = FALSE; - } - } - - if (valid == FALSE) - { - returnCode = ADDR_INVALIDPARAMS; - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeSurfaceCoordFromAddrTiled -* -* @brief -* Internal function to calculate coord from address for tiled swizzle surface -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddrTiled( - const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_NOTIMPLEMENTED; - - ADDR_NOT_IMPLEMENTED(); - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeBlockDimensionForSurf -* -* @brief -* Internal function to get block width/height/depth in element from surface input params. 
-* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeBlockDimensionForSurf( - UINT_32* pWidth, - UINT_32* pHeight, - UINT_32* pDepth, - UINT_32 bpp, - UINT_32 numSamples, - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const -{ - ADDR_E_RETURNCODE returnCode = ComputeBlockDimension(pWidth, - pHeight, - pDepth, - bpp, - resourceType, - swizzleMode); - - if ((returnCode == ADDR_OK) && (numSamples > 1) && IsThin(resourceType, swizzleMode)) - { - const UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode); - const UINT_32 log2sample = Log2(numSamples); - const UINT_32 q = log2sample >> 1; - const UINT_32 r = log2sample & 1; - - if (log2blkSize & 1) - { - *pWidth >>= q; - *pHeight >>= (q + r); - } - else - { - *pWidth >>= (q + r); - *pHeight >>= q; - } - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeBlockDimension -* -* @brief -* Internal function to get block width/height/depth in element without considering MSAA case -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeBlockDimension( - UINT_32* pWidth, - UINT_32* pHeight, - UINT_32* pDepth, - UINT_32 bpp, - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - UINT_32 eleBytes = bpp >> 3; - UINT_32 microBlockSizeTableIndex = Log2(eleBytes); - UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode); - - if (IsThin(resourceType, swizzleMode)) - { - UINT_32 log2blkSizeIn256B = log2blkSize - 8; - UINT_32 widthAmp = log2blkSizeIn256B / 2; - UINT_32 heightAmp = log2blkSizeIn256B - widthAmp; - - ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block256_2d) / 
sizeof(Block256_2d[0])); - - *pWidth = (Block256_2d[microBlockSizeTableIndex].w << widthAmp); - *pHeight = (Block256_2d[microBlockSizeTableIndex].h << heightAmp); - *pDepth = 1; - } - else if (IsThick(resourceType, swizzleMode)) - { - UINT_32 log2blkSizeIn1KB = log2blkSize - 10; - UINT_32 averageAmp = log2blkSizeIn1KB / 3; - UINT_32 restAmp = log2blkSizeIn1KB % 3; - - ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block1K_3d) / sizeof(Block1K_3d[0])); - - *pWidth = Block1K_3d[microBlockSizeTableIndex].w << averageAmp; - *pHeight = Block1K_3d[microBlockSizeTableIndex].h << (averageAmp + (restAmp / 2)); - *pDepth = Block1K_3d[microBlockSizeTableIndex].d << (averageAmp + ((restAmp != 0) ? 1 : 0)); - } - else - { - ADDR_ASSERT_ALWAYS(); - returnCode = ADDR_INVALIDPARAMS; - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::GetMipTailDim -* -* @brief -* Internal function to get out max dimension of first level in mip tail -* -* @return -* Max Width/Height/Depth value of the first mip fitted in mip tail -************************************************************************************************************************ -*/ -Dim3d Lib::GetMipTailDim( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode, - UINT_32 blockWidth, - UINT_32 blockHeight, - UINT_32 blockDepth) const -{ - Dim3d out = {blockWidth, blockHeight, blockDepth}; - UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode); - - if (IsThick(resourceType, swizzleMode)) - { - UINT_32 dim = log2blkSize % 3; - - if (dim == 0) - { - out.h >>= 1; - } - else if (dim == 1) - { - out.w >>= 1; - } - else - { - out.d >>= 1; - } - } - else - { - if (log2blkSize & 1) - { - out.h >>= 1; - } - else - { - out.w >>= 1; - } - } - - return out; -} - -/** -************************************************************************************************************************ -* 
Lib::ComputeSurface2DMicroBlockOffset -* -* @brief -* Internal function to calculate micro block (256B) offset from coord for 2D resource -* -* @return -* micro block (256B) offset for 2D resource -************************************************************************************************************************ -*/ -UINT_32 Lib::ComputeSurface2DMicroBlockOffset( - const _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn) const -{ - ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode)); - - UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3); - UINT_32 microBlockOffset = 0; - if (IsStandardSwizzle(pIn->resourceType, pIn->swizzleMode)) - { - UINT_32 xBits = pIn->x << log2ElementBytes; - microBlockOffset = (xBits & 0xf) | ((pIn->y & 0x3) << 4); - if (log2ElementBytes < 3) - { - microBlockOffset |= (pIn->y & 0x4) << 4; - if (log2ElementBytes == 0) - { - microBlockOffset |= (pIn->y & 0x8) << 4; - } - else - { - microBlockOffset |= (xBits & 0x10) << 3; - } - } - else - { - microBlockOffset |= (xBits & 0x30) << 2; - } - } - else if (IsDisplaySwizzle(pIn->resourceType, pIn->swizzleMode)) - { - if (log2ElementBytes == 4) - { - microBlockOffset = (GetBit(pIn->x, 0) << 4) | - (GetBit(pIn->y, 0) << 5) | - (GetBit(pIn->x, 1) << 6) | - (GetBit(pIn->y, 1) << 7); - } - else - { - microBlockOffset = GetBits(pIn->x, 0, 3, log2ElementBytes) | - GetBits(pIn->y, 1, 2, 3 + log2ElementBytes) | - GetBits(pIn->x, 3, 1, 5 + log2ElementBytes) | - GetBits(pIn->y, 3, 1, 6 + log2ElementBytes); - microBlockOffset = GetBits(microBlockOffset, 0, 4, 0) | - (GetBit(pIn->y, 0) << 4) | - GetBits(microBlockOffset, 4, 3, 5); - } - } - else if (IsRotateSwizzle(pIn->swizzleMode)) - { - microBlockOffset = GetBits(pIn->y, 0, 3, log2ElementBytes) | - GetBits(pIn->x, 1, 2, 3 + log2ElementBytes) | - GetBits(pIn->x, 3, 1, 5 + log2ElementBytes) | - GetBits(pIn->y, 3, 1, 6 + log2ElementBytes); - microBlockOffset = GetBits(microBlockOffset, 0, 4, 0) | - (GetBit(pIn->x, 0) << 4) | - GetBits(microBlockOffset, 4, 3, 
5); - if (log2ElementBytes == 3) - { - microBlockOffset = GetBits(microBlockOffset, 0, 6, 0) | - GetBits(pIn->x, 1, 2, 6); - } - } - - return microBlockOffset; -} - -/** -************************************************************************************************************************ -* Lib::ComputeSurface3DMicroBlockOffset -* -* @brief -* Internal function to calculate micro block (1KB) offset from coord for 3D resource -* -* @return -* micro block (1KB) offset for 3D resource -************************************************************************************************************************ -*/ -UINT_32 Lib::ComputeSurface3DMicroBlockOffset( - const _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn) const -{ - ADDR_ASSERT(IsThick(pIn->resourceType, pIn->swizzleMode)); - - UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3); - UINT_32 microBlockOffset = 0; - if (IsStandardSwizzle(pIn->resourceType, pIn->swizzleMode)) - { - if (log2ElementBytes == 0) - { - microBlockOffset = ((pIn->slice & 4) >> 2) | ((pIn->y & 4) >> 1); - } - else if (log2ElementBytes == 1) - { - microBlockOffset = ((pIn->slice & 4) >> 2) | ((pIn->y & 4) >> 1); - } - else if (log2ElementBytes == 2) - { - microBlockOffset = ((pIn->y & 4) >> 2) | ((pIn->x & 4) >> 1); - } - else if (log2ElementBytes == 3) - { - microBlockOffset = (pIn->x & 6) >> 1; - } - else - { - microBlockOffset = pIn->x & 3; - } - - microBlockOffset <<= 8; - - UINT_32 xBits = pIn->x << log2ElementBytes; - microBlockOffset |= (xBits & 0xf) | ((pIn->y & 0x3) << 4) | ((pIn->slice & 0x3) << 6); - } - else if (IsZOrderSwizzle(pIn->swizzleMode)) - { - UINT_32 xh, yh, zh; - - if (log2ElementBytes == 0) - { - microBlockOffset = - (pIn->x & 1) | ((pIn->y & 1) << 1) | ((pIn->x & 2) << 1) | ((pIn->y & 2) << 2); - microBlockOffset = microBlockOffset | ((pIn->slice & 3) << 4) | ((pIn->x & 4) << 4); - - xh = pIn->x >> 3; - yh = pIn->y >> 2; - zh = pIn->slice >> 2; - } - else if (log2ElementBytes == 1) - { - microBlockOffset = - (pIn->x & 
1) | ((pIn->y & 1) << 1) | ((pIn->x & 2) << 1) | ((pIn->y & 2) << 2); - microBlockOffset = (microBlockOffset << 1) | ((pIn->slice & 3) << 5); - - xh = pIn->x >> 2; - yh = pIn->y >> 2; - zh = pIn->slice >> 2; - } - else if (log2ElementBytes == 2) - { - microBlockOffset = - (pIn->x & 1) | ((pIn->y & 1) << 1) | ((pIn->x & 2) << 1) | ((pIn->slice & 1) << 3); - microBlockOffset = (microBlockOffset << 2) | ((pIn->y & 2) << 5); - - xh = pIn->x >> 2; - yh = pIn->y >> 2; - zh = pIn->slice >> 1; - } - else if (log2ElementBytes == 3) - { - microBlockOffset = - (pIn->x & 1) | ((pIn->y & 1) << 1) | ((pIn->slice & 1) << 2) | ((pIn->x & 2) << 2); - microBlockOffset <<= 3; - - xh = pIn->x >> 2; - yh = pIn->y >> 1; - zh = pIn->slice >> 1; - } - else - { - microBlockOffset = - (((pIn->x & 1) | ((pIn->y & 1) << 1) | ((pIn->slice & 1) << 2)) << 4); - - xh = pIn->x >> 1; - yh = pIn->y >> 1; - zh = pIn->slice >> 1; - } - - microBlockOffset |= ((MortonGen3d(xh, yh, zh, 1) << 7) & 0x380); - } - - return microBlockOffset; -} - -/** -************************************************************************************************************************ -* Lib::GetPipeXorBits -* -* @brief -* Internal function to get bits number for pipe/se xor operation -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -UINT_32 Lib::GetPipeXorBits( - UINT_32 macroBlockBits) const -{ - ADDR_ASSERT(macroBlockBits >= m_pipeInterleaveLog2); - - // Total available xor bits - UINT_32 xorBits = macroBlockBits - m_pipeInterleaveLog2; - - // Pipe/Se xor bits - UINT_32 pipeBits = Min(xorBits, m_pipesLog2 + m_seLog2); - - return pipeBits; -} - -/** -************************************************************************************************************************ -* Lib::GetBankXorBits -* -* @brief -* Internal function to get bits number for pipe/se xor operation -* -* @return -* ADDR_E_RETURNCODE 
-************************************************************************************************************************ -*/ -UINT_32 Lib::GetBankXorBits( - UINT_32 macroBlockBits) const -{ - UINT_32 pipeBits = GetPipeXorBits(macroBlockBits); - - // Bank xor bits - UINT_32 bankBits = Min(macroBlockBits - pipeBits - m_pipeInterleaveLog2, m_banksLog2); - - return bankBits; -} - -/** -************************************************************************************************************************ -* Lib::Addr2GetPreferredSurfaceSetting -* -* @brief -* Internal function to get suggested surface information for cliet to use -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::Addr2GetPreferredSurfaceSetting( - const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, - ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - ((pIn->size != sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT)) || - (pOut->size != sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT)))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - returnCode = HwlGetPreferredSurfaceSetting(pIn, pOut); - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeBlock256Equation -* -* @brief -* Compute equation for block 256B -* -* @return -* If equation computed successfully -* -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeBlock256Equation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const -{ - ADDR_E_RETURNCODE ret; - - if (IsBlock256b(swMode)) - { - ret = HwlComputeBlock256Equation(rsrcType, swMode, 
elementBytesLog2, pEquation); - } - else - { - ADDR_ASSERT_ALWAYS(); - ret = ADDR_INVALIDPARAMS; - } - - return ret; -} - -/** -************************************************************************************************************************ -* Lib::ComputeThinEquation -* -* @brief -* Compute equation for 2D/3D resource which use THIN mode -* -* @return -* If equation computed successfully -* -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeThinEquation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const -{ - ADDR_E_RETURNCODE ret; - - if (IsThin(rsrcType, swMode)) - { - ret = HwlComputeThinEquation(rsrcType, swMode, elementBytesLog2, pEquation); - } - else - { - ADDR_ASSERT_ALWAYS(); - ret = ADDR_INVALIDPARAMS; - } - - return ret; -} - -/** -************************************************************************************************************************ -* Lib::ComputeThickEquation -* -* @brief -* Compute equation for 3D resource which use THICK mode -* -* @return -* If equation computed successfully -* -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeThickEquation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const -{ - ADDR_E_RETURNCODE ret; - - if (IsThick(rsrcType, swMode)) - { - ret = HwlComputeThickEquation(rsrcType, swMode, elementBytesLog2, pEquation); - } - else - { - ADDR_ASSERT_ALWAYS(); - ret = ADDR_INVALIDPARAMS; - } - - return ret; -} - -/** -************************************************************************************************************************ -* Lib::ComputeQbStereoInfo -* -* @brief -* Get quad buffer stereo information -* @return -* N/A 
-************************************************************************************************************************ -*/ -VOID Lib::ComputeQbStereoInfo( - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] updated pOut+pStereoInfo - ) const -{ - ADDR_ASSERT(pOut->bpp >= 8); - ADDR_ASSERT((pOut->surfSize % pOut->baseAlign) == 0); - - // Save original height - pOut->pStereoInfo->eyeHeight = pOut->height; - - // Right offset - pOut->pStereoInfo->rightOffset = static_cast(pOut->surfSize); - - // Double height - pOut->height <<= 1; - - ADDR_ASSERT(pOut->height <= MaxSurfaceHeight); - - pOut->pixelHeight <<= 1; - - // Double size - pOut->surfSize <<= 1; -} - - -} // V2 -} // Addr - diff -Nru mesa-18.3.3/src/amd/addrlib/core/addrlib2.h mesa-19.0.1/src/amd/addrlib/core/addrlib2.h --- mesa-18.3.3/src/amd/addrlib/core/addrlib2.h 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/core/addrlib2.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,793 +0,0 @@ -/* - * Copyright © 2017 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -************************************************************************************************************************ -* @file addrlib2.h -* @brief Contains the Addr::V2::Lib class definition. -************************************************************************************************************************ -*/ - -#ifndef __ADDR2_LIB2_H__ -#define __ADDR2_LIB2_H__ - -#include "addrlib.h" - -namespace Addr -{ -namespace V2 -{ - -/** -************************************************************************************************************************ -* @brief Flags for SwizzleModeTable -************************************************************************************************************************ -*/ -struct SwizzleModeFlags -{ - // Swizzle mode - UINT_32 isLinear : 1; // Linear - - // Block size - UINT_32 is256b : 1; // Block size is 256B - UINT_32 is4kb : 1; // Block size is 4KB - UINT_32 is64kb : 1; // Block size is 64KB - UINT_32 isVar : 1; // Block size is variable - - UINT_32 isZ : 1; // Z order swizzle mode - UINT_32 isStd : 1; // Standard swizzle mode - UINT_32 isDisp : 1; // Display swizzle mode - UINT_32 isRot : 1; // Rotate swizzle mode - - // XOR mode - UINT_32 isXor : 1; // XOR after swizzle if set - - UINT_32 isT : 1; // T mode - - UINT_32 isRtOpt : 1; // mode opt for render target -}; - -struct Dim2d -{ - UINT_32 w; - UINT_32 h; -}; - -struct Dim3d -{ - UINT_32 w; - UINT_32 h; - UINT_32 d; -}; - -/** 
-************************************************************************************************************************ -* @brief This class contains asic independent address lib functionalities -************************************************************************************************************************ -*/ -class Lib : public Addr::Lib -{ -public: - virtual ~Lib(); - - static Lib* GetLib( - ADDR_HANDLE hLib); - - // - // Interface stubs - // - - // For data surface - ADDR_E_RETURNCODE ComputeSurfaceInfo( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoord( - const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddr( - const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; - - // For HTile - ADDR_E_RETURNCODE ComputeHtileInfo( - const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, - ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeHtileAddrFromCoord( - const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut); - - ADDR_E_RETURNCODE ComputeHtileCoordFromAddr( - const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut); - - // For CMask - ADDR_E_RETURNCODE ComputeCmaskInfo( - const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, - ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeCmaskAddrFromCoord( - const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut); - - ADDR_E_RETURNCODE ComputeCmaskCoordFromAddr( - const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) const; - - // For FMask - ADDR_E_RETURNCODE ComputeFmaskInfo( - const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn, - 
ADDR2_COMPUTE_FMASK_INFO_OUTPUT* pOut); - - ADDR_E_RETURNCODE ComputeFmaskAddrFromCoord( - const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeFmaskCoordFromAddr( - const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const; - - // For DCC key - ADDR_E_RETURNCODE ComputeDccInfo( - const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, - ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeDccAddrFromCoord( - const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut); - - // Misc - ADDR_E_RETURNCODE ComputePipeBankXor( - const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, - ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut); - - ADDR_E_RETURNCODE ComputeSlicePipeBankXor( - const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, - ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut); - - ADDR_E_RETURNCODE ComputeSubResourceOffsetForSwizzlePattern( - const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, - ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut); - - ADDR_E_RETURNCODE Addr2GetPreferredSurfaceSetting( - const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, - ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const; - - virtual BOOL_32 IsValidDisplaySwizzleMode( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTIMPLEMENTED; - } - -protected: - Lib(); // Constructor is protected - Lib(const Client* pClient); - - static const UINT_32 MaxNumOfBpp = 5; - - static const Dim2d Block256_2d[MaxNumOfBpp]; - static const Dim3d Block1K_3d[MaxNumOfBpp]; - - static const UINT_32 PrtAlignment = 64 * 1024; - static const UINT_32 MaxMacroBits = 20; - - static const UINT_32 MaxMipLevels = 16; - - // Checking block size - BOOL_32 IsBlock256b(AddrSwizzleMode swizzleMode) const - { - return m_swizzleModeTable[swizzleMode].is256b; - } - - BOOL_32 
IsBlock4kb(AddrSwizzleMode swizzleMode) const - { - return m_swizzleModeTable[swizzleMode].is4kb; - } - - BOOL_32 IsBlock64kb(AddrSwizzleMode swizzleMode) const - { - return m_swizzleModeTable[swizzleMode].is64kb; - } - - BOOL_32 IsBlockVariable(AddrSwizzleMode swizzleMode) const - { - return m_swizzleModeTable[swizzleMode].isVar; - } - - // Checking swizzle mode - BOOL_32 IsLinear(AddrSwizzleMode swizzleMode) const - { - return m_swizzleModeTable[swizzleMode].isLinear; - } - - BOOL_32 IsRtOptSwizzle(AddrSwizzleMode swizzleMode) const - { - return m_swizzleModeTable[swizzleMode].isRtOpt; - } - - BOOL_32 IsZOrderSwizzle(AddrSwizzleMode swizzleMode) const - { - return m_swizzleModeTable[swizzleMode].isZ; - } - - BOOL_32 IsStandardSwizzle(AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const - { - return HwlIsStandardSwizzle(resourceType, swizzleMode); - } - - BOOL_32 IsDisplaySwizzle(AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const - { - return HwlIsDisplaySwizzle(resourceType, swizzleMode); - } - - BOOL_32 IsRotateSwizzle(AddrSwizzleMode swizzleMode) const - { - return m_swizzleModeTable[swizzleMode].isRot; - } - - BOOL_32 IsXor(AddrSwizzleMode swizzleMode) const - { - return m_swizzleModeTable[swizzleMode].isXor; - } - - BOOL_32 IsPrt(AddrSwizzleMode swizzleMode) const - { - return m_swizzleModeTable[swizzleMode].isT; - } - - BOOL_32 IsNonPrtXor(AddrSwizzleMode swizzleMode) const - { - return (IsXor(swizzleMode) && (IsPrt(swizzleMode) == FALSE)); - } - - // Checking resource type - static BOOL_32 IsTex1d(AddrResourceType resourceType) - { - return (resourceType == ADDR_RSRC_TEX_1D); - } - - static BOOL_32 IsTex2d(AddrResourceType resourceType) - { - return (resourceType == ADDR_RSRC_TEX_2D); - } - - static BOOL_32 IsTex3d(AddrResourceType resourceType) - { - return (resourceType == ADDR_RSRC_TEX_3D); - } - - BOOL_32 IsThick(AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const - { - return HwlIsThick(resourceType, 
swizzleMode); - } - - BOOL_32 IsThin(AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const - { - return HwlIsThin(resourceType, swizzleMode); - } - - UINT_32 GetBlockSizeLog2(AddrSwizzleMode swizzleMode) const - { - UINT_32 blockSizeLog2 = 0; - - if (IsBlock256b(swizzleMode) || IsLinear(swizzleMode)) - { - blockSizeLog2 = 8; - } - else if (IsBlock4kb(swizzleMode)) - { - blockSizeLog2 = 12; - } - else if (IsBlock64kb(swizzleMode)) - { - blockSizeLog2 = 16; - } - else if (IsBlockVariable(swizzleMode)) - { - blockSizeLog2 = m_blockVarSizeLog2; - } - else - { - ADDR_ASSERT_ALWAYS(); - } - - return blockSizeLog2; - } - - UINT_32 GetBlockSize(AddrSwizzleMode swizzleMode) const - { - return (1 << GetBlockSizeLog2(swizzleMode)); - } - - static UINT_32 GetFmaskBpp(UINT_32 sample, UINT_32 frag) - { - sample = (sample == 0) ? 1 : sample; - frag = (frag == 0) ? sample : frag; - - UINT_32 fmaskBpp = QLog2(frag); - - if (sample > frag) - { - fmaskBpp++; - } - - if (fmaskBpp == 3) - { - fmaskBpp = 4; - } - - fmaskBpp = Max(8u, fmaskBpp * sample); - - return fmaskBpp; - } - - virtual BOOL_32 HwlIsStandardSwizzle( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const - { - ADDR_NOT_IMPLEMENTED(); - return FALSE; - } - - virtual BOOL_32 HwlIsDisplaySwizzle( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const - { - ADDR_NOT_IMPLEMENTED(); - return FALSE; - } - - virtual BOOL_32 HwlIsThin( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const - { - ADDR_NOT_IMPLEMENTED(); - return FALSE; - } - - virtual BOOL_32 HwlIsThick( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const - { - ADDR_NOT_IMPLEMENTED(); - return FALSE; - } - - virtual ADDR_E_RETURNCODE HwlComputeHtileInfo( - const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, - ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeCmaskInfo( - const 
ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, - ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeDccInfo( - const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, - ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeDccAddrFromCoord( - const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord( - const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord( - const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeHtileCoordFromAddr( - const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeBlock256Equation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeThinEquation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeThickEquation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual UINT_32 HwlGetEquationIndex( - const 
ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_INVALID_EQUATION_INDEX; - } - - UINT_32 GetEquationIndex( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const - { - return HwlGetEquationIndex(pIn, pOut); - } - - virtual ADDR_E_RETURNCODE HwlComputePipeBankXor( - const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, - ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeSlicePipeBankXor( - const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, - ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - - virtual ADDR_E_RETURNCODE HwlComputeSubResourceOffsetForSwizzlePattern( - const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, - ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlGetPreferredSurfaceSetting( - const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, - ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoSanityCheck( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoTiled( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTIMPLEMENTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoLinear( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTIMPLEMENTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled( - const 
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTIMPLEMENTED; - } - - ADDR_E_RETURNCODE ComputeBlock256Equation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const; - - ADDR_E_RETURNCODE ComputeThinEquation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const; - - ADDR_E_RETURNCODE ComputeThickEquation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const; - - ADDR_E_RETURNCODE ComputeSurfaceInfoSanityCheck( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; - - ADDR_E_RETURNCODE ComputeSurfaceInfoLinear( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeSurfaceInfoTiled( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordLinear( - const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordTiled( - const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddrLinear( - const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddrTiled( - const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; - - UINT_32 ComputeSurface2DMicroBlockOffset( - const _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn) const; - - UINT_32 ComputeSurface3DMicroBlockOffset( - const _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn) const; - - // Misc - ADDR_E_RETURNCODE 
ComputeBlockDimensionForSurf( - UINT_32* pWidth, - UINT_32* pHeight, - UINT_32* pDepth, - UINT_32 bpp, - UINT_32 numSamples, - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const; - - ADDR_E_RETURNCODE ComputeBlockDimension( - UINT_32* pWidth, - UINT_32* pHeight, - UINT_32* pDepth, - UINT_32 bpp, - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const; - - static UINT_64 ComputePadSize( - const Dim3d* pBlkDim, - UINT_32 width, - UINT_32 height, - UINT_32 numSlices, - Dim3d* pPadDim) - { - pPadDim->w = PowTwoAlign(width ,pBlkDim->w); - pPadDim->h = PowTwoAlign(height ,pBlkDim->h); - pPadDim->d = PowTwoAlign(numSlices, pBlkDim->d); - return static_cast(pPadDim->w) * pPadDim->h * pPadDim->d; - } - - static ADDR_E_RETURNCODE ExtractPipeBankXor( - UINT_32 pipeBankXor, - UINT_32 bankBits, - UINT_32 pipeBits, - UINT_32* pBankX, - UINT_32* pPipeX); - - static BOOL_32 Valid3DMipSliceIdConstraint( - UINT_32 numSlices, - UINT_32 mipId, - UINT_32 slice) - { - return (Max((numSlices >> mipId), 1u) > slice); - } - - Dim3d GetMipTailDim( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode, - UINT_32 blockWidth, - UINT_32 blockHeight, - UINT_32 blockDepth) const; - - BOOL_32 IsInMipTail( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode, - Dim3d mipTailDim, - UINT_32 width, - UINT_32 height, - UINT_32 depth) const - { - BOOL_32 inTail = ((width <= mipTailDim.w) && - (height <= mipTailDim.h) && - (IsThin(resourceType, swizzleMode) || (depth <= mipTailDim.d))); - - return inTail; - } - - static BOOL_32 IsLocalHeap(AddrResrouceLocation resourceType) - { - return ((resourceType == ADDR_RSRC_LOC_LOCAL) || - (resourceType == ADDR_RSRC_LOC_INVIS)); - } - - static BOOL_32 IsInvisibleHeap(AddrResrouceLocation resourceType) - { - return (resourceType == ADDR_RSRC_LOC_INVIS); - } - - static BOOL_32 IsNonlocalHeap(AddrResrouceLocation resourceType) - { - return ((resourceType == ADDR_RSRC_LOC_USWC) || - (resourceType == 
ADDR_RSRC_LOC_CACHED)); - } - - UINT_32 GetPipeLog2ForMetaAddressing(BOOL_32 pipeAligned, AddrSwizzleMode swizzleMode) const - { - UINT_32 numPipeLog2 = pipeAligned ? Min(m_pipesLog2 + m_seLog2, 5u) : 0; - - if (IsXor(swizzleMode)) - { - UINT_32 maxPipeLog2 = GetBlockSizeLog2(swizzleMode) - m_pipeInterleaveLog2; - - numPipeLog2 = Min(numPipeLog2, maxPipeLog2); - } - - return numPipeLog2; - } - - UINT_32 GetPipeNumForMetaAddressing(BOOL_32 pipeAligned, AddrSwizzleMode swizzleMode) const - { - return (1 << GetPipeLog2ForMetaAddressing(pipeAligned, swizzleMode)); - } - - VOID VerifyMipLevelInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const - { -#if DEBUG - if (pIn->numMipLevels > 1) - { - UINT_32 actualMipLevels = 1; - switch (pIn->resourceType) - { - case ADDR_RSRC_TEX_3D: - // Fall through to share 2D case - actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->numSlices) + 1); - case ADDR_RSRC_TEX_2D: - // Fall through to share 1D case - actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->height) + 1); - case ADDR_RSRC_TEX_1D: - // Base 1D case - actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->width) + 1); - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - // Client pass wrong number of MipLevels to addrlib and result will be bad. - // Not sure if we should fail this calling instead of putting an assertion here. 
- ADDR_ASSERT(actualMipLevels >= pIn->numMipLevels); - } -#endif - } - - ADDR_E_RETURNCODE ApplyCustomerPipeBankXor( - AddrSwizzleMode swizzleMode, - UINT_32 pipeBankXor, - UINT_32 bankBits, - UINT_32 pipeBits, - UINT_32* pBlockOffset) const - { - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (IsXor(swizzleMode)) - { - // Apply driver set bankPipeXor - UINT_32 bankX = 0; - UINT_32 pipeX = 0; - returnCode = ExtractPipeBankXor(pipeBankXor, bankBits, pipeBits, &bankX, &pipeX); - *pBlockOffset ^= (pipeX << m_pipeInterleaveLog2); - *pBlockOffset ^= (bankX << (m_pipeInterleaveLog2 + pipeBits)); - } - - return returnCode; - } - - UINT_32 GetPipeXorBits(UINT_32 macroBlockBits) const; - UINT_32 GetBankXorBits(UINT_32 macroBlockBits) const; - - ADDR_E_RETURNCODE ApplyCustomizedPitchHeight( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - UINT_32 elementBytes, - UINT_32 pitchAlignInElement, - UINT_32* pPitch, - UINT_32* pHeight) const; - - VOID ComputeQbStereoInfo(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - UINT_32 m_se; ///< Number of shader engine - UINT_32 m_rbPerSe; ///< Number of render backend per shader engine - UINT_32 m_maxCompFrag; ///< Number of max compressed fragment - - UINT_32 m_banksLog2; ///< Number of bank Log2 - UINT_32 m_pipesLog2; ///< Number of pipe per shader engine Log2 - UINT_32 m_seLog2; ///< Number of shader engine Log2 - UINT_32 m_rbPerSeLog2; ///< Number of render backend per shader engine Log2 - UINT_32 m_maxCompFragLog2; ///< Number of max compressed fragment Log2 - - UINT_32 m_pipeInterleaveLog2; ///< Log2 of pipe interleave bytes - - UINT_32 m_blockVarSizeLog2; ///< Log2 of block var size - - SwizzleModeFlags m_swizzleModeTable[ADDR_SW_MAX_TYPE]; ///< Swizzle mode table - -private: - // Disallow the copy constructor - Lib(const Lib& a); - - // Disallow the assignment operator - Lib& operator=(const Lib& a); -}; - -} // V2 -} // Addr - -#endif - diff -Nru mesa-18.3.3/src/amd/addrlib/core/addrlib.cpp 
mesa-19.0.1/src/amd/addrlib/core/addrlib.cpp --- mesa-18.3.3/src/amd/addrlib/core/addrlib.cpp 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/core/addrlib.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,644 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addrlib.cpp -* @brief Contains the implementation for the Addr::Lib class. 
-**************************************************************************************************** -*/ - -#include "addrinterface.h" -#include "addrlib.h" -#include "addrcommon.h" - -#if defined(__APPLE__) - -UINT_32 div64_32(UINT_64 n, UINT_32 base) -{ - UINT_64 rem = n; - UINT_64 b = base; - UINT_64 res, d = 1; - UINT_32 high = rem >> 32; - - res = 0; - if (high >= base) - { - high /= base; - res = (UINT_64) high << 32; - rem -= (UINT_64) (high * base) << 32; - } - - while (((INT_64)b > 0) && (b < rem)) - { - b = b + b; - d = d + d; - } - - do - { - if (rem >= b) - { - rem -= b; - res += d; - } - b >>= 1; - d >>= 1; - } while (d); - - n = res; - return rem; -} - -extern "C" -UINT_32 __umoddi3(UINT_64 n, UINT_32 base) -{ - return div64_32(n, base); -} - -#endif // __APPLE__ - -namespace Addr -{ - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Constructor/Destructor -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* Lib::Lib -* -* @brief -* Constructor for the AddrLib class -* -**************************************************************************************************** -*/ -Lib::Lib() : - m_class(BASE_ADDRLIB), - m_chipFamily(ADDR_CHIP_FAMILY_IVLD), - m_chipRevision(0), - m_version(ADDRLIB_VERSION), - m_pipes(0), - m_banks(0), - m_pipeInterleaveBytes(0), - m_rowSize(0), - m_minPitchAlignPixels(1), - m_maxSamples(8), - m_pElemLib(NULL) -{ - m_configFlags.value = 0; -} - -/** -**************************************************************************************************** -* Lib::Lib -* -* @brief -* Constructor for the AddrLib class with hClient as parameter -* -**************************************************************************************************** -*/ -Lib::Lib(const Client* pClient) : - Object(pClient), - 
m_class(BASE_ADDRLIB), - m_chipFamily(ADDR_CHIP_FAMILY_IVLD), - m_chipRevision(0), - m_version(ADDRLIB_VERSION), - m_pipes(0), - m_banks(0), - m_pipeInterleaveBytes(0), - m_rowSize(0), - m_minPitchAlignPixels(1), - m_maxSamples(8), - m_pElemLib(NULL) -{ - m_configFlags.value = 0; -} - -/** -**************************************************************************************************** -* Lib::~AddrLib -* -* @brief -* Destructor for the AddrLib class -* -**************************************************************************************************** -*/ -Lib::~Lib() -{ - if (m_pElemLib) - { - delete m_pElemLib; - m_pElemLib = NULL; - } -} - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Initialization/Helper -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* Lib::Create -* -* @brief -* Creates and initializes AddrLib object. 
-* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::Create( - const ADDR_CREATE_INPUT* pCreateIn, ///< [in] pointer to ADDR_CREATE_INPUT - ADDR_CREATE_OUTPUT* pCreateOut) ///< [out] pointer to ADDR_CREATE_OUTPUT -{ - Lib* pLib = NULL; - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pCreateIn->createFlags.fillSizeFields == TRUE) - { - if ((pCreateIn->size != sizeof(ADDR_CREATE_INPUT)) || - (pCreateOut->size != sizeof(ADDR_CREATE_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if ((returnCode == ADDR_OK) && - (pCreateIn->callbacks.allocSysMem != NULL) && - (pCreateIn->callbacks.freeSysMem != NULL)) - { - Client client = { - pCreateIn->hClient, - pCreateIn->callbacks - }; - - switch (pCreateIn->chipEngine) - { - case CIASICIDGFXENGINE_SOUTHERNISLAND: - switch (pCreateIn->chipFamily) - { - case FAMILY_SI: - pLib = SiHwlInit(&client); - break; - case FAMILY_VI: - case FAMILY_CZ: // VI based fusion(carrizo) - case FAMILY_CI: - case FAMILY_KV: // CI based fusion - pLib = CiHwlInit(&client); - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - break; - case CIASICIDGFXENGINE_ARCTICISLAND: - switch (pCreateIn->chipFamily) - { - case FAMILY_AI: - case FAMILY_RV: - pLib = Gfx9HwlInit(&client); - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - } - - if (pLib != NULL) - { - BOOL_32 initValid; - - // Pass createFlags to configFlags first since these flags may be overwritten - pLib->m_configFlags.noCubeMipSlicesPad = pCreateIn->createFlags.noCubeMipSlicesPad; - pLib->m_configFlags.fillSizeFields = pCreateIn->createFlags.fillSizeFields; - pLib->m_configFlags.useTileIndex = pCreateIn->createFlags.useTileIndex; - pLib->m_configFlags.useCombinedSwizzle = pCreateIn->createFlags.useCombinedSwizzle; - pLib->m_configFlags.checkLast2DLevel = pCreateIn->createFlags.checkLast2DLevel; - 
pLib->m_configFlags.useHtileSliceAlign = pCreateIn->createFlags.useHtileSliceAlign; - pLib->m_configFlags.allowLargeThickTile = pCreateIn->createFlags.allowLargeThickTile; - pLib->m_configFlags.disableLinearOpt = FALSE; - - pLib->SetChipFamily(pCreateIn->chipFamily, pCreateIn->chipRevision); - - pLib->SetMinPitchAlignPixels(pCreateIn->minPitchAlignPixels); - - // Global parameters initialized and remaining configFlags bits are set as well - initValid = pLib->HwlInitGlobalParams(pCreateIn); - - if (initValid) - { - pLib->m_pElemLib = ElemLib::Create(pLib); - } - else - { - pLib->m_pElemLib = NULL; // Don't go on allocating element lib - returnCode = ADDR_INVALIDGBREGVALUES; - } - - if (pLib->m_pElemLib == NULL) - { - delete pLib; - pLib = NULL; - ADDR_ASSERT_ALWAYS(); - } - else - { - pLib->m_pElemLib->SetConfigFlags(pLib->m_configFlags); - } - } - - pCreateOut->hLib = pLib; - - if ((pLib != NULL) && - (returnCode == ADDR_OK)) - { - pCreateOut->numEquations = - pLib->HwlGetEquationTableInfo(&pCreateOut->pEquationTable); - - pLib->SetMaxAlignments(); - - } - else if ((pLib == NULL) && - (returnCode == ADDR_OK)) - { - // Unknown failures, we return the general error code - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::SetChipFamily -* -* @brief -* Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision -* @return -* N/A -**************************************************************************************************** -*/ -VOID Lib::SetChipFamily( - UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h - UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h -{ - ChipFamily family = HwlConvertChipFamily(uChipFamily, uChipRevision); - - ADDR_ASSERT(family != ADDR_CHIP_FAMILY_IVLD); - - m_chipFamily = family; - m_chipRevision = uChipRevision; -} - -/** 
-**************************************************************************************************** -* Lib::SetMinPitchAlignPixels -* -* @brief -* Set m_minPitchAlignPixels with input param -* -* @return -* N/A -**************************************************************************************************** -*/ -VOID Lib::SetMinPitchAlignPixels( - UINT_32 minPitchAlignPixels) ///< [in] minmum pitch alignment in pixels -{ - m_minPitchAlignPixels = (minPitchAlignPixels == 0) ? 1 : minPitchAlignPixels; -} - -/** -**************************************************************************************************** -* Lib::SetMaxAlignments -* -* @brief -* Set max alignments -* -* @return -* N/A -**************************************************************************************************** -*/ -VOID Lib::SetMaxAlignments() -{ - m_maxBaseAlign = HwlComputeMaxBaseAlignments(); - m_maxMetaBaseAlign = HwlComputeMaxMetaBaseAlignments(); -} - -/** -**************************************************************************************************** -* Lib::GetLib -* -* @brief -* Get AddrLib pointer -* -* @return -* An AddrLib class pointer -**************************************************************************************************** -*/ -Lib* Lib::GetLib( - ADDR_HANDLE hLib) ///< [in] handle of ADDR_HANDLE -{ - return static_cast(hLib); -} - -/** -**************************************************************************************************** -* Lib::GetMaxAlignments -* -* @brief -* Gets maximum alignments for data surface (include FMask) -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::GetMaxAlignments( - ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if (pOut->size != sizeof(ADDR_GET_MAX_ALINGMENTS_OUTPUT)) - { - 
returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - if (m_maxBaseAlign != 0) - { - pOut->baseAlign = m_maxBaseAlign; - } - else - { - returnCode = ADDR_NOTIMPLEMENTED; - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::GetMaxMetaAlignments -* -* @brief -* Gets maximum alignments for metadata (CMask, DCC and HTile) -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::GetMaxMetaAlignments( - ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if (pOut->size != sizeof(ADDR_GET_MAX_ALINGMENTS_OUTPUT)) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - if (m_maxMetaBaseAlign != 0) - { - pOut->baseAlign = m_maxMetaBaseAlign; - } - else - { - returnCode = ADDR_NOTIMPLEMENTED; - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::Bits2Number -* -* @brief -* Cat a array of binary bit to a number -* -* @return -* The number combined with the array of bits -**************************************************************************************************** -*/ -UINT_32 Lib::Bits2Number( - UINT_32 bitNum, ///< [in] how many bits - ...) 
///< [in] varaible bits value starting from MSB -{ - UINT_32 number = 0; - UINT_32 i; - va_list bits_ptr; - - va_start(bits_ptr, bitNum); - - for(i = 0; i < bitNum; i++) - { - number |= va_arg(bits_ptr, UINT_32); - number <<= 1; - } - - number >>= 1; - - va_end(bits_ptr); - - return number; -} - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Element lib -//////////////////////////////////////////////////////////////////////////////////////////////////// - - -/** -**************************************************************************************************** -* Lib::Flt32ToColorPixel -* -* @brief -* Convert a FLT_32 value to a depth/stencil pixel value -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::Flt32ToDepthPixel( - const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn, - ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ELEM_FLT32TODEPTHPIXEL_INPUT)) || - (pOut->size != sizeof(ELEM_FLT32TODEPTHPIXEL_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - GetElemLib()->Flt32ToDepthPixel(pIn->format, pIn->comps, pOut->pPixel); - - UINT_32 depthBase = 0; - UINT_32 stencilBase = 0; - UINT_32 depthBits = 0; - UINT_32 stencilBits = 0; - - switch (pIn->format) - { - case ADDR_DEPTH_16: - depthBits = 16; - break; - case ADDR_DEPTH_X8_24: - case ADDR_DEPTH_8_24: - case ADDR_DEPTH_X8_24_FLOAT: - case ADDR_DEPTH_8_24_FLOAT: - depthBase = 8; - depthBits = 24; - stencilBits = 8; - break; - case ADDR_DEPTH_32_FLOAT: - depthBits = 32; - break; - case ADDR_DEPTH_X24_8_32_FLOAT: - depthBase = 8; - depthBits = 32; - stencilBits = 8; - break; - default: - break; - } - - // Overwrite base since R800 has no "tileBase" - if (GetElemLib()->IsDepthStencilTilePlanar() == 
FALSE) - { - depthBase = 0; - stencilBase = 0; - } - - depthBase *= 64; - stencilBase *= 64; - - pOut->stencilBase = stencilBase; - pOut->depthBase = depthBase; - pOut->depthBits = depthBits; - pOut->stencilBits = stencilBits; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::Flt32ToColorPixel -* -* @brief -* Convert a FLT_32 value to a red/green/blue/alpha pixel value -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::Flt32ToColorPixel( - const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn, - ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ELEM_FLT32TOCOLORPIXEL_INPUT)) || - (pOut->size != sizeof(ELEM_FLT32TOCOLORPIXEL_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - GetElemLib()->Flt32ToColorPixel(pIn->format, - pIn->surfNum, - pIn->surfSwap, - pIn->comps, - pOut->pPixel); - } - - return returnCode; -} - - -/** -**************************************************************************************************** -* Lib::GetExportNorm -* -* @brief -* Check one format can be EXPORT_NUM -* @return -* TRUE if EXPORT_NORM can be used -**************************************************************************************************** -*/ -BOOL_32 Lib::GetExportNorm( - const ELEM_GETEXPORTNORM_INPUT* pIn) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - BOOL_32 enabled = FALSE; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if (pIn->size != sizeof(ELEM_GETEXPORTNORM_INPUT)) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - enabled = GetElemLib()->PixGetExportNorm(pIn->format, pIn->num, pIn->swap); - } - - return enabled; -} - -} // Addr diff -Nru 
mesa-18.3.3/src/amd/addrlib/core/addrlib.h mesa-19.0.1/src/amd/addrlib/core/addrlib.h --- mesa-18.3.3/src/amd/addrlib/core/addrlib.h 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/core/addrlib.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,413 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addrlib.h -* @brief Contains the Addr::Lib base class definition. 
-**************************************************************************************************** -*/ - -#ifndef __ADDR_LIB_H__ -#define __ADDR_LIB_H__ - -#include "addrinterface.h" -#include "addrobject.h" -#include "addrelemlib.h" - -#include "amdgpu_asic_addr.h" - -#ifndef CIASICIDGFXENGINE_R600 -#define CIASICIDGFXENGINE_R600 0x00000006 -#endif - -#ifndef CIASICIDGFXENGINE_R800 -#define CIASICIDGFXENGINE_R800 0x00000008 -#endif - -#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND -#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A -#endif - -#ifndef CIASICIDGFXENGINE_ARCTICISLAND -#define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D -#endif - -namespace Addr -{ - -/** -**************************************************************************************************** -* @brief Neutral enums that define pipeinterleave -**************************************************************************************************** -*/ -enum PipeInterleave -{ - ADDR_PIPEINTERLEAVE_256B = 256, - ADDR_PIPEINTERLEAVE_512B = 512, - ADDR_PIPEINTERLEAVE_1KB = 1024, - ADDR_PIPEINTERLEAVE_2KB = 2048, -}; - -/** -**************************************************************************************************** -* @brief Neutral enums that define DRAM row size -**************************************************************************************************** -*/ -enum RowSize -{ - ADDR_ROWSIZE_1KB = 1024, - ADDR_ROWSIZE_2KB = 2048, - ADDR_ROWSIZE_4KB = 4096, - ADDR_ROWSIZE_8KB = 8192, -}; - -/** -**************************************************************************************************** -* @brief Neutral enums that define bank interleave -**************************************************************************************************** -*/ -enum BankInterleave -{ - ADDR_BANKINTERLEAVE_1 = 1, - ADDR_BANKINTERLEAVE_2 = 2, - ADDR_BANKINTERLEAVE_4 = 4, - ADDR_BANKINTERLEAVE_8 = 8, -}; - -/** 
-**************************************************************************************************** -* @brief Neutral enums that define shader engine tile size -**************************************************************************************************** -*/ -enum ShaderEngineTileSize -{ - ADDR_SE_TILESIZE_16 = 16, - ADDR_SE_TILESIZE_32 = 32, -}; - -/** -**************************************************************************************************** -* @brief Neutral enums that define bank swap size -**************************************************************************************************** -*/ -enum BankSwapSize -{ - ADDR_BANKSWAP_128B = 128, - ADDR_BANKSWAP_256B = 256, - ADDR_BANKSWAP_512B = 512, - ADDR_BANKSWAP_1KB = 1024, -}; - -/** -**************************************************************************************************** -* @brief Enums that define max compressed fragments config -**************************************************************************************************** -*/ -enum NumMaxCompressedFragmentsConfig -{ - ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS = 0x00000000, - ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS = 0x00000001, - ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS = 0x00000002, - ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS = 0x00000003, -}; - -/** -**************************************************************************************************** -* @brief Enums that define num pipes config -**************************************************************************************************** -*/ -enum NumPipesConfig -{ - ADDR_CONFIG_1_PIPE = 0x00000000, - ADDR_CONFIG_2_PIPE = 0x00000001, - ADDR_CONFIG_4_PIPE = 0x00000002, - ADDR_CONFIG_8_PIPE = 0x00000003, - ADDR_CONFIG_16_PIPE = 0x00000004, - ADDR_CONFIG_32_PIPE = 0x00000005, - ADDR_CONFIG_64_PIPE = 0x00000006, -}; - -/** -**************************************************************************************************** -* @brief Enums that define num banks config 
-**************************************************************************************************** -*/ -enum NumBanksConfig -{ - ADDR_CONFIG_1_BANK = 0x00000000, - ADDR_CONFIG_2_BANK = 0x00000001, - ADDR_CONFIG_4_BANK = 0x00000002, - ADDR_CONFIG_8_BANK = 0x00000003, - ADDR_CONFIG_16_BANK = 0x00000004, -}; - -/** -**************************************************************************************************** -* @brief Enums that define num rb per shader engine config -**************************************************************************************************** -*/ -enum NumRbPerShaderEngineConfig -{ - ADDR_CONFIG_1_RB_PER_SHADER_ENGINE = 0x00000000, - ADDR_CONFIG_2_RB_PER_SHADER_ENGINE = 0x00000001, - ADDR_CONFIG_4_RB_PER_SHADER_ENGINE = 0x00000002, -}; - -/** -**************************************************************************************************** -* @brief Enums that define num shader engines config -**************************************************************************************************** -*/ -enum NumShaderEnginesConfig -{ - ADDR_CONFIG_1_SHADER_ENGINE = 0x00000000, - ADDR_CONFIG_2_SHADER_ENGINE = 0x00000001, - ADDR_CONFIG_4_SHADER_ENGINE = 0x00000002, - ADDR_CONFIG_8_SHADER_ENGINE = 0x00000003, -}; - -/** -**************************************************************************************************** -* @brief Enums that define pipe interleave size config -**************************************************************************************************** -*/ -enum PipeInterleaveSizeConfig -{ - ADDR_CONFIG_PIPE_INTERLEAVE_256B = 0x00000000, - ADDR_CONFIG_PIPE_INTERLEAVE_512B = 0x00000001, - ADDR_CONFIG_PIPE_INTERLEAVE_1KB = 0x00000002, - ADDR_CONFIG_PIPE_INTERLEAVE_2KB = 0x00000003, -}; - -/** -**************************************************************************************************** -* @brief Enums that define row size config 
-**************************************************************************************************** -*/ -enum RowSizeConfig -{ - ADDR_CONFIG_1KB_ROW = 0x00000000, - ADDR_CONFIG_2KB_ROW = 0x00000001, - ADDR_CONFIG_4KB_ROW = 0x00000002, -}; - -/** -**************************************************************************************************** -* @brief Enums that define bank interleave size config -**************************************************************************************************** -*/ -enum BankInterleaveSizeConfig -{ - ADDR_CONFIG_BANK_INTERLEAVE_1 = 0x00000000, - ADDR_CONFIG_BANK_INTERLEAVE_2 = 0x00000001, - ADDR_CONFIG_BANK_INTERLEAVE_4 = 0x00000002, - ADDR_CONFIG_BANK_INTERLEAVE_8 = 0x00000003, -}; - -/** -**************************************************************************************************** -* @brief Enums that define engine tile size config -**************************************************************************************************** -*/ -enum ShaderEngineTileSizeConfig -{ - ADDR_CONFIG_SE_TILE_16 = 0x00000000, - ADDR_CONFIG_SE_TILE_32 = 0x00000001, -}; - -/** -**************************************************************************************************** -* @brief This class contains asic independent address lib functionalities -**************************************************************************************************** -*/ -class Lib : public Object -{ -public: - virtual ~Lib(); - - static ADDR_E_RETURNCODE Create( - const ADDR_CREATE_INPUT* pCreateInfo, ADDR_CREATE_OUTPUT* pCreateOut); - - /// Pair of Create - VOID Destroy() - { - delete this; - } - - static Lib* GetLib(ADDR_HANDLE hLib); - - /// Returns AddrLib version (from compiled binary instead include file) - UINT_32 GetVersion() - { - return m_version; - } - - /// Returns asic chip family name defined by AddrLib - ChipFamily GetChipFamily() - { - return m_chipFamily; - } - - ADDR_E_RETURNCODE Flt32ToDepthPixel( - const 
ELEM_FLT32TODEPTHPIXEL_INPUT* pIn, - ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE Flt32ToColorPixel( - const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn, - ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) const; - - BOOL_32 GetExportNorm(const ELEM_GETEXPORTNORM_INPUT* pIn) const; - - ADDR_E_RETURNCODE GetMaxAlignments(ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE GetMaxMetaAlignments(ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) const; - -protected: - Lib(); // Constructor is protected - Lib(const Client* pClient); - - /// Pure virtual function to get max base alignments - virtual UINT_32 HwlComputeMaxBaseAlignments() const = 0; - - /// Gets maximum alignements for metadata - virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const - { - ADDR_NOT_IMPLEMENTED(); - - return 0; - } - - VOID ValidBaseAlignments(UINT_32 alignment) const - { -#if DEBUG - ADDR_ASSERT(alignment <= m_maxBaseAlign); -#endif - } - - VOID ValidMetaBaseAlignments(UINT_32 metaAlignment) const - { -#if DEBUG - ADDR_ASSERT(metaAlignment <= m_maxMetaBaseAlign); -#endif - } - - // - // Initialization - // - /// Pure Virtual function for Hwl computing internal global parameters from h/w registers - virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn) = 0; - - /// Pure Virtual function for Hwl converting chip family - virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision) = 0; - - /// Get equation table pointer and number of equations - virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const - { - *ppEquationTable = NULL; - - return 0; - } - - // - // Misc helper - // - static UINT_32 Bits2Number(UINT_32 bitNum, ...); - - static UINT_32 GetNumFragments(UINT_32 numSamples, UINT_32 numFrags) - { - return (numFrags != 0) ? 
numFrags : Max(1u, numSamples); - } - - /// Returns pointer of ElemLib - ElemLib* GetElemLib() const - { - return m_pElemLib; - } - - /// Returns fillSizeFields flag - UINT_32 GetFillSizeFieldsFlags() const - { - return m_configFlags.fillSizeFields; - } - -private: - // Disallow the copy constructor - Lib(const Lib& a); - - // Disallow the assignment operator - Lib& operator=(const Lib& a); - - VOID SetChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision); - - VOID SetMinPitchAlignPixels(UINT_32 minPitchAlignPixels); - - VOID SetMaxAlignments(); - -protected: - LibClass m_class; ///< Store class type (HWL type) - - ChipFamily m_chipFamily; ///< Chip family translated from the one in atiid.h - - UINT_32 m_chipRevision; ///< Revision id from xxx_id.h - - UINT_32 m_version; ///< Current version - - // - // Global parameters - // - ConfigFlags m_configFlags; ///< Global configuration flags. Note this is setup by - /// AddrLib instead of Client except forceLinearAligned - - UINT_32 m_pipes; ///< Number of pipes - UINT_32 m_banks; ///< Number of banks - /// For r800 this is MC_ARB_RAMCFG.NOOFBANK - /// Keep it here to do default parameter calculation - - UINT_32 m_pipeInterleaveBytes; - ///< Specifies the size of contiguous address space - /// within each tiling pipe when making linear - /// accesses. 
(Formerly Group Size) - - UINT_32 m_rowSize; ///< DRAM row size, in bytes - - UINT_32 m_minPitchAlignPixels; ///< Minimum pitch alignment in pixels - UINT_32 m_maxSamples; ///< Max numSamples - - UINT_32 m_maxBaseAlign; ///< Max base alignment for data surface - UINT_32 m_maxMetaBaseAlign; ///< Max base alignment for metadata - -private: - ElemLib* m_pElemLib; ///< Element Lib pointer -}; - -Lib* SiHwlInit (const Client* pClient); -Lib* CiHwlInit (const Client* pClient); -Lib* Gfx9HwlInit (const Client* pClient); - -} // Addr - -#endif diff -Nru mesa-18.3.3/src/amd/addrlib/core/addrobject.cpp mesa-19.0.1/src/amd/addrlib/core/addrobject.cpp --- mesa-18.3.3/src/amd/addrlib/core/addrobject.cpp 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/core/addrobject.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,233 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addrobject.cpp -* @brief Contains the Object base class implementation. -**************************************************************************************************** -*/ - -#include "addrinterface.h" -#include "addrobject.h" - -namespace Addr -{ - -/** -**************************************************************************************************** -* Object::Object -* -* @brief -* Constructor for the Object class. -**************************************************************************************************** -*/ -Object::Object() -{ - m_client.handle = NULL; - m_client.callbacks.allocSysMem = NULL; - m_client.callbacks.freeSysMem = NULL; - m_client.callbacks.debugPrint = NULL; -} - -/** -**************************************************************************************************** -* Object::Object -* -* @brief -* Constructor for the Object class. -**************************************************************************************************** -*/ -Object::Object(const Client* pClient) -{ - m_client = *pClient; -} - -/** -**************************************************************************************************** -* Object::~Object -* -* @brief -* Destructor for the Object class. 
-**************************************************************************************************** -*/ -Object::~Object() -{ -} - -/** -**************************************************************************************************** -* Object::ClientAlloc -* -* @brief -* Calls instanced allocSysMem inside Client -**************************************************************************************************** -*/ -VOID* Object::ClientAlloc( - size_t objSize, ///< [in] Size to allocate - const Client* pClient) ///< [in] Client pointer -{ - VOID* pObjMem = NULL; - - if (pClient->callbacks.allocSysMem != NULL) - { - ADDR_ALLOCSYSMEM_INPUT allocInput = {0}; - - allocInput.size = sizeof(ADDR_ALLOCSYSMEM_INPUT); - allocInput.flags.value = 0; - allocInput.sizeInBytes = static_cast(objSize); - allocInput.hClient = pClient->handle; - - pObjMem = pClient->callbacks.allocSysMem(&allocInput); - } - - return pObjMem; -} - -/** -**************************************************************************************************** -* Object::Alloc -* -* @brief -* A wrapper of ClientAlloc -**************************************************************************************************** -*/ -VOID* Object::Alloc( - size_t objSize ///< [in] Size to allocate - ) const -{ - return ClientAlloc(objSize, &m_client); -} - -/** -**************************************************************************************************** -* Object::ClientFree -* -* @brief -* Calls freeSysMem inside Client -**************************************************************************************************** -*/ -VOID Object::ClientFree( - VOID* pObjMem, ///< [in] User virtual address to free. 
- const Client* pClient) ///< [in] Client pointer -{ - if (pClient->callbacks.freeSysMem != NULL) - { - if (pObjMem != NULL) - { - ADDR_FREESYSMEM_INPUT freeInput = {0}; - - freeInput.size = sizeof(ADDR_FREESYSMEM_INPUT); - freeInput.hClient = pClient->handle; - freeInput.pVirtAddr = pObjMem; - - pClient->callbacks.freeSysMem(&freeInput); - } - } -} - -/** -**************************************************************************************************** -* Object::Free -* -* @brief -* A wrapper of ClientFree -**************************************************************************************************** -*/ -VOID Object::Free( - VOID* pObjMem ///< [in] User virtual address to free. - ) const -{ - ClientFree(pObjMem, &m_client); -} - -/** -**************************************************************************************************** -* Object::operator new -* -* @brief -* Placement new operator. (with pre-allocated memory pointer) -* -* @return -* Returns pre-allocated memory pointer. -**************************************************************************************************** -*/ -VOID* Object::operator new( - size_t objSize, ///< [in] Size to allocate - VOID* pMem) ///< [in] Pre-allocated pointer -{ - return pMem; -} - -/** -**************************************************************************************************** -* Object::operator delete -* -* @brief -* Frees Object object memory. -**************************************************************************************************** -*/ -VOID Object::operator delete( - VOID* pObjMem) ///< [in] User virtual address to free. 
-{ - Object* pObj = static_cast(pObjMem); - ClientFree(pObjMem, &pObj->m_client); -} - -/** -**************************************************************************************************** -* Object::DebugPrint -* -* @brief -* Print debug message -* -* @return -* N/A -**************************************************************************************************** -*/ -VOID Object::DebugPrint( - const CHAR* pDebugString, ///< [in] Debug string - ... - ) const -{ -#if DEBUG - if (m_client.callbacks.debugPrint != NULL) - { - ADDR_DEBUGPRINT_INPUT debugPrintInput = {0}; - - debugPrintInput.size = sizeof(ADDR_DEBUGPRINT_INPUT); - debugPrintInput.pDebugString = const_cast(pDebugString); - debugPrintInput.hClient = m_client.handle; - va_start(debugPrintInput.ap, pDebugString); - - m_client.callbacks.debugPrint(&debugPrintInput); - - va_end(debugPrintInput.ap); - } -#endif -} - -} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/core/addrobject.h mesa-19.0.1/src/amd/addrlib/core/addrobject.h --- mesa-18.3.3/src/amd/addrlib/core/addrobject.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/core/addrobject.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,95 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addrobject.h -* @brief Contains the Object base class definition. -**************************************************************************************************** -*/ - -#ifndef __ADDR_OBJECT_H__ -#define __ADDR_OBJECT_H__ - -#include "addrtypes.h" -#include "addrcommon.h" - -namespace Addr -{ - -/** -**************************************************************************************************** -* @brief This structure contains client specific data -**************************************************************************************************** -*/ -struct Client -{ - ADDR_CLIENT_HANDLE handle; - ADDR_CALLBACKS callbacks; -}; -/** -**************************************************************************************************** -* @brief This class is the base class for all ADDR class objects. -**************************************************************************************************** -*/ -class Object -{ -public: - Object(); - Object(const Client* pClient); - virtual ~Object(); - - VOID* operator new(size_t size, VOID* pMem); - VOID operator delete(VOID* pObj); - /// Microsoft compiler requires a matching delete implementation, which seems to be called when - /// bad_alloc is thrown. But currently C++ exception isn't allowed so a dummy implementation is - /// added to eliminate the warning. 
- VOID operator delete(VOID* pObj, VOID* pMem) { ADDR_ASSERT_ALWAYS(); } - - VOID* Alloc(size_t size) const; - VOID Free(VOID* pObj) const; - - VOID DebugPrint(const CHAR* pDebugString, ...) const; - - const Client* GetClient() const {return &m_client;} - -protected: - Client m_client; - - static VOID* ClientAlloc(size_t size, const Client* pClient); - static VOID ClientFree(VOID* pObj, const Client* pClient); - -private: - // disallow the copy constructor - Object(const Object& a); - - // disallow the assignment operator - Object& operator=(const Object& a); -}; - -} // Addr -#endif - diff -Nru mesa-18.3.3/src/amd/addrlib/gfx9/chip/gfx9_enum.h mesa-19.0.1/src/amd/addrlib/gfx9/chip/gfx9_enum.h --- mesa-18.3.3/src/amd/addrlib/gfx9/chip/gfx9_enum.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/gfx9/chip/gfx9_enum.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,10535 +0,0 @@ -/* - * Copyright © 2017 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -#if !defined (_vega10_ENUM_HEADER) -#define _vega10_ENUM_HEADER - - -#ifndef _DRIVER_BUILD -#ifndef GL_ZERO -#define GL__ZERO BLEND_ZERO -#define GL__ONE BLEND_ONE -#define GL__SRC_COLOR BLEND_SRC_COLOR -#define GL__ONE_MINUS_SRC_COLOR BLEND_ONE_MINUS_SRC_COLOR -#define GL__DST_COLOR BLEND_DST_COLOR -#define GL__ONE_MINUS_DST_COLOR BLEND_ONE_MINUS_DST_COLOR -#define GL__SRC_ALPHA BLEND_SRC_ALPHA -#define GL__ONE_MINUS_SRC_ALPHA BLEND_ONE_MINUS_SRC_ALPHA -#define GL__DST_ALPHA BLEND_DST_ALPHA -#define GL__ONE_MINUS_DST_ALPHA BLEND_ONE_MINUS_DST_ALPHA -#define GL__SRC_ALPHA_SATURATE BLEND_SRC_ALPHA_SATURATE -#define GL__CONSTANT_COLOR BLEND_CONSTANT_COLOR -#define GL__ONE_MINUS_CONSTANT_COLOR BLEND_ONE_MINUS_CONSTANT_COLOR -#define GL__CONSTANT_ALPHA BLEND_CONSTANT_ALPHA -#define GL__ONE_MINUS_CONSTANT_ALPHA BLEND_ONE_MINUS_CONSTANT_ALPHA -#endif -#endif - -/******************************************************* - * GDS DATA_TYPE Enums - *******************************************************/ - -#ifndef ENUMS_GDS_PERFCOUNT_SELECT_H -#define ENUMS_GDS_PERFCOUNT_SELECT_H -typedef enum GDS_PERFCOUNT_SELECT { - GDS_PERF_SEL_DS_ADDR_CONFL = 0, - GDS_PERF_SEL_DS_BANK_CONFL = 1, - GDS_PERF_SEL_WBUF_FLUSH = 2, - GDS_PERF_SEL_WR_COMP = 3, - GDS_PERF_SEL_WBUF_WR = 4, - GDS_PERF_SEL_RBUF_HIT = 5, - GDS_PERF_SEL_RBUF_MISS = 6, - GDS_PERF_SEL_SE0_SH0_NORET = 7, - GDS_PERF_SEL_SE0_SH0_RET = 8, - GDS_PERF_SEL_SE0_SH0_ORD_CNT = 9, - GDS_PERF_SEL_SE0_SH0_2COMP_REQ = 10, - GDS_PERF_SEL_SE0_SH0_ORD_WAVE_VALID = 11, - GDS_PERF_SEL_SE0_SH0_GDS_DATA_VALID = 12, - GDS_PERF_SEL_SE0_SH0_GDS_STALL_BY_ORD = 13, - GDS_PERF_SEL_SE0_SH0_GDS_WR_OP = 14, - GDS_PERF_SEL_SE0_SH0_GDS_RD_OP = 15, - GDS_PERF_SEL_SE0_SH0_GDS_ATOM_OP = 16, - GDS_PERF_SEL_SE0_SH0_GDS_REL_OP = 17, - GDS_PERF_SEL_SE0_SH0_GDS_CMPXCH_OP = 18, - 
GDS_PERF_SEL_SE0_SH0_GDS_BYTE_OP = 19, - GDS_PERF_SEL_SE0_SH0_GDS_SHORT_OP = 20, - GDS_PERF_SEL_SE0_SH1_NORET = 21, - GDS_PERF_SEL_SE0_SH1_RET = 22, - GDS_PERF_SEL_SE0_SH1_ORD_CNT = 23, - GDS_PERF_SEL_SE0_SH1_2COMP_REQ = 24, - GDS_PERF_SEL_SE0_SH1_ORD_WAVE_VALID = 25, - GDS_PERF_SEL_SE0_SH1_GDS_DATA_VALID = 26, - GDS_PERF_SEL_SE0_SH1_GDS_STALL_BY_ORD = 27, - GDS_PERF_SEL_SE0_SH1_GDS_WR_OP = 28, - GDS_PERF_SEL_SE0_SH1_GDS_RD_OP = 29, - GDS_PERF_SEL_SE0_SH1_GDS_ATOM_OP = 30, - GDS_PERF_SEL_SE0_SH1_GDS_REL_OP = 31, - GDS_PERF_SEL_SE0_SH1_GDS_CMPXCH_OP = 32, - GDS_PERF_SEL_SE0_SH1_GDS_BYTE_OP = 33, - GDS_PERF_SEL_SE0_SH1_GDS_SHORT_OP = 34, - GDS_PERF_SEL_SE1_SH0_NORET = 35, - GDS_PERF_SEL_SE1_SH0_RET = 36, - GDS_PERF_SEL_SE1_SH0_ORD_CNT = 37, - GDS_PERF_SEL_SE1_SH0_2COMP_REQ = 38, - GDS_PERF_SEL_SE1_SH0_ORD_WAVE_VALID = 39, - GDS_PERF_SEL_SE1_SH0_GDS_DATA_VALID = 40, - GDS_PERF_SEL_SE1_SH0_GDS_STALL_BY_ORD = 41, - GDS_PERF_SEL_SE1_SH0_GDS_WR_OP = 42, - GDS_PERF_SEL_SE1_SH0_GDS_RD_OP = 43, - GDS_PERF_SEL_SE1_SH0_GDS_ATOM_OP = 44, - GDS_PERF_SEL_SE1_SH0_GDS_REL_OP = 45, - GDS_PERF_SEL_SE1_SH0_GDS_CMPXCH_OP = 46, - GDS_PERF_SEL_SE1_SH0_GDS_BYTE_OP = 47, - GDS_PERF_SEL_SE1_SH0_GDS_SHORT_OP = 48, - GDS_PERF_SEL_SE1_SH1_NORET = 49, - GDS_PERF_SEL_SE1_SH1_RET = 50, - GDS_PERF_SEL_SE1_SH1_ORD_CNT = 51, - GDS_PERF_SEL_SE1_SH1_2COMP_REQ = 52, - GDS_PERF_SEL_SE1_SH1_ORD_WAVE_VALID = 53, - GDS_PERF_SEL_SE1_SH1_GDS_DATA_VALID = 54, - GDS_PERF_SEL_SE1_SH1_GDS_STALL_BY_ORD = 55, - GDS_PERF_SEL_SE1_SH1_GDS_WR_OP = 56, - GDS_PERF_SEL_SE1_SH1_GDS_RD_OP = 57, - GDS_PERF_SEL_SE1_SH1_GDS_ATOM_OP = 58, - GDS_PERF_SEL_SE1_SH1_GDS_REL_OP = 59, - GDS_PERF_SEL_SE1_SH1_GDS_CMPXCH_OP = 60, - GDS_PERF_SEL_SE1_SH1_GDS_BYTE_OP = 61, - GDS_PERF_SEL_SE1_SH1_GDS_SHORT_OP = 62, - GDS_PERF_SEL_SE2_SH0_NORET = 63, - GDS_PERF_SEL_SE2_SH0_RET = 64, - GDS_PERF_SEL_SE2_SH0_ORD_CNT = 65, - GDS_PERF_SEL_SE2_SH0_2COMP_REQ = 66, - GDS_PERF_SEL_SE2_SH0_ORD_WAVE_VALID = 67, - GDS_PERF_SEL_SE2_SH0_GDS_DATA_VALID = 
68, - GDS_PERF_SEL_SE2_SH0_GDS_STALL_BY_ORD = 69, - GDS_PERF_SEL_SE2_SH0_GDS_WR_OP = 70, - GDS_PERF_SEL_SE2_SH0_GDS_RD_OP = 71, - GDS_PERF_SEL_SE2_SH0_GDS_ATOM_OP = 72, - GDS_PERF_SEL_SE2_SH0_GDS_REL_OP = 73, - GDS_PERF_SEL_SE2_SH0_GDS_CMPXCH_OP = 74, - GDS_PERF_SEL_SE2_SH0_GDS_BYTE_OP = 75, - GDS_PERF_SEL_SE2_SH0_GDS_SHORT_OP = 76, - GDS_PERF_SEL_SE2_SH1_NORET = 77, - GDS_PERF_SEL_SE2_SH1_RET = 78, - GDS_PERF_SEL_SE2_SH1_ORD_CNT = 79, - GDS_PERF_SEL_SE2_SH1_2COMP_REQ = 80, - GDS_PERF_SEL_SE2_SH1_ORD_WAVE_VALID = 81, - GDS_PERF_SEL_SE2_SH1_GDS_DATA_VALID = 82, - GDS_PERF_SEL_SE2_SH1_GDS_STALL_BY_ORD = 83, - GDS_PERF_SEL_SE2_SH1_GDS_WR_OP = 84, - GDS_PERF_SEL_SE2_SH1_GDS_RD_OP = 85, - GDS_PERF_SEL_SE2_SH1_GDS_ATOM_OP = 86, - GDS_PERF_SEL_SE2_SH1_GDS_REL_OP = 87, - GDS_PERF_SEL_SE2_SH1_GDS_CMPXCH_OP = 88, - GDS_PERF_SEL_SE2_SH1_GDS_BYTE_OP = 89, - GDS_PERF_SEL_SE2_SH1_GDS_SHORT_OP = 90, - GDS_PERF_SEL_SE3_SH0_NORET = 91, - GDS_PERF_SEL_SE3_SH0_RET = 92, - GDS_PERF_SEL_SE3_SH0_ORD_CNT = 93, - GDS_PERF_SEL_SE3_SH0_2COMP_REQ = 94, - GDS_PERF_SEL_SE3_SH0_ORD_WAVE_VALID = 95, - GDS_PERF_SEL_SE3_SH0_GDS_DATA_VALID = 96, - GDS_PERF_SEL_SE3_SH0_GDS_STALL_BY_ORD = 97, - GDS_PERF_SEL_SE3_SH0_GDS_WR_OP = 98, - GDS_PERF_SEL_SE3_SH0_GDS_RD_OP = 99, - GDS_PERF_SEL_SE3_SH0_GDS_ATOM_OP = 100, - GDS_PERF_SEL_SE3_SH0_GDS_REL_OP = 101, - GDS_PERF_SEL_SE3_SH0_GDS_CMPXCH_OP = 102, - GDS_PERF_SEL_SE3_SH0_GDS_BYTE_OP = 103, - GDS_PERF_SEL_SE3_SH0_GDS_SHORT_OP = 104, - GDS_PERF_SEL_SE3_SH1_NORET = 105, - GDS_PERF_SEL_SE3_SH1_RET = 106, - GDS_PERF_SEL_SE3_SH1_ORD_CNT = 107, - GDS_PERF_SEL_SE3_SH1_2COMP_REQ = 108, - GDS_PERF_SEL_SE3_SH1_ORD_WAVE_VALID = 109, - GDS_PERF_SEL_SE3_SH1_GDS_DATA_VALID = 110, - GDS_PERF_SEL_SE3_SH1_GDS_STALL_BY_ORD = 111, - GDS_PERF_SEL_SE3_SH1_GDS_WR_OP = 112, - GDS_PERF_SEL_SE3_SH1_GDS_RD_OP = 113, - GDS_PERF_SEL_SE3_SH1_GDS_ATOM_OP = 114, - GDS_PERF_SEL_SE3_SH1_GDS_REL_OP = 115, - GDS_PERF_SEL_SE3_SH1_GDS_CMPXCH_OP = 116, - GDS_PERF_SEL_SE3_SH1_GDS_BYTE_OP = 117, 
- GDS_PERF_SEL_SE3_SH1_GDS_SHORT_OP = 118, - GDS_PERF_SEL_GWS_RELEASED = 119, - GDS_PERF_SEL_GWS_BYPASS = 120, -} GDS_PERFCOUNT_SELECT; -#endif /*ENUMS_GDS_PERFCOUNT_SELECT_H*/ - -/******************************************************* - * Chip Enums - *******************************************************/ - -/* - * SurfaceEndian enum - */ - -typedef enum SurfaceEndian { -ENDIAN_NONE = 0x00000000, -ENDIAN_8IN16 = 0x00000001, -ENDIAN_8IN32 = 0x00000002, -ENDIAN_8IN64 = 0x00000003, -} SurfaceEndian; - -/* - * ArrayMode enum - */ - -typedef enum ArrayMode { -ARRAY_LINEAR_GENERAL = 0x00000000, -ARRAY_LINEAR_ALIGNED = 0x00000001, -ARRAY_1D_TILED_THIN1 = 0x00000002, -ARRAY_1D_TILED_THICK = 0x00000003, -ARRAY_2D_TILED_THIN1 = 0x00000004, -ARRAY_PRT_TILED_THIN1 = 0x00000005, -ARRAY_PRT_2D_TILED_THIN1 = 0x00000006, -ARRAY_2D_TILED_THICK = 0x00000007, -ARRAY_2D_TILED_XTHICK = 0x00000008, -ARRAY_PRT_TILED_THICK = 0x00000009, -ARRAY_PRT_2D_TILED_THICK = 0x0000000a, -ARRAY_PRT_3D_TILED_THIN1 = 0x0000000b, -ARRAY_3D_TILED_THIN1 = 0x0000000c, -ARRAY_3D_TILED_THICK = 0x0000000d, -ARRAY_3D_TILED_XTHICK = 0x0000000e, -ARRAY_PRT_3D_TILED_THICK = 0x0000000f, -} ArrayMode; - -/* - * PipeTiling enum - */ - -typedef enum PipeTiling { -CONFIG_1_PIPE = 0x00000000, -CONFIG_2_PIPE = 0x00000001, -CONFIG_4_PIPE = 0x00000002, -CONFIG_8_PIPE = 0x00000003, -} PipeTiling; - -/* - * BankTiling enum - */ - -typedef enum BankTiling { -CONFIG_4_BANK = 0x00000000, -CONFIG_8_BANK = 0x00000001, -} BankTiling; - -/* - * GroupInterleave enum - */ - -typedef enum GroupInterleave { -CONFIG_256B_GROUP = 0x00000000, -CONFIG_512B_GROUP = 0x00000001, -} GroupInterleave; - -/* - * RowTiling enum - */ - -typedef enum RowTiling { -CONFIG_1KB_ROW = 0x00000000, -CONFIG_2KB_ROW = 0x00000001, -CONFIG_4KB_ROW = 0x00000002, -CONFIG_8KB_ROW = 0x00000003, -CONFIG_1KB_ROW_OPT = 0x00000004, -CONFIG_2KB_ROW_OPT = 0x00000005, -CONFIG_4KB_ROW_OPT = 0x00000006, -CONFIG_8KB_ROW_OPT = 0x00000007, -} RowTiling; - -/* - * 
BankSwapBytes enum - */ - -typedef enum BankSwapBytes { -CONFIG_128B_SWAPS = 0x00000000, -CONFIG_256B_SWAPS = 0x00000001, -CONFIG_512B_SWAPS = 0x00000002, -CONFIG_1KB_SWAPS = 0x00000003, -} BankSwapBytes; - -/* - * SampleSplitBytes enum - */ - -typedef enum SampleSplitBytes { -CONFIG_1KB_SPLIT = 0x00000000, -CONFIG_2KB_SPLIT = 0x00000001, -CONFIG_4KB_SPLIT = 0x00000002, -CONFIG_8KB_SPLIT = 0x00000003, -} SampleSplitBytes; - -/* - * NumPipes enum - */ - -typedef enum NumPipes { -ADDR_CONFIG_1_PIPE = 0x00000000, -ADDR_CONFIG_2_PIPE = 0x00000001, -ADDR_CONFIG_4_PIPE = 0x00000002, -ADDR_CONFIG_8_PIPE = 0x00000003, -ADDR_CONFIG_16_PIPE = 0x00000004, -ADDR_CONFIG_32_PIPE = 0x00000005, -} NumPipes; - -/* - * NumBanksConfig enum - */ - -typedef enum NumBanksConfig { -ADDR_CONFIG_1_BANK = 0x00000000, -ADDR_CONFIG_2_BANK = 0x00000001, -ADDR_CONFIG_4_BANK = 0x00000002, -ADDR_CONFIG_8_BANK = 0x00000003, -ADDR_CONFIG_16_BANK = 0x00000004, -} NumBanksConfig; - -/* - * PipeInterleaveSize enum - */ - -typedef enum PipeInterleaveSize { -ADDR_CONFIG_PIPE_INTERLEAVE_256B = 0x00000000, -ADDR_CONFIG_PIPE_INTERLEAVE_512B = 0x00000001, -ADDR_CONFIG_PIPE_INTERLEAVE_1KB = 0x00000002, -ADDR_CONFIG_PIPE_INTERLEAVE_2KB = 0x00000003, -} PipeInterleaveSize; - -/* - * BankInterleaveSize enum - */ - -typedef enum BankInterleaveSize { -ADDR_CONFIG_BANK_INTERLEAVE_1 = 0x00000000, -ADDR_CONFIG_BANK_INTERLEAVE_2 = 0x00000001, -ADDR_CONFIG_BANK_INTERLEAVE_4 = 0x00000002, -ADDR_CONFIG_BANK_INTERLEAVE_8 = 0x00000003, -} BankInterleaveSize; - -/* - * NumShaderEngines enum - */ - -typedef enum NumShaderEngines { -ADDR_CONFIG_1_SHADER_ENGINE = 0x00000000, -ADDR_CONFIG_2_SHADER_ENGINE = 0x00000001, -ADDR_CONFIG_4_SHADER_ENGINE = 0x00000002, -ADDR_CONFIG_8_SHADER_ENGINE = 0x00000003, -} NumShaderEngines; - -/* - * NumRbPerShaderEngine enum - */ - -typedef enum NumRbPerShaderEngine { -ADDR_CONFIG_1_RB_PER_SHADER_ENGINE = 0x00000000, -ADDR_CONFIG_2_RB_PER_SHADER_ENGINE = 0x00000001, 
-ADDR_CONFIG_4_RB_PER_SHADER_ENGINE = 0x00000002, -} NumRbPerShaderEngine; - -/* - * NumGPUs enum - */ - -typedef enum NumGPUs { -ADDR_CONFIG_1_GPU = 0x00000000, -ADDR_CONFIG_2_GPU = 0x00000001, -ADDR_CONFIG_4_GPU = 0x00000002, -ADDR_CONFIG_8_GPU = 0x00000003, -} NumGPUs; - -/* - * NumMaxCompressedFragments enum - */ - -typedef enum NumMaxCompressedFragments { -ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS = 0x00000000, -ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS = 0x00000001, -ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS = 0x00000002, -ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS = 0x00000003, -} NumMaxCompressedFragments; - -/* - * ShaderEngineTileSize enum - */ - -typedef enum ShaderEngineTileSize { -ADDR_CONFIG_SE_TILE_16 = 0x00000000, -ADDR_CONFIG_SE_TILE_32 = 0x00000001, -} ShaderEngineTileSize; - -/* - * MultiGPUTileSize enum - */ - -typedef enum MultiGPUTileSize { -ADDR_CONFIG_GPU_TILE_16 = 0x00000000, -ADDR_CONFIG_GPU_TILE_32 = 0x00000001, -ADDR_CONFIG_GPU_TILE_64 = 0x00000002, -ADDR_CONFIG_GPU_TILE_128 = 0x00000003, -} MultiGPUTileSize; - -/* - * RowSize enum - */ - -typedef enum RowSize { -ADDR_CONFIG_1KB_ROW = 0x00000000, -ADDR_CONFIG_2KB_ROW = 0x00000001, -ADDR_CONFIG_4KB_ROW = 0x00000002, -} RowSize; - -/* - * NumLowerPipes enum - */ - -typedef enum NumLowerPipes { -ADDR_CONFIG_1_LOWER_PIPES = 0x00000000, -ADDR_CONFIG_2_LOWER_PIPES = 0x00000001, -} NumLowerPipes; - -/* - * ColorTransform enum - */ - -typedef enum ColorTransform { -DCC_CT_AUTO = 0x00000000, -DCC_CT_NONE = 0x00000001, -ABGR_TO_A_BG_G_RB = 0x00000002, -BGRA_TO_BG_G_RB_A = 0x00000003, -} ColorTransform; - -/* - * CompareRef enum - */ - -typedef enum CompareRef { -REF_NEVER = 0x00000000, -REF_LESS = 0x00000001, -REF_EQUAL = 0x00000002, -REF_LEQUAL = 0x00000003, -REF_GREATER = 0x00000004, -REF_NOTEQUAL = 0x00000005, -REF_GEQUAL = 0x00000006, -REF_ALWAYS = 0x00000007, -} CompareRef; - -/* - * ReadSize enum - */ - -typedef enum ReadSize { -READ_256_BITS = 0x00000000, -READ_512_BITS = 0x00000001, -} ReadSize; - -/* 
- * DepthFormat enum - */ - -typedef enum DepthFormat { -DEPTH_INVALID = 0x00000000, -DEPTH_16 = 0x00000001, -DEPTH_X8_24 = 0x00000002, -DEPTH_8_24 = 0x00000003, -DEPTH_X8_24_FLOAT = 0x00000004, -DEPTH_8_24_FLOAT = 0x00000005, -DEPTH_32_FLOAT = 0x00000006, -DEPTH_X24_8_32_FLOAT = 0x00000007, -} DepthFormat; - -/* - * ZFormat enum - */ - -typedef enum ZFormat { -Z_INVALID = 0x00000000, -Z_16 = 0x00000001, -Z_24 = 0x00000002, -Z_32_FLOAT = 0x00000003, -} ZFormat; - -/* - * StencilFormat enum - */ - -typedef enum StencilFormat { -STENCIL_INVALID = 0x00000000, -STENCIL_8 = 0x00000001, -} StencilFormat; - -/* - * CmaskMode enum - */ - -typedef enum CmaskMode { -CMASK_CLEAR_NONE = 0x00000000, -CMASK_CLEAR_ONE = 0x00000001, -CMASK_CLEAR_ALL = 0x00000002, -CMASK_ANY_EXPANDED = 0x00000003, -CMASK_ALPHA0_FRAG1 = 0x00000004, -CMASK_ALPHA0_FRAG2 = 0x00000005, -CMASK_ALPHA0_FRAG4 = 0x00000006, -CMASK_ALPHA0_FRAGS = 0x00000007, -CMASK_ALPHA1_FRAG1 = 0x00000008, -CMASK_ALPHA1_FRAG2 = 0x00000009, -CMASK_ALPHA1_FRAG4 = 0x0000000a, -CMASK_ALPHA1_FRAGS = 0x0000000b, -CMASK_ALPHAX_FRAG1 = 0x0000000c, -CMASK_ALPHAX_FRAG2 = 0x0000000d, -CMASK_ALPHAX_FRAG4 = 0x0000000e, -CMASK_ALPHAX_FRAGS = 0x0000000f, -} CmaskMode; - -/* - * QuadExportFormat enum - */ - -typedef enum QuadExportFormat { -EXPORT_UNUSED = 0x00000000, -EXPORT_32_R = 0x00000001, -EXPORT_32_GR = 0x00000002, -EXPORT_32_AR = 0x00000003, -EXPORT_FP16_ABGR = 0x00000004, -EXPORT_UNSIGNED16_ABGR = 0x00000005, -EXPORT_SIGNED16_ABGR = 0x00000006, -EXPORT_32_ABGR = 0x00000007, -EXPORT_32BPP_8PIX = 0x00000008, -EXPORT_16_16_UNSIGNED_8PIX = 0x00000009, -EXPORT_16_16_SIGNED_8PIX = 0x0000000a, -EXPORT_16_16_FLOAT_8PIX = 0x0000000b, -} QuadExportFormat; - -/* - * QuadExportFormatOld enum - */ - -typedef enum QuadExportFormatOld { -EXPORT_4P_32BPC_ABGR = 0x00000000, -EXPORT_4P_16BPC_ABGR = 0x00000001, -EXPORT_4P_32BPC_GR = 0x00000002, -EXPORT_4P_32BPC_AR = 0x00000003, -EXPORT_2P_32BPC_ABGR = 0x00000004, -EXPORT_8P_32BPC_R = 0x00000005, -} 
QuadExportFormatOld; - -/* - * ColorFormat enum - */ - -typedef enum ColorFormat { -COLOR_INVALID = 0x00000000, -COLOR_8 = 0x00000001, -COLOR_16 = 0x00000002, -COLOR_8_8 = 0x00000003, -COLOR_32 = 0x00000004, -COLOR_16_16 = 0x00000005, -COLOR_10_11_11 = 0x00000006, -COLOR_11_11_10 = 0x00000007, -COLOR_10_10_10_2 = 0x00000008, -COLOR_2_10_10_10 = 0x00000009, -COLOR_8_8_8_8 = 0x0000000a, -COLOR_32_32 = 0x0000000b, -COLOR_16_16_16_16 = 0x0000000c, -COLOR_RESERVED_13 = 0x0000000d, -COLOR_32_32_32_32 = 0x0000000e, -COLOR_RESERVED_15 = 0x0000000f, -COLOR_5_6_5 = 0x00000010, -COLOR_1_5_5_5 = 0x00000011, -COLOR_5_5_5_1 = 0x00000012, -COLOR_4_4_4_4 = 0x00000013, -COLOR_8_24 = 0x00000014, -COLOR_24_8 = 0x00000015, -COLOR_X24_8_32_FLOAT = 0x00000016, -COLOR_RESERVED_23 = 0x00000017, -COLOR_RESERVED_24 = 0x00000018, -COLOR_RESERVED_25 = 0x00000019, -COLOR_RESERVED_26 = 0x0000001a, -COLOR_RESERVED_27 = 0x0000001b, -COLOR_RESERVED_28 = 0x0000001c, -COLOR_RESERVED_29 = 0x0000001d, -COLOR_RESERVED_30 = 0x0000001e, -COLOR_2_10_10_10_6E4 = 0x0000001f, -} ColorFormat; - -/* - * SurfaceFormat enum - */ - -typedef enum SurfaceFormat { -FMT_INVALID = 0x00000000, -FMT_8 = 0x00000001, -FMT_16 = 0x00000002, -FMT_8_8 = 0x00000003, -FMT_32 = 0x00000004, -FMT_16_16 = 0x00000005, -FMT_10_11_11 = 0x00000006, -FMT_11_11_10 = 0x00000007, -FMT_10_10_10_2 = 0x00000008, -FMT_2_10_10_10 = 0x00000009, -FMT_8_8_8_8 = 0x0000000a, -FMT_32_32 = 0x0000000b, -FMT_16_16_16_16 = 0x0000000c, -FMT_32_32_32 = 0x0000000d, -FMT_32_32_32_32 = 0x0000000e, -FMT_RESERVED_4 = 0x0000000f, -FMT_5_6_5 = 0x00000010, -FMT_1_5_5_5 = 0x00000011, -FMT_5_5_5_1 = 0x00000012, -FMT_4_4_4_4 = 0x00000013, -FMT_8_24 = 0x00000014, -FMT_24_8 = 0x00000015, -FMT_X24_8_32_FLOAT = 0x00000016, -FMT_RESERVED_33 = 0x00000017, -FMT_11_11_10_FLOAT = 0x00000018, -FMT_16_FLOAT = 0x00000019, -FMT_32_FLOAT = 0x0000001a, -FMT_16_16_FLOAT = 0x0000001b, -FMT_8_24_FLOAT = 0x0000001c, -FMT_24_8_FLOAT = 0x0000001d, -FMT_32_32_FLOAT = 0x0000001e, 
-FMT_10_11_11_FLOAT = 0x0000001f, -FMT_16_16_16_16_FLOAT = 0x00000020, -FMT_3_3_2 = 0x00000021, -FMT_6_5_5 = 0x00000022, -FMT_32_32_32_32_FLOAT = 0x00000023, -FMT_RESERVED_36 = 0x00000024, -FMT_1 = 0x00000025, -FMT_1_REVERSED = 0x00000026, -FMT_GB_GR = 0x00000027, -FMT_BG_RG = 0x00000028, -FMT_32_AS_8 = 0x00000029, -FMT_32_AS_8_8 = 0x0000002a, -FMT_5_9_9_9_SHAREDEXP = 0x0000002b, -FMT_8_8_8 = 0x0000002c, -FMT_16_16_16 = 0x0000002d, -FMT_16_16_16_FLOAT = 0x0000002e, -FMT_4_4 = 0x0000002f, -FMT_32_32_32_FLOAT = 0x00000030, -FMT_BC1 = 0x00000031, -FMT_BC2 = 0x00000032, -FMT_BC3 = 0x00000033, -FMT_BC4 = 0x00000034, -FMT_BC5 = 0x00000035, -FMT_BC6 = 0x00000036, -FMT_BC7 = 0x00000037, -FMT_32_AS_32_32_32_32 = 0x00000038, -FMT_APC3 = 0x00000039, -FMT_APC4 = 0x0000003a, -FMT_APC5 = 0x0000003b, -FMT_APC6 = 0x0000003c, -FMT_APC7 = 0x0000003d, -FMT_CTX1 = 0x0000003e, -FMT_RESERVED_63 = 0x0000003f, -} SurfaceFormat; - -/* - * BUF_DATA_FORMAT enum - */ - -typedef enum BUF_DATA_FORMAT { -BUF_DATA_FORMAT_INVALID = 0x00000000, -BUF_DATA_FORMAT_8 = 0x00000001, -BUF_DATA_FORMAT_16 = 0x00000002, -BUF_DATA_FORMAT_8_8 = 0x00000003, -BUF_DATA_FORMAT_32 = 0x00000004, -BUF_DATA_FORMAT_16_16 = 0x00000005, -BUF_DATA_FORMAT_10_11_11 = 0x00000006, -BUF_DATA_FORMAT_11_11_10 = 0x00000007, -BUF_DATA_FORMAT_10_10_10_2 = 0x00000008, -BUF_DATA_FORMAT_2_10_10_10 = 0x00000009, -BUF_DATA_FORMAT_8_8_8_8 = 0x0000000a, -BUF_DATA_FORMAT_32_32 = 0x0000000b, -BUF_DATA_FORMAT_16_16_16_16 = 0x0000000c, -BUF_DATA_FORMAT_32_32_32 = 0x0000000d, -BUF_DATA_FORMAT_32_32_32_32 = 0x0000000e, -BUF_DATA_FORMAT_RESERVED_15 = 0x0000000f, -} BUF_DATA_FORMAT; - -/* - * IMG_DATA_FORMAT enum - */ - -typedef enum IMG_DATA_FORMAT { -IMG_DATA_FORMAT_INVALID = 0x00000000, -IMG_DATA_FORMAT_8 = 0x00000001, -IMG_DATA_FORMAT_16 = 0x00000002, -IMG_DATA_FORMAT_8_8 = 0x00000003, -IMG_DATA_FORMAT_32 = 0x00000004, -IMG_DATA_FORMAT_16_16 = 0x00000005, -IMG_DATA_FORMAT_10_11_11 = 0x00000006, -IMG_DATA_FORMAT_11_11_10 = 0x00000007, 
-IMG_DATA_FORMAT_10_10_10_2 = 0x00000008, -IMG_DATA_FORMAT_2_10_10_10 = 0x00000009, -IMG_DATA_FORMAT_8_8_8_8 = 0x0000000a, -IMG_DATA_FORMAT_32_32 = 0x0000000b, -IMG_DATA_FORMAT_16_16_16_16 = 0x0000000c, -IMG_DATA_FORMAT_32_32_32 = 0x0000000d, -IMG_DATA_FORMAT_32_32_32_32 = 0x0000000e, -IMG_DATA_FORMAT_RESERVED_15 = 0x0000000f, -IMG_DATA_FORMAT_5_6_5 = 0x00000010, -IMG_DATA_FORMAT_1_5_5_5 = 0x00000011, -IMG_DATA_FORMAT_5_5_5_1 = 0x00000012, -IMG_DATA_FORMAT_4_4_4_4 = 0x00000013, -IMG_DATA_FORMAT_8_24 = 0x00000014, -IMG_DATA_FORMAT_24_8 = 0x00000015, -IMG_DATA_FORMAT_X24_8_32 = 0x00000016, -IMG_DATA_FORMAT_8_AS_8_8_8_8 = 0x00000017, -IMG_DATA_FORMAT_ETC2_RGB = 0x00000018, -IMG_DATA_FORMAT_ETC2_RGBA = 0x00000019, -IMG_DATA_FORMAT_ETC2_R = 0x0000001a, -IMG_DATA_FORMAT_ETC2_RG = 0x0000001b, -IMG_DATA_FORMAT_ETC2_RGBA1 = 0x0000001c, -IMG_DATA_FORMAT_RESERVED_29 = 0x0000001d, -IMG_DATA_FORMAT_RESERVED_30 = 0x0000001e, -IMG_DATA_FORMAT_6E4 = 0x0000001f, -IMG_DATA_FORMAT_GB_GR = 0x00000020, -IMG_DATA_FORMAT_BG_RG = 0x00000021, -IMG_DATA_FORMAT_5_9_9_9 = 0x00000022, -IMG_DATA_FORMAT_BC1 = 0x00000023, -IMG_DATA_FORMAT_BC2 = 0x00000024, -IMG_DATA_FORMAT_BC3 = 0x00000025, -IMG_DATA_FORMAT_BC4 = 0x00000026, -IMG_DATA_FORMAT_BC5 = 0x00000027, -IMG_DATA_FORMAT_BC6 = 0x00000028, -IMG_DATA_FORMAT_BC7 = 0x00000029, -IMG_DATA_FORMAT_16_AS_32_32 = 0x0000002a, -IMG_DATA_FORMAT_16_AS_16_16_16_16 = 0x0000002b, -IMG_DATA_FORMAT_16_AS_32_32_32_32 = 0x0000002c, -IMG_DATA_FORMAT_FMASK = 0x0000002d, -IMG_DATA_FORMAT_ASTC_2D_LDR = 0x0000002e, -IMG_DATA_FORMAT_ASTC_2D_HDR = 0x0000002f, -IMG_DATA_FORMAT_ASTC_2D_LDR_SRGB = 0x00000030, -IMG_DATA_FORMAT_ASTC_3D_LDR = 0x00000031, -IMG_DATA_FORMAT_ASTC_3D_HDR = 0x00000032, -IMG_DATA_FORMAT_ASTC_3D_LDR_SRGB = 0x00000033, -IMG_DATA_FORMAT_N_IN_16 = 0x00000034, -IMG_DATA_FORMAT_N_IN_16_16 = 0x00000035, -IMG_DATA_FORMAT_N_IN_16_16_16_16 = 0x00000036, -IMG_DATA_FORMAT_N_IN_16_AS_16_16_16_16 = 0x00000037, -IMG_DATA_FORMAT_RESERVED_56 = 0x00000038, 
-IMG_DATA_FORMAT_4_4 = 0x00000039, -IMG_DATA_FORMAT_6_5_5 = 0x0000003a, -IMG_DATA_FORMAT_RESERVED_59 = 0x0000003b, -IMG_DATA_FORMAT_RESERVED_60 = 0x0000003c, -IMG_DATA_FORMAT_8_AS_32 = 0x0000003d, -IMG_DATA_FORMAT_8_AS_32_32 = 0x0000003e, -IMG_DATA_FORMAT_32_AS_32_32_32_32 = 0x0000003f, -} IMG_DATA_FORMAT; - -/* - * BUF_NUM_FORMAT enum - */ - -typedef enum BUF_NUM_FORMAT { -BUF_NUM_FORMAT_UNORM = 0x00000000, -BUF_NUM_FORMAT_SNORM = 0x00000001, -BUF_NUM_FORMAT_USCALED = 0x00000002, -BUF_NUM_FORMAT_SSCALED = 0x00000003, -BUF_NUM_FORMAT_UINT = 0x00000004, -BUF_NUM_FORMAT_SINT = 0x00000005, -BUF_NUM_FORMAT_UNORM_UINT = 0x00000006, -BUF_NUM_FORMAT_FLOAT = 0x00000007, -} BUF_NUM_FORMAT; - -/* - * IMG_NUM_FORMAT enum - */ - -typedef enum IMG_NUM_FORMAT { -IMG_NUM_FORMAT_UNORM = 0x00000000, -IMG_NUM_FORMAT_SNORM = 0x00000001, -IMG_NUM_FORMAT_USCALED = 0x00000002, -IMG_NUM_FORMAT_SSCALED = 0x00000003, -IMG_NUM_FORMAT_UINT = 0x00000004, -IMG_NUM_FORMAT_SINT = 0x00000005, -IMG_NUM_FORMAT_UNORM_UINT = 0x00000006, -IMG_NUM_FORMAT_FLOAT = 0x00000007, -IMG_NUM_FORMAT_RESERVED_8 = 0x00000008, -IMG_NUM_FORMAT_SRGB = 0x00000009, -IMG_NUM_FORMAT_RESERVED_10 = 0x0000000a, -IMG_NUM_FORMAT_RESERVED_11 = 0x0000000b, -IMG_NUM_FORMAT_RESERVED_12 = 0x0000000c, -IMG_NUM_FORMAT_RESERVED_13 = 0x0000000d, -IMG_NUM_FORMAT_RESERVED_14 = 0x0000000e, -IMG_NUM_FORMAT_RESERVED_15 = 0x0000000f, -} IMG_NUM_FORMAT; - -/* - * IMG_NUM_FORMAT_FMASK enum - */ - -typedef enum IMG_NUM_FORMAT_FMASK { -IMG_NUM_FORMAT_FMASK_8_2_1 = 0x00000000, -IMG_NUM_FORMAT_FMASK_8_4_1 = 0x00000001, -IMG_NUM_FORMAT_FMASK_8_8_1 = 0x00000002, -IMG_NUM_FORMAT_FMASK_8_2_2 = 0x00000003, -IMG_NUM_FORMAT_FMASK_8_4_2 = 0x00000004, -IMG_NUM_FORMAT_FMASK_8_4_4 = 0x00000005, -IMG_NUM_FORMAT_FMASK_16_16_1 = 0x00000006, -IMG_NUM_FORMAT_FMASK_16_8_2 = 0x00000007, -IMG_NUM_FORMAT_FMASK_32_16_2 = 0x00000008, -IMG_NUM_FORMAT_FMASK_32_8_4 = 0x00000009, -IMG_NUM_FORMAT_FMASK_32_8_8 = 0x0000000a, -IMG_NUM_FORMAT_FMASK_64_16_4 = 0x0000000b, 
-IMG_NUM_FORMAT_FMASK_64_16_8 = 0x0000000c, -IMG_NUM_FORMAT_FMASK_RESERVED_13 = 0x0000000d, -IMG_NUM_FORMAT_FMASK_RESERVED_14 = 0x0000000e, -IMG_NUM_FORMAT_FMASK_RESERVED_15 = 0x0000000f, -} IMG_NUM_FORMAT_FMASK; - -/* - * IMG_NUM_FORMAT_N_IN_16 enum - */ - -typedef enum IMG_NUM_FORMAT_N_IN_16 { -IMG_NUM_FORMAT_N_IN_16_RESERVED_0 = 0x00000000, -IMG_NUM_FORMAT_N_IN_16_UNORM_10 = 0x00000001, -IMG_NUM_FORMAT_N_IN_16_UNORM_9 = 0x00000002, -IMG_NUM_FORMAT_N_IN_16_RESERVED_3 = 0x00000003, -IMG_NUM_FORMAT_N_IN_16_UINT_10 = 0x00000004, -IMG_NUM_FORMAT_N_IN_16_UINT_9 = 0x00000005, -IMG_NUM_FORMAT_N_IN_16_RESERVED_6 = 0x00000006, -IMG_NUM_FORMAT_N_IN_16_UNORM_UINT_10 = 0x00000007, -IMG_NUM_FORMAT_N_IN_16_UNORM_UINT_9 = 0x00000008, -IMG_NUM_FORMAT_N_IN_16_RESERVED_9 = 0x00000009, -IMG_NUM_FORMAT_N_IN_16_RESERVED_10 = 0x0000000a, -IMG_NUM_FORMAT_N_IN_16_RESERVED_11 = 0x0000000b, -IMG_NUM_FORMAT_N_IN_16_RESERVED_12 = 0x0000000c, -IMG_NUM_FORMAT_N_IN_16_RESERVED_13 = 0x0000000d, -IMG_NUM_FORMAT_N_IN_16_RESERVED_14 = 0x0000000e, -IMG_NUM_FORMAT_N_IN_16_RESERVED_15 = 0x0000000f, -} IMG_NUM_FORMAT_N_IN_16; - -/* - * IMG_NUM_FORMAT_ASTC_2D enum - */ - -typedef enum IMG_NUM_FORMAT_ASTC_2D { -IMG_NUM_FORMAT_ASTC_2D_4x4 = 0x00000000, -IMG_NUM_FORMAT_ASTC_2D_5x4 = 0x00000001, -IMG_NUM_FORMAT_ASTC_2D_5x5 = 0x00000002, -IMG_NUM_FORMAT_ASTC_2D_6x5 = 0x00000003, -IMG_NUM_FORMAT_ASTC_2D_6x6 = 0x00000004, -IMG_NUM_FORMAT_ASTC_2D_8x5 = 0x00000005, -IMG_NUM_FORMAT_ASTC_2D_8x6 = 0x00000006, -IMG_NUM_FORMAT_ASTC_2D_8x8 = 0x00000007, -IMG_NUM_FORMAT_ASTC_2D_10x5 = 0x00000008, -IMG_NUM_FORMAT_ASTC_2D_10x6 = 0x00000009, -IMG_NUM_FORMAT_ASTC_2D_10x8 = 0x0000000a, -IMG_NUM_FORMAT_ASTC_2D_10x10 = 0x0000000b, -IMG_NUM_FORMAT_ASTC_2D_12x10 = 0x0000000c, -IMG_NUM_FORMAT_ASTC_2D_12x12 = 0x0000000d, -IMG_NUM_FORMAT_ASTC_2D_RESERVED_14 = 0x0000000e, -IMG_NUM_FORMAT_ASTC_2D_RESERVED_15 = 0x0000000f, -} IMG_NUM_FORMAT_ASTC_2D; - -/* - * IMG_NUM_FORMAT_ASTC_3D enum - */ - -typedef enum IMG_NUM_FORMAT_ASTC_3D { 
-IMG_NUM_FORMAT_ASTC_3D_3x3x3 = 0x00000000, -IMG_NUM_FORMAT_ASTC_3D_4x3x3 = 0x00000001, -IMG_NUM_FORMAT_ASTC_3D_4x4x3 = 0x00000002, -IMG_NUM_FORMAT_ASTC_3D_4x4x4 = 0x00000003, -IMG_NUM_FORMAT_ASTC_3D_5x4x4 = 0x00000004, -IMG_NUM_FORMAT_ASTC_3D_5x5x4 = 0x00000005, -IMG_NUM_FORMAT_ASTC_3D_5x5x5 = 0x00000006, -IMG_NUM_FORMAT_ASTC_3D_6x5x5 = 0x00000007, -IMG_NUM_FORMAT_ASTC_3D_6x6x5 = 0x00000008, -IMG_NUM_FORMAT_ASTC_3D_6x6x6 = 0x00000009, -IMG_NUM_FORMAT_ASTC_3D_RESERVED_10 = 0x0000000a, -IMG_NUM_FORMAT_ASTC_3D_RESERVED_11 = 0x0000000b, -IMG_NUM_FORMAT_ASTC_3D_RESERVED_12 = 0x0000000c, -IMG_NUM_FORMAT_ASTC_3D_RESERVED_13 = 0x0000000d, -IMG_NUM_FORMAT_ASTC_3D_RESERVED_14 = 0x0000000e, -IMG_NUM_FORMAT_ASTC_3D_RESERVED_15 = 0x0000000f, -} IMG_NUM_FORMAT_ASTC_3D; - -/* - * TileType enum - */ - -typedef enum TileType { -ARRAY_COLOR_TILE = 0x00000000, -ARRAY_DEPTH_TILE = 0x00000001, -} TileType; - -/* - * NonDispTilingOrder enum - */ - -typedef enum NonDispTilingOrder { -ADDR_SURF_MICRO_TILING_DISPLAY = 0x00000000, -ADDR_SURF_MICRO_TILING_NON_DISPLAY = 0x00000001, -} NonDispTilingOrder; - -/* - * MicroTileMode enum - */ - -typedef enum MicroTileMode { -ADDR_SURF_DISPLAY_MICRO_TILING = 0x00000000, -ADDR_SURF_THIN_MICRO_TILING = 0x00000001, -ADDR_SURF_DEPTH_MICRO_TILING = 0x00000002, -ADDR_SURF_ROTATED_MICRO_TILING = 0x00000003, -ADDR_SURF_THICK_MICRO_TILING = 0x00000004, -} MicroTileMode; - -/* - * TileSplit enum - */ - -typedef enum TileSplit { -ADDR_SURF_TILE_SPLIT_64B = 0x00000000, -ADDR_SURF_TILE_SPLIT_128B = 0x00000001, -ADDR_SURF_TILE_SPLIT_256B = 0x00000002, -ADDR_SURF_TILE_SPLIT_512B = 0x00000003, -ADDR_SURF_TILE_SPLIT_1KB = 0x00000004, -ADDR_SURF_TILE_SPLIT_2KB = 0x00000005, -ADDR_SURF_TILE_SPLIT_4KB = 0x00000006, -} TileSplit; - -/* - * SampleSplit enum - */ - -typedef enum SampleSplit { -ADDR_SURF_SAMPLE_SPLIT_1 = 0x00000000, -ADDR_SURF_SAMPLE_SPLIT_2 = 0x00000001, -ADDR_SURF_SAMPLE_SPLIT_4 = 0x00000002, -ADDR_SURF_SAMPLE_SPLIT_8 = 0x00000003, -} SampleSplit; - 
-/* - * PipeConfig enum - */ - -typedef enum PipeConfig { -ADDR_SURF_P2 = 0x00000000, -ADDR_SURF_P2_RESERVED0 = 0x00000001, -ADDR_SURF_P2_RESERVED1 = 0x00000002, -ADDR_SURF_P2_RESERVED2 = 0x00000003, -ADDR_SURF_P4_8x16 = 0x00000004, -ADDR_SURF_P4_16x16 = 0x00000005, -ADDR_SURF_P4_16x32 = 0x00000006, -ADDR_SURF_P4_32x32 = 0x00000007, -ADDR_SURF_P8_16x16_8x16 = 0x00000008, -ADDR_SURF_P8_16x32_8x16 = 0x00000009, -ADDR_SURF_P8_32x32_8x16 = 0x0000000a, -ADDR_SURF_P8_16x32_16x16 = 0x0000000b, -ADDR_SURF_P8_32x32_16x16 = 0x0000000c, -ADDR_SURF_P8_32x32_16x32 = 0x0000000d, -ADDR_SURF_P8_32x64_32x32 = 0x0000000e, -ADDR_SURF_P8_RESERVED0 = 0x0000000f, -ADDR_SURF_P16_32x32_8x16 = 0x00000010, -ADDR_SURF_P16_32x32_16x16 = 0x00000011, -} PipeConfig; - -/* - * SeEnable enum - */ - -typedef enum SeEnable { -ADDR_CONFIG_DISABLE_SE = 0x00000000, -ADDR_CONFIG_ENABLE_SE = 0x00000001, -} SeEnable; - -/* - * NumBanks enum - */ - -typedef enum NumBanks { -ADDR_SURF_2_BANK = 0x00000000, -ADDR_SURF_4_BANK = 0x00000001, -ADDR_SURF_8_BANK = 0x00000002, -ADDR_SURF_16_BANK = 0x00000003, -} NumBanks; - -/* - * BankWidth enum - */ - -typedef enum BankWidth { -ADDR_SURF_BANK_WIDTH_1 = 0x00000000, -ADDR_SURF_BANK_WIDTH_2 = 0x00000001, -ADDR_SURF_BANK_WIDTH_4 = 0x00000002, -ADDR_SURF_BANK_WIDTH_8 = 0x00000003, -} BankWidth; - -/* - * BankHeight enum - */ - -typedef enum BankHeight { -ADDR_SURF_BANK_HEIGHT_1 = 0x00000000, -ADDR_SURF_BANK_HEIGHT_2 = 0x00000001, -ADDR_SURF_BANK_HEIGHT_4 = 0x00000002, -ADDR_SURF_BANK_HEIGHT_8 = 0x00000003, -} BankHeight; - -/* - * BankWidthHeight enum - */ - -typedef enum BankWidthHeight { -ADDR_SURF_BANK_WH_1 = 0x00000000, -ADDR_SURF_BANK_WH_2 = 0x00000001, -ADDR_SURF_BANK_WH_4 = 0x00000002, -ADDR_SURF_BANK_WH_8 = 0x00000003, -} BankWidthHeight; - -/* - * MacroTileAspect enum - */ - -typedef enum MacroTileAspect { -ADDR_SURF_MACRO_ASPECT_1 = 0x00000000, -ADDR_SURF_MACRO_ASPECT_2 = 0x00000001, -ADDR_SURF_MACRO_ASPECT_4 = 0x00000002, -ADDR_SURF_MACRO_ASPECT_8 = 
0x00000003, -} MacroTileAspect; - -/* - * GATCL1RequestType enum - */ - -typedef enum GATCL1RequestType { -GATCL1_TYPE_NORMAL = 0x00000000, -GATCL1_TYPE_SHOOTDOWN = 0x00000001, -GATCL1_TYPE_BYPASS = 0x00000002, -} GATCL1RequestType; - -/* - * UTCL1RequestType enum - */ - -typedef enum UTCL1RequestType { -UTCL1_TYPE_NORMAL = 0x00000000, -UTCL1_TYPE_SHOOTDOWN = 0x00000001, -UTCL1_TYPE_BYPASS = 0x00000002, -} UTCL1RequestType; - -/* - * UTCL1FaultType enum - */ - -typedef enum UTCL1FaultType { -UTCL1_XNACK_SUCCESS = 0x00000000, -UTCL1_XNACK_RETRY = 0x00000001, -UTCL1_XNACK_PRT = 0x00000002, -UTCL1_XNACK_NO_RETRY = 0x00000003, -} UTCL1FaultType; - -/* - * TCC_CACHE_POLICIES enum - */ - -typedef enum TCC_CACHE_POLICIES { -TCC_CACHE_POLICY_LRU = 0x00000000, -TCC_CACHE_POLICY_STREAM = 0x00000001, -} TCC_CACHE_POLICIES; - -/* - * MTYPE enum - */ - -typedef enum MTYPE { -MTYPE_NC = 0x00000000, -MTYPE_WC = 0x00000001, -MTYPE_CC = 0x00000002, -MTYPE_UC = 0x00000003, -} MTYPE; - -/* - * RMI_CID enum - */ - -typedef enum RMI_CID { -RMI_CID_CC = 0x00000000, -RMI_CID_FC = 0x00000001, -RMI_CID_CM = 0x00000002, -RMI_CID_DC = 0x00000003, -RMI_CID_Z = 0x00000004, -RMI_CID_S = 0x00000005, -RMI_CID_TILE = 0x00000006, -RMI_CID_ZPCPSD = 0x00000007, -} RMI_CID; - -/* - * PERFMON_COUNTER_MODE enum - */ - -typedef enum PERFMON_COUNTER_MODE { -PERFMON_COUNTER_MODE_ACCUM = 0x00000000, -PERFMON_COUNTER_MODE_ACTIVE_CYCLES = 0x00000001, -PERFMON_COUNTER_MODE_MAX = 0x00000002, -PERFMON_COUNTER_MODE_DIRTY = 0x00000003, -PERFMON_COUNTER_MODE_SAMPLE = 0x00000004, -PERFMON_COUNTER_MODE_CYCLES_SINCE_FIRST_EVENT = 0x00000005, -PERFMON_COUNTER_MODE_CYCLES_SINCE_LAST_EVENT = 0x00000006, -PERFMON_COUNTER_MODE_CYCLES_GE_HI = 0x00000007, -PERFMON_COUNTER_MODE_CYCLES_EQ_HI = 0x00000008, -PERFMON_COUNTER_MODE_INACTIVE_CYCLES = 0x00000009, -PERFMON_COUNTER_MODE_RESERVED = 0x0000000f, -} PERFMON_COUNTER_MODE; - -/* - * PERFMON_SPM_MODE enum - */ - -typedef enum PERFMON_SPM_MODE { -PERFMON_SPM_MODE_OFF = 
0x00000000, -PERFMON_SPM_MODE_16BIT_CLAMP = 0x00000001, -PERFMON_SPM_MODE_16BIT_NO_CLAMP = 0x00000002, -PERFMON_SPM_MODE_32BIT_CLAMP = 0x00000003, -PERFMON_SPM_MODE_32BIT_NO_CLAMP = 0x00000004, -PERFMON_SPM_MODE_RESERVED_5 = 0x00000005, -PERFMON_SPM_MODE_RESERVED_6 = 0x00000006, -PERFMON_SPM_MODE_RESERVED_7 = 0x00000007, -PERFMON_SPM_MODE_TEST_MODE_0 = 0x00000008, -PERFMON_SPM_MODE_TEST_MODE_1 = 0x00000009, -PERFMON_SPM_MODE_TEST_MODE_2 = 0x0000000a, -} PERFMON_SPM_MODE; - -/* - * SurfaceTiling enum - */ - -typedef enum SurfaceTiling { -ARRAY_LINEAR = 0x00000000, -ARRAY_TILED = 0x00000001, -} SurfaceTiling; - -/* - * SurfaceArray enum - */ - -typedef enum SurfaceArray { -ARRAY_1D = 0x00000000, -ARRAY_2D = 0x00000001, -ARRAY_3D = 0x00000002, -ARRAY_3D_SLICE = 0x00000003, -} SurfaceArray; - -/* - * ColorArray enum - */ - -typedef enum ColorArray { -ARRAY_2D_ALT_COLOR = 0x00000000, -ARRAY_2D_COLOR = 0x00000001, -ARRAY_3D_SLICE_COLOR = 0x00000003, -} ColorArray; - -/* - * DepthArray enum - */ - -typedef enum DepthArray { -ARRAY_2D_ALT_DEPTH = 0x00000000, -ARRAY_2D_DEPTH = 0x00000001, -} DepthArray; - -/* - * ENUM_NUM_SIMD_PER_CU enum - */ - -typedef enum ENUM_NUM_SIMD_PER_CU { -NUM_SIMD_PER_CU = 0x00000004, -} ENUM_NUM_SIMD_PER_CU; - -/* - * DSM_ENABLE_ERROR_INJECT enum - */ - -typedef enum DSM_ENABLE_ERROR_INJECT { -DSM_ENABLE_ERROR_INJECT_FED_IN = 0x00000000, -DSM_ENABLE_ERROR_INJECT_SINGLE = 0x00000001, -DSM_ENABLE_ERROR_INJECT_UNCORRECTABLE = 0x00000002, -DSM_ENABLE_ERROR_INJECT_UNCORRECTABLE_LIMITED = 0x00000003, -} DSM_ENABLE_ERROR_INJECT; - -/* - * DSM_SELECT_INJECT_DELAY enum - */ - -typedef enum DSM_SELECT_INJECT_DELAY { -DSM_SELECT_INJECT_DELAY_NO_DELAY = 0x00000000, -DSM_SELECT_INJECT_DELAY_DELAY_ERROR = 0x00000001, -} DSM_SELECT_INJECT_DELAY; - -/* - * DSM_DATA_SEL enum - */ - -typedef enum DSM_DATA_SEL { -DSM_DATA_SEL_DISABLE = 0x00000000, -DSM_DATA_SEL_0 = 0x00000001, -DSM_DATA_SEL_1 = 0x00000002, -DSM_DATA_SEL_BOTH = 0x00000003, -} DSM_DATA_SEL; - -/* - 
* DSM_SINGLE_WRITE enum - */ - -typedef enum DSM_SINGLE_WRITE { -DSM_SINGLE_WRITE_DIS = 0x00000000, -DSM_SINGLE_WRITE_EN = 0x00000001, -} DSM_SINGLE_WRITE; - -/* - * SWIZZLE_TYPE_ENUM enum - */ - -typedef enum SWIZZLE_TYPE_ENUM { -SW_Z = 0x00000000, -SW_S = 0x00000001, -SW_D = 0x00000002, -SW_R = 0x00000003, -SW_L = 0x00000004, -} SWIZZLE_TYPE_ENUM; - -/* - * TC_MICRO_TILE_MODE enum - */ - -typedef enum TC_MICRO_TILE_MODE { -MICRO_TILE_MODE_LINEAR = 0x00000000, -MICRO_TILE_MODE_ROTATED = 0x00000001, -MICRO_TILE_MODE_STD_2D = 0x00000002, -MICRO_TILE_MODE_STD_3D = 0x00000003, -MICRO_TILE_MODE_DISPLAY_2D = 0x00000004, -MICRO_TILE_MODE_DISPLAY_3D = 0x00000005, -MICRO_TILE_MODE_Z_2D = 0x00000006, -MICRO_TILE_MODE_Z_3D = 0x00000007, -} TC_MICRO_TILE_MODE; - -/* - * SWIZZLE_MODE_ENUM enum - */ - -typedef enum SWIZZLE_MODE_ENUM { -SW_LINEAR = 0x00000000, -SW_256B_S = 0x00000001, -SW_256B_D = 0x00000002, -SW_256B_R = 0x00000003, -SW_4KB_Z = 0x00000004, -SW_4KB_S = 0x00000005, -SW_4KB_D = 0x00000006, -SW_4KB_R = 0x00000007, -SW_64KB_Z = 0x00000008, -SW_64KB_S = 0x00000009, -SW_64KB_D = 0x0000000a, -SW_64KB_R = 0x0000000b, -SW_VAR_Z = 0x0000000c, -SW_VAR_S = 0x0000000d, -SW_VAR_D = 0x0000000e, -SW_VAR_R = 0x0000000f, -SW_RESERVED_16 = 0x00000010, -SW_RESERVED_17 = 0x00000011, -SW_RESERVED_18 = 0x00000012, -SW_RESERVED_19 = 0x00000013, -SW_4KB_Z_X = 0x00000014, -SW_4KB_S_X = 0x00000015, -SW_4KB_D_X = 0x00000016, -SW_4KB_R_X = 0x00000017, -SW_64KB_Z_X = 0x00000018, -SW_64KB_S_X = 0x00000019, -SW_64KB_D_X = 0x0000001a, -SW_64KB_R_X = 0x0000001b, -SW_VAR_Z_X = 0x0000001c, -SW_VAR_S_X = 0x0000001d, -SW_VAR_D_X = 0x0000001e, -SW_VAR_R_X = 0x0000001f, -} SWIZZLE_MODE_ENUM; - -/******************************************************* - * IH Enums - *******************************************************/ - -/* - * IH_PERF_SEL enum - */ - -typedef enum IH_PERF_SEL { -IH_PERF_SEL_CYCLE = 0x00000000, -IH_PERF_SEL_IDLE = 0x00000001, -IH_PERF_SEL_INPUT_IDLE = 0x00000002, 
-IH_PERF_SEL_BUFFER_IDLE = 0x00000003, -IH_PERF_SEL_RB0_FULL = 0x00000004, -IH_PERF_SEL_RB0_OVERFLOW = 0x00000005, -IH_PERF_SEL_RB0_WPTR_WRITEBACK = 0x00000006, -IH_PERF_SEL_RB0_WPTR_WRAP = 0x00000007, -IH_PERF_SEL_RB0_RPTR_WRAP = 0x00000008, -IH_PERF_SEL_MC_WR_IDLE = 0x00000009, -IH_PERF_SEL_MC_WR_COUNT = 0x0000000a, -IH_PERF_SEL_MC_WR_STALL = 0x0000000b, -IH_PERF_SEL_MC_WR_CLEAN_PENDING = 0x0000000c, -IH_PERF_SEL_MC_WR_CLEAN_STALL = 0x0000000d, -IH_PERF_SEL_BIF_LINE0_RISING = 0x0000000e, -IH_PERF_SEL_BIF_LINE0_FALLING = 0x0000000f, -IH_PERF_SEL_RB1_FULL = 0x00000010, -IH_PERF_SEL_RB1_OVERFLOW = 0x00000011, -Reserved18 = 0x00000012, -IH_PERF_SEL_RB1_WPTR_WRAP = 0x00000013, -IH_PERF_SEL_RB1_RPTR_WRAP = 0x00000014, -IH_PERF_SEL_RB2_FULL = 0x00000015, -IH_PERF_SEL_RB2_OVERFLOW = 0x00000016, -Reserved23 = 0x00000017, -IH_PERF_SEL_RB2_WPTR_WRAP = 0x00000018, -IH_PERF_SEL_RB2_RPTR_WRAP = 0x00000019, -Reserved26 = 0x0000001a, -Reserved27 = 0x0000001b, -Reserved28 = 0x0000001c, -Reserved29 = 0x0000001d, -IH_PERF_SEL_RB0_FULL_VF0 = 0x0000001e, -IH_PERF_SEL_RB0_FULL_VF1 = 0x0000001f, -IH_PERF_SEL_RB0_FULL_VF2 = 0x00000020, -IH_PERF_SEL_RB0_FULL_VF3 = 0x00000021, -IH_PERF_SEL_RB0_FULL_VF4 = 0x00000022, -IH_PERF_SEL_RB0_FULL_VF5 = 0x00000023, -IH_PERF_SEL_RB0_FULL_VF6 = 0x00000024, -IH_PERF_SEL_RB0_FULL_VF7 = 0x00000025, -IH_PERF_SEL_RB0_FULL_VF8 = 0x00000026, -IH_PERF_SEL_RB0_FULL_VF9 = 0x00000027, -IH_PERF_SEL_RB0_FULL_VF10 = 0x00000028, -IH_PERF_SEL_RB0_FULL_VF11 = 0x00000029, -IH_PERF_SEL_RB0_FULL_VF12 = 0x0000002a, -IH_PERF_SEL_RB0_FULL_VF13 = 0x0000002b, -IH_PERF_SEL_RB0_FULL_VF14 = 0x0000002c, -IH_PERF_SEL_RB0_FULL_VF15 = 0x0000002d, -IH_PERF_SEL_RB0_OVERFLOW_VF0 = 0x0000002e, -IH_PERF_SEL_RB0_OVERFLOW_VF1 = 0x0000002f, -IH_PERF_SEL_RB0_OVERFLOW_VF2 = 0x00000030, -IH_PERF_SEL_RB0_OVERFLOW_VF3 = 0x00000031, -IH_PERF_SEL_RB0_OVERFLOW_VF4 = 0x00000032, -IH_PERF_SEL_RB0_OVERFLOW_VF5 = 0x00000033, -IH_PERF_SEL_RB0_OVERFLOW_VF6 = 0x00000034, -IH_PERF_SEL_RB0_OVERFLOW_VF7 = 
0x00000035, -IH_PERF_SEL_RB0_OVERFLOW_VF8 = 0x00000036, -IH_PERF_SEL_RB0_OVERFLOW_VF9 = 0x00000037, -IH_PERF_SEL_RB0_OVERFLOW_VF10 = 0x00000038, -IH_PERF_SEL_RB0_OVERFLOW_VF11 = 0x00000039, -IH_PERF_SEL_RB0_OVERFLOW_VF12 = 0x0000003a, -IH_PERF_SEL_RB0_OVERFLOW_VF13 = 0x0000003b, -IH_PERF_SEL_RB0_OVERFLOW_VF14 = 0x0000003c, -IH_PERF_SEL_RB0_OVERFLOW_VF15 = 0x0000003d, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF0 = 0x0000003e, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF1 = 0x0000003f, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF2 = 0x00000040, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF3 = 0x00000041, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF4 = 0x00000042, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF5 = 0x00000043, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF6 = 0x00000044, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF7 = 0x00000045, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF8 = 0x00000046, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF9 = 0x00000047, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF10 = 0x00000048, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF11 = 0x00000049, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF12 = 0x0000004a, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF13 = 0x0000004b, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF14 = 0x0000004c, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF15 = 0x0000004d, -IH_PERF_SEL_RB0_WPTR_WRAP_VF0 = 0x0000004e, -IH_PERF_SEL_RB0_WPTR_WRAP_VF1 = 0x0000004f, -IH_PERF_SEL_RB0_WPTR_WRAP_VF2 = 0x00000050, -IH_PERF_SEL_RB0_WPTR_WRAP_VF3 = 0x00000051, -IH_PERF_SEL_RB0_WPTR_WRAP_VF4 = 0x00000052, -IH_PERF_SEL_RB0_WPTR_WRAP_VF5 = 0x00000053, -IH_PERF_SEL_RB0_WPTR_WRAP_VF6 = 0x00000054, -IH_PERF_SEL_RB0_WPTR_WRAP_VF7 = 0x00000055, -IH_PERF_SEL_RB0_WPTR_WRAP_VF8 = 0x00000056, -IH_PERF_SEL_RB0_WPTR_WRAP_VF9 = 0x00000057, -IH_PERF_SEL_RB0_WPTR_WRAP_VF10 = 0x00000058, -IH_PERF_SEL_RB0_WPTR_WRAP_VF11 = 0x00000059, -IH_PERF_SEL_RB0_WPTR_WRAP_VF12 = 0x0000005a, -IH_PERF_SEL_RB0_WPTR_WRAP_VF13 = 0x0000005b, -IH_PERF_SEL_RB0_WPTR_WRAP_VF14 = 0x0000005c, -IH_PERF_SEL_RB0_WPTR_WRAP_VF15 = 0x0000005d, -IH_PERF_SEL_RB0_RPTR_WRAP_VF0 = 0x0000005e, -IH_PERF_SEL_RB0_RPTR_WRAP_VF1 = 0x0000005f, 
-IH_PERF_SEL_RB0_RPTR_WRAP_VF2 = 0x00000060, -IH_PERF_SEL_RB0_RPTR_WRAP_VF3 = 0x00000061, -IH_PERF_SEL_RB0_RPTR_WRAP_VF4 = 0x00000062, -IH_PERF_SEL_RB0_RPTR_WRAP_VF5 = 0x00000063, -IH_PERF_SEL_RB0_RPTR_WRAP_VF6 = 0x00000064, -IH_PERF_SEL_RB0_RPTR_WRAP_VF7 = 0x00000065, -IH_PERF_SEL_RB0_RPTR_WRAP_VF8 = 0x00000066, -IH_PERF_SEL_RB0_RPTR_WRAP_VF9 = 0x00000067, -IH_PERF_SEL_RB0_RPTR_WRAP_VF10 = 0x00000068, -IH_PERF_SEL_RB0_RPTR_WRAP_VF11 = 0x00000069, -IH_PERF_SEL_RB0_RPTR_WRAP_VF12 = 0x0000006a, -IH_PERF_SEL_RB0_RPTR_WRAP_VF13 = 0x0000006b, -IH_PERF_SEL_RB0_RPTR_WRAP_VF14 = 0x0000006c, -IH_PERF_SEL_RB0_RPTR_WRAP_VF15 = 0x0000006d, -IH_PERF_SEL_BIF_LINE0_RISING_VF0 = 0x0000006e, -IH_PERF_SEL_BIF_LINE0_RISING_VF1 = 0x0000006f, -IH_PERF_SEL_BIF_LINE0_RISING_VF2 = 0x00000070, -IH_PERF_SEL_BIF_LINE0_RISING_VF3 = 0x00000071, -IH_PERF_SEL_BIF_LINE0_RISING_VF4 = 0x00000072, -IH_PERF_SEL_BIF_LINE0_RISING_VF5 = 0x00000073, -IH_PERF_SEL_BIF_LINE0_RISING_VF6 = 0x00000074, -IH_PERF_SEL_BIF_LINE0_RISING_VF7 = 0x00000075, -IH_PERF_SEL_BIF_LINE0_RISING_VF8 = 0x00000076, -IH_PERF_SEL_BIF_LINE0_RISING_VF9 = 0x00000077, -IH_PERF_SEL_BIF_LINE0_RISING_VF10 = 0x00000078, -IH_PERF_SEL_BIF_LINE0_RISING_VF11 = 0x00000079, -IH_PERF_SEL_BIF_LINE0_RISING_VF12 = 0x0000007a, -IH_PERF_SEL_BIF_LINE0_RISING_VF13 = 0x0000007b, -IH_PERF_SEL_BIF_LINE0_RISING_VF14 = 0x0000007c, -IH_PERF_SEL_BIF_LINE0_RISING_VF15 = 0x0000007d, -IH_PERF_SEL_BIF_LINE0_FALLING_VF0 = 0x0000007e, -IH_PERF_SEL_BIF_LINE0_FALLING_VF1 = 0x0000007f, -IH_PERF_SEL_BIF_LINE0_FALLING_VF2 = 0x00000080, -IH_PERF_SEL_BIF_LINE0_FALLING_VF3 = 0x00000081, -IH_PERF_SEL_BIF_LINE0_FALLING_VF4 = 0x00000082, -IH_PERF_SEL_BIF_LINE0_FALLING_VF5 = 0x00000083, -IH_PERF_SEL_BIF_LINE0_FALLING_VF6 = 0x00000084, -IH_PERF_SEL_BIF_LINE0_FALLING_VF7 = 0x00000085, -IH_PERF_SEL_BIF_LINE0_FALLING_VF8 = 0x00000086, -IH_PERF_SEL_BIF_LINE0_FALLING_VF9 = 0x00000087, -IH_PERF_SEL_BIF_LINE0_FALLING_VF10 = 0x00000088, -IH_PERF_SEL_BIF_LINE0_FALLING_VF11 = 0x00000089, 
-IH_PERF_SEL_BIF_LINE0_FALLING_VF12 = 0x0000008a, -IH_PERF_SEL_BIF_LINE0_FALLING_VF13 = 0x0000008b, -IH_PERF_SEL_BIF_LINE0_FALLING_VF14 = 0x0000008c, -IH_PERF_SEL_BIF_LINE0_FALLING_VF15 = 0x0000008d, -Reserved142 = 0x0000008e, -Reserved143 = 0x0000008f, -Reserved144 = 0x00000090, -Reserved145 = 0x00000091, -Reserved146 = 0x00000092, -Reserved147 = 0x00000093, -Reserved148 = 0x00000094, -Reserved149 = 0x00000095, -IH_PERF_SEL_CLIENT0_INT = 0x00000096, -IH_PERF_SEL_CLIENT1_INT = 0x00000097, -IH_PERF_SEL_CLIENT2_INT = 0x00000098, -IH_PERF_SEL_CLIENT3_INT = 0x00000099, -IH_PERF_SEL_CLIENT4_INT = 0x0000009a, -IH_PERF_SEL_CLIENT5_INT = 0x0000009b, -IH_PERF_SEL_CLIENT6_INT = 0x0000009c, -IH_PERF_SEL_CLIENT7_INT = 0x0000009d, -IH_PERF_SEL_CLIENT8_INT = 0x0000009e, -IH_PERF_SEL_CLIENT9_INT = 0x0000009f, -IH_PERF_SEL_CLIENT10_INT = 0x000000a0, -IH_PERF_SEL_CLIENT11_INT = 0x000000a1, -IH_PERF_SEL_CLIENT12_INT = 0x000000a2, -IH_PERF_SEL_CLIENT13_INT = 0x000000a3, -IH_PERF_SEL_CLIENT14_INT = 0x000000a4, -IH_PERF_SEL_CLIENT15_INT = 0x000000a5, -IH_PERF_SEL_CLIENT16_INT = 0x000000a6, -IH_PERF_SEL_CLIENT17_INT = 0x000000a7, -IH_PERF_SEL_CLIENT18_INT = 0x000000a8, -IH_PERF_SEL_CLIENT19_INT = 0x000000a9, -IH_PERF_SEL_CLIENT20_INT = 0x000000aa, -IH_PERF_SEL_CLIENT21_INT = 0x000000ab, -IH_PERF_SEL_CLIENT22_INT = 0x000000ac, -IH_PERF_SEL_CLIENT23_INT = 0x000000ad, -IH_PERF_SEL_CLIENT24_INT = 0x000000ae, -IH_PERF_SEL_CLIENT25_INT = 0x000000af, -IH_PERF_SEL_CLIENT26_INT = 0x000000b0, -IH_PERF_SEL_CLIENT27_INT = 0x000000b1, -IH_PERF_SEL_CLIENT28_INT = 0x000000b2, -IH_PERF_SEL_CLIENT29_INT = 0x000000b3, -IH_PERF_SEL_CLIENT30_INT = 0x000000b4, -IH_PERF_SEL_CLIENT31_INT = 0x000000b5, -Reserved182 = 0x000000b6, -Reserved183 = 0x000000b7, -Reserved184 = 0x000000b8, -Reserved185 = 0x000000b9, -Reserved186 = 0x000000ba, -Reserved187 = 0x000000bb, -Reserved188 = 0x000000bc, -Reserved189 = 0x000000bd, -Reserved190 = 0x000000be, -Reserved191 = 0x000000bf, -Reserved192 = 0x000000c0, -Reserved193 = 
0x000000c1, -Reserved194 = 0x000000c2, -Reserved195 = 0x000000c3, -Reserved196 = 0x000000c4, -Reserved197 = 0x000000c5, -Reserved198 = 0x000000c6, -Reserved199 = 0x000000c7, -Reserved200 = 0x000000c8, -Reserved201 = 0x000000c9, -Reserved202 = 0x000000ca, -Reserved203 = 0x000000cb, -Reserved204 = 0x000000cc, -Reserved205 = 0x000000cd, -Reserved206 = 0x000000ce, -Reserved207 = 0x000000cf, -Reserved208 = 0x000000d0, -Reserved209 = 0x000000d1, -Reserved210 = 0x000000d2, -Reserved211 = 0x000000d3, -Reserved212 = 0x000000d4, -Reserved213 = 0x000000d5, -Reserved214 = 0x000000d6, -Reserved215 = 0x000000d7, -Reserved216 = 0x000000d8, -Reserved217 = 0x000000d9, -Reserved218 = 0x000000da, -Reserved219 = 0x000000db, -IH_PERF_SEL_RB1_FULL_VF0 = 0x000000dc, -IH_PERF_SEL_RB1_FULL_VF1 = 0x000000dd, -IH_PERF_SEL_RB1_FULL_VF2 = 0x000000de, -IH_PERF_SEL_RB1_FULL_VF3 = 0x000000df, -IH_PERF_SEL_RB1_FULL_VF4 = 0x000000e0, -IH_PERF_SEL_RB1_FULL_VF5 = 0x000000e1, -IH_PERF_SEL_RB1_FULL_VF6 = 0x000000e2, -IH_PERF_SEL_RB1_FULL_VF7 = 0x000000e3, -IH_PERF_SEL_RB1_FULL_VF8 = 0x000000e4, -IH_PERF_SEL_RB1_FULL_VF9 = 0x000000e5, -IH_PERF_SEL_RB1_FULL_VF10 = 0x000000e6, -IH_PERF_SEL_RB1_FULL_VF11 = 0x000000e7, -IH_PERF_SEL_RB1_FULL_VF12 = 0x000000e8, -IH_PERF_SEL_RB1_FULL_VF13 = 0x000000e9, -IH_PERF_SEL_RB1_FULL_VF14 = 0x000000ea, -IH_PERF_SEL_RB1_FULL_VF15 = 0x000000eb, -IH_PERF_SEL_RB1_OVERFLOW_VF0 = 0x000000ec, -IH_PERF_SEL_RB1_OVERFLOW_VF1 = 0x000000ed, -IH_PERF_SEL_RB1_OVERFLOW_VF2 = 0x000000ee, -IH_PERF_SEL_RB1_OVERFLOW_VF3 = 0x000000ef, -IH_PERF_SEL_RB1_OVERFLOW_VF4 = 0x000000f0, -IH_PERF_SEL_RB1_OVERFLOW_VF5 = 0x000000f1, -IH_PERF_SEL_RB1_OVERFLOW_VF6 = 0x000000f2, -IH_PERF_SEL_RB1_OVERFLOW_VF7 = 0x000000f3, -IH_PERF_SEL_RB1_OVERFLOW_VF8 = 0x000000f4, -IH_PERF_SEL_RB1_OVERFLOW_VF9 = 0x000000f5, -IH_PERF_SEL_RB1_OVERFLOW_VF10 = 0x000000f6, -IH_PERF_SEL_RB1_OVERFLOW_VF11 = 0x000000f7, -IH_PERF_SEL_RB1_OVERFLOW_VF12 = 0x000000f8, -IH_PERF_SEL_RB1_OVERFLOW_VF13 = 0x000000f9, 
-IH_PERF_SEL_RB1_OVERFLOW_VF14 = 0x000000fa, -IH_PERF_SEL_RB1_OVERFLOW_VF15 = 0x000000fb, -Reserved252 = 0x000000fc, -Reserved253 = 0x000000fd, -Reserved254 = 0x000000fe, -Reserved255 = 0x000000ff, -Reserved256 = 0x00000100, -Reserved257 = 0x00000101, -Reserved258 = 0x00000102, -Reserved259 = 0x00000103, -Reserved260 = 0x00000104, -Reserved261 = 0x00000105, -Reserved262 = 0x00000106, -Reserved263 = 0x00000107, -Reserved264 = 0x00000108, -Reserved265 = 0x00000109, -Reserved266 = 0x0000010a, -Reserved267 = 0x0000010b, -IH_PERF_SEL_RB1_WPTR_WRAP_VF0 = 0x0000010c, -IH_PERF_SEL_RB1_WPTR_WRAP_VF1 = 0x0000010d, -IH_PERF_SEL_RB1_WPTR_WRAP_VF2 = 0x0000010e, -IH_PERF_SEL_RB1_WPTR_WRAP_VF3 = 0x0000010f, -IH_PERF_SEL_RB1_WPTR_WRAP_VF4 = 0x00000110, -IH_PERF_SEL_RB1_WPTR_WRAP_VF5 = 0x00000111, -IH_PERF_SEL_RB1_WPTR_WRAP_VF6 = 0x00000112, -IH_PERF_SEL_RB1_WPTR_WRAP_VF7 = 0x00000113, -IH_PERF_SEL_RB1_WPTR_WRAP_VF8 = 0x00000114, -IH_PERF_SEL_RB1_WPTR_WRAP_VF9 = 0x00000115, -IH_PERF_SEL_RB1_WPTR_WRAP_VF10 = 0x00000116, -IH_PERF_SEL_RB1_WPTR_WRAP_VF11 = 0x00000117, -IH_PERF_SEL_RB1_WPTR_WRAP_VF12 = 0x00000118, -IH_PERF_SEL_RB1_WPTR_WRAP_VF13 = 0x00000119, -IH_PERF_SEL_RB1_WPTR_WRAP_VF14 = 0x0000011a, -IH_PERF_SEL_RB1_WPTR_WRAP_VF15 = 0x0000011b, -IH_PERF_SEL_RB1_RPTR_WRAP_VF0 = 0x0000011c, -IH_PERF_SEL_RB1_RPTR_WRAP_VF1 = 0x0000011d, -IH_PERF_SEL_RB1_RPTR_WRAP_VF2 = 0x0000011e, -IH_PERF_SEL_RB1_RPTR_WRAP_VF3 = 0x0000011f, -IH_PERF_SEL_RB1_RPTR_WRAP_VF4 = 0x00000120, -IH_PERF_SEL_RB1_RPTR_WRAP_VF5 = 0x00000121, -IH_PERF_SEL_RB1_RPTR_WRAP_VF6 = 0x00000122, -IH_PERF_SEL_RB1_RPTR_WRAP_VF7 = 0x00000123, -IH_PERF_SEL_RB1_RPTR_WRAP_VF8 = 0x00000124, -IH_PERF_SEL_RB1_RPTR_WRAP_VF9 = 0x00000125, -IH_PERF_SEL_RB1_RPTR_WRAP_VF10 = 0x00000126, -IH_PERF_SEL_RB1_RPTR_WRAP_VF11 = 0x00000127, -IH_PERF_SEL_RB1_RPTR_WRAP_VF12 = 0x00000128, -IH_PERF_SEL_RB1_RPTR_WRAP_VF13 = 0x00000129, -IH_PERF_SEL_RB1_RPTR_WRAP_VF14 = 0x0000012a, -IH_PERF_SEL_RB1_RPTR_WRAP_VF15 = 0x0000012b, -Reserved300 = 
0x0000012c, -Reserved301 = 0x0000012d, -Reserved302 = 0x0000012e, -Reserved303 = 0x0000012f, -Reserved304 = 0x00000130, -Reserved305 = 0x00000131, -Reserved306 = 0x00000132, -Reserved307 = 0x00000133, -Reserved308 = 0x00000134, -Reserved309 = 0x00000135, -Reserved310 = 0x00000136, -Reserved311 = 0x00000137, -Reserved312 = 0x00000138, -Reserved313 = 0x00000139, -Reserved314 = 0x0000013a, -Reserved315 = 0x0000013b, -Reserved316 = 0x0000013c, -Reserved317 = 0x0000013d, -Reserved318 = 0x0000013e, -Reserved319 = 0x0000013f, -Reserved320 = 0x00000140, -Reserved321 = 0x00000141, -Reserved322 = 0x00000142, -Reserved323 = 0x00000143, -Reserved324 = 0x00000144, -Reserved325 = 0x00000145, -Reserved326 = 0x00000146, -Reserved327 = 0x00000147, -Reserved328 = 0x00000148, -Reserved329 = 0x00000149, -Reserved330 = 0x0000014a, -Reserved331 = 0x0000014b, -IH_PERF_SEL_RB2_FULL_VF0 = 0x0000014c, -IH_PERF_SEL_RB2_FULL_VF1 = 0x0000014d, -IH_PERF_SEL_RB2_FULL_VF2 = 0x0000014e, -IH_PERF_SEL_RB2_FULL_VF3 = 0x0000014f, -IH_PERF_SEL_RB2_FULL_VF4 = 0x00000150, -IH_PERF_SEL_RB2_FULL_VF5 = 0x00000151, -IH_PERF_SEL_RB2_FULL_VF6 = 0x00000152, -IH_PERF_SEL_RB2_FULL_VF7 = 0x00000153, -IH_PERF_SEL_RB2_FULL_VF8 = 0x00000154, -IH_PERF_SEL_RB2_FULL_VF9 = 0x00000155, -IH_PERF_SEL_RB2_FULL_VF10 = 0x00000156, -IH_PERF_SEL_RB2_FULL_VF11 = 0x00000157, -IH_PERF_SEL_RB2_FULL_VF12 = 0x00000158, -IH_PERF_SEL_RB2_FULL_VF13 = 0x00000159, -IH_PERF_SEL_RB2_FULL_VF14 = 0x0000015a, -IH_PERF_SEL_RB2_FULL_VF15 = 0x0000015b, -IH_PERF_SEL_RB2_OVERFLOW_VF0 = 0x0000015c, -IH_PERF_SEL_RB2_OVERFLOW_VF1 = 0x0000015d, -IH_PERF_SEL_RB2_OVERFLOW_VF2 = 0x0000015e, -IH_PERF_SEL_RB2_OVERFLOW_VF3 = 0x0000015f, -IH_PERF_SEL_RB2_OVERFLOW_VF4 = 0x00000160, -IH_PERF_SEL_RB2_OVERFLOW_VF5 = 0x00000161, -IH_PERF_SEL_RB2_OVERFLOW_VF6 = 0x00000162, -IH_PERF_SEL_RB2_OVERFLOW_VF7 = 0x00000163, -IH_PERF_SEL_RB2_OVERFLOW_VF8 = 0x00000164, -IH_PERF_SEL_RB2_OVERFLOW_VF9 = 0x00000165, -IH_PERF_SEL_RB2_OVERFLOW_VF10 = 0x00000166, 
-IH_PERF_SEL_RB2_OVERFLOW_VF11 = 0x00000167, -IH_PERF_SEL_RB2_OVERFLOW_VF12 = 0x00000168, -IH_PERF_SEL_RB2_OVERFLOW_VF13 = 0x00000169, -IH_PERF_SEL_RB2_OVERFLOW_VF14 = 0x0000016a, -IH_PERF_SEL_RB2_OVERFLOW_VF15 = 0x0000016b, -Reserved364 = 0x0000016c, -Reserved365 = 0x0000016d, -Reserved366 = 0x0000016e, -Reserved367 = 0x0000016f, -Reserved368 = 0x00000170, -Reserved369 = 0x00000171, -Reserved370 = 0x00000172, -Reserved371 = 0x00000173, -Reserved372 = 0x00000174, -Reserved373 = 0x00000175, -Reserved374 = 0x00000176, -Reserved375 = 0x00000177, -Reserved376 = 0x00000178, -Reserved377 = 0x00000179, -Reserved378 = 0x0000017a, -Reserved379 = 0x0000017b, -IH_PERF_SEL_RB2_WPTR_WRAP_VF0 = 0x0000017c, -IH_PERF_SEL_RB2_WPTR_WRAP_VF1 = 0x0000017d, -IH_PERF_SEL_RB2_WPTR_WRAP_VF2 = 0x0000017e, -IH_PERF_SEL_RB2_WPTR_WRAP_VF3 = 0x0000017f, -IH_PERF_SEL_RB2_WPTR_WRAP_VF4 = 0x00000180, -IH_PERF_SEL_RB2_WPTR_WRAP_VF5 = 0x00000181, -IH_PERF_SEL_RB2_WPTR_WRAP_VF6 = 0x00000182, -IH_PERF_SEL_RB2_WPTR_WRAP_VF7 = 0x00000183, -IH_PERF_SEL_RB2_WPTR_WRAP_VF8 = 0x00000184, -IH_PERF_SEL_RB2_WPTR_WRAP_VF9 = 0x00000185, -IH_PERF_SEL_RB2_WPTR_WRAP_VF10 = 0x00000186, -IH_PERF_SEL_RB2_WPTR_WRAP_VF11 = 0x00000187, -IH_PERF_SEL_RB2_WPTR_WRAP_VF12 = 0x00000188, -IH_PERF_SEL_RB2_WPTR_WRAP_VF13 = 0x00000189, -IH_PERF_SEL_RB2_WPTR_WRAP_VF14 = 0x0000018a, -IH_PERF_SEL_RB2_WPTR_WRAP_VF15 = 0x0000018b, -IH_PERF_SEL_RB2_RPTR_WRAP_VF0 = 0x0000018c, -IH_PERF_SEL_RB2_RPTR_WRAP_VF1 = 0x0000018d, -IH_PERF_SEL_RB2_RPTR_WRAP_VF2 = 0x0000018e, -IH_PERF_SEL_RB2_RPTR_WRAP_VF3 = 0x0000018f, -IH_PERF_SEL_RB2_RPTR_WRAP_VF4 = 0x00000190, -IH_PERF_SEL_RB2_RPTR_WRAP_VF5 = 0x00000191, -IH_PERF_SEL_RB2_RPTR_WRAP_VF6 = 0x00000192, -IH_PERF_SEL_RB2_RPTR_WRAP_VF7 = 0x00000193, -IH_PERF_SEL_RB2_RPTR_WRAP_VF8 = 0x00000194, -IH_PERF_SEL_RB2_RPTR_WRAP_VF9 = 0x00000195, -IH_PERF_SEL_RB2_RPTR_WRAP_VF10 = 0x00000196, -IH_PERF_SEL_RB2_RPTR_WRAP_VF11 = 0x00000197, -IH_PERF_SEL_RB2_RPTR_WRAP_VF12 = 0x00000198, 
-IH_PERF_SEL_RB2_RPTR_WRAP_VF13 = 0x00000199, -IH_PERF_SEL_RB2_RPTR_WRAP_VF14 = 0x0000019a, -IH_PERF_SEL_RB2_RPTR_WRAP_VF15 = 0x0000019b, -Reserved412 = 0x0000019c, -Reserved413 = 0x0000019d, -Reserved414 = 0x0000019e, -Reserved415 = 0x0000019f, -Reserved416 = 0x000001a0, -Reserved417 = 0x000001a1, -Reserved418 = 0x000001a2, -Reserved419 = 0x000001a3, -Reserved420 = 0x000001a4, -Reserved421 = 0x000001a5, -Reserved422 = 0x000001a6, -Reserved423 = 0x000001a7, -Reserved424 = 0x000001a8, -Reserved425 = 0x000001a9, -Reserved426 = 0x000001aa, -Reserved427 = 0x000001ab, -Reserved428 = 0x000001ac, -Reserved429 = 0x000001ad, -Reserved430 = 0x000001ae, -Reserved431 = 0x000001af, -Reserved432 = 0x000001b0, -Reserved433 = 0x000001b1, -Reserved434 = 0x000001b2, -Reserved435 = 0x000001b3, -Reserved436 = 0x000001b4, -Reserved437 = 0x000001b5, -Reserved438 = 0x000001b6, -Reserved439 = 0x000001b7, -Reserved440 = 0x000001b8, -Reserved441 = 0x000001b9, -Reserved442 = 0x000001ba, -Reserved443 = 0x000001bb, -Reserved444 = 0x000001bc, -Reserved445 = 0x000001bd, -Reserved446 = 0x000001be, -Reserved447 = 0x000001bf, -Reserved448 = 0x000001c0, -Reserved449 = 0x000001c1, -Reserved450 = 0x000001c2, -Reserved451 = 0x000001c3, -Reserved452 = 0x000001c4, -Reserved453 = 0x000001c5, -Reserved454 = 0x000001c6, -Reserved455 = 0x000001c7, -Reserved456 = 0x000001c8, -Reserved457 = 0x000001c9, -Reserved458 = 0x000001ca, -Reserved459 = 0x000001cb, -Reserved460 = 0x000001cc, -Reserved461 = 0x000001cd, -Reserved462 = 0x000001ce, -Reserved463 = 0x000001cf, -Reserved464 = 0x000001d0, -Reserved465 = 0x000001d1, -Reserved466 = 0x000001d2, -Reserved467 = 0x000001d3, -Reserved468 = 0x000001d4, -Reserved469 = 0x000001d5, -Reserved470 = 0x000001d6, -Reserved471 = 0x000001d7, -Reserved472 = 0x000001d8, -Reserved473 = 0x000001d9, -Reserved474 = 0x000001da, -Reserved475 = 0x000001db, -Reserved476 = 0x000001dc, -Reserved477 = 0x000001dd, -Reserved478 = 0x000001de, -Reserved479 = 0x000001df, -Reserved480 = 
0x000001e0, -Reserved481 = 0x000001e1, -Reserved482 = 0x000001e2, -Reserved483 = 0x000001e3, -Reserved484 = 0x000001e4, -Reserved485 = 0x000001e5, -Reserved486 = 0x000001e6, -Reserved487 = 0x000001e7, -Reserved488 = 0x000001e8, -Reserved489 = 0x000001e9, -Reserved490 = 0x000001ea, -Reserved491 = 0x000001eb, -Reserved492 = 0x000001ec, -Reserved493 = 0x000001ed, -Reserved494 = 0x000001ee, -Reserved495 = 0x000001ef, -Reserved496 = 0x000001f0, -Reserved497 = 0x000001f1, -Reserved498 = 0x000001f2, -Reserved499 = 0x000001f3, -Reserved500 = 0x000001f4, -Reserved501 = 0x000001f5, -Reserved502 = 0x000001f6, -Reserved503 = 0x000001f7, -Reserved504 = 0x000001f8, -Reserved505 = 0x000001f9, -Reserved506 = 0x000001fa, -Reserved507 = 0x000001fb, -Reserved508 = 0x000001fc, -Reserved509 = 0x000001fd, -Reserved510 = 0x000001fe, -Reserved511 = 0x000001ff, -} IH_PERF_SEL; - -/******************************************************* - * SEM Enums - *******************************************************/ - -/* - * SEM_PERF_SEL enum - */ - -typedef enum SEM_PERF_SEL { -SEM_PERF_SEL_CYCLE = 0x00000000, -SEM_PERF_SEL_IDLE = 0x00000001, -SEM_PERF_SEL_SDMA0_REQ_SIGNAL = 0x00000002, -SEM_PERF_SEL_SDMA1_REQ_SIGNAL = 0x00000003, -SEM_PERF_SEL_UVD_REQ_SIGNAL = 0x00000004, -SEM_PERF_SEL_VCE0_REQ_SIGNAL = 0x00000005, -SEM_PERF_SEL_ACP_REQ_SIGNAL = 0x00000006, -SEM_PERF_SEL_ISP_REQ_SIGNAL = 0x00000007, -SEM_PERF_SEL_VCE1_REQ_SIGNAL = 0x00000008, -SEM_PERF_SEL_VP8_REQ_SIGNAL = 0x00000009, -SEM_PERF_SEL_CPG_E0_REQ_SIGNAL = 0x0000000a, -SEM_PERF_SEL_CPG_E1_REQ_SIGNAL = 0x0000000b, -SEM_PERF_SEL_CPC1_IMME_E0_REQ_SIGNAL = 0x0000000c, -SEM_PERF_SEL_CPC1_IMME_E1_REQ_SIGNAL = 0x0000000d, -SEM_PERF_SEL_CPC1_IMME_E2_REQ_SIGNAL = 0x0000000e, -SEM_PERF_SEL_CPC1_IMME_E3_REQ_SIGNAL = 0x0000000f, -SEM_PERF_SEL_CPC2_IMME_E0_REQ_SIGNAL = 0x00000010, -SEM_PERF_SEL_CPC2_IMME_E1_REQ_SIGNAL = 0x00000011, -SEM_PERF_SEL_CPC2_IMME_E2_REQ_SIGNAL = 0x00000012, -SEM_PERF_SEL_CPC2_IMME_E3_REQ_SIGNAL = 0x00000013, 
-SEM_PERF_SEL_SDMA0_REQ_WAIT = 0x00000014, -SEM_PERF_SEL_SDMA1_REQ_WAIT = 0x00000015, -SEM_PERF_SEL_UVD_REQ_WAIT = 0x00000016, -SEM_PERF_SEL_VCE0_REQ_WAIT = 0x00000017, -SEM_PERF_SEL_ACP_REQ_WAIT = 0x00000018, -SEM_PERF_SEL_ISP_REQ_WAIT = 0x00000019, -SEM_PERF_SEL_VCE1_REQ_WAIT = 0x0000001a, -SEM_PERF_SEL_VP8_REQ_WAIT = 0x0000001b, -SEM_PERF_SEL_CPG_E0_REQ_WAIT = 0x0000001c, -SEM_PERF_SEL_CPG_E1_REQ_WAIT = 0x0000001d, -SEM_PERF_SEL_CPC1_IMME_E0_REQ_WAIT = 0x0000001e, -SEM_PERF_SEL_CPC1_IMME_E1_REQ_WAIT = 0x0000001f, -SEM_PERF_SEL_CPC1_IMME_E2_REQ_WAIT = 0x00000020, -SEM_PERF_SEL_CPC1_IMME_E3_REQ_WAIT = 0x00000021, -SEM_PERF_SEL_CPC2_IMME_E0_REQ_WAIT = 0x00000022, -SEM_PERF_SEL_CPC2_IMME_E1_REQ_WAIT = 0x00000023, -SEM_PERF_SEL_CPC2_IMME_E2_REQ_WAIT = 0x00000024, -SEM_PERF_SEL_CPC2_IMME_E3_REQ_WAIT = 0x00000025, -SEM_PERF_SEL_CPC1_OFFL_E0_REQ_WAIT = 0x00000026, -SEM_PERF_SEL_CPC1_OFFL_E1_REQ_WAIT = 0x00000027, -SEM_PERF_SEL_CPC1_OFFL_E2_REQ_WAIT = 0x00000028, -SEM_PERF_SEL_CPC1_OFFL_E3_REQ_WAIT = 0x00000029, -SEM_PERF_SEL_CPC1_OFFL_E4_REQ_WAIT = 0x0000002a, -SEM_PERF_SEL_CPC1_OFFL_E5_REQ_WAIT = 0x0000002b, -SEM_PERF_SEL_CPC1_OFFL_E6_REQ_WAIT = 0x0000002c, -SEM_PERF_SEL_CPC1_OFFL_E7_REQ_WAIT = 0x0000002d, -SEM_PERF_SEL_CPC1_OFFL_E8_REQ_WAIT = 0x0000002e, -SEM_PERF_SEL_CPC1_OFFL_E9_REQ_WAIT = 0x0000002f, -SEM_PERF_SEL_CPC1_OFFL_E10_REQ_WAIT = 0x00000030, -SEM_PERF_SEL_CPC1_OFFL_E11_REQ_WAIT = 0x00000031, -SEM_PERF_SEL_CPC1_OFFL_E12_REQ_WAIT = 0x00000032, -SEM_PERF_SEL_CPC1_OFFL_E13_REQ_WAIT = 0x00000033, -SEM_PERF_SEL_CPC1_OFFL_E14_REQ_WAIT = 0x00000034, -SEM_PERF_SEL_CPC1_OFFL_E15_REQ_WAIT = 0x00000035, -SEM_PERF_SEL_CPC1_OFFL_E16_REQ_WAIT = 0x00000036, -SEM_PERF_SEL_CPC1_OFFL_E17_REQ_WAIT = 0x00000037, -SEM_PERF_SEL_CPC1_OFFL_E18_REQ_WAIT = 0x00000038, -SEM_PERF_SEL_CPC1_OFFL_E19_REQ_WAIT = 0x00000039, -SEM_PERF_SEL_CPC1_OFFL_E20_REQ_WAIT = 0x0000003a, -SEM_PERF_SEL_CPC1_OFFL_E21_REQ_WAIT = 0x0000003b, -SEM_PERF_SEL_CPC1_OFFL_E22_REQ_WAIT = 0x0000003c, 
-SEM_PERF_SEL_CPC1_OFFL_E23_REQ_WAIT = 0x0000003d, -SEM_PERF_SEL_CPC1_OFFL_E24_REQ_WAIT = 0x0000003e, -SEM_PERF_SEL_CPC1_OFFL_E25_REQ_WAIT = 0x0000003f, -SEM_PERF_SEL_CPC1_OFFL_E26_REQ_WAIT = 0x00000040, -SEM_PERF_SEL_CPC1_OFFL_E27_REQ_WAIT = 0x00000041, -SEM_PERF_SEL_CPC1_OFFL_E28_REQ_WAIT = 0x00000042, -SEM_PERF_SEL_CPC1_OFFL_E29_REQ_WAIT = 0x00000043, -SEM_PERF_SEL_CPC1_OFFL_E30_REQ_WAIT = 0x00000044, -SEM_PERF_SEL_CPC1_OFFL_E31_REQ_WAIT = 0x00000045, -SEM_PERF_SEL_CPC2_OFFL_E0_REQ_WAIT = 0x00000046, -SEM_PERF_SEL_CPC2_OFFL_E1_REQ_WAIT = 0x00000047, -SEM_PERF_SEL_CPC2_OFFL_E2_REQ_WAIT = 0x00000048, -SEM_PERF_SEL_CPC2_OFFL_E3_REQ_WAIT = 0x00000049, -SEM_PERF_SEL_CPC2_OFFL_E4_REQ_WAIT = 0x0000004a, -SEM_PERF_SEL_CPC2_OFFL_E5_REQ_WAIT = 0x0000004b, -SEM_PERF_SEL_CPC2_OFFL_E6_REQ_WAIT = 0x0000004c, -SEM_PERF_SEL_CPC2_OFFL_E7_REQ_WAIT = 0x0000004d, -SEM_PERF_SEL_CPC2_OFFL_E8_REQ_WAIT = 0x0000004e, -SEM_PERF_SEL_CPC2_OFFL_E9_REQ_WAIT = 0x0000004f, -SEM_PERF_SEL_CPC2_OFFL_E10_REQ_WAIT = 0x00000050, -SEM_PERF_SEL_CPC2_OFFL_E11_REQ_WAIT = 0x00000051, -SEM_PERF_SEL_CPC2_OFFL_E12_REQ_WAIT = 0x00000052, -SEM_PERF_SEL_CPC2_OFFL_E13_REQ_WAIT = 0x00000053, -SEM_PERF_SEL_CPC2_OFFL_E14_REQ_WAIT = 0x00000054, -SEM_PERF_SEL_CPC2_OFFL_E15_REQ_WAIT = 0x00000055, -SEM_PERF_SEL_CPC2_OFFL_E16_REQ_WAIT = 0x00000056, -SEM_PERF_SEL_CPC2_OFFL_E17_REQ_WAIT = 0x00000057, -SEM_PERF_SEL_CPC2_OFFL_E18_REQ_WAIT = 0x00000058, -SEM_PERF_SEL_CPC2_OFFL_E19_REQ_WAIT = 0x00000059, -SEM_PERF_SEL_CPC2_OFFL_E20_REQ_WAIT = 0x0000005a, -SEM_PERF_SEL_CPC2_OFFL_E21_REQ_WAIT = 0x0000005b, -SEM_PERF_SEL_CPC2_OFFL_E22_REQ_WAIT = 0x0000005c, -SEM_PERF_SEL_CPC2_OFFL_E23_REQ_WAIT = 0x0000005d, -SEM_PERF_SEL_CPC2_OFFL_E24_REQ_WAIT = 0x0000005e, -SEM_PERF_SEL_CPC2_OFFL_E25_REQ_WAIT = 0x0000005f, -SEM_PERF_SEL_CPC2_OFFL_E26_REQ_WAIT = 0x00000060, -SEM_PERF_SEL_CPC2_OFFL_E27_REQ_WAIT = 0x00000061, -SEM_PERF_SEL_CPC2_OFFL_E28_REQ_WAIT = 0x00000062, -SEM_PERF_SEL_CPC2_OFFL_E29_REQ_WAIT = 0x00000063, 
-SEM_PERF_SEL_CPC2_OFFL_E30_REQ_WAIT = 0x00000064, -SEM_PERF_SEL_CPC2_OFFL_E31_REQ_WAIT = 0x00000065, -SEM_PERF_SEL_CPC1_OFFL_E0_POLL_WAIT = 0x00000066, -SEM_PERF_SEL_CPC1_OFFL_E1_POLL_WAIT = 0x00000067, -SEM_PERF_SEL_CPC1_OFFL_E2_POLL_WAIT = 0x00000068, -SEM_PERF_SEL_CPC1_OFFL_E3_POLL_WAIT = 0x00000069, -SEM_PERF_SEL_CPC1_OFFL_E4_POLL_WAIT = 0x0000006a, -SEM_PERF_SEL_CPC1_OFFL_E5_POLL_WAIT = 0x0000006b, -SEM_PERF_SEL_CPC1_OFFL_E6_POLL_WAIT = 0x0000006c, -SEM_PERF_SEL_CPC1_OFFL_E7_POLL_WAIT = 0x0000006d, -SEM_PERF_SEL_CPC1_OFFL_E8_POLL_WAIT = 0x0000006e, -SEM_PERF_SEL_CPC1_OFFL_E9_POLL_WAIT = 0x0000006f, -SEM_PERF_SEL_CPC1_OFFL_E10_POLL_WAIT = 0x00000070, -SEM_PERF_SEL_CPC1_OFFL_E11_POLL_WAIT = 0x00000071, -SEM_PERF_SEL_CPC1_OFFL_E12_POLL_WAIT = 0x00000072, -SEM_PERF_SEL_CPC1_OFFL_E13_POLL_WAIT = 0x00000073, -SEM_PERF_SEL_CPC1_OFFL_E14_POLL_WAIT = 0x00000074, -SEM_PERF_SEL_CPC1_OFFL_E15_POLL_WAIT = 0x00000075, -SEM_PERF_SEL_CPC1_OFFL_E16_POLL_WAIT = 0x00000076, -SEM_PERF_SEL_CPC1_OFFL_E17_POLL_WAIT = 0x00000077, -SEM_PERF_SEL_CPC1_OFFL_E18_POLL_WAIT = 0x00000078, -SEM_PERF_SEL_CPC1_OFFL_E19_POLL_WAIT = 0x00000079, -SEM_PERF_SEL_CPC1_OFFL_E20_POLL_WAIT = 0x0000007a, -SEM_PERF_SEL_CPC1_OFFL_E21_POLL_WAIT = 0x0000007b, -SEM_PERF_SEL_CPC1_OFFL_E22_POLL_WAIT = 0x0000007c, -SEM_PERF_SEL_CPC1_OFFL_E23_POLL_WAIT = 0x0000007d, -SEM_PERF_SEL_CPC1_OFFL_E24_POLL_WAIT = 0x0000007e, -SEM_PERF_SEL_CPC1_OFFL_E25_POLL_WAIT = 0x0000007f, -SEM_PERF_SEL_CPC1_OFFL_E26_POLL_WAIT = 0x00000080, -SEM_PERF_SEL_CPC1_OFFL_E27_POLL_WAIT = 0x00000081, -SEM_PERF_SEL_CPC1_OFFL_E28_POLL_WAIT = 0x00000082, -SEM_PERF_SEL_CPC1_OFFL_E29_POLL_WAIT = 0x00000083, -SEM_PERF_SEL_CPC1_OFFL_E30_POLL_WAIT = 0x00000084, -SEM_PERF_SEL_CPC1_OFFL_E31_POLL_WAIT = 0x00000085, -SEM_PERF_SEL_CPC2_OFFL_E0_POLL_WAIT = 0x00000086, -SEM_PERF_SEL_CPC2_OFFL_E1_POLL_WAIT = 0x00000087, -SEM_PERF_SEL_CPC2_OFFL_E2_POLL_WAIT = 0x00000088, -SEM_PERF_SEL_CPC2_OFFL_E3_POLL_WAIT = 0x00000089, -SEM_PERF_SEL_CPC2_OFFL_E4_POLL_WAIT = 
0x0000008a, -SEM_PERF_SEL_CPC2_OFFL_E5_POLL_WAIT = 0x0000008b, -SEM_PERF_SEL_CPC2_OFFL_E6_POLL_WAIT = 0x0000008c, -SEM_PERF_SEL_CPC2_OFFL_E7_POLL_WAIT = 0x0000008d, -SEM_PERF_SEL_CPC2_OFFL_E8_POLL_WAIT = 0x0000008e, -SEM_PERF_SEL_CPC2_OFFL_E9_POLL_WAIT = 0x0000008f, -SEM_PERF_SEL_CPC2_OFFL_E10_POLL_WAIT = 0x00000090, -SEM_PERF_SEL_CPC2_OFFL_E11_POLL_WAIT = 0x00000091, -SEM_PERF_SEL_CPC2_OFFL_E12_POLL_WAIT = 0x00000092, -SEM_PERF_SEL_CPC2_OFFL_E13_POLL_WAIT = 0x00000093, -SEM_PERF_SEL_CPC2_OFFL_E14_POLL_WAIT = 0x00000094, -SEM_PERF_SEL_CPC2_OFFL_E15_POLL_WAIT = 0x00000095, -SEM_PERF_SEL_CPC2_OFFL_E16_POLL_WAIT = 0x00000096, -SEM_PERF_SEL_CPC2_OFFL_E17_POLL_WAIT = 0x00000097, -SEM_PERF_SEL_CPC2_OFFL_E18_POLL_WAIT = 0x00000098, -SEM_PERF_SEL_CPC2_OFFL_E19_POLL_WAIT = 0x00000099, -SEM_PERF_SEL_CPC2_OFFL_E20_POLL_WAIT = 0x0000009a, -SEM_PERF_SEL_CPC2_OFFL_E21_POLL_WAIT = 0x0000009b, -SEM_PERF_SEL_CPC2_OFFL_E22_POLL_WAIT = 0x0000009c, -SEM_PERF_SEL_CPC2_OFFL_E23_POLL_WAIT = 0x0000009d, -SEM_PERF_SEL_CPC2_OFFL_E24_POLL_WAIT = 0x0000009e, -SEM_PERF_SEL_CPC2_OFFL_E25_POLL_WAIT = 0x0000009f, -SEM_PERF_SEL_CPC2_OFFL_E26_POLL_WAIT = 0x000000a0, -SEM_PERF_SEL_CPC2_OFFL_E27_POLL_WAIT = 0x000000a1, -SEM_PERF_SEL_CPC2_OFFL_E28_POLL_WAIT = 0x000000a2, -SEM_PERF_SEL_CPC2_OFFL_E29_POLL_WAIT = 0x000000a3, -SEM_PERF_SEL_CPC2_OFFL_E30_POLL_WAIT = 0x000000a4, -SEM_PERF_SEL_CPC2_OFFL_E31_POLL_WAIT = 0x000000a5, -SEM_PERF_SEL_MC_RD_REQ = 0x000000a6, -SEM_PERF_SEL_MC_RD_RET = 0x000000a7, -SEM_PERF_SEL_MC_WR_REQ = 0x000000a8, -SEM_PERF_SEL_MC_WR_RET = 0x000000a9, -SEM_PERF_SEL_ATC_REQ = 0x000000aa, -SEM_PERF_SEL_ATC_RET = 0x000000ab, -SEM_PERF_SEL_ATC_XNACK = 0x000000ac, -SEM_PERF_SEL_ATC_INVALIDATION = 0x000000ad, -} SEM_PERF_SEL; - -/******************************************************* - * SDMA Enums - *******************************************************/ - -/* - * SDMA_PERF_SEL enum - */ - -typedef enum SDMA_PERF_SEL { -SDMA_PERF_SEL_CYCLE = 0x00000000, -SDMA_PERF_SEL_IDLE = 
0x00000001, -SDMA_PERF_SEL_REG_IDLE = 0x00000002, -SDMA_PERF_SEL_RB_EMPTY = 0x00000003, -SDMA_PERF_SEL_RB_FULL = 0x00000004, -SDMA_PERF_SEL_RB_WPTR_WRAP = 0x00000005, -SDMA_PERF_SEL_RB_RPTR_WRAP = 0x00000006, -SDMA_PERF_SEL_RB_WPTR_POLL_READ = 0x00000007, -SDMA_PERF_SEL_RB_RPTR_WB = 0x00000008, -SDMA_PERF_SEL_RB_CMD_IDLE = 0x00000009, -SDMA_PERF_SEL_RB_CMD_FULL = 0x0000000a, -SDMA_PERF_SEL_IB_CMD_IDLE = 0x0000000b, -SDMA_PERF_SEL_IB_CMD_FULL = 0x0000000c, -SDMA_PERF_SEL_EX_IDLE = 0x0000000d, -SDMA_PERF_SEL_SRBM_REG_SEND = 0x0000000e, -SDMA_PERF_SEL_EX_IDLE_POLL_TIMER_EXPIRE = 0x0000000f, -SDMA_PERF_SEL_MC_WR_IDLE = 0x00000010, -SDMA_PERF_SEL_MC_WR_COUNT = 0x00000011, -SDMA_PERF_SEL_MC_RD_IDLE = 0x00000012, -SDMA_PERF_SEL_MC_RD_COUNT = 0x00000013, -SDMA_PERF_SEL_MC_RD_RET_STALL = 0x00000014, -SDMA_PERF_SEL_MC_RD_NO_POLL_IDLE = 0x00000015, -SDMA_PERF_SEL_DRM_IDLE = 0x00000016, -SDMA_PERF_SEL_DRM_REQ_STALL = 0x00000017, -SDMA_PERF_SEL_SEM_IDLE = 0x00000018, -SDMA_PERF_SEL_SEM_REQ_STALL = 0x00000019, -SDMA_PERF_SEL_SEM_REQ_COUNT = 0x0000001a, -SDMA_PERF_SEL_SEM_RESP_INCOMPLETE = 0x0000001b, -SDMA_PERF_SEL_SEM_RESP_FAIL = 0x0000001c, -SDMA_PERF_SEL_SEM_RESP_PASS = 0x0000001d, -SDMA_PERF_SEL_INT_IDLE = 0x0000001e, -SDMA_PERF_SEL_INT_REQ_STALL = 0x0000001f, -SDMA_PERF_SEL_INT_REQ_COUNT = 0x00000020, -SDMA_PERF_SEL_INT_RESP_ACCEPTED = 0x00000021, -SDMA_PERF_SEL_INT_RESP_RETRY = 0x00000022, -SDMA_PERF_SEL_NUM_PACKET = 0x00000023, -SDMA_PERF_SEL_DRM1_REQ_STALL = 0x00000024, -SDMA_PERF_SEL_CE_WREQ_IDLE = 0x00000025, -SDMA_PERF_SEL_CE_WR_IDLE = 0x00000026, -SDMA_PERF_SEL_CE_SPLIT_IDLE = 0x00000027, -SDMA_PERF_SEL_CE_RREQ_IDLE = 0x00000028, -SDMA_PERF_SEL_CE_OUT_IDLE = 0x00000029, -SDMA_PERF_SEL_CE_IN_IDLE = 0x0000002a, -SDMA_PERF_SEL_CE_DST_IDLE = 0x0000002b, -SDMA_PERF_SEL_CE_DRM_IDLE = 0x0000002c, -SDMA_PERF_SEL_CE_DRM1_IDLE = 0x0000002d, -SDMA_PERF_SEL_CE_AFIFO_FULL = 0x0000002e, -SDMA_PERF_SEL_CE_DRM_FULL = 0x0000002f, -SDMA_PERF_SEL_CE_DRM1_FULL = 0x00000030, 
-SDMA_PERF_SEL_CE_INFO_FULL = 0x00000031, -SDMA_PERF_SEL_CE_INFO1_FULL = 0x00000032, -SDMA_PERF_SEL_CE_RD_STALL = 0x00000033, -SDMA_PERF_SEL_CE_WR_STALL = 0x00000034, -SDMA_PERF_SEL_GFX_SELECT = 0x00000035, -SDMA_PERF_SEL_RLC0_SELECT = 0x00000036, -SDMA_PERF_SEL_RLC1_SELECT = 0x00000037, -SDMA_PERF_SEL_PAGE_SELECT = 0x00000038, -SDMA_PERF_SEL_CTX_CHANGE = 0x00000039, -SDMA_PERF_SEL_CTX_CHANGE_EXPIRED = 0x0000003a, -SDMA_PERF_SEL_CTX_CHANGE_EXCEPTION = 0x0000003b, -SDMA_PERF_SEL_DOORBELL = 0x0000003c, -SDMA_PERF_SEL_RD_BA_RTR = 0x0000003d, -SDMA_PERF_SEL_WR_BA_RTR = 0x0000003e, -SDMA_PERF_SEL_F32_L1_WR_VLD = 0x0000003f, -SDMA_PERF_SEL_CE_L1_WR_VLD = 0x00000040, -SDMA_PERF_SEL_CE_L1_STALL = 0x00000041, -SDMA_PERF_SEL_SDMA_INVACK_NFLUSH = 0x00000042, -SDMA_PERF_SEL_SDMA_INVACK_FLUSH = 0x00000043, -SDMA_PERF_SEL_ATCL2_INVREQ_NFLUSH = 0x00000044, -SDMA_PERF_SEL_ATCL2_INVREQ_FLUSH = 0x00000045, -SDMA_PERF_SEL_ATCL2_RET_XNACK = 0x00000046, -SDMA_PERF_SEL_ATCL2_RET_ACK = 0x00000047, -SDMA_PERF_SEL_ATCL2_FREE = 0x00000048, -SDMA_PERF_SEL_SDMA_ATCL2_SEND = 0x00000049, -SDMA_PERF_SEL_DMA_L1_WR_SEND = 0x0000004a, -SDMA_PERF_SEL_DMA_L1_RD_SEND = 0x0000004b, -SDMA_PERF_SEL_DMA_MC_WR_SEND = 0x0000004c, -SDMA_PERF_SEL_DMA_MC_RD_SEND = 0x0000004d, -SDMA_PERF_SEL_L1_WR_FIFO_IDLE = 0x0000004e, -SDMA_PERF_SEL_L1_RD_FIFO_IDLE = 0x0000004f, -SDMA_PERF_SEL_L1_WRL2_IDLE = 0x00000050, -SDMA_PERF_SEL_L1_RDL2_IDLE = 0x00000051, -SDMA_PERF_SEL_L1_WRMC_IDLE = 0x00000052, -SDMA_PERF_SEL_L1_RDMC_IDLE = 0x00000053, -SDMA_PERF_SEL_L1_WR_INV_IDLE = 0x00000054, -SDMA_PERF_SEL_L1_RD_INV_IDLE = 0x00000055, -SDMA_PERF_SEL_L1_WR_INV_EN = 0x00000056, -SDMA_PERF_SEL_L1_RD_INV_EN = 0x00000057, -SDMA_PERF_SEL_L1_WR_WAIT_INVADR = 0x00000058, -SDMA_PERF_SEL_L1_RD_WAIT_INVADR = 0x00000059, -SDMA_PERF_SEL_IS_INVREQ_ADDR_WR = 0x0000005a, -SDMA_PERF_SEL_IS_INVREQ_ADDR_RD = 0x0000005b, -SDMA_PERF_SEL_L1_WR_XNACK_TIMEOUT = 0x0000005c, -SDMA_PERF_SEL_L1_RD_XNACK_TIMEOUT = 0x0000005d, -SDMA_PERF_SEL_L1_INV_MIDDLE = 
0x0000005e, -SDMA_PERF_SEL_UTCL1_TAG_DELAY_COUNTER = 0x000000fe, -SDMA_PERF_SEL_MMHUB_TAG_DELAY_COUNTER = 0x000000ff, -} SDMA_PERF_SEL; - -/******************************************************* - * SMUIO Enums - *******************************************************/ - -/* - * ROM_SIGNATURE value - */ - -#define ROM_SIGNATURE 0x0000aa55 - -/******************************************************* - * GDS Enums - *******************************************************/ - -/******************************************************* - * CB Enums - *******************************************************/ - -/* - * SurfaceNumber enum - */ - -typedef enum SurfaceNumber { -NUMBER_UNORM = 0x00000000, -NUMBER_SNORM = 0x00000001, -NUMBER_USCALED = 0x00000002, -NUMBER_SSCALED = 0x00000003, -NUMBER_UINT = 0x00000004, -NUMBER_SINT = 0x00000005, -NUMBER_SRGB = 0x00000006, -NUMBER_FLOAT = 0x00000007, -} SurfaceNumber; - -/* - * SurfaceSwap enum - */ - -typedef enum SurfaceSwap { -SWAP_STD = 0x00000000, -SWAP_ALT = 0x00000001, -SWAP_STD_REV = 0x00000002, -SWAP_ALT_REV = 0x00000003, -} SurfaceSwap; - -/* - * CBMode enum - */ - -typedef enum CBMode { -CB_DISABLE = 0x00000000, -CB_NORMAL = 0x00000001, -CB_ELIMINATE_FAST_CLEAR = 0x00000002, -CB_RESOLVE = 0x00000003, -CB_DECOMPRESS = 0x00000004, -CB_FMASK_DECOMPRESS = 0x00000005, -CB_DCC_DECOMPRESS = 0x00000006, -} CBMode; - -/* - * RoundMode enum - */ - -typedef enum RoundMode { -ROUND_BY_HALF = 0x00000000, -ROUND_TRUNCATE = 0x00000001, -} RoundMode; - -/* - * SourceFormat enum - */ - -typedef enum SourceFormat { -EXPORT_4C_32BPC = 0x00000000, -EXPORT_4C_16BPC = 0x00000001, -EXPORT_2C_32BPC_GR = 0x00000002, -EXPORT_2C_32BPC_AR = 0x00000003, -} SourceFormat; - -/* - * BlendOp enum - */ - -typedef enum BlendOp { -BLEND_ZERO = 0x00000000, -BLEND_ONE = 0x00000001, -BLEND_SRC_COLOR = 0x00000002, -BLEND_ONE_MINUS_SRC_COLOR = 0x00000003, -BLEND_SRC_ALPHA = 0x00000004, -BLEND_ONE_MINUS_SRC_ALPHA = 0x00000005, -BLEND_DST_ALPHA = 0x00000006, 
-BLEND_ONE_MINUS_DST_ALPHA = 0x00000007, -BLEND_DST_COLOR = 0x00000008, -BLEND_ONE_MINUS_DST_COLOR = 0x00000009, -BLEND_SRC_ALPHA_SATURATE = 0x0000000a, -BLEND_BOTH_SRC_ALPHA = 0x0000000b, -BLEND_BOTH_INV_SRC_ALPHA = 0x0000000c, -BLEND_CONSTANT_COLOR = 0x0000000d, -BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0000000e, -BLEND_SRC1_COLOR = 0x0000000f, -BLEND_INV_SRC1_COLOR = 0x00000010, -BLEND_SRC1_ALPHA = 0x00000011, -BLEND_INV_SRC1_ALPHA = 0x00000012, -BLEND_CONSTANT_ALPHA = 0x00000013, -BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x00000014, -} BlendOp; - -/* - * CombFunc enum - */ - -typedef enum CombFunc { -COMB_DST_PLUS_SRC = 0x00000000, -COMB_SRC_MINUS_DST = 0x00000001, -COMB_MIN_DST_SRC = 0x00000002, -COMB_MAX_DST_SRC = 0x00000003, -COMB_DST_MINUS_SRC = 0x00000004, -} CombFunc; - -/* - * BlendOpt enum - */ - -typedef enum BlendOpt { -FORCE_OPT_AUTO = 0x00000000, -FORCE_OPT_DISABLE = 0x00000001, -FORCE_OPT_ENABLE_IF_SRC_A_0 = 0x00000002, -FORCE_OPT_ENABLE_IF_SRC_RGB_0 = 0x00000003, -FORCE_OPT_ENABLE_IF_SRC_ARGB_0 = 0x00000004, -FORCE_OPT_ENABLE_IF_SRC_A_1 = 0x00000005, -FORCE_OPT_ENABLE_IF_SRC_RGB_1 = 0x00000006, -FORCE_OPT_ENABLE_IF_SRC_ARGB_1 = 0x00000007, -} BlendOpt; - -/* - * CmaskCode enum - */ - -typedef enum CmaskCode { -CMASK_CLR00_F0 = 0x00000000, -CMASK_CLR00_F1 = 0x00000001, -CMASK_CLR00_F2 = 0x00000002, -CMASK_CLR00_FX = 0x00000003, -CMASK_CLR01_F0 = 0x00000004, -CMASK_CLR01_F1 = 0x00000005, -CMASK_CLR01_F2 = 0x00000006, -CMASK_CLR01_FX = 0x00000007, -CMASK_CLR10_F0 = 0x00000008, -CMASK_CLR10_F1 = 0x00000009, -CMASK_CLR10_F2 = 0x0000000a, -CMASK_CLR10_FX = 0x0000000b, -CMASK_CLR11_F0 = 0x0000000c, -CMASK_CLR11_F1 = 0x0000000d, -CMASK_CLR11_F2 = 0x0000000e, -CMASK_CLR11_FX = 0x0000000f, -} CmaskCode; - -/* - * CmaskAddr enum - */ - -typedef enum CmaskAddr { -CMASK_ADDR_TILED = 0x00000000, -CMASK_ADDR_LINEAR = 0x00000001, -CMASK_ADDR_COMPATIBLE = 0x00000002, -} CmaskAddr; - -/* - * MemArbMode enum - */ - -typedef enum MemArbMode { -MEM_ARB_MODE_FIXED = 0x00000000, 
-MEM_ARB_MODE_AGE = 0x00000001, -MEM_ARB_MODE_WEIGHT = 0x00000002, -MEM_ARB_MODE_BOTH = 0x00000003, -} MemArbMode; - -/* - * CBPerfSel enum - */ - -typedef enum CBPerfSel { -CB_PERF_SEL_NONE = 0x00000000, -CB_PERF_SEL_BUSY = 0x00000001, -CB_PERF_SEL_CORE_SCLK_VLD = 0x00000002, -CB_PERF_SEL_REG_SCLK0_VLD = 0x00000003, -CB_PERF_SEL_REG_SCLK1_VLD = 0x00000004, -CB_PERF_SEL_DRAWN_QUAD = 0x00000005, -CB_PERF_SEL_DRAWN_PIXEL = 0x00000006, -CB_PERF_SEL_DRAWN_QUAD_FRAGMENT = 0x00000007, -CB_PERF_SEL_DRAWN_TILE = 0x00000008, -CB_PERF_SEL_DB_CB_TILE_VALID_READY = 0x00000009, -CB_PERF_SEL_DB_CB_TILE_VALID_READYB = 0x0000000a, -CB_PERF_SEL_DB_CB_TILE_VALIDB_READY = 0x0000000b, -CB_PERF_SEL_DB_CB_TILE_VALIDB_READYB = 0x0000000c, -CB_PERF_SEL_CM_FC_TILE_VALID_READY = 0x0000000d, -CB_PERF_SEL_CM_FC_TILE_VALID_READYB = 0x0000000e, -CB_PERF_SEL_CM_FC_TILE_VALIDB_READY = 0x0000000f, -CB_PERF_SEL_CM_FC_TILE_VALIDB_READYB = 0x00000010, -CB_PERF_SEL_MERGE_TILE_ONLY_VALID_READY = 0x00000011, -CB_PERF_SEL_MERGE_TILE_ONLY_VALID_READYB = 0x00000012, -CB_PERF_SEL_DB_CB_LQUAD_VALID_READY = 0x00000013, -CB_PERF_SEL_DB_CB_LQUAD_VALID_READYB = 0x00000014, -CB_PERF_SEL_DB_CB_LQUAD_VALIDB_READY = 0x00000015, -CB_PERF_SEL_DB_CB_LQUAD_VALIDB_READYB = 0x00000016, -CB_PERF_SEL_LQUAD_NO_TILE = 0x00000017, -CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_32_R = 0x00000018, -CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_32_AR = 0x00000019, -CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_32_GR = 0x0000001a, -CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_32_ABGR = 0x0000001b, -CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_FP16_ABGR = 0x0000001c, -CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_SIGNED16_ABGR = 0x0000001d, -CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_UNSIGNED16_ABGR = 0x0000001e, -CB_PERF_SEL_QUAD_KILLED_BY_EXTRA_PIXEL_EXPORT = 0x0000001f, -CB_PERF_SEL_QUAD_KILLED_BY_COLOR_INVALID = 0x00000020, -CB_PERF_SEL_QUAD_KILLED_BY_NULL_TARGET_SHADER_MASK = 0x00000021, -CB_PERF_SEL_QUAD_KILLED_BY_NULL_SAMPLE_MASK = 0x00000022, -CB_PERF_SEL_QUAD_KILLED_BY_DISCARD_PIXEL = 
0x00000023, -CB_PERF_SEL_FC_CLEAR_QUAD_VALID_READY = 0x00000024, -CB_PERF_SEL_FC_CLEAR_QUAD_VALID_READYB = 0x00000025, -CB_PERF_SEL_FC_CLEAR_QUAD_VALIDB_READY = 0x00000026, -CB_PERF_SEL_FC_CLEAR_QUAD_VALIDB_READYB = 0x00000027, -CB_PERF_SEL_FOP_IN_VALID_READY = 0x00000028, -CB_PERF_SEL_FOP_IN_VALID_READYB = 0x00000029, -CB_PERF_SEL_FOP_IN_VALIDB_READY = 0x0000002a, -CB_PERF_SEL_FOP_IN_VALIDB_READYB = 0x0000002b, -CB_PERF_SEL_FC_CC_QUADFRAG_VALID_READY = 0x0000002c, -CB_PERF_SEL_FC_CC_QUADFRAG_VALID_READYB = 0x0000002d, -CB_PERF_SEL_FC_CC_QUADFRAG_VALIDB_READY = 0x0000002e, -CB_PERF_SEL_FC_CC_QUADFRAG_VALIDB_READYB = 0x0000002f, -CB_PERF_SEL_CC_IB_SR_FRAG_VALID_READY = 0x00000030, -CB_PERF_SEL_CC_IB_SR_FRAG_VALID_READYB = 0x00000031, -CB_PERF_SEL_CC_IB_SR_FRAG_VALIDB_READY = 0x00000032, -CB_PERF_SEL_CC_IB_SR_FRAG_VALIDB_READYB = 0x00000033, -CB_PERF_SEL_CC_IB_TB_FRAG_VALID_READY = 0x00000034, -CB_PERF_SEL_CC_IB_TB_FRAG_VALID_READYB = 0x00000035, -CB_PERF_SEL_CC_IB_TB_FRAG_VALIDB_READY = 0x00000036, -CB_PERF_SEL_CC_IB_TB_FRAG_VALIDB_READYB = 0x00000037, -CB_PERF_SEL_CC_RB_BC_EVENFRAG_VALID_READY = 0x00000038, -CB_PERF_SEL_CC_RB_BC_EVENFRAG_VALID_READYB = 0x00000039, -CB_PERF_SEL_CC_RB_BC_EVENFRAG_VALIDB_READY = 0x0000003a, -CB_PERF_SEL_CC_RB_BC_EVENFRAG_VALIDB_READYB = 0x0000003b, -CB_PERF_SEL_CC_RB_BC_ODDFRAG_VALID_READY = 0x0000003c, -CB_PERF_SEL_CC_RB_BC_ODDFRAG_VALID_READYB = 0x0000003d, -CB_PERF_SEL_CC_RB_BC_ODDFRAG_VALIDB_READY = 0x0000003e, -CB_PERF_SEL_CC_RB_BC_ODDFRAG_VALIDB_READYB = 0x0000003f, -CB_PERF_SEL_CC_BC_CS_FRAG_VALID = 0x00000040, -CB_PERF_SEL_CM_CACHE_HIT = 0x00000041, -CB_PERF_SEL_CM_CACHE_TAG_MISS = 0x00000042, -CB_PERF_SEL_CM_CACHE_SECTOR_MISS = 0x00000043, -CB_PERF_SEL_CM_CACHE_REEVICTION_STALL = 0x00000044, -CB_PERF_SEL_CM_CACHE_EVICT_NONZERO_INFLIGHT_STALL = 0x00000045, -CB_PERF_SEL_CM_CACHE_REPLACE_PENDING_EVICT_STALL = 0x00000046, -CB_PERF_SEL_CM_CACHE_INFLIGHT_COUNTER_MAXIMUM_STALL = 0x00000047, -CB_PERF_SEL_CM_CACHE_READ_OUTPUT_STALL = 
0x00000048, -CB_PERF_SEL_CM_CACHE_WRITE_OUTPUT_STALL = 0x00000049, -CB_PERF_SEL_CM_CACHE_ACK_OUTPUT_STALL = 0x0000004a, -CB_PERF_SEL_CM_CACHE_STALL = 0x0000004b, -CB_PERF_SEL_CM_CACHE_FLUSH = 0x0000004c, -CB_PERF_SEL_CM_CACHE_TAGS_FLUSHED = 0x0000004d, -CB_PERF_SEL_CM_CACHE_SECTORS_FLUSHED = 0x0000004e, -CB_PERF_SEL_CM_CACHE_DIRTY_SECTORS_FLUSHED = 0x0000004f, -CB_PERF_SEL_FC_CACHE_HIT = 0x00000050, -CB_PERF_SEL_FC_CACHE_TAG_MISS = 0x00000051, -CB_PERF_SEL_FC_CACHE_SECTOR_MISS = 0x00000052, -CB_PERF_SEL_FC_CACHE_REEVICTION_STALL = 0x00000053, -CB_PERF_SEL_FC_CACHE_EVICT_NONZERO_INFLIGHT_STALL = 0x00000054, -CB_PERF_SEL_FC_CACHE_REPLACE_PENDING_EVICT_STALL = 0x00000055, -CB_PERF_SEL_FC_CACHE_INFLIGHT_COUNTER_MAXIMUM_STALL = 0x00000056, -CB_PERF_SEL_FC_CACHE_READ_OUTPUT_STALL = 0x00000057, -CB_PERF_SEL_FC_CACHE_WRITE_OUTPUT_STALL = 0x00000058, -CB_PERF_SEL_FC_CACHE_ACK_OUTPUT_STALL = 0x00000059, -CB_PERF_SEL_FC_CACHE_STALL = 0x0000005a, -CB_PERF_SEL_FC_CACHE_FLUSH = 0x0000005b, -CB_PERF_SEL_FC_CACHE_TAGS_FLUSHED = 0x0000005c, -CB_PERF_SEL_FC_CACHE_SECTORS_FLUSHED = 0x0000005d, -CB_PERF_SEL_FC_CACHE_DIRTY_SECTORS_FLUSHED = 0x0000005e, -CB_PERF_SEL_CC_CACHE_HIT = 0x0000005f, -CB_PERF_SEL_CC_CACHE_TAG_MISS = 0x00000060, -CB_PERF_SEL_CC_CACHE_SECTOR_MISS = 0x00000061, -CB_PERF_SEL_CC_CACHE_REEVICTION_STALL = 0x00000062, -CB_PERF_SEL_CC_CACHE_EVICT_NONZERO_INFLIGHT_STALL = 0x00000063, -CB_PERF_SEL_CC_CACHE_REPLACE_PENDING_EVICT_STALL = 0x00000064, -CB_PERF_SEL_CC_CACHE_INFLIGHT_COUNTER_MAXIMUM_STALL = 0x00000065, -CB_PERF_SEL_CC_CACHE_READ_OUTPUT_STALL = 0x00000066, -CB_PERF_SEL_CC_CACHE_WRITE_OUTPUT_STALL = 0x00000067, -CB_PERF_SEL_CC_CACHE_ACK_OUTPUT_STALL = 0x00000068, -CB_PERF_SEL_CC_CACHE_STALL = 0x00000069, -CB_PERF_SEL_CC_CACHE_FLUSH = 0x0000006a, -CB_PERF_SEL_CC_CACHE_TAGS_FLUSHED = 0x0000006b, -CB_PERF_SEL_CC_CACHE_SECTORS_FLUSHED = 0x0000006c, -CB_PERF_SEL_CC_CACHE_DIRTY_SECTORS_FLUSHED = 0x0000006d, -CB_PERF_SEL_CC_CACHE_WA_TO_RMW_CONVERSION = 0x0000006e, 
-CB_PERF_SEL_CC_CACHE_READS_SAVED_DUE_TO_DCC = 0x0000006f, -CB_PERF_SEL_CB_TAP_WRREQ_VALID_READY = 0x00000070, -CB_PERF_SEL_CB_TAP_WRREQ_VALID_READYB = 0x00000071, -CB_PERF_SEL_CB_TAP_WRREQ_VALIDB_READY = 0x00000072, -CB_PERF_SEL_CB_TAP_WRREQ_VALIDB_READYB = 0x00000073, -CB_PERF_SEL_CM_MC_WRITE_REQUEST = 0x00000074, -CB_PERF_SEL_FC_MC_WRITE_REQUEST = 0x00000075, -CB_PERF_SEL_CC_MC_WRITE_REQUEST = 0x00000076, -CB_PERF_SEL_CM_MC_WRITE_REQUESTS_IN_FLIGHT = 0x00000077, -CB_PERF_SEL_FC_MC_WRITE_REQUESTS_IN_FLIGHT = 0x00000078, -CB_PERF_SEL_CC_MC_WRITE_REQUESTS_IN_FLIGHT = 0x00000079, -CB_PERF_SEL_CB_TAP_RDREQ_VALID_READY = 0x0000007a, -CB_PERF_SEL_CB_TAP_RDREQ_VALID_READYB = 0x0000007b, -CB_PERF_SEL_CB_TAP_RDREQ_VALIDB_READY = 0x0000007c, -CB_PERF_SEL_CB_TAP_RDREQ_VALIDB_READYB = 0x0000007d, -CB_PERF_SEL_CM_MC_READ_REQUEST = 0x0000007e, -CB_PERF_SEL_FC_MC_READ_REQUEST = 0x0000007f, -CB_PERF_SEL_CC_MC_READ_REQUEST = 0x00000080, -CB_PERF_SEL_CM_MC_READ_REQUESTS_IN_FLIGHT = 0x00000081, -CB_PERF_SEL_FC_MC_READ_REQUESTS_IN_FLIGHT = 0x00000082, -CB_PERF_SEL_CC_MC_READ_REQUESTS_IN_FLIGHT = 0x00000083, -CB_PERF_SEL_CM_TQ_FULL = 0x00000084, -CB_PERF_SEL_CM_TQ_FIFO_TILE_RESIDENCY_STALL = 0x00000085, -CB_PERF_SEL_FC_QUAD_RDLAT_FIFO_FULL = 0x00000086, -CB_PERF_SEL_FC_TILE_RDLAT_FIFO_FULL = 0x00000087, -CB_PERF_SEL_FC_RDLAT_FIFO_QUAD_RESIDENCY_STALL = 0x00000088, -CB_PERF_SEL_FOP_FMASK_RAW_STALL = 0x00000089, -CB_PERF_SEL_FOP_FMASK_BYPASS_STALL = 0x0000008a, -CB_PERF_SEL_CC_SF_FULL = 0x0000008b, -CB_PERF_SEL_CC_RB_FULL = 0x0000008c, -CB_PERF_SEL_CC_EVENFIFO_QUAD_RESIDENCY_STALL = 0x0000008d, -CB_PERF_SEL_CC_ODDFIFO_QUAD_RESIDENCY_STALL = 0x0000008e, -CB_PERF_SEL_BLENDER_RAW_HAZARD_STALL = 0x0000008f, -CB_PERF_SEL_EVENT = 0x00000090, -CB_PERF_SEL_EVENT_CACHE_FLUSH_TS = 0x00000091, -CB_PERF_SEL_EVENT_CONTEXT_DONE = 0x00000092, -CB_PERF_SEL_EVENT_CACHE_FLUSH = 0x00000093, -CB_PERF_SEL_EVENT_CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000094, -CB_PERF_SEL_EVENT_CACHE_FLUSH_AND_INV_EVENT = 
0x00000095, -CB_PERF_SEL_EVENT_FLUSH_AND_INV_CB_DATA_TS = 0x00000096, -CB_PERF_SEL_EVENT_FLUSH_AND_INV_CB_META = 0x00000097, -CB_PERF_SEL_CC_SURFACE_SYNC = 0x00000098, -CB_PERF_SEL_CMASK_READ_DATA_0xC = 0x00000099, -CB_PERF_SEL_CMASK_READ_DATA_0xD = 0x0000009a, -CB_PERF_SEL_CMASK_READ_DATA_0xE = 0x0000009b, -CB_PERF_SEL_CMASK_READ_DATA_0xF = 0x0000009c, -CB_PERF_SEL_CMASK_WRITE_DATA_0xC = 0x0000009d, -CB_PERF_SEL_CMASK_WRITE_DATA_0xD = 0x0000009e, -CB_PERF_SEL_CMASK_WRITE_DATA_0xE = 0x0000009f, -CB_PERF_SEL_CMASK_WRITE_DATA_0xF = 0x000000a0, -CB_PERF_SEL_TWO_PROBE_QUAD_FRAGMENT = 0x000000a1, -CB_PERF_SEL_EXPORT_32_ABGR_QUAD_FRAGMENT = 0x000000a2, -CB_PERF_SEL_DUAL_SOURCE_COLOR_QUAD_FRAGMENT = 0x000000a3, -CB_PERF_SEL_QUAD_HAS_1_FRAGMENT_BEFORE_UPDATE = 0x000000a4, -CB_PERF_SEL_QUAD_HAS_2_FRAGMENTS_BEFORE_UPDATE = 0x000000a5, -CB_PERF_SEL_QUAD_HAS_3_FRAGMENTS_BEFORE_UPDATE = 0x000000a6, -CB_PERF_SEL_QUAD_HAS_4_FRAGMENTS_BEFORE_UPDATE = 0x000000a7, -CB_PERF_SEL_QUAD_HAS_5_FRAGMENTS_BEFORE_UPDATE = 0x000000a8, -CB_PERF_SEL_QUAD_HAS_6_FRAGMENTS_BEFORE_UPDATE = 0x000000a9, -CB_PERF_SEL_QUAD_HAS_7_FRAGMENTS_BEFORE_UPDATE = 0x000000aa, -CB_PERF_SEL_QUAD_HAS_8_FRAGMENTS_BEFORE_UPDATE = 0x000000ab, -CB_PERF_SEL_QUAD_HAS_1_FRAGMENT_AFTER_UPDATE = 0x000000ac, -CB_PERF_SEL_QUAD_HAS_2_FRAGMENTS_AFTER_UPDATE = 0x000000ad, -CB_PERF_SEL_QUAD_HAS_3_FRAGMENTS_AFTER_UPDATE = 0x000000ae, -CB_PERF_SEL_QUAD_HAS_4_FRAGMENTS_AFTER_UPDATE = 0x000000af, -CB_PERF_SEL_QUAD_HAS_5_FRAGMENTS_AFTER_UPDATE = 0x000000b0, -CB_PERF_SEL_QUAD_HAS_6_FRAGMENTS_AFTER_UPDATE = 0x000000b1, -CB_PERF_SEL_QUAD_HAS_7_FRAGMENTS_AFTER_UPDATE = 0x000000b2, -CB_PERF_SEL_QUAD_HAS_8_FRAGMENTS_AFTER_UPDATE = 0x000000b3, -CB_PERF_SEL_QUAD_ADDED_1_FRAGMENT = 0x000000b4, -CB_PERF_SEL_QUAD_ADDED_2_FRAGMENTS = 0x000000b5, -CB_PERF_SEL_QUAD_ADDED_3_FRAGMENTS = 0x000000b6, -CB_PERF_SEL_QUAD_ADDED_4_FRAGMENTS = 0x000000b7, -CB_PERF_SEL_QUAD_ADDED_5_FRAGMENTS = 0x000000b8, -CB_PERF_SEL_QUAD_ADDED_6_FRAGMENTS = 0x000000b9, 
-CB_PERF_SEL_QUAD_ADDED_7_FRAGMENTS = 0x000000ba, -CB_PERF_SEL_QUAD_REMOVED_1_FRAGMENT = 0x000000bb, -CB_PERF_SEL_QUAD_REMOVED_2_FRAGMENTS = 0x000000bc, -CB_PERF_SEL_QUAD_REMOVED_3_FRAGMENTS = 0x000000bd, -CB_PERF_SEL_QUAD_REMOVED_4_FRAGMENTS = 0x000000be, -CB_PERF_SEL_QUAD_REMOVED_5_FRAGMENTS = 0x000000bf, -CB_PERF_SEL_QUAD_REMOVED_6_FRAGMENTS = 0x000000c0, -CB_PERF_SEL_QUAD_REMOVED_7_FRAGMENTS = 0x000000c1, -CB_PERF_SEL_QUAD_READS_FRAGMENT_0 = 0x000000c2, -CB_PERF_SEL_QUAD_READS_FRAGMENT_1 = 0x000000c3, -CB_PERF_SEL_QUAD_READS_FRAGMENT_2 = 0x000000c4, -CB_PERF_SEL_QUAD_READS_FRAGMENT_3 = 0x000000c5, -CB_PERF_SEL_QUAD_READS_FRAGMENT_4 = 0x000000c6, -CB_PERF_SEL_QUAD_READS_FRAGMENT_5 = 0x000000c7, -CB_PERF_SEL_QUAD_READS_FRAGMENT_6 = 0x000000c8, -CB_PERF_SEL_QUAD_READS_FRAGMENT_7 = 0x000000c9, -CB_PERF_SEL_QUAD_WRITES_FRAGMENT_0 = 0x000000ca, -CB_PERF_SEL_QUAD_WRITES_FRAGMENT_1 = 0x000000cb, -CB_PERF_SEL_QUAD_WRITES_FRAGMENT_2 = 0x000000cc, -CB_PERF_SEL_QUAD_WRITES_FRAGMENT_3 = 0x000000cd, -CB_PERF_SEL_QUAD_WRITES_FRAGMENT_4 = 0x000000ce, -CB_PERF_SEL_QUAD_WRITES_FRAGMENT_5 = 0x000000cf, -CB_PERF_SEL_QUAD_WRITES_FRAGMENT_6 = 0x000000d0, -CB_PERF_SEL_QUAD_WRITES_FRAGMENT_7 = 0x000000d1, -CB_PERF_SEL_QUAD_BLEND_OPT_DONT_READ_DST = 0x000000d2, -CB_PERF_SEL_QUAD_BLEND_OPT_BLEND_BYPASS = 0x000000d3, -CB_PERF_SEL_QUAD_BLEND_OPT_DISCARD_PIXELS = 0x000000d4, -CB_PERF_SEL_QUAD_DST_READ_COULD_HAVE_BEEN_OPTIMIZED = 0x000000d5, -CB_PERF_SEL_QUAD_BLENDING_COULD_HAVE_BEEN_BYPASSED = 0x000000d6, -CB_PERF_SEL_QUAD_COULD_HAVE_BEEN_DISCARDED = 0x000000d7, -CB_PERF_SEL_BLEND_OPT_PIXELS_RESULT_EQ_DEST = 0x000000d8, -CB_PERF_SEL_DRAWN_BUSY = 0x000000d9, -CB_PERF_SEL_TILE_TO_CMR_REGION_BUSY = 0x000000da, -CB_PERF_SEL_CMR_TO_FCR_REGION_BUSY = 0x000000db, -CB_PERF_SEL_FCR_TO_CCR_REGION_BUSY = 0x000000dc, -CB_PERF_SEL_CCR_TO_CCW_REGION_BUSY = 0x000000dd, -CB_PERF_SEL_FC_PF_SLOW_MODE_QUAD_EMPTY_HALF_DROPPED = 0x000000de, -CB_PERF_SEL_FC_SEQUENCER_CLEAR = 0x000000df, 
-CB_PERF_SEL_FC_SEQUENCER_ELIMINATE_FAST_CLEAR = 0x000000e0, -CB_PERF_SEL_FC_SEQUENCER_FMASK_DECOMPRESS = 0x000000e1, -CB_PERF_SEL_FC_SEQUENCER_FMASK_COMPRESSION_DISABLE = 0x000000e2, -CB_PERF_SEL_FC_KEYID_RDLAT_FIFO_FULL = 0x000000e3, -CB_PERF_SEL_FC_DOC_IS_STALLED = 0x000000e4, -CB_PERF_SEL_FC_DOC_MRTS_NOT_COMBINED = 0x000000e5, -CB_PERF_SEL_FC_DOC_MRTS_COMBINED = 0x000000e6, -CB_PERF_SEL_FC_DOC_QTILE_CAM_MISS = 0x000000e7, -CB_PERF_SEL_FC_DOC_QTILE_CAM_HIT = 0x000000e8, -CB_PERF_SEL_FC_DOC_CLINE_CAM_MISS = 0x000000e9, -CB_PERF_SEL_FC_DOC_CLINE_CAM_HIT = 0x000000ea, -CB_PERF_SEL_FC_DOC_QUAD_PTR_FIFO_IS_FULL = 0x000000eb, -CB_PERF_SEL_FC_DOC_OVERWROTE_1_SECTOR = 0x000000ec, -CB_PERF_SEL_FC_DOC_OVERWROTE_2_SECTORS = 0x000000ed, -CB_PERF_SEL_FC_DOC_OVERWROTE_3_SECTORS = 0x000000ee, -CB_PERF_SEL_FC_DOC_OVERWROTE_4_SECTORS = 0x000000ef, -CB_PERF_SEL_FC_DOC_TOTAL_OVERWRITTEN_SECTORS = 0x000000f0, -CB_PERF_SEL_FC_DCC_CACHE_HIT = 0x000000f1, -CB_PERF_SEL_FC_DCC_CACHE_TAG_MISS = 0x000000f2, -CB_PERF_SEL_FC_DCC_CACHE_SECTOR_MISS = 0x000000f3, -CB_PERF_SEL_FC_DCC_CACHE_REEVICTION_STALL = 0x000000f4, -CB_PERF_SEL_FC_DCC_CACHE_EVICT_NONZERO_INFLIGHT_STALL = 0x000000f5, -CB_PERF_SEL_FC_DCC_CACHE_REPLACE_PENDING_EVICT_STALL = 0x000000f6, -CB_PERF_SEL_FC_DCC_CACHE_INFLIGHT_COUNTER_MAXIMUM_STALL = 0x000000f7, -CB_PERF_SEL_FC_DCC_CACHE_READ_OUTPUT_STALL = 0x000000f8, -CB_PERF_SEL_FC_DCC_CACHE_WRITE_OUTPUT_STALL = 0x000000f9, -CB_PERF_SEL_FC_DCC_CACHE_ACK_OUTPUT_STALL = 0x000000fa, -CB_PERF_SEL_FC_DCC_CACHE_STALL = 0x000000fb, -CB_PERF_SEL_FC_DCC_CACHE_FLUSH = 0x000000fc, -CB_PERF_SEL_FC_DCC_CACHE_TAGS_FLUSHED = 0x000000fd, -CB_PERF_SEL_FC_DCC_CACHE_SECTORS_FLUSHED = 0x000000fe, -CB_PERF_SEL_FC_DCC_CACHE_DIRTY_SECTORS_FLUSHED = 0x000000ff, -CB_PERF_SEL_CC_DCC_BEYOND_TILE_SPLIT = 0x00000100, -CB_PERF_SEL_FC_MC_DCC_WRITE_REQUEST = 0x00000101, -CB_PERF_SEL_FC_MC_DCC_WRITE_REQUESTS_IN_FLIGHT = 0x00000102, -CB_PERF_SEL_FC_MC_DCC_READ_REQUEST = 0x00000103, 
-CB_PERF_SEL_FC_MC_DCC_READ_REQUESTS_IN_FLIGHT = 0x00000104, -CB_PERF_SEL_CC_DCC_RDREQ_STALL = 0x00000105, -CB_PERF_SEL_CC_DCC_DECOMPRESS_TIDS_IN = 0x00000106, -CB_PERF_SEL_CC_DCC_DECOMPRESS_TIDS_OUT = 0x00000107, -CB_PERF_SEL_CC_DCC_COMPRESS_TIDS_IN = 0x00000108, -CB_PERF_SEL_CC_DCC_COMPRESS_TIDS_OUT = 0x00000109, -CB_PERF_SEL_FC_DCC_KEY_VALUE__CLEAR = 0x0000010a, -CB_PERF_SEL_CC_DCC_KEY_VALUE__4_BLOCKS__2TO1 = 0x0000010b, -CB_PERF_SEL_CC_DCC_KEY_VALUE__3BLOCKS_2TO1__1BLOCK_2TO2 = 0x0000010c, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO1__1BLOCK_2TO2__1BLOCK_2TO1 = 0x0000010d, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_2TO2__2BLOCKS_2TO1 = 0x0000010e, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__3BLOCKS_2TO1 = 0x0000010f, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO1__2BLOCKS_2TO2 = 0x00000110, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__2BLOCKS_2TO2__1BLOCK_2TO1 = 0x00000111, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_2TO2__1BLOCK_2TO1__1BLOCK_2TO2 = 0x00000112, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_2TO1__1BLOCK_2TO2__1BLOCK_2TO1 = 0x00000113, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO2__2BLOCKS_2TO1 = 0x00000114, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__2BLOCKS_2TO1__1BLOCK_2TO2 = 0x00000115, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__3BLOCKS_2TO2 = 0x00000116, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_2TO1__2BLOCKS_2TO2 = 0x00000117, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO2__1BLOCK_2TO1__1BLOCK_2TO2 = 0x00000118, -CB_PERF_SEL_CC_DCC_KEY_VALUE__3BLOCKS_2TO2__1BLOCK_2TO1 = 0x00000119, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_4TO1 = 0x0000011a, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO1__1BLOCK_4TO2 = 0x0000011b, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO1__1BLOCK_4TO3 = 0x0000011c, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO1__1BLOCK_4TO4 = 0x0000011d, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO2__1BLOCK_4TO1 = 0x0000011e, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_4TO2 = 0x0000011f, 
-CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO2__1BLOCK_4TO3 = 0x00000120, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO2__1BLOCK_4TO4 = 0x00000121, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO3__1BLOCK_4TO1 = 0x00000122, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO3__1BLOCK_4TO2 = 0x00000123, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_4TO3 = 0x00000124, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO3__1BLOCK_4TO4 = 0x00000125, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO4__1BLOCK_4TO1 = 0x00000126, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO4__1BLOCK_4TO2 = 0x00000127, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO4__1BLOCK_4TO3 = 0x00000128, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO1__1BLOCK_4TO1 = 0x00000129, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO1__1BLOCK_4TO2 = 0x0000012a, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO1__1BLOCK_4TO3 = 0x0000012b, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO1__1BLOCK_4TO4 = 0x0000012c, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_2TO2__1BLOCK_4TO1 = 0x0000012d, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_2TO2__1BLOCK_4TO2 = 0x0000012e, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_2TO2__1BLOCK_4TO3 = 0x0000012f, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_2TO2__1BLOCK_4TO4 = 0x00000130, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_2TO1__1BLOCK_4TO1 = 0x00000131, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_2TO1__1BLOCK_4TO2 = 0x00000132, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_2TO1__1BLOCK_4TO3 = 0x00000133, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_2TO1__1BLOCK_4TO4 = 0x00000134, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO2__1BLOCK_4TO1 = 0x00000135, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO2__1BLOCK_4TO2 = 0x00000136, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO2__1BLOCK_4TO3 = 0x00000137, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO1__1BLOCK_2TO1 = 0x00000138, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO2__1BLOCK_2TO1 = 0x00000139, 
-CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO3__1BLOCK_2TO1 = 0x0000013a, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO4__1BLOCK_2TO1 = 0x0000013b, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_4TO1__1BLOCK_2TO1 = 0x0000013c, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_4TO2__1BLOCK_2TO1 = 0x0000013d, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_4TO3__1BLOCK_2TO1 = 0x0000013e, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_4TO4__1BLOCK_2TO1 = 0x0000013f, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO1__1BLOCK_2TO2 = 0x00000140, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO2__1BLOCK_2TO2 = 0x00000141, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO3__1BLOCK_2TO2 = 0x00000142, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO4__1BLOCK_2TO2 = 0x00000143, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_4TO1__1BLOCK_2TO2 = 0x00000144, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_4TO2__1BLOCK_2TO2 = 0x00000145, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_4TO3__1BLOCK_2TO2 = 0x00000146, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO1__2BLOCKS_2TO1 = 0x00000147, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO2__2BLOCKS_2TO1 = 0x00000148, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO3__2BLOCKS_2TO1 = 0x00000149, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO4__2BLOCKS_2TO1 = 0x0000014a, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO1__2BLOCKS_2TO2 = 0x0000014b, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO2__2BLOCKS_2TO2 = 0x0000014c, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO3__2BLOCKS_2TO2 = 0x0000014d, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO1__1BLOCK_2TO1__1BLOCK_2TO2 = 0x0000014e, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO2__1BLOCK_2TO1__1BLOCK_2TO2 = 0x0000014f, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO3__1BLOCK_2TO1__1BLOCK_2TO2 = 0x00000150, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO4__1BLOCK_2TO1__1BLOCK_2TO2 = 0x00000151, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO1__1BLOCK_2TO2__1BLOCK_2TO1 = 0x00000152, 
-CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO2__1BLOCK_2TO2__1BLOCK_2TO1 = 0x00000153, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO3__1BLOCK_2TO2__1BLOCK_2TO1 = 0x00000154, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO4__1BLOCK_2TO2__1BLOCK_2TO1 = 0x00000155, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_6TO1 = 0x00000156, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_6TO2 = 0x00000157, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_6TO3 = 0x00000158, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_6TO4 = 0x00000159, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_6TO5 = 0x0000015a, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_6TO6 = 0x0000015b, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__INV0 = 0x0000015c, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__INV1 = 0x0000015d, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_6TO1 = 0x0000015e, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_6TO2 = 0x0000015f, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_6TO3 = 0x00000160, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_6TO4 = 0x00000161, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_6TO5 = 0x00000162, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__INV0 = 0x00000163, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__INV1 = 0x00000164, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO1__1BLOCK_2TO1 = 0x00000165, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO2__1BLOCK_2TO1 = 0x00000166, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO3__1BLOCK_2TO1 = 0x00000167, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO4__1BLOCK_2TO1 = 0x00000168, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO5__1BLOCK_2TO1 = 0x00000169, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO6__1BLOCK_2TO1 = 0x0000016a, -CB_PERF_SEL_CC_DCC_KEY_VALUE__INV0__1BLOCK_2TO1 = 0x0000016b, -CB_PERF_SEL_CC_DCC_KEY_VALUE__INV1__1BLOCK_2TO1 = 0x0000016c, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO1__1BLOCK_2TO2 = 0x0000016d, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO2__1BLOCK_2TO2 = 0x0000016e, 
-CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO3__1BLOCK_2TO2 = 0x0000016f, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO4__1BLOCK_2TO2 = 0x00000170, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO5__1BLOCK_2TO2 = 0x00000171, -CB_PERF_SEL_CC_DCC_KEY_VALUE__INV0__1BLOCK_2TO2 = 0x00000172, -CB_PERF_SEL_CC_DCC_KEY_VALUE__INV1__1BLOCK_2TO2 = 0x00000173, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_8TO1 = 0x00000174, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_8TO2 = 0x00000175, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_8TO3 = 0x00000176, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_8TO4 = 0x00000177, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_8TO5 = 0x00000178, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_8TO6 = 0x00000179, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_8TO7 = 0x0000017a, -CB_PERF_SEL_CC_DCC_KEY_VALUE__UNCOMPRESSED = 0x0000017b, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_2TO1 = 0x0000017c, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_4TO1 = 0x0000017d, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_4TO2 = 0x0000017e, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_4TO3 = 0x0000017f, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_6TO1 = 0x00000180, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_6TO2 = 0x00000181, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_6TO3 = 0x00000182, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_6TO4 = 0x00000183, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_6TO5 = 0x00000184, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO1 = 0x00000185, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO2 = 0x00000186, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO3 = 0x00000187, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO4 = 0x00000188, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO5 = 0x00000189, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO6 = 0x0000018a, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO7 = 0x0000018b, -CB_PERF_SEL_RBP_EXPORT_8PIX_LIT_BOTH = 0x0000018c, -CB_PERF_SEL_RBP_EXPORT_8PIX_LIT_LEFT = 0x0000018d, -CB_PERF_SEL_RBP_EXPORT_8PIX_LIT_RIGHT = 0x0000018e, -CB_PERF_SEL_RBP_SPLIT_MICROTILE = 0x0000018f, -CB_PERF_SEL_RBP_SPLIT_AA_SAMPLE_MASK = 0x00000190, -CB_PERF_SEL_RBP_SPLIT_PARTIAL_TARGET_MASK = 0x00000191, 
-CB_PERF_SEL_RBP_SPLIT_LINEAR_ADDRESSING = 0x00000192, -CB_PERF_SEL_RBP_SPLIT_AA_NO_FMASK_COMPRESS = 0x00000193, -CB_PERF_SEL_RBP_INSERT_MISSING_LAST_QUAD = 0x00000194, -} CBPerfSel; - -/* - * CBPerfOpFilterSel enum - */ - -typedef enum CBPerfOpFilterSel { -CB_PERF_OP_FILTER_SEL_WRITE_ONLY = 0x00000000, -CB_PERF_OP_FILTER_SEL_NEEDS_DESTINATION = 0x00000001, -CB_PERF_OP_FILTER_SEL_RESOLVE = 0x00000002, -CB_PERF_OP_FILTER_SEL_DECOMPRESS = 0x00000003, -CB_PERF_OP_FILTER_SEL_FMASK_DECOMPRESS = 0x00000004, -CB_PERF_OP_FILTER_SEL_ELIMINATE_FAST_CLEAR = 0x00000005, -} CBPerfOpFilterSel; - -/* - * CBPerfClearFilterSel enum - */ - -typedef enum CBPerfClearFilterSel { -CB_PERF_CLEAR_FILTER_SEL_NONCLEAR = 0x00000000, -CB_PERF_CLEAR_FILTER_SEL_CLEAR = 0x00000001, -} CBPerfClearFilterSel; - -/******************************************************* - * TC Enums - *******************************************************/ - -/* - * TC_OP_MASKS enum - */ - -typedef enum TC_OP_MASKS { -TC_OP_MASK_FLUSH_DENROM = 0x00000008, -TC_OP_MASK_64 = 0x00000020, -TC_OP_MASK_NO_RTN = 0x00000040, -} TC_OP_MASKS; - -/* - * TC_OP enum - */ - -typedef enum TC_OP { -TC_OP_READ = 0x00000000, -TC_OP_ATOMIC_FCMPSWAP_RTN_32 = 0x00000001, -TC_OP_ATOMIC_FMIN_RTN_32 = 0x00000002, -TC_OP_ATOMIC_FMAX_RTN_32 = 0x00000003, -TC_OP_RESERVED_FOP_RTN_32_0 = 0x00000004, -TC_OP_RESERVED_FOP_RTN_32_1 = 0x00000005, -TC_OP_RESERVED_FOP_RTN_32_2 = 0x00000006, -TC_OP_ATOMIC_SWAP_RTN_32 = 0x00000007, -TC_OP_ATOMIC_CMPSWAP_RTN_32 = 0x00000008, -TC_OP_ATOMIC_FCMPSWAP_FLUSH_DENORM_RTN_32 = 0x00000009, -TC_OP_ATOMIC_FMIN_FLUSH_DENORM_RTN_32 = 0x0000000a, -TC_OP_ATOMIC_FMAX_FLUSH_DENORM_RTN_32 = 0x0000000b, -TC_OP_PROBE_FILTER = 0x0000000c, -TC_OP_RESERVED_FOP_FLUSH_DENORM_RTN_32_1 = 0x0000000d, -TC_OP_RESERVED_FOP_FLUSH_DENORM_RTN_32_2 = 0x0000000e, -TC_OP_ATOMIC_ADD_RTN_32 = 0x0000000f, -TC_OP_ATOMIC_SUB_RTN_32 = 0x00000010, -TC_OP_ATOMIC_SMIN_RTN_32 = 0x00000011, -TC_OP_ATOMIC_UMIN_RTN_32 = 0x00000012, 
-TC_OP_ATOMIC_SMAX_RTN_32 = 0x00000013, -TC_OP_ATOMIC_UMAX_RTN_32 = 0x00000014, -TC_OP_ATOMIC_AND_RTN_32 = 0x00000015, -TC_OP_ATOMIC_OR_RTN_32 = 0x00000016, -TC_OP_ATOMIC_XOR_RTN_32 = 0x00000017, -TC_OP_ATOMIC_INC_RTN_32 = 0x00000018, -TC_OP_ATOMIC_DEC_RTN_32 = 0x00000019, -TC_OP_WBINVL1_VOL = 0x0000001a, -TC_OP_WBINVL1_SD = 0x0000001b, -TC_OP_RESERVED_NON_FLOAT_RTN_32_0 = 0x0000001c, -TC_OP_RESERVED_NON_FLOAT_RTN_32_1 = 0x0000001d, -TC_OP_RESERVED_NON_FLOAT_RTN_32_2 = 0x0000001e, -TC_OP_RESERVED_NON_FLOAT_RTN_32_3 = 0x0000001f, -TC_OP_WRITE = 0x00000020, -TC_OP_ATOMIC_FCMPSWAP_RTN_64 = 0x00000021, -TC_OP_ATOMIC_FMIN_RTN_64 = 0x00000022, -TC_OP_ATOMIC_FMAX_RTN_64 = 0x00000023, -TC_OP_RESERVED_FOP_RTN_64_0 = 0x00000024, -TC_OP_RESERVED_FOP_RTN_64_1 = 0x00000025, -TC_OP_RESERVED_FOP_RTN_64_2 = 0x00000026, -TC_OP_ATOMIC_SWAP_RTN_64 = 0x00000027, -TC_OP_ATOMIC_CMPSWAP_RTN_64 = 0x00000028, -TC_OP_ATOMIC_FCMPSWAP_FLUSH_DENORM_RTN_64 = 0x00000029, -TC_OP_ATOMIC_FMIN_FLUSH_DENORM_RTN_64 = 0x0000002a, -TC_OP_ATOMIC_FMAX_FLUSH_DENORM_RTN_64 = 0x0000002b, -TC_OP_WBINVL2_SD = 0x0000002c, -TC_OP_RESERVED_FOP_FLUSH_DENORM_RTN_64_0 = 0x0000002d, -TC_OP_RESERVED_FOP_FLUSH_DENORM_RTN_64_1 = 0x0000002e, -TC_OP_ATOMIC_ADD_RTN_64 = 0x0000002f, -TC_OP_ATOMIC_SUB_RTN_64 = 0x00000030, -TC_OP_ATOMIC_SMIN_RTN_64 = 0x00000031, -TC_OP_ATOMIC_UMIN_RTN_64 = 0x00000032, -TC_OP_ATOMIC_SMAX_RTN_64 = 0x00000033, -TC_OP_ATOMIC_UMAX_RTN_64 = 0x00000034, -TC_OP_ATOMIC_AND_RTN_64 = 0x00000035, -TC_OP_ATOMIC_OR_RTN_64 = 0x00000036, -TC_OP_ATOMIC_XOR_RTN_64 = 0x00000037, -TC_OP_ATOMIC_INC_RTN_64 = 0x00000038, -TC_OP_ATOMIC_DEC_RTN_64 = 0x00000039, -TC_OP_WBL2_NC = 0x0000003a, -TC_OP_WBL2_WC = 0x0000003b, -TC_OP_RESERVED_NON_FLOAT_RTN_64_1 = 0x0000003c, -TC_OP_RESERVED_NON_FLOAT_RTN_64_2 = 0x0000003d, -TC_OP_RESERVED_NON_FLOAT_RTN_64_3 = 0x0000003e, -TC_OP_RESERVED_NON_FLOAT_RTN_64_4 = 0x0000003f, -TC_OP_WBINVL1 = 0x00000040, -TC_OP_ATOMIC_FCMPSWAP_32 = 0x00000041, -TC_OP_ATOMIC_FMIN_32 = 0x00000042, 
-TC_OP_ATOMIC_FMAX_32 = 0x00000043, -TC_OP_RESERVED_FOP_32_0 = 0x00000044, -TC_OP_RESERVED_FOP_32_1 = 0x00000045, -TC_OP_RESERVED_FOP_32_2 = 0x00000046, -TC_OP_ATOMIC_SWAP_32 = 0x00000047, -TC_OP_ATOMIC_CMPSWAP_32 = 0x00000048, -TC_OP_ATOMIC_FCMPSWAP_FLUSH_DENORM_32 = 0x00000049, -TC_OP_ATOMIC_FMIN_FLUSH_DENORM_32 = 0x0000004a, -TC_OP_ATOMIC_FMAX_FLUSH_DENORM_32 = 0x0000004b, -TC_OP_INV_METADATA = 0x0000004c, -TC_OP_RESERVED_FOP_FLUSH_DENORM_32_1 = 0x0000004d, -TC_OP_RESERVED_FOP_FLUSH_DENORM_32_2 = 0x0000004e, -TC_OP_ATOMIC_ADD_32 = 0x0000004f, -TC_OP_ATOMIC_SUB_32 = 0x00000050, -TC_OP_ATOMIC_SMIN_32 = 0x00000051, -TC_OP_ATOMIC_UMIN_32 = 0x00000052, -TC_OP_ATOMIC_SMAX_32 = 0x00000053, -TC_OP_ATOMIC_UMAX_32 = 0x00000054, -TC_OP_ATOMIC_AND_32 = 0x00000055, -TC_OP_ATOMIC_OR_32 = 0x00000056, -TC_OP_ATOMIC_XOR_32 = 0x00000057, -TC_OP_ATOMIC_INC_32 = 0x00000058, -TC_OP_ATOMIC_DEC_32 = 0x00000059, -TC_OP_INVL2_NC = 0x0000005a, -TC_OP_NOP_RTN0 = 0x0000005b, -TC_OP_RESERVED_NON_FLOAT_32_1 = 0x0000005c, -TC_OP_RESERVED_NON_FLOAT_32_2 = 0x0000005d, -TC_OP_RESERVED_NON_FLOAT_32_3 = 0x0000005e, -TC_OP_RESERVED_NON_FLOAT_32_4 = 0x0000005f, -TC_OP_WBINVL2 = 0x00000060, -TC_OP_ATOMIC_FCMPSWAP_64 = 0x00000061, -TC_OP_ATOMIC_FMIN_64 = 0x00000062, -TC_OP_ATOMIC_FMAX_64 = 0x00000063, -TC_OP_RESERVED_FOP_64_0 = 0x00000064, -TC_OP_RESERVED_FOP_64_1 = 0x00000065, -TC_OP_RESERVED_FOP_64_2 = 0x00000066, -TC_OP_ATOMIC_SWAP_64 = 0x00000067, -TC_OP_ATOMIC_CMPSWAP_64 = 0x00000068, -TC_OP_ATOMIC_FCMPSWAP_FLUSH_DENORM_64 = 0x00000069, -TC_OP_ATOMIC_FMIN_FLUSH_DENORM_64 = 0x0000006a, -TC_OP_ATOMIC_FMAX_FLUSH_DENORM_64 = 0x0000006b, -TC_OP_RESERVED_FOP_FLUSH_DENORM_64_0 = 0x0000006c, -TC_OP_RESERVED_FOP_FLUSH_DENORM_64_1 = 0x0000006d, -TC_OP_RESERVED_FOP_FLUSH_DENORM_64_2 = 0x0000006e, -TC_OP_ATOMIC_ADD_64 = 0x0000006f, -TC_OP_ATOMIC_SUB_64 = 0x00000070, -TC_OP_ATOMIC_SMIN_64 = 0x00000071, -TC_OP_ATOMIC_UMIN_64 = 0x00000072, -TC_OP_ATOMIC_SMAX_64 = 0x00000073, -TC_OP_ATOMIC_UMAX_64 = 0x00000074, 
-TC_OP_ATOMIC_AND_64 = 0x00000075, -TC_OP_ATOMIC_OR_64 = 0x00000076, -TC_OP_ATOMIC_XOR_64 = 0x00000077, -TC_OP_ATOMIC_INC_64 = 0x00000078, -TC_OP_ATOMIC_DEC_64 = 0x00000079, -TC_OP_WBINVL2_NC = 0x0000007a, -TC_OP_NOP_ACK = 0x0000007b, -TC_OP_RESERVED_NON_FLOAT_64_1 = 0x0000007c, -TC_OP_RESERVED_NON_FLOAT_64_2 = 0x0000007d, -TC_OP_RESERVED_NON_FLOAT_64_3 = 0x0000007e, -TC_OP_RESERVED_NON_FLOAT_64_4 = 0x0000007f, -} TC_OP; - -/* - * TC_CHUB_REQ_CREDITS_ENUM enum - */ - -typedef enum TC_CHUB_REQ_CREDITS_ENUM { -TC_CHUB_REQ_CREDITS = 0x00000010, -} TC_CHUB_REQ_CREDITS_ENUM; - -/* - * CHUB_TC_RET_CREDITS_ENUM enum - */ - -typedef enum CHUB_TC_RET_CREDITS_ENUM { -CHUB_TC_RET_CREDITS = 0x00000020, -} CHUB_TC_RET_CREDITS_ENUM; - -/* - * TC_NACKS enum - */ - -typedef enum TC_NACKS { -TC_NACK_NO_FAULT = 0x00000000, -TC_NACK_PAGE_FAULT = 0x00000001, -TC_NACK_PROTECTION_FAULT = 0x00000002, -TC_NACK_DATA_ERROR = 0x00000003, -} TC_NACKS; - -/* - * TC_EA_CID enum - */ - -typedef enum TC_EA_CID { -TC_EA_CID_RT = 0x00000000, -TC_EA_CID_FMASK = 0x00000001, -TC_EA_CID_DCC = 0x00000002, -TC_EA_CID_TCPMETA = 0x00000003, -TC_EA_CID_Z = 0x00000004, -TC_EA_CID_STENCIL = 0x00000005, -TC_EA_CID_HTILE = 0x00000006, -TC_EA_CID_MISC = 0x00000007, -TC_EA_CID_TCP = 0x00000008, -TC_EA_CID_SQC = 0x00000009, -TC_EA_CID_CPF = 0x0000000a, -TC_EA_CID_CPG = 0x0000000b, -TC_EA_CID_IA = 0x0000000c, -TC_EA_CID_WD = 0x0000000d, -TC_EA_CID_PA = 0x0000000e, -TC_EA_CID_UTCL2_TPI = 0x0000000f, -} TC_EA_CID; - -/******************************************************* - * GC_CAC Enums - *******************************************************/ - -/******************************************************* - * RLC Enums - *******************************************************/ - -/******************************************************* - * SPI Enums - *******************************************************/ - -/* - * SPI_SAMPLE_CNTL enum - */ - -typedef enum SPI_SAMPLE_CNTL { -CENTROIDS_ONLY = 0x00000000, 
-CENTERS_ONLY = 0x00000001, -CENTROIDS_AND_CENTERS = 0x00000002, -UNDEF = 0x00000003, -} SPI_SAMPLE_CNTL; - -/* - * SPI_FOG_MODE enum - */ - -typedef enum SPI_FOG_MODE { -SPI_FOG_NONE = 0x00000000, -SPI_FOG_EXP = 0x00000001, -SPI_FOG_EXP2 = 0x00000002, -SPI_FOG_LINEAR = 0x00000003, -} SPI_FOG_MODE; - -/* - * SPI_PNT_SPRITE_OVERRIDE enum - */ - -typedef enum SPI_PNT_SPRITE_OVERRIDE { -SPI_PNT_SPRITE_SEL_0 = 0x00000000, -SPI_PNT_SPRITE_SEL_1 = 0x00000001, -SPI_PNT_SPRITE_SEL_S = 0x00000002, -SPI_PNT_SPRITE_SEL_T = 0x00000003, -SPI_PNT_SPRITE_SEL_NONE = 0x00000004, -} SPI_PNT_SPRITE_OVERRIDE; - -/* - * SPI_PERFCNT_SEL enum - */ - -typedef enum SPI_PERFCNT_SEL { -SPI_PERF_VS_WINDOW_VALID = 0x00000000, -SPI_PERF_VS_BUSY = 0x00000001, -SPI_PERF_VS_FIRST_WAVE = 0x00000002, -SPI_PERF_VS_LAST_WAVE = 0x00000003, -SPI_PERF_VS_LSHS_DEALLOC = 0x00000004, -SPI_PERF_VS_PC_STALL = 0x00000005, -SPI_PERF_VS_POS0_STALL = 0x00000006, -SPI_PERF_VS_POS1_STALL = 0x00000007, -SPI_PERF_VS_CRAWLER_STALL = 0x00000008, -SPI_PERF_VS_EVENT_WAVE = 0x00000009, -SPI_PERF_VS_WAVE = 0x0000000a, -SPI_PERF_VS_PERS_UPD_FULL0 = 0x0000000b, -SPI_PERF_VS_PERS_UPD_FULL1 = 0x0000000c, -SPI_PERF_VS_LATE_ALLOC_FULL = 0x0000000d, -SPI_PERF_VS_FIRST_SUBGRP = 0x0000000e, -SPI_PERF_VS_LAST_SUBGRP = 0x0000000f, -SPI_PERF_GS_WINDOW_VALID = 0x00000010, -SPI_PERF_GS_BUSY = 0x00000011, -SPI_PERF_GS_CRAWLER_STALL = 0x00000012, -SPI_PERF_GS_EVENT_WAVE = 0x00000013, -SPI_PERF_GS_WAVE = 0x00000014, -SPI_PERF_GS_PERS_UPD_FULL0 = 0x00000015, -SPI_PERF_GS_PERS_UPD_FULL1 = 0x00000016, -SPI_PERF_GS_FIRST_SUBGRP = 0x00000017, -SPI_PERF_GS_LAST_SUBGRP = 0x00000018, -SPI_PERF_ES_WINDOW_VALID = 0x00000019, -SPI_PERF_ES_BUSY = 0x0000001a, -SPI_PERF_ES_CRAWLER_STALL = 0x0000001b, -SPI_PERF_ES_FIRST_WAVE = 0x0000001c, -SPI_PERF_ES_LAST_WAVE = 0x0000001d, -SPI_PERF_ES_LSHS_DEALLOC = 0x0000001e, -SPI_PERF_ES_EVENT_WAVE = 0x0000001f, -SPI_PERF_ES_WAVE = 0x00000020, -SPI_PERF_ES_PERS_UPD_FULL0 = 0x00000021, -SPI_PERF_ES_PERS_UPD_FULL1 = 
0x00000022, -SPI_PERF_ES_FIRST_SUBGRP = 0x00000023, -SPI_PERF_ES_LAST_SUBGRP = 0x00000024, -SPI_PERF_HS_WINDOW_VALID = 0x00000025, -SPI_PERF_HS_BUSY = 0x00000026, -SPI_PERF_HS_CRAWLER_STALL = 0x00000027, -SPI_PERF_HS_FIRST_WAVE = 0x00000028, -SPI_PERF_HS_LAST_WAVE = 0x00000029, -SPI_PERF_HS_LSHS_DEALLOC = 0x0000002a, -SPI_PERF_HS_EVENT_WAVE = 0x0000002b, -SPI_PERF_HS_WAVE = 0x0000002c, -SPI_PERF_HS_PERS_UPD_FULL0 = 0x0000002d, -SPI_PERF_HS_PERS_UPD_FULL1 = 0x0000002e, -SPI_PERF_LS_WINDOW_VALID = 0x0000002f, -SPI_PERF_LS_BUSY = 0x00000030, -SPI_PERF_LS_CRAWLER_STALL = 0x00000031, -SPI_PERF_LS_FIRST_WAVE = 0x00000032, -SPI_PERF_LS_LAST_WAVE = 0x00000033, -SPI_PERF_OFFCHIP_LDS_STALL_LS = 0x00000034, -SPI_PERF_LS_EVENT_WAVE = 0x00000035, -SPI_PERF_LS_WAVE = 0x00000036, -SPI_PERF_LS_PERS_UPD_FULL0 = 0x00000037, -SPI_PERF_LS_PERS_UPD_FULL1 = 0x00000038, -SPI_PERF_CSG_WINDOW_VALID = 0x00000039, -SPI_PERF_CSG_BUSY = 0x0000003a, -SPI_PERF_CSG_NUM_THREADGROUPS = 0x0000003b, -SPI_PERF_CSG_CRAWLER_STALL = 0x0000003c, -SPI_PERF_CSG_EVENT_WAVE = 0x0000003d, -SPI_PERF_CSG_WAVE = 0x0000003e, -SPI_PERF_CSN_WINDOW_VALID = 0x0000003f, -SPI_PERF_CSN_BUSY = 0x00000040, -SPI_PERF_CSN_NUM_THREADGROUPS = 0x00000041, -SPI_PERF_CSN_CRAWLER_STALL = 0x00000042, -SPI_PERF_CSN_EVENT_WAVE = 0x00000043, -SPI_PERF_CSN_WAVE = 0x00000044, -SPI_PERF_PS_CTL_WINDOW_VALID = 0x00000045, -SPI_PERF_PS_CTL_BUSY = 0x00000046, -SPI_PERF_PS_CTL_ACTIVE = 0x00000047, -SPI_PERF_PS_CTL_DEALLOC_BIN0 = 0x00000048, -SPI_PERF_PS_CTL_FPOS_BIN1_STALL = 0x00000049, -SPI_PERF_PS_CTL_EVENT_WAVE = 0x0000004a, -SPI_PERF_PS_CTL_WAVE = 0x0000004b, -SPI_PERF_PS_CTL_OPT_WAVE = 0x0000004c, -SPI_PERF_PS_CTL_PASS_BIN0 = 0x0000004d, -SPI_PERF_PS_CTL_PASS_BIN1 = 0x0000004e, -SPI_PERF_PS_CTL_FPOS_BIN2 = 0x0000004f, -SPI_PERF_PS_CTL_PRIM_BIN0 = 0x00000050, -SPI_PERF_PS_CTL_PRIM_BIN1 = 0x00000051, -SPI_PERF_PS_CTL_CNF_BIN2 = 0x00000052, -SPI_PERF_PS_CTL_CNF_BIN3 = 0x00000053, -SPI_PERF_PS_CTL_CRAWLER_STALL = 0x00000054, 
-SPI_PERF_PS_CTL_LDS_RES_FULL = 0x00000055, -SPI_PERF_PS_PERS_UPD_FULL0 = 0x00000056, -SPI_PERF_PS_PERS_UPD_FULL1 = 0x00000057, -SPI_PERF_PIX_ALLOC_PEND_CNT = 0x00000058, -SPI_PERF_PIX_ALLOC_SCB_STALL = 0x00000059, -SPI_PERF_PIX_ALLOC_DB0_STALL = 0x0000005a, -SPI_PERF_PIX_ALLOC_DB1_STALL = 0x0000005b, -SPI_PERF_PIX_ALLOC_DB2_STALL = 0x0000005c, -SPI_PERF_PIX_ALLOC_DB3_STALL = 0x0000005d, -SPI_PERF_LDS0_PC_VALID = 0x0000005e, -SPI_PERF_LDS1_PC_VALID = 0x0000005f, -SPI_PERF_RA_PIPE_REQ_BIN2 = 0x00000060, -SPI_PERF_RA_TASK_REQ_BIN3 = 0x00000061, -SPI_PERF_RA_WR_CTL_FULL = 0x00000062, -SPI_PERF_RA_REQ_NO_ALLOC = 0x00000063, -SPI_PERF_RA_REQ_NO_ALLOC_PS = 0x00000064, -SPI_PERF_RA_REQ_NO_ALLOC_VS = 0x00000065, -SPI_PERF_RA_REQ_NO_ALLOC_GS = 0x00000066, -SPI_PERF_RA_REQ_NO_ALLOC_ES = 0x00000067, -SPI_PERF_RA_REQ_NO_ALLOC_HS = 0x00000068, -SPI_PERF_RA_REQ_NO_ALLOC_LS = 0x00000069, -SPI_PERF_RA_REQ_NO_ALLOC_CSG = 0x0000006a, -SPI_PERF_RA_REQ_NO_ALLOC_CSN = 0x0000006b, -SPI_PERF_RA_RES_STALL_PS = 0x0000006c, -SPI_PERF_RA_RES_STALL_VS = 0x0000006d, -SPI_PERF_RA_RES_STALL_GS = 0x0000006e, -SPI_PERF_RA_RES_STALL_ES = 0x0000006f, -SPI_PERF_RA_RES_STALL_HS = 0x00000070, -SPI_PERF_RA_RES_STALL_LS = 0x00000071, -SPI_PERF_RA_RES_STALL_CSG = 0x00000072, -SPI_PERF_RA_RES_STALL_CSN = 0x00000073, -SPI_PERF_RA_TMP_STALL_PS = 0x00000074, -SPI_PERF_RA_TMP_STALL_VS = 0x00000075, -SPI_PERF_RA_TMP_STALL_GS = 0x00000076, -SPI_PERF_RA_TMP_STALL_ES = 0x00000077, -SPI_PERF_RA_TMP_STALL_HS = 0x00000078, -SPI_PERF_RA_TMP_STALL_LS = 0x00000079, -SPI_PERF_RA_TMP_STALL_CSG = 0x0000007a, -SPI_PERF_RA_TMP_STALL_CSN = 0x0000007b, -SPI_PERF_RA_WAVE_SIMD_FULL_PS = 0x0000007c, -SPI_PERF_RA_WAVE_SIMD_FULL_VS = 0x0000007d, -SPI_PERF_RA_WAVE_SIMD_FULL_GS = 0x0000007e, -SPI_PERF_RA_WAVE_SIMD_FULL_ES = 0x0000007f, -SPI_PERF_RA_WAVE_SIMD_FULL_HS = 0x00000080, -SPI_PERF_RA_WAVE_SIMD_FULL_LS = 0x00000081, -SPI_PERF_RA_WAVE_SIMD_FULL_CSG = 0x00000082, -SPI_PERF_RA_WAVE_SIMD_FULL_CSN = 0x00000083, 
-SPI_PERF_RA_VGPR_SIMD_FULL_PS = 0x00000084, -SPI_PERF_RA_VGPR_SIMD_FULL_VS = 0x00000085, -SPI_PERF_RA_VGPR_SIMD_FULL_GS = 0x00000086, -SPI_PERF_RA_VGPR_SIMD_FULL_ES = 0x00000087, -SPI_PERF_RA_VGPR_SIMD_FULL_HS = 0x00000088, -SPI_PERF_RA_VGPR_SIMD_FULL_LS = 0x00000089, -SPI_PERF_RA_VGPR_SIMD_FULL_CSG = 0x0000008a, -SPI_PERF_RA_VGPR_SIMD_FULL_CSN = 0x0000008b, -SPI_PERF_RA_SGPR_SIMD_FULL_PS = 0x0000008c, -SPI_PERF_RA_SGPR_SIMD_FULL_VS = 0x0000008d, -SPI_PERF_RA_SGPR_SIMD_FULL_GS = 0x0000008e, -SPI_PERF_RA_SGPR_SIMD_FULL_ES = 0x0000008f, -SPI_PERF_RA_SGPR_SIMD_FULL_HS = 0x00000090, -SPI_PERF_RA_SGPR_SIMD_FULL_LS = 0x00000091, -SPI_PERF_RA_SGPR_SIMD_FULL_CSG = 0x00000092, -SPI_PERF_RA_SGPR_SIMD_FULL_CSN = 0x00000093, -SPI_PERF_RA_LDS_CU_FULL_PS = 0x00000094, -SPI_PERF_RA_LDS_CU_FULL_LS = 0x00000095, -SPI_PERF_RA_LDS_CU_FULL_ES = 0x00000096, -SPI_PERF_RA_LDS_CU_FULL_CSG = 0x00000097, -SPI_PERF_RA_LDS_CU_FULL_CSN = 0x00000098, -SPI_PERF_RA_BAR_CU_FULL_HS = 0x00000099, -SPI_PERF_RA_BAR_CU_FULL_CSG = 0x0000009a, -SPI_PERF_RA_BAR_CU_FULL_CSN = 0x0000009b, -SPI_PERF_RA_BULKY_CU_FULL_CSG = 0x0000009c, -SPI_PERF_RA_BULKY_CU_FULL_CSN = 0x0000009d, -SPI_PERF_RA_TGLIM_CU_FULL_CSG = 0x0000009e, -SPI_PERF_RA_TGLIM_CU_FULL_CSN = 0x0000009f, -SPI_PERF_RA_WVLIM_STALL_PS = 0x000000a0, -SPI_PERF_RA_WVLIM_STALL_VS = 0x000000a1, -SPI_PERF_RA_WVLIM_STALL_GS = 0x000000a2, -SPI_PERF_RA_WVLIM_STALL_ES = 0x000000a3, -SPI_PERF_RA_WVLIM_STALL_HS = 0x000000a4, -SPI_PERF_RA_WVLIM_STALL_LS = 0x000000a5, -SPI_PERF_RA_WVLIM_STALL_CSG = 0x000000a6, -SPI_PERF_RA_WVLIM_STALL_CSN = 0x000000a7, -SPI_PERF_RA_PS_LOCK_NA = 0x000000a8, -SPI_PERF_RA_VS_LOCK = 0x000000a9, -SPI_PERF_RA_GS_LOCK = 0x000000aa, -SPI_PERF_RA_ES_LOCK = 0x000000ab, -SPI_PERF_RA_HS_LOCK = 0x000000ac, -SPI_PERF_RA_LS_LOCK = 0x000000ad, -SPI_PERF_RA_CSG_LOCK = 0x000000ae, -SPI_PERF_RA_CSN_LOCK = 0x000000af, -SPI_PERF_RA_RSV_UPD = 0x000000b0, -SPI_PERF_EXP_ARB_COL_CNT = 0x000000b1, -SPI_PERF_EXP_ARB_PAR_CNT = 0x000000b2, 
-SPI_PERF_EXP_ARB_POS_CNT = 0x000000b3, -SPI_PERF_EXP_ARB_GDS_CNT = 0x000000b4, -SPI_PERF_CLKGATE_BUSY_STALL = 0x000000b5, -SPI_PERF_CLKGATE_ACTIVE_STALL = 0x000000b6, -SPI_PERF_CLKGATE_ALL_CLOCKS_ON = 0x000000b7, -SPI_PERF_CLKGATE_CGTT_DYN_ON = 0x000000b8, -SPI_PERF_CLKGATE_CGTT_REG_ON = 0x000000b9, -SPI_PERF_NUM_VS_POS_EXPORTS = 0x000000ba, -SPI_PERF_NUM_VS_PARAM_EXPORTS = 0x000000bb, -SPI_PERF_NUM_PS_COL_EXPORTS = 0x000000bc, -SPI_PERF_ES_GRP_FIFO_FULL = 0x000000bd, -SPI_PERF_GS_GRP_FIFO_FULL = 0x000000be, -SPI_PERF_HS_GRP_FIFO_FULL = 0x000000bf, -SPI_PERF_LS_GRP_FIFO_FULL = 0x000000c0, -SPI_PERF_VS_ALLOC_CNT = 0x000000c1, -SPI_PERF_VS_LATE_ALLOC_ACCUM = 0x000000c2, -SPI_PERF_PC_ALLOC_CNT = 0x000000c3, -SPI_PERF_PC_ALLOC_ACCUM = 0x000000c4, -} SPI_PERFCNT_SEL; - -/* - * SPI_SHADER_FORMAT enum - */ - -typedef enum SPI_SHADER_FORMAT { -SPI_SHADER_NONE = 0x00000000, -SPI_SHADER_1COMP = 0x00000001, -SPI_SHADER_2COMP = 0x00000002, -SPI_SHADER_4COMPRESS = 0x00000003, -SPI_SHADER_4COMP = 0x00000004, -} SPI_SHADER_FORMAT; - -/* - * SPI_SHADER_EX_FORMAT enum - */ - -typedef enum SPI_SHADER_EX_FORMAT { -SPI_SHADER_ZERO = 0x00000000, -SPI_SHADER_32_R = 0x00000001, -SPI_SHADER_32_GR = 0x00000002, -SPI_SHADER_32_AR = 0x00000003, -SPI_SHADER_FP16_ABGR = 0x00000004, -SPI_SHADER_UNORM16_ABGR = 0x00000005, -SPI_SHADER_SNORM16_ABGR = 0x00000006, -SPI_SHADER_UINT16_ABGR = 0x00000007, -SPI_SHADER_SINT16_ABGR = 0x00000008, -SPI_SHADER_32_ABGR = 0x00000009, -} SPI_SHADER_EX_FORMAT; - -/* - * CLKGATE_SM_MODE enum - */ - -typedef enum CLKGATE_SM_MODE { -ON_SEQ = 0x00000000, -OFF_SEQ = 0x00000001, -PROG_SEQ = 0x00000002, -READ_SEQ = 0x00000003, -SM_MODE_RESERVED = 0x00000004, -} CLKGATE_SM_MODE; - -/* - * CLKGATE_BASE_MODE enum - */ - -typedef enum CLKGATE_BASE_MODE { -MULT_8 = 0x00000000, -MULT_16 = 0x00000001, -} CLKGATE_BASE_MODE; - -/******************************************************* - * SQ Enums - *******************************************************/ - -/* - * SQ_TEX_CLAMP 
enum - */ - -typedef enum SQ_TEX_CLAMP { -SQ_TEX_WRAP = 0x00000000, -SQ_TEX_MIRROR = 0x00000001, -SQ_TEX_CLAMP_LAST_TEXEL = 0x00000002, -SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x00000003, -SQ_TEX_CLAMP_HALF_BORDER = 0x00000004, -SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x00000005, -SQ_TEX_CLAMP_BORDER = 0x00000006, -SQ_TEX_MIRROR_ONCE_BORDER = 0x00000007, -} SQ_TEX_CLAMP; - -/* - * SQ_TEX_XY_FILTER enum - */ - -typedef enum SQ_TEX_XY_FILTER { -SQ_TEX_XY_FILTER_POINT = 0x00000000, -SQ_TEX_XY_FILTER_BILINEAR = 0x00000001, -SQ_TEX_XY_FILTER_ANISO_POINT = 0x00000002, -SQ_TEX_XY_FILTER_ANISO_BILINEAR = 0x00000003, -} SQ_TEX_XY_FILTER; - -/* - * SQ_TEX_Z_FILTER enum - */ - -typedef enum SQ_TEX_Z_FILTER { -SQ_TEX_Z_FILTER_NONE = 0x00000000, -SQ_TEX_Z_FILTER_POINT = 0x00000001, -SQ_TEX_Z_FILTER_LINEAR = 0x00000002, -} SQ_TEX_Z_FILTER; - -/* - * SQ_TEX_MIP_FILTER enum - */ - -typedef enum SQ_TEX_MIP_FILTER { -SQ_TEX_MIP_FILTER_NONE = 0x00000000, -SQ_TEX_MIP_FILTER_POINT = 0x00000001, -SQ_TEX_MIP_FILTER_LINEAR = 0x00000002, -SQ_TEX_MIP_FILTER_POINT_ANISO_ADJ = 0x00000003, -} SQ_TEX_MIP_FILTER; - -/* - * SQ_TEX_ANISO_RATIO enum - */ - -typedef enum SQ_TEX_ANISO_RATIO { -SQ_TEX_ANISO_RATIO_1 = 0x00000000, -SQ_TEX_ANISO_RATIO_2 = 0x00000001, -SQ_TEX_ANISO_RATIO_4 = 0x00000002, -SQ_TEX_ANISO_RATIO_8 = 0x00000003, -SQ_TEX_ANISO_RATIO_16 = 0x00000004, -} SQ_TEX_ANISO_RATIO; - -/* - * SQ_TEX_DEPTH_COMPARE enum - */ - -typedef enum SQ_TEX_DEPTH_COMPARE { -SQ_TEX_DEPTH_COMPARE_NEVER = 0x00000000, -SQ_TEX_DEPTH_COMPARE_LESS = 0x00000001, -SQ_TEX_DEPTH_COMPARE_EQUAL = 0x00000002, -SQ_TEX_DEPTH_COMPARE_LESSEQUAL = 0x00000003, -SQ_TEX_DEPTH_COMPARE_GREATER = 0x00000004, -SQ_TEX_DEPTH_COMPARE_NOTEQUAL = 0x00000005, -SQ_TEX_DEPTH_COMPARE_GREATEREQUAL = 0x00000006, -SQ_TEX_DEPTH_COMPARE_ALWAYS = 0x00000007, -} SQ_TEX_DEPTH_COMPARE; - -/* - * SQ_TEX_BORDER_COLOR enum - */ - -typedef enum SQ_TEX_BORDER_COLOR { -SQ_TEX_BORDER_COLOR_TRANS_BLACK = 0x00000000, -SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 0x00000001, 
-SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 0x00000002, -SQ_TEX_BORDER_COLOR_REGISTER = 0x00000003, -} SQ_TEX_BORDER_COLOR; - -/* - * SQ_RSRC_BUF_TYPE enum - */ - -typedef enum SQ_RSRC_BUF_TYPE { -SQ_RSRC_BUF = 0x00000000, -SQ_RSRC_BUF_RSVD_1 = 0x00000001, -SQ_RSRC_BUF_RSVD_2 = 0x00000002, -SQ_RSRC_BUF_RSVD_3 = 0x00000003, -} SQ_RSRC_BUF_TYPE; - -/* - * SQ_RSRC_IMG_TYPE enum - */ - -typedef enum SQ_RSRC_IMG_TYPE { -SQ_RSRC_IMG_RSVD_0 = 0x00000000, -SQ_RSRC_IMG_RSVD_1 = 0x00000001, -SQ_RSRC_IMG_RSVD_2 = 0x00000002, -SQ_RSRC_IMG_RSVD_3 = 0x00000003, -SQ_RSRC_IMG_RSVD_4 = 0x00000004, -SQ_RSRC_IMG_RSVD_5 = 0x00000005, -SQ_RSRC_IMG_RSVD_6 = 0x00000006, -SQ_RSRC_IMG_RSVD_7 = 0x00000007, -SQ_RSRC_IMG_1D = 0x00000008, -SQ_RSRC_IMG_2D = 0x00000009, -SQ_RSRC_IMG_3D = 0x0000000a, -SQ_RSRC_IMG_CUBE = 0x0000000b, -SQ_RSRC_IMG_1D_ARRAY = 0x0000000c, -SQ_RSRC_IMG_2D_ARRAY = 0x0000000d, -SQ_RSRC_IMG_2D_MSAA = 0x0000000e, -SQ_RSRC_IMG_2D_MSAA_ARRAY = 0x0000000f, -} SQ_RSRC_IMG_TYPE; - -/* - * SQ_RSRC_FLAT_TYPE enum - */ - -typedef enum SQ_RSRC_FLAT_TYPE { -SQ_RSRC_FLAT_RSVD_0 = 0x00000000, -SQ_RSRC_FLAT = 0x00000001, -SQ_RSRC_FLAT_RSVD_2 = 0x00000002, -SQ_RSRC_FLAT_RSVD_3 = 0x00000003, -} SQ_RSRC_FLAT_TYPE; - -/* - * SQ_IMG_FILTER_TYPE enum - */ - -typedef enum SQ_IMG_FILTER_TYPE { -SQ_IMG_FILTER_MODE_BLEND = 0x00000000, -SQ_IMG_FILTER_MODE_MIN = 0x00000001, -SQ_IMG_FILTER_MODE_MAX = 0x00000002, -} SQ_IMG_FILTER_TYPE; - -/* - * SQ_SEL_XYZW01 enum - */ - -typedef enum SQ_SEL_XYZW01 { -SQ_SEL_0 = 0x00000000, -SQ_SEL_1 = 0x00000001, -SQ_SEL_RESERVED_0 = 0x00000002, -SQ_SEL_RESERVED_1 = 0x00000003, -SQ_SEL_X = 0x00000004, -SQ_SEL_Y = 0x00000005, -SQ_SEL_Z = 0x00000006, -SQ_SEL_W = 0x00000007, -} SQ_SEL_XYZW01; - -/* - * SQ_WAVE_TYPE enum - */ - -typedef enum SQ_WAVE_TYPE { -SQ_WAVE_TYPE_PS = 0x00000000, -SQ_WAVE_TYPE_VS = 0x00000001, -SQ_WAVE_TYPE_GS = 0x00000002, -SQ_WAVE_TYPE_ES = 0x00000003, -SQ_WAVE_TYPE_HS = 0x00000004, -SQ_WAVE_TYPE_LS = 0x00000005, -SQ_WAVE_TYPE_CS = 0x00000006, 
-SQ_WAVE_TYPE_PS1 = 0x00000007, -} SQ_WAVE_TYPE; - -/* - * SQ_THREAD_TRACE_TOKEN_TYPE enum - */ - -typedef enum SQ_THREAD_TRACE_TOKEN_TYPE { -SQ_THREAD_TRACE_TOKEN_MISC = 0x00000000, -SQ_THREAD_TRACE_TOKEN_TIMESTAMP = 0x00000001, -SQ_THREAD_TRACE_TOKEN_REG = 0x00000002, -SQ_THREAD_TRACE_TOKEN_WAVE_START = 0x00000003, -SQ_THREAD_TRACE_TOKEN_WAVE_ALLOC = 0x00000004, -SQ_THREAD_TRACE_TOKEN_REG_CSPRIV = 0x00000005, -SQ_THREAD_TRACE_TOKEN_WAVE_END = 0x00000006, -SQ_THREAD_TRACE_TOKEN_EVENT = 0x00000007, -SQ_THREAD_TRACE_TOKEN_EVENT_CS = 0x00000008, -SQ_THREAD_TRACE_TOKEN_EVENT_GFX1 = 0x00000009, -SQ_THREAD_TRACE_TOKEN_INST = 0x0000000a, -SQ_THREAD_TRACE_TOKEN_INST_PC = 0x0000000b, -SQ_THREAD_TRACE_TOKEN_INST_USERDATA = 0x0000000c, -SQ_THREAD_TRACE_TOKEN_ISSUE = 0x0000000d, -SQ_THREAD_TRACE_TOKEN_PERF = 0x0000000e, -SQ_THREAD_TRACE_TOKEN_REG_CS = 0x0000000f, -} SQ_THREAD_TRACE_TOKEN_TYPE; - -/* - * SQ_THREAD_TRACE_MISC_TOKEN_TYPE enum - */ - -typedef enum SQ_THREAD_TRACE_MISC_TOKEN_TYPE { -SQ_THREAD_TRACE_MISC_TOKEN_TIME = 0x00000000, -SQ_THREAD_TRACE_MISC_TOKEN_TIME_RESET = 0x00000001, -SQ_THREAD_TRACE_MISC_TOKEN_PACKET_LOST = 0x00000002, -SQ_THREAD_TRACE_MISC_TOKEN_SURF_SYNC = 0x00000003, -SQ_THREAD_TRACE_MISC_TOKEN_TTRACE_STALL_BEGIN = 0x00000004, -SQ_THREAD_TRACE_MISC_TOKEN_TTRACE_STALL_END = 0x00000005, -SQ_THREAD_TRACE_MISC_TOKEN_SAVECTX = 0x00000006, -SQ_THREAD_TRACE_MISC_TOKEN_SHOOT_DOWN = 0x00000007, -} SQ_THREAD_TRACE_MISC_TOKEN_TYPE; - -/* - * SQ_THREAD_TRACE_INST_TYPE enum - */ - -typedef enum SQ_THREAD_TRACE_INST_TYPE { -SQ_THREAD_TRACE_INST_TYPE_SMEM_RD = 0x00000000, -SQ_THREAD_TRACE_INST_TYPE_SALU_32 = 0x00000001, -SQ_THREAD_TRACE_INST_TYPE_VMEM_RD = 0x00000002, -SQ_THREAD_TRACE_INST_TYPE_VMEM_WR = 0x00000003, -SQ_THREAD_TRACE_INST_TYPE_FLAT_WR = 0x00000004, -SQ_THREAD_TRACE_INST_TYPE_VALU_32 = 0x00000005, -SQ_THREAD_TRACE_INST_TYPE_LDS = 0x00000006, -SQ_THREAD_TRACE_INST_TYPE_PC = 0x00000007, -SQ_THREAD_TRACE_INST_TYPE_EXPREQ_GDS = 0x00000008, 
-SQ_THREAD_TRACE_INST_TYPE_EXPREQ_GFX = 0x00000009, -SQ_THREAD_TRACE_INST_TYPE_EXPGNT_PAR_COL = 0x0000000a, -SQ_THREAD_TRACE_INST_TYPE_EXPGNT_POS_GDS = 0x0000000b, -SQ_THREAD_TRACE_INST_TYPE_JUMP = 0x0000000c, -SQ_THREAD_TRACE_INST_TYPE_NEXT = 0x0000000d, -SQ_THREAD_TRACE_INST_TYPE_FLAT_RD = 0x0000000e, -SQ_THREAD_TRACE_INST_TYPE_OTHER_MSG = 0x0000000f, -SQ_THREAD_TRACE_INST_TYPE_SMEM_WR = 0x00000010, -SQ_THREAD_TRACE_INST_TYPE_SALU_64 = 0x00000011, -SQ_THREAD_TRACE_INST_TYPE_VALU_64 = 0x00000012, -SQ_THREAD_TRACE_INST_TYPE_SMEM_RD_REPLAY = 0x00000013, -SQ_THREAD_TRACE_INST_TYPE_SMEM_WR_REPLAY = 0x00000014, -SQ_THREAD_TRACE_INST_TYPE_VMEM_RD_REPLAY = 0x00000015, -SQ_THREAD_TRACE_INST_TYPE_VMEM_WR_REPLAY = 0x00000016, -SQ_THREAD_TRACE_INST_TYPE_FLAT_RD_REPLAY = 0x00000017, -SQ_THREAD_TRACE_INST_TYPE_FLAT_WR_REPLAY = 0x00000018, -SQ_THREAD_TRACE_INST_TYPE_FATAL_HALT = 0x00000019, -} SQ_THREAD_TRACE_INST_TYPE; - -/* - * SQ_THREAD_TRACE_REG_TYPE enum - */ - -typedef enum SQ_THREAD_TRACE_REG_TYPE { -SQ_THREAD_TRACE_REG_TYPE_EVENT = 0x00000000, -SQ_THREAD_TRACE_REG_TYPE_DRAW = 0x00000001, -SQ_THREAD_TRACE_REG_TYPE_DISPATCH = 0x00000002, -SQ_THREAD_TRACE_REG_TYPE_USERDATA = 0x00000003, -SQ_THREAD_TRACE_REG_TYPE_MARKER = 0x00000004, -SQ_THREAD_TRACE_REG_TYPE_GFXDEC = 0x00000005, -SQ_THREAD_TRACE_REG_TYPE_SHDEC = 0x00000006, -SQ_THREAD_TRACE_REG_TYPE_OTHER = 0x00000007, -} SQ_THREAD_TRACE_REG_TYPE; - -/* - * SQ_THREAD_TRACE_REG_OP enum - */ - -typedef enum SQ_THREAD_TRACE_REG_OP { -SQ_THREAD_TRACE_REG_OP_READ = 0x00000000, -SQ_THREAD_TRACE_REG_OP_WRITE = 0x00000001, -} SQ_THREAD_TRACE_REG_OP; - -/* - * SQ_THREAD_TRACE_MODE_SEL enum - */ - -typedef enum SQ_THREAD_TRACE_MODE_SEL { -SQ_THREAD_TRACE_MODE_OFF = 0x00000000, -SQ_THREAD_TRACE_MODE_ON = 0x00000001, -} SQ_THREAD_TRACE_MODE_SEL; - -/* - * SQ_THREAD_TRACE_CAPTURE_MODE enum - */ - -typedef enum SQ_THREAD_TRACE_CAPTURE_MODE { -SQ_THREAD_TRACE_CAPTURE_MODE_ALL = 0x00000000, -SQ_THREAD_TRACE_CAPTURE_MODE_SELECT = 
0x00000001, -SQ_THREAD_TRACE_CAPTURE_MODE_SELECT_DETAIL = 0x00000002, -} SQ_THREAD_TRACE_CAPTURE_MODE; - -/* - * SQ_THREAD_TRACE_VM_ID_MASK enum - */ - -typedef enum SQ_THREAD_TRACE_VM_ID_MASK { -SQ_THREAD_TRACE_VM_ID_MASK_SINGLE = 0x00000000, -SQ_THREAD_TRACE_VM_ID_MASK_ALL = 0x00000001, -SQ_THREAD_TRACE_VM_ID_MASK_SINGLE_DETAIL = 0x00000002, -} SQ_THREAD_TRACE_VM_ID_MASK; - -/* - * SQ_THREAD_TRACE_WAVE_MASK enum - */ - -typedef enum SQ_THREAD_TRACE_WAVE_MASK { -SQ_THREAD_TRACE_WAVE_MASK_NONE = 0x00000000, -SQ_THREAD_TRACE_WAVE_MASK_ALL = 0x00000001, -} SQ_THREAD_TRACE_WAVE_MASK; - -/* - * SQ_THREAD_TRACE_ISSUE enum - */ - -typedef enum SQ_THREAD_TRACE_ISSUE { -SQ_THREAD_TRACE_ISSUE_NULL = 0x00000000, -SQ_THREAD_TRACE_ISSUE_STALL = 0x00000001, -SQ_THREAD_TRACE_ISSUE_INST = 0x00000002, -SQ_THREAD_TRACE_ISSUE_IMMED = 0x00000003, -} SQ_THREAD_TRACE_ISSUE; - -/* - * SQ_THREAD_TRACE_ISSUE_MASK enum - */ - -typedef enum SQ_THREAD_TRACE_ISSUE_MASK { -SQ_THREAD_TRACE_ISSUE_MASK_ALL = 0x00000000, -SQ_THREAD_TRACE_ISSUE_MASK_STALLED = 0x00000001, -SQ_THREAD_TRACE_ISSUE_MASK_STALLED_AND_IMMED = 0x00000002, -SQ_THREAD_TRACE_ISSUE_MASK_IMMED = 0x00000003, -} SQ_THREAD_TRACE_ISSUE_MASK; - -/* - * SQ_PERF_SEL enum - */ - -typedef enum SQ_PERF_SEL { -SQ_PERF_SEL_NONE = 0x00000000, -SQ_PERF_SEL_ACCUM_PREV = 0x00000001, -SQ_PERF_SEL_CYCLES = 0x00000002, -SQ_PERF_SEL_BUSY_CYCLES = 0x00000003, -SQ_PERF_SEL_WAVES = 0x00000004, -SQ_PERF_SEL_LEVEL_WAVES = 0x00000005, -SQ_PERF_SEL_WAVES_EQ_64 = 0x00000006, -SQ_PERF_SEL_WAVES_LT_64 = 0x00000007, -SQ_PERF_SEL_WAVES_LT_48 = 0x00000008, -SQ_PERF_SEL_WAVES_LT_32 = 0x00000009, -SQ_PERF_SEL_WAVES_LT_16 = 0x0000000a, -SQ_PERF_SEL_WAVES_CU = 0x0000000b, -SQ_PERF_SEL_LEVEL_WAVES_CU = 0x0000000c, -SQ_PERF_SEL_BUSY_CU_CYCLES = 0x0000000d, -SQ_PERF_SEL_ITEMS = 0x0000000e, -SQ_PERF_SEL_QUADS = 0x0000000f, -SQ_PERF_SEL_EVENTS = 0x00000010, -SQ_PERF_SEL_SURF_SYNCS = 0x00000011, -SQ_PERF_SEL_TTRACE_REQS = 0x00000012, -SQ_PERF_SEL_TTRACE_INFLIGHT_REQS = 
0x00000013, -SQ_PERF_SEL_TTRACE_STALL = 0x00000014, -SQ_PERF_SEL_MSG_CNTR = 0x00000015, -SQ_PERF_SEL_MSG_PERF = 0x00000016, -SQ_PERF_SEL_MSG_GSCNT = 0x00000017, -SQ_PERF_SEL_MSG_INTERRUPT = 0x00000018, -SQ_PERF_SEL_INSTS = 0x00000019, -SQ_PERF_SEL_INSTS_VALU = 0x0000001a, -SQ_PERF_SEL_INSTS_VMEM_WR = 0x0000001b, -SQ_PERF_SEL_INSTS_VMEM_RD = 0x0000001c, -SQ_PERF_SEL_INSTS_VMEM = 0x0000001d, -SQ_PERF_SEL_INSTS_SALU = 0x0000001e, -SQ_PERF_SEL_INSTS_SMEM = 0x0000001f, -SQ_PERF_SEL_INSTS_FLAT = 0x00000020, -SQ_PERF_SEL_INSTS_FLAT_LDS_ONLY = 0x00000021, -SQ_PERF_SEL_INSTS_LDS = 0x00000022, -SQ_PERF_SEL_INSTS_GDS = 0x00000023, -SQ_PERF_SEL_INSTS_EXP = 0x00000024, -SQ_PERF_SEL_INSTS_EXP_GDS = 0x00000025, -SQ_PERF_SEL_INSTS_BRANCH = 0x00000026, -SQ_PERF_SEL_INSTS_SENDMSG = 0x00000027, -SQ_PERF_SEL_INSTS_VSKIPPED = 0x00000028, -SQ_PERF_SEL_INST_LEVEL_VMEM = 0x00000029, -SQ_PERF_SEL_INST_LEVEL_SMEM = 0x0000002a, -SQ_PERF_SEL_INST_LEVEL_LDS = 0x0000002b, -SQ_PERF_SEL_INST_LEVEL_GDS = 0x0000002c, -SQ_PERF_SEL_INST_LEVEL_EXP = 0x0000002d, -SQ_PERF_SEL_WAVE_CYCLES = 0x0000002e, -SQ_PERF_SEL_WAVE_READY = 0x0000002f, -SQ_PERF_SEL_WAIT_CNT_VM = 0x00000030, -SQ_PERF_SEL_WAIT_CNT_LGKM = 0x00000031, -SQ_PERF_SEL_WAIT_CNT_EXP = 0x00000032, -SQ_PERF_SEL_WAIT_CNT_ANY = 0x00000033, -SQ_PERF_SEL_WAIT_BARRIER = 0x00000034, -SQ_PERF_SEL_WAIT_EXP_ALLOC = 0x00000035, -SQ_PERF_SEL_WAIT_SLEEP = 0x00000036, -SQ_PERF_SEL_WAIT_SLEEP_XNACK = 0x00000037, -SQ_PERF_SEL_WAIT_OTHER = 0x00000038, -SQ_PERF_SEL_WAIT_ANY = 0x00000039, -SQ_PERF_SEL_WAIT_TTRACE = 0x0000003a, -SQ_PERF_SEL_WAIT_IFETCH = 0x0000003b, -SQ_PERF_SEL_WAIT_INST_ANY = 0x0000003c, -SQ_PERF_SEL_WAIT_INST_VMEM = 0x0000003d, -SQ_PERF_SEL_WAIT_INST_SCA = 0x0000003e, -SQ_PERF_SEL_WAIT_INST_LDS = 0x0000003f, -SQ_PERF_SEL_WAIT_INST_VALU = 0x00000040, -SQ_PERF_SEL_WAIT_INST_EXP_GDS = 0x00000041, -SQ_PERF_SEL_WAIT_INST_MISC = 0x00000042, -SQ_PERF_SEL_WAIT_INST_FLAT = 0x00000043, -SQ_PERF_SEL_ACTIVE_INST_ANY = 0x00000044, 
-SQ_PERF_SEL_ACTIVE_INST_VMEM = 0x00000045, -SQ_PERF_SEL_ACTIVE_INST_LDS = 0x00000046, -SQ_PERF_SEL_ACTIVE_INST_VALU = 0x00000047, -SQ_PERF_SEL_ACTIVE_INST_SCA = 0x00000048, -SQ_PERF_SEL_ACTIVE_INST_EXP_GDS = 0x00000049, -SQ_PERF_SEL_ACTIVE_INST_MISC = 0x0000004a, -SQ_PERF_SEL_ACTIVE_INST_FLAT = 0x0000004b, -SQ_PERF_SEL_INST_CYCLES_VMEM_WR = 0x0000004c, -SQ_PERF_SEL_INST_CYCLES_VMEM_RD = 0x0000004d, -SQ_PERF_SEL_INST_CYCLES_VMEM_ADDR = 0x0000004e, -SQ_PERF_SEL_INST_CYCLES_VMEM_DATA = 0x0000004f, -SQ_PERF_SEL_INST_CYCLES_VMEM_CMD = 0x00000050, -SQ_PERF_SEL_INST_CYCLES_EXP = 0x00000051, -SQ_PERF_SEL_INST_CYCLES_GDS = 0x00000052, -SQ_PERF_SEL_INST_CYCLES_SMEM = 0x00000053, -SQ_PERF_SEL_INST_CYCLES_SALU = 0x00000054, -SQ_PERF_SEL_THREAD_CYCLES_VALU = 0x00000055, -SQ_PERF_SEL_THREAD_CYCLES_VALU_MAX = 0x00000056, -SQ_PERF_SEL_IFETCH = 0x00000057, -SQ_PERF_SEL_IFETCH_LEVEL = 0x00000058, -SQ_PERF_SEL_CBRANCH_FORK = 0x00000059, -SQ_PERF_SEL_CBRANCH_FORK_SPLIT = 0x0000005a, -SQ_PERF_SEL_VALU_LDS_DIRECT_RD = 0x0000005b, -SQ_PERF_SEL_VALU_LDS_INTERP_OP = 0x0000005c, -SQ_PERF_SEL_LDS_BANK_CONFLICT = 0x0000005d, -SQ_PERF_SEL_LDS_ADDR_CONFLICT = 0x0000005e, -SQ_PERF_SEL_LDS_UNALIGNED_STALL = 0x0000005f, -SQ_PERF_SEL_LDS_MEM_VIOLATIONS = 0x00000060, -SQ_PERF_SEL_LDS_ATOMIC_RETURN = 0x00000061, -SQ_PERF_SEL_LDS_IDX_ACTIVE = 0x00000062, -SQ_PERF_SEL_VALU_DEP_STALL = 0x00000063, -SQ_PERF_SEL_VALU_STARVE = 0x00000064, -SQ_PERF_SEL_EXP_REQ_FIFO_FULL = 0x00000065, -SQ_PERF_SEL_LDS_DATA_FIFO_FULL = 0x00000066, -SQ_PERF_SEL_LDS_CMD_FIFO_FULL = 0x00000067, -SQ_PERF_SEL_VMEM_TA_ADDR_FIFO_FULL = 0x00000068, -SQ_PERF_SEL_VMEM_TA_CMD_FIFO_FULL = 0x00000069, -SQ_PERF_SEL_VMEM_EX_DATA_REG_BUSY = 0x0000006a, -SQ_PERF_SEL_VMEM_WR_TA_DATA_FIFO_FULL = 0x0000006b, -SQ_PERF_SEL_VALU_SRC_C_CONFLICT = 0x0000006c, -SQ_PERF_SEL_VMEM_RD_SRC_CD_CONFLICT = 0x0000006d, -SQ_PERF_SEL_VMEM_WR_SRC_CD_CONFLICT = 0x0000006e, -SQ_PERF_SEL_FLAT_SRC_CD_CONFLICT = 0x0000006f, -SQ_PERF_SEL_LDS_SRC_CD_CONFLICT = 
0x00000070, -SQ_PERF_SEL_SRC_CD_BUSY = 0x00000071, -SQ_PERF_SEL_PT_POWER_STALL = 0x00000072, -SQ_PERF_SEL_USER0 = 0x00000073, -SQ_PERF_SEL_USER1 = 0x00000074, -SQ_PERF_SEL_USER2 = 0x00000075, -SQ_PERF_SEL_USER3 = 0x00000076, -SQ_PERF_SEL_USER4 = 0x00000077, -SQ_PERF_SEL_USER5 = 0x00000078, -SQ_PERF_SEL_USER6 = 0x00000079, -SQ_PERF_SEL_USER7 = 0x0000007a, -SQ_PERF_SEL_USER8 = 0x0000007b, -SQ_PERF_SEL_USER9 = 0x0000007c, -SQ_PERF_SEL_USER10 = 0x0000007d, -SQ_PERF_SEL_USER11 = 0x0000007e, -SQ_PERF_SEL_USER12 = 0x0000007f, -SQ_PERF_SEL_USER13 = 0x00000080, -SQ_PERF_SEL_USER14 = 0x00000081, -SQ_PERF_SEL_USER15 = 0x00000082, -SQ_PERF_SEL_USER_LEVEL0 = 0x00000083, -SQ_PERF_SEL_USER_LEVEL1 = 0x00000084, -SQ_PERF_SEL_USER_LEVEL2 = 0x00000085, -SQ_PERF_SEL_USER_LEVEL3 = 0x00000086, -SQ_PERF_SEL_USER_LEVEL4 = 0x00000087, -SQ_PERF_SEL_USER_LEVEL5 = 0x00000088, -SQ_PERF_SEL_USER_LEVEL6 = 0x00000089, -SQ_PERF_SEL_USER_LEVEL7 = 0x0000008a, -SQ_PERF_SEL_USER_LEVEL8 = 0x0000008b, -SQ_PERF_SEL_USER_LEVEL9 = 0x0000008c, -SQ_PERF_SEL_USER_LEVEL10 = 0x0000008d, -SQ_PERF_SEL_USER_LEVEL11 = 0x0000008e, -SQ_PERF_SEL_USER_LEVEL12 = 0x0000008f, -SQ_PERF_SEL_USER_LEVEL13 = 0x00000090, -SQ_PERF_SEL_USER_LEVEL14 = 0x00000091, -SQ_PERF_SEL_USER_LEVEL15 = 0x00000092, -SQ_PERF_SEL_POWER_VALU = 0x00000093, -SQ_PERF_SEL_POWER_VALU0 = 0x00000094, -SQ_PERF_SEL_POWER_VALU1 = 0x00000095, -SQ_PERF_SEL_POWER_VALU2 = 0x00000096, -SQ_PERF_SEL_POWER_GPR_RD = 0x00000097, -SQ_PERF_SEL_POWER_GPR_WR = 0x00000098, -SQ_PERF_SEL_POWER_LDS_BUSY = 0x00000099, -SQ_PERF_SEL_POWER_ALU_BUSY = 0x0000009a, -SQ_PERF_SEL_POWER_TEX_BUSY = 0x0000009b, -SQ_PERF_SEL_ACCUM_PREV_HIRES = 0x0000009c, -SQ_PERF_SEL_WAVES_RESTORED = 0x0000009d, -SQ_PERF_SEL_WAVES_SAVED = 0x0000009e, -SQ_PERF_SEL_INSTS_SMEM_NORM = 0x0000009f, -SQ_PERF_SEL_ATC_INSTS_VMEM = 0x000000a0, -SQ_PERF_SEL_ATC_INST_LEVEL_VMEM = 0x000000a1, -SQ_PERF_SEL_ATC_XNACK_FIRST = 0x000000a2, -SQ_PERF_SEL_ATC_XNACK_ALL = 0x000000a3, -SQ_PERF_SEL_ATC_XNACK_FIFO_FULL = 
0x000000a4, -SQ_PERF_SEL_ATC_INSTS_SMEM = 0x000000a5, -SQ_PERF_SEL_ATC_INST_LEVEL_SMEM = 0x000000a6, -SQ_PERF_SEL_IFETCH_XNACK = 0x000000a7, -SQ_PERF_SEL_TLB_SHOOTDOWN = 0x000000a8, -SQ_PERF_SEL_TLB_SHOOTDOWN_CYCLES = 0x000000a9, -SQ_PERF_SEL_INSTS_VMEM_WR_REPLAY = 0x000000aa, -SQ_PERF_SEL_INSTS_VMEM_RD_REPLAY = 0x000000ab, -SQ_PERF_SEL_INSTS_VMEM_REPLAY = 0x000000ac, -SQ_PERF_SEL_INSTS_SMEM_REPLAY = 0x000000ad, -SQ_PERF_SEL_INSTS_SMEM_NORM_REPLAY = 0x000000ae, -SQ_PERF_SEL_INSTS_FLAT_REPLAY = 0x000000af, -SQ_PERF_SEL_ATC_INSTS_VMEM_REPLAY = 0x000000b0, -SQ_PERF_SEL_ATC_INSTS_SMEM_REPLAY = 0x000000b1, -SQ_PERF_SEL_UTCL1_TRANSLATION_MISS = 0x000000b2, -SQ_PERF_SEL_UTCL1_PERMISSION_MISS = 0x000000b3, -SQ_PERF_SEL_UTCL1_REQUEST = 0x000000b4, -SQ_PERF_SEL_UTCL1_STALL_MISSFIFO_FULL = 0x000000b5, -SQ_PERF_SEL_UTCL1_STALL_INFLIGHT_MAX = 0x000000b6, -SQ_PERF_SEL_UTCL1_STALL_LRU_INFLIGHT = 0x000000b7, -SQ_PERF_SEL_UTCL1_LFIFO_FULL = 0x000000b8, -SQ_PERF_SEL_UTCL1_STALL_LFIFO_NOT_RES = 0x000000b9, -SQ_PERF_SEL_UTCL1_STALL_UTCL2_REQ_OUT_OF_CREDITS = 0x000000ba, -SQ_PERF_SEL_DUMMY_END = 0x000000bb, -SQ_PERF_SEL_DUMMY_LAST = 0x000000ff, -SQC_PERF_SEL_ICACHE_INPUT_VALID_READY = 0x00000100, -SQC_PERF_SEL_ICACHE_INPUT_VALID_READYB = 0x00000101, -SQC_PERF_SEL_ICACHE_INPUT_VALIDB = 0x00000102, -SQC_PERF_SEL_DCACHE_INPUT_VALID_READY = 0x00000103, -SQC_PERF_SEL_DCACHE_INPUT_VALID_READYB = 0x00000104, -SQC_PERF_SEL_DCACHE_INPUT_VALIDB = 0x00000105, -SQC_PERF_SEL_TC_REQ = 0x00000106, -SQC_PERF_SEL_TC_INST_REQ = 0x00000107, -SQC_PERF_SEL_TC_DATA_READ_REQ = 0x00000108, -SQC_PERF_SEL_TC_DATA_WRITE_REQ = 0x00000109, -SQC_PERF_SEL_TC_DATA_ATOMIC_REQ = 0x0000010a, -SQC_PERF_SEL_TC_STALL = 0x0000010b, -SQC_PERF_SEL_TC_STARVE = 0x0000010c, -SQC_PERF_SEL_ICACHE_BUSY_CYCLES = 0x0000010d, -SQC_PERF_SEL_ICACHE_REQ = 0x0000010e, -SQC_PERF_SEL_ICACHE_HITS = 0x0000010f, -SQC_PERF_SEL_ICACHE_MISSES = 0x00000110, -SQC_PERF_SEL_ICACHE_MISSES_DUPLICATE = 0x00000111, -SQC_PERF_SEL_ICACHE_INVAL_INST = 
0x00000112, -SQC_PERF_SEL_ICACHE_INVAL_ASYNC = 0x00000113, -SQC_PERF_SEL_ICACHE_INPUT_STALL_ARB_NO_GRANT = 0x00000114, -SQC_PERF_SEL_ICACHE_INPUT_STALL_BANK_READYB = 0x00000115, -SQC_PERF_SEL_ICACHE_CACHE_STALLED = 0x00000116, -SQC_PERF_SEL_ICACHE_CACHE_STALL_INFLIGHT_NONZERO = 0x00000117, -SQC_PERF_SEL_ICACHE_CACHE_STALL_INFLIGHT_MAX = 0x00000118, -SQC_PERF_SEL_ICACHE_CACHE_STALL_OUTPUT = 0x00000119, -SQC_PERF_SEL_ICACHE_CACHE_STALL_OUTPUT_MISS_FIFO = 0x0000011a, -SQC_PERF_SEL_ICACHE_CACHE_STALL_OUTPUT_HIT_FIFO = 0x0000011b, -SQC_PERF_SEL_ICACHE_CACHE_STALL_OUTPUT_TC_IF = 0x0000011c, -SQC_PERF_SEL_ICACHE_STALL_OUTXBAR_ARB_NO_GRANT = 0x0000011d, -SQC_PERF_SEL_ICACHE_PREFETCH_1 = 0x0000011e, -SQC_PERF_SEL_ICACHE_PREFETCH_2 = 0x0000011f, -SQC_PERF_SEL_ICACHE_PREFETCH_FILTERED = 0x00000120, -SQC_PERF_SEL_DCACHE_BUSY_CYCLES = 0x00000121, -SQC_PERF_SEL_DCACHE_REQ = 0x00000122, -SQC_PERF_SEL_DCACHE_HITS = 0x00000123, -SQC_PERF_SEL_DCACHE_MISSES = 0x00000124, -SQC_PERF_SEL_DCACHE_MISSES_DUPLICATE = 0x00000125, -SQC_PERF_SEL_DCACHE_HIT_LRU_READ = 0x00000126, -SQC_PERF_SEL_DCACHE_MISS_EVICT_READ = 0x00000127, -SQC_PERF_SEL_DCACHE_WC_LRU_WRITE = 0x00000128, -SQC_PERF_SEL_DCACHE_WT_EVICT_WRITE = 0x00000129, -SQC_PERF_SEL_DCACHE_ATOMIC = 0x0000012a, -SQC_PERF_SEL_DCACHE_VOLATILE = 0x0000012b, -SQC_PERF_SEL_DCACHE_INVAL_INST = 0x0000012c, -SQC_PERF_SEL_DCACHE_INVAL_ASYNC = 0x0000012d, -SQC_PERF_SEL_DCACHE_INVAL_VOLATILE_INST = 0x0000012e, -SQC_PERF_SEL_DCACHE_INVAL_VOLATILE_ASYNC = 0x0000012f, -SQC_PERF_SEL_DCACHE_WB_INST = 0x00000130, -SQC_PERF_SEL_DCACHE_WB_ASYNC = 0x00000131, -SQC_PERF_SEL_DCACHE_WB_VOLATILE_INST = 0x00000132, -SQC_PERF_SEL_DCACHE_WB_VOLATILE_ASYNC = 0x00000133, -SQC_PERF_SEL_DCACHE_INPUT_STALL_ARB_NO_GRANT = 0x00000134, -SQC_PERF_SEL_DCACHE_INPUT_STALL_BANK_READYB = 0x00000135, -SQC_PERF_SEL_DCACHE_CACHE_STALLED = 0x00000136, -SQC_PERF_SEL_DCACHE_CACHE_STALL_INFLIGHT_MAX = 0x00000137, -SQC_PERF_SEL_DCACHE_CACHE_STALL_OUTPUT = 0x00000138, 
-SQC_PERF_SEL_DCACHE_CACHE_STALL_EVICT = 0x00000139, -SQC_PERF_SEL_DCACHE_CACHE_STALL_UNORDERED = 0x0000013a, -SQC_PERF_SEL_DCACHE_CACHE_STALL_ALLOC_UNAVAILABLE = 0x0000013b, -SQC_PERF_SEL_DCACHE_CACHE_STALL_FORCE_EVICT = 0x0000013c, -SQC_PERF_SEL_DCACHE_CACHE_STALL_MULTI_FLUSH = 0x0000013d, -SQC_PERF_SEL_DCACHE_CACHE_STALL_FLUSH_DONE = 0x0000013e, -SQC_PERF_SEL_DCACHE_CACHE_STALL_OUTPUT_MISS_FIFO = 0x0000013f, -SQC_PERF_SEL_DCACHE_CACHE_STALL_OUTPUT_HIT_FIFO = 0x00000140, -SQC_PERF_SEL_DCACHE_CACHE_STALL_OUTPUT_TC_IF = 0x00000141, -SQC_PERF_SEL_DCACHE_STALL_OUTXBAR_ARB_NO_GRANT = 0x00000142, -SQC_PERF_SEL_DCACHE_REQ_READ_1 = 0x00000143, -SQC_PERF_SEL_DCACHE_REQ_READ_2 = 0x00000144, -SQC_PERF_SEL_DCACHE_REQ_READ_4 = 0x00000145, -SQC_PERF_SEL_DCACHE_REQ_READ_8 = 0x00000146, -SQC_PERF_SEL_DCACHE_REQ_READ_16 = 0x00000147, -SQC_PERF_SEL_DCACHE_REQ_TIME = 0x00000148, -SQC_PERF_SEL_DCACHE_REQ_WRITE_1 = 0x00000149, -SQC_PERF_SEL_DCACHE_REQ_WRITE_2 = 0x0000014a, -SQC_PERF_SEL_DCACHE_REQ_WRITE_4 = 0x0000014b, -SQC_PERF_SEL_DCACHE_REQ_ATC_PROBE = 0x0000014c, -SQC_PERF_SEL_SQ_DCACHE_REQS = 0x0000014d, -SQC_PERF_SEL_DCACHE_FLAT_REQ = 0x0000014e, -SQC_PERF_SEL_DCACHE_NONFLAT_REQ = 0x0000014f, -SQC_PERF_SEL_ICACHE_INFLIGHT_LEVEL = 0x00000150, -SQC_PERF_SEL_DCACHE_INFLIGHT_LEVEL = 0x00000151, -SQC_PERF_SEL_TC_INFLIGHT_LEVEL = 0x00000152, -SQC_PERF_SEL_ICACHE_TC_INFLIGHT_LEVEL = 0x00000153, -SQC_PERF_SEL_DCACHE_TC_INFLIGHT_LEVEL = 0x00000154, -SQC_PERF_SEL_ICACHE_GATCL1_TRANSLATION_MISS = 0x00000155, -SQC_PERF_SEL_ICACHE_GATCL1_PERMISSION_MISS = 0x00000156, -SQC_PERF_SEL_ICACHE_GATCL1_REQUEST = 0x00000157, -SQC_PERF_SEL_ICACHE_GATCL1_STALL_INFLIGHT_MAX = 0x00000158, -SQC_PERF_SEL_ICACHE_GATCL1_STALL_LRU_INFLIGHT = 0x00000159, -SQC_PERF_SEL_ICACHE_GATCL1_LFIFO_FULL = 0x0000015a, -SQC_PERF_SEL_ICACHE_GATCL1_STALL_LFIFO_NOT_RES = 0x0000015b, -SQC_PERF_SEL_ICACHE_GATCL1_STALL_ATCL2_REQ_OUT_OF_CREDITS = 0x0000015c, -SQC_PERF_SEL_ICACHE_GATCL1_ATCL2_INFLIGHT = 0x0000015d, 
-SQC_PERF_SEL_ICACHE_GATCL1_STALL_MISSFIFO_FULL = 0x0000015e, -SQC_PERF_SEL_DCACHE_GATCL1_TRANSLATION_MISS = 0x0000015f, -SQC_PERF_SEL_DCACHE_GATCL1_PERMISSION_MISS = 0x00000160, -SQC_PERF_SEL_DCACHE_GATCL1_REQUEST = 0x00000161, -SQC_PERF_SEL_DCACHE_GATCL1_STALL_INFLIGHT_MAX = 0x00000162, -SQC_PERF_SEL_DCACHE_GATCL1_STALL_LRU_INFLIGHT = 0x00000163, -SQC_PERF_SEL_DCACHE_GATCL1_LFIFO_FULL = 0x00000164, -SQC_PERF_SEL_DCACHE_GATCL1_STALL_LFIFO_NOT_RES = 0x00000165, -SQC_PERF_SEL_DCACHE_GATCL1_STALL_ATCL2_REQ_OUT_OF_CREDITS = 0x00000166, -SQC_PERF_SEL_DCACHE_GATCL1_ATCL2_INFLIGHT = 0x00000167, -SQC_PERF_SEL_DCACHE_GATCL1_STALL_MISSFIFO_FULL = 0x00000168, -SQC_PERF_SEL_DCACHE_GATCL1_STALL_MULTI_MISS = 0x00000169, -SQC_PERF_SEL_DCACHE_GATCL1_HIT_FIFO_FULL = 0x0000016a, -SQC_PERF_SEL_DUMMY_LAST = 0x0000016b, -} SQ_PERF_SEL; - -/* - * SQ_CAC_POWER_SEL enum - */ - -typedef enum SQ_CAC_POWER_SEL { -SQ_CAC_POWER_VALU = 0x00000000, -SQ_CAC_POWER_VALU0 = 0x00000001, -SQ_CAC_POWER_VALU1 = 0x00000002, -SQ_CAC_POWER_VALU2 = 0x00000003, -SQ_CAC_POWER_GPR_RD = 0x00000004, -SQ_CAC_POWER_GPR_WR = 0x00000005, -SQ_CAC_POWER_LDS_BUSY = 0x00000006, -SQ_CAC_POWER_ALU_BUSY = 0x00000007, -SQ_CAC_POWER_TEX_BUSY = 0x00000008, -} SQ_CAC_POWER_SEL; - -/* - * SQ_IND_CMD_CMD enum - */ - -typedef enum SQ_IND_CMD_CMD { -SQ_IND_CMD_CMD_NULL = 0x00000000, -SQ_IND_CMD_CMD_SETHALT = 0x00000001, -SQ_IND_CMD_CMD_SAVECTX = 0x00000002, -SQ_IND_CMD_CMD_KILL = 0x00000003, -SQ_IND_CMD_CMD_DEBUG = 0x00000004, -SQ_IND_CMD_CMD_TRAP = 0x00000005, -SQ_IND_CMD_CMD_SET_SPI_PRIO = 0x00000006, -SQ_IND_CMD_CMD_SETFATALHALT = 0x00000007, -} SQ_IND_CMD_CMD; - -/* - * SQ_IND_CMD_MODE enum - */ - -typedef enum SQ_IND_CMD_MODE { -SQ_IND_CMD_MODE_SINGLE = 0x00000000, -SQ_IND_CMD_MODE_BROADCAST = 0x00000001, -SQ_IND_CMD_MODE_BROADCAST_QUEUE = 0x00000002, -SQ_IND_CMD_MODE_BROADCAST_PIPE = 0x00000003, -SQ_IND_CMD_MODE_BROADCAST_ME = 0x00000004, -} SQ_IND_CMD_MODE; - -/* - * SQ_EDC_INFO_SOURCE enum - */ - -typedef enum 
SQ_EDC_INFO_SOURCE { -SQ_EDC_INFO_SOURCE_INVALID = 0x00000000, -SQ_EDC_INFO_SOURCE_INST = 0x00000001, -SQ_EDC_INFO_SOURCE_SGPR = 0x00000002, -SQ_EDC_INFO_SOURCE_VGPR = 0x00000003, -SQ_EDC_INFO_SOURCE_LDS = 0x00000004, -SQ_EDC_INFO_SOURCE_GDS = 0x00000005, -SQ_EDC_INFO_SOURCE_TA = 0x00000006, -} SQ_EDC_INFO_SOURCE; - -/* - * SQ_ROUND_MODE enum - */ - -typedef enum SQ_ROUND_MODE { -SQ_ROUND_NEAREST_EVEN = 0x00000000, -SQ_ROUND_PLUS_INFINITY = 0x00000001, -SQ_ROUND_MINUS_INFINITY = 0x00000002, -SQ_ROUND_TO_ZERO = 0x00000003, -} SQ_ROUND_MODE; - -/* - * SQ_INTERRUPT_WORD_ENCODING enum - */ - -typedef enum SQ_INTERRUPT_WORD_ENCODING { -SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x00000000, -SQ_INTERRUPT_WORD_ENCODING_INST = 0x00000001, -SQ_INTERRUPT_WORD_ENCODING_ERROR = 0x00000002, -} SQ_INTERRUPT_WORD_ENCODING; - -/* - * ENUM_SQ_EXPORT_RAT_INST enum - */ - -typedef enum ENUM_SQ_EXPORT_RAT_INST { -SQ_EXPORT_RAT_INST_NOP = 0x00000000, -SQ_EXPORT_RAT_INST_STORE_TYPED = 0x00000001, -SQ_EXPORT_RAT_INST_STORE_RAW = 0x00000002, -SQ_EXPORT_RAT_INST_STORE_RAW_FDENORM = 0x00000003, -SQ_EXPORT_RAT_INST_CMPXCHG_INT = 0x00000004, -SQ_EXPORT_RAT_INST_CMPXCHG_FLT = 0x00000005, -SQ_EXPORT_RAT_INST_CMPXCHG_FDENORM = 0x00000006, -SQ_EXPORT_RAT_INST_ADD = 0x00000007, -SQ_EXPORT_RAT_INST_SUB = 0x00000008, -SQ_EXPORT_RAT_INST_RSUB = 0x00000009, -SQ_EXPORT_RAT_INST_MIN_INT = 0x0000000a, -SQ_EXPORT_RAT_INST_MIN_UINT = 0x0000000b, -SQ_EXPORT_RAT_INST_MAX_INT = 0x0000000c, -SQ_EXPORT_RAT_INST_MAX_UINT = 0x0000000d, -SQ_EXPORT_RAT_INST_AND = 0x0000000e, -SQ_EXPORT_RAT_INST_OR = 0x0000000f, -SQ_EXPORT_RAT_INST_XOR = 0x00000010, -SQ_EXPORT_RAT_INST_MSKOR = 0x00000011, -SQ_EXPORT_RAT_INST_INC_UINT = 0x00000012, -SQ_EXPORT_RAT_INST_DEC_UINT = 0x00000013, -SQ_EXPORT_RAT_INST_STORE_DWORD = 0x00000014, -SQ_EXPORT_RAT_INST_STORE_SHORT = 0x00000015, -SQ_EXPORT_RAT_INST_STORE_BYTE = 0x00000016, -SQ_EXPORT_RAT_INST_NOP_RTN = 0x00000020, -SQ_EXPORT_RAT_INST_XCHG_RTN = 0x00000022, 
-SQ_EXPORT_RAT_INST_XCHG_FDENORM_RTN = 0x00000023, -SQ_EXPORT_RAT_INST_CMPXCHG_INT_RTN = 0x00000024, -SQ_EXPORT_RAT_INST_CMPXCHG_FLT_RTN = 0x00000025, -SQ_EXPORT_RAT_INST_CMPXCHG_FDENORM_RTN = 0x00000026, -SQ_EXPORT_RAT_INST_ADD_RTN = 0x00000027, -SQ_EXPORT_RAT_INST_SUB_RTN = 0x00000028, -SQ_EXPORT_RAT_INST_RSUB_RTN = 0x00000029, -SQ_EXPORT_RAT_INST_MIN_INT_RTN = 0x0000002a, -SQ_EXPORT_RAT_INST_MIN_UINT_RTN = 0x0000002b, -SQ_EXPORT_RAT_INST_MAX_INT_RTN = 0x0000002c, -SQ_EXPORT_RAT_INST_MAX_UINT_RTN = 0x0000002d, -SQ_EXPORT_RAT_INST_AND_RTN = 0x0000002e, -SQ_EXPORT_RAT_INST_OR_RTN = 0x0000002f, -SQ_EXPORT_RAT_INST_XOR_RTN = 0x00000030, -SQ_EXPORT_RAT_INST_MSKOR_RTN = 0x00000031, -SQ_EXPORT_RAT_INST_INC_UINT_RTN = 0x00000032, -SQ_EXPORT_RAT_INST_DEC_UINT_RTN = 0x00000033, -} ENUM_SQ_EXPORT_RAT_INST; - -/* - * SQ_IBUF_ST enum - */ - -typedef enum SQ_IBUF_ST { -SQ_IBUF_IB_IDLE = 0x00000000, -SQ_IBUF_IB_INI_WAIT_GNT = 0x00000001, -SQ_IBUF_IB_INI_WAIT_DRET = 0x00000002, -SQ_IBUF_IB_LE_4DW = 0x00000003, -SQ_IBUF_IB_WAIT_DRET = 0x00000004, -SQ_IBUF_IB_EMPTY_WAIT_DRET = 0x00000005, -SQ_IBUF_IB_DRET = 0x00000006, -SQ_IBUF_IB_EMPTY_WAIT_GNT = 0x00000007, -} SQ_IBUF_ST; - -/* - * SQ_INST_STR_ST enum - */ - -typedef enum SQ_INST_STR_ST { -SQ_INST_STR_IB_WAVE_NORML = 0x00000000, -SQ_INST_STR_IB_WAVE2ID_NORMAL_INST_AV = 0x00000001, -SQ_INST_STR_IB_WAVE_INTERNAL_INST_AV = 0x00000002, -SQ_INST_STR_IB_WAVE_INST_SKIP_AV = 0x00000003, -SQ_INST_STR_IB_WAVE_SETVSKIP_ST0 = 0x00000004, -SQ_INST_STR_IB_WAVE_SETVSKIP_ST1 = 0x00000005, -SQ_INST_STR_IB_WAVE_NOP_SLEEP_WAIT = 0x00000006, -SQ_INST_STR_IB_WAVE_PC_FROM_SGPR_MSG_WAIT = 0x00000007, -} SQ_INST_STR_ST; - -/* - * SQ_WAVE_IB_ECC_ST enum - */ - -typedef enum SQ_WAVE_IB_ECC_ST { -SQ_WAVE_IB_ECC_CLEAN = 0x00000000, -SQ_WAVE_IB_ECC_ERR_CONTINUE = 0x00000001, -SQ_WAVE_IB_ECC_ERR_HALT = 0x00000002, -SQ_WAVE_IB_ECC_WITH_ERR_MSG = 0x00000003, -} SQ_WAVE_IB_ECC_ST; - -/* - * SH_MEM_ADDRESS_MODE enum - */ - -typedef enum SH_MEM_ADDRESS_MODE { 
-SH_MEM_ADDRESS_MODE_64 = 0x00000000, -SH_MEM_ADDRESS_MODE_32 = 0x00000001, -} SH_MEM_ADDRESS_MODE; - -/* - * SH_MEM_ALIGNMENT_MODE enum - */ - -typedef enum SH_MEM_ALIGNMENT_MODE { -SH_MEM_ALIGNMENT_MODE_DWORD = 0x00000000, -SH_MEM_ALIGNMENT_MODE_DWORD_STRICT = 0x00000001, -SH_MEM_ALIGNMENT_MODE_STRICT = 0x00000002, -SH_MEM_ALIGNMENT_MODE_UNALIGNED = 0x00000003, -} SH_MEM_ALIGNMENT_MODE; - -/* - * SQ_THREAD_TRACE_WAVE_START_COUNT_PREFIX enum - */ - -typedef enum SQ_THREAD_TRACE_WAVE_START_COUNT_PREFIX { -SQ_THREAD_TRACE_WAVE_START_COUNT_PREFIX_WREXEC = 0x00000018, -SQ_THREAD_TRACE_WAVE_START_COUNT_PREFIX_RESTORE = 0x00000019, -} SQ_THREAD_TRACE_WAVE_START_COUNT_PREFIX; - -/* - * SQ_LB_CTR_SEL_VALUES enum - */ - -typedef enum SQ_LB_CTR_SEL_VALUES { -SQ_LB_CTR_SEL_ALU_CYCLES = 0x00000000, -SQ_LB_CTR_SEL_ALU_STALLS = 0x00000001, -SQ_LB_CTR_SEL_TEX_CYCLES = 0x00000002, -SQ_LB_CTR_SEL_TEX_STALLS = 0x00000003, -SQ_LB_CTR_SEL_SALU_CYCLES = 0x00000004, -SQ_LB_CTR_SEL_SCALAR_STALLS = 0x00000005, -SQ_LB_CTR_SEL_SMEM_CYCLES = 0x00000006, -SQ_LB_CTR_SEL_ICACHE_STALLS = 0x00000007, -SQ_LB_CTR_SEL_DCACHE_STALLS = 0x00000008, -SQ_LB_CTR_SEL_RESERVED0 = 0x00000009, -SQ_LB_CTR_SEL_RESERVED1 = 0x0000000a, -SQ_LB_CTR_SEL_RESERVED2 = 0x0000000b, -SQ_LB_CTR_SEL_RESERVED3 = 0x0000000c, -SQ_LB_CTR_SEL_RESERVED4 = 0x0000000d, -SQ_LB_CTR_SEL_RESERVED5 = 0x0000000e, -SQ_LB_CTR_SEL_RESERVED6 = 0x0000000f, -} SQ_LB_CTR_SEL_VALUES; - -/* - * SQ_WAVE_TYPE value - */ - -#define SQ_WAVE_TYPE_PS0 0x00000000 - -/* - * SQIND_PARTITIONS value - */ - -#define SQIND_GLOBAL_REGS_OFFSET 0x00000000 -#define SQIND_GLOBAL_REGS_SIZE 0x00000008 -#define SQIND_LOCAL_REGS_OFFSET 0x00000008 -#define SQIND_LOCAL_REGS_SIZE 0x00000008 -#define SQIND_WAVE_HWREGS_OFFSET 0x00000010 -#define SQIND_WAVE_HWREGS_SIZE 0x000001f0 -#define SQIND_WAVE_SGPRS_OFFSET 0x00000200 -#define SQIND_WAVE_SGPRS_SIZE 0x00000200 -#define SQIND_WAVE_VGPRS_OFFSET 0x00000400 -#define SQIND_WAVE_VGPRS_SIZE 0x00000100 - -/* - * SQ_GFXDEC 
value - */ - -#define SQ_GFXDEC_BEGIN 0x0000a000 -#define SQ_GFXDEC_END 0x0000c000 -#define SQ_GFXDEC_STATE_ID_SHIFT 0x0000000a - -/* - * SQDEC value - */ - -#define SQDEC_BEGIN 0x00002300 -#define SQDEC_END 0x000023ff - -/* - * SQPERFSDEC value - */ - -#define SQPERFSDEC_BEGIN 0x0000d9c0 -#define SQPERFSDEC_END 0x0000da40 - -/* - * SQPERFDDEC value - */ - -#define SQPERFDDEC_BEGIN 0x0000d1c0 -#define SQPERFDDEC_END 0x0000d240 - -/* - * SQGFXUDEC value - */ - -#define SQGFXUDEC_BEGIN 0x0000c330 -#define SQGFXUDEC_END 0x0000c380 - -/* - * SQPWRDEC value - */ - -#define SQPWRDEC_BEGIN 0x0000f08c -#define SQPWRDEC_END 0x0000f094 - -/* - * SQ_DISPATCHER value - */ - -#define SQ_DISPATCHER_GFX_MIN 0x00000010 -#define SQ_DISPATCHER_GFX_CNT_PER_RING 0x00000008 - -/* - * SQ_MAX value - */ - -#define SQ_MAX_PGM_SGPRS 0x00000068 -#define SQ_MAX_PGM_VGPRS 0x00000100 - -/* - * SQ_THREAD_TRACE_TIME_UNIT value - */ - -#define SQ_THREAD_TRACE_TIME_UNIT 0x00000004 - -/* - * SQ_EXCP_BITS value - */ - -#define SQ_EX_MODE_EXCP_VALU_BASE 0x00000000 -#define SQ_EX_MODE_EXCP_VALU_SIZE 0x00000007 -#define SQ_EX_MODE_EXCP_INVALID 0x00000000 -#define SQ_EX_MODE_EXCP_INPUT_DENORM 0x00000001 -#define SQ_EX_MODE_EXCP_DIV0 0x00000002 -#define SQ_EX_MODE_EXCP_OVERFLOW 0x00000003 -#define SQ_EX_MODE_EXCP_UNDERFLOW 0x00000004 -#define SQ_EX_MODE_EXCP_INEXACT 0x00000005 -#define SQ_EX_MODE_EXCP_INT_DIV0 0x00000006 -#define SQ_EX_MODE_EXCP_ADDR_WATCH0 0x00000007 -#define SQ_EX_MODE_EXCP_MEM_VIOL 0x00000008 - -/* - * SQ_EXCP_HI_BITS value - */ - -#define SQ_EX_MODE_EXCP_HI_ADDR_WATCH1 0x00000000 -#define SQ_EX_MODE_EXCP_HI_ADDR_WATCH2 0x00000001 -#define SQ_EX_MODE_EXCP_HI_ADDR_WATCH3 0x00000002 - -/* - * HW_INSERTED_INST_ID value - */ - -#define INST_ID_PRIV_START 0x80000000 -#define INST_ID_ECC_INTERRUPT_MSG 0xfffffff0 -#define INST_ID_TTRACE_NEW_PC_MSG 0xfffffff1 -#define INST_ID_HW_TRAP 0xfffffff2 -#define INST_ID_KILL_SEQ 0xfffffff3 -#define INST_ID_SPI_WREXEC 0xfffffff4 -#define 
INST_ID_HOST_REG_TRAP_MSG 0xfffffffe - -/* - * SIMM16_WAITCNT_PARTITIONS value - */ - -#define SIMM16_WAITCNT_VM_CNT_START 0x00000000 -#define SIMM16_WAITCNT_VM_CNT_SIZE 0x00000004 -#define SIMM16_WAITCNT_EXP_CNT_START 0x00000004 -#define SIMM16_WAITCNT_EXP_CNT_SIZE 0x00000003 -#define SIMM16_WAITCNT_LGKM_CNT_START 0x00000008 -#define SIMM16_WAITCNT_LGKM_CNT_SIZE 0x00000004 -#define SIMM16_WAITCNT_VM_CNT_HI_START 0x0000000e -#define SIMM16_WAITCNT_VM_CNT_HI_SIZE 0x00000002 - -/* - * SQ_EDC_FUE_CNTL_BITS value - */ - -#define SQ_EDC_FUE_CNTL_SQ 0x00000000 -#define SQ_EDC_FUE_CNTL_LDS 0x00000001 -#define SQ_EDC_FUE_CNTL_SIMD0 0x00000002 -#define SQ_EDC_FUE_CNTL_SIMD1 0x00000003 -#define SQ_EDC_FUE_CNTL_SIMD2 0x00000004 -#define SQ_EDC_FUE_CNTL_SIMD3 0x00000005 -#define SQ_EDC_FUE_CNTL_TA 0x00000006 -#define SQ_EDC_FUE_CNTL_TD 0x00000007 -#define SQ_EDC_FUE_CNTL_TCP 0x00000008 - -/******************************************************* - * COMP Enums - *******************************************************/ - -/* - * CSDATA_TYPE enum - */ - -typedef enum CSDATA_TYPE { -CSDATA_TYPE_TG = 0x00000000, -CSDATA_TYPE_STATE = 0x00000001, -CSDATA_TYPE_EVENT = 0x00000002, -CSDATA_TYPE_PRIVATE = 0x00000003, -} CSDATA_TYPE; - -/* - * CSDATA_TYPE_WIDTH value - */ - -#define CSDATA_TYPE_WIDTH 0x00000002 - -/* - * CSDATA_ADDR_WIDTH value - */ - -#define CSDATA_ADDR_WIDTH 0x00000007 - -/* - * CSDATA_DATA_WIDTH value - */ - -#define CSDATA_DATA_WIDTH 0x00000020 - -/******************************************************* - * VGT Enums - *******************************************************/ - -/* - * VGT_OUT_PRIM_TYPE enum - */ - -typedef enum VGT_OUT_PRIM_TYPE { -VGT_OUT_POINT = 0x00000000, -VGT_OUT_LINE = 0x00000001, -VGT_OUT_TRI = 0x00000002, -VGT_OUT_RECT_V0 = 0x00000003, -VGT_OUT_RECT_V1 = 0x00000004, -VGT_OUT_RECT_V2 = 0x00000005, -VGT_OUT_RECT_V3 = 0x00000006, -VGT_OUT_2D_RECT = 0x00000007, -VGT_TE_QUAD = 0x00000008, -VGT_TE_PRIM_INDEX_LINE = 0x00000009, 
-VGT_TE_PRIM_INDEX_TRI = 0x0000000a, -VGT_TE_PRIM_INDEX_QUAD = 0x0000000b, -VGT_OUT_LINE_ADJ = 0x0000000c, -VGT_OUT_TRI_ADJ = 0x0000000d, -VGT_OUT_PATCH = 0x0000000e, -} VGT_OUT_PRIM_TYPE; - -/* - * VGT_DI_PRIM_TYPE enum - */ - -typedef enum VGT_DI_PRIM_TYPE { -DI_PT_NONE = 0x00000000, -DI_PT_POINTLIST = 0x00000001, -DI_PT_LINELIST = 0x00000002, -DI_PT_LINESTRIP = 0x00000003, -DI_PT_TRILIST = 0x00000004, -DI_PT_TRIFAN = 0x00000005, -DI_PT_TRISTRIP = 0x00000006, -DI_PT_2D_RECTANGLE = 0x00000007, -DI_PT_UNUSED_1 = 0x00000008, -DI_PT_PATCH = 0x00000009, -DI_PT_LINELIST_ADJ = 0x0000000a, -DI_PT_LINESTRIP_ADJ = 0x0000000b, -DI_PT_TRILIST_ADJ = 0x0000000c, -DI_PT_TRISTRIP_ADJ = 0x0000000d, -DI_PT_UNUSED_3 = 0x0000000e, -DI_PT_UNUSED_4 = 0x0000000f, -DI_PT_TRI_WITH_WFLAGS = 0x00000010, -DI_PT_RECTLIST = 0x00000011, -DI_PT_LINELOOP = 0x00000012, -DI_PT_QUADLIST = 0x00000013, -DI_PT_QUADSTRIP = 0x00000014, -DI_PT_POLYGON = 0x00000015, -} VGT_DI_PRIM_TYPE; - -/* - * VGT_DI_SOURCE_SELECT enum - */ - -typedef enum VGT_DI_SOURCE_SELECT { -DI_SRC_SEL_DMA = 0x00000000, -DI_SRC_SEL_IMMEDIATE = 0x00000001, -DI_SRC_SEL_AUTO_INDEX = 0x00000002, -DI_SRC_SEL_RESERVED = 0x00000003, -} VGT_DI_SOURCE_SELECT; - -/* - * VGT_DI_MAJOR_MODE_SELECT enum - */ - -typedef enum VGT_DI_MAJOR_MODE_SELECT { -DI_MAJOR_MODE_0 = 0x00000000, -DI_MAJOR_MODE_1 = 0x00000001, -} VGT_DI_MAJOR_MODE_SELECT; - -/* - * VGT_DI_INDEX_SIZE enum - */ - -typedef enum VGT_DI_INDEX_SIZE { -DI_INDEX_SIZE_16_BIT = 0x00000000, -DI_INDEX_SIZE_32_BIT = 0x00000001, -DI_INDEX_SIZE_8_BIT = 0x00000002, -} VGT_DI_INDEX_SIZE; - -/* - * VGT_EVENT_TYPE enum - */ - -typedef enum VGT_EVENT_TYPE { -Reserved_0x00 = 0x00000000, -SAMPLE_STREAMOUTSTATS1 = 0x00000001, -SAMPLE_STREAMOUTSTATS2 = 0x00000002, -SAMPLE_STREAMOUTSTATS3 = 0x00000003, -CACHE_FLUSH_TS = 0x00000004, -CONTEXT_DONE = 0x00000005, -CACHE_FLUSH = 0x00000006, -CS_PARTIAL_FLUSH = 0x00000007, -VGT_STREAMOUT_SYNC = 0x00000008, -Reserved_0x09 = 0x00000009, -VGT_STREAMOUT_RESET = 
0x0000000a, -END_OF_PIPE_INCR_DE = 0x0000000b, -END_OF_PIPE_IB_END = 0x0000000c, -RST_PIX_CNT = 0x0000000d, -BREAK_BATCH = 0x0000000e, -VS_PARTIAL_FLUSH = 0x0000000f, -PS_PARTIAL_FLUSH = 0x00000010, -FLUSH_HS_OUTPUT = 0x00000011, -FLUSH_DFSM = 0x00000012, -RESET_TO_LOWEST_VGT = 0x00000013, -CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014, -ZPASS_DONE = 0x00000015, -CACHE_FLUSH_AND_INV_EVENT = 0x00000016, -PERFCOUNTER_START = 0x00000017, -PERFCOUNTER_STOP = 0x00000018, -PIPELINESTAT_START = 0x00000019, -PIPELINESTAT_STOP = 0x0000001a, -PERFCOUNTER_SAMPLE = 0x0000001b, -Available_0x1c = 0x0000001c, -Available_0x1d = 0x0000001d, -SAMPLE_PIPELINESTAT = 0x0000001e, -SO_VGTSTREAMOUT_FLUSH = 0x0000001f, -SAMPLE_STREAMOUTSTATS = 0x00000020, -RESET_VTX_CNT = 0x00000021, -BLOCK_CONTEXT_DONE = 0x00000022, -CS_CONTEXT_DONE = 0x00000023, -VGT_FLUSH = 0x00000024, -TGID_ROLLOVER = 0x00000025, -SQ_NON_EVENT = 0x00000026, -SC_SEND_DB_VPZ = 0x00000027, -BOTTOM_OF_PIPE_TS = 0x00000028, -FLUSH_SX_TS = 0x00000029, -DB_CACHE_FLUSH_AND_INV = 0x0000002a, -FLUSH_AND_INV_DB_DATA_TS = 0x0000002b, -FLUSH_AND_INV_DB_META = 0x0000002c, -FLUSH_AND_INV_CB_DATA_TS = 0x0000002d, -FLUSH_AND_INV_CB_META = 0x0000002e, -CS_DONE = 0x0000002f, -PS_DONE = 0x00000030, -FLUSH_AND_INV_CB_PIXEL_DATA = 0x00000031, -SX_CB_RAT_ACK_REQUEST = 0x00000032, -THREAD_TRACE_START = 0x00000033, -THREAD_TRACE_STOP = 0x00000034, -THREAD_TRACE_MARKER = 0x00000035, -THREAD_TRACE_FLUSH = 0x00000036, -THREAD_TRACE_FINISH = 0x00000037, -PIXEL_PIPE_STAT_CONTROL = 0x00000038, -PIXEL_PIPE_STAT_DUMP = 0x00000039, -PIXEL_PIPE_STAT_RESET = 0x0000003a, -CONTEXT_SUSPEND = 0x0000003b, -OFFCHIP_HS_DEALLOC = 0x0000003c, -ENABLE_NGG_PIPELINE = 0x0000003d, -ENABLE_LEGACY_PIPELINE = 0x0000003e, -Reserved_0x3f = 0x0000003f, -} VGT_EVENT_TYPE; - -/* - * VGT_DMA_SWAP_MODE enum - */ - -typedef enum VGT_DMA_SWAP_MODE { -VGT_DMA_SWAP_NONE = 0x00000000, -VGT_DMA_SWAP_16_BIT = 0x00000001, -VGT_DMA_SWAP_32_BIT = 0x00000002, -VGT_DMA_SWAP_WORD = 0x00000003, 
-} VGT_DMA_SWAP_MODE; - -/* - * VGT_INDEX_TYPE_MODE enum - */ - -typedef enum VGT_INDEX_TYPE_MODE { -VGT_INDEX_16 = 0x00000000, -VGT_INDEX_32 = 0x00000001, -VGT_INDEX_8 = 0x00000002, -} VGT_INDEX_TYPE_MODE; - -/* - * VGT_DMA_BUF_TYPE enum - */ - -typedef enum VGT_DMA_BUF_TYPE { -VGT_DMA_BUF_MEM = 0x00000000, -VGT_DMA_BUF_RING = 0x00000001, -VGT_DMA_BUF_SETUP = 0x00000002, -VGT_DMA_PTR_UPDATE = 0x00000003, -} VGT_DMA_BUF_TYPE; - -/* - * VGT_OUTPATH_SELECT enum - */ - -typedef enum VGT_OUTPATH_SELECT { -VGT_OUTPATH_VTX_REUSE = 0x00000000, -VGT_OUTPATH_TESS_EN = 0x00000001, -VGT_OUTPATH_PASSTHRU = 0x00000002, -VGT_OUTPATH_GS_BLOCK = 0x00000003, -VGT_OUTPATH_HS_BLOCK = 0x00000004, -VGT_OUTPATH_PRIM_GEN = 0x00000005, -} VGT_OUTPATH_SELECT; - -/* - * VGT_GRP_PRIM_TYPE enum - */ - -typedef enum VGT_GRP_PRIM_TYPE { -VGT_GRP_3D_POINT = 0x00000000, -VGT_GRP_3D_LINE = 0x00000001, -VGT_GRP_3D_TRI = 0x00000002, -VGT_GRP_3D_RECT = 0x00000003, -VGT_GRP_3D_QUAD = 0x00000004, -VGT_GRP_2D_COPY_RECT_V0 = 0x00000005, -VGT_GRP_2D_COPY_RECT_V1 = 0x00000006, -VGT_GRP_2D_COPY_RECT_V2 = 0x00000007, -VGT_GRP_2D_COPY_RECT_V3 = 0x00000008, -VGT_GRP_2D_FILL_RECT = 0x00000009, -VGT_GRP_2D_LINE = 0x0000000a, -VGT_GRP_2D_TRI = 0x0000000b, -VGT_GRP_PRIM_INDEX_LINE = 0x0000000c, -VGT_GRP_PRIM_INDEX_TRI = 0x0000000d, -VGT_GRP_PRIM_INDEX_QUAD = 0x0000000e, -VGT_GRP_3D_LINE_ADJ = 0x0000000f, -VGT_GRP_3D_TRI_ADJ = 0x00000010, -VGT_GRP_3D_PATCH = 0x00000011, -VGT_GRP_2D_RECT = 0x00000012, -} VGT_GRP_PRIM_TYPE; - -/* - * VGT_GRP_PRIM_ORDER enum - */ - -typedef enum VGT_GRP_PRIM_ORDER { -VGT_GRP_LIST = 0x00000000, -VGT_GRP_STRIP = 0x00000001, -VGT_GRP_FAN = 0x00000002, -VGT_GRP_LOOP = 0x00000003, -VGT_GRP_POLYGON = 0x00000004, -} VGT_GRP_PRIM_ORDER; - -/* - * VGT_GROUP_CONV_SEL enum - */ - -typedef enum VGT_GROUP_CONV_SEL { -VGT_GRP_INDEX_16 = 0x00000000, -VGT_GRP_INDEX_32 = 0x00000001, -VGT_GRP_UINT_16 = 0x00000002, -VGT_GRP_UINT_32 = 0x00000003, -VGT_GRP_SINT_16 = 0x00000004, -VGT_GRP_SINT_32 = 
0x00000005, -VGT_GRP_FLOAT_32 = 0x00000006, -VGT_GRP_AUTO_PRIM = 0x00000007, -VGT_GRP_FIX_1_23_TO_FLOAT = 0x00000008, -} VGT_GROUP_CONV_SEL; - -/* - * VGT_GS_MODE_TYPE enum - */ - -typedef enum VGT_GS_MODE_TYPE { -GS_OFF = 0x00000000, -GS_SCENARIO_A = 0x00000001, -GS_SCENARIO_B = 0x00000002, -GS_SCENARIO_G = 0x00000003, -GS_SCENARIO_C = 0x00000004, -SPRITE_EN = 0x00000005, -} VGT_GS_MODE_TYPE; - -/* - * VGT_GS_CUT_MODE enum - */ - -typedef enum VGT_GS_CUT_MODE { -GS_CUT_1024 = 0x00000000, -GS_CUT_512 = 0x00000001, -GS_CUT_256 = 0x00000002, -GS_CUT_128 = 0x00000003, -} VGT_GS_CUT_MODE; - -/* - * VGT_GS_OUTPRIM_TYPE enum - */ - -typedef enum VGT_GS_OUTPRIM_TYPE { -POINTLIST = 0x00000000, -LINESTRIP = 0x00000001, -TRISTRIP = 0x00000002, -RECTLIST = 0x00000003, -} VGT_GS_OUTPRIM_TYPE; - -/* - * VGT_CACHE_INVALID_MODE enum - */ - -typedef enum VGT_CACHE_INVALID_MODE { -VC_ONLY = 0x00000000, -TC_ONLY = 0x00000001, -VC_AND_TC = 0x00000002, -} VGT_CACHE_INVALID_MODE; - -/* - * VGT_TESS_TYPE enum - */ - -typedef enum VGT_TESS_TYPE { -TESS_ISOLINE = 0x00000000, -TESS_TRIANGLE = 0x00000001, -TESS_QUAD = 0x00000002, -} VGT_TESS_TYPE; - -/* - * VGT_TESS_PARTITION enum - */ - -typedef enum VGT_TESS_PARTITION { -PART_INTEGER = 0x00000000, -PART_POW2 = 0x00000001, -PART_FRAC_ODD = 0x00000002, -PART_FRAC_EVEN = 0x00000003, -} VGT_TESS_PARTITION; - -/* - * VGT_TESS_TOPOLOGY enum - */ - -typedef enum VGT_TESS_TOPOLOGY { -OUTPUT_POINT = 0x00000000, -OUTPUT_LINE = 0x00000001, -OUTPUT_TRIANGLE_CW = 0x00000002, -OUTPUT_TRIANGLE_CCW = 0x00000003, -} VGT_TESS_TOPOLOGY; - -/* - * VGT_RDREQ_POLICY enum - */ - -typedef enum VGT_RDREQ_POLICY { -VGT_POLICY_LRU = 0x00000000, -VGT_POLICY_STREAM = 0x00000001, -} VGT_RDREQ_POLICY; - -/* - * VGT_DIST_MODE enum - */ - -typedef enum VGT_DIST_MODE { -NO_DIST = 0x00000000, -PATCHES = 0x00000001, -DONUTS = 0x00000002, -TRAPEZOIDS = 0x00000003, -} VGT_DIST_MODE; - -/* - * VGT_STAGES_LS_EN enum - */ - -typedef enum VGT_STAGES_LS_EN { -LS_STAGE_OFF = 
0x00000000, -LS_STAGE_ON = 0x00000001, -CS_STAGE_ON = 0x00000002, -RESERVED_LS = 0x00000003, -} VGT_STAGES_LS_EN; - -/* - * VGT_STAGES_HS_EN enum - */ - -typedef enum VGT_STAGES_HS_EN { -HS_STAGE_OFF = 0x00000000, -HS_STAGE_ON = 0x00000001, -} VGT_STAGES_HS_EN; - -/* - * VGT_STAGES_ES_EN enum - */ - -typedef enum VGT_STAGES_ES_EN { -ES_STAGE_OFF = 0x00000000, -ES_STAGE_DS = 0x00000001, -ES_STAGE_REAL = 0x00000002, -RESERVED_ES = 0x00000003, -} VGT_STAGES_ES_EN; - -/* - * VGT_STAGES_GS_EN enum - */ - -typedef enum VGT_STAGES_GS_EN { -GS_STAGE_OFF = 0x00000000, -GS_STAGE_ON = 0x00000001, -} VGT_STAGES_GS_EN; - -/* - * VGT_STAGES_VS_EN enum - */ - -typedef enum VGT_STAGES_VS_EN { -VS_STAGE_REAL = 0x00000000, -VS_STAGE_DS = 0x00000001, -VS_STAGE_COPY_SHADER = 0x00000002, -RESERVED_VS = 0x00000003, -} VGT_STAGES_VS_EN; - -/* - * VGT_PERFCOUNT_SELECT enum - */ - -typedef enum VGT_PERFCOUNT_SELECT { -vgt_perf_VGT_SPI_ESTHREAD_EVENT_WINDOW_ACTIVE = 0x00000000, -vgt_perf_VGT_SPI_ESVERT_VALID = 0x00000001, -vgt_perf_VGT_SPI_ESVERT_EOV = 0x00000002, -vgt_perf_VGT_SPI_ESVERT_STALLED = 0x00000003, -vgt_perf_VGT_SPI_ESVERT_STARVED_BUSY = 0x00000004, -vgt_perf_VGT_SPI_ESVERT_STARVED_IDLE = 0x00000005, -vgt_perf_VGT_SPI_ESVERT_STATIC = 0x00000006, -vgt_perf_VGT_SPI_ESTHREAD_IS_EVENT = 0x00000007, -vgt_perf_VGT_SPI_ESTHREAD_SEND = 0x00000008, -vgt_perf_VGT_SPI_GSPRIM_VALID = 0x00000009, -vgt_perf_VGT_SPI_GSPRIM_EOV = 0x0000000a, -vgt_perf_VGT_SPI_GSPRIM_CONT = 0x0000000b, -vgt_perf_VGT_SPI_GSPRIM_STALLED = 0x0000000c, -vgt_perf_VGT_SPI_GSPRIM_STARVED_BUSY = 0x0000000d, -vgt_perf_VGT_SPI_GSPRIM_STARVED_IDLE = 0x0000000e, -vgt_perf_VGT_SPI_GSPRIM_STATIC = 0x0000000f, -vgt_perf_VGT_SPI_GSTHREAD_EVENT_WINDOW_ACTIVE = 0x00000010, -vgt_perf_VGT_SPI_GSTHREAD_IS_EVENT = 0x00000011, -vgt_perf_VGT_SPI_GSTHREAD_SEND = 0x00000012, -vgt_perf_VGT_SPI_VSTHREAD_EVENT_WINDOW_ACTIVE = 0x00000013, -vgt_perf_VGT_SPI_VSVERT_SEND = 0x00000014, -vgt_perf_VGT_SPI_VSVERT_EOV = 0x00000015, 
-vgt_perf_VGT_SPI_VSVERT_STALLED = 0x00000016, -vgt_perf_VGT_SPI_VSVERT_STARVED_BUSY = 0x00000017, -vgt_perf_VGT_SPI_VSVERT_STARVED_IDLE = 0x00000018, -vgt_perf_VGT_SPI_VSVERT_STATIC = 0x00000019, -vgt_perf_VGT_SPI_VSTHREAD_IS_EVENT = 0x0000001a, -vgt_perf_VGT_SPI_VSTHREAD_SEND = 0x0000001b, -vgt_perf_VGT_PA_EVENT_WINDOW_ACTIVE = 0x0000001c, -vgt_perf_VGT_PA_CLIPV_SEND = 0x0000001d, -vgt_perf_VGT_PA_CLIPV_FIRSTVERT = 0x0000001e, -vgt_perf_VGT_PA_CLIPV_STALLED = 0x0000001f, -vgt_perf_VGT_PA_CLIPV_STARVED_BUSY = 0x00000020, -vgt_perf_VGT_PA_CLIPV_STARVED_IDLE = 0x00000021, -vgt_perf_VGT_PA_CLIPV_STATIC = 0x00000022, -vgt_perf_VGT_PA_CLIPP_SEND = 0x00000023, -vgt_perf_VGT_PA_CLIPP_EOP = 0x00000024, -vgt_perf_VGT_PA_CLIPP_IS_EVENT = 0x00000025, -vgt_perf_VGT_PA_CLIPP_NULL_PRIM = 0x00000026, -vgt_perf_VGT_PA_CLIPP_NEW_VTX_VECT = 0x00000027, -vgt_perf_VGT_PA_CLIPP_STALLED = 0x00000028, -vgt_perf_VGT_PA_CLIPP_STARVED_BUSY = 0x00000029, -vgt_perf_VGT_PA_CLIPP_STARVED_IDLE = 0x0000002a, -vgt_perf_VGT_PA_CLIPP_STATIC = 0x0000002b, -vgt_perf_VGT_PA_CLIPS_SEND = 0x0000002c, -vgt_perf_VGT_PA_CLIPS_STALLED = 0x0000002d, -vgt_perf_VGT_PA_CLIPS_STARVED_BUSY = 0x0000002e, -vgt_perf_VGT_PA_CLIPS_STARVED_IDLE = 0x0000002f, -vgt_perf_VGT_PA_CLIPS_STATIC = 0x00000030, -vgt_perf_vsvert_ds_send = 0x00000031, -vgt_perf_vsvert_api_send = 0x00000032, -vgt_perf_hs_tif_stall = 0x00000033, -vgt_perf_hs_input_stall = 0x00000034, -vgt_perf_hs_interface_stall = 0x00000035, -vgt_perf_hs_tfm_stall = 0x00000036, -vgt_perf_te11_starved = 0x00000037, -vgt_perf_gs_event_stall = 0x00000038, -vgt_perf_vgt_pa_clipp_send_not_event = 0x00000039, -vgt_perf_vgt_pa_clipp_valid_prim = 0x0000003a, -vgt_perf_reused_es_indices = 0x0000003b, -vgt_perf_vs_cache_hits = 0x0000003c, -vgt_perf_gs_cache_hits = 0x0000003d, -vgt_perf_ds_cache_hits = 0x0000003e, -vgt_perf_total_cache_hits = 0x0000003f, -vgt_perf_vgt_busy = 0x00000040, -vgt_perf_vgt_gs_busy = 0x00000041, -vgt_perf_esvert_stalled_es_tbl = 0x00000042, 
-vgt_perf_esvert_stalled_gs_tbl = 0x00000043, -vgt_perf_esvert_stalled_gs_event = 0x00000044, -vgt_perf_esvert_stalled_gsprim = 0x00000045, -vgt_perf_gsprim_stalled_es_tbl = 0x00000046, -vgt_perf_gsprim_stalled_gs_tbl = 0x00000047, -vgt_perf_gsprim_stalled_gs_event = 0x00000048, -vgt_perf_gsprim_stalled_esvert = 0x00000049, -vgt_perf_esthread_stalled_es_rb_full = 0x0000004a, -vgt_perf_esthread_stalled_spi_bp = 0x0000004b, -vgt_perf_counters_avail_stalled = 0x0000004c, -vgt_perf_gs_rb_space_avail_stalled = 0x0000004d, -vgt_perf_gs_issue_rtr_stalled = 0x0000004e, -vgt_perf_gsthread_stalled = 0x0000004f, -vgt_perf_strmout_stalled = 0x00000050, -vgt_perf_wait_for_es_done_stalled = 0x00000051, -vgt_perf_cm_stalled_by_gog = 0x00000052, -vgt_perf_cm_reading_stalled = 0x00000053, -vgt_perf_cm_stalled_by_gsfetch_done = 0x00000054, -vgt_perf_gog_vs_tbl_stalled = 0x00000055, -vgt_perf_gog_out_indx_stalled = 0x00000056, -vgt_perf_gog_out_prim_stalled = 0x00000057, -vgt_perf_waveid_stalled = 0x00000058, -vgt_perf_gog_busy = 0x00000059, -vgt_perf_reused_vs_indices = 0x0000005a, -vgt_perf_sclk_reg_vld_event = 0x0000005b, -vgt_perf_vs_conflicting_indices = 0x0000005c, -vgt_perf_sclk_core_vld_event = 0x0000005d, -vgt_perf_hswave_stalled = 0x0000005e, -vgt_perf_sclk_gs_vld_event = 0x0000005f, -vgt_perf_VGT_SPI_LSVERT_VALID = 0x00000060, -vgt_perf_VGT_SPI_LSVERT_EOV = 0x00000061, -vgt_perf_VGT_SPI_LSVERT_STALLED = 0x00000062, -vgt_perf_VGT_SPI_LSVERT_STARVED_BUSY = 0x00000063, -vgt_perf_VGT_SPI_LSVERT_STARVED_IDLE = 0x00000064, -vgt_perf_VGT_SPI_LSVERT_STATIC = 0x00000065, -vgt_perf_VGT_SPI_LSWAVE_EVENT_WINDOW_ACTIVE = 0x00000066, -vgt_perf_VGT_SPI_LSWAVE_IS_EVENT = 0x00000067, -vgt_perf_VGT_SPI_LSWAVE_SEND = 0x00000068, -vgt_perf_VGT_SPI_HSVERT_VALID = 0x00000069, -vgt_perf_VGT_SPI_HSVERT_EOV = 0x0000006a, -vgt_perf_VGT_SPI_HSVERT_STALLED = 0x0000006b, -vgt_perf_VGT_SPI_HSVERT_STARVED_BUSY = 0x0000006c, -vgt_perf_VGT_SPI_HSVERT_STARVED_IDLE = 0x0000006d, 
-vgt_perf_VGT_SPI_HSVERT_STATIC = 0x0000006e, -vgt_perf_VGT_SPI_HSWAVE_EVENT_WINDOW_ACTIVE = 0x0000006f, -vgt_perf_VGT_SPI_HSWAVE_IS_EVENT = 0x00000070, -vgt_perf_VGT_SPI_HSWAVE_SEND = 0x00000071, -vgt_perf_ds_prims = 0x00000072, -vgt_perf_ds_RESERVED = 0x00000073, -vgt_perf_ls_thread_groups = 0x00000074, -vgt_perf_hs_thread_groups = 0x00000075, -vgt_perf_es_thread_groups = 0x00000076, -vgt_perf_vs_thread_groups = 0x00000077, -vgt_perf_ls_done_latency = 0x00000078, -vgt_perf_hs_done_latency = 0x00000079, -vgt_perf_es_done_latency = 0x0000007a, -vgt_perf_gs_done_latency = 0x0000007b, -vgt_perf_vgt_hs_busy = 0x0000007c, -vgt_perf_vgt_te11_busy = 0x0000007d, -vgt_perf_ls_flush = 0x0000007e, -vgt_perf_hs_flush = 0x0000007f, -vgt_perf_es_flush = 0x00000080, -vgt_perf_vgt_pa_clipp_eopg = 0x00000081, -vgt_perf_ls_done = 0x00000082, -vgt_perf_hs_done = 0x00000083, -vgt_perf_es_done = 0x00000084, -vgt_perf_gs_done = 0x00000085, -vgt_perf_vsfetch_done = 0x00000086, -vgt_perf_gs_done_received = 0x00000087, -vgt_perf_es_ring_high_water_mark = 0x00000088, -vgt_perf_gs_ring_high_water_mark = 0x00000089, -vgt_perf_vs_table_high_water_mark = 0x0000008a, -vgt_perf_hs_tgs_active_high_water_mark = 0x0000008b, -vgt_perf_pa_clipp_dealloc = 0x0000008c, -vgt_perf_cut_mem_flush_stalled = 0x0000008d, -vgt_perf_vsvert_work_received = 0x0000008e, -vgt_perf_vgt_pa_clipp_starved_after_work = 0x0000008f, -vgt_perf_te11_con_starved_after_work = 0x00000090, -vgt_perf_hs_waiting_on_ls_done_stall = 0x00000091, -vgt_spi_vsvert_valid = 0x00000092, -} VGT_PERFCOUNT_SELECT; - -/* - * IA_PERFCOUNT_SELECT enum - */ - -typedef enum IA_PERFCOUNT_SELECT { -ia_perf_GRP_INPUT_EVENT_WINDOW_ACTIVE = 0x00000000, -ia_perf_dma_data_fifo_full = 0x00000001, -ia_perf_RESERVED1 = 0x00000002, -ia_perf_RESERVED2 = 0x00000003, -ia_perf_RESERVED3 = 0x00000004, -ia_perf_RESERVED4 = 0x00000005, -ia_perf_RESERVED5 = 0x00000006, -ia_perf_MC_LAT_BIN_0 = 0x00000007, -ia_perf_MC_LAT_BIN_1 = 0x00000008, -ia_perf_MC_LAT_BIN_2 = 
0x00000009, -ia_perf_MC_LAT_BIN_3 = 0x0000000a, -ia_perf_MC_LAT_BIN_4 = 0x0000000b, -ia_perf_MC_LAT_BIN_5 = 0x0000000c, -ia_perf_MC_LAT_BIN_6 = 0x0000000d, -ia_perf_MC_LAT_BIN_7 = 0x0000000e, -ia_perf_ia_busy = 0x0000000f, -ia_perf_ia_sclk_reg_vld_event = 0x00000010, -ia_perf_RESERVED6 = 0x00000011, -ia_perf_ia_sclk_core_vld_event = 0x00000012, -ia_perf_RESERVED7 = 0x00000013, -ia_perf_ia_dma_return = 0x00000014, -ia_perf_ia_stalled = 0x00000015, -ia_perf_shift_starved_pipe0_event = 0x00000016, -ia_perf_shift_starved_pipe1_event = 0x00000017, -} IA_PERFCOUNT_SELECT; - -/* - * WD_PERFCOUNT_SELECT enum - */ - -typedef enum WD_PERFCOUNT_SELECT { -wd_perf_RBIU_FIFOS_EVENT_WINDOW_ACTIVE = 0x00000000, -wd_perf_RBIU_DR_FIFO_STARVED = 0x00000001, -wd_perf_RBIU_DR_FIFO_STALLED = 0x00000002, -wd_perf_RBIU_DI_FIFO_STARVED = 0x00000003, -wd_perf_RBIU_DI_FIFO_STALLED = 0x00000004, -wd_perf_wd_busy = 0x00000005, -wd_perf_wd_sclk_reg_vld_event = 0x00000006, -wd_perf_wd_sclk_input_vld_event = 0x00000007, -wd_perf_wd_sclk_core_vld_event = 0x00000008, -wd_perf_wd_stalled = 0x00000009, -wd_perf_inside_tf_bin_0 = 0x0000000a, -wd_perf_inside_tf_bin_1 = 0x0000000b, -wd_perf_inside_tf_bin_2 = 0x0000000c, -wd_perf_inside_tf_bin_3 = 0x0000000d, -wd_perf_inside_tf_bin_4 = 0x0000000e, -wd_perf_inside_tf_bin_5 = 0x0000000f, -wd_perf_inside_tf_bin_6 = 0x00000010, -wd_perf_inside_tf_bin_7 = 0x00000011, -wd_perf_inside_tf_bin_8 = 0x00000012, -wd_perf_tfreq_lat_bin_0 = 0x00000013, -wd_perf_tfreq_lat_bin_1 = 0x00000014, -wd_perf_tfreq_lat_bin_2 = 0x00000015, -wd_perf_tfreq_lat_bin_3 = 0x00000016, -wd_perf_tfreq_lat_bin_4 = 0x00000017, -wd_perf_tfreq_lat_bin_5 = 0x00000018, -wd_perf_tfreq_lat_bin_6 = 0x00000019, -wd_perf_tfreq_lat_bin_7 = 0x0000001a, -wd_starved_on_hs_done = 0x0000001b, -wd_perf_se0_hs_done_latency = 0x0000001c, -wd_perf_se1_hs_done_latency = 0x0000001d, -wd_perf_se2_hs_done_latency = 0x0000001e, -wd_perf_se3_hs_done_latency = 0x0000001f, -wd_perf_hs_done_se0 = 0x00000020, 
-wd_perf_hs_done_se1 = 0x00000021, -wd_perf_hs_done_se2 = 0x00000022, -wd_perf_hs_done_se3 = 0x00000023, -wd_perf_null_patches = 0x00000024, -} WD_PERFCOUNT_SELECT; - -/* - * WD_IA_DRAW_TYPE enum - */ - -typedef enum WD_IA_DRAW_TYPE { -WD_IA_DRAW_TYPE_DI_MM0 = 0x00000000, -WD_IA_DRAW_TYPE_REG_XFER = 0x00000001, -WD_IA_DRAW_TYPE_EVENT_INIT = 0x00000002, -WD_IA_DRAW_TYPE_EVENT_ADDR = 0x00000003, -WD_IA_DRAW_TYPE_MIN_INDX = 0x00000004, -WD_IA_DRAW_TYPE_MAX_INDX = 0x00000005, -WD_IA_DRAW_TYPE_INDX_OFF = 0x00000006, -WD_IA_DRAW_TYPE_IMM_DATA = 0x00000007, -} WD_IA_DRAW_TYPE; - -/* - * WD_IA_DRAW_REG_XFER enum - */ - -typedef enum WD_IA_DRAW_REG_XFER { -WD_IA_DRAW_REG_XFER_IA_MULTI_VGT_PARAM = 0x00000000, -WD_IA_DRAW_REG_XFER_VGT_MULTI_PRIM_IB_RESET_EN = 0x00000001, -} WD_IA_DRAW_REG_XFER; - -/* - * WD_IA_DRAW_SOURCE enum - */ - -typedef enum WD_IA_DRAW_SOURCE { -WD_IA_DRAW_SOURCE_DMA = 0x00000000, -WD_IA_DRAW_SOURCE_IMMD = 0x00000001, -WD_IA_DRAW_SOURCE_AUTO = 0x00000002, -WD_IA_DRAW_SOURCE_OPAQ = 0x00000003, -} WD_IA_DRAW_SOURCE; - -/* - * GS_THREADID_SIZE value - */ - -#define GSTHREADID_SIZE 0x00000002 - -/******************************************************* - * GB Enums - *******************************************************/ - -/* - * GB_EDC_DED_MODE enum - */ - -typedef enum GB_EDC_DED_MODE { -GB_EDC_DED_MODE_LOG = 0x00000000, -GB_EDC_DED_MODE_HALT = 0x00000001, -GB_EDC_DED_MODE_INT_HALT = 0x00000002, -} GB_EDC_DED_MODE; - -/* - * VALUE_GB_TILING_CONFIG_TABLE_SIZE value - */ - -#define GB_TILING_CONFIG_TABLE_SIZE 0x00000020 - -/* - * VALUE_GB_TILING_CONFIG_MACROTABLE_SIZE value - */ - -#define GB_TILING_CONFIG_MACROTABLE_SIZE 0x00000010 - -/******************************************************* - * TP Enums - *******************************************************/ - -/* - * TA_TC_ADDR_MODES enum - */ - -typedef enum TA_TC_ADDR_MODES { -TA_TC_ADDR_MODE_DEFAULT = 0x00000000, -TA_TC_ADDR_MODE_COMP0 = 0x00000001, -TA_TC_ADDR_MODE_COMP1 = 0x00000002, 
-TA_TC_ADDR_MODE_COMP2 = 0x00000003, -TA_TC_ADDR_MODE_COMP3 = 0x00000004, -TA_TC_ADDR_MODE_UNALIGNED = 0x00000005, -TA_TC_ADDR_MODE_BORDER_COLOR = 0x00000006, -} TA_TC_ADDR_MODES; - -/* - * TA_PERFCOUNT_SEL enum - */ - -typedef enum TA_PERFCOUNT_SEL { -TA_PERF_SEL_NULL = 0x00000000, -TA_PERF_SEL_sh_fifo_busy = 0x00000001, -TA_PERF_SEL_sh_fifo_cmd_busy = 0x00000002, -TA_PERF_SEL_sh_fifo_addr_busy = 0x00000003, -TA_PERF_SEL_sh_fifo_data_busy = 0x00000004, -TA_PERF_SEL_sh_fifo_data_sfifo_busy = 0x00000005, -TA_PERF_SEL_sh_fifo_data_tfifo_busy = 0x00000006, -TA_PERF_SEL_gradient_busy = 0x00000007, -TA_PERF_SEL_gradient_fifo_busy = 0x00000008, -TA_PERF_SEL_lod_busy = 0x00000009, -TA_PERF_SEL_lod_fifo_busy = 0x0000000a, -TA_PERF_SEL_addresser_busy = 0x0000000b, -TA_PERF_SEL_addresser_fifo_busy = 0x0000000c, -TA_PERF_SEL_aligner_busy = 0x0000000d, -TA_PERF_SEL_write_path_busy = 0x0000000e, -TA_PERF_SEL_ta_busy = 0x0000000f, -TA_PERF_SEL_sq_ta_cmd_cycles = 0x00000010, -TA_PERF_SEL_sp_ta_addr_cycles = 0x00000011, -TA_PERF_SEL_sp_ta_data_cycles = 0x00000012, -TA_PERF_SEL_ta_fa_data_state_cycles = 0x00000013, -TA_PERF_SEL_sh_fifo_addr_waiting_on_cmd_cycles = 0x00000014, -TA_PERF_SEL_sh_fifo_cmd_waiting_on_addr_cycles = 0x00000015, -TA_PERF_SEL_sh_fifo_addr_starved_while_busy_cycles = 0x00000016, -TA_PERF_SEL_sh_fifo_cmd_starved_while_busy_cycles = 0x00000017, -TA_PERF_SEL_sh_fifo_data_waiting_on_data_state_cycles = 0x00000018, -TA_PERF_SEL_sh_fifo_data_state_waiting_on_data_cycles = 0x00000019, -TA_PERF_SEL_sh_fifo_data_starved_while_busy_cycles = 0x0000001a, -TA_PERF_SEL_sh_fifo_data_state_starved_while_busy_cycles = 0x0000001b, -TA_PERF_SEL_RESERVED_28 = 0x0000001c, -TA_PERF_SEL_RESERVED_29 = 0x0000001d, -TA_PERF_SEL_sh_fifo_addr_cycles = 0x0000001e, -TA_PERF_SEL_sh_fifo_data_cycles = 0x0000001f, -TA_PERF_SEL_total_wavefronts = 0x00000020, -TA_PERF_SEL_gradient_cycles = 0x00000021, -TA_PERF_SEL_walker_cycles = 0x00000022, -TA_PERF_SEL_aligner_cycles = 0x00000023, 
-TA_PERF_SEL_image_wavefronts = 0x00000024, -TA_PERF_SEL_image_read_wavefronts = 0x00000025, -TA_PERF_SEL_image_write_wavefronts = 0x00000026, -TA_PERF_SEL_image_atomic_wavefronts = 0x00000027, -TA_PERF_SEL_image_total_cycles = 0x00000028, -TA_PERF_SEL_RESERVED_41 = 0x00000029, -TA_PERF_SEL_RESERVED_42 = 0x0000002a, -TA_PERF_SEL_RESERVED_43 = 0x0000002b, -TA_PERF_SEL_buffer_wavefronts = 0x0000002c, -TA_PERF_SEL_buffer_read_wavefronts = 0x0000002d, -TA_PERF_SEL_buffer_write_wavefronts = 0x0000002e, -TA_PERF_SEL_buffer_atomic_wavefronts = 0x0000002f, -TA_PERF_SEL_buffer_coalescable_wavefronts = 0x00000030, -TA_PERF_SEL_buffer_total_cycles = 0x00000031, -TA_PERF_SEL_buffer_coalescable_addr_multicycled_cycles = 0x00000032, -TA_PERF_SEL_buffer_coalescable_clamp_16kdword_multicycled_cycles = 0x00000033, -TA_PERF_SEL_buffer_coalesced_read_cycles = 0x00000034, -TA_PERF_SEL_buffer_coalesced_write_cycles = 0x00000035, -TA_PERF_SEL_addr_stalled_by_tc_cycles = 0x00000036, -TA_PERF_SEL_addr_stalled_by_td_cycles = 0x00000037, -TA_PERF_SEL_data_stalled_by_tc_cycles = 0x00000038, -TA_PERF_SEL_addresser_stalled_by_aligner_only_cycles = 0x00000039, -TA_PERF_SEL_addresser_stalled_cycles = 0x0000003a, -TA_PERF_SEL_aniso_stalled_by_addresser_only_cycles = 0x0000003b, -TA_PERF_SEL_aniso_stalled_cycles = 0x0000003c, -TA_PERF_SEL_deriv_stalled_by_aniso_only_cycles = 0x0000003d, -TA_PERF_SEL_deriv_stalled_cycles = 0x0000003e, -TA_PERF_SEL_aniso_gt1_cycle_quads = 0x0000003f, -TA_PERF_SEL_color_1_cycle_pixels = 0x00000040, -TA_PERF_SEL_color_2_cycle_pixels = 0x00000041, -TA_PERF_SEL_color_3_cycle_pixels = 0x00000042, -TA_PERF_SEL_color_4_cycle_pixels = 0x00000043, -TA_PERF_SEL_mip_1_cycle_pixels = 0x00000044, -TA_PERF_SEL_mip_2_cycle_pixels = 0x00000045, -TA_PERF_SEL_vol_1_cycle_pixels = 0x00000046, -TA_PERF_SEL_vol_2_cycle_pixels = 0x00000047, -TA_PERF_SEL_bilin_point_1_cycle_pixels = 0x00000048, -TA_PERF_SEL_mipmap_lod_0_samples = 0x00000049, -TA_PERF_SEL_mipmap_lod_1_samples = 0x0000004a, 
-TA_PERF_SEL_mipmap_lod_2_samples = 0x0000004b, -TA_PERF_SEL_mipmap_lod_3_samples = 0x0000004c, -TA_PERF_SEL_mipmap_lod_4_samples = 0x0000004d, -TA_PERF_SEL_mipmap_lod_5_samples = 0x0000004e, -TA_PERF_SEL_mipmap_lod_6_samples = 0x0000004f, -TA_PERF_SEL_mipmap_lod_7_samples = 0x00000050, -TA_PERF_SEL_mipmap_lod_8_samples = 0x00000051, -TA_PERF_SEL_mipmap_lod_9_samples = 0x00000052, -TA_PERF_SEL_mipmap_lod_10_samples = 0x00000053, -TA_PERF_SEL_mipmap_lod_11_samples = 0x00000054, -TA_PERF_SEL_mipmap_lod_12_samples = 0x00000055, -TA_PERF_SEL_mipmap_lod_13_samples = 0x00000056, -TA_PERF_SEL_mipmap_lod_14_samples = 0x00000057, -TA_PERF_SEL_mipmap_invalid_samples = 0x00000058, -TA_PERF_SEL_aniso_1_cycle_quads = 0x00000059, -TA_PERF_SEL_aniso_2_cycle_quads = 0x0000005a, -TA_PERF_SEL_aniso_4_cycle_quads = 0x0000005b, -TA_PERF_SEL_aniso_6_cycle_quads = 0x0000005c, -TA_PERF_SEL_aniso_8_cycle_quads = 0x0000005d, -TA_PERF_SEL_aniso_10_cycle_quads = 0x0000005e, -TA_PERF_SEL_aniso_12_cycle_quads = 0x0000005f, -TA_PERF_SEL_aniso_14_cycle_quads = 0x00000060, -TA_PERF_SEL_aniso_16_cycle_quads = 0x00000061, -TA_PERF_SEL_write_path_input_cycles = 0x00000062, -TA_PERF_SEL_write_path_output_cycles = 0x00000063, -TA_PERF_SEL_flat_wavefronts = 0x00000064, -TA_PERF_SEL_flat_read_wavefronts = 0x00000065, -TA_PERF_SEL_flat_write_wavefronts = 0x00000066, -TA_PERF_SEL_flat_atomic_wavefronts = 0x00000067, -TA_PERF_SEL_flat_coalesceable_wavefronts = 0x00000068, -TA_PERF_SEL_reg_sclk_vld = 0x00000069, -TA_PERF_SEL_local_cg_dyn_sclk_grp0_en = 0x0000006a, -TA_PERF_SEL_local_cg_dyn_sclk_grp1_en = 0x0000006b, -TA_PERF_SEL_local_cg_dyn_sclk_grp1_mems_en = 0x0000006c, -TA_PERF_SEL_local_cg_dyn_sclk_grp4_en = 0x0000006d, -TA_PERF_SEL_local_cg_dyn_sclk_grp5_en = 0x0000006e, -TA_PERF_SEL_xnack_on_phase0 = 0x0000006f, -TA_PERF_SEL_xnack_on_phase1 = 0x00000070, -TA_PERF_SEL_xnack_on_phase2 = 0x00000071, -TA_PERF_SEL_xnack_on_phase3 = 0x00000072, -TA_PERF_SEL_first_xnack_on_phase0 = 0x00000073, 
-TA_PERF_SEL_first_xnack_on_phase1 = 0x00000074, -TA_PERF_SEL_first_xnack_on_phase2 = 0x00000075, -TA_PERF_SEL_first_xnack_on_phase3 = 0x00000076, -} TA_PERFCOUNT_SEL; - -/* - * TD_PERFCOUNT_SEL enum - */ - -typedef enum TD_PERFCOUNT_SEL { -TD_PERF_SEL_none = 0x00000000, -TD_PERF_SEL_td_busy = 0x00000001, -TD_PERF_SEL_input_busy = 0x00000002, -TD_PERF_SEL_output_busy = 0x00000003, -TD_PERF_SEL_lerp_busy = 0x00000004, -TD_PERF_SEL_reg_sclk_vld = 0x00000005, -TD_PERF_SEL_local_cg_dyn_sclk_grp0_en = 0x00000006, -TD_PERF_SEL_local_cg_dyn_sclk_grp1_en = 0x00000007, -TD_PERF_SEL_local_cg_dyn_sclk_grp4_en = 0x00000008, -TD_PERF_SEL_local_cg_dyn_sclk_grp5_en = 0x00000009, -TD_PERF_SEL_tc_td_fifo_full = 0x0000000a, -TD_PERF_SEL_constant_state_full = 0x0000000b, -TD_PERF_SEL_sample_state_full = 0x0000000c, -TD_PERF_SEL_output_fifo_full = 0x0000000d, -TD_PERF_SEL_RESERVED_14 = 0x0000000e, -TD_PERF_SEL_tc_stall = 0x0000000f, -TD_PERF_SEL_pc_stall = 0x00000010, -TD_PERF_SEL_gds_stall = 0x00000011, -TD_PERF_SEL_RESERVED_18 = 0x00000012, -TD_PERF_SEL_RESERVED_19 = 0x00000013, -TD_PERF_SEL_gather4_wavefront = 0x00000014, -TD_PERF_SEL_gather4h_wavefront = 0x00000015, -TD_PERF_SEL_gather4h_packed_wavefront = 0x00000016, -TD_PERF_SEL_gather8h_packed_wavefront = 0x00000017, -TD_PERF_SEL_sample_c_wavefront = 0x00000018, -TD_PERF_SEL_load_wavefront = 0x00000019, -TD_PERF_SEL_atomic_wavefront = 0x0000001a, -TD_PERF_SEL_store_wavefront = 0x0000001b, -TD_PERF_SEL_ldfptr_wavefront = 0x0000001c, -TD_PERF_SEL_d16_en_wavefront = 0x0000001d, -TD_PERF_SEL_bypass_filter_wavefront = 0x0000001e, -TD_PERF_SEL_min_max_filter_wavefront = 0x0000001f, -TD_PERF_SEL_coalescable_wavefront = 0x00000020, -TD_PERF_SEL_coalesced_phase = 0x00000021, -TD_PERF_SEL_four_phase_wavefront = 0x00000022, -TD_PERF_SEL_eight_phase_wavefront = 0x00000023, -TD_PERF_SEL_sixteen_phase_wavefront = 0x00000024, -TD_PERF_SEL_four_phase_forward_wavefront = 0x00000025, -TD_PERF_SEL_write_ack_wavefront = 0x00000026, 
-TD_PERF_SEL_RESERVED_39 = 0x00000027, -TD_PERF_SEL_user_defined_border = 0x00000028, -TD_PERF_SEL_white_border = 0x00000029, -TD_PERF_SEL_opaque_black_border = 0x0000002a, -TD_PERF_SEL_RESERVED_43 = 0x0000002b, -TD_PERF_SEL_RESERVED_44 = 0x0000002c, -TD_PERF_SEL_nack = 0x0000002d, -TD_PERF_SEL_td_sp_traffic = 0x0000002e, -TD_PERF_SEL_consume_gds_traffic = 0x0000002f, -TD_PERF_SEL_addresscmd_poison = 0x00000030, -TD_PERF_SEL_data_poison = 0x00000031, -TD_PERF_SEL_start_cycle_0 = 0x00000032, -TD_PERF_SEL_start_cycle_1 = 0x00000033, -TD_PERF_SEL_start_cycle_2 = 0x00000034, -TD_PERF_SEL_start_cycle_3 = 0x00000035, -TD_PERF_SEL_null_cycle_output = 0x00000036, -TD_PERF_SEL_d16_data_packed = 0x00000037, -TD_PERF_SEL_texels_zeroed_out_by_blend_zero_prt = 0x00000038, -} TD_PERFCOUNT_SEL; - -/* - * TCP_PERFCOUNT_SELECT enum - */ - -typedef enum TCP_PERFCOUNT_SELECT { -TCP_PERF_SEL_TA_TCP_ADDR_STARVE_CYCLES = 0x00000000, -TCP_PERF_SEL_TA_TCP_DATA_STARVE_CYCLES = 0x00000001, -TCP_PERF_SEL_TCP_TA_ADDR_STALL_CYCLES = 0x00000002, -TCP_PERF_SEL_TCP_TA_DATA_STALL_CYCLES = 0x00000003, -TCP_PERF_SEL_TD_TCP_STALL_CYCLES = 0x00000004, -TCP_PERF_SEL_TCR_TCP_STALL_CYCLES = 0x00000005, -TCP_PERF_SEL_LOD_STALL_CYCLES = 0x00000006, -TCP_PERF_SEL_READ_TAGCONFLICT_STALL_CYCLES = 0x00000007, -TCP_PERF_SEL_WRITE_TAGCONFLICT_STALL_CYCLES = 0x00000008, -TCP_PERF_SEL_ATOMIC_TAGCONFLICT_STALL_CYCLES = 0x00000009, -TCP_PERF_SEL_ALLOC_STALL_CYCLES = 0x0000000a, -TCP_PERF_SEL_LFIFO_STALL_CYCLES = 0x0000000b, -TCP_PERF_SEL_RFIFO_STALL_CYCLES = 0x0000000c, -TCP_PERF_SEL_TCR_RDRET_STALL = 0x0000000d, -TCP_PERF_SEL_WRITE_CONFLICT_STALL = 0x0000000e, -TCP_PERF_SEL_HOLE_READ_STALL = 0x0000000f, -TCP_PERF_SEL_READCONFLICT_STALL_CYCLES = 0x00000010, -TCP_PERF_SEL_PENDING_STALL_CYCLES = 0x00000011, -TCP_PERF_SEL_READFIFO_STALL_CYCLES = 0x00000012, -TCP_PERF_SEL_TCP_LATENCY = 0x00000013, -TCP_PERF_SEL_TCC_READ_REQ_LATENCY = 0x00000014, -TCP_PERF_SEL_TCC_WRITE_REQ_LATENCY = 0x00000015, 
-TCP_PERF_SEL_TCC_WRITE_REQ_HOLE_LATENCY = 0x00000016, -TCP_PERF_SEL_TCC_READ_REQ = 0x00000017, -TCP_PERF_SEL_TCC_WRITE_REQ = 0x00000018, -TCP_PERF_SEL_TCC_ATOMIC_WITH_RET_REQ = 0x00000019, -TCP_PERF_SEL_TCC_ATOMIC_WITHOUT_RET_REQ = 0x0000001a, -TCP_PERF_SEL_TOTAL_LOCAL_READ = 0x0000001b, -TCP_PERF_SEL_TOTAL_GLOBAL_READ = 0x0000001c, -TCP_PERF_SEL_TOTAL_LOCAL_WRITE = 0x0000001d, -TCP_PERF_SEL_TOTAL_GLOBAL_WRITE = 0x0000001e, -TCP_PERF_SEL_TOTAL_ATOMIC_WITH_RET = 0x0000001f, -TCP_PERF_SEL_TOTAL_ATOMIC_WITHOUT_RET = 0x00000020, -TCP_PERF_SEL_TOTAL_WBINVL1 = 0x00000021, -TCP_PERF_SEL_IMG_READ_FMT_1 = 0x00000022, -TCP_PERF_SEL_IMG_READ_FMT_8 = 0x00000023, -TCP_PERF_SEL_IMG_READ_FMT_16 = 0x00000024, -TCP_PERF_SEL_IMG_READ_FMT_32 = 0x00000025, -TCP_PERF_SEL_IMG_READ_FMT_32_AS_8 = 0x00000026, -TCP_PERF_SEL_IMG_READ_FMT_32_AS_16 = 0x00000027, -TCP_PERF_SEL_IMG_READ_FMT_32_AS_128 = 0x00000028, -TCP_PERF_SEL_IMG_READ_FMT_64_2_CYCLE = 0x00000029, -TCP_PERF_SEL_IMG_READ_FMT_64_1_CYCLE = 0x0000002a, -TCP_PERF_SEL_IMG_READ_FMT_96 = 0x0000002b, -TCP_PERF_SEL_IMG_READ_FMT_128_4_CYCLE = 0x0000002c, -TCP_PERF_SEL_IMG_READ_FMT_128_1_CYCLE = 0x0000002d, -TCP_PERF_SEL_IMG_READ_FMT_BC1 = 0x0000002e, -TCP_PERF_SEL_IMG_READ_FMT_BC2 = 0x0000002f, -TCP_PERF_SEL_IMG_READ_FMT_BC3 = 0x00000030, -TCP_PERF_SEL_IMG_READ_FMT_BC4 = 0x00000031, -TCP_PERF_SEL_IMG_READ_FMT_BC5 = 0x00000032, -TCP_PERF_SEL_IMG_READ_FMT_BC6 = 0x00000033, -TCP_PERF_SEL_IMG_READ_FMT_BC7 = 0x00000034, -TCP_PERF_SEL_IMG_READ_FMT_I8 = 0x00000035, -TCP_PERF_SEL_IMG_READ_FMT_I16 = 0x00000036, -TCP_PERF_SEL_IMG_READ_FMT_I32 = 0x00000037, -TCP_PERF_SEL_IMG_READ_FMT_I32_AS_8 = 0x00000038, -TCP_PERF_SEL_IMG_READ_FMT_I32_AS_16 = 0x00000039, -TCP_PERF_SEL_IMG_READ_FMT_D8 = 0x0000003a, -TCP_PERF_SEL_IMG_READ_FMT_D16 = 0x0000003b, -TCP_PERF_SEL_IMG_READ_FMT_D32 = 0x0000003c, -TCP_PERF_SEL_IMG_WRITE_FMT_8 = 0x0000003d, -TCP_PERF_SEL_IMG_WRITE_FMT_16 = 0x0000003e, -TCP_PERF_SEL_IMG_WRITE_FMT_32 = 0x0000003f, 
-TCP_PERF_SEL_IMG_WRITE_FMT_64 = 0x00000040, -TCP_PERF_SEL_IMG_WRITE_FMT_128 = 0x00000041, -TCP_PERF_SEL_IMG_WRITE_FMT_D8 = 0x00000042, -TCP_PERF_SEL_IMG_WRITE_FMT_D16 = 0x00000043, -TCP_PERF_SEL_IMG_WRITE_FMT_D32 = 0x00000044, -TCP_PERF_SEL_IMG_ATOMIC_WITH_RET_FMT_32 = 0x00000045, -TCP_PERF_SEL_IMG_ATOMIC_WITHOUT_RET_FMT_32 = 0x00000046, -TCP_PERF_SEL_IMG_ATOMIC_WITH_RET_FMT_64 = 0x00000047, -TCP_PERF_SEL_IMG_ATOMIC_WITHOUT_RET_FMT_64 = 0x00000048, -TCP_PERF_SEL_BUF_READ_FMT_8 = 0x00000049, -TCP_PERF_SEL_BUF_READ_FMT_16 = 0x0000004a, -TCP_PERF_SEL_BUF_READ_FMT_32 = 0x0000004b, -TCP_PERF_SEL_BUF_WRITE_FMT_8 = 0x0000004c, -TCP_PERF_SEL_BUF_WRITE_FMT_16 = 0x0000004d, -TCP_PERF_SEL_BUF_WRITE_FMT_32 = 0x0000004e, -TCP_PERF_SEL_BUF_ATOMIC_WITH_RET_FMT_32 = 0x0000004f, -TCP_PERF_SEL_BUF_ATOMIC_WITHOUT_RET_FMT_32 = 0x00000050, -TCP_PERF_SEL_BUF_ATOMIC_WITH_RET_FMT_64 = 0x00000051, -TCP_PERF_SEL_BUF_ATOMIC_WITHOUT_RET_FMT_64 = 0x00000052, -TCP_PERF_SEL_ARR_LINEAR_GENERAL = 0x00000053, -TCP_PERF_SEL_ARR_LINEAR_ALIGNED = 0x00000054, -TCP_PERF_SEL_ARR_1D_THIN1 = 0x00000055, -TCP_PERF_SEL_ARR_1D_THICK = 0x00000056, -TCP_PERF_SEL_ARR_2D_THIN1 = 0x00000057, -TCP_PERF_SEL_ARR_2D_THICK = 0x00000058, -TCP_PERF_SEL_ARR_2D_XTHICK = 0x00000059, -TCP_PERF_SEL_ARR_3D_THIN1 = 0x0000005a, -TCP_PERF_SEL_ARR_3D_THICK = 0x0000005b, -TCP_PERF_SEL_ARR_3D_XTHICK = 0x0000005c, -TCP_PERF_SEL_DIM_1D = 0x0000005d, -TCP_PERF_SEL_DIM_2D = 0x0000005e, -TCP_PERF_SEL_DIM_3D = 0x0000005f, -TCP_PERF_SEL_DIM_1D_ARRAY = 0x00000060, -TCP_PERF_SEL_DIM_2D_ARRAY = 0x00000061, -TCP_PERF_SEL_DIM_2D_MSAA = 0x00000062, -TCP_PERF_SEL_DIM_2D_ARRAY_MSAA = 0x00000063, -TCP_PERF_SEL_DIM_CUBE_ARRAY = 0x00000064, -TCP_PERF_SEL_CP_TCP_INVALIDATE = 0x00000065, -TCP_PERF_SEL_TA_TCP_STATE_READ = 0x00000066, -TCP_PERF_SEL_TAGRAM0_REQ = 0x00000067, -TCP_PERF_SEL_TAGRAM1_REQ = 0x00000068, -TCP_PERF_SEL_TAGRAM2_REQ = 0x00000069, -TCP_PERF_SEL_TAGRAM3_REQ = 0x0000006a, -TCP_PERF_SEL_GATE_EN1 = 0x0000006b, -TCP_PERF_SEL_GATE_EN2 = 
0x0000006c, -TCP_PERF_SEL_CORE_REG_SCLK_VLD = 0x0000006d, -TCP_PERF_SEL_TCC_REQ = 0x0000006e, -TCP_PERF_SEL_TCC_NON_READ_REQ = 0x0000006f, -TCP_PERF_SEL_TCC_BYPASS_READ_REQ = 0x00000070, -TCP_PERF_SEL_TCC_MISS_EVICT_READ_REQ = 0x00000071, -TCP_PERF_SEL_TCC_VOLATILE_READ_REQ = 0x00000072, -TCP_PERF_SEL_TCC_VOLATILE_BYPASS_READ_REQ = 0x00000073, -TCP_PERF_SEL_TCC_VOLATILE_MISS_EVICT_READ_REQ = 0x00000074, -TCP_PERF_SEL_TCC_BYPASS_WRITE_REQ = 0x00000075, -TCP_PERF_SEL_TCC_MISS_EVICT_WRITE_REQ = 0x00000076, -TCP_PERF_SEL_TCC_VOLATILE_BYPASS_WRITE_REQ = 0x00000077, -TCP_PERF_SEL_TCC_VOLATILE_WRITE_REQ = 0x00000078, -TCP_PERF_SEL_TCC_VOLATILE_MISS_EVICT_WRITE_REQ = 0x00000079, -TCP_PERF_SEL_TCC_BYPASS_ATOMIC_REQ = 0x0000007a, -TCP_PERF_SEL_TCC_ATOMIC_REQ = 0x0000007b, -TCP_PERF_SEL_TCC_VOLATILE_ATOMIC_REQ = 0x0000007c, -TCP_PERF_SEL_TCC_DATA_BUS_BUSY = 0x0000007d, -TCP_PERF_SEL_TOTAL_ACCESSES = 0x0000007e, -TCP_PERF_SEL_TOTAL_READ = 0x0000007f, -TCP_PERF_SEL_TOTAL_HIT_LRU_READ = 0x00000080, -TCP_PERF_SEL_TOTAL_HIT_EVICT_READ = 0x00000081, -TCP_PERF_SEL_TOTAL_MISS_LRU_READ = 0x00000082, -TCP_PERF_SEL_TOTAL_MISS_EVICT_READ = 0x00000083, -TCP_PERF_SEL_TOTAL_NON_READ = 0x00000084, -TCP_PERF_SEL_TOTAL_WRITE = 0x00000085, -TCP_PERF_SEL_TOTAL_MISS_LRU_WRITE = 0x00000086, -TCP_PERF_SEL_TOTAL_MISS_EVICT_WRITE = 0x00000087, -TCP_PERF_SEL_TOTAL_WBINVL1_VOL = 0x00000088, -TCP_PERF_SEL_TOTAL_WRITEBACK_INVALIDATES = 0x00000089, -TCP_PERF_SEL_DISPLAY_MICROTILING = 0x0000008a, -TCP_PERF_SEL_THIN_MICROTILING = 0x0000008b, -TCP_PERF_SEL_DEPTH_MICROTILING = 0x0000008c, -TCP_PERF_SEL_ARR_PRT_THIN1 = 0x0000008d, -TCP_PERF_SEL_ARR_PRT_2D_THIN1 = 0x0000008e, -TCP_PERF_SEL_ARR_PRT_3D_THIN1 = 0x0000008f, -TCP_PERF_SEL_ARR_PRT_THICK = 0x00000090, -TCP_PERF_SEL_ARR_PRT_2D_THICK = 0x00000091, -TCP_PERF_SEL_ARR_PRT_3D_THICK = 0x00000092, -TCP_PERF_SEL_CP_TCP_INVALIDATE_VOL = 0x00000093, -TCP_PERF_SEL_SQ_TCP_INVALIDATE_VOL = 0x00000094, -TCP_PERF_SEL_UNALIGNED = 0x00000095, 
-TCP_PERF_SEL_ROTATED_MICROTILING = 0x00000096, -TCP_PERF_SEL_THICK_MICROTILING = 0x00000097, -TCP_PERF_SEL_ATC = 0x00000098, -TCP_PERF_SEL_POWER_STALL = 0x00000099, -TCP_PERF_SEL_RESERVED_154 = 0x0000009a, -TCP_PERF_SEL_TCC_LRU_REQ = 0x0000009b, -TCP_PERF_SEL_TCC_STREAM_REQ = 0x0000009c, -TCP_PERF_SEL_TCC_NC_READ_REQ = 0x0000009d, -TCP_PERF_SEL_TCC_NC_WRITE_REQ = 0x0000009e, -TCP_PERF_SEL_TCC_NC_ATOMIC_REQ = 0x0000009f, -TCP_PERF_SEL_TCC_UC_READ_REQ = 0x000000a0, -TCP_PERF_SEL_TCC_UC_WRITE_REQ = 0x000000a1, -TCP_PERF_SEL_TCC_UC_ATOMIC_REQ = 0x000000a2, -TCP_PERF_SEL_TCC_CC_READ_REQ = 0x000000a3, -TCP_PERF_SEL_TCC_CC_WRITE_REQ = 0x000000a4, -TCP_PERF_SEL_TCC_CC_ATOMIC_REQ = 0x000000a5, -TCP_PERF_SEL_TCC_DCC_REQ = 0x000000a6, -TCP_PERF_SEL_TCC_PHYSICAL_REQ = 0x000000a7, -TCP_PERF_SEL_UNORDERED_MTYPE_STALL = 0x000000a8, -TCP_PERF_SEL_VOLATILE = 0x000000a9, -TCP_PERF_SEL_TC_TA_XNACK_STALL = 0x000000aa, -TCP_PERF_SEL_UTCL1_SERIALIZATION_STALL = 0x000000ab, -TCP_PERF_SEL_SHOOTDOWN = 0x000000ac, -TCP_PERF_SEL_UTCL1_TRANSLATION_MISS = 0x000000ad, -TCP_PERF_SEL_UTCL1_PERMISSION_MISS = 0x000000ae, -TCP_PERF_SEL_UTCL1_REQUEST = 0x000000af, -TCP_PERF_SEL_UTCL1_STALL_INFLIGHT_MAX = 0x000000b0, -TCP_PERF_SEL_UTCL1_STALL_LRU_INFLIGHT = 0x000000b1, -TCP_PERF_SEL_UTCL1_LFIFO_FULL = 0x000000b2, -TCP_PERF_SEL_UTCL1_STALL_LFIFO_NOT_RES = 0x000000b3, -TCP_PERF_SEL_UTCL1_STALL_UTCL2_REQ_OUT_OF_CREDITS = 0x000000b4, -TCP_PERF_SEL_UTCL1_UTCL2_INFLIGHT = 0x000000b5, -TCP_PERF_SEL_UTCL1_STALL_MISSFIFO_FULL = 0x000000b6, -TCP_PERF_SEL_IMG_READ_FMT_ETC2_RGB = 0x000000b7, -TCP_PERF_SEL_IMG_READ_FMT_ETC2_RGBA = 0x000000b8, -TCP_PERF_SEL_IMG_READ_FMT_ETC2_RGBA1 = 0x000000b9, -TCP_PERF_SEL_IMG_READ_FMT_ETC2_R = 0x000000ba, -TCP_PERF_SEL_IMG_READ_FMT_ETC2_RG = 0x000000bb, -TCP_PERF_SEL_IMG_READ_FMT_8_AS_32 = 0x000000bc, -TCP_PERF_SEL_IMG_READ_FMT_8_AS_64 = 0x000000bd, -TCP_PERF_SEL_IMG_READ_FMT_16_AS_64 = 0x000000be, -TCP_PERF_SEL_IMG_READ_FMT_16_AS_128 = 0x000000bf, 
-TCP_PERF_SEL_IMG_WRITE_FMT_8_AS_32 = 0x000000c0, -TCP_PERF_SEL_IMG_WRITE_FMT_8_AS_64 = 0x000000c1, -TCP_PERF_SEL_IMG_WRITE_FMT_16_AS_64 = 0x000000c2, -TCP_PERF_SEL_IMG_WRITE_FMT_16_AS_128 = 0x000000c3, -} TCP_PERFCOUNT_SELECT; - -/* - * TCP_CACHE_POLICIES enum - */ - -typedef enum TCP_CACHE_POLICIES { -TCP_CACHE_POLICY_MISS_LRU = 0x00000000, -TCP_CACHE_POLICY_MISS_EVICT = 0x00000001, -TCP_CACHE_POLICY_HIT_LRU = 0x00000002, -TCP_CACHE_POLICY_HIT_EVICT = 0x00000003, -} TCP_CACHE_POLICIES; - -/* - * TCP_CACHE_STORE_POLICIES enum - */ - -typedef enum TCP_CACHE_STORE_POLICIES { -TCP_CACHE_STORE_POLICY_WT_LRU = 0x00000000, -TCP_CACHE_STORE_POLICY_WT_EVICT = 0x00000001, -} TCP_CACHE_STORE_POLICIES; - -/* - * TCP_WATCH_MODES enum - */ - -typedef enum TCP_WATCH_MODES { -TCP_WATCH_MODE_READ = 0x00000000, -TCP_WATCH_MODE_NONREAD = 0x00000001, -TCP_WATCH_MODE_ATOMIC = 0x00000002, -TCP_WATCH_MODE_ALL = 0x00000003, -} TCP_WATCH_MODES; - -/* - * TCP_DSM_DATA_SEL enum - */ - -typedef enum TCP_DSM_DATA_SEL { -TCP_DSM_DISABLE = 0x00000000, -TCP_DSM_SEL0 = 0x00000001, -TCP_DSM_SEL1 = 0x00000002, -TCP_DSM_SEL_BOTH = 0x00000003, -} TCP_DSM_DATA_SEL; - -/* - * TCP_DSM_SINGLE_WRITE enum - */ - -typedef enum TCP_DSM_SINGLE_WRITE { -TCP_DSM_SINGLE_WRITE_DIS = 0x00000000, -TCP_DSM_SINGLE_WRITE_EN = 0x00000001, -} TCP_DSM_SINGLE_WRITE; - -/* - * TCP_DSM_INJECT_SEL enum - */ - -typedef enum TCP_DSM_INJECT_SEL { -TCP_DSM_INJECT_SEL0 = 0x00000000, -TCP_DSM_INJECT_SEL1 = 0x00000001, -TCP_DSM_INJECT_SEL2 = 0x00000002, -TCP_DSM_INJECT_SEL3 = 0x00000003, -} TCP_DSM_INJECT_SEL; - -/******************************************************* - * TCC Enums - *******************************************************/ - -/* - * TCC_PERF_SEL enum - */ - -typedef enum TCC_PERF_SEL { -TCC_PERF_SEL_NONE = 0x00000000, -TCC_PERF_SEL_CYCLE = 0x00000001, -TCC_PERF_SEL_BUSY = 0x00000002, -TCC_PERF_SEL_REQ = 0x00000003, -TCC_PERF_SEL_STREAMING_REQ = 0x00000004, -TCC_PERF_SEL_EXE_REQ = 0x00000005, 
-TCC_PERF_SEL_COMPRESSED_REQ = 0x00000006, -TCC_PERF_SEL_COMPRESSED_0_REQ = 0x00000007, -TCC_PERF_SEL_METADATA_REQ = 0x00000008, -TCC_PERF_SEL_NC_VIRTUAL_REQ = 0x00000009, -TCC_PERF_SEL_UC_VIRTUAL_REQ = 0x0000000a, -TCC_PERF_SEL_CC_PHYSICAL_REQ = 0x0000000b, -TCC_PERF_SEL_PROBE = 0x0000000c, -TCC_PERF_SEL_PROBE_ALL = 0x0000000d, -TCC_PERF_SEL_READ = 0x0000000e, -TCC_PERF_SEL_WRITE = 0x0000000f, -TCC_PERF_SEL_ATOMIC = 0x00000010, -TCC_PERF_SEL_HIT = 0x00000011, -TCC_PERF_SEL_SECTOR_HIT = 0x00000012, -TCC_PERF_SEL_MISS = 0x00000013, -TCC_PERF_SEL_DEWRITE_ALLOCATE_HIT = 0x00000014, -TCC_PERF_SEL_FULLY_WRITTEN_HIT = 0x00000015, -TCC_PERF_SEL_WRITEBACK = 0x00000016, -TCC_PERF_SEL_LATENCY_FIFO_FULL = 0x00000017, -TCC_PERF_SEL_SRC_FIFO_FULL = 0x00000018, -TCC_PERF_SEL_HOLE_FIFO_FULL = 0x00000019, -TCC_PERF_SEL_EA_WRREQ = 0x0000001a, -TCC_PERF_SEL_EA_WRREQ_64B = 0x0000001b, -TCC_PERF_SEL_EA_WRREQ_PROBE_COMMAND = 0x0000001c, -TCC_PERF_SEL_EA_WR_UNCACHED_32B = 0x0000001d, -TCC_PERF_SEL_EA_WRREQ_STALL = 0x0000001e, -TCC_PERF_SEL_EA_WRREQ_CREDIT_STALL = 0x0000001f, -TCC_PERF_SEL_TOO_MANY_EA_WRREQS_STALL = 0x00000020, -TCC_PERF_SEL_EA_WRREQ_LEVEL = 0x00000021, -TCC_PERF_SEL_EA_ATOMIC = 0x00000022, -TCC_PERF_SEL_EA_ATOMIC_LEVEL = 0x00000023, -TCC_PERF_SEL_EA_RDREQ = 0x00000024, -TCC_PERF_SEL_EA_RDREQ_32B = 0x00000025, -TCC_PERF_SEL_EA_RD_UNCACHED_32B = 0x00000026, -TCC_PERF_SEL_EA_RD_MDC_32B = 0x00000027, -TCC_PERF_SEL_EA_RD_COMPRESSED_32B = 0x00000028, -TCC_PERF_SEL_EA_RDREQ_CREDIT_STALL = 0x00000029, -TCC_PERF_SEL_EA_RDREQ_LEVEL = 0x0000002a, -TCC_PERF_SEL_TAG_STALL = 0x0000002b, -TCC_PERF_SEL_TAG_WRITEBACK_FIFO_FULL_STALL = 0x0000002c, -TCC_PERF_SEL_TAG_MISS_NOTHING_REPLACEABLE_STALL = 0x0000002d, -TCC_PERF_SEL_TAG_UNCACHED_WRITE_ATOMIC_FIFO_FULL_STALL = 0x0000002e, -TCC_PERF_SEL_TAG_NO_UNCACHED_WRITE_ATOMIC_ENTRIES_STALL = 0x0000002f, -TCC_PERF_SEL_TAG_PROBE_STALL = 0x00000030, -TCC_PERF_SEL_TAG_PROBE_FILTER_STALL = 0x00000031, -TCC_PERF_SEL_READ_RETURN_TIMEOUT = 0x00000032, 
-TCC_PERF_SEL_WRITEBACK_READ_TIMEOUT = 0x00000033, -TCC_PERF_SEL_READ_RETURN_FULL_BUBBLE = 0x00000034, -TCC_PERF_SEL_BUBBLE = 0x00000035, -TCC_PERF_SEL_RETURN_ACK = 0x00000036, -TCC_PERF_SEL_RETURN_DATA = 0x00000037, -TCC_PERF_SEL_RETURN_HOLE = 0x00000038, -TCC_PERF_SEL_RETURN_ACK_HOLE = 0x00000039, -TCC_PERF_SEL_IB_REQ = 0x0000003a, -TCC_PERF_SEL_IB_STALL = 0x0000003b, -TCC_PERF_SEL_IB_TAG_STALL = 0x0000003c, -TCC_PERF_SEL_IB_MDC_STALL = 0x0000003d, -TCC_PERF_SEL_TCA_LEVEL = 0x0000003e, -TCC_PERF_SEL_HOLE_LEVEL = 0x0000003f, -TCC_PERF_SEL_EA_RDRET_NACK = 0x00000040, -TCC_PERF_SEL_EA_WRRET_NACK = 0x00000041, -TCC_PERF_SEL_NORMAL_WRITEBACK = 0x00000042, -TCC_PERF_SEL_TC_OP_WBL2_NC_WRITEBACK = 0x00000043, -TCC_PERF_SEL_TC_OP_WBL2_WC_WRITEBACK = 0x00000044, -TCC_PERF_SEL_TC_OP_WBINVL2_WRITEBACK = 0x00000045, -TCC_PERF_SEL_TC_OP_WBINVL2_NC_WRITEBACK = 0x00000046, -TCC_PERF_SEL_TC_OP_WBINVL2_SD_WRITEBACK = 0x00000047, -TCC_PERF_SEL_ALL_TC_OP_WB_WRITEBACK = 0x00000048, -TCC_PERF_SEL_NORMAL_EVICT = 0x00000049, -TCC_PERF_SEL_TC_OP_WBL2_NC_EVICT = 0x0000004a, -TCC_PERF_SEL_TC_OP_WBL2_WC_EVICT = 0x0000004b, -TCC_PERF_SEL_TC_OP_INVL2_NC_EVICT = 0x0000004c, -TCC_PERF_SEL_TC_OP_WBINVL2_EVICT = 0x0000004d, -TCC_PERF_SEL_TC_OP_WBINVL2_NC_EVICT = 0x0000004e, -TCC_PERF_SEL_TC_OP_WBINVL2_SD_EVICT = 0x0000004f, -TCC_PERF_SEL_ALL_TC_OP_INV_EVICT = 0x00000050, -TCC_PERF_SEL_PROBE_EVICT = 0x00000051, -TCC_PERF_SEL_TC_OP_WBL2_NC_CYCLE = 0x00000052, -TCC_PERF_SEL_TC_OP_WBL2_WC_CYCLE = 0x00000053, -TCC_PERF_SEL_TC_OP_INVL2_NC_CYCLE = 0x00000054, -TCC_PERF_SEL_TC_OP_WBINVL2_CYCLE = 0x00000055, -TCC_PERF_SEL_TC_OP_WBINVL2_NC_CYCLE = 0x00000056, -TCC_PERF_SEL_TC_OP_WBINVL2_SD_CYCLE = 0x00000057, -TCC_PERF_SEL_ALL_TC_OP_WB_OR_INV_CYCLE = 0x00000058, -TCC_PERF_SEL_TC_OP_WBL2_NC_START = 0x00000059, -TCC_PERF_SEL_TC_OP_WBL2_WC_START = 0x0000005a, -TCC_PERF_SEL_TC_OP_INVL2_NC_START = 0x0000005b, -TCC_PERF_SEL_TC_OP_WBINVL2_START = 0x0000005c, -TCC_PERF_SEL_TC_OP_WBINVL2_NC_START = 0x0000005d, 
-TCC_PERF_SEL_TC_OP_WBINVL2_SD_START = 0x0000005e, -TCC_PERF_SEL_ALL_TC_OP_WB_OR_INV_START = 0x0000005f, -TCC_PERF_SEL_TC_OP_WBL2_NC_FINISH = 0x00000060, -TCC_PERF_SEL_TC_OP_WBL2_WC_FINISH = 0x00000061, -TCC_PERF_SEL_TC_OP_INVL2_NC_FINISH = 0x00000062, -TCC_PERF_SEL_TC_OP_WBINVL2_FINISH = 0x00000063, -TCC_PERF_SEL_TC_OP_WBINVL2_NC_FINISH = 0x00000064, -TCC_PERF_SEL_TC_OP_WBINVL2_SD_FINISH = 0x00000065, -TCC_PERF_SEL_ALL_TC_OP_WB_OR_INV_FINISH = 0x00000066, -TCC_PERF_SEL_MDC_REQ = 0x00000067, -TCC_PERF_SEL_MDC_LEVEL = 0x00000068, -TCC_PERF_SEL_MDC_TAG_HIT = 0x00000069, -TCC_PERF_SEL_MDC_SECTOR_HIT = 0x0000006a, -TCC_PERF_SEL_MDC_SECTOR_MISS = 0x0000006b, -TCC_PERF_SEL_MDC_TAG_STALL = 0x0000006c, -TCC_PERF_SEL_MDC_TAG_REPLACEMENT_LINE_IN_USE_STALL = 0x0000006d, -TCC_PERF_SEL_MDC_TAG_DESECTORIZATION_FIFO_FULL_STALL = 0x0000006e, -TCC_PERF_SEL_MDC_TAG_WAITING_FOR_INVALIDATE_COMPLETION_STALL = 0x0000006f, -TCC_PERF_SEL_PROBE_FILTER_DISABLE_TRANSITION = 0x00000070, -TCC_PERF_SEL_PROBE_FILTER_DISABLED = 0x00000071, -TCC_PERF_SEL_CLIENT0_REQ = 0x00000080, -TCC_PERF_SEL_CLIENT1_REQ = 0x00000081, -TCC_PERF_SEL_CLIENT2_REQ = 0x00000082, -TCC_PERF_SEL_CLIENT3_REQ = 0x00000083, -TCC_PERF_SEL_CLIENT4_REQ = 0x00000084, -TCC_PERF_SEL_CLIENT5_REQ = 0x00000085, -TCC_PERF_SEL_CLIENT6_REQ = 0x00000086, -TCC_PERF_SEL_CLIENT7_REQ = 0x00000087, -TCC_PERF_SEL_CLIENT8_REQ = 0x00000088, -TCC_PERF_SEL_CLIENT9_REQ = 0x00000089, -TCC_PERF_SEL_CLIENT10_REQ = 0x0000008a, -TCC_PERF_SEL_CLIENT11_REQ = 0x0000008b, -TCC_PERF_SEL_CLIENT12_REQ = 0x0000008c, -TCC_PERF_SEL_CLIENT13_REQ = 0x0000008d, -TCC_PERF_SEL_CLIENT14_REQ = 0x0000008e, -TCC_PERF_SEL_CLIENT15_REQ = 0x0000008f, -TCC_PERF_SEL_CLIENT16_REQ = 0x00000090, -TCC_PERF_SEL_CLIENT17_REQ = 0x00000091, -TCC_PERF_SEL_CLIENT18_REQ = 0x00000092, -TCC_PERF_SEL_CLIENT19_REQ = 0x00000093, -TCC_PERF_SEL_CLIENT20_REQ = 0x00000094, -TCC_PERF_SEL_CLIENT21_REQ = 0x00000095, -TCC_PERF_SEL_CLIENT22_REQ = 0x00000096, -TCC_PERF_SEL_CLIENT23_REQ = 0x00000097, 
-TCC_PERF_SEL_CLIENT24_REQ = 0x00000098, -TCC_PERF_SEL_CLIENT25_REQ = 0x00000099, -TCC_PERF_SEL_CLIENT26_REQ = 0x0000009a, -TCC_PERF_SEL_CLIENT27_REQ = 0x0000009b, -TCC_PERF_SEL_CLIENT28_REQ = 0x0000009c, -TCC_PERF_SEL_CLIENT29_REQ = 0x0000009d, -TCC_PERF_SEL_CLIENT30_REQ = 0x0000009e, -TCC_PERF_SEL_CLIENT31_REQ = 0x0000009f, -TCC_PERF_SEL_CLIENT32_REQ = 0x000000a0, -TCC_PERF_SEL_CLIENT33_REQ = 0x000000a1, -TCC_PERF_SEL_CLIENT34_REQ = 0x000000a2, -TCC_PERF_SEL_CLIENT35_REQ = 0x000000a3, -TCC_PERF_SEL_CLIENT36_REQ = 0x000000a4, -TCC_PERF_SEL_CLIENT37_REQ = 0x000000a5, -TCC_PERF_SEL_CLIENT38_REQ = 0x000000a6, -TCC_PERF_SEL_CLIENT39_REQ = 0x000000a7, -TCC_PERF_SEL_CLIENT40_REQ = 0x000000a8, -TCC_PERF_SEL_CLIENT41_REQ = 0x000000a9, -TCC_PERF_SEL_CLIENT42_REQ = 0x000000aa, -TCC_PERF_SEL_CLIENT43_REQ = 0x000000ab, -TCC_PERF_SEL_CLIENT44_REQ = 0x000000ac, -TCC_PERF_SEL_CLIENT45_REQ = 0x000000ad, -TCC_PERF_SEL_CLIENT46_REQ = 0x000000ae, -TCC_PERF_SEL_CLIENT47_REQ = 0x000000af, -TCC_PERF_SEL_CLIENT48_REQ = 0x000000b0, -TCC_PERF_SEL_CLIENT49_REQ = 0x000000b1, -TCC_PERF_SEL_CLIENT50_REQ = 0x000000b2, -TCC_PERF_SEL_CLIENT51_REQ = 0x000000b3, -TCC_PERF_SEL_CLIENT52_REQ = 0x000000b4, -TCC_PERF_SEL_CLIENT53_REQ = 0x000000b5, -TCC_PERF_SEL_CLIENT54_REQ = 0x000000b6, -TCC_PERF_SEL_CLIENT55_REQ = 0x000000b7, -TCC_PERF_SEL_CLIENT56_REQ = 0x000000b8, -TCC_PERF_SEL_CLIENT57_REQ = 0x000000b9, -TCC_PERF_SEL_CLIENT58_REQ = 0x000000ba, -TCC_PERF_SEL_CLIENT59_REQ = 0x000000bb, -TCC_PERF_SEL_CLIENT60_REQ = 0x000000bc, -TCC_PERF_SEL_CLIENT61_REQ = 0x000000bd, -TCC_PERF_SEL_CLIENT62_REQ = 0x000000be, -TCC_PERF_SEL_CLIENT63_REQ = 0x000000bf, -TCC_PERF_SEL_CLIENT64_REQ = 0x000000c0, -TCC_PERF_SEL_CLIENT65_REQ = 0x000000c1, -TCC_PERF_SEL_CLIENT66_REQ = 0x000000c2, -TCC_PERF_SEL_CLIENT67_REQ = 0x000000c3, -TCC_PERF_SEL_CLIENT68_REQ = 0x000000c4, -TCC_PERF_SEL_CLIENT69_REQ = 0x000000c5, -TCC_PERF_SEL_CLIENT70_REQ = 0x000000c6, -TCC_PERF_SEL_CLIENT71_REQ = 0x000000c7, -TCC_PERF_SEL_CLIENT72_REQ = 
0x000000c8, -TCC_PERF_SEL_CLIENT73_REQ = 0x000000c9, -TCC_PERF_SEL_CLIENT74_REQ = 0x000000ca, -TCC_PERF_SEL_CLIENT75_REQ = 0x000000cb, -TCC_PERF_SEL_CLIENT76_REQ = 0x000000cc, -TCC_PERF_SEL_CLIENT77_REQ = 0x000000cd, -TCC_PERF_SEL_CLIENT78_REQ = 0x000000ce, -TCC_PERF_SEL_CLIENT79_REQ = 0x000000cf, -TCC_PERF_SEL_CLIENT80_REQ = 0x000000d0, -TCC_PERF_SEL_CLIENT81_REQ = 0x000000d1, -TCC_PERF_SEL_CLIENT82_REQ = 0x000000d2, -TCC_PERF_SEL_CLIENT83_REQ = 0x000000d3, -TCC_PERF_SEL_CLIENT84_REQ = 0x000000d4, -TCC_PERF_SEL_CLIENT85_REQ = 0x000000d5, -TCC_PERF_SEL_CLIENT86_REQ = 0x000000d6, -TCC_PERF_SEL_CLIENT87_REQ = 0x000000d7, -TCC_PERF_SEL_CLIENT88_REQ = 0x000000d8, -TCC_PERF_SEL_CLIENT89_REQ = 0x000000d9, -TCC_PERF_SEL_CLIENT90_REQ = 0x000000da, -TCC_PERF_SEL_CLIENT91_REQ = 0x000000db, -TCC_PERF_SEL_CLIENT92_REQ = 0x000000dc, -TCC_PERF_SEL_CLIENT93_REQ = 0x000000dd, -TCC_PERF_SEL_CLIENT94_REQ = 0x000000de, -TCC_PERF_SEL_CLIENT95_REQ = 0x000000df, -TCC_PERF_SEL_CLIENT96_REQ = 0x000000e0, -TCC_PERF_SEL_CLIENT97_REQ = 0x000000e1, -TCC_PERF_SEL_CLIENT98_REQ = 0x000000e2, -TCC_PERF_SEL_CLIENT99_REQ = 0x000000e3, -TCC_PERF_SEL_CLIENT100_REQ = 0x000000e4, -TCC_PERF_SEL_CLIENT101_REQ = 0x000000e5, -TCC_PERF_SEL_CLIENT102_REQ = 0x000000e6, -TCC_PERF_SEL_CLIENT103_REQ = 0x000000e7, -TCC_PERF_SEL_CLIENT104_REQ = 0x000000e8, -TCC_PERF_SEL_CLIENT105_REQ = 0x000000e9, -TCC_PERF_SEL_CLIENT106_REQ = 0x000000ea, -TCC_PERF_SEL_CLIENT107_REQ = 0x000000eb, -TCC_PERF_SEL_CLIENT108_REQ = 0x000000ec, -TCC_PERF_SEL_CLIENT109_REQ = 0x000000ed, -TCC_PERF_SEL_CLIENT110_REQ = 0x000000ee, -TCC_PERF_SEL_CLIENT111_REQ = 0x000000ef, -TCC_PERF_SEL_CLIENT112_REQ = 0x000000f0, -TCC_PERF_SEL_CLIENT113_REQ = 0x000000f1, -TCC_PERF_SEL_CLIENT114_REQ = 0x000000f2, -TCC_PERF_SEL_CLIENT115_REQ = 0x000000f3, -TCC_PERF_SEL_CLIENT116_REQ = 0x000000f4, -TCC_PERF_SEL_CLIENT117_REQ = 0x000000f5, -TCC_PERF_SEL_CLIENT118_REQ = 0x000000f6, -TCC_PERF_SEL_CLIENT119_REQ = 0x000000f7, -TCC_PERF_SEL_CLIENT120_REQ = 
0x000000f8, -TCC_PERF_SEL_CLIENT121_REQ = 0x000000f9, -TCC_PERF_SEL_CLIENT122_REQ = 0x000000fa, -TCC_PERF_SEL_CLIENT123_REQ = 0x000000fb, -TCC_PERF_SEL_CLIENT124_REQ = 0x000000fc, -TCC_PERF_SEL_CLIENT125_REQ = 0x000000fd, -TCC_PERF_SEL_CLIENT126_REQ = 0x000000fe, -TCC_PERF_SEL_CLIENT127_REQ = 0x000000ff, -} TCC_PERF_SEL; - -/* - * TCA_PERF_SEL enum - */ - -typedef enum TCA_PERF_SEL { -TCA_PERF_SEL_NONE = 0x00000000, -TCA_PERF_SEL_CYCLE = 0x00000001, -TCA_PERF_SEL_BUSY = 0x00000002, -TCA_PERF_SEL_FORCED_HOLE_TCC0 = 0x00000003, -TCA_PERF_SEL_FORCED_HOLE_TCC1 = 0x00000004, -TCA_PERF_SEL_FORCED_HOLE_TCC2 = 0x00000005, -TCA_PERF_SEL_FORCED_HOLE_TCC3 = 0x00000006, -TCA_PERF_SEL_FORCED_HOLE_TCC4 = 0x00000007, -TCA_PERF_SEL_FORCED_HOLE_TCC5 = 0x00000008, -TCA_PERF_SEL_FORCED_HOLE_TCC6 = 0x00000009, -TCA_PERF_SEL_FORCED_HOLE_TCC7 = 0x0000000a, -TCA_PERF_SEL_REQ_TCC0 = 0x0000000b, -TCA_PERF_SEL_REQ_TCC1 = 0x0000000c, -TCA_PERF_SEL_REQ_TCC2 = 0x0000000d, -TCA_PERF_SEL_REQ_TCC3 = 0x0000000e, -TCA_PERF_SEL_REQ_TCC4 = 0x0000000f, -TCA_PERF_SEL_REQ_TCC5 = 0x00000010, -TCA_PERF_SEL_REQ_TCC6 = 0x00000011, -TCA_PERF_SEL_REQ_TCC7 = 0x00000012, -TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC0 = 0x00000013, -TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC1 = 0x00000014, -TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC2 = 0x00000015, -TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC3 = 0x00000016, -TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC4 = 0x00000017, -TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC5 = 0x00000018, -TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC6 = 0x00000019, -TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC7 = 0x0000001a, -TCA_PERF_SEL_CROSSBAR_STALL_TCC0 = 0x0000001b, -TCA_PERF_SEL_CROSSBAR_STALL_TCC1 = 0x0000001c, -TCA_PERF_SEL_CROSSBAR_STALL_TCC2 = 0x0000001d, -TCA_PERF_SEL_CROSSBAR_STALL_TCC3 = 0x0000001e, -TCA_PERF_SEL_CROSSBAR_STALL_TCC4 = 0x0000001f, -TCA_PERF_SEL_CROSSBAR_STALL_TCC5 = 0x00000020, -TCA_PERF_SEL_CROSSBAR_STALL_TCC6 = 0x00000021, -TCA_PERF_SEL_CROSSBAR_STALL_TCC7 = 0x00000022, -} TCA_PERF_SEL; - 
-/******************************************************* - * GRBM Enums - *******************************************************/ - -/* - * GRBM_PERF_SEL enum - */ - -typedef enum GRBM_PERF_SEL { -GRBM_PERF_SEL_COUNT = 0x00000000, -GRBM_PERF_SEL_USER_DEFINED = 0x00000001, -GRBM_PERF_SEL_GUI_ACTIVE = 0x00000002, -GRBM_PERF_SEL_CP_BUSY = 0x00000003, -GRBM_PERF_SEL_CP_COHER_BUSY = 0x00000004, -GRBM_PERF_SEL_CP_DMA_BUSY = 0x00000005, -GRBM_PERF_SEL_CB_BUSY = 0x00000006, -GRBM_PERF_SEL_DB_BUSY = 0x00000007, -GRBM_PERF_SEL_PA_BUSY = 0x00000008, -GRBM_PERF_SEL_SC_BUSY = 0x00000009, -GRBM_PERF_SEL_RESERVED_6 = 0x0000000a, -GRBM_PERF_SEL_SPI_BUSY = 0x0000000b, -GRBM_PERF_SEL_SX_BUSY = 0x0000000c, -GRBM_PERF_SEL_TA_BUSY = 0x0000000d, -GRBM_PERF_SEL_CB_CLEAN = 0x0000000e, -GRBM_PERF_SEL_DB_CLEAN = 0x0000000f, -GRBM_PERF_SEL_RESERVED_5 = 0x00000010, -GRBM_PERF_SEL_VGT_BUSY = 0x00000011, -GRBM_PERF_SEL_RESERVED_4 = 0x00000012, -GRBM_PERF_SEL_RESERVED_3 = 0x00000013, -GRBM_PERF_SEL_RESERVED_2 = 0x00000014, -GRBM_PERF_SEL_RESERVED_1 = 0x00000015, -GRBM_PERF_SEL_RESERVED_0 = 0x00000016, -GRBM_PERF_SEL_IA_BUSY = 0x00000017, -GRBM_PERF_SEL_IA_NO_DMA_BUSY = 0x00000018, -GRBM_PERF_SEL_GDS_BUSY = 0x00000019, -GRBM_PERF_SEL_BCI_BUSY = 0x0000001a, -GRBM_PERF_SEL_RLC_BUSY = 0x0000001b, -GRBM_PERF_SEL_TC_BUSY = 0x0000001c, -GRBM_PERF_SEL_CPG_BUSY = 0x0000001d, -GRBM_PERF_SEL_CPC_BUSY = 0x0000001e, -GRBM_PERF_SEL_CPF_BUSY = 0x0000001f, -GRBM_PERF_SEL_WD_BUSY = 0x00000020, -GRBM_PERF_SEL_WD_NO_DMA_BUSY = 0x00000021, -GRBM_PERF_SEL_UTCL2_BUSY = 0x00000022, -GRBM_PERF_SEL_EA_BUSY = 0x00000023, -GRBM_PERF_SEL_RMI_BUSY = 0x00000024, -GRBM_PERF_SEL_CPAXI_BUSY = 0x00000025, -} GRBM_PERF_SEL; - -/* - * GRBM_SE0_PERF_SEL enum - */ - -typedef enum GRBM_SE0_PERF_SEL { -GRBM_SE0_PERF_SEL_COUNT = 0x00000000, -GRBM_SE0_PERF_SEL_USER_DEFINED = 0x00000001, -GRBM_SE0_PERF_SEL_CB_BUSY = 0x00000002, -GRBM_SE0_PERF_SEL_DB_BUSY = 0x00000003, -GRBM_SE0_PERF_SEL_SC_BUSY = 0x00000004, 
-GRBM_SE0_PERF_SEL_RESERVED_1 = 0x00000005, -GRBM_SE0_PERF_SEL_SPI_BUSY = 0x00000006, -GRBM_SE0_PERF_SEL_SX_BUSY = 0x00000007, -GRBM_SE0_PERF_SEL_TA_BUSY = 0x00000008, -GRBM_SE0_PERF_SEL_CB_CLEAN = 0x00000009, -GRBM_SE0_PERF_SEL_DB_CLEAN = 0x0000000a, -GRBM_SE0_PERF_SEL_RESERVED_0 = 0x0000000b, -GRBM_SE0_PERF_SEL_PA_BUSY = 0x0000000c, -GRBM_SE0_PERF_SEL_VGT_BUSY = 0x0000000d, -GRBM_SE0_PERF_SEL_BCI_BUSY = 0x0000000e, -GRBM_SE0_PERF_SEL_RMI_BUSY = 0x0000000f, -} GRBM_SE0_PERF_SEL; - -/* - * GRBM_SE1_PERF_SEL enum - */ - -typedef enum GRBM_SE1_PERF_SEL { -GRBM_SE1_PERF_SEL_COUNT = 0x00000000, -GRBM_SE1_PERF_SEL_USER_DEFINED = 0x00000001, -GRBM_SE1_PERF_SEL_CB_BUSY = 0x00000002, -GRBM_SE1_PERF_SEL_DB_BUSY = 0x00000003, -GRBM_SE1_PERF_SEL_SC_BUSY = 0x00000004, -GRBM_SE1_PERF_SEL_RESERVED_1 = 0x00000005, -GRBM_SE1_PERF_SEL_SPI_BUSY = 0x00000006, -GRBM_SE1_PERF_SEL_SX_BUSY = 0x00000007, -GRBM_SE1_PERF_SEL_TA_BUSY = 0x00000008, -GRBM_SE1_PERF_SEL_CB_CLEAN = 0x00000009, -GRBM_SE1_PERF_SEL_DB_CLEAN = 0x0000000a, -GRBM_SE1_PERF_SEL_RESERVED_0 = 0x0000000b, -GRBM_SE1_PERF_SEL_PA_BUSY = 0x0000000c, -GRBM_SE1_PERF_SEL_VGT_BUSY = 0x0000000d, -GRBM_SE1_PERF_SEL_BCI_BUSY = 0x0000000e, -GRBM_SE1_PERF_SEL_RMI_BUSY = 0x0000000f, -} GRBM_SE1_PERF_SEL; - -/* - * GRBM_SE2_PERF_SEL enum - */ - -typedef enum GRBM_SE2_PERF_SEL { -GRBM_SE2_PERF_SEL_COUNT = 0x00000000, -GRBM_SE2_PERF_SEL_USER_DEFINED = 0x00000001, -GRBM_SE2_PERF_SEL_CB_BUSY = 0x00000002, -GRBM_SE2_PERF_SEL_DB_BUSY = 0x00000003, -GRBM_SE2_PERF_SEL_SC_BUSY = 0x00000004, -GRBM_SE2_PERF_SEL_RESERVED_1 = 0x00000005, -GRBM_SE2_PERF_SEL_SPI_BUSY = 0x00000006, -GRBM_SE2_PERF_SEL_SX_BUSY = 0x00000007, -GRBM_SE2_PERF_SEL_TA_BUSY = 0x00000008, -GRBM_SE2_PERF_SEL_CB_CLEAN = 0x00000009, -GRBM_SE2_PERF_SEL_DB_CLEAN = 0x0000000a, -GRBM_SE2_PERF_SEL_RESERVED_0 = 0x0000000b, -GRBM_SE2_PERF_SEL_PA_BUSY = 0x0000000c, -GRBM_SE2_PERF_SEL_VGT_BUSY = 0x0000000d, -GRBM_SE2_PERF_SEL_BCI_BUSY = 0x0000000e, -GRBM_SE2_PERF_SEL_RMI_BUSY = 0x0000000f, -} 
GRBM_SE2_PERF_SEL; - -/* - * GRBM_SE3_PERF_SEL enum - */ - -typedef enum GRBM_SE3_PERF_SEL { -GRBM_SE3_PERF_SEL_COUNT = 0x00000000, -GRBM_SE3_PERF_SEL_USER_DEFINED = 0x00000001, -GRBM_SE3_PERF_SEL_CB_BUSY = 0x00000002, -GRBM_SE3_PERF_SEL_DB_BUSY = 0x00000003, -GRBM_SE3_PERF_SEL_SC_BUSY = 0x00000004, -GRBM_SE3_PERF_SEL_RESERVED_1 = 0x00000005, -GRBM_SE3_PERF_SEL_SPI_BUSY = 0x00000006, -GRBM_SE3_PERF_SEL_SX_BUSY = 0x00000007, -GRBM_SE3_PERF_SEL_TA_BUSY = 0x00000008, -GRBM_SE3_PERF_SEL_CB_CLEAN = 0x00000009, -GRBM_SE3_PERF_SEL_DB_CLEAN = 0x0000000a, -GRBM_SE3_PERF_SEL_RESERVED_0 = 0x0000000b, -GRBM_SE3_PERF_SEL_PA_BUSY = 0x0000000c, -GRBM_SE3_PERF_SEL_VGT_BUSY = 0x0000000d, -GRBM_SE3_PERF_SEL_BCI_BUSY = 0x0000000e, -GRBM_SE3_PERF_SEL_RMI_BUSY = 0x0000000f, -} GRBM_SE3_PERF_SEL; - -/******************************************************* - * CP Enums - *******************************************************/ - -/* - * CP_RING_ID enum - */ - -typedef enum CP_RING_ID { -RINGID0 = 0x00000000, -RINGID1 = 0x00000001, -RINGID2 = 0x00000002, -RINGID3 = 0x00000003, -} CP_RING_ID; - -/* - * CP_PIPE_ID enum - */ - -typedef enum CP_PIPE_ID { -PIPE_ID0 = 0x00000000, -PIPE_ID1 = 0x00000001, -PIPE_ID2 = 0x00000002, -PIPE_ID3 = 0x00000003, -} CP_PIPE_ID; - -/* - * CP_ME_ID enum - */ - -typedef enum CP_ME_ID { -ME_ID0 = 0x00000000, -ME_ID1 = 0x00000001, -ME_ID2 = 0x00000002, -ME_ID3 = 0x00000003, -} CP_ME_ID; - -/* - * SPM_PERFMON_STATE enum - */ - -typedef enum SPM_PERFMON_STATE { -STRM_PERFMON_STATE_DISABLE_AND_RESET = 0x00000000, -STRM_PERFMON_STATE_START_COUNTING = 0x00000001, -STRM_PERFMON_STATE_STOP_COUNTING = 0x00000002, -STRM_PERFMON_STATE_RESERVED_3 = 0x00000003, -STRM_PERFMON_STATE_DISABLE_AND_RESET_PHANTOM = 0x00000004, -STRM_PERFMON_STATE_COUNT_AND_DUMP_PHANTOM = 0x00000005, -} SPM_PERFMON_STATE; - -/* - * CP_PERFMON_STATE enum - */ - -typedef enum CP_PERFMON_STATE { -CP_PERFMON_STATE_DISABLE_AND_RESET = 0x00000000, -CP_PERFMON_STATE_START_COUNTING = 0x00000001, 
-CP_PERFMON_STATE_STOP_COUNTING = 0x00000002, -CP_PERFMON_STATE_RESERVED_3 = 0x00000003, -CP_PERFMON_STATE_DISABLE_AND_RESET_PHANTOM = 0x00000004, -CP_PERFMON_STATE_COUNT_AND_DUMP_PHANTOM = 0x00000005, -} CP_PERFMON_STATE; - -/* - * CP_PERFMON_ENABLE_MODE enum - */ - -typedef enum CP_PERFMON_ENABLE_MODE { -CP_PERFMON_ENABLE_MODE_ALWAYS_COUNT = 0x00000000, -CP_PERFMON_ENABLE_MODE_RESERVED_1 = 0x00000001, -CP_PERFMON_ENABLE_MODE_COUNT_CONTEXT_TRUE = 0x00000002, -CP_PERFMON_ENABLE_MODE_COUNT_CONTEXT_FALSE = 0x00000003, -} CP_PERFMON_ENABLE_MODE; - -/* - * CPG_PERFCOUNT_SEL enum - */ - -typedef enum CPG_PERFCOUNT_SEL { -CPG_PERF_SEL_ALWAYS_COUNT = 0x00000000, -CPG_PERF_SEL_RBIU_FIFO_FULL = 0x00000001, -CPG_PERF_SEL_CSF_RTS_BUT_MIU_NOT_RTR = 0x00000002, -CPG_PERF_SEL_CSF_ST_BASE_SIZE_FIFO_FULL = 0x00000003, -CPG_PERF_SEL_CP_GRBM_DWORDS_SENT = 0x00000004, -CPG_PERF_SEL_ME_PARSER_BUSY = 0x00000005, -CPG_PERF_SEL_COUNT_TYPE0_PACKETS = 0x00000006, -CPG_PERF_SEL_COUNT_TYPE3_PACKETS = 0x00000007, -CPG_PERF_SEL_CSF_FETCHING_CMD_BUFFERS = 0x00000008, -CPG_PERF_SEL_CP_GRBM_OUT_OF_CREDITS = 0x00000009, -CPG_PERF_SEL_CP_PFP_GRBM_OUT_OF_CREDITS = 0x0000000a, -CPG_PERF_SEL_CP_GDS_GRBM_OUT_OF_CREDITS = 0x0000000b, -CPG_PERF_SEL_RCIU_STALLED_ON_ME_READ = 0x0000000c, -CPG_PERF_SEL_RCIU_STALLED_ON_DMA_READ = 0x0000000d, -CPG_PERF_SEL_SSU_STALLED_ON_ACTIVE_CNTX = 0x0000000e, -CPG_PERF_SEL_SSU_STALLED_ON_CLEAN_SIGNALS = 0x0000000f, -CPG_PERF_SEL_QU_STALLED_ON_EOP_DONE_PULSE = 0x00000010, -CPG_PERF_SEL_QU_STALLED_ON_EOP_DONE_WR_CONFIRM = 0x00000011, -CPG_PERF_SEL_PFP_STALLED_ON_CSF_READY = 0x00000012, -CPG_PERF_SEL_PFP_STALLED_ON_MEQ_READY = 0x00000013, -CPG_PERF_SEL_PFP_STALLED_ON_RCIU_READY = 0x00000014, -CPG_PERF_SEL_PFP_STALLED_FOR_DATA_FROM_ROQ = 0x00000015, -CPG_PERF_SEL_ME_STALLED_FOR_DATA_FROM_PFP = 0x00000016, -CPG_PERF_SEL_ME_STALLED_FOR_DATA_FROM_STQ = 0x00000017, -CPG_PERF_SEL_ME_STALLED_ON_NO_AVAIL_GFX_CNTX = 0x00000018, -CPG_PERF_SEL_ME_STALLED_WRITING_TO_RCIU = 0x00000019, 
-CPG_PERF_SEL_ME_STALLED_WRITING_CONSTANTS = 0x0000001a, -CPG_PERF_SEL_ME_STALLED_ON_PARTIAL_FLUSH = 0x0000001b, -CPG_PERF_SEL_ME_WAIT_ON_CE_COUNTER = 0x0000001c, -CPG_PERF_SEL_ME_WAIT_ON_AVAIL_BUFFER = 0x0000001d, -CPG_PERF_SEL_SEMAPHORE_BUSY_POLLING_FOR_PASS = 0x0000001e, -CPG_PERF_SEL_LOAD_STALLED_ON_SET_COHERENCY = 0x0000001f, -CPG_PERF_SEL_DYNAMIC_CLK_VALID = 0x00000020, -CPG_PERF_SEL_REGISTER_CLK_VALID = 0x00000021, -CPG_PERF_SEL_MIU_WRITE_REQUEST_SENT = 0x00000022, -CPG_PERF_SEL_MIU_READ_REQUEST_SENT = 0x00000023, -CPG_PERF_SEL_CE_STALL_RAM_DUMP = 0x00000024, -CPG_PERF_SEL_CE_STALL_RAM_WRITE = 0x00000025, -CPG_PERF_SEL_CE_STALL_ON_INC_FIFO = 0x00000026, -CPG_PERF_SEL_CE_STALL_ON_WR_RAM_FIFO = 0x00000027, -CPG_PERF_SEL_CE_STALL_ON_DATA_FROM_MIU = 0x00000028, -CPG_PERF_SEL_CE_STALL_ON_DATA_FROM_ROQ = 0x00000029, -CPG_PERF_SEL_CE_STALL_ON_CE_BUFFER_FLAG = 0x0000002a, -CPG_PERF_SEL_CE_STALL_ON_DE_COUNTER = 0x0000002b, -CPG_PERF_SEL_TCIU_STALL_WAIT_ON_FREE = 0x0000002c, -CPG_PERF_SEL_TCIU_STALL_WAIT_ON_TAGS = 0x0000002d, -CPG_PERF_SEL_UTCL2IU_STALL_WAIT_ON_FREE = 0x0000002e, -CPG_PERF_SEL_UTCL2IU_STALL_WAIT_ON_TAGS = 0x0000002f, -CPG_PERF_SEL_UTCL1_STALL_ON_TRANSLATION = 0x00000030, -} CPG_PERFCOUNT_SEL; - -/* - * CPF_PERFCOUNT_SEL enum - */ - -typedef enum CPF_PERFCOUNT_SEL { -CPF_PERF_SEL_ALWAYS_COUNT = 0x00000000, -CPF_PERF_SEL_MIU_STALLED_WAITING_RDREQ_FREE = 0x00000001, -CPF_PERF_SEL_TCIU_STALLED_WAITING_ON_FREE = 0x00000002, -CPF_PERF_SEL_TCIU_STALLED_WAITING_ON_TAGS = 0x00000003, -CPF_PERF_SEL_CSF_BUSY_FOR_FETCHING_RING = 0x00000004, -CPF_PERF_SEL_CSF_BUSY_FOR_FETCHING_IB1 = 0x00000005, -CPF_PERF_SEL_CSF_BUSY_FOR_FETCHING_IB2 = 0x00000006, -CPF_PERF_SEL_CSF_BUSY_FOR_FECTHINC_STATE = 0x00000007, -CPF_PERF_SEL_MIU_BUSY_FOR_OUTSTANDING_TAGS = 0x00000008, -CPF_PERF_SEL_CSF_RTS_MIU_NOT_RTR = 0x00000009, -CPF_PERF_SEL_CSF_STATE_FIFO_NOT_RTR = 0x0000000a, -CPF_PERF_SEL_CSF_FETCHING_CMD_BUFFERS = 0x0000000b, -CPF_PERF_SEL_GRBM_DWORDS_SENT = 0x0000000c, 
-CPF_PERF_SEL_DYNAMIC_CLOCK_VALID = 0x0000000d, -CPF_PERF_SEL_REGISTER_CLOCK_VALID = 0x0000000e, -CPF_PERF_SEL_MIU_WRITE_REQUEST_SEND = 0x0000000f, -CPF_PERF_SEL_MIU_READ_REQUEST_SEND = 0x00000010, -CPF_PERF_SEL_UTCL2IU_STALL_WAIT_ON_FREE = 0x00000011, -CPF_PERF_SEL_UTCL2IU_STALL_WAIT_ON_TAGS = 0x00000012, -CPF_PERF_SEL_UTCL1_STALL_ON_TRANSLATION = 0x00000013, -CPF_PERF_SEL_RCIU_STALL_WAIT_ON_FREE = 0x00000014, -} CPF_PERFCOUNT_SEL; - -/* - * CPC_PERFCOUNT_SEL enum - */ - -typedef enum CPC_PERFCOUNT_SEL { -CPC_PERF_SEL_ALWAYS_COUNT = 0x00000000, -CPC_PERF_SEL_RCIU_STALL_WAIT_ON_FREE = 0x00000001, -CPC_PERF_SEL_RCIU_STALL_PRIV_VIOLATION = 0x00000002, -CPC_PERF_SEL_MIU_STALL_ON_RDREQ_FREE = 0x00000003, -CPC_PERF_SEL_MIU_STALL_ON_WRREQ_FREE = 0x00000004, -CPC_PERF_SEL_TCIU_STALL_WAIT_ON_FREE = 0x00000005, -CPC_PERF_SEL_ME1_STALL_WAIT_ON_RCIU_READY = 0x00000006, -CPC_PERF_SEL_ME1_STALL_WAIT_ON_RCIU_READY_PERF = 0x00000007, -CPC_PERF_SEL_ME1_STALL_WAIT_ON_RCIU_READ = 0x00000008, -CPC_PERF_SEL_ME1_STALL_WAIT_ON_MIU_READ = 0x00000009, -CPC_PERF_SEL_ME1_STALL_WAIT_ON_MIU_WRITE = 0x0000000a, -CPC_PERF_SEL_ME1_STALL_ON_DATA_FROM_ROQ = 0x0000000b, -CPC_PERF_SEL_ME1_STALL_ON_DATA_FROM_ROQ_PERF = 0x0000000c, -CPC_PERF_SEL_ME1_BUSY_FOR_PACKET_DECODE = 0x0000000d, -CPC_PERF_SEL_ME2_STALL_WAIT_ON_RCIU_READY = 0x0000000e, -CPC_PERF_SEL_ME2_STALL_WAIT_ON_RCIU_READY_PERF = 0x0000000f, -CPC_PERF_SEL_ME2_STALL_WAIT_ON_RCIU_READ = 0x00000010, -CPC_PERF_SEL_ME2_STALL_WAIT_ON_MIU_READ = 0x00000011, -CPC_PERF_SEL_ME2_STALL_WAIT_ON_MIU_WRITE = 0x00000012, -CPC_PERF_SEL_ME2_STALL_ON_DATA_FROM_ROQ = 0x00000013, -CPC_PERF_SEL_ME2_STALL_ON_DATA_FROM_ROQ_PERF = 0x00000014, -CPC_PERF_SEL_ME2_BUSY_FOR_PACKET_DECODE = 0x00000015, -CPC_PERF_SEL_UTCL2IU_STALL_WAIT_ON_FREE = 0x00000016, -CPC_PERF_SEL_UTCL2IU_STALL_WAIT_ON_TAGS = 0x00000017, -CPC_PERF_SEL_UTCL1_STALL_ON_TRANSLATION = 0x00000018, -} CPC_PERFCOUNT_SEL; - -/* - * CP_ALPHA_TAG_RAM_SEL enum - */ - -typedef enum CP_ALPHA_TAG_RAM_SEL { 
-CPG_TAG_RAM = 0x00000000, -CPC_TAG_RAM = 0x00000001, -CPF_TAG_RAM = 0x00000002, -RSV_TAG_RAM = 0x00000003, -} CP_ALPHA_TAG_RAM_SEL; - -/* - * SEM_RESPONSE value - */ - -#define SEM_ECC_ERROR 0x00000000 -#define SEM_TRANS_ERROR 0x00000001 -#define SEM_FAILED 0x00000002 -#define SEM_PASSED 0x00000003 - -/* - * IQ_RETRY_TYPE value - */ - -#define IQ_QUEUE_SLEEP 0x00000000 -#define IQ_OFFLOAD_RETRY 0x00000001 -#define IQ_SCH_WAVE_MSG 0x00000002 -#define IQ_SEM_REARM 0x00000003 -#define IQ_DEQUEUE_RETRY 0x00000004 - -/* - * IQ_INTR_TYPE value - */ - -#define IQ_INTR_TYPE_PQ 0x00000000 -#define IQ_INTR_TYPE_IB 0x00000001 -#define IQ_INTR_TYPE_MQD 0x00000002 - -/* - * VMID_SIZE value - */ - -#define VMID_SZ 0x00000004 - -/* - * CONFIG_SPACE value - */ - -#define CONFIG_SPACE_START 0x00002000 -#define CONFIG_SPACE_END 0x00009fff - -/* - * CONFIG_SPACE1 value - */ - -#define CONFIG_SPACE1_START 0x00002000 -#define CONFIG_SPACE1_END 0x00002bff - -/* - * CONFIG_SPACE2 value - */ - -#define CONFIG_SPACE2_START 0x00003000 -#define CONFIG_SPACE2_END 0x00009fff - -/* - * UCONFIG_SPACE value - */ - -#define UCONFIG_SPACE_START 0x0000c000 -#define UCONFIG_SPACE_END 0x0000ffff - -/* - * PERSISTENT_SPACE value - */ - -#define PERSISTENT_SPACE_START 0x00002c00 -#define PERSISTENT_SPACE_END 0x00002fff - -/* - * CONTEXT_SPACE value - */ - -#define CONTEXT_SPACE_START 0x0000a000 -#define CONTEXT_SPACE_END 0x0000bfff - -/******************************************************* - * SQ_UC Enums - *******************************************************/ - -/* - * VALUE_SQ_ENC_SOP1 value - */ - -#define SQ_ENC_SOP1_BITS 0xbe800000 -#define SQ_ENC_SOP1_MASK 0xff800000 -#define SQ_ENC_SOP1_FIELD 0x0000017d - -/* - * VALUE_SQ_ENC_SOPC value - */ - -#define SQ_ENC_SOPC_BITS 0xbf000000 -#define SQ_ENC_SOPC_MASK 0xff800000 -#define SQ_ENC_SOPC_FIELD 0x0000017e - -/* - * VALUE_SQ_ENC_SOPP value - */ - -#define SQ_ENC_SOPP_BITS 0xbf800000 -#define SQ_ENC_SOPP_MASK 0xff800000 -#define 
SQ_ENC_SOPP_FIELD 0x0000017f - -/* - * VALUE_SQ_ENC_SOPK value - */ - -#define SQ_ENC_SOPK_BITS 0xb0000000 -#define SQ_ENC_SOPK_MASK 0xf0000000 -#define SQ_ENC_SOPK_FIELD 0x0000000b - -/* - * VALUE_SQ_ENC_SOP2 value - */ - -#define SQ_ENC_SOP2_BITS 0x80000000 -#define SQ_ENC_SOP2_MASK 0xc0000000 -#define SQ_ENC_SOP2_FIELD 0x00000002 - -/* - * VALUE_SQ_ENC_SMEM value - */ - -#define SQ_ENC_SMEM_BITS 0xc0000000 -#define SQ_ENC_SMEM_MASK 0xfc000000 -#define SQ_ENC_SMEM_FIELD 0x00000030 - -/* - * VALUE_SQ_ENC_VOP1 value - */ - -#define SQ_ENC_VOP1_BITS 0x7e000000 -#define SQ_ENC_VOP1_MASK 0xfe000000 -#define SQ_ENC_VOP1_FIELD 0x0000003f - -/* - * VALUE_SQ_ENC_VOPC value - */ - -#define SQ_ENC_VOPC_BITS 0x7c000000 -#define SQ_ENC_VOPC_MASK 0xfe000000 -#define SQ_ENC_VOPC_FIELD 0x0000003e - -/* - * VALUE_SQ_ENC_VOP2 value - */ - -#define SQ_ENC_VOP2_BITS 0x00000000 -#define SQ_ENC_VOP2_MASK 0x80000000 -#define SQ_ENC_VOP2_FIELD 0x00000000 - -/* - * VALUE_SQ_ENC_VINTRP value - */ - -#define SQ_ENC_VINTRP_BITS 0xd4000000 -#define SQ_ENC_VINTRP_MASK 0xfc000000 -#define SQ_ENC_VINTRP_FIELD 0x00000035 - -/* - * VALUE_SQ_ENC_VOP3P value - */ - -#define SQ_ENC_VOP3P_BITS 0xd3800000 -#define SQ_ENC_VOP3P_MASK 0xff800000 -#define SQ_ENC_VOP3P_FIELD 0x000001a7 - -/* - * VALUE_SQ_ENC_VOP3 value - */ - -#define SQ_ENC_VOP3_BITS 0xd0000000 -#define SQ_ENC_VOP3_MASK 0xfc000000 -#define SQ_ENC_VOP3_FIELD 0x00000034 - -/* - * VALUE_SQ_ENC_DS value - */ - -#define SQ_ENC_DS_BITS 0xd8000000 -#define SQ_ENC_DS_MASK 0xfc000000 -#define SQ_ENC_DS_FIELD 0x00000036 - -/* - * VALUE_SQ_ENC_MUBUF value - */ - -#define SQ_ENC_MUBUF_BITS 0xe0000000 -#define SQ_ENC_MUBUF_MASK 0xfc000000 -#define SQ_ENC_MUBUF_FIELD 0x00000038 - -/* - * VALUE_SQ_ENC_MTBUF value - */ - -#define SQ_ENC_MTBUF_BITS 0xe8000000 -#define SQ_ENC_MTBUF_MASK 0xfc000000 -#define SQ_ENC_MTBUF_FIELD 0x0000003a - -/* - * VALUE_SQ_ENC_MIMG value - */ - -#define SQ_ENC_MIMG_BITS 0xf0000000 -#define SQ_ENC_MIMG_MASK 0xfc000000 
-#define SQ_ENC_MIMG_FIELD 0x0000003c - -/* - * VALUE_SQ_ENC_EXP value - */ - -#define SQ_ENC_EXP_BITS 0xc4000000 -#define SQ_ENC_EXP_MASK 0xfc000000 -#define SQ_ENC_EXP_FIELD 0x00000031 - -/* - * VALUE_SQ_ENC_FLAT value - */ - -#define SQ_ENC_FLAT_BITS 0xdc000000 -#define SQ_ENC_FLAT_MASK 0xfc000000 -#define SQ_ENC_FLAT_FIELD 0x00000037 - -/* - * VALUE_SQ_HWREG_ID_SHIFT value - */ - -#define SQ_HWREG_ID_SHIFT 0x00000000 - -/* - * VALUE_SQ_V_OP3P_COUNT value - */ - -#define SQ_V_OP3P_COUNT 0x00000080 - -/* - * VALUE_SQ_SENDMSG_SYSTEM_SHIFT value - */ - -#define SQ_SENDMSG_SYSTEM_SHIFT 0x00000004 - -/* - * VALUE_SQ_XLATE_VOP3_TO_VOP1_COUNT value - */ - -#define SQ_XLATE_VOP3_TO_VOP1_COUNT 0x00000080 - -/* - * VALUE_SQ_SRC_VGPR_BIT value - */ - -#define SQ_SRC_VGPR_BIT 0x00000100 - -/* - * VALUE_SQ_V_OP1_COUNT value - */ - -#define SQ_V_OP1_COUNT 0x00000080 - -/* - * VALUE_SQ_SENDMSG_STREAMID_SHIFT value - */ - -#define SQ_SENDMSG_STREAMID_SHIFT 0x00000008 - -/* - * VALUE_SQ_HWREG_ID_SIZE value - */ - -#define SQ_HWREG_ID_SIZE 0x00000006 - -/* - * VALUE_SQ_EXP_NUM_MRT value - */ - -#define SQ_EXP_NUM_MRT 0x00000008 - -/* - * VALUE_SQ_V_OP3_3IN_OFFSET value - */ - -#define SQ_V_OP3_3IN_OFFSET 0x000001c0 - -/* - * VALUE_SQ_SENDMSG_STREAMID_SIZE value - */ - -#define SQ_SENDMSG_STREAMID_SIZE 0x00000002 - -/* - * VALUE_SQ_HWREG_OFFSET_SHIFT value - */ - -#define SQ_HWREG_OFFSET_SHIFT 0x00000006 - -/* - * VALUE_SQ_SENDMSG_MSG_SIZE value - */ - -#define SQ_SENDMSG_MSG_SIZE 0x00000004 - -/* - * VALUE_SQ_HWREG_SIZE_SHIFT value - */ - -#define SQ_HWREG_SIZE_SHIFT 0x0000000b - -/* - * VALUE_SQ_SENDMSG_SYSTEM_SIZE value - */ - -#define SQ_SENDMSG_SYSTEM_SIZE 0x00000003 - -/* - * VALUE_SQ_SENDMSG_MSG_SHIFT value - */ - -#define SQ_SENDMSG_MSG_SHIFT 0x00000000 - -/* - * VALUE_SQ_SENDMSG_GSOP_SIZE value - */ - -#define SQ_SENDMSG_GSOP_SIZE 0x00000002 - -/* - * VALUE_SQ_SENDMSG_GSOP_SHIFT value - */ - -#define SQ_SENDMSG_GSOP_SHIFT 0x00000004 - -/* - * VALUE_SQ_NUM_TTMP value - */ 
- -#define SQ_NUM_TTMP 0x00000010 - -/* - * VALUE_SQ_XLATE_VOP3_TO_VOP3P_COUNT value - */ - -#define SQ_XLATE_VOP3_TO_VOP3P_COUNT 0x00000080 - -/* - * VALUE_SQ_EXP_NUM_POS value - */ - -#define SQ_EXP_NUM_POS 0x00000004 - -/* - * VALUE_SQ_XLATE_VOP3_TO_VOP3P_OFFSET value - */ - -#define SQ_XLATE_VOP3_TO_VOP3P_OFFSET 0x00000380 - -/* - * VALUE_SQ_WAITCNT_EXP_SIZE value - */ - -#define SQ_WAITCNT_EXP_SIZE 0x00000003 - -/* - * VALUE_SQ_V_OP2_COUNT value - */ - -#define SQ_V_OP2_COUNT 0x00000040 - -/* - * VALUE_SQ_HWREG_SIZE_SIZE value - */ - -#define SQ_HWREG_SIZE_SIZE 0x00000005 - -/* - * VALUE_SQ_WAITCNT_VM_SHIFT value - */ - -#define SQ_WAITCNT_VM_SHIFT 0x00000000 - -/* - * VALUE_SQ_V_OP3_3IN_COUNT value - */ - -#define SQ_V_OP3_3IN_COUNT 0x000000b0 - -/* - * VALUE_SQ_NUM_VGPR value - */ - -#define SQ_NUM_VGPR 0x00000100 - -/* - * VALUE_SQ_EXP_NUM_PARAM value - */ - -#define SQ_EXP_NUM_PARAM 0x00000020 - -/* - * VALUE_SQ_XLATE_VOP3_TO_VOPC_OFFSET value - */ - -#define SQ_XLATE_VOP3_TO_VOPC_OFFSET 0x00000000 - -/* - * VALUE_SQ_V_OP3_INTRP_COUNT value - */ - -#define SQ_V_OP3_INTRP_COUNT 0x0000000c - -/* - * VALUE_SQ_WAITCNT_LGKM_SHIFT value - */ - -#define SQ_WAITCNT_LGKM_SHIFT 0x00000008 - -/* - * VALUE_SQ_XLATE_VOP3_TO_VOP2_OFFSET value - */ - -#define SQ_XLATE_VOP3_TO_VOP2_OFFSET 0x00000100 - -/* - * VALUE_SQ_V_OP3_2IN_OFFSET value - */ - -#define SQ_V_OP3_2IN_OFFSET 0x00000280 - -/* - * VALUE_SQ_V_INTRP_COUNT value - */ - -#define SQ_V_INTRP_COUNT 0x00000004 - -/* - * VALUE_SQ_XLATE_VOP3_TO_VINTRP_OFFSET value - */ - -#define SQ_XLATE_VOP3_TO_VINTRP_OFFSET 0x00000270 - -/* - * VALUE_SQ_WAITCNT_LGKM_SIZE value - */ - -#define SQ_WAITCNT_LGKM_SIZE 0x00000004 - -/* - * VALUE_SQ_EXP_NUM_GDS value - */ - -#define SQ_EXP_NUM_GDS 0x00000005 - -/* - * VALUE_SQ_HWREG_OFFSET_SIZE value - */ - -#define SQ_HWREG_OFFSET_SIZE 0x00000005 - -/* - * VALUE_SQ_WAITCNT_VM_SIZE value - */ - -#define SQ_WAITCNT_VM_SIZE 0x00000004 - -/* - * VALUE_SQ_V_OP3_2IN_COUNT value - */ - 
-#define SQ_V_OP3_2IN_COUNT 0x00000080 - -/* - * VALUE_SQ_XLATE_VOP3_TO_VINTRP_COUNT value - */ - -#define SQ_XLATE_VOP3_TO_VINTRP_COUNT 0x00000004 - -/* - * VALUE_SQ_XLATE_VOP3_TO_VOPC_COUNT value - */ - -#define SQ_XLATE_VOP3_TO_VOPC_COUNT 0x00000100 - -/* - * VALUE_SQ_NUM_ATTR value - */ - -#define SQ_NUM_ATTR 0x00000021 - -/* - * VALUE_SQ_V_OPC_COUNT value - */ - -#define SQ_V_OPC_COUNT 0x00000100 - -/* - * VALUE_SQ_V_OP3_INTRP_OFFSET value - */ - -#define SQ_V_OP3_INTRP_OFFSET 0x00000274 - -/* - * VALUE_SQ_XLATE_VOP3_TO_VOP2_COUNT value - */ - -#define SQ_XLATE_VOP3_TO_VOP2_COUNT 0x00000040 - -/* - * VALUE_SQ_WAITCNT_EXP_SHIFT value - */ - -#define SQ_WAITCNT_EXP_SHIFT 0x00000004 - -/* - * VALUE_SQ_XLATE_VOP3_TO_VOP1_OFFSET value - */ - -#define SQ_XLATE_VOP3_TO_VOP1_OFFSET 0x00000140 - -/* - * VALUE_SQ_NUM_SGPR value - */ - -#define SQ_NUM_SGPR 0x00000066 - -/* - * VALUE_SQ_FLAT_SCRATCH_LOHI value - */ - -#define SQ_FLAT_SCRATCH_LO 0x00000066 -#define SQ_FLAT_SCRATCH_HI 0x00000067 - -/* - * VALUE_SQ_OP_VOP3 value - */ - -#define SQ_V_MAD_LEGACY_F32 0x000001c0 -#define SQ_V_MAD_F32 0x000001c1 -#define SQ_V_MAD_I32_I24 0x000001c2 -#define SQ_V_MAD_U32_U24 0x000001c3 -#define SQ_V_CUBEID_F32 0x000001c4 -#define SQ_V_CUBESC_F32 0x000001c5 -#define SQ_V_CUBETC_F32 0x000001c6 -#define SQ_V_CUBEMA_F32 0x000001c7 -#define SQ_V_BFE_U32 0x000001c8 -#define SQ_V_BFE_I32 0x000001c9 -#define SQ_V_BFI_B32 0x000001ca -#define SQ_V_FMA_F32 0x000001cb -#define SQ_V_FMA_F64 0x000001cc -#define SQ_V_LERP_U8 0x000001cd -#define SQ_V_ALIGNBIT_B32 0x000001ce -#define SQ_V_ALIGNBYTE_B32 0x000001cf -#define SQ_V_MIN3_F32 0x000001d0 -#define SQ_V_MIN3_I32 0x000001d1 -#define SQ_V_MIN3_U32 0x000001d2 -#define SQ_V_MAX3_F32 0x000001d3 -#define SQ_V_MAX3_I32 0x000001d4 -#define SQ_V_MAX3_U32 0x000001d5 -#define SQ_V_MED3_F32 0x000001d6 -#define SQ_V_MED3_I32 0x000001d7 -#define SQ_V_MED3_U32 0x000001d8 -#define SQ_V_SAD_U8 0x000001d9 -#define SQ_V_SAD_HI_U8 0x000001da -#define 
SQ_V_SAD_U16 0x000001db -#define SQ_V_SAD_U32 0x000001dc -#define SQ_V_CVT_PK_U8_F32 0x000001dd -#define SQ_V_DIV_FIXUP_F32 0x000001de -#define SQ_V_DIV_FIXUP_F64 0x000001df -#define SQ_V_DIV_SCALE_F32 0x000001e0 -#define SQ_V_DIV_SCALE_F64 0x000001e1 -#define SQ_V_DIV_FMAS_F32 0x000001e2 -#define SQ_V_DIV_FMAS_F64 0x000001e3 -#define SQ_V_MSAD_U8 0x000001e4 -#define SQ_V_QSAD_PK_U16_U8 0x000001e5 -#define SQ_V_MQSAD_PK_U16_U8 0x000001e6 -#define SQ_V_MQSAD_U32_U8 0x000001e7 -#define SQ_V_MAD_U64_U32 0x000001e8 -#define SQ_V_MAD_I64_I32 0x000001e9 -#define SQ_V_MAD_LEGACY_F16 0x000001ea -#define SQ_V_MAD_LEGACY_U16 0x000001eb -#define SQ_V_MAD_LEGACY_I16 0x000001ec -#define SQ_V_PERM_B32 0x000001ed -#define SQ_V_FMA_LEGACY_F16 0x000001ee -#define SQ_V_DIV_FIXUP_LEGACY_F16 0x000001ef -#define SQ_V_CVT_PKACCUM_U8_F32 0x000001f0 -#define SQ_V_MAD_U32_U16 0x000001f1 -#define SQ_V_MAD_I32_I16 0x000001f2 -#define SQ_V_XAD_U32 0x000001f3 -#define SQ_V_MIN3_F16 0x000001f4 -#define SQ_V_MIN3_I16 0x000001f5 -#define SQ_V_MIN3_U16 0x000001f6 -#define SQ_V_MAX3_F16 0x000001f7 -#define SQ_V_MAX3_I16 0x000001f8 -#define SQ_V_MAX3_U16 0x000001f9 -#define SQ_V_MED3_F16 0x000001fa -#define SQ_V_MED3_I16 0x000001fb -#define SQ_V_MED3_U16 0x000001fc -#define SQ_V_LSHL_ADD_U32 0x000001fd -#define SQ_V_ADD_LSHL_U32 0x000001fe -#define SQ_V_ADD3_U32 0x000001ff -#define SQ_V_LSHL_OR_B32 0x00000200 -#define SQ_V_AND_OR_B32 0x00000201 -#define SQ_V_OR3_B32 0x00000202 -#define SQ_V_MAD_F16 0x00000203 -#define SQ_V_MAD_U16 0x00000204 -#define SQ_V_MAD_I16 0x00000205 -#define SQ_V_FMA_F16 0x00000206 -#define SQ_V_DIV_FIXUP_F16 0x00000207 -#define SQ_V_INTERP_P1LL_F16 0x00000274 -#define SQ_V_INTERP_P1LV_F16 0x00000275 -#define SQ_V_INTERP_P2_LEGACY_F16 0x00000276 -#define SQ_V_INTERP_P2_F16 0x00000277 -#define SQ_V_ADD_F64 0x00000280 -#define SQ_V_MUL_F64 0x00000281 -#define SQ_V_MIN_F64 0x00000282 -#define SQ_V_MAX_F64 0x00000283 -#define SQ_V_LDEXP_F64 0x00000284 -#define SQ_V_MUL_LO_U32 
0x00000285 -#define SQ_V_MUL_HI_U32 0x00000286 -#define SQ_V_MUL_HI_I32 0x00000287 -#define SQ_V_LDEXP_F32 0x00000288 -#define SQ_V_READLANE_B32 0x00000289 -#define SQ_V_WRITELANE_B32 0x0000028a -#define SQ_V_BCNT_U32_B32 0x0000028b -#define SQ_V_MBCNT_LO_U32_B32 0x0000028c -#define SQ_V_MBCNT_HI_U32_B32 0x0000028d -#define SQ_V_MAC_LEGACY_F32 0x0000028e -#define SQ_V_LSHLREV_B64 0x0000028f -#define SQ_V_LSHRREV_B64 0x00000290 -#define SQ_V_ASHRREV_I64 0x00000291 -#define SQ_V_TRIG_PREOP_F64 0x00000292 -#define SQ_V_BFM_B32 0x00000293 -#define SQ_V_CVT_PKNORM_I16_F32 0x00000294 -#define SQ_V_CVT_PKNORM_U16_F32 0x00000295 -#define SQ_V_CVT_PKRTZ_F16_F32 0x00000296 -#define SQ_V_CVT_PK_U16_U32 0x00000297 -#define SQ_V_CVT_PK_I16_I32 0x00000298 -#define SQ_V_CVT_PKNORM_I16_F16 0x00000299 -#define SQ_V_CVT_PKNORM_U16_F16 0x0000029a -#define SQ_V_READLANE_REGRD_B32 0x0000029b -#define SQ_V_ADD_I32 0x0000029c -#define SQ_V_SUB_I32 0x0000029d -#define SQ_V_ADD_I16 0x0000029e -#define SQ_V_SUB_I16 0x0000029f -#define SQ_V_PACK_B32_F16 0x000002a0 - -/* - * VALUE_SQ_OP_VINTRP value - */ - -#define SQ_V_INTERP_P1_F32 0x00000000 -#define SQ_V_INTERP_P2_F32 0x00000001 -#define SQ_V_INTERP_MOV_F32 0x00000002 - -/* - * VALUE_SQ_SSRC_SPECIAL_VCCZ value - */ - -#define SQ_SRC_VCCZ 0x000000fb - -/* - * VALUE_SQ_TGT_INTERNAL value - */ - -#define SQ_EXP_GDS0 0x00000018 - -/* - * VALUE_SQ_OMOD value - */ - -#define SQ_OMOD_OFF 0x00000000 -#define SQ_OMOD_M2 0x00000001 -#define SQ_OMOD_M4 0x00000002 -#define SQ_OMOD_D2 0x00000003 - -/* - * VALUE_SQ_ATTR value - */ - -#define SQ_ATTR0 0x00000000 - -/* - * VALUE_SQ_TGT value - */ - -#define SQ_EXP_MRT0 0x00000000 -#define SQ_EXP_MRTZ 0x00000008 -#define SQ_EXP_NULL 0x00000009 -#define SQ_EXP_POS0 0x0000000c -#define SQ_EXP_PARAM0 0x00000020 - -/* - * VALUE_SQ_OPU_VOP3 value - */ - -#define SQ_V_OPC_OFFSET 0x00000000 -#define SQ_V_OP2_OFFSET 0x00000100 -#define SQ_V_OP1_OFFSET 0x00000140 -#define SQ_V_INTRP_OFFSET 0x00000270 -#define 
SQ_V_OP3P_OFFSET 0x00000380 - -/* - * VALUE_SQ_OP_SOPK value - */ - -#define SQ_S_MOVK_I32 0x00000000 -#define SQ_S_CMOVK_I32 0x00000001 -#define SQ_S_CMPK_EQ_I32 0x00000002 -#define SQ_S_CMPK_LG_I32 0x00000003 -#define SQ_S_CMPK_GT_I32 0x00000004 -#define SQ_S_CMPK_GE_I32 0x00000005 -#define SQ_S_CMPK_LT_I32 0x00000006 -#define SQ_S_CMPK_LE_I32 0x00000007 -#define SQ_S_CMPK_EQ_U32 0x00000008 -#define SQ_S_CMPK_LG_U32 0x00000009 -#define SQ_S_CMPK_GT_U32 0x0000000a -#define SQ_S_CMPK_GE_U32 0x0000000b -#define SQ_S_CMPK_LT_U32 0x0000000c -#define SQ_S_CMPK_LE_U32 0x0000000d -#define SQ_S_ADDK_I32 0x0000000e -#define SQ_S_MULK_I32 0x0000000f -#define SQ_S_CBRANCH_I_FORK 0x00000010 -#define SQ_S_GETREG_B32 0x00000011 -#define SQ_S_SETREG_B32 0x00000012 -#define SQ_S_GETREG_REGRD_B32 0x00000013 -#define SQ_S_SETREG_IMM32_B32 0x00000014 -#define SQ_S_CALL_B64 0x00000015 - -/* - * VALUE_SQ_COMPF value - */ - -#define SQ_F 0x00000000 -#define SQ_LT 0x00000001 -#define SQ_EQ 0x00000002 -#define SQ_LE 0x00000003 -#define SQ_GT 0x00000004 -#define SQ_LG 0x00000005 -#define SQ_GE 0x00000006 -#define SQ_O 0x00000007 -#define SQ_U 0x00000008 -#define SQ_NGE 0x00000009 -#define SQ_NLG 0x0000000a -#define SQ_NGT 0x0000000b -#define SQ_NLE 0x0000000c -#define SQ_NEQ 0x0000000d -#define SQ_NLT 0x0000000e -#define SQ_TRU 0x0000000f - -/* - * VALUE_SQ_DPP_CTRL value - */ - -#define SQ_DPP_QUAD_PERM 0x00000000 -#define SQ_DPP_ROW_SL1 0x00000101 -#define SQ_DPP_ROW_SL2 0x00000102 -#define SQ_DPP_ROW_SL3 0x00000103 -#define SQ_DPP_ROW_SL4 0x00000104 -#define SQ_DPP_ROW_SL5 0x00000105 -#define SQ_DPP_ROW_SL6 0x00000106 -#define SQ_DPP_ROW_SL7 0x00000107 -#define SQ_DPP_ROW_SL8 0x00000108 -#define SQ_DPP_ROW_SL9 0x00000109 -#define SQ_DPP_ROW_SL10 0x0000010a -#define SQ_DPP_ROW_SL11 0x0000010b -#define SQ_DPP_ROW_SL12 0x0000010c -#define SQ_DPP_ROW_SL13 0x0000010d -#define SQ_DPP_ROW_SL14 0x0000010e -#define SQ_DPP_ROW_SL15 0x0000010f -#define SQ_DPP_ROW_SR1 0x00000111 -#define 
SQ_DPP_ROW_SR2 0x00000112 -#define SQ_DPP_ROW_SR3 0x00000113 -#define SQ_DPP_ROW_SR4 0x00000114 -#define SQ_DPP_ROW_SR5 0x00000115 -#define SQ_DPP_ROW_SR6 0x00000116 -#define SQ_DPP_ROW_SR7 0x00000117 -#define SQ_DPP_ROW_SR8 0x00000118 -#define SQ_DPP_ROW_SR9 0x00000119 -#define SQ_DPP_ROW_SR10 0x0000011a -#define SQ_DPP_ROW_SR11 0x0000011b -#define SQ_DPP_ROW_SR12 0x0000011c -#define SQ_DPP_ROW_SR13 0x0000011d -#define SQ_DPP_ROW_SR14 0x0000011e -#define SQ_DPP_ROW_SR15 0x0000011f -#define SQ_DPP_ROW_RR1 0x00000121 -#define SQ_DPP_ROW_RR2 0x00000122 -#define SQ_DPP_ROW_RR3 0x00000123 -#define SQ_DPP_ROW_RR4 0x00000124 -#define SQ_DPP_ROW_RR5 0x00000125 -#define SQ_DPP_ROW_RR6 0x00000126 -#define SQ_DPP_ROW_RR7 0x00000127 -#define SQ_DPP_ROW_RR8 0x00000128 -#define SQ_DPP_ROW_RR9 0x00000129 -#define SQ_DPP_ROW_RR10 0x0000012a -#define SQ_DPP_ROW_RR11 0x0000012b -#define SQ_DPP_ROW_RR12 0x0000012c -#define SQ_DPP_ROW_RR13 0x0000012d -#define SQ_DPP_ROW_RR14 0x0000012e -#define SQ_DPP_ROW_RR15 0x0000012f -#define SQ_DPP_WF_SL1 0x00000130 -#define SQ_DPP_WF_RL1 0x00000134 -#define SQ_DPP_WF_SR1 0x00000138 -#define SQ_DPP_WF_RR1 0x0000013c -#define SQ_DPP_ROW_MIRROR 0x00000140 -#define SQ_DPP_ROW_HALF_MIRROR 0x00000141 -#define SQ_DPP_ROW_BCAST15 0x00000142 -#define SQ_DPP_ROW_BCAST31 0x00000143 - -/* - * VALUE_SQ_VCC_LOHI value - */ - -#define SQ_VCC_LO 0x0000006a -#define SQ_VCC_HI 0x0000006b - -/* - * VALUE_SQ_SSRC_SPECIAL_SCC value - */ - -#define SQ_SRC_SCC 0x000000fd - -/* - * VALUE_SQ_OP_SOP1 value - */ - -#define SQ_S_MOV_B32 0x00000000 -#define SQ_S_MOV_B64 0x00000001 -#define SQ_S_CMOV_B32 0x00000002 -#define SQ_S_CMOV_B64 0x00000003 -#define SQ_S_NOT_B32 0x00000004 -#define SQ_S_NOT_B64 0x00000005 -#define SQ_S_WQM_B32 0x00000006 -#define SQ_S_WQM_B64 0x00000007 -#define SQ_S_BREV_B32 0x00000008 -#define SQ_S_BREV_B64 0x00000009 -#define SQ_S_BCNT0_I32_B32 0x0000000a -#define SQ_S_BCNT0_I32_B64 0x0000000b -#define SQ_S_BCNT1_I32_B32 0x0000000c -#define 
SQ_S_BCNT1_I32_B64 0x0000000d -#define SQ_S_FF0_I32_B32 0x0000000e -#define SQ_S_FF0_I32_B64 0x0000000f -#define SQ_S_FF1_I32_B32 0x00000010 -#define SQ_S_FF1_I32_B64 0x00000011 -#define SQ_S_FLBIT_I32_B32 0x00000012 -#define SQ_S_FLBIT_I32_B64 0x00000013 -#define SQ_S_FLBIT_I32 0x00000014 -#define SQ_S_FLBIT_I32_I64 0x00000015 -#define SQ_S_SEXT_I32_I8 0x00000016 -#define SQ_S_SEXT_I32_I16 0x00000017 -#define SQ_S_BITSET0_B32 0x00000018 -#define SQ_S_BITSET0_B64 0x00000019 -#define SQ_S_BITSET1_B32 0x0000001a -#define SQ_S_BITSET1_B64 0x0000001b -#define SQ_S_GETPC_B64 0x0000001c -#define SQ_S_SETPC_B64 0x0000001d -#define SQ_S_SWAPPC_B64 0x0000001e -#define SQ_S_RFE_B64 0x0000001f -#define SQ_S_AND_SAVEEXEC_B64 0x00000020 -#define SQ_S_OR_SAVEEXEC_B64 0x00000021 -#define SQ_S_XOR_SAVEEXEC_B64 0x00000022 -#define SQ_S_ANDN2_SAVEEXEC_B64 0x00000023 -#define SQ_S_ORN2_SAVEEXEC_B64 0x00000024 -#define SQ_S_NAND_SAVEEXEC_B64 0x00000025 -#define SQ_S_NOR_SAVEEXEC_B64 0x00000026 -#define SQ_S_XNOR_SAVEEXEC_B64 0x00000027 -#define SQ_S_QUADMASK_B32 0x00000028 -#define SQ_S_QUADMASK_B64 0x00000029 -#define SQ_S_MOVRELS_B32 0x0000002a -#define SQ_S_MOVRELS_B64 0x0000002b -#define SQ_S_MOVRELD_B32 0x0000002c -#define SQ_S_MOVRELD_B64 0x0000002d -#define SQ_S_CBRANCH_JOIN 0x0000002e -#define SQ_S_MOV_REGRD_B32 0x0000002f -#define SQ_S_ABS_I32 0x00000030 -#define SQ_S_MOV_FED_B32 0x00000031 -#define SQ_S_SET_GPR_IDX_IDX 0x00000032 -#define SQ_S_ANDN1_SAVEEXEC_B64 0x00000033 -#define SQ_S_ORN1_SAVEEXEC_B64 0x00000034 -#define SQ_S_ANDN1_WREXEC_B64 0x00000035 -#define SQ_S_ANDN2_WREXEC_B64 0x00000036 -#define SQ_S_BITREPLICATE_B64_B32 0x00000037 - -/* - * VALUE_SQ_MSG value - */ - -#define SQ_MSG_INTERRUPT 0x00000001 -#define SQ_MSG_GS 0x00000002 -#define SQ_MSG_GS_DONE 0x00000003 -#define SQ_MSG_SAVEWAVE 0x00000004 -#define SQ_MSG_STALL_WAVE_GEN 0x00000005 -#define SQ_MSG_HALT_WAVES 0x00000006 -#define SQ_MSG_ORDERED_PS_DONE 0x00000007 -#define SQ_MSG_EARLY_PRIM_DEALLOC 
0x00000008 -#define SQ_MSG_GS_ALLOC_REQ 0x00000009 -#define SQ_MSG_SYSMSG 0x0000000f - -/* - * VALUE_SQ_OP_FLAT_GLBL value - */ - -#define SQ_GLOBAL_LOAD_UBYTE 0x00000010 -#define SQ_GLOBAL_LOAD_SBYTE 0x00000011 -#define SQ_GLOBAL_LOAD_USHORT 0x00000012 -#define SQ_GLOBAL_LOAD_SSHORT 0x00000013 -#define SQ_GLOBAL_LOAD_DWORD 0x00000014 -#define SQ_GLOBAL_LOAD_DWORDX2 0x00000015 -#define SQ_GLOBAL_LOAD_DWORDX3 0x00000016 -#define SQ_GLOBAL_LOAD_DWORDX4 0x00000017 -#define SQ_GLOBAL_STORE_BYTE 0x00000018 -#define SQ_GLOBAL_STORE_SHORT 0x0000001a -#define SQ_GLOBAL_STORE_DWORD 0x0000001c -#define SQ_GLOBAL_STORE_DWORDX2 0x0000001d -#define SQ_GLOBAL_STORE_DWORDX3 0x0000001e -#define SQ_GLOBAL_STORE_DWORDX4 0x0000001f -#define SQ_GLOBAL_ATOMIC_SWAP 0x00000040 -#define SQ_GLOBAL_ATOMIC_CMPSWAP 0x00000041 -#define SQ_GLOBAL_ATOMIC_ADD 0x00000042 -#define SQ_GLOBAL_ATOMIC_SUB 0x00000043 -#define SQ_GLOBAL_ATOMIC_SMIN 0x00000044 -#define SQ_GLOBAL_ATOMIC_UMIN 0x00000045 -#define SQ_GLOBAL_ATOMIC_SMAX 0x00000046 -#define SQ_GLOBAL_ATOMIC_UMAX 0x00000047 -#define SQ_GLOBAL_ATOMIC_AND 0x00000048 -#define SQ_GLOBAL_ATOMIC_OR 0x00000049 -#define SQ_GLOBAL_ATOMIC_XOR 0x0000004a -#define SQ_GLOBAL_ATOMIC_INC 0x0000004b -#define SQ_GLOBAL_ATOMIC_DEC 0x0000004c -#define SQ_GLOBAL_ATOMIC_SWAP_X2 0x00000060 -#define SQ_GLOBAL_ATOMIC_CMPSWAP_X2 0x00000061 -#define SQ_GLOBAL_ATOMIC_ADD_X2 0x00000062 -#define SQ_GLOBAL_ATOMIC_SUB_X2 0x00000063 -#define SQ_GLOBAL_ATOMIC_SMIN_X2 0x00000064 -#define SQ_GLOBAL_ATOMIC_UMIN_X2 0x00000065 -#define SQ_GLOBAL_ATOMIC_SMAX_X2 0x00000066 -#define SQ_GLOBAL_ATOMIC_UMAX_X2 0x00000067 -#define SQ_GLOBAL_ATOMIC_AND_X2 0x00000068 -#define SQ_GLOBAL_ATOMIC_OR_X2 0x00000069 -#define SQ_GLOBAL_ATOMIC_XOR_X2 0x0000006a -#define SQ_GLOBAL_ATOMIC_INC_X2 0x0000006b -#define SQ_GLOBAL_ATOMIC_DEC_X2 0x0000006c - -/* - * VALUE_SQ_VGPR value - */ - -#define SQ_VGPR0 0x00000000 - -/* - * VALUE_SQ_HW_REG value - */ - -#define SQ_HW_REG_MODE 0x00000001 -#define 
SQ_HW_REG_STATUS 0x00000002 -#define SQ_HW_REG_TRAPSTS 0x00000003 -#define SQ_HW_REG_HW_ID 0x00000004 -#define SQ_HW_REG_GPR_ALLOC 0x00000005 -#define SQ_HW_REG_LDS_ALLOC 0x00000006 -#define SQ_HW_REG_IB_STS 0x00000007 -#define SQ_HW_REG_PC_LO 0x00000008 -#define SQ_HW_REG_PC_HI 0x00000009 -#define SQ_HW_REG_INST_DW0 0x0000000a -#define SQ_HW_REG_INST_DW1 0x0000000b -#define SQ_HW_REG_IB_DBG0 0x0000000c -#define SQ_HW_REG_IB_DBG1 0x0000000d -#define SQ_HW_REG_FLUSH_IB 0x0000000e -#define SQ_HW_REG_SH_MEM_BASES 0x0000000f -#define SQ_HW_REG_SQ_SHADER_TBA_LO 0x00000010 -#define SQ_HW_REG_SQ_SHADER_TBA_HI 0x00000011 -#define SQ_HW_REG_SQ_SHADER_TMA_LO 0x00000012 -#define SQ_HW_REG_SQ_SHADER_TMA_HI 0x00000013 - -/* - * VALUE_SQ_OP_VOP1 value - */ - -#define SQ_V_NOP 0x00000000 -#define SQ_V_MOV_B32 0x00000001 -#define SQ_V_READFIRSTLANE_B32 0x00000002 -#define SQ_V_CVT_I32_F64 0x00000003 -#define SQ_V_CVT_F64_I32 0x00000004 -#define SQ_V_CVT_F32_I32 0x00000005 -#define SQ_V_CVT_F32_U32 0x00000006 -#define SQ_V_CVT_U32_F32 0x00000007 -#define SQ_V_CVT_I32_F32 0x00000008 -#define SQ_V_MOV_FED_B32 0x00000009 -#define SQ_V_CVT_F16_F32 0x0000000a -#define SQ_V_CVT_F32_F16 0x0000000b -#define SQ_V_CVT_RPI_I32_F32 0x0000000c -#define SQ_V_CVT_FLR_I32_F32 0x0000000d -#define SQ_V_CVT_OFF_F32_I4 0x0000000e -#define SQ_V_CVT_F32_F64 0x0000000f -#define SQ_V_CVT_F64_F32 0x00000010 -#define SQ_V_CVT_F32_UBYTE0 0x00000011 -#define SQ_V_CVT_F32_UBYTE1 0x00000012 -#define SQ_V_CVT_F32_UBYTE2 0x00000013 -#define SQ_V_CVT_F32_UBYTE3 0x00000014 -#define SQ_V_CVT_U32_F64 0x00000015 -#define SQ_V_CVT_F64_U32 0x00000016 -#define SQ_V_TRUNC_F64 0x00000017 -#define SQ_V_CEIL_F64 0x00000018 -#define SQ_V_RNDNE_F64 0x00000019 -#define SQ_V_FLOOR_F64 0x0000001a -#define SQ_V_FRACT_F32 0x0000001b -#define SQ_V_TRUNC_F32 0x0000001c -#define SQ_V_CEIL_F32 0x0000001d -#define SQ_V_RNDNE_F32 0x0000001e -#define SQ_V_FLOOR_F32 0x0000001f -#define SQ_V_EXP_F32 0x00000020 -#define SQ_V_LOG_F32 
0x00000021 -#define SQ_V_RCP_F32 0x00000022 -#define SQ_V_RCP_IFLAG_F32 0x00000023 -#define SQ_V_RSQ_F32 0x00000024 -#define SQ_V_RCP_F64 0x00000025 -#define SQ_V_RSQ_F64 0x00000026 -#define SQ_V_SQRT_F32 0x00000027 -#define SQ_V_SQRT_F64 0x00000028 -#define SQ_V_SIN_F32 0x00000029 -#define SQ_V_COS_F32 0x0000002a -#define SQ_V_NOT_B32 0x0000002b -#define SQ_V_BFREV_B32 0x0000002c -#define SQ_V_FFBH_U32 0x0000002d -#define SQ_V_FFBL_B32 0x0000002e -#define SQ_V_FFBH_I32 0x0000002f -#define SQ_V_FREXP_EXP_I32_F64 0x00000030 -#define SQ_V_FREXP_MANT_F64 0x00000031 -#define SQ_V_FRACT_F64 0x00000032 -#define SQ_V_FREXP_EXP_I32_F32 0x00000033 -#define SQ_V_FREXP_MANT_F32 0x00000034 -#define SQ_V_CLREXCP 0x00000035 -#define SQ_V_MOV_PRSV_B32 0x00000036 -#define SQ_V_CVT_F16_U16 0x00000039 -#define SQ_V_CVT_F16_I16 0x0000003a -#define SQ_V_CVT_U16_F16 0x0000003b -#define SQ_V_CVT_I16_F16 0x0000003c -#define SQ_V_RCP_F16 0x0000003d -#define SQ_V_SQRT_F16 0x0000003e -#define SQ_V_RSQ_F16 0x0000003f -#define SQ_V_LOG_F16 0x00000040 -#define SQ_V_EXP_F16 0x00000041 -#define SQ_V_FREXP_MANT_F16 0x00000042 -#define SQ_V_FREXP_EXP_I16_F16 0x00000043 -#define SQ_V_FLOOR_F16 0x00000044 -#define SQ_V_CEIL_F16 0x00000045 -#define SQ_V_TRUNC_F16 0x00000046 -#define SQ_V_RNDNE_F16 0x00000047 -#define SQ_V_FRACT_F16 0x00000048 -#define SQ_V_SIN_F16 0x00000049 -#define SQ_V_COS_F16 0x0000004a -#define SQ_V_EXP_LEGACY_F32 0x0000004b -#define SQ_V_LOG_LEGACY_F32 0x0000004c -#define SQ_V_CVT_NORM_I16_F16 0x0000004d -#define SQ_V_CVT_NORM_U16_F16 0x0000004e -#define SQ_V_SAT_PK_U8_I16 0x0000004f -#define SQ_V_WRITELANE_IMM32 0x00000050 -#define SQ_V_SWAP_B32 0x00000051 - -/* - * VALUE_SQ_OP_MUBUF value - */ - -#define SQ_BUFFER_LOAD_FORMAT_X 0x00000000 -#define SQ_BUFFER_LOAD_FORMAT_XY 0x00000001 -#define SQ_BUFFER_LOAD_FORMAT_XYZ 0x00000002 -#define SQ_BUFFER_LOAD_FORMAT_XYZW 0x00000003 -#define SQ_BUFFER_STORE_FORMAT_X 0x00000004 -#define SQ_BUFFER_STORE_FORMAT_XY 0x00000005 -#define 
SQ_BUFFER_STORE_FORMAT_XYZ 0x00000006 -#define SQ_BUFFER_STORE_FORMAT_XYZW 0x00000007 -#define SQ_BUFFER_LOAD_FORMAT_D16_X 0x00000008 -#define SQ_BUFFER_LOAD_FORMAT_D16_XY 0x00000009 -#define SQ_BUFFER_LOAD_FORMAT_D16_XYZ 0x0000000a -#define SQ_BUFFER_LOAD_FORMAT_D16_XYZW 0x0000000b -#define SQ_BUFFER_STORE_FORMAT_D16_X 0x0000000c -#define SQ_BUFFER_STORE_FORMAT_D16_XY 0x0000000d -#define SQ_BUFFER_STORE_FORMAT_D16_XYZ 0x0000000e -#define SQ_BUFFER_STORE_FORMAT_D16_XYZW 0x0000000f -#define SQ_BUFFER_LOAD_UBYTE 0x00000010 -#define SQ_BUFFER_LOAD_SBYTE 0x00000011 -#define SQ_BUFFER_LOAD_USHORT 0x00000012 -#define SQ_BUFFER_LOAD_SSHORT 0x00000013 -#define SQ_BUFFER_LOAD_DWORD 0x00000014 -#define SQ_BUFFER_LOAD_DWORDX2 0x00000015 -#define SQ_BUFFER_LOAD_DWORDX3 0x00000016 -#define SQ_BUFFER_LOAD_DWORDX4 0x00000017 -#define SQ_BUFFER_STORE_BYTE 0x00000018 -#define SQ_BUFFER_STORE_SHORT 0x0000001a -#define SQ_BUFFER_STORE_DWORD 0x0000001c -#define SQ_BUFFER_STORE_DWORDX2 0x0000001d -#define SQ_BUFFER_STORE_DWORDX3 0x0000001e -#define SQ_BUFFER_STORE_DWORDX4 0x0000001f -#define SQ_BUFFER_STORE_LDS_DWORD 0x0000003d -#define SQ_BUFFER_WBINVL1 0x0000003e -#define SQ_BUFFER_WBINVL1_VOL 0x0000003f -#define SQ_BUFFER_ATOMIC_SWAP 0x00000040 -#define SQ_BUFFER_ATOMIC_CMPSWAP 0x00000041 -#define SQ_BUFFER_ATOMIC_ADD 0x00000042 -#define SQ_BUFFER_ATOMIC_SUB 0x00000043 -#define SQ_BUFFER_ATOMIC_SMIN 0x00000044 -#define SQ_BUFFER_ATOMIC_UMIN 0x00000045 -#define SQ_BUFFER_ATOMIC_SMAX 0x00000046 -#define SQ_BUFFER_ATOMIC_UMAX 0x00000047 -#define SQ_BUFFER_ATOMIC_AND 0x00000048 -#define SQ_BUFFER_ATOMIC_OR 0x00000049 -#define SQ_BUFFER_ATOMIC_XOR 0x0000004a -#define SQ_BUFFER_ATOMIC_INC 0x0000004b -#define SQ_BUFFER_ATOMIC_DEC 0x0000004c -#define SQ_BUFFER_ATOMIC_SWAP_X2 0x00000060 -#define SQ_BUFFER_ATOMIC_CMPSWAP_X2 0x00000061 -#define SQ_BUFFER_ATOMIC_ADD_X2 0x00000062 -#define SQ_BUFFER_ATOMIC_SUB_X2 0x00000063 -#define SQ_BUFFER_ATOMIC_SMIN_X2 0x00000064 -#define 
SQ_BUFFER_ATOMIC_UMIN_X2 0x00000065 -#define SQ_BUFFER_ATOMIC_SMAX_X2 0x00000066 -#define SQ_BUFFER_ATOMIC_UMAX_X2 0x00000067 -#define SQ_BUFFER_ATOMIC_AND_X2 0x00000068 -#define SQ_BUFFER_ATOMIC_OR_X2 0x00000069 -#define SQ_BUFFER_ATOMIC_XOR_X2 0x0000006a -#define SQ_BUFFER_ATOMIC_INC_X2 0x0000006b -#define SQ_BUFFER_ATOMIC_DEC_X2 0x0000006c - -/* - * VALUE_SQ_TRAP value - */ - -#define SQ_TTMP0 0x0000006c -#define SQ_TTMP1 0x0000006d -#define SQ_TTMP2 0x0000006e -#define SQ_TTMP3 0x0000006f -#define SQ_TTMP4 0x00000070 -#define SQ_TTMP5 0x00000071 -#define SQ_TTMP6 0x00000072 -#define SQ_TTMP7 0x00000073 -#define SQ_TTMP8 0x00000074 -#define SQ_TTMP9 0x00000075 -#define SQ_TTMP10 0x00000076 -#define SQ_TTMP11 0x00000077 -#define SQ_TTMP12 0x00000078 -#define SQ_TTMP13 0x00000079 -#define SQ_TTMP14 0x0000007a -#define SQ_TTMP15 0x0000007b - -/* - * VALUE_SQ_OP_VOPC value - */ - -#define SQ_V_CMP_CLASS_F32 0x00000010 -#define SQ_V_CMPX_CLASS_F32 0x00000011 -#define SQ_V_CMP_CLASS_F64 0x00000012 -#define SQ_V_CMPX_CLASS_F64 0x00000013 -#define SQ_V_CMP_CLASS_F16 0x00000014 -#define SQ_V_CMPX_CLASS_F16 0x00000015 -#define SQ_V_CMP_F_F16 0x00000020 -#define SQ_V_CMP_LT_F16 0x00000021 -#define SQ_V_CMP_EQ_F16 0x00000022 -#define SQ_V_CMP_LE_F16 0x00000023 -#define SQ_V_CMP_GT_F16 0x00000024 -#define SQ_V_CMP_LG_F16 0x00000025 -#define SQ_V_CMP_GE_F16 0x00000026 -#define SQ_V_CMP_O_F16 0x00000027 -#define SQ_V_CMP_U_F16 0x00000028 -#define SQ_V_CMP_NGE_F16 0x00000029 -#define SQ_V_CMP_NLG_F16 0x0000002a -#define SQ_V_CMP_NGT_F16 0x0000002b -#define SQ_V_CMP_NLE_F16 0x0000002c -#define SQ_V_CMP_NEQ_F16 0x0000002d -#define SQ_V_CMP_NLT_F16 0x0000002e -#define SQ_V_CMP_TRU_F16 0x0000002f -#define SQ_V_CMPX_F_F16 0x00000030 -#define SQ_V_CMPX_LT_F16 0x00000031 -#define SQ_V_CMPX_EQ_F16 0x00000032 -#define SQ_V_CMPX_LE_F16 0x00000033 -#define SQ_V_CMPX_GT_F16 0x00000034 -#define SQ_V_CMPX_LG_F16 0x00000035 -#define SQ_V_CMPX_GE_F16 0x00000036 -#define SQ_V_CMPX_O_F16 
0x00000037 -#define SQ_V_CMPX_U_F16 0x00000038 -#define SQ_V_CMPX_NGE_F16 0x00000039 -#define SQ_V_CMPX_NLG_F16 0x0000003a -#define SQ_V_CMPX_NGT_F16 0x0000003b -#define SQ_V_CMPX_NLE_F16 0x0000003c -#define SQ_V_CMPX_NEQ_F16 0x0000003d -#define SQ_V_CMPX_NLT_F16 0x0000003e -#define SQ_V_CMPX_TRU_F16 0x0000003f -#define SQ_V_CMP_F_F32 0x00000040 -#define SQ_V_CMP_LT_F32 0x00000041 -#define SQ_V_CMP_EQ_F32 0x00000042 -#define SQ_V_CMP_LE_F32 0x00000043 -#define SQ_V_CMP_GT_F32 0x00000044 -#define SQ_V_CMP_LG_F32 0x00000045 -#define SQ_V_CMP_GE_F32 0x00000046 -#define SQ_V_CMP_O_F32 0x00000047 -#define SQ_V_CMP_U_F32 0x00000048 -#define SQ_V_CMP_NGE_F32 0x00000049 -#define SQ_V_CMP_NLG_F32 0x0000004a -#define SQ_V_CMP_NGT_F32 0x0000004b -#define SQ_V_CMP_NLE_F32 0x0000004c -#define SQ_V_CMP_NEQ_F32 0x0000004d -#define SQ_V_CMP_NLT_F32 0x0000004e -#define SQ_V_CMP_TRU_F32 0x0000004f -#define SQ_V_CMPX_F_F32 0x00000050 -#define SQ_V_CMPX_LT_F32 0x00000051 -#define SQ_V_CMPX_EQ_F32 0x00000052 -#define SQ_V_CMPX_LE_F32 0x00000053 -#define SQ_V_CMPX_GT_F32 0x00000054 -#define SQ_V_CMPX_LG_F32 0x00000055 -#define SQ_V_CMPX_GE_F32 0x00000056 -#define SQ_V_CMPX_O_F32 0x00000057 -#define SQ_V_CMPX_U_F32 0x00000058 -#define SQ_V_CMPX_NGE_F32 0x00000059 -#define SQ_V_CMPX_NLG_F32 0x0000005a -#define SQ_V_CMPX_NGT_F32 0x0000005b -#define SQ_V_CMPX_NLE_F32 0x0000005c -#define SQ_V_CMPX_NEQ_F32 0x0000005d -#define SQ_V_CMPX_NLT_F32 0x0000005e -#define SQ_V_CMPX_TRU_F32 0x0000005f -#define SQ_V_CMP_F_F64 0x00000060 -#define SQ_V_CMP_LT_F64 0x00000061 -#define SQ_V_CMP_EQ_F64 0x00000062 -#define SQ_V_CMP_LE_F64 0x00000063 -#define SQ_V_CMP_GT_F64 0x00000064 -#define SQ_V_CMP_LG_F64 0x00000065 -#define SQ_V_CMP_GE_F64 0x00000066 -#define SQ_V_CMP_O_F64 0x00000067 -#define SQ_V_CMP_U_F64 0x00000068 -#define SQ_V_CMP_NGE_F64 0x00000069 -#define SQ_V_CMP_NLG_F64 0x0000006a -#define SQ_V_CMP_NGT_F64 0x0000006b -#define SQ_V_CMP_NLE_F64 0x0000006c -#define SQ_V_CMP_NEQ_F64 0x0000006d 
-#define SQ_V_CMP_NLT_F64 0x0000006e -#define SQ_V_CMP_TRU_F64 0x0000006f -#define SQ_V_CMPX_F_F64 0x00000070 -#define SQ_V_CMPX_LT_F64 0x00000071 -#define SQ_V_CMPX_EQ_F64 0x00000072 -#define SQ_V_CMPX_LE_F64 0x00000073 -#define SQ_V_CMPX_GT_F64 0x00000074 -#define SQ_V_CMPX_LG_F64 0x00000075 -#define SQ_V_CMPX_GE_F64 0x00000076 -#define SQ_V_CMPX_O_F64 0x00000077 -#define SQ_V_CMPX_U_F64 0x00000078 -#define SQ_V_CMPX_NGE_F64 0x00000079 -#define SQ_V_CMPX_NLG_F64 0x0000007a -#define SQ_V_CMPX_NGT_F64 0x0000007b -#define SQ_V_CMPX_NLE_F64 0x0000007c -#define SQ_V_CMPX_NEQ_F64 0x0000007d -#define SQ_V_CMPX_NLT_F64 0x0000007e -#define SQ_V_CMPX_TRU_F64 0x0000007f -#define SQ_V_CMP_F_I16 0x000000a0 -#define SQ_V_CMP_LT_I16 0x000000a1 -#define SQ_V_CMP_EQ_I16 0x000000a2 -#define SQ_V_CMP_LE_I16 0x000000a3 -#define SQ_V_CMP_GT_I16 0x000000a4 -#define SQ_V_CMP_NE_I16 0x000000a5 -#define SQ_V_CMP_GE_I16 0x000000a6 -#define SQ_V_CMP_T_I16 0x000000a7 -#define SQ_V_CMP_F_U16 0x000000a8 -#define SQ_V_CMP_LT_U16 0x000000a9 -#define SQ_V_CMP_EQ_U16 0x000000aa -#define SQ_V_CMP_LE_U16 0x000000ab -#define SQ_V_CMP_GT_U16 0x000000ac -#define SQ_V_CMP_NE_U16 0x000000ad -#define SQ_V_CMP_GE_U16 0x000000ae -#define SQ_V_CMP_T_U16 0x000000af -#define SQ_V_CMPX_F_I16 0x000000b0 -#define SQ_V_CMPX_LT_I16 0x000000b1 -#define SQ_V_CMPX_EQ_I16 0x000000b2 -#define SQ_V_CMPX_LE_I16 0x000000b3 -#define SQ_V_CMPX_GT_I16 0x000000b4 -#define SQ_V_CMPX_NE_I16 0x000000b5 -#define SQ_V_CMPX_GE_I16 0x000000b6 -#define SQ_V_CMPX_T_I16 0x000000b7 -#define SQ_V_CMPX_F_U16 0x000000b8 -#define SQ_V_CMPX_LT_U16 0x000000b9 -#define SQ_V_CMPX_EQ_U16 0x000000ba -#define SQ_V_CMPX_LE_U16 0x000000bb -#define SQ_V_CMPX_GT_U16 0x000000bc -#define SQ_V_CMPX_NE_U16 0x000000bd -#define SQ_V_CMPX_GE_U16 0x000000be -#define SQ_V_CMPX_T_U16 0x000000bf -#define SQ_V_CMP_F_I32 0x000000c0 -#define SQ_V_CMP_LT_I32 0x000000c1 -#define SQ_V_CMP_EQ_I32 0x000000c2 -#define SQ_V_CMP_LE_I32 0x000000c3 -#define SQ_V_CMP_GT_I32 
0x000000c4 -#define SQ_V_CMP_NE_I32 0x000000c5 -#define SQ_V_CMP_GE_I32 0x000000c6 -#define SQ_V_CMP_T_I32 0x000000c7 -#define SQ_V_CMP_F_U32 0x000000c8 -#define SQ_V_CMP_LT_U32 0x000000c9 -#define SQ_V_CMP_EQ_U32 0x000000ca -#define SQ_V_CMP_LE_U32 0x000000cb -#define SQ_V_CMP_GT_U32 0x000000cc -#define SQ_V_CMP_NE_U32 0x000000cd -#define SQ_V_CMP_GE_U32 0x000000ce -#define SQ_V_CMP_T_U32 0x000000cf -#define SQ_V_CMPX_F_I32 0x000000d0 -#define SQ_V_CMPX_LT_I32 0x000000d1 -#define SQ_V_CMPX_EQ_I32 0x000000d2 -#define SQ_V_CMPX_LE_I32 0x000000d3 -#define SQ_V_CMPX_GT_I32 0x000000d4 -#define SQ_V_CMPX_NE_I32 0x000000d5 -#define SQ_V_CMPX_GE_I32 0x000000d6 -#define SQ_V_CMPX_T_I32 0x000000d7 -#define SQ_V_CMPX_F_U32 0x000000d8 -#define SQ_V_CMPX_LT_U32 0x000000d9 -#define SQ_V_CMPX_EQ_U32 0x000000da -#define SQ_V_CMPX_LE_U32 0x000000db -#define SQ_V_CMPX_GT_U32 0x000000dc -#define SQ_V_CMPX_NE_U32 0x000000dd -#define SQ_V_CMPX_GE_U32 0x000000de -#define SQ_V_CMPX_T_U32 0x000000df -#define SQ_V_CMP_F_I64 0x000000e0 -#define SQ_V_CMP_LT_I64 0x000000e1 -#define SQ_V_CMP_EQ_I64 0x000000e2 -#define SQ_V_CMP_LE_I64 0x000000e3 -#define SQ_V_CMP_GT_I64 0x000000e4 -#define SQ_V_CMP_NE_I64 0x000000e5 -#define SQ_V_CMP_GE_I64 0x000000e6 -#define SQ_V_CMP_T_I64 0x000000e7 -#define SQ_V_CMP_F_U64 0x000000e8 -#define SQ_V_CMP_LT_U64 0x000000e9 -#define SQ_V_CMP_EQ_U64 0x000000ea -#define SQ_V_CMP_LE_U64 0x000000eb -#define SQ_V_CMP_GT_U64 0x000000ec -#define SQ_V_CMP_NE_U64 0x000000ed -#define SQ_V_CMP_GE_U64 0x000000ee -#define SQ_V_CMP_T_U64 0x000000ef -#define SQ_V_CMPX_F_I64 0x000000f0 -#define SQ_V_CMPX_LT_I64 0x000000f1 -#define SQ_V_CMPX_EQ_I64 0x000000f2 -#define SQ_V_CMPX_LE_I64 0x000000f3 -#define SQ_V_CMPX_GT_I64 0x000000f4 -#define SQ_V_CMPX_NE_I64 0x000000f5 -#define SQ_V_CMPX_GE_I64 0x000000f6 -#define SQ_V_CMPX_T_I64 0x000000f7 -#define SQ_V_CMPX_F_U64 0x000000f8 -#define SQ_V_CMPX_LT_U64 0x000000f9 -#define SQ_V_CMPX_EQ_U64 0x000000fa -#define SQ_V_CMPX_LE_U64 
0x000000fb -#define SQ_V_CMPX_GT_U64 0x000000fc -#define SQ_V_CMPX_NE_U64 0x000000fd -#define SQ_V_CMPX_GE_U64 0x000000fe -#define SQ_V_CMPX_T_U64 0x000000ff - -/* - * VALUE_SQ_DPP_CTRL_R_1_15 value - */ - -#define SQ_R1 0x00000001 -#define SQ_R2 0x00000002 -#define SQ_R3 0x00000003 -#define SQ_R4 0x00000004 -#define SQ_R5 0x00000005 -#define SQ_R6 0x00000006 -#define SQ_R7 0x00000007 -#define SQ_R8 0x00000008 -#define SQ_R9 0x00000009 -#define SQ_R10 0x0000000a -#define SQ_R11 0x0000000b -#define SQ_R12 0x0000000c -#define SQ_R13 0x0000000d -#define SQ_R14 0x0000000e -#define SQ_R15 0x0000000f - -/* - * VALUE_SQ_SSRC_SPECIAL_LDS value - */ - -#define SQ_SRC_LDS_DIRECT 0x000000fe - -/* - * VALUE_SQ_OP_EXP value - */ - -#define SQ_EXP 0x00000000 - -/* - * VALUE_SQ_SDST_M0 value - */ - -#define SQ_M0 0x0000007c - -/* - * VALUE_SQ_OP_MIMG value - */ - -#define SQ_IMAGE_LOAD 0x00000000 -#define SQ_IMAGE_LOAD_MIP 0x00000001 -#define SQ_IMAGE_LOAD_PCK 0x00000002 -#define SQ_IMAGE_LOAD_PCK_SGN 0x00000003 -#define SQ_IMAGE_LOAD_MIP_PCK 0x00000004 -#define SQ_IMAGE_LOAD_MIP_PCK_SGN 0x00000005 -#define SQ_IMAGE_STORE 0x00000008 -#define SQ_IMAGE_STORE_MIP 0x00000009 -#define SQ_IMAGE_STORE_PCK 0x0000000a -#define SQ_IMAGE_STORE_MIP_PCK 0x0000000b -#define SQ_IMAGE_GET_RESINFO 0x0000000e -#define SQ_IMAGE_ATOMIC_SWAP 0x00000010 -#define SQ_IMAGE_ATOMIC_CMPSWAP 0x00000011 -#define SQ_IMAGE_ATOMIC_ADD 0x00000012 -#define SQ_IMAGE_ATOMIC_SUB 0x00000013 -#define SQ_IMAGE_ATOMIC_SMIN 0x00000014 -#define SQ_IMAGE_ATOMIC_UMIN 0x00000015 -#define SQ_IMAGE_ATOMIC_SMAX 0x00000016 -#define SQ_IMAGE_ATOMIC_UMAX 0x00000017 -#define SQ_IMAGE_ATOMIC_AND 0x00000018 -#define SQ_IMAGE_ATOMIC_OR 0x00000019 -#define SQ_IMAGE_ATOMIC_XOR 0x0000001a -#define SQ_IMAGE_ATOMIC_INC 0x0000001b -#define SQ_IMAGE_ATOMIC_DEC 0x0000001c -#define SQ_IMAGE_SAMPLE 0x00000020 -#define SQ_IMAGE_SAMPLE_CL 0x00000021 -#define SQ_IMAGE_SAMPLE_D 0x00000022 -#define SQ_IMAGE_SAMPLE_D_CL 0x00000023 -#define 
SQ_IMAGE_SAMPLE_L 0x00000024 -#define SQ_IMAGE_SAMPLE_B 0x00000025 -#define SQ_IMAGE_SAMPLE_B_CL 0x00000026 -#define SQ_IMAGE_SAMPLE_LZ 0x00000027 -#define SQ_IMAGE_SAMPLE_C 0x00000028 -#define SQ_IMAGE_SAMPLE_C_CL 0x00000029 -#define SQ_IMAGE_SAMPLE_C_D 0x0000002a -#define SQ_IMAGE_SAMPLE_C_D_CL 0x0000002b -#define SQ_IMAGE_SAMPLE_C_L 0x0000002c -#define SQ_IMAGE_SAMPLE_C_B 0x0000002d -#define SQ_IMAGE_SAMPLE_C_B_CL 0x0000002e -#define SQ_IMAGE_SAMPLE_C_LZ 0x0000002f -#define SQ_IMAGE_SAMPLE_O 0x00000030 -#define SQ_IMAGE_SAMPLE_CL_O 0x00000031 -#define SQ_IMAGE_SAMPLE_D_O 0x00000032 -#define SQ_IMAGE_SAMPLE_D_CL_O 0x00000033 -#define SQ_IMAGE_SAMPLE_L_O 0x00000034 -#define SQ_IMAGE_SAMPLE_B_O 0x00000035 -#define SQ_IMAGE_SAMPLE_B_CL_O 0x00000036 -#define SQ_IMAGE_SAMPLE_LZ_O 0x00000037 -#define SQ_IMAGE_SAMPLE_C_O 0x00000038 -#define SQ_IMAGE_SAMPLE_C_CL_O 0x00000039 -#define SQ_IMAGE_SAMPLE_C_D_O 0x0000003a -#define SQ_IMAGE_SAMPLE_C_D_CL_O 0x0000003b -#define SQ_IMAGE_SAMPLE_C_L_O 0x0000003c -#define SQ_IMAGE_SAMPLE_C_B_O 0x0000003d -#define SQ_IMAGE_SAMPLE_C_B_CL_O 0x0000003e -#define SQ_IMAGE_SAMPLE_C_LZ_O 0x0000003f -#define SQ_IMAGE_GATHER4 0x00000040 -#define SQ_IMAGE_GATHER4_CL 0x00000041 -#define SQ_IMAGE_GATHER4H 0x00000042 -#define SQ_IMAGE_GATHER4_L 0x00000044 -#define SQ_IMAGE_GATHER4_B 0x00000045 -#define SQ_IMAGE_GATHER4_B_CL 0x00000046 -#define SQ_IMAGE_GATHER4_LZ 0x00000047 -#define SQ_IMAGE_GATHER4_C 0x00000048 -#define SQ_IMAGE_GATHER4_C_CL 0x00000049 -#define SQ_IMAGE_GATHER4H_PCK 0x0000004a -#define SQ_IMAGE_GATHER8H_PCK 0x0000004b -#define SQ_IMAGE_GATHER4_C_L 0x0000004c -#define SQ_IMAGE_GATHER4_C_B 0x0000004d -#define SQ_IMAGE_GATHER4_C_B_CL 0x0000004e -#define SQ_IMAGE_GATHER4_C_LZ 0x0000004f -#define SQ_IMAGE_GATHER4_O 0x00000050 -#define SQ_IMAGE_GATHER4_CL_O 0x00000051 -#define SQ_IMAGE_GATHER4_L_O 0x00000054 -#define SQ_IMAGE_GATHER4_B_O 0x00000055 -#define SQ_IMAGE_GATHER4_B_CL_O 0x00000056 -#define SQ_IMAGE_GATHER4_LZ_O 0x00000057 
-#define SQ_IMAGE_GATHER4_C_O 0x00000058 -#define SQ_IMAGE_GATHER4_C_CL_O 0x00000059 -#define SQ_IMAGE_GATHER4_C_L_O 0x0000005c -#define SQ_IMAGE_GATHER4_C_B_O 0x0000005d -#define SQ_IMAGE_GATHER4_C_B_CL_O 0x0000005e -#define SQ_IMAGE_GATHER4_C_LZ_O 0x0000005f -#define SQ_IMAGE_GET_LOD 0x00000060 -#define SQ_IMAGE_SAMPLE_CD 0x00000068 -#define SQ_IMAGE_SAMPLE_CD_CL 0x00000069 -#define SQ_IMAGE_SAMPLE_C_CD 0x0000006a -#define SQ_IMAGE_SAMPLE_C_CD_CL 0x0000006b -#define SQ_IMAGE_SAMPLE_CD_O 0x0000006c -#define SQ_IMAGE_SAMPLE_CD_CL_O 0x0000006d -#define SQ_IMAGE_SAMPLE_C_CD_O 0x0000006e -#define SQ_IMAGE_SAMPLE_C_CD_CL_O 0x0000006f -#define SQ_IMAGE_RSRC256 0x0000007e -#define SQ_IMAGE_SAMPLER 0x0000007f - -/* - * VALUE_SQ_SSRC_SPECIAL_NOLIT value - */ - -#define SQ_SRC_64_INT 0x000000c0 -#define SQ_SRC_M_1_INT 0x000000c1 -#define SQ_SRC_M_2_INT 0x000000c2 -#define SQ_SRC_M_3_INT 0x000000c3 -#define SQ_SRC_M_4_INT 0x000000c4 -#define SQ_SRC_M_5_INT 0x000000c5 -#define SQ_SRC_M_6_INT 0x000000c6 -#define SQ_SRC_M_7_INT 0x000000c7 -#define SQ_SRC_M_8_INT 0x000000c8 -#define SQ_SRC_M_9_INT 0x000000c9 -#define SQ_SRC_M_10_INT 0x000000ca -#define SQ_SRC_M_11_INT 0x000000cb -#define SQ_SRC_M_12_INT 0x000000cc -#define SQ_SRC_M_13_INT 0x000000cd -#define SQ_SRC_M_14_INT 0x000000ce -#define SQ_SRC_M_15_INT 0x000000cf -#define SQ_SRC_M_16_INT 0x000000d0 -#define SQ_SRC_0_5 0x000000f0 -#define SQ_SRC_M_0_5 0x000000f1 -#define SQ_SRC_1 0x000000f2 -#define SQ_SRC_M_1 0x000000f3 -#define SQ_SRC_2 0x000000f4 -#define SQ_SRC_M_2 0x000000f5 -#define SQ_SRC_4 0x000000f6 -#define SQ_SRC_M_4 0x000000f7 -#define SQ_SRC_INV_2PI 0x000000f8 - -/* - * VALUE_SQ_SSRC_SPECIAL_DPP value - */ - -#define SQ_SRC_DPP 0x000000fa - -/* - * VALUE_SQ_SSRC_SPECIAL_APERTURE value - */ - -#define SQ_SRC_SHARED_BASE 0x000000eb -#define SQ_SRC_SHARED_LIMIT 0x000000ec -#define SQ_SRC_PRIVATE_BASE 0x000000ed -#define SQ_SRC_PRIVATE_LIMIT 0x000000ee - -/* - * VALUE_SQ_DPP_CTRL_L_1_15 value - */ - -#define SQ_L1 
0x00000001 -#define SQ_L2 0x00000002 -#define SQ_L3 0x00000003 -#define SQ_L4 0x00000004 -#define SQ_L5 0x00000005 -#define SQ_L6 0x00000006 -#define SQ_L7 0x00000007 -#define SQ_L8 0x00000008 -#define SQ_L9 0x00000009 -#define SQ_L10 0x0000000a -#define SQ_L11 0x0000000b -#define SQ_L12 0x0000000c -#define SQ_L13 0x0000000d -#define SQ_L14 0x0000000e -#define SQ_L15 0x0000000f - -/* - * VALUE_SQ_OP_SOP2 value - */ - -#define SQ_S_ADD_U32 0x00000000 -#define SQ_S_SUB_U32 0x00000001 -#define SQ_S_ADD_I32 0x00000002 -#define SQ_S_SUB_I32 0x00000003 -#define SQ_S_ADDC_U32 0x00000004 -#define SQ_S_SUBB_U32 0x00000005 -#define SQ_S_MIN_I32 0x00000006 -#define SQ_S_MIN_U32 0x00000007 -#define SQ_S_MAX_I32 0x00000008 -#define SQ_S_MAX_U32 0x00000009 -#define SQ_S_CSELECT_B32 0x0000000a -#define SQ_S_CSELECT_B64 0x0000000b -#define SQ_S_AND_B32 0x0000000c -#define SQ_S_AND_B64 0x0000000d -#define SQ_S_OR_B32 0x0000000e -#define SQ_S_OR_B64 0x0000000f -#define SQ_S_XOR_B32 0x00000010 -#define SQ_S_XOR_B64 0x00000011 -#define SQ_S_ANDN2_B32 0x00000012 -#define SQ_S_ANDN2_B64 0x00000013 -#define SQ_S_ORN2_B32 0x00000014 -#define SQ_S_ORN2_B64 0x00000015 -#define SQ_S_NAND_B32 0x00000016 -#define SQ_S_NAND_B64 0x00000017 -#define SQ_S_NOR_B32 0x00000018 -#define SQ_S_NOR_B64 0x00000019 -#define SQ_S_XNOR_B32 0x0000001a -#define SQ_S_XNOR_B64 0x0000001b -#define SQ_S_LSHL_B32 0x0000001c -#define SQ_S_LSHL_B64 0x0000001d -#define SQ_S_LSHR_B32 0x0000001e -#define SQ_S_LSHR_B64 0x0000001f -#define SQ_S_ASHR_I32 0x00000020 -#define SQ_S_ASHR_I64 0x00000021 -#define SQ_S_BFM_B32 0x00000022 -#define SQ_S_BFM_B64 0x00000023 -#define SQ_S_MUL_I32 0x00000024 -#define SQ_S_BFE_U32 0x00000025 -#define SQ_S_BFE_I32 0x00000026 -#define SQ_S_BFE_U64 0x00000027 -#define SQ_S_BFE_I64 0x00000028 -#define SQ_S_CBRANCH_G_FORK 0x00000029 -#define SQ_S_ABSDIFF_I32 0x0000002a -#define SQ_S_RFE_RESTORE_B64 0x0000002b -#define SQ_S_MUL_HI_U32 0x0000002c -#define SQ_S_MUL_HI_I32 0x0000002d -#define 
SQ_S_LSHL1_ADD_U32 0x0000002e -#define SQ_S_LSHL2_ADD_U32 0x0000002f -#define SQ_S_LSHL3_ADD_U32 0x00000030 -#define SQ_S_LSHL4_ADD_U32 0x00000031 -#define SQ_S_PACK_LL_B32_B16 0x00000032 -#define SQ_S_PACK_LH_B32_B16 0x00000033 -#define SQ_S_PACK_HH_B32_B16 0x00000034 - -/* - * VALUE_SQ_SDST_EXEC value - */ - -#define SQ_EXEC_LO 0x0000007e -#define SQ_EXEC_HI 0x0000007f - -/* - * VALUE_SQ_SSRC_SPECIAL_POPS_EXITING_WAVE_ID value - */ - -#define SQ_SRC_POPS_EXITING_WAVE_ID 0x000000ef - -/* - * VALUE_SQ_COMPI value - */ - -#define SQ_F 0x00000000 -#define SQ_LT 0x00000001 -#define SQ_EQ 0x00000002 -#define SQ_LE 0x00000003 -#define SQ_GT 0x00000004 -#define SQ_NE 0x00000005 -#define SQ_GE 0x00000006 -#define SQ_T 0x00000007 - -/* - * VALUE_SQ_SGPR value - */ - -#define SQ_SGPR0 0x00000000 - -/* - * VALUE_SQ_CHAN value - */ - -#define SQ_CHAN_X 0x00000000 -#define SQ_CHAN_Y 0x00000001 -#define SQ_CHAN_Z 0x00000002 -#define SQ_CHAN_W 0x00000003 - -/* - * VALUE_SQ_SSRC_SPECIAL_SDWA value - */ - -#define SQ_SRC_SDWA 0x000000f9 - -/* - * VALUE_SQ_SSRC_SPECIAL_LIT value - */ - -#define SQ_SRC_LITERAL 0x000000ff - -/* - * VALUE_SQ_DPP_BOUND_CTRL value - */ - -#define SQ_DPP_BOUND_OFF 0x00000000 -#define SQ_DPP_BOUND_ZERO 0x00000001 - -/* - * VALUE_SQ_GS_OP value - */ - -#define SQ_GS_OP_NOP 0x00000000 -#define SQ_GS_OP_CUT 0x00000001 -#define SQ_GS_OP_EMIT 0x00000002 -#define SQ_GS_OP_EMIT_CUT 0x00000003 - -/* - * VALUE_SQ_OP_MTBUF value - */ - -#define SQ_TBUFFER_LOAD_FORMAT_X 0x00000000 -#define SQ_TBUFFER_LOAD_FORMAT_XY 0x00000001 -#define SQ_TBUFFER_LOAD_FORMAT_XYZ 0x00000002 -#define SQ_TBUFFER_LOAD_FORMAT_XYZW 0x00000003 -#define SQ_TBUFFER_STORE_FORMAT_X 0x00000004 -#define SQ_TBUFFER_STORE_FORMAT_XY 0x00000005 -#define SQ_TBUFFER_STORE_FORMAT_XYZ 0x00000006 -#define SQ_TBUFFER_STORE_FORMAT_XYZW 0x00000007 -#define SQ_TBUFFER_LOAD_FORMAT_D16_X 0x00000008 -#define SQ_TBUFFER_LOAD_FORMAT_D16_XY 0x00000009 -#define SQ_TBUFFER_LOAD_FORMAT_D16_XYZ 0x0000000a -#define 
SQ_TBUFFER_LOAD_FORMAT_D16_XYZW 0x0000000b -#define SQ_TBUFFER_STORE_FORMAT_D16_X 0x0000000c -#define SQ_TBUFFER_STORE_FORMAT_D16_XY 0x0000000d -#define SQ_TBUFFER_STORE_FORMAT_D16_XYZ 0x0000000e -#define SQ_TBUFFER_STORE_FORMAT_D16_XYZW 0x0000000f - -/* - * VALUE_SQ_SSRC_SPECIAL_EXECZ value - */ - -#define SQ_SRC_EXECZ 0x000000fc - -/* - * VALUE_SQ_OP_VOP3P value - */ - -#define SQ_V_PK_MAD_I16 0x00000000 -#define SQ_V_PK_MUL_LO_U16 0x00000001 -#define SQ_V_PK_ADD_I16 0x00000002 -#define SQ_V_PK_SUB_I16 0x00000003 -#define SQ_V_PK_LSHLREV_B16 0x00000004 -#define SQ_V_PK_LSHRREV_B16 0x00000005 -#define SQ_V_PK_ASHRREV_I16 0x00000006 -#define SQ_V_PK_MAX_I16 0x00000007 -#define SQ_V_PK_MIN_I16 0x00000008 -#define SQ_V_PK_MAD_U16 0x00000009 -#define SQ_V_PK_ADD_U16 0x0000000a -#define SQ_V_PK_SUB_U16 0x0000000b -#define SQ_V_PK_MAX_U16 0x0000000c -#define SQ_V_PK_MIN_U16 0x0000000d -#define SQ_V_PK_MAD_F16 0x0000000e -#define SQ_V_PK_ADD_F16 0x0000000f -#define SQ_V_PK_MUL_F16 0x00000010 -#define SQ_V_PK_MIN_F16 0x00000011 -#define SQ_V_PK_MAX_F16 0x00000012 -#define SQ_V_MAD_MIX_F32 0x00000020 -#define SQ_V_MAD_MIXLO_F16 0x00000021 -#define SQ_V_MAD_MIXHI_F16 0x00000022 - -/* - * VALUE_SQ_SYSMSG_OP value - */ - -#define SQ_SYSMSG_OP_ECC_ERR_INTERRUPT 0x00000001 -#define SQ_SYSMSG_OP_REG_RD 0x00000002 -#define SQ_SYSMSG_OP_HOST_TRAP_ACK 0x00000003 -#define SQ_SYSMSG_OP_TTRACE_PC 0x00000004 -#define SQ_SYSMSG_OP_ILLEGAL_INST_INTERRUPT 0x00000005 -#define SQ_SYSMSG_OP_MEMVIOL_INTERRUPT 0x00000006 - -/* - * VALUE_SQ_VCC value - */ - -#define SQ_VCC_ALL 0x00000000 - -/* - * VALUE_SQ_OP_SMEM value - */ - -#define SQ_S_LOAD_DWORD 0x00000000 -#define SQ_S_LOAD_DWORDX2 0x00000001 -#define SQ_S_LOAD_DWORDX4 0x00000002 -#define SQ_S_LOAD_DWORDX8 0x00000003 -#define SQ_S_LOAD_DWORDX16 0x00000004 -#define SQ_S_SCRATCH_LOAD_DWORD 0x00000005 -#define SQ_S_SCRATCH_LOAD_DWORDX2 0x00000006 -#define SQ_S_SCRATCH_LOAD_DWORDX4 0x00000007 -#define SQ_S_BUFFER_LOAD_DWORD 0x00000008 
-#define SQ_S_BUFFER_LOAD_DWORDX2 0x00000009 -#define SQ_S_BUFFER_LOAD_DWORDX4 0x0000000a -#define SQ_S_BUFFER_LOAD_DWORDX8 0x0000000b -#define SQ_S_BUFFER_LOAD_DWORDX16 0x0000000c -#define SQ_S_STORE_DWORD 0x00000010 -#define SQ_S_STORE_DWORDX2 0x00000011 -#define SQ_S_STORE_DWORDX4 0x00000012 -#define SQ_S_SCRATCH_STORE_DWORD 0x00000015 -#define SQ_S_SCRATCH_STORE_DWORDX2 0x00000016 -#define SQ_S_SCRATCH_STORE_DWORDX4 0x00000017 -#define SQ_S_BUFFER_STORE_DWORD 0x00000018 -#define SQ_S_BUFFER_STORE_DWORDX2 0x00000019 -#define SQ_S_BUFFER_STORE_DWORDX4 0x0000001a -#define SQ_S_DCACHE_INV 0x00000020 -#define SQ_S_DCACHE_WB 0x00000021 -#define SQ_S_DCACHE_INV_VOL 0x00000022 -#define SQ_S_DCACHE_WB_VOL 0x00000023 -#define SQ_S_MEMTIME 0x00000024 -#define SQ_S_MEMREALTIME 0x00000025 -#define SQ_S_ATC_PROBE 0x00000026 -#define SQ_S_ATC_PROBE_BUFFER 0x00000027 -#define SQ_S_BUFFER_ATOMIC_SWAP 0x00000040 -#define SQ_S_BUFFER_ATOMIC_CMPSWAP 0x00000041 -#define SQ_S_BUFFER_ATOMIC_ADD 0x00000042 -#define SQ_S_BUFFER_ATOMIC_SUB 0x00000043 -#define SQ_S_BUFFER_ATOMIC_SMIN 0x00000044 -#define SQ_S_BUFFER_ATOMIC_UMIN 0x00000045 -#define SQ_S_BUFFER_ATOMIC_SMAX 0x00000046 -#define SQ_S_BUFFER_ATOMIC_UMAX 0x00000047 -#define SQ_S_BUFFER_ATOMIC_AND 0x00000048 -#define SQ_S_BUFFER_ATOMIC_OR 0x00000049 -#define SQ_S_BUFFER_ATOMIC_XOR 0x0000004a -#define SQ_S_BUFFER_ATOMIC_INC 0x0000004b -#define SQ_S_BUFFER_ATOMIC_DEC 0x0000004c -#define SQ_S_BUFFER_ATOMIC_SWAP_X2 0x00000060 -#define SQ_S_BUFFER_ATOMIC_CMPSWAP_X2 0x00000061 -#define SQ_S_BUFFER_ATOMIC_ADD_X2 0x00000062 -#define SQ_S_BUFFER_ATOMIC_SUB_X2 0x00000063 -#define SQ_S_BUFFER_ATOMIC_SMIN_X2 0x00000064 -#define SQ_S_BUFFER_ATOMIC_UMIN_X2 0x00000065 -#define SQ_S_BUFFER_ATOMIC_SMAX_X2 0x00000066 -#define SQ_S_BUFFER_ATOMIC_UMAX_X2 0x00000067 -#define SQ_S_BUFFER_ATOMIC_AND_X2 0x00000068 -#define SQ_S_BUFFER_ATOMIC_OR_X2 0x00000069 -#define SQ_S_BUFFER_ATOMIC_XOR_X2 0x0000006a -#define SQ_S_BUFFER_ATOMIC_INC_X2 0x0000006b 
-#define SQ_S_BUFFER_ATOMIC_DEC_X2 0x0000006c -#define SQ_S_ATOMIC_SWAP 0x00000080 -#define SQ_S_ATOMIC_CMPSWAP 0x00000081 -#define SQ_S_ATOMIC_ADD 0x00000082 -#define SQ_S_ATOMIC_SUB 0x00000083 -#define SQ_S_ATOMIC_SMIN 0x00000084 -#define SQ_S_ATOMIC_UMIN 0x00000085 -#define SQ_S_ATOMIC_SMAX 0x00000086 -#define SQ_S_ATOMIC_UMAX 0x00000087 -#define SQ_S_ATOMIC_AND 0x00000088 -#define SQ_S_ATOMIC_OR 0x00000089 -#define SQ_S_ATOMIC_XOR 0x0000008a -#define SQ_S_ATOMIC_INC 0x0000008b -#define SQ_S_ATOMIC_DEC 0x0000008c -#define SQ_S_ATOMIC_SWAP_X2 0x000000a0 -#define SQ_S_ATOMIC_CMPSWAP_X2 0x000000a1 -#define SQ_S_ATOMIC_ADD_X2 0x000000a2 -#define SQ_S_ATOMIC_SUB_X2 0x000000a3 -#define SQ_S_ATOMIC_SMIN_X2 0x000000a4 -#define SQ_S_ATOMIC_UMIN_X2 0x000000a5 -#define SQ_S_ATOMIC_SMAX_X2 0x000000a6 -#define SQ_S_ATOMIC_UMAX_X2 0x000000a7 -#define SQ_S_ATOMIC_AND_X2 0x000000a8 -#define SQ_S_ATOMIC_OR_X2 0x000000a9 -#define SQ_S_ATOMIC_XOR_X2 0x000000aa -#define SQ_S_ATOMIC_INC_X2 0x000000ab -#define SQ_S_ATOMIC_DEC_X2 0x000000ac - -/* - * VALUE_SQ_OP_DS value - */ - -#define SQ_DS_ADD_U32 0x00000000 -#define SQ_DS_SUB_U32 0x00000001 -#define SQ_DS_RSUB_U32 0x00000002 -#define SQ_DS_INC_U32 0x00000003 -#define SQ_DS_DEC_U32 0x00000004 -#define SQ_DS_MIN_I32 0x00000005 -#define SQ_DS_MAX_I32 0x00000006 -#define SQ_DS_MIN_U32 0x00000007 -#define SQ_DS_MAX_U32 0x00000008 -#define SQ_DS_AND_B32 0x00000009 -#define SQ_DS_OR_B32 0x0000000a -#define SQ_DS_XOR_B32 0x0000000b -#define SQ_DS_MSKOR_B32 0x0000000c -#define SQ_DS_WRITE_B32 0x0000000d -#define SQ_DS_WRITE2_B32 0x0000000e -#define SQ_DS_WRITE2ST64_B32 0x0000000f -#define SQ_DS_CMPST_B32 0x00000010 -#define SQ_DS_CMPST_F32 0x00000011 -#define SQ_DS_MIN_F32 0x00000012 -#define SQ_DS_MAX_F32 0x00000013 -#define SQ_DS_NOP 0x00000014 -#define SQ_DS_ADD_F32 0x00000015 -#define SQ_DS_WRITE_ADDTID_B32 0x0000001d -#define SQ_DS_WRITE_B8 0x0000001e -#define SQ_DS_WRITE_B16 0x0000001f -#define SQ_DS_ADD_RTN_U32 0x00000020 -#define 
SQ_DS_SUB_RTN_U32 0x00000021 -#define SQ_DS_RSUB_RTN_U32 0x00000022 -#define SQ_DS_INC_RTN_U32 0x00000023 -#define SQ_DS_DEC_RTN_U32 0x00000024 -#define SQ_DS_MIN_RTN_I32 0x00000025 -#define SQ_DS_MAX_RTN_I32 0x00000026 -#define SQ_DS_MIN_RTN_U32 0x00000027 -#define SQ_DS_MAX_RTN_U32 0x00000028 -#define SQ_DS_AND_RTN_B32 0x00000029 -#define SQ_DS_OR_RTN_B32 0x0000002a -#define SQ_DS_XOR_RTN_B32 0x0000002b -#define SQ_DS_MSKOR_RTN_B32 0x0000002c -#define SQ_DS_WRXCHG_RTN_B32 0x0000002d -#define SQ_DS_WRXCHG2_RTN_B32 0x0000002e -#define SQ_DS_WRXCHG2ST64_RTN_B32 0x0000002f -#define SQ_DS_CMPST_RTN_B32 0x00000030 -#define SQ_DS_CMPST_RTN_F32 0x00000031 -#define SQ_DS_MIN_RTN_F32 0x00000032 -#define SQ_DS_MAX_RTN_F32 0x00000033 -#define SQ_DS_WRAP_RTN_B32 0x00000034 -#define SQ_DS_ADD_RTN_F32 0x00000035 -#define SQ_DS_READ_B32 0x00000036 -#define SQ_DS_READ2_B32 0x00000037 -#define SQ_DS_READ2ST64_B32 0x00000038 -#define SQ_DS_READ_I8 0x00000039 -#define SQ_DS_READ_U8 0x0000003a -#define SQ_DS_READ_I16 0x0000003b -#define SQ_DS_READ_U16 0x0000003c -#define SQ_DS_SWIZZLE_B32 0x0000003d -#define SQ_DS_PERMUTE_B32 0x0000003e -#define SQ_DS_BPERMUTE_B32 0x0000003f -#define SQ_DS_ADD_U64 0x00000040 -#define SQ_DS_SUB_U64 0x00000041 -#define SQ_DS_RSUB_U64 0x00000042 -#define SQ_DS_INC_U64 0x00000043 -#define SQ_DS_DEC_U64 0x00000044 -#define SQ_DS_MIN_I64 0x00000045 -#define SQ_DS_MAX_I64 0x00000046 -#define SQ_DS_MIN_U64 0x00000047 -#define SQ_DS_MAX_U64 0x00000048 -#define SQ_DS_AND_B64 0x00000049 -#define SQ_DS_OR_B64 0x0000004a -#define SQ_DS_XOR_B64 0x0000004b -#define SQ_DS_MSKOR_B64 0x0000004c -#define SQ_DS_WRITE_B64 0x0000004d -#define SQ_DS_WRITE2_B64 0x0000004e -#define SQ_DS_WRITE2ST64_B64 0x0000004f -#define SQ_DS_CMPST_B64 0x00000050 -#define SQ_DS_CMPST_F64 0x00000051 -#define SQ_DS_MIN_F64 0x00000052 -#define SQ_DS_MAX_F64 0x00000053 -#define SQ_DS_ADD_RTN_U64 0x00000060 -#define SQ_DS_SUB_RTN_U64 0x00000061 -#define SQ_DS_RSUB_RTN_U64 0x00000062 -#define 
SQ_DS_INC_RTN_U64 0x00000063 -#define SQ_DS_DEC_RTN_U64 0x00000064 -#define SQ_DS_MIN_RTN_I64 0x00000065 -#define SQ_DS_MAX_RTN_I64 0x00000066 -#define SQ_DS_MIN_RTN_U64 0x00000067 -#define SQ_DS_MAX_RTN_U64 0x00000068 -#define SQ_DS_AND_RTN_B64 0x00000069 -#define SQ_DS_OR_RTN_B64 0x0000006a -#define SQ_DS_XOR_RTN_B64 0x0000006b -#define SQ_DS_MSKOR_RTN_B64 0x0000006c -#define SQ_DS_WRXCHG_RTN_B64 0x0000006d -#define SQ_DS_WRXCHG2_RTN_B64 0x0000006e -#define SQ_DS_WRXCHG2ST64_RTN_B64 0x0000006f -#define SQ_DS_CMPST_RTN_B64 0x00000070 -#define SQ_DS_CMPST_RTN_F64 0x00000071 -#define SQ_DS_MIN_RTN_F64 0x00000072 -#define SQ_DS_MAX_RTN_F64 0x00000073 -#define SQ_DS_READ_B64 0x00000076 -#define SQ_DS_READ2_B64 0x00000077 -#define SQ_DS_READ2ST64_B64 0x00000078 -#define SQ_DS_CONDXCHG32_RTN_B64 0x0000007e -#define SQ_DS_ADD_SRC2_U32 0x00000080 -#define SQ_DS_SUB_SRC2_U32 0x00000081 -#define SQ_DS_RSUB_SRC2_U32 0x00000082 -#define SQ_DS_INC_SRC2_U32 0x00000083 -#define SQ_DS_DEC_SRC2_U32 0x00000084 -#define SQ_DS_MIN_SRC2_I32 0x00000085 -#define SQ_DS_MAX_SRC2_I32 0x00000086 -#define SQ_DS_MIN_SRC2_U32 0x00000087 -#define SQ_DS_MAX_SRC2_U32 0x00000088 -#define SQ_DS_AND_SRC2_B32 0x00000089 -#define SQ_DS_OR_SRC2_B32 0x0000008a -#define SQ_DS_XOR_SRC2_B32 0x0000008b -#define SQ_DS_WRITE_SRC2_B32 0x0000008d -#define SQ_DS_MIN_SRC2_F32 0x00000092 -#define SQ_DS_MAX_SRC2_F32 0x00000093 -#define SQ_DS_ADD_SRC2_F32 0x00000095 -#define SQ_DS_GWS_SEMA_RELEASE_ALL 0x00000098 -#define SQ_DS_GWS_INIT 0x00000099 -#define SQ_DS_GWS_SEMA_V 0x0000009a -#define SQ_DS_GWS_SEMA_BR 0x0000009b -#define SQ_DS_GWS_SEMA_P 0x0000009c -#define SQ_DS_GWS_BARRIER 0x0000009d -#define SQ_DS_READ_ADDTID_B32 0x000000b6 -#define SQ_DS_CONSUME 0x000000bd -#define SQ_DS_APPEND 0x000000be -#define SQ_DS_ORDERED_COUNT 0x000000bf -#define SQ_DS_ADD_SRC2_U64 0x000000c0 -#define SQ_DS_SUB_SRC2_U64 0x000000c1 -#define SQ_DS_RSUB_SRC2_U64 0x000000c2 -#define SQ_DS_INC_SRC2_U64 0x000000c3 -#define 
SQ_DS_DEC_SRC2_U64 0x000000c4 -#define SQ_DS_MIN_SRC2_I64 0x000000c5 -#define SQ_DS_MAX_SRC2_I64 0x000000c6 -#define SQ_DS_MIN_SRC2_U64 0x000000c7 -#define SQ_DS_MAX_SRC2_U64 0x000000c8 -#define SQ_DS_AND_SRC2_B64 0x000000c9 -#define SQ_DS_OR_SRC2_B64 0x000000ca -#define SQ_DS_XOR_SRC2_B64 0x000000cb -#define SQ_DS_WRITE_SRC2_B64 0x000000cd -#define SQ_DS_MIN_SRC2_F64 0x000000d2 -#define SQ_DS_MAX_SRC2_F64 0x000000d3 -#define SQ_DS_WRITE_B96 0x000000de -#define SQ_DS_WRITE_B128 0x000000df -#define SQ_DS_CONDXCHG32_RTN_B128 0x000000fd -#define SQ_DS_READ_B96 0x000000fe -#define SQ_DS_READ_B128 0x000000ff - -/* - * VALUE_SQ_SDWA_SEL value - */ - -#define SQ_SDWA_BYTE_0 0x00000000 -#define SQ_SDWA_BYTE_1 0x00000001 -#define SQ_SDWA_BYTE_2 0x00000002 -#define SQ_SDWA_BYTE_3 0x00000003 -#define SQ_SDWA_WORD_0 0x00000004 -#define SQ_SDWA_WORD_1 0x00000005 -#define SQ_SDWA_DWORD 0x00000006 - -/* - * VALUE_SQ_OP_VOP2 value - */ - -#define SQ_V_CNDMASK_B32 0x00000000 -#define SQ_V_ADD_F32 0x00000001 -#define SQ_V_SUB_F32 0x00000002 -#define SQ_V_SUBREV_F32 0x00000003 -#define SQ_V_MUL_LEGACY_F32 0x00000004 -#define SQ_V_MUL_F32 0x00000005 -#define SQ_V_MUL_I32_I24 0x00000006 -#define SQ_V_MUL_HI_I32_I24 0x00000007 -#define SQ_V_MUL_U32_U24 0x00000008 -#define SQ_V_MUL_HI_U32_U24 0x00000009 -#define SQ_V_MIN_F32 0x0000000a -#define SQ_V_MAX_F32 0x0000000b -#define SQ_V_MIN_I32 0x0000000c -#define SQ_V_MAX_I32 0x0000000d -#define SQ_V_MIN_U32 0x0000000e -#define SQ_V_MAX_U32 0x0000000f -#define SQ_V_LSHRREV_B32 0x00000010 -#define SQ_V_ASHRREV_I32 0x00000011 -#define SQ_V_LSHLREV_B32 0x00000012 -#define SQ_V_AND_B32 0x00000013 -#define SQ_V_OR_B32 0x00000014 -#define SQ_V_XOR_B32 0x00000015 -#define SQ_V_MAC_F32 0x00000016 -#define SQ_V_MADMK_F32 0x00000017 -#define SQ_V_MADAK_F32 0x00000018 -#define SQ_V_ADD_CO_U32 0x00000019 -#define SQ_V_SUB_CO_U32 0x0000001a -#define SQ_V_SUBREV_CO_U32 0x0000001b -#define SQ_V_ADDC_CO_U32 0x0000001c -#define SQ_V_SUBB_CO_U32 0x0000001d 
-#define SQ_V_SUBBREV_CO_U32 0x0000001e -#define SQ_V_ADD_F16 0x0000001f -#define SQ_V_SUB_F16 0x00000020 -#define SQ_V_SUBREV_F16 0x00000021 -#define SQ_V_MUL_F16 0x00000022 -#define SQ_V_MAC_F16 0x00000023 -#define SQ_V_MADMK_F16 0x00000024 -#define SQ_V_MADAK_F16 0x00000025 -#define SQ_V_ADD_U16 0x00000026 -#define SQ_V_SUB_U16 0x00000027 -#define SQ_V_SUBREV_U16 0x00000028 -#define SQ_V_MUL_LO_U16 0x00000029 -#define SQ_V_LSHLREV_B16 0x0000002a -#define SQ_V_LSHRREV_B16 0x0000002b -#define SQ_V_ASHRREV_I16 0x0000002c -#define SQ_V_MAX_F16 0x0000002d -#define SQ_V_MIN_F16 0x0000002e -#define SQ_V_MAX_U16 0x0000002f -#define SQ_V_MAX_I16 0x00000030 -#define SQ_V_MIN_U16 0x00000031 -#define SQ_V_MIN_I16 0x00000032 -#define SQ_V_LDEXP_F16 0x00000033 -#define SQ_V_ADD_U32 0x00000034 -#define SQ_V_SUB_U32 0x00000035 -#define SQ_V_SUBREV_U32 0x00000036 - -/* - * VALUE_SQ_SRC_VGPR value - */ - -#define SQ_SRC_VGPR0 0x00000100 - -/* - * VALUE_SQ_OP_SOPP value - */ - -#define SQ_S_NOP 0x00000000 -#define SQ_S_ENDPGM 0x00000001 -#define SQ_S_BRANCH 0x00000002 -#define SQ_S_WAKEUP 0x00000003 -#define SQ_S_CBRANCH_SCC0 0x00000004 -#define SQ_S_CBRANCH_SCC1 0x00000005 -#define SQ_S_CBRANCH_VCCZ 0x00000006 -#define SQ_S_CBRANCH_VCCNZ 0x00000007 -#define SQ_S_CBRANCH_EXECZ 0x00000008 -#define SQ_S_CBRANCH_EXECNZ 0x00000009 -#define SQ_S_BARRIER 0x0000000a -#define SQ_S_SETKILL 0x0000000b -#define SQ_S_WAITCNT 0x0000000c -#define SQ_S_SETHALT 0x0000000d -#define SQ_S_SLEEP 0x0000000e -#define SQ_S_SETPRIO 0x0000000f -#define SQ_S_SENDMSG 0x00000010 -#define SQ_S_SENDMSGHALT 0x00000011 -#define SQ_S_TRAP 0x00000012 -#define SQ_S_ICACHE_INV 0x00000013 -#define SQ_S_INCPERFLEVEL 0x00000014 -#define SQ_S_DECPERFLEVEL 0x00000015 -#define SQ_S_TTRACEDATA 0x00000016 -#define SQ_S_CBRANCH_CDBGSYS 0x00000017 -#define SQ_S_CBRANCH_CDBGUSER 0x00000018 -#define SQ_S_CBRANCH_CDBGSYS_OR_USER 0x00000019 -#define SQ_S_CBRANCH_CDBGSYS_AND_USER 0x0000001a -#define SQ_S_ENDPGM_SAVED 0x0000001b 
-#define SQ_S_SET_GPR_IDX_OFF 0x0000001c -#define SQ_S_SET_GPR_IDX_MODE 0x0000001d -#define SQ_S_ENDPGM_ORDERED_PS_DONE 0x0000001e - -/* - * VALUE_SQ_XNACK_MASK_LOHI value - */ - -#define SQ_XNACK_MASK_LO 0x00000068 -#define SQ_XNACK_MASK_HI 0x00000069 - -/* - * VALUE_SQ_SDWA_UNUSED value - */ - -#define SQ_SDWA_UNUSED_PAD 0x00000000 -#define SQ_SDWA_UNUSED_SEXT 0x00000001 -#define SQ_SDWA_UNUSED_PRESERVE 0x00000002 - -/* - * VALUE_SQ_OP_FLAT value - */ - -#define SQ_FLAT_LOAD_UBYTE 0x00000010 -#define SQ_FLAT_LOAD_SBYTE 0x00000011 -#define SQ_FLAT_LOAD_USHORT 0x00000012 -#define SQ_FLAT_LOAD_SSHORT 0x00000013 -#define SQ_FLAT_LOAD_DWORD 0x00000014 -#define SQ_FLAT_LOAD_DWORDX2 0x00000015 -#define SQ_FLAT_LOAD_DWORDX3 0x00000016 -#define SQ_FLAT_LOAD_DWORDX4 0x00000017 -#define SQ_FLAT_STORE_BYTE 0x00000018 -#define SQ_FLAT_STORE_SHORT 0x0000001a -#define SQ_FLAT_STORE_DWORD 0x0000001c -#define SQ_FLAT_STORE_DWORDX2 0x0000001d -#define SQ_FLAT_STORE_DWORDX3 0x0000001e -#define SQ_FLAT_STORE_DWORDX4 0x0000001f -#define SQ_FLAT_ATOMIC_SWAP 0x00000040 -#define SQ_FLAT_ATOMIC_CMPSWAP 0x00000041 -#define SQ_FLAT_ATOMIC_ADD 0x00000042 -#define SQ_FLAT_ATOMIC_SUB 0x00000043 -#define SQ_FLAT_ATOMIC_SMIN 0x00000044 -#define SQ_FLAT_ATOMIC_UMIN 0x00000045 -#define SQ_FLAT_ATOMIC_SMAX 0x00000046 -#define SQ_FLAT_ATOMIC_UMAX 0x00000047 -#define SQ_FLAT_ATOMIC_AND 0x00000048 -#define SQ_FLAT_ATOMIC_OR 0x00000049 -#define SQ_FLAT_ATOMIC_XOR 0x0000004a -#define SQ_FLAT_ATOMIC_INC 0x0000004b -#define SQ_FLAT_ATOMIC_DEC 0x0000004c -#define SQ_FLAT_ATOMIC_SWAP_X2 0x00000060 -#define SQ_FLAT_ATOMIC_CMPSWAP_X2 0x00000061 -#define SQ_FLAT_ATOMIC_ADD_X2 0x00000062 -#define SQ_FLAT_ATOMIC_SUB_X2 0x00000063 -#define SQ_FLAT_ATOMIC_SMIN_X2 0x00000064 -#define SQ_FLAT_ATOMIC_UMIN_X2 0x00000065 -#define SQ_FLAT_ATOMIC_SMAX_X2 0x00000066 -#define SQ_FLAT_ATOMIC_UMAX_X2 0x00000067 -#define SQ_FLAT_ATOMIC_AND_X2 0x00000068 -#define SQ_FLAT_ATOMIC_OR_X2 0x00000069 -#define SQ_FLAT_ATOMIC_XOR_X2 
0x0000006a -#define SQ_FLAT_ATOMIC_INC_X2 0x0000006b -#define SQ_FLAT_ATOMIC_DEC_X2 0x0000006c - -/* - * VALUE_SQ_OP_SOPC value - */ - -#define SQ_S_CMP_EQ_I32 0x00000000 -#define SQ_S_CMP_LG_I32 0x00000001 -#define SQ_S_CMP_GT_I32 0x00000002 -#define SQ_S_CMP_GE_I32 0x00000003 -#define SQ_S_CMP_LT_I32 0x00000004 -#define SQ_S_CMP_LE_I32 0x00000005 -#define SQ_S_CMP_EQ_U32 0x00000006 -#define SQ_S_CMP_LG_U32 0x00000007 -#define SQ_S_CMP_GT_U32 0x00000008 -#define SQ_S_CMP_GE_U32 0x00000009 -#define SQ_S_CMP_LT_U32 0x0000000a -#define SQ_S_CMP_LE_U32 0x0000000b -#define SQ_S_BITCMP0_B32 0x0000000c -#define SQ_S_BITCMP1_B32 0x0000000d -#define SQ_S_BITCMP0_B64 0x0000000e -#define SQ_S_BITCMP1_B64 0x0000000f -#define SQ_S_SETVSKIP 0x00000010 -#define SQ_S_SET_GPR_IDX_ON 0x00000011 -#define SQ_S_CMP_EQ_U64 0x00000012 -#define SQ_S_CMP_LG_U64 0x00000013 - -/* - * VALUE_SQ_PARAM value - */ - -#define SQ_PARAM_P10 0x00000000 -#define SQ_PARAM_P20 0x00000001 -#define SQ_PARAM_P0 0x00000002 - -/* - * VALUE_SQ_OP_FLAT_SCRATCH value - */ - -#define SQ_SCRATCH_LOAD_UBYTE 0x00000010 -#define SQ_SCRATCH_LOAD_SBYTE 0x00000011 -#define SQ_SCRATCH_LOAD_USHORT 0x00000012 -#define SQ_SCRATCH_LOAD_SSHORT 0x00000013 -#define SQ_SCRATCH_LOAD_DWORD 0x00000014 -#define SQ_SCRATCH_LOAD_DWORDX2 0x00000015 -#define SQ_SCRATCH_LOAD_DWORDX3 0x00000016 -#define SQ_SCRATCH_LOAD_DWORDX4 0x00000017 -#define SQ_SCRATCH_STORE_BYTE 0x00000018 -#define SQ_SCRATCH_STORE_SHORT 0x0000001a -#define SQ_SCRATCH_STORE_DWORD 0x0000001c -#define SQ_SCRATCH_STORE_DWORDX2 0x0000001d -#define SQ_SCRATCH_STORE_DWORDX3 0x0000001e -#define SQ_SCRATCH_STORE_DWORDX4 0x0000001f - -/* - * VALUE_SQ_SEG value - */ - -#define SQ_FLAT 0x00000000 -#define SQ_SCRATCH 0x00000001 -#define SQ_GLOBAL 0x00000002 - -/* - * VALUE_SQ_SSRC_0_63_INLINES value - */ - -#define SQ_SRC_0 0x00000080 -#define SQ_SRC_1_INT 0x00000081 -#define SQ_SRC_2_INT 0x00000082 -#define SQ_SRC_3_INT 0x00000083 -#define SQ_SRC_4_INT 0x00000084 -#define 
SQ_SRC_5_INT 0x00000085 -#define SQ_SRC_6_INT 0x00000086 -#define SQ_SRC_7_INT 0x00000087 -#define SQ_SRC_8_INT 0x00000088 -#define SQ_SRC_9_INT 0x00000089 -#define SQ_SRC_10_INT 0x0000008a -#define SQ_SRC_11_INT 0x0000008b -#define SQ_SRC_12_INT 0x0000008c -#define SQ_SRC_13_INT 0x0000008d -#define SQ_SRC_14_INT 0x0000008e -#define SQ_SRC_15_INT 0x0000008f -#define SQ_SRC_16_INT 0x00000090 -#define SQ_SRC_17_INT 0x00000091 -#define SQ_SRC_18_INT 0x00000092 -#define SQ_SRC_19_INT 0x00000093 -#define SQ_SRC_20_INT 0x00000094 -#define SQ_SRC_21_INT 0x00000095 -#define SQ_SRC_22_INT 0x00000096 -#define SQ_SRC_23_INT 0x00000097 -#define SQ_SRC_24_INT 0x00000098 -#define SQ_SRC_25_INT 0x00000099 -#define SQ_SRC_26_INT 0x0000009a -#define SQ_SRC_27_INT 0x0000009b -#define SQ_SRC_28_INT 0x0000009c -#define SQ_SRC_29_INT 0x0000009d -#define SQ_SRC_30_INT 0x0000009e -#define SQ_SRC_31_INT 0x0000009f -#define SQ_SRC_32_INT 0x000000a0 -#define SQ_SRC_33_INT 0x000000a1 -#define SQ_SRC_34_INT 0x000000a2 -#define SQ_SRC_35_INT 0x000000a3 -#define SQ_SRC_36_INT 0x000000a4 -#define SQ_SRC_37_INT 0x000000a5 -#define SQ_SRC_38_INT 0x000000a6 -#define SQ_SRC_39_INT 0x000000a7 -#define SQ_SRC_40_INT 0x000000a8 -#define SQ_SRC_41_INT 0x000000a9 -#define SQ_SRC_42_INT 0x000000aa -#define SQ_SRC_43_INT 0x000000ab -#define SQ_SRC_44_INT 0x000000ac -#define SQ_SRC_45_INT 0x000000ad -#define SQ_SRC_46_INT 0x000000ae -#define SQ_SRC_47_INT 0x000000af -#define SQ_SRC_48_INT 0x000000b0 -#define SQ_SRC_49_INT 0x000000b1 -#define SQ_SRC_50_INT 0x000000b2 -#define SQ_SRC_51_INT 0x000000b3 -#define SQ_SRC_52_INT 0x000000b4 -#define SQ_SRC_53_INT 0x000000b5 -#define SQ_SRC_54_INT 0x000000b6 -#define SQ_SRC_55_INT 0x000000b7 -#define SQ_SRC_56_INT 0x000000b8 -#define SQ_SRC_57_INT 0x000000b9 -#define SQ_SRC_58_INT 0x000000ba -#define SQ_SRC_59_INT 0x000000bb -#define SQ_SRC_60_INT 0x000000bc -#define SQ_SRC_61_INT 0x000000bd -#define SQ_SRC_62_INT 0x000000be -#define SQ_SRC_63_INT 0x000000bf - -/* - 
* VALUE_SQ_CNT value - */ - -#define SQ_CNT1 0x00000000 -#define SQ_CNT2 0x00000001 -#define SQ_CNT3 0x00000002 -#define SQ_CNT4 0x00000003 - -/******************************************************* - * DIDT Enums - *******************************************************/ - -/******************************************************* - * SX Enums - *******************************************************/ - -/* - * SX_BLEND_OPT enum - */ - -typedef enum SX_BLEND_OPT { -BLEND_OPT_PRESERVE_NONE_IGNORE_ALL = 0x00000000, -BLEND_OPT_PRESERVE_ALL_IGNORE_NONE = 0x00000001, -BLEND_OPT_PRESERVE_C1_IGNORE_C0 = 0x00000002, -BLEND_OPT_PRESERVE_C0_IGNORE_C1 = 0x00000003, -BLEND_OPT_PRESERVE_A1_IGNORE_A0 = 0x00000004, -BLEND_OPT_PRESERVE_A0_IGNORE_A1 = 0x00000005, -BLEND_OPT_PRESERVE_NONE_IGNORE_A0 = 0x00000006, -BLEND_OPT_PRESERVE_NONE_IGNORE_NONE = 0x00000007, -} SX_BLEND_OPT; - -/* - * SX_OPT_COMB_FCN enum - */ - -typedef enum SX_OPT_COMB_FCN { -OPT_COMB_NONE = 0x00000000, -OPT_COMB_ADD = 0x00000001, -OPT_COMB_SUBTRACT = 0x00000002, -OPT_COMB_MIN = 0x00000003, -OPT_COMB_MAX = 0x00000004, -OPT_COMB_REVSUBTRACT = 0x00000005, -OPT_COMB_BLEND_DISABLED = 0x00000006, -OPT_COMB_SAFE_ADD = 0x00000007, -} SX_OPT_COMB_FCN; - -/* - * SX_DOWNCONVERT_FORMAT enum - */ - -typedef enum SX_DOWNCONVERT_FORMAT { -SX_RT_EXPORT_NO_CONVERSION = 0x00000000, -SX_RT_EXPORT_32_R = 0x00000001, -SX_RT_EXPORT_32_A = 0x00000002, -SX_RT_EXPORT_10_11_11 = 0x00000003, -SX_RT_EXPORT_2_10_10_10 = 0x00000004, -SX_RT_EXPORT_8_8_8_8 = 0x00000005, -SX_RT_EXPORT_5_6_5 = 0x00000006, -SX_RT_EXPORT_1_5_5_5 = 0x00000007, -SX_RT_EXPORT_4_4_4_4 = 0x00000008, -SX_RT_EXPORT_16_16_GR = 0x00000009, -SX_RT_EXPORT_16_16_AR = 0x0000000a, -} SX_DOWNCONVERT_FORMAT; - -/* - * SX_PERFCOUNTER_VALS enum - */ - -typedef enum SX_PERFCOUNTER_VALS { -SX_PERF_SEL_PA_IDLE_CYCLES = 0x00000000, -SX_PERF_SEL_PA_REQ = 0x00000001, -SX_PERF_SEL_PA_POS = 0x00000002, -SX_PERF_SEL_CLOCK = 0x00000003, -SX_PERF_SEL_GATE_EN1 = 0x00000004, 
-SX_PERF_SEL_GATE_EN2 = 0x00000005, -SX_PERF_SEL_GATE_EN3 = 0x00000006, -SX_PERF_SEL_GATE_EN4 = 0x00000007, -SX_PERF_SEL_SH_POS_STARVE = 0x00000008, -SX_PERF_SEL_SH_COLOR_STARVE = 0x00000009, -SX_PERF_SEL_SH_POS_STALL = 0x0000000a, -SX_PERF_SEL_SH_COLOR_STALL = 0x0000000b, -SX_PERF_SEL_DB0_PIXELS = 0x0000000c, -SX_PERF_SEL_DB0_HALF_QUADS = 0x0000000d, -SX_PERF_SEL_DB0_PIXEL_STALL = 0x0000000e, -SX_PERF_SEL_DB0_PIXEL_IDLE = 0x0000000f, -SX_PERF_SEL_DB0_PRED_PIXELS = 0x00000010, -SX_PERF_SEL_DB1_PIXELS = 0x00000011, -SX_PERF_SEL_DB1_HALF_QUADS = 0x00000012, -SX_PERF_SEL_DB1_PIXEL_STALL = 0x00000013, -SX_PERF_SEL_DB1_PIXEL_IDLE = 0x00000014, -SX_PERF_SEL_DB1_PRED_PIXELS = 0x00000015, -SX_PERF_SEL_DB2_PIXELS = 0x00000016, -SX_PERF_SEL_DB2_HALF_QUADS = 0x00000017, -SX_PERF_SEL_DB2_PIXEL_STALL = 0x00000018, -SX_PERF_SEL_DB2_PIXEL_IDLE = 0x00000019, -SX_PERF_SEL_DB2_PRED_PIXELS = 0x0000001a, -SX_PERF_SEL_DB3_PIXELS = 0x0000001b, -SX_PERF_SEL_DB3_HALF_QUADS = 0x0000001c, -SX_PERF_SEL_DB3_PIXEL_STALL = 0x0000001d, -SX_PERF_SEL_DB3_PIXEL_IDLE = 0x0000001e, -SX_PERF_SEL_DB3_PRED_PIXELS = 0x0000001f, -SX_PERF_SEL_COL_BUSY = 0x00000020, -SX_PERF_SEL_POS_BUSY = 0x00000021, -SX_PERF_SEL_DB0_A2M_DISCARD_QUADS = 0x00000022, -SX_PERF_SEL_DB0_MRT0_BLEND_BYPASS = 0x00000023, -SX_PERF_SEL_DB0_MRT0_DONT_RD_DEST = 0x00000024, -SX_PERF_SEL_DB0_MRT0_DISCARD_SRC = 0x00000025, -SX_PERF_SEL_DB0_MRT0_SINGLE_QUADS = 0x00000026, -SX_PERF_SEL_DB0_MRT0_DOUBLE_QUADS = 0x00000027, -SX_PERF_SEL_DB0_MRT1_BLEND_BYPASS = 0x00000028, -SX_PERF_SEL_DB0_MRT1_DONT_RD_DEST = 0x00000029, -SX_PERF_SEL_DB0_MRT1_DISCARD_SRC = 0x0000002a, -SX_PERF_SEL_DB0_MRT1_SINGLE_QUADS = 0x0000002b, -SX_PERF_SEL_DB0_MRT1_DOUBLE_QUADS = 0x0000002c, -SX_PERF_SEL_DB0_MRT2_BLEND_BYPASS = 0x0000002d, -SX_PERF_SEL_DB0_MRT2_DONT_RD_DEST = 0x0000002e, -SX_PERF_SEL_DB0_MRT2_DISCARD_SRC = 0x0000002f, -SX_PERF_SEL_DB0_MRT2_SINGLE_QUADS = 0x00000030, -SX_PERF_SEL_DB0_MRT2_DOUBLE_QUADS = 0x00000031, -SX_PERF_SEL_DB0_MRT3_BLEND_BYPASS = 
0x00000032, -SX_PERF_SEL_DB0_MRT3_DONT_RD_DEST = 0x00000033, -SX_PERF_SEL_DB0_MRT3_DISCARD_SRC = 0x00000034, -SX_PERF_SEL_DB0_MRT3_SINGLE_QUADS = 0x00000035, -SX_PERF_SEL_DB0_MRT3_DOUBLE_QUADS = 0x00000036, -SX_PERF_SEL_DB0_MRT4_BLEND_BYPASS = 0x00000037, -SX_PERF_SEL_DB0_MRT4_DONT_RD_DEST = 0x00000038, -SX_PERF_SEL_DB0_MRT4_DISCARD_SRC = 0x00000039, -SX_PERF_SEL_DB0_MRT4_SINGLE_QUADS = 0x0000003a, -SX_PERF_SEL_DB0_MRT4_DOUBLE_QUADS = 0x0000003b, -SX_PERF_SEL_DB0_MRT5_BLEND_BYPASS = 0x0000003c, -SX_PERF_SEL_DB0_MRT5_DONT_RD_DEST = 0x0000003d, -SX_PERF_SEL_DB0_MRT5_DISCARD_SRC = 0x0000003e, -SX_PERF_SEL_DB0_MRT5_SINGLE_QUADS = 0x0000003f, -SX_PERF_SEL_DB0_MRT5_DOUBLE_QUADS = 0x00000040, -SX_PERF_SEL_DB0_MRT6_BLEND_BYPASS = 0x00000041, -SX_PERF_SEL_DB0_MRT6_DONT_RD_DEST = 0x00000042, -SX_PERF_SEL_DB0_MRT6_DISCARD_SRC = 0x00000043, -SX_PERF_SEL_DB0_MRT6_SINGLE_QUADS = 0x00000044, -SX_PERF_SEL_DB0_MRT6_DOUBLE_QUADS = 0x00000045, -SX_PERF_SEL_DB0_MRT7_BLEND_BYPASS = 0x00000046, -SX_PERF_SEL_DB0_MRT7_DONT_RD_DEST = 0x00000047, -SX_PERF_SEL_DB0_MRT7_DISCARD_SRC = 0x00000048, -SX_PERF_SEL_DB0_MRT7_SINGLE_QUADS = 0x00000049, -SX_PERF_SEL_DB0_MRT7_DOUBLE_QUADS = 0x0000004a, -SX_PERF_SEL_DB1_A2M_DISCARD_QUADS = 0x0000004b, -SX_PERF_SEL_DB1_MRT0_BLEND_BYPASS = 0x0000004c, -SX_PERF_SEL_DB1_MRT0_DONT_RD_DEST = 0x0000004d, -SX_PERF_SEL_DB1_MRT0_DISCARD_SRC = 0x0000004e, -SX_PERF_SEL_DB1_MRT0_SINGLE_QUADS = 0x0000004f, -SX_PERF_SEL_DB1_MRT0_DOUBLE_QUADS = 0x00000050, -SX_PERF_SEL_DB1_MRT1_BLEND_BYPASS = 0x00000051, -SX_PERF_SEL_DB1_MRT1_DONT_RD_DEST = 0x00000052, -SX_PERF_SEL_DB1_MRT1_DISCARD_SRC = 0x00000053, -SX_PERF_SEL_DB1_MRT1_SINGLE_QUADS = 0x00000054, -SX_PERF_SEL_DB1_MRT1_DOUBLE_QUADS = 0x00000055, -SX_PERF_SEL_DB1_MRT2_BLEND_BYPASS = 0x00000056, -SX_PERF_SEL_DB1_MRT2_DONT_RD_DEST = 0x00000057, -SX_PERF_SEL_DB1_MRT2_DISCARD_SRC = 0x00000058, -SX_PERF_SEL_DB1_MRT2_SINGLE_QUADS = 0x00000059, -SX_PERF_SEL_DB1_MRT2_DOUBLE_QUADS = 0x0000005a, -SX_PERF_SEL_DB1_MRT3_BLEND_BYPASS 
= 0x0000005b, -SX_PERF_SEL_DB1_MRT3_DONT_RD_DEST = 0x0000005c, -SX_PERF_SEL_DB1_MRT3_DISCARD_SRC = 0x0000005d, -SX_PERF_SEL_DB1_MRT3_SINGLE_QUADS = 0x0000005e, -SX_PERF_SEL_DB1_MRT3_DOUBLE_QUADS = 0x0000005f, -SX_PERF_SEL_DB1_MRT4_BLEND_BYPASS = 0x00000060, -SX_PERF_SEL_DB1_MRT4_DONT_RD_DEST = 0x00000061, -SX_PERF_SEL_DB1_MRT4_DISCARD_SRC = 0x00000062, -SX_PERF_SEL_DB1_MRT4_SINGLE_QUADS = 0x00000063, -SX_PERF_SEL_DB1_MRT4_DOUBLE_QUADS = 0x00000064, -SX_PERF_SEL_DB1_MRT5_BLEND_BYPASS = 0x00000065, -SX_PERF_SEL_DB1_MRT5_DONT_RD_DEST = 0x00000066, -SX_PERF_SEL_DB1_MRT5_DISCARD_SRC = 0x00000067, -SX_PERF_SEL_DB1_MRT5_SINGLE_QUADS = 0x00000068, -SX_PERF_SEL_DB1_MRT5_DOUBLE_QUADS = 0x00000069, -SX_PERF_SEL_DB1_MRT6_BLEND_BYPASS = 0x0000006a, -SX_PERF_SEL_DB1_MRT6_DONT_RD_DEST = 0x0000006b, -SX_PERF_SEL_DB1_MRT6_DISCARD_SRC = 0x0000006c, -SX_PERF_SEL_DB1_MRT6_SINGLE_QUADS = 0x0000006d, -SX_PERF_SEL_DB1_MRT6_DOUBLE_QUADS = 0x0000006e, -SX_PERF_SEL_DB1_MRT7_BLEND_BYPASS = 0x0000006f, -SX_PERF_SEL_DB1_MRT7_DONT_RD_DEST = 0x00000070, -SX_PERF_SEL_DB1_MRT7_DISCARD_SRC = 0x00000071, -SX_PERF_SEL_DB1_MRT7_SINGLE_QUADS = 0x00000072, -SX_PERF_SEL_DB1_MRT7_DOUBLE_QUADS = 0x00000073, -SX_PERF_SEL_DB2_A2M_DISCARD_QUADS = 0x00000074, -SX_PERF_SEL_DB2_MRT0_BLEND_BYPASS = 0x00000075, -SX_PERF_SEL_DB2_MRT0_DONT_RD_DEST = 0x00000076, -SX_PERF_SEL_DB2_MRT0_DISCARD_SRC = 0x00000077, -SX_PERF_SEL_DB2_MRT0_SINGLE_QUADS = 0x00000078, -SX_PERF_SEL_DB2_MRT0_DOUBLE_QUADS = 0x00000079, -SX_PERF_SEL_DB2_MRT1_BLEND_BYPASS = 0x0000007a, -SX_PERF_SEL_DB2_MRT1_DONT_RD_DEST = 0x0000007b, -SX_PERF_SEL_DB2_MRT1_DISCARD_SRC = 0x0000007c, -SX_PERF_SEL_DB2_MRT1_SINGLE_QUADS = 0x0000007d, -SX_PERF_SEL_DB2_MRT1_DOUBLE_QUADS = 0x0000007e, -SX_PERF_SEL_DB2_MRT2_BLEND_BYPASS = 0x0000007f, -SX_PERF_SEL_DB2_MRT2_DONT_RD_DEST = 0x00000080, -SX_PERF_SEL_DB2_MRT2_DISCARD_SRC = 0x00000081, -SX_PERF_SEL_DB2_MRT2_SINGLE_QUADS = 0x00000082, -SX_PERF_SEL_DB2_MRT2_DOUBLE_QUADS = 0x00000083, 
-SX_PERF_SEL_DB2_MRT3_BLEND_BYPASS = 0x00000084, -SX_PERF_SEL_DB2_MRT3_DONT_RD_DEST = 0x00000085, -SX_PERF_SEL_DB2_MRT3_DISCARD_SRC = 0x00000086, -SX_PERF_SEL_DB2_MRT3_SINGLE_QUADS = 0x00000087, -SX_PERF_SEL_DB2_MRT3_DOUBLE_QUADS = 0x00000088, -SX_PERF_SEL_DB2_MRT4_BLEND_BYPASS = 0x00000089, -SX_PERF_SEL_DB2_MRT4_DONT_RD_DEST = 0x0000008a, -SX_PERF_SEL_DB2_MRT4_DISCARD_SRC = 0x0000008b, -SX_PERF_SEL_DB2_MRT4_SINGLE_QUADS = 0x0000008c, -SX_PERF_SEL_DB2_MRT4_DOUBLE_QUADS = 0x0000008d, -SX_PERF_SEL_DB2_MRT5_BLEND_BYPASS = 0x0000008e, -SX_PERF_SEL_DB2_MRT5_DONT_RD_DEST = 0x0000008f, -SX_PERF_SEL_DB2_MRT5_DISCARD_SRC = 0x00000090, -SX_PERF_SEL_DB2_MRT5_SINGLE_QUADS = 0x00000091, -SX_PERF_SEL_DB2_MRT5_DOUBLE_QUADS = 0x00000092, -SX_PERF_SEL_DB2_MRT6_BLEND_BYPASS = 0x00000093, -SX_PERF_SEL_DB2_MRT6_DONT_RD_DEST = 0x00000094, -SX_PERF_SEL_DB2_MRT6_DISCARD_SRC = 0x00000095, -SX_PERF_SEL_DB2_MRT6_SINGLE_QUADS = 0x00000096, -SX_PERF_SEL_DB2_MRT6_DOUBLE_QUADS = 0x00000097, -SX_PERF_SEL_DB2_MRT7_BLEND_BYPASS = 0x00000098, -SX_PERF_SEL_DB2_MRT7_DONT_RD_DEST = 0x00000099, -SX_PERF_SEL_DB2_MRT7_DISCARD_SRC = 0x0000009a, -SX_PERF_SEL_DB2_MRT7_SINGLE_QUADS = 0x0000009b, -SX_PERF_SEL_DB2_MRT7_DOUBLE_QUADS = 0x0000009c, -SX_PERF_SEL_DB3_A2M_DISCARD_QUADS = 0x0000009d, -SX_PERF_SEL_DB3_MRT0_BLEND_BYPASS = 0x0000009e, -SX_PERF_SEL_DB3_MRT0_DONT_RD_DEST = 0x0000009f, -SX_PERF_SEL_DB3_MRT0_DISCARD_SRC = 0x000000a0, -SX_PERF_SEL_DB3_MRT0_SINGLE_QUADS = 0x000000a1, -SX_PERF_SEL_DB3_MRT0_DOUBLE_QUADS = 0x000000a2, -SX_PERF_SEL_DB3_MRT1_BLEND_BYPASS = 0x000000a3, -SX_PERF_SEL_DB3_MRT1_DONT_RD_DEST = 0x000000a4, -SX_PERF_SEL_DB3_MRT1_DISCARD_SRC = 0x000000a5, -SX_PERF_SEL_DB3_MRT1_SINGLE_QUADS = 0x000000a6, -SX_PERF_SEL_DB3_MRT1_DOUBLE_QUADS = 0x000000a7, -SX_PERF_SEL_DB3_MRT2_BLEND_BYPASS = 0x000000a8, -SX_PERF_SEL_DB3_MRT2_DONT_RD_DEST = 0x000000a9, -SX_PERF_SEL_DB3_MRT2_DISCARD_SRC = 0x000000aa, -SX_PERF_SEL_DB3_MRT2_SINGLE_QUADS = 0x000000ab, -SX_PERF_SEL_DB3_MRT2_DOUBLE_QUADS = 
0x000000ac, -SX_PERF_SEL_DB3_MRT3_BLEND_BYPASS = 0x000000ad, -SX_PERF_SEL_DB3_MRT3_DONT_RD_DEST = 0x000000ae, -SX_PERF_SEL_DB3_MRT3_DISCARD_SRC = 0x000000af, -SX_PERF_SEL_DB3_MRT3_SINGLE_QUADS = 0x000000b0, -SX_PERF_SEL_DB3_MRT3_DOUBLE_QUADS = 0x000000b1, -SX_PERF_SEL_DB3_MRT4_BLEND_BYPASS = 0x000000b2, -SX_PERF_SEL_DB3_MRT4_DONT_RD_DEST = 0x000000b3, -SX_PERF_SEL_DB3_MRT4_DISCARD_SRC = 0x000000b4, -SX_PERF_SEL_DB3_MRT4_SINGLE_QUADS = 0x000000b5, -SX_PERF_SEL_DB3_MRT4_DOUBLE_QUADS = 0x000000b6, -SX_PERF_SEL_DB3_MRT5_BLEND_BYPASS = 0x000000b7, -SX_PERF_SEL_DB3_MRT5_DONT_RD_DEST = 0x000000b8, -SX_PERF_SEL_DB3_MRT5_DISCARD_SRC = 0x000000b9, -SX_PERF_SEL_DB3_MRT5_SINGLE_QUADS = 0x000000ba, -SX_PERF_SEL_DB3_MRT5_DOUBLE_QUADS = 0x000000bb, -SX_PERF_SEL_DB3_MRT6_BLEND_BYPASS = 0x000000bc, -SX_PERF_SEL_DB3_MRT6_DONT_RD_DEST = 0x000000bd, -SX_PERF_SEL_DB3_MRT6_DISCARD_SRC = 0x000000be, -SX_PERF_SEL_DB3_MRT6_SINGLE_QUADS = 0x000000bf, -SX_PERF_SEL_DB3_MRT6_DOUBLE_QUADS = 0x000000c0, -SX_PERF_SEL_DB3_MRT7_BLEND_BYPASS = 0x000000c1, -SX_PERF_SEL_DB3_MRT7_DONT_RD_DEST = 0x000000c2, -SX_PERF_SEL_DB3_MRT7_DISCARD_SRC = 0x000000c3, -SX_PERF_SEL_DB3_MRT7_SINGLE_QUADS = 0x000000c4, -SX_PERF_SEL_DB3_MRT7_DOUBLE_QUADS = 0x000000c5, -} SX_PERFCOUNTER_VALS; - -/******************************************************* - * DB Enums - *******************************************************/ - -/* - * ForceControl enum - */ - -typedef enum ForceControl { -FORCE_OFF = 0x00000000, -FORCE_ENABLE = 0x00000001, -FORCE_DISABLE = 0x00000002, -FORCE_RESERVED = 0x00000003, -} ForceControl; - -/* - * ZSamplePosition enum - */ - -typedef enum ZSamplePosition { -Z_SAMPLE_CENTER = 0x00000000, -Z_SAMPLE_CENTROID = 0x00000001, -} ZSamplePosition; - -/* - * ZOrder enum - */ - -typedef enum ZOrder { -LATE_Z = 0x00000000, -EARLY_Z_THEN_LATE_Z = 0x00000001, -RE_Z = 0x00000002, -EARLY_Z_THEN_RE_Z = 0x00000003, -} ZOrder; - -/* - * ZpassControl enum - */ - -typedef enum ZpassControl { -ZPASS_DISABLE = 
0x00000000, -ZPASS_SAMPLES = 0x00000001, -ZPASS_PIXELS = 0x00000002, -} ZpassControl; - -/* - * ZModeForce enum - */ - -typedef enum ZModeForce { -NO_FORCE = 0x00000000, -FORCE_EARLY_Z = 0x00000001, -FORCE_LATE_Z = 0x00000002, -FORCE_RE_Z = 0x00000003, -} ZModeForce; - -/* - * ZLimitSumm enum - */ - -typedef enum ZLimitSumm { -FORCE_SUMM_OFF = 0x00000000, -FORCE_SUMM_MINZ = 0x00000001, -FORCE_SUMM_MAXZ = 0x00000002, -FORCE_SUMM_BOTH = 0x00000003, -} ZLimitSumm; - -/* - * CompareFrag enum - */ - -typedef enum CompareFrag { -FRAG_NEVER = 0x00000000, -FRAG_LESS = 0x00000001, -FRAG_EQUAL = 0x00000002, -FRAG_LEQUAL = 0x00000003, -FRAG_GREATER = 0x00000004, -FRAG_NOTEQUAL = 0x00000005, -FRAG_GEQUAL = 0x00000006, -FRAG_ALWAYS = 0x00000007, -} CompareFrag; - -/* - * StencilOp enum - */ - -typedef enum StencilOp { -STENCIL_KEEP = 0x00000000, -STENCIL_ZERO = 0x00000001, -STENCIL_ONES = 0x00000002, -STENCIL_REPLACE_TEST = 0x00000003, -STENCIL_REPLACE_OP = 0x00000004, -STENCIL_ADD_CLAMP = 0x00000005, -STENCIL_SUB_CLAMP = 0x00000006, -STENCIL_INVERT = 0x00000007, -STENCIL_ADD_WRAP = 0x00000008, -STENCIL_SUB_WRAP = 0x00000009, -STENCIL_AND = 0x0000000a, -STENCIL_OR = 0x0000000b, -STENCIL_XOR = 0x0000000c, -STENCIL_NAND = 0x0000000d, -STENCIL_NOR = 0x0000000e, -STENCIL_XNOR = 0x0000000f, -} StencilOp; - -/* - * ConservativeZExport enum - */ - -typedef enum ConservativeZExport { -EXPORT_ANY_Z = 0x00000000, -EXPORT_LESS_THAN_Z = 0x00000001, -EXPORT_GREATER_THAN_Z = 0x00000002, -EXPORT_RESERVED = 0x00000003, -} ConservativeZExport; - -/* - * DbPSLControl enum - */ - -typedef enum DbPSLControl { -PSLC_AUTO = 0x00000000, -PSLC_ON_HANG_ONLY = 0x00000001, -PSLC_ASAP = 0x00000002, -PSLC_COUNTDOWN = 0x00000003, -} DbPSLControl; - -/* - * DbPRTFaultBehavior enum - */ - -typedef enum DbPRTFaultBehavior { -FAULT_ZERO = 0x00000000, -FAULT_ONE = 0x00000001, -FAULT_FAIL = 0x00000002, -FAULT_PASS = 0x00000003, -} DbPRTFaultBehavior; - -/* - * PerfCounter_Vals enum - */ - -typedef enum 
PerfCounter_Vals { -DB_PERF_SEL_SC_DB_tile_sends = 0x00000000, -DB_PERF_SEL_SC_DB_tile_busy = 0x00000001, -DB_PERF_SEL_SC_DB_tile_stalls = 0x00000002, -DB_PERF_SEL_SC_DB_tile_events = 0x00000003, -DB_PERF_SEL_SC_DB_tile_tiles = 0x00000004, -DB_PERF_SEL_SC_DB_tile_covered = 0x00000005, -DB_PERF_SEL_hiz_tc_read_starved = 0x00000006, -DB_PERF_SEL_hiz_tc_write_stall = 0x00000007, -DB_PERF_SEL_hiz_qtiles_culled = 0x00000008, -DB_PERF_SEL_his_qtiles_culled = 0x00000009, -DB_PERF_SEL_DB_SC_tile_sends = 0x0000000a, -DB_PERF_SEL_DB_SC_tile_busy = 0x0000000b, -DB_PERF_SEL_DB_SC_tile_stalls = 0x0000000c, -DB_PERF_SEL_DB_SC_tile_df_stalls = 0x0000000d, -DB_PERF_SEL_DB_SC_tile_tiles = 0x0000000e, -DB_PERF_SEL_DB_SC_tile_culled = 0x0000000f, -DB_PERF_SEL_DB_SC_tile_hier_kill = 0x00000010, -DB_PERF_SEL_DB_SC_tile_fast_ops = 0x00000011, -DB_PERF_SEL_DB_SC_tile_no_ops = 0x00000012, -DB_PERF_SEL_DB_SC_tile_tile_rate = 0x00000013, -DB_PERF_SEL_DB_SC_tile_ssaa_kill = 0x00000014, -DB_PERF_SEL_DB_SC_tile_fast_z_ops = 0x00000015, -DB_PERF_SEL_DB_SC_tile_fast_stencil_ops = 0x00000016, -DB_PERF_SEL_SC_DB_quad_sends = 0x00000017, -DB_PERF_SEL_SC_DB_quad_busy = 0x00000018, -DB_PERF_SEL_SC_DB_quad_squads = 0x00000019, -DB_PERF_SEL_SC_DB_quad_tiles = 0x0000001a, -DB_PERF_SEL_SC_DB_quad_pixels = 0x0000001b, -DB_PERF_SEL_SC_DB_quad_killed_tiles = 0x0000001c, -DB_PERF_SEL_DB_SC_quad_sends = 0x0000001d, -DB_PERF_SEL_DB_SC_quad_busy = 0x0000001e, -DB_PERF_SEL_DB_SC_quad_stalls = 0x0000001f, -DB_PERF_SEL_DB_SC_quad_tiles = 0x00000020, -DB_PERF_SEL_DB_SC_quad_lit_quad = 0x00000021, -DB_PERF_SEL_DB_CB_tile_sends = 0x00000022, -DB_PERF_SEL_DB_CB_tile_busy = 0x00000023, -DB_PERF_SEL_DB_CB_tile_stalls = 0x00000024, -DB_PERF_SEL_SX_DB_quad_sends = 0x00000025, -DB_PERF_SEL_SX_DB_quad_busy = 0x00000026, -DB_PERF_SEL_SX_DB_quad_stalls = 0x00000027, -DB_PERF_SEL_SX_DB_quad_quads = 0x00000028, -DB_PERF_SEL_SX_DB_quad_pixels = 0x00000029, -DB_PERF_SEL_SX_DB_quad_exports = 0x0000002a, 
-DB_PERF_SEL_SH_quads_outstanding_sum = 0x0000002b, -DB_PERF_SEL_DB_CB_lquad_sends = 0x0000002c, -DB_PERF_SEL_DB_CB_lquad_busy = 0x0000002d, -DB_PERF_SEL_DB_CB_lquad_stalls = 0x0000002e, -DB_PERF_SEL_DB_CB_lquad_quads = 0x0000002f, -DB_PERF_SEL_tile_rd_sends = 0x00000030, -DB_PERF_SEL_mi_tile_rd_outstanding_sum = 0x00000031, -DB_PERF_SEL_quad_rd_sends = 0x00000032, -DB_PERF_SEL_quad_rd_busy = 0x00000033, -DB_PERF_SEL_quad_rd_mi_stall = 0x00000034, -DB_PERF_SEL_quad_rd_rw_collision = 0x00000035, -DB_PERF_SEL_quad_rd_tag_stall = 0x00000036, -DB_PERF_SEL_quad_rd_32byte_reqs = 0x00000037, -DB_PERF_SEL_quad_rd_panic = 0x00000038, -DB_PERF_SEL_mi_quad_rd_outstanding_sum = 0x00000039, -DB_PERF_SEL_quad_rdret_sends = 0x0000003a, -DB_PERF_SEL_quad_rdret_busy = 0x0000003b, -DB_PERF_SEL_tile_wr_sends = 0x0000003c, -DB_PERF_SEL_tile_wr_acks = 0x0000003d, -DB_PERF_SEL_mi_tile_wr_outstanding_sum = 0x0000003e, -DB_PERF_SEL_quad_wr_sends = 0x0000003f, -DB_PERF_SEL_quad_wr_busy = 0x00000040, -DB_PERF_SEL_quad_wr_mi_stall = 0x00000041, -DB_PERF_SEL_quad_wr_coherency_stall = 0x00000042, -DB_PERF_SEL_quad_wr_acks = 0x00000043, -DB_PERF_SEL_mi_quad_wr_outstanding_sum = 0x00000044, -DB_PERF_SEL_Tile_Cache_misses = 0x00000045, -DB_PERF_SEL_Tile_Cache_hits = 0x00000046, -DB_PERF_SEL_Tile_Cache_flushes = 0x00000047, -DB_PERF_SEL_Tile_Cache_surface_stall = 0x00000048, -DB_PERF_SEL_Tile_Cache_starves = 0x00000049, -DB_PERF_SEL_Tile_Cache_mem_return_starve = 0x0000004a, -DB_PERF_SEL_tcp_dispatcher_reads = 0x0000004b, -DB_PERF_SEL_tcp_prefetcher_reads = 0x0000004c, -DB_PERF_SEL_tcp_preloader_reads = 0x0000004d, -DB_PERF_SEL_tcp_dispatcher_flushes = 0x0000004e, -DB_PERF_SEL_tcp_prefetcher_flushes = 0x0000004f, -DB_PERF_SEL_tcp_preloader_flushes = 0x00000050, -DB_PERF_SEL_Depth_Tile_Cache_sends = 0x00000051, -DB_PERF_SEL_Depth_Tile_Cache_busy = 0x00000052, -DB_PERF_SEL_Depth_Tile_Cache_starves = 0x00000053, -DB_PERF_SEL_Depth_Tile_Cache_dtile_locked = 0x00000054, 
-DB_PERF_SEL_Depth_Tile_Cache_alloc_stall = 0x00000055, -DB_PERF_SEL_Depth_Tile_Cache_misses = 0x00000056, -DB_PERF_SEL_Depth_Tile_Cache_hits = 0x00000057, -DB_PERF_SEL_Depth_Tile_Cache_flushes = 0x00000058, -DB_PERF_SEL_Depth_Tile_Cache_noop_tile = 0x00000059, -DB_PERF_SEL_Depth_Tile_Cache_detailed_noop = 0x0000005a, -DB_PERF_SEL_Depth_Tile_Cache_event = 0x0000005b, -DB_PERF_SEL_Depth_Tile_Cache_tile_frees = 0x0000005c, -DB_PERF_SEL_Depth_Tile_Cache_data_frees = 0x0000005d, -DB_PERF_SEL_Depth_Tile_Cache_mem_return_starve = 0x0000005e, -DB_PERF_SEL_Stencil_Cache_misses = 0x0000005f, -DB_PERF_SEL_Stencil_Cache_hits = 0x00000060, -DB_PERF_SEL_Stencil_Cache_flushes = 0x00000061, -DB_PERF_SEL_Stencil_Cache_starves = 0x00000062, -DB_PERF_SEL_Stencil_Cache_frees = 0x00000063, -DB_PERF_SEL_Z_Cache_separate_Z_misses = 0x00000064, -DB_PERF_SEL_Z_Cache_separate_Z_hits = 0x00000065, -DB_PERF_SEL_Z_Cache_separate_Z_flushes = 0x00000066, -DB_PERF_SEL_Z_Cache_separate_Z_starves = 0x00000067, -DB_PERF_SEL_Z_Cache_pmask_misses = 0x00000068, -DB_PERF_SEL_Z_Cache_pmask_hits = 0x00000069, -DB_PERF_SEL_Z_Cache_pmask_flushes = 0x0000006a, -DB_PERF_SEL_Z_Cache_pmask_starves = 0x0000006b, -DB_PERF_SEL_Z_Cache_frees = 0x0000006c, -DB_PERF_SEL_Plane_Cache_misses = 0x0000006d, -DB_PERF_SEL_Plane_Cache_hits = 0x0000006e, -DB_PERF_SEL_Plane_Cache_flushes = 0x0000006f, -DB_PERF_SEL_Plane_Cache_starves = 0x00000070, -DB_PERF_SEL_Plane_Cache_frees = 0x00000071, -DB_PERF_SEL_flush_expanded_stencil = 0x00000072, -DB_PERF_SEL_flush_compressed_stencil = 0x00000073, -DB_PERF_SEL_flush_single_stencil = 0x00000074, -DB_PERF_SEL_planes_flushed = 0x00000075, -DB_PERF_SEL_flush_1plane = 0x00000076, -DB_PERF_SEL_flush_2plane = 0x00000077, -DB_PERF_SEL_flush_3plane = 0x00000078, -DB_PERF_SEL_flush_4plane = 0x00000079, -DB_PERF_SEL_flush_5plane = 0x0000007a, -DB_PERF_SEL_flush_6plane = 0x0000007b, -DB_PERF_SEL_flush_7plane = 0x0000007c, -DB_PERF_SEL_flush_8plane = 0x0000007d, -DB_PERF_SEL_flush_9plane = 
0x0000007e, -DB_PERF_SEL_flush_10plane = 0x0000007f, -DB_PERF_SEL_flush_11plane = 0x00000080, -DB_PERF_SEL_flush_12plane = 0x00000081, -DB_PERF_SEL_flush_13plane = 0x00000082, -DB_PERF_SEL_flush_14plane = 0x00000083, -DB_PERF_SEL_flush_15plane = 0x00000084, -DB_PERF_SEL_flush_16plane = 0x00000085, -DB_PERF_SEL_flush_expanded_z = 0x00000086, -DB_PERF_SEL_earlyZ_waiting_for_postZ_done = 0x00000087, -DB_PERF_SEL_reZ_waiting_for_postZ_done = 0x00000088, -DB_PERF_SEL_dk_tile_sends = 0x00000089, -DB_PERF_SEL_dk_tile_busy = 0x0000008a, -DB_PERF_SEL_dk_tile_quad_starves = 0x0000008b, -DB_PERF_SEL_dk_tile_stalls = 0x0000008c, -DB_PERF_SEL_dk_squad_sends = 0x0000008d, -DB_PERF_SEL_dk_squad_busy = 0x0000008e, -DB_PERF_SEL_dk_squad_stalls = 0x0000008f, -DB_PERF_SEL_Op_Pipe_Busy = 0x00000090, -DB_PERF_SEL_Op_Pipe_MC_Read_stall = 0x00000091, -DB_PERF_SEL_qc_busy = 0x00000092, -DB_PERF_SEL_qc_xfc = 0x00000093, -DB_PERF_SEL_qc_conflicts = 0x00000094, -DB_PERF_SEL_qc_full_stall = 0x00000095, -DB_PERF_SEL_qc_in_preZ_tile_stalls_postZ = 0x00000096, -DB_PERF_SEL_qc_in_postZ_tile_stalls_preZ = 0x00000097, -DB_PERF_SEL_tsc_insert_summarize_stall = 0x00000098, -DB_PERF_SEL_tl_busy = 0x00000099, -DB_PERF_SEL_tl_dtc_read_starved = 0x0000009a, -DB_PERF_SEL_tl_z_fetch_stall = 0x0000009b, -DB_PERF_SEL_tl_stencil_stall = 0x0000009c, -DB_PERF_SEL_tl_z_decompress_stall = 0x0000009d, -DB_PERF_SEL_tl_stencil_locked_stall = 0x0000009e, -DB_PERF_SEL_tl_events = 0x0000009f, -DB_PERF_SEL_tl_summarize_squads = 0x000000a0, -DB_PERF_SEL_tl_flush_expand_squads = 0x000000a1, -DB_PERF_SEL_tl_expand_squads = 0x000000a2, -DB_PERF_SEL_tl_preZ_squads = 0x000000a3, -DB_PERF_SEL_tl_postZ_squads = 0x000000a4, -DB_PERF_SEL_tl_preZ_noop_squads = 0x000000a5, -DB_PERF_SEL_tl_postZ_noop_squads = 0x000000a6, -DB_PERF_SEL_tl_tile_ops = 0x000000a7, -DB_PERF_SEL_tl_in_xfc = 0x000000a8, -DB_PERF_SEL_tl_in_single_stencil_expand_stall = 0x000000a9, -DB_PERF_SEL_tl_in_fast_z_stall = 0x000000aa, -DB_PERF_SEL_tl_out_xfc = 
0x000000ab, -DB_PERF_SEL_tl_out_squads = 0x000000ac, -DB_PERF_SEL_zf_plane_multicycle = 0x000000ad, -DB_PERF_SEL_PostZ_Samples_passing_Z = 0x000000ae, -DB_PERF_SEL_PostZ_Samples_failing_Z = 0x000000af, -DB_PERF_SEL_PostZ_Samples_failing_S = 0x000000b0, -DB_PERF_SEL_PreZ_Samples_passing_Z = 0x000000b1, -DB_PERF_SEL_PreZ_Samples_failing_Z = 0x000000b2, -DB_PERF_SEL_PreZ_Samples_failing_S = 0x000000b3, -DB_PERF_SEL_ts_tc_update_stall = 0x000000b4, -DB_PERF_SEL_sc_kick_start = 0x000000b5, -DB_PERF_SEL_sc_kick_end = 0x000000b6, -DB_PERF_SEL_clock_reg_active = 0x000000b7, -DB_PERF_SEL_clock_main_active = 0x000000b8, -DB_PERF_SEL_clock_mem_export_active = 0x000000b9, -DB_PERF_SEL_esr_ps_out_busy = 0x000000ba, -DB_PERF_SEL_esr_ps_lqf_busy = 0x000000bb, -DB_PERF_SEL_esr_ps_lqf_stall = 0x000000bc, -DB_PERF_SEL_etr_out_send = 0x000000bd, -DB_PERF_SEL_etr_out_busy = 0x000000be, -DB_PERF_SEL_etr_out_ltile_probe_fifo_full_stall = 0x000000bf, -DB_PERF_SEL_etr_out_cb_tile_stall = 0x000000c0, -DB_PERF_SEL_etr_out_esr_stall = 0x000000c1, -DB_PERF_SEL_esr_ps_sqq_busy = 0x000000c2, -DB_PERF_SEL_esr_ps_sqq_stall = 0x000000c3, -DB_PERF_SEL_esr_eot_fwd_busy = 0x000000c4, -DB_PERF_SEL_esr_eot_fwd_holding_squad = 0x000000c5, -DB_PERF_SEL_esr_eot_fwd_forward = 0x000000c6, -DB_PERF_SEL_esr_sqq_zi_busy = 0x000000c7, -DB_PERF_SEL_esr_sqq_zi_stall = 0x000000c8, -DB_PERF_SEL_postzl_sq_pt_busy = 0x000000c9, -DB_PERF_SEL_postzl_sq_pt_stall = 0x000000ca, -DB_PERF_SEL_postzl_se_busy = 0x000000cb, -DB_PERF_SEL_postzl_se_stall = 0x000000cc, -DB_PERF_SEL_postzl_partial_launch = 0x000000cd, -DB_PERF_SEL_postzl_full_launch = 0x000000ce, -DB_PERF_SEL_postzl_partial_waiting = 0x000000cf, -DB_PERF_SEL_postzl_tile_mem_stall = 0x000000d0, -DB_PERF_SEL_postzl_tile_init_stall = 0x000000d1, -DB_PEFF_SEL_prezl_tile_mem_stall = 0x000000d2, -DB_PERF_SEL_prezl_tile_init_stall = 0x000000d3, -DB_PERF_SEL_dtt_sm_clash_stall = 0x000000d4, -DB_PERF_SEL_dtt_sm_slot_stall = 0x000000d5, -DB_PERF_SEL_dtt_sm_miss_stall = 
0x000000d6, -DB_PERF_SEL_mi_rdreq_busy = 0x000000d7, -DB_PERF_SEL_mi_rdreq_stall = 0x000000d8, -DB_PERF_SEL_mi_wrreq_busy = 0x000000d9, -DB_PERF_SEL_mi_wrreq_stall = 0x000000da, -DB_PERF_SEL_recomp_tile_to_1zplane_no_fastop = 0x000000db, -DB_PERF_SEL_dkg_tile_rate_tile = 0x000000dc, -DB_PERF_SEL_prezl_src_in_sends = 0x000000dd, -DB_PERF_SEL_prezl_src_in_stall = 0x000000de, -DB_PERF_SEL_prezl_src_in_squads = 0x000000df, -DB_PERF_SEL_prezl_src_in_squads_unrolled = 0x000000e0, -DB_PERF_SEL_prezl_src_in_tile_rate = 0x000000e1, -DB_PERF_SEL_prezl_src_in_tile_rate_unrolled = 0x000000e2, -DB_PERF_SEL_prezl_src_out_stall = 0x000000e3, -DB_PERF_SEL_postzl_src_in_sends = 0x000000e4, -DB_PERF_SEL_postzl_src_in_stall = 0x000000e5, -DB_PERF_SEL_postzl_src_in_squads = 0x000000e6, -DB_PERF_SEL_postzl_src_in_squads_unrolled = 0x000000e7, -DB_PERF_SEL_postzl_src_in_tile_rate = 0x000000e8, -DB_PERF_SEL_postzl_src_in_tile_rate_unrolled = 0x000000e9, -DB_PERF_SEL_postzl_src_out_stall = 0x000000ea, -DB_PERF_SEL_esr_ps_src_in_sends = 0x000000eb, -DB_PERF_SEL_esr_ps_src_in_stall = 0x000000ec, -DB_PERF_SEL_esr_ps_src_in_squads = 0x000000ed, -DB_PERF_SEL_esr_ps_src_in_squads_unrolled = 0x000000ee, -DB_PERF_SEL_esr_ps_src_in_tile_rate = 0x000000ef, -DB_PERF_SEL_esr_ps_src_in_tile_rate_unrolled = 0x000000f0, -DB_PERF_SEL_esr_ps_src_in_tile_rate_unrolled_to_pixel_rate = 0x000000f1, -DB_PERF_SEL_esr_ps_src_out_stall = 0x000000f2, -DB_PERF_SEL_depth_bounds_qtiles_culled = 0x000000f3, -DB_PERF_SEL_PreZ_Samples_failing_DB = 0x000000f4, -DB_PERF_SEL_PostZ_Samples_failing_DB = 0x000000f5, -DB_PERF_SEL_flush_compressed = 0x000000f6, -DB_PERF_SEL_flush_plane_le4 = 0x000000f7, -DB_PERF_SEL_tiles_z_fully_summarized = 0x000000f8, -DB_PERF_SEL_tiles_stencil_fully_summarized = 0x000000f9, -DB_PERF_SEL_tiles_z_clear_on_expclear = 0x000000fa, -DB_PERF_SEL_tiles_s_clear_on_expclear = 0x000000fb, -DB_PERF_SEL_tiles_decomp_on_expclear = 0x000000fc, -DB_PERF_SEL_tiles_compressed_to_decompressed = 0x000000fd, 
-DB_PERF_SEL_Op_Pipe_Prez_Busy = 0x000000fe, -DB_PERF_SEL_Op_Pipe_Postz_Busy = 0x000000ff, -DB_PERF_SEL_di_dt_stall = 0x00000100, -DB_PERF_SEL_DB_SC_quad_double_quad = 0x00000101, -DB_PERF_SEL_SX_DB_quad_export_quads = 0x00000102, -DB_PERF_SEL_SX_DB_quad_double_format = 0x00000103, -DB_PERF_SEL_SX_DB_quad_fast_format = 0x00000104, -DB_PERF_SEL_SX_DB_quad_slow_format = 0x00000105, -DB_PERF_SEL_DB_CB_lquad_export_quads = 0x00000106, -DB_PERF_SEL_DB_CB_lquad_double_format = 0x00000107, -DB_PERF_SEL_DB_CB_lquad_fast_format = 0x00000108, -DB_PERF_SEL_DB_CB_lquad_slow_format = 0x00000109, -DB_PERF_SEL_CB_DB_rdreq_sends = 0x0000010a, -DB_PERF_SEL_CB_DB_rdreq_prt_sends = 0x0000010b, -DB_PERF_SEL_CB_DB_wrreq_sends = 0x0000010c, -DB_PERF_SEL_CB_DB_wrreq_prt_sends = 0x0000010d, -DB_PERF_SEL_DB_CB_rdret_ack = 0x0000010e, -DB_PERF_SEL_DB_CB_rdret_nack = 0x0000010f, -DB_PERF_SEL_DB_CB_wrret_ack = 0x00000110, -DB_PERF_SEL_DB_CB_wrret_nack = 0x00000111, -DB_PERF_SEL_DFSM_squads_in = 0x00000112, -DB_PERF_SEL_DFSM_full_cleared_squads_out = 0x00000113, -DB_PERF_SEL_DFSM_quads_in = 0x00000114, -DB_PERF_SEL_DFSM_fully_cleared_quads_out = 0x00000115, -DB_PERF_SEL_DFSM_lit_pixels_in = 0x00000116, -DB_PERF_SEL_DFSM_fully_cleared_pixels_out = 0x00000117, -DB_PERF_SEL_DFSM_lit_samples_in = 0x00000118, -DB_PERF_SEL_DFSM_lit_samples_out = 0x00000119, -DB_PERF_SEL_DFSM_cycles_above_watermark = 0x0000011a, -DB_PERF_SEL_DFSM_cant_accept_squads_but_not_stalled_by_downstream = 0x0000011b, -DB_PERF_SEL_DFSM_stalled_by_downstream = 0x0000011c, -DB_PERF_SEL_DFSM_evicted_squads_above_watermark = 0x0000011d, -DB_PERF_SEL_DFSM_collisions_due_to_POPS_overflow = 0x0000011e, -DB_PERF_SEL_DFSM_collisions_detected_within_POPS_FIFO = 0x0000011f, -DB_PERF_SEL_DFSM_evicted_squads_due_to_prim_watermark = 0x00000120, -} PerfCounter_Vals; - -/* - * RingCounterControl enum - */ - -typedef enum RingCounterControl { -COUNTER_RING_SPLIT = 0x00000000, -COUNTER_RING_0 = 0x00000001, -COUNTER_RING_1 = 0x00000002, -} 
RingCounterControl; - -/* - * DbMemArbWatermarks enum - */ - -typedef enum DbMemArbWatermarks { -TRANSFERRED_64_BYTES = 0x00000000, -TRANSFERRED_128_BYTES = 0x00000001, -TRANSFERRED_256_BYTES = 0x00000002, -TRANSFERRED_512_BYTES = 0x00000003, -TRANSFERRED_1024_BYTES = 0x00000004, -TRANSFERRED_2048_BYTES = 0x00000005, -TRANSFERRED_4096_BYTES = 0x00000006, -TRANSFERRED_8192_BYTES = 0x00000007, -} DbMemArbWatermarks; - -/* - * DFSMFlushEvents enum - */ - -typedef enum DFSMFlushEvents { -DB_FLUSH_AND_INV_DB_DATA_TS = 0x00000000, -DB_FLUSH_AND_INV_DB_META = 0x00000001, -DB_CACHE_FLUSH = 0x00000002, -DB_CACHE_FLUSH_TS = 0x00000003, -DB_CACHE_FLUSH_AND_INV_EVENT = 0x00000004, -DB_CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000005, -} DFSMFlushEvents; - -/* - * PixelPipeCounterId enum - */ - -typedef enum PixelPipeCounterId { -PIXEL_PIPE_OCCLUSION_COUNT_0 = 0x00000000, -PIXEL_PIPE_OCCLUSION_COUNT_1 = 0x00000001, -PIXEL_PIPE_OCCLUSION_COUNT_2 = 0x00000002, -PIXEL_PIPE_OCCLUSION_COUNT_3 = 0x00000003, -PIXEL_PIPE_SCREEN_MIN_EXTENTS_0 = 0x00000004, -PIXEL_PIPE_SCREEN_MAX_EXTENTS_0 = 0x00000005, -PIXEL_PIPE_SCREEN_MIN_EXTENTS_1 = 0x00000006, -PIXEL_PIPE_SCREEN_MAX_EXTENTS_1 = 0x00000007, -} PixelPipeCounterId; - -/* - * PixelPipeStride enum - */ - -typedef enum PixelPipeStride { -PIXEL_PIPE_STRIDE_32_BITS = 0x00000000, -PIXEL_PIPE_STRIDE_64_BITS = 0x00000001, -PIXEL_PIPE_STRIDE_128_BITS = 0x00000002, -PIXEL_PIPE_STRIDE_256_BITS = 0x00000003, -} PixelPipeStride; - -/******************************************************* - * TA Enums - *******************************************************/ - -/* - * TEX_BORDER_COLOR_TYPE enum - */ - -typedef enum TEX_BORDER_COLOR_TYPE { -TEX_BorderColor_TransparentBlack = 0x00000000, -TEX_BorderColor_OpaqueBlack = 0x00000001, -TEX_BorderColor_OpaqueWhite = 0x00000002, -TEX_BorderColor_Register = 0x00000003, -} TEX_BORDER_COLOR_TYPE; - -/* - * TEX_CHROMA_KEY enum - */ - -typedef enum TEX_CHROMA_KEY { -TEX_ChromaKey_Disabled = 0x00000000, 
-TEX_ChromaKey_Kill = 0x00000001, -TEX_ChromaKey_Blend = 0x00000002, -TEX_ChromaKey_RESERVED_3 = 0x00000003, -} TEX_CHROMA_KEY; - -/* - * TEX_CLAMP enum - */ - -typedef enum TEX_CLAMP { -TEX_Clamp_Repeat = 0x00000000, -TEX_Clamp_Mirror = 0x00000001, -TEX_Clamp_ClampToLast = 0x00000002, -TEX_Clamp_MirrorOnceToLast = 0x00000003, -TEX_Clamp_ClampHalfToBorder = 0x00000004, -TEX_Clamp_MirrorOnceHalfToBorder = 0x00000005, -TEX_Clamp_ClampToBorder = 0x00000006, -TEX_Clamp_MirrorOnceToBorder = 0x00000007, -} TEX_CLAMP; - -/* - * TEX_COORD_TYPE enum - */ - -typedef enum TEX_COORD_TYPE { -TEX_CoordType_Unnormalized = 0x00000000, -TEX_CoordType_Normalized = 0x00000001, -} TEX_COORD_TYPE; - -/* - * TEX_DEPTH_COMPARE_FUNCTION enum - */ - -typedef enum TEX_DEPTH_COMPARE_FUNCTION { -TEX_DepthCompareFunction_Never = 0x00000000, -TEX_DepthCompareFunction_Less = 0x00000001, -TEX_DepthCompareFunction_Equal = 0x00000002, -TEX_DepthCompareFunction_LessEqual = 0x00000003, -TEX_DepthCompareFunction_Greater = 0x00000004, -TEX_DepthCompareFunction_NotEqual = 0x00000005, -TEX_DepthCompareFunction_GreaterEqual = 0x00000006, -TEX_DepthCompareFunction_Always = 0x00000007, -} TEX_DEPTH_COMPARE_FUNCTION; - -/* - * TEX_DIM enum - */ - -typedef enum TEX_DIM { -TEX_Dim_1D = 0x00000000, -TEX_Dim_2D = 0x00000001, -TEX_Dim_3D = 0x00000002, -TEX_Dim_CubeMap = 0x00000003, -TEX_Dim_1DArray = 0x00000004, -TEX_Dim_2DArray = 0x00000005, -TEX_Dim_2D_MSAA = 0x00000006, -TEX_Dim_2DArray_MSAA = 0x00000007, -} TEX_DIM; - -/* - * TEX_FORMAT_COMP enum - */ - -typedef enum TEX_FORMAT_COMP { -TEX_FormatComp_Unsigned = 0x00000000, -TEX_FormatComp_Signed = 0x00000001, -TEX_FormatComp_UnsignedBiased = 0x00000002, -TEX_FormatComp_RESERVED_3 = 0x00000003, -} TEX_FORMAT_COMP; - -/* - * TEX_MAX_ANISO_RATIO enum - */ - -typedef enum TEX_MAX_ANISO_RATIO { -TEX_MaxAnisoRatio_1to1 = 0x00000000, -TEX_MaxAnisoRatio_2to1 = 0x00000001, -TEX_MaxAnisoRatio_4to1 = 0x00000002, -TEX_MaxAnisoRatio_8to1 = 0x00000003, 
-TEX_MaxAnisoRatio_16to1 = 0x00000004, -TEX_MaxAnisoRatio_RESERVED_5 = 0x00000005, -TEX_MaxAnisoRatio_RESERVED_6 = 0x00000006, -TEX_MaxAnisoRatio_RESERVED_7 = 0x00000007, -} TEX_MAX_ANISO_RATIO; - -/* - * TEX_MIP_FILTER enum - */ - -typedef enum TEX_MIP_FILTER { -TEX_MipFilter_None = 0x00000000, -TEX_MipFilter_Point = 0x00000001, -TEX_MipFilter_Linear = 0x00000002, -TEX_MipFilter_Point_Aniso_Adj = 0x00000003, -} TEX_MIP_FILTER; - -/* - * TEX_REQUEST_SIZE enum - */ - -typedef enum TEX_REQUEST_SIZE { -TEX_RequestSize_32B = 0x00000000, -TEX_RequestSize_64B = 0x00000001, -TEX_RequestSize_128B = 0x00000002, -TEX_RequestSize_2X64B = 0x00000003, -} TEX_REQUEST_SIZE; - -/* - * TEX_SAMPLER_TYPE enum - */ - -typedef enum TEX_SAMPLER_TYPE { -TEX_SamplerType_Invalid = 0x00000000, -TEX_SamplerType_Valid = 0x00000001, -} TEX_SAMPLER_TYPE; - -/* - * TEX_XY_FILTER enum - */ - -typedef enum TEX_XY_FILTER { -TEX_XYFilter_Point = 0x00000000, -TEX_XYFilter_Linear = 0x00000001, -TEX_XYFilter_AnisoPoint = 0x00000002, -TEX_XYFilter_AnisoLinear = 0x00000003, -} TEX_XY_FILTER; - -/* - * TEX_Z_FILTER enum - */ - -typedef enum TEX_Z_FILTER { -TEX_ZFilter_None = 0x00000000, -TEX_ZFilter_Point = 0x00000001, -TEX_ZFilter_Linear = 0x00000002, -TEX_ZFilter_RESERVED_3 = 0x00000003, -} TEX_Z_FILTER; - -/* - * VTX_CLAMP enum - */ - -typedef enum VTX_CLAMP { -VTX_Clamp_ClampToZero = 0x00000000, -VTX_Clamp_ClampToNAN = 0x00000001, -} VTX_CLAMP; - -/* - * VTX_FETCH_TYPE enum - */ - -typedef enum VTX_FETCH_TYPE { -VTX_FetchType_VertexData = 0x00000000, -VTX_FetchType_InstanceData = 0x00000001, -VTX_FetchType_NoIndexOffset = 0x00000002, -VTX_FetchType_RESERVED_3 = 0x00000003, -} VTX_FETCH_TYPE; - -/* - * VTX_FORMAT_COMP_ALL enum - */ - -typedef enum VTX_FORMAT_COMP_ALL { -VTX_FormatCompAll_Unsigned = 0x00000000, -VTX_FormatCompAll_Signed = 0x00000001, -} VTX_FORMAT_COMP_ALL; - -/* - * VTX_MEM_REQUEST_SIZE enum - */ - -typedef enum VTX_MEM_REQUEST_SIZE { -VTX_MemRequestSize_32B = 0x00000000, 
-VTX_MemRequestSize_64B = 0x00000001, -} VTX_MEM_REQUEST_SIZE; - -/* - * TVX_DATA_FORMAT enum - */ - -typedef enum TVX_DATA_FORMAT { -TVX_FMT_INVALID = 0x00000000, -TVX_FMT_8 = 0x00000001, -TVX_FMT_4_4 = 0x00000002, -TVX_FMT_3_3_2 = 0x00000003, -TVX_FMT_RESERVED_4 = 0x00000004, -TVX_FMT_16 = 0x00000005, -TVX_FMT_16_FLOAT = 0x00000006, -TVX_FMT_8_8 = 0x00000007, -TVX_FMT_5_6_5 = 0x00000008, -TVX_FMT_6_5_5 = 0x00000009, -TVX_FMT_1_5_5_5 = 0x0000000a, -TVX_FMT_4_4_4_4 = 0x0000000b, -TVX_FMT_5_5_5_1 = 0x0000000c, -TVX_FMT_32 = 0x0000000d, -TVX_FMT_32_FLOAT = 0x0000000e, -TVX_FMT_16_16 = 0x0000000f, -TVX_FMT_16_16_FLOAT = 0x00000010, -TVX_FMT_8_24 = 0x00000011, -TVX_FMT_8_24_FLOAT = 0x00000012, -TVX_FMT_24_8 = 0x00000013, -TVX_FMT_24_8_FLOAT = 0x00000014, -TVX_FMT_10_11_11 = 0x00000015, -TVX_FMT_10_11_11_FLOAT = 0x00000016, -TVX_FMT_11_11_10 = 0x00000017, -TVX_FMT_11_11_10_FLOAT = 0x00000018, -TVX_FMT_2_10_10_10 = 0x00000019, -TVX_FMT_8_8_8_8 = 0x0000001a, -TVX_FMT_10_10_10_2 = 0x0000001b, -TVX_FMT_X24_8_32_FLOAT = 0x0000001c, -TVX_FMT_32_32 = 0x0000001d, -TVX_FMT_32_32_FLOAT = 0x0000001e, -TVX_FMT_16_16_16_16 = 0x0000001f, -TVX_FMT_16_16_16_16_FLOAT = 0x00000020, -TVX_FMT_RESERVED_33 = 0x00000021, -TVX_FMT_32_32_32_32 = 0x00000022, -TVX_FMT_32_32_32_32_FLOAT = 0x00000023, -TVX_FMT_RESERVED_36 = 0x00000024, -TVX_FMT_1 = 0x00000025, -TVX_FMT_1_REVERSED = 0x00000026, -TVX_FMT_GB_GR = 0x00000027, -TVX_FMT_BG_RG = 0x00000028, -TVX_FMT_32_AS_8 = 0x00000029, -TVX_FMT_32_AS_8_8 = 0x0000002a, -TVX_FMT_5_9_9_9_SHAREDEXP = 0x0000002b, -TVX_FMT_8_8_8 = 0x0000002c, -TVX_FMT_16_16_16 = 0x0000002d, -TVX_FMT_16_16_16_FLOAT = 0x0000002e, -TVX_FMT_32_32_32 = 0x0000002f, -TVX_FMT_32_32_32_FLOAT = 0x00000030, -TVX_FMT_BC1 = 0x00000031, -TVX_FMT_BC2 = 0x00000032, -TVX_FMT_BC3 = 0x00000033, -TVX_FMT_BC4 = 0x00000034, -TVX_FMT_BC5 = 0x00000035, -TVX_FMT_APC0 = 0x00000036, -TVX_FMT_APC1 = 0x00000037, -TVX_FMT_APC2 = 0x00000038, -TVX_FMT_APC3 = 0x00000039, -TVX_FMT_APC4 = 0x0000003a, 
-TVX_FMT_APC5 = 0x0000003b, -TVX_FMT_APC6 = 0x0000003c, -TVX_FMT_APC7 = 0x0000003d, -TVX_FMT_CTX1 = 0x0000003e, -TVX_FMT_RESERVED_63 = 0x0000003f, -} TVX_DATA_FORMAT; - -/* - * TVX_DST_SEL enum - */ - -typedef enum TVX_DST_SEL { -TVX_DstSel_X = 0x00000000, -TVX_DstSel_Y = 0x00000001, -TVX_DstSel_Z = 0x00000002, -TVX_DstSel_W = 0x00000003, -TVX_DstSel_0f = 0x00000004, -TVX_DstSel_1f = 0x00000005, -TVX_DstSel_RESERVED_6 = 0x00000006, -TVX_DstSel_Mask = 0x00000007, -} TVX_DST_SEL; - -/* - * TVX_ENDIAN_SWAP enum - */ - -typedef enum TVX_ENDIAN_SWAP { -TVX_EndianSwap_None = 0x00000000, -TVX_EndianSwap_8in16 = 0x00000001, -TVX_EndianSwap_8in32 = 0x00000002, -TVX_EndianSwap_8in64 = 0x00000003, -} TVX_ENDIAN_SWAP; - -/* - * TVX_INST enum - */ - -typedef enum TVX_INST { -TVX_Inst_NormalVertexFetch = 0x00000000, -TVX_Inst_SemanticVertexFetch = 0x00000001, -TVX_Inst_RESERVED_2 = 0x00000002, -TVX_Inst_LD = 0x00000003, -TVX_Inst_GetTextureResInfo = 0x00000004, -TVX_Inst_GetNumberOfSamples = 0x00000005, -TVX_Inst_GetLOD = 0x00000006, -TVX_Inst_GetGradientsH = 0x00000007, -TVX_Inst_GetGradientsV = 0x00000008, -TVX_Inst_SetTextureOffsets = 0x00000009, -TVX_Inst_KeepGradients = 0x0000000a, -TVX_Inst_SetGradientsH = 0x0000000b, -TVX_Inst_SetGradientsV = 0x0000000c, -TVX_Inst_Pass = 0x0000000d, -TVX_Inst_GetBufferResInfo = 0x0000000e, -TVX_Inst_RESERVED_15 = 0x0000000f, -TVX_Inst_Sample = 0x00000010, -TVX_Inst_Sample_L = 0x00000011, -TVX_Inst_Sample_LB = 0x00000012, -TVX_Inst_Sample_LZ = 0x00000013, -TVX_Inst_Sample_G = 0x00000014, -TVX_Inst_Gather4 = 0x00000015, -TVX_Inst_Sample_G_LB = 0x00000016, -TVX_Inst_Gather4_O = 0x00000017, -TVX_Inst_Sample_C = 0x00000018, -TVX_Inst_Sample_C_L = 0x00000019, -TVX_Inst_Sample_C_LB = 0x0000001a, -TVX_Inst_Sample_C_LZ = 0x0000001b, -TVX_Inst_Sample_C_G = 0x0000001c, -TVX_Inst_Gather4_C = 0x0000001d, -TVX_Inst_Sample_C_G_LB = 0x0000001e, -TVX_Inst_Gather4_C_O = 0x0000001f, -} TVX_INST; - -/* - * TVX_NUM_FORMAT_ALL enum - */ - -typedef enum 
TVX_NUM_FORMAT_ALL { -TVX_NumFormatAll_Norm = 0x00000000, -TVX_NumFormatAll_Int = 0x00000001, -TVX_NumFormatAll_Scaled = 0x00000002, -TVX_NumFormatAll_RESERVED_3 = 0x00000003, -} TVX_NUM_FORMAT_ALL; - -/* - * TVX_SRC_SEL enum - */ - -typedef enum TVX_SRC_SEL { -TVX_SrcSel_X = 0x00000000, -TVX_SrcSel_Y = 0x00000001, -TVX_SrcSel_Z = 0x00000002, -TVX_SrcSel_W = 0x00000003, -TVX_SrcSel_0f = 0x00000004, -TVX_SrcSel_1f = 0x00000005, -} TVX_SRC_SEL; - -/* - * TVX_SRF_MODE_ALL enum - */ - -typedef enum TVX_SRF_MODE_ALL { -TVX_SRFModeAll_ZCMO = 0x00000000, -TVX_SRFModeAll_NZ = 0x00000001, -} TVX_SRF_MODE_ALL; - -/* - * TVX_TYPE enum - */ - -typedef enum TVX_TYPE { -TVX_Type_InvalidTextureResource = 0x00000000, -TVX_Type_InvalidVertexBuffer = 0x00000001, -TVX_Type_ValidTextureResource = 0x00000002, -TVX_Type_ValidVertexBuffer = 0x00000003, -} TVX_TYPE; - -/******************************************************* - * PA Enums - *******************************************************/ - -/* - * SU_PERFCNT_SEL enum - */ - -typedef enum SU_PERFCNT_SEL { -PERF_PAPC_PASX_REQ = 0x00000000, -PERF_PAPC_PASX_DISABLE_PIPE = 0x00000001, -PERF_PAPC_PASX_FIRST_VECTOR = 0x00000002, -PERF_PAPC_PASX_SECOND_VECTOR = 0x00000003, -PERF_PAPC_PASX_FIRST_DEAD = 0x00000004, -PERF_PAPC_PASX_SECOND_DEAD = 0x00000005, -PERF_PAPC_PASX_VTX_KILL_DISCARD = 0x00000006, -PERF_PAPC_PASX_VTX_NAN_DISCARD = 0x00000007, -PERF_PAPC_PA_INPUT_PRIM = 0x00000008, -PERF_PAPC_PA_INPUT_NULL_PRIM = 0x00000009, -PERF_PAPC_PA_INPUT_EVENT_FLAG = 0x0000000a, -PERF_PAPC_PA_INPUT_FIRST_PRIM_SLOT = 0x0000000b, -PERF_PAPC_PA_INPUT_END_OF_PACKET = 0x0000000c, -PERF_PAPC_PA_INPUT_EXTENDED_EVENT = 0x0000000d, -PERF_PAPC_CLPR_CULL_PRIM = 0x0000000e, -PERF_PAPC_CLPR_VVUCP_CULL_PRIM = 0x0000000f, -PERF_PAPC_CLPR_VV_CULL_PRIM = 0x00000010, -PERF_PAPC_CLPR_UCP_CULL_PRIM = 0x00000011, -PERF_PAPC_CLPR_VTX_KILL_CULL_PRIM = 0x00000012, -PERF_PAPC_CLPR_VTX_NAN_CULL_PRIM = 0x00000013, -PERF_PAPC_CLPR_CULL_TO_NULL_PRIM = 0x00000014, 
-PERF_PAPC_CLPR_VVUCP_CLIP_PRIM = 0x00000015, -PERF_PAPC_CLPR_VV_CLIP_PRIM = 0x00000016, -PERF_PAPC_CLPR_UCP_CLIP_PRIM = 0x00000017, -PERF_PAPC_CLPR_POINT_CLIP_CANDIDATE = 0x00000018, -PERF_PAPC_CLPR_CLIP_PLANE_CNT_1 = 0x00000019, -PERF_PAPC_CLPR_CLIP_PLANE_CNT_2 = 0x0000001a, -PERF_PAPC_CLPR_CLIP_PLANE_CNT_3 = 0x0000001b, -PERF_PAPC_CLPR_CLIP_PLANE_CNT_4 = 0x0000001c, -PERF_PAPC_CLPR_CLIP_PLANE_CNT_5_8 = 0x0000001d, -PERF_PAPC_CLPR_CLIP_PLANE_CNT_9_12 = 0x0000001e, -PERF_PAPC_CLPR_CLIP_PLANE_NEAR = 0x0000001f, -PERF_PAPC_CLPR_CLIP_PLANE_FAR = 0x00000020, -PERF_PAPC_CLPR_CLIP_PLANE_LEFT = 0x00000021, -PERF_PAPC_CLPR_CLIP_PLANE_RIGHT = 0x00000022, -PERF_PAPC_CLPR_CLIP_PLANE_TOP = 0x00000023, -PERF_PAPC_CLPR_CLIP_PLANE_BOTTOM = 0x00000024, -PERF_PAPC_CLPR_GSC_KILL_CULL_PRIM = 0x00000025, -PERF_PAPC_CLPR_RASTER_KILL_CULL_PRIM = 0x00000026, -PERF_PAPC_CLSM_NULL_PRIM = 0x00000027, -PERF_PAPC_CLSM_TOTALLY_VISIBLE_PRIM = 0x00000028, -PERF_PAPC_CLSM_CULL_TO_NULL_PRIM = 0x00000029, -PERF_PAPC_CLSM_OUT_PRIM_CNT_1 = 0x0000002a, -PERF_PAPC_CLSM_OUT_PRIM_CNT_2 = 0x0000002b, -PERF_PAPC_CLSM_OUT_PRIM_CNT_3 = 0x0000002c, -PERF_PAPC_CLSM_OUT_PRIM_CNT_4 = 0x0000002d, -PERF_PAPC_CLSM_OUT_PRIM_CNT_5_8 = 0x0000002e, -PERF_PAPC_CLSM_OUT_PRIM_CNT_9_13 = 0x0000002f, -PERF_PAPC_CLIPGA_VTE_KILL_PRIM = 0x00000030, -PERF_PAPC_SU_INPUT_PRIM = 0x00000031, -PERF_PAPC_SU_INPUT_CLIP_PRIM = 0x00000032, -PERF_PAPC_SU_INPUT_NULL_PRIM = 0x00000033, -PERF_PAPC_SU_INPUT_PRIM_DUAL = 0x00000034, -PERF_PAPC_SU_INPUT_CLIP_PRIM_DUAL = 0x00000035, -PERF_PAPC_SU_ZERO_AREA_CULL_PRIM = 0x00000036, -PERF_PAPC_SU_BACK_FACE_CULL_PRIM = 0x00000037, -PERF_PAPC_SU_FRONT_FACE_CULL_PRIM = 0x00000038, -PERF_PAPC_SU_POLYMODE_FACE_CULL = 0x00000039, -PERF_PAPC_SU_POLYMODE_BACK_CULL = 0x0000003a, -PERF_PAPC_SU_POLYMODE_FRONT_CULL = 0x0000003b, -PERF_PAPC_SU_POLYMODE_INVALID_FILL = 0x0000003c, -PERF_PAPC_SU_OUTPUT_PRIM = 0x0000003d, -PERF_PAPC_SU_OUTPUT_CLIP_PRIM = 0x0000003e, -PERF_PAPC_SU_OUTPUT_NULL_PRIM = 0x0000003f, 
-PERF_PAPC_SU_OUTPUT_EVENT_FLAG = 0x00000040, -PERF_PAPC_SU_OUTPUT_FIRST_PRIM_SLOT = 0x00000041, -PERF_PAPC_SU_OUTPUT_END_OF_PACKET = 0x00000042, -PERF_PAPC_SU_OUTPUT_POLYMODE_FACE = 0x00000043, -PERF_PAPC_SU_OUTPUT_POLYMODE_BACK = 0x00000044, -PERF_PAPC_SU_OUTPUT_POLYMODE_FRONT = 0x00000045, -PERF_PAPC_SU_OUT_CLIP_POLYMODE_FACE = 0x00000046, -PERF_PAPC_SU_OUT_CLIP_POLYMODE_BACK = 0x00000047, -PERF_PAPC_SU_OUT_CLIP_POLYMODE_FRONT = 0x00000048, -PERF_PAPC_SU_OUTPUT_PRIM_DUAL = 0x00000049, -PERF_PAPC_SU_OUTPUT_CLIP_PRIM_DUAL = 0x0000004a, -PERF_PAPC_SU_OUTPUT_POLYMODE_DUAL = 0x0000004b, -PERF_PAPC_SU_OUTPUT_CLIP_POLYMODE_DUAL = 0x0000004c, -PERF_PAPC_PASX_REQ_IDLE = 0x0000004d, -PERF_PAPC_PASX_REQ_BUSY = 0x0000004e, -PERF_PAPC_PASX_REQ_STALLED = 0x0000004f, -PERF_PAPC_PASX_REC_IDLE = 0x00000050, -PERF_PAPC_PASX_REC_BUSY = 0x00000051, -PERF_PAPC_PASX_REC_STARVED_SX = 0x00000052, -PERF_PAPC_PASX_REC_STALLED = 0x00000053, -PERF_PAPC_PASX_REC_STALLED_POS_MEM = 0x00000054, -PERF_PAPC_PASX_REC_STALLED_CCGSM_IN = 0x00000055, -PERF_PAPC_CCGSM_IDLE = 0x00000056, -PERF_PAPC_CCGSM_BUSY = 0x00000057, -PERF_PAPC_CCGSM_STALLED = 0x00000058, -PERF_PAPC_CLPRIM_IDLE = 0x00000059, -PERF_PAPC_CLPRIM_BUSY = 0x0000005a, -PERF_PAPC_CLPRIM_STALLED = 0x0000005b, -PERF_PAPC_CLPRIM_STARVED_CCGSM = 0x0000005c, -PERF_PAPC_CLIPSM_IDLE = 0x0000005d, -PERF_PAPC_CLIPSM_BUSY = 0x0000005e, -PERF_PAPC_CLIPSM_WAIT_CLIP_VERT_ENGH = 0x0000005f, -PERF_PAPC_CLIPSM_WAIT_HIGH_PRI_SEQ = 0x00000060, -PERF_PAPC_CLIPSM_WAIT_CLIPGA = 0x00000061, -PERF_PAPC_CLIPSM_WAIT_AVAIL_VTE_CLIP = 0x00000062, -PERF_PAPC_CLIPSM_WAIT_CLIP_OUTSM = 0x00000063, -PERF_PAPC_CLIPGA_IDLE = 0x00000064, -PERF_PAPC_CLIPGA_BUSY = 0x00000065, -PERF_PAPC_CLIPGA_STARVED_VTE_CLIP = 0x00000066, -PERF_PAPC_CLIPGA_STALLED = 0x00000067, -PERF_PAPC_CLIP_IDLE = 0x00000068, -PERF_PAPC_CLIP_BUSY = 0x00000069, -PERF_PAPC_SU_IDLE = 0x0000006a, -PERF_PAPC_SU_BUSY = 0x0000006b, -PERF_PAPC_SU_STARVED_CLIP = 0x0000006c, -PERF_PAPC_SU_STALLED_SC = 
0x0000006d, -PERF_PAPC_CL_DYN_SCLK_VLD = 0x0000006e, -PERF_PAPC_SU_DYN_SCLK_VLD = 0x0000006f, -PERF_PAPC_PA_REG_SCLK_VLD = 0x00000070, -PERF_PAPC_SU_MULTI_GPU_PRIM_FILTER_CULL = 0x00000071, -PERF_PAPC_PASX_SE0_REQ = 0x00000072, -PERF_PAPC_PASX_SE1_REQ = 0x00000073, -PERF_PAPC_PASX_SE0_FIRST_VECTOR = 0x00000074, -PERF_PAPC_PASX_SE0_SECOND_VECTOR = 0x00000075, -PERF_PAPC_PASX_SE1_FIRST_VECTOR = 0x00000076, -PERF_PAPC_PASX_SE1_SECOND_VECTOR = 0x00000077, -PERF_PAPC_SU_SE0_PRIM_FILTER_CULL = 0x00000078, -PERF_PAPC_SU_SE1_PRIM_FILTER_CULL = 0x00000079, -PERF_PAPC_SU_SE01_PRIM_FILTER_CULL = 0x0000007a, -PERF_PAPC_SU_SE0_OUTPUT_PRIM = 0x0000007b, -PERF_PAPC_SU_SE1_OUTPUT_PRIM = 0x0000007c, -PERF_PAPC_SU_SE01_OUTPUT_PRIM = 0x0000007d, -PERF_PAPC_SU_SE0_OUTPUT_NULL_PRIM = 0x0000007e, -PERF_PAPC_SU_SE1_OUTPUT_NULL_PRIM = 0x0000007f, -PERF_PAPC_SU_SE01_OUTPUT_NULL_PRIM = 0x00000080, -PERF_PAPC_SU_SE0_OUTPUT_FIRST_PRIM_SLOT = 0x00000081, -PERF_PAPC_SU_SE1_OUTPUT_FIRST_PRIM_SLOT = 0x00000082, -PERF_PAPC_SU_SE0_STALLED_SC = 0x00000083, -PERF_PAPC_SU_SE1_STALLED_SC = 0x00000084, -PERF_PAPC_SU_SE01_STALLED_SC = 0x00000085, -PERF_PAPC_CLSM_CLIPPING_PRIM = 0x00000086, -PERF_PAPC_SU_CULLED_PRIM = 0x00000087, -PERF_PAPC_SU_OUTPUT_EOPG = 0x00000088, -PERF_PAPC_SU_SE2_PRIM_FILTER_CULL = 0x00000089, -PERF_PAPC_SU_SE3_PRIM_FILTER_CULL = 0x0000008a, -PERF_PAPC_SU_SE2_OUTPUT_PRIM = 0x0000008b, -PERF_PAPC_SU_SE3_OUTPUT_PRIM = 0x0000008c, -PERF_PAPC_SU_SE2_OUTPUT_NULL_PRIM = 0x0000008d, -PERF_PAPC_SU_SE3_OUTPUT_NULL_PRIM = 0x0000008e, -PERF_PAPC_SU_SE0_OUTPUT_END_OF_PACKET = 0x0000008f, -PERF_PAPC_SU_SE1_OUTPUT_END_OF_PACKET = 0x00000090, -PERF_PAPC_SU_SE2_OUTPUT_END_OF_PACKET = 0x00000091, -PERF_PAPC_SU_SE3_OUTPUT_END_OF_PACKET = 0x00000092, -PERF_PAPC_SU_SE0_OUTPUT_EOPG = 0x00000093, -PERF_PAPC_SU_SE1_OUTPUT_EOPG = 0x00000094, -PERF_PAPC_SU_SE2_OUTPUT_EOPG = 0x00000095, -PERF_PAPC_SU_SE3_OUTPUT_EOPG = 0x00000096, -PERF_PAPC_SU_SE2_STALLED_SC = 0x00000097, -PERF_PAPC_SU_SE3_STALLED_SC = 
0x00000098, -} SU_PERFCNT_SEL; - -/* - * SC_PERFCNT_SEL enum - */ - -typedef enum SC_PERFCNT_SEL { -SC_SRPS_WINDOW_VALID = 0x00000000, -SC_PSSW_WINDOW_VALID = 0x00000001, -SC_TPQZ_WINDOW_VALID = 0x00000002, -SC_QZQP_WINDOW_VALID = 0x00000003, -SC_TRPK_WINDOW_VALID = 0x00000004, -SC_SRPS_WINDOW_VALID_BUSY = 0x00000005, -SC_PSSW_WINDOW_VALID_BUSY = 0x00000006, -SC_TPQZ_WINDOW_VALID_BUSY = 0x00000007, -SC_QZQP_WINDOW_VALID_BUSY = 0x00000008, -SC_TRPK_WINDOW_VALID_BUSY = 0x00000009, -SC_STARVED_BY_PA = 0x0000000a, -SC_STALLED_BY_PRIMFIFO = 0x0000000b, -SC_STALLED_BY_DB_TILE = 0x0000000c, -SC_STARVED_BY_DB_TILE = 0x0000000d, -SC_STALLED_BY_TILEORDERFIFO = 0x0000000e, -SC_STALLED_BY_TILEFIFO = 0x0000000f, -SC_STALLED_BY_DB_QUAD = 0x00000010, -SC_STARVED_BY_DB_QUAD = 0x00000011, -SC_STALLED_BY_QUADFIFO = 0x00000012, -SC_STALLED_BY_BCI = 0x00000013, -SC_STALLED_BY_SPI = 0x00000014, -SC_SCISSOR_DISCARD = 0x00000015, -SC_BB_DISCARD = 0x00000016, -SC_SUPERTILE_COUNT = 0x00000017, -SC_SUPERTILE_PER_PRIM_H0 = 0x00000018, -SC_SUPERTILE_PER_PRIM_H1 = 0x00000019, -SC_SUPERTILE_PER_PRIM_H2 = 0x0000001a, -SC_SUPERTILE_PER_PRIM_H3 = 0x0000001b, -SC_SUPERTILE_PER_PRIM_H4 = 0x0000001c, -SC_SUPERTILE_PER_PRIM_H5 = 0x0000001d, -SC_SUPERTILE_PER_PRIM_H6 = 0x0000001e, -SC_SUPERTILE_PER_PRIM_H7 = 0x0000001f, -SC_SUPERTILE_PER_PRIM_H8 = 0x00000020, -SC_SUPERTILE_PER_PRIM_H9 = 0x00000021, -SC_SUPERTILE_PER_PRIM_H10 = 0x00000022, -SC_SUPERTILE_PER_PRIM_H11 = 0x00000023, -SC_SUPERTILE_PER_PRIM_H12 = 0x00000024, -SC_SUPERTILE_PER_PRIM_H13 = 0x00000025, -SC_SUPERTILE_PER_PRIM_H14 = 0x00000026, -SC_SUPERTILE_PER_PRIM_H15 = 0x00000027, -SC_SUPERTILE_PER_PRIM_H16 = 0x00000028, -SC_TILE_PER_PRIM_H0 = 0x00000029, -SC_TILE_PER_PRIM_H1 = 0x0000002a, -SC_TILE_PER_PRIM_H2 = 0x0000002b, -SC_TILE_PER_PRIM_H3 = 0x0000002c, -SC_TILE_PER_PRIM_H4 = 0x0000002d, -SC_TILE_PER_PRIM_H5 = 0x0000002e, -SC_TILE_PER_PRIM_H6 = 0x0000002f, -SC_TILE_PER_PRIM_H7 = 0x00000030, -SC_TILE_PER_PRIM_H8 = 0x00000031, 
-SC_TILE_PER_PRIM_H9 = 0x00000032, -SC_TILE_PER_PRIM_H10 = 0x00000033, -SC_TILE_PER_PRIM_H11 = 0x00000034, -SC_TILE_PER_PRIM_H12 = 0x00000035, -SC_TILE_PER_PRIM_H13 = 0x00000036, -SC_TILE_PER_PRIM_H14 = 0x00000037, -SC_TILE_PER_PRIM_H15 = 0x00000038, -SC_TILE_PER_PRIM_H16 = 0x00000039, -SC_TILE_PER_SUPERTILE_H0 = 0x0000003a, -SC_TILE_PER_SUPERTILE_H1 = 0x0000003b, -SC_TILE_PER_SUPERTILE_H2 = 0x0000003c, -SC_TILE_PER_SUPERTILE_H3 = 0x0000003d, -SC_TILE_PER_SUPERTILE_H4 = 0x0000003e, -SC_TILE_PER_SUPERTILE_H5 = 0x0000003f, -SC_TILE_PER_SUPERTILE_H6 = 0x00000040, -SC_TILE_PER_SUPERTILE_H7 = 0x00000041, -SC_TILE_PER_SUPERTILE_H8 = 0x00000042, -SC_TILE_PER_SUPERTILE_H9 = 0x00000043, -SC_TILE_PER_SUPERTILE_H10 = 0x00000044, -SC_TILE_PER_SUPERTILE_H11 = 0x00000045, -SC_TILE_PER_SUPERTILE_H12 = 0x00000046, -SC_TILE_PER_SUPERTILE_H13 = 0x00000047, -SC_TILE_PER_SUPERTILE_H14 = 0x00000048, -SC_TILE_PER_SUPERTILE_H15 = 0x00000049, -SC_TILE_PER_SUPERTILE_H16 = 0x0000004a, -SC_TILE_PICKED_H1 = 0x0000004b, -SC_TILE_PICKED_H2 = 0x0000004c, -SC_TILE_PICKED_H3 = 0x0000004d, -SC_TILE_PICKED_H4 = 0x0000004e, -SC_QZ0_TILE_COUNT = 0x0000004f, -SC_QZ1_TILE_COUNT = 0x00000050, -SC_QZ2_TILE_COUNT = 0x00000051, -SC_QZ3_TILE_COUNT = 0x00000052, -SC_QZ0_TILE_COVERED_COUNT = 0x00000053, -SC_QZ1_TILE_COVERED_COUNT = 0x00000054, -SC_QZ2_TILE_COVERED_COUNT = 0x00000055, -SC_QZ3_TILE_COVERED_COUNT = 0x00000056, -SC_QZ0_TILE_NOT_COVERED_COUNT = 0x00000057, -SC_QZ1_TILE_NOT_COVERED_COUNT = 0x00000058, -SC_QZ2_TILE_NOT_COVERED_COUNT = 0x00000059, -SC_QZ3_TILE_NOT_COVERED_COUNT = 0x0000005a, -SC_QZ0_QUAD_PER_TILE_H0 = 0x0000005b, -SC_QZ0_QUAD_PER_TILE_H1 = 0x0000005c, -SC_QZ0_QUAD_PER_TILE_H2 = 0x0000005d, -SC_QZ0_QUAD_PER_TILE_H3 = 0x0000005e, -SC_QZ0_QUAD_PER_TILE_H4 = 0x0000005f, -SC_QZ0_QUAD_PER_TILE_H5 = 0x00000060, -SC_QZ0_QUAD_PER_TILE_H6 = 0x00000061, -SC_QZ0_QUAD_PER_TILE_H7 = 0x00000062, -SC_QZ0_QUAD_PER_TILE_H8 = 0x00000063, -SC_QZ0_QUAD_PER_TILE_H9 = 0x00000064, -SC_QZ0_QUAD_PER_TILE_H10 = 
0x00000065, -SC_QZ0_QUAD_PER_TILE_H11 = 0x00000066, -SC_QZ0_QUAD_PER_TILE_H12 = 0x00000067, -SC_QZ0_QUAD_PER_TILE_H13 = 0x00000068, -SC_QZ0_QUAD_PER_TILE_H14 = 0x00000069, -SC_QZ0_QUAD_PER_TILE_H15 = 0x0000006a, -SC_QZ0_QUAD_PER_TILE_H16 = 0x0000006b, -SC_QZ1_QUAD_PER_TILE_H0 = 0x0000006c, -SC_QZ1_QUAD_PER_TILE_H1 = 0x0000006d, -SC_QZ1_QUAD_PER_TILE_H2 = 0x0000006e, -SC_QZ1_QUAD_PER_TILE_H3 = 0x0000006f, -SC_QZ1_QUAD_PER_TILE_H4 = 0x00000070, -SC_QZ1_QUAD_PER_TILE_H5 = 0x00000071, -SC_QZ1_QUAD_PER_TILE_H6 = 0x00000072, -SC_QZ1_QUAD_PER_TILE_H7 = 0x00000073, -SC_QZ1_QUAD_PER_TILE_H8 = 0x00000074, -SC_QZ1_QUAD_PER_TILE_H9 = 0x00000075, -SC_QZ1_QUAD_PER_TILE_H10 = 0x00000076, -SC_QZ1_QUAD_PER_TILE_H11 = 0x00000077, -SC_QZ1_QUAD_PER_TILE_H12 = 0x00000078, -SC_QZ1_QUAD_PER_TILE_H13 = 0x00000079, -SC_QZ1_QUAD_PER_TILE_H14 = 0x0000007a, -SC_QZ1_QUAD_PER_TILE_H15 = 0x0000007b, -SC_QZ1_QUAD_PER_TILE_H16 = 0x0000007c, -SC_QZ2_QUAD_PER_TILE_H0 = 0x0000007d, -SC_QZ2_QUAD_PER_TILE_H1 = 0x0000007e, -SC_QZ2_QUAD_PER_TILE_H2 = 0x0000007f, -SC_QZ2_QUAD_PER_TILE_H3 = 0x00000080, -SC_QZ2_QUAD_PER_TILE_H4 = 0x00000081, -SC_QZ2_QUAD_PER_TILE_H5 = 0x00000082, -SC_QZ2_QUAD_PER_TILE_H6 = 0x00000083, -SC_QZ2_QUAD_PER_TILE_H7 = 0x00000084, -SC_QZ2_QUAD_PER_TILE_H8 = 0x00000085, -SC_QZ2_QUAD_PER_TILE_H9 = 0x00000086, -SC_QZ2_QUAD_PER_TILE_H10 = 0x00000087, -SC_QZ2_QUAD_PER_TILE_H11 = 0x00000088, -SC_QZ2_QUAD_PER_TILE_H12 = 0x00000089, -SC_QZ2_QUAD_PER_TILE_H13 = 0x0000008a, -SC_QZ2_QUAD_PER_TILE_H14 = 0x0000008b, -SC_QZ2_QUAD_PER_TILE_H15 = 0x0000008c, -SC_QZ2_QUAD_PER_TILE_H16 = 0x0000008d, -SC_QZ3_QUAD_PER_TILE_H0 = 0x0000008e, -SC_QZ3_QUAD_PER_TILE_H1 = 0x0000008f, -SC_QZ3_QUAD_PER_TILE_H2 = 0x00000090, -SC_QZ3_QUAD_PER_TILE_H3 = 0x00000091, -SC_QZ3_QUAD_PER_TILE_H4 = 0x00000092, -SC_QZ3_QUAD_PER_TILE_H5 = 0x00000093, -SC_QZ3_QUAD_PER_TILE_H6 = 0x00000094, -SC_QZ3_QUAD_PER_TILE_H7 = 0x00000095, -SC_QZ3_QUAD_PER_TILE_H8 = 0x00000096, -SC_QZ3_QUAD_PER_TILE_H9 = 0x00000097, 
-SC_QZ3_QUAD_PER_TILE_H10 = 0x00000098, -SC_QZ3_QUAD_PER_TILE_H11 = 0x00000099, -SC_QZ3_QUAD_PER_TILE_H12 = 0x0000009a, -SC_QZ3_QUAD_PER_TILE_H13 = 0x0000009b, -SC_QZ3_QUAD_PER_TILE_H14 = 0x0000009c, -SC_QZ3_QUAD_PER_TILE_H15 = 0x0000009d, -SC_QZ3_QUAD_PER_TILE_H16 = 0x0000009e, -SC_QZ0_QUAD_COUNT = 0x0000009f, -SC_QZ1_QUAD_COUNT = 0x000000a0, -SC_QZ2_QUAD_COUNT = 0x000000a1, -SC_QZ3_QUAD_COUNT = 0x000000a2, -SC_P0_HIZ_TILE_COUNT = 0x000000a3, -SC_P1_HIZ_TILE_COUNT = 0x000000a4, -SC_P2_HIZ_TILE_COUNT = 0x000000a5, -SC_P3_HIZ_TILE_COUNT = 0x000000a6, -SC_P0_HIZ_QUAD_PER_TILE_H0 = 0x000000a7, -SC_P0_HIZ_QUAD_PER_TILE_H1 = 0x000000a8, -SC_P0_HIZ_QUAD_PER_TILE_H2 = 0x000000a9, -SC_P0_HIZ_QUAD_PER_TILE_H3 = 0x000000aa, -SC_P0_HIZ_QUAD_PER_TILE_H4 = 0x000000ab, -SC_P0_HIZ_QUAD_PER_TILE_H5 = 0x000000ac, -SC_P0_HIZ_QUAD_PER_TILE_H6 = 0x000000ad, -SC_P0_HIZ_QUAD_PER_TILE_H7 = 0x000000ae, -SC_P0_HIZ_QUAD_PER_TILE_H8 = 0x000000af, -SC_P0_HIZ_QUAD_PER_TILE_H9 = 0x000000b0, -SC_P0_HIZ_QUAD_PER_TILE_H10 = 0x000000b1, -SC_P0_HIZ_QUAD_PER_TILE_H11 = 0x000000b2, -SC_P0_HIZ_QUAD_PER_TILE_H12 = 0x000000b3, -SC_P0_HIZ_QUAD_PER_TILE_H13 = 0x000000b4, -SC_P0_HIZ_QUAD_PER_TILE_H14 = 0x000000b5, -SC_P0_HIZ_QUAD_PER_TILE_H15 = 0x000000b6, -SC_P0_HIZ_QUAD_PER_TILE_H16 = 0x000000b7, -SC_P1_HIZ_QUAD_PER_TILE_H0 = 0x000000b8, -SC_P1_HIZ_QUAD_PER_TILE_H1 = 0x000000b9, -SC_P1_HIZ_QUAD_PER_TILE_H2 = 0x000000ba, -SC_P1_HIZ_QUAD_PER_TILE_H3 = 0x000000bb, -SC_P1_HIZ_QUAD_PER_TILE_H4 = 0x000000bc, -SC_P1_HIZ_QUAD_PER_TILE_H5 = 0x000000bd, -SC_P1_HIZ_QUAD_PER_TILE_H6 = 0x000000be, -SC_P1_HIZ_QUAD_PER_TILE_H7 = 0x000000bf, -SC_P1_HIZ_QUAD_PER_TILE_H8 = 0x000000c0, -SC_P1_HIZ_QUAD_PER_TILE_H9 = 0x000000c1, -SC_P1_HIZ_QUAD_PER_TILE_H10 = 0x000000c2, -SC_P1_HIZ_QUAD_PER_TILE_H11 = 0x000000c3, -SC_P1_HIZ_QUAD_PER_TILE_H12 = 0x000000c4, -SC_P1_HIZ_QUAD_PER_TILE_H13 = 0x000000c5, -SC_P1_HIZ_QUAD_PER_TILE_H14 = 0x000000c6, -SC_P1_HIZ_QUAD_PER_TILE_H15 = 0x000000c7, -SC_P1_HIZ_QUAD_PER_TILE_H16 = 0x000000c8, 
-SC_P2_HIZ_QUAD_PER_TILE_H0 = 0x000000c9, -SC_P2_HIZ_QUAD_PER_TILE_H1 = 0x000000ca, -SC_P2_HIZ_QUAD_PER_TILE_H2 = 0x000000cb, -SC_P2_HIZ_QUAD_PER_TILE_H3 = 0x000000cc, -SC_P2_HIZ_QUAD_PER_TILE_H4 = 0x000000cd, -SC_P2_HIZ_QUAD_PER_TILE_H5 = 0x000000ce, -SC_P2_HIZ_QUAD_PER_TILE_H6 = 0x000000cf, -SC_P2_HIZ_QUAD_PER_TILE_H7 = 0x000000d0, -SC_P2_HIZ_QUAD_PER_TILE_H8 = 0x000000d1, -SC_P2_HIZ_QUAD_PER_TILE_H9 = 0x000000d2, -SC_P2_HIZ_QUAD_PER_TILE_H10 = 0x000000d3, -SC_P2_HIZ_QUAD_PER_TILE_H11 = 0x000000d4, -SC_P2_HIZ_QUAD_PER_TILE_H12 = 0x000000d5, -SC_P2_HIZ_QUAD_PER_TILE_H13 = 0x000000d6, -SC_P2_HIZ_QUAD_PER_TILE_H14 = 0x000000d7, -SC_P2_HIZ_QUAD_PER_TILE_H15 = 0x000000d8, -SC_P2_HIZ_QUAD_PER_TILE_H16 = 0x000000d9, -SC_P3_HIZ_QUAD_PER_TILE_H0 = 0x000000da, -SC_P3_HIZ_QUAD_PER_TILE_H1 = 0x000000db, -SC_P3_HIZ_QUAD_PER_TILE_H2 = 0x000000dc, -SC_P3_HIZ_QUAD_PER_TILE_H3 = 0x000000dd, -SC_P3_HIZ_QUAD_PER_TILE_H4 = 0x000000de, -SC_P3_HIZ_QUAD_PER_TILE_H5 = 0x000000df, -SC_P3_HIZ_QUAD_PER_TILE_H6 = 0x000000e0, -SC_P3_HIZ_QUAD_PER_TILE_H7 = 0x000000e1, -SC_P3_HIZ_QUAD_PER_TILE_H8 = 0x000000e2, -SC_P3_HIZ_QUAD_PER_TILE_H9 = 0x000000e3, -SC_P3_HIZ_QUAD_PER_TILE_H10 = 0x000000e4, -SC_P3_HIZ_QUAD_PER_TILE_H11 = 0x000000e5, -SC_P3_HIZ_QUAD_PER_TILE_H12 = 0x000000e6, -SC_P3_HIZ_QUAD_PER_TILE_H13 = 0x000000e7, -SC_P3_HIZ_QUAD_PER_TILE_H14 = 0x000000e8, -SC_P3_HIZ_QUAD_PER_TILE_H15 = 0x000000e9, -SC_P3_HIZ_QUAD_PER_TILE_H16 = 0x000000ea, -SC_P0_HIZ_QUAD_COUNT = 0x000000eb, -SC_P1_HIZ_QUAD_COUNT = 0x000000ec, -SC_P2_HIZ_QUAD_COUNT = 0x000000ed, -SC_P3_HIZ_QUAD_COUNT = 0x000000ee, -SC_P0_DETAIL_QUAD_COUNT = 0x000000ef, -SC_P1_DETAIL_QUAD_COUNT = 0x000000f0, -SC_P2_DETAIL_QUAD_COUNT = 0x000000f1, -SC_P3_DETAIL_QUAD_COUNT = 0x000000f2, -SC_P0_DETAIL_QUAD_WITH_1_PIX = 0x000000f3, -SC_P0_DETAIL_QUAD_WITH_2_PIX = 0x000000f4, -SC_P0_DETAIL_QUAD_WITH_3_PIX = 0x000000f5, -SC_P0_DETAIL_QUAD_WITH_4_PIX = 0x000000f6, -SC_P1_DETAIL_QUAD_WITH_1_PIX = 0x000000f7, -SC_P1_DETAIL_QUAD_WITH_2_PIX = 
0x000000f8, -SC_P1_DETAIL_QUAD_WITH_3_PIX = 0x000000f9, -SC_P1_DETAIL_QUAD_WITH_4_PIX = 0x000000fa, -SC_P2_DETAIL_QUAD_WITH_1_PIX = 0x000000fb, -SC_P2_DETAIL_QUAD_WITH_2_PIX = 0x000000fc, -SC_P2_DETAIL_QUAD_WITH_3_PIX = 0x000000fd, -SC_P2_DETAIL_QUAD_WITH_4_PIX = 0x000000fe, -SC_P3_DETAIL_QUAD_WITH_1_PIX = 0x000000ff, -SC_P3_DETAIL_QUAD_WITH_2_PIX = 0x00000100, -SC_P3_DETAIL_QUAD_WITH_3_PIX = 0x00000101, -SC_P3_DETAIL_QUAD_WITH_4_PIX = 0x00000102, -SC_EARLYZ_QUAD_COUNT = 0x00000103, -SC_EARLYZ_QUAD_WITH_1_PIX = 0x00000104, -SC_EARLYZ_QUAD_WITH_2_PIX = 0x00000105, -SC_EARLYZ_QUAD_WITH_3_PIX = 0x00000106, -SC_EARLYZ_QUAD_WITH_4_PIX = 0x00000107, -SC_PKR_QUAD_PER_ROW_H1 = 0x00000108, -SC_PKR_QUAD_PER_ROW_H2 = 0x00000109, -SC_PKR_4X2_QUAD_SPLIT = 0x0000010a, -SC_PKR_4X2_FILL_QUAD = 0x0000010b, -SC_PKR_END_OF_VECTOR = 0x0000010c, -SC_PKR_CONTROL_XFER = 0x0000010d, -SC_PKR_DBHANG_FORCE_EOV = 0x0000010e, -SC_REG_SCLK_BUSY = 0x0000010f, -SC_GRP0_DYN_SCLK_BUSY = 0x00000110, -SC_GRP1_DYN_SCLK_BUSY = 0x00000111, -SC_GRP2_DYN_SCLK_BUSY = 0x00000112, -SC_GRP3_DYN_SCLK_BUSY = 0x00000113, -SC_GRP4_DYN_SCLK_BUSY = 0x00000114, -SC_PA0_SC_DATA_FIFO_RD = 0x00000115, -SC_PA0_SC_DATA_FIFO_WE = 0x00000116, -SC_PA1_SC_DATA_FIFO_RD = 0x00000117, -SC_PA1_SC_DATA_FIFO_WE = 0x00000118, -SC_PS_ARB_XFC_ALL_EVENT_OR_PRIM_CYCLES = 0x00000119, -SC_PS_ARB_XFC_ONLY_PRIM_CYCLES = 0x0000011a, -SC_PS_ARB_XFC_ONLY_ONE_INC_PER_PRIM = 0x0000011b, -SC_PS_ARB_STALLED_FROM_BELOW = 0x0000011c, -SC_PS_ARB_STARVED_FROM_ABOVE = 0x0000011d, -SC_PS_ARB_SC_BUSY = 0x0000011e, -SC_PS_ARB_PA_SC_BUSY = 0x0000011f, -SC_PA2_SC_DATA_FIFO_RD = 0x00000120, -SC_PA2_SC_DATA_FIFO_WE = 0x00000121, -SC_PA3_SC_DATA_FIFO_RD = 0x00000122, -SC_PA3_SC_DATA_FIFO_WE = 0x00000123, -SC_PA_SC_DEALLOC_0_0_WE = 0x00000124, -SC_PA_SC_DEALLOC_0_1_WE = 0x00000125, -SC_PA_SC_DEALLOC_1_0_WE = 0x00000126, -SC_PA_SC_DEALLOC_1_1_WE = 0x00000127, -SC_PA_SC_DEALLOC_2_0_WE = 0x00000128, -SC_PA_SC_DEALLOC_2_1_WE = 0x00000129, -SC_PA_SC_DEALLOC_3_0_WE 
= 0x0000012a, -SC_PA_SC_DEALLOC_3_1_WE = 0x0000012b, -SC_PA0_SC_EOP_WE = 0x0000012c, -SC_PA0_SC_EOPG_WE = 0x0000012d, -SC_PA0_SC_EVENT_WE = 0x0000012e, -SC_PA1_SC_EOP_WE = 0x0000012f, -SC_PA1_SC_EOPG_WE = 0x00000130, -SC_PA1_SC_EVENT_WE = 0x00000131, -SC_PA2_SC_EOP_WE = 0x00000132, -SC_PA2_SC_EOPG_WE = 0x00000133, -SC_PA2_SC_EVENT_WE = 0x00000134, -SC_PA3_SC_EOP_WE = 0x00000135, -SC_PA3_SC_EOPG_WE = 0x00000136, -SC_PA3_SC_EVENT_WE = 0x00000137, -SC_PS_ARB_OOO_THRESHOLD_SWITCH_TO_DESIRED_FIFO = 0x00000138, -SC_PS_ARB_OOO_FIFO_EMPTY_SWITCH = 0x00000139, -SC_PS_ARB_NULL_PRIM_BUBBLE_POP = 0x0000013a, -SC_PS_ARB_EOP_POP_SYNC_POP = 0x0000013b, -SC_PS_ARB_EVENT_SYNC_POP = 0x0000013c, -SC_SC_PS_ENG_MULTICYCLE_BUBBLE = 0x0000013d, -SC_PA0_SC_FPOV_WE = 0x0000013e, -SC_PA1_SC_FPOV_WE = 0x0000013f, -SC_PA2_SC_FPOV_WE = 0x00000140, -SC_PA3_SC_FPOV_WE = 0x00000141, -SC_PA0_SC_LPOV_WE = 0x00000142, -SC_PA1_SC_LPOV_WE = 0x00000143, -SC_PA2_SC_LPOV_WE = 0x00000144, -SC_PA3_SC_LPOV_WE = 0x00000145, -SC_SC_SPI_DEALLOC_0_0 = 0x00000146, -SC_SC_SPI_DEALLOC_0_1 = 0x00000147, -SC_SC_SPI_DEALLOC_0_2 = 0x00000148, -SC_SC_SPI_DEALLOC_1_0 = 0x00000149, -SC_SC_SPI_DEALLOC_1_1 = 0x0000014a, -SC_SC_SPI_DEALLOC_1_2 = 0x0000014b, -SC_SC_SPI_DEALLOC_2_0 = 0x0000014c, -SC_SC_SPI_DEALLOC_2_1 = 0x0000014d, -SC_SC_SPI_DEALLOC_2_2 = 0x0000014e, -SC_SC_SPI_DEALLOC_3_0 = 0x0000014f, -SC_SC_SPI_DEALLOC_3_1 = 0x00000150, -SC_SC_SPI_DEALLOC_3_2 = 0x00000151, -SC_SC_SPI_FPOV_0 = 0x00000152, -SC_SC_SPI_FPOV_1 = 0x00000153, -SC_SC_SPI_FPOV_2 = 0x00000154, -SC_SC_SPI_FPOV_3 = 0x00000155, -SC_SC_SPI_EVENT = 0x00000156, -SC_PS_TS_EVENT_FIFO_PUSH = 0x00000157, -SC_PS_TS_EVENT_FIFO_POP = 0x00000158, -SC_PS_CTX_DONE_FIFO_PUSH = 0x00000159, -SC_PS_CTX_DONE_FIFO_POP = 0x0000015a, -SC_MULTICYCLE_BUBBLE_FREEZE = 0x0000015b, -SC_EOP_SYNC_WINDOW = 0x0000015c, -SC_PA0_SC_NULL_WE = 0x0000015d, -SC_PA0_SC_NULL_DEALLOC_WE = 0x0000015e, -SC_PA0_SC_DATA_FIFO_EOPG_RD = 0x0000015f, -SC_PA0_SC_DATA_FIFO_EOP_RD = 0x00000160, 
-SC_PA0_SC_DEALLOC_0_RD = 0x00000161, -SC_PA0_SC_DEALLOC_1_RD = 0x00000162, -SC_PA1_SC_DATA_FIFO_EOPG_RD = 0x00000163, -SC_PA1_SC_DATA_FIFO_EOP_RD = 0x00000164, -SC_PA1_SC_DEALLOC_0_RD = 0x00000165, -SC_PA1_SC_DEALLOC_1_RD = 0x00000166, -SC_PA1_SC_NULL_WE = 0x00000167, -SC_PA1_SC_NULL_DEALLOC_WE = 0x00000168, -SC_PA2_SC_DATA_FIFO_EOPG_RD = 0x00000169, -SC_PA2_SC_DATA_FIFO_EOP_RD = 0x0000016a, -SC_PA2_SC_DEALLOC_0_RD = 0x0000016b, -SC_PA2_SC_DEALLOC_1_RD = 0x0000016c, -SC_PA2_SC_NULL_WE = 0x0000016d, -SC_PA2_SC_NULL_DEALLOC_WE = 0x0000016e, -SC_PA3_SC_DATA_FIFO_EOPG_RD = 0x0000016f, -SC_PA3_SC_DATA_FIFO_EOP_RD = 0x00000170, -SC_PA3_SC_DEALLOC_0_RD = 0x00000171, -SC_PA3_SC_DEALLOC_1_RD = 0x00000172, -SC_PA3_SC_NULL_WE = 0x00000173, -SC_PA3_SC_NULL_DEALLOC_WE = 0x00000174, -SC_PS_PA0_SC_FIFO_EMPTY = 0x00000175, -SC_PS_PA0_SC_FIFO_FULL = 0x00000176, -SC_PA0_PS_DATA_SEND = 0x00000177, -SC_PS_PA1_SC_FIFO_EMPTY = 0x00000178, -SC_PS_PA1_SC_FIFO_FULL = 0x00000179, -SC_PA1_PS_DATA_SEND = 0x0000017a, -SC_PS_PA2_SC_FIFO_EMPTY = 0x0000017b, -SC_PS_PA2_SC_FIFO_FULL = 0x0000017c, -SC_PA2_PS_DATA_SEND = 0x0000017d, -SC_PS_PA3_SC_FIFO_EMPTY = 0x0000017e, -SC_PS_PA3_SC_FIFO_FULL = 0x0000017f, -SC_PA3_PS_DATA_SEND = 0x00000180, -SC_BUSY_PROCESSING_MULTICYCLE_PRIM = 0x00000181, -SC_BUSY_CNT_NOT_ZERO = 0x00000182, -SC_BM_BUSY = 0x00000183, -SC_BACKEND_BUSY = 0x00000184, -SC_SCF_SCB_INTERFACE_BUSY = 0x00000185, -SC_SCB_BUSY = 0x00000186, -SC_STARVED_BY_PA_WITH_UNSELECTED_PA_NOT_EMPTY = 0x00000187, -SC_STARVED_BY_PA_WITH_UNSELECTED_PA_FULL = 0x00000188, -SC_PBB_BIN_HIST_NUM_PRIMS = 0x00000189, -SC_PBB_BATCH_HIST_NUM_PRIMS = 0x0000018a, -SC_PBB_BIN_HIST_NUM_CONTEXTS = 0x0000018b, -SC_PBB_BATCH_HIST_NUM_CONTEXTS = 0x0000018c, -SC_PBB_BIN_HIST_NUM_PERSISTENT_STATES = 0x0000018d, -SC_PBB_BATCH_HIST_NUM_PERSISTENT_STATES = 0x0000018e, -SC_PBB_BATCH_HIST_NUM_PS_WAVE_BREAKS = 0x0000018f, -SC_PBB_BATCH_HIST_NUM_TRIV_REJECTED_PRIMS = 0x00000190, -SC_PBB_BATCH_HIST_NUM_ROWS_PER_PRIM = 0x00000191, 
-SC_PBB_BATCH_HIST_NUM_COLUMNS_PER_ROW = 0x00000192, -SC_PBB_BUSY = 0x00000193, -SC_PBB_BUSY_AND_RTR = 0x00000194, -SC_PBB_STALLS_PA_DUE_TO_NO_TILES = 0x00000195, -SC_PBB_NUM_BINS = 0x00000196, -SC_PBB_END_OF_BIN = 0x00000197, -SC_PBB_END_OF_BATCH = 0x00000198, -SC_PBB_PRIMBIN_PROCESSED = 0x00000199, -SC_PBB_PRIM_ADDED_TO_BATCH = 0x0000019a, -SC_PBB_NONBINNED_PRIM = 0x0000019b, -SC_PBB_TOTAL_REAL_PRIMS_OUT_OF_PBB = 0x0000019c, -SC_PBB_TOTAL_NULL_PRIMS_OUT_OF_PBB = 0x0000019d, -SC_PBB_IDLE_CLK_DUE_TO_ROW_TO_COLUMN_TRANSITION = 0x0000019e, -SC_PBB_IDLE_CLK_DUE_TO_FALSE_POSITIVE_ON_ROW = 0x0000019f, -SC_PBB_IDLE_CLK_DUE_TO_FALSE_POSITIVE_ON_COLUMN = 0x000001a0, -SC_PBB_BATCH_BREAK_DUE_TO_PERSISTENT_STATE = 0x000001a1, -SC_PBB_BATCH_BREAK_DUE_TO_CONTEXT_STATE = 0x000001a2, -SC_PBB_BATCH_BREAK_DUE_TO_PRIM = 0x000001a3, -SC_PBB_BATCH_BREAK_DUE_TO_PC_STORAGE = 0x000001a4, -SC_PBB_BATCH_BREAK_DUE_TO_EVENT = 0x000001a5, -SC_PBB_BATCH_BREAK_DUE_TO_FPOV_LIMIT = 0x000001a6, -SC_POPS_INTRA_WAVE_OVERLAPS = 0x000001a7, -SC_POPS_FORCE_EOV = 0x000001a8, -SC_PKR_QUAD_OVERLAP_NOT_FOUND_IN_WAVE_TABLE = 0x000001a9, -SC_PKR_QUAD_OVERLAP_FOUND_IN_WAVE_TABLE = 0x000001aa, -} SC_PERFCNT_SEL; - -/* - * SePairXsel enum - */ - -typedef enum SePairXsel { -RASTER_CONFIG_SE_PAIR_XSEL_8_WIDE_TILE = 0x00000000, -RASTER_CONFIG_SE_PAIR_XSEL_16_WIDE_TILE = 0x00000001, -RASTER_CONFIG_SE_PAIR_XSEL_32_WIDE_TILE = 0x00000002, -RASTER_CONFIG_SE_PAIR_XSEL_64_WIDE_TILE = 0x00000003, -RASTER_CONFIG_SE_PAIR_XSEL_128_WIDE_TILE = 0x00000004, -} SePairXsel; - -/* - * SePairYsel enum - */ - -typedef enum SePairYsel { -RASTER_CONFIG_SE_PAIR_YSEL_8_WIDE_TILE = 0x00000000, -RASTER_CONFIG_SE_PAIR_YSEL_16_WIDE_TILE = 0x00000001, -RASTER_CONFIG_SE_PAIR_YSEL_32_WIDE_TILE = 0x00000002, -RASTER_CONFIG_SE_PAIR_YSEL_64_WIDE_TILE = 0x00000003, -RASTER_CONFIG_SE_PAIR_YSEL_128_WIDE_TILE = 0x00000004, -} SePairYsel; - -/* - * SePairMap enum - */ - -typedef enum SePairMap { -RASTER_CONFIG_SE_PAIR_MAP_0 = 0x00000000, 
-RASTER_CONFIG_SE_PAIR_MAP_1 = 0x00000001, -RASTER_CONFIG_SE_PAIR_MAP_2 = 0x00000002, -RASTER_CONFIG_SE_PAIR_MAP_3 = 0x00000003, -} SePairMap; - -/* - * SeXsel enum - */ - -typedef enum SeXsel { -RASTER_CONFIG_SE_XSEL_8_WIDE_TILE = 0x00000000, -RASTER_CONFIG_SE_XSEL_16_WIDE_TILE = 0x00000001, -RASTER_CONFIG_SE_XSEL_32_WIDE_TILE = 0x00000002, -RASTER_CONFIG_SE_XSEL_64_WIDE_TILE = 0x00000003, -RASTER_CONFIG_SE_XSEL_128_WIDE_TILE = 0x00000004, -} SeXsel; - -/* - * SeYsel enum - */ - -typedef enum SeYsel { -RASTER_CONFIG_SE_YSEL_8_WIDE_TILE = 0x00000000, -RASTER_CONFIG_SE_YSEL_16_WIDE_TILE = 0x00000001, -RASTER_CONFIG_SE_YSEL_32_WIDE_TILE = 0x00000002, -RASTER_CONFIG_SE_YSEL_64_WIDE_TILE = 0x00000003, -RASTER_CONFIG_SE_YSEL_128_WIDE_TILE = 0x00000004, -} SeYsel; - -/* - * SeMap enum - */ - -typedef enum SeMap { -RASTER_CONFIG_SE_MAP_0 = 0x00000000, -RASTER_CONFIG_SE_MAP_1 = 0x00000001, -RASTER_CONFIG_SE_MAP_2 = 0x00000002, -RASTER_CONFIG_SE_MAP_3 = 0x00000003, -} SeMap; - -/* - * ScXsel enum - */ - -typedef enum ScXsel { -RASTER_CONFIG_SC_XSEL_8_WIDE_TILE = 0x00000000, -RASTER_CONFIG_SC_XSEL_16_WIDE_TILE = 0x00000001, -RASTER_CONFIG_SC_XSEL_32_WIDE_TILE = 0x00000002, -RASTER_CONFIG_SC_XSEL_64_WIDE_TILE = 0x00000003, -} ScXsel; - -/* - * ScYsel enum - */ - -typedef enum ScYsel { -RASTER_CONFIG_SC_YSEL_8_WIDE_TILE = 0x00000000, -RASTER_CONFIG_SC_YSEL_16_WIDE_TILE = 0x00000001, -RASTER_CONFIG_SC_YSEL_32_WIDE_TILE = 0x00000002, -RASTER_CONFIG_SC_YSEL_64_WIDE_TILE = 0x00000003, -} ScYsel; - -/* - * ScMap enum - */ - -typedef enum ScMap { -RASTER_CONFIG_SC_MAP_0 = 0x00000000, -RASTER_CONFIG_SC_MAP_1 = 0x00000001, -RASTER_CONFIG_SC_MAP_2 = 0x00000002, -RASTER_CONFIG_SC_MAP_3 = 0x00000003, -} ScMap; - -/* - * PkrXsel2 enum - */ - -typedef enum PkrXsel2 { -RASTER_CONFIG_PKR_XSEL2_0 = 0x00000000, -RASTER_CONFIG_PKR_XSEL2_1 = 0x00000001, -RASTER_CONFIG_PKR_XSEL2_2 = 0x00000002, -RASTER_CONFIG_PKR_XSEL2_3 = 0x00000003, -} PkrXsel2; - -/* - * PkrXsel enum - */ - -typedef enum 
PkrXsel { -RASTER_CONFIG_PKR_XSEL_0 = 0x00000000, -RASTER_CONFIG_PKR_XSEL_1 = 0x00000001, -RASTER_CONFIG_PKR_XSEL_2 = 0x00000002, -RASTER_CONFIG_PKR_XSEL_3 = 0x00000003, -} PkrXsel; - -/* - * PkrYsel enum - */ - -typedef enum PkrYsel { -RASTER_CONFIG_PKR_YSEL_0 = 0x00000000, -RASTER_CONFIG_PKR_YSEL_1 = 0x00000001, -RASTER_CONFIG_PKR_YSEL_2 = 0x00000002, -RASTER_CONFIG_PKR_YSEL_3 = 0x00000003, -} PkrYsel; - -/* - * PkrMap enum - */ - -typedef enum PkrMap { -RASTER_CONFIG_PKR_MAP_0 = 0x00000000, -RASTER_CONFIG_PKR_MAP_1 = 0x00000001, -RASTER_CONFIG_PKR_MAP_2 = 0x00000002, -RASTER_CONFIG_PKR_MAP_3 = 0x00000003, -} PkrMap; - -/* - * RbXsel enum - */ - -typedef enum RbXsel { -RASTER_CONFIG_RB_XSEL_0 = 0x00000000, -RASTER_CONFIG_RB_XSEL_1 = 0x00000001, -} RbXsel; - -/* - * RbYsel enum - */ - -typedef enum RbYsel { -RASTER_CONFIG_RB_YSEL_0 = 0x00000000, -RASTER_CONFIG_RB_YSEL_1 = 0x00000001, -} RbYsel; - -/* - * RbXsel2 enum - */ - -typedef enum RbXsel2 { -RASTER_CONFIG_RB_XSEL2_0 = 0x00000000, -RASTER_CONFIG_RB_XSEL2_1 = 0x00000001, -RASTER_CONFIG_RB_XSEL2_2 = 0x00000002, -RASTER_CONFIG_RB_XSEL2_3 = 0x00000003, -} RbXsel2; - -/* - * RbMap enum - */ - -typedef enum RbMap { -RASTER_CONFIG_RB_MAP_0 = 0x00000000, -RASTER_CONFIG_RB_MAP_1 = 0x00000001, -RASTER_CONFIG_RB_MAP_2 = 0x00000002, -RASTER_CONFIG_RB_MAP_3 = 0x00000003, -} RbMap; - -/* - * BinningMode enum - */ - -typedef enum BinningMode { -BINNING_ALLOWED = 0x00000000, -FORCE_BINNING_ON = 0x00000001, -DISABLE_BINNING_USE_NEW_SC = 0x00000002, -DISABLE_BINNING_USE_LEGACY_SC = 0x00000003, -} BinningMode; - -/* - * BinEventCntl enum - */ - -typedef enum BinEventCntl { -BINNER_BREAK_BATCH = 0x00000000, -BINNER_PIPELINE = 0x00000001, -BINNER_DROP_ASSERT = 0x00000002, -} BinEventCntl; - -/* - * CovToShaderSel enum - */ - -typedef enum CovToShaderSel { -INPUT_COVERAGE = 0x00000000, -INPUT_INNER_COVERAGE = 0x00000001, -INPUT_DEPTH_COVERAGE = 0x00000002, -RAW = 0x00000003, -} CovToShaderSel; - 
-/******************************************************* - * RMI Enums - *******************************************************/ - -/* - * RMIPerfSel enum - */ - -typedef enum RMIPerfSel { -RMI_PERF_SEL_NONE = 0x00000000, -RMI_PERF_SEL_BUSY = 0x00000001, -RMI_PERF_SEL_REG_CLK_VLD = 0x00000002, -RMI_PERF_SEL_DYN_CLK_CMN_VLD = 0x00000003, -RMI_PERF_SEL_DYN_CLK_RB_VLD = 0x00000004, -RMI_PERF_SEL_DYN_CLK_PERF_VLD = 0x00000005, -RMI_PERF_SEL_PERF_WINDOW = 0x00000006, -RMI_PERF_SEL_EVENT_SEND = 0x00000007, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID0 = 0x00000008, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID1 = 0x00000009, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID2 = 0x0000000a, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID3 = 0x0000000b, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID4 = 0x0000000c, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID5 = 0x0000000d, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID6 = 0x0000000e, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID7 = 0x0000000f, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID8 = 0x00000010, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID9 = 0x00000011, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID10 = 0x00000012, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID11 = 0x00000013, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID12 = 0x00000014, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID13 = 0x00000015, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID14 = 0x00000016, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID15 = 0x00000017, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID_ALL = 0x00000018, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID0 = 0x00000019, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID1 = 0x0000001a, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID2 = 0x0000001b, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID3 = 0x0000001c, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID4 = 0x0000001d, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID5 = 0x0000001e, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID6 = 0x0000001f, 
-RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID7 = 0x00000020, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID8 = 0x00000021, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID9 = 0x00000022, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID10 = 0x00000023, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID11 = 0x00000024, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID12 = 0x00000025, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID13 = 0x00000026, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID14 = 0x00000027, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID15 = 0x00000028, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID_ALL = 0x00000029, -RMI_PERF_SEL_UTCL1_TRANSLATION_MISS = 0x0000002a, -RMI_PERF_SEL_UTCL1_PERMISSION_MISS = 0x0000002b, -RMI_PERF_SEL_UTCL1_REQUEST = 0x0000002c, -RMI_PERF_SEL_UTCL1_STALL_INFLIGHT_MAX = 0x0000002d, -RMI_PERF_SEL_UTCL1_STALL_LRU_INFLIGHT = 0x0000002e, -RMI_PERF_SEL_UTCL1_LFIFO_FULL = 0x0000002f, -RMI_PERF_SEL_UTCL1_STALL_LFIFO_NOT_RES = 0x00000030, -RMI_PERF_SEL_UTCL1_STALL_UTCL2_REQ_OUT_OF_CREDITS = 0x00000031, -RMI_PERF_SEL_UTCL1_STALL_MISSFIFO_FULL = 0x00000032, -RMI_PERF_SEL_UTCL1_HIT_FIFO_FULL = 0x00000033, -RMI_PERF_SEL_UTCL1_STALL_MULTI_MISS = 0x00000034, -RMI_PERF_SEL_RB_RMI_WRREQ_ALL_CID = 0x00000035, -RMI_PERF_SEL_RB_RMI_WRREQ_BUSY = 0x00000036, -RMI_PERF_SEL_RB_RMI_WRREQ_CID0 = 0x00000037, -RMI_PERF_SEL_RB_RMI_WRREQ_CID1 = 0x00000038, -RMI_PERF_SEL_RB_RMI_WRREQ_CID2 = 0x00000039, -RMI_PERF_SEL_RB_RMI_WRREQ_CID3 = 0x0000003a, -RMI_PERF_SEL_RB_RMI_WRREQ_CID4 = 0x0000003b, -RMI_PERF_SEL_RB_RMI_WRREQ_CID5 = 0x0000003c, -RMI_PERF_SEL_RB_RMI_WRREQ_CID6 = 0x0000003d, -RMI_PERF_SEL_RB_RMI_WRREQ_CID7 = 0x0000003e, -RMI_PERF_SEL_RB_RMI_WRREQ_INFLIGHT_ALL_ORONE_CID = 0x0000003f, -RMI_PERF_SEL_RB_RMI_WRREQ_BURST_LENGTH_ALL_ORONE_CID = 0x00000040, -RMI_PERF_SEL_RB_RMI_WRREQ_BURST_ALL_ORONE_CID = 0x00000041, -RMI_PERF_SEL_RB_RMI_WRREQ_RESIDENCY = 0x00000042, 
-RMI_PERF_SEL_RMI_RB_WRRET_VALID_ALL_CID = 0x00000043, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID0 = 0x00000044, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID1 = 0x00000045, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID2 = 0x00000046, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID3 = 0x00000047, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID4 = 0x00000048, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID5 = 0x00000049, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID6 = 0x0000004a, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID7 = 0x0000004b, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_NACK0 = 0x0000004c, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_NACK1 = 0x0000004d, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_NACK2 = 0x0000004e, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_NACK3 = 0x0000004f, -RMI_PERF_SEL_RB_RMI_32BRDREQ_ALL_CID = 0x00000050, -RMI_PERF_SEL_RB_RMI_RDREQ_ALL_CID = 0x00000051, -RMI_PERF_SEL_RB_RMI_RDREQ_BUSY = 0x00000052, -RMI_PERF_SEL_RB_RMI_32BRDREQ_CID0 = 0x00000053, -RMI_PERF_SEL_RB_RMI_32BRDREQ_CID1 = 0x00000054, -RMI_PERF_SEL_RB_RMI_32BRDREQ_CID2 = 0x00000055, -RMI_PERF_SEL_RB_RMI_32BRDREQ_CID3 = 0x00000056, -RMI_PERF_SEL_RB_RMI_32BRDREQ_CID4 = 0x00000057, -RMI_PERF_SEL_RB_RMI_32BRDREQ_CID5 = 0x00000058, -RMI_PERF_SEL_RB_RMI_32BRDREQ_CID6 = 0x00000059, -RMI_PERF_SEL_RB_RMI_32BRDREQ_CID7 = 0x0000005a, -RMI_PERF_SEL_RB_RMI_RDREQ_CID0 = 0x0000005b, -RMI_PERF_SEL_RB_RMI_RDREQ_CID1 = 0x0000005c, -RMI_PERF_SEL_RB_RMI_RDREQ_CID2 = 0x0000005d, -RMI_PERF_SEL_RB_RMI_RDREQ_CID3 = 0x0000005e, -RMI_PERF_SEL_RB_RMI_RDREQ_CID4 = 0x0000005f, -RMI_PERF_SEL_RB_RMI_RDREQ_CID5 = 0x00000060, -RMI_PERF_SEL_RB_RMI_RDREQ_CID6 = 0x00000061, -RMI_PERF_SEL_RB_RMI_RDREQ_CID7 = 0x00000062, -RMI_PERF_SEL_RB_RMI_32BRDREQ_INFLIGHT_ALL_ORONE_CID = 0x00000063, -RMI_PERF_SEL_RB_RMI_RDREQ_BURST_LENGTH_ALL_ORONE_CID = 0x00000064, -RMI_PERF_SEL_RB_RMI_RDREQ_BURST_ALL_ORONE_CID = 0x00000065, -RMI_PERF_SEL_RB_RMI_RDREQ_RESIDENCY = 0x00000066, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_ALL_CID = 0x00000067, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID0 = 0x00000068, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID1 = 
0x00000069, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID2 = 0x0000006a, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID3 = 0x0000006b, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID4 = 0x0000006c, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID5 = 0x0000006d, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID6 = 0x0000006e, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID7 = 0x0000006f, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_NACK0 = 0x00000070, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_NACK1 = 0x00000071, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_NACK2 = 0x00000072, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_NACK3 = 0x00000073, -RMI_PERF_SEL_RMI_TC_WRREQ_ALL_CID = 0x00000074, -RMI_PERF_SEL_RMI_TC_REQ_BUSY = 0x00000075, -RMI_PERF_SEL_RMI_TC_WRREQ_CID0 = 0x00000076, -RMI_PERF_SEL_RMI_TC_WRREQ_CID1 = 0x00000077, -RMI_PERF_SEL_RMI_TC_WRREQ_CID2 = 0x00000078, -RMI_PERF_SEL_RMI_TC_WRREQ_CID3 = 0x00000079, -RMI_PERF_SEL_RMI_TC_WRREQ_CID4 = 0x0000007a, -RMI_PERF_SEL_RMI_TC_WRREQ_CID5 = 0x0000007b, -RMI_PERF_SEL_RMI_TC_WRREQ_CID6 = 0x0000007c, -RMI_PERF_SEL_RMI_TC_WRREQ_CID7 = 0x0000007d, -RMI_PERF_SEL_RMI_TC_WRREQ_INFLIGHT_ALL_CID = 0x0000007e, -RMI_PERF_SEL_TC_RMI_WRRET_VALID_ALL_CID = 0x0000007f, -RMI_PERF_SEL_RMI_TC_RDREQ_ALL_CID = 0x00000080, -RMI_PERF_SEL_RMI_TC_RDREQ_CID0 = 0x00000081, -RMI_PERF_SEL_RMI_TC_RDREQ_CID1 = 0x00000082, -RMI_PERF_SEL_RMI_TC_RDREQ_CID2 = 0x00000083, -RMI_PERF_SEL_RMI_TC_RDREQ_CID3 = 0x00000084, -RMI_PERF_SEL_RMI_TC_RDREQ_CID4 = 0x00000085, -RMI_PERF_SEL_RMI_TC_RDREQ_CID5 = 0x00000086, -RMI_PERF_SEL_RMI_TC_RDREQ_CID6 = 0x00000087, -RMI_PERF_SEL_RMI_TC_RDREQ_CID7 = 0x00000088, -RMI_PERF_SEL_RMI_TC_RDREQ_INFLIGHT_ALL_CID = 0x00000089, -RMI_PERF_SEL_TC_RMI_RDRET_VALID_ALL_CID = 0x0000008a, -RMI_PERF_SEL_UTCL1_BUSY = 0x0000008b, -RMI_PERF_SEL_RMI_UTC_REQ = 0x0000008c, -RMI_PERF_SEL_RMI_UTC_BUSY = 0x0000008d, -RMI_PERF_SEL_UTCL1_UTCL2_REQ = 0x0000008e, -RMI_PERF_SEL_PROBE_UTCL1_XNACK_RETRY = 0x0000008f, -RMI_PERF_SEL_PROBE_UTCL1_ALL_FAULT = 0x00000090, -RMI_PERF_SEL_PROBE_UTCL1_PRT_FAULT = 0x00000091, 
-RMI_PERF_SEL_PROBE_UTCL1_VMID_BYPASS = 0x00000092, -RMI_PERF_SEL_PROBE_UTCL1_XNACK_NORETRY_FAULT = 0x00000093, -RMI_PERF_SEL_XNACK_FIFO_NUM_USED = 0x00000094, -RMI_PERF_SEL_LAT_FIFO_NUM_USED = 0x00000095, -RMI_PERF_SEL_LAT_FIFO_BLOCKING_REQ = 0x00000096, -RMI_PERF_SEL_LAT_FIFO_NONBLOCKING_REQ = 0x00000097, -RMI_PERF_SEL_XNACK_FIFO_FULL = 0x00000098, -RMI_PERF_SEL_XNACK_FIFO_BUSY = 0x00000099, -RMI_PERF_SEL_LAT_FIFO_FULL = 0x0000009a, -RMI_PERF_SEL_SKID_FIFO_DEPTH = 0x0000009b, -RMI_PERF_SEL_TCIW_INFLIGHT_COUNT = 0x0000009c, -RMI_PERF_SEL_PRT_FIFO_NUM_USED = 0x0000009d, -RMI_PERF_SEL_PRT_FIFO_REQ = 0x0000009e, -RMI_PERF_SEL_PRT_FIFO_BUSY = 0x0000009f, -RMI_PERF_SEL_TCIW_REQ = 0x000000a0, -RMI_PERF_SEL_TCIW_BUSY = 0x000000a1, -RMI_PERF_SEL_SKID_FIFO_REQ = 0x000000a2, -RMI_PERF_SEL_SKID_FIFO_BUSY = 0x000000a3, -RMI_PERF_SEL_DEMUX_TCIW_RESIDENCY_NACK0 = 0x000000a4, -RMI_PERF_SEL_DEMUX_TCIW_RESIDENCY_NACK1 = 0x000000a5, -RMI_PERF_SEL_DEMUX_TCIW_RESIDENCY_NACK2 = 0x000000a6, -RMI_PERF_SEL_DEMUX_TCIW_RESIDENCY_NACK3 = 0x000000a7, -RMI_PERF_SEL_XBAR_PROBEGEN_RTS_RTR = 0x000000a8, -RMI_PERF_SEL_XBAR_PROBEGEN_RTSB_RTR = 0x000000a9, -RMI_PERF_SEL_XBAR_PROBEGEN_RTS_RTRB = 0x000000aa, -RMI_PERF_SEL_XBAR_PROBEGEN_RTSB_RTRB = 0x000000ab, -RMI_PERF_SEL_DEMUX_TCIW_FORMATTER_RTS_RTR = 0x000000ac, -RMI_PERF_SEL_DEMUX_TCIW_FORMATTER_RTSB_RTR = 0x000000ad, -RMI_PERF_SEL_DEMUX_TCIW_FORMATTER_RTS_RTRB = 0x000000ae, -RMI_PERF_SEL_DEMUX_TCIW_FORMATTER_RTSB_RTRB = 0x000000af, -RMI_PERF_SEL_WRREQCONSUMER_XBAR_WRREQ_RTS_RTR = 0x000000b0, -RMI_PERF_SEL_WRREQCONSUMER_XBAR_WRREQ_RTSB_RTR = 0x000000b1, -RMI_PERF_SEL_WRREQCONSUMER_XBAR_WRREQ_RTS_RTRB = 0x000000b2, -RMI_PERF_SEL_WRREQCONSUMER_XBAR_WRREQ_RTSB_RTRB = 0x000000b3, -RMI_PERF_SEL_RDREQCONSUMER_XBAR_RDREQ_RTS_RTR = 0x000000b4, -RMI_PERF_SEL_RDREQCONSUMER_XBAR_RDREQ_RTSB_RTR = 0x000000b5, -RMI_PERF_SEL_RDREQCONSUMER_XBAR_RDREQ_RTS_RTRB = 0x000000b6, -RMI_PERF_SEL_RDREQCONSUMER_XBAR_RDREQ_RTSB_RTRB = 0x000000b7, 
-RMI_PERF_SEL_POP_DEMUX_RTS_RTR = 0x000000b8, -RMI_PERF_SEL_POP_DEMUX_RTSB_RTR = 0x000000b9, -RMI_PERF_SEL_POP_DEMUX_RTS_RTRB = 0x000000ba, -RMI_PERF_SEL_POP_DEMUX_RTSB_RTRB = 0x000000bb, -RMI_PERF_SEL_PROBEGEN_UTC_RTS_RTR = 0x000000bc, -RMI_PERF_SEL_PROBEGEN_UTC_RTSB_RTR = 0x000000bd, -RMI_PERF_SEL_PROBEGEN_UTC_RTS_RTRB = 0x000000be, -RMI_PERF_SEL_PROBEGEN_UTC_RTSB_RTRB = 0x000000bf, -RMI_PERF_SEL_UTC_POP_RTS_RTR = 0x000000c0, -RMI_PERF_SEL_UTC_POP_RTSB_RTR = 0x000000c1, -RMI_PERF_SEL_UTC_POP_RTS_RTRB = 0x000000c2, -RMI_PERF_SEL_UTC_POP_RTSB_RTRB = 0x000000c3, -RMI_PERF_SEL_POP_XNACK_RTS_RTR = 0x000000c4, -RMI_PERF_SEL_POP_XNACK_RTSB_RTR = 0x000000c5, -RMI_PERF_SEL_POP_XNACK_RTS_RTRB = 0x000000c6, -RMI_PERF_SEL_POP_XNACK_RTSB_RTRB = 0x000000c7, -RMI_PERF_SEL_XNACK_PROBEGEN_RTS_RTR = 0x000000c8, -RMI_PERF_SEL_XNACK_PROBEGEN_RTSB_RTR = 0x000000c9, -RMI_PERF_SEL_XNACK_PROBEGEN_RTS_RTRB = 0x000000ca, -RMI_PERF_SEL_XNACK_PROBEGEN_RTSB_RTRB = 0x000000cb, -RMI_PERF_SEL_PRTFIFO_RTNFORMATTER_RTS_RTR = 0x000000cc, -RMI_PERF_SEL_PRTFIFO_RTNFORMATTER_RTSB_RTR = 0x000000cd, -RMI_PERF_SEL_PRTFIFO_RTNFORMATTER_RTS_RTRB = 0x000000ce, -RMI_PERF_SEL_PRTFIFO_RTNFORMATTER_RTSB_RTRB = 0x000000cf, -RMI_PERF_SEL_SKID_FIFO_IN_RTS = 0x000000d0, -RMI_PERF_SEL_SKID_FIFO_IN_RTSB = 0x000000d1, -RMI_PERF_SEL_SKID_FIFO_OUT_RTS = 0x000000d2, -RMI_PERF_SEL_SKID_FIFO_OUT_RTSB = 0x000000d3, -RMI_PERF_SEL_XBAR_PROBEGEN_READ_RTS_RTR = 0x000000d4, -RMI_PERF_SEL_XBAR_PROBEGEN_WRITE_RTS_RTR = 0x000000d5, -RMI_PERF_SEL_XBAR_PROBEGEN_IN0_RTS_RTR = 0x000000d6, -RMI_PERF_SEL_XBAR_PROBEGEN_IN1_RTS_RTR = 0x000000d7, -RMI_PERF_SEL_XBAR_PROBEGEN_CB_RTS_RTR = 0x000000d8, -RMI_PERF_SEL_XBAR_PROBEGEN_DB_RTS_RTR = 0x000000d9, -RMI_PERF_SEL_REORDER_FIFO_REQ = 0x000000da, -RMI_PERF_SEL_REORDER_FIFO_BUSY = 0x000000db, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_ALL_CID = 0x000000dc, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID0 = 0x000000dd, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID1 = 0x000000de, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID2 = 
0x000000df, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID3 = 0x000000e0, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID4 = 0x000000e1, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID5 = 0x000000e2, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID6 = 0x000000e3, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID7 = 0x000000e4, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK0 = 0x000000e5, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK1 = 0x000000e6, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK2 = 0x000000e7, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK3 = 0x000000e8, -} RMIPerfSel; - - -#endif /*_vega10_ENUM_HEADER*/ - diff -Nru mesa-18.3.3/src/amd/addrlib/gfx9/coord.cpp mesa-19.0.1/src/amd/addrlib/gfx9/coord.cpp --- mesa-18.3.3/src/amd/addrlib/gfx9/coord.cpp 2017-11-14 18:46:21.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/gfx9/coord.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,707 +0,0 @@ -/* - * Copyright © 2017 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -// Coordinate class implementation -#include "addrcommon.h" -#include "coord.h" - -Coordinate::Coordinate() -{ - dim = 'x'; - ord = 0; -} - -Coordinate::Coordinate(INT_8 c, INT_32 n) -{ - set(c, n); -} - -VOID Coordinate::set(INT_8 c, INT_32 n) -{ - dim = c; - ord = static_cast(n); -} - -UINT_32 Coordinate::ison(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const -{ - UINT_32 bit = static_cast(1ull << static_cast(ord)); - UINT_32 out = 0; - - switch (dim) - { - case 'm': out = m & bit; break; - case 's': out = s & bit; break; - case 'x': out = x & bit; break; - case 'y': out = y & bit; break; - case 'z': out = z & bit; break; - } - return (out != 0) ? 1 : 0; -} - -INT_8 Coordinate::getdim() -{ - return dim; -} - -INT_8 Coordinate::getord() -{ - return ord; -} - -BOOL_32 Coordinate::operator==(const Coordinate& b) -{ - return (dim == b.dim) && (ord == b.ord); -} - -BOOL_32 Coordinate::operator<(const Coordinate& b) -{ - BOOL_32 ret; - - if (dim == b.dim) - { - ret = ord < b.ord; - } - else - { - if (dim == 's' || b.dim == 'm') - { - ret = TRUE; - } - else if (b.dim == 's' || dim == 'm') - { - ret = FALSE; - } - else if (ord == b.ord) - { - ret = dim < b.dim; - } - else - { - ret = ord < b.ord; - } - } - - return ret; -} - -BOOL_32 Coordinate::operator>(const Coordinate& b) -{ - BOOL_32 lt = *this < b; - BOOL_32 eq = *this == b; - return !lt && !eq; -} - -BOOL_32 Coordinate::operator<=(const Coordinate& b) -{ - return (*this < b) || (*this == b); -} - -BOOL_32 Coordinate::operator>=(const Coordinate& b) -{ - return !(*this < b); -} - -BOOL_32 Coordinate::operator!=(const Coordinate& b) -{ - return !(*this == b); -} - -Coordinate& Coordinate::operator++(INT_32) -{ - ord++; - return *this; -} - -// CoordTerm - -CoordTerm::CoordTerm() -{ - num_coords = 0; -} - -VOID 
CoordTerm::Clear() -{ - num_coords = 0; -} - -VOID CoordTerm::add(Coordinate& co) -{ - // This function adds a coordinate INT_32o the list - // It will prevent the same coordinate from appearing, - // and will keep the list ordered from smallest to largest - UINT_32 i; - - for (i = 0; i < num_coords; i++) - { - if (m_coord[i] == co) - { - break; - } - if (m_coord[i] > co) - { - for (UINT_32 j = num_coords; j > i; j--) - { - m_coord[j] = m_coord[j - 1]; - } - m_coord[i] = co; - num_coords++; - break; - } - } - - if (i == num_coords) - { - m_coord[num_coords] = co; - num_coords++; - } -} - -VOID CoordTerm::add(CoordTerm& cl) -{ - for (UINT_32 i = 0; i < cl.num_coords; i++) - { - add(cl.m_coord[i]); - } -} - -BOOL_32 CoordTerm::remove(Coordinate& co) -{ - BOOL_32 remove = FALSE; - for (UINT_32 i = 0; i < num_coords; i++) - { - if (m_coord[i] == co) - { - remove = TRUE; - num_coords--; - } - - if (remove) - { - m_coord[i] = m_coord[i + 1]; - } - } - return remove; -} - -BOOL_32 CoordTerm::Exists(Coordinate& co) -{ - BOOL_32 exists = FALSE; - for (UINT_32 i = 0; i < num_coords; i++) - { - if (m_coord[i] == co) - { - exists = TRUE; - break; - } - } - return exists; -} - -VOID CoordTerm::copyto(CoordTerm& cl) -{ - cl.num_coords = num_coords; - for (UINT_32 i = 0; i < num_coords; i++) - { - cl.m_coord[i] = m_coord[i]; - } -} - -UINT_32 CoordTerm::getsize() -{ - return num_coords; -} - -UINT_32 CoordTerm::getxor(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const -{ - UINT_32 out = 0; - for (UINT_32 i = 0; i < num_coords; i++) - { - out = out ^ m_coord[i].ison(x, y, z, s, m); - } - return out; -} - -VOID CoordTerm::getsmallest(Coordinate& co) -{ - co = m_coord[0]; -} - -UINT_32 CoordTerm::Filter(INT_8 f, Coordinate& co, UINT_32 start, INT_8 axis) -{ - for (UINT_32 i = start; i < num_coords;) - { - if (((f == '<' && m_coord[i] < co) || - (f == '>' && m_coord[i] > co) || - (f == '=' && m_coord[i] == co)) && - (axis == '\0' || axis == m_coord[i].getdim())) - { - for 
(UINT_32 j = i; j < num_coords - 1; j++) - { - m_coord[j] = m_coord[j + 1]; - } - num_coords--; - } - else - { - i++; - } - } - return num_coords; -} - -Coordinate& CoordTerm::operator[](UINT_32 i) -{ - return m_coord[i]; -} - -BOOL_32 CoordTerm::operator==(const CoordTerm& b) -{ - BOOL_32 ret = TRUE; - - if (num_coords != b.num_coords) - { - ret = FALSE; - } - else - { - for (UINT_32 i = 0; i < num_coords; i++) - { - // Note: the lists will always be in order, so we can compare the two lists at time - if (m_coord[i] != b.m_coord[i]) - { - ret = FALSE; - break; - } - } - } - return ret; -} - -BOOL_32 CoordTerm::operator!=(const CoordTerm& b) -{ - return !(*this == b); -} - -BOOL_32 CoordTerm::exceedRange(UINT_32 xRange, UINT_32 yRange, UINT_32 zRange, UINT_32 sRange) -{ - BOOL_32 exceed = FALSE; - for (UINT_32 i = 0; (i < num_coords) && (exceed == FALSE); i++) - { - UINT_32 subject; - switch (m_coord[i].getdim()) - { - case 'x': - subject = xRange; - break; - case 'y': - subject = yRange; - break; - case 'z': - subject = zRange; - break; - case 's': - subject = sRange; - break; - case 'm': - subject = 0; - break; - default: - // Invalid input! 
- ADDR_ASSERT_ALWAYS(); - subject = 0; - break; - } - - exceed = ((1u << m_coord[i].getord()) <= subject); - } - - return exceed; -} - -// coordeq -CoordEq::CoordEq() -{ - m_numBits = 0; -} - -VOID CoordEq::remove(Coordinate& co) -{ - for (UINT_32 i = 0; i < m_numBits; i++) - { - m_eq[i].remove(co); - } -} - -BOOL_32 CoordEq::Exists(Coordinate& co) -{ - BOOL_32 exists = FALSE; - - for (UINT_32 i = 0; i < m_numBits; i++) - { - if (m_eq[i].Exists(co)) - { - exists = TRUE; - } - } - return exists; -} - -VOID CoordEq::resize(UINT_32 n) -{ - if (n > m_numBits) - { - for (UINT_32 i = m_numBits; i < n; i++) - { - m_eq[i].Clear(); - } - } - m_numBits = n; -} - -UINT_32 CoordEq::getsize() -{ - return m_numBits; -} - -UINT_64 CoordEq::solve(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const -{ - UINT_64 out = 0; - for (UINT_32 i = 0; i < m_numBits; i++) - { - if (m_eq[i].getxor(x, y, z, s, m) != 0) - { - out |= (1ULL << i); - } - } - return out; -} - -VOID CoordEq::solveAddr( - UINT_64 addr, UINT_32 sliceInM, - UINT_32& x, UINT_32& y, UINT_32& z, UINT_32& s, UINT_32& m) const -{ - UINT_32 xBitsValid = 0; - UINT_32 yBitsValid = 0; - UINT_32 zBitsValid = 0; - UINT_32 sBitsValid = 0; - UINT_32 mBitsValid = 0; - - CoordEq temp = *this; - - x = y = z = s = m = 0; - - UINT_32 bitsLeft = 0; - - for (UINT_32 i = 0; i < temp.m_numBits; i++) - { - UINT_32 termSize = temp.m_eq[i].getsize(); - - if (termSize == 1) - { - INT_8 bit = (addr >> i) & 1; - INT_8 dim = temp.m_eq[i][0].getdim(); - INT_8 ord = temp.m_eq[i][0].getord(); - - ADDR_ASSERT((ord < 32) || (bit == 0)); - - switch (dim) - { - case 'x': - xBitsValid |= (1 << ord); - x |= (bit << ord); - break; - case 'y': - yBitsValid |= (1 << ord); - y |= (bit << ord); - break; - case 'z': - zBitsValid |= (1 << ord); - z |= (bit << ord); - break; - case 's': - sBitsValid |= (1 << ord); - s |= (bit << ord); - break; - case 'm': - mBitsValid |= (1 << ord); - m |= (bit << ord); - break; - default: - break; - } - - 
temp.m_eq[i].Clear(); - } - else if (termSize > 1) - { - bitsLeft++; - } - } - - if (bitsLeft > 0) - { - if (sliceInM != 0) - { - z = m / sliceInM; - zBitsValid = 0xffffffff; - } - - do - { - bitsLeft = 0; - - for (UINT_32 i = 0; i < temp.m_numBits; i++) - { - UINT_32 termSize = temp.m_eq[i].getsize(); - - if (termSize == 1) - { - INT_8 bit = (addr >> i) & 1; - INT_8 dim = temp.m_eq[i][0].getdim(); - INT_8 ord = temp.m_eq[i][0].getord(); - - ADDR_ASSERT((ord < 32) || (bit == 0)); - - switch (dim) - { - case 'x': - xBitsValid |= (1 << ord); - x |= (bit << ord); - break; - case 'y': - yBitsValid |= (1 << ord); - y |= (bit << ord); - break; - case 'z': - zBitsValid |= (1 << ord); - z |= (bit << ord); - break; - case 's': - ADDR_ASSERT_ALWAYS(); - break; - case 'm': - ADDR_ASSERT_ALWAYS(); - break; - default: - break; - } - - temp.m_eq[i].Clear(); - } - else if (termSize > 1) - { - CoordTerm tmpTerm = temp.m_eq[i]; - - for (UINT_32 j = 0; j < termSize; j++) - { - INT_8 dim = temp.m_eq[i][j].getdim(); - INT_8 ord = temp.m_eq[i][j].getord(); - - switch (dim) - { - case 'x': - if (xBitsValid & (1 << ord)) - { - UINT_32 v = (((x >> ord) & 1) << i); - addr ^= static_cast(v); - tmpTerm.remove(temp.m_eq[i][j]); - } - break; - case 'y': - if (yBitsValid & (1 << ord)) - { - UINT_32 v = (((y >> ord) & 1) << i); - addr ^= static_cast(v); - tmpTerm.remove(temp.m_eq[i][j]); - } - break; - case 'z': - if (zBitsValid & (1 << ord)) - { - UINT_32 v = (((z >> ord) & 1) << i); - addr ^= static_cast(v); - tmpTerm.remove(temp.m_eq[i][j]); - } - break; - case 's': - ADDR_ASSERT_ALWAYS(); - break; - case 'm': - ADDR_ASSERT_ALWAYS(); - break; - default: - break; - } - } - - temp.m_eq[i] = tmpTerm; - - bitsLeft++; - } - } - } while (bitsLeft > 0); - } -} - -VOID CoordEq::copy(CoordEq& o, UINT_32 start, UINT_32 num) -{ - o.m_numBits = (num == 0xFFFFFFFF) ? 
m_numBits : num; - for (UINT_32 i = 0; i < o.m_numBits; i++) - { - m_eq[start + i].copyto(o.m_eq[i]); - } -} - -VOID CoordEq::reverse(UINT_32 start, UINT_32 num) -{ - UINT_32 n = (num == 0xFFFFFFFF) ? m_numBits : num; - - for (UINT_32 i = 0; i < n / 2; i++) - { - CoordTerm temp; - m_eq[start + i].copyto(temp); - m_eq[start + n - 1 - i].copyto(m_eq[start + i]); - temp.copyto(m_eq[start + n - 1 - i]); - } -} - -VOID CoordEq::xorin(CoordEq& x, UINT_32 start) -{ - UINT_32 n = ((m_numBits - start) < x.m_numBits) ? (m_numBits - start) : x.m_numBits; - for (UINT_32 i = 0; i < n; i++) - { - m_eq[start + i].add(x.m_eq[i]); - } -} - -UINT_32 CoordEq::Filter(INT_8 f, Coordinate& co, UINT_32 start, INT_8 axis) -{ - for (UINT_32 i = start; i < m_numBits;) - { - UINT_32 m = m_eq[i].Filter(f, co, 0, axis); - if (m == 0) - { - for (UINT_32 j = i; j < m_numBits - 1; j++) - { - m_eq[j] = m_eq[j + 1]; - } - m_numBits--; - } - else - { - i++; - } - } - return m_numBits; -} - -VOID CoordEq::shift(INT_32 amount, INT_32 start) -{ - if (amount != 0) - { - INT_32 numBits = static_cast(m_numBits); - amount = -amount; - INT_32 inc = (amount < 0) ? -1 : 1; - INT_32 i = (amount < 0) ? numBits - 1 : start; - INT_32 end = (amount < 0) ? start - 1 : numBits; - for (; (inc > 0) ? i < end : i > end; i += inc) - { - if ((i + amount < start) || (i + amount >= numBits)) - { - m_eq[i].Clear(); - } - else - { - m_eq[i + amount].copyto(m_eq[i]); - } - } - } -} - -CoordTerm& CoordEq::operator[](UINT_32 i) -{ - return m_eq[i]; -} - -VOID CoordEq::mort2d(Coordinate& c0, Coordinate& c1, UINT_32 start, UINT_32 end) -{ - if (end == 0) - { - ADDR_ASSERT(m_numBits > 0); - end = m_numBits - 1; - } - for (UINT_32 i = start; i <= end; i++) - { - UINT_32 select = (i - start) % 2; - Coordinate& c = (select == 0) ? 
c0 : c1; - m_eq[i].add(c); - c++; - } -} - -VOID CoordEq::mort3d(Coordinate& c0, Coordinate& c1, Coordinate& c2, UINT_32 start, UINT_32 end) -{ - if (end == 0) - { - ADDR_ASSERT(m_numBits > 0); - end = m_numBits - 1; - } - for (UINT_32 i = start; i <= end; i++) - { - UINT_32 select = (i - start) % 3; - Coordinate& c = (select == 0) ? c0 : ((select == 1) ? c1 : c2); - m_eq[i].add(c); - c++; - } -} - -BOOL_32 CoordEq::operator==(const CoordEq& b) -{ - BOOL_32 ret = TRUE; - - if (m_numBits != b.m_numBits) - { - ret = FALSE; - } - else - { - for (UINT_32 i = 0; i < m_numBits; i++) - { - if (m_eq[i] != b.m_eq[i]) - { - ret = FALSE; - break; - } - } - } - return ret; -} - -BOOL_32 CoordEq::operator!=(const CoordEq& b) -{ - return !(*this == b); -} - diff -Nru mesa-18.3.3/src/amd/addrlib/gfx9/coord.h mesa-19.0.1/src/amd/addrlib/gfx9/coord.h --- mesa-18.3.3/src/amd/addrlib/gfx9/coord.h 2017-11-14 18:46:21.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/gfx9/coord.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,114 +0,0 @@ -/* - * Copyright © 2017 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -// Class used to define a coordinate bit - -#ifndef __COORD_H -#define __COORD_H - -class Coordinate -{ -public: - Coordinate(); - Coordinate(INT_8 c, INT_32 n); - - VOID set(INT_8 c, INT_32 n); - UINT_32 ison(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const; - INT_8 getdim(); - INT_8 getord(); - - BOOL_32 operator==(const Coordinate& b); - BOOL_32 operator<(const Coordinate& b); - BOOL_32 operator>(const Coordinate& b); - BOOL_32 operator<=(const Coordinate& b); - BOOL_32 operator>=(const Coordinate& b); - BOOL_32 operator!=(const Coordinate& b); - Coordinate& operator++(INT_32); - -private: - INT_8 dim; - INT_8 ord; -}; - -class CoordTerm -{ -public: - CoordTerm(); - VOID Clear(); - VOID add(Coordinate& co); - VOID add(CoordTerm& cl); - BOOL_32 remove(Coordinate& co); - BOOL_32 Exists(Coordinate& co); - VOID copyto(CoordTerm& cl); - UINT_32 getsize(); - UINT_32 getxor(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const; - - VOID getsmallest(Coordinate& co); - UINT_32 Filter(INT_8 f, Coordinate& co, UINT_32 start = 0, INT_8 axis = '\0'); - Coordinate& operator[](UINT_32 i); - BOOL_32 operator==(const CoordTerm& b); - BOOL_32 operator!=(const CoordTerm& b); - BOOL_32 exceedRange(UINT_32 xRange, UINT_32 yRange = 0, UINT_32 zRange = 0, UINT_32 sRange = 0); - -private: - static const UINT_32 MaxCoords = 8; - UINT_32 num_coords; - Coordinate m_coord[MaxCoords]; -}; - -class CoordEq -{ -public: - CoordEq(); - VOID remove(Coordinate& co); - BOOL_32 
Exists(Coordinate& co); - VOID resize(UINT_32 n); - UINT_32 getsize(); - virtual UINT_64 solve(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const; - virtual VOID solveAddr(UINT_64 addr, UINT_32 sliceInM, - UINT_32& x, UINT_32& y, UINT_32& z, UINT_32& s, UINT_32& m) const; - - VOID copy(CoordEq& o, UINT_32 start = 0, UINT_32 num = 0xFFFFFFFF); - VOID reverse(UINT_32 start = 0, UINT_32 num = 0xFFFFFFFF); - VOID xorin(CoordEq& x, UINT_32 start = 0); - UINT_32 Filter(INT_8 f, Coordinate& co, UINT_32 start = 0, INT_8 axis = '\0'); - VOID shift(INT_32 amount, INT_32 start = 0); - virtual CoordTerm& operator[](UINT_32 i); - VOID mort2d(Coordinate& c0, Coordinate& c1, UINT_32 start = 0, UINT_32 end = 0); - VOID mort3d(Coordinate& c0, Coordinate& c1, Coordinate& c2, UINT_32 start = 0, UINT_32 end = 0); - - BOOL_32 operator==(const CoordEq& b); - BOOL_32 operator!=(const CoordEq& b); - -private: - static const UINT_32 MaxEqBits = 64; - UINT_32 m_numBits; - - CoordTerm m_eq[MaxEqBits]; -}; - -#endif - diff -Nru mesa-18.3.3/src/amd/addrlib/gfx9/gfx9addrlib.cpp mesa-19.0.1/src/amd/addrlib/gfx9/gfx9addrlib.cpp --- mesa-18.3.3/src/amd/addrlib/gfx9/gfx9addrlib.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/gfx9/gfx9addrlib.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,5016 +0,0 @@ -/* - * Copyright © 2017 Advanced Micro Devices, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -************************************************************************************************************************ -* @file gfx9addrlib.cpp -* @brief Contgfx9ns the implementation for the Gfx9Lib class. 
-************************************************************************************************************************ -*/ - -#include "gfx9addrlib.h" - -#include "gfx9_gb_reg.h" - -#include "amdgpu_asic_addr.h" - -#include "util/macros.h" - -//////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace Addr -{ - -/** -************************************************************************************************************************ -* Gfx9HwlInit -* -* @brief -* Creates an Gfx9Lib object. -* -* @return -* Returns an Gfx9Lib object pointer. -************************************************************************************************************************ -*/ -Addr::Lib* Gfx9HwlInit(const Client* pClient) -{ - return V2::Gfx9Lib::CreateObj(pClient); -} - -namespace V2 -{ - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Static Const Member -//////////////////////////////////////////////////////////////////////////////////////////////////// - -const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] = -{//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt - {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR - {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S - {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D - {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_256B_R - - {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z - {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S - {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D - {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_R - - {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z - {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S - {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D - {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0}, // 
ADDR_SW_64KB_R - - {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_Z - {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_S - {0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_VAR_D - {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_R - - {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_Z_T - {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T - {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T - {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0}, // ADDR_SW_64KB_R_T - - {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_Z_x - {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_x - {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_x - {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_4KB_R_x - - {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X - {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X - {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X - {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_R_X - - {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_Z_X - {0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_S_X - {0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_VAR_D_X - {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_VAR_R_X - {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL -}; - -const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, - 8, 6, 5, 4, 3, 2, 1, 0}; - -const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}}; - -const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}}; - -/** -************************************************************************************************************************ -* Gfx9Lib::Gfx9Lib -* -* @brief -* Constructor -* -************************************************************************************************************************ -*/ -Gfx9Lib::Gfx9Lib(const Client* pClient) - : - Lib(pClient), - m_numEquations(0) -{ - m_class = 
AI_ADDRLIB; - memset(&m_settings, 0, sizeof(m_settings)); - memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable)); -} - -/** -************************************************************************************************************************ -* Gfx9Lib::~Gfx9Lib -* -* @brief -* Destructor -************************************************************************************************************************ -*/ -Gfx9Lib::~Gfx9Lib() -{ -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeHtileInfo -* -* @brief -* Interface function stub of AddrComputeHtilenfo -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo( - const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned, - pIn->swizzleMode); - - UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1; - - UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2; - - if ((numPipeTotal == 1) && (numRbTotal == 1)) - { - numCompressBlkPerMetaBlkLog2 = 10; - } - else - { - if (m_settings.applyAliasFix) - { - numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2); - } - else - { - numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10; - } - } - - numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2; - - Dim3d metaBlkDim = {8, 8, 1}; - UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2; - UINT_32 widthAmp = (pIn->numMipLevels > 1) ? 
(totalAmpBits >> 1) : RoundHalf(totalAmpBits); - UINT_32 heightAmp = totalAmpBits - widthAmp; - metaBlkDim.w <<= widthAmp; - metaBlkDim.h <<= heightAmp; - -#if DEBUG - Dim3d metaBlkDimDbg = {8, 8, 1}; - for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++) - { - if ((metaBlkDimDbg.h < metaBlkDimDbg.w) || - ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w))) - { - metaBlkDimDbg.h <<= 1; - } - else - { - metaBlkDimDbg.w <<= 1; - } - } - ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h)); -#endif - - UINT_32 numMetaBlkX; - UINT_32 numMetaBlkY; - UINT_32 numMetaBlkZ; - - GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo, - pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices, - &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ); - - const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2; - UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; - - if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2)) - { - align *= (numPipeTotal >> 1); - } - - align = Max(align, metaBlkSize); - - if (m_settings.metaBaseAlignFix) - { - align = Max(align, GetBlockSize(pIn->swizzleMode)); - } - - if (m_settings.htileAlignFix) - { - const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2; - const INT_32 htileCachelineSizeLog2 = 11; - const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal); - - INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits)); - - align <<= rbMaskPadding; - } - - pOut->pitch = numMetaBlkX * metaBlkDim.w; - pOut->height = numMetaBlkY * metaBlkDim.h; - pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize; - - pOut->metaBlkWidth = metaBlkDim.w; - pOut->metaBlkHeight = metaBlkDim.h; - pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY; - - pOut->baseAlign = align; - pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align); - - return ADDR_OK; -} - -/** 
-************************************************************************************************************************ -* Gfx9Lib::HwlComputeCmaskInfo -* -* @brief -* Interface function stub of AddrComputeCmaskInfo -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo( - const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ -// TODO: Clarify with AddrLib team -// ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D); - - UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned, - pIn->swizzleMode); - - UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1; - - UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk; - - if ((numPipeTotal == 1) && (numRbTotal == 1)) - { - numCompressBlkPerMetaBlkLog2 = 13; - } - else - { - if (m_settings.applyAliasFix) - { - numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2); - } - else - { - numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10; - } - - numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u); - } - - numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2; - - Dim2d metaBlkDim = {8, 8}; - UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2; - UINT_32 heightAmp = totalAmpBits >> 1; - UINT_32 widthAmp = totalAmpBits - heightAmp; - metaBlkDim.w <<= widthAmp; - metaBlkDim.h <<= heightAmp; - -#if DEBUG - Dim2d metaBlkDimDbg = {8, 8}; - for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++) - { - if (metaBlkDimDbg.h < metaBlkDimDbg.w) - { - metaBlkDimDbg.h <<= 1; - } - else - { - metaBlkDimDbg.w <<= 1; - } - } - ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h)); -#endif - - UINT_32 numMetaBlkX = (pIn->unalignedWidth + 
metaBlkDim.w - 1) / metaBlkDim.w; - UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h; - UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u); - - UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; - - if (m_settings.metaBaseAlignFix) - { - sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode)); - } - - pOut->pitch = numMetaBlkX * metaBlkDim.w; - pOut->height = numMetaBlkY * metaBlkDim.h; - pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1; - pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign); - pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign); - - pOut->metaBlkWidth = metaBlkDim.w; - pOut->metaBlkHeight = metaBlkDim.h; - - pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY; - - return ADDR_OK; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::GetMetaMipInfo -* -* @brief -* Get meta mip info -* -* @return -* N/A -************************************************************************************************************************ -*/ -VOID Gfx9Lib::GetMetaMipInfo( - UINT_32 numMipLevels, ///< [in] number of mip levels - Dim3d* pMetaBlkDim, ///< [in] meta block dimension - BOOL_32 dataThick, ///< [in] data surface is thick - ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info - UINT_32 mip0Width, ///< [in] mip0 width - UINT_32 mip0Height, ///< [in] mip0 height - UINT_32 mip0Depth, ///< [in] mip0 depth - UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain - UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain - UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain - const -{ - UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w; - UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h; - UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d; - UINT_32 tailWidth 
= pMetaBlkDim->w; - UINT_32 tailHeight = pMetaBlkDim->h >> 1; - UINT_32 tailDepth = pMetaBlkDim->d; - BOOL_32 inTail = FALSE; - AddrMajorMode major = ADDR_MAJOR_MAX_TYPE; - - if (numMipLevels > 1) - { - if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY)) - { - // Z major - major = ADDR_MAJOR_Z; - } - else if (numMetaBlkX >= numMetaBlkY) - { - // X major - major = ADDR_MAJOR_X; - } - else - { - // Y major - major = ADDR_MAJOR_Y; - } - - inTail = ((mip0Width <= tailWidth) && - (mip0Height <= tailHeight) && - ((dataThick == FALSE) || (mip0Depth <= tailDepth))); - - if (inTail == FALSE) - { - UINT_32 orderLimit; - UINT_32 *pMipDim; - UINT_32 *pOrderDim; - - if (major == ADDR_MAJOR_Z) - { - // Z major - pMipDim = &numMetaBlkY; - pOrderDim = &numMetaBlkZ; - orderLimit = 4; - } - else if (major == ADDR_MAJOR_X) - { - // X major - pMipDim = &numMetaBlkY; - pOrderDim = &numMetaBlkX; - orderLimit = 4; - } - else - { - // Y major - pMipDim = &numMetaBlkX; - pOrderDim = &numMetaBlkY; - orderLimit = 2; - } - - if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3)) - { - *pMipDim += 2; - } - else - { - *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1)); - } - } - } - - if (pInfo != NULL) - { - UINT_32 mipWidth = mip0Width; - UINT_32 mipHeight = mip0Height; - UINT_32 mipDepth = mip0Depth; - Dim3d mipCoord = {0}; - - for (UINT_32 mip = 0; mip < numMipLevels; mip++) - { - if (inTail) - { - GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip, - pMetaBlkDim); - break; - } - else - { - mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w); - mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h); - mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d); - - pInfo[mip].inMiptail = FALSE; - pInfo[mip].startX = mipCoord.w; - pInfo[mip].startY = mipCoord.h; - pInfo[mip].startZ = mipCoord.d; - pInfo[mip].width = mipWidth; - pInfo[mip].height = mipHeight; - pInfo[mip].depth = dataThick ? 
mipDepth : 1; - - if ((mip >= 3) || (mip & 1)) - { - switch (major) - { - case ADDR_MAJOR_X: - mipCoord.w += mipWidth; - break; - case ADDR_MAJOR_Y: - mipCoord.h += mipHeight; - break; - case ADDR_MAJOR_Z: - mipCoord.d += mipDepth; - break; - default: - break; - } - } - else - { - switch (major) - { - case ADDR_MAJOR_X: - mipCoord.h += mipHeight; - break; - case ADDR_MAJOR_Y: - mipCoord.w += mipWidth; - break; - case ADDR_MAJOR_Z: - mipCoord.h += mipHeight; - break; - default: - break; - } - } - - mipWidth = Max(mipWidth >> 1, 1u); - mipHeight = Max(mipHeight >> 1, 1u); - mipDepth = Max(mipDepth >> 1, 1u); - - inTail = ((mipWidth <= tailWidth) && - (mipHeight <= tailHeight) && - ((dataThick == FALSE) || (mipDepth <= tailDepth))); - } - } - } - - *pNumMetaBlkX = numMetaBlkX; - *pNumMetaBlkY = numMetaBlkY; - *pNumMetaBlkZ = numMetaBlkZ; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeDccInfo -* -* @brief -* Interface function to compute DCC key info -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo( - const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - BOOL_32 dataLinear = IsLinear(pIn->swizzleMode); - BOOL_32 metaLinear = pIn->dccKeyFlags.linear; - BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned; - - if (dataLinear) - { - metaLinear = TRUE; - } - else if (metaLinear == TRUE) - { - pipeAligned = FALSE; - } - - UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode); - - if (metaLinear) - { - // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9. 
- ADDR_ASSERT_ALWAYS(); - - pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes; - pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign); - } - else - { - BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode); - - UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096; - - UINT_32 numFrags = Max(pIn->numFrags, 1u); - UINT_32 numSlices = Max(pIn->numSlices, 1u); - - minMetaBlkSize /= numFrags; - - UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize; - - UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1; - - if ((numPipeTotal > 1) || (numRbTotal > 1)) - { - const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10); - - numCompressBlkPerMetaBlk = - Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize)); - - if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp) - { - numCompressBlkPerMetaBlk = 65536 * pIn->bpp; - } - } - - Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp); - Dim3d metaBlkDim = compressBlkDim; - - for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1) - { - if ((metaBlkDim.h < metaBlkDim.w) || - ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w))) - { - if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d)) - { - metaBlkDim.h <<= 1; - } - else - { - metaBlkDim.d <<= 1; - } - } - else - { - if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d)) - { - metaBlkDim.w <<= 1; - } - else - { - metaBlkDim.d <<= 1; - } - } - } - - UINT_32 numMetaBlkX; - UINT_32 numMetaBlkY; - UINT_32 numMetaBlkZ; - - GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo, - pIn->unalignedWidth, pIn->unalignedHeight, numSlices, - &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ); - - UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; - - if (numFrags > m_maxCompFrag) - { - sizeAlign *= (numFrags / m_maxCompFrag); - } - - if (m_settings.metaBaseAlignFix) 
- { - sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode)); - } - - pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ * - numCompressBlkPerMetaBlk * numFrags; - pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign); - pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign); - - pOut->pitch = numMetaBlkX * metaBlkDim.w; - pOut->height = numMetaBlkY * metaBlkDim.h; - pOut->depth = numMetaBlkZ * metaBlkDim.d; - - pOut->compressBlkWidth = compressBlkDim.w; - pOut->compressBlkHeight = compressBlkDim.h; - pOut->compressBlkDepth = compressBlkDim.d; - - pOut->metaBlkWidth = metaBlkDim.w; - pOut->metaBlkHeight = metaBlkDim.h; - pOut->metaBlkDepth = metaBlkDim.d; - - pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY; - pOut->fastClearSizePerSlice = - pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag); - } - - return ADDR_OK; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeMaxBaseAlignments -* -* @brief -* Gets maximum alignments -* @return -* maximum alignments -************************************************************************************************************************ -*/ -UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const -{ - return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB); -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeMaxMetaBaseAlignments -* -* @brief -* Gets maximum alignments for metadata -* @return -* maximum alignments for metadata -************************************************************************************************************************ -*/ -UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const -{ - // Max base alignment for Htile - const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z); - const UINT_32 maxNumRbTotal = m_se * 
m_rbPerSe; - - // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2), - // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic. - ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u)); - const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u); - - UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes; - - if (maxNumPipeTotal > 2) - { - maxBaseAlignHtile *= (maxNumPipeTotal >> 1); - } - - maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile); - - if (m_settings.metaBaseAlignFix) - { - maxBaseAlignHtile = Max(maxBaseAlignHtile, GetBlockSize(ADDR_SW_64KB)); - } - - if (m_settings.htileAlignFix) - { - maxBaseAlignHtile *= maxNumPipeTotal; - } - - // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate - - // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate - UINT_32 maxBaseAlignDcc3D = 65536; - - if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1)) - { - maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u); - } - - // Max base alignment for Msaa Dcc - UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag); - - if (m_settings.metaBaseAlignFix) - { - maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, GetBlockSize(ADDR_SW_64KB)); - } - - return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D)); -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeCmaskAddrFromCoord -* -* @brief -* Interface function stub of AddrComputeCmaskAddrFromCoord -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE 
Gfx9Lib::HwlComputeCmaskAddrFromCoord( - const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure -{ - ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0}; - input.size = sizeof(input); - input.cMaskFlags = pIn->cMaskFlags; - input.colorFlags = pIn->colorFlags; - input.unalignedWidth = Max(pIn->unalignedWidth, 1u); - input.unalignedHeight = Max(pIn->unalignedHeight, 1u); - input.numSlices = Max(pIn->numSlices, 1u); - input.swizzleMode = pIn->swizzleMode; - input.resourceType = pIn->resourceType; - - ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0}; - output.size = sizeof(output); - - ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output); - - if (returnCode == ADDR_OK) - { - UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags); - UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3); - UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); - UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); - - MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags, - Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType, - metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}; - - const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); - - UINT_32 xb = pIn->x / output.metaBlkWidth; - UINT_32 yb = pIn->y / output.metaBlkHeight; - UINT_32 zb = pIn->slice; - - UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth; - UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; - UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; - - UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex); - - pOut->addr = address >> 1; - pOut->bitPosition = static_cast((address & 1) << 2); - - - UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned, - pIn->swizzleMode); - - UINT_64 pipeXor = static_cast(pIn->pipeXor & ((1 << numPipeBits) - 1)); - - pOut->addr ^= (pipeXor << m_pipeInterleaveLog2); 
- } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeHtileAddrFromCoord -* -* @brief -* Interface function stub of AddrComputeHtileAddrFromCoord -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord( - const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pIn->numMipLevels > 1) - { - returnCode = ADDR_NOTIMPLEMENTED; - } - else - { - ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0}; - input.size = sizeof(input); - input.hTileFlags = pIn->hTileFlags; - input.depthFlags = pIn->depthflags; - input.swizzleMode = pIn->swizzleMode; - input.unalignedWidth = Max(pIn->unalignedWidth, 1u); - input.unalignedHeight = Max(pIn->unalignedHeight, 1u); - input.numSlices = Max(pIn->numSlices, 1u); - input.numMipLevels = Max(pIn->numMipLevels, 1u); - - ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0}; - output.size = sizeof(output); - - returnCode = ComputeHtileInfo(&input, &output); - - if (returnCode == ADDR_OK) - { - UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); - UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); - UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); - UINT_32 numSamplesLog2 = Log2(pIn->numSamples); - - MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, - Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, - metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}; - - const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); - - UINT_32 xb = pIn->x / output.metaBlkWidth; - UINT_32 yb = pIn->y / output.metaBlkHeight; - UINT_32 zb = pIn->slice; - - UINT_32 pitchInBlock = output.pitch / 
output.metaBlkWidth; - UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; - UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; - - UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex); - - pOut->addr = address >> 1; - - UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned, - pIn->swizzleMode); - - UINT_64 pipeXor = static_cast(pIn->pipeXor & ((1 << numPipeBits) - 1)); - - pOut->addr ^= (pipeXor << m_pipeInterleaveLog2); - } - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeHtileCoordFromAddr -* -* @brief -* Interface function stub of AddrComputeHtileCoordFromAddr -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr( - const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pIn->numMipLevels > 1) - { - returnCode = ADDR_NOTIMPLEMENTED; - } - else - { - ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0}; - input.size = sizeof(input); - input.hTileFlags = pIn->hTileFlags; - input.swizzleMode = pIn->swizzleMode; - input.unalignedWidth = Max(pIn->unalignedWidth, 1u); - input.unalignedHeight = Max(pIn->unalignedHeight, 1u); - input.numSlices = Max(pIn->numSlices, 1u); - input.numMipLevels = Max(pIn->numMipLevels, 1u); - - ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0}; - output.size = sizeof(output); - - returnCode = ComputeHtileInfo(&input, &output); - - if (returnCode == ADDR_OK) - { - UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); - UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); - UINT_32 metaBlkHeightLog2 = 
Log2(output.metaBlkHeight); - UINT_32 numSamplesLog2 = Log2(pIn->numSamples); - - MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, - Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, - metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}; - - const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); - - UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned, - pIn->swizzleMode); - - UINT_64 pipeXor = static_cast(pIn->pipeXor & ((1 << numPipeBits) - 1)); - - UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1; - - UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth; - UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; - - UINT_32 x, y, z, s, m; - pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m); - - pOut->slice = m / sliceSizeInBlock; - pOut->y = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y; - pOut->x = (m % pitchInBlock) * output.metaBlkWidth + x; - } - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeDccAddrFromCoord -* -* @brief -* Interface function stub of AddrComputeDccAddrFromCoord -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord( - const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear) - { - returnCode = ADDR_NOTIMPLEMENTED; - } - else - { - ADDR2_COMPUTE_DCCINFO_INPUT input = {0}; - input.size = sizeof(input); - input.dccKeyFlags = pIn->dccKeyFlags; - input.colorFlags = pIn->colorFlags; - input.swizzleMode = pIn->swizzleMode; - 
input.resourceType = pIn->resourceType; - input.bpp = pIn->bpp; - input.unalignedWidth = Max(pIn->unalignedWidth, 1u); - input.unalignedHeight = Max(pIn->unalignedHeight, 1u); - input.numSlices = Max(pIn->numSlices, 1u); - input.numFrags = Max(pIn->numFrags, 1u); - input.numMipLevels = Max(pIn->numMipLevels, 1u); - - ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0}; - output.size = sizeof(output); - - returnCode = ComputeDccInfo(&input, &output); - - if (returnCode == ADDR_OK) - { - UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); - UINT_32 numSamplesLog2 = Log2(pIn->numFrags); - UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); - UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); - UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth); - UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth); - UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight); - UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth); - - MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags, - Gfx9DataColor, pIn->swizzleMode, pIn->resourceType, - metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2, - compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2}; - - const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); - - UINT_32 xb = pIn->x / output.metaBlkWidth; - UINT_32 yb = pIn->y / output.metaBlkHeight; - UINT_32 zb = pIn->slice / output.metaBlkDepth; - - UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth; - UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; - UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; - - UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex); - - pOut->addr = address >> 1; - - UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned, - pIn->swizzleMode); - - UINT_64 pipeXor = static_cast(pIn->pipeXor & ((1 << numPipeBits) - 1)); - - pOut->addr ^= (pipeXor << m_pipeInterleaveLog2); - } - } - - return 
returnCode; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlInitGlobalParams -* -* @brief -* Initializes global parameters -* -* @return -* TRUE if all settings are valid -* -************************************************************************************************************************ -*/ -BOOL_32 Gfx9Lib::HwlInitGlobalParams( - const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input -{ - BOOL_32 valid = TRUE; - - if (m_settings.isArcticIsland) - { - GB_ADDR_CONFIG gbAddrConfig; - - gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig; - - // These values are copied from CModel code - switch (gbAddrConfig.bits.NUM_PIPES) - { - case ADDR_CONFIG_1_PIPE: - m_pipes = 1; - m_pipesLog2 = 0; - break; - case ADDR_CONFIG_2_PIPE: - m_pipes = 2; - m_pipesLog2 = 1; - break; - case ADDR_CONFIG_4_PIPE: - m_pipes = 4; - m_pipesLog2 = 2; - break; - case ADDR_CONFIG_8_PIPE: - m_pipes = 8; - m_pipesLog2 = 3; - break; - case ADDR_CONFIG_16_PIPE: - m_pipes = 16; - m_pipesLog2 = 4; - break; - case ADDR_CONFIG_32_PIPE: - m_pipes = 32; - m_pipesLog2 = 5; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - - switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE) - { - case ADDR_CONFIG_PIPE_INTERLEAVE_256B: - m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B; - m_pipeInterleaveLog2 = 8; - break; - case ADDR_CONFIG_PIPE_INTERLEAVE_512B: - m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B; - m_pipeInterleaveLog2 = 9; - break; - case ADDR_CONFIG_PIPE_INTERLEAVE_1KB: - m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB; - m_pipeInterleaveLog2 = 10; - break; - case ADDR_CONFIG_PIPE_INTERLEAVE_2KB: - m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB; - m_pipeInterleaveLog2 = 11; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - - // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, - // and any larger value requires a 
post-process (left shift) on the output pipeBankXor bits. - ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B); - - switch (gbAddrConfig.bits.NUM_BANKS) - { - case ADDR_CONFIG_1_BANK: - m_banks = 1; - m_banksLog2 = 0; - break; - case ADDR_CONFIG_2_BANK: - m_banks = 2; - m_banksLog2 = 1; - break; - case ADDR_CONFIG_4_BANK: - m_banks = 4; - m_banksLog2 = 2; - break; - case ADDR_CONFIG_8_BANK: - m_banks = 8; - m_banksLog2 = 3; - break; - case ADDR_CONFIG_16_BANK: - m_banks = 16; - m_banksLog2 = 4; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - - switch (gbAddrConfig.bits.NUM_SHADER_ENGINES) - { - case ADDR_CONFIG_1_SHADER_ENGINE: - m_se = 1; - m_seLog2 = 0; - break; - case ADDR_CONFIG_2_SHADER_ENGINE: - m_se = 2; - m_seLog2 = 1; - break; - case ADDR_CONFIG_4_SHADER_ENGINE: - m_se = 4; - m_seLog2 = 2; - break; - case ADDR_CONFIG_8_SHADER_ENGINE: - m_se = 8; - m_seLog2 = 3; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - - switch (gbAddrConfig.bits.NUM_RB_PER_SE) - { - case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE: - m_rbPerSe = 1; - m_rbPerSeLog2 = 0; - break; - case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE: - m_rbPerSe = 2; - m_rbPerSeLog2 = 1; - break; - case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE: - m_rbPerSe = 4; - m_rbPerSeLog2 = 2; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - - switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS) - { - case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS: - m_maxCompFrag = 1; - m_maxCompFragLog2 = 0; - break; - case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS: - m_maxCompFrag = 2; - m_maxCompFragLog2 = 1; - break; - case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS: - m_maxCompFrag = 4; - m_maxCompFragLog2 = 2; - break; - case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS: - m_maxCompFrag = 8; - m_maxCompFragLog2 = 3; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - - m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2; - ADDR_ASSERT((m_blockVarSizeLog2 == 0) || - ((m_blockVarSizeLog2 >= 17u) && 
(m_blockVarSizeLog2 <= 20u))); - m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u); - - if ((m_rbPerSeLog2 == 1) && - (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) || - ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2))))) - { - ADDR_ASSERT(m_settings.isVega10 == FALSE); - ADDR_ASSERT(m_settings.isRaven == FALSE); - ADDR_ASSERT(m_settings.isVega20 == FALSE); - - if (m_settings.isVega12) - { - m_settings.htileCacheRbConflict = 1; - } - } - } - else - { - valid = FALSE; - ADDR_NOT_IMPLEMENTED(); - } - - if (valid) - { - InitEquationTable(); - } - - return valid; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlConvertChipFamily -* -* @brief -* Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision -* @return -* ChipFamily -************************************************************************************************************************ -*/ -ChipFamily Gfx9Lib::HwlConvertChipFamily( - UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h - UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h -{ - ChipFamily family = ADDR_CHIP_FAMILY_AI; - - switch (uChipFamily) - { - case FAMILY_AI: - m_settings.isArcticIsland = 1; - m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision); - m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision); - m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision); - m_settings.isDce12 = 1; - - if (m_settings.isVega10 == 0) - { - m_settings.htileAlignFix = 1; - m_settings.applyAliasFix = 1; - } - - m_settings.metaBaseAlignFix = 1; - - m_settings.depthPipeXorDisable = 1; - break; - case FAMILY_RV: - m_settings.isArcticIsland = 1; - m_settings.isRaven = ASICREV_IS_RAVEN(uChipRevision) || ASICREV_IS_RAVEN2(uChipRevision); - - if (m_settings.isRaven) - { - m_settings.isDcn1 = 1; - } - - m_settings.metaBaseAlignFix = 1; - - if 
(ASICREV_IS_RAVEN(uChipRevision)) - { - m_settings.depthPipeXorDisable = 1; - } - break; - - default: - ADDR_ASSERT(!"This should be a Fusion"); - break; - } - - return family; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::InitRbEquation -* -* @brief -* Init RB equation -* @return -* N/A -************************************************************************************************************************ -*/ -VOID Gfx9Lib::GetRbEquation( - CoordEq* pRbEq, ///< [out] rb equation - UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine - UINT_32 numSeLog2) ///< [in] number of shader engine - const -{ - // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32 - UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4; - Coordinate cx('x', rbRegion); - Coordinate cy('y', rbRegion); - - UINT_32 start = 0; - UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2; - - // Clear the rb equation - pRbEq->resize(0); - pRbEq->resize(numRbTotalLog2); - - if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1)) - { - // Special case when more than 1 SE, and 2 RB per SE - (*pRbEq)[0].add(cx); - (*pRbEq)[0].add(cy); - cx++; - cy++; - - if (m_settings.applyAliasFix == false) - { - (*pRbEq)[0].add(cy); - } - - (*pRbEq)[0].add(cy); - start++; - } - - UINT_32 numBits = 2 * (numRbTotalLog2 - start); - - for (UINT_32 i = 0; i < numBits; i++) - { - UINT_32 idx = - start + (((start + i) >= numRbTotalLog2) ? 
(2 * (numRbTotalLog2 - start) - i - 1) : i); - - if ((i % 2) == 1) - { - (*pRbEq)[idx].add(cx); - cx++; - } - else - { - (*pRbEq)[idx].add(cy); - cy++; - } - } -} - -/** -************************************************************************************************************************ -* Gfx9Lib::GetDataEquation -* -* @brief -* Get data equation for fmask and Z -* @return -* N/A -************************************************************************************************************************ -*/ -VOID Gfx9Lib::GetDataEquation( - CoordEq* pDataEq, ///< [out] data surface equation - Gfx9DataType dataSurfaceType, ///< [in] data surface type - AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode - AddrResourceType resourceType, ///< [in] data surface resource type - UINT_32 elementBytesLog2, ///< [in] data surface element bytes - UINT_32 numSamplesLog2) ///< [in] data surface sample count - const -{ - Coordinate cx('x', 0); - Coordinate cy('y', 0); - Coordinate cz('z', 0); - Coordinate cs('s', 0); - - // Clear the equation - pDataEq->resize(0); - pDataEq->resize(27); - - if (dataSurfaceType == Gfx9DataColor) - { - if (IsLinear(swizzleMode)) - { - Coordinate cm('m', 0); - - pDataEq->resize(49); - - for (UINT_32 i = 0; i < 49; i++) - { - (*pDataEq)[i].add(cm); - cm++; - } - } - else if (IsThick(resourceType, swizzleMode)) - { - // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d - UINT_32 i; - if (IsStandardSwizzle(resourceType, swizzleMode)) - { - // Standard 3d swizzle - // Fill in bottom x bits - for (i = elementBytesLog2; i < 4; i++) - { - (*pDataEq)[i].add(cx); - cx++; - } - // Fill in 2 bits of y and then z - for (i = 4; i < 6; i++) - { - (*pDataEq)[i].add(cy); - cy++; - } - for (i = 6; i < 8; i++) - { - (*pDataEq)[i].add(cz); - cz++; - } - if (elementBytesLog2 < 2) - { - // fill in z & y bit - (*pDataEq)[8].add(cz); - (*pDataEq)[9].add(cy); - cz++; - cy++; - } - else if (elementBytesLog2 == 2) - { - // fill in y and x bit - 
(*pDataEq)[8].add(cy); - (*pDataEq)[9].add(cx); - cy++; - cx++; - } - else - { - // fill in 2 x bits - (*pDataEq)[8].add(cx); - cx++; - (*pDataEq)[9].add(cx); - cx++; - } - } - else - { - // Z 3d swizzle - UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5); - UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ? - 2 : ((elementBytesLog2 == 1) ? 3 : 1); - pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd); - for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++) - { - (*pDataEq)[i].add(cz); - cz++; - } - if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3)) - { - // add an x and z - (*pDataEq)[6].add(cx); - (*pDataEq)[7].add(cz); - cx++; - cz++; - } - else if (elementBytesLog2 == 2) - { - // add a y and z - (*pDataEq)[6].add(cy); - (*pDataEq)[7].add(cz); - cy++; - cz++; - } - // add y and x - (*pDataEq)[8].add(cy); - (*pDataEq)[9].add(cx); - cy++; - cx++; - } - // Fill in bit 10 and up - pDataEq->mort3d( cz, cy, cx, 10 ); - } - else if (IsThin(resourceType, swizzleMode)) - { - UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode); - // Color 2D - UINT_32 microYBits = (8 - elementBytesLog2) / 2; - UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2; - UINT_32 i; - // Fill in bottom x bits - for (i = elementBytesLog2; i < 4; i++) - { - (*pDataEq)[i].add(cx); - cx++; - } - // Fill in bottom y bits - for (i = 4; i < 4 + microYBits; i++) - { - (*pDataEq)[i].add(cy); - cy++; - } - // Fill in last of the micro_x bits - for (i = 4 + microYBits; i < 8; i++) - { - (*pDataEq)[i].add(cx); - cx++; - } - // Fill in x/y bits below sample split - pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1); - // Fill in sample bits - for (i = 0; i < numSamplesLog2; i++) - { - cs.set('s', i); - (*pDataEq)[tileSplitStart + i].add(cs); - } - // Fill in x/y bits above sample split - if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1)) - { - pDataEq->mort2d(cx, cy, blockSizeLog2); - } - else - { - pDataEq->mort2d(cy, cx, blockSizeLog2); - } - } - else - { - 
ADDR_ASSERT_ALWAYS(); - } - } - else - { - // Fmask or depth - UINT_32 sampleStart = elementBytesLog2; - UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2; - UINT_32 ymajStart = 6 + numSamplesLog2; - - for (UINT_32 s = 0; s < numSamplesLog2; s++) - { - cs.set('s', s); - (*pDataEq)[sampleStart + s].add(cs); - } - - // Put in the x-major order pixel bits - pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1); - // Put in the y-major order pixel bits - pDataEq->mort2d(cy, cx, ymajStart); - } -} - -/** -************************************************************************************************************************ -* Gfx9Lib::GetPipeEquation -* -* @brief -* Get pipe equation -* @return -* N/A -************************************************************************************************************************ -*/ -VOID Gfx9Lib::GetPipeEquation( - CoordEq* pPipeEq, ///< [out] pipe equation - CoordEq* pDataEq, ///< [in] data equation - UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave - UINT_32 numPipeLog2, ///< [in] number of pipes - UINT_32 numSamplesLog2, ///< [in] data surface sample count - Gfx9DataType dataSurfaceType, ///< [in] data surface type - AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode - AddrResourceType resourceType ///< [in] data surface resource type - ) const -{ - UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode); - CoordEq dataEq; - - pDataEq->copy(dataEq); - - if (dataSurfaceType == Gfx9DataColor) - { - INT_32 shift = static_cast(numSamplesLog2); - dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2); - } - - dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2); - - // This section should only apply to z/stencil, maybe fmask - // If the pipe bit is below the comp block size, - // then keep moving up the address until we find a bit that is above - UINT_32 pipeStart = 0; - - if (dataSurfaceType != Gfx9DataColor) - { - Coordinate tileMin('x', 3); - - while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin) - 
{ - pipeStart++; - } - - // if pipe is 0, then the first pipe bit is above the comp block size, - // so we don't need to do anything - // Note, this if condition is not necessary, since if we execute the loop when pipe==0, - // we will get the same pipe equation - if (pipeStart != 0) - { - for (UINT_32 i = 0; i < numPipeLog2; i++) - { - // Copy the jth bit above pipe interleave to the current pipe equation bit - dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]); - } - } - } - - if (IsPrt(swizzleMode)) - { - // Clear out bits above the block size if prt's are enabled - dataEq.resize(blockSizeLog2); - dataEq.resize(48); - } - - if (IsXor(swizzleMode)) - { - CoordEq xorMask; - - if (IsThick(resourceType, swizzleMode)) - { - CoordEq xorMask2; - - dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2); - - xorMask.resize(numPipeLog2); - - for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++) - { - xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]); - xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]); - } - } - else - { - // Xor in the bits above the pipe+gpu bits - dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2); - - if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE)) - { - Coordinate co; - CoordEq xorMask2; - // if 1xaa and not prt, then xor in the z bits - xorMask2.resize(0); - xorMask2.resize(numPipeLog2); - for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++) - { - co.set('z', numPipeLog2 - 1 - pipeIdx); - xorMask2[pipeIdx].add(co); - } - - pPipeEq->xorin(xorMask2); - } - } - - xorMask.reverse(); - pPipeEq->xorin(xorMask); - } -} -/** -************************************************************************************************************************ -* Gfx9Lib::GetMetaEquation -* -* @brief -* Get meta equation for cmask/htile/DCC -* @return -* Pointer to a calculated meta equation 
-************************************************************************************************************************ -*/ -const CoordEq* Gfx9Lib::GetMetaEquation( - const MetaEqParams& metaEqParams) -{ - UINT_32 cachedMetaEqIndex; - - for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++) - { - if (memcmp(&metaEqParams, - &m_cachedMetaEqKey[cachedMetaEqIndex], - static_cast(sizeof(metaEqParams))) == 0) - { - break; - } - } - - CoordEq* pMetaEq = NULL; - - if (cachedMetaEqIndex < MaxCachedMetaEq) - { - pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex]; - } - else - { - m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams; - - pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++]; - - m_metaEqOverrideIndex %= MaxCachedMetaEq; - - GenMetaEquation(pMetaEq, - metaEqParams.maxMip, - metaEqParams.elementBytesLog2, - metaEqParams.numSamplesLog2, - metaEqParams.metaFlag, - metaEqParams.dataSurfaceType, - metaEqParams.swizzleMode, - metaEqParams.resourceType, - metaEqParams.metaBlkWidthLog2, - metaEqParams.metaBlkHeightLog2, - metaEqParams.metaBlkDepthLog2, - metaEqParams.compBlkWidthLog2, - metaEqParams.compBlkHeightLog2, - metaEqParams.compBlkDepthLog2); - } - - return pMetaEq; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::GenMetaEquation -* -* @brief -* Get meta equation for cmask/htile/DCC -* @return -* N/A -************************************************************************************************************************ -*/ -VOID Gfx9Lib::GenMetaEquation( - CoordEq* pMetaEq, ///< [out] meta equation - UINT_32 maxMip, ///< [in] max mip Id - UINT_32 elementBytesLog2, ///< [in] data surface element bytes - UINT_32 numSamplesLog2, ///< [in] data surface sample count - ADDR2_META_FLAGS metaFlag, ///< [in] meta falg - Gfx9DataType dataSurfaceType, ///< [in] data surface type - AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode - 
AddrResourceType resourceType, ///< [in] data surface resource type - UINT_32 metaBlkWidthLog2, ///< [in] meta block width - UINT_32 metaBlkHeightLog2, ///< [in] meta block height - UINT_32 metaBlkDepthLog2, ///< [in] meta block depth - UINT_32 compBlkWidthLog2, ///< [in] compress block width - UINT_32 compBlkHeightLog2, ///< [in] compress block height - UINT_32 compBlkDepthLog2) ///< [in] compress block depth - const -{ - UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode); - UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2; - - // Get the correct data address and rb equation - CoordEq dataEq; - GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType, - elementBytesLog2, numSamplesLog2); - - // Get pipe and rb equations - CoordEq pipeEquation; - GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2, - numSamplesLog2, dataSurfaceType, swizzleMode, resourceType); - numPipeTotalLog2 = pipeEquation.getsize(); - - if (metaFlag.linear) - { - // Linear metadata supporting was removed for GFX9! No one can use this feature. - ADDR_ASSERT_ALWAYS(); - - ADDR_ASSERT(dataSurfaceType == Gfx9DataColor); - - dataEq.copy(*pMetaEq); - - if (IsLinear(swizzleMode)) - { - if (metaFlag.pipeAligned) - { - // Remove the pipe bits - INT_32 shift = static_cast(numPipeTotalLog2); - pMetaEq->shift(-shift, pipeInterleaveLog2); - } - // Divide by comp block size, which for linear (which is always color) is 256 B - pMetaEq->shift(-8); - - if (metaFlag.pipeAligned) - { - // Put pipe bits back in - pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2); - - for (UINT_32 i = 0; i < numPipeTotalLog2; i++) - { - pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]); - } - } - } - - pMetaEq->shift(1); - } - else - { - UINT_32 maxCompFragLog2 = static_cast(m_maxCompFragLog2); - UINT_32 compFragLog2 = - ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ? 
- maxCompFragLog2 : numSamplesLog2; - - UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2; - - // Make sure the metaaddr is cleared - pMetaEq->resize(0); - pMetaEq->resize(27); - - if (IsThick(resourceType, swizzleMode)) - { - Coordinate cx('x', 0); - Coordinate cy('y', 0); - Coordinate cz('z', 0); - - if (maxMip > 0) - { - pMetaEq->mort3d(cy, cx, cz); - } - else - { - pMetaEq->mort3d(cx, cy, cz); - } - } - else - { - Coordinate cx('x', 0); - Coordinate cy('y', 0); - Coordinate cs; - - if (maxMip > 0) - { - pMetaEq->mort2d(cy, cx, compFragLog2); - } - else - { - pMetaEq->mort2d(cx, cy, compFragLog2); - } - - //------------------------------------------------------------------------------------------------------------------------ - // Put the compressible fragments at the lsb - // the uncompressible frags will be at the msb of the micro address - //------------------------------------------------------------------------------------------------------------------------ - for (UINT_32 s = 0; s < compFragLog2; s++) - { - cs.set('s', s); - (*pMetaEq)[s].add(cs); - } - } - - // Keep a copy of the pipe equations - CoordEq origPipeEquation; - pipeEquation.copy(origPipeEquation); - - Coordinate co; - // filter out everything under the compressed block size - co.set('x', compBlkWidthLog2); - pMetaEq->Filter('<', co, 0, 'x'); - co.set('y', compBlkHeightLog2); - pMetaEq->Filter('<', co, 0, 'y'); - co.set('z', compBlkDepthLog2); - pMetaEq->Filter('<', co, 0, 'z'); - - // For non-color, filter out sample bits - if (dataSurfaceType != Gfx9DataColor) - { - co.set('x', 0); - pMetaEq->Filter('<', co, 0, 's'); - } - - // filter out everything above the metablock size - co.set('x', metaBlkWidthLog2 - 1); - pMetaEq->Filter('>', co, 0, 'x'); - co.set('y', metaBlkHeightLog2 - 1); - pMetaEq->Filter('>', co, 0, 'y'); - co.set('z', metaBlkDepthLog2 - 1); - pMetaEq->Filter('>', co, 0, 'z'); - - // filter out everything above the metablock size for the channel bits - co.set('x', 
metaBlkWidthLog2 - 1); - pipeEquation.Filter('>', co, 0, 'x'); - co.set('y', metaBlkHeightLog2 - 1); - pipeEquation.Filter('>', co, 0, 'y'); - co.set('z', metaBlkDepthLog2 - 1); - pipeEquation.Filter('>', co, 0, 'z'); - - // Make sure we still have the same number of channel bits - if (pipeEquation.getsize() != numPipeTotalLog2) - { - ADDR_ASSERT_ALWAYS(); - } - - // Loop through all channel and rb bits, - // and make sure these components exist in the metadata address - for (UINT_32 i = 0; i < numPipeTotalLog2; i++) - { - for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--) - { - if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE) - { - ADDR_ASSERT_ALWAYS(); - } - } - } - - const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0; - const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0; - const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2; - CoordEq origRbEquation; - - GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2); - - CoordEq rbEquation = origRbEquation; - - for (UINT_32 i = 0; i < numRbTotalLog2; i++) - { - for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--) - { - if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE) - { - ADDR_ASSERT_ALWAYS(); - } - } - } - - if (m_settings.applyAliasFix) - { - co.set('z', -1); - } - - // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it - for (UINT_32 i = 0; i < numRbTotalLog2; i++) - { - for (UINT_32 j = 0; j < numPipeTotalLog2; j++) - { - BOOL_32 isRbEquationInPipeEquation = FALSE; - - if (m_settings.applyAliasFix) - { - CoordTerm filteredPipeEq; - filteredPipeEq = pipeEquation[j]; - - filteredPipeEq.Filter('>', co, 0, 'z'); - - isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq); - } - else - { - isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]); - } - - if (isRbEquationInPipeEquation) - { - rbEquation[i].Clear(); - } - } - } - - bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {}; - - // Loop through 
each bit of the channel, get the smallest coordinate, - // and remove it from the metaaddr, and rb_equation - for (UINT_32 i = 0; i < numPipeTotalLog2; i++) - { - pipeEquation[i].getsmallest(co); - - UINT_32 old_size = pMetaEq->getsize(); - pMetaEq->Filter('=', co); - UINT_32 new_size = pMetaEq->getsize(); - if (new_size != old_size-1) - { - ADDR_ASSERT_ALWAYS(); - } - pipeEquation.remove(co); - for (UINT_32 j = 0; j < numRbTotalLog2; j++) - { - if (rbEquation[j].remove(co)) - { - // if we actually removed something from this bit, then add the remaining - // channel bits, as these can be removed for this bit - for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++) - { - if (pipeEquation[i][k] != co) - { - rbEquation[j].add(pipeEquation[i][k]); - rbAppendedWithPipeBits[j] = true; - } - } - } - } - } - - // Loop through the rb bits and see what remain; - // filter out the smallest coordinate if it remains - UINT_32 rbBitsLeft = 0; - for (UINT_32 i = 0; i < numRbTotalLog2; i++) - { - BOOL_32 isRbEqAppended = FALSE; - - if (m_settings.applyAliasFix) - { - isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 
1 : 0)); - } - else - { - isRbEqAppended = (rbEquation[i].getsize() > 0); - } - - if (isRbEqAppended) - { - rbBitsLeft++; - rbEquation[i].getsmallest(co); - UINT_32 old_size = pMetaEq->getsize(); - pMetaEq->Filter('=', co); - UINT_32 new_size = pMetaEq->getsize(); - if (new_size != old_size - 1) - { - // assert warning - } - for (UINT_32 j = i + 1; j < numRbTotalLog2; j++) - { - if (rbEquation[j].remove(co)) - { - // if we actually removed something from this bit, then add the remaining - // rb bits, as these can be removed for this bit - for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++) - { - if (rbEquation[i][k] != co) - { - rbEquation[j].add(rbEquation[i][k]); - rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i]; - } - } - } - } - } - } - - // capture the size of the metaaddr - UINT_32 metaSize = pMetaEq->getsize(); - // resize to 49 bits...make this a nibble address - pMetaEq->resize(49); - // Concatenate the macro address above the current address - for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++) - { - co.set('m', j); - (*pMetaEq)[i].add(co); - } - - // Multiply by meta element size (in nibbles) - if (dataSurfaceType == Gfx9DataColor) - { - pMetaEq->shift(1); - } - else if (dataSurfaceType == Gfx9DataDepthStencil) - { - pMetaEq->shift(3); - } - - //------------------------------------------------------------------------------------------ - // Note the pipeInterleaveLog2+1 is because address is a nibble address - // Shift up from pipe interleave number of channel - // and rb bits left, and uncompressed fragments - //------------------------------------------------------------------------------------------ - - pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1); - - // Put in the channel bits - for (UINT_32 i = 0; i < numPipeTotalLog2; i++) - { - origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]); - } - - // Put in remaining rb bits - for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % 
numRbTotalLog2) - { - BOOL_32 isRbEqAppended = FALSE; - - if (m_settings.applyAliasFix) - { - isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0)); - } - else - { - isRbEqAppended = (rbEquation[i].getsize() > 0); - } - - if (isRbEqAppended) - { - origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]); - // Mark any rb bit we add in to the rb mask - j++; - } - } - - //------------------------------------------------------------------------------------------ - // Put in the uncompressed fragment bits - //------------------------------------------------------------------------------------------ - for (UINT_32 i = 0; i < uncompFragLog2; i++) - { - co.set('s', compFragLog2 + i); - (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co); - } - } -} - -/** -************************************************************************************************************************ -* Gfx9Lib::IsEquationSupported -* -* @brief -* Check if equation is supported for given swizzle mode and resource type. -* -* @return -* TRUE if supported -************************************************************************************************************************ -*/ -BOOL_32 Gfx9Lib::IsEquationSupported( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2) const -{ - BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) && - (IsLinear(swMode) == FALSE) && - (((IsTex2d(rsrcType) == TRUE) && - ((elementBytesLog2 < 4) || - ((IsRotateSwizzle(swMode) == FALSE) && - (IsZOrderSwizzle(swMode) == FALSE)))) || - ((IsTex3d(rsrcType) == TRUE) && - (IsRotateSwizzle(swMode) == FALSE) && - (IsBlock256b(swMode) == FALSE))); - - return supported; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::InitEquationTable -* -* @brief -* Initialize Equation table. 
-* -* @return -* N/A -************************************************************************************************************************ -*/ -VOID Gfx9Lib::InitEquationTable() -{ - memset(m_equationTable, 0, sizeof(m_equationTable)); - - // Loop all possible resource type (2D/3D) - for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++) - { - AddrResourceType rsrcType = static_cast(rsrcTypeIdx + ADDR_RSRC_TEX_2D); - - // Loop all possible swizzle mode - for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++) - { - AddrSwizzleMode swMode = static_cast(swModeIdx); - - // Loop all possible bpp - for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++) - { - UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX; - - // Check if the input is supported - if (IsEquationSupported(rsrcType, swMode, bppIdx)) - { - ADDR_EQUATION equation; - ADDR_E_RETURNCODE retCode; - - memset(&equation, 0, sizeof(ADDR_EQUATION)); - - // Generate the equation - if (IsBlock256b(swMode) && IsTex2d(rsrcType)) - { - retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation); - } - else if (IsThin(rsrcType, swMode)) - { - retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation); - } - else - { - retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation); - } - - // Only fill the equation into the table if the return code is ADDR_OK, - // otherwise if the return code is not ADDR_OK, it indicates this is not - // a valid input, we do nothing but just fill invalid equation index - // into the lookup table. 
- if (retCode == ADDR_OK) - { - equationIndex = m_numEquations; - ADDR_ASSERT(equationIndex < EquationTableSize); - - m_equationTable[equationIndex] = equation; - - m_numEquations++; - } - else - { - ADDR_ASSERT_ALWAYS(); - } - } - - // Fill the index into the lookup table, if the combination is not supported - // fill the invalid equation index - m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex; - } - } - } -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlGetEquationIndex -* -* @brief -* Interface function stub of GetEquationIndex -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -UINT_32 Gfx9Lib::HwlGetEquationIndex( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut - ) const -{ - AddrResourceType rsrcType = pIn->resourceType; - AddrSwizzleMode swMode = pIn->swizzleMode; - UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); - UINT_32 index = ADDR_INVALID_EQUATION_INDEX; - - if (IsEquationSupported(rsrcType, swMode, elementBytesLog2)) - { - UINT_32 rsrcTypeIdx = static_cast(rsrcType) - 1; - UINT_32 swModeIdx = static_cast(swMode); - - index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2]; - } - - if (pOut->pMipInfo != NULL) - { - for (UINT_32 i = 0; i < pIn->numMipLevels; i++) - { - pOut->pMipInfo[i].equationIndex = index; - } - } - - return index; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeBlock256Equation -* -* @brief -* Interface function stub of ComputeBlock256Equation -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE 
Gfx9Lib::HwlComputeBlock256Equation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const -{ - ADDR_E_RETURNCODE ret = ADDR_OK; - - pEquation->numBits = 8; - - UINT_32 i = 0; - for (; i < elementBytesLog2; i++) - { - InitChannel(1, 0 , i, &pEquation->addr[i]); - } - - ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2]; - - const UINT_32 maxBitsUsed = 4; - ADDR_CHANNEL_SETTING x[maxBitsUsed] = {}; - ADDR_CHANNEL_SETTING y[maxBitsUsed] = {}; - - for (i = 0; i < maxBitsUsed; i++) - { - InitChannel(1, 0, elementBytesLog2 + i, &x[i]); - InitChannel(1, 1, i, &y[i]); - } - - if (IsStandardSwizzle(rsrcType, swMode)) - { - switch (elementBytesLog2) - { - case 0: - pixelBit[0] = x[0]; - pixelBit[1] = x[1]; - pixelBit[2] = x[2]; - pixelBit[3] = x[3]; - pixelBit[4] = y[0]; - pixelBit[5] = y[1]; - pixelBit[6] = y[2]; - pixelBit[7] = y[3]; - break; - case 1: - pixelBit[0] = x[0]; - pixelBit[1] = x[1]; - pixelBit[2] = x[2]; - pixelBit[3] = y[0]; - pixelBit[4] = y[1]; - pixelBit[5] = y[2]; - pixelBit[6] = x[3]; - break; - case 2: - pixelBit[0] = x[0]; - pixelBit[1] = x[1]; - pixelBit[2] = y[0]; - pixelBit[3] = y[1]; - pixelBit[4] = y[2]; - pixelBit[5] = x[2]; - break; - case 3: - pixelBit[0] = x[0]; - pixelBit[1] = y[0]; - pixelBit[2] = y[1]; - pixelBit[3] = x[1]; - pixelBit[4] = x[2]; - break; - case 4: - pixelBit[0] = y[0]; - pixelBit[1] = y[1]; - pixelBit[2] = x[0]; - pixelBit[3] = x[1]; - break; - default: - ADDR_ASSERT_ALWAYS(); - ret = ADDR_INVALIDPARAMS; - break; - } - } - else if (IsDisplaySwizzle(rsrcType, swMode)) - { - switch (elementBytesLog2) - { - case 0: - pixelBit[0] = x[0]; - pixelBit[1] = x[1]; - pixelBit[2] = x[2]; - pixelBit[3] = y[1]; - pixelBit[4] = y[0]; - pixelBit[5] = y[2]; - pixelBit[6] = x[3]; - pixelBit[7] = y[3]; - break; - case 1: - pixelBit[0] = x[0]; - pixelBit[1] = x[1]; - pixelBit[2] = x[2]; - pixelBit[3] = y[0]; - pixelBit[4] = y[1]; - pixelBit[5] = y[2]; - 
pixelBit[6] = x[3]; - break; - case 2: - pixelBit[0] = x[0]; - pixelBit[1] = x[1]; - pixelBit[2] = y[0]; - pixelBit[3] = x[2]; - pixelBit[4] = y[1]; - pixelBit[5] = y[2]; - break; - case 3: - pixelBit[0] = x[0]; - pixelBit[1] = y[0]; - pixelBit[2] = x[1]; - pixelBit[3] = x[2]; - pixelBit[4] = y[1]; - break; - case 4: - pixelBit[0] = x[0]; - pixelBit[1] = y[0]; - pixelBit[2] = x[1]; - pixelBit[3] = y[1]; - break; - default: - ADDR_ASSERT_ALWAYS(); - ret = ADDR_INVALIDPARAMS; - break; - } - } - else if (IsRotateSwizzle(swMode)) - { - switch (elementBytesLog2) - { - case 0: - pixelBit[0] = y[0]; - pixelBit[1] = y[1]; - pixelBit[2] = y[2]; - pixelBit[3] = x[1]; - pixelBit[4] = x[0]; - pixelBit[5] = x[2]; - pixelBit[6] = x[3]; - pixelBit[7] = y[3]; - break; - case 1: - pixelBit[0] = y[0]; - pixelBit[1] = y[1]; - pixelBit[2] = y[2]; - pixelBit[3] = x[0]; - pixelBit[4] = x[1]; - pixelBit[5] = x[2]; - pixelBit[6] = x[3]; - break; - case 2: - pixelBit[0] = y[0]; - pixelBit[1] = y[1]; - pixelBit[2] = x[0]; - pixelBit[3] = y[2]; - pixelBit[4] = x[1]; - pixelBit[5] = x[2]; - break; - case 3: - pixelBit[0] = y[0]; - pixelBit[1] = x[0]; - pixelBit[2] = y[1]; - pixelBit[3] = x[1]; - pixelBit[4] = x[2]; - break; - default: - ADDR_ASSERT_ALWAYS(); - case 4: - ret = ADDR_INVALIDPARAMS; - break; - } - } - else - { - ADDR_ASSERT_ALWAYS(); - ret = ADDR_INVALIDPARAMS; - } - - // Post validation - if (ret == ADDR_OK) - { - MAYBE_UNUSED Dim2d microBlockDim = Block256_2d[elementBytesLog2]; - ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) == - (microBlockDim.w * (1 << elementBytesLog2))); - ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h); - } - - return ret; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeThinEquation -* -* @brief -* Interface function stub of ComputeThinEquation -* -* @return -* ADDR_E_RETURNCODE 
-************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const -{ - ADDR_E_RETURNCODE ret = ADDR_OK; - - UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode); - - UINT_32 maxXorBits = blockSizeLog2; - if (IsNonPrtXor(swMode)) - { - // For non-prt-xor, maybe need to initialize some more bits for xor - // The highest xor bit used in equation will be max the following 3 items: - // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits - // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits - // 3. blockSizeLog2 - - maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2)); - maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + - GetPipeXorBits(blockSizeLog2) + - 2 * GetBankXorBits(blockSizeLog2)); - } - - const UINT_32 maxBitsUsed = 14; - ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits); - ADDR_CHANNEL_SETTING x[maxBitsUsed] = {}; - ADDR_CHANNEL_SETTING y[maxBitsUsed] = {}; - - const UINT_32 extraXorBits = 16; - ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2); - ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {}; - - for (UINT_32 i = 0; i < maxBitsUsed; i++) - { - InitChannel(1, 0, elementBytesLog2 + i, &x[i]); - InitChannel(1, 1, i, &y[i]); - } - - ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr; - - for (UINT_32 i = 0; i < elementBytesLog2; i++) - { - InitChannel(1, 0 , i, &pixelBit[i]); - } - - UINT_32 xIdx = 0; - UINT_32 yIdx = 0; - UINT_32 lowBits = 0; - - if (IsZOrderSwizzle(swMode)) - { - if (elementBytesLog2 <= 3) - { - for (UINT_32 i = elementBytesLog2; i < 6; i++) - { - pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? 
x[xIdx++] : y[yIdx++]; - } - - lowBits = 6; - } - else - { - ret = ADDR_INVALIDPARAMS; - } - } - else - { - ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation); - - if (ret == ADDR_OK) - { - Dim2d microBlockDim = Block256_2d[elementBytesLog2]; - xIdx = Log2(microBlockDim.w); - yIdx = Log2(microBlockDim.h); - lowBits = 8; - } - } - - if (ret == ADDR_OK) - { - for (UINT_32 i = lowBits; i < blockSizeLog2; i++) - { - pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++]; - } - - for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++) - { - xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++]; - } - - if (IsXor(swMode)) - { - // Fill XOR bits - UINT_32 pipeStart = m_pipeInterleaveLog2; - UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2); - - UINT_32 bankStart = pipeStart + pipeXorBits; - UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2); - - for (UINT_32 i = 0; i < pipeXorBits; i++) - { - UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i; - ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ? - &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2]; - - InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src); - } - - for (UINT_32 i = 0; i < bankXorBits; i++) - { - UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i; - ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ? 
- &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2]; - - InitChannel(&pEquation->xor1[bankStart + i], pXor1Src); - } - - if (IsPrt(swMode) == FALSE) - { - for (UINT_32 i = 0; i < pipeXorBits; i++) - { - InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]); - } - - for (UINT_32 i = 0; i < bankXorBits; i++) - { - InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]); - } - } - } - - pEquation->numBits = blockSizeLog2; - } - - return ret; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeThickEquation -* -* @brief -* Interface function stub of ComputeThickEquation -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const -{ - ADDR_E_RETURNCODE ret = ADDR_OK; - - ADDR_ASSERT(IsTex3d(rsrcType)); - - UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode); - - UINT_32 maxXorBits = blockSizeLog2; - if (IsNonPrtXor(swMode)) - { - // For non-prt-xor, maybe need to initialize some more bits for xor - // The highest xor bit used in equation will be max the following 3: - // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits - // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits - // 3. 
blockSizeLog2 - - maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2)); - maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + - GetPipeXorBits(blockSizeLog2) + - 3 * GetBankXorBits(blockSizeLog2)); - } - - for (UINT_32 i = 0; i < elementBytesLog2; i++) - { - InitChannel(1, 0 , i, &pEquation->addr[i]); - } - - ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2]; - - const UINT_32 maxBitsUsed = 12; - ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits); - ADDR_CHANNEL_SETTING x[maxBitsUsed] = {}; - ADDR_CHANNEL_SETTING y[maxBitsUsed] = {}; - ADDR_CHANNEL_SETTING z[maxBitsUsed] = {}; - - const UINT_32 extraXorBits = 24; - ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2); - ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {}; - - for (UINT_32 i = 0; i < maxBitsUsed; i++) - { - InitChannel(1, 0, elementBytesLog2 + i, &x[i]); - InitChannel(1, 1, i, &y[i]); - InitChannel(1, 2, i, &z[i]); - } - - if (IsZOrderSwizzle(swMode)) - { - switch (elementBytesLog2) - { - case 0: - pixelBit[0] = x[0]; - pixelBit[1] = y[0]; - pixelBit[2] = x[1]; - pixelBit[3] = y[1]; - pixelBit[4] = z[0]; - pixelBit[5] = z[1]; - pixelBit[6] = x[2]; - pixelBit[7] = z[2]; - pixelBit[8] = y[2]; - pixelBit[9] = x[3]; - break; - case 1: - pixelBit[0] = x[0]; - pixelBit[1] = y[0]; - pixelBit[2] = x[1]; - pixelBit[3] = y[1]; - pixelBit[4] = z[0]; - pixelBit[5] = z[1]; - pixelBit[6] = z[2]; - pixelBit[7] = y[2]; - pixelBit[8] = x[2]; - break; - case 2: - pixelBit[0] = x[0]; - pixelBit[1] = y[0]; - pixelBit[2] = x[1]; - pixelBit[3] = z[0]; - pixelBit[4] = y[1]; - pixelBit[5] = z[1]; - pixelBit[6] = y[2]; - pixelBit[7] = x[2]; - break; - case 3: - pixelBit[0] = x[0]; - pixelBit[1] = y[0]; - pixelBit[2] = z[0]; - pixelBit[3] = x[1]; - pixelBit[4] = z[1]; - pixelBit[5] = y[1]; - pixelBit[6] = x[2]; - break; - case 4: - pixelBit[0] = x[0]; - pixelBit[1] = y[0]; - pixelBit[2] = z[0]; - pixelBit[3] = z[1]; - pixelBit[4] = y[1]; - pixelBit[5] = x[1]; - break; - default: 
- ADDR_ASSERT_ALWAYS(); - ret = ADDR_INVALIDPARAMS; - break; - } - } - else if (IsStandardSwizzle(rsrcType, swMode)) - { - switch (elementBytesLog2) - { - case 0: - pixelBit[0] = x[0]; - pixelBit[1] = x[1]; - pixelBit[2] = x[2]; - pixelBit[3] = x[3]; - pixelBit[4] = y[0]; - pixelBit[5] = y[1]; - pixelBit[6] = z[0]; - pixelBit[7] = z[1]; - pixelBit[8] = z[2]; - pixelBit[9] = y[2]; - break; - case 1: - pixelBit[0] = x[0]; - pixelBit[1] = x[1]; - pixelBit[2] = x[2]; - pixelBit[3] = y[0]; - pixelBit[4] = y[1]; - pixelBit[5] = z[0]; - pixelBit[6] = z[1]; - pixelBit[7] = z[2]; - pixelBit[8] = y[2]; - break; - case 2: - pixelBit[0] = x[0]; - pixelBit[1] = x[1]; - pixelBit[2] = y[0]; - pixelBit[3] = y[1]; - pixelBit[4] = z[0]; - pixelBit[5] = z[1]; - pixelBit[6] = y[2]; - pixelBit[7] = x[2]; - break; - case 3: - pixelBit[0] = x[0]; - pixelBit[1] = y[0]; - pixelBit[2] = y[1]; - pixelBit[3] = z[0]; - pixelBit[4] = z[1]; - pixelBit[5] = x[1]; - pixelBit[6] = x[2]; - break; - case 4: - pixelBit[0] = y[0]; - pixelBit[1] = y[1]; - pixelBit[2] = z[0]; - pixelBit[3] = z[1]; - pixelBit[4] = x[0]; - pixelBit[5] = x[1]; - break; - default: - ADDR_ASSERT_ALWAYS(); - ret = ADDR_INVALIDPARAMS; - break; - } - } - else - { - ADDR_ASSERT_ALWAYS(); - ret = ADDR_INVALIDPARAMS; - } - - if (ret == ADDR_OK) - { - Dim3d microBlockDim = Block1K_3d[elementBytesLog2]; - UINT_32 xIdx = Log2(microBlockDim.w); - UINT_32 yIdx = Log2(microBlockDim.h); - UINT_32 zIdx = Log2(microBlockDim.d); - - pixelBit = pEquation->addr; - - const UINT_32 lowBits = 10; - ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1); - ADDR_ASSERT(pEquation->addr[lowBits].valid == 0); - - for (UINT_32 i = lowBits; i < blockSizeLog2; i++) - { - if ((i % 3) == 0) - { - pixelBit[i] = x[xIdx++]; - } - else if ((i % 3) == 1) - { - pixelBit[i] = z[zIdx++]; - } - else - { - pixelBit[i] = y[yIdx++]; - } - } - - for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++) - { - if ((i % 3) == 0) - { - xorExtra[i - blockSizeLog2] = x[xIdx++]; - } 
- else if ((i % 3) == 1) - { - xorExtra[i - blockSizeLog2] = z[zIdx++]; - } - else - { - xorExtra[i - blockSizeLog2] = y[yIdx++]; - } - } - - if (IsXor(swMode)) - { - // Fill XOR bits - UINT_32 pipeStart = m_pipeInterleaveLog2; - UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2); - for (UINT_32 i = 0; i < pipeXorBits; i++) - { - UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i); - ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ? - &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2]; - - InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src); - - UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i); - ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ? - &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2]; - - InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src); - } - - UINT_32 bankStart = pipeStart + pipeXorBits; - UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2); - for (UINT_32 i = 0; i < bankXorBits; i++) - { - UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i); - ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ? - &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2]; - - InitChannel(&pEquation->xor1[bankStart + i], pXor1Src); - - UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i); - ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ? 
- &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2]; - - InitChannel(&pEquation->xor2[bankStart + i], pXor2Src); - } - } - - pEquation->numBits = blockSizeLog2; - } - - return ret; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::IsValidDisplaySwizzleMode -* -* @brief -* Check if a swizzle mode is supported by display engine -* -* @return -* TRUE is swizzle mode is supported by display engine -************************************************************************************************************************ -*/ -BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const -{ - BOOL_32 support = FALSE; - - const AddrResourceType resourceType = pIn->resourceType; - (void)resourceType; - const AddrSwizzleMode swizzleMode = pIn->swizzleMode; - - if (m_settings.isDce12) - { - switch (swizzleMode) - { - case ADDR_SW_256B_D: - case ADDR_SW_256B_R: - support = (pIn->bpp == 32); - break; - - case ADDR_SW_LINEAR: - case ADDR_SW_4KB_D: - case ADDR_SW_4KB_R: - case ADDR_SW_64KB_D: - case ADDR_SW_64KB_R: - case ADDR_SW_VAR_D: - case ADDR_SW_VAR_R: - case ADDR_SW_4KB_D_X: - case ADDR_SW_4KB_R_X: - case ADDR_SW_64KB_D_X: - case ADDR_SW_64KB_R_X: - case ADDR_SW_VAR_D_X: - case ADDR_SW_VAR_R_X: - support = (pIn->bpp <= 64); - break; - - default: - break; - } - } - else if (m_settings.isDcn1) - { - switch (swizzleMode) - { - case ADDR_SW_4KB_D: - case ADDR_SW_64KB_D: - case ADDR_SW_VAR_D: - case ADDR_SW_64KB_D_T: - case ADDR_SW_4KB_D_X: - case ADDR_SW_64KB_D_X: - case ADDR_SW_VAR_D_X: - support = (pIn->bpp == 64); - break; - - case ADDR_SW_LINEAR: - case ADDR_SW_4KB_S: - case ADDR_SW_64KB_S: - case ADDR_SW_VAR_S: - case ADDR_SW_64KB_S_T: - case ADDR_SW_4KB_S_X: - case ADDR_SW_64KB_S_X: - case ADDR_SW_VAR_S_X: - support = (pIn->bpp <= 64); - break; - - default: - break; - } - } - else - { - ADDR_NOT_IMPLEMENTED(); - } - - return 
support; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputePipeBankXor -* -* @brief -* Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address -* -* @return -* PipeBankXor value -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor( - const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, - ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const -{ - UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode); - UINT_32 pipeBits = GetPipeXorBits(macroBlockBits); - UINT_32 bankBits = GetBankXorBits(macroBlockBits); - - UINT_32 pipeXor = 0; - UINT_32 bankXor = 0; - - const UINT_32 bankMask = (1 << bankBits) - 1; - const UINT_32 index = pIn->surfIndex & bankMask; - - const UINT_32 bpp = pIn->flags.fmask ? - GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format); - if (bankBits == 4) - { - static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10}; - static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10}; - - bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index]; - } - else if (bankBits > 0) - { - UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1; - bankIncrease = (bankIncrease == 0) ? 
1 : bankIncrease; - bankXor = (index * bankIncrease) & bankMask; - } - - pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor; - - return ADDR_OK; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeSlicePipeBankXor -* -* @brief -* Generate slice PipeBankXor value based on base PipeBankXor value and slice id -* -* @return -* PipeBankXor value -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor( - const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, - ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const -{ - UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode); - UINT_32 pipeBits = GetPipeXorBits(macroBlockBits); - UINT_32 bankBits = GetBankXorBits(macroBlockBits); - - UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits); - UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits); - - pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits)); - - return ADDR_OK; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern -* -* @brief -* Compute sub resource offset to support swizzle pattern -* -* @return -* Offset -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern( - const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, - ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const -{ - ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode)); - - UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode); - UINT_32 pipeBits = GetPipeXorBits(macroBlockBits); - UINT_32 bankBits = 
GetBankXorBits(macroBlockBits); - UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits); - UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits); - UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2; - - pOut->offset = pIn->slice * pIn->sliceSize + - pIn->macroBlockOffset + - (pIn->mipTailOffset ^ pipeBankXor) - - static_cast(pipeBankXor); - return ADDR_OK; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeSurfaceInfoSanityCheck -* -* @brief -* Compute surface info sanity check -* -* @return -* Offset -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const -{ - BOOL_32 invalid = FALSE; - - if ((pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16)) - { - invalid = TRUE; - } - else if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || - (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)) - { - invalid = TRUE; - } - - BOOL_32 mipmap = (pIn->numMipLevels > 1); - BOOL_32 msaa = (pIn->numFrags > 1); - - ADDR2_SURFACE_FLAGS flags = pIn->flags; - BOOL_32 zbuffer = (flags.depth || flags.stencil); - BOOL_32 color = flags.color; - BOOL_32 display = flags.display || flags.rotated; - - AddrResourceType rsrcType = pIn->resourceType; - BOOL_32 tex3d = IsTex3d(rsrcType); - AddrSwizzleMode swizzle = pIn->swizzleMode; - BOOL_32 linear = IsLinear(swizzle); - BOOL_32 blk256B = IsBlock256b(swizzle); - BOOL_32 blkVar = IsBlockVariable(swizzle); - BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle); - BOOL_32 prt = flags.prt; - BOOL_32 stereo = flags.qbStereo; - - if (invalid == FALSE) - { - if ((pIn->numFrags > 1) && - (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags))) - { - // MSAA surface must have 
blk_bytes/pipe_interleave >= num_samples - invalid = TRUE; - } - } - - if (invalid == FALSE) - { - switch (rsrcType) - { - case ADDR_RSRC_TEX_1D: - invalid = msaa || zbuffer || display || (linear == FALSE) || stereo; - break; - case ADDR_RSRC_TEX_2D: - invalid = (msaa && mipmap) || (stereo && msaa) || (stereo && mipmap); - break; - case ADDR_RSRC_TEX_3D: - invalid = msaa || zbuffer || display || stereo; - break; - default: - invalid = TRUE; - break; - } - } - - if (invalid == FALSE) - { - if (display) - { - invalid = (IsValidDisplaySwizzleMode(pIn) == FALSE); - } - } - - if (invalid == FALSE) - { - if (linear) - { - invalid = ((ADDR_RSRC_TEX_1D != rsrcType) && prt) || - zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0); - } - else - { - if (blk256B || blkVar || isNonPrtXor) - { - invalid = prt; - if (blk256B) - { - invalid = invalid || zbuffer || tex3d || mipmap || msaa; - } - } - - if (invalid == FALSE) - { - if (IsZOrderSwizzle(swizzle)) - { - invalid = color && msaa; - } - else if (IsStandardSwizzle(rsrcType, swizzle)) - { - invalid = zbuffer; - } - else if (IsDisplaySwizzle(rsrcType, swizzle)) - { - invalid = zbuffer; - } - else if (IsRotateSwizzle(swizzle)) - { - invalid = zbuffer || (pIn->bpp > 64) || tex3d; - } - else - { - ADDR_ASSERT(!"invalid swizzle mode"); - invalid = TRUE; - } - } - } - } - - ADDR_ASSERT(invalid == FALSE); - - return invalid ? 
ADDR_INVALIDPARAMS : ADDR_OK; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlGetPreferredSurfaceSetting -* -* @brief -* Internal function to get suggested surface information for cliet to use -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( - const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, - ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const -{ - // Macro define resource block type - enum AddrBlockType - { - AddrBlockMicro = 0, // Resource uses 256B block - AddrBlock4KB = 1, // Resource uses 4KB block - AddrBlock64KB = 2, // Resource uses 64KB block - AddrBlockVar = 3, // Resource uses var blcok - AddrBlockLinear = 4, // Resource uses linear swizzle mode - - AddrBlockMaxTiledType = AddrBlock64KB + 1, - }; - - enum AddrBlockSet - { - AddrBlockSetMicro = 1 << AddrBlockMicro, - AddrBlockSetMacro4KB = 1 << AddrBlock4KB, - AddrBlockSetMacro64KB = 1 << AddrBlock64KB, - AddrBlockSetVar = 1 << AddrBlockVar, - AddrBlockSetLinear = 1 << AddrBlockLinear, - - AddrBlockSetMacro = AddrBlockSetMacro4KB | AddrBlockSetMacro64KB, - }; - - enum AddrSwSet - { - AddrSwSetZ = 1 << ADDR_SW_Z, - AddrSwSetS = 1 << ADDR_SW_S, - AddrSwSetD = 1 << ADDR_SW_D, - AddrSwSetR = 1 << ADDR_SW_R, - - AddrSwSetAll = AddrSwSetZ | AddrSwSetS | AddrSwSetD | AddrSwSetR, - }; - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - ElemLib* pElemLib = GetElemLib(); - - // Set format to INVALID will skip this conversion - UINT_32 expandX = 1; - UINT_32 expandY = 1; - UINT_32 bpp = pIn->bpp; - UINT_32 width = pIn->width; - UINT_32 height = pIn->height; - - if (pIn->format != ADDR_FMT_INVALID) - { - // Don't care for this case - ElemMode elemMode = ADDR_UNCOMPRESSED; - - // Get compression/expansion factors and element mode which indicates 
compression/expansion - bpp = pElemLib->GetBitsPerPixel(pIn->format, - &elemMode, - &expandX, - &expandY); - - UINT_32 basePitch = 0; - GetElemLib()->AdjustSurfaceInfo(elemMode, - expandX, - expandY, - &bpp, - &basePitch, - &width, - &height); - } - - UINT_32 numSamples = Max(pIn->numSamples, 1u); - UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags; - UINT_32 slice = Max(pIn->numSlices, 1u); - UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u); - UINT_32 minSizeAlign = NextPow2(pIn->minSizeAlign); - - if (pIn->flags.fmask) - { - bpp = GetFmaskBpp(numSamples, numFrags); - numFrags = 1; - numSamples = 1; - pOut->resourceType = ADDR_RSRC_TEX_2D; - } - else - { - // The output may get changed for volume(3D) texture resource in future - pOut->resourceType = pIn->resourceType; - } - - if (bpp < 8) - { - ADDR_ASSERT_ALWAYS(); - - returnCode = ADDR_INVALIDPARAMS; - } - else if (IsTex1d(pOut->resourceType)) - { - pOut->swizzleMode = ADDR_SW_LINEAR; - pOut->validBlockSet.value = AddrBlockSetLinear; - pOut->canXor = FALSE; - } - else - { - ADDR2_BLOCK_SET blockSet; - blockSet.value = 0; - - ADDR2_SWTYPE_SET addrPreferredSwSet, addrValidSwSet, clientPreferredSwSet; - addrPreferredSwSet.value = AddrSwSetS; - addrValidSwSet = addrPreferredSwSet; - clientPreferredSwSet = pIn->preferredSwSet; - - if (clientPreferredSwSet.value == 0) - { - clientPreferredSwSet.value = AddrSwSetAll; - } - - // prt Xor and non-xor will have less height align requirement for stereo surface - BOOL_32 prtXor = (pIn->flags.prt || pIn->flags.qbStereo) && (pIn->noXor == FALSE); - BOOL_32 displayResource = FALSE; - - pOut->canXor = (pIn->flags.prt == FALSE) && (pIn->noXor == FALSE); - - // Filter out improper swType and blockSet by HW restriction - if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil) - { - ADDR_ASSERT(IsTex2d(pOut->resourceType)); - blockSet.value = AddrBlockSetMacro; - addrPreferredSwSet.value = AddrSwSetZ; - addrValidSwSet.value = AddrSwSetZ; - - if 
(pIn->flags.noMetadata == FALSE) - { - if (pIn->flags.depth && - pIn->flags.texture && - (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2)))) - { - // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane - // equation from wrong address within memory range a tile covered and use the - // garbage data for compressed Z reading which finally leads to corruption. - pOut->canXor = FALSE; - prtXor = FALSE; - } - - if (m_settings.htileCacheRbConflict && - (pIn->flags.depth || pIn->flags.stencil) && - (slice > 1) && - (pIn->flags.metaRbUnaligned == FALSE) && - (pIn->flags.metaPipeUnaligned == FALSE)) - { - // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency - pOut->canXor = FALSE; - } - } - } - else if (ElemLib::IsBlockCompressed(pIn->format)) - { - // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes. - // Not sure under what circumstances "_D" would be appropriate as these formats - // are not displayable. - blockSet.value = AddrBlockSetMacro; - - // This isn't to be used as texture and caller doesn't allow macro tiled. - if ((pIn->flags.texture == FALSE) && - (pIn->forbiddenBlock.macro4KB && pIn->forbiddenBlock.macro64KB)) - { - blockSet.value |= AddrBlockSetLinear; - } - - addrPreferredSwSet.value = AddrSwSetD; - addrValidSwSet.value = AddrSwSetS | AddrSwSetD; - } - else if (ElemLib::IsMacroPixelPacked(pIn->format)) - { - // macro pixel packed formats (BG_RG, GB_GR) does not support the Z modes. - // Its notclear under what circumstances the D or R modes would be appropriate - // since these formats are not displayable. 
- blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro; - - addrPreferredSwSet.value = AddrSwSetS; - addrValidSwSet.value = AddrSwSetS | AddrSwSetD | AddrSwSetR; - } - else if (IsTex3d(pOut->resourceType)) - { - blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro; - - if (pIn->flags.prt) - { - // PRT cannot use SW_D which gives an unexpected block dimension - addrPreferredSwSet.value = AddrSwSetZ; - addrValidSwSet.value = AddrSwSetZ | AddrSwSetS; - } - else if ((numMipLevels > 1) && (slice >= width) && (slice >= height)) - { - // When depth (Z) is the maximum dimension then must use one of the SW_*_S - // or SW_*_Z modes if mipmapping is desired on a 3D surface - addrPreferredSwSet.value = AddrSwSetZ; - addrValidSwSet.value = AddrSwSetZ | AddrSwSetS; - } - else if (pIn->flags.color) - { - addrPreferredSwSet.value = AddrSwSetD; - addrValidSwSet.value = AddrSwSetZ | AddrSwSetS | AddrSwSetD; - } - else - { - addrPreferredSwSet.value = AddrSwSetZ; - addrValidSwSet.value = AddrSwSetZ | AddrSwSetD; - if (bpp != 128) - { - addrValidSwSet.value |= AddrSwSetS; - } - } - } - else - { - addrPreferredSwSet.value = ((pIn->flags.display == TRUE) || - (pIn->flags.overlay == TRUE) || - (pIn->bpp == 128)) ? AddrSwSetD : AddrSwSetS; - - addrValidSwSet.value = AddrSwSetS | AddrSwSetD | AddrSwSetR; - - if (numMipLevels > 1) - { - ADDR_ASSERT(numFrags == 1); - blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro; - } - else if ((numFrags > 1) || (numSamples > 1)) - { - ADDR_ASSERT(IsTex2d(pOut->resourceType)); - blockSet.value = AddrBlockSetMacro; - } - else - { - ADDR_ASSERT(IsTex2d(pOut->resourceType)); - blockSet.value = AddrBlockSetLinear | AddrBlockSetMicro | AddrBlockSetMacro; - - displayResource = pIn->flags.rotated || pIn->flags.display; - - if (displayResource) - { - addrPreferredSwSet.value = pIn->flags.rotated ? 
AddrSwSetR : AddrSwSetD; - - if (pIn->bpp > 64) - { - blockSet.value = 0; - } - else if (m_settings.isDce12) - { - if (pIn->bpp != 32) - { - blockSet.micro = FALSE; - } - - // DCE12 does not support display surface to be _T swizzle mode - prtXor = FALSE; - - addrValidSwSet.value = AddrSwSetD | AddrSwSetR; - } - else if (m_settings.isDcn1) - { - // _R is not supported by Dcn1 - if (pIn->bpp == 64) - { - addrPreferredSwSet.value = AddrSwSetD; - addrValidSwSet.value = AddrSwSetS | AddrSwSetD; - } - else - { - addrPreferredSwSet.value = AddrSwSetS; - addrValidSwSet.value = AddrSwSetS; - } - - blockSet.micro = FALSE; - } - else - { - ADDR_NOT_IMPLEMENTED(); - returnCode = ADDR_NOTSUPPORTED; - } - } - } - } - - ADDR_ASSERT((addrValidSwSet.value & addrPreferredSwSet.value) == addrPreferredSwSet.value); - - pOut->clientPreferredSwSet = clientPreferredSwSet; - - // Clamp client preferred set to valid set - clientPreferredSwSet.value &= addrValidSwSet.value; - - pOut->validSwTypeSet = addrValidSwSet; - - if (clientPreferredSwSet.value == 0) - { - // Client asks for an invalid swizzle type... 
- ADDR_ASSERT_ALWAYS(); - returnCode = ADDR_INVALIDPARAMS; - } - else - { - if (IsPow2(clientPreferredSwSet.value)) - { - // Only one swizzle type left, use it directly - addrPreferredSwSet.value = clientPreferredSwSet.value; - } - else if ((clientPreferredSwSet.value & addrPreferredSwSet.value) == 0) - { - // Client wants 2 or more a valid swizzle type but none of them is addrlib preferred - if (clientPreferredSwSet.sw_D) - { - addrPreferredSwSet.value = AddrSwSetD; - } - else if (clientPreferredSwSet.sw_Z) - { - addrPreferredSwSet.value = AddrSwSetZ; - } - else if (clientPreferredSwSet.sw_R) - { - addrPreferredSwSet.value = AddrSwSetR; - } - else - { - ADDR_ASSERT(clientPreferredSwSet.sw_S); - addrPreferredSwSet.value = AddrSwSetS; - } - } - - if ((numFrags > 1) && - (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags))) - { - // MSAA surface must have blk_bytes/pipe_interleave >= num_samples - blockSet.macro4KB = FALSE; - } - - if (pIn->flags.prt) - { - blockSet.value &= AddrBlockSetMacro64KB; - } - - // Apply customized forbidden setting - blockSet.value &= ~pIn->forbiddenBlock.value; - - if (pIn->maxAlign > 0) - { - if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB)) - { - blockSet.macro64KB = FALSE; - } - - if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB)) - { - blockSet.macro4KB = FALSE; - } - - if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B)) - { - blockSet.micro = FALSE; - } - } - - Dim3d blkAlign[AddrBlockMaxTiledType] = {{0}, {0}, {0}}; - Dim3d paddedDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}}; - UINT_64 padSize[AddrBlockMaxTiledType] = {0}; - - if (blockSet.micro) - { - returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlockMicro].w, - &blkAlign[AddrBlockMicro].h, - &blkAlign[AddrBlockMicro].d, - bpp, - numFrags, - pOut->resourceType, - ADDR_SW_256B); - - if (returnCode == ADDR_OK) - { - if (displayResource) - { - blkAlign[AddrBlockMicro].w = PowTwoAlign(blkAlign[AddrBlockMicro].w, 32); - } - else if ((blkAlign[AddrBlockMicro].w >= width) && 
(blkAlign[AddrBlockMicro].h >= height) && - (minSizeAlign <= GetBlockSize(ADDR_SW_256B))) - { - // If one 256B block can contain the surface, don't bother bigger block type - blockSet.macro4KB = FALSE; - blockSet.macro64KB = FALSE; - blockSet.var = FALSE; - } - - padSize[AddrBlockMicro] = ComputePadSize(&blkAlign[AddrBlockMicro], width, height, - slice, &paddedDim[AddrBlockMicro]); - } - } - - if ((returnCode == ADDR_OK) && blockSet.macro4KB) - { - returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock4KB].w, - &blkAlign[AddrBlock4KB].h, - &blkAlign[AddrBlock4KB].d, - bpp, - numFrags, - pOut->resourceType, - ADDR_SW_4KB); - - if (returnCode == ADDR_OK) - { - if (displayResource) - { - blkAlign[AddrBlock4KB].w = PowTwoAlign(blkAlign[AddrBlock4KB].w, 32); - } - - padSize[AddrBlock4KB] = ComputePadSize(&blkAlign[AddrBlock4KB], width, height, - slice, &paddedDim[AddrBlock4KB]); - - ADDR_ASSERT(padSize[AddrBlock4KB] >= padSize[AddrBlockMicro]); - } - } - - if ((returnCode == ADDR_OK) && blockSet.macro64KB) - { - returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock64KB].w, - &blkAlign[AddrBlock64KB].h, - &blkAlign[AddrBlock64KB].d, - bpp, - numFrags, - pOut->resourceType, - ADDR_SW_64KB); - - if (returnCode == ADDR_OK) - { - if (displayResource) - { - blkAlign[AddrBlock64KB].w = PowTwoAlign(blkAlign[AddrBlock64KB].w, 32); - } - - padSize[AddrBlock64KB] = ComputePadSize(&blkAlign[AddrBlock64KB], width, height, - slice, &paddedDim[AddrBlock64KB]); - - ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlock4KB]); - ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlockMicro]); - } - } - - if (returnCode == ADDR_OK) - { - UINT_64 minSizeAlignInElement = Max(minSizeAlign / (bpp >> 3), 1u); - - for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++) - { - padSize[i] = PowTwoAlign(padSize[i], minSizeAlignInElement); - } - - // Use minimum block type which meets all conditions above if flag minimizeAlign was set - if (pIn->flags.minimizeAlign) - { - // 
If padded size of 64KB block is larger than padded size of 256B block or 4KB - // block, filter out 64KB block from candidate list - if (blockSet.macro64KB && - ((blockSet.micro && (padSize[AddrBlockMicro] < padSize[AddrBlock64KB])) || - (blockSet.macro4KB && (padSize[AddrBlock4KB] < padSize[AddrBlock64KB])))) - { - blockSet.macro64KB = FALSE; - } - - // If padded size of 4KB block is larger than padded size of 256B block, - // filter out 4KB block from candidate list - if (blockSet.macro4KB && - blockSet.micro && - (padSize[AddrBlockMicro] < padSize[AddrBlock4KB])) - { - blockSet.macro4KB = FALSE; - } - } - // Filter out 64KB/4KB block if a smaller block type has 2/3 or less memory footprint - else if (pIn->flags.opt4space) - { - UINT_64 threshold = blockSet.micro ? padSize[AddrBlockMicro] : - (blockSet.macro4KB ? padSize[AddrBlock4KB] : padSize[AddrBlock64KB]); - - threshold += threshold >> 1; - - if (blockSet.macro64KB && (padSize[AddrBlock64KB] > threshold)) - { - blockSet.macro64KB = FALSE; - } - - if (blockSet.macro4KB && (padSize[AddrBlock4KB] > threshold)) - { - blockSet.macro4KB = FALSE; - } - } - else - { - if (blockSet.macro64KB && - (padSize[AddrBlock64KB] >= static_cast(width) * height * slice * 2) && - ((blockSet.value & ~AddrBlockSetMacro64KB) != 0)) - { - // If 64KB block waste more than half memory on padding, filter it out from - // candidate list when it is not the only choice left - blockSet.macro64KB = FALSE; - } - } - - if (blockSet.value == 0) - { - // Bad things happen, client will not get any useful information from AddrLib. - // Maybe we should fill in some output earlier instead of outputing nothing? 
- ADDR_ASSERT_ALWAYS(); - returnCode = ADDR_INVALIDPARAMS; - } - else - { - pOut->validBlockSet = blockSet; - pOut->canXor = pOut->canXor && - (blockSet.macro4KB || blockSet.macro64KB || blockSet.var); - - if (blockSet.macro64KB || blockSet.macro4KB) - { - if (addrPreferredSwSet.value == AddrSwSetZ) - { - pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_Z : ADDR_SW_4KB_Z; - } - else if (addrPreferredSwSet.value == AddrSwSetS) - { - pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_S : ADDR_SW_4KB_S; - } - else if (addrPreferredSwSet.value == AddrSwSetD) - { - pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_D : ADDR_SW_4KB_D; - } - else - { - ADDR_ASSERT(addrPreferredSwSet.value == AddrSwSetR); - pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_R : ADDR_SW_4KB_R; - } - - if (prtXor && blockSet.macro64KB) - { - // Client wants PRTXOR, give back _T swizzle mode if 64KB is available - const UINT_32 prtGap = ADDR_SW_64KB_Z_T - ADDR_SW_64KB_Z; - pOut->swizzleMode = static_cast(pOut->swizzleMode + prtGap); - } - else if (pOut->canXor) - { - // Client wants XOR and this is allowed, return XOR version swizzle mode - const UINT_32 xorGap = ADDR_SW_4KB_Z_X - ADDR_SW_4KB_Z; - pOut->swizzleMode = static_cast(pOut->swizzleMode + xorGap); - } - } - else if (blockSet.micro) - { - if (addrPreferredSwSet.value == AddrSwSetS) - { - pOut->swizzleMode = ADDR_SW_256B_S; - } - else if (addrPreferredSwSet.value == AddrSwSetD) - { - pOut->swizzleMode = ADDR_SW_256B_D; - } - else - { - ADDR_ASSERT(addrPreferredSwSet.value == AddrSwSetR); - pOut->swizzleMode = ADDR_SW_256B_R; - } - } - else if (blockSet.linear) - { - // Fall into this branch doesn't mean linear is suitable, only no other choices! 
- pOut->swizzleMode = ADDR_SW_LINEAR; - } - else - { - ADDR_ASSERT(blockSet.var); - - // Designer consider VAR swizzle mode is usless for most cases - ADDR_UNHANDLED_CASE(); - - returnCode = ADDR_NOTSUPPORTED; - } - -#if DEBUG - // Post sanity check, at least AddrLib should accept the output generated by its own - if (pOut->swizzleMode != ADDR_SW_LINEAR) - { - ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; - localIn.flags = pIn->flags; - localIn.swizzleMode = pOut->swizzleMode; - localIn.resourceType = pOut->resourceType; - localIn.format = pIn->format; - localIn.bpp = bpp; - localIn.width = width; - localIn.height = height; - localIn.numSlices = slice; - localIn.numMipLevels = numMipLevels; - localIn.numSamples = numSamples; - localIn.numFrags = numFrags; - - HwlComputeSurfaceInfoSanityCheck(&localIn); - - } -#endif - } - } - } - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::ComputeStereoInfo -* -* @brief -* Compute height alignment and right eye pipeBankXor for stereo surface -* -* @return -* Error code -* -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut, - UINT_32* pHeightAlign - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut); - - if (eqIndex < m_numEquations) - { - if (IsXor(pIn->swizzleMode)) - { - const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode); - const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2); - const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2); - const UINT_32 bppLog2 = Log2(pIn->bpp >> 3); - const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1; - MAYBE_UNUSED const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex]; - - 
ADDR_ASSERT(maxYCoordBlock256 == - GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1)); - - const UINT_32 maxYCoordInBaseEquation = - (blkSizeLog2 - GetBlockSizeLog2(ADDR_SW_256B)) / 2 + maxYCoordBlock256; - - ADDR_ASSERT(maxYCoordInBaseEquation == - GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1)); - - const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits; - - ADDR_ASSERT(maxYCoordInPipeXor == - GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1)); - - const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ? - 0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits; - - ADDR_ASSERT(maxYCoordInBankXor == - GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1)); - - const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor); - - if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation) - { - *pHeightAlign = 1u << maxYCoordInPipeBankXor; - - if (pOut->pStereoInfo != NULL) - { - pOut->pStereoInfo->rightSwizzle = 0; - - if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0) - { - if (maxYCoordInPipeXor == maxYCoordInPipeBankXor) - { - pOut->pStereoInfo->rightSwizzle |= (1u << 1); - } - - if (maxYCoordInBankXor == maxYCoordInPipeBankXor) - { - pOut->pStereoInfo->rightSwizzle |= - 1u << ((numPipeBits % 2) ? 
numPipeBits : numPipeBits + 1); - } - - ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle == - GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2], - numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor)); - } - } - } - } - } - else - { - ADDR_ASSERT_ALWAYS(); - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeSurfaceInfoTiled -* -* @brief -* Internal function to calculate alignment for tiled surface -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth, - &pOut->blockHeight, - &pOut->blockSlices, - pIn->bpp, - pIn->numFrags, - pIn->resourceType, - pIn->swizzleMode); - - if (returnCode == ADDR_OK) - { - UINT_32 pitchAlignInElement = pOut->blockWidth; - - if ((IsTex2d(pIn->resourceType) == TRUE) && - (pIn->flags.display || pIn->flags.rotated) && - (pIn->numMipLevels <= 1) && - (pIn->numSamples <= 1) && - (pIn->numFrags <= 1)) - { - // Display engine needs pitch align to be at least 32 pixels. 
- pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32); - } - - pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement); - - if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0)) - { - if ((pIn->pitchInElement % pitchAlignInElement) != 0) - { - returnCode = ADDR_INVALIDPARAMS; - } - else if (pIn->pitchInElement < pOut->pitch) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - pOut->pitch = pIn->pitchInElement; - } - } - - UINT_32 heightAlign = 0; - - if (pIn->flags.qbStereo) - { - returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign); - } - - if (returnCode == ADDR_OK) - { - pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight); - - if (heightAlign > 1) - { - pOut->height = PowTwoAlign(pOut->height, heightAlign); - } - - pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices); - - pOut->epitchIsHeight = FALSE; - pOut->mipChainInTail = FALSE; - pOut->firstMipIdInTail = pIn->numMipLevels; - - pOut->mipChainPitch = pOut->pitch; - pOut->mipChainHeight = pOut->height; - pOut->mipChainSlice = pOut->numSlices; - - if (pIn->numMipLevels > 1) - { - pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType, - pIn->swizzleMode, - pIn->bpp, - pIn->width, - pIn->height, - pIn->numSlices, - pOut->blockWidth, - pOut->blockHeight, - pOut->blockSlices, - pIn->numMipLevels, - pOut->pMipInfo); - - const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1); - - if (endingMipId == 0) - { - const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType, - pIn->swizzleMode, - pOut->blockWidth, - pOut->blockHeight, - pOut->blockSlices); - - pOut->epitchIsHeight = TRUE; - pOut->pitch = tailMaxDim.w; - pOut->height = tailMaxDim.h; - pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ? 
- tailMaxDim.d : pIn->numSlices; - pOut->mipChainInTail = TRUE; - } - else - { - UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth; - UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight; - - AddrMajorMode majorMode = GetMajorMode(pIn->resourceType, - pIn->swizzleMode, - mip0WidthInBlk, - mip0HeightInBlk, - pOut->numSlices / pOut->blockSlices); - if (majorMode == ADDR_MAJOR_Y) - { - UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk); - - if ((mip1WidthInBlk == 1) && (endingMipId > 2)) - { - mip1WidthInBlk++; - } - - pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth); - - pOut->epitchIsHeight = FALSE; - } - else - { - UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk); - - if ((mip1HeightInBlk == 1) && (endingMipId > 2)) - { - mip1HeightInBlk++; - } - - pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight); - - pOut->epitchIsHeight = TRUE; - } - } - - if (pOut->pMipInfo != NULL) - { - UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); - - for (UINT_32 i = 0; i < pIn->numMipLevels; i++) - { - Dim3d mipStartPos = {0}; - UINT_32 mipTailOffsetInBytes = 0; - - mipStartPos = GetMipStartPos(pIn->resourceType, - pIn->swizzleMode, - pOut->pitch, - pOut->height, - pOut->numSlices, - pOut->blockWidth, - pOut->blockHeight, - pOut->blockSlices, - i, - elementBytesLog2, - &mipTailOffsetInBytes); - - UINT_32 pitchInBlock = - pOut->mipChainPitch / pOut->blockWidth; - UINT_32 sliceInBlock = - (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock; - UINT_64 blockIndex = - mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w; - UINT_64 macroBlockOffset = - blockIndex << GetBlockSizeLog2(pIn->swizzleMode); - - pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset; - pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes; - } - } - } - else if (pOut->pMipInfo != NULL) - { - pOut->pMipInfo[0].pitch = pOut->pitch; - pOut->pMipInfo[0].height = pOut->height; - pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? 
pOut->numSlices : 1; - pOut->pMipInfo[0].offset = 0; - } - - pOut->sliceSize = static_cast(pOut->mipChainPitch) * pOut->mipChainHeight * - (pIn->bpp >> 3) * pIn->numFrags; - pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice; - pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode); - - if (pIn->flags.prt) - { - pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment); - } - } - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeSurfaceInfoLinear -* -* @brief -* Internal function to calculate alignment for linear surface -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - UINT_32 pitch = 0; - UINT_32 actualHeight = 0; - UINT_32 elementBytes = pIn->bpp >> 3; - const UINT_32 alignment = pIn->flags.prt ? 
PrtAlignment : 256; - - if (IsTex1d(pIn->resourceType)) - { - if (pIn->height > 1) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - const UINT_32 pitchAlignInElement = alignment / elementBytes; - - pitch = PowTwoAlign(pIn->width, pitchAlignInElement); - actualHeight = pIn->numMipLevels; - - if (pIn->flags.prt == FALSE) - { - returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement, - &pitch, &actualHeight); - } - - if (returnCode == ADDR_OK) - { - if (pOut->pMipInfo != NULL) - { - for (UINT_32 i = 0; i < pIn->numMipLevels; i++) - { - pOut->pMipInfo[i].offset = pitch * elementBytes * i; - pOut->pMipInfo[i].pitch = pitch; - pOut->pMipInfo[i].height = 1; - pOut->pMipInfo[i].depth = 1; - } - } - } - } - } - else - { - returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo); - } - - if ((pitch == 0) || (actualHeight == 0)) - { - returnCode = ADDR_INVALIDPARAMS; - } - - if (returnCode == ADDR_OK) - { - pOut->pitch = pitch; - pOut->height = pIn->height; - pOut->numSlices = pIn->numSlices; - pOut->mipChainPitch = pitch; - pOut->mipChainHeight = actualHeight; - pOut->mipChainSlice = pOut->numSlices; - pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE; - pOut->sliceSize = static_cast(pOut->pitch) * actualHeight * elementBytes; - pOut->surfSize = pOut->sliceSize * pOut->numSlices; - pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment; - pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 
1 : (256 / elementBytes); - pOut->blockHeight = 1; - pOut->blockSlices = 1; - } - - // Post calculation validate - ADDR_ASSERT(pOut->sliceSize > 0); - - return returnCode; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::GetMipChainInfo -* -* @brief -* Internal function to get out information about mip chain -* -* @return -* Smaller value between Id of first mip fitted in mip tail and max Id of mip being created -************************************************************************************************************************ -*/ -UINT_32 Gfx9Lib::GetMipChainInfo( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode, - UINT_32 bpp, - UINT_32 mip0Width, - UINT_32 mip0Height, - UINT_32 mip0Depth, - UINT_32 blockWidth, - UINT_32 blockHeight, - UINT_32 blockDepth, - UINT_32 numMipLevel, - ADDR2_MIP_INFO* pMipInfo) const -{ - const Dim3d tailMaxDim = - GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth); - - UINT_32 mipPitch = mip0Width; - UINT_32 mipHeight = mip0Height; - UINT_32 mipDepth = IsTex3d(resourceType) ? 
mip0Depth : 1; - UINT_32 offset = 0; - UINT_32 firstMipIdInTail = numMipLevel; - BOOL_32 inTail = FALSE; - BOOL_32 finalDim = FALSE; - BOOL_32 is3dThick = IsThick(resourceType, swizzleMode); - BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE); - - for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++) - { - if (inTail) - { - if (finalDim == FALSE) - { - UINT_32 mipSize; - - if (is3dThick) - { - mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3); - } - else - { - mipSize = mipPitch * mipHeight * (bpp >> 3); - } - - if (mipSize <= 256) - { - UINT_32 index = Log2(bpp >> 3); - - if (is3dThick) - { - mipPitch = Block256_3dZ[index].w; - mipHeight = Block256_3dZ[index].h; - mipDepth = Block256_3dZ[index].d; - } - else - { - mipPitch = Block256_2d[index].w; - mipHeight = Block256_2d[index].h; - } - - finalDim = TRUE; - } - } - } - else - { - inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, - mipPitch, mipHeight, mipDepth); - - if (inTail) - { - firstMipIdInTail = mipId; - mipPitch = tailMaxDim.w; - mipHeight = tailMaxDim.h; - - if (is3dThick) - { - mipDepth = tailMaxDim.d; - } - } - else - { - mipPitch = PowTwoAlign(mipPitch, blockWidth); - mipHeight = PowTwoAlign(mipHeight, blockHeight); - - if (is3dThick) - { - mipDepth = PowTwoAlign(mipDepth, blockDepth); - } - } - } - - if (pMipInfo != NULL) - { - pMipInfo[mipId].pitch = mipPitch; - pMipInfo[mipId].height = mipHeight; - pMipInfo[mipId].depth = mipDepth; - pMipInfo[mipId].offset = offset; - } - - offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3)); - - if (finalDim) - { - if (is3dThin) - { - mipDepth = Max(mipDepth >> 1, 1u); - } - } - else - { - mipPitch = Max(mipPitch >> 1, 1u); - mipHeight = Max(mipHeight >> 1, 1u); - - if (is3dThick || is3dThin) - { - mipDepth = Max(mipDepth >> 1, 1u); - } - } - } - - return firstMipIdInTail; -} - -/** -************************************************************************************************************************ -* 
Gfx9Lib::GetMetaMiptailInfo -* -* @brief -* Get mip tail coordinate information. -* -* @return -* N/A -************************************************************************************************************************ -*/ -VOID Gfx9Lib::GetMetaMiptailInfo( - ADDR2_META_MIP_INFO* pInfo, ///< [out] output structure to store per mip coord - Dim3d mipCoord, ///< [in] mip tail base coord - UINT_32 numMipInTail, ///< [in] number of mips in tail - Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth - ) const -{ - BOOL_32 isThick = (pMetaBlkDim->d > 1); - UINT_32 mipWidth = pMetaBlkDim->w; - UINT_32 mipHeight = pMetaBlkDim->h >> 1; - UINT_32 mipDepth = pMetaBlkDim->d; - UINT_32 minInc; - - if (isThick) - { - minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32); - } - else if (pMetaBlkDim->h >= 1024) - { - minInc = 256; - } - else if (pMetaBlkDim->h == 512) - { - minInc = 128; - } - else - { - minInc = 64; - } - - UINT_32 blk32MipId = 0xFFFFFFFF; - - for (UINT_32 mip = 0; mip < numMipInTail; mip++) - { - pInfo[mip].inMiptail = TRUE; - pInfo[mip].startX = mipCoord.w; - pInfo[mip].startY = mipCoord.h; - pInfo[mip].startZ = mipCoord.d; - pInfo[mip].width = mipWidth; - pInfo[mip].height = mipHeight; - pInfo[mip].depth = mipDepth; - - if (mipWidth <= 32) - { - if (blk32MipId == 0xFFFFFFFF) - { - blk32MipId = mip; - } - - mipCoord.w = pInfo[blk32MipId].startX; - mipCoord.h = pInfo[blk32MipId].startY; - mipCoord.d = pInfo[blk32MipId].startZ; - - switch (mip - blk32MipId) - { - case 0: - mipCoord.w += 32; // 16x16 - break; - case 1: - mipCoord.h += 32; // 8x8 - break; - case 2: - mipCoord.h += 32; // 4x4 - mipCoord.w += 16; - break; - case 3: - mipCoord.h += 32; // 2x2 - mipCoord.w += 32; - break; - case 4: - mipCoord.h += 32; // 1x1 - mipCoord.w += 48; - break; - // The following are for BC/ASTC formats - case 5: - mipCoord.h += 48; // 1/2 x 1/2 - break; - case 6: - mipCoord.h += 48; // 1/4 x 1/4 - mipCoord.w += 16; - break; - case 7: - 
mipCoord.h += 48; // 1/8 x 1/8 - mipCoord.w += 32; - break; - case 8: - mipCoord.h += 48; // 1/16 x 1/16 - mipCoord.w += 48; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - - mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8; - mipHeight = mipWidth; - - if (isThick) - { - mipDepth = mipWidth; - } - } - else - { - if (mipWidth <= minInc) - { - // if we're below the minimal increment... - if (isThick) - { - // For 3d, just go in z direction - mipCoord.d += mipDepth; - } - else - { - // For 2d, first go across, then down - if ((mipWidth * 2) == minInc) - { - // if we're 2 mips below, that's when we go back in x, and down in y - mipCoord.w -= minInc; - mipCoord.h += minInc; - } - else - { - // otherwise, just go across in x - mipCoord.w += minInc; - } - } - } - else - { - // On even mip, go down, otherwise, go across - if (mip & 1) - { - mipCoord.w += mipWidth; - } - else - { - mipCoord.h += mipHeight; - } - } - // Divide the width by 2 - mipWidth >>= 1; - // After the first mip in tail, the mip is always a square - mipHeight = mipWidth; - // ...or for 3d, a cube - if (isThick) - { - mipDepth = mipWidth; - } - } - } -} - -/** -************************************************************************************************************************ -* Gfx9Lib::GetMipStartPos -* -* @brief -* Internal function to get out information about mip logical start position -* -* @return -* logical start position in macro block width/heith/depth of one mip level within one slice -************************************************************************************************************************ -*/ -Dim3d Gfx9Lib::GetMipStartPos( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode, - UINT_32 width, - UINT_32 height, - UINT_32 depth, - UINT_32 blockWidth, - UINT_32 blockHeight, - UINT_32 blockDepth, - UINT_32 mipId, - UINT_32 log2ElementBytes, - UINT_32* pMipTailBytesOffset) const -{ - Dim3d mipStartPos = {0}; - const Dim3d tailMaxDim = 
GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth); - - // Report mip in tail if Mip0 is already in mip tail - BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth); - UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode); - UINT_32 mipIndexInTail = mipId; - - if (inMipTail == FALSE) - { - // Mip 0 dimension, unit in block - UINT_32 mipWidthInBlk = width / blockWidth; - UINT_32 mipHeightInBlk = height / blockHeight; - UINT_32 mipDepthInBlk = depth / blockDepth; - AddrMajorMode majorMode = GetMajorMode(resourceType, - swizzleMode, - mipWidthInBlk, - mipHeightInBlk, - mipDepthInBlk); - - UINT_32 endingMip = mipId + 1; - - for (UINT_32 i = 1; i <= mipId; i++) - { - if ((i == 1) || (i == 3)) - { - if (majorMode == ADDR_MAJOR_Y) - { - mipStartPos.w += mipWidthInBlk; - } - else - { - mipStartPos.h += mipHeightInBlk; - } - } - else - { - if (majorMode == ADDR_MAJOR_X) - { - mipStartPos.w += mipWidthInBlk; - } - else if (majorMode == ADDR_MAJOR_Y) - { - mipStartPos.h += mipHeightInBlk; - } - else - { - mipStartPos.d += mipDepthInBlk; - } - } - - BOOL_32 inTail = FALSE; - - if (IsThick(resourceType, swizzleMode)) - { - UINT_32 dim = log2blkSize % 3; - - if (dim == 0) - { - inTail = - (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2); - } - else if (dim == 1) - { - inTail = - (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2); - } - else - { - inTail = - (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1); - } - } - else - { - if (log2blkSize & 1) - { - inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1); - } - else - { - inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2); - } - } - - if (inTail) - { - endingMip = i; - break; - } - - mipWidthInBlk = RoundHalf(mipWidthInBlk); - mipHeightInBlk = RoundHalf(mipHeightInBlk); - mipDepthInBlk = RoundHalf(mipDepthInBlk); - } - - if (mipId >= endingMip) - { - inMipTail = TRUE; - mipIndexInTail = mipId - 
endingMip; - } - } - - if (inMipTail) - { - UINT_32 index = mipIndexInTail + MaxMacroBits - log2blkSize; - ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32)); - *pMipTailBytesOffset = MipTailOffset256B[index] << 8; - } - - return mipStartPos; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled -* -* @brief -* Internal function to calculate address from coord for tiled swizzle surface -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled( - const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; - localIn.swizzleMode = pIn->swizzleMode; - localIn.flags = pIn->flags; - localIn.resourceType = pIn->resourceType; - localIn.bpp = pIn->bpp; - localIn.width = Max(pIn->unalignedWidth, 1u); - localIn.height = Max(pIn->unalignedHeight, 1u); - localIn.numSlices = Max(pIn->numSlices, 1u); - localIn.numMipLevels = Max(pIn->numMipLevels, 1u); - localIn.numSamples = Max(pIn->numSamples, 1u); - localIn.numFrags = Max(pIn->numFrags, 1u); - if (localIn.numMipLevels <= 1) - { - localIn.pitchInElement = pIn->pitchInElement; - } - - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0}; - ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut); - - BOOL_32 valid = (returnCode == ADDR_OK) && - (IsThin(pIn->resourceType, pIn->swizzleMode) || - IsThick(pIn->resourceType, pIn->swizzleMode)) && - ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode))); - - if (valid) - { - UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3); - Dim3d mipStartPos = {0}; - UINT_32 mipTailBytesOffset = 0; - - if (pIn->numMipLevels > 
1) - { - // Mip-map chain cannot be MSAA surface - ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1)); - - mipStartPos = GetMipStartPos(pIn->resourceType, - pIn->swizzleMode, - localOut.pitch, - localOut.height, - localOut.numSlices, - localOut.blockWidth, - localOut.blockHeight, - localOut.blockSlices, - pIn->mipId, - log2ElementBytes, - &mipTailBytesOffset); - } - - UINT_32 interleaveOffset = 0; - UINT_32 pipeBits = 0; - UINT_32 pipeXor = 0; - UINT_32 bankBits = 0; - UINT_32 bankXor = 0; - - if (IsThin(pIn->resourceType, pIn->swizzleMode)) - { - UINT_32 blockOffset = 0; - UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode); - - if (IsZOrderSwizzle(pIn->swizzleMode)) - { - // Morton generation - if ((log2ElementBytes == 0) || (log2ElementBytes == 2)) - { - UINT_32 totalLowBits = 6 - log2ElementBytes; - UINT_32 mortBits = totalLowBits / 2; - UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits); - // Are 9 bits enough? - UINT_32 highBitsValue = - MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits; - blockOffset = lowBitsValue | highBitsValue; - ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue); - } - else - { - blockOffset = MortonGen2d(pIn->y, pIn->x, 13); - } - - // Fill LSBs with sample bits - if (pIn->numSamples > 1) - { - blockOffset *= pIn->numSamples; - blockOffset |= pIn->sample; - } - - // Shift according to BytesPP - blockOffset <<= log2ElementBytes; - } - else - { - // Micro block offset - UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn); - blockOffset = microBlockOffset; - - // Micro block dimension - ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp); - Dim2d microBlockDim = Block256_2d[log2ElementBytes]; - // Morton generation, does 12 bit enough? 
- blockOffset |= - MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8; - - // Sample bits start location - UINT_32 sampleStart = log2blkSize - Log2(pIn->numSamples); - // Join sample bits information to the highest Macro block bits - if (IsNonPrtXor(pIn->swizzleMode)) - { - // Non-prt-Xor : xor highest Macro block bits with sample bits - blockOffset = blockOffset ^ (pIn->sample << sampleStart); - } - else - { - // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits - // after this op, the blockOffset only contains log2 Macro block size bits - blockOffset %= (1 << sampleStart); - blockOffset |= (pIn->sample << sampleStart); - ADDR_ASSERT((blockOffset >> log2blkSize) == 0); - } - } - - if (IsXor(pIn->swizzleMode)) - { - // Mask off bits above Macro block bits to keep page synonyms working for prt - if (IsPrt(pIn->swizzleMode)) - { - blockOffset &= ((1 << log2blkSize) - 1); - } - - // Preserve offset inside pipe interleave - interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1); - blockOffset >>= m_pipeInterleaveLog2; - - // Pipe/Se xor bits - pipeBits = GetPipeXorBits(log2blkSize); - // Pipe xor - pipeXor = FoldXor2d(blockOffset, pipeBits); - blockOffset >>= pipeBits; - - // Bank xor bits - bankBits = GetBankXorBits(log2blkSize); - // Bank Xor - bankXor = FoldXor2d(blockOffset, bankBits); - blockOffset >>= bankBits; - - // Put all the part back together - blockOffset <<= bankBits; - blockOffset |= bankXor; - blockOffset <<= pipeBits; - blockOffset |= pipeXor; - blockOffset <<= m_pipeInterleaveLog2; - blockOffset |= interleaveOffset; - } - - ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset)); - ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize))); - - blockOffset |= mipTailBytesOffset; - - if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1)) - { - // Apply slice xor if not MSAA/PRT - blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << 
m_pipeInterleaveLog2); - blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) << - (m_pipeInterleaveLog2 + pipeBits)); - } - - returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor, - bankBits, pipeBits, &blockOffset); - - blockOffset %= (1 << log2blkSize); - - UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth; - UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight; - UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock; - UINT_64 macroBlockIndex = - (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock + - ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock + - ((pIn->x / localOut.blockWidth) + mipStartPos.w); - - pOut->addr = blockOffset | (macroBlockIndex << log2blkSize); - } - else - { - UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode); - - Dim3d microBlockDim = Block1K_3d[log2ElementBytes]; - - UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w), - (pIn->y / microBlockDim.h), - (pIn->slice / microBlockDim.d), - 8); - - blockOffset <<= 10; - blockOffset |= ComputeSurface3DMicroBlockOffset(pIn); - - if (IsXor(pIn->swizzleMode)) - { - // Mask off bits above Macro block bits to keep page synonyms working for prt - if (IsPrt(pIn->swizzleMode)) - { - blockOffset &= ((1 << log2blkSize) - 1); - } - - // Preserve offset inside pipe interleave - interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1); - blockOffset >>= m_pipeInterleaveLog2; - - // Pipe/Se xor bits - pipeBits = GetPipeXorBits(log2blkSize); - // Pipe xor - pipeXor = FoldXor3d(blockOffset, pipeBits); - blockOffset >>= pipeBits; - - // Bank xor bits - bankBits = GetBankXorBits(log2blkSize); - // Bank Xor - bankXor = FoldXor3d(blockOffset, bankBits); - blockOffset >>= bankBits; - - // Put all the part back together - blockOffset <<= bankBits; - blockOffset |= bankXor; - blockOffset <<= pipeBits; - blockOffset |= pipeXor; - blockOffset <<= 
m_pipeInterleaveLog2; - blockOffset |= interleaveOffset; - } - - ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset)); - ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize))); - blockOffset |= mipTailBytesOffset; - - returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor, - bankBits, pipeBits, &blockOffset); - - blockOffset %= (1 << log2blkSize); - - UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w; - UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h; - UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d; - - UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth; - UINT_32 sliceSizeInBlock = - (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock; - UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; - - pOut->addr = blockOffset | (blockIndex << log2blkSize); - } - } - else - { - returnCode = ADDR_INVALIDPARAMS; - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::ComputeSurfaceInfoLinear -* -* @brief -* Internal function to calculate padding for linear swizzle 2D/3D surface -* -* @return -* N/A -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture - UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element - UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW - ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - UINT_32 elementBytes = pIn->bpp >> 3; - UINT_32 pitchAlignInElement = 0; - - if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) - { - ADDR_ASSERT(pIn->numMipLevels <= 1); - ADDR_ASSERT(pIn->numSlices <= 1); - 
pitchAlignInElement = 1; - } - else - { - pitchAlignInElement = (256 / elementBytes); - } - - UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement); - UINT_32 slice0PaddedHeight = pIn->height; - - returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement, - &mipChainWidth, &slice0PaddedHeight); - - if (returnCode == ADDR_OK) - { - UINT_32 mipChainHeight = 0; - UINT_32 mipHeight = pIn->height; - - for (UINT_32 i = 0; i < pIn->numMipLevels; i++) - { - if (pMipInfo != NULL) - { - pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes; - pMipInfo[i].pitch = mipChainWidth; - pMipInfo[i].height = mipHeight; - pMipInfo[i].depth = 1; - } - - mipChainHeight += mipHeight; - mipHeight = RoundHalf(mipHeight); - mipHeight = Max(mipHeight, 1u); - } - - *pMipmap0PaddedWidth = mipChainWidth; - *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight; - } - - return returnCode; -} - -} // V2 -} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/gfx9/gfx9addrlib.h mesa-19.0.1/src/amd/addrlib/gfx9/gfx9addrlib.h --- mesa-18.3.3/src/amd/addrlib/gfx9/gfx9addrlib.h 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/gfx9/gfx9addrlib.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,454 +0,0 @@ -/* - * Copyright © 2017 Advanced Micro Devices, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -************************************************************************************************************************ -* @file gfx9addrlib.h -* @brief Contgfx9ns the Gfx9Lib class definition. -************************************************************************************************************************ -*/ - -#ifndef __GFX9_ADDR_LIB_H__ -#define __GFX9_ADDR_LIB_H__ - -#include "addrlib2.h" -#include "coord.h" - -namespace Addr -{ -namespace V2 -{ - -/** -************************************************************************************************************************ -* @brief GFX9 specific settings structure. 
-************************************************************************************************************************ -*/ -struct Gfx9ChipSettings -{ - struct - { - // Asic/Generation name - UINT_32 isArcticIsland : 1; - UINT_32 isVega10 : 1; - UINT_32 isRaven : 1; - UINT_32 isVega12 : 1; - UINT_32 isVega20 : 1; - - // Display engine IP version name - UINT_32 isDce12 : 1; - UINT_32 isDcn1 : 1; - - // Misc configuration bits - UINT_32 metaBaseAlignFix : 1; - UINT_32 depthPipeXorDisable : 1; - UINT_32 htileAlignFix : 1; - UINT_32 applyAliasFix : 1; - UINT_32 htileCacheRbConflict: 1; - UINT_32 reserved2 : 27; - }; -}; - -/** -************************************************************************************************************************ -* @brief GFX9 data surface type. -************************************************************************************************************************ -*/ -enum Gfx9DataType -{ - Gfx9DataColor, - Gfx9DataDepthStencil, - Gfx9DataFmask -}; - -/** -************************************************************************************************************************ -* @brief GFX9 meta equation parameters -************************************************************************************************************************ -*/ -struct MetaEqParams -{ - UINT_32 maxMip; - UINT_32 elementBytesLog2; - UINT_32 numSamplesLog2; - ADDR2_META_FLAGS metaFlag; - Gfx9DataType dataSurfaceType; - AddrSwizzleMode swizzleMode; - AddrResourceType resourceType; - UINT_32 metaBlkWidthLog2; - UINT_32 metaBlkHeightLog2; - UINT_32 metaBlkDepthLog2; - UINT_32 compBlkWidthLog2; - UINT_32 compBlkHeightLog2; - UINT_32 compBlkDepthLog2; -}; - -/** -************************************************************************************************************************ -* @brief This class is the GFX9 specific address library -* function set. 
-************************************************************************************************************************ -*/ -class Gfx9Lib : public Lib -{ -public: - /// Creates Gfx9Lib object - static Addr::Lib* CreateObj(const Client* pClient) - { - VOID* pMem = Object::ClientAlloc(sizeof(Gfx9Lib), pClient); - return (pMem != NULL) ? new (pMem) Gfx9Lib(pClient) : NULL; - } - -protected: - Gfx9Lib(const Client* pClient); - virtual ~Gfx9Lib(); - - virtual BOOL_32 HwlIsStandardSwizzle( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const - { - return m_swizzleModeTable[swizzleMode].isStd || - (IsTex3d(resourceType) && m_swizzleModeTable[swizzleMode].isDisp); - } - - virtual BOOL_32 HwlIsDisplaySwizzle( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const - { - return IsTex2d(resourceType) && m_swizzleModeTable[swizzleMode].isDisp; - } - - virtual BOOL_32 HwlIsThin( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const - { - return ((IsTex2d(resourceType) == TRUE) || - ((IsTex3d(resourceType) == TRUE) && - (m_swizzleModeTable[swizzleMode].isZ == FALSE) && - (m_swizzleModeTable[swizzleMode].isStd == FALSE))); - } - - virtual BOOL_32 HwlIsThick( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const - { - return (IsTex3d(resourceType) && - (m_swizzleModeTable[swizzleMode].isZ || m_swizzleModeTable[swizzleMode].isStd)); - } - - virtual ADDR_E_RETURNCODE HwlComputeHtileInfo( - const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, - ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeCmaskInfo( - const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, - ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeDccInfo( - const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, - ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord( - const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut); - 
- virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord( - const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut); - - virtual ADDR_E_RETURNCODE HwlComputeHtileCoordFromAddr( - const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut); - - virtual ADDR_E_RETURNCODE HwlComputeDccAddrFromCoord( - const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut); - - virtual UINT_32 HwlGetEquationIndex( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeBlock256Equation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const; - - virtual ADDR_E_RETURNCODE HwlComputeThinEquation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const; - - virtual ADDR_E_RETURNCODE HwlComputeThickEquation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const; - - // Get equation table pointer and number of equations - virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const - { - *ppEquationTable = m_equationTable; - - return m_numEquations; - } - - virtual BOOL_32 IsEquationSupported( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2) const; - - UINT_32 ComputeSurfaceBaseAlignTiled(AddrSwizzleMode swizzleMode) const - { - UINT_32 baseAlign; - - if (IsXor(swizzleMode)) - { - baseAlign = GetBlockSize(swizzleMode); - } - else - { - baseAlign = 256; - } - - return baseAlign; - } - - virtual ADDR_E_RETURNCODE HwlComputePipeBankXor( - const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, - ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeSlicePipeBankXor( - const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, - 
ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeSubResourceOffsetForSwizzlePattern( - const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, - ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlGetPreferredSurfaceSetting( - const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, - ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoSanityCheck( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; - - virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoTiled( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoLinear( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled( - const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; - - // Initialize equation table - VOID InitEquationTable(); - - ADDR_E_RETURNCODE ComputeStereoInfo( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut, - UINT_32* pHeightAlign) const; - - UINT_32 GetMipChainInfo( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode, - UINT_32 bpp, - UINT_32 mip0Width, - UINT_32 mip0Height, - UINT_32 mip0Depth, - UINT_32 blockWidth, - UINT_32 blockHeight, - UINT_32 blockDepth, - UINT_32 numMipLevel, - ADDR2_MIP_INFO* pMipInfo) const; - - VOID GetMetaMiptailInfo( - ADDR2_META_MIP_INFO* pInfo, - Dim3d mipCoord, - UINT_32 numMipInTail, - Dim3d* pMetaBlkDim) const; - - Dim3d GetMipStartPos( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode, - UINT_32 width, - UINT_32 height, - UINT_32 depth, - UINT_32 blockWidth, - UINT_32 blockHeight, - UINT_32 blockDepth, - UINT_32 mipId, - UINT_32 log2ElementBytes, - UINT_32* pMipTailBytesOffset) const; 
- - AddrMajorMode GetMajorMode( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode, - UINT_32 mip0WidthInBlk, - UINT_32 mip0HeightInBlk, - UINT_32 mip0DepthInBlk) const - { - BOOL_32 yMajor = (mip0WidthInBlk < mip0HeightInBlk); - BOOL_32 xMajor = (yMajor == FALSE); - - if (IsThick(resourceType, swizzleMode)) - { - yMajor = yMajor && (mip0HeightInBlk >= mip0DepthInBlk); - xMajor = xMajor && (mip0WidthInBlk >= mip0DepthInBlk); - } - - AddrMajorMode majorMode; - if (xMajor) - { - majorMode = ADDR_MAJOR_X; - } - else if (yMajor) - { - majorMode = ADDR_MAJOR_Y; - } - else - { - majorMode = ADDR_MAJOR_Z; - } - - return majorMode; - } - - Dim3d GetDccCompressBlk( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode, - UINT_32 bpp) const - { - UINT_32 index = Log2(bpp >> 3); - Dim3d compressBlkDim; - - if (IsThin(resourceType, swizzleMode)) - { - compressBlkDim.w = Block256_2d[index].w; - compressBlkDim.h = Block256_2d[index].h; - compressBlkDim.d = 1; - } - else if (IsStandardSwizzle(resourceType, swizzleMode)) - { - compressBlkDim = Block256_3dS[index]; - } - else - { - compressBlkDim = Block256_3dZ[index]; - } - - return compressBlkDim; - } - - - static const UINT_32 MaxSeLog2 = 3; - static const UINT_32 MaxRbPerSeLog2 = 2; - - static const Dim3d Block256_3dS[MaxNumOfBpp]; - static const Dim3d Block256_3dZ[MaxNumOfBpp]; - - static const UINT_32 MipTailOffset256B[]; - - static const SwizzleModeFlags SwizzleModeTable[ADDR_SW_MAX_TYPE]; - - // Max number of swizzle mode supported for equation - static const UINT_32 MaxSwMode = 32; - // Max number of resource type (2D/3D) supported for equation - static const UINT_32 MaxRsrcType = 2; - // Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp) - static const UINT_32 MaxElementBytesLog2 = 5; - // Almost all swizzle mode + resource type support equation - static const UINT_32 EquationTableSize = MaxElementBytesLog2 * MaxSwMode * MaxRsrcType; - // Equation table - ADDR_EQUATION 
m_equationTable[EquationTableSize]; - - // Number of equation entries in the table - UINT_32 m_numEquations; - // Equation lookup table according to bpp and tile index - UINT_32 m_equationLookupTable[MaxRsrcType][MaxSwMode][MaxElementBytesLog2]; - - static const UINT_32 MaxCachedMetaEq = 2; - -private: - virtual UINT_32 HwlComputeMaxBaseAlignments() const; - - virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const; - - virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn); - - VOID GetRbEquation(CoordEq* pRbEq, UINT_32 rbPerSeLog2, UINT_32 seLog2) const; - - VOID GetDataEquation(CoordEq* pDataEq, Gfx9DataType dataSurfaceType, - AddrSwizzleMode swizzleMode, AddrResourceType resourceType, - UINT_32 elementBytesLog2, UINT_32 numSamplesLog2) const; - - VOID GetPipeEquation(CoordEq* pPipeEq, CoordEq* pDataEq, - UINT_32 pipeInterleaveLog2, UINT_32 numPipesLog2, - UINT_32 numSamplesLog2, Gfx9DataType dataSurfaceType, - AddrSwizzleMode swizzleMode, AddrResourceType resourceType) const; - - VOID GenMetaEquation(CoordEq* pMetaEq, UINT_32 maxMip, - UINT_32 elementBytesLog2, UINT_32 numSamplesLog2, - ADDR2_META_FLAGS metaFlag, Gfx9DataType dataSurfaceType, - AddrSwizzleMode swizzleMode, AddrResourceType resourceType, - UINT_32 metaBlkWidthLog2, UINT_32 metaBlkHeightLog2, - UINT_32 metaBlkDepthLog2, UINT_32 compBlkWidthLog2, - UINT_32 compBlkHeightLog2, UINT_32 compBlkDepthLog2) const; - - const CoordEq* GetMetaEquation(const MetaEqParams& metaEqParams); - - virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision); - - VOID GetMetaMipInfo(UINT_32 numMipLevels, Dim3d* pMetaBlkDim, - BOOL_32 dataThick, ADDR2_META_MIP_INFO* pInfo, - UINT_32 mip0Width, UINT_32 mip0Height, UINT_32 mip0Depth, - UINT_32* pNumMetaBlkX, UINT_32* pNumMetaBlkY, UINT_32* pNumMetaBlkZ) const; - - BOOL_32 IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; - - ADDR_E_RETURNCODE ComputeSurfaceLinearPadding( - const 
ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - UINT_32* pMipmap0PaddedWidth, - UINT_32* pSlice0PaddedHeight, - ADDR2_MIP_INFO* pMipInfo = NULL) const; - - Gfx9ChipSettings m_settings; - - CoordEq m_cachedMetaEq[MaxCachedMetaEq]; - MetaEqParams m_cachedMetaEqKey[MaxCachedMetaEq]; - UINT_32 m_metaEqOverrideIndex; -}; - -} // V2 -} // Addr - -#endif - diff -Nru mesa-18.3.3/src/amd/addrlib/inc/addrinterface.h mesa-19.0.1/src/amd/addrlib/inc/addrinterface.h --- mesa-18.3.3/src/amd/addrlib/inc/addrinterface.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/inc/addrinterface.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,3715 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ */ + +/** +**************************************************************************************************** +* @file addrinterface.h +* @brief Contains the addrlib interfaces declaration and parameter defines +**************************************************************************************************** +*/ +#ifndef __ADDR_INTERFACE_H__ +#define __ADDR_INTERFACE_H__ + +#include "addrtypes.h" + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#define ADDRLIB_VERSION_MAJOR 6 +#define ADDRLIB_VERSION_MINOR 2 +#define ADDRLIB_VERSION ((ADDRLIB_VERSION_MAJOR << 16) | ADDRLIB_VERSION_MINOR) + +/// Virtually all interface functions need ADDR_HANDLE as first parameter +typedef VOID* ADDR_HANDLE; + +/// Client handle used in callbacks +typedef VOID* ADDR_CLIENT_HANDLE; + +/** +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* // Callback functions +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* typedef VOID* (ADDR_API* ADDR_ALLOCSYSMEM)( +* const ADDR_ALLOCSYSMEM_INPUT* pInput); +* typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_FREESYSMEM)( +* VOID* pVirtAddr); +* typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_DEBUGPRINT)( +* const ADDR_DEBUGPRINT_INPUT* pInput); +* +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* // Create/Destroy/Config functions +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* AddrCreate() +* AddrDestroy() +* +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* // Surface functions +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* AddrComputeSurfaceInfo() +* AddrComputeSurfaceAddrFromCoord() +* AddrComputeSurfaceCoordFromAddr() +* +* ///////////////////////////////////////////////////////////////////////////////////////////////// 
+* // HTile functions +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* AddrComputeHtileInfo() +* AddrComputeHtileAddrFromCoord() +* AddrComputeHtileCoordFromAddr() +* +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* // C-mask functions +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* AddrComputeCmaskInfo() +* AddrComputeCmaskAddrFromCoord() +* AddrComputeCmaskCoordFromAddr() +* +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* // F-mask functions +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* AddrComputeFmaskInfo() +* AddrComputeFmaskAddrFromCoord() +* AddrComputeFmaskCoordFromAddr() +* +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* // Element/Utility functions +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* ElemFlt32ToDepthPixel() +* ElemFlt32ToColorPixel() +* AddrExtractBankPipeSwizzle() +* AddrCombineBankPipeSwizzle() +* AddrComputeSliceSwizzle() +* AddrConvertTileInfoToHW() +* AddrConvertTileIndex() +* AddrConvertTileIndex1() +* AddrGetTileIndex() +* AddrComputeBaseSwizzle() +* AddrUseTileIndex() +* AddrUseCombinedSwizzle() +* +**/ + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Callback functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* @brief channel setting structure +**************************************************************************************************** +*/ +typedef union _ADDR_CHANNEL_SETTING +{ + struct + { + UINT_8 valid : 1; ///< 
Indicate whehter this channel setting is valid + UINT_8 channel : 2; ///< 0 for x channel, 1 for y channel, 2 for z channel + UINT_8 index : 5; ///< Channel index + }; + UINT_8 value; ///< Value +} ADDR_CHANNEL_SETTING; + +/** +**************************************************************************************************** +* @brief address equation key structure +**************************************************************************************************** +*/ +typedef union _ADDR_EQUATION_KEY +{ + struct + { + UINT_32 log2ElementBytes : 3; ///< Log2 of Bytes per pixel + UINT_32 tileMode : 5; ///< Tile mode + UINT_32 microTileType : 3; ///< Micro tile type + UINT_32 pipeConfig : 5; ///< pipe config + UINT_32 numBanksLog2 : 3; ///< Number of banks log2 + UINT_32 bankWidth : 4; ///< Bank width + UINT_32 bankHeight : 4; ///< Bank height + UINT_32 macroAspectRatio : 3; ///< Macro tile aspect ratio + UINT_32 prt : 1; ///< SI only, indicate whether this equation is for prt + UINT_32 reserved : 1; ///< Reserved bit + } fields; + UINT_32 value; +} ADDR_EQUATION_KEY; + +/** +**************************************************************************************************** +* @brief address equation structure +**************************************************************************************************** +*/ +#define ADDR_MAX_EQUATION_BIT 20u + +// Invalid equation index +#define ADDR_INVALID_EQUATION_INDEX 0xFFFFFFFF + +typedef struct _ADDR_EQUATION +{ + ADDR_CHANNEL_SETTING addr[ADDR_MAX_EQUATION_BIT]; ///< addr setting + ///< each bit is result of addr ^ xor ^ xor2 + ADDR_CHANNEL_SETTING xor1[ADDR_MAX_EQUATION_BIT]; ///< xor setting + ADDR_CHANNEL_SETTING xor2[ADDR_MAX_EQUATION_BIT]; ///< xor2 setting + UINT_32 numBits; ///< The number of bits in equation + BOOL_32 stackedDepthSlices; ///< TRUE if depth slices are treated as being + ///< stacked vertically prior to swizzling +} ADDR_EQUATION; + +/** 
+**************************************************************************************************** +* @brief Alloc system memory flags. +* @note These flags are reserved for future use and if flags are added will minimize the impact +* of the client. +**************************************************************************************************** +*/ +typedef union _ADDR_ALLOCSYSMEM_FLAGS +{ + struct + { + UINT_32 reserved : 32; ///< Reserved for future use. + } fields; + UINT_32 value; + +} ADDR_ALLOCSYSMEM_FLAGS; + +/** +**************************************************************************************************** +* @brief Alloc system memory input structure +**************************************************************************************************** +*/ +typedef struct _ADDR_ALLOCSYSMEM_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + ADDR_ALLOCSYSMEM_FLAGS flags; ///< System memory flags. + UINT_32 sizeInBytes; ///< System memory allocation size in bytes. + ADDR_CLIENT_HANDLE hClient; ///< Client handle +} ADDR_ALLOCSYSMEM_INPUT; + +/** +**************************************************************************************************** +* ADDR_ALLOCSYSMEM +* @brief +* Allocate system memory callback function. Returns valid pointer on success. 
+**************************************************************************************************** +*/ +typedef VOID* (ADDR_API* ADDR_ALLOCSYSMEM)( + const ADDR_ALLOCSYSMEM_INPUT* pInput); + +/** +**************************************************************************************************** +* @brief Free system memory input structure +**************************************************************************************************** +*/ +typedef struct _ADDR_FREESYSMEM_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + VOID* pVirtAddr; ///< Virtual address + ADDR_CLIENT_HANDLE hClient; ///< Client handle +} ADDR_FREESYSMEM_INPUT; + +/** +**************************************************************************************************** +* ADDR_FREESYSMEM +* @brief +* Free system memory callback function. +* Returns ADDR_OK on success. +**************************************************************************************************** +*/ +typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_FREESYSMEM)( + const ADDR_FREESYSMEM_INPUT* pInput); + +/** +**************************************************************************************************** +* @brief Print debug message input structure +**************************************************************************************************** +*/ +typedef struct _ADDR_DEBUGPRINT_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + CHAR* pDebugString; ///< Debug print string + va_list ap; ///< Variable argument list + ADDR_CLIENT_HANDLE hClient; ///< Client handle +} ADDR_DEBUGPRINT_INPUT; + +/** +**************************************************************************************************** +* ADDR_DEBUGPRINT +* @brief +* Print debug message callback function. +* Returns ADDR_OK on success. 
+**************************************************************************************************** +*/ +typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_DEBUGPRINT)( + const ADDR_DEBUGPRINT_INPUT* pInput); + +/** +**************************************************************************************************** +* ADDR_CALLBACKS +* +* @brief +* Address Library needs client to provide system memory alloc/free routines. +**************************************************************************************************** +*/ +typedef struct _ADDR_CALLBACKS +{ + ADDR_ALLOCSYSMEM allocSysMem; ///< Routine to allocate system memory + ADDR_FREESYSMEM freeSysMem; ///< Routine to free system memory + ADDR_DEBUGPRINT debugPrint; ///< Routine to print debug message +} ADDR_CALLBACKS; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Create/Destroy functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* ADDR_CREATE_FLAGS +* +* @brief +* This structure is used to pass some setup in creation of AddrLib +* @note +**************************************************************************************************** +*/ +typedef union _ADDR_CREATE_FLAGS +{ + struct + { + UINT_32 noCubeMipSlicesPad : 1; ///< Turn cubemap faces padding off + UINT_32 fillSizeFields : 1; ///< If clients fill size fields in all input and + /// output structure + UINT_32 useTileIndex : 1; ///< Make tileIndex field in input valid + UINT_32 useCombinedSwizzle : 1; ///< Use combined tile swizzle + UINT_32 checkLast2DLevel : 1; ///< Check the last 2D mip sub level + UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment + UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize + UINT_32 reserved : 25; ///< Reserved bits for future use + }; + + UINT_32 
value; +} ADDR_CREATE_FLAGS; + +/** +**************************************************************************************************** +* ADDR_REGISTER_VALUE +* +* @brief +* Data from registers to setup AddrLib global data, used in AddrCreate +**************************************************************************************************** +*/ +typedef struct _ADDR_REGISTER_VALUE +{ + UINT_32 gbAddrConfig; ///< For R8xx, use GB_ADDR_CONFIG register value. + /// For R6xx/R7xx, use GB_TILING_CONFIG. + /// But they can be treated as the same. + /// if this value is 0, use chip to set default value + UINT_32 backendDisables; ///< 1 bit per backend, starting with LSB. 1=disabled,0=enabled. + /// Register value of CC_RB_BACKEND_DISABLE.BACKEND_DISABLE + + /// R800 registers----------------------------------------------- + UINT_32 noOfBanks; ///< Number of h/w ram banks - For r800: MC_ARB_RAMCFG.NOOFBANK + /// No enums for this value in h/w header files + /// 0: 4 + /// 1: 8 + /// 2: 16 + UINT_32 noOfRanks; /// MC_ARB_RAMCFG.NOOFRANK + /// 0: 1 + /// 1: 2 + /// SI (R1000) registers----------------------------------------- + const UINT_32* pTileConfig; ///< Global tile setting tables + UINT_32 noOfEntries; ///< Number of entries in pTileConfig + + ///< CI registers------------------------------------------------- + const UINT_32* pMacroTileConfig; ///< Global macro tile mode table + UINT_32 noOfMacroEntries; ///< Number of entries in pMacroTileConfig + + ///< GFX9 HW parameters + UINT_32 blockVarSizeLog2; ///< SW_VAR_* block size +} ADDR_REGISTER_VALUE; + +/** +**************************************************************************************************** +* ADDR_CREATE_INPUT +* +* @brief +* Parameters use to create an AddrLib Object. Caller must provide all fields. 
+* +**************************************************************************************************** +*/ +typedef struct _ADDR_CREATE_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 chipEngine; ///< Chip Engine + UINT_32 chipFamily; ///< Chip Family + UINT_32 chipRevision; ///< Chip Revision + ADDR_CALLBACKS callbacks; ///< Callbacks for sysmem alloc/free/print + ADDR_CREATE_FLAGS createFlags; ///< Flags to setup AddrLib + ADDR_REGISTER_VALUE regValue; ///< Data from registers to setup AddrLib global data + ADDR_CLIENT_HANDLE hClient; ///< Client handle + UINT_32 minPitchAlignPixels; ///< Minimum pitch alignment in pixels +} ADDR_CREATE_INPUT; + +/** +**************************************************************************************************** +* ADDR_CREATEINFO_OUTPUT +* +* @brief +* Return AddrLib handle to client driver +* +**************************************************************************************************** +*/ +typedef struct _ADDR_CREATE_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + ADDR_HANDLE hLib; ///< Address lib handle + + UINT_32 numEquations; ///< Number of equations in the table + const ADDR_EQUATION* pEquationTable; ///< Pointer to the equation table +} ADDR_CREATE_OUTPUT; + +/** +**************************************************************************************************** +* AddrCreate +* +* @brief +* Create AddrLib object, must be called before any interface calls +* +* @return +* ADDR_OK if successful +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrCreate( + const ADDR_CREATE_INPUT* pAddrCreateIn, + ADDR_CREATE_OUTPUT* pAddrCreateOut); + +/** +**************************************************************************************************** +* AddrDestroy +* +* @brief +* Destroy AddrLib object, must be called to free internally allocated resources. 
+* +* @return +* ADDR_OK if successful +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrDestroy( + ADDR_HANDLE hLib); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Surface functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* @brief +* Bank/tiling parameters. On function input, these can be set as desired or +* left 0 for AddrLib to calculate/default. On function output, these are the actual +* parameters used. +* @note +* Valid bankWidth/bankHeight value: +* 1,2,4,8. They are factors instead of pixels or bytes. +* +* The bank number remains constant across each row of the +* macro tile as each pipe is selected, so the number of +* tiles in the x direction with the same bank number will +* be bank_width * num_pipes. +**************************************************************************************************** +*/ +typedef struct _ADDR_TILEINFO +{ + /// Any of these parameters can be set to 0 to use the HW default. + UINT_32 banks; ///< Number of banks, numerical value + UINT_32 bankWidth; ///< Number of tiles in the X direction in the same bank + UINT_32 bankHeight; ///< Number of tiles in the Y direction in the same bank + UINT_32 macroAspectRatio; ///< Macro tile aspect ratio. 1-1:1, 2-4:1, 4-16:1, 8-64:1 + UINT_32 tileSplitBytes; ///< Tile split size, in bytes + AddrPipeCfg pipeConfig; ///< Pipe Config = HW enum + 1 +} ADDR_TILEINFO; + +// Create a define to avoid client change. 
The removal of R800 is because we plan to implement SI +// within 800 HWL - An AddrPipeCfg is added in above data structure +typedef ADDR_TILEINFO ADDR_R800_TILEINFO; + +/** +**************************************************************************************************** +* @brief +* Information needed by quad buffer stereo support +**************************************************************************************************** +*/ +typedef struct _ADDR_QBSTEREOINFO +{ + UINT_32 eyeHeight; ///< Height (in pixel rows) to right eye + UINT_32 rightOffset; ///< Offset (in bytes) to right eye + UINT_32 rightSwizzle; ///< TileSwizzle for right eyes +} ADDR_QBSTEREOINFO; + +/** +**************************************************************************************************** +* ADDR_SURFACE_FLAGS +* +* @brief +* Surface flags +**************************************************************************************************** +*/ +typedef union _ADDR_SURFACE_FLAGS +{ + struct + { + UINT_32 color : 1; ///< Flag indicates this is a color buffer + UINT_32 depth : 1; ///< Flag indicates this is a depth/stencil buffer + UINT_32 stencil : 1; ///< Flag indicates this is a stencil buffer + UINT_32 texture : 1; ///< Flag indicates this is a texture + UINT_32 cube : 1; ///< Flag indicates this is a cubemap + UINT_32 volume : 1; ///< Flag indicates this is a volume texture + UINT_32 fmask : 1; ///< Flag indicates this is an fmask + UINT_32 cubeAsArray : 1; ///< Flag indicates if treat cubemap as arrays + UINT_32 compressZ : 1; ///< Flag indicates z buffer is compressed + UINT_32 overlay : 1; ///< Flag indicates this is an overlay surface + UINT_32 noStencil : 1; ///< Flag indicates this depth has no separate stencil + UINT_32 display : 1; ///< Flag indicates this should match display controller req. + UINT_32 opt4Space : 1; ///< Flag indicates this surface should be optimized for space + /// i.e. 
save some memory but may lose performance + UINT_32 prt : 1; ///< Flag for partially resident texture + UINT_32 qbStereo : 1; ///< Quad buffer stereo surface + UINT_32 pow2Pad : 1; ///< SI: Pad to pow2, must set for mipmap (include level0) + UINT_32 interleaved : 1; ///< Special flag for interleaved YUV surface padding + UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable + UINT_32 dispTileType : 1; ///< NI: force display Tiling for 128 bit shared resoruce + UINT_32 dccCompatible : 1; ///< VI: whether to make MSAA surface support dcc fast clear + UINT_32 dccPipeWorkaround : 1; ///< VI: whether to workaround the HW limit that + /// dcc can't be enabled if pipe config of tile mode + /// is different from that of ASIC, this flag + /// is address lib internal flag, client should ignore it + UINT_32 czDispCompatible : 1; ///< SI+: CZ family has a HW bug needs special alignment. + /// This flag indicates we need to follow the + /// alignment with CZ families or other ASICs under + /// PX configuration + CZ. + UINT_32 nonSplit : 1; ///< CI: depth texture should not be split + UINT_32 disableLinearOpt : 1; ///< Disable tile mode optimization to linear + UINT_32 needEquation : 1; ///< Make the surface tile setting equation compatible. + /// This flag indicates we need to override tile + /// mode to PRT_* tile mode to disable slice rotation, + /// which is needed by swizzle pattern equation. + UINT_32 skipIndicesOutput : 1; ///< Skipping indices in output. 
+ UINT_32 rotateDisplay : 1; ///< Rotate micro tile type + UINT_32 minimizeAlignment : 1; ///< Minimize alignment + UINT_32 preferEquation : 1; ///< Return equation index without adjusting tile mode + UINT_32 matchStencilTileCfg : 1; ///< Select tile index of stencil as well as depth surface + /// to make sure they share same tile config parameters + UINT_32 disallowLargeThickDegrade : 1; ///< Disallow large thick tile degrade + UINT_32 reserved : 1; ///< Reserved bits + }; + + UINT_32 value; +} ADDR_SURFACE_FLAGS; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_SURFACE_INFO_INPUT +* +* @brief +* Input structure for AddrComputeSurfaceInfo +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_SURFACE_INFO_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + AddrTileMode tileMode; ///< Tile mode + AddrFormat format; ///< If format is set to valid one, bpp/width/height + /// might be overwritten + UINT_32 bpp; ///< Bits per pixel + UINT_32 numSamples; ///< Number of samples + UINT_32 width; ///< Width, in pixels + UINT_32 height; ///< Height, in pixels + UINT_32 numSlices; ///< Number of surface slices or depth + UINT_32 slice; ///< Slice index + UINT_32 mipLevel; ///< Current mipmap level + UINT_32 numMipLevels; ///< Number of mips in mip chain + ADDR_SURFACE_FLAGS flags; ///< Surface type flags + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA + /// r800 and later HWL parameters + // Needed by 2D tiling, for linear and 1D tiling, just keep them 0's + ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. 
Set to 0 to default/calculate + AddrTileType tileType; ///< Micro tiling type, not needed when tileIndex != -1 + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + UINT_32 basePitch; ///< Base level pitch in pixels, 0 means ignored, is a + /// must for mip levels from SI+. + /// Don't use pitch in blocks for compressed formats! + UINT_32 maxBaseAlign; ///< Max base alignment request from client + UINT_32 pitchAlign; ///< Pitch alignment request from client + UINT_32 heightAlign; ///< Height alignment request from client +} ADDR_COMPUTE_SURFACE_INFO_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_SURFACE_INFO_OUTPUT +* +* @brief +* Output structure for AddrComputeSurfInfo +* @note + Element: AddrLib unit for computing. e.g. BCn: 4x4 blocks; R32B32B32: 32bit with 3x pitch + Pixel: Original pixel +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_SURFACE_INFO_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 pitch; ///< Pitch in elements (in blocks for compressed formats) + UINT_32 height; ///< Height in elements (in blocks for compressed formats) + UINT_32 depth; ///< Number of slice/depth + UINT_64 surfSize; ///< Surface size in bytes + AddrTileMode tileMode; ///< Actual tile mode. May differ from that in input + UINT_32 baseAlign; ///< Base address alignment + UINT_32 pitchAlign; ///< Pitch alignment, in elements + UINT_32 heightAlign; ///< Height alignment, in elements + UINT_32 depthAlign; ///< Depth alignment, aligned to thickness, for 3d texture + UINT_32 bpp; ///< Bits per elements (e.g. 
blocks for BCn, 1/3 for 96bit) + UINT_32 pixelPitch; ///< Pitch in original pixels + UINT_32 pixelHeight; ///< Height in original pixels + UINT_32 pixelBits; ///< Original bits per pixel, passed from input + UINT_64 sliceSize; ///< Size of slice specified by input's slice + /// The result is controlled by surface flags & createFlags + /// By default this value equals to surfSize for volume + UINT_32 pitchTileMax; ///< PITCH_TILE_MAX value for h/w register + UINT_32 heightTileMax; ///< HEIGHT_TILE_MAX value for h/w register + UINT_32 sliceTileMax; ///< SLICE_TILE_MAX value for h/w register + + UINT_32 numSamples; ///< Pass the effective numSamples processed in this call + + /// r800 and later HWL parameters + ADDR_TILEINFO* pTileInfo; ///< Tile parameters used. Filled in if 0 on input + AddrTileType tileType; ///< Micro tiling type, only valid when tileIndex != -1 + INT_32 tileIndex; ///< Tile index, MAY be "downgraded" + + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + /// Output flags + struct + { + /// Special information to work around SI mipmap swizzle bug UBTS #317508 + UINT_32 last2DLevel : 1; ///< TRUE if this is the last 2D(3D) tiled + ///< Only meaningful when create flag checkLast2DLevel is set + UINT_32 tcCompatible : 1; ///< If the surface can be shader compatible + UINT_32 dccUnsupport : 1; ///< If the surface can support DCC compressed rendering + UINT_32 prtTileIndex : 1; ///< SI only, indicate the returned tile index is for PRT + ///< If address lib return true for mip 0, client should set prt flag + ///< for child mips in subsequent compute surface info calls + UINT_32 reserved :28; ///< Reserved bits + }; + + UINT_32 equationIndex; ///< Equation index in the equation table; + + UINT_32 blockWidth; ///< Width in element inside one block(1D->Micro, 2D->Macro) + UINT_32 blockHeight; ///< Height in element inside one block(1D->Micro, 2D->Macro) + UINT_32 blockSlices; ///< Slice number inside one block(1D->Micro, 
2D->Macro) + + /// Stereo info + ADDR_QBSTEREOINFO* pStereoInfo;///< Stereo information, needed when .qbStereo flag is TRUE + + INT_32 stencilTileIdx; ///< stencil tile index output when matchStencilTileCfg was set +} ADDR_COMPUTE_SURFACE_INFO_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeSurfaceInfo +* +* @brief +* Compute surface width/height/depth/alignments and suitable tiling mode +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceInfo( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT +* +* @brief +* Input structure for AddrComputeSurfaceAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Slice index + UINT_32 sample; ///< Sample index, use fragment index for EQAA + + UINT_32 bpp; ///< Bits per pixel + UINT_32 pitch; ///< Surface pitch, in pixels + UINT_32 height; ///< Surface height, in pixels + UINT_32 numSlices; ///< Surface depth + UINT_32 numSamples; ///< Number of samples + + AddrTileMode tileMode; ///< Tile mode + BOOL_32 isDepth; ///< TRUE if the surface uses depth sample ordering within + /// micro tile. 
Textures can also choose depth sample order + UINT_32 tileBase; ///< Base offset (in bits) inside micro tile which handles + /// the case that components are stored separately + UINT_32 compBits; ///< The component bits actually needed(for planar surface) + + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA + /// r800 and later HWL parameters + // Used for 1D tiling above + AddrTileType tileType; ///< See defintion of AddrTileType + struct + { + UINT_32 ignoreSE : 1; ///< TRUE if shader engines are ignored. This is texture + /// only flag. Only non-RT texture can set this to TRUE + UINT_32 reserved :31; ///< Reserved for future use. + }; + // 2D tiling needs following structure + ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Client must provide all data + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + union + { + struct + { + UINT_32 bankSwizzle; ///< Bank swizzle + UINT_32 pipeSwizzle; ///< Pipe swizzle + }; + UINT_32 tileSwizzle; ///< Combined swizzle, if useCombinedSwizzle is TRUE + }; +} ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT +* +* @brief +* Output structure for AddrComputeSurfaceAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Byte address + UINT_32 bitPosition; ///< Bit position within surfaceAddr, 0-7. + /// For surface bpp < 8, e.g. FMT_1. 
+ UINT_32 prtBlockIndex; ///< Index of a PRT tile (64K block) +} ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeSurfaceAddrFromCoord +* +* @brief +* Compute surface address from a given coordinate. +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceAddrFromCoord( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT +* +* @brief +* Input structure for AddrComputeSurfaceCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Address in bytes + UINT_32 bitPosition; ///< Bit position in addr. 0-7. for surface bpp < 8, + /// e.g. FMT_1; + UINT_32 bpp; ///< Bits per pixel + UINT_32 pitch; ///< Pitch, in pixels + UINT_32 height; ///< Height in pixels + UINT_32 numSlices; ///< Surface depth + UINT_32 numSamples; ///< Number of samples + + AddrTileMode tileMode; ///< Tile mode + BOOL_32 isDepth; ///< Surface uses depth sample ordering within micro tile. + /// Note: Textures can choose depth sample order as well. 
+ UINT_32 tileBase; ///< Base offset (in bits) inside micro tile which handles + /// the case that components are stored separately + UINT_32 compBits; ///< The component bits actually needed(for planar surface) + + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA + /// r800 and later HWL parameters + // Used for 1D tiling above + AddrTileType tileType; ///< See defintion of AddrTileType + struct + { + UINT_32 ignoreSE : 1; ///< TRUE if shader engines are ignored. This is texture + /// only flag. Only non-RT texture can set this to TRUE + UINT_32 reserved :31; ///< Reserved for future use. + }; + // 2D tiling needs following structure + ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Client must provide all data + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + union + { + struct + { + UINT_32 bankSwizzle; ///< Bank swizzle + UINT_32 pipeSwizzle; ///< Pipe swizzle + }; + UINT_32 tileSwizzle; ///< Combined swizzle, if useCombinedSwizzle is TRUE + }; +} ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT +* +* @brief +* Output structure for AddrComputeSurfaceCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Index of slices + UINT_32 sample; ///< Index of samples, means fragment index for EQAA +} ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT; + +/** +**************************************************************************************************** +* 
AddrComputeSurfaceCoordFromAddr +* +* @brief +* Compute coordinate from a given surface address +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceCoordFromAddr( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// HTile functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* ADDR_HTILE_FLAGS +* +* @brief +* HTILE flags +**************************************************************************************************** +*/ +typedef union _ADDR_HTILE_FLAGS +{ + struct + { + UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable + UINT_32 skipTcCompatSizeAlign : 1; ///< Flag indicates that addrLib will not align htile + /// size to 256xBankxPipe when computing tc-compatible + /// htile info. + UINT_32 reserved : 30; ///< Reserved bits + }; + + UINT_32 value; +} ADDR_HTILE_FLAGS; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_HTILE_INFO_INPUT +* +* @brief +* Input structure of AddrComputeHtileInfo +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_HTILE_INFO_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + ADDR_HTILE_FLAGS flags; ///< HTILE flags + UINT_32 pitch; ///< Surface pitch, in pixels + UINT_32 height; ///< Surface height, in pixels + UINT_32 numSlices; ///< Number of slices + BOOL_32 isLinear; ///< Linear or tiled HTILE layout + AddrHtileBlockSize blockWidth; ///< 4 or 8. 
EG above only support 8 + AddrHtileBlockSize blockHeight; ///< 4 or 8. EG above only support 8 + ADDR_TILEINFO* pTileInfo; ///< Tile info + + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid +} ADDR_COMPUTE_HTILE_INFO_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_HTILE_INFO_OUTPUT +* +* @brief +* Output structure of AddrComputeHtileInfo +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_HTILE_INFO_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 pitch; ///< Pitch in pixels of depth buffer represented in this + /// HTile buffer. This might be larger than original depth + /// buffer pitch when called with an unaligned pitch. + UINT_32 height; ///< Height in pixels, as above + UINT_64 htileBytes; ///< Size of HTILE buffer, in bytes + UINT_32 baseAlign; ///< Base alignment + UINT_32 bpp; ///< Bits per pixel for HTILE is how many bits for an 8x8 block! + UINT_32 macroWidth; ///< Macro width in pixels, actually squared cache shape + UINT_32 macroHeight; ///< Macro height in pixels + UINT_64 sliceSize; ///< Slice size, in bytes. + BOOL_32 sliceInterleaved; ///< Flag to indicate if different slice's htile is interleaved + /// Compute engine clear can't be used if htile is interleaved + BOOL_32 nextMipLevelCompressible; ///< Flag to indicate whether HTILE can be enabled in + /// next mip level, it also indicates if memory set based + /// fast clear can be used for current mip level. 
+} ADDR_COMPUTE_HTILE_INFO_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeHtileInfo +* +* @brief +* Compute Htile pitch, height, base alignment and size in bytes +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeHtileInfo( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn, + ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT +* +* @brief +* Input structure for AddrComputeHtileAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 pitch; ///< Pitch, in pixels + UINT_32 height; ///< Height in pixels + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Index of slice + UINT_32 numSlices; ///< Number of slices + BOOL_32 isLinear; ///< Linear or tiled HTILE layout + ADDR_HTILE_FLAGS flags; ///< htile flags + AddrHtileBlockSize blockWidth; ///< 4 or 8. 1 means 8, 0 means 4. EG above only support 8 + AddrHtileBlockSize blockHeight; ///< 4 or 8. 1 means 8, 0 means 4. 
EG above only support 8 + ADDR_TILEINFO* pTileInfo; ///< Tile info + + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid + UINT_32 bpp; ///< depth/stencil buffer bit per pixel size + UINT_32 zStencilAddr; ///< tcCompatible Z/Stencil surface address +} ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT +* +* @brief +* Output structure for AddrComputeHtileAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Address in bytes + UINT_32 bitPosition; ///< Bit position, 0 or 4. CMASK and HTILE shares some lib method. 
+ /// So we keep bitPosition for HTILE as well +} ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeHtileAddrFromCoord +* +* @brief +* Compute Htile address according to coordinates (of depth buffer) +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeHtileAddrFromCoord( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT +* +* @brief +* Input structure for AddrComputeHtileCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Address + UINT_32 bitPosition; ///< Bit position 0 or 4. CMASK and HTILE share some methods + /// so we keep bitPosition for HTILE as well + UINT_32 pitch; ///< Pitch, in pixels + UINT_32 height; ///< Height, in pixels + UINT_32 numSlices; ///< Number of slices + BOOL_32 isLinear; ///< Linear or tiled HTILE layout + AddrHtileBlockSize blockWidth; ///< 4 or 8. 1 means 8, 0 means 4. R8xx/R9xx only support 8 + AddrHtileBlockSize blockHeight; ///< 4 or 8. 1 means 8, 0 means 4. 
R8xx/R9xx only support 8 + ADDR_TILEINFO* pTileInfo; ///< Tile info + + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid +} ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT +* +* @brief +* Output structure for AddrComputeHtileCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Slice index +} ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeHtileCoordFromAddr +* +* @brief +* Compute coordinates within depth buffer (1st pixel of a micro tile) according to +* Htile address +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeHtileCoordFromAddr( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// C-mask functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* ADDR_CMASK_FLAGS +* +* @brief +* CMASK flags +**************************************************************************************************** +*/ 
+typedef union _ADDR_CMASK_FLAGS +{ + struct + { + UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable + UINT_32 reserved :31; ///< Reserved bits + }; + + UINT_32 value; +} ADDR_CMASK_FLAGS; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_CMASK_INFO_INPUT +* +* @brief +* Input structure of AddrComputeCmaskInfo +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_CMASKINFO_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + ADDR_CMASK_FLAGS flags; ///< CMASK flags + UINT_32 pitch; ///< Pitch, in pixels, of color buffer + UINT_32 height; ///< Height, in pixels, of color buffer + UINT_32 numSlices; ///< Number of slices, of color buffer + BOOL_32 isLinear; ///< Linear or tiled layout, Only SI can be linear + ADDR_TILEINFO* pTileInfo; ///< Tile info + + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid +} ADDR_COMPUTE_CMASK_INFO_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_CMASK_INFO_OUTPUT +* +* @brief +* Output structure of AddrComputeCmaskInfo +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_CMASK_INFO_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 pitch; ///< Pitch in pixels of color buffer which + /// this Cmask matches. The size might be larger than + /// original color buffer pitch when called with + /// an unaligned pitch. 
+ UINT_32 height; ///< Height in pixels, as above + UINT_64 cmaskBytes; ///< Size in bytes of CMask buffer + UINT_32 baseAlign; ///< Base alignment + UINT_32 blockMax; ///< Cmask block size. Need this to set CB_COLORn_MASK register + UINT_32 macroWidth; ///< Macro width in pixels, actually squared cache shape + UINT_32 macroHeight; ///< Macro height in pixels + UINT_64 sliceSize; ///< Slice size, in bytes. +} ADDR_COMPUTE_CMASK_INFO_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeCmaskInfo +* +* @brief +* Compute Cmask pitch, height, base alignment and size in bytes from color buffer +* info +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskInfo( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn, + ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT +* +* @brief +* Input structure for AddrComputeCmaskAddrFromCoord +* +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_64 fmaskAddr; ///< Fmask addr for tc compatible Cmask + UINT_32 slice; ///< Slice index + UINT_32 pitch; ///< Pitch in pixels, of color buffer + UINT_32 height; ///< Height in pixels, of color buffer + UINT_32 numSlices; ///< Number of slices + UINT_32 bpp; + BOOL_32 isLinear; ///< Linear or tiled layout, Only SI can be linear + ADDR_CMASK_FLAGS flags; ///< CMASK flags + ADDR_TILEINFO* pTileInfo; ///< Tile info + + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + ///< while the global useTileIndex is set to 1 + 
INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid +} ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT +* +* @brief +* Output structure for AddrComputeCmaskAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< CMASK address in bytes + UINT_32 bitPosition; ///< Bit position within addr, 0-7. CMASK is 4 bpp, + /// so the address may be located in bit 0 (0) or 4 (4) +} ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeCmaskAddrFromCoord +* +* @brief +* Compute Cmask address according to coordinates (of MSAA color buffer) +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskAddrFromCoord( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT +* +* @brief +* Input structure for AddrComputeCmaskCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< CMASK address in bytes + UINT_32 bitPosition; ///< Bit position within addr, 0-7. 
CMASK is 4 bpp, + /// so the address may be located in bit 0 (0) or 4 (4) + UINT_32 pitch; ///< Pitch, in pixels + UINT_32 height; ///< Height in pixels + UINT_32 numSlices; ///< Number of slices + BOOL_32 isLinear; ///< Linear or tiled layout, Only SI can be linear + ADDR_TILEINFO* pTileInfo; ///< Tile info + + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid +} ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT +* +* @brief +* Output structure for AddrComputeCmaskCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Slice index +} ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeCmaskCoordFromAddr +* +* @brief +* Compute coordinates within color buffer (1st pixel of a micro tile) according to +* Cmask address +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskCoordFromAddr( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// F-mask functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** 
+**************************************************************************************************** +* ADDR_COMPUTE_FMASK_INFO_INPUT +* +* @brief +* Input structure for AddrComputeFmaskInfo +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_FMASK_INFO_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + AddrTileMode tileMode; ///< Tile mode + UINT_32 pitch; ///< Surface pitch, in pixels + UINT_32 height; ///< Surface height, in pixels + UINT_32 numSlices; ///< Number of slice/depth + UINT_32 numSamples; ///< Number of samples + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA + /// r800 and later HWL parameters + struct + { + UINT_32 resolved: 1; ///< TRUE if the surface is for resolved fmask, only used + /// by H/W clients. S/W should always set it to FALSE. + UINT_32 reserved: 31; ///< Reserved for future use. + }; + ADDR_TILEINFO* pTileInfo; ///< 2D tiling parameters. 
Clients must give valid data + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 +} ADDR_COMPUTE_FMASK_INFO_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_FMASK_INFO_OUTPUT +* +* @brief +* Output structure for AddrComputeFmaskInfo +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_FMASK_INFO_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 pitch; ///< Pitch of fmask in pixels + UINT_32 height; ///< Height of fmask in pixels + UINT_32 numSlices; ///< Slices of fmask + UINT_64 fmaskBytes; ///< Size of fmask in bytes + UINT_32 baseAlign; ///< Base address alignment + UINT_32 pitchAlign; ///< Pitch alignment + UINT_32 heightAlign; ///< Height alignment + UINT_32 bpp; ///< Bits per pixel of FMASK is: number of bit planes + UINT_32 numSamples; ///< Number of samples, used for dump, export this since input + /// may be changed in 9xx and above + /// r800 and later HWL parameters + ADDR_TILEINFO* pTileInfo; ///< Tile parameters used. 
Fmask can have different + /// bank_height from color buffer + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + UINT_64 sliceSize; ///< Size of slice in bytes +} ADDR_COMPUTE_FMASK_INFO_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeFmaskInfo +* +* @brief +* Compute Fmask pitch/height/depth/alignments and size in bytes +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskInfo( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT +* +* @brief +* Input structure for AddrComputeFmaskAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Slice index + UINT_32 plane; ///< Plane number + UINT_32 sample; ///< Sample index (fragment index for EQAA) + + UINT_32 pitch; ///< Surface pitch, in pixels + UINT_32 height; ///< Surface height, in pixels + UINT_32 numSamples; ///< Number of samples + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA + + AddrTileMode tileMode; ///< Tile mode + union + { + struct + { + UINT_32 bankSwizzle; ///< Bank swizzle + UINT_32 pipeSwizzle; ///< Pipe swizzle + }; + UINT_32 tileSwizzle; ///< Combined swizzle, if useCombinedSwizzle is TRUE + }; + + 
/// r800 and later HWL parameters + struct + { + UINT_32 resolved: 1; ///< TRUE if this is a resolved fmask, used by H/W clients + UINT_32 ignoreSE: 1; ///< TRUE if shader engines are ignored. + UINT_32 reserved: 30; ///< Reserved for future use. + }; + ADDR_TILEINFO* pTileInfo; ///< 2D tiling parameters. Client must provide all data + +} ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT +* +* @brief +* Output structure for AddrComputeFmaskAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Fmask address + UINT_32 bitPosition; ///< Bit position within fmaskAddr, 0-7. +} ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeFmaskAddrFromCoord +* +* @brief +* Compute Fmask address according to coordinates (x,y,slice,sample,plane) +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskAddrFromCoord( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT +* +* @brief +* Input structure for AddrComputeFmaskCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Address + UINT_32 bitPosition; ///< Bit position within 
addr, 0-7. + + UINT_32 pitch; ///< Pitch, in pixels + UINT_32 height; ///< Height in pixels + UINT_32 numSamples; ///< Number of samples + UINT_32 numFrags; ///< Number of fragments + AddrTileMode tileMode; ///< Tile mode + union + { + struct + { + UINT_32 bankSwizzle; ///< Bank swizzle + UINT_32 pipeSwizzle; ///< Pipe swizzle + }; + UINT_32 tileSwizzle; ///< Combined swizzle, if useCombinedSwizzle is TRUE + }; + + /// r800 and later HWL parameters + struct + { + UINT_32 resolved: 1; ///< TRUE if this is a resolved fmask, used by HW components + UINT_32 ignoreSE: 1; ///< TRUE if shader engines are ignored. + UINT_32 reserved: 30; ///< Reserved for future use. + }; + ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Client must provide all data + +} ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT +* +* @brief +* Output structure for AddrComputeFmaskCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Slice index + UINT_32 plane; ///< Plane number + UINT_32 sample; ///< Sample index (fragment index for EQAA) +} ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeFmaskCoordFromAddr +* +* @brief +* Compute FMASK coordinate from an given address +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskCoordFromAddr( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut); + 
+//////////////////////////////////////////////////////////////////////////////////////////////////// +// Element/utility functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* AddrGetVersion +* +* @brief +* Get AddrLib version number +**************************************************************************************************** +*/ +UINT_32 ADDR_API AddrGetVersion(ADDR_HANDLE hLib); + +/** +**************************************************************************************************** +* AddrUseTileIndex +* +* @brief +* Return TRUE if tileIndex is enabled in this address library +**************************************************************************************************** +*/ +BOOL_32 ADDR_API AddrUseTileIndex(ADDR_HANDLE hLib); + +/** +**************************************************************************************************** +* AddrUseCombinedSwizzle +* +* @brief +* Return TRUE if combined swizzle is enabled in this address library +**************************************************************************************************** +*/ +BOOL_32 ADDR_API AddrUseCombinedSwizzle(ADDR_HANDLE hLib); + +/** +**************************************************************************************************** +* ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT +* +* @brief +* Input structure of AddrExtractBankPipeSwizzle +**************************************************************************************************** +*/ +typedef struct _ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 base256b; ///< Base256b value + + /// r800 and later HWL parameters + ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. 
Client must provide all data + + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid +} ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT; + +/** +**************************************************************************************************** +* ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT +* +* @brief +* Output structure of AddrExtractBankPipeSwizzle +**************************************************************************************************** +*/ +typedef struct _ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 bankSwizzle; ///< Bank swizzle + UINT_32 pipeSwizzle; ///< Pipe swizzle +} ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT; + +/** +**************************************************************************************************** +* AddrExtractBankPipeSwizzle +* +* @brief +* Extract Bank and Pipe swizzle from base256b +* @return +* ADDR_OK if no error +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrExtractBankPipeSwizzle( + ADDR_HANDLE hLib, + const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, + ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT +* +* @brief +* Input structure of AddrCombineBankPipeSwizzle +**************************************************************************************************** +*/ +typedef struct _ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 bankSwizzle; ///< Bank swizzle + UINT_32 pipeSwizzle; ///< Pipe swizzle + UINT_64 baseAddr; ///< Base address (leave it zero for driver clients) + + /// r800 
and later HWL parameters + ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Client must provide all data + + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid +} ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT +* +* @brief +* Output structure of AddrCombineBankPipeSwizzle +**************************************************************************************************** +*/ +typedef struct _ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 tileSwizzle; ///< Combined swizzle +} ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT; + +/** +**************************************************************************************************** +* AddrCombineBankPipeSwizzle +* +* @brief +* Combine Bank and Pipe swizzle +* @return +* ADDR_OK if no error +* @note +* baseAddr here is full MCAddress instead of base256b +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrCombineBankPipeSwizzle( + ADDR_HANDLE hLib, + const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn, + ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_COMPUTE_SLICESWIZZLE_INPUT +* +* @brief +* Input structure of AddrComputeSliceSwizzle +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_SLICESWIZZLE_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + AddrTileMode tileMode; ///< Tile Mode + UINT_32 baseSwizzle; ///< Base tile swizzle + 
UINT_32 slice; ///< Slice index + UINT_64 baseAddr; ///< Base address, driver should leave it 0 in most cases + + /// r800 and later HWL parameters + ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Actually banks needed here! + + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid +} ADDR_COMPUTE_SLICESWIZZLE_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_SLICESWIZZLE_OUTPUT +* +* @brief +* Output structure of AddrComputeSliceSwizzle +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_SLICESWIZZLE_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 tileSwizzle; ///< Recalculated tileSwizzle value +} ADDR_COMPUTE_SLICESWIZZLE_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeSliceSwizzle +* +* @brief +* Extract Bank and Pipe swizzle from base256b +* @return +* ADDR_OK if no error +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeSliceSwizzle( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, + ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut); + +/** +**************************************************************************************************** +* AddrSwizzleGenOption +* +* @brief +* Which swizzle generating options: legacy or linear +**************************************************************************************************** +*/ +typedef enum _AddrSwizzleGenOption +{ + ADDR_SWIZZLE_GEN_DEFAULT = 0, ///< As is in client driver implemention for swizzle + 
ADDR_SWIZZLE_GEN_LINEAR = 1, ///< Using a linear increment of swizzle +} AddrSwizzleGenOption; + +/** +**************************************************************************************************** +* AddrSwizzleOption +* +* @brief +* Controls how swizzle is generated +**************************************************************************************************** +*/ +typedef union _ADDR_SWIZZLE_OPTION +{ + struct + { + UINT_32 genOption : 1; ///< The way swizzle is generated, see AddrSwizzleGenOption + UINT_32 reduceBankBit : 1; ///< TRUE if we need reduce swizzle bits + UINT_32 reserved :30; ///< Reserved bits + }; + + UINT_32 value; + +} ADDR_SWIZZLE_OPTION; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_BASE_SWIZZLE_INPUT +* +* @brief +* Input structure of AddrComputeBaseSwizzle +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_BASE_SWIZZLE_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + ADDR_SWIZZLE_OPTION option; ///< Swizzle option + UINT_32 surfIndex; ///< Index of this surface type + AddrTileMode tileMode; ///< Tile Mode + + /// r800 and later HWL parameters + ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Actually banks needed here! 
+ + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid +} ADDR_COMPUTE_BASE_SWIZZLE_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT +* +* @brief +* Output structure of AddrComputeBaseSwizzle +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 tileSwizzle; ///< Combined swizzle +} ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeBaseSwizzle +* +* @brief +* Return a Combined Bank and Pipe swizzle base on surface based on surface type/index +* @return +* ADDR_OK if no error +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeBaseSwizzle( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, + ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ELEM_GETEXPORTNORM_INPUT +* +* @brief +* Input structure for ElemGetExportNorm +* +**************************************************************************************************** +*/ +typedef struct _ELEM_GETEXPORTNORM_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + AddrColorFormat format; ///< Color buffer format; Client should use ColorFormat + AddrSurfaceNumber num; ///< Surface number type; Client should use NumberType + AddrSurfaceSwap swap; ///< Surface swap byte swap; Client should use SurfaceSwap + UINT_32 
numSamples; ///< Number of samples +} ELEM_GETEXPORTNORM_INPUT; + +/** +**************************************************************************************************** +* ElemGetExportNorm +* +* @brief +* Helper function to check one format can be EXPORT_NUM, which is a register +* CB_COLOR_INFO.SURFACE_FORMAT. FP16 can be reported as EXPORT_NORM for rv770 in r600 +* family +* @note +* The implementation is only for r600. +* 00 - EXPORT_FULL: PS exports are 4 pixels with 4 components with 32-bits-per-component. (two +* clocks per export) +* 01 - EXPORT_NORM: PS exports are 4 pixels with 4 components with 16-bits-per-component. (one +* clock per export) +* +**************************************************************************************************** +*/ +BOOL_32 ADDR_API ElemGetExportNorm( + ADDR_HANDLE hLib, + const ELEM_GETEXPORTNORM_INPUT* pIn); + +/** +**************************************************************************************************** +* ELEM_FLT32TODEPTHPIXEL_INPUT +* +* @brief +* Input structure for addrFlt32ToDepthPixel +* +**************************************************************************************************** +*/ +typedef struct _ELEM_FLT32TODEPTHPIXEL_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + AddrDepthFormat format; ///< Depth buffer format + ADDR_FLT_32 comps[2]; ///< Component values (Z/stencil) +} ELEM_FLT32TODEPTHPIXEL_INPUT; + +/** +**************************************************************************************************** +* ELEM_FLT32TODEPTHPIXEL_INPUT +* +* @brief +* Output structure for ElemFlt32ToDepthPixel +* +**************************************************************************************************** +*/ +typedef struct _ELEM_FLT32TODEPTHPIXEL_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_8* pPixel; ///< Real depth value. Same data type as depth buffer. + /// Client must provide enough storage for this type. 
+ UINT_32 depthBase; ///< Tile base in bits for depth bits + UINT_32 stencilBase; ///< Tile base in bits for stencil bits + UINT_32 depthBits; ///< Bits for depth + UINT_32 stencilBits; ///< Bits for stencil +} ELEM_FLT32TODEPTHPIXEL_OUTPUT; + +/** +**************************************************************************************************** +* ElemFlt32ToDepthPixel +* +* @brief +* Convert a FLT_32 value to a depth/stencil pixel value +* +* @return +* Return code +* +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API ElemFlt32ToDepthPixel( + ADDR_HANDLE hLib, + const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn, + ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ELEM_FLT32TOCOLORPIXEL_INPUT +* +* @brief +* Input structure for addrFlt32ToColorPixel +* +**************************************************************************************************** +*/ +typedef struct _ELEM_FLT32TOCOLORPIXEL_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + AddrColorFormat format; ///< Color buffer format + AddrSurfaceNumber surfNum; ///< Surface number + AddrSurfaceSwap surfSwap; ///< Surface swap + ADDR_FLT_32 comps[4]; ///< Component values (r/g/b/a) +} ELEM_FLT32TOCOLORPIXEL_INPUT; + +/** +**************************************************************************************************** +* ELEM_FLT32TOCOLORPIXEL_INPUT +* +* @brief +* Output structure for ElemFlt32ToColorPixel +* +**************************************************************************************************** +*/ +typedef struct _ELEM_FLT32TOCOLORPIXEL_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_8* pPixel; ///< Real color value. Same data type as color buffer. + /// Client must provide enough storage for this type. 
+} ELEM_FLT32TOCOLORPIXEL_OUTPUT; + +/** +**************************************************************************************************** +* ElemFlt32ToColorPixel +* +* @brief +* Convert a FLT_32 value to a red/green/blue/alpha pixel value +* +* @return +* Return code +* +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API ElemFlt32ToColorPixel( + ADDR_HANDLE hLib, + const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn, + ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ElemSize +* +* @brief +* Get bits-per-element for specified format +* +* @return +* Bits-per-element of specified format +* +**************************************************************************************************** +*/ +UINT_32 ADDR_API ElemSize( + ADDR_HANDLE hLib, + AddrFormat format); + +/** +**************************************************************************************************** +* ADDR_CONVERT_TILEINFOTOHW_INPUT +* +* @brief +* Input structure for AddrConvertTileInfoToHW +* @note +* When reverse is TRUE, indices are igonred +**************************************************************************************************** +*/ +typedef struct _ADDR_CONVERT_TILEINFOTOHW_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + BOOL_32 reverse; ///< Convert control flag. + /// FALSE: convert from real value to HW value; + /// TRUE: convert from HW value to real value. 
+ + /// r800 and later HWL parameters + ADDR_TILEINFO* pTileInfo; ///< Tile parameters with real value + + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid + UINT_32 bpp; ///< Bits per pixel +} ADDR_CONVERT_TILEINFOTOHW_INPUT; + +/** +**************************************************************************************************** +* ADDR_CONVERT_TILEINFOTOHW_OUTPUT +* +* @brief +* Output structure for AddrConvertTileInfoToHW +**************************************************************************************************** +*/ +typedef struct _ADDR_CONVERT_TILEINFOTOHW_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + /// r800 and later HWL parameters + ADDR_TILEINFO* pTileInfo; ///< Tile parameters with hardware register value + +} ADDR_CONVERT_TILEINFOTOHW_OUTPUT; + +/** +**************************************************************************************************** +* AddrConvertTileInfoToHW +* +* @brief +* Convert tile info from real value to hardware register value +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrConvertTileInfoToHW( + ADDR_HANDLE hLib, + const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, + ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_CONVERT_TILEINDEX_INPUT +* +* @brief +* Input structure for AddrConvertTileIndex +**************************************************************************************************** +*/ +typedef struct _ADDR_CONVERT_TILEINDEX_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + INT_32 tileIndex; ///< Tile index + INT_32 macroModeIndex; ///< Index in macro tile mode 
table if there is one (CI) + UINT_32 bpp; ///< Bits per pixel + BOOL_32 tileInfoHw; ///< Set to TRUE if client wants HW enum, otherwise actual +} ADDR_CONVERT_TILEINDEX_INPUT; + +/** +**************************************************************************************************** +* ADDR_CONVERT_TILEINDEX_OUTPUT +* +* @brief +* Output structure for AddrConvertTileIndex +**************************************************************************************************** +*/ +typedef struct _ADDR_CONVERT_TILEINDEX_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + AddrTileMode tileMode; ///< Tile mode + AddrTileType tileType; ///< Tile type + ADDR_TILEINFO* pTileInfo; ///< Tile info + +} ADDR_CONVERT_TILEINDEX_OUTPUT; + +/** +**************************************************************************************************** +* AddrConvertTileIndex +* +* @brief +* Convert tile index to tile mode/type/info +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex( + ADDR_HANDLE hLib, + const ADDR_CONVERT_TILEINDEX_INPUT* pIn, + ADDR_CONVERT_TILEINDEX_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_GET_MACROMODEINDEX_INPUT +* +* @brief +* Input structure for AddrGetMacroModeIndex +**************************************************************************************************** +*/ +typedef struct _ADDR_GET_MACROMODEINDEX_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + ADDR_SURFACE_FLAGS flags; ///< Surface flag + INT_32 tileIndex; ///< Tile index + UINT_32 bpp; ///< Bits per pixel + UINT_32 numFrags; ///< Number of color fragments +} ADDR_GET_MACROMODEINDEX_INPUT; + +/** +**************************************************************************************************** +* ADDR_GET_MACROMODEINDEX_OUTPUT +* +* @brief +* Output structure for 
AddrGetMacroModeIndex +**************************************************************************************************** +*/ +typedef struct _ADDR_GET_MACROMODEINDEX_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) +} ADDR_GET_MACROMODEINDEX_OUTPUT; + +/** +**************************************************************************************************** +* AddrGetMacroModeIndex +* +* @brief +* Get macro mode index based on input parameters +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrGetMacroModeIndex( + ADDR_HANDLE hLib, + const ADDR_GET_MACROMODEINDEX_INPUT* pIn, + ADDR_GET_MACROMODEINDEX_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_CONVERT_TILEINDEX1_INPUT +* +* @brief +* Input structure for AddrConvertTileIndex1 (without macro mode index) +**************************************************************************************************** +*/ +typedef struct _ADDR_CONVERT_TILEINDEX1_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + INT_32 tileIndex; ///< Tile index + UINT_32 bpp; ///< Bits per pixel + UINT_32 numSamples; ///< Number of samples + BOOL_32 tileInfoHw; ///< Set to TRUE if client wants HW enum, otherwise actual +} ADDR_CONVERT_TILEINDEX1_INPUT; + +/** +**************************************************************************************************** +* AddrConvertTileIndex1 +* +* @brief +* Convert tile index to tile mode/type/info +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex1( + ADDR_HANDLE hLib, + const ADDR_CONVERT_TILEINDEX1_INPUT* pIn, + ADDR_CONVERT_TILEINDEX_OUTPUT* pOut); + +/** 
+**************************************************************************************************** +* ADDR_GET_TILEINDEX_INPUT +* +* @brief +* Input structure for AddrGetTileIndex +**************************************************************************************************** +*/ +typedef struct _ADDR_GET_TILEINDEX_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + AddrTileMode tileMode; ///< Tile mode + AddrTileType tileType; ///< Tile-type: disp/non-disp/... + ADDR_TILEINFO* pTileInfo; ///< Pointer to tile-info structure, can be NULL for linear/1D +} ADDR_GET_TILEINDEX_INPUT; + +/** +**************************************************************************************************** +* ADDR_GET_TILEINDEX_OUTPUT +* +* @brief +* Output structure for AddrGetTileIndex +**************************************************************************************************** +*/ +typedef struct _ADDR_GET_TILEINDEX_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + INT_32 index; ///< index in table +} ADDR_GET_TILEINDEX_OUTPUT; + +/** +**************************************************************************************************** +* AddrGetTileIndex +* +* @brief +* Get the tiling mode index in table +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrGetTileIndex( + ADDR_HANDLE hLib, + const ADDR_GET_TILEINDEX_INPUT* pIn, + ADDR_GET_TILEINDEX_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_PRT_INFO_INPUT +* +* @brief +* Input structure for AddrComputePrtInfo +**************************************************************************************************** +*/ +typedef struct _ADDR_PRT_INFO_INPUT +{ + AddrFormat format; ///< Surface format + UINT_32 baseMipWidth; ///< Base mipmap width + UINT_32 baseMipHeight; ///< Base mipmap height + UINT_32 baseMipDepth; ///< 
Base mipmap depth + UINT_32 numFrags; ///< Number of fragments, +} ADDR_PRT_INFO_INPUT; + +/** +**************************************************************************************************** +* ADDR_PRT_INFO_OUTPUT +* +* @brief +* Input structure for AddrComputePrtInfo +**************************************************************************************************** +*/ +typedef struct _ADDR_PRT_INFO_OUTPUT +{ + UINT_32 prtTileWidth; + UINT_32 prtTileHeight; +} ADDR_PRT_INFO_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputePrtInfo +* +* @brief +* Compute prt surface related information +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputePrtInfo( + ADDR_HANDLE hLib, + const ADDR_PRT_INFO_INPUT* pIn, + ADDR_PRT_INFO_OUTPUT* pOut); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// DCC key functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* _ADDR_COMPUTE_DCCINFO_INPUT +* +* @brief +* Input structure of AddrComputeDccInfo +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_DCCINFO_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + UINT_32 bpp; ///< BitPP of color surface + UINT_32 numSamples; ///< Sample number of color surface + UINT_64 colorSurfSize; ///< Size of color surface to which dcc key is bound + AddrTileMode tileMode; ///< Tile mode of color surface + ADDR_TILEINFO tileInfo; ///< Tile info of color surface + UINT_32 tileSwizzle; ///< Tile swizzle + INT_32 tileIndex; ///< Tile index of color surface, + ///< MUST be -1 if you don't want to use it + ///< while 
the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid +} ADDR_COMPUTE_DCCINFO_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_DCCINFO_OUTPUT +* +* @brief +* Output structure of AddrComputeDccInfo +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_DCCINFO_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + UINT_32 dccRamBaseAlign; ///< Base alignment of dcc key + UINT_64 dccRamSize; ///< Size of dcc key + UINT_64 dccFastClearSize; ///< Size of dcc key portion that can be fast cleared + BOOL_32 subLvlCompressible; ///< Whether sub resource is compressible + BOOL_32 dccRamSizeAligned; ///< Whether the dcc key size is aligned +} ADDR_COMPUTE_DCCINFO_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeDccInfo +* +* @brief +* Compute DCC key size, base alignment +* info +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_DCCINFO_INPUT* pIn, + ADDR_COMPUTE_DCCINFO_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_GET_MAX_ALINGMENTS_OUTPUT +* +* @brief +* Output structure of AddrGetMaxAlignments +**************************************************************************************************** +*/ +typedef struct _ADDR_GET_MAX_ALINGMENTS_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + UINT_32 baseAlign; ///< Maximum base alignment in bytes +} ADDR_GET_MAX_ALINGMENTS_OUTPUT; + +/** 
+**************************************************************************************************** +* AddrGetMaxAlignments +* +* @brief +* Gets maximum alignments +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrGetMaxAlignments( + ADDR_HANDLE hLib, + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut); + +/** +**************************************************************************************************** +* AddrGetMaxMetaAlignments +* +* @brief +* Gets maximum alignments for metadata +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrGetMaxMetaAlignments( + ADDR_HANDLE hLib, + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut); + +/** +**************************************************************************************************** +* Address library interface version 2 +* available from Gfx9 hardware +**************************************************************************************************** +* Addr2ComputeSurfaceInfo() +* Addr2ComputeSurfaceAddrFromCoord() +* Addr2ComputeSurfaceCoordFromAddr() + +* Addr2ComputeHtileInfo() +* Addr2ComputeHtileAddrFromCoord() +* Addr2ComputeHtileCoordFromAddr() +* +* Addr2ComputeCmaskInfo() +* Addr2ComputeCmaskAddrFromCoord() +* Addr2ComputeCmaskCoordFromAddr() +* +* Addr2ComputeFmaskInfo() +* Addr2ComputeFmaskAddrFromCoord() +* Addr2ComputeFmaskCoordFromAddr() +* +* Addr2ComputeDccInfo() +* +**/ + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Surface functions for Gfx9 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* ADDR2_SURFACE_FLAGS +* +* @brief +* Surface flags 
+**************************************************************************************************** +*/ +typedef union _ADDR2_SURFACE_FLAGS +{ + struct + { + UINT_32 color : 1; ///< This resource is a color buffer, can be used with RTV + UINT_32 depth : 1; ///< This resource is a depth buffer, can be used with DSV + UINT_32 stencil : 1; ///< This resource is a stencil buffer, can be used with DSV + UINT_32 fmask : 1; ///< This is an fmask surface + UINT_32 overlay : 1; ///< This is an overlay surface + UINT_32 display : 1; ///< This resource is displayable, can be used with DRV + UINT_32 prt : 1; ///< This is a partially resident texture + UINT_32 qbStereo : 1; ///< This is a quad buffer stereo surface + UINT_32 interleaved : 1; ///< Special flag for interleaved YUV surface padding + UINT_32 texture : 1; ///< This resource can be used with SRV + UINT_32 unordered : 1; ///< This resource can be used with UAV + UINT_32 rotated : 1; ///< This resource is rotated and displayable + UINT_32 needEquation : 1; ///< This resource needs equation to be generated if possible + UINT_32 opt4space : 1; ///< This resource should be optimized for space + UINT_32 minimizeAlign : 1; ///< This resource should use minimum alignment + UINT_32 noMetadata : 1; ///< This resource has no metadata + UINT_32 metaRbUnaligned : 1; ///< This resource has rb unaligned metadata + UINT_32 metaPipeUnaligned : 1; ///< This resource has pipe unaligned metadata + UINT_32 view3dAs2dArray : 1; ///< This resource is a 3D resource viewed as 2D array + UINT_32 reserved : 13; ///< Reserved bits + }; + + UINT_32 value; +} ADDR2_SURFACE_FLAGS; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_SURFACE_INFO_INPUT +* +* @brief +* Input structure for Addr2ComputeSurfaceInfo +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_SURFACE_INFO_INPUT +{ + UINT_32 
size; ///< Size of this structure in bytes + + ADDR2_SURFACE_FLAGS flags; ///< Surface flags + AddrSwizzleMode swizzleMode; ///< Swizzle Mode for Gfx9 + AddrResourceType resourceType; ///< Surface type + AddrFormat format; ///< Surface format + UINT_32 bpp; ///< bits per pixel + UINT_32 width; ///< Width (of mip0), in pixels + UINT_32 height; ///< Height (of mip0), in pixels + UINT_32 numSlices; ///< Number surface slice/depth (of mip0), + UINT_32 numMipLevels; ///< Total mipmap levels. + UINT_32 numSamples; ///< Number of samples + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA + UINT_32 pitchInElement; ///< Pitch in elements (blocks for compressed formats) + UINT_32 sliceAlign; ///< Required slice size in bytes +} ADDR2_COMPUTE_SURFACE_INFO_INPUT; + +/** +**************************************************************************************************** +* ADDR2_MIP_INFO +* +* @brief +* Structure that contains information for mip level +* +**************************************************************************************************** +*/ +typedef struct _ADDR2_MIP_INFO +{ + UINT_32 pitch; ///< Pitch in elements + UINT_32 height; ///< Padded height in elements + UINT_32 depth; ///< Padded depth + UINT_32 pixelPitch; ///< Pitch in pixels + UINT_32 pixelHeight; ///< Padded height in pixels + UINT_32 equationIndex; ///< Equation index in the equation table + UINT_64 offset; ///< Offset in bytes from mip base, should only be used + ///< to setup vam surface descriptor, can't be used + ///< to setup swizzle pattern + UINT_64 macroBlockOffset; ///< macro block offset in bytes from mip base + UINT_32 mipTailOffset; ///< mip tail offset in bytes + UINT_32 mipTailCoordX; ///< mip tail coord x + UINT_32 mipTailCoordY; ///< mip tail coord y + UINT_32 mipTailCoordZ; ///< mip tail coord z +} ADDR2_MIP_INFO; + +/** 
+**************************************************************************************************** +* ADDR2_COMPUTE_SURFACE_INFO_OUTPUT +* +* @brief +* Output structure for Addr2ComputeSurfInfo +* @note + Element: AddrLib unit for computing. e.g. BCn: 4x4 blocks; R32B32B32: 32bit with 3x pitch + Pixel: Original pixel +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_SURFACE_INFO_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 pitch; ///< Pitch in elements (blocks for compressed formats) + UINT_32 height; ///< Padded height (of mip0) in elements + UINT_32 numSlices; ///< Padded depth for 3d resource + ///< or padded number of slices for 2d array resource + UINT_32 mipChainPitch; ///< Pitch (of total mip chain) in elements + UINT_32 mipChainHeight; ///< Padded height (of total mip chain) in elements + UINT_32 mipChainSlice; ///< Padded depth (of total mip chain) + UINT_64 sliceSize; ///< Slice (total mip chain) size in bytes + UINT_64 surfSize; ///< Surface (total mip chain) size in bytes + UINT_32 baseAlign; ///< Base address alignment + UINT_32 bpp; ///< Bits per elements + /// (e.g. 
blocks for BCn, 1/3 for 96bit) + UINT_32 pixelMipChainPitch; ///< Mip chain pitch in original pixels + UINT_32 pixelMipChainHeight; ///< Mip chain height in original pixels + UINT_32 pixelPitch; ///< Pitch in original pixels + UINT_32 pixelHeight; ///< Height in original pixels + UINT_32 pixelBits; ///< Original bits per pixel, passed from input + + UINT_32 blockWidth; ///< Width in element inside one block + UINT_32 blockHeight; ///< Height in element inside one block + UINT_32 blockSlices; ///< Slice number inside one block + ///< Prt tile is one block, its width/height/slice + ///< equals to blcok width/height/slice + + BOOL_32 epitchIsHeight; ///< Whether to use height to program epitch register + /// Stereo info + ADDR_QBSTEREOINFO* pStereoInfo; ///< Stereo info, needed if qbStereo flag is TRUE + /// Mip info + ADDR2_MIP_INFO* pMipInfo; ///< Pointer to mip information array + /// if it is not NULL, the array is assumed to + /// contain numMipLevels entries + + UINT_32 equationIndex; ///< Equation index in the equation table of mip0 + BOOL_32 mipChainInTail; ///< If whole mipchain falls into mip tail block + UINT_32 firstMipIdInTail; ///< The id of first mip in tail, if there is no mip + /// in tail, it will be set to number of mip levels +} ADDR2_COMPUTE_SURFACE_INFO_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeSurfaceInfo +* +* @brief +* Compute surface width/height/slices/alignments and suitable tiling mode +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceInfo( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT +* +* @brief +* Input structure for 
Addr2ComputeSurfaceAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Slice index + UINT_32 sample; ///< Sample index, use fragment index for EQAA + UINT_32 mipId; ///< the mip ID in mip chain + + AddrSwizzleMode swizzleMode; ///< Swizzle mode for Gfx9 + ADDR2_SURFACE_FLAGS flags; ///< Surface flags + AddrResourceType resourceType; ///< Surface type + UINT_32 bpp; ///< Bits per pixel + UINT_32 unalignedWidth; ///< Surface original width (of mip0) + UINT_32 unalignedHeight; ///< Surface original height (of mip0) + UINT_32 numSlices; ///< Surface original slices (of mip0) + UINT_32 numMipLevels; ///< Total mipmap levels + UINT_32 numSamples; ///< Number of samples + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA + + UINT_32 pipeBankXor; ///< Combined swizzle used to do bank/pipe rotation + UINT_32 pitchInElement; ///< Pitch in elements (blocks for compressed formats) +} ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT +* +* @brief +* Output structure for Addr2ComputeSurfaceAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Byte address + UINT_32 bitPosition; ///< Bit position within surfaceAddr, 0-7. + /// For surface bpp < 8, e.g. FMT_1. 
+ UINT_32 prtBlockIndex; ///< Index of a PRT tile (64K block) +} ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeSurfaceAddrFromCoord +* +* @brief +* Compute surface address from a given coordinate. +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceAddrFromCoord( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT +* +* @brief +* Input structure for Addr2ComputeSurfaceCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Address in bytes + UINT_32 bitPosition; ///< Bit position in addr. 0-7. for surface bpp < 8, + /// e.g. FMT_1; + + AddrSwizzleMode swizzleMode; ///< Swizzle mode for Gfx9 + ADDR2_SURFACE_FLAGS flags; ///< Surface flags + AddrResourceType resourceType; ///< Surface type + UINT_32 bpp; ///< Bits per pixel + UINT_32 unalignedWidth; ///< Surface original width (of mip0) + UINT_32 unalignedHeight; ///< Surface original height (of mip0) + UINT_32 numSlices; ///< Surface original slices (of mip0) + UINT_32 numMipLevels; ///< Total mipmap levels. 
+ UINT_32 numSamples; ///< Number of samples + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA + + UINT_32 pipeBankXor; ///< Combined swizzle used to do bank/pipe rotation + UINT_32 pitchInElement; ///< Pitch in elements (blocks for compressed formats) +} ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT +* +* @brief +* Output structure for Addr2ComputeSurfaceCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Index of slices + UINT_32 sample; ///< Index of samples, means fragment index for EQAA + UINT_32 mipId; ///< mipmap level id +} ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeSurfaceCoordFromAddr +* +* @brief +* Compute coordinate from a given surface address +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceCoordFromAddr( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// HTile functions for Gfx9 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* ADDR2_META_FLAGS +* +* @brief 
+* Metadata flags +**************************************************************************************************** +*/ +typedef union _ADDR2_META_FLAGS +{ + struct + { + UINT_32 pipeAligned : 1; ///< if Metadata being pipe aligned + UINT_32 rbAligned : 1; ///< if Metadata being RB aligned + UINT_32 linear : 1; ///< if Metadata linear, GFX9 does not support this! + UINT_32 reserved : 29; ///< Reserved bits + }; + + UINT_32 value; +} ADDR2_META_FLAGS; + +/** +**************************************************************************************************** +* ADDR2_META_MIP_INFO +* +* @brief +* Structure to store per mip metadata information +**************************************************************************************************** +*/ +typedef struct _ADDR2_META_MIP_INFO +{ + BOOL_32 inMiptail; + union + { + struct + { + UINT_32 startX; + UINT_32 startY; + UINT_32 startZ; + UINT_32 width; + UINT_32 height; + UINT_32 depth; + }; + + struct + { + UINT_32 offset; ///< Metadata offset within one slice, + /// the thickness of a slice is meta block depth. + UINT_32 sliceSize; ///< Metadata size within one slice, + /// the thickness of a slice is meta block depth. 
+ }; + }; +} ADDR2_META_MIP_INFO; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_HTILE_INFO_INPUT +* +* @brief +* Input structure of Addr2ComputeHtileInfo +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_HTILE_INFO_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + ADDR2_META_FLAGS hTileFlags; ///< HTILE flags + ADDR2_SURFACE_FLAGS depthFlags; ///< Depth surface flags + AddrSwizzleMode swizzleMode; ///< Depth surface swizzle mode + UINT_32 unalignedWidth; ///< Depth surface original width (of mip0) + UINT_32 unalignedHeight; ///< Depth surface original height (of mip0) + UINT_32 numSlices; ///< Number of slices of depth surface (of mip0) + UINT_32 numMipLevels; ///< Total mipmap levels of color surface + UINT_32 firstMipIdInTail; /// Id of the first mip in tail, + /// if no mip is in tail, it should be set to + /// number of mip levels +} ADDR2_COMPUTE_HTILE_INFO_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_HTILE_INFO_OUTPUT +* +* @brief +* Output structure of Addr2ComputeHtileInfo +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_HTILE_INFO_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 pitch; ///< Pitch in pixels of depth buffer represented in this + /// HTile buffer. This might be larger than original depth + /// buffer pitch when called with an unaligned pitch. + UINT_32 height; ///< Height in pixels, as above + UINT_32 baseAlign; ///< Base alignment + UINT_32 sliceSize; ///< Slice size, in bytes. 
+ UINT_32 htileBytes; ///< Size of HTILE buffer, in bytes + UINT_32 metaBlkWidth; ///< Meta block width + UINT_32 metaBlkHeight; ///< Meta block height + UINT_32 metaBlkNumPerSlice; ///< Number of metablock within one slice + + ADDR2_META_MIP_INFO* pMipInfo; ///< HTILE mip information +} ADDR2_COMPUTE_HTILE_INFO_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeHtileInfo +* +* @brief +* Compute Htile pitch, height, base alignment and size in bytes +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileInfo( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, + ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT +* +* @brief +* Input structure for Addr2ComputeHtileAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Index of slices + UINT_32 mipId; ///< mipmap level id + + ADDR2_META_FLAGS hTileFlags; ///< HTILE flags + ADDR2_SURFACE_FLAGS depthflags; ///< Depth surface flags + AddrSwizzleMode swizzleMode; ///< Depth surface swizzle mode + UINT_32 bpp; ///< Depth surface bits per pixel + UINT_32 unalignedWidth; ///< Depth surface original width (of mip0) + UINT_32 unalignedHeight; ///< Depth surface original height (of mip0) + UINT_32 numSlices; ///< Depth surface original depth (of mip0) + UINT_32 numMipLevels; ///< Depth surface total mipmap levels + UINT_32 numSamples; ///< Depth surface number of samples + UINT_32 pipeXor; ///< Pipe xor setting +} 
ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT +* +* @brief +* Output structure for Addr2ComputeHtileAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Address in bytes +} ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeHtileAddrFromCoord +* +* @brief +* Compute Htile address according to coordinates (of depth buffer) +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileAddrFromCoord( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT +* +* @brief +* Input structure for Addr2ComputeHtileCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Address + + ADDR2_META_FLAGS hTileFlags; ///< HTILE flags + ADDR2_SURFACE_FLAGS depthFlags; ///< Depth surface flags + AddrSwizzleMode swizzleMode; ///< Depth surface swizzle mode + UINT_32 bpp; ///< Depth surface bits per pixel + UINT_32 unalignedWidth; ///< Depth surface original width (of mip0) + UINT_32 unalignedHeight; ///< Depth surface original height (of mip0) + UINT_32 numSlices; ///< Depth surface original depth (of mip0) + UINT_32 
numMipLevels; ///< Depth surface total mipmap levels + UINT_32 numSamples; ///< Depth surface number of samples + UINT_32 pipeXor; ///< Pipe xor setting +} ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT +* +* @brief +* Output structure for Addr2ComputeHtileCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Index of slices + UINT_32 mipId; ///< mipmap level id +} ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeHtileCoordFromAddr +* +* @brief +* Compute coordinates within depth buffer (1st pixel of a micro tile) according to +* Htile address +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileCoordFromAddr( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// C-mask functions for Gfx9 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_CMASK_INFO_INPUT +* +* @brief +* Input structure of Addr2ComputeCmaskInfo +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_CMASKINFO_INPUT +{ + UINT_32 size; ///< Size of this 
structure in bytes + + ADDR2_META_FLAGS cMaskFlags; ///< CMASK flags + ADDR2_SURFACE_FLAGS colorFlags; ///< Color surface flags + AddrResourceType resourceType; ///< Color surface type + AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode + UINT_32 unalignedWidth; ///< Color surface original width + UINT_32 unalignedHeight; ///< Color surface original height + UINT_32 numSlices; ///< Number of slices of color buffer +} ADDR2_COMPUTE_CMASK_INFO_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_CMASK_INFO_OUTPUT +* +* @brief +* Output structure of Addr2ComputeCmaskInfo +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_CMASK_INFO_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 pitch; ///< Pitch in pixels of color buffer which + /// this Cmask matches. The size might be larger than + /// original color buffer pitch when called with + /// an unaligned pitch. + UINT_32 height; ///< Height in pixels, as above + UINT_32 baseAlign; ///< Base alignment + UINT_32 sliceSize; ///< Slice size, in bytes. 
+ UINT_32 cmaskBytes; ///< Size in bytes of CMask buffer + UINT_32 metaBlkWidth; ///< Meta block width + UINT_32 metaBlkHeight; ///< Meta block height + + UINT_32 metaBlkNumPerSlice; ///< Number of metablock within one slice +} ADDR2_COMPUTE_CMASK_INFO_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeCmaskInfo +* +* @brief +* Compute Cmask pitch, height, base alignment and size in bytes from color buffer +* info +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskInfo( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, + ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT +* +* @brief +* Input structure for Addr2ComputeCmaskAddrFromCoord +* +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Index of slices + + ADDR2_META_FLAGS cMaskFlags; ///< CMASK flags + ADDR2_SURFACE_FLAGS colorFlags; ///< Color surface flags + AddrResourceType resourceType; ///< Color surface type + AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode + + UINT_32 unalignedWidth; ///< Color surface original width (of mip0) + UINT_32 unalignedHeight; ///< Color surface original height (of mip0) + UINT_32 numSlices; ///< Color surface original slices (of mip0) + + UINT_32 numSamples; ///< Color surface sample number + UINT_32 numFrags; ///< Color surface fragment number + + UINT_32 pipeXor; ///< pipe Xor setting +} ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT; + +/** 
+**************************************************************************************************** +* ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT +* +* @brief +* Output structure for Addr2ComputeCmaskAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< CMASK address in bytes + UINT_32 bitPosition; ///< Bit position within addr, 0 or 4 +} ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeCmaskAddrFromCoord +* +* @brief +* Compute Cmask address according to coordinates (of MSAA color buffer) +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskAddrFromCoord( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT +* +* @brief +* Input structure for Addr2ComputeCmaskCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< CMASK address in bytes + UINT_32 bitPosition; ///< Bit position within addr, 0 or 4 + + ADDR2_META_FLAGS cMaskFlags; ///< CMASK flags + ADDR2_SURFACE_FLAGS colorFlags; ///< Color surface flags + AddrResourceType resourceType; ///< Color surface type + AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode + + UINT_32 unalignedWidth; ///< Color surface original width (of mip0) + UINT_32 unalignedHeight; ///< Color surface 
original height (of mip0) + UINT_32 numSlices; ///< Color surface original slices (of mip0) + UINT_32 numMipLevels; ///< Color surface total mipmap levels. +} ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT +* +* @brief +* Output structure for Addr2ComputeCmaskCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Index of slices + UINT_32 mipId; ///< mipmap level id +} ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeCmaskCoordFromAddr +* +* @brief +* Compute coordinates within color buffer (1st pixel of a micro tile) according to +* Cmask address +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskCoordFromAddr( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// F-mask functions for Gfx9 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* ADDR2_FMASK_FLAGS +* +* @brief +* FMASK flags +**************************************************************************************************** +*/ +typedef union _ADDR2_FMASK_FLAGS +{ + struct + { + UINT_32 resolved : 1; ///< TRUE if this is a resolved fmask, used by H/W 
clients + /// by H/W clients. S/W should always set it to FALSE. + UINT_32 reserved : 31; ///< Reserved for future use. + }; + + UINT_32 value; +} ADDR2_FMASK_FLAGS; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_FMASK_INFO_INPUT +* +* @brief +* Input structure for Addr2ComputeFmaskInfo +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_FMASK_INFO_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode + UINT_32 unalignedWidth; ///< Color surface original width + UINT_32 unalignedHeight; ///< Color surface original height + UINT_32 numSlices; ///< Number of slices/depth + UINT_32 numSamples; ///< Number of samples + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA + ADDR2_FMASK_FLAGS fMaskFlags; ///< FMASK flags +} ADDR2_COMPUTE_FMASK_INFO_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_FMASK_INFO_OUTPUT +* +* @brief +* Output structure for Addr2ComputeFmaskInfo +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_FMASK_INFO_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 pitch; ///< Pitch of fmask in pixels + UINT_32 height; ///< Height of fmask in pixels + UINT_32 baseAlign; ///< Base alignment + UINT_32 numSlices; ///< Slices of fmask + UINT_32 fmaskBytes; ///< Size of fmask in bytes + UINT_32 bpp; ///< Bits per pixel of FMASK is: number of bit planes + UINT_32 numSamples; ///< Number of samples + UINT_32 sliceSize; ///< Size of slice in bytes +} ADDR2_COMPUTE_FMASK_INFO_OUTPUT; + +/** 
+**************************************************************************************************** +* Addr2ComputeFmaskInfo +* +* @brief +* Compute Fmask pitch/height/slices/alignments and size in bytes +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskInfo( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn, + ADDR2_COMPUTE_FMASK_INFO_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT +* +* @brief +* Input structure for Addr2ComputeFmaskAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Slice index + UINT_32 sample; ///< Sample index (fragment index for EQAA) + UINT_32 plane; ///< Plane number + + UINT_32 unalignedWidth; ///< Color surface original width + UINT_32 unalignedHeight; ///< Color surface original height + UINT_32 numSamples; ///< Number of samples + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA + UINT_32 tileSwizzle; ///< Combined swizzle used to do bank/pipe rotation + + ADDR2_FMASK_FLAGS fMaskFlags; ///< FMASK flags +} ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT +* +* @brief +* Output structure for Addr2ComputeFmaskAddrFromCoord +**************************************************************************************************** +*/ 
+typedef struct _ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Fmask address + UINT_32 bitPosition; ///< Bit position within fmaskAddr, 0-7. +} ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeFmaskAddrFromCoord +* +* @brief +* Compute Fmask address according to coordinates (x,y,slice,sample,plane) +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskAddrFromCoord( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT +* +* @brief +* Input structure for Addr2ComputeFmaskCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Address + UINT_32 bitPosition; ///< Bit position within addr, 0-7. 
+ AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode + + UINT_32 unalignedWidth; ///< Color surface original width + UINT_32 unalignedHeight; ///< Color surface original height + UINT_32 numSamples; ///< Number of samples + UINT_32 numFrags; ///< Number of fragments + + UINT_32 tileSwizzle; ///< Combined swizzle used to do bank/pipe rotation + + ADDR2_FMASK_FLAGS fMaskFlags; ///< FMASK flags +} ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT +* +* @brief +* Output structure for Addr2ComputeFmaskCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Slice index + UINT_32 sample; ///< Sample index (fragment index for EQAA) + UINT_32 plane; ///< Plane number +} ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeFmaskCoordFromAddr +* +* @brief +* Compute FMASK coordinate from an given address +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskCoordFromAddr( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// DCC key functions for Gfx9 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* 
_ADDR2_COMPUTE_DCCINFO_INPUT +* +* @brief +* Input structure of Addr2ComputeDccInfo +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_DCCINFO_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + ADDR2_META_FLAGS dccKeyFlags; ///< DCC key flags + ADDR2_SURFACE_FLAGS colorFlags; ///< Color surface flags + AddrResourceType resourceType; ///< Color surface type + AddrSwizzleMode swizzleMode; ///< Color surface swizzle mode + UINT_32 bpp; ///< bits per pixel + UINT_32 unalignedWidth; ///< Color surface original width (of mip0) + UINT_32 unalignedHeight; ///< Color surface original height (of mip0) + UINT_32 numSlices; ///< Number of slices, of color surface (of mip0) + UINT_32 numFrags; ///< Fragment number of color surface + UINT_32 numMipLevels; ///< Total mipmap levels of color surface + UINT_32 dataSurfaceSize; ///< The padded size of all slices and mip levels + ///< useful in meta linear case + UINT_32 firstMipIdInTail; ///< The id of first mip in tail, if no mip is in tail, + /// it should be number of mip levels +} ADDR2_COMPUTE_DCCINFO_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_DCCINFO_OUTPUT +* +* @brief +* Output structure of Addr2ComputeDccInfo +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_DCCINFO_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 dccRamBaseAlign; ///< Base alignment of dcc key + UINT_32 dccRamSize; ///< Size of dcc key + + UINT_32 pitch; ///< DCC surface mip chain pitch + UINT_32 height; ///< DCC surface mip chain height + UINT_32 depth; ///< DCC surface mip chain depth + + UINT_32 compressBlkWidth; ///< DCC compress block width + UINT_32 compressBlkHeight; ///< DCC compress block height + UINT_32 compressBlkDepth; ///< DCC compress block 
depth + + UINT_32 metaBlkWidth; ///< DCC meta block width + UINT_32 metaBlkHeight; ///< DCC meta block height + UINT_32 metaBlkDepth; ///< DCC meta block depth + + UINT_32 metaBlkNumPerSlice; ///< Number of metablock within one slice + + union + { + UINT_32 fastClearSizePerSlice; ///< Size of DCC within a slice should be fast cleared + UINT_32 dccRamSliceSize; ///< DCC ram size per slice. For mipmap, it's + /// the slize size of a mip chain, the thickness of a + /// a slice is meta block depth + }; + + ADDR2_META_MIP_INFO* pMipInfo; ///< DCC mip information +} ADDR2_COMPUTE_DCCINFO_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeDccInfo +* +* @brief +* Compute DCC key size, base alignment +* info +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccInfo( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, + ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT +* +* @brief +* Input structure for Addr2ComputeDccAddrFromCoord +* +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Index of slices + UINT_32 sample; ///< Index of samples, means fragment index for EQAA + UINT_32 mipId; ///< mipmap level id + + ADDR2_META_FLAGS dccKeyFlags; ///< DCC flags + ADDR2_SURFACE_FLAGS colorFlags; ///< Color surface flags + AddrResourceType resourceType; ///< Color surface type + AddrSwizzleMode swizzleMode; ///< Color surface swizzle mode + UINT_32 bpp; ///< Color surface bits per pixel + UINT_32 unalignedWidth; ///< 
Color surface original width (of mip0) + UINT_32 unalignedHeight; ///< Color surface original height (of mip0) + UINT_32 numSlices; ///< Color surface original slices (of mip0) + UINT_32 numMipLevels; ///< Color surface mipmap levels + UINT_32 numFrags; ///< Color surface fragment number + + UINT_32 pipeXor; ///< pipe Xor setting +} ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT +* +* @brief +* Output structure for Addr2ComputeDccAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< DCC address in bytes +} ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeDccAddrFromCoord +* +* @brief +* Compute DCC address according to coordinates (of MSAA color buffer) +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccAddrFromCoord( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Misc functions for Gfx9 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_PIPEBANKXOR_INPUT +* +* @brief +* Input structure of Addr2ComputePipebankXor +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_PIPEBANKXOR_INPUT 
+{ + UINT_32 size; ///< Size of this structure in bytes + UINT_32 surfIndex; ///< Input surface index + ADDR2_SURFACE_FLAGS flags; ///< Surface flag + AddrSwizzleMode swizzleMode; ///< Surface swizzle mode + AddrResourceType resourceType; ///< Surface resource type + AddrFormat format; ///< Surface format + UINT_32 numSamples; ///< Number of samples + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA +} ADDR2_COMPUTE_PIPEBANKXOR_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT +* +* @brief +* Output structure of Addr2ComputePipebankXor +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + UINT_32 pipeBankXor; ///< Pipe bank xor +} ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputePipeBankXor +* +* @brief +* Calculate a valid bank pipe xor value for client to use. 
+**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputePipeBankXor( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT +* +* @brief +* Input structure of Addr2ComputeSlicePipeBankXor +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + AddrSwizzleMode swizzleMode; ///< Surface swizzle mode + AddrResourceType resourceType; ///< Surface resource type + UINT_32 basePipeBankXor; ///< Base pipe bank xor + UINT_32 slice; ///< Slice id + UINT_32 numSamples; ///< Number of samples +} ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT +* +* @brief +* Output structure of Addr2ComputeSlicePipeBankXor +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + UINT_32 pipeBankXor; ///< Pipe bank xor +} ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeSlicePipeBankXor +* +* @brief +* Calculate slice pipe bank xor value based on base pipe bank xor and slice id. 
+**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeSlicePipeBankXor( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT +* +* @brief +* Input structure of Addr2ComputeSubResourceOffsetForSwizzlePattern +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + AddrSwizzleMode swizzleMode; ///< Surface swizzle mode + AddrResourceType resourceType; ///< Surface resource type + UINT_32 pipeBankXor; ///< Per resource xor + UINT_32 slice; ///< Slice id + UINT_64 sliceSize; ///< Slice size of a mip chain + UINT_64 macroBlockOffset; ///< Macro block offset, returned in ADDR2_MIP_INFO + UINT_32 mipTailOffset; ///< Mip tail offset, returned in ADDR2_MIP_INFO +} ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT +* +* @brief +* Output structure of Addr2ComputeSubResourceOffsetForSwizzlePattern +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + UINT_64 offset; ///< offset +} ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeSubResourceOffsetForSwizzlePattern +* +* @brief +* Calculate 
sub resource offset to support swizzle pattern. +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeSubResourceOffsetForSwizzlePattern( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, + ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_BLOCK_SET +* +* @brief +* Bit field that defines block type +**************************************************************************************************** +*/ +typedef union _ADDR2_BLOCK_SET +{ + struct + { + UINT_32 micro : 1; // 256B block for 2D resource + UINT_32 macro4KB : 1; // 4KB for 2D/3D resource + UINT_32 macro64KB : 1; // 64KB for 2D/3D resource + UINT_32 var : 1; // VAR block + UINT_32 linear : 1; // Linear block + UINT_32 reserved : 27; + }; + + UINT_32 value; +} ADDR2_BLOCK_SET; + +/** +**************************************************************************************************** +* ADDR2_SWTYPE_SET +* +* @brief +* Bit field that defines swizzle type +**************************************************************************************************** +*/ +typedef union _ADDR2_SWTYPE_SET +{ + struct + { + UINT_32 sw_Z : 1; // SW_*_Z_* + UINT_32 sw_S : 1; // SW_*_S_* + UINT_32 sw_D : 1; // SW_*_D_* + UINT_32 sw_R : 1; // SW_*_R_* + UINT_32 reserved : 28; + }; + + UINT_32 value; +} ADDR2_SWTYPE_SET; + +/** +**************************************************************************************************** +* ADDR2_SWMODE_SET +* +* @brief +* Bit field that defines swizzle type +**************************************************************************************************** +*/ +typedef union _ADDR2_SWMODE_SET +{ + struct + { + UINT_32 swLinear : 1; + UINT_32 sw256B_S : 1; + UINT_32 sw256B_D : 1; + UINT_32 sw256B_R : 1; + UINT_32 sw4KB_Z : 
1; + UINT_32 sw4KB_S : 1; + UINT_32 sw4KB_D : 1; + UINT_32 sw4KB_R : 1; + UINT_32 sw64KB_Z : 1; + UINT_32 sw64KB_S : 1; + UINT_32 sw64KB_D : 1; + UINT_32 sw64KB_R : 1; + UINT_32 swVar_Z : 1; + UINT_32 swVar_S : 1; + UINT_32 swVar_D : 1; + UINT_32 swVar_R : 1; + UINT_32 sw64KB_Z_T : 1; + UINT_32 sw64KB_S_T : 1; + UINT_32 sw64KB_D_T : 1; + UINT_32 sw64KB_R_T : 1; + UINT_32 sw4KB_Z_X : 1; + UINT_32 sw4KB_S_X : 1; + UINT_32 sw4KB_D_X : 1; + UINT_32 sw4KB_R_X : 1; + UINT_32 sw64KB_Z_X : 1; + UINT_32 sw64KB_S_X : 1; + UINT_32 sw64KB_D_X : 1; + UINT_32 sw64KB_R_X : 1; + UINT_32 swVar_Z_X : 1; + UINT_32 swVar_S_X : 1; + UINT_32 swVar_D_X : 1; + UINT_32 swVar_R_X : 1; + }; + + UINT_32 value; +} ADDR2_SWMODE_SET; + +/** +**************************************************************************************************** +* ADDR2_GET_PREFERRED_SURF_SETTING_INPUT +* +* @brief +* Input structure of Addr2GetPreferredSurfaceSetting +**************************************************************************************************** +*/ +typedef struct _ADDR2_GET_PREFERRED_SURF_SETTING_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + ADDR2_SURFACE_FLAGS flags; ///< Surface flags + AddrResourceType resourceType; ///< Surface type + AddrFormat format; ///< Surface format + AddrResrouceLocation resourceLoction; ///< Surface heap choice + ADDR2_BLOCK_SET forbiddenBlock; ///< Client can use it to disable some block setting + ///< such as linear for DXTn, tiled for YUV + ADDR2_SWTYPE_SET preferredSwSet; ///< Client can use it to specify sw type(s) wanted + BOOL_32 noXor; ///< Do not use xor mode for this resource + UINT_32 bpp; ///< bits per pixel + UINT_32 width; ///< Width (of mip0), in pixels + UINT_32 height; ///< Height (of mip0), in pixels + UINT_32 numSlices; ///< Number surface slice/depth (of mip0), + UINT_32 numMipLevels; ///< Total mipmap levels. 
+ UINT_32 numSamples; ///< Number of samples + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA + UINT_32 maxAlign; ///< maximum base/size alignment requested by client + UINT_32 minSizeAlign; ///< memory allocated for surface in client driver will + /// be padded to multiple of this value (in bytes) +} ADDR2_GET_PREFERRED_SURF_SETTING_INPUT; + +/** +**************************************************************************************************** +* ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT +* +* @brief +* Output structure of Addr2GetPreferredSurfaceSetting +**************************************************************************************************** +*/ +typedef struct _ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + AddrSwizzleMode swizzleMode; ///< Suggested swizzle mode to be used + AddrResourceType resourceType; ///< Suggested resource type to program HW + ADDR2_BLOCK_SET validBlockSet; ///< Valid block type bit conbination + BOOL_32 canXor; ///< If client can use xor on a valid macro block + /// type + ADDR2_SWTYPE_SET validSwTypeSet; ///< Valid swizzle type bit combination + ADDR2_SWTYPE_SET clientPreferredSwSet; ///< Client-preferred swizzle type bit combination + ADDR2_SWMODE_SET validSwModeSet; ///< Valid swizzle mode bit combination +} ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT; + +/** +**************************************************************************************************** +* Addr2GetPreferredSurfaceSetting +* +* @brief +* Suggest a preferred setting for client driver to program HW register +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2GetPreferredSurfaceSetting( + ADDR_HANDLE hLib, + const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, + ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut); 
+ +/** +**************************************************************************************************** +* Addr2IsValidDisplaySwizzleMode +* +* @brief +* Return whether the swizzle mode is supported by DCE / DCN. +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2IsValidDisplaySwizzleMode( + ADDR_HANDLE hLib, + AddrSwizzleMode swizzleMode, + UINT_32 bpp, + bool *result); + +#if defined(__cplusplus) +} +#endif + +#endif // __ADDR_INTERFACE_H__ diff -Nru mesa-18.3.3/src/amd/addrlib/inc/addrtypes.h mesa-19.0.1/src/amd/addrlib/inc/addrtypes.h --- mesa-18.3.3/src/amd/addrlib/inc/addrtypes.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/inc/addrtypes.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,746 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file addrtypes.h +* @brief Contains the helper function and constants +**************************************************************************************************** +*/ +#ifndef __ADDR_TYPES_H__ +#define __ADDR_TYPES_H__ + +#if defined(__APPLE__) && !defined(HAVE_TSERVER) +// External definitions header maintained by Apple driver team, but not for diag team under Mac. +// Helps address compilation issues & reduces code covered by NDA +#include "addrExtDef.h" + +#else + +// Windows and/or Linux +#if !defined(VOID) +typedef void VOID; +#endif + +#if !defined(FLOAT) +typedef float FLOAT; +#endif + +#if !defined(CHAR) +typedef char CHAR; +#endif + +#if !defined(INT) +typedef int INT; +#endif + +#include // va_list...etc need this header + +#endif // defined (__APPLE__) && !defined(HAVE_TSERVER) + +/** +**************************************************************************************************** +* Calling conventions +**************************************************************************************************** +*/ +#ifndef ADDR_CDECL + #if defined(__GNUC__) + #define ADDR_CDECL __attribute__((cdecl)) + #else + #define ADDR_CDECL __cdecl + #endif +#endif + +#ifndef ADDR_STDCALL + #if defined(__GNUC__) + #if defined(__amd64__) || defined(__x86_64__) + #define ADDR_STDCALL + #else + #define ADDR_STDCALL __attribute__((stdcall)) + #endif + #else + #define ADDR_STDCALL __stdcall + #endif +#endif + +#ifndef ADDR_FASTCALL + #if defined(BRAHMA_ARM) + #define ADDR_FASTCALL + #elif defined(__GNUC__) + #if defined(__i386__) + #define ADDR_FASTCALL __attribute__((regparm(0))) + #else + #define ADDR_FASTCALL + #endif + #else + #define ADDR_FASTCALL __fastcall + #endif 
+#endif + +#ifndef GC_CDECL + #define GC_CDECL ADDR_CDECL +#endif + +#ifndef GC_STDCALL + #define GC_STDCALL ADDR_STDCALL +#endif + +#ifndef GC_FASTCALL + #define GC_FASTCALL ADDR_FASTCALL +#endif + +#if defined(__GNUC__) + #define ADDR_INLINE static inline // inline needs to be static to link +#else + // win32, win64, other platforms + #define ADDR_INLINE __inline +#endif // #if defined(__GNUC__) + +#define ADDR_API ADDR_FASTCALL //default call convention is fast call + +/** +**************************************************************************************************** +* Global defines used by other modules +**************************************************************************************************** +*/ +#if !defined(TILEINDEX_INVALID) +#define TILEINDEX_INVALID -1 +#endif + +#if !defined(TILEINDEX_LINEAR_GENERAL) +#define TILEINDEX_LINEAR_GENERAL -2 +#endif + +#if !defined(TILEINDEX_LINEAR_ALIGNED) +#define TILEINDEX_LINEAR_ALIGNED 8 +#endif + +/** +**************************************************************************************************** +* Return codes +**************************************************************************************************** +*/ +typedef enum _ADDR_E_RETURNCODE +{ + // General Return + ADDR_OK = 0, + ADDR_ERROR = 1, + + // Specific Errors + ADDR_OUTOFMEMORY, + ADDR_INVALIDPARAMS, + ADDR_NOTSUPPORTED, + ADDR_NOTIMPLEMENTED, + ADDR_PARAMSIZEMISMATCH, + ADDR_INVALIDGBREGVALUES, + +} ADDR_E_RETURNCODE; + +/** +**************************************************************************************************** +* @brief +* Neutral enums that define tile modes for all H/W +* @note +* R600/R800 tiling mode can be cast to hw enums directly but never cast into HW enum from +* ADDR_TM_2D_TILED_XTHICK +* +**************************************************************************************************** +*/ +typedef enum _AddrTileMode +{ + ADDR_TM_LINEAR_GENERAL = 0, ///< Least restrictions, pitch: multiple of 8 if not 
buffer + ADDR_TM_LINEAR_ALIGNED = 1, ///< Requests pitch or slice to be multiple of 64 pixels + ADDR_TM_1D_TILED_THIN1 = 2, ///< Linear array of 8x8 tiles + ADDR_TM_1D_TILED_THICK = 3, ///< Linear array of 8x8x4 tiles + ADDR_TM_2D_TILED_THIN1 = 4, ///< A set of macro tiles consist of 8x8 tiles + ADDR_TM_2D_TILED_THIN2 = 5, ///< 600 HWL only, macro tile ratio is 1:4 + ADDR_TM_2D_TILED_THIN4 = 6, ///< 600 HWL only, macro tile ratio is 1:16 + ADDR_TM_2D_TILED_THICK = 7, ///< A set of macro tiles consist of 8x8x4 tiles + ADDR_TM_2B_TILED_THIN1 = 8, ///< 600 HWL only, with bank swap + ADDR_TM_2B_TILED_THIN2 = 9, ///< 600 HWL only, with bank swap and ratio is 1:4 + ADDR_TM_2B_TILED_THIN4 = 10, ///< 600 HWL only, with bank swap and ratio is 1:16 + ADDR_TM_2B_TILED_THICK = 11, ///< 600 HWL only, with bank swap, consists of 8x8x4 tiles + ADDR_TM_3D_TILED_THIN1 = 12, ///< Macro tiling w/ pipe rotation between slices + ADDR_TM_3D_TILED_THICK = 13, ///< Macro tiling w/ pipe rotation bwtween slices, thick + ADDR_TM_3B_TILED_THIN1 = 14, ///< 600 HWL only, with bank swap + ADDR_TM_3B_TILED_THICK = 15, ///< 600 HWL only, with bank swap, thick + ADDR_TM_2D_TILED_XTHICK = 16, ///< Tile is 8x8x8, valid from NI + ADDR_TM_3D_TILED_XTHICK = 17, ///< Tile is 8x8x8, valid from NI + ADDR_TM_POWER_SAVE = 18, ///< Power save mode, only used by KMD on NI + ADDR_TM_PRT_TILED_THIN1 = 19, ///< No bank/pipe rotation or hashing beyond macrotile size + ADDR_TM_PRT_2D_TILED_THIN1 = 20, ///< Same as 2D_TILED_THIN1, PRT only + ADDR_TM_PRT_3D_TILED_THIN1 = 21, ///< Same as 3D_TILED_THIN1, PRT only + ADDR_TM_PRT_TILED_THICK = 22, ///< No bank/pipe rotation or hashing beyond macrotile size + ADDR_TM_PRT_2D_TILED_THICK = 23, ///< Same as 2D_TILED_THICK, PRT only + ADDR_TM_PRT_3D_TILED_THICK = 24, ///< Same as 3D_TILED_THICK, PRT only + ADDR_TM_UNKNOWN = 25, ///< Unkown tile mode, should be decided by address lib + ADDR_TM_COUNT = 26, ///< Must be the value of the last tile mode +} AddrTileMode; + +/** 
+**************************************************************************************************** +* @brief +* Neutral enums that define swizzle modes for Gfx9 ASIC +* @note +* +* ADDR_SW_LINEAR linear aligned addressing mode, for 1D/2D/3D resouce +* ADDR_SW_256B_* addressing block aligned size is 256B, for 2D/3D resouce +* ADDR_SW_4KB_* addressing block aligned size is 4KB, for 2D/3D resouce +* ADDR_SW_64KB_* addressing block aligned size is 64KB, for 2D/3D resouce +* ADDR_SW_VAR_* addressing block aligned size is ASIC specific, for 2D/3D resouce +* +* ADDR_SW_*_Z For 2D resouce, represents Z-order swizzle mode for depth/stencil/FMask + For 3D resouce, represents a swizzle mode similar to legacy thick tile mode +* ADDR_SW_*_S represents standard swizzle mode defined by MS +* ADDR_SW_*_D For 2D resouce, represents a swizzle mode for displayable resource +* For 3D resouce, represents a swizzle mode which places each slice in order & pixel + within slice is placed as 2D ADDR_SW_*_S. Don't use this combination if possible! 
+* ADDR_SW_*_R For 2D resouce only, represents a swizzle mode for rotated displayable resource +* +**************************************************************************************************** +*/ +typedef enum _AddrSwizzleMode +{ + ADDR_SW_LINEAR = 0, + ADDR_SW_256B_S = 1, + ADDR_SW_256B_D = 2, + ADDR_SW_256B_R = 3, + ADDR_SW_4KB_Z = 4, + ADDR_SW_4KB_S = 5, + ADDR_SW_4KB_D = 6, + ADDR_SW_4KB_R = 7, + ADDR_SW_64KB_Z = 8, + ADDR_SW_64KB_S = 9, + ADDR_SW_64KB_D = 10, + ADDR_SW_64KB_R = 11, + ADDR_SW_VAR_Z = 12, + ADDR_SW_VAR_S = 13, + ADDR_SW_VAR_D = 14, + ADDR_SW_VAR_R = 15, + ADDR_SW_64KB_Z_T = 16, + ADDR_SW_64KB_S_T = 17, + ADDR_SW_64KB_D_T = 18, + ADDR_SW_64KB_R_T = 19, + ADDR_SW_4KB_Z_X = 20, + ADDR_SW_4KB_S_X = 21, + ADDR_SW_4KB_D_X = 22, + ADDR_SW_4KB_R_X = 23, + ADDR_SW_64KB_Z_X = 24, + ADDR_SW_64KB_S_X = 25, + ADDR_SW_64KB_D_X = 26, + ADDR_SW_64KB_R_X = 27, + ADDR_SW_VAR_Z_X = 28, + ADDR_SW_VAR_S_X = 29, + ADDR_SW_VAR_D_X = 30, + ADDR_SW_VAR_R_X = 31, + ADDR_SW_LINEAR_GENERAL = 32, + ADDR_SW_MAX_TYPE = 33, + + // Used for represent block with identical size + ADDR_SW_256B = ADDR_SW_256B_S, + ADDR_SW_4KB = ADDR_SW_4KB_S_X, + ADDR_SW_64KB = ADDR_SW_64KB_S_X, + ADDR_SW_VAR = ADDR_SW_VAR_S_X, +} AddrSwizzleMode; + +/** +**************************************************************************************************** +* @brief +* Neutral enums that define image type +* @note +* this is new for address library interface version 2 +* +**************************************************************************************************** +*/ +typedef enum _AddrResourceType +{ + ADDR_RSRC_TEX_1D = 0, + ADDR_RSRC_TEX_2D = 1, + ADDR_RSRC_TEX_3D = 2, + ADDR_RSRC_MAX_TYPE = 3, +} AddrResourceType; + +/** +**************************************************************************************************** +* @brief +* Neutral enums that define resource heap location +* @note +* this is new for address library interface version 2 +* 
+**************************************************************************************************** +*/ +typedef enum _AddrResrouceLocation +{ + ADDR_RSRC_LOC_UNDEF = 0, // Resource heap is undefined/unknown + ADDR_RSRC_LOC_LOCAL = 1, // CPU visable and CPU invisable local heap + ADDR_RSRC_LOC_USWC = 2, // CPU write-combined non-cached nonlocal heap + ADDR_RSRC_LOC_CACHED = 3, // CPU cached nonlocal heap + ADDR_RSRC_LOC_INVIS = 4, // CPU invisable local heap only + ADDR_RSRC_LOC_MAX_TYPE = 5, +} AddrResrouceLocation; + +/** +**************************************************************************************************** +* @brief +* Neutral enums that define resource basic swizzle mode +* @note +* this is new for address library interface version 2 +* +**************************************************************************************************** +*/ +typedef enum _AddrSwType +{ + ADDR_SW_Z = 0, // Resource basic swizzle mode is ZOrder + ADDR_SW_S = 1, // Resource basic swizzle mode is Standard + ADDR_SW_D = 2, // Resource basic swizzle mode is Display + ADDR_SW_R = 3, // Resource basic swizzle mode is Rotated +} AddrSwType; + +/** +**************************************************************************************************** +* @brief +* Neutral enums that define mipmap major mode +* @note +* this is new for address library interface version 2 +* +**************************************************************************************************** +*/ +typedef enum _AddrMajorMode +{ + ADDR_MAJOR_X = 0, + ADDR_MAJOR_Y = 1, + ADDR_MAJOR_Z = 2, + ADDR_MAJOR_MAX_TYPE = 3, +} AddrMajorMode; + +/** +**************************************************************************************************** +* AddrFormat +* +* @brief +* Neutral enum for SurfaceFormat +* +**************************************************************************************************** +*/ +typedef enum _AddrFormat { + ADDR_FMT_INVALID = 0x00000000, + ADDR_FMT_8 = 0x00000001, + 
ADDR_FMT_4_4 = 0x00000002, + ADDR_FMT_3_3_2 = 0x00000003, + ADDR_FMT_RESERVED_4 = 0x00000004, + ADDR_FMT_16 = 0x00000005, + ADDR_FMT_16_FLOAT = ADDR_FMT_16, + ADDR_FMT_8_8 = 0x00000007, + ADDR_FMT_5_6_5 = 0x00000008, + ADDR_FMT_6_5_5 = 0x00000009, + ADDR_FMT_1_5_5_5 = 0x0000000a, + ADDR_FMT_4_4_4_4 = 0x0000000b, + ADDR_FMT_5_5_5_1 = 0x0000000c, + ADDR_FMT_32 = 0x0000000d, + ADDR_FMT_32_FLOAT = ADDR_FMT_32, + ADDR_FMT_16_16 = 0x0000000f, + ADDR_FMT_16_16_FLOAT = ADDR_FMT_16_16, + ADDR_FMT_8_24 = 0x00000011, + ADDR_FMT_8_24_FLOAT = ADDR_FMT_8_24, + ADDR_FMT_24_8 = 0x00000013, + ADDR_FMT_24_8_FLOAT = ADDR_FMT_24_8, + ADDR_FMT_10_11_11 = 0x00000015, + ADDR_FMT_10_11_11_FLOAT = ADDR_FMT_10_11_11, + ADDR_FMT_11_11_10 = 0x00000017, + ADDR_FMT_11_11_10_FLOAT = ADDR_FMT_11_11_10, + ADDR_FMT_2_10_10_10 = 0x00000019, + ADDR_FMT_8_8_8_8 = 0x0000001a, + ADDR_FMT_10_10_10_2 = 0x0000001b, + ADDR_FMT_X24_8_32_FLOAT = 0x0000001c, + ADDR_FMT_32_32 = 0x0000001d, + ADDR_FMT_32_32_FLOAT = ADDR_FMT_32_32, + ADDR_FMT_16_16_16_16 = 0x0000001f, + ADDR_FMT_16_16_16_16_FLOAT = ADDR_FMT_16_16_16_16, + ADDR_FMT_RESERVED_33 = 0x00000021, + ADDR_FMT_32_32_32_32 = 0x00000022, + ADDR_FMT_32_32_32_32_FLOAT = ADDR_FMT_32_32_32_32, + ADDR_FMT_RESERVED_36 = 0x00000024, + ADDR_FMT_1 = 0x00000025, + ADDR_FMT_1_REVERSED = 0x00000026, + ADDR_FMT_GB_GR = 0x00000027, + ADDR_FMT_BG_RG = 0x00000028, + ADDR_FMT_32_AS_8 = 0x00000029, + ADDR_FMT_32_AS_8_8 = 0x0000002a, + ADDR_FMT_5_9_9_9_SHAREDEXP = 0x0000002b, + ADDR_FMT_8_8_8 = 0x0000002c, + ADDR_FMT_16_16_16 = 0x0000002d, + ADDR_FMT_16_16_16_FLOAT = ADDR_FMT_16_16_16, + ADDR_FMT_32_32_32 = 0x0000002f, + ADDR_FMT_32_32_32_FLOAT = ADDR_FMT_32_32_32, + ADDR_FMT_BC1 = 0x00000031, + ADDR_FMT_BC2 = 0x00000032, + ADDR_FMT_BC3 = 0x00000033, + ADDR_FMT_BC4 = 0x00000034, + ADDR_FMT_BC5 = 0x00000035, + ADDR_FMT_BC6 = 0x00000036, + ADDR_FMT_BC7 = 0x00000037, + ADDR_FMT_32_AS_32_32_32_32 = 0x00000038, + ADDR_FMT_APC3 = 0x00000039, + ADDR_FMT_APC4 = 0x0000003a, + 
ADDR_FMT_APC5 = 0x0000003b, + ADDR_FMT_APC6 = 0x0000003c, + ADDR_FMT_APC7 = 0x0000003d, + ADDR_FMT_CTX1 = 0x0000003e, + ADDR_FMT_RESERVED_63 = 0x0000003f, + ADDR_FMT_ASTC_4x4 = 0x00000040, + ADDR_FMT_ASTC_5x4 = 0x00000041, + ADDR_FMT_ASTC_5x5 = 0x00000042, + ADDR_FMT_ASTC_6x5 = 0x00000043, + ADDR_FMT_ASTC_6x6 = 0x00000044, + ADDR_FMT_ASTC_8x5 = 0x00000045, + ADDR_FMT_ASTC_8x6 = 0x00000046, + ADDR_FMT_ASTC_8x8 = 0x00000047, + ADDR_FMT_ASTC_10x5 = 0x00000048, + ADDR_FMT_ASTC_10x6 = 0x00000049, + ADDR_FMT_ASTC_10x8 = 0x0000004a, + ADDR_FMT_ASTC_10x10 = 0x0000004b, + ADDR_FMT_ASTC_12x10 = 0x0000004c, + ADDR_FMT_ASTC_12x12 = 0x0000004d, + ADDR_FMT_ETC2_64BPP = 0x0000004e, + ADDR_FMT_ETC2_128BPP = 0x0000004f, +} AddrFormat; + +/** +**************************************************************************************************** +* AddrDepthFormat +* +* @brief +* Neutral enum for addrFlt32ToDepthPixel +* +**************************************************************************************************** +*/ +typedef enum _AddrDepthFormat +{ + ADDR_DEPTH_INVALID = 0x00000000, + ADDR_DEPTH_16 = 0x00000001, + ADDR_DEPTH_X8_24 = 0x00000002, + ADDR_DEPTH_8_24 = 0x00000003, + ADDR_DEPTH_X8_24_FLOAT = 0x00000004, + ADDR_DEPTH_8_24_FLOAT = 0x00000005, + ADDR_DEPTH_32_FLOAT = 0x00000006, + ADDR_DEPTH_X24_8_32_FLOAT = 0x00000007, + +} AddrDepthFormat; + +/** +**************************************************************************************************** +* AddrColorFormat +* +* @brief +* Neutral enum for ColorFormat +* +**************************************************************************************************** +*/ +typedef enum _AddrColorFormat +{ + ADDR_COLOR_INVALID = 0x00000000, + ADDR_COLOR_8 = 0x00000001, + ADDR_COLOR_4_4 = 0x00000002, + ADDR_COLOR_3_3_2 = 0x00000003, + ADDR_COLOR_RESERVED_4 = 0x00000004, + ADDR_COLOR_16 = 0x00000005, + ADDR_COLOR_16_FLOAT = 0x00000006, + ADDR_COLOR_8_8 = 0x00000007, + ADDR_COLOR_5_6_5 = 0x00000008, + ADDR_COLOR_6_5_5 = 
0x00000009, + ADDR_COLOR_1_5_5_5 = 0x0000000a, + ADDR_COLOR_4_4_4_4 = 0x0000000b, + ADDR_COLOR_5_5_5_1 = 0x0000000c, + ADDR_COLOR_32 = 0x0000000d, + ADDR_COLOR_32_FLOAT = 0x0000000e, + ADDR_COLOR_16_16 = 0x0000000f, + ADDR_COLOR_16_16_FLOAT = 0x00000010, + ADDR_COLOR_8_24 = 0x00000011, + ADDR_COLOR_8_24_FLOAT = 0x00000012, + ADDR_COLOR_24_8 = 0x00000013, + ADDR_COLOR_24_8_FLOAT = 0x00000014, + ADDR_COLOR_10_11_11 = 0x00000015, + ADDR_COLOR_10_11_11_FLOAT = 0x00000016, + ADDR_COLOR_11_11_10 = 0x00000017, + ADDR_COLOR_11_11_10_FLOAT = 0x00000018, + ADDR_COLOR_2_10_10_10 = 0x00000019, + ADDR_COLOR_8_8_8_8 = 0x0000001a, + ADDR_COLOR_10_10_10_2 = 0x0000001b, + ADDR_COLOR_X24_8_32_FLOAT = 0x0000001c, + ADDR_COLOR_32_32 = 0x0000001d, + ADDR_COLOR_32_32_FLOAT = 0x0000001e, + ADDR_COLOR_16_16_16_16 = 0x0000001f, + ADDR_COLOR_16_16_16_16_FLOAT = 0x00000020, + ADDR_COLOR_RESERVED_33 = 0x00000021, + ADDR_COLOR_32_32_32_32 = 0x00000022, + ADDR_COLOR_32_32_32_32_FLOAT = 0x00000023, +} AddrColorFormat; + +/** +**************************************************************************************************** +* AddrSurfaceNumber +* +* @brief +* Neutral enum for SurfaceNumber +* +**************************************************************************************************** +*/ +typedef enum _AddrSurfaceNumber { + ADDR_NUMBER_UNORM = 0x00000000, + ADDR_NUMBER_SNORM = 0x00000001, + ADDR_NUMBER_USCALED = 0x00000002, + ADDR_NUMBER_SSCALED = 0x00000003, + ADDR_NUMBER_UINT = 0x00000004, + ADDR_NUMBER_SINT = 0x00000005, + ADDR_NUMBER_SRGB = 0x00000006, + ADDR_NUMBER_FLOAT = 0x00000007, +} AddrSurfaceNumber; + +/** +**************************************************************************************************** +* AddrSurfaceSwap +* +* @brief +* Neutral enum for SurfaceSwap +* +**************************************************************************************************** +*/ +typedef enum _AddrSurfaceSwap { + ADDR_SWAP_STD = 0x00000000, + ADDR_SWAP_ALT = 0x00000001, + 
ADDR_SWAP_STD_REV = 0x00000002, + ADDR_SWAP_ALT_REV = 0x00000003, +} AddrSurfaceSwap; + +/** +**************************************************************************************************** +* AddrHtileBlockSize +* +* @brief +* Size of HTILE blocks, valid values are 4 or 8 for now +**************************************************************************************************** +*/ +typedef enum _AddrHtileBlockSize +{ + ADDR_HTILE_BLOCKSIZE_4 = 4, + ADDR_HTILE_BLOCKSIZE_8 = 8, +} AddrHtileBlockSize; + +/** +**************************************************************************************************** +* AddrPipeCfg +* +* @brief +* The pipe configuration field specifies both the number of pipes and +* how pipes are interleaved on the surface. +* The expression of number of pipes, the shader engine tile size, and packer tile size +* is encoded in a PIPE_CONFIG register field. +* In general the number of pipes usually matches the number of memory channels of the +* hardware configuration. +* For hw configurations w/ non-pow2 memory number of memory channels, it usually matches +* the number of ROP units(? TODO: which registers??) +* The enum value = hw enum + 1 which is to reserve 0 for requesting default. 
+**************************************************************************************************** +*/ +typedef enum _AddrPipeCfg +{ + ADDR_PIPECFG_INVALID = 0, + ADDR_PIPECFG_P2 = 1, /// 2 pipes, + ADDR_PIPECFG_P4_8x16 = 5, /// 4 pipes, + ADDR_PIPECFG_P4_16x16 = 6, + ADDR_PIPECFG_P4_16x32 = 7, + ADDR_PIPECFG_P4_32x32 = 8, + ADDR_PIPECFG_P8_16x16_8x16 = 9, /// 8 pipes + ADDR_PIPECFG_P8_16x32_8x16 = 10, + ADDR_PIPECFG_P8_32x32_8x16 = 11, + ADDR_PIPECFG_P8_16x32_16x16 = 12, + ADDR_PIPECFG_P8_32x32_16x16 = 13, + ADDR_PIPECFG_P8_32x32_16x32 = 14, + ADDR_PIPECFG_P8_32x64_32x32 = 15, + ADDR_PIPECFG_P16_32x32_8x16 = 17, /// 16 pipes + ADDR_PIPECFG_P16_32x32_16x16 = 18, + ADDR_PIPECFG_RESERVED = 19, /// reserved for internal use + ADDR_PIPECFG_MAX = 20, +} AddrPipeCfg; + +/** +**************************************************************************************************** +* AddrTileType +* +* @brief +* Neutral enums that specifies micro tile type (MICRO_TILE_MODE) +**************************************************************************************************** +*/ +typedef enum _AddrTileType +{ + ADDR_DISPLAYABLE = 0, ///< Displayable tiling + ADDR_NON_DISPLAYABLE = 1, ///< Non-displayable tiling, a.k.a thin micro tiling + ADDR_DEPTH_SAMPLE_ORDER = 2, ///< Same as non-displayable plus depth-sample-order + ADDR_ROTATED = 3, ///< Rotated displayable tiling + ADDR_THICK = 4, ///< Thick micro-tiling, only valid for THICK and XTHICK +} AddrTileType; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Type definitions: short system-independent names for address library types +// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#if !defined(__APPLE__) || defined(HAVE_TSERVER) + +#ifndef BOOL_32 // no bool type in C +/// @brief Boolean type, since none is defined in C +/// @ingroup type +#define BOOL_32 int +#endif + +#ifndef INT_32 +#define INT_32 int 
+#endif + +#ifndef UINT_32 +#define UINT_32 unsigned int +#endif + +#ifndef INT_16 +#define INT_16 short +#endif + +#ifndef UINT_16 +#define UINT_16 unsigned short +#endif + +#ifndef INT_8 +#define INT_8 char +#endif + +#ifndef UINT_8 +#define UINT_8 unsigned char +#endif + +#ifndef NULL +#define NULL 0 +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +#ifndef FALSE +#define FALSE 0 +#endif + +// +// 64-bit integer types depend on the compiler +// +#if defined( __GNUC__ ) || defined( __WATCOMC__ ) +#define INT_64 long long +#define UINT_64 unsigned long long + +#elif defined( _WIN32 ) +#define INT_64 __int64 +#define UINT_64 unsigned __int64 + +#else +#error Unsupported compiler and/or operating system for 64-bit integers + +/// @brief 64-bit signed integer type (compiler dependent) +/// @ingroup type +/// +/// The addrlib defines a 64-bit signed integer type for either +/// Gnu/Watcom compilers (which use the first syntax) or for +/// the Windows VCC compiler (which uses the second syntax). +#define INT_64 long long OR __int64 + +/// @brief 64-bit unsigned integer type (compiler dependent) +/// @ingroup type +/// +/// The addrlib defines a 64-bit unsigned integer type for either +/// Gnu/Watcom compilers (which use the first syntax) or for +/// the Windows VCC compiler (which uses the second syntax). 
+/// +#define UINT_64 unsigned long long OR unsigned __int64 +#endif + +#endif // #if !defined(__APPLE__) || defined(HAVE_TSERVER) + +// ADDR64X is used to print addresses in hex form on both Windows and Linux +// +#if defined( __GNUC__ ) || defined( __WATCOMC__ ) +#define ADDR64X "llx" +#define ADDR64D "lld" + +#elif defined( _WIN32 ) +#define ADDR64X "I64x" +#define ADDR64D "I64d" + +#else +#error Unsupported compiler and/or operating system for 64-bit integers + +/// @brief Addrlib device address 64-bit printf tag (compiler dependent) +/// @ingroup type +/// +/// This allows printf to display an ADDR_64 for either the Windows VCC compiler +/// (which used this value) or the Gnu/Watcom compilers (which use "llx". +/// An example of use is printf("addr 0x%"ADDR64X"\n", address); +/// +#define ADDR64X "llx" OR "I64x" +#define ADDR64D "lld" OR "I64d" +#endif + +/// @brief Union for storing a 32-bit float or 32-bit integer +/// @ingroup type +/// +/// This union provides a simple way to convert between a 32-bit float +/// and a 32-bit integer. It also prevents the compiler from producing +/// code that alters NaN values when assiging or coying floats. +/// Therefore, all address library routines that pass or return 32-bit +/// floating point data do so by passing or returning a FLT_32. 
+/// +typedef union { + INT_32 i; + UINT_32 u; + float f; +} ADDR_FLT_32; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Macros for controlling linking and building on multiple systems +// +//////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(_MSC_VER) +#if defined(va_copy) +#undef va_copy //redefine va_copy to support VC2013 +#endif +#endif + +#if !defined(va_copy) +#define va_copy(dst, src) \ + ((void) memcpy(&(dst), &(src), sizeof(va_list))) +#endif + +#endif // __ADDR_TYPES_H__ + diff -Nru mesa-18.3.3/src/amd/addrlib/inc/chip/gfx9/gfx9_gb_reg.h mesa-19.0.1/src/amd/addrlib/inc/chip/gfx9/gfx9_gb_reg.h --- mesa-18.3.3/src/amd/addrlib/inc/chip/gfx9/gfx9_gb_reg.h 2017-11-14 18:46:21.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/inc/chip/gfx9/gfx9_gb_reg.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,89 +0,0 @@ -#if !defined (__GFX9_GB_REG_H__) -#define __GFX9_GB_REG_H__ - -/* - * Copyright © 2017 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -#include "util/u_endian.h" - -#if defined(PIPE_ARCH_LITTLE_ENDIAN) -#define LITTLEENDIAN_CPU -#elif defined(PIPE_ARCH_BIG_ENDIAN) -#define BIGENDIAN_CPU -#endif - -// -// Make sure the necessary endian defines are there. -// -#if defined(LITTLEENDIAN_CPU) -#elif defined(BIGENDIAN_CPU) -#else -#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined" -#endif - -union GB_ADDR_CONFIG { - struct { -#if defined(LITTLEENDIAN_CPU) - unsigned int NUM_PIPES : 3; - unsigned int PIPE_INTERLEAVE_SIZE : 3; - unsigned int MAX_COMPRESSED_FRAGS : 2; - unsigned int BANK_INTERLEAVE_SIZE : 3; - unsigned int : 1; - unsigned int NUM_BANKS : 3; - unsigned int : 1; - unsigned int SHADER_ENGINE_TILE_SIZE : 3; - unsigned int NUM_SHADER_ENGINES : 2; - unsigned int NUM_GPUS : 3; - unsigned int MULTI_GPU_TILE_SIZE : 2; - unsigned int NUM_RB_PER_SE : 2; - unsigned int ROW_SIZE : 2; - unsigned int NUM_LOWER_PIPES : 1; - unsigned int SE_ENABLE : 1; -#elif defined(BIGENDIAN_CPU) - unsigned int SE_ENABLE : 1; - unsigned int NUM_LOWER_PIPES : 1; - unsigned int ROW_SIZE : 2; - unsigned int NUM_RB_PER_SE : 2; - unsigned int MULTI_GPU_TILE_SIZE : 2; - unsigned int NUM_GPUS : 3; - unsigned int NUM_SHADER_ENGINES : 2; - unsigned int SHADER_ENGINE_TILE_SIZE : 3; - unsigned int : 1; - unsigned int NUM_BANKS : 3; - unsigned int : 1; - unsigned int BANK_INTERLEAVE_SIZE : 3; - unsigned int MAX_COMPRESSED_FRAGS : 2; - unsigned int PIPE_INTERLEAVE_SIZE : 3; - unsigned int NUM_PIPES : 3; -#endif - } bitfields, bits; - unsigned int u32All; - 
signed int i32All; - float f32All; -}; - -#endif - diff -Nru mesa-18.3.3/src/amd/addrlib/inc/chip/r800/si_gb_reg.h mesa-19.0.1/src/amd/addrlib/inc/chip/r800/si_gb_reg.h --- mesa-18.3.3/src/amd/addrlib/inc/chip/r800/si_gb_reg.h 2017-11-14 18:46:21.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/inc/chip/r800/si_gb_reg.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,163 +0,0 @@ -#if !defined (__SI_GB_REG_H__) -#define __SI_GB_REG_H__ - -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -#include "util/u_endian.h" - -#if defined(PIPE_ARCH_LITTLE_ENDIAN) -#define LITTLEENDIAN_CPU -#elif defined(PIPE_ARCH_BIG_ENDIAN) -#define BIGENDIAN_CPU -#endif - -// -// Make sure the necessary endian defines are there. 
-// -#if defined(LITTLEENDIAN_CPU) -#elif defined(BIGENDIAN_CPU) -#else -#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined" -#endif - -/* - * GB_ADDR_CONFIG struct - */ - -#if defined(LITTLEENDIAN_CPU) - - typedef struct _GB_ADDR_CONFIG_T { - unsigned int num_pipes : 3; - unsigned int : 1; - unsigned int pipe_interleave_size : 3; - unsigned int : 1; - unsigned int bank_interleave_size : 3; - unsigned int : 1; - unsigned int num_shader_engines : 2; - unsigned int : 2; - unsigned int shader_engine_tile_size : 3; - unsigned int : 1; - unsigned int num_gpus : 3; - unsigned int : 1; - unsigned int multi_gpu_tile_size : 2; - unsigned int : 2; - unsigned int row_size : 2; - unsigned int num_lower_pipes : 1; - unsigned int : 1; - } GB_ADDR_CONFIG_T; - -#elif defined(BIGENDIAN_CPU) - - typedef struct _GB_ADDR_CONFIG_T { - unsigned int : 1; - unsigned int num_lower_pipes : 1; - unsigned int row_size : 2; - unsigned int : 2; - unsigned int multi_gpu_tile_size : 2; - unsigned int : 1; - unsigned int num_gpus : 3; - unsigned int : 1; - unsigned int shader_engine_tile_size : 3; - unsigned int : 2; - unsigned int num_shader_engines : 2; - unsigned int : 1; - unsigned int bank_interleave_size : 3; - unsigned int : 1; - unsigned int pipe_interleave_size : 3; - unsigned int : 1; - unsigned int num_pipes : 3; - } GB_ADDR_CONFIG_T; - -#endif - -typedef union { - unsigned int val : 32; - GB_ADDR_CONFIG_T f; -} GB_ADDR_CONFIG; - -#if defined(LITTLEENDIAN_CPU) - - typedef struct _GB_TILE_MODE_T { - unsigned int micro_tile_mode : 2; - unsigned int array_mode : 4; - unsigned int pipe_config : 5; - unsigned int tile_split : 3; - unsigned int bank_width : 2; - unsigned int bank_height : 2; - unsigned int macro_tile_aspect : 2; - unsigned int num_banks : 2; - unsigned int micro_tile_mode_new : 3; - unsigned int sample_split : 2; - unsigned int : 5; - } GB_TILE_MODE_T; - - typedef struct _GB_MACROTILE_MODE_T { - unsigned int bank_width : 2; - unsigned int bank_height : 2; - unsigned 
int macro_tile_aspect : 2; - unsigned int num_banks : 2; - unsigned int : 24; - } GB_MACROTILE_MODE_T; - -#elif defined(BIGENDIAN_CPU) - - typedef struct _GB_TILE_MODE_T { - unsigned int : 5; - unsigned int sample_split : 2; - unsigned int micro_tile_mode_new : 3; - unsigned int num_banks : 2; - unsigned int macro_tile_aspect : 2; - unsigned int bank_height : 2; - unsigned int bank_width : 2; - unsigned int tile_split : 3; - unsigned int pipe_config : 5; - unsigned int array_mode : 4; - unsigned int micro_tile_mode : 2; - } GB_TILE_MODE_T; - - typedef struct _GB_MACROTILE_MODE_T { - unsigned int : 24; - unsigned int num_banks : 2; - unsigned int macro_tile_aspect : 2; - unsigned int bank_height : 2; - unsigned int bank_width : 2; - } GB_MACROTILE_MODE_T; - -#endif - -typedef union { - unsigned int val : 32; - GB_TILE_MODE_T f; -} GB_TILE_MODE; - -typedef union { - unsigned int val : 32; - GB_MACROTILE_MODE_T f; -} GB_MACROTILE_MODE; - -#endif - diff -Nru mesa-18.3.3/src/amd/addrlib/meson.build mesa-19.0.1/src/amd/addrlib/meson.build --- mesa-18.3.3/src/amd/addrlib/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -19,35 +19,33 @@ # SOFTWARE. 
files_addrlib = files( - 'addrinterface.cpp', - 'addrinterface.h', - 'addrtypes.h', - 'core/addrcommon.h', - 'core/addrelemlib.cpp', - 'core/addrelemlib.h', - 'core/addrlib.cpp', - 'core/addrlib.h', - 'core/addrlib1.cpp', - 'core/addrlib1.h', - 'core/addrlib2.cpp', - 'core/addrlib2.h', - 'core/addrobject.cpp', - 'core/addrobject.h', - 'gfx9/chip/gfx9_enum.h', - 'gfx9/coord.cpp', - 'gfx9/coord.h', - 'gfx9/gfx9addrlib.cpp', - 'gfx9/gfx9addrlib.h', - 'amdgpu_asic_addr.h', - 'inc/chip/gfx9/gfx9_gb_reg.h', - 'inc/chip/r800/si_gb_reg.h', - 'r800/chip/si_ci_vi_merged_enum.h', - 'r800/ciaddrlib.cpp', - 'r800/ciaddrlib.h', - 'r800/egbaddrlib.cpp', - 'r800/egbaddrlib.h', - 'r800/siaddrlib.cpp', - 'r800/siaddrlib.h', + 'inc/addrinterface.h', + 'inc/addrtypes.h', + 'src/addrinterface.cpp', + 'src/core/addrcommon.h', + 'src/core/addrelemlib.cpp', + 'src/core/addrelemlib.h', + 'src/core/addrlib.cpp', + 'src/core/addrlib.h', + 'src/core/addrlib1.cpp', + 'src/core/addrlib1.h', + 'src/core/addrlib2.cpp', + 'src/core/addrlib2.h', + 'src/core/addrobject.cpp', + 'src/core/addrobject.h', + 'src/core/coord.cpp', + 'src/core/coord.h', + 'src/gfx9/gfx9addrlib.cpp', + 'src/gfx9/gfx9addrlib.h', + 'src/amdgpu_asic_addr.h', + 'src/chip/gfx9/gfx9_gb_reg.h', + 'src/chip/r800/si_gb_reg.h', + 'src/r800/ciaddrlib.cpp', + 'src/r800/ciaddrlib.h', + 'src/r800/egbaddrlib.cpp', + 'src/r800/egbaddrlib.h', + 'src/r800/siaddrlib.cpp', + 'src/r800/siaddrlib.h', ) libamdgpu_addrlib = static_library( @@ -55,7 +53,7 @@ files_addrlib, include_directories : [ include_directories( - 'core', 'inc/chip/gfx9', 'inc/chip/r800', 'gfx9/chip', 'r800/chip', + 'inc', 'src', 'src/core', 'src/chip/gfx9', 'src/chip/r800', ), inc_amd_common, inc_common, inc_src, ], diff -Nru mesa-18.3.3/src/amd/addrlib/r800/chip/si_ci_vi_merged_enum.h mesa-19.0.1/src/amd/addrlib/r800/chip/si_ci_vi_merged_enum.h --- mesa-18.3.3/src/amd/addrlib/r800/chip/si_ci_vi_merged_enum.h 2017-11-05 00:14:08.000000000 +0000 +++ 
mesa-19.0.1/src/amd/addrlib/r800/chip/si_ci_vi_merged_enum.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,40 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- */ -#if !defined (SI_CI_VI_MERGED_ENUM_HEADER) -#define SI_CI_VI_MERGED_ENUM_HEADER - -typedef enum PipeInterleaveSize { -ADDR_CONFIG_PIPE_INTERLEAVE_256B = 0x00000000, -ADDR_CONFIG_PIPE_INTERLEAVE_512B = 0x00000001, -} PipeInterleaveSize; - -typedef enum RowSize { -ADDR_CONFIG_1KB_ROW = 0x00000000, -ADDR_CONFIG_2KB_ROW = 0x00000001, -ADDR_CONFIG_4KB_ROW = 0x00000002, -} RowSize; - -#endif diff -Nru mesa-18.3.3/src/amd/addrlib/r800/ciaddrlib.cpp mesa-19.0.1/src/amd/addrlib/r800/ciaddrlib.cpp --- mesa-18.3.3/src/amd/addrlib/r800/ciaddrlib.cpp 2018-04-19 04:33:31.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/r800/ciaddrlib.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,2341 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- */ - -/** -**************************************************************************************************** -* @file ciaddrlib.cpp -* @brief Contains the implementation for the CiLib class. -**************************************************************************************************** -*/ - -#include "ciaddrlib.h" - -#include "si_gb_reg.h" - -#include "amdgpu_asic_addr.h" - -//////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace Addr -{ - -/** -**************************************************************************************************** -* CiHwlInit -* -* @brief -* Creates an CiLib object. -* -* @return -* Returns an CiLib object pointer. -**************************************************************************************************** -*/ -Lib* CiHwlInit(const Client* pClient) -{ - return V1::CiLib::CreateObj(pClient); -} - -namespace V1 -{ - -/** -**************************************************************************************************** -* Mask -* -* @brief -* Gets a mask of "width" -* @return -* Bit mask -**************************************************************************************************** -*/ -static UINT_64 Mask( - UINT_32 width) ///< Width of bits -{ - UINT_64 ret; - - if (width >= sizeof(UINT_64)*8) - { - ret = ~((UINT_64) 0); - } - else - { - return (((UINT_64) 1) << width) - 1; - } - return ret; -} - -/** -**************************************************************************************************** -* GetBits -* -* @brief -* Gets bits within a range of [msb, lsb] -* @return -* Bits of this range -**************************************************************************************************** -*/ -static UINT_64 GetBits( - UINT_64 bits, ///< Source bits - UINT_32 msb, ///< Most signicant bit - UINT_32 lsb) ///< Least signicant bit -{ - 
UINT_64 ret = 0; - - if (msb >= lsb) - { - ret = (bits >> lsb) & (Mask(1 + msb - lsb)); - } - return ret; -} - -/** -**************************************************************************************************** -* RemoveBits -* -* @brief -* Removes bits within the range of [msb, lsb] -* @return -* Modified bits -**************************************************************************************************** -*/ -static UINT_64 RemoveBits( - UINT_64 bits, ///< Source bits - UINT_32 msb, ///< Most signicant bit - UINT_32 lsb) ///< Least signicant bit -{ - UINT_64 ret = bits; - - if (msb >= lsb) - { - ret = GetBits(bits, lsb - 1, 0) // low bits - | (GetBits(bits, 8 * sizeof(bits) - 1, msb + 1) << lsb); //high bits - } - return ret; -} - -/** -**************************************************************************************************** -* InsertBits -* -* @brief -* Inserts new bits into the range of [msb, lsb] -* @return -* Modified bits -**************************************************************************************************** -*/ -static UINT_64 InsertBits( - UINT_64 bits, ///< Source bits - UINT_64 newBits, ///< New bits to be inserted - UINT_32 msb, ///< Most signicant bit - UINT_32 lsb) ///< Least signicant bit -{ - UINT_64 ret = bits; - - if (msb >= lsb) - { - ret = GetBits(bits, lsb - 1, 0) // old low bitss - | (GetBits(newBits, msb - lsb, 0) << lsb) //new bits - | (GetBits(bits, 8 * sizeof(bits) - 1, lsb) << (msb + 1)); //old high bits - } - return ret; -} - -/** -**************************************************************************************************** -* CiLib::CiLib -* -* @brief -* Constructor -* -**************************************************************************************************** -*/ -CiLib::CiLib(const Client* pClient) - : - SiLib(pClient), - m_noOfMacroEntries(0), - m_allowNonDispThickModes(FALSE) -{ - m_class = CI_ADDRLIB; -} - -/** 
-**************************************************************************************************** -* CiLib::~CiLib -* -* @brief -* Destructor -**************************************************************************************************** -*/ -CiLib::~CiLib() -{ -} - -/** -**************************************************************************************************** -* CiLib::HwlComputeDccInfo -* -* @brief -* Compute DCC key size, base alignment -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE CiLib::HwlComputeDccInfo( - const ADDR_COMPUTE_DCCINFO_INPUT* pIn, - ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (m_settings.isVolcanicIslands && IsMacroTiled(pIn->tileMode)) - { - UINT_64 dccFastClearSize = pIn->colorSurfSize >> 8; - - ADDR_ASSERT(0 == (pIn->colorSurfSize & 0xff)); - - if (pIn->numSamples > 1) - { - UINT_32 tileSizePerSample = BITS_TO_BYTES(pIn->bpp * MicroTileWidth * MicroTileHeight); - UINT_32 samplesPerSplit = pIn->tileInfo.tileSplitBytes / tileSizePerSample; - - if (samplesPerSplit < pIn->numSamples) - { - UINT_32 numSplits = pIn->numSamples / samplesPerSplit; - UINT_32 fastClearBaseAlign = HwlGetPipes(&pIn->tileInfo) * m_pipeInterleaveBytes; - - ADDR_ASSERT(IsPow2(fastClearBaseAlign)); - - dccFastClearSize /= numSplits; - - if (0 != (dccFastClearSize & (fastClearBaseAlign - 1))) - { - // Disable dcc fast clear - // if key size of fisrt sample split is not pipe*interleave aligned - dccFastClearSize = 0; - } - } - } - - pOut->dccRamSize = pIn->colorSurfSize >> 8; - pOut->dccRamBaseAlign = pIn->tileInfo.banks * - HwlGetPipes(&pIn->tileInfo) * - m_pipeInterleaveBytes; - pOut->dccFastClearSize = dccFastClearSize; - pOut->dccRamSizeAligned = TRUE; - - ADDR_ASSERT(IsPow2(pOut->dccRamBaseAlign)); - - if (0 == (pOut->dccRamSize & (pOut->dccRamBaseAlign - 1))) - { - pOut->subLvlCompressible = 
TRUE; - } - else - { - UINT_64 dccRamSizeAlign = HwlGetPipes(&pIn->tileInfo) * m_pipeInterleaveBytes; - - if (pOut->dccRamSize == pOut->dccFastClearSize) - { - pOut->dccFastClearSize = PowTwoAlign(pOut->dccRamSize, dccRamSizeAlign); - } - if ((pOut->dccRamSize & (dccRamSizeAlign - 1)) != 0) - { - pOut->dccRamSizeAligned = FALSE; - } - pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, dccRamSizeAlign); - pOut->subLvlCompressible = FALSE; - } - } - else - { - returnCode = ADDR_NOTSUPPORTED; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* CiLib::HwlComputeCmaskAddrFromCoord -* -* @brief -* Compute tc compatible Cmask address from fmask ram address -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE CiLib::HwlComputeCmaskAddrFromCoord( - const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] fmask addr/bpp/tile input - ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] cmask address - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_NOTSUPPORTED; - - if ((m_settings.isVolcanicIslands == TRUE) && - (pIn->flags.tcCompatible == TRUE)) - { - UINT_32 numOfPipes = HwlGetPipes(pIn->pTileInfo); - UINT_32 numOfBanks = pIn->pTileInfo->banks; - UINT_64 fmaskAddress = pIn->fmaskAddr; - UINT_32 elemBits = pIn->bpp; - UINT_32 blockByte = 64 * elemBits / 8; - UINT_64 metaNibbleAddress = HwlComputeMetadataNibbleAddress(fmaskAddress, - 0, - 0, - 4, // cmask 4 bits - elemBits, - blockByte, - m_pipeInterleaveBytes, - numOfPipes, - numOfBanks, - 1); - pOut->addr = (metaNibbleAddress >> 1); - pOut->bitPosition = (metaNibbleAddress % 2) ? 
4 : 0; - returnCode = ADDR_OK; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* CiLib::HwlComputeHtileAddrFromCoord -* -* @brief -* Compute tc compatible Htile address from depth/stencil address -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE CiLib::HwlComputeHtileAddrFromCoord( - const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] depth/stencil addr/bpp/tile input - ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] htile address - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_NOTSUPPORTED; - - if ((m_settings.isVolcanicIslands == TRUE) && - (pIn->flags.tcCompatible == TRUE)) - { - UINT_32 numOfPipes = HwlGetPipes(pIn->pTileInfo); - UINT_32 numOfBanks = pIn->pTileInfo->banks; - UINT_64 zStencilAddr = pIn->zStencilAddr; - UINT_32 elemBits = pIn->bpp; - UINT_32 blockByte = 64 * elemBits / 8; - UINT_64 metaNibbleAddress = HwlComputeMetadataNibbleAddress(zStencilAddr, - 0, - 0, - 32, // htile 32 bits - elemBits, - blockByte, - m_pipeInterleaveBytes, - numOfPipes, - numOfBanks, - 1); - pOut->addr = (metaNibbleAddress >> 1); - pOut->bitPosition = 0; - returnCode = ADDR_OK; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* CiLib::HwlConvertChipFamily -* -* @brief -* Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision -* @return -* ChipFamily -**************************************************************************************************** -*/ -ChipFamily CiLib::HwlConvertChipFamily( - UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h - UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h -{ - ChipFamily family = ADDR_CHIP_FAMILY_CI; - - switch (uChipFamily) - { - case FAMILY_CI: - 
m_settings.isSeaIsland = 1; - m_settings.isBonaire = ASICREV_IS_BONAIRE_M(uChipRevision); - m_settings.isHawaii = ASICREV_IS_HAWAII_P(uChipRevision); - break; - case FAMILY_KV: - m_settings.isKaveri = 1; - m_settings.isSpectre = ASICREV_IS_SPECTRE(uChipRevision); - m_settings.isSpooky = ASICREV_IS_SPOOKY(uChipRevision); - m_settings.isKalindi = ASICREV_IS_KALINDI(uChipRevision); - break; - case FAMILY_VI: - m_settings.isVolcanicIslands = 1; - m_settings.isIceland = ASICREV_IS_ICELAND_M(uChipRevision); - m_settings.isTonga = ASICREV_IS_TONGA_P(uChipRevision); - m_settings.isFiji = ASICREV_IS_FIJI_P(uChipRevision); - m_settings.isPolaris10 = ASICREV_IS_POLARIS10_P(uChipRevision); - m_settings.isPolaris11 = ASICREV_IS_POLARIS11_M(uChipRevision); - m_settings.isPolaris12 = ASICREV_IS_POLARIS12_V(uChipRevision); - m_settings.isVegaM = ASICREV_IS_VEGAM_P(uChipRevision); - family = ADDR_CHIP_FAMILY_VI; - break; - case FAMILY_CZ: - m_settings.isCarrizo = 1; - m_settings.isVolcanicIslands = 1; - family = ADDR_CHIP_FAMILY_VI; - break; - default: - ADDR_ASSERT(!"This should be a unexpected Fusion"); - break; - } - - return family; -} - -/** -**************************************************************************************************** -* CiLib::HwlInitGlobalParams -* -* @brief -* Initializes global parameters -* -* @return -* TRUE if all settings are valid -* -**************************************************************************************************** -*/ -BOOL_32 CiLib::HwlInitGlobalParams( - const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input -{ - BOOL_32 valid = TRUE; - - const ADDR_REGISTER_VALUE* pRegValue = &pCreateIn->regValue; - - valid = DecodeGbRegs(pRegValue); - - // The following assignments for m_pipes is only for fail-safe, InitTileSettingTable should - // read the correct pipes from tile mode table - if (m_settings.isHawaii) - { - m_pipes = 16; - } - else if (m_settings.isBonaire || m_settings.isSpectre) - { - m_pipes = 4; - } - else // 
Treat other KV asics to be 2-pipe - { - m_pipes = 2; - } - - // @todo: VI - // Move this to VI code path once created - if (m_settings.isTonga || m_settings.isPolaris10) - { - m_pipes = 8; - } - else if (m_settings.isIceland) - { - m_pipes = 2; - } - else if (m_settings.isFiji) - { - m_pipes = 16; - } - else if (m_settings.isPolaris11 || m_settings.isPolaris12) - { - m_pipes = 4; - } - else if (m_settings.isVegaM) - { - m_pipes = 16; - } - - if (valid) - { - valid = InitTileSettingTable(pRegValue->pTileConfig, pRegValue->noOfEntries); - } - if (valid) - { - valid = InitMacroTileCfgTable(pRegValue->pMacroTileConfig, pRegValue->noOfMacroEntries); - } - - if (valid) - { - InitEquationTable(); - } - - return valid; -} - -/** -**************************************************************************************************** -* CiLib::HwlPostCheckTileIndex -* -* @brief -* Map a tile setting to index if curIndex is invalid, otherwise check if curIndex matches -* tile mode/type/info and change the index if needed -* @return -* Tile index. -**************************************************************************************************** -*/ -INT_32 CiLib::HwlPostCheckTileIndex( - const ADDR_TILEINFO* pInfo, ///< [in] Tile Info - AddrTileMode mode, ///< [in] Tile mode - AddrTileType type, ///< [in] Tile type - INT curIndex ///< [in] Current index assigned in HwlSetupTileInfo - ) const -{ - INT_32 index = curIndex; - - if (mode == ADDR_TM_LINEAR_GENERAL) - { - index = TileIndexLinearGeneral; - } - else - { - BOOL_32 macroTiled = IsMacroTiled(mode); - - // We need to find a new index if either of them is true - // 1. curIndex is invalid - // 2. tile mode is changed - // 3. 
tile info does not match for macro tiled - if ((index == TileIndexInvalid) || - (mode != m_tileTable[index].mode) || - (macroTiled && pInfo->pipeConfig != m_tileTable[index].info.pipeConfig)) - { - for (index = 0; index < static_cast(m_noOfEntries); index++) - { - if (macroTiled) - { - // macro tile modes need all to match - if ((pInfo->pipeConfig == m_tileTable[index].info.pipeConfig) && - (mode == m_tileTable[index].mode) && - (type == m_tileTable[index].type)) - { - // tileSplitBytes stored in m_tileTable is only valid for depth entries - if (type == ADDR_DEPTH_SAMPLE_ORDER) - { - if (Min(m_tileTable[index].info.tileSplitBytes, - m_rowSize) == pInfo->tileSplitBytes) - { - break; - } - } - else // other entries are determined by other 3 fields - { - break; - } - } - } - else if (mode == ADDR_TM_LINEAR_ALIGNED) - { - // linear mode only needs tile mode to match - if (mode == m_tileTable[index].mode) - { - break; - } - } - else - { - // micro tile modes only need tile mode and tile type to match - if (mode == m_tileTable[index].mode && - type == m_tileTable[index].type) - { - break; - } - } - } - } - } - - ADDR_ASSERT(index < static_cast(m_noOfEntries)); - - if (index >= static_cast(m_noOfEntries)) - { - index = TileIndexInvalid; - } - - return index; -} - -/** -**************************************************************************************************** -* CiLib::HwlSetupTileCfg -* -* @brief -* Map tile index to tile setting. 
-* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE CiLib::HwlSetupTileCfg( - UINT_32 bpp, ///< Bits per pixel - INT_32 index, ///< Tile index - INT_32 macroModeIndex, ///< Index in macro tile mode table(CI) - ADDR_TILEINFO* pInfo, ///< [out] Tile Info - AddrTileMode* pMode, ///< [out] Tile mode - AddrTileType* pType ///< [out] Tile type - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - // Global flag to control usage of tileIndex - if (UseTileIndex(index)) - { - if (index == TileIndexLinearGeneral) - { - pInfo->banks = 2; - pInfo->bankWidth = 1; - pInfo->bankHeight = 1; - pInfo->macroAspectRatio = 1; - pInfo->tileSplitBytes = 64; - pInfo->pipeConfig = ADDR_PIPECFG_P2; - } - else if (static_cast(index) >= m_noOfEntries) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - const TileConfig* pCfgTable = GetTileSetting(index); - - if (pInfo != NULL) - { - if (IsMacroTiled(pCfgTable->mode)) - { - ADDR_ASSERT((macroModeIndex != TileIndexInvalid) && - (macroModeIndex != TileIndexNoMacroIndex)); - - UINT_32 tileSplit; - - *pInfo = m_macroTileTable[macroModeIndex]; - - if (pCfgTable->type == ADDR_DEPTH_SAMPLE_ORDER) - { - tileSplit = pCfgTable->info.tileSplitBytes; - } - else - { - if (bpp > 0) - { - UINT_32 thickness = Thickness(pCfgTable->mode); - UINT_32 tileBytes1x = BITS_TO_BYTES(bpp * MicroTilePixels * thickness); - // Non-depth entries store a split factor - UINT_32 sampleSplit = m_tileTable[index].info.tileSplitBytes; - tileSplit = Max(256u, sampleSplit * tileBytes1x); - } - else - { - // Return tileBytes instead if not enough info - tileSplit = pInfo->tileSplitBytes; - } - } - - // Clamp to row_size - pInfo->tileSplitBytes = Min(m_rowSize, tileSplit); - - pInfo->pipeConfig = pCfgTable->info.pipeConfig; - } - else // 1D and linear modes, we return default value stored in table - { - *pInfo = pCfgTable->info; - } - } - - if (pMode != NULL) - { - 
*pMode = pCfgTable->mode; - } - - if (pType != NULL) - { - *pType = pCfgTable->type; - } - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* CiLib::HwlComputeSurfaceInfo -* -* @brief -* Entry of CI's ComputeSurfaceInfo -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE CiLib::HwlComputeSurfaceInfo( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - // If tileIndex is invalid, force macroModeIndex to be invalid, too - if (pIn->tileIndex == TileIndexInvalid) - { - pOut->macroModeIndex = TileIndexInvalid; - } - - ADDR_E_RETURNCODE retCode = SiLib::HwlComputeSurfaceInfo(pIn, pOut); - - if ((pIn->mipLevel > 0) && - (pOut->tcCompatible == TRUE) && - (pOut->tileMode != pIn->tileMode) && - (m_settings.isVolcanicIslands == TRUE)) - { - pOut->tcCompatible = CheckTcCompatibility(pOut->pTileInfo, pIn->bpp, pOut->tileMode, pOut->tileType, pOut); - } - - if (pOut->macroModeIndex == TileIndexNoMacroIndex) - { - pOut->macroModeIndex = TileIndexInvalid; - } - - if ((pIn->flags.matchStencilTileCfg == TRUE) && - (pIn->flags.depth == TRUE)) - { - pOut->stencilTileIdx = TileIndexInvalid; - - if ((MinDepth2DThinIndex <= pOut->tileIndex) && - (MaxDepth2DThinIndex >= pOut->tileIndex)) - { - BOOL_32 depthStencil2DTileConfigMatch = DepthStencilTileCfgMatch(pIn, pOut); - - if ((depthStencil2DTileConfigMatch == FALSE) && - (pOut->tcCompatible == TRUE)) - { - pOut->macroModeIndex = TileIndexInvalid; - - ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn; - localIn.tileIndex = TileIndexInvalid; - localIn.pTileInfo = NULL; - localIn.flags.tcCompatible = FALSE; - - SiLib::HwlComputeSurfaceInfo(&localIn, pOut); - - ADDR_ASSERT((MinDepth2DThinIndex <= pOut->tileIndex) && (MaxDepth2DThinIndex >= 
pOut->tileIndex)); - - depthStencil2DTileConfigMatch = DepthStencilTileCfgMatch(pIn, pOut); - } - - if ((depthStencil2DTileConfigMatch == FALSE) && - (pIn->numSamples <= 1)) - { - pOut->macroModeIndex = TileIndexInvalid; - - ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn; - localIn.tileMode = ADDR_TM_1D_TILED_THIN1; - localIn.tileIndex = TileIndexInvalid; - localIn.pTileInfo = NULL; - - retCode = SiLib::HwlComputeSurfaceInfo(&localIn, pOut); - } - } - - if (pOut->tileIndex == Depth1DThinIndex) - { - pOut->stencilTileIdx = Depth1DThinIndex; - } - } - - return retCode; -} - -/** -**************************************************************************************************** -* CiLib::HwlFmaskSurfaceInfo -* @brief -* Entry of r800's ComputeFmaskInfo -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE CiLib::HwlComputeFmaskInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut ///< [out] output structure - ) -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - ADDR_TILEINFO tileInfo = {0}; - ADDR_COMPUTE_FMASK_INFO_INPUT fmaskIn; - fmaskIn = *pIn; - - AddrTileMode tileMode = pIn->tileMode; - - // Use internal tile info if pOut does not have a valid pTileInfo - if (pOut->pTileInfo == NULL) - { - pOut->pTileInfo = &tileInfo; - } - - ADDR_ASSERT(tileMode == ADDR_TM_2D_TILED_THIN1 || - tileMode == ADDR_TM_3D_TILED_THIN1 || - tileMode == ADDR_TM_PRT_TILED_THIN1 || - tileMode == ADDR_TM_PRT_2D_TILED_THIN1 || - tileMode == ADDR_TM_PRT_3D_TILED_THIN1); - - ADDR_ASSERT(m_tileTable[14].mode == ADDR_TM_2D_TILED_THIN1); - ADDR_ASSERT(m_tileTable[15].mode == ADDR_TM_3D_TILED_THIN1); - - // The only valid tile modes for fmask are 2D_THIN1 and 3D_THIN1 plus non-displayable - INT_32 tileIndex = tileMode == ADDR_TM_2D_TILED_THIN1 ? 
14 : 15; - ADDR_SURFACE_FLAGS flags = {{0}}; - flags.fmask = 1; - - INT_32 macroModeIndex = TileIndexInvalid; - - UINT_32 numSamples = pIn->numSamples; - UINT_32 numFrags = pIn->numFrags == 0 ? numSamples : pIn->numFrags; - - UINT_32 bpp = QLog2(numFrags); - - // EQAA needs one more bit - if (numSamples > numFrags) - { - bpp++; - } - - if (bpp == 3) - { - bpp = 4; - } - - bpp = Max(8u, bpp * numSamples); - - macroModeIndex = HwlComputeMacroModeIndex(tileIndex, flags, bpp, numSamples, pOut->pTileInfo); - - fmaskIn.tileIndex = tileIndex; - fmaskIn.pTileInfo = pOut->pTileInfo; - pOut->macroModeIndex = macroModeIndex; - pOut->tileIndex = tileIndex; - - retCode = DispatchComputeFmaskInfo(&fmaskIn, pOut); - - if (retCode == ADDR_OK) - { - pOut->tileIndex = - HwlPostCheckTileIndex(pOut->pTileInfo, pIn->tileMode, ADDR_NON_DISPLAYABLE, - pOut->tileIndex); - } - - // Resets pTileInfo to NULL if the internal tile info is used - if (pOut->pTileInfo == &tileInfo) - { - pOut->pTileInfo = NULL; - } - - return retCode; -} - -/** -**************************************************************************************************** -* CiLib::HwlFmaskPreThunkSurfInfo -* -* @brief -* Some preparation before thunking a ComputeSurfaceInfo call for Fmask -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -VOID CiLib::HwlFmaskPreThunkSurfInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn, ///< [in] Input of fmask info - const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut, ///< [in] Output of fmask info - ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn, ///< [out] Input of thunked surface info - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut ///< [out] Output of thunked surface info - ) const -{ - pSurfIn->tileIndex = pFmaskIn->tileIndex; - pSurfOut->macroModeIndex = pFmaskOut->macroModeIndex; -} - -/** -**************************************************************************************************** -* 
CiLib::HwlFmaskPostThunkSurfInfo -* -* @brief -* Copy hwl extra field after calling thunked ComputeSurfaceInfo -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -VOID CiLib::HwlFmaskPostThunkSurfInfo( - const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut, ///< [in] Output of surface info - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut ///< [out] Output of fmask info - ) const -{ - pFmaskOut->tileIndex = pSurfOut->tileIndex; - pFmaskOut->macroModeIndex = pSurfOut->macroModeIndex; -} - -/** -**************************************************************************************************** -* CiLib::HwlDegradeThickTileMode -* -* @brief -* Degrades valid tile mode for thick modes if needed -* -* @return -* Suitable tile mode -**************************************************************************************************** -*/ -AddrTileMode CiLib::HwlDegradeThickTileMode( - AddrTileMode baseTileMode, ///< [in] base tile mode - UINT_32 numSlices, ///< [in] current number of slices - UINT_32* pBytesPerTile ///< [in,out] pointer to bytes per slice - ) const -{ - return baseTileMode; -} - -/** -**************************************************************************************************** -* CiLib::HwlOptimizeTileMode -* -* @brief -* Optimize tile mode on CI -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID CiLib::HwlOptimizeTileMode( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure - ) const -{ - AddrTileMode tileMode = pInOut->tileMode; - - // Override 2D/3D macro tile mode to PRT_* tile mode if - // client driver requests this surface is equation compatible - if (IsMacroTiled(tileMode) == TRUE) - { - if ((pInOut->flags.needEquation == TRUE) && - (pInOut->numSamples <= 1) && - (IsPrtTileMode(tileMode) == FALSE)) - { - if ((pInOut->numSlices > 1) && 
((pInOut->maxBaseAlign == 0) || (pInOut->maxBaseAlign >= Block64K))) - { - UINT_32 thickness = Thickness(tileMode); - - if (thickness == 1) - { - tileMode = ADDR_TM_PRT_TILED_THIN1; - } - else - { - static const UINT_32 PrtTileBytes = 0x10000; - // First prt thick tile index in the tile mode table - static const UINT_32 PrtThickTileIndex = 22; - ADDR_TILEINFO tileInfo = {0}; - - HwlComputeMacroModeIndex(PrtThickTileIndex, - pInOut->flags, - pInOut->bpp, - pInOut->numSamples, - &tileInfo); - - UINT_32 macroTileBytes = ((pInOut->bpp) >> 3) * 64 * pInOut->numSamples * - thickness * HwlGetPipes(&tileInfo) * - tileInfo.banks * tileInfo.bankWidth * - tileInfo.bankHeight; - - if (macroTileBytes <= PrtTileBytes) - { - tileMode = ADDR_TM_PRT_TILED_THICK; - } - else - { - tileMode = ADDR_TM_PRT_TILED_THIN1; - } - } - } - } - - if (pInOut->maxBaseAlign != 0) - { - pInOut->flags.dccPipeWorkaround = FALSE; - } - } - - if (tileMode != pInOut->tileMode) - { - pInOut->tileMode = tileMode; - } -} - -/** -**************************************************************************************************** -* CiLib::HwlOverrideTileMode -* -* @brief -* Override THICK to THIN, for specific formats on CI -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID CiLib::HwlOverrideTileMode( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure - ) const -{ - AddrTileMode tileMode = pInOut->tileMode; - AddrTileType tileType = pInOut->tileType; - - // currently, all CI/VI family do not - // support ADDR_TM_PRT_2D_TILED_THICK,ADDR_TM_PRT_3D_TILED_THICK and - // ADDR_TM_PRT_2D_TILED_THIN1, ADDR_TM_PRT_3D_TILED_THIN1 - switch (tileMode) - { - case ADDR_TM_PRT_2D_TILED_THICK: - case ADDR_TM_PRT_3D_TILED_THICK: - tileMode = ADDR_TM_PRT_TILED_THICK; - break; - case ADDR_TM_PRT_2D_TILED_THIN1: - case ADDR_TM_PRT_3D_TILED_THIN1: - tileMode = ADDR_TM_PRT_TILED_THIN1; - break; - default: - break; - } 
- - // UBTS#404321, we do not need such overriding, as THICK+THICK entries removed from the tile-mode table - if (!m_settings.isBonaire) - { - UINT_32 thickness = Thickness(tileMode); - - // tile_thickness = (array_mode == XTHICK) ? 8 : ((array_mode == THICK) ? 4 : 1) - if (thickness > 1) - { - switch (pInOut->format) - { - // see //gfxip/gcB/devel/cds/src/verif/tc/models/csim/tcp.cpp - // tcpError("Thick micro tiling is not supported for format... - case ADDR_FMT_X24_8_32_FLOAT: - case ADDR_FMT_32_AS_8: - case ADDR_FMT_32_AS_8_8: - case ADDR_FMT_32_AS_32_32_32_32: - - // packed formats - case ADDR_FMT_GB_GR: - case ADDR_FMT_BG_RG: - case ADDR_FMT_1_REVERSED: - case ADDR_FMT_1: - case ADDR_FMT_BC1: - case ADDR_FMT_BC2: - case ADDR_FMT_BC3: - case ADDR_FMT_BC4: - case ADDR_FMT_BC5: - case ADDR_FMT_BC6: - case ADDR_FMT_BC7: - switch (tileMode) - { - case ADDR_TM_1D_TILED_THICK: - tileMode = ADDR_TM_1D_TILED_THIN1; - break; - - case ADDR_TM_2D_TILED_XTHICK: - case ADDR_TM_2D_TILED_THICK: - tileMode = ADDR_TM_2D_TILED_THIN1; - break; - - case ADDR_TM_3D_TILED_XTHICK: - case ADDR_TM_3D_TILED_THICK: - tileMode = ADDR_TM_3D_TILED_THIN1; - break; - - case ADDR_TM_PRT_TILED_THICK: - tileMode = ADDR_TM_PRT_TILED_THIN1; - break; - - case ADDR_TM_PRT_2D_TILED_THICK: - tileMode = ADDR_TM_PRT_2D_TILED_THIN1; - break; - - case ADDR_TM_PRT_3D_TILED_THICK: - tileMode = ADDR_TM_PRT_3D_TILED_THIN1; - break; - - default: - break; - - } - - // Switch tile type from thick to thin - if (tileMode != pInOut->tileMode) - { - // see tileIndex: 13-18 - tileType = ADDR_NON_DISPLAYABLE; - } - - break; - default: - break; - } - } - } - - if (tileMode != pInOut->tileMode) - { - pInOut->tileMode = tileMode; - pInOut->tileType = tileType; - } -} - -/** -**************************************************************************************************** -* CiLib::HwlSelectTileMode -* -* @brief -* Select tile modes. 
-* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID CiLib::HwlSelectTileMode( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure - ) const -{ - AddrTileMode tileMode; - AddrTileType tileType; - - if (pInOut->flags.rotateDisplay) - { - tileMode = ADDR_TM_2D_TILED_THIN1; - tileType = ADDR_ROTATED; - } - else if (pInOut->flags.volume) - { - BOOL_32 bThin = (m_settings.isBonaire == TRUE) || - ((m_allowNonDispThickModes == TRUE) && (pInOut->flags.color == TRUE)); - - if (pInOut->numSlices >= 8) - { - tileMode = ADDR_TM_2D_TILED_XTHICK; - tileType = (bThin == TRUE) ? ADDR_NON_DISPLAYABLE : ADDR_THICK; - } - else if (pInOut->numSlices >= 4) - { - tileMode = ADDR_TM_2D_TILED_THICK; - tileType = (bThin == TRUE) ? ADDR_NON_DISPLAYABLE : ADDR_THICK; - } - else - { - tileMode = ADDR_TM_2D_TILED_THIN1; - tileType = ADDR_NON_DISPLAYABLE; - } - } - else - { - tileMode = ADDR_TM_2D_TILED_THIN1; - - if (pInOut->flags.depth || pInOut->flags.stencil) - { - tileType = ADDR_DEPTH_SAMPLE_ORDER; - } - else if ((pInOut->bpp <= 32) || - (pInOut->flags.display == TRUE) || - (pInOut->flags.overlay == TRUE)) - { - tileType = ADDR_DISPLAYABLE; - } - else - { - tileType = ADDR_NON_DISPLAYABLE; - } - } - - if (pInOut->flags.prt) - { - if (Thickness(tileMode) > 1) - { - tileMode = ADDR_TM_PRT_TILED_THICK; - tileType = (m_settings.isBonaire == TRUE) ? 
ADDR_NON_DISPLAYABLE : ADDR_THICK; - } - else - { - tileMode = ADDR_TM_PRT_TILED_THIN1; - } - } - - pInOut->tileMode = tileMode; - pInOut->tileType = tileType; - - if ((pInOut->flags.dccCompatible == FALSE) && - (pInOut->flags.tcCompatible == FALSE)) - { - pInOut->flags.opt4Space = TRUE; - pInOut->maxBaseAlign = Block64K; - } - - // Optimize tile mode if possible - OptimizeTileMode(pInOut); - - HwlOverrideTileMode(pInOut); -} - -/** -**************************************************************************************************** -* CiLib::HwlSetPrtTileMode -* -* @brief -* Set PRT tile mode. -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID CiLib::HwlSetPrtTileMode( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure - ) const -{ - AddrTileMode tileMode = pInOut->tileMode; - AddrTileType tileType = pInOut->tileType; - - if (Thickness(tileMode) > 1) - { - tileMode = ADDR_TM_PRT_TILED_THICK; - tileType = (m_settings.isBonaire == TRUE) ? ADDR_NON_DISPLAYABLE : ADDR_THICK; - } - else - { - tileMode = ADDR_TM_PRT_TILED_THIN1; - tileType = (tileType == ADDR_THICK) ? 
ADDR_NON_DISPLAYABLE : tileType; - } - - pInOut->tileMode = tileMode; - pInOut->tileType = tileType; -} - -/** -**************************************************************************************************** -* CiLib::HwlSetupTileInfo -* -* @brief -* Setup default value of tile info for SI -**************************************************************************************************** -*/ -VOID CiLib::HwlSetupTileInfo( - AddrTileMode tileMode, ///< [in] Tile mode - ADDR_SURFACE_FLAGS flags, ///< [in] Surface type flags - UINT_32 bpp, ///< [in] Bits per pixel - UINT_32 pitch, ///< [in] Pitch in pixels - UINT_32 height, ///< [in] Height in pixels - UINT_32 numSamples, ///< [in] Number of samples - ADDR_TILEINFO* pTileInfoIn, ///< [in] Tile info input: NULL for default - ADDR_TILEINFO* pTileInfoOut, ///< [out] Tile info output - AddrTileType inTileType, ///< [in] Tile type - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] Output - ) const -{ - UINT_32 thickness = Thickness(tileMode); - ADDR_TILEINFO* pTileInfo = pTileInfoOut; - INT index = TileIndexInvalid; - INT macroModeIndex = TileIndexInvalid; - - // Fail-safe code - if (IsLinear(tileMode) == FALSE) - { - // Thick tile modes must use thick micro tile mode but Bonaire does not support due to - // old derived netlists (UBTS 404321) - if (thickness > 1) - { - if (m_settings.isBonaire) - { - inTileType = ADDR_NON_DISPLAYABLE; - } - else if ((m_allowNonDispThickModes == FALSE) || - (inTileType != ADDR_NON_DISPLAYABLE) || - // There is no PRT_THICK + THIN entry in tile mode table except Bonaire - (IsPrtTileMode(tileMode) == TRUE)) - { - inTileType = ADDR_THICK; - } - } - // 128 bpp tiling must be non-displayable. 
- // Fmask reuse color buffer's entry but bank-height field can be from another entry - // To simplify the logic, fmask entry should be picked from non-displayable ones - else if (bpp == 128 || flags.fmask) - { - inTileType = ADDR_NON_DISPLAYABLE; - } - // These two modes only have non-disp entries though they can be other micro tile modes - else if (tileMode == ADDR_TM_3D_TILED_THIN1 || tileMode == ADDR_TM_PRT_3D_TILED_THIN1) - { - inTileType = ADDR_NON_DISPLAYABLE; - } - - if (flags.depth || flags.stencil) - { - inTileType = ADDR_DEPTH_SAMPLE_ORDER; - } - } - - // tcCompatible flag is only meaningful for gfx8. - if (m_settings.isVolcanicIslands == FALSE) - { - flags.tcCompatible = FALSE; - } - - if (IsTileInfoAllZero(pTileInfo)) - { - // See table entries 0-4 - if (flags.depth || flags.stencil) - { - // tileSize = thickness * bpp * numSamples * 8 * 8 / 8 - UINT_32 tileSize = thickness * bpp * numSamples * 8; - - // Turn off tc compatible if row_size is smaller than tile size (tile split occurs). 
- if (m_rowSize < tileSize) - { - flags.tcCompatible = FALSE; - } - - if (flags.nonSplit | flags.tcCompatible | flags.needEquation) - { - // Texture readable depth surface should not be split - switch (tileSize) - { - case 64: - index = 0; - break; - case 128: - index = 1; - break; - case 256: - index = 2; - break; - case 512: - index = 3; - break; - default: - index = 4; - break; - } - } - else - { - // Depth and stencil need to use the same index, thus the pre-defined tile_split - // can meet the requirement to choose the same macro mode index - // uncompressed depth/stencil are not supported for now - switch (numSamples) - { - case 1: - index = 0; - break; - case 2: - case 4: - index = 1; - break; - case 8: - index = 2; - break; - default: - break; - } - } - } - - // See table entries 5-6 - if (inTileType == ADDR_DEPTH_SAMPLE_ORDER) - { - switch (tileMode) - { - case ADDR_TM_1D_TILED_THIN1: - index = 5; - break; - case ADDR_TM_PRT_TILED_THIN1: - index = 6; - break; - default: - break; - } - } - - // See table entries 8-12 - if (inTileType == ADDR_DISPLAYABLE) - { - switch (tileMode) - { - case ADDR_TM_1D_TILED_THIN1: - index = 9; - break; - case ADDR_TM_2D_TILED_THIN1: - index = 10; - break; - case ADDR_TM_PRT_TILED_THIN1: - index = 11; - break; - default: - break; - } - } - - // See table entries 13-18 - if (inTileType == ADDR_NON_DISPLAYABLE) - { - switch (tileMode) - { - case ADDR_TM_1D_TILED_THIN1: - index = 13; - break; - case ADDR_TM_2D_TILED_THIN1: - index = 14; - break; - case ADDR_TM_3D_TILED_THIN1: - index = 15; - break; - case ADDR_TM_PRT_TILED_THIN1: - index = 16; - break; - default: - break; - } - } - - // See table entries 19-26 - if (thickness > 1) - { - switch (tileMode) - { - case ADDR_TM_1D_TILED_THICK: - // special check for bonaire, for the compatablity between old KMD and new UMD - index = ((inTileType == ADDR_THICK) || m_settings.isBonaire) ? 
19 : 18; - break; - case ADDR_TM_2D_TILED_THICK: - // special check for bonaire, for the compatablity between old KMD and new UMD - index = ((inTileType == ADDR_THICK) || m_settings.isBonaire) ? 20 : 24; - break; - case ADDR_TM_3D_TILED_THICK: - index = 21; - break; - case ADDR_TM_PRT_TILED_THICK: - index = 22; - break; - case ADDR_TM_2D_TILED_XTHICK: - index = 25; - break; - case ADDR_TM_3D_TILED_XTHICK: - index = 26; - break; - default: - break; - } - } - - // See table entries 27-30 - if (inTileType == ADDR_ROTATED) - { - switch (tileMode) - { - case ADDR_TM_1D_TILED_THIN1: - index = 27; - break; - case ADDR_TM_2D_TILED_THIN1: - index = 28; - break; - case ADDR_TM_PRT_TILED_THIN1: - index = 29; - break; - case ADDR_TM_PRT_2D_TILED_THIN1: - index = 30; - break; - default: - break; - } - } - - if (m_pipes >= 8) - { - ADDR_ASSERT((index + 1) < static_cast(m_noOfEntries)); - // Only do this when tile mode table is updated. - if (((tileMode == ADDR_TM_PRT_TILED_THIN1) || (tileMode == ADDR_TM_PRT_TILED_THICK)) && - (m_tileTable[index + 1].mode == tileMode)) - { - static const UINT_32 PrtTileBytes = 0x10000; - ADDR_TILEINFO tileInfo = {0}; - - HwlComputeMacroModeIndex(index, flags, bpp, numSamples, &tileInfo); - - UINT_32 macroTileBytes = (bpp >> 3) * 64 * numSamples * thickness * - HwlGetPipes(&tileInfo) * tileInfo.banks * - tileInfo.bankWidth * tileInfo.bankHeight; - - if (macroTileBytes != PrtTileBytes) - { - // Switching to next tile mode entry to make sure macro tile size is 64KB - index += 1; - - tileInfo.pipeConfig = m_tileTable[index].info.pipeConfig; - - macroTileBytes = (bpp >> 3) * 64 * numSamples * thickness * - HwlGetPipes(&tileInfo) * tileInfo.banks * - tileInfo.bankWidth * tileInfo.bankHeight; - - ADDR_ASSERT(macroTileBytes == PrtTileBytes); - - flags.tcCompatible = FALSE; - pOut->dccUnsupport = TRUE; - } - } - } - } - else - { - // A pre-filled tile info is ready - index = pOut->tileIndex; - macroModeIndex = pOut->macroModeIndex; - - // pass tile type 
back for post tile index compute - pOut->tileType = inTileType; - - if (flags.depth || flags.stencil) - { - // tileSize = thickness * bpp * numSamples * 8 * 8 / 8 - UINT_32 tileSize = thickness * bpp * numSamples * 8; - - // Turn off tc compatible if row_size is smaller than tile size (tile split occurs). - if (m_rowSize < tileSize) - { - flags.tcCompatible = FALSE; - } - } - - UINT_32 numPipes = GetPipePerSurf(pTileInfo->pipeConfig); - - if (m_pipes != numPipes) - { - pOut->dccUnsupport = TRUE; - } - } - - // We only need to set up tile info if there is a valid index but macroModeIndex is invalid - if ((index != TileIndexInvalid) && (macroModeIndex == TileIndexInvalid)) - { - macroModeIndex = HwlComputeMacroModeIndex(index, flags, bpp, numSamples, pTileInfo); - - // Copy to pOut->tileType/tileIndex/macroModeIndex - pOut->tileIndex = index; - pOut->tileType = m_tileTable[index].type; // Or inTileType, the samea - pOut->macroModeIndex = macroModeIndex; - } - else if (tileMode == ADDR_TM_LINEAR_GENERAL) - { - pOut->tileIndex = TileIndexLinearGeneral; - - // Copy linear-aligned entry?? - *pTileInfo = m_tileTable[8].info; - } - else if (tileMode == ADDR_TM_LINEAR_ALIGNED) - { - pOut->tileIndex = 8; - *pTileInfo = m_tileTable[8].info; - } - - if (flags.tcCompatible) - { - flags.tcCompatible = CheckTcCompatibility(pTileInfo, bpp, tileMode, inTileType, pOut); - } - - pOut->tcCompatible = flags.tcCompatible; -} - -/** -**************************************************************************************************** -* CiLib::ReadGbTileMode -* -* @brief -* Convert GB_TILE_MODE HW value to ADDR_TILE_CONFIG. 
-**************************************************************************************************** -*/ -VOID CiLib::ReadGbTileMode( - UINT_32 regValue, ///< [in] GB_TILE_MODE register - TileConfig* pCfg ///< [out] output structure - ) const -{ - GB_TILE_MODE gbTileMode; - gbTileMode.val = regValue; - - pCfg->type = static_cast(gbTileMode.f.micro_tile_mode_new); - pCfg->info.pipeConfig = static_cast(gbTileMode.f.pipe_config + 1); - - if (pCfg->type == ADDR_DEPTH_SAMPLE_ORDER) - { - pCfg->info.tileSplitBytes = 64 << gbTileMode.f.tile_split; - } - else - { - pCfg->info.tileSplitBytes = 1 << gbTileMode.f.sample_split; - } - - UINT_32 regArrayMode = gbTileMode.f.array_mode; - - pCfg->mode = static_cast(regArrayMode); - - switch (regArrayMode) - { - case 5: - pCfg->mode = ADDR_TM_PRT_TILED_THIN1; - break; - case 6: - pCfg->mode = ADDR_TM_PRT_2D_TILED_THIN1; - break; - case 8: - pCfg->mode = ADDR_TM_2D_TILED_XTHICK; - break; - case 9: - pCfg->mode = ADDR_TM_PRT_TILED_THICK; - break; - case 0xa: - pCfg->mode = ADDR_TM_PRT_2D_TILED_THICK; - break; - case 0xb: - pCfg->mode = ADDR_TM_PRT_3D_TILED_THIN1; - break; - case 0xe: - pCfg->mode = ADDR_TM_3D_TILED_XTHICK; - break; - case 0xf: - pCfg->mode = ADDR_TM_PRT_3D_TILED_THICK; - break; - default: - break; - } - - // Fail-safe code for these always convert tile info, as the non-macro modes - // return the entry of tile mode table directly without looking up macro mode table - if (!IsMacroTiled(pCfg->mode)) - { - pCfg->info.banks = 2; - pCfg->info.bankWidth = 1; - pCfg->info.bankHeight = 1; - pCfg->info.macroAspectRatio = 1; - pCfg->info.tileSplitBytes = 64; - } -} - -/** -**************************************************************************************************** -* CiLib::InitTileSettingTable -* -* @brief -* Initialize the ADDR_TILE_CONFIG table. 
-* @return -* TRUE if tile table is correctly initialized -**************************************************************************************************** -*/ -BOOL_32 CiLib::InitTileSettingTable( - const UINT_32* pCfg, ///< [in] Pointer to table of tile configs - UINT_32 noOfEntries ///< [in] Numbe of entries in the table above - ) -{ - BOOL_32 initOk = TRUE; - - ADDR_ASSERT(noOfEntries <= TileTableSize); - - memset(m_tileTable, 0, sizeof(m_tileTable)); - - if (noOfEntries != 0) - { - m_noOfEntries = noOfEntries; - } - else - { - m_noOfEntries = TileTableSize; - } - - if (pCfg) // From Client - { - for (UINT_32 i = 0; i < m_noOfEntries; i++) - { - ReadGbTileMode(*(pCfg + i), &m_tileTable[i]); - } - } - else - { - ADDR_ASSERT_ALWAYS(); - initOk = FALSE; - } - - if (initOk) - { - ADDR_ASSERT(m_tileTable[TILEINDEX_LINEAR_ALIGNED].mode == ADDR_TM_LINEAR_ALIGNED); - - if (m_settings.isBonaire == FALSE) - { - // Check if entry 18 is "thick+thin" combination - if ((m_tileTable[18].mode == ADDR_TM_1D_TILED_THICK) && - (m_tileTable[18].type == ADDR_NON_DISPLAYABLE)) - { - m_allowNonDispThickModes = TRUE; - ADDR_ASSERT(m_tileTable[24].mode == ADDR_TM_2D_TILED_THICK); - } - } - else - { - m_allowNonDispThickModes = TRUE; - } - - // Assume the first entry is always programmed with full pipes - m_pipes = HwlGetPipes(&m_tileTable[0].info); - } - - return initOk; -} - -/** -**************************************************************************************************** -* CiLib::ReadGbMacroTileCfg -* -* @brief -* Convert GB_MACRO_TILE_CFG HW value to ADDR_TILE_CONFIG. 
-**************************************************************************************************** -*/ -VOID CiLib::ReadGbMacroTileCfg( - UINT_32 regValue, ///< [in] GB_MACRO_TILE_MODE register - ADDR_TILEINFO* pCfg ///< [out] output structure - ) const -{ - GB_MACROTILE_MODE gbTileMode; - gbTileMode.val = regValue; - - pCfg->bankHeight = 1 << gbTileMode.f.bank_height; - pCfg->bankWidth = 1 << gbTileMode.f.bank_width; - pCfg->banks = 1 << (gbTileMode.f.num_banks + 1); - pCfg->macroAspectRatio = 1 << gbTileMode.f.macro_tile_aspect; -} - -/** -**************************************************************************************************** -* CiLib::InitMacroTileCfgTable -* -* @brief -* Initialize the ADDR_MACRO_TILE_CONFIG table. -* @return -* TRUE if macro tile table is correctly initialized -**************************************************************************************************** -*/ -BOOL_32 CiLib::InitMacroTileCfgTable( - const UINT_32* pCfg, ///< [in] Pointer to table of tile configs - UINT_32 noOfMacroEntries ///< [in] Numbe of entries in the table above - ) -{ - BOOL_32 initOk = TRUE; - - ADDR_ASSERT(noOfMacroEntries <= MacroTileTableSize); - - memset(m_macroTileTable, 0, sizeof(m_macroTileTable)); - - if (noOfMacroEntries != 0) - { - m_noOfMacroEntries = noOfMacroEntries; - } - else - { - m_noOfMacroEntries = MacroTileTableSize; - } - - if (pCfg) // From Client - { - for (UINT_32 i = 0; i < m_noOfMacroEntries; i++) - { - ReadGbMacroTileCfg(*(pCfg + i), &m_macroTileTable[i]); - - m_macroTileTable[i].tileSplitBytes = 64 << (i % 8); - } - } - else - { - ADDR_ASSERT_ALWAYS(); - initOk = FALSE; - } - return initOk; -} - -/** -**************************************************************************************************** -* CiLib::HwlComputeMacroModeIndex -* -* @brief -* Computes macro tile mode index -* @return -* TRUE if macro tile table is correctly initialized 
-**************************************************************************************************** -*/ -INT_32 CiLib::HwlComputeMacroModeIndex( - INT_32 tileIndex, ///< [in] Tile mode index - ADDR_SURFACE_FLAGS flags, ///< [in] Surface flags - UINT_32 bpp, ///< [in] Bit per pixel - UINT_32 numSamples, ///< [in] Number of samples - ADDR_TILEINFO* pTileInfo, ///< [out] Pointer to ADDR_TILEINFO - AddrTileMode* pTileMode, ///< [out] Pointer to AddrTileMode - AddrTileType* pTileType ///< [out] Pointer to AddrTileType - ) const -{ - INT_32 macroModeIndex = TileIndexInvalid; - - AddrTileMode tileMode = m_tileTable[tileIndex].mode; - AddrTileType tileType = m_tileTable[tileIndex].type; - UINT_32 thickness = Thickness(tileMode); - - if (!IsMacroTiled(tileMode)) - { - *pTileInfo = m_tileTable[tileIndex].info; - macroModeIndex = TileIndexNoMacroIndex; - } - else - { - UINT_32 tileBytes1x = BITS_TO_BYTES(bpp * MicroTilePixels * thickness); - UINT_32 tileSplit; - - if (m_tileTable[tileIndex].type == ADDR_DEPTH_SAMPLE_ORDER) - { - // Depth entries store real tileSplitBytes - tileSplit = m_tileTable[tileIndex].info.tileSplitBytes; - } - else - { - // Non-depth entries store a split factor - UINT_32 sampleSplit = m_tileTable[tileIndex].info.tileSplitBytes; - UINT_32 colorTileSplit = Max(256u, sampleSplit * tileBytes1x); - - tileSplit = colorTileSplit; - } - - UINT_32 tileSplitC = Min(m_rowSize, tileSplit); - UINT_32 tileBytes; - - if (flags.fmask) - { - tileBytes = Min(tileSplitC, tileBytes1x); - } - else - { - tileBytes = Min(tileSplitC, numSamples * tileBytes1x); - } - - if (tileBytes < 64) - { - tileBytes = 64; - } - - macroModeIndex = Log2(tileBytes / 64); - - if (flags.prt || IsPrtTileMode(tileMode)) - { - macroModeIndex += PrtMacroModeOffset; - *pTileInfo = m_macroTileTable[macroModeIndex]; - } - else - { - *pTileInfo = m_macroTileTable[macroModeIndex]; - } - - pTileInfo->pipeConfig = m_tileTable[tileIndex].info.pipeConfig; - - pTileInfo->tileSplitBytes = tileSplitC; - } 
- - if (NULL != pTileMode) - { - *pTileMode = tileMode; - } - - if (NULL != pTileType) - { - *pTileType = tileType; - } - - return macroModeIndex; -} - -/** -**************************************************************************************************** -* CiLib::HwlComputeTileDataWidthAndHeightLinear -* -* @brief -* Compute the squared cache shape for per-tile data (CMASK and HTILE) for linear layout -* -* @note -* MacroWidth and macroHeight are measured in pixels -**************************************************************************************************** -*/ -VOID CiLib::HwlComputeTileDataWidthAndHeightLinear( - UINT_32* pMacroWidth, ///< [out] macro tile width - UINT_32* pMacroHeight, ///< [out] macro tile height - UINT_32 bpp, ///< [in] bits per pixel - ADDR_TILEINFO* pTileInfo ///< [in] tile info - ) const -{ - ADDR_ASSERT(pTileInfo != NULL); - - UINT_32 numTiles; - - switch (pTileInfo->pipeConfig) - { - case ADDR_PIPECFG_P16_32x32_8x16: - case ADDR_PIPECFG_P16_32x32_16x16: - case ADDR_PIPECFG_P8_32x64_32x32: - case ADDR_PIPECFG_P8_32x32_16x32: - case ADDR_PIPECFG_P8_32x32_16x16: - case ADDR_PIPECFG_P8_32x32_8x16: - case ADDR_PIPECFG_P4_32x32: - numTiles = 8; - break; - default: - numTiles = 4; - break; - } - - *pMacroWidth = numTiles * MicroTileWidth; - *pMacroHeight = numTiles * MicroTileHeight; -} - -/** -**************************************************************************************************** -* CiLib::HwlComputeMetadataNibbleAddress -* -* @brief -* calculate meta data address based on input information -* -* ¶meter -* uncompressedDataByteAddress - address of a pixel in color surface -* dataBaseByteAddress - base address of color surface -* metadataBaseByteAddress - base address of meta ram -* metadataBitSize - meta key size, 8 for DCC, 4 for cmask -* elementBitSize - element size of color surface -* blockByteSize - compression block size, 256 for DCC -* pipeInterleaveBytes - pipe interleave size -* numOfPipes - number of pipes -* 
numOfBanks - number of banks -* numOfSamplesPerSplit - number of samples per tile split -* @return -* meta data nibble address (nibble address is used to support DCC compatible cmask) -* -**************************************************************************************************** -*/ -UINT_64 CiLib::HwlComputeMetadataNibbleAddress( - UINT_64 uncompressedDataByteAddress, - UINT_64 dataBaseByteAddress, - UINT_64 metadataBaseByteAddress, - UINT_32 metadataBitSize, - UINT_32 elementBitSize, - UINT_32 blockByteSize, - UINT_32 pipeInterleaveBytes, - UINT_32 numOfPipes, - UINT_32 numOfBanks, - UINT_32 numOfSamplesPerSplit) const -{ - ///-------------------------------------------------------------------------------------------- - /// Get pipe interleave, bank and pipe bits - ///-------------------------------------------------------------------------------------------- - UINT_32 pipeInterleaveBits = Log2(pipeInterleaveBytes); - UINT_32 pipeBits = Log2(numOfPipes); - UINT_32 bankBits = Log2(numOfBanks); - - ///-------------------------------------------------------------------------------------------- - /// Clear pipe and bank swizzles - ///-------------------------------------------------------------------------------------------- - UINT_32 dataMacrotileBits = pipeInterleaveBits + pipeBits + bankBits; - UINT_32 metadataMacrotileBits = pipeInterleaveBits + pipeBits + bankBits; - - UINT_64 dataMacrotileClearMask = ~((1L << dataMacrotileBits) - 1); - UINT_64 metadataMacrotileClearMask = ~((1L << metadataMacrotileBits) - 1); - - UINT_64 dataBaseByteAddressNoSwizzle = dataBaseByteAddress & dataMacrotileClearMask; - UINT_64 metadataBaseByteAddressNoSwizzle = metadataBaseByteAddress & metadataMacrotileClearMask; - - ///-------------------------------------------------------------------------------------------- - /// Modify metadata base before adding in so that when final address is divided by data ratio, - /// the base address returns to where it should be - 
///-------------------------------------------------------------------------------------------- - ADDR_ASSERT((0 != metadataBitSize)); - UINT_64 metadataBaseShifted = metadataBaseByteAddressNoSwizzle * blockByteSize * 8 / - metadataBitSize; - UINT_64 offset = uncompressedDataByteAddress - - dataBaseByteAddressNoSwizzle + - metadataBaseShifted; - - ///-------------------------------------------------------------------------------------------- - /// Save bank data bits - ///-------------------------------------------------------------------------------------------- - UINT_32 lsb = pipeBits + pipeInterleaveBits; - UINT_32 msb = bankBits - 1 + lsb; - - UINT_64 bankDataBits = GetBits(offset, msb, lsb); - - ///-------------------------------------------------------------------------------------------- - /// Save pipe data bits - ///-------------------------------------------------------------------------------------------- - lsb = pipeInterleaveBits; - msb = pipeBits - 1 + lsb; - - UINT_64 pipeDataBits = GetBits(offset, msb, lsb); - - ///-------------------------------------------------------------------------------------------- - /// Remove pipe and bank bits - ///-------------------------------------------------------------------------------------------- - lsb = pipeInterleaveBits; - msb = dataMacrotileBits - 1; - - UINT_64 offsetWithoutPipeBankBits = RemoveBits(offset, msb, lsb); - - ADDR_ASSERT((0 != blockByteSize)); - UINT_64 blockInBankpipe = offsetWithoutPipeBankBits / blockByteSize; - - UINT_32 tileSize = 8 * 8 * elementBitSize/8 * numOfSamplesPerSplit; - UINT_32 blocksInTile = tileSize / blockByteSize; - - if (0 == blocksInTile) - { - lsb = 0; - } - else - { - lsb = Log2(blocksInTile); - } - msb = bankBits - 1 + lsb; - - UINT_64 blockInBankpipeWithBankBits = InsertBits(blockInBankpipe, bankDataBits, msb, lsb); - - /// NOTE *2 because we are converting to Nibble address in this step - UINT_64 metaAddressInPipe = blockInBankpipeWithBankBits * 2 * metadataBitSize / 
8; - - - ///-------------------------------------------------------------------------------------------- - /// Reinsert pipe bits back into the final address - ///-------------------------------------------------------------------------------------------- - lsb = pipeInterleaveBits + 1; ///<+1 due to Nibble address now gives interleave bits extra lsb. - msb = pipeBits - 1 + lsb; - UINT_64 metadataAddress = InsertBits(metaAddressInPipe, pipeDataBits, msb, lsb); - - return metadataAddress; -} - -/** -**************************************************************************************************** -* CiLib::HwlComputeSurfaceAlignmentsMacroTiled -* -* @brief -* Hardware layer function to compute alignment request for macro tile mode -* -**************************************************************************************************** -*/ -VOID CiLib::HwlComputeSurfaceAlignmentsMacroTiled( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32 mipLevel, ///< [in] mip level - UINT_32 numSamples, ///< [in] number of samples - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] Surface output - ) const -{ - // This is to workaround a H/W limitation that DCC doesn't work when pipe config is switched to - // P4. In theory, all asics that have such switching should be patched but we now only know what - // to pad for Fiji. - if ((m_settings.isFiji == TRUE) && - (flags.dccPipeWorkaround == TRUE) && - (flags.prt == FALSE) && - (mipLevel == 0) && - (tileMode == ADDR_TM_PRT_TILED_THIN1) && - (pOut->dccUnsupport == TRUE)) - { - pOut->pitchAlign = PowTwoAlign(pOut->pitchAlign, 256); - // In case the client still requests DCC usage. 
- pOut->dccUnsupport = FALSE; - } -} - -/** -**************************************************************************************************** -* CiLib::HwlPadDimensions -* -* @brief -* Helper function to pad dimensions -* -**************************************************************************************************** -*/ -VOID CiLib::HwlPadDimensions( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32 numSamples, ///< [in] number of samples - ADDR_TILEINFO* pTileInfo, ///< [in] tile info - UINT_32 mipLevel, ///< [in] mip level - UINT_32* pPitch, ///< [in,out] pitch in pixels - UINT_32* pPitchAlign, ///< [in,out] pitch alignment - UINT_32 height, ///< [in] height in pixels - UINT_32 heightAlign ///< [in] height alignment - ) const -{ - if ((m_settings.isVolcanicIslands == TRUE) && - (flags.dccCompatible == TRUE) && - (numSamples > 1) && - (mipLevel == 0) && - (IsMacroTiled(tileMode) == TRUE)) - { - UINT_32 tileSizePerSample = BITS_TO_BYTES(bpp * MicroTileWidth * MicroTileHeight); - UINT_32 samplesPerSplit = pTileInfo->tileSplitBytes / tileSizePerSample; - - if (samplesPerSplit < numSamples) - { - UINT_32 dccFastClearByteAlign = HwlGetPipes(pTileInfo) * m_pipeInterleaveBytes * 256; - UINT_32 bytesPerSplit = BITS_TO_BYTES((*pPitch) * height * bpp * samplesPerSplit); - - ADDR_ASSERT(IsPow2(dccFastClearByteAlign)); - - if (0 != (bytesPerSplit & (dccFastClearByteAlign - 1))) - { - UINT_32 dccFastClearPixelAlign = dccFastClearByteAlign / - BITS_TO_BYTES(bpp) / - samplesPerSplit; - UINT_32 macroTilePixelAlign = (*pPitchAlign) * heightAlign; - - if ((dccFastClearPixelAlign >= macroTilePixelAlign) && - ((dccFastClearPixelAlign % macroTilePixelAlign) == 0)) - { - UINT_32 dccFastClearPitchAlignInMacroTile = - dccFastClearPixelAlign / macroTilePixelAlign; - UINT_32 heightInMacroTile = height / heightAlign; - - while ((heightInMacroTile > 1) && - ((heightInMacroTile % 2) == 
0) && - (dccFastClearPitchAlignInMacroTile > 1) && - ((dccFastClearPitchAlignInMacroTile % 2) == 0)) - { - heightInMacroTile >>= 1; - dccFastClearPitchAlignInMacroTile >>= 1; - } - - UINT_32 dccFastClearPitchAlignInPixels = - (*pPitchAlign) * dccFastClearPitchAlignInMacroTile; - - if (IsPow2(dccFastClearPitchAlignInPixels)) - { - *pPitch = PowTwoAlign((*pPitch), dccFastClearPitchAlignInPixels); - } - else - { - *pPitch += (dccFastClearPitchAlignInPixels - 1); - *pPitch /= dccFastClearPitchAlignInPixels; - *pPitch *= dccFastClearPitchAlignInPixels; - } - - *pPitchAlign = dccFastClearPitchAlignInPixels; - } - } - } - } -} - -/** -**************************************************************************************************** -* CiLib::HwlComputeMaxBaseAlignments -* -* @brief -* Gets maximum alignments -* @return -* maximum alignments -**************************************************************************************************** -*/ -UINT_32 CiLib::HwlComputeMaxBaseAlignments() const -{ - const UINT_32 pipes = HwlGetPipes(&m_tileTable[0].info); - - // Initial size is 64 KiB for PRT. - UINT_32 maxBaseAlign = 64 * 1024; - - for (UINT_32 i = 0; i < m_noOfMacroEntries; i++) - { - // The maximum tile size is 16 byte-per-pixel and either 8-sample or 8-slice. 
- UINT_32 tileSize = m_macroTileTable[i].tileSplitBytes; - - UINT_32 baseAlign = tileSize * pipes * m_macroTileTable[i].banks * - m_macroTileTable[i].bankWidth * m_macroTileTable[i].bankHeight; - - if (baseAlign > maxBaseAlign) - { - maxBaseAlign = baseAlign; - } - } - - return maxBaseAlign; -} - -/** -**************************************************************************************************** -* CiLib::HwlComputeMaxMetaBaseAlignments -* -* @brief -* Gets maximum alignments for metadata -* @return -* maximum alignments for metadata -**************************************************************************************************** -*/ -UINT_32 CiLib::HwlComputeMaxMetaBaseAlignments() const -{ - UINT_32 maxBank = 1; - - for (UINT_32 i = 0; i < m_noOfMacroEntries; i++) - { - if ((m_settings.isVolcanicIslands) && IsMacroTiled(m_tileTable[i].mode)) - { - maxBank = Max(maxBank, m_macroTileTable[i].banks); - } - } - - return SiLib::HwlComputeMaxMetaBaseAlignments() * maxBank; -} - -/** -**************************************************************************************************** -* CiLib::DepthStencilTileCfgMatch -* -* @brief -* Try to find a tile index for stencil which makes its tile config parameters matches to depth -* @return -* TRUE if such tile index for stencil can be found -**************************************************************************************************** -*/ -BOOL_32 CiLib::DepthStencilTileCfgMatch( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - BOOL_32 depthStencil2DTileConfigMatch = FALSE; - - for (INT_32 stencilTileIndex = MinDepth2DThinIndex; - stencilTileIndex <= MaxDepth2DThinIndex; - stencilTileIndex++) - { - ADDR_TILEINFO tileInfo = {0}; - INT_32 stencilMacroIndex = HwlComputeMacroModeIndex(stencilTileIndex, - pIn->flags, - 8, - pIn->numSamples, - &tileInfo); - - if (stencilMacroIndex != TileIndexNoMacroIndex) - 
{ - if ((m_macroTileTable[stencilMacroIndex].banks == - m_macroTileTable[pOut->macroModeIndex].banks) && - (m_macroTileTable[stencilMacroIndex].bankWidth == - m_macroTileTable[pOut->macroModeIndex].bankWidth) && - (m_macroTileTable[stencilMacroIndex].bankHeight == - m_macroTileTable[pOut->macroModeIndex].bankHeight) && - (m_macroTileTable[stencilMacroIndex].macroAspectRatio == - m_macroTileTable[pOut->macroModeIndex].macroAspectRatio) && - (m_macroTileTable[stencilMacroIndex].pipeConfig == - m_macroTileTable[pOut->macroModeIndex].pipeConfig)) - { - if ((pOut->tcCompatible == FALSE) || - (tileInfo.tileSplitBytes >= MicroTileWidth * MicroTileHeight * pIn->numSamples)) - { - depthStencil2DTileConfigMatch = TRUE; - pOut->stencilTileIdx = stencilTileIndex; - break; - } - } - } - else - { - ADDR_ASSERT_ALWAYS(); - } - } - - return depthStencil2DTileConfigMatch; -} - -/** -**************************************************************************************************** -* CiLib::DepthStencilTileCfgMatch -* -* @brief -* Check if tc compatibility is available -* @return -* If tc compatibility is not available -**************************************************************************************************** -*/ -BOOL_32 CiLib::CheckTcCompatibility( - const ADDR_TILEINFO* pTileInfo, ///< [in] input tile info - UINT_32 bpp, ///< [in] Bits per pixel - AddrTileMode tileMode, ///< [in] input tile mode - AddrTileType tileType, ///< [in] input tile type - const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in] output surf info - ) const -{ - BOOL_32 tcCompatible = TRUE; - - if (IsMacroTiled(tileMode)) - { - if (tileType != ADDR_DEPTH_SAMPLE_ORDER) - { - // Turn off tcCompatible for color surface if tileSplit happens. Depth/stencil - // tileSplit case was handled at tileIndex selecting time. 
- INT_32 tileIndex = pOut->tileIndex; - - if ((tileIndex == TileIndexInvalid) && (IsTileInfoAllZero(pTileInfo) == FALSE)) - { - tileIndex = HwlPostCheckTileIndex(pTileInfo, tileMode, tileType, tileIndex); - } - - if (tileIndex != TileIndexInvalid) - { - UINT_32 thickness = Thickness(tileMode); - - ADDR_ASSERT(static_cast(tileIndex) < TileTableSize); - // Non-depth entries store a split factor - UINT_32 sampleSplit = m_tileTable[tileIndex].info.tileSplitBytes; - UINT_32 tileBytes1x = BITS_TO_BYTES(bpp * MicroTilePixels * thickness); - UINT_32 colorTileSplit = Max(256u, sampleSplit * tileBytes1x); - - if (m_rowSize < colorTileSplit) - { - tcCompatible = FALSE; - } - } - } - } - else - { - // Client should not enable tc compatible for linear and 1D tile modes. - tcCompatible = FALSE; - } - - return tcCompatible; -} - -} // V1 -} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/r800/ciaddrlib.h mesa-19.0.1/src/amd/addrlib/r800/ciaddrlib.h --- mesa-18.3.3/src/amd/addrlib/r800/ciaddrlib.h 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/r800/ciaddrlib.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,201 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file ciaddrlib.h -* @brief Contains the CiLib class definition. -**************************************************************************************************** -*/ - -#ifndef __CI_ADDR_LIB_H__ -#define __CI_ADDR_LIB_H__ - -#include "addrlib1.h" -#include "siaddrlib.h" - -namespace Addr -{ -namespace V1 -{ - -/** -**************************************************************************************************** -* @brief This class is the CI specific address library -* function set. -**************************************************************************************************** -*/ -class CiLib : public SiLib -{ -public: - /// Creates CiLib object - static Addr::Lib* CreateObj(const Client* pClient) - { - VOID* pMem = Object::ClientAlloc(sizeof(CiLib), pClient); - return (pMem != NULL) ? 
new (pMem) CiLib(pClient) : NULL; - } - -private: - CiLib(const Client* pClient); - virtual ~CiLib(); - -protected: - - // Hwl interface - defined in AddrLib1 - virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut); - - virtual ChipFamily HwlConvertChipFamily( - UINT_32 uChipFamily, UINT_32 uChipRevision); - - virtual BOOL_32 HwlInitGlobalParams( - const ADDR_CREATE_INPUT* pCreateIn); - - virtual ADDR_E_RETURNCODE HwlSetupTileCfg( - UINT_32 bpp, INT_32 index, INT_32 macroModeIndex, ADDR_TILEINFO* pInfo, - AddrTileMode* pMode = 0, AddrTileType* pType = 0) const; - - virtual VOID HwlComputeTileDataWidthAndHeightLinear( - UINT_32* pMacroWidth, UINT_32* pMacroHeight, - UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const; - - virtual INT_32 HwlComputeMacroModeIndex( - INT_32 tileIndex, ADDR_SURFACE_FLAGS flags, UINT_32 bpp, UINT_32 numSamples, - ADDR_TILEINFO* pTileInfo, AddrTileMode* pTileMode = NULL, AddrTileType* pTileType = NULL - ) const; - - // Sub-hwl interface - defined in EgBasedLib - virtual VOID HwlSetupTileInfo( - AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags, - UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, - ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo, - AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - virtual INT_32 HwlPostCheckTileIndex( - const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type, - INT curIndex = TileIndexInvalid) const; - - virtual VOID HwlFmaskPreThunkSurfInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn, - const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut, - ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const; - - virtual VOID HwlFmaskPostThunkSurfInfo( - const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut, - 
ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const; - - virtual AddrTileMode HwlDegradeThickTileMode( - AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const; - - virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; - - virtual VOID HwlOptimizeTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; - - virtual VOID HwlSelectTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; - - /// Overwrite tile setting to PRT - virtual VOID HwlSetPrtTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeDccInfo( - const ADDR_COMPUTE_DCCINFO_INPUT* pIn, - ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord( - const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord( - const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const; - - virtual UINT_32 HwlComputeMaxBaseAlignments() const; - - virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const; - - virtual VOID HwlPadDimensions( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, - UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 mipLevel, - UINT_32* pPitch, UINT_32 *PitchAlign, UINT_32 height, UINT_32 heightAlign) const; - - virtual VOID HwlComputeSurfaceAlignmentsMacroTiled( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, - UINT_32 mipLevel, UINT_32 numSamples, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - -private: - VOID ReadGbTileMode( - UINT_32 regValue, TileConfig* pCfg) const; - - VOID ReadGbMacroTileCfg( - UINT_32 regValue, ADDR_TILEINFO* pCfg) const; - - BOOL_32 InitTileSettingTable( - const UINT_32 *pSetting, UINT_32 noOfEntries); - - BOOL_32 InitMacroTileCfgTable( - const UINT_32 *pSetting, UINT_32 noOfEntries); - - UINT_64 HwlComputeMetadataNibbleAddress( - UINT_64 
uncompressedDataByteAddress, - UINT_64 dataBaseByteAddress, - UINT_64 metadataBaseByteAddress, - UINT_32 metadataBitSize, - UINT_32 elementBitSize, - UINT_32 blockByteSize, - UINT_32 pipeInterleaveBytes, - UINT_32 numOfPipes, - UINT_32 numOfBanks, - UINT_32 numOfSamplesPerSplit) const; - - BOOL_32 DepthStencilTileCfgMatch( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - BOOL_32 CheckTcCompatibility(const ADDR_TILEINFO* pTileInfo, UINT_32 bpp, AddrTileMode tileMode, - AddrTileType tileType, const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - static const UINT_32 MacroTileTableSize = 16; - static const UINT_32 PrtMacroModeOffset = MacroTileTableSize / 2; - static const INT_32 MinDepth2DThinIndex = 0; - static const INT_32 MaxDepth2DThinIndex = 4; - static const INT_32 Depth1DThinIndex = 5; - - ADDR_TILEINFO m_macroTileTable[MacroTileTableSize]; - UINT_32 m_noOfMacroEntries; - BOOL_32 m_allowNonDispThickModes; -}; - -} // V1 -} // Addr - -#endif - - diff -Nru mesa-18.3.3/src/amd/addrlib/r800/egbaddrlib.cpp mesa-19.0.1/src/amd/addrlib/r800/egbaddrlib.cpp --- mesa-18.3.3/src/amd/addrlib/r800/egbaddrlib.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/r800/egbaddrlib.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,4168 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file egbaddrlib.cpp -* @brief Contains the EgBasedLib class implementation. 
-**************************************************************************************************** -*/ - -#include "egbaddrlib.h" - -#include "util/macros.h" - -namespace Addr -{ -namespace V1 -{ - -/** -**************************************************************************************************** -* EgBasedLib::EgBasedLib -* -* @brief -* Constructor -* -* @note -* -**************************************************************************************************** -*/ -EgBasedLib::EgBasedLib(const Client* pClient) - : - Lib(pClient), - m_ranks(0), - m_logicalBanks(0), - m_bankInterleave(1) -{ -} - -/** -**************************************************************************************************** -* EgBasedLib::~EgBasedLib -* -* @brief -* Destructor -**************************************************************************************************** -*/ -EgBasedLib::~EgBasedLib() -{ -} - -/** -**************************************************************************************************** -* EgBasedLib::DispatchComputeSurfaceInfo -* -* @brief -* Compute surface sizes include padded pitch,height,slices,total size in bytes, -* meanwhile output suitable tile mode and base alignment might be changed in this -* call as well. Results are returned through output parameters. -* -* @return -* TRUE if no error occurs -**************************************************************************************************** -*/ -BOOL_32 EgBasedLib::DispatchComputeSurfaceInfo( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - AddrTileMode tileMode = pIn->tileMode; - UINT_32 bpp = pIn->bpp; - UINT_32 numSamples = pIn->numSamples; - UINT_32 numFrags = ((pIn->numFrags == 0) ? 
numSamples : pIn->numFrags); - UINT_32 pitch = pIn->width; - UINT_32 height = pIn->height; - UINT_32 numSlices = pIn->numSlices; - UINT_32 mipLevel = pIn->mipLevel; - ADDR_SURFACE_FLAGS flags = pIn->flags; - - ADDR_TILEINFO tileInfoDef = {0}; - ADDR_TILEINFO* pTileInfo = &tileInfoDef; - UINT_32 padDims = 0; - BOOL_32 valid; - - if (pIn->flags.disallowLargeThickDegrade == 0) - { - tileMode = DegradeLargeThickTile(tileMode, bpp); - } - - // Only override numSamples for NI above - if (m_chipFamily >= ADDR_CHIP_FAMILY_NI) - { - if (numFrags != numSamples) // This means EQAA - { - // The real surface size needed is determined by number of fragments - numSamples = numFrags; - } - - // Save altered numSamples in pOut - pOut->numSamples = numSamples; - } - - // Caller makes sure pOut->pTileInfo is not NULL, see HwlComputeSurfaceInfo - ADDR_ASSERT(pOut->pTileInfo); - - if (pOut->pTileInfo != NULL) - { - pTileInfo = pOut->pTileInfo; - } - - // Set default values - if (pIn->pTileInfo != NULL) - { - if (pTileInfo != pIn->pTileInfo) - { - *pTileInfo = *pIn->pTileInfo; - } - } - else - { - memset(pTileInfo, 0, sizeof(ADDR_TILEINFO)); - } - - // For macro tile mode, we should calculate default tiling parameters - HwlSetupTileInfo(tileMode, - flags, - bpp, - pitch, - height, - numSamples, - pIn->pTileInfo, - pTileInfo, - pIn->tileType, - pOut); - - if (flags.cube) - { - if (mipLevel == 0) - { - padDims = 2; - } - - if (numSlices == 1) - { - // This is calculating one face, remove cube flag - flags.cube = 0; - } - } - - switch (tileMode) - { - case ADDR_TM_LINEAR_GENERAL://fall through - case ADDR_TM_LINEAR_ALIGNED: - valid = ComputeSurfaceInfoLinear(pIn, pOut, padDims); - break; - - case ADDR_TM_1D_TILED_THIN1://fall through - case ADDR_TM_1D_TILED_THICK: - valid = ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, tileMode); - break; - - case ADDR_TM_2D_TILED_THIN1: //fall through - case ADDR_TM_2D_TILED_THICK: //fall through - case ADDR_TM_3D_TILED_THIN1: //fall through - case 
ADDR_TM_3D_TILED_THICK: //fall through - case ADDR_TM_2D_TILED_XTHICK: //fall through - case ADDR_TM_3D_TILED_XTHICK: //fall through - case ADDR_TM_PRT_TILED_THIN1: //fall through - case ADDR_TM_PRT_2D_TILED_THIN1://fall through - case ADDR_TM_PRT_3D_TILED_THIN1://fall through - case ADDR_TM_PRT_TILED_THICK: //fall through - case ADDR_TM_PRT_2D_TILED_THICK://fall through - case ADDR_TM_PRT_3D_TILED_THICK: - valid = ComputeSurfaceInfoMacroTiled(pIn, pOut, padDims, tileMode); - break; - - default: - valid = FALSE; - ADDR_ASSERT_ALWAYS(); - break; - } - - return valid; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeSurfaceInfoLinear -* -* @brief -* Compute linear surface sizes include padded pitch, height, slices, total size in -* bytes, meanwhile alignments as well. Since it is linear mode, so output tile mode -* will not be changed here. Results are returned through output parameters. -* -* @return -* TRUE if no error occurs -**************************************************************************************************** -*/ -BOOL_32 EgBasedLib::ComputeSurfaceInfoLinear( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, ///< [out] Output structure - UINT_32 padDims ///< [in] Dimensions to padd - ) const -{ - UINT_32 expPitch = pIn->width; - UINT_32 expHeight = pIn->height; - UINT_32 expNumSlices = pIn->numSlices; - - // No linear MSAA on real H/W, keep this for TGL - UINT_32 numSamples = pOut->numSamples; - - const UINT_32 microTileThickness = 1; - - // - // Compute the surface alignments. 
- // - ComputeSurfaceAlignmentsLinear(pIn->tileMode, - pIn->bpp, - pIn->flags, - &pOut->baseAlign, - &pOut->pitchAlign, - &pOut->heightAlign); - - if ((pIn->tileMode == ADDR_TM_LINEAR_GENERAL) && pIn->flags.color && (pIn->height > 1)) - { -#if !ALT_TEST - // When linear_general surface is accessed in multiple lines, it requires 8 pixels in pitch - // alignment since PITCH_TILE_MAX is in unit of 8 pixels. - // It is OK if it is accessed per line. - ADDR_ASSERT((pIn->width % 8) == 0); -#endif - } - - pOut->depthAlign = microTileThickness; - - expPitch = HwlPreHandleBaseLvl3xPitch(pIn, expPitch); - - // - // Pad pitch and height to the required granularities. - // - PadDimensions(pIn->tileMode, - pIn->bpp, - pIn->flags, - numSamples, - pOut->pTileInfo, - padDims, - pIn->mipLevel, - &expPitch, &pOut->pitchAlign, - &expHeight, pOut->heightAlign, - &expNumSlices, microTileThickness); - - expPitch = HwlPostHandleBaseLvl3xPitch(pIn, expPitch); - - // - // Adjust per HWL - // - - UINT_64 logicalSliceSize; - - logicalSliceSize = HwlGetSizeAdjustmentLinear(pIn->tileMode, - pIn->bpp, - numSamples, - pOut->baseAlign, - pOut->pitchAlign, - &expPitch, - &expHeight, - &pOut->heightAlign); - - if ((pIn->pitchAlign != 0) || (pIn->heightAlign != 0)) - { - if (pIn->pitchAlign != 0) - { - ADDR_ASSERT((pIn->pitchAlign % pOut->pitchAlign) == 0); - pOut->pitchAlign = pIn->pitchAlign; - - if (IsPow2(pOut->pitchAlign)) - { - expPitch = PowTwoAlign(expPitch, pOut->pitchAlign); - } - else - { - expPitch += pOut->pitchAlign - 1; - expPitch /= pOut->pitchAlign; - expPitch *= pOut->pitchAlign; - } - } - - if (pIn->heightAlign != 0) - { - ADDR_ASSERT((pIn->heightAlign % pOut->heightAlign) == 0); - pOut->heightAlign = pIn->heightAlign; - - if (IsPow2(pOut->heightAlign)) - { - expHeight = PowTwoAlign(expHeight, pOut->heightAlign); - } - else - { - expHeight += pOut->heightAlign - 1; - expHeight /= pOut->heightAlign; - expHeight *= pOut->heightAlign; - } - } - - logicalSliceSize = 
BITS_TO_BYTES(expPitch * expHeight * pIn->bpp); - } - - pOut->pitch = expPitch; - pOut->height = expHeight; - pOut->depth = expNumSlices; - - pOut->surfSize = logicalSliceSize * expNumSlices; - - pOut->tileMode = pIn->tileMode; - - return TRUE; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeSurfaceInfoMicroTiled -* -* @brief -* Compute 1D/Micro Tiled surface sizes include padded pitch, height, slices, total -* size in bytes, meanwhile alignments as well. Results are returned through output -* parameters. -* -* @return -* TRUE if no error occurs -**************************************************************************************************** -*/ -BOOL_32 EgBasedLib::ComputeSurfaceInfoMicroTiled( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, ///< [out] Output structure - UINT_32 padDims, ///< [in] Dimensions to padd - AddrTileMode expTileMode ///< [in] Expected tile mode - ) const -{ - BOOL_32 valid = TRUE; - - UINT_32 microTileThickness; - UINT_32 expPitch = pIn->width; - UINT_32 expHeight = pIn->height; - UINT_32 expNumSlices = pIn->numSlices; - - // No 1D MSAA on real H/W, keep this for TGL - UINT_32 numSamples = pOut->numSamples; - - // - // Compute the micro tile thickness. - // - microTileThickness = Thickness(expTileMode); - - // - // Extra override for mip levels - // - if (pIn->mipLevel > 0) - { - // - // Reduce tiling mode from thick to thin if the number of slices is less than the - // micro tile thickness. - // - if ((expTileMode == ADDR_TM_1D_TILED_THICK) && - (expNumSlices < ThickTileThickness)) - { - expTileMode = HwlDegradeThickTileMode(ADDR_TM_1D_TILED_THICK, expNumSlices, NULL); - if (expTileMode != ADDR_TM_1D_TILED_THICK) - { - microTileThickness = 1; - } - } - } - - // - // Compute the surface restrictions. 
- // - ComputeSurfaceAlignmentsMicroTiled(expTileMode, - pIn->bpp, - pIn->flags, - pIn->mipLevel, - numSamples, - &pOut->baseAlign, - &pOut->pitchAlign, - &pOut->heightAlign); - - pOut->depthAlign = microTileThickness; - - // - // Pad pitch and height to the required granularities. - // Compute surface size. - // Return parameters. - // - PadDimensions(expTileMode, - pIn->bpp, - pIn->flags, - numSamples, - pOut->pTileInfo, - padDims, - pIn->mipLevel, - &expPitch, &pOut->pitchAlign, - &expHeight, pOut->heightAlign, - &expNumSlices, microTileThickness); - - // - // Get HWL specific pitch adjustment - // - UINT_64 logicalSliceSize = HwlGetSizeAdjustmentMicroTiled(microTileThickness, - pIn->bpp, - pIn->flags, - numSamples, - pOut->baseAlign, - pOut->pitchAlign, - &expPitch, - &expHeight); - - - pOut->pitch = expPitch; - pOut->height = expHeight; - pOut->depth = expNumSlices; - - pOut->surfSize = logicalSliceSize * expNumSlices; - - pOut->tileMode = expTileMode; - - return valid; -} - - -/** -**************************************************************************************************** -* EgBasedLib::ComputeSurfaceInfoMacroTiled -* -* @brief -* Compute 2D/macro tiled surface sizes include padded pitch, height, slices, total -* size in bytes, meanwhile output suitable tile mode and alignments might be changed -* in this call as well. Results are returned through output parameters. 
-* -* @return -* TRUE if no error occurs -**************************************************************************************************** -*/ -BOOL_32 EgBasedLib::ComputeSurfaceInfoMacroTiled( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, ///< [out] Output structure - UINT_32 padDims, ///< [in] Dimensions to padd - AddrTileMode expTileMode ///< [in] Expected tile mode - ) const -{ - BOOL_32 valid = TRUE; - - AddrTileMode origTileMode = expTileMode; - UINT_32 microTileThickness; - - UINT_32 paddedPitch; - UINT_32 paddedHeight; - UINT_64 bytesPerSlice; - - UINT_32 expPitch = pIn->width; - UINT_32 expHeight = pIn->height; - UINT_32 expNumSlices = pIn->numSlices; - - UINT_32 numSamples = pOut->numSamples; - - // - // Compute the surface restrictions as base - // SanityCheckMacroTiled is called in ComputeSurfaceAlignmentsMacroTiled - // - valid = ComputeSurfaceAlignmentsMacroTiled(expTileMode, - pIn->bpp, - pIn->flags, - pIn->mipLevel, - numSamples, - pOut); - - if (valid) - { - // - // Compute the micro tile thickness. - // - microTileThickness = Thickness(expTileMode); - - // - // Find the correct tiling mode for mip levels - // - if (pIn->mipLevel > 0) - { - // - // Try valid tile mode - // - expTileMode = ComputeSurfaceMipLevelTileMode(expTileMode, - pIn->bpp, - expPitch, - expHeight, - expNumSlices, - numSamples, - pOut->blockWidth, - pOut->blockHeight, - pOut->pTileInfo); - - if (!IsMacroTiled(expTileMode)) // Downgraded to micro-tiled - { - return ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, expTileMode); - } - else if (microTileThickness != Thickness(expTileMode)) - { - // - // Re-compute if thickness changed since bank-height may be changed! 
- // - return ComputeSurfaceInfoMacroTiled(pIn, pOut, padDims, expTileMode); - } - } - - paddedPitch = expPitch; - paddedHeight = expHeight; - - // - // Re-cal alignment - // - if (expTileMode != origTileMode) // Tile mode is changed but still macro-tiled - { - valid = ComputeSurfaceAlignmentsMacroTiled(expTileMode, - pIn->bpp, - pIn->flags, - pIn->mipLevel, - numSamples, - pOut); - } - - // - // Do padding - // - PadDimensions(expTileMode, - pIn->bpp, - pIn->flags, - numSamples, - pOut->pTileInfo, - padDims, - pIn->mipLevel, - &paddedPitch, &pOut->pitchAlign, - &paddedHeight, pOut->heightAlign, - &expNumSlices, microTileThickness); - - if (pIn->flags.qbStereo && - (pOut->pStereoInfo != NULL)) - { - UINT_32 stereoHeightAlign = HwlStereoCheckRightOffsetPadding(pOut->pTileInfo); - - if (stereoHeightAlign != 0) - { - paddedHeight = PowTwoAlign(paddedHeight, stereoHeightAlign); - } - } - - if ((pIn->flags.needEquation == TRUE) && - (m_chipFamily == ADDR_CHIP_FAMILY_SI) && - (pIn->numMipLevels > 1) && - (pIn->mipLevel == 0)) - { - BOOL_32 convertTo1D = FALSE; - - ADDR_ASSERT(Thickness(expTileMode) == 1); - - for (UINT_32 i = 1; i < pIn->numMipLevels; i++) - { - UINT_32 mipPitch = Max(1u, paddedPitch >> i); - UINT_32 mipHeight = Max(1u, pIn->height >> i); - UINT_32 mipSlices = pIn->flags.volume ? 
- Max(1u, pIn->numSlices >> i) : pIn->numSlices; - expTileMode = ComputeSurfaceMipLevelTileMode(expTileMode, - pIn->bpp, - mipPitch, - mipHeight, - mipSlices, - numSamples, - pOut->blockWidth, - pOut->blockHeight, - pOut->pTileInfo); - - if (IsMacroTiled(expTileMode)) - { - if (PowTwoAlign(mipPitch, pOut->blockWidth) != - PowTwoAlign(mipPitch, pOut->pitchAlign)) - { - convertTo1D = TRUE; - break; - } - } - else - { - break; - } - } - - if (convertTo1D) - { - return ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, ADDR_TM_1D_TILED_THIN1); - } - } - - pOut->pitch = paddedPitch; - // Put this check right here to workaround special mipmap cases which the original height - // is needed. - // The original height is pre-stored in pOut->height in PostComputeMipLevel and - // pOut->pitch is needed in HwlCheckLastMacroTiledLvl, too. - if (m_configFlags.checkLast2DLevel && (numSamples == 1)) // Don't check MSAA - { - // Set a TRUE in pOut if next Level is the first 1D sub level - HwlCheckLastMacroTiledLvl(pIn, pOut); - } - pOut->height = paddedHeight; - - pOut->depth = expNumSlices; - - // - // Compute the size of a slice. - // - bytesPerSlice = BITS_TO_BYTES(static_cast(paddedPitch) * - paddedHeight * NextPow2(pIn->bpp) * numSamples); - - pOut->surfSize = bytesPerSlice * expNumSlices; - - pOut->tileMode = expTileMode; - - pOut->depthAlign = microTileThickness; - - } // if (valid) - - return valid; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeSurfaceAlignmentsLinear -* -* @brief -* Compute linear surface alignment, calculation results are returned through -* output parameters. 
-* -* @return -* TRUE if no error occurs -**************************************************************************************************** -*/ -BOOL_32 EgBasedLib::ComputeSurfaceAlignmentsLinear( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32* pBaseAlign, ///< [out] base address alignment in bytes - UINT_32* pPitchAlign, ///< [out] pitch alignment in pixels - UINT_32* pHeightAlign ///< [out] height alignment in pixels - ) const -{ - BOOL_32 valid = TRUE; - - switch (tileMode) - { - case ADDR_TM_LINEAR_GENERAL: - // - // The required base alignment and pitch and height granularities is to 1 element. - // - *pBaseAlign = (bpp > 8) ? bpp / 8 : 1; - *pPitchAlign = 1; - *pHeightAlign = 1; - break; - case ADDR_TM_LINEAR_ALIGNED: - // - // The required alignment for base is the pipe interleave size. - // The required granularity for pitch is hwl dependent. - // The required granularity for height is one row. - // - *pBaseAlign = m_pipeInterleaveBytes; - *pPitchAlign = HwlGetPitchAlignmentLinear(bpp, flags); - *pHeightAlign = 1; - break; - default: - *pBaseAlign = 1; - *pPitchAlign = 1; - *pHeightAlign = 1; - ADDR_UNHANDLED_CASE(); - break; - } - - AdjustPitchAlignment(flags, pPitchAlign); - - return valid; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeSurfaceAlignmentsMicroTiled -* -* @brief -* Compute 1D tiled surface alignment, calculation results are returned through -* output parameters. 
-* -* @return -* TRUE if no error occurs -**************************************************************************************************** -*/ -BOOL_32 EgBasedLib::ComputeSurfaceAlignmentsMicroTiled( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32 mipLevel, ///< [in] mip level - UINT_32 numSamples, ///< [in] number of samples - UINT_32* pBaseAlign, ///< [out] base address alignment in bytes - UINT_32* pPitchAlign, ///< [out] pitch alignment in pixels - UINT_32* pHeightAlign ///< [out] height alignment in pixels - ) const -{ - BOOL_32 valid = TRUE; - - // - // The required alignment for base is the pipe interleave size. - // - *pBaseAlign = m_pipeInterleaveBytes; - - *pPitchAlign = HwlGetPitchAlignmentMicroTiled(tileMode, bpp, flags, numSamples); - - *pHeightAlign = MicroTileHeight; - - AdjustPitchAlignment(flags, pPitchAlign); - - // Workaround 2 for 1D tiling - There is HW bug for Carrizo, - // where it requires the following alignments for 1D tiling. 
- if (flags.czDispCompatible && (mipLevel == 0)) - { - *pBaseAlign = PowTwoAlign(*pBaseAlign, 4096); //Base address MOD 4096 = 0 - *pPitchAlign = PowTwoAlign(*pPitchAlign, 512 / (BITS_TO_BYTES(bpp))); //(8 lines * pitch * bytes per pixel) MOD 4096 = 0 - } - // end Carrizo workaround for 1D tilling - - return valid; -} - - -/** -**************************************************************************************************** -* EgBasedLib::HwlReduceBankWidthHeight -* -* @brief -* Additional checks, reduce bankHeight/bankWidth if needed and possible -* tileSize*BANK_WIDTH*BANK_HEIGHT <= ROW_SIZE -* -* @return -* TRUE if no error occurs -**************************************************************************************************** -*/ -BOOL_32 EgBasedLib::HwlReduceBankWidthHeight( - UINT_32 tileSize, ///< [in] tile size - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32 numSamples, ///< [in] number of samples - UINT_32 bankHeightAlign, ///< [in] bank height alignment - UINT_32 pipes, ///< [in] pipes - ADDR_TILEINFO* pTileInfo ///< [in,out] bank structure. - ) const -{ - UINT_32 macroAspectAlign; - BOOL_32 valid = TRUE; - - if (tileSize * pTileInfo->bankWidth * pTileInfo->bankHeight > m_rowSize) - { - BOOL_32 stillGreater = TRUE; - - // Try reducing bankWidth first - if (stillGreater && pTileInfo->bankWidth > 1) - { - while (stillGreater && pTileInfo->bankWidth > 0) - { - pTileInfo->bankWidth >>= 1; - - if (pTileInfo->bankWidth == 0) - { - pTileInfo->bankWidth = 1; - break; - } - - stillGreater = - tileSize * pTileInfo->bankWidth * pTileInfo->bankHeight > m_rowSize; - } - - // bankWidth is reduced above, so we need to recalculate bankHeight and ratio - bankHeightAlign = Max(1u, - m_pipeInterleaveBytes * m_bankInterleave / - (tileSize * pTileInfo->bankWidth) - ); - - // We cannot increase bankHeight so just assert this case. 
- ADDR_ASSERT((pTileInfo->bankHeight % bankHeightAlign) == 0); - - if (numSamples == 1) - { - macroAspectAlign = Max(1u, - m_pipeInterleaveBytes * m_bankInterleave / - (tileSize * pipes * pTileInfo->bankWidth) - ); - pTileInfo->macroAspectRatio = PowTwoAlign(pTileInfo->macroAspectRatio, - macroAspectAlign); - } - } - - // Early quit bank_height degradation for "64" bit z buffer - if (flags.depth && bpp >= 64) - { - stillGreater = FALSE; - } - - // Then try reducing bankHeight - if (stillGreater && pTileInfo->bankHeight > bankHeightAlign) - { - while (stillGreater && pTileInfo->bankHeight > bankHeightAlign) - { - pTileInfo->bankHeight >>= 1; - - if (pTileInfo->bankHeight < bankHeightAlign) - { - pTileInfo->bankHeight = bankHeightAlign; - break; - } - - stillGreater = - tileSize * pTileInfo->bankWidth * pTileInfo->bankHeight > m_rowSize; - } - } - - valid = !stillGreater; - - // Generate a warning if we still fail to meet this constraint - if (valid == FALSE) - { - ADDR_WARN( - 0, ("TILE_SIZE(%d)*BANK_WIDTH(%d)*BANK_HEIGHT(%d) <= ROW_SIZE(%d)", - tileSize, pTileInfo->bankWidth, pTileInfo->bankHeight, m_rowSize)); - } - } - - return valid; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeSurfaceAlignmentsMacroTiled -* -* @brief -* Compute 2D tiled surface alignment, calculation results are returned through -* output parameters. 
-* -* @return -* TRUE if no error occurs -**************************************************************************************************** -*/ -BOOL_32 EgBasedLib::ComputeSurfaceAlignmentsMacroTiled( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32 mipLevel, ///< [in] mip level - UINT_32 numSamples, ///< [in] number of samples - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] Surface output - ) const -{ - ADDR_TILEINFO* pTileInfo = pOut->pTileInfo; - - BOOL_32 valid = SanityCheckMacroTiled(pTileInfo); - - if (valid) - { - UINT_32 macroTileWidth; - UINT_32 macroTileHeight; - - UINT_32 tileSize; - UINT_32 bankHeightAlign; - UINT_32 macroAspectAlign; - - UINT_32 thickness = Thickness(tileMode); - UINT_32 pipes = HwlGetPipes(pTileInfo); - - // - // Align bank height first according to latest h/w spec - // - - // tile_size = MIN(tile_split, 64 * tile_thickness * element_bytes * num_samples) - tileSize = Min(pTileInfo->tileSplitBytes, - BITS_TO_BYTES(64 * thickness * bpp * numSamples)); - - // bank_height_align = - // MAX(1, (pipe_interleave_bytes * bank_interleave)/(tile_size*bank_width)) - bankHeightAlign = Max(1u, - m_pipeInterleaveBytes * m_bankInterleave / - (tileSize * pTileInfo->bankWidth) - ); - - pTileInfo->bankHeight = PowTwoAlign(pTileInfo->bankHeight, bankHeightAlign); - - // num_pipes * bank_width * macro_tile_aspect >= - // (pipe_interleave_size * bank_interleave) / tile_size - if (numSamples == 1) - { - // this restriction is only for mipmap (mipmap's numSamples must be 1) - macroAspectAlign = Max(1u, - m_pipeInterleaveBytes * m_bankInterleave / - (tileSize * pipes * pTileInfo->bankWidth) - ); - pTileInfo->macroAspectRatio = PowTwoAlign(pTileInfo->macroAspectRatio, macroAspectAlign); - } - - valid = HwlReduceBankWidthHeight(tileSize, - bpp, - flags, - numSamples, - bankHeightAlign, - pipes, - pTileInfo); - - // - // The required granularity for pitch 
is the macro tile width. - // - macroTileWidth = MicroTileWidth * pTileInfo->bankWidth * pipes * - pTileInfo->macroAspectRatio; - - pOut->pitchAlign = macroTileWidth; - pOut->blockWidth = macroTileWidth; - - AdjustPitchAlignment(flags, &pOut->pitchAlign); - - // - // The required granularity for height is the macro tile height. - // - macroTileHeight = MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks / - pTileInfo->macroAspectRatio; - - pOut->heightAlign = macroTileHeight; - pOut->blockHeight = macroTileHeight; - - // - // Compute base alignment - // - pOut->baseAlign = - pipes * pTileInfo->bankWidth * pTileInfo->banks * pTileInfo->bankHeight * tileSize; - - HwlComputeSurfaceAlignmentsMacroTiled(tileMode, bpp, flags, mipLevel, numSamples, pOut); - } - - return valid; -} - -/** -**************************************************************************************************** -* EgBasedLib::SanityCheckMacroTiled -* -* @brief -* Check if macro-tiled parameters are valid -* @return -* TRUE if valid -**************************************************************************************************** -*/ -BOOL_32 EgBasedLib::SanityCheckMacroTiled( - ADDR_TILEINFO* pTileInfo ///< [in] macro-tiled parameters - ) const -{ - BOOL_32 valid = TRUE; - MAYBE_UNUSED UINT_32 numPipes = HwlGetPipes(pTileInfo); - - switch (pTileInfo->banks) - { - case 2: //fall through - case 4: //fall through - case 8: //fall through - case 16: - break; - default: - valid = FALSE; - break; - - } - - if (valid) - { - switch (pTileInfo->bankWidth) - { - case 1: //fall through - case 2: //fall through - case 4: //fall through - case 8: - break; - default: - valid = FALSE; - break; - } - } - - if (valid) - { - switch (pTileInfo->bankHeight) - { - case 1: //fall through - case 2: //fall through - case 4: //fall through - case 8: - break; - default: - valid = FALSE; - break; - } - } - - if (valid) - { - switch (pTileInfo->macroAspectRatio) - { - case 1: //fall through - case 2: //fall 
through - case 4: //fall through - case 8: - break; - default: - valid = FALSE; - break; - } - } - - if (valid) - { - if (pTileInfo->banks < pTileInfo->macroAspectRatio) - { - // This will generate macro tile height <= 1 - valid = FALSE; - } - } - - if (valid) - { - if (pTileInfo->tileSplitBytes > m_rowSize) - { - ADDR_WARN(0, ("tileSplitBytes is bigger than row size")); - } - } - - if (valid) - { - valid = HwlSanityCheckMacroTiled(pTileInfo); - } - - ADDR_ASSERT(valid == TRUE); - - // Add this assert for guidance - ADDR_ASSERT(numPipes * pTileInfo->banks >= 4); - - return valid; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeSurfaceMipLevelTileMode -* -* @brief -* Compute valid tile mode for surface mipmap sub-levels -* -* @return -* Suitable tile mode -**************************************************************************************************** -*/ -AddrTileMode EgBasedLib::ComputeSurfaceMipLevelTileMode( - AddrTileMode baseTileMode, ///< [in] base tile mode - UINT_32 bpp, ///< [in] bits per pixels - UINT_32 pitch, ///< [in] current level pitch - UINT_32 height, ///< [in] current level height - UINT_32 numSlices, ///< [in] current number of slices - UINT_32 numSamples, ///< [in] number of samples - UINT_32 pitchAlign, ///< [in] pitch alignment - UINT_32 heightAlign, ///< [in] height alignment - ADDR_TILEINFO* pTileInfo ///< [in] ptr to bank structure - ) const -{ - UINT_64 bytesPerSlice; - (void)bytesPerSlice; - UINT_32 bytesPerTile; - - AddrTileMode expTileMode = baseTileMode; - UINT_32 microTileThickness = Thickness(expTileMode); - UINT_32 interleaveSize = m_pipeInterleaveBytes * m_bankInterleave; - - // - // Compute the size of a slice. 
- // - bytesPerSlice = BITS_TO_BYTES(static_cast(pitch) * height * bpp * numSamples); - bytesPerTile = BITS_TO_BYTES(MicroTilePixels * microTileThickness * NextPow2(bpp) * numSamples); - - // - // Reduce tiling mode from thick to thin if the number of slices is less than the - // micro tile thickness. - // - if (numSlices < microTileThickness) - { - expTileMode = HwlDegradeThickTileMode(expTileMode, numSlices, &bytesPerTile); - } - - if (bytesPerTile > pTileInfo->tileSplitBytes) - { - bytesPerTile = pTileInfo->tileSplitBytes; - } - - UINT_32 threshold1 = - bytesPerTile * HwlGetPipes(pTileInfo) * pTileInfo->bankWidth * pTileInfo->macroAspectRatio; - - UINT_32 threshold2 = - bytesPerTile * pTileInfo->bankWidth * pTileInfo->bankHeight; - - // - // Reduce the tile mode from 2D/3D to 1D in following conditions - // - switch (expTileMode) - { - case ADDR_TM_2D_TILED_THIN1: //fall through - case ADDR_TM_3D_TILED_THIN1: - case ADDR_TM_PRT_TILED_THIN1: - case ADDR_TM_PRT_2D_TILED_THIN1: - case ADDR_TM_PRT_3D_TILED_THIN1: - if ((pitch < pitchAlign) || - (height < heightAlign) || - (interleaveSize > threshold1) || - (interleaveSize > threshold2)) - { - expTileMode = ADDR_TM_1D_TILED_THIN1; - } - break; - case ADDR_TM_2D_TILED_THICK: //fall through - case ADDR_TM_3D_TILED_THICK: - case ADDR_TM_2D_TILED_XTHICK: - case ADDR_TM_3D_TILED_XTHICK: - case ADDR_TM_PRT_TILED_THICK: - case ADDR_TM_PRT_2D_TILED_THICK: - case ADDR_TM_PRT_3D_TILED_THICK: - if ((pitch < pitchAlign) || - (height < heightAlign)) - { - expTileMode = ADDR_TM_1D_TILED_THICK; - } - break; - default: - break; - } - - return expTileMode; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlGetAlignmentInfoMacroTiled -* @brief -* Get alignment info for giving tile mode -* @return -* TRUE if getting alignment is OK -**************************************************************************************************** -*/ -BOOL_32 
EgBasedLib::HwlGetAlignmentInfoMacroTiled( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] create surface info - UINT_32* pPitchAlign, ///< [out] pitch alignment - UINT_32* pHeightAlign, ///< [out] height alignment - UINT_32* pSizeAlign ///< [out] size alignment - ) const -{ - BOOL_32 valid = TRUE; - - ADDR_ASSERT(IsMacroTiled(pIn->tileMode)); - - UINT_32 numSamples = (pIn->numFrags == 0) ? pIn->numSamples : pIn->numFrags; - - ADDR_ASSERT(pIn->pTileInfo); - ADDR_TILEINFO tileInfo = *pIn->pTileInfo; - ADDR_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; - out.pTileInfo = &tileInfo; - - if (UseTileIndex(pIn->tileIndex)) - { - out.tileIndex = pIn->tileIndex; - out.macroModeIndex = TileIndexInvalid; - } - - HwlSetupTileInfo(pIn->tileMode, - pIn->flags, - pIn->bpp, - pIn->width, - pIn->height, - numSamples, - &tileInfo, - &tileInfo, - pIn->tileType, - &out); - - valid = ComputeSurfaceAlignmentsMacroTiled(pIn->tileMode, - pIn->bpp, - pIn->flags, - pIn->mipLevel, - numSamples, - &out); - - if (valid) - { - *pPitchAlign = out.pitchAlign; - *pHeightAlign = out.heightAlign; - *pSizeAlign = out.baseAlign; - } - - return valid; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlDegradeThickTileMode -* -* @brief -* Degrades valid tile mode for thick modes if needed -* -* @return -* Suitable tile mode -**************************************************************************************************** -*/ -AddrTileMode EgBasedLib::HwlDegradeThickTileMode( - AddrTileMode baseTileMode, ///< [in] base tile mode - UINT_32 numSlices, ///< [in] current number of slices - UINT_32* pBytesPerTile ///< [in,out] pointer to bytes per slice - ) const -{ - ADDR_ASSERT(numSlices < Thickness(baseTileMode)); - // if pBytesPerTile is NULL, this is a don't-care.... - UINT_32 bytesPerTile = pBytesPerTile != NULL ? 
*pBytesPerTile : 64; - - AddrTileMode expTileMode = baseTileMode; - switch (baseTileMode) - { - case ADDR_TM_1D_TILED_THICK: - expTileMode = ADDR_TM_1D_TILED_THIN1; - bytesPerTile >>= 2; - break; - case ADDR_TM_2D_TILED_THICK: - expTileMode = ADDR_TM_2D_TILED_THIN1; - bytesPerTile >>= 2; - break; - case ADDR_TM_3D_TILED_THICK: - expTileMode = ADDR_TM_3D_TILED_THIN1; - bytesPerTile >>= 2; - break; - case ADDR_TM_2D_TILED_XTHICK: - if (numSlices < ThickTileThickness) - { - expTileMode = ADDR_TM_2D_TILED_THIN1; - bytesPerTile >>= 3; - } - else - { - expTileMode = ADDR_TM_2D_TILED_THICK; - bytesPerTile >>= 1; - } - break; - case ADDR_TM_3D_TILED_XTHICK: - if (numSlices < ThickTileThickness) - { - expTileMode = ADDR_TM_3D_TILED_THIN1; - bytesPerTile >>= 3; - } - else - { - expTileMode = ADDR_TM_3D_TILED_THICK; - bytesPerTile >>= 1; - } - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - - if (pBytesPerTile != NULL) - { - *pBytesPerTile = bytesPerTile; - } - - return expTileMode; -} - -/** -**************************************************************************************************** -* EgBasedLib::DispatchComputeSurfaceAddrFromCoord -* -* @brief -* Compute surface address from given coord (x, y, slice,sample) -* -* @return -* Address in bytes -**************************************************************************************************** -*/ -UINT_64 EgBasedLib::DispatchComputeSurfaceAddrFromCoord( - const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - UINT_32 x = pIn->x; - UINT_32 y = pIn->y; - UINT_32 slice = pIn->slice; - UINT_32 sample = pIn->sample; - UINT_32 bpp = pIn->bpp; - UINT_32 pitch = pIn->pitch; - UINT_32 height = pIn->height; - UINT_32 numSlices = pIn->numSlices; - UINT_32 numSamples = ((pIn->numSamples == 0) ? 1 : pIn->numSamples); - UINT_32 numFrags = ((pIn->numFrags == 0) ? 
numSamples : pIn->numFrags); - AddrTileMode tileMode = pIn->tileMode; - AddrTileType microTileType = pIn->tileType; - BOOL_32 ignoreSE = pIn->ignoreSE; - BOOL_32 isDepthSampleOrder = pIn->isDepth; - ADDR_TILEINFO* pTileInfo = pIn->pTileInfo; - - UINT_32* pBitPosition = &pOut->bitPosition; - UINT_64 addr; - - // ADDR_DEPTH_SAMPLE_ORDER = non-disp + depth-sample-order - if (microTileType == ADDR_DEPTH_SAMPLE_ORDER) - { - isDepthSampleOrder = TRUE; - } - - if (m_chipFamily >= ADDR_CHIP_FAMILY_NI) - { - if (numFrags != numSamples) - { - numSamples = numFrags; - ADDR_ASSERT(sample < numSamples); - } - - /// @note - /// 128 bit/thick tiled surface doesn't support display tiling and - /// mipmap chain must have the same tileType, so please fill tileType correctly - if (IsLinear(pIn->tileMode) == FALSE) - { - if (bpp >= 128 || Thickness(tileMode) > 1) - { - ADDR_ASSERT(microTileType != ADDR_DISPLAYABLE); - } - } - } - - switch (tileMode) - { - case ADDR_TM_LINEAR_GENERAL://fall through - case ADDR_TM_LINEAR_ALIGNED: - addr = ComputeSurfaceAddrFromCoordLinear(x, - y, - slice, - sample, - bpp, - pitch, - height, - numSlices, - pBitPosition); - break; - case ADDR_TM_1D_TILED_THIN1://fall through - case ADDR_TM_1D_TILED_THICK: - addr = ComputeSurfaceAddrFromCoordMicroTiled(x, - y, - slice, - sample, - bpp, - pitch, - height, - numSamples, - tileMode, - microTileType, - isDepthSampleOrder, - pBitPosition); - break; - case ADDR_TM_2D_TILED_THIN1: //fall through - case ADDR_TM_2D_TILED_THICK: //fall through - case ADDR_TM_3D_TILED_THIN1: //fall through - case ADDR_TM_3D_TILED_THICK: //fall through - case ADDR_TM_2D_TILED_XTHICK: //fall through - case ADDR_TM_3D_TILED_XTHICK: //fall through - case ADDR_TM_PRT_TILED_THIN1: //fall through - case ADDR_TM_PRT_2D_TILED_THIN1://fall through - case ADDR_TM_PRT_3D_TILED_THIN1://fall through - case ADDR_TM_PRT_TILED_THICK: //fall through - case ADDR_TM_PRT_2D_TILED_THICK://fall through - case ADDR_TM_PRT_3D_TILED_THICK: - UINT_32 
pipeSwizzle; - UINT_32 bankSwizzle; - - if (m_configFlags.useCombinedSwizzle) - { - ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo, - &bankSwizzle, &pipeSwizzle); - } - else - { - pipeSwizzle = pIn->pipeSwizzle; - bankSwizzle = pIn->bankSwizzle; - } - - addr = ComputeSurfaceAddrFromCoordMacroTiled(x, - y, - slice, - sample, - bpp, - pitch, - height, - numSamples, - tileMode, - microTileType, - ignoreSE, - isDepthSampleOrder, - pipeSwizzle, - bankSwizzle, - pTileInfo, - pBitPosition); - break; - default: - addr = 0; - ADDR_ASSERT_ALWAYS(); - break; - } - - return addr; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeMacroTileEquation -* -* @brief -* Computes the address equation in macro tile -* @return -* If equation can be computed -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::ComputeMacroTileEquation( - UINT_32 log2BytesPP, ///< [in] log2 of bytes per pixel - AddrTileMode tileMode, ///< [in] tile mode - AddrTileType microTileType, ///< [in] micro tiling type - ADDR_TILEINFO* pTileInfo, ///< [in] bank structure - ADDR_EQUATION* pEquation ///< [out] Equation for addressing in macro tile - ) const -{ - ADDR_E_RETURNCODE retCode; - - // Element equation within a tile - retCode = ComputeMicroTileEquation(log2BytesPP, tileMode, microTileType, pEquation); - - if (retCode == ADDR_OK) - { - // Tile equesiton with signle pipe bank - UINT_32 numPipes = HwlGetPipes(pTileInfo); - UINT_32 numPipeBits = Log2(numPipes); - - for (UINT_32 i = 0; i < Log2(pTileInfo->bankWidth); i++) - { - pEquation->addr[pEquation->numBits].valid = 1; - pEquation->addr[pEquation->numBits].channel = 0; - pEquation->addr[pEquation->numBits].index = i + log2BytesPP + 3 + numPipeBits; - pEquation->numBits++; - } - - for (UINT_32 i = 0; i < Log2(pTileInfo->bankHeight); i++) - { - pEquation->addr[pEquation->numBits].valid 
= 1; - pEquation->addr[pEquation->numBits].channel = 1; - pEquation->addr[pEquation->numBits].index = i + 3; - pEquation->numBits++; - } - - ADDR_EQUATION equation; - memset(&equation, 0, sizeof(ADDR_EQUATION)); - - UINT_32 thresholdX = 32; - UINT_32 thresholdY = 32; - - if (IsPrtNoRotationTileMode(tileMode)) - { - UINT_32 macroTilePitch = - (MicroTileWidth * pTileInfo->bankWidth * numPipes) * pTileInfo->macroAspectRatio; - UINT_32 macroTileHeight = - (MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks) / - pTileInfo->macroAspectRatio; - thresholdX = Log2(macroTilePitch); - thresholdY = Log2(macroTileHeight); - } - - // Pipe equation - retCode = ComputePipeEquation(log2BytesPP, thresholdX, thresholdY, pTileInfo, &equation); - - if (retCode == ADDR_OK) - { - UINT_32 pipeBitStart = Log2(m_pipeInterleaveBytes); - - if (pEquation->numBits > pipeBitStart) - { - UINT_32 numLeftShift = pEquation->numBits - pipeBitStart; - - for (UINT_32 i = 0; i < numLeftShift; i++) - { - pEquation->addr[pEquation->numBits + equation.numBits - i - 1] = - pEquation->addr[pEquation->numBits - i - 1]; - pEquation->xor1[pEquation->numBits + equation.numBits - i - 1] = - pEquation->xor1[pEquation->numBits - i - 1]; - pEquation->xor2[pEquation->numBits + equation.numBits - i - 1] = - pEquation->xor2[pEquation->numBits - i - 1]; - } - } - - for (UINT_32 i = 0; i < equation.numBits; i++) - { - pEquation->addr[pipeBitStart + i] = equation.addr[i]; - pEquation->xor1[pipeBitStart + i] = equation.xor1[i]; - pEquation->xor2[pipeBitStart + i] = equation.xor2[i]; - pEquation->numBits++; - } - - // Bank equation - memset(&equation, 0, sizeof(ADDR_EQUATION)); - - retCode = ComputeBankEquation(log2BytesPP, thresholdX, thresholdY, - pTileInfo, &equation); - - if (retCode == ADDR_OK) - { - UINT_32 bankBitStart = pipeBitStart + numPipeBits + Log2(m_bankInterleave); - - if (pEquation->numBits > bankBitStart) - { - UINT_32 numLeftShift = pEquation->numBits - bankBitStart; - - for (UINT_32 i = 0; i < 
numLeftShift; i++) - { - pEquation->addr[pEquation->numBits + equation.numBits - i - 1] = - pEquation->addr[pEquation->numBits - i - 1]; - pEquation->xor1[pEquation->numBits + equation.numBits - i - 1] = - pEquation->xor1[pEquation->numBits - i - 1]; - pEquation->xor2[pEquation->numBits + equation.numBits - i - 1] = - pEquation->xor2[pEquation->numBits - i - 1]; - } - } - - for (UINT_32 i = 0; i < equation.numBits; i++) - { - pEquation->addr[bankBitStart + i] = equation.addr[i]; - pEquation->xor1[bankBitStart + i] = equation.xor1[i]; - pEquation->xor2[bankBitStart + i] = equation.xor2[i]; - pEquation->numBits++; - } - } - } - } - - return retCode; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeSurfaceAddrFromCoordMicroTiled -* -* @brief -* Computes the surface address and bit position from a -* coordinate for 2D tilied (macro tiled) -* @return -* The byte address -**************************************************************************************************** -*/ -UINT_64 EgBasedLib::ComputeSurfaceAddrFromCoordMacroTiled( - UINT_32 x, ///< [in] x coordinate - UINT_32 y, ///< [in] y coordinate - UINT_32 slice, ///< [in] slice index - UINT_32 sample, ///< [in] sample index - UINT_32 bpp, ///< [in] bits per pixel - UINT_32 pitch, ///< [in] surface pitch, in pixels - UINT_32 height, ///< [in] surface height, in pixels - UINT_32 numSamples, ///< [in] number of samples - AddrTileMode tileMode, ///< [in] tile mode - AddrTileType microTileType, ///< [in] micro tiling type - BOOL_32 ignoreSE, ///< [in] TRUE if shader enginers can be ignored - BOOL_32 isDepthSampleOrder, ///< [in] TRUE if it depth sample ordering is used - UINT_32 pipeSwizzle, ///< [in] pipe swizzle - UINT_32 bankSwizzle, ///< [in] bank swizzle - ADDR_TILEINFO* pTileInfo, ///< [in] bank structure - /// **All fields to be valid on entry** - UINT_32* pBitPosition ///< [out] bit position, e.g. 
FMT_1 will use this - ) const -{ - UINT_64 addr; - - UINT_32 microTileBytes; - UINT_32 microTileBits; - UINT_32 sampleOffset; - UINT_32 pixelIndex; - UINT_32 pixelOffset; - UINT_32 elementOffset; - UINT_32 tileSplitSlice; - UINT_32 pipe; - UINT_32 bank; - UINT_64 sliceBytes; - UINT_64 sliceOffset; - UINT_32 macroTilePitch; - UINT_32 macroTileHeight; - UINT_32 macroTilesPerRow; - UINT_32 macroTilesPerSlice; - UINT_64 macroTileBytes; - UINT_32 macroTileIndexX; - UINT_32 macroTileIndexY; - UINT_64 macroTileOffset; - UINT_64 totalOffset; - UINT_64 pipeInterleaveMask; - UINT_64 bankInterleaveMask; - UINT_64 pipeInterleaveOffset; - UINT_32 bankInterleaveOffset; - UINT_64 offset; - UINT_32 tileRowIndex; - UINT_32 tileColumnIndex; - UINT_32 tileIndex; - UINT_32 tileOffset; - - UINT_32 microTileThickness = Thickness(tileMode); - - // - // Compute the number of group, pipe, and bank bits. - // - UINT_32 numPipes = HwlGetPipes(pTileInfo); - UINT_32 numPipeInterleaveBits = Log2(m_pipeInterleaveBytes); - UINT_32 numPipeBits = Log2(numPipes); - UINT_32 numBankInterleaveBits = Log2(m_bankInterleave); - UINT_32 numBankBits = Log2(pTileInfo->banks); - - // - // Compute the micro tile size. - // - microTileBits = MicroTilePixels * microTileThickness * bpp * numSamples; - - microTileBytes = microTileBits / 8; - // - // Compute the pixel index within the micro tile. - // - pixelIndex = ComputePixelIndexWithinMicroTile(x, - y, - slice, - bpp, - tileMode, - microTileType); - - // - // Compute the sample offset and pixel offset. - // - if (isDepthSampleOrder) - { - // - // For depth surfaces, samples are stored contiguously for each element, so the sample - // offset is the sample number times the element size. 
- // - sampleOffset = sample * bpp; - pixelOffset = pixelIndex * bpp * numSamples; - } - else - { - // - // For color surfaces, all elements for a particular sample are stored contiguously, so - // the sample offset is the sample number times the micro tile size divided yBit the number - // of samples. - // - sampleOffset = sample * (microTileBits / numSamples); - pixelOffset = pixelIndex * bpp; - } - - // - // Compute the element offset. - // - elementOffset = pixelOffset + sampleOffset; - - *pBitPosition = static_cast(elementOffset % 8); - - elementOffset /= 8; //bit-to-byte - - // - // Determine if tiles need to be split across slices. - // - // If the size of the micro tile is larger than the tile split size, then the tile will be - // split across multiple slices. - // - UINT_32 slicesPerTile = 1; - - if ((microTileBytes > pTileInfo->tileSplitBytes) && (microTileThickness == 1)) - { //don't support for thick mode - - // - // Compute the number of slices per tile. - // - slicesPerTile = microTileBytes / pTileInfo->tileSplitBytes; - - // - // Compute the tile split slice number for use in rotating the bank. - // - tileSplitSlice = elementOffset / pTileInfo->tileSplitBytes; - - // - // Adjust the element offset to account for the portion of the tile that is being moved to - // a new slice.. - // - elementOffset %= pTileInfo->tileSplitBytes; - - // - // Adjust the microTileBytes size to tileSplitBytes size since - // a new slice.. - // - microTileBytes = pTileInfo->tileSplitBytes; - } - else - { - tileSplitSlice = 0; - } - - // - // Compute macro tile pitch and height. - // - macroTilePitch = - (MicroTileWidth * pTileInfo->bankWidth * numPipes) * pTileInfo->macroAspectRatio; - macroTileHeight = - (MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks) / pTileInfo->macroAspectRatio; - - // - // Compute the number of bytes per macro tile. 
Note: bytes of the same bank/pipe actually - // - macroTileBytes = - static_cast(microTileBytes) * - (macroTilePitch / MicroTileWidth) * (macroTileHeight / MicroTileHeight) / - (numPipes * pTileInfo->banks); - - // - // Compute the number of macro tiles per row. - // - macroTilesPerRow = pitch / macroTilePitch; - - // - // Compute the offset to the macro tile containing the specified coordinate. - // - macroTileIndexX = x / macroTilePitch; - macroTileIndexY = y / macroTileHeight; - macroTileOffset = ((macroTileIndexY * macroTilesPerRow) + macroTileIndexX) * macroTileBytes; - - // - // Compute the number of macro tiles per slice. - // - macroTilesPerSlice = macroTilesPerRow * (height / macroTileHeight); - - // - // Compute the slice size. - // - sliceBytes = macroTilesPerSlice * macroTileBytes; - - // - // Compute the slice offset. - // - sliceOffset = sliceBytes * (tileSplitSlice + slicesPerTile * (slice / microTileThickness)); - - // - // Compute tile offest - // - tileRowIndex = (y / MicroTileHeight) % pTileInfo->bankHeight; - tileColumnIndex = ((x / MicroTileWidth) / numPipes) % pTileInfo->bankWidth; - tileIndex = (tileRowIndex * pTileInfo->bankWidth) + tileColumnIndex; - tileOffset = tileIndex * microTileBytes; - - // - // Combine the slice offset and macro tile offset with the pixel and sample offsets, accounting - // for the pipe and bank bits in the middle of the address. - // - totalOffset = sliceOffset + macroTileOffset + elementOffset + tileOffset; - - // - // Get the pipe and bank. - // - - // when the tileMode is PRT type, then adjust x and y coordinates - if (IsPrtNoRotationTileMode(tileMode)) - { - x = x % macroTilePitch; - y = y % macroTileHeight; - } - - pipe = ComputePipeFromCoord(x, - y, - slice, - tileMode, - pipeSwizzle, - ignoreSE, - pTileInfo); - - bank = ComputeBankFromCoord(x, - y, - slice, - tileMode, - bankSwizzle, - tileSplitSlice, - pTileInfo); - - - // - // Split the offset to put some bits below the pipe+bank bits and some above. 
- // - pipeInterleaveMask = (1 << numPipeInterleaveBits) - 1; - bankInterleaveMask = (1 << numBankInterleaveBits) - 1; - pipeInterleaveOffset = totalOffset & pipeInterleaveMask; - bankInterleaveOffset = static_cast((totalOffset >> numPipeInterleaveBits) & - bankInterleaveMask); - offset = totalOffset >> (numPipeInterleaveBits + numBankInterleaveBits); - - // - // Assemble the address from its components. - // - addr = pipeInterleaveOffset; - // This is to remove /analyze warnings - UINT_32 pipeBits = pipe << numPipeInterleaveBits; - UINT_32 bankInterleaveBits = bankInterleaveOffset << (numPipeInterleaveBits + numPipeBits); - UINT_32 bankBits = bank << (numPipeInterleaveBits + numPipeBits + - numBankInterleaveBits); - UINT_64 offsetBits = offset << (numPipeInterleaveBits + numPipeBits + - numBankInterleaveBits + numBankBits); - - addr |= pipeBits; - addr |= bankInterleaveBits; - addr |= bankBits; - addr |= offsetBits; - - return addr; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeSurfaceAddrFromCoordMicroTiled -* -* @brief -* Computes the surface address and bit position from a coordinate for 1D tilied -* (micro tiled) -* @return -* The byte address -**************************************************************************************************** -*/ -UINT_64 EgBasedLib::ComputeSurfaceAddrFromCoordMicroTiled( - UINT_32 x, ///< [in] x coordinate - UINT_32 y, ///< [in] y coordinate - UINT_32 slice, ///< [in] slice index - UINT_32 sample, ///< [in] sample index - UINT_32 bpp, ///< [in] bits per pixel - UINT_32 pitch, ///< [in] pitch, in pixels - UINT_32 height, ///< [in] height, in pixels - UINT_32 numSamples, ///< [in] number of samples - AddrTileMode tileMode, ///< [in] tile mode - AddrTileType microTileType, ///< [in] micro tiling type - BOOL_32 isDepthSampleOrder, ///< [in] TRUE if depth sample ordering is used - UINT_32* pBitPosition ///< [out] bit position, e.g. 
FMT_1 will use this - ) const -{ - UINT_64 addr = 0; - - UINT_32 microTileBytes; - UINT_64 sliceBytes; - UINT_32 microTilesPerRow; - UINT_32 microTileIndexX; - UINT_32 microTileIndexY; - UINT_32 microTileIndexZ; - UINT_64 sliceOffset; - UINT_64 microTileOffset; - UINT_32 sampleOffset; - UINT_32 pixelIndex; - UINT_32 pixelOffset; - - UINT_32 microTileThickness = Thickness(tileMode); - - // - // Compute the micro tile size. - // - microTileBytes = BITS_TO_BYTES(MicroTilePixels * microTileThickness * bpp * numSamples); - - // - // Compute the slice size. - // - sliceBytes = - BITS_TO_BYTES(static_cast(pitch) * height * microTileThickness * bpp * numSamples); - - // - // Compute the number of micro tiles per row. - // - microTilesPerRow = pitch / MicroTileWidth; - - // - // Compute the micro tile index. - // - microTileIndexX = x / MicroTileWidth; - microTileIndexY = y / MicroTileHeight; - microTileIndexZ = slice / microTileThickness; - - // - // Compute the slice offset. - // - sliceOffset = static_cast(microTileIndexZ) * sliceBytes; - - // - // Compute the offset to the micro tile containing the specified coordinate. - // - microTileOffset = (static_cast(microTileIndexY) * microTilesPerRow + microTileIndexX) * - microTileBytes; - - // - // Compute the pixel index within the micro tile. - // - pixelIndex = ComputePixelIndexWithinMicroTile(x, - y, - slice, - bpp, - tileMode, - microTileType); - - // Compute the sample offset. - // - if (isDepthSampleOrder) - { - // - // For depth surfaces, samples are stored contiguously for each element, so the sample - // offset is the sample number times the element size. - // - sampleOffset = sample * bpp; - pixelOffset = pixelIndex * bpp * numSamples; - } - else - { - // - // For color surfaces, all elements for a particular sample are stored contiguously, so - // the sample offset is the sample number times the micro tile size divided yBit the number - // of samples. 
- // - sampleOffset = sample * (microTileBytes*8 / numSamples); - pixelOffset = pixelIndex * bpp; - } - - // - // Compute the bit position of the pixel. Each element is stored with one bit per sample. - // - - UINT_32 elemOffset = sampleOffset + pixelOffset; - - *pBitPosition = elemOffset % 8; - elemOffset /= 8; - - // - // Combine the slice offset, micro tile offset, sample offset, and pixel offsets. - // - addr = sliceOffset + microTileOffset + elemOffset; - - return addr; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlComputePixelCoordFromOffset -* -* @brief -* Compute pixel coordinate from offset inside a micro tile -* @return -* N/A -**************************************************************************************************** -*/ -VOID EgBasedLib::HwlComputePixelCoordFromOffset( - UINT_32 offset, ///< [in] offset inside micro tile in bits - UINT_32 bpp, ///< [in] bits per pixel - UINT_32 numSamples, ///< [in] number of samples - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 tileBase, ///< [in] base offset within a tile - UINT_32 compBits, ///< [in] component bits actually needed(for planar surface) - UINT_32* pX, ///< [out] x coordinate - UINT_32* pY, ///< [out] y coordinate - UINT_32* pSlice, ///< [out] slice index - UINT_32* pSample, ///< [out] sample index - AddrTileType microTileType, ///< [in] micro tiling type - BOOL_32 isDepthSampleOrder ///< [in] TRUE if depth sample order in microtile is used - ) const -{ - UINT_32 x = 0; - UINT_32 y = 0; - UINT_32 z = 0; - UINT_32 thickness = Thickness(tileMode); - - // For planar surface, we adjust offset acoording to tile base - if ((bpp != compBits) && (compBits != 0) && isDepthSampleOrder) - { - offset -= tileBase; - - ADDR_ASSERT(microTileType == ADDR_NON_DISPLAYABLE || - microTileType == ADDR_DEPTH_SAMPLE_ORDER); - - bpp = compBits; - } - - UINT_32 sampleTileBits; - UINT_32 samplePixelBits; - UINT_32 pixelIndex; - - 
if (isDepthSampleOrder) - { - samplePixelBits = bpp * numSamples; - pixelIndex = offset / samplePixelBits; - *pSample = (offset % samplePixelBits) / bpp; - } - else - { - sampleTileBits = MicroTilePixels * bpp * thickness; - *pSample = offset / sampleTileBits; - pixelIndex = (offset % sampleTileBits) / bpp; - } - - if (microTileType != ADDR_THICK) - { - if (microTileType == ADDR_DISPLAYABLE) // displayable - { - switch (bpp) - { - case 8: - x = pixelIndex & 0x7; - y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,3),_BIT(pixelIndex,4)); - break; - case 16: - x = pixelIndex & 0x7; - y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,3)); - break; - case 32: - x = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,1),_BIT(pixelIndex,0)); - y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,2)); - break; - case 64: - x = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,2),_BIT(pixelIndex,0)); - y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,1)); - break; - case 128: - x = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,2),_BIT(pixelIndex,1)); - y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,0)); - break; - default: - break; - } - } - else if (microTileType == ADDR_NON_DISPLAYABLE || microTileType == ADDR_DEPTH_SAMPLE_ORDER) - { - x = Bits2Number(3, _BIT(pixelIndex,4),_BIT(pixelIndex,2),_BIT(pixelIndex,0)); - y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,3),_BIT(pixelIndex,1)); - } - else if (microTileType == ADDR_ROTATED) - { - /* - 8-Bit Elements - element_index[5:0] = { x[2], x[0], x[1], y[2], y[1], y[0] } - - 16-Bit Elements - element_index[5:0] = { x[2], x[1], x[0], y[2], y[1], y[0] } - - 32-Bit Elements - element_index[5:0] = { x[2], x[1], y[2], x[0], y[1], y[0] } - - 64-Bit Elements - element_index[5:0] = { y[2], x[2], x[1], y[1], x[0], y[0] } - */ - switch(bpp) - { - case 8: - x = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,3),_BIT(pixelIndex,4)); 
- y = pixelIndex & 0x7; - break; - case 16: - x = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,3)); - y = pixelIndex & 0x7; - break; - case 32: - x = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,2)); - y = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,1),_BIT(pixelIndex,0)); - break; - case 64: - x = Bits2Number(3, _BIT(pixelIndex,4),_BIT(pixelIndex,3),_BIT(pixelIndex,1)); - y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,2),_BIT(pixelIndex,0)); - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - } - - if (thickness > 1) // thick - { - z = Bits2Number(3, _BIT(pixelIndex,8),_BIT(pixelIndex,7),_BIT(pixelIndex,6)); - } - } - else - { - ADDR_ASSERT((m_chipFamily >= ADDR_CHIP_FAMILY_CI) && (thickness > 1)); - /* - 8-Bit Elements and 16-Bit Elements - element_index[7:0] = { y[2], x[2], z[1], z[0], y[1], x[1], y[0], x[0] } - - 32-Bit Elements - element_index[7:0] = { y[2], x[2], z[1], y[1], z[0], x[1], y[0], x[0] } - - 64-Bit Elements and 128-Bit Elements - element_index[7:0] = { y[2], x[2], z[1], y[1], x[1], z[0], y[0], x[0] } - - The equation to compute the element index for the extra thick tile: - element_index[8] = z[2] - */ - switch (bpp) - { - case 8: - case 16: // fall-through - x = Bits2Number(3, _BIT(pixelIndex,6),_BIT(pixelIndex,2),_BIT(pixelIndex,0)); - y = Bits2Number(3, _BIT(pixelIndex,7),_BIT(pixelIndex,3),_BIT(pixelIndex,1)); - z = Bits2Number(2, _BIT(pixelIndex,5),_BIT(pixelIndex,4)); - break; - case 32: - x = Bits2Number(3, _BIT(pixelIndex,6),_BIT(pixelIndex,2),_BIT(pixelIndex,0)); - y = Bits2Number(3, _BIT(pixelIndex,7),_BIT(pixelIndex,4),_BIT(pixelIndex,1)); - z = Bits2Number(2, _BIT(pixelIndex,5),_BIT(pixelIndex,3)); - break; - case 64: - case 128: // fall-through - x = Bits2Number(3, _BIT(pixelIndex,6),_BIT(pixelIndex,3),_BIT(pixelIndex,0)); - y = Bits2Number(3, _BIT(pixelIndex,7),_BIT(pixelIndex,4),_BIT(pixelIndex,1)); - z = Bits2Number(2, _BIT(pixelIndex,5),_BIT(pixelIndex,2)); - 
break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - - if (thickness == 8) - { - z += Bits2Number(3,_BIT(pixelIndex,8),0,0); - } - } - - *pX = x; - *pY = y; - *pSlice += z; -} - - -/** -**************************************************************************************************** -* EgBasedLib::DispatchComputeSurfaceCoordFromAddrDispatch -* -* @brief -* Compute (x,y,slice,sample) coordinates from surface address -* @return -* N/A -**************************************************************************************************** -*/ -VOID EgBasedLib::DispatchComputeSurfaceCoordFromAddr( - const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - UINT_64 addr = pIn->addr; - UINT_32 bitPosition = pIn->bitPosition; - UINT_32 bpp = pIn->bpp; - UINT_32 pitch = pIn->pitch; - UINT_32 height = pIn->height; - UINT_32 numSlices = pIn->numSlices; - UINT_32 numSamples = ((pIn->numSamples == 0) ? 1 : pIn->numSamples); - UINT_32 numFrags = ((pIn->numFrags == 0) ? 
numSamples : pIn->numFrags); - AddrTileMode tileMode = pIn->tileMode; - UINT_32 tileBase = pIn->tileBase; - UINT_32 compBits = pIn->compBits; - AddrTileType microTileType = pIn->tileType; - BOOL_32 ignoreSE = pIn->ignoreSE; - BOOL_32 isDepthSampleOrder = pIn->isDepth; - ADDR_TILEINFO* pTileInfo = pIn->pTileInfo; - - UINT_32* pX = &pOut->x; - UINT_32* pY = &pOut->y; - UINT_32* pSlice = &pOut->slice; - UINT_32* pSample = &pOut->sample; - - if (microTileType == ADDR_DEPTH_SAMPLE_ORDER) - { - isDepthSampleOrder = TRUE; - } - - if (m_chipFamily >= ADDR_CHIP_FAMILY_NI) - { - if (numFrags != numSamples) - { - numSamples = numFrags; - } - - /// @note - /// 128 bit/thick tiled surface doesn't support display tiling and - /// mipmap chain must have the same tileType, so please fill tileType correctly - if (IsLinear(pIn->tileMode) == FALSE) - { - if (bpp >= 128 || Thickness(tileMode) > 1) - { - ADDR_ASSERT(microTileType != ADDR_DISPLAYABLE); - } - } - } - - switch (tileMode) - { - case ADDR_TM_LINEAR_GENERAL://fall through - case ADDR_TM_LINEAR_ALIGNED: - ComputeSurfaceCoordFromAddrLinear(addr, - bitPosition, - bpp, - pitch, - height, - numSlices, - pX, - pY, - pSlice, - pSample); - break; - case ADDR_TM_1D_TILED_THIN1://fall through - case ADDR_TM_1D_TILED_THICK: - ComputeSurfaceCoordFromAddrMicroTiled(addr, - bitPosition, - bpp, - pitch, - height, - numSamples, - tileMode, - tileBase, - compBits, - pX, - pY, - pSlice, - pSample, - microTileType, - isDepthSampleOrder); - break; - case ADDR_TM_2D_TILED_THIN1: //fall through - case ADDR_TM_2D_TILED_THICK: //fall through - case ADDR_TM_3D_TILED_THIN1: //fall through - case ADDR_TM_3D_TILED_THICK: //fall through - case ADDR_TM_2D_TILED_XTHICK: //fall through - case ADDR_TM_3D_TILED_XTHICK: //fall through - case ADDR_TM_PRT_TILED_THIN1: //fall through - case ADDR_TM_PRT_2D_TILED_THIN1://fall through - case ADDR_TM_PRT_3D_TILED_THIN1://fall through - case ADDR_TM_PRT_TILED_THICK: //fall through - case 
ADDR_TM_PRT_2D_TILED_THICK://fall through - case ADDR_TM_PRT_3D_TILED_THICK: - UINT_32 pipeSwizzle; - UINT_32 bankSwizzle; - - if (m_configFlags.useCombinedSwizzle) - { - ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo, - &bankSwizzle, &pipeSwizzle); - } - else - { - pipeSwizzle = pIn->pipeSwizzle; - bankSwizzle = pIn->bankSwizzle; - } - - ComputeSurfaceCoordFromAddrMacroTiled(addr, - bitPosition, - bpp, - pitch, - height, - numSamples, - tileMode, - tileBase, - compBits, - microTileType, - ignoreSE, - isDepthSampleOrder, - pipeSwizzle, - bankSwizzle, - pTileInfo, - pX, - pY, - pSlice, - pSample); - break; - default: - ADDR_ASSERT_ALWAYS(); - } -} - - -/** -**************************************************************************************************** -* EgBasedLib::ComputeSurfaceCoordFromAddrMacroTiled -* -* @brief -* Compute surface coordinates from address for macro tiled surface -* @return -* N/A -**************************************************************************************************** -*/ -VOID EgBasedLib::ComputeSurfaceCoordFromAddrMacroTiled( - UINT_64 addr, ///< [in] byte address - UINT_32 bitPosition, ///< [in] bit position - UINT_32 bpp, ///< [in] bits per pixel - UINT_32 pitch, ///< [in] pitch in pixels - UINT_32 height, ///< [in] height in pixels - UINT_32 numSamples, ///< [in] number of samples - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 tileBase, ///< [in] tile base offset - UINT_32 compBits, ///< [in] component bits (for planar surface) - AddrTileType microTileType, ///< [in] micro tiling type - BOOL_32 ignoreSE, ///< [in] TRUE if shader engines can be ignored - BOOL_32 isDepthSampleOrder, ///< [in] TRUE if depth sample order is used - UINT_32 pipeSwizzle, ///< [in] pipe swizzle - UINT_32 bankSwizzle, ///< [in] bank swizzle - ADDR_TILEINFO* pTileInfo, ///< [in] bank structure. 
- /// **All fields to be valid on entry** - UINT_32* pX, ///< [out] X coord - UINT_32* pY, ///< [out] Y coord - UINT_32* pSlice, ///< [out] slice index - UINT_32* pSample ///< [out] sample index - ) const -{ - UINT_32 mx; - UINT_32 my; - UINT_64 tileBits; - UINT_64 macroTileBits; - UINT_32 slices; - UINT_32 tileSlices; - UINT_64 elementOffset; - UINT_64 macroTileIndex; - UINT_32 tileIndex; - UINT_64 totalOffset; - - - UINT_32 bank; - UINT_32 pipe; - UINT_32 groupBits = m_pipeInterleaveBytes << 3; - UINT_32 pipes = HwlGetPipes(pTileInfo); - UINT_32 banks = pTileInfo->banks; - - UINT_32 bankInterleave = m_bankInterleave; - - UINT_64 addrBits = BYTES_TO_BITS(addr) + bitPosition; - - // - // remove bits for bank and pipe - // - totalOffset = (addrBits % groupBits) + - (((addrBits / groupBits / pipes) % bankInterleave) * groupBits) + - (((addrBits / groupBits / pipes) / bankInterleave) / banks) * groupBits * bankInterleave; - - UINT_32 microTileThickness = Thickness(tileMode); - - UINT_32 microTileBits = bpp * microTileThickness * MicroTilePixels * numSamples; - - UINT_32 microTileBytes = BITS_TO_BYTES(microTileBits); - // - // Determine if tiles need to be split across slices. - // - // If the size of the micro tile is larger than the tile split size, then the tile will be - // split across multiple slices. - // - UINT_32 slicesPerTile = 1; //_State->TileSlices - - if ((microTileBytes > pTileInfo->tileSplitBytes) && (microTileThickness == 1)) - { //don't support for thick mode - - // - // Compute the number of slices per tile. - // - slicesPerTile = microTileBytes / pTileInfo->tileSplitBytes; - } - - tileBits = microTileBits / slicesPerTile; // micro tile bits - - // in micro tiles because not MicroTileWidth timed. 
- UINT_32 macroWidth = pTileInfo->bankWidth * pipes * pTileInfo->macroAspectRatio; - // in micro tiles as well - UINT_32 macroHeight = pTileInfo->bankHeight * banks / pTileInfo->macroAspectRatio; - - UINT_32 pitchInMacroTiles = pitch / MicroTileWidth / macroWidth; - - macroTileBits = (macroWidth * macroHeight) * tileBits / (banks * pipes); - - macroTileIndex = totalOffset / macroTileBits; - - // pitchMacros * height / heightMacros; macroTilesPerSlice == _State->SliceMacros - UINT_32 macroTilesPerSlice = (pitch / (macroWidth * MicroTileWidth)) * height / - (macroHeight * MicroTileWidth); - - slices = static_cast(macroTileIndex / macroTilesPerSlice); - - *pSlice = static_cast(slices / slicesPerTile * microTileThickness); - - // - // calculate element offset and x[2:0], y[2:0], z[1:0] for thick - // - tileSlices = slices % slicesPerTile; - - elementOffset = tileSlices * tileBits; - elementOffset += totalOffset % tileBits; - - UINT_32 coordZ = 0; - - HwlComputePixelCoordFromOffset(static_cast(elementOffset), - bpp, - numSamples, - tileMode, - tileBase, - compBits, - pX, - pY, - &coordZ, - pSample, - microTileType, - isDepthSampleOrder); - - macroTileIndex = macroTileIndex % macroTilesPerSlice; - *pY += static_cast(macroTileIndex / pitchInMacroTiles * macroHeight * MicroTileHeight); - *pX += static_cast(macroTileIndex % pitchInMacroTiles * macroWidth * MicroTileWidth); - - *pSlice += coordZ; - - tileIndex = static_cast((totalOffset % macroTileBits) / tileBits); - - my = (tileIndex / pTileInfo->bankWidth) % pTileInfo->bankHeight * MicroTileHeight; - mx = (tileIndex % pTileInfo->bankWidth) * pipes * MicroTileWidth; - - *pY += my; - *pX += mx; - - bank = ComputeBankFromAddr(addr, banks, pipes); - pipe = ComputePipeFromAddr(addr, pipes); - - HwlComputeSurfaceCoord2DFromBankPipe(tileMode, - pX, - pY, - *pSlice, - bank, - pipe, - bankSwizzle, - pipeSwizzle, - tileSlices, - ignoreSE, - pTileInfo); -} - -/** 
-**************************************************************************************************** -* EgBasedLib::ComputeSurfaceCoord2DFromBankPipe -* -* @brief -* Compute surface x,y coordinates from bank/pipe info -* @return -* N/A -**************************************************************************************************** -*/ -VOID EgBasedLib::ComputeSurfaceCoord2DFromBankPipe( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 x, ///< [in] x coordinate - UINT_32 y, ///< [in] y coordinate - UINT_32 slice, ///< [in] slice index - UINT_32 bank, ///< [in] bank number - UINT_32 pipe, ///< [in] pipe number - UINT_32 bankSwizzle,///< [in] bank swizzle - UINT_32 pipeSwizzle,///< [in] pipe swizzle - UINT_32 tileSlices, ///< [in] slices in a micro tile - ADDR_TILEINFO* pTileInfo, ///< [in] bank structure. **All fields to be valid on entry** - CoordFromBankPipe* pOutput ///< [out] pointer to extracted x/y bits - ) const -{ - UINT_32 yBit3 = 0; - UINT_32 yBit4 = 0; - UINT_32 yBit5 = 0; - UINT_32 yBit6 = 0; - - UINT_32 xBit3 = 0; - UINT_32 xBit4 = 0; - UINT_32 xBit5 = 0; - - UINT_32 tileSplitRotation; - - UINT_32 numPipes = HwlGetPipes(pTileInfo); - - UINT_32 bankRotation = ComputeBankRotation(tileMode, - pTileInfo->banks, numPipes); - - UINT_32 pipeRotation = ComputePipeRotation(tileMode, numPipes); - - UINT_32 xBit = x / (MicroTileWidth * pTileInfo->bankWidth * numPipes); - UINT_32 yBit = y / (MicroTileHeight * pTileInfo->bankHeight); - - //calculate the bank and pipe before rotation and swizzle - - switch (tileMode) - { - case ADDR_TM_2D_TILED_THIN1: //fall through - case ADDR_TM_2D_TILED_THICK: //fall through - case ADDR_TM_2D_TILED_XTHICK: //fall through - case ADDR_TM_3D_TILED_THIN1: //fall through - case ADDR_TM_3D_TILED_THICK: //fall through - case ADDR_TM_3D_TILED_XTHICK: - tileSplitRotation = ((pTileInfo->banks / 2) + 1); - break; - default: - tileSplitRotation = 0; - break; - } - - UINT_32 microTileThickness = Thickness(tileMode); - - bank ^= 
tileSplitRotation * tileSlices; - if (pipeRotation == 0) - { - bank ^= bankRotation * (slice / microTileThickness) + bankSwizzle; - bank %= pTileInfo->banks; - pipe ^= pipeSwizzle; - } - else - { - bank ^= bankRotation * (slice / microTileThickness) / numPipes + bankSwizzle; - bank %= pTileInfo->banks; - pipe ^= pipeRotation * (slice / microTileThickness) + pipeSwizzle; - } - - if (pTileInfo->macroAspectRatio == 1) - { - switch (pTileInfo->banks) - { - case 2: - yBit3 = _BIT(bank, 0) ^ _BIT(xBit,0); - break; - case 4: - yBit4 = _BIT(bank, 0) ^ _BIT(xBit,0); - yBit3 = _BIT(bank, 1) ^ _BIT(xBit,1); - break; - case 8: - yBit3 = _BIT(bank, 2) ^ _BIT(xBit,2); - yBit5 = _BIT(bank, 0) ^ _BIT(xBit,0); - yBit4 = _BIT(bank, 1) ^ _BIT(xBit,1) ^ yBit5; - break; - case 16: - yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); - yBit4 = _BIT(bank, 2) ^ _BIT(xBit, 2); - yBit6 = _BIT(bank, 0) ^ _BIT(xBit, 0); - yBit5 = _BIT(bank, 1) ^ _BIT(xBit, 1) ^ yBit6; - break; - default: - break; - } - - } - else if (pTileInfo->macroAspectRatio == 2) - { - switch (pTileInfo->banks) - { - case 2: //xBit3 = yBit3^b0 - xBit3 = _BIT(bank, 0) ^ _BIT(yBit,0); - break; - case 4: //xBit3=yBit4^b0; yBit3=xBit4^b1 - xBit3 = _BIT(bank, 0) ^ _BIT(yBit,1); - yBit3 = _BIT(bank, 1) ^ _BIT(xBit,1); - break; - case 8: //xBit4, xBit5, yBit5 are known - xBit3 = _BIT(bank, 0) ^ _BIT(yBit,2); - yBit3 = _BIT(bank, 2) ^ _BIT(xBit,2); - yBit4 = _BIT(bank, 1) ^ _BIT(xBit,1) ^ _BIT(yBit, 2); - break; - case 16://x4,x5,x6,y6 are known - xBit3 = _BIT(bank, 0) ^ _BIT(yBit, 3); //x3 = y6 ^ b0 - yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); //y3 = x6 ^ b3 - yBit4 = _BIT(bank, 2) ^ _BIT(xBit, 2); //y4 = x5 ^ b2 - yBit5 = _BIT(bank, 1) ^ _BIT(xBit, 1) ^ _BIT(yBit, 3); //y5=x4^y6^b1 - break; - default: - break; - } - } - else if (pTileInfo->macroAspectRatio == 4) - { - switch (pTileInfo->banks) - { - case 4: //yBit3, yBit4 - xBit3 = _BIT(bank, 0) ^ _BIT(yBit,1); - xBit4 = _BIT(bank, 1) ^ _BIT(yBit,0); - break; - case 8: //xBit5, yBit4, yBit5 - 
xBit3 = _BIT(bank, 0) ^ _BIT(yBit,2); - yBit3 = _BIT(bank, 2) ^ _BIT(xBit,2); - xBit4 = _BIT(bank, 1) ^ _BIT(yBit,1) ^ _BIT(yBit,2); - break; - case 16: //xBit5, xBit6, yBit5, yBit6 - xBit3 = _BIT(bank, 0) ^ _BIT(yBit, 3);//x3 = b0 ^ y6 - xBit4 = _BIT(bank, 1) ^ _BIT(yBit, 2) ^ _BIT(yBit, 3);//x4 = b1 ^ y5 ^ y6; - yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); //y3 = b3 ^ x6; - yBit4 = _BIT(bank, 2) ^ _BIT(xBit, 2); //y4 = b2 ^ x5; - break; - default: - break; - } - } - else if (pTileInfo->macroAspectRatio == 8) - { - switch (pTileInfo->banks) - { - case 8: //yBit3, yBit4, yBit5 - xBit3 = _BIT(bank, 0) ^ _BIT(yBit,2); //x3 = b0 ^ y5; - xBit4 = _BIT(bank, 1) ^ _BIT(yBit,1) ^ _BIT(yBit, 2);//x4 = b1 ^ y4 ^ y5; - xBit5 = _BIT(bank, 2) ^ _BIT(yBit,0); - break; - case 16: //xBit6, yBit4, yBit5, yBit6 - xBit3 = _BIT(bank, 0) ^ _BIT(yBit, 3);//x3 = y6 ^ b0 - xBit4 = _BIT(bank, 1) ^ _BIT(yBit, 2) ^ _BIT(yBit, 3);//x4 = y5 ^ y6 ^ b1 - xBit5 = _BIT(bank, 2) ^ _BIT(yBit, 1);//x5 = y4 ^ b2 - yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); //y3 = x6 ^ b3 - break; - default: - break; - } - } - - pOutput->xBits = xBit; - pOutput->yBits = yBit; - - pOutput->xBit3 = xBit3; - pOutput->xBit4 = xBit4; - pOutput->xBit5 = xBit5; - pOutput->yBit3 = yBit3; - pOutput->yBit4 = yBit4; - pOutput->yBit5 = yBit5; - pOutput->yBit6 = yBit6; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlExtractBankPipeSwizzle -* @brief -* Entry of EgBasedLib ExtractBankPipeSwizzle -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::HwlExtractBankPipeSwizzle( - const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, ///< [in] input structure - ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut ///< [out] output structure - ) const -{ - ExtractBankPipeSwizzle(pIn->base256b, - pIn->pTileInfo, - &pOut->bankSwizzle, - &pOut->pipeSwizzle); - - return 
ADDR_OK; -} - - -/** -**************************************************************************************************** -* EgBasedLib::HwlCombineBankPipeSwizzle -* @brief -* Combine bank/pipe swizzle -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::HwlCombineBankPipeSwizzle( - UINT_32 bankSwizzle, ///< [in] bank swizzle - UINT_32 pipeSwizzle, ///< [in] pipe swizzle - ADDR_TILEINFO* pTileInfo, ///< [in] tile info - UINT_64 baseAddr, ///< [in] base address - UINT_32* pTileSwizzle ///< [out] combined swizzle - ) const -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - if (pTileSwizzle) - { - *pTileSwizzle = GetBankPipeSwizzle(bankSwizzle, pipeSwizzle, baseAddr, pTileInfo); - } - else - { - retCode = ADDR_INVALIDPARAMS; - } - - return retCode; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlComputeBaseSwizzle -* @brief -* Compute base swizzle -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::HwlComputeBaseSwizzle( - const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, - ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut - ) const -{ - UINT_32 bankSwizzle = 0; - UINT_32 pipeSwizzle = 0; - ADDR_TILEINFO* pTileInfo = pIn->pTileInfo; - - ADDR_ASSERT(IsMacroTiled(pIn->tileMode)); - ADDR_ASSERT(pIn->pTileInfo); - - /// This is a legacy misreading of h/w doc, use it as it doesn't hurt. 
- static const UINT_8 bankRotationArray[4][16] = { - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // ADDR_SURF_2_BANK - { 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // ADDR_SURF_4_BANK - { 0, 3, 6, 1, 4, 7, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0 }, // ADDR_SURF_8_BANK - { 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 }, // ADDR_SURF_16_BANK - }; - - UINT_32 pipes = HwlGetPipes(pTileInfo); - (void)pipes; - UINT_32 banks = pTileInfo ? pTileInfo->banks : 2; - UINT_32 hwNumBanks; - - // Uses less bank swizzle bits - if (pIn->option.reduceBankBit && banks > 2) - { - banks >>= 1; - } - - switch (banks) - { - case 2: - hwNumBanks = 0; - break; - case 4: - hwNumBanks = 1; - break; - case 8: - hwNumBanks = 2; - break; - case 16: - hwNumBanks = 3; - break; - default: - ADDR_ASSERT_ALWAYS(); - hwNumBanks = 0; - break; - } - - if (pIn->option.genOption == ADDR_SWIZZLE_GEN_LINEAR) - { - bankSwizzle = pIn->surfIndex & (banks - 1); - } - else // (pIn->option.genOption == ADDR_SWIZZLE_GEN_DEFAULT) - { - bankSwizzle = bankRotationArray[hwNumBanks][pIn->surfIndex & (banks - 1)]; - } - - if (IsMacro3dTiled(pIn->tileMode)) - { - pipeSwizzle = pIn->surfIndex & (HwlGetPipes(pTileInfo) - 1); - } - - return HwlCombineBankPipeSwizzle(bankSwizzle, pipeSwizzle, pTileInfo, 0, &pOut->tileSwizzle); -} - -/** -**************************************************************************************************** -* EgBasedLib::ExtractBankPipeSwizzle -* @brief -* Extract bank/pipe swizzle from base256b -* @return -* N/A -**************************************************************************************************** -*/ -VOID EgBasedLib::ExtractBankPipeSwizzle( - UINT_32 base256b, ///< [in] input base256b register value - ADDR_TILEINFO* pTileInfo, ///< [in] 2D tile parameters. 
Client must provide all data - UINT_32* pBankSwizzle, ///< [out] bank swizzle - UINT_32* pPipeSwizzle ///< [out] pipe swizzle - ) const -{ - UINT_32 bankSwizzle = 0; - UINT_32 pipeSwizzle = 0; - - if (base256b != 0) - { - UINT_32 numPipes = HwlGetPipes(pTileInfo); - UINT_32 bankBits = QLog2(pTileInfo->banks); - UINT_32 pipeBits = QLog2(numPipes); - UINT_32 groupBytes = m_pipeInterleaveBytes; - UINT_32 bankInterleave = m_bankInterleave; - - pipeSwizzle = - (base256b / (groupBytes >> 8)) & ((1<> 8) / numPipes / bankInterleave) & ((1 << bankBits) - 1); - } - - *pPipeSwizzle = pipeSwizzle; - *pBankSwizzle = bankSwizzle; -} - -/** -**************************************************************************************************** -* EgBasedLib::GetBankPipeSwizzle -* @brief -* Combine bank/pipe swizzle -* @return -* Base256b bits (only filled bank/pipe bits) -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::GetBankPipeSwizzle( - UINT_32 bankSwizzle, ///< [in] bank swizzle - UINT_32 pipeSwizzle, ///< [in] pipe swizzle - UINT_64 baseAddr, ///< [in] base address - ADDR_TILEINFO* pTileInfo ///< [in] tile info - ) const -{ - UINT_32 pipeBits = QLog2(HwlGetPipes(pTileInfo)); - UINT_32 bankInterleaveBits = QLog2(m_bankInterleave); - UINT_32 tileSwizzle = pipeSwizzle + ((bankSwizzle << bankInterleaveBits) << pipeBits); - - baseAddr ^= tileSwizzle * m_pipeInterleaveBytes; - baseAddr >>= 8; - - return static_cast(baseAddr); -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeSliceTileSwizzle -* @brief -* Compute cubemap/3d texture faces/slices tile swizzle -* @return -* Tile swizzle -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::ComputeSliceTileSwizzle( - AddrTileMode tileMode, ///< [in] Tile mode - UINT_32 baseSwizzle, ///< [in] Base swizzle - 
UINT_32 slice, ///< [in] Slice index, Cubemap face index, 0 means +X - UINT_64 baseAddr, ///< [in] Base address - ADDR_TILEINFO* pTileInfo ///< [in] Bank structure - ) const -{ - UINT_32 tileSwizzle = 0; - - if (IsMacroTiled(tileMode)) // Swizzle only for macro tile mode - { - UINT_32 firstSlice = slice / Thickness(tileMode); - - UINT_32 numPipes = HwlGetPipes(pTileInfo); - UINT_32 numBanks = pTileInfo->banks; - - UINT_32 pipeRotation; - UINT_32 bankRotation; - - UINT_32 bankSwizzle = 0; - UINT_32 pipeSwizzle = 0; - - pipeRotation = ComputePipeRotation(tileMode, numPipes); - bankRotation = ComputeBankRotation(tileMode, numBanks, numPipes); - - if (baseSwizzle != 0) - { - ExtractBankPipeSwizzle(baseSwizzle, - pTileInfo, - &bankSwizzle, - &pipeSwizzle); - } - - if (pipeRotation == 0) //2D mode - { - bankSwizzle += firstSlice * bankRotation; - bankSwizzle %= numBanks; - } - else //3D mode - { - pipeSwizzle += firstSlice * pipeRotation; - pipeSwizzle %= numPipes; - bankSwizzle += firstSlice * bankRotation / numPipes; - bankSwizzle %= numBanks; - } - - tileSwizzle = GetBankPipeSwizzle(bankSwizzle, - pipeSwizzle, - baseAddr, - pTileInfo); - } - - return tileSwizzle; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlComputeQbStereoRightSwizzle -* -* @brief -* Compute right eye swizzle -* @return -* swizzle -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::HwlComputeQbStereoRightSwizzle( - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pInfo ///< [in] Surface info, must be valid - ) const -{ - UINT_32 bankBits = 0; - UINT_32 swizzle = 0; - - // The assumption is default swizzle for left eye is 0 - if (IsMacroTiled(pInfo->tileMode) && pInfo->pStereoInfo && pInfo->pTileInfo) - { - bankBits = ComputeBankFromCoord(0, pInfo->height, 0, - pInfo->tileMode, 0, 0, pInfo->pTileInfo); - - if (bankBits) - { - 
HwlCombineBankPipeSwizzle(bankBits, 0, pInfo->pTileInfo, 0, &swizzle); - } - } - - return swizzle; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeBankFromCoord -* -* @brief -* Compute bank number from coordinates -* @return -* Bank number -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::ComputeBankFromCoord( - UINT_32 x, ///< [in] x coordinate - UINT_32 y, ///< [in] y coordinate - UINT_32 slice, ///< [in] slice index - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 bankSwizzle, ///< [in] bank swizzle - UINT_32 tileSplitSlice, ///< [in] If the size of the pixel offset is larger than the - /// tile split size, then the pixel will be moved to a separate - /// slice. This value equals pixelOffset / tileSplitBytes - /// in this case. Otherwise this is 0. - ADDR_TILEINFO* pTileInfo ///< [in] tile info - ) const -{ - UINT_32 pipes = HwlGetPipes(pTileInfo); - UINT_32 bankBit0 = 0; - UINT_32 bankBit1 = 0; - UINT_32 bankBit2 = 0; - UINT_32 bankBit3 = 0; - UINT_32 sliceRotation; - UINT_32 tileSplitRotation; - UINT_32 bank; - UINT_32 numBanks = pTileInfo->banks; - UINT_32 bankWidth = pTileInfo->bankWidth; - UINT_32 bankHeight = pTileInfo->bankHeight; - - UINT_32 tx = x / MicroTileWidth / (bankWidth * pipes); - UINT_32 ty = y / MicroTileHeight / bankHeight; - - UINT_32 x3 = _BIT(tx,0); - UINT_32 x4 = _BIT(tx,1); - UINT_32 x5 = _BIT(tx,2); - UINT_32 x6 = _BIT(tx,3); - UINT_32 y3 = _BIT(ty,0); - UINT_32 y4 = _BIT(ty,1); - UINT_32 y5 = _BIT(ty,2); - UINT_32 y6 = _BIT(ty,3); - - switch (numBanks) - { - case 16: - bankBit0 = x3 ^ y6; - bankBit1 = x4 ^ y5 ^ y6; - bankBit2 = x5 ^ y4; - bankBit3 = x6 ^ y3; - break; - case 8: - bankBit0 = x3 ^ y5; - bankBit1 = x4 ^ y4 ^ y5; - bankBit2 = x5 ^ y3; - break; - case 4: - bankBit0 = x3 ^ y4; - bankBit1 = x4 ^ y3; - break; - case 2: - bankBit0 = x3 ^ y3; - break; - default: - 
ADDR_ASSERT_ALWAYS(); - break; - } - - bank = bankBit0 | (bankBit1 << 1) | (bankBit2 << 2) | (bankBit3 << 3); - - //Bits2Number(4, bankBit3, bankBit2, bankBit1, bankBit0); - - bank = HwlPreAdjustBank((x / MicroTileWidth), bank, pTileInfo); - // - // Compute bank rotation for the slice. - // - UINT_32 microTileThickness = Thickness(tileMode); - - switch (tileMode) - { - case ADDR_TM_2D_TILED_THIN1: // fall through - case ADDR_TM_2D_TILED_THICK: // fall through - case ADDR_TM_2D_TILED_XTHICK: - sliceRotation = ((numBanks / 2) - 1) * (slice / microTileThickness); - break; - case ADDR_TM_3D_TILED_THIN1: // fall through - case ADDR_TM_3D_TILED_THICK: // fall through - case ADDR_TM_3D_TILED_XTHICK: - sliceRotation = - Max(1u, (pipes / 2) - 1) * (slice / microTileThickness) / pipes; - break; - default: - sliceRotation = 0; - break; - } - - - // - // Compute bank rotation for the tile split slice. - // - // The sample slice will be non-zero if samples must be split across multiple slices. - // This situation arises when the micro tile size multiplied yBit the number of samples exceeds - // the split size (set in GB_ADDR_CONFIG). - // - switch (tileMode) - { - case ADDR_TM_2D_TILED_THIN1: //fall through - case ADDR_TM_3D_TILED_THIN1: //fall through - case ADDR_TM_PRT_2D_TILED_THIN1: //fall through - case ADDR_TM_PRT_3D_TILED_THIN1: //fall through - tileSplitRotation = ((numBanks / 2) + 1) * tileSplitSlice; - break; - default: - tileSplitRotation = 0; - break; - } - - // - // Apply bank rotation for the slice and tile split slice. 
- // - bank ^= bankSwizzle + sliceRotation; - bank ^= tileSplitRotation; - - bank &= (numBanks - 1); - - return bank; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeBankFromAddr -* -* @brief -* Compute the bank number from an address -* @return -* Bank number -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::ComputeBankFromAddr( - UINT_64 addr, ///< [in] address - UINT_32 numBanks, ///< [in] number of banks - UINT_32 numPipes ///< [in] number of pipes - ) const -{ - UINT_32 bank; - - // - // The LSBs of the address are arranged as follows: - // bank | bankInterleave | pipe | pipeInterleave - // - // To get the bank number, shift off the pipe interleave, pipe, and bank interlave bits and - // mask the bank bits. - // - bank = static_cast( - (addr >> Log2(m_pipeInterleaveBytes * numPipes * m_bankInterleave)) & - (numBanks - 1) - ); - - return bank; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputePipeRotation -* -* @brief -* Compute pipe rotation value -* @return -* Pipe rotation -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::ComputePipeRotation( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 numPipes ///< [in] number of pipes - ) const -{ - UINT_32 rotation; - - switch (tileMode) - { - case ADDR_TM_3D_TILED_THIN1: //fall through - case ADDR_TM_3D_TILED_THICK: //fall through - case ADDR_TM_3D_TILED_XTHICK: //fall through - case ADDR_TM_PRT_3D_TILED_THIN1: //fall through - case ADDR_TM_PRT_3D_TILED_THICK: - rotation = (numPipes < 4) ? 
1 : (numPipes / 2 - 1); - break; - default: - rotation = 0; - } - - return rotation; -} - - - -/** -**************************************************************************************************** -* EgBasedLib::ComputeBankRotation -* -* @brief -* Compute bank rotation value -* @return -* Bank rotation -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::ComputeBankRotation( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 numBanks, ///< [in] number of banks - UINT_32 numPipes ///< [in] number of pipes - ) const -{ - UINT_32 rotation; - - switch (tileMode) - { - case ADDR_TM_2D_TILED_THIN1: // fall through - case ADDR_TM_2D_TILED_THICK: // fall through - case ADDR_TM_2D_TILED_XTHICK: - case ADDR_TM_PRT_2D_TILED_THIN1: - case ADDR_TM_PRT_2D_TILED_THICK: - // Rotate banks per Z-slice yBit 1 for 4-bank or 3 for 8-bank - rotation = numBanks / 2 - 1; - break; - case ADDR_TM_3D_TILED_THIN1: // fall through - case ADDR_TM_3D_TILED_THICK: // fall through - case ADDR_TM_3D_TILED_XTHICK: - case ADDR_TM_PRT_3D_TILED_THIN1: - case ADDR_TM_PRT_3D_TILED_THICK: - rotation = (numPipes < 4) ? 
1 : (numPipes / 2 - 1); // rotate pipes & banks - break; - default: - rotation = 0; - } - - return rotation; -} - - -/** -**************************************************************************************************** -* EgBasedLib::ComputeHtileBytes -* -* @brief -* Compute htile size in bytes -* -* @return -* Htile size in bytes -**************************************************************************************************** -*/ -UINT_64 EgBasedLib::ComputeHtileBytes( - UINT_32 pitch, ///< [in] pitch - UINT_32 height, ///< [in] height - UINT_32 bpp, ///< [in] bits per pixel - BOOL_32 isLinear, ///< [in] if it is linear mode - UINT_32 numSlices, ///< [in] number of slices - UINT_64* sliceBytes, ///< [out] bytes per slice - UINT_32 baseAlign ///< [in] base alignments - ) const -{ - UINT_64 surfBytes; - - const UINT_64 HtileCacheLineSize = BITS_TO_BYTES(HtileCacheBits); - - *sliceBytes = BITS_TO_BYTES(static_cast(pitch) * height * bpp / 64); - - if (m_configFlags.useHtileSliceAlign) - { - // Align the sliceSize to htilecachelinesize * pipes at first - *sliceBytes = PowTwoAlign(*sliceBytes, HtileCacheLineSize * m_pipes); - surfBytes = *sliceBytes * numSlices; - } - else - { - // Align the surfSize to htilecachelinesize * pipes at last - surfBytes = *sliceBytes * numSlices; - surfBytes = PowTwoAlign(surfBytes, HtileCacheLineSize * m_pipes); - } - - return surfBytes; -} - -/** -**************************************************************************************************** -* EgBasedLib::DispatchComputeFmaskInfo -* -* @brief -* Compute fmask sizes include padded pitch, height, slices, total size in bytes, -* meanwhile output suitable tile mode and alignments as well. Results are returned -* through output parameters. 
-* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::DispatchComputeFmaskInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut) ///< [out] output structure -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - ADDR_COMPUTE_SURFACE_INFO_INPUT surfIn = {0}; - ADDR_COMPUTE_SURFACE_INFO_OUTPUT surfOut = {0}; - - // Setup input structure - surfIn.tileMode = pIn->tileMode; - surfIn.width = pIn->pitch; - surfIn.height = pIn->height; - surfIn.numSlices = pIn->numSlices; - surfIn.pTileInfo = pIn->pTileInfo; - surfIn.tileType = ADDR_NON_DISPLAYABLE; - surfIn.flags.fmask = 1; - - // Setup output structure - surfOut.pTileInfo = pOut->pTileInfo; - - // Setup hwl specific fields - HwlFmaskPreThunkSurfInfo(pIn, pOut, &surfIn, &surfOut); - - surfIn.bpp = HwlComputeFmaskBits(pIn, &surfIn.numSamples); - - // ComputeSurfaceInfo needs numSamples in surfOut as surface routines need adjusted numSamples - surfOut.numSamples = surfIn.numSamples; - - retCode = HwlComputeSurfaceInfo(&surfIn, &surfOut); - - // Save bpp field for surface dump support - surfOut.bpp = surfIn.bpp; - - if (retCode == ADDR_OK) - { - pOut->bpp = surfOut.bpp; - pOut->pitch = surfOut.pitch; - pOut->height = surfOut.height; - pOut->numSlices = surfOut.depth; - pOut->fmaskBytes = surfOut.surfSize; - pOut->baseAlign = surfOut.baseAlign; - pOut->pitchAlign = surfOut.pitchAlign; - pOut->heightAlign = surfOut.heightAlign; - - if (surfOut.depth > 1) - { - // For fmask, expNumSlices is stored in depth. 
- pOut->sliceSize = surfOut.surfSize / surfOut.depth; - } - else - { - pOut->sliceSize = surfOut.surfSize; - } - - // Save numSamples field for surface dump support - pOut->numSamples = surfOut.numSamples; - - HwlFmaskPostThunkSurfInfo(&surfOut, pOut); - } - - return retCode; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlFmaskSurfaceInfo -* @brief -* Entry of EgBasedLib ComputeFmaskInfo -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::HwlComputeFmaskInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut ///< [out] output structure - ) -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - ADDR_TILEINFO tileInfo = {0}; - - // Use internal tile info if pOut does not have a valid pTileInfo - if (pOut->pTileInfo == NULL) - { - pOut->pTileInfo = &tileInfo; - } - - retCode = DispatchComputeFmaskInfo(pIn, pOut); - - if (retCode == ADDR_OK) - { - pOut->tileIndex = - HwlPostCheckTileIndex(pOut->pTileInfo, pIn->tileMode, ADDR_NON_DISPLAYABLE, - pOut->tileIndex); - } - - // Resets pTileInfo to NULL if the internal tile info is used - if (pOut->pTileInfo == &tileInfo) - { - pOut->pTileInfo = NULL; - } - - return retCode; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlComputeFmaskAddrFromCoord -* @brief -* Entry of EgBasedLib ComputeFmaskAddrFromCoord -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::HwlComputeFmaskAddrFromCoord( - const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE retCode = 
ADDR_OK; - - return retCode; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlComputeFmaskCoordFromAddr -* @brief -* Entry of EgBasedLib ComputeFmaskCoordFromAddr -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::HwlComputeFmaskCoordFromAddr( - const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - return retCode; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeFmaskNumPlanesFromNumSamples -* -* @brief -* Compute fmask number of planes from number of samples -* -* @return -* Number of planes -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::ComputeFmaskNumPlanesFromNumSamples( - UINT_32 numSamples) ///< [in] number of samples -{ - UINT_32 numPlanes; - - // - // FMASK is stored such that each micro tile is composed of elements containing N bits, where - // N is the number of samples. There is a micro tile for each bit in the FMASK address, and - // micro tiles for each address bit, sometimes referred to as a plane, are stored sequentially. - // The FMASK for a 2-sample surface looks like a general surface with 2 bits per element. - // The FMASK for a 4-sample surface looks like a general surface with 4 bits per element and - // 2 samples. The FMASK for an 8-sample surface looks like a general surface with 8 bits per - // element and 4 samples. R6xx and R7xx only stored 3 planes for 8-sample FMASK surfaces. - // This was changed for R8xx to simplify the logic in the CB. 
- // - switch (numSamples) - { - case 2: - numPlanes = 1; - break; - case 4: - numPlanes = 2; - break; - case 8: - numPlanes = 4; - break; - default: - ADDR_UNHANDLED_CASE(); - numPlanes = 0; - break; - } - return numPlanes; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeFmaskResolvedBppFromNumSamples -* -* @brief -* Compute resolved fmask effective bpp based on number of samples -* -* @return -* bpp -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::ComputeFmaskResolvedBppFromNumSamples( - UINT_32 numSamples) ///< number of samples -{ - UINT_32 bpp; - - // - // Resolved FMASK surfaces are generated yBit the CB and read yBit the texture unit - // so that the texture unit can read compressed multi-sample color data. - // These surfaces store each index value packed per element. - // Each element contains at least num_samples * log2(num_samples) bits. - // Resolved FMASK surfaces are addressed as follows: - // 2-sample Addressed similarly to a color surface with 8 bits per element and 1 sample. - // 4-sample Addressed similarly to a color surface with 8 bits per element and 1 sample. - // 8-sample Addressed similarly to a color surface with 32 bits per element and 1 sample. 
- - switch (numSamples) - { - case 2: - bpp = 8; - break; - case 4: - bpp = 8; - break; - case 8: - bpp = 32; - break; - default: - ADDR_UNHANDLED_CASE(); - bpp = 0; - break; - } - return bpp; -} - -/** -**************************************************************************************************** -* EgBasedLib::IsTileInfoAllZero -* -* @brief -* Return TRUE if all field are zero -* @note -* Since NULL input is consider to be all zero -**************************************************************************************************** -*/ -BOOL_32 EgBasedLib::IsTileInfoAllZero( - const ADDR_TILEINFO* pTileInfo) -{ - BOOL_32 allZero = TRUE; - - if (pTileInfo) - { - if ((pTileInfo->banks != 0) || - (pTileInfo->bankWidth != 0) || - (pTileInfo->bankHeight != 0) || - (pTileInfo->macroAspectRatio != 0) || - (pTileInfo->tileSplitBytes != 0) || - (pTileInfo->pipeConfig != 0) - ) - { - allZero = FALSE; - } - } - - return allZero; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlTileInfoEqual -* -* @brief -* Return TRUE if all field are equal -* @note -* Only takes care of current HWL's data -**************************************************************************************************** -*/ -BOOL_32 EgBasedLib::HwlTileInfoEqual( - const ADDR_TILEINFO* pLeft, ///<[in] Left compare operand - const ADDR_TILEINFO* pRight ///<[in] Right compare operand - ) const -{ - BOOL_32 equal = FALSE; - - if (pLeft->banks == pRight->banks && - pLeft->bankWidth == pRight->bankWidth && - pLeft->bankHeight == pRight->bankHeight && - pLeft->macroAspectRatio == pRight->macroAspectRatio && - pLeft->tileSplitBytes == pRight->tileSplitBytes) - { - equal = TRUE; - } - - return equal; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlConvertTileInfoToHW -* @brief -* Entry of EgBasedLib ConvertTileInfoToHW -* @return -* 
ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::HwlConvertTileInfoToHW( - const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] input structure - ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - ADDR_TILEINFO *pTileInfoIn = pIn->pTileInfo; - ADDR_TILEINFO *pTileInfoOut = pOut->pTileInfo; - - if ((pTileInfoIn != NULL) && (pTileInfoOut != NULL)) - { - if (pIn->reverse == FALSE) - { - switch (pTileInfoIn->banks) - { - case 2: - pTileInfoOut->banks = 0; - break; - case 4: - pTileInfoOut->banks = 1; - break; - case 8: - pTileInfoOut->banks = 2; - break; - case 16: - pTileInfoOut->banks = 3; - break; - default: - ADDR_ASSERT_ALWAYS(); - retCode = ADDR_INVALIDPARAMS; - pTileInfoOut->banks = 0; - break; - } - - switch (pTileInfoIn->bankWidth) - { - case 1: - pTileInfoOut->bankWidth = 0; - break; - case 2: - pTileInfoOut->bankWidth = 1; - break; - case 4: - pTileInfoOut->bankWidth = 2; - break; - case 8: - pTileInfoOut->bankWidth = 3; - break; - default: - ADDR_ASSERT_ALWAYS(); - retCode = ADDR_INVALIDPARAMS; - pTileInfoOut->bankWidth = 0; - break; - } - - switch (pTileInfoIn->bankHeight) - { - case 1: - pTileInfoOut->bankHeight = 0; - break; - case 2: - pTileInfoOut->bankHeight = 1; - break; - case 4: - pTileInfoOut->bankHeight = 2; - break; - case 8: - pTileInfoOut->bankHeight = 3; - break; - default: - ADDR_ASSERT_ALWAYS(); - retCode = ADDR_INVALIDPARAMS; - pTileInfoOut->bankHeight = 0; - break; - } - - switch (pTileInfoIn->macroAspectRatio) - { - case 1: - pTileInfoOut->macroAspectRatio = 0; - break; - case 2: - pTileInfoOut->macroAspectRatio = 1; - break; - case 4: - pTileInfoOut->macroAspectRatio = 2; - break; - case 8: - pTileInfoOut->macroAspectRatio = 3; - break; - default: - ADDR_ASSERT_ALWAYS(); - retCode = ADDR_INVALIDPARAMS; - pTileInfoOut->macroAspectRatio = 0; - break; - } - - 
switch (pTileInfoIn->tileSplitBytes) - { - case 64: - pTileInfoOut->tileSplitBytes = 0; - break; - case 128: - pTileInfoOut->tileSplitBytes = 1; - break; - case 256: - pTileInfoOut->tileSplitBytes = 2; - break; - case 512: - pTileInfoOut->tileSplitBytes = 3; - break; - case 1024: - pTileInfoOut->tileSplitBytes = 4; - break; - case 2048: - pTileInfoOut->tileSplitBytes = 5; - break; - case 4096: - pTileInfoOut->tileSplitBytes = 6; - break; - default: - ADDR_ASSERT_ALWAYS(); - retCode = ADDR_INVALIDPARAMS; - pTileInfoOut->tileSplitBytes = 0; - break; - } - } - else - { - switch (pTileInfoIn->banks) - { - case 0: - pTileInfoOut->banks = 2; - break; - case 1: - pTileInfoOut->banks = 4; - break; - case 2: - pTileInfoOut->banks = 8; - break; - case 3: - pTileInfoOut->banks = 16; - break; - default: - ADDR_ASSERT_ALWAYS(); - retCode = ADDR_INVALIDPARAMS; - pTileInfoOut->banks = 2; - break; - } - - switch (pTileInfoIn->bankWidth) - { - case 0: - pTileInfoOut->bankWidth = 1; - break; - case 1: - pTileInfoOut->bankWidth = 2; - break; - case 2: - pTileInfoOut->bankWidth = 4; - break; - case 3: - pTileInfoOut->bankWidth = 8; - break; - default: - ADDR_ASSERT_ALWAYS(); - retCode = ADDR_INVALIDPARAMS; - pTileInfoOut->bankWidth = 1; - break; - } - - switch (pTileInfoIn->bankHeight) - { - case 0: - pTileInfoOut->bankHeight = 1; - break; - case 1: - pTileInfoOut->bankHeight = 2; - break; - case 2: - pTileInfoOut->bankHeight = 4; - break; - case 3: - pTileInfoOut->bankHeight = 8; - break; - default: - ADDR_ASSERT_ALWAYS(); - retCode = ADDR_INVALIDPARAMS; - pTileInfoOut->bankHeight = 1; - break; - } - - switch (pTileInfoIn->macroAspectRatio) - { - case 0: - pTileInfoOut->macroAspectRatio = 1; - break; - case 1: - pTileInfoOut->macroAspectRatio = 2; - break; - case 2: - pTileInfoOut->macroAspectRatio = 4; - break; - case 3: - pTileInfoOut->macroAspectRatio = 8; - break; - default: - ADDR_ASSERT_ALWAYS(); - retCode = ADDR_INVALIDPARAMS; - pTileInfoOut->macroAspectRatio = 1; - break; - } 
- - switch (pTileInfoIn->tileSplitBytes) - { - case 0: - pTileInfoOut->tileSplitBytes = 64; - break; - case 1: - pTileInfoOut->tileSplitBytes = 128; - break; - case 2: - pTileInfoOut->tileSplitBytes = 256; - break; - case 3: - pTileInfoOut->tileSplitBytes = 512; - break; - case 4: - pTileInfoOut->tileSplitBytes = 1024; - break; - case 5: - pTileInfoOut->tileSplitBytes = 2048; - break; - case 6: - pTileInfoOut->tileSplitBytes = 4096; - break; - default: - ADDR_ASSERT_ALWAYS(); - retCode = ADDR_INVALIDPARAMS; - pTileInfoOut->tileSplitBytes = 64; - break; - } - } - - if (pTileInfoIn != pTileInfoOut) - { - pTileInfoOut->pipeConfig = pTileInfoIn->pipeConfig; - } - } - else - { - ADDR_ASSERT_ALWAYS(); - retCode = ADDR_INVALIDPARAMS; - } - - return retCode; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlComputeSurfaceInfo -* @brief -* Entry of EgBasedLib ComputeSurfaceInfo -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::HwlComputeSurfaceInfo( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - if (pIn->numSamples < pIn->numFrags) - { - retCode = ADDR_INVALIDPARAMS; - } - - ADDR_TILEINFO tileInfo = {0}; - - if (retCode == ADDR_OK) - { - // Uses internal tile info if pOut does not have a valid pTileInfo - if (pOut->pTileInfo == NULL) - { - pOut->pTileInfo = &tileInfo; - } - - if (DispatchComputeSurfaceInfo(pIn, pOut) == FALSE) - { - retCode = ADDR_INVALIDPARAMS; - } - - // In case client uses tile info as input and would like to calculate a correct size and - // alignment together with tile info as output when the tile info is not suppose to have any - // matching indices in tile mode tables. 
- if (pIn->flags.skipIndicesOutput == FALSE) - { - // Returns an index - pOut->tileIndex = HwlPostCheckTileIndex(pOut->pTileInfo, - pOut->tileMode, - pOut->tileType, - pOut->tileIndex); - - if (IsMacroTiled(pOut->tileMode) && (pOut->macroModeIndex == TileIndexInvalid)) - { - pOut->macroModeIndex = HwlComputeMacroModeIndex(pOut->tileIndex, - pIn->flags, - pIn->bpp, - pIn->numSamples, - pOut->pTileInfo); - } - } - - // Resets pTileInfo to NULL if the internal tile info is used - if (pOut->pTileInfo == &tileInfo) - { -#if DEBUG - // Client does not pass in a valid pTileInfo - if (IsMacroTiled(pOut->tileMode)) - { - // If a valid index is returned, then no pTileInfo is okay - ADDR_ASSERT((m_configFlags.useTileIndex == FALSE) || - (pOut->tileIndex != TileIndexInvalid)); - - if (IsTileInfoAllZero(pIn->pTileInfo) == FALSE) - { - // The initial value of pIn->pTileInfo is copied to tileInfo - // We do not expect any of these value to be changed nor any 0 of inputs - ADDR_ASSERT(tileInfo.banks == pIn->pTileInfo->banks); - ADDR_ASSERT(tileInfo.bankWidth == pIn->pTileInfo->bankWidth); - ADDR_ASSERT(tileInfo.bankHeight == pIn->pTileInfo->bankHeight); - ADDR_ASSERT(tileInfo.macroAspectRatio == pIn->pTileInfo->macroAspectRatio); - ADDR_ASSERT(tileInfo.tileSplitBytes == pIn->pTileInfo->tileSplitBytes); - } - } -#endif - pOut->pTileInfo = NULL; - } - } - - return retCode; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlComputeSurfaceAddrFromCoord -* @brief -* Entry of EgBasedLib ComputeSurfaceAddrFromCoord -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::HwlComputeSurfaceAddrFromCoord( - const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE retCode = 
ADDR_OK; - - if ( -#if !ALT_TEST // Overflow test needs this out-of-boundary coord - (pIn->x > pIn->pitch) || - (pIn->y > pIn->height) || -#endif - (pIn->numSamples > m_maxSamples)) - { - retCode = ADDR_INVALIDPARAMS; - } - else - { - pOut->addr = DispatchComputeSurfaceAddrFromCoord(pIn, pOut); - } - - return retCode; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlComputeSurfaceCoordFromAddr -* @brief -* Entry of EgBasedLib ComputeSurfaceCoordFromAddr -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::HwlComputeSurfaceCoordFromAddr( - const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - if ((pIn->bitPosition >= 8) || - (pIn->numSamples > m_maxSamples)) - { - retCode = ADDR_INVALIDPARAMS; - } - else - { - DispatchComputeSurfaceCoordFromAddr(pIn, pOut); - } - return retCode; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlComputeSliceTileSwizzle -* @brief -* Entry of EgBasedLib ComputeSurfaceCoordFromAddr -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::HwlComputeSliceTileSwizzle( - const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - if (pIn->pTileInfo && (pIn->pTileInfo->banks > 0)) - { - - pOut->tileSwizzle = ComputeSliceTileSwizzle(pIn->tileMode, - pIn->baseSwizzle, - pIn->slice, - pIn->baseAddr, - pIn->pTileInfo); - } - else - { - retCode = ADDR_INVALIDPARAMS; - } - - 
return retCode; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlComputeHtileBpp -* -* @brief -* Compute htile bpp -* -* @return -* Htile bpp -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::HwlComputeHtileBpp( - BOOL_32 isWidth8, ///< [in] TRUE if block width is 8 - BOOL_32 isHeight8 ///< [in] TRUE if block height is 8 - ) const -{ - // only support 8x8 mode - ADDR_ASSERT(isWidth8 && isHeight8); - return 32; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlComputeHtileBaseAlign -* -* @brief -* Compute htile base alignment -* -* @return -* Htile base alignment -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::HwlComputeHtileBaseAlign( - BOOL_32 isTcCompatible, ///< [in] if TC compatible - BOOL_32 isLinear, ///< [in] if it is linear mode - ADDR_TILEINFO* pTileInfo ///< [in] Tile info - ) const -{ - UINT_32 baseAlign = m_pipeInterleaveBytes * HwlGetPipes(pTileInfo); - - if (isTcCompatible) - { - ADDR_ASSERT(pTileInfo != NULL); - if (pTileInfo) - { - baseAlign *= pTileInfo->banks; - } - } - - return baseAlign; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlGetPitchAlignmentMicroTiled -* -* @brief -* Compute 1D tiled surface pitch alignment, calculation results are returned through -* output parameters. 
-* -* @return -* pitch alignment -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::HwlGetPitchAlignmentMicroTiled( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32 numSamples ///< [in] number of samples - ) const -{ - UINT_32 pitchAlign; - - UINT_32 microTileThickness = Thickness(tileMode); - - UINT_32 pixelsPerMicroTile; - UINT_32 pixelsPerPipeInterleave; - UINT_32 microTilesPerPipeInterleave; - - // - // Special workaround for depth/stencil buffer, use 8 bpp to meet larger requirement for - // stencil buffer since pitch alignment is related to bpp. - // For a depth only buffer do not set this. - // - // Note: this actually does not work for mipmap but mipmap depth texture is not really - // sampled with mipmap. - // - if (flags.depth && (flags.noStencil == FALSE)) - { - bpp = 8; - } - - pixelsPerMicroTile = MicroTilePixels * microTileThickness; - pixelsPerPipeInterleave = BYTES_TO_BITS(m_pipeInterleaveBytes) / (bpp * numSamples); - microTilesPerPipeInterleave = pixelsPerPipeInterleave / pixelsPerMicroTile; - - pitchAlign = Max(MicroTileWidth, microTilesPerPipeInterleave * MicroTileWidth); - - return pitchAlign; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlGetSizeAdjustmentMicroTiled -* -* @brief -* Adjust 1D tiled surface pitch and slice size -* -* @return -* Logical slice size in bytes -**************************************************************************************************** -*/ -UINT_64 EgBasedLib::HwlGetSizeAdjustmentMicroTiled( - UINT_32 thickness, ///< [in] thickness - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32 numSamples, ///< [in] number of samples - UINT_32 baseAlign, ///< [in] base alignment - UINT_32 pitchAlign, ///< [in] pitch 
alignment - UINT_32* pPitch, ///< [in,out] pointer to pitch - UINT_32* pHeight ///< [in,out] pointer to height - ) const -{ - UINT_64 logicalSliceSize; - MAYBE_UNUSED UINT_64 physicalSliceSize; - - UINT_32 pitch = *pPitch; - UINT_32 height = *pHeight; - - // Logical slice: pitch * height * bpp * numSamples (no 1D MSAA so actually numSamples == 1) - logicalSliceSize = BITS_TO_BYTES(static_cast(pitch) * height * bpp * numSamples); - - // Physical slice: multiplied by thickness - physicalSliceSize = logicalSliceSize * thickness; - - // - // R800 will always pad physical slice size to baseAlign which is pipe_interleave_bytes - // - ADDR_ASSERT((physicalSliceSize % baseAlign) == 0); - - return logicalSliceSize; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlStereoCheckRightOffsetPadding -* -* @brief -* check if the height needs extra padding for stereo right eye offset, to avoid swizzling -* -* @return -* TRUE is the extra padding is needed -* -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::HwlStereoCheckRightOffsetPadding( - ADDR_TILEINFO* pTileInfo ///< Tiling info - ) const -{ - UINT_32 stereoHeightAlign = 0; - - if (pTileInfo->macroAspectRatio > 2) - { - // Since 3D rendering treats right eye surface starting from y == "eye height" while - // display engine treats it to be 0, so the bank bits may be different. - // Additional padding in height is required to make sure it's possible - // to achieve synonym by adjusting bank swizzle of right eye surface. 
- - static const UINT_32 StereoAspectRatio = 2; - stereoHeightAlign = pTileInfo->banks * - pTileInfo->bankHeight * - MicroTileHeight / - StereoAspectRatio; - } - - return stereoHeightAlign; -} - -} // V1 -} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/r800/egbaddrlib.h mesa-19.0.1/src/amd/addrlib/r800/egbaddrlib.h --- mesa-18.3.3/src/amd/addrlib/r800/egbaddrlib.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/r800/egbaddrlib.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,430 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file egbaddrlib.h -* @brief Contains the EgBasedLib class definition. 
-**************************************************************************************************** -*/ - -#ifndef __EG_BASED_ADDR_LIB_H__ -#define __EG_BASED_ADDR_LIB_H__ - -#include "addrlib1.h" - -namespace Addr -{ -namespace V1 -{ -/// Structures for functions -struct CoordFromBankPipe -{ - UINT_32 xBits : 3; - UINT_32 yBits : 4; - - UINT_32 xBit3 : 1; - UINT_32 xBit4 : 1; - UINT_32 xBit5 : 1; - UINT_32 yBit3 : 1; - UINT_32 yBit4 : 1; - UINT_32 yBit5 : 1; - UINT_32 yBit6 : 1; -}; - -/** -**************************************************************************************************** -* @brief This class is the Evergreen based address library -* @note Abstract class -**************************************************************************************************** -*/ -class EgBasedLib : public Lib -{ -protected: - EgBasedLib(const Client* pClient); - virtual ~EgBasedLib(); - -public: - - /// Surface info functions - - // NOTE: DispatchComputeSurfaceInfo using TileInfo takes both an input and an output. - // On input: - // One or more fields may be 0 to be calculated/defaulted - pre-SI h/w. - // H/W using tile mode index only accepts none or all 0's - SI and newer h/w. - // It then returns the actual tiling configuration used. 
- // Other methods' TileInfo must be valid on entry - BOOL_32 DispatchComputeSurfaceInfo( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE DispatchComputeFmaskInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut); - -protected: - // Hwl interface - virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoord( - const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeSurfaceCoordFromAddr( - const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeSliceTileSwizzle( - const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, - ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlExtractBankPipeSwizzle( - const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, - ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlCombineBankPipeSwizzle( - UINT_32 bankSwizzle, UINT_32 pipeSwizzle, ADDR_TILEINFO* pTileInfo, - UINT_64 baseAddr, UINT_32* pTileSwizzle) const; - - virtual ADDR_E_RETURNCODE HwlComputeBaseSwizzle( - const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, - ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW( - const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, - ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const; - - virtual UINT_32 HwlComputeHtileBpp( - BOOL_32 isWidth8, BOOL_32 isHeight8) const; - - virtual UINT_32 HwlComputeHtileBaseAlign( - BOOL_32 isTcCompatible, BOOL_32 isLinear, ADDR_TILEINFO* pTileInfo) const; - - virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut); - - virtual 
ADDR_E_RETURNCODE HwlComputeFmaskAddrFromCoord( - const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeFmaskCoordFromAddr( - const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const; - - virtual BOOL_32 HwlGetAlignmentInfoMacroTiled( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - UINT_32* pPitchAlign, UINT_32* pHeightAlign, UINT_32* pSizeAlign) const; - - virtual UINT_32 HwlComputeQbStereoRightSwizzle( - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pInfo) const; - - virtual VOID HwlComputePixelCoordFromOffset( - UINT_32 offset, UINT_32 bpp, UINT_32 numSamples, - AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits, - UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, - AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const; - - /// Return Cmask block max - virtual BOOL_32 HwlGetMaxCmaskBlockMax() const - { - return 0x3FFF; // 14 bits, 0n16383 - } - - // Sub-hwl interface - /// Pure virtual function to setup tile info (indices) if client requests to do so - virtual VOID HwlSetupTileInfo( - AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags, - UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, - ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo, - AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0; - - /// Pure virtual function to get pitch alignment for linear modes - virtual UINT_32 HwlGetPitchAlignmentLinear(UINT_32 bpp, ADDR_SURFACE_FLAGS flags) const = 0; - - /// Pure virtual function to get size adjustment for linear modes - virtual UINT_64 HwlGetSizeAdjustmentLinear( - AddrTileMode tileMode, - UINT_32 bpp, UINT_32 numSamples, UINT_32 baseAlign, UINT_32 pitchAlign, - UINT_32 *pPitch, UINT_32 *pHeight, UINT_32 *pHeightAlign) const = 0; - - virtual UINT_32 HwlGetPitchAlignmentMicroTiled( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 
numSamples) const; - - virtual UINT_64 HwlGetSizeAdjustmentMicroTiled( - UINT_32 thickness, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples, - UINT_32 baseAlign, UINT_32 pitchAlign, - UINT_32 *pPitch, UINT_32 *pHeight) const; - - /// Pure virtual function to do extra sanity check - virtual BOOL_32 HwlSanityCheckMacroTiled( - ADDR_TILEINFO* pTileInfo) const = 0; - - /// Pure virtual function to check current level to be the last macro tiled one - virtual VOID HwlCheckLastMacroTiledLvl( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0; - - /// Adjusts bank before bank is modified by rotation - virtual UINT_32 HwlPreAdjustBank( - UINT_32 tileX, UINT_32 bank, ADDR_TILEINFO* pTileInfo) const = 0; - - virtual VOID HwlComputeSurfaceCoord2DFromBankPipe( - AddrTileMode tileMode, UINT_32* pX, UINT_32* pY, UINT_32 slice, - UINT_32 bank, UINT_32 pipe, - UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices, - BOOL_32 ignoreSE, - ADDR_TILEINFO* pTileInfo) const = 0; - - virtual BOOL_32 HwlTileInfoEqual( - const ADDR_TILEINFO* pLeft, const ADDR_TILEINFO* pRight) const; - - virtual AddrTileMode HwlDegradeThickTileMode( - AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const; - - virtual INT_32 HwlPostCheckTileIndex( - const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type, - INT curIndex = TileIndexInvalid) const - { - return TileIndexInvalid; - } - - virtual VOID HwlFmaskPreThunkSurfInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn, - const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut, - ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const - { - } - - virtual VOID HwlFmaskPostThunkSurfInfo( - const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut, - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const - { - } - - virtual UINT_32 HwlStereoCheckRightOffsetPadding(ADDR_TILEINFO* pTileInfo) const; - - virtual BOOL_32 HwlReduceBankWidthHeight( - UINT_32 
tileSize, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples, - UINT_32 bankHeightAlign, UINT_32 pipes, - ADDR_TILEINFO* pTileInfo) const; - - // Protected non-virtual functions - - /// Mip level functions - AddrTileMode ComputeSurfaceMipLevelTileMode( - AddrTileMode baseTileMode, UINT_32 bpp, - UINT_32 pitch, UINT_32 height, UINT_32 numSlices, UINT_32 numSamples, - UINT_32 pitchAlign, UINT_32 heightAlign, - ADDR_TILEINFO* pTileInfo) const; - - /// Swizzle functions - VOID ExtractBankPipeSwizzle( - UINT_32 base256b, ADDR_TILEINFO* pTileInfo, - UINT_32* pBankSwizzle, UINT_32* pPipeSwizzle) const; - - UINT_32 GetBankPipeSwizzle( - UINT_32 bankSwizzle, UINT_32 pipeSwizzle, - UINT_64 baseAddr, ADDR_TILEINFO* pTileInfo) const; - - UINT_32 ComputeSliceTileSwizzle( - AddrTileMode tileMode, UINT_32 baseSwizzle, UINT_32 slice, UINT_64 baseAddr, - ADDR_TILEINFO* pTileInfo) const; - - /// Addressing functions - virtual ADDR_E_RETURNCODE ComputeBankEquation( - UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, - ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const - { - return ADDR_NOTSUPPORTED; - } - - UINT_32 ComputeBankFromCoord( - UINT_32 x, UINT_32 y, UINT_32 slice, - AddrTileMode tileMode, UINT_32 bankSwizzle, UINT_32 tileSpitSlice, - ADDR_TILEINFO* pTileInfo) const; - - UINT_32 ComputeBankFromAddr( - UINT_64 addr, UINT_32 numBanks, UINT_32 numPipes) const; - - UINT_32 ComputePipeRotation( - AddrTileMode tileMode, UINT_32 numPipes) const; - - UINT_32 ComputeBankRotation( - AddrTileMode tileMode, UINT_32 numBanks, - UINT_32 numPipes) const; - - VOID ComputeSurfaceCoord2DFromBankPipe( - AddrTileMode tileMode, UINT_32 x, UINT_32 y, UINT_32 slice, - UINT_32 bank, UINT_32 pipe, - UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices, - ADDR_TILEINFO* pTileInfo, - CoordFromBankPipe *pOutput) const; - - /// Htile/Cmask functions - UINT_64 ComputeHtileBytes( - UINT_32 pitch, UINT_32 height, UINT_32 bpp, - BOOL_32 isLinear, UINT_32 numSlices, UINT_64* 
sliceBytes, UINT_32 baseAlign) const; - - ADDR_E_RETURNCODE ComputeMacroTileEquation( - UINT_32 log2BytesPP, AddrTileMode tileMode, AddrTileType microTileType, - ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const; - - // Static functions - static BOOL_32 IsTileInfoAllZero(const ADDR_TILEINFO* pTileInfo); - static UINT_32 ComputeFmaskNumPlanesFromNumSamples(UINT_32 numSamples); - static UINT_32 ComputeFmaskResolvedBppFromNumSamples(UINT_32 numSamples); - - virtual VOID HwlComputeSurfaceAlignmentsMacroTiled( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, - UINT_32 mipLevel, UINT_32 numSamples, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const - { - } - -private: - - BOOL_32 ComputeSurfaceInfoLinear( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, - UINT_32 padDims) const; - - BOOL_32 ComputeSurfaceInfoMicroTiled( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, - UINT_32 padDims, - AddrTileMode expTileMode) const; - - BOOL_32 ComputeSurfaceInfoMacroTiled( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, - UINT_32 padDims, - AddrTileMode expTileMode) const; - - BOOL_32 ComputeSurfaceAlignmentsLinear( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, - UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const; - - BOOL_32 ComputeSurfaceAlignmentsMicroTiled( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, - UINT_32 mipLevel, UINT_32 numSamples, - UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const; - - BOOL_32 ComputeSurfaceAlignmentsMacroTiled( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, - UINT_32 mipLevel, UINT_32 numSamples, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - /// Surface addressing functions - UINT_64 DispatchComputeSurfaceAddrFromCoord( - const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - 
ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; - - VOID DispatchComputeSurfaceCoordFromAddr( - const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; - - UINT_64 ComputeSurfaceAddrFromCoordMicroTiled( - UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, - UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, - AddrTileMode tileMode, - AddrTileType microTileType, BOOL_32 isDepthSampleOrder, - UINT_32* pBitPosition) const; - - UINT_64 ComputeSurfaceAddrFromCoordMacroTiled( - UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, - UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, - AddrTileMode tileMode, - AddrTileType microTileType, BOOL_32 ignoreSE, BOOL_32 isDepthSampleOrder, - UINT_32 pipeSwizzle, UINT_32 bankSwizzle, - ADDR_TILEINFO* pTileInfo, - UINT_32* pBitPosition) const; - - VOID ComputeSurfaceCoordFromAddrMacroTiled( - UINT_64 addr, UINT_32 bitPosition, - UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, - AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits, - AddrTileType microTileType, BOOL_32 ignoreSE, BOOL_32 isDepthSampleOrder, - UINT_32 pipeSwizzle, UINT_32 bankSwizzle, - ADDR_TILEINFO* pTileInfo, - UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample) const; - - /// Fmask functions - UINT_64 DispatchComputeFmaskAddrFromCoord( - const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const; - - VOID DispatchComputeFmaskCoordFromAddr( - const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const; - - // FMASK related methods - private - UINT_64 ComputeFmaskAddrFromCoordMicroTiled( - UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, UINT_32 plane, - UINT_32 pitch, UINT_32 height, UINT_32 numSamples, AddrTileMode tileMode, - BOOL_32 resolved, UINT_32* pBitPosition) const; - - VOID ComputeFmaskCoordFromAddrMicroTiled( - UINT_64 addr, UINT_32 
bitPosition, - UINT_32 pitch, UINT_32 height, UINT_32 numSamples, - AddrTileMode tileMode, BOOL_32 resolved, - UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, UINT_32* pPlane) const; - - VOID ComputeFmaskCoordFromAddrMacroTiled( - UINT_64 addr, UINT_32 bitPosition, - UINT_32 pitch, UINT_32 height, UINT_32 numSamples, AddrTileMode tileMode, - UINT_32 pipeSwizzle, UINT_32 bankSwizzle, - BOOL_32 ignoreSE, - ADDR_TILEINFO* pTileInfo, - BOOL_32 resolved, - UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, UINT_32* pPlane) const; - - UINT_64 ComputeFmaskAddrFromCoordMacroTiled( - UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, UINT_32 plane, - UINT_32 pitch, UINT_32 height, UINT_32 numSamples, - AddrTileMode tileMode, UINT_32 pipeSwizzle, UINT_32 bankSwizzle, - BOOL_32 ignoreSE, - ADDR_TILEINFO* pTileInfo, - BOOL_32 resolved, - UINT_32* pBitPosition) const; - - /// Sanity check functions - BOOL_32 SanityCheckMacroTiled( - ADDR_TILEINFO* pTileInfo) const; - -protected: - UINT_32 m_ranks; ///< Number of ranks - MC_ARB_RAMCFG.NOOFRANK - UINT_32 m_logicalBanks; ///< Logical banks = m_banks * m_ranks if m_banks != 16 - UINT_32 m_bankInterleave; ///< Bank interleave, as a multiple of pipe interleave size -}; - -} // V1 -} // Addr - -#endif - diff -Nru mesa-18.3.3/src/amd/addrlib/r800/siaddrlib.cpp mesa-19.0.1/src/amd/addrlib/r800/siaddrlib.cpp --- mesa-18.3.3/src/amd/addrlib/r800/siaddrlib.cpp 2018-04-19 04:33:31.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/r800/siaddrlib.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,3872 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file siaddrlib.cpp -* @brief Contains the implementation for the SiLib class. -**************************************************************************************************** -*/ - -#include "siaddrlib.h" -#include "si_gb_reg.h" - -#include "amdgpu_asic_addr.h" - -//////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////// -namespace Addr -{ - -/** -**************************************************************************************************** -* SiHwlInit -* -* @brief -* Creates an SiLib object. -* -* @return -* Returns an SiLib object pointer. 
-**************************************************************************************************** -*/ -Lib* SiHwlInit(const Client* pClient) -{ - return V1::SiLib::CreateObj(pClient); -} - -namespace V1 -{ - -// We don't support MSAA for equation -const BOOL_32 SiLib::m_EquationSupport[SiLib::TileTableSize][SiLib::MaxNumElementBytes] = -{ - {TRUE, TRUE, TRUE, FALSE, FALSE}, // 0, non-AA compressed depth or any stencil - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 1, 2xAA/4xAA compressed depth with or without stencil - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 2, 8xAA compressed depth with or without stencil - {FALSE, TRUE, FALSE, FALSE, FALSE}, // 3, 16 bpp depth PRT (non-MSAA), don't support uncompressed depth - {TRUE, TRUE, TRUE, FALSE, FALSE}, // 4, 1D depth - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 5, 16 bpp depth PRT (4xMSAA) - {FALSE, FALSE, TRUE, FALSE, FALSE}, // 6, 32 bpp depth PRT (non-MSAA) - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 7, 32 bpp depth PRT (4xMSAA) - {TRUE, TRUE, TRUE, TRUE, TRUE }, // 8, Linear - {TRUE, TRUE, TRUE, TRUE, TRUE }, // 9, 1D display - {TRUE, FALSE, FALSE, FALSE, FALSE}, // 10, 8 bpp color (displayable) - {FALSE, TRUE, FALSE, FALSE, FALSE}, // 11, 16 bpp color (displayable) - {FALSE, FALSE, TRUE, TRUE, FALSE}, // 12, 32/64 bpp color (displayable) - {TRUE, TRUE, TRUE, TRUE, TRUE }, // 13, 1D thin - {TRUE, FALSE, FALSE, FALSE, FALSE}, // 14, 8 bpp color non-displayable - {FALSE, TRUE, FALSE, FALSE, FALSE}, // 15, 16 bpp color non-displayable - {FALSE, FALSE, TRUE, FALSE, FALSE}, // 16, 32 bpp color non-displayable - {FALSE, FALSE, FALSE, TRUE, TRUE }, // 17, 64/128 bpp color non-displayable - {TRUE, TRUE, TRUE, TRUE, TRUE }, // 18, 1D THICK - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 19, 2D XTHICK - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 20, 2D THICK - {TRUE, FALSE, FALSE, FALSE, FALSE}, // 21, 8 bpp 2D PRTs (non-MSAA) - {FALSE, TRUE, FALSE, FALSE, FALSE}, // 22, 16 bpp 2D PRTs (non-MSAA) - {FALSE, FALSE, TRUE, FALSE, FALSE}, // 23, 
32 bpp 2D PRTs (non-MSAA) - {FALSE, FALSE, FALSE, TRUE, FALSE}, // 24, 64 bpp 2D PRTs (non-MSAA) - {FALSE, FALSE, FALSE, FALSE, TRUE }, // 25, 128bpp 2D PRTs (non-MSAA) - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 26, none - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 27, none - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 28, none - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 29, none - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 30, 64bpp 2D PRTs (4xMSAA) - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 31, none -}; - -/** -**************************************************************************************************** -* SiLib::SiLib -* -* @brief -* Constructor -* -**************************************************************************************************** -*/ -SiLib::SiLib(const Client* pClient) - : - EgBasedLib(pClient), - m_noOfEntries(0), - m_numEquations(0) -{ - m_class = SI_ADDRLIB; - memset(&m_settings, 0, sizeof(m_settings)); -} - -/** -**************************************************************************************************** -* SiLib::~SiLib -* -* @brief -* Destructor -**************************************************************************************************** -*/ -SiLib::~SiLib() -{ -} - -/** -**************************************************************************************************** -* SiLib::HwlGetPipes -* -* @brief -* Get number pipes -* @return -* num pipes -**************************************************************************************************** -*/ -UINT_32 SiLib::HwlGetPipes( - const ADDR_TILEINFO* pTileInfo ///< [in] Tile info - ) const -{ - UINT_32 numPipes; - - if (pTileInfo) - { - numPipes = GetPipePerSurf(pTileInfo->pipeConfig); - } - else - { - ADDR_ASSERT_ALWAYS(); - numPipes = m_pipes; // Suppose we should still have a global pipes - } - - return numPipes; -} - -/** -**************************************************************************************************** -* SiLib::GetPipePerSurf -* @brief -* get pipe 
num base on inputing tileinfo->pipeconfig -* @return -* pipe number -**************************************************************************************************** -*/ -UINT_32 SiLib::GetPipePerSurf( - AddrPipeCfg pipeConfig ///< [in] pipe config - ) const -{ - UINT_32 numPipes = 0; - - switch (pipeConfig) - { - case ADDR_PIPECFG_P2: - numPipes = 2; - break; - case ADDR_PIPECFG_P4_8x16: - case ADDR_PIPECFG_P4_16x16: - case ADDR_PIPECFG_P4_16x32: - case ADDR_PIPECFG_P4_32x32: - numPipes = 4; - break; - case ADDR_PIPECFG_P8_16x16_8x16: - case ADDR_PIPECFG_P8_16x32_8x16: - case ADDR_PIPECFG_P8_32x32_8x16: - case ADDR_PIPECFG_P8_16x32_16x16: - case ADDR_PIPECFG_P8_32x32_16x16: - case ADDR_PIPECFG_P8_32x32_16x32: - case ADDR_PIPECFG_P8_32x64_32x32: - numPipes = 8; - break; - case ADDR_PIPECFG_P16_32x32_8x16: - case ADDR_PIPECFG_P16_32x32_16x16: - numPipes = 16; - break; - default: - ADDR_ASSERT(!"Invalid pipe config"); - numPipes = m_pipes; - } - return numPipes; -} - -/** -**************************************************************************************************** -* SiLib::ComputeBankEquation -* -* @brief -* Compute bank equation -* -* @return -* If equation can be computed -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE SiLib::ComputeBankEquation( - UINT_32 log2BytesPP, ///< [in] log2 of bytes per pixel - UINT_32 threshX, ///< [in] threshold for x channel - UINT_32 threshY, ///< [in] threshold for y channel - ADDR_TILEINFO* pTileInfo, ///< [in] tile info - ADDR_EQUATION* pEquation ///< [out] bank equation - ) const -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - UINT_32 pipes = HwlGetPipes(pTileInfo); - UINT_32 bankXStart = 3 + Log2(pipes) + Log2(pTileInfo->bankWidth); - UINT_32 bankYStart = 3 + Log2(pTileInfo->bankHeight); - - ADDR_CHANNEL_SETTING x3 = InitChannel(1, 0, log2BytesPP + bankXStart); - ADDR_CHANNEL_SETTING x4 = InitChannel(1, 0, log2BytesPP + bankXStart + 1); - 
ADDR_CHANNEL_SETTING x5 = InitChannel(1, 0, log2BytesPP + bankXStart + 2); - ADDR_CHANNEL_SETTING x6 = InitChannel(1, 0, log2BytesPP + bankXStart + 3); - ADDR_CHANNEL_SETTING y3 = InitChannel(1, 1, bankYStart); - ADDR_CHANNEL_SETTING y4 = InitChannel(1, 1, bankYStart + 1); - ADDR_CHANNEL_SETTING y5 = InitChannel(1, 1, bankYStart + 2); - ADDR_CHANNEL_SETTING y6 = InitChannel(1, 1, bankYStart + 3); - - x3.value = (threshX > bankXStart) ? x3.value : 0; - x4.value = (threshX > bankXStart + 1) ? x4.value : 0; - x5.value = (threshX > bankXStart + 2) ? x5.value : 0; - x6.value = (threshX > bankXStart + 3) ? x6.value : 0; - y3.value = (threshY > bankYStart) ? y3.value : 0; - y4.value = (threshY > bankYStart + 1) ? y4.value : 0; - y5.value = (threshY > bankYStart + 2) ? y5.value : 0; - y6.value = (threshY > bankYStart + 3) ? y6.value : 0; - - switch (pTileInfo->banks) - { - case 16: - if (pTileInfo->macroAspectRatio == 1) - { - pEquation->addr[0] = y6; - pEquation->xor1[0] = x3; - pEquation->addr[1] = y5; - pEquation->xor1[1] = y6; - pEquation->xor2[1] = x4; - pEquation->addr[2] = y4; - pEquation->xor1[2] = x5; - pEquation->addr[3] = y3; - pEquation->xor1[3] = x6; - } - else if (pTileInfo->macroAspectRatio == 2) - { - pEquation->addr[0] = x3; - pEquation->xor1[0] = y6; - pEquation->addr[1] = y5; - pEquation->xor1[1] = y6; - pEquation->xor2[1] = x4; - pEquation->addr[2] = y4; - pEquation->xor1[2] = x5; - pEquation->addr[3] = y3; - pEquation->xor1[3] = x6; - } - else if (pTileInfo->macroAspectRatio == 4) - { - pEquation->addr[0] = x3; - pEquation->xor1[0] = y6; - pEquation->addr[1] = x4; - pEquation->xor1[1] = y5; - pEquation->xor2[1] = y6; - pEquation->addr[2] = y4; - pEquation->xor1[2] = x5; - pEquation->addr[3] = y3; - pEquation->xor1[3] = x6; - } - else if (pTileInfo->macroAspectRatio == 8) - { - pEquation->addr[0] = x3; - pEquation->xor1[0] = y6; - pEquation->addr[1] = x4; - pEquation->xor1[1] = y5; - pEquation->xor2[1] = y6; - pEquation->addr[2] = x5; - 
pEquation->xor1[2] = y4; - pEquation->addr[3] = y3; - pEquation->xor1[3] = x6; - } - else - { - ADDR_ASSERT_ALWAYS(); - } - pEquation->numBits = 4; - break; - case 8: - if (pTileInfo->macroAspectRatio == 1) - { - pEquation->addr[0] = y5; - pEquation->xor1[0] = x3; - pEquation->addr[1] = y4; - pEquation->xor1[1] = y5; - pEquation->xor2[1] = x4; - pEquation->addr[2] = y3; - pEquation->xor1[2] = x5; - } - else if (pTileInfo->macroAspectRatio == 2) - { - pEquation->addr[0] = x3; - pEquation->xor1[0] = y5; - pEquation->addr[1] = y4; - pEquation->xor1[1] = y5; - pEquation->xor2[1] = x4; - pEquation->addr[2] = y3; - pEquation->xor1[2] = x5; - } - else if (pTileInfo->macroAspectRatio == 4) - { - pEquation->addr[0] = x3; - pEquation->xor1[0] = y5; - pEquation->addr[1] = x4; - pEquation->xor1[1] = y4; - pEquation->xor2[1] = y5; - pEquation->addr[2] = y3; - pEquation->xor1[2] = x5; - } - else - { - ADDR_ASSERT_ALWAYS(); - } - pEquation->numBits = 3; - break; - case 4: - if (pTileInfo->macroAspectRatio == 1) - { - pEquation->addr[0] = y4; - pEquation->xor1[0] = x3; - pEquation->addr[1] = y3; - pEquation->xor1[1] = x4; - } - else if (pTileInfo->macroAspectRatio == 2) - { - pEquation->addr[0] = x3; - pEquation->xor1[0] = y4; - pEquation->addr[1] = y3; - pEquation->xor1[1] = x4; - } - else - { - pEquation->addr[0] = x3; - pEquation->xor1[0] = y4; - pEquation->addr[1] = x4; - pEquation->xor1[1] = y3; - } - pEquation->numBits = 2; - break; - case 2: - if (pTileInfo->macroAspectRatio == 1) - { - pEquation->addr[0] = y3; - pEquation->xor1[0] = x3; - } - else - { - pEquation->addr[0] = x3; - pEquation->xor1[0] = y3; - } - pEquation->numBits = 1; - break; - default: - pEquation->numBits = 0; - retCode = ADDR_NOTSUPPORTED; - ADDR_ASSERT_ALWAYS(); - break; - } - - for (UINT_32 i = 0; i < pEquation->numBits; i++) - { - if (pEquation->addr[i].value == 0) - { - if (pEquation->xor1[i].value == 0) - { - // 00X -> X00 - pEquation->addr[i].value = pEquation->xor2[i].value; - 
pEquation->xor2[i].value = 0; - } - else - { - pEquation->addr[i].value = pEquation->xor1[i].value; - - if (pEquation->xor2[i].value != 0) - { - // 0XY -> XY0 - pEquation->xor1[i].value = pEquation->xor2[i].value; - pEquation->xor2[i].value = 0; - } - else - { - // 0X0 -> X00 - pEquation->xor1[i].value = 0; - } - } - } - else if (pEquation->xor1[i].value == 0) - { - if (pEquation->xor2[i].value != 0) - { - // X0Y -> XY0 - pEquation->xor1[i].value = pEquation->xor2[i].value; - pEquation->xor2[i].value = 0; - } - } - } - - if ((pTileInfo->bankWidth == 1) && - ((pTileInfo->pipeConfig == ADDR_PIPECFG_P4_32x32) || - (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32))) - { - retCode = ADDR_NOTSUPPORTED; - } - - return retCode; -} - -/** -**************************************************************************************************** -* SiLib::ComputePipeEquation -* -* @brief -* Compute pipe equation -* -* @return -* If equation can be computed -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE SiLib::ComputePipeEquation( - UINT_32 log2BytesPP, ///< [in] Log2 of bytes per pixel - UINT_32 threshX, ///< [in] Threshold for X channel - UINT_32 threshY, ///< [in] Threshold for Y channel - ADDR_TILEINFO* pTileInfo, ///< [in] Tile info - ADDR_EQUATION* pEquation ///< [out] Pipe configure - ) const -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - ADDR_CHANNEL_SETTING* pAddr = pEquation->addr; - ADDR_CHANNEL_SETTING* pXor1 = pEquation->xor1; - ADDR_CHANNEL_SETTING* pXor2 = pEquation->xor2; - - ADDR_CHANNEL_SETTING x3 = InitChannel(1, 0, 3 + log2BytesPP); - ADDR_CHANNEL_SETTING x4 = InitChannel(1, 0, 4 + log2BytesPP); - ADDR_CHANNEL_SETTING x5 = InitChannel(1, 0, 5 + log2BytesPP); - ADDR_CHANNEL_SETTING x6 = InitChannel(1, 0, 6 + log2BytesPP); - ADDR_CHANNEL_SETTING y3 = InitChannel(1, 1, 3); - ADDR_CHANNEL_SETTING y4 = InitChannel(1, 1, 4); - ADDR_CHANNEL_SETTING y5 = InitChannel(1, 1, 5); - 
ADDR_CHANNEL_SETTING y6 = InitChannel(1, 1, 6); - - x3.value = (threshX > 3) ? x3.value : 0; - x4.value = (threshX > 4) ? x4.value : 0; - x5.value = (threshX > 5) ? x5.value : 0; - x6.value = (threshX > 6) ? x6.value : 0; - y3.value = (threshY > 3) ? y3.value : 0; - y4.value = (threshY > 4) ? y4.value : 0; - y5.value = (threshY > 5) ? y5.value : 0; - y6.value = (threshY > 6) ? y6.value : 0; - - switch (pTileInfo->pipeConfig) - { - case ADDR_PIPECFG_P2: - pAddr[0] = x3; - pXor1[0] = y3; - pEquation->numBits = 1; - break; - case ADDR_PIPECFG_P4_8x16: - pAddr[0] = x4; - pXor1[0] = y3; - pAddr[1] = x3; - pXor1[1] = y4; - pEquation->numBits = 2; - break; - case ADDR_PIPECFG_P4_16x16: - pAddr[0] = x3; - pXor1[0] = y3; - pXor2[0] = x4; - pAddr[1] = x4; - pXor1[1] = y4; - pEquation->numBits = 2; - break; - case ADDR_PIPECFG_P4_16x32: - pAddr[0] = x3; - pXor1[0] = y3; - pXor2[0] = x4; - pAddr[1] = x4; - pXor1[1] = y5; - pEquation->numBits = 2; - break; - case ADDR_PIPECFG_P4_32x32: - pAddr[0] = x3; - pXor1[0] = y3; - pXor2[0] = x5; - pAddr[1] = x5; - pXor1[1] = y5; - pEquation->numBits = 2; - break; - case ADDR_PIPECFG_P8_16x16_8x16: - pAddr[0] = x4; - pXor1[0] = y3; - pXor2[0] = x5; - pAddr[1] = x3; - pXor1[1] = y5; - pEquation->numBits = 3; - break; - case ADDR_PIPECFG_P8_16x32_8x16: - pAddr[0] = x4; - pXor1[0] = y3; - pXor2[0] = x5; - pAddr[1] = x3; - pXor1[1] = y4; - pAddr[2] = x4; - pXor1[2] = y5; - pEquation->numBits = 3; - break; - case ADDR_PIPECFG_P8_16x32_16x16: - pAddr[0] = x3; - pXor1[0] = y3; - pXor2[0] = x4; - pAddr[1] = x5; - pXor1[1] = y4; - pAddr[2] = x4; - pXor1[2] = y5; - pEquation->numBits = 3; - break; - case ADDR_PIPECFG_P8_32x32_8x16: - pAddr[0] = x4; - pXor1[0] = y3; - pXor2[0] = x5; - pAddr[1] = x3; - pXor1[1] = y4; - pAddr[2] = x5; - pXor1[2] = y5; - pEquation->numBits = 3; - break; - case ADDR_PIPECFG_P8_32x32_16x16: - pAddr[0] = x3; - pXor1[0] = y3; - pXor2[0] = x4; - pAddr[1] = x4; - pXor1[1] = y4; - pAddr[2] = x5; - pXor1[2] = y5; - 
pEquation->numBits = 3; - break; - case ADDR_PIPECFG_P8_32x32_16x32: - pAddr[0] = x3; - pXor1[0] = y3; - pXor2[0] = x4; - pAddr[1] = x4; - pXor1[1] = y6; - pAddr[2] = x5; - pXor1[2] = y5; - pEquation->numBits = 3; - break; - case ADDR_PIPECFG_P8_32x64_32x32: - pAddr[0] = x3; - pXor1[0] = y3; - pXor2[0] = x5; - pAddr[1] = x6; - pXor1[1] = y5; - pAddr[2] = x5; - pXor1[2] = y6; - pEquation->numBits = 3; - break; - case ADDR_PIPECFG_P16_32x32_8x16: - pAddr[0] = x4; - pXor1[0] = y3; - pAddr[1] = x3; - pXor1[1] = y4; - pAddr[2] = x5; - pXor1[2] = y6; - pAddr[3] = x6; - pXor1[3] = y5; - pEquation->numBits = 4; - break; - case ADDR_PIPECFG_P16_32x32_16x16: - pAddr[0] = x3; - pXor1[0] = y3; - pXor2[0] = x4; - pAddr[1] = x4; - pXor1[1] = y4; - pAddr[2] = x5; - pXor1[2] = y6; - pAddr[3] = x6; - pXor1[3] = y5; - pEquation->numBits = 4; - break; - default: - ADDR_UNHANDLED_CASE(); - pEquation->numBits = 0; - retCode = ADDR_NOTSUPPORTED; - break; - } - - if (m_settings.isVegaM && (pEquation->numBits == 4)) - { - ADDR_CHANNEL_SETTING addeMsb = pAddr[0]; - ADDR_CHANNEL_SETTING xor1Msb = pXor1[0]; - ADDR_CHANNEL_SETTING xor2Msb = pXor2[0]; - - pAddr[0] = pAddr[1]; - pXor1[0] = pXor1[1]; - pXor2[0] = pXor2[1]; - - pAddr[1] = pAddr[2]; - pXor1[1] = pXor1[2]; - pXor2[1] = pXor2[2]; - - pAddr[2] = pAddr[3]; - pXor1[2] = pXor1[3]; - pXor2[2] = pXor2[3]; - - pAddr[3] = addeMsb; - pXor1[3] = xor1Msb; - pXor2[3] = xor2Msb; - } - - for (UINT_32 i = 0; i < pEquation->numBits; i++) - { - if (pAddr[i].value == 0) - { - if (pXor1[i].value == 0) - { - pAddr[i].value = pXor2[i].value; - } - else - { - pAddr[i].value = pXor1[i].value; - pXor1[i].value = 0; - } - } - } - - return retCode; -} - -/** -**************************************************************************************************** -* SiLib::ComputePipeFromCoord -* -* @brief -* Compute pipe number from coordinates -* @return -* Pipe number 
-**************************************************************************************************** -*/ -UINT_32 SiLib::ComputePipeFromCoord( - UINT_32 x, ///< [in] x coordinate - UINT_32 y, ///< [in] y coordinate - UINT_32 slice, ///< [in] slice index - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 pipeSwizzle, ///< [in] pipe swizzle - BOOL_32 ignoreSE, ///< [in] TRUE if shader engines are ignored - ADDR_TILEINFO* pTileInfo ///< [in] Tile info - ) const -{ - UINT_32 pipe; - UINT_32 pipeBit0 = 0; - UINT_32 pipeBit1 = 0; - UINT_32 pipeBit2 = 0; - UINT_32 pipeBit3 = 0; - UINT_32 sliceRotation; - UINT_32 numPipes = 0; - - UINT_32 tx = x / MicroTileWidth; - UINT_32 ty = y / MicroTileHeight; - UINT_32 x3 = _BIT(tx,0); - UINT_32 x4 = _BIT(tx,1); - UINT_32 x5 = _BIT(tx,2); - UINT_32 x6 = _BIT(tx,3); - UINT_32 y3 = _BIT(ty,0); - UINT_32 y4 = _BIT(ty,1); - UINT_32 y5 = _BIT(ty,2); - UINT_32 y6 = _BIT(ty,3); - - switch (pTileInfo->pipeConfig) - { - case ADDR_PIPECFG_P2: - pipeBit0 = x3 ^ y3; - numPipes = 2; - break; - case ADDR_PIPECFG_P4_8x16: - pipeBit0 = x4 ^ y3; - pipeBit1 = x3 ^ y4; - numPipes = 4; - break; - case ADDR_PIPECFG_P4_16x16: - pipeBit0 = x3 ^ y3 ^ x4; - pipeBit1 = x4 ^ y4; - numPipes = 4; - break; - case ADDR_PIPECFG_P4_16x32: - pipeBit0 = x3 ^ y3 ^ x4; - pipeBit1 = x4 ^ y5; - numPipes = 4; - break; - case ADDR_PIPECFG_P4_32x32: - pipeBit0 = x3 ^ y3 ^ x5; - pipeBit1 = x5 ^ y5; - numPipes = 4; - break; - case ADDR_PIPECFG_P8_16x16_8x16: - pipeBit0 = x4 ^ y3 ^ x5; - pipeBit1 = x3 ^ y5; - numPipes = 8; - break; - case ADDR_PIPECFG_P8_16x32_8x16: - pipeBit0 = x4 ^ y3 ^ x5; - pipeBit1 = x3 ^ y4; - pipeBit2 = x4 ^ y5; - numPipes = 8; - break; - case ADDR_PIPECFG_P8_16x32_16x16: - pipeBit0 = x3 ^ y3 ^ x4; - pipeBit1 = x5 ^ y4; - pipeBit2 = x4 ^ y5; - numPipes = 8; - break; - case ADDR_PIPECFG_P8_32x32_8x16: - pipeBit0 = x4 ^ y3 ^ x5; - pipeBit1 = x3 ^ y4; - pipeBit2 = x5 ^ y5; - numPipes = 8; - break; - case ADDR_PIPECFG_P8_32x32_16x16: - pipeBit0 = x3 ^ 
y3 ^ x4; - pipeBit1 = x4 ^ y4; - pipeBit2 = x5 ^ y5; - numPipes = 8; - break; - case ADDR_PIPECFG_P8_32x32_16x32: - pipeBit0 = x3 ^ y3 ^ x4; - pipeBit1 = x4 ^ y6; - pipeBit2 = x5 ^ y5; - numPipes = 8; - break; - case ADDR_PIPECFG_P8_32x64_32x32: - pipeBit0 = x3 ^ y3 ^ x5; - pipeBit1 = x6 ^ y5; - pipeBit2 = x5 ^ y6; - numPipes = 8; - break; - case ADDR_PIPECFG_P16_32x32_8x16: - pipeBit0 = x4 ^ y3; - pipeBit1 = x3 ^ y4; - pipeBit2 = x5 ^ y6; - pipeBit3 = x6 ^ y5; - numPipes = 16; - break; - case ADDR_PIPECFG_P16_32x32_16x16: - pipeBit0 = x3 ^ y3 ^ x4; - pipeBit1 = x4 ^ y4; - pipeBit2 = x5 ^ y6; - pipeBit3 = x6 ^ y5; - numPipes = 16; - break; - default: - ADDR_UNHANDLED_CASE(); - break; - } - - if (m_settings.isVegaM && (numPipes == 16)) - { - UINT_32 pipeMsb = pipeBit0; - pipeBit0 = pipeBit1; - pipeBit1 = pipeBit2; - pipeBit2 = pipeBit3; - pipeBit3 = pipeMsb; - } - - pipe = pipeBit0 | (pipeBit1 << 1) | (pipeBit2 << 2) | (pipeBit3 << 3); - - UINT_32 microTileThickness = Thickness(tileMode); - - // - // Apply pipe rotation for the slice. 
- // - switch (tileMode) - { - case ADDR_TM_3D_TILED_THIN1: //fall through thin - case ADDR_TM_3D_TILED_THICK: //fall through thick - case ADDR_TM_3D_TILED_XTHICK: - sliceRotation = - Max(1, static_cast(numPipes / 2) - 1) * (slice / microTileThickness); - break; - default: - sliceRotation = 0; - break; - } - pipeSwizzle += sliceRotation; - pipeSwizzle &= (numPipes - 1); - - pipe = pipe ^ pipeSwizzle; - - return pipe; -} - -/** -**************************************************************************************************** -* SiLib::ComputeTileCoordFromPipeAndElemIdx -* -* @brief -* Compute (x,y) of a tile within a macro tile from address -* @return -* Pipe number -**************************************************************************************************** -*/ -VOID SiLib::ComputeTileCoordFromPipeAndElemIdx( - UINT_32 elemIdx, ///< [in] per pipe element index within a macro tile - UINT_32 pipe, ///< [in] pipe index - AddrPipeCfg pipeCfg, ///< [in] pipe config - UINT_32 pitchInMacroTile, ///< [in] surface pitch in macro tile - UINT_32 x, ///< [in] x coordinate of the (0,0) tile in a macro tile - UINT_32 y, ///< [in] y coordinate of the (0,0) tile in a macro tile - UINT_32* pX, ///< [out] x coordinate - UINT_32* pY ///< [out] y coordinate - ) const -{ - UINT_32 pipebit0 = _BIT(pipe,0); - UINT_32 pipebit1 = _BIT(pipe,1); - UINT_32 pipebit2 = _BIT(pipe,2); - UINT_32 pipebit3 = _BIT(pipe,3); - UINT_32 elemIdx0 = _BIT(elemIdx,0); - UINT_32 elemIdx1 = _BIT(elemIdx,1); - UINT_32 elemIdx2 = _BIT(elemIdx,2); - UINT_32 x3 = 0; - UINT_32 x4 = 0; - UINT_32 x5 = 0; - UINT_32 x6 = 0; - UINT_32 y3 = 0; - UINT_32 y4 = 0; - UINT_32 y5 = 0; - UINT_32 y6 = 0; - - switch(pipeCfg) - { - case ADDR_PIPECFG_P2: - x4 = elemIdx2; - y4 = elemIdx1 ^ x4; - y3 = elemIdx0 ^ x4; - x3 = pipebit0 ^ y3; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(2, x4, x3); - break; - case ADDR_PIPECFG_P4_8x16: - x4 = elemIdx1; - y4 = elemIdx0 ^ x4; - x3 = pipebit1 ^ y4; - y3 = pipebit0 ^ x4; - 
*pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(2, x4, x3); - break; - case ADDR_PIPECFG_P4_16x16: - x4 = elemIdx1; - y3 = elemIdx0 ^ x4; - y4 = pipebit1 ^ x4; - x3 = pipebit0 ^ y3 ^ x4; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(2, x4, x3); - break; - case ADDR_PIPECFG_P4_16x32: - x3 = elemIdx0 ^ pipebit0; - y5 = _BIT(y,5); - x4 = pipebit1 ^ y5; - y3 = pipebit0 ^ x3 ^ x4; - y4 = elemIdx1 ^ x4; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(2, x4, x3); - break; - case ADDR_PIPECFG_P4_32x32: - x4 = elemIdx2; - y3 = elemIdx0 ^ x4; - y4 = elemIdx1 ^ x4; - if((pitchInMacroTile % 2) == 0) - { //even - y5 = _BIT(y,5); - x5 = pipebit1 ^ y5; - x3 = pipebit0 ^ y3 ^ x5; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(3, x5, x4, x3); - } - else - { //odd - x5 = _BIT(x,5); - x3 = pipebit0 ^ y3 ^ x5; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(2, x4, x3); - } - break; - case ADDR_PIPECFG_P8_16x16_8x16: - x4 = elemIdx0; - y5 = _BIT(y,5); - x5 = _BIT(x,5); - x3 = pipebit1 ^ y5; - y4 = pipebit2 ^ x4; - y3 = pipebit0 ^ x5 ^ x4; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(2, x4, x3); - break; - case ADDR_PIPECFG_P8_16x32_8x16: - x3 = elemIdx0; - y4 = pipebit1 ^ x3; - y5 = _BIT(y,5); - x5 = _BIT(x,5); - x4 = pipebit2 ^ y5; - y3 = pipebit0 ^ x4 ^ x5; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(2, x4, x3); - break; - case ADDR_PIPECFG_P8_32x32_8x16: - x4 = elemIdx1; - y4 = elemIdx0 ^ x4; - x3 = pipebit1 ^ y4; - if((pitchInMacroTile % 2) == 0) - { //even - y5 = _BIT(y,5); - x5 = _BIT(x,5); - x5 = pipebit2 ^ y5; - y3 = pipebit0 ^ x4 ^ x5; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(3, x5, x4, x3); - } - else - { //odd - x5 = _BIT(x,5); - y3 = pipebit0 ^ x4 ^ x5; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(2, x4, x3); - } - break; - case ADDR_PIPECFG_P8_16x32_16x16: - x3 = elemIdx0; - x5 = _BIT(x,5); - y5 = _BIT(y,5); - x4 = pipebit2 ^ y5; - y4 = pipebit1 ^ x5; - y3 = pipebit0 ^ x3 ^ x4; - *pY = Bits2Number(2, y4, y3); - *pX = 
Bits2Number(2, x4, x3); - break; - case ADDR_PIPECFG_P8_32x32_16x16: - x4 = elemIdx1; - y3 = elemIdx0 ^ x4; - x3 = y3^x4^pipebit0; - y4 = pipebit1 ^ x4; - if((pitchInMacroTile % 2) == 0) - { //even - y5 = _BIT(y,5); - x5 = pipebit2 ^ y5; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(3, x5, x4, x3); - } - else - { //odd - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(2, x4, x3); - } - break; - case ADDR_PIPECFG_P8_32x32_16x32: - if((pitchInMacroTile % 2) == 0) - { //even - y5 = _BIT(y,5); - y6 = _BIT(y,6); - x4 = pipebit1 ^ y6; - y3 = elemIdx0 ^ x4; - y4 = elemIdx1 ^ x4; - x3 = pipebit0 ^ y3 ^ x4; - x5 = pipebit2 ^ y5; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(3, x5, x4, x3); - } - else - { //odd - y6 = _BIT(y,6); - x4 = pipebit1 ^ y6; - y3 = elemIdx0 ^ x4; - y4 = elemIdx1 ^ x4; - x3 = pipebit0 ^ y3 ^ x4; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(2, x4, x3); - } - break; - case ADDR_PIPECFG_P8_32x64_32x32: - x4 = elemIdx2; - y3 = elemIdx0 ^ x4; - y4 = elemIdx1 ^ x4; - if((pitchInMacroTile % 4) == 0) - { //multiple of 4 - y5 = _BIT(y,5); - y6 = _BIT(y,6); - x5 = pipebit2 ^ y6; - x6 = pipebit1 ^ y5; - x3 = pipebit0 ^ y3 ^ x5; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(4, x6, x5, x4, x3); - } - else - { - y6 = _BIT(y,6); - x5 = pipebit2 ^ y6; - x3 = pipebit0 ^ y3 ^ x5; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(3, x5, x4, x3); - } - break; - case ADDR_PIPECFG_P16_32x32_8x16: - x4 = elemIdx1; - y4 = elemIdx0 ^ x4; - y3 = pipebit0 ^ x4; - x3 = pipebit1 ^ y4; - if((pitchInMacroTile % 4) == 0) - { //multiple of 4 - y5 = _BIT(y,5); - y6 = _BIT(y,6); - x5 = pipebit2 ^ y6; - x6 = pipebit3 ^ y5; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(4, x6, x5,x4, x3); - } - else - { - y6 = _BIT(y,6); - x5 = pipebit2 ^ y6; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(3, x5, x4, x3); - } - break; - case ADDR_PIPECFG_P16_32x32_16x16: - x4 = elemIdx1; - y3 = elemIdx0 ^ x4; - y4 = pipebit1 ^ x4; - x3 = pipebit0 ^ y3 ^ x4; - 
if((pitchInMacroTile % 4) == 0) - { //multiple of 4 - y5 = _BIT(y,5); - y6 = _BIT(y,6); - x5 = pipebit2 ^ y6; - x6 = pipebit3 ^ y5; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(4, x6, x5, x4, x3); - } - else - { - y6 = _BIT(y,6); - x5 = pipebit2 ^ y6; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(3, x5, x4, x3); - } - break; - default: - ADDR_UNHANDLED_CASE(); - } -} - -/** -**************************************************************************************************** -* SiLib::TileCoordToMaskElementIndex -* -* @brief -* Compute element index from coordinates in tiles -* @return -* Element index -**************************************************************************************************** -*/ -UINT_32 SiLib::TileCoordToMaskElementIndex( - UINT_32 tx, ///< [in] x coord, in Tiles - UINT_32 ty, ///< [in] y coord, in Tiles - AddrPipeCfg pipeConfig, ///< [in] pipe config - UINT_32* macroShift, ///< [out] macro shift - UINT_32* elemIdxBits ///< [out] tile offset bits - ) const -{ - UINT_32 elemIdx = 0; - UINT_32 elemIdx0, elemIdx1, elemIdx2; - UINT_32 tx0, tx1; - UINT_32 ty0, ty1; - - tx0 = _BIT(tx,0); - tx1 = _BIT(tx,1); - ty0 = _BIT(ty,0); - ty1 = _BIT(ty,1); - - switch(pipeConfig) - { - case ADDR_PIPECFG_P2: - *macroShift = 3; - *elemIdxBits =3; - elemIdx2 = tx1; - elemIdx1 = tx1 ^ ty1; - elemIdx0 = tx1 ^ ty0; - elemIdx = Bits2Number(3,elemIdx2,elemIdx1,elemIdx0); - break; - case ADDR_PIPECFG_P4_8x16: - *macroShift = 2; - *elemIdxBits =2; - elemIdx1 = tx1; - elemIdx0 = tx1 ^ ty1; - elemIdx = Bits2Number(2,elemIdx1,elemIdx0); - break; - case ADDR_PIPECFG_P4_16x16: - *macroShift = 2; - *elemIdxBits =2; - elemIdx0 = tx1^ty0; - elemIdx1 = tx1; - elemIdx = Bits2Number(2, elemIdx1, elemIdx0); - break; - case ADDR_PIPECFG_P4_16x32: - *macroShift = 2; - *elemIdxBits =2; - elemIdx0 = tx1^ty0; - elemIdx1 = tx1^ty1; - elemIdx = Bits2Number(2, elemIdx1, elemIdx0); - break; - case ADDR_PIPECFG_P4_32x32: - *macroShift = 2; - *elemIdxBits =3; - elemIdx0 = 
tx1^ty0; - elemIdx1 = tx1^ty1; - elemIdx2 = tx1; - elemIdx = Bits2Number(3, elemIdx2, elemIdx1, elemIdx0); - break; - case ADDR_PIPECFG_P8_16x16_8x16: - *macroShift = 1; - *elemIdxBits =1; - elemIdx0 = tx1; - elemIdx = elemIdx0; - break; - case ADDR_PIPECFG_P8_16x32_8x16: - *macroShift = 1; - *elemIdxBits =1; - elemIdx0 = tx0; - elemIdx = elemIdx0; - break; - case ADDR_PIPECFG_P8_32x32_8x16: - *macroShift = 1; - *elemIdxBits =2; - elemIdx1 = tx1; - elemIdx0 = tx1^ty1; - elemIdx = Bits2Number(2, elemIdx1, elemIdx0); - break; - case ADDR_PIPECFG_P8_16x32_16x16: - *macroShift = 1; - *elemIdxBits =1; - elemIdx0 = tx0; - elemIdx = elemIdx0; - break; - case ADDR_PIPECFG_P8_32x32_16x16: - *macroShift = 1; - *elemIdxBits =2; - elemIdx0 = tx1^ty0; - elemIdx1 = tx1; - elemIdx = Bits2Number(2, elemIdx1, elemIdx0); - break; - case ADDR_PIPECFG_P8_32x32_16x32: - *macroShift = 1; - *elemIdxBits =2; - elemIdx0 = tx1^ty0; - elemIdx1 = tx1^ty1; - elemIdx = Bits2Number(2, elemIdx1, elemIdx0); - break; - case ADDR_PIPECFG_P8_32x64_32x32: - *macroShift = 1; - *elemIdxBits =3; - elemIdx0 = tx1^ty0; - elemIdx1 = tx1^ty1; - elemIdx2 = tx1; - elemIdx = Bits2Number(3, elemIdx2, elemIdx1, elemIdx0); - break; - case ADDR_PIPECFG_P16_32x32_8x16: - *macroShift = 0; - *elemIdxBits =2; - elemIdx0 = tx1^ty1; - elemIdx1 = tx1; - elemIdx = Bits2Number(2, elemIdx1, elemIdx0); - break; - case ADDR_PIPECFG_P16_32x32_16x16: - *macroShift = 0; - *elemIdxBits =2; - elemIdx0 = tx1^ty0; - elemIdx1 = tx1; - elemIdx = Bits2Number(2, elemIdx1, elemIdx0); - break; - default: - ADDR_UNHANDLED_CASE(); - break; - } - - return elemIdx; -} - -/** -**************************************************************************************************** -* SiLib::HwlComputeTileDataWidthAndHeightLinear -* -* @brief -* Compute the squared cache shape for per-tile data (CMASK and HTILE) for linear layout -* -* @return -* N/A -* -* @note -* MacroWidth and macroHeight are measured in pixels 
-**************************************************************************************************** -*/ -VOID SiLib::HwlComputeTileDataWidthAndHeightLinear( - UINT_32* pMacroWidth, ///< [out] macro tile width - UINT_32* pMacroHeight, ///< [out] macro tile height - UINT_32 bpp, ///< [in] bits per pixel - ADDR_TILEINFO* pTileInfo ///< [in] tile info - ) const -{ - ADDR_ASSERT(pTileInfo != NULL); - UINT_32 macroWidth; - UINT_32 macroHeight; - - /// In linear mode, the htile or cmask buffer must be padded out to 4 tiles - /// but for P8_32x64_32x32, it must be padded out to 8 tiles - /// Actually there are more pipe configs which need 8-tile padding but SI family - /// has a bug which is fixed in CI family - if ((pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32) || - (pTileInfo->pipeConfig == ADDR_PIPECFG_P16_32x32_8x16) || - (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x32_16x16)) - { - macroWidth = 8*MicroTileWidth; - macroHeight = 8*MicroTileHeight; - } - else - { - macroWidth = 4*MicroTileWidth; - macroHeight = 4*MicroTileHeight; - } - - *pMacroWidth = macroWidth; - *pMacroHeight = macroHeight; -} - -/** -**************************************************************************************************** -* SiLib::HwlComputeHtileBytes -* -* @brief -* Compute htile size in bytes -* -* @return -* Htile size in bytes -**************************************************************************************************** -*/ -UINT_64 SiLib::HwlComputeHtileBytes( - UINT_32 pitch, ///< [in] pitch - UINT_32 height, ///< [in] height - UINT_32 bpp, ///< [in] bits per pixel - BOOL_32 isLinear, ///< [in] if it is linear mode - UINT_32 numSlices, ///< [in] number of slices - UINT_64* pSliceBytes, ///< [out] bytes per slice - UINT_32 baseAlign ///< [in] base alignments - ) const -{ - return ComputeHtileBytes(pitch, height, bpp, isLinear, numSlices, pSliceBytes, baseAlign); -} - -/** 
-**************************************************************************************************** -* SiLib::HwlComputeXmaskAddrFromCoord -* -* @brief -* Compute address from coordinates for htile/cmask -* @return -* Byte address -**************************************************************************************************** -*/ -UINT_64 SiLib::HwlComputeXmaskAddrFromCoord( - UINT_32 pitch, ///< [in] pitch - UINT_32 height, ///< [in] height - UINT_32 x, ///< [in] x coord - UINT_32 y, ///< [in] y coord - UINT_32 slice, ///< [in] slice/depth index - UINT_32 numSlices, ///< [in] number of slices - UINT_32 factor, ///< [in] factor that indicates cmask(2) or htile(1) - BOOL_32 isLinear, ///< [in] linear or tiled HTILE layout - BOOL_32 isWidth8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value - BOOL_32 isHeight8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value - ADDR_TILEINFO* pTileInfo, ///< [in] Tile info - UINT_32* pBitPosition ///< [out] bit position inside a byte - ) const -{ - UINT_32 tx = x / MicroTileWidth; - UINT_32 ty = y / MicroTileHeight; - UINT_32 newPitch; - UINT_32 newHeight; - UINT_64 totalBytes; - UINT_32 macroWidth; - UINT_32 macroHeight; - UINT_64 pSliceBytes; - UINT_32 pBaseAlign; - UINT_32 tileNumPerPipe; - UINT_32 elemBits; - - if (factor == 2) //CMASK - { - ADDR_CMASK_FLAGS flags = {{0}}; - - tileNumPerPipe = 256; - - ComputeCmaskInfo(flags, - pitch, - height, - numSlices, - isLinear, - pTileInfo, - &newPitch, - &newHeight, - &totalBytes, - ¯oWidth, - ¯oHeight); - elemBits = CmaskElemBits; - } - else //HTile - { - ADDR_HTILE_FLAGS flags = {{0}}; - - tileNumPerPipe = 512; - - ComputeHtileInfo(flags, - pitch, - height, - numSlices, - isLinear, - TRUE, - TRUE, - pTileInfo, - &newPitch, - &newHeight, - &totalBytes, - ¯oWidth, - ¯oHeight, - &pSliceBytes, - &pBaseAlign); - elemBits = 32; - } - - const UINT_32 pitchInTile = newPitch / MicroTileWidth; - const UINT_32 heightInTile = newHeight / MicroTileWidth; - 
UINT_64 macroOffset; // Per pipe starting offset of the macro tile in which this tile lies. - UINT_64 microNumber; // Per pipe starting offset of the macro tile in which this tile lies. - UINT_32 microX; - UINT_32 microY; - UINT_64 microOffset; - UINT_32 microShift; - UINT_64 totalOffset; - UINT_32 elemIdxBits; - UINT_32 elemIdx = - TileCoordToMaskElementIndex(tx, ty, pTileInfo->pipeConfig, µShift, &elemIdxBits); - - UINT_32 numPipes = HwlGetPipes(pTileInfo); - - if (isLinear) - { //linear addressing - // Linear addressing is extremelly wasting memory if slice > 1, since each pipe has the full - // slice memory foot print instead of divided by numPipes. - microX = tx / 4; // Macro Tile is 4x4 - microY = ty / 4 ; - microNumber = static_cast(microX + microY * (pitchInTile / 4)) << microShift; - - UINT_32 sliceBits = pitchInTile * heightInTile; - - // do htile single slice alignment if the flag is true - if (m_configFlags.useHtileSliceAlign && (factor == 1)) //Htile - { - sliceBits = PowTwoAlign(sliceBits, BITS_TO_BYTES(HtileCacheBits) * numPipes / elemBits); - } - macroOffset = slice * (sliceBits / numPipes) * elemBits ; - } - else - { //tiled addressing - const UINT_32 macroWidthInTile = macroWidth / MicroTileWidth; // Now in unit of Tiles - const UINT_32 macroHeightInTile = macroHeight / MicroTileHeight; - const UINT_32 pitchInCL = pitchInTile / macroWidthInTile; - const UINT_32 heightInCL = heightInTile / macroHeightInTile; - - const UINT_32 macroX = x / macroWidth; - const UINT_32 macroY = y / macroHeight; - const UINT_32 macroNumber = macroX + macroY * pitchInCL + slice * pitchInCL * heightInCL; - - // Per pipe starting offset of the cache line in which this tile lies. 
- microX = (x % macroWidth) / MicroTileWidth / 4; // Macro Tile is 4x4 - microY = (y % macroHeight) / MicroTileHeight / 4 ; - microNumber = static_cast(microX + microY * (macroWidth / MicroTileWidth / 4)) << microShift; - - macroOffset = macroNumber * tileNumPerPipe * elemBits; - } - - if(elemIdxBits == microShift) - { - microNumber += elemIdx; - } - else - { - microNumber >>= elemIdxBits; - microNumber <<= elemIdxBits; - microNumber += elemIdx; - } - - microOffset = elemBits * microNumber; - totalOffset = microOffset + macroOffset; - - UINT_32 pipe = ComputePipeFromCoord(x, y, 0, ADDR_TM_2D_TILED_THIN1, 0, FALSE, pTileInfo); - UINT_64 addrInBits = totalOffset % (m_pipeInterleaveBytes * 8) + - pipe * (m_pipeInterleaveBytes * 8) + - totalOffset / (m_pipeInterleaveBytes * 8) * (m_pipeInterleaveBytes * 8) * numPipes; - *pBitPosition = static_cast(addrInBits) % 8; - UINT_64 addr = addrInBits / 8; - - return addr; -} - -/** -**************************************************************************************************** -* SiLib::HwlComputeXmaskCoordFromAddr -* -* @brief -* Compute the coord from an address of a cmask/htile -* -* @return -* N/A -* -* @note -* This method is reused by htile, so rename to Xmask -**************************************************************************************************** -*/ -VOID SiLib::HwlComputeXmaskCoordFromAddr( - UINT_64 addr, ///< [in] address - UINT_32 bitPosition, ///< [in] bitPosition in a byte - UINT_32 pitch, ///< [in] pitch - UINT_32 height, ///< [in] height - UINT_32 numSlices, ///< [in] number of slices - UINT_32 factor, ///< [in] factor that indicates cmask or htile - BOOL_32 isLinear, ///< [in] linear or tiled HTILE layout - BOOL_32 isWidth8, ///< [in] Not used by SI - BOOL_32 isHeight8, ///< [in] Not used by SI - ADDR_TILEINFO* pTileInfo, ///< [in] Tile info - UINT_32* pX, ///< [out] x coord - UINT_32* pY, ///< [out] y coord - UINT_32* pSlice ///< [out] slice index - ) const -{ - UINT_32 newPitch; - UINT_32 
newHeight; - UINT_64 totalBytes; - UINT_32 clWidth; - UINT_32 clHeight; - UINT_32 tileNumPerPipe; - UINT_64 sliceBytes; - - *pX = 0; - *pY = 0; - *pSlice = 0; - - if (factor == 2) //CMASK - { - ADDR_CMASK_FLAGS flags = {{0}}; - - tileNumPerPipe = 256; - - ComputeCmaskInfo(flags, - pitch, - height, - numSlices, - isLinear, - pTileInfo, - &newPitch, - &newHeight, - &totalBytes, - &clWidth, - &clHeight); - } - else //HTile - { - ADDR_HTILE_FLAGS flags = {{0}}; - - tileNumPerPipe = 512; - - ComputeHtileInfo(flags, - pitch, - height, - numSlices, - isLinear, - TRUE, - TRUE, - pTileInfo, - &newPitch, - &newHeight, - &totalBytes, - &clWidth, - &clHeight, - &sliceBytes); - } - - const UINT_32 pitchInTile = newPitch / MicroTileWidth; - const UINT_32 heightInTile = newHeight / MicroTileWidth; - const UINT_32 pitchInMacroTile = pitchInTile / 4; - UINT_32 macroShift; - UINT_32 elemIdxBits; - // get macroShift and elemIdxBits - TileCoordToMaskElementIndex(0, 0, pTileInfo->pipeConfig, ¯oShift, &elemIdxBits); - - const UINT_32 numPipes = HwlGetPipes(pTileInfo); - const UINT_32 pipe = (UINT_32)((addr / m_pipeInterleaveBytes) % numPipes); - // per pipe - UINT_64 localOffset = (addr % m_pipeInterleaveBytes) + - (addr / m_pipeInterleaveBytes / numPipes)* m_pipeInterleaveBytes; - - UINT_32 tileIndex; - if (factor == 2) //CMASK - { - tileIndex = (UINT_32)(localOffset * 2 + (bitPosition != 0)); - } - else - { - tileIndex = (UINT_32)(localOffset / 4); - } - - UINT_32 macroOffset; - if (isLinear) - { - UINT_32 sliceSizeInTile = pitchInTile * heightInTile; - - // do htile single slice alignment if the flag is true - if (m_configFlags.useHtileSliceAlign && (factor == 1)) //Htile - { - sliceSizeInTile = PowTwoAlign(sliceSizeInTile, static_cast(sliceBytes) / 64); - } - *pSlice = tileIndex / (sliceSizeInTile / numPipes); - macroOffset = tileIndex % (sliceSizeInTile / numPipes); - } - else - { - const UINT_32 clWidthInTile = clWidth / MicroTileWidth; // Now in unit of Tiles - const UINT_32 
clHeightInTile = clHeight / MicroTileHeight; - const UINT_32 pitchInCL = pitchInTile / clWidthInTile; - const UINT_32 heightInCL = heightInTile / clHeightInTile; - const UINT_32 clIndex = tileIndex / tileNumPerPipe; - - UINT_32 clX = clIndex % pitchInCL; - UINT_32 clY = (clIndex % (heightInCL * pitchInCL)) / pitchInCL; - - *pX = clX * clWidthInTile * MicroTileWidth; - *pY = clY * clHeightInTile * MicroTileHeight; - *pSlice = clIndex / (heightInCL * pitchInCL); - - macroOffset = tileIndex % tileNumPerPipe; - } - - UINT_32 elemIdx = macroOffset & 7; - macroOffset >>= elemIdxBits; - - if (elemIdxBits != macroShift) - { - macroOffset <<= (elemIdxBits - macroShift); - - UINT_32 pipebit1 = _BIT(pipe,1); - UINT_32 pipebit2 = _BIT(pipe,2); - UINT_32 pipebit3 = _BIT(pipe,3); - if (pitchInMacroTile % 2) - { //odd - switch (pTileInfo->pipeConfig) - { - case ADDR_PIPECFG_P4_32x32: - macroOffset |= pipebit1; - break; - case ADDR_PIPECFG_P8_32x32_8x16: - case ADDR_PIPECFG_P8_32x32_16x16: - case ADDR_PIPECFG_P8_32x32_16x32: - macroOffset |= pipebit2; - break; - default: - break; - } - - } - - if (pitchInMacroTile % 4) - { - if (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32) - { - macroOffset |= (pipebit1<<1); - } - if((pTileInfo->pipeConfig == ADDR_PIPECFG_P16_32x32_8x16) || - (pTileInfo->pipeConfig == ADDR_PIPECFG_P16_32x32_16x16)) - { - macroOffset |= (pipebit3<<1); - } - } - } - - UINT_32 macroX; - UINT_32 macroY; - - if (isLinear) - { - macroX = macroOffset % pitchInMacroTile; - macroY = macroOffset / pitchInMacroTile; - } - else - { - const UINT_32 clWidthInMacroTile = clWidth / (MicroTileWidth * 4); - macroX = macroOffset % clWidthInMacroTile; - macroY = macroOffset / clWidthInMacroTile; - } - - *pX += macroX * 4 * MicroTileWidth; - *pY += macroY * 4 * MicroTileHeight; - - UINT_32 microX; - UINT_32 microY; - ComputeTileCoordFromPipeAndElemIdx(elemIdx, pipe, pTileInfo->pipeConfig, pitchInMacroTile, - *pX, *pY, µX, µY); - - *pX += microX * MicroTileWidth; - *pY += 
microY * MicroTileWidth; -} - -/** -**************************************************************************************************** -* SiLib::HwlGetPitchAlignmentLinear -* @brief -* Get pitch alignment -* @return -* pitch alignment -**************************************************************************************************** -*/ -UINT_32 SiLib::HwlGetPitchAlignmentLinear( - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags ///< [in] surface flags - ) const -{ - UINT_32 pitchAlign; - - // Interleaved access requires a 256B aligned pitch, so fall back to pre-SI alignment - if (flags.interleaved) - { - pitchAlign = Max(64u, m_pipeInterleaveBytes / BITS_TO_BYTES(bpp)); - - } - else - { - pitchAlign = Max(8u, 64 / BITS_TO_BYTES(bpp)); - } - - return pitchAlign; -} - -/** -**************************************************************************************************** -* SiLib::HwlGetSizeAdjustmentLinear -* -* @brief -* Adjust linear surface pitch and slice size -* -* @return -* Logical slice size in bytes -**************************************************************************************************** -*/ -UINT_64 SiLib::HwlGetSizeAdjustmentLinear( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 bpp, ///< [in] bits per pixel - UINT_32 numSamples, ///< [in] number of samples - UINT_32 baseAlign, ///< [in] base alignment - UINT_32 pitchAlign, ///< [in] pitch alignment - UINT_32* pPitch, ///< [in,out] pointer to pitch - UINT_32* pHeight, ///< [in,out] pointer to height - UINT_32* pHeightAlign ///< [in,out] pointer to height align - ) const -{ - UINT_64 sliceSize; - if (tileMode == ADDR_TM_LINEAR_GENERAL) - { - sliceSize = BITS_TO_BYTES(static_cast(*pPitch) * (*pHeight) * bpp * numSamples); - } - else - { - UINT_32 pitch = *pPitch; - UINT_32 height = *pHeight; - - UINT_32 pixelsPerPipeInterleave = m_pipeInterleaveBytes / BITS_TO_BYTES(bpp); - UINT_32 sliceAlignInPixel = pixelsPerPipeInterleave < 64 ? 
64 : pixelsPerPipeInterleave; - - // numSamples should be 1 in real cases (no MSAA for linear but TGL may pass non 1 value) - UINT_64 pixelPerSlice = static_cast(pitch) * height * numSamples; - - while (pixelPerSlice % sliceAlignInPixel) - { - pitch += pitchAlign; - pixelPerSlice = static_cast(pitch) * height * numSamples; - } - - *pPitch = pitch; - - UINT_32 heightAlign = 1; - - while ((pitch * heightAlign) % sliceAlignInPixel) - { - heightAlign++; - } - - *pHeightAlign = heightAlign; - - sliceSize = BITS_TO_BYTES(pixelPerSlice * bpp); - } - - return sliceSize; -} - -/** -**************************************************************************************************** -* SiLib::HwlPreHandleBaseLvl3xPitch -* -* @brief -* Pre-handler of 3x pitch (96 bit) adjustment -* -* @return -* Expected pitch -**************************************************************************************************** -*/ -UINT_32 SiLib::HwlPreHandleBaseLvl3xPitch( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input - UINT_32 expPitch ///< [in] pitch - ) const -{ - ADDR_ASSERT(pIn->width == expPitch); - - // From SI, if pow2Pad is 1 the pitch is expanded 3x first, then padded to pow2, so nothing to - // do here - if (pIn->flags.pow2Pad == FALSE) - { - Addr::V1::Lib::HwlPreHandleBaseLvl3xPitch(pIn, expPitch); - } - else - { - ADDR_ASSERT(IsPow2(expPitch)); - } - - return expPitch; -} - -/** -**************************************************************************************************** -* SiLib::HwlPostHandleBaseLvl3xPitch -* -* @brief -* Post-handler of 3x pitch adjustment -* -* @return -* Expected pitch -**************************************************************************************************** -*/ -UINT_32 SiLib::HwlPostHandleBaseLvl3xPitch( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input - UINT_32 expPitch ///< [in] pitch - ) const -{ - /** - * @note The pitch will be divided by 3 in the end so the value will look odd but h/w should - * be able 
to compute a correct pitch from it as h/w address library is doing the job. - */ - // From SI, the pitch is expanded 3x first, then padded to pow2, so no special handler here - if (pIn->flags.pow2Pad == FALSE) - { - Addr::V1::Lib::HwlPostHandleBaseLvl3xPitch(pIn, expPitch); - } - - return expPitch; -} - -/** -**************************************************************************************************** -* SiLib::HwlGetPitchAlignmentMicroTiled -* -* @brief -* Compute 1D tiled surface pitch alignment -* -* @return -* pitch alignment -**************************************************************************************************** -*/ -UINT_32 SiLib::HwlGetPitchAlignmentMicroTiled( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32 numSamples ///< [in] number of samples - ) const -{ - UINT_32 pitchAlign; - - if (flags.qbStereo) - { - pitchAlign = EgBasedLib::HwlGetPitchAlignmentMicroTiled(tileMode,bpp,flags,numSamples); - } - else - { - pitchAlign = 8; - } - - return pitchAlign; -} - -/** -**************************************************************************************************** -* SiLib::HwlGetSizeAdjustmentMicroTiled -* -* @brief -* Adjust 1D tiled surface pitch and slice size -* -* @return -* Logical slice size in bytes -**************************************************************************************************** -*/ -UINT_64 SiLib::HwlGetSizeAdjustmentMicroTiled( - UINT_32 thickness, ///< [in] thickness - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32 numSamples, ///< [in] number of samples - UINT_32 baseAlign, ///< [in] base alignment - UINT_32 pitchAlign, ///< [in] pitch alignment - UINT_32* pPitch, ///< [in,out] pointer to pitch - UINT_32* pHeight ///< [in,out] pointer to height - ) const -{ - UINT_64 logicalSliceSize; - UINT_64 physicalSliceSize; - - UINT_32 pitch = *pPitch; - UINT_32 
height = *pHeight; - - // Logical slice: pitch * height * bpp * numSamples (no 1D MSAA so actually numSamples == 1) - logicalSliceSize = BITS_TO_BYTES(static_cast(pitch) * height * bpp * numSamples); - - // Physical slice: multiplied by thickness - physicalSliceSize = logicalSliceSize * thickness; - - // Pitch alignment is always 8, so if slice size is not padded to base alignment - // (pipe_interleave_size), we need to increase pitch - while ((physicalSliceSize % baseAlign) != 0) - { - pitch += pitchAlign; - - logicalSliceSize = BITS_TO_BYTES(static_cast(pitch) * height * bpp * numSamples); - - physicalSliceSize = logicalSliceSize * thickness; - } - -#if !ALT_TEST - // - // Special workaround for depth/stencil buffer, use 8 bpp to align depth buffer again since - // the stencil plane may have larger pitch if the slice size is smaller than base alignment. - // - // Note: this actually does not work for mipmap but mipmap depth texture is not really - // sampled with mipmap. - // - if (flags.depth && (flags.noStencil == FALSE)) - { - ADDR_ASSERT(numSamples == 1); - - UINT_64 logicalSiceSizeStencil = static_cast(pitch) * height; // 1 byte stencil - - while ((logicalSiceSizeStencil % baseAlign) != 0) - { - pitch += pitchAlign; // Stencil plane's pitch alignment is the same as depth plane's - - logicalSiceSizeStencil = static_cast(pitch) * height; - } - - if (pitch != *pPitch) - { - // If this is a mipmap, this padded one cannot be sampled as a whole mipmap! 
- logicalSliceSize = logicalSiceSizeStencil * BITS_TO_BYTES(bpp); - } - } -#endif - *pPitch = pitch; - - // No adjust for pHeight - - return logicalSliceSize; -} - -/** -**************************************************************************************************** -* SiLib::HwlConvertChipFamily -* -* @brief -* Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision -* @return -* ChipFamily -**************************************************************************************************** -*/ -ChipFamily SiLib::HwlConvertChipFamily( - UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h - UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h -{ - ChipFamily family = ADDR_CHIP_FAMILY_SI; - - switch (uChipFamily) - { - case FAMILY_SI: - m_settings.isSouthernIsland = 1; - m_settings.isTahiti = ASICREV_IS_TAHITI_P(uChipRevision); - m_settings.isPitCairn = ASICREV_IS_PITCAIRN_PM(uChipRevision); - m_settings.isCapeVerde = ASICREV_IS_CAPEVERDE_M(uChipRevision); - m_settings.isOland = ASICREV_IS_OLAND_M(uChipRevision); - m_settings.isHainan = ASICREV_IS_HAINAN_V(uChipRevision); - break; - default: - ADDR_ASSERT(!"This should be a Fusion"); - break; - } - - return family; -} - -/** -**************************************************************************************************** -* SiLib::HwlSetupTileInfo -* -* @brief -* Setup default value of tile info for SI -**************************************************************************************************** -*/ -VOID SiLib::HwlSetupTileInfo( - AddrTileMode tileMode, ///< [in] Tile mode - ADDR_SURFACE_FLAGS flags, ///< [in] Surface type flags - UINT_32 bpp, ///< [in] Bits per pixel - UINT_32 pitch, ///< [in] Pitch in pixels - UINT_32 height, ///< [in] Height in pixels - UINT_32 numSamples, ///< [in] Number of samples - ADDR_TILEINFO* pTileInfoIn, ///< [in] Tile info input: NULL for default - ADDR_TILEINFO* pTileInfoOut, ///< [out] Tile info output - 
AddrTileType inTileType, ///< [in] Tile type - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] Output - ) const -{ - UINT_32 thickness = Thickness(tileMode); - ADDR_TILEINFO* pTileInfo = pTileInfoOut; - INT index = TileIndexInvalid; - - // Fail-safe code - if (IsLinear(tileMode) == FALSE) - { - // 128 bpp/thick tiling must be non-displayable. - // Fmask reuse color buffer's entry but bank-height field can be from another entry - // To simplify the logic, fmask entry should be picked from non-displayable ones - if (bpp == 128 || thickness > 1 || flags.fmask || flags.prt) - { - inTileType = ADDR_NON_DISPLAYABLE; - } - - if (flags.depth || flags.stencil) - { - inTileType = ADDR_DEPTH_SAMPLE_ORDER; - } - } - - // Partial valid fields are not allowed for SI. - if (IsTileInfoAllZero(pTileInfo)) - { - if (IsMacroTiled(tileMode)) - { - if (flags.prt) - { - if (numSamples == 1) - { - if (flags.depth) - { - switch (bpp) - { - case 16: - index = 3; - break; - case 32: - index = 6; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - } - else - { - switch (bpp) - { - case 8: - index = 21; - break; - case 16: - index = 22; - break; - case 32: - index = 23; - break; - case 64: - index = 24; - break; - case 128: - index = 25; - break; - default: - break; - } - - if (thickness > 1) - { - ADDR_ASSERT(bpp != 128); - index += 5; - } - } - } - else - { - ADDR_ASSERT(numSamples == 4); - - if (flags.depth) - { - switch (bpp) - { - case 16: - index = 5; - break; - case 32: - index = 7; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - } - else - { - switch (bpp) - { - case 8: - index = 23; - break; - case 16: - index = 24; - break; - case 32: - index = 25; - break; - case 64: - index = 30; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - } - } - }//end of PRT part - // See table entries 0-7 - else if (flags.depth || flags.stencil) - { - if (flags.compressZ) - { - if (flags.stencil) - { - index = 0; - } - else - { - // optimal tile index for compressed 
depth/stencil. - switch (numSamples) - { - case 1: - index = 0; - break; - case 2: - case 4: - index = 1; - break; - case 8: - index = 2; - break; - default: - break; - } - } - } - else // unCompressZ - { - index = 3; - } - } - else //non PRT & non Depth & non Stencil - { - // See table entries 9-12 - if (inTileType == ADDR_DISPLAYABLE) - { - switch (bpp) - { - case 8: - index = 10; - break; - case 16: - index = 11; - break; - case 32: - index = 12; - break; - case 64: - index = 12; - break; - default: - break; - } - } - else - { - // See table entries 13-17 - if (thickness == 1) - { - if (flags.fmask) - { - UINT_32 fmaskPixelSize = bpp * numSamples; - - switch (fmaskPixelSize) - { - case 8: - index = 14; - break; - case 16: - index = 15; - break; - case 32: - index = 16; - break; - case 64: - index = 17; - break; - default: - ADDR_ASSERT_ALWAYS(); - } - } - else - { - switch (bpp) - { - case 8: - index = 14; - break; - case 16: - index = 15; - break; - case 32: - index = 16; - break; - case 64: - index = 17; - break; - case 128: - index = 17; - break; - default: - break; - } - } - } - else // thick tiling - entries 18-20 - { - switch (thickness) - { - case 4: - index = 20; - break; - case 8: - index = 19; - break; - default: - break; - } - } - } - } - } - else - { - if (tileMode == ADDR_TM_LINEAR_ALIGNED) - { - index = 8; - } - else if (tileMode == ADDR_TM_LINEAR_GENERAL) - { - index = TileIndexLinearGeneral; - } - else - { - if (flags.depth || flags.stencil) - { - index = 4; - } - else if (inTileType == ADDR_DISPLAYABLE) - { - index = 9; - } - else if (thickness == 1) - { - index = 13; - } - else - { - index = 18; - } - } - } - - if (index >= 0 && index <= 31) - { - *pTileInfo = m_tileTable[index].info; - pOut->tileType = m_tileTable[index].type; - } - - if (index == TileIndexLinearGeneral) - { - *pTileInfo = m_tileTable[8].info; - pOut->tileType = m_tileTable[8].type; - } - } - else - { - if (pTileInfoIn) - { - if (flags.stencil && pTileInfoIn->tileSplitBytes == 
0) - { - // Stencil always uses index 0 - *pTileInfo = m_tileTable[0].info; - } - } - // Pass through tile type - pOut->tileType = inTileType; - } - - pOut->tileIndex = index; - pOut->prtTileIndex = flags.prt; -} - -/** -**************************************************************************************************** -* SiLib::DecodeGbRegs -* -* @brief -* Decodes GB_ADDR_CONFIG and noOfBanks/noOfRanks -* -* @return -* TRUE if all settings are valid -* -**************************************************************************************************** -*/ -BOOL_32 SiLib::DecodeGbRegs( - const ADDR_REGISTER_VALUE* pRegValue) ///< [in] create input -{ - GB_ADDR_CONFIG reg; - BOOL_32 valid = TRUE; - - reg.val = pRegValue->gbAddrConfig; - - switch (reg.f.pipe_interleave_size) - { - case ADDR_CONFIG_PIPE_INTERLEAVE_256B: - m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B; - break; - case ADDR_CONFIG_PIPE_INTERLEAVE_512B: - m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B; - break; - default: - valid = FALSE; - ADDR_UNHANDLED_CASE(); - break; - } - - switch (reg.f.row_size) - { - case ADDR_CONFIG_1KB_ROW: - m_rowSize = ADDR_ROWSIZE_1KB; - break; - case ADDR_CONFIG_2KB_ROW: - m_rowSize = ADDR_ROWSIZE_2KB; - break; - case ADDR_CONFIG_4KB_ROW: - m_rowSize = ADDR_ROWSIZE_4KB; - break; - default: - valid = FALSE; - ADDR_UNHANDLED_CASE(); - break; - } - - switch (pRegValue->noOfBanks) - { - case 0: - m_banks = 4; - break; - case 1: - m_banks = 8; - break; - case 2: - m_banks = 16; - break; - default: - valid = FALSE; - ADDR_UNHANDLED_CASE(); - break; - } - - switch (pRegValue->noOfRanks) - { - case 0: - m_ranks = 1; - break; - case 1: - m_ranks = 2; - break; - default: - valid = FALSE; - ADDR_UNHANDLED_CASE(); - break; - } - - m_logicalBanks = m_banks * m_ranks; - - ADDR_ASSERT(m_logicalBanks <= 16); - - return valid; -} - -/** -**************************************************************************************************** -* SiLib::HwlInitGlobalParams -* -* @brief -* 
Initializes global parameters -* -* @return -* TRUE if all settings are valid -* -**************************************************************************************************** -*/ -BOOL_32 SiLib::HwlInitGlobalParams( - const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input -{ - BOOL_32 valid = TRUE; - const ADDR_REGISTER_VALUE* pRegValue = &pCreateIn->regValue; - - valid = DecodeGbRegs(pRegValue); - - if (valid) - { - if (m_settings.isTahiti || m_settings.isPitCairn) - { - m_pipes = 8; - } - else if (m_settings.isCapeVerde || m_settings.isOland) - { - m_pipes = 4; - } - else - { - // Hainan is 2-pipe (m_settings.isHainan == 1) - m_pipes = 2; - } - - valid = InitTileSettingTable(pRegValue->pTileConfig, pRegValue->noOfEntries); - - if (valid) - { - InitEquationTable(); - } - - m_maxSamples = 16; - } - - return valid; -} - -/** -**************************************************************************************************** -* SiLib::HwlConvertTileInfoToHW -* @brief -* Entry of si's ConvertTileInfoToHW -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE SiLib::HwlConvertTileInfoToHW( - const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] input structure - ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - retCode = EgBasedLib::HwlConvertTileInfoToHW(pIn, pOut); - - if (retCode == ADDR_OK) - { - if (pIn->reverse == FALSE) - { - if (pIn->pTileInfo->pipeConfig == ADDR_PIPECFG_INVALID) - { - retCode = ADDR_INVALIDPARAMS; - } - else - { - pOut->pTileInfo->pipeConfig = - static_cast(pIn->pTileInfo->pipeConfig - 1); - } - } - else - { - pOut->pTileInfo->pipeConfig = - static_cast(pIn->pTileInfo->pipeConfig + 1); - } - } - - return retCode; -} - -/** -**************************************************************************************************** -* 
SiLib::HwlComputeXmaskCoordYFrom8Pipe -* -* @brief -* Compute the Y coord which will be added to Xmask Y -* coord. -* @return -* Y coord -**************************************************************************************************** -*/ -UINT_32 SiLib::HwlComputeXmaskCoordYFrom8Pipe( - UINT_32 pipe, ///< [in] pipe id - UINT_32 x ///< [in] tile coord x, which is original x coord / 8 - ) const -{ - // This function should never be called since it is 6xx/8xx specfic. - // Keep this empty implementation to avoid any mis-use. - ADDR_ASSERT_ALWAYS(); - - return 0; -} - -/** -**************************************************************************************************** -* SiLib::HwlComputeSurfaceCoord2DFromBankPipe -* -* @brief -* Compute surface x,y coordinates from bank/pipe info -* @return -* N/A -**************************************************************************************************** -*/ -VOID SiLib::HwlComputeSurfaceCoord2DFromBankPipe( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32* pX, ///< [in,out] x coordinate - UINT_32* pY, ///< [in,out] y coordinate - UINT_32 slice, ///< [in] slice index - UINT_32 bank, ///< [in] bank number - UINT_32 pipe, ///< [in] pipe number - UINT_32 bankSwizzle,///< [in] bank swizzle - UINT_32 pipeSwizzle,///< [in] pipe swizzle - UINT_32 tileSlices, ///< [in] slices in a micro tile - BOOL_32 ignoreSE, ///< [in] TRUE if shader engines are ignored - ADDR_TILEINFO* pTileInfo ///< [in] bank structure. 
**All fields to be valid on entry** - ) const -{ - UINT_32 xBit; - UINT_32 yBit; - UINT_32 yBit3 = 0; - UINT_32 yBit4 = 0; - UINT_32 yBit5 = 0; - UINT_32 yBit6 = 0; - - UINT_32 xBit3 = 0; - UINT_32 xBit4 = 0; - UINT_32 xBit5 = 0; - - UINT_32 numPipes = GetPipePerSurf(pTileInfo->pipeConfig); - - CoordFromBankPipe xyBits = {0}; - ComputeSurfaceCoord2DFromBankPipe(tileMode, *pX, *pY, slice, bank, pipe, - bankSwizzle, pipeSwizzle, tileSlices, pTileInfo, - &xyBits); - yBit3 = xyBits.yBit3; - yBit4 = xyBits.yBit4; - yBit5 = xyBits.yBit5; - yBit6 = xyBits.yBit6; - - xBit3 = xyBits.xBit3; - xBit4 = xyBits.xBit4; - xBit5 = xyBits.xBit5; - - yBit = xyBits.yBits; - - UINT_32 yBitTemp = 0; - - if ((pTileInfo->pipeConfig == ADDR_PIPECFG_P4_32x32) || - (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32)) - { - ADDR_ASSERT(pTileInfo->bankWidth == 1 && pTileInfo->macroAspectRatio > 1); - UINT_32 yBitToCheck = QLog2(pTileInfo->banks) - 1; - - ADDR_ASSERT(yBitToCheck <= 3); - - yBitTemp = _BIT(yBit, yBitToCheck); - - xBit3 = 0; - } - - yBit = Bits2Number(4, yBit6, yBit5, yBit4, yBit3); - xBit = Bits2Number(3, xBit5, xBit4, xBit3); - - *pY += yBit * pTileInfo->bankHeight * MicroTileHeight; - *pX += xBit * numPipes * pTileInfo->bankWidth * MicroTileWidth; - - //calculate the bank and pipe bits in x, y - UINT_32 xTile; //x in micro tile - UINT_32 x3 = 0; - UINT_32 x4 = 0; - UINT_32 x5 = 0; - UINT_32 x6 = 0; - UINT_32 y = *pY; - - UINT_32 pipeBit0 = _BIT(pipe,0); - UINT_32 pipeBit1 = _BIT(pipe,1); - UINT_32 pipeBit2 = _BIT(pipe,2); - - UINT_32 y3 = _BIT(y, 3); - UINT_32 y4 = _BIT(y, 4); - UINT_32 y5 = _BIT(y, 5); - UINT_32 y6 = _BIT(y, 6); - - // bankbit0 after ^x4^x5 - UINT_32 bankBit00 = _BIT(bank,0); - UINT_32 bankBit0 = 0; - - switch (pTileInfo->pipeConfig) - { - case ADDR_PIPECFG_P2: - x3 = pipeBit0 ^ y3; - break; - case ADDR_PIPECFG_P4_8x16: - x4 = pipeBit0 ^ y3; - x3 = pipeBit0 ^ y4; - break; - case ADDR_PIPECFG_P4_16x16: - x4 = pipeBit1 ^ y4; - x3 = pipeBit0 ^ y3 ^ x4; - 
break; - case ADDR_PIPECFG_P4_16x32: - x4 = pipeBit1 ^ y4; - x3 = pipeBit0 ^ y3 ^ x4; - break; - case ADDR_PIPECFG_P4_32x32: - x5 = pipeBit1 ^ y5; - x3 = pipeBit0 ^ y3 ^ x5; - bankBit0 = yBitTemp ^ x5; - x4 = bankBit00 ^ x5 ^ bankBit0; - *pX += x5 * 4 * 1 * 8; // x5 * num_pipes * bank_width * 8; - break; - case ADDR_PIPECFG_P8_16x16_8x16: - x3 = pipeBit1 ^ y5; - x4 = pipeBit2 ^ y4; - x5 = pipeBit0 ^ y3 ^ x4; - break; - case ADDR_PIPECFG_P8_16x32_8x16: - x3 = pipeBit1 ^ y4; - x4 = pipeBit2 ^ y5; - x5 = pipeBit0 ^ y3 ^ x4; - break; - case ADDR_PIPECFG_P8_32x32_8x16: - x3 = pipeBit1 ^ y4; - x5 = pipeBit2 ^ y5; - x4 = pipeBit0 ^ y3 ^ x5; - break; - case ADDR_PIPECFG_P8_16x32_16x16: - x4 = pipeBit2 ^ y5; - x5 = pipeBit1 ^ y4; - x3 = pipeBit0 ^ y3 ^ x4; - break; - case ADDR_PIPECFG_P8_32x32_16x16: - x5 = pipeBit2 ^ y5; - x4 = pipeBit1 ^ y4; - x3 = pipeBit0 ^ y3 ^ x4; - break; - case ADDR_PIPECFG_P8_32x32_16x32: - x5 = pipeBit2 ^ y5; - x4 = pipeBit1 ^ y6; - x3 = pipeBit0 ^ y3 ^ x4; - break; - case ADDR_PIPECFG_P8_32x64_32x32: - x6 = pipeBit1 ^ y5; - x5 = pipeBit2 ^ y6; - x3 = pipeBit0 ^ y3 ^ x5; - bankBit0 = yBitTemp ^ x6; - x4 = bankBit00 ^ x5 ^ bankBit0; - *pX += x6 * 8 * 1 * 8; // x6 * num_pipes * bank_width * 8; - break; - default: - ADDR_ASSERT_ALWAYS(); - } - - xTile = Bits2Number(3, x5, x4, x3); - - *pX += xTile << 3; -} - -/** -**************************************************************************************************** -* SiLib::HwlPreAdjustBank -* -* @brief -* Adjust bank before calculating address acoording to bank/pipe -* @return -* Adjusted bank -**************************************************************************************************** -*/ -UINT_32 SiLib::HwlPreAdjustBank( - UINT_32 tileX, ///< [in] x coordinate in unit of tile - UINT_32 bank, ///< [in] bank - ADDR_TILEINFO* pTileInfo ///< [in] tile info - ) const -{ - if (((pTileInfo->pipeConfig == ADDR_PIPECFG_P4_32x32) || - (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32)) && 
(pTileInfo->bankWidth == 1)) - { - UINT_32 bankBit0 = _BIT(bank, 0); - UINT_32 x4 = _BIT(tileX, 1); - UINT_32 x5 = _BIT(tileX, 2); - - bankBit0 = bankBit0 ^ x4 ^ x5; - bank |= bankBit0; - - ADDR_ASSERT(pTileInfo->macroAspectRatio > 1); - } - - return bank; -} - -/** -**************************************************************************************************** -* SiLib::HwlComputeSurfaceInfo -* -* @brief -* Entry of si's ComputeSurfaceInfo -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE SiLib::HwlComputeSurfaceInfo( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - pOut->tileIndex = pIn->tileIndex; - - ADDR_E_RETURNCODE retCode = EgBasedLib::HwlComputeSurfaceInfo(pIn, pOut); - - UINT_32 tileIndex = static_cast(pOut->tileIndex); - - if (((pIn->flags.needEquation == TRUE) || - (pIn->flags.preferEquation == TRUE)) && - (pIn->numSamples <= 1) && - (tileIndex < TileTableSize)) - { - static const UINT_32 SiUncompressDepthTileIndex = 3; - - if ((pIn->numSlices > 1) && - (IsMacroTiled(pOut->tileMode) == TRUE) && - ((m_chipFamily == ADDR_CHIP_FAMILY_SI) || - (IsPrtTileMode(pOut->tileMode) == FALSE))) - { - pOut->equationIndex = ADDR_INVALID_EQUATION_INDEX; - } - else if ((pIn->flags.prt == FALSE) && - (m_uncompressDepthEqIndex != 0) && - (tileIndex == SiUncompressDepthTileIndex)) - { - pOut->equationIndex = m_uncompressDepthEqIndex + Log2(pIn->bpp >> 3); - } - else - { - - pOut->equationIndex = m_equationLookupTable[Log2(pIn->bpp >> 3)][tileIndex]; - } - - if (pOut->equationIndex != ADDR_INVALID_EQUATION_INDEX) - { - pOut->blockWidth = m_blockWidth[pOut->equationIndex]; - - pOut->blockHeight = m_blockHeight[pOut->equationIndex]; - - pOut->blockSlices = m_blockSlices[pOut->equationIndex]; - } - } - else - { - pOut->equationIndex = 
ADDR_INVALID_EQUATION_INDEX; - } - - return retCode; -} - -/** -**************************************************************************************************** -* SiLib::HwlComputeMipLevel -* @brief -* Compute MipLevel info (including level 0) -* @return -* TRUE if HWL's handled -**************************************************************************************************** -*/ -BOOL_32 SiLib::HwlComputeMipLevel( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in,out] Input structure - ) const -{ - // basePitch is calculated from level 0 so we only check this for mipLevel > 0 - if (pIn->mipLevel > 0) - { - // Note: Don't check expand 3x formats(96 bit) as the basePitch is not pow2 even if - // we explicity set pow2Pad flag. The 3x base pitch is padded to pow2 but after being - // divided by expandX factor (3) - to program texture pitch, the basePitch is never pow2. - if (ElemLib::IsExpand3x(pIn->format) == FALSE) - { - // Sublevel pitches are generated from base level pitch instead of width on SI - // If pow2Pad is 0, we don't assert - as this is not really used for a mip chain - ADDR_ASSERT((pIn->flags.pow2Pad == FALSE) || - ((pIn->basePitch != 0) && IsPow2(pIn->basePitch))); - } - - if (pIn->basePitch != 0) - { - pIn->width = Max(1u, pIn->basePitch >> pIn->mipLevel); - } - } - - // pow2Pad is done in PostComputeMipLevel - - return TRUE; -} - -/** -**************************************************************************************************** -* SiLib::HwlCheckLastMacroTiledLvl -* -* @brief -* Sets pOut->last2DLevel to TRUE if it is -* @note -* -**************************************************************************************************** -*/ -VOID SiLib::HwlCheckLastMacroTiledLvl( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] Output structure (used as input, too) - ) const -{ - // pow2Pad covers all mipmap cases - if (pIn->flags.pow2Pad) - { - 
ADDR_ASSERT(IsMacroTiled(pIn->tileMode)); - - UINT_32 nextPitch; - UINT_32 nextHeight; - UINT_32 nextSlices; - - AddrTileMode nextTileMode; - - if (pIn->mipLevel == 0 || pIn->basePitch == 0) - { - // Base level or fail-safe case (basePitch == 0) - nextPitch = pOut->pitch >> 1; - } - else - { - // Sub levels - nextPitch = pIn->basePitch >> (pIn->mipLevel + 1); - } - - // nextHeight must be shifted from this level's original height rather than a pow2 padded - // one but this requires original height stored somewhere (pOut->height) - ADDR_ASSERT(pOut->height != 0); - - // next level's height is just current level's >> 1 in pixels - nextHeight = pOut->height >> 1; - // Special format such as FMT_1 and FMT_32_32_32 can be linear only so we consider block - // compressed foramts - if (ElemLib::IsBlockCompressed(pIn->format)) - { - nextHeight = (nextHeight + 3) / 4; - } - nextHeight = NextPow2(nextHeight); - - // nextSlices may be 0 if this level's is 1 - if (pIn->flags.volume) - { - nextSlices = Max(1u, pIn->numSlices >> 1); - } - else - { - nextSlices = pIn->numSlices; - } - - nextTileMode = ComputeSurfaceMipLevelTileMode(pIn->tileMode, - pIn->bpp, - nextPitch, - nextHeight, - nextSlices, - pIn->numSamples, - pOut->blockWidth, - pOut->blockHeight, - pOut->pTileInfo); - - pOut->last2DLevel = IsMicroTiled(nextTileMode); - } -} - -/** -**************************************************************************************************** -* SiLib::HwlDegradeThickTileMode -* -* @brief -* Degrades valid tile mode for thick modes if needed -* -* @return -* Suitable tile mode -**************************************************************************************************** -*/ -AddrTileMode SiLib::HwlDegradeThickTileMode( - AddrTileMode baseTileMode, ///< base tile mode - UINT_32 numSlices, ///< current number of slices - UINT_32* pBytesPerTile ///< [in,out] pointer to bytes per slice - ) const -{ - return EgBasedLib::HwlDegradeThickTileMode(baseTileMode, numSlices, 
pBytesPerTile); -} - -/** -**************************************************************************************************** -* SiLib::HwlTileInfoEqual -* -* @brief -* Return TRUE if all field are equal -* @note -* Only takes care of current HWL's data -**************************************************************************************************** -*/ -BOOL_32 SiLib::HwlTileInfoEqual( - const ADDR_TILEINFO* pLeft, ///<[in] Left compare operand - const ADDR_TILEINFO* pRight ///<[in] Right compare operand - ) const -{ - BOOL_32 equal = FALSE; - - if (pLeft->pipeConfig == pRight->pipeConfig) - { - equal = EgBasedLib::HwlTileInfoEqual(pLeft, pRight); - } - - return equal; -} - -/** -**************************************************************************************************** -* SiLib::GetTileSettings -* -* @brief -* Get tile setting infos by index. -* @return -* Tile setting info. -**************************************************************************************************** -*/ -const TileConfig* SiLib::GetTileSetting( - UINT_32 index ///< [in] Tile index - ) const -{ - ADDR_ASSERT(index < m_noOfEntries); - return &m_tileTable[index]; -} - -/** -**************************************************************************************************** -* SiLib::HwlPostCheckTileIndex -* -* @brief -* Map a tile setting to index if curIndex is invalid, otherwise check if curIndex matches -* tile mode/type/info and change the index if needed -* @return -* Tile index. 
-**************************************************************************************************** -*/ -INT_32 SiLib::HwlPostCheckTileIndex( - const ADDR_TILEINFO* pInfo, ///< [in] Tile Info - AddrTileMode mode, ///< [in] Tile mode - AddrTileType type, ///< [in] Tile type - INT curIndex ///< [in] Current index assigned in HwlSetupTileInfo - ) const -{ - INT_32 index = curIndex; - - if (mode == ADDR_TM_LINEAR_GENERAL) - { - index = TileIndexLinearGeneral; - } - else - { - BOOL_32 macroTiled = IsMacroTiled(mode); - - // We need to find a new index if either of them is true - // 1. curIndex is invalid - // 2. tile mode is changed - // 3. tile info does not match for macro tiled - if ((index == TileIndexInvalid || - (mode != m_tileTable[index].mode) || - (macroTiled && (HwlTileInfoEqual(pInfo, &m_tileTable[index].info) == FALSE)))) - { - for (index = 0; index < static_cast(m_noOfEntries); index++) - { - if (macroTiled) - { - // macro tile modes need all to match - if (HwlTileInfoEqual(pInfo, &m_tileTable[index].info) && - (mode == m_tileTable[index].mode) && - (type == m_tileTable[index].type)) - { - break; - } - } - else if (mode == ADDR_TM_LINEAR_ALIGNED) - { - // linear mode only needs tile mode to match - if (mode == m_tileTable[index].mode) - { - break; - } - } - else - { - // micro tile modes only need tile mode and tile type to match - if (mode == m_tileTable[index].mode && - type == m_tileTable[index].type) - { - break; - } - } - } - } - } - - ADDR_ASSERT(index < static_cast(m_noOfEntries)); - - if (index >= static_cast(m_noOfEntries)) - { - index = TileIndexInvalid; - } - - return index; -} - -/** -**************************************************************************************************** -* SiLib::HwlSetupTileCfg -* -* @brief -* Map tile index to tile setting. 
-* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE SiLib::HwlSetupTileCfg( - UINT_32 bpp, ///< Bits per pixel - INT_32 index, ///< Tile index - INT_32 macroModeIndex, ///< Index in macro tile mode table(CI) - ADDR_TILEINFO* pInfo, ///< [out] Tile Info - AddrTileMode* pMode, ///< [out] Tile mode - AddrTileType* pType ///< [out] Tile type - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - // Global flag to control usage of tileIndex - if (UseTileIndex(index)) - { - if (index == TileIndexLinearGeneral) - { - if (pMode) - { - *pMode = ADDR_TM_LINEAR_GENERAL; - } - - if (pType) - { - *pType = ADDR_DISPLAYABLE; - } - - if (pInfo) - { - pInfo->banks = 2; - pInfo->bankWidth = 1; - pInfo->bankHeight = 1; - pInfo->macroAspectRatio = 1; - pInfo->tileSplitBytes = 64; - pInfo->pipeConfig = ADDR_PIPECFG_P2; - } - } - else if (static_cast(index) >= m_noOfEntries) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - const TileConfig* pCfgTable = GetTileSetting(index); - - if (pInfo) - { - *pInfo = pCfgTable->info; - } - else - { - if (IsMacroTiled(pCfgTable->mode)) - { - returnCode = ADDR_INVALIDPARAMS; - } - } - - if (pMode) - { - *pMode = pCfgTable->mode; - } - - if (pType) - { - *pType = pCfgTable->type; - } - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* SiLib::ReadGbTileMode -* -* @brief -* Convert GB_TILE_MODE HW value to TileConfig. -* @return -* NA. 
-**************************************************************************************************** -*/ -VOID SiLib::ReadGbTileMode( - UINT_32 regValue, ///< [in] GB_TILE_MODE register - TileConfig* pCfg ///< [out] output structure - ) const -{ - GB_TILE_MODE gbTileMode; - gbTileMode.val = regValue; - - pCfg->type = static_cast(gbTileMode.f.micro_tile_mode); - pCfg->info.bankHeight = 1 << gbTileMode.f.bank_height; - pCfg->info.bankWidth = 1 << gbTileMode.f.bank_width; - pCfg->info.banks = 1 << (gbTileMode.f.num_banks + 1); - pCfg->info.macroAspectRatio = 1 << gbTileMode.f.macro_tile_aspect; - pCfg->info.tileSplitBytes = 64 << gbTileMode.f.tile_split; - pCfg->info.pipeConfig = static_cast(gbTileMode.f.pipe_config + 1); - - UINT_32 regArrayMode = gbTileMode.f.array_mode; - - pCfg->mode = static_cast(regArrayMode); - - if (regArrayMode == 8) //ARRAY_2D_TILED_XTHICK - { - pCfg->mode = ADDR_TM_2D_TILED_XTHICK; - } - else if (regArrayMode >= 14) //ARRAY_3D_TILED_XTHICK - { - pCfg->mode = static_cast(pCfg->mode + 3); - } -} - -/** -**************************************************************************************************** -* SiLib::InitTileSettingTable -* -* @brief -* Initialize the ADDR_TILE_CONFIG table. 
-* @return -* TRUE if tile table is correctly initialized -**************************************************************************************************** -*/ -BOOL_32 SiLib::InitTileSettingTable( - const UINT_32* pCfg, ///< [in] Pointer to table of tile configs - UINT_32 noOfEntries ///< [in] Numbe of entries in the table above - ) -{ - BOOL_32 initOk = TRUE; - - ADDR_ASSERT(noOfEntries <= TileTableSize); - - memset(m_tileTable, 0, sizeof(m_tileTable)); - - if (noOfEntries != 0) - { - m_noOfEntries = noOfEntries; - } - else - { - m_noOfEntries = TileTableSize; - } - - if (pCfg) // From Client - { - for (UINT_32 i = 0; i < m_noOfEntries; i++) - { - ReadGbTileMode(*(pCfg + i), &m_tileTable[i]); - } - } - else - { - ADDR_ASSERT_ALWAYS(); - initOk = FALSE; - } - - if (initOk) - { - ADDR_ASSERT(m_tileTable[TILEINDEX_LINEAR_ALIGNED].mode == ADDR_TM_LINEAR_ALIGNED); - } - - return initOk; -} - -/** -**************************************************************************************************** -* SiLib::HwlGetTileIndex -* -* @brief -* Return the virtual/real index for given mode/type/info -* @return -* ADDR_OK if successful. 
-**************************************************************************************************** -*/ -ADDR_E_RETURNCODE SiLib::HwlGetTileIndex( - const ADDR_GET_TILEINDEX_INPUT* pIn, - ADDR_GET_TILEINDEX_OUTPUT* pOut) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - pOut->index = HwlPostCheckTileIndex(pIn->pTileInfo, pIn->tileMode, pIn->tileType); - - return returnCode; -} - -/** -**************************************************************************************************** -* SiLib::HwlFmaskPreThunkSurfInfo -* -* @brief -* Some preparation before thunking a ComputeSurfaceInfo call for Fmask -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -VOID SiLib::HwlFmaskPreThunkSurfInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn, ///< [in] Input of fmask info - const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut, ///< [in] Output of fmask info - ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn, ///< [out] Input of thunked surface info - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut ///< [out] Output of thunked surface info - ) const -{ - pSurfIn->tileIndex = pFmaskIn->tileIndex; -} - -/** -**************************************************************************************************** -* SiLib::HwlFmaskPostThunkSurfInfo -* -* @brief -* Copy hwl extra field after calling thunked ComputeSurfaceInfo -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -VOID SiLib::HwlFmaskPostThunkSurfInfo( - const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut, ///< [in] Output of surface info - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut ///< [out] Output of fmask info - ) const -{ - pFmaskOut->macroModeIndex = TileIndexInvalid; - pFmaskOut->tileIndex = pSurfOut->tileIndex; -} - -/** -**************************************************************************************************** -* SiLib::HwlComputeFmaskBits -* 
@brief -* Computes fmask bits -* @return -* Fmask bits -**************************************************************************************************** -*/ -UINT_32 SiLib::HwlComputeFmaskBits( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, - UINT_32* pNumSamples - ) const -{ - UINT_32 numSamples = pIn->numSamples; - UINT_32 numFrags = GetNumFragments(numSamples, pIn->numFrags); - UINT_32 bpp; - - if (numFrags != numSamples) // EQAA - { - ADDR_ASSERT(numFrags <= 8); - - if (pIn->resolved == FALSE) - { - if (numFrags == 1) - { - bpp = 1; - numSamples = numSamples == 16 ? 16 : 8; - } - else if (numFrags == 2) - { - ADDR_ASSERT(numSamples >= 4); - - bpp = 2; - numSamples = numSamples; - } - else if (numFrags == 4) - { - ADDR_ASSERT(numSamples >= 4); - - bpp = 4; - numSamples = numSamples; - } - else // numFrags == 8 - { - ADDR_ASSERT(numSamples == 16); - - bpp = 4; - numSamples = numSamples; - } - } - else - { - if (numFrags == 1) - { - bpp = (numSamples == 16) ? 16 : 8; - numSamples = 1; - } - else if (numFrags == 2) - { - ADDR_ASSERT(numSamples >= 4); - - bpp = numSamples*2; - numSamples = 1; - } - else if (numFrags == 4) - { - ADDR_ASSERT(numSamples >= 4); - - bpp = numSamples*4; - numSamples = 1; - } - else // numFrags == 8 - { - ADDR_ASSERT(numSamples >= 16); - - bpp = 16*4; - numSamples = 1; - } - } - } - else // Normal AA - { - if (pIn->resolved == FALSE) - { - bpp = ComputeFmaskNumPlanesFromNumSamples(numSamples); - numSamples = numSamples == 2 ? 
8 : numSamples; - } - else - { - // The same as 8XX - bpp = ComputeFmaskResolvedBppFromNumSamples(numSamples); - numSamples = 1; // 1x sample - } - } - - SafeAssign(pNumSamples, numSamples); - - return bpp; -} - -/** -**************************************************************************************************** -* SiLib::HwlOptimizeTileMode -* -* @brief -* Optimize tile mode on SI -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID SiLib::HwlOptimizeTileMode( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure - ) const -{ - AddrTileMode tileMode = pInOut->tileMode; - - if ((pInOut->flags.needEquation == TRUE) && - (IsMacroTiled(tileMode) == TRUE) && - (pInOut->numSamples <= 1)) - { - UINT_32 thickness = Thickness(tileMode); - - if (thickness > 1) - { - tileMode = ADDR_TM_1D_TILED_THICK; - } - else if (pInOut->numSlices > 1) - { - tileMode = ADDR_TM_1D_TILED_THIN1; - } - else - { - tileMode = ADDR_TM_2D_TILED_THIN1; - } - } - - if (tileMode != pInOut->tileMode) - { - pInOut->tileMode = tileMode; - } -} - -/** -**************************************************************************************************** -* SiLib::HwlOverrideTileMode -* -* @brief -* Override tile modes (for PRT only, avoid client passes in an invalid PRT mode for SI. 
-* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID SiLib::HwlOverrideTileMode( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure - ) const -{ - AddrTileMode tileMode = pInOut->tileMode; - - switch (tileMode) - { - case ADDR_TM_PRT_TILED_THIN1: - tileMode = ADDR_TM_2D_TILED_THIN1; - break; - - case ADDR_TM_PRT_TILED_THICK: - tileMode = ADDR_TM_2D_TILED_THICK; - break; - - case ADDR_TM_PRT_2D_TILED_THICK: - tileMode = ADDR_TM_2D_TILED_THICK; - break; - - case ADDR_TM_PRT_3D_TILED_THICK: - tileMode = ADDR_TM_3D_TILED_THICK; - break; - - default: - break; - } - - if (tileMode != pInOut->tileMode) - { - pInOut->tileMode = tileMode; - // Only PRT tile modes are overridden for now. Revisit this once new modes are added above. - pInOut->flags.prt = TRUE; - } -} - -/** -**************************************************************************************************** -* SiLib::HwlSetPrtTileMode -* -* @brief -* Set prt tile modes. -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID SiLib::HwlSetPrtTileMode( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure - ) const -{ - pInOut->tileMode = ADDR_TM_2D_TILED_THIN1; - pInOut->tileType = (pInOut->tileType == ADDR_DEPTH_SAMPLE_ORDER) ? - ADDR_DEPTH_SAMPLE_ORDER : ADDR_NON_DISPLAYABLE; - pInOut->flags.prt = TRUE; -} - -/** -**************************************************************************************************** -* SiLib::HwlSelectTileMode -* -* @brief -* Select tile modes. 
-* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID SiLib::HwlSelectTileMode( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure - ) const -{ - AddrTileMode tileMode; - AddrTileType tileType; - - if (pInOut->flags.volume) - { - if (pInOut->numSlices >= 8) - { - tileMode = ADDR_TM_2D_TILED_XTHICK; - } - else if (pInOut->numSlices >= 4) - { - tileMode = ADDR_TM_2D_TILED_THICK; - } - else - { - tileMode = ADDR_TM_2D_TILED_THIN1; - } - tileType = ADDR_NON_DISPLAYABLE; - } - else - { - tileMode = ADDR_TM_2D_TILED_THIN1; - - if (pInOut->flags.depth || pInOut->flags.stencil) - { - tileType = ADDR_DEPTH_SAMPLE_ORDER; - } - else if ((pInOut->bpp <= 32) || - (pInOut->flags.display == TRUE) || - (pInOut->flags.overlay == TRUE)) - { - tileType = ADDR_DISPLAYABLE; - } - else - { - tileType = ADDR_NON_DISPLAYABLE; - } - } - - if (pInOut->flags.prt) - { - tileMode = ADDR_TM_2D_TILED_THIN1; - tileType = (tileType == ADDR_DISPLAYABLE) ? ADDR_NON_DISPLAYABLE : tileType; - } - - pInOut->tileMode = tileMode; - pInOut->tileType = tileType; - - // Optimize tile mode if possible - pInOut->flags.opt4Space = TRUE; - - // Optimize tile mode if possible - OptimizeTileMode(pInOut); - - HwlOverrideTileMode(pInOut); -} - -/** -**************************************************************************************************** -* SiLib::HwlComputeMaxBaseAlignments -* -* @brief -* Gets maximum alignments -* @return -* maximum alignments -**************************************************************************************************** -*/ -UINT_32 SiLib::HwlComputeMaxBaseAlignments() const -{ - const UINT_32 pipes = HwlGetPipes(&m_tileTable[0].info); - - // Initial size is 64 KiB for PRT. 
- UINT_32 maxBaseAlign = 64 * 1024; - - for (UINT_32 i = 0; i < m_noOfEntries; i++) - { - if ((IsMacroTiled(m_tileTable[i].mode) == TRUE) && - (IsPrtTileMode(m_tileTable[i].mode) == FALSE)) - { - // The maximum tile size is 16 byte-per-pixel and either 8-sample or 8-slice. - UINT_32 tileSize = Min(m_tileTable[i].info.tileSplitBytes, - MicroTilePixels * 8 * 16); - - UINT_32 baseAlign = tileSize * pipes * m_tileTable[i].info.banks * - m_tileTable[i].info.bankWidth * m_tileTable[i].info.bankHeight; - - if (baseAlign > maxBaseAlign) - { - maxBaseAlign = baseAlign; - } - } - } - - return maxBaseAlign; -} - -/** -**************************************************************************************************** -* SiLib::HwlComputeMaxMetaBaseAlignments -* -* @brief -* Gets maximum alignments for metadata -* @return -* maximum alignments for metadata -**************************************************************************************************** -*/ -UINT_32 SiLib::HwlComputeMaxMetaBaseAlignments() const -{ - UINT_32 maxPipe = 1; - - for (UINT_32 i = 0; i < m_noOfEntries; i++) - { - maxPipe = Max(maxPipe, HwlGetPipes(&m_tileTable[i].info)); - } - - return m_pipeInterleaveBytes * maxPipe; -} - -/** -**************************************************************************************************** -* SiLib::HwlComputeSurfaceAlignmentsMacroTiled -* -* @brief -* Hardware layer function to compute alignment request for macro tile mode -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID SiLib::HwlComputeSurfaceAlignmentsMacroTiled( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32 mipLevel, ///< [in] mip level - UINT_32 numSamples, ///< [in] number of samples - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] Surface output - ) const -{ - if ((mipLevel == 0) && (flags.prt)) - { - UINT_32 
macroTileSize = pOut->blockWidth * pOut->blockHeight * numSamples * bpp / 8; - - if (macroTileSize < PrtTileSize) - { - UINT_32 numMacroTiles = PrtTileSize / macroTileSize; - - ADDR_ASSERT((PrtTileSize % macroTileSize) == 0); - - pOut->pitchAlign *= numMacroTiles; - pOut->baseAlign *= numMacroTiles; - } - } -} - -/** -**************************************************************************************************** -* SiLib::InitEquationTable -* -* @brief -* Initialize Equation table. -* -* @return -* N/A -**************************************************************************************************** -*/ -VOID SiLib::InitEquationTable() -{ - ADDR_EQUATION_KEY equationKeyTable[EquationTableSize]; - memset(equationKeyTable, 0, sizeof(equationKeyTable)); - - memset(m_equationTable, 0, sizeof(m_equationTable)); - - memset(m_blockWidth, 0, sizeof(m_blockWidth)); - - memset(m_blockHeight, 0, sizeof(m_blockHeight)); - - memset(m_blockSlices, 0, sizeof(m_blockSlices)); - - // Loop all possible bpp - for (UINT_32 log2ElementBytes = 0; log2ElementBytes < MaxNumElementBytes; log2ElementBytes++) - { - // Get bits per pixel - UINT_32 bpp = 1 << (log2ElementBytes + 3); - - // Loop all possible tile index - for (INT_32 tileIndex = 0; tileIndex < static_cast(m_noOfEntries); tileIndex++) - { - UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX; - - TileConfig tileConfig = m_tileTable[tileIndex]; - - ADDR_SURFACE_FLAGS flags = {{0}}; - - // Compute tile info, hardcode numSamples to 1 because MSAA is not supported - // in swizzle pattern equation - HwlComputeMacroModeIndex(tileIndex, flags, bpp, 1, &tileConfig.info, NULL, NULL); - - // Check if the input is supported - if (IsEquationSupported(bpp, tileConfig, tileIndex, log2ElementBytes) == TRUE) - { - ADDR_EQUATION_KEY key = {{0}}; - - // Generate swizzle equation key from bpp and tile config - key.fields.log2ElementBytes = log2ElementBytes; - key.fields.tileMode = tileConfig.mode; - // Treat depth micro tile type and 
non-display micro tile type as the same key - // because they have the same equation actually - key.fields.microTileType = (tileConfig.type == ADDR_DEPTH_SAMPLE_ORDER) ? - ADDR_NON_DISPLAYABLE : tileConfig.type; - key.fields.pipeConfig = tileConfig.info.pipeConfig; - key.fields.numBanksLog2 = Log2(tileConfig.info.banks); - key.fields.bankWidth = tileConfig.info.bankWidth; - key.fields.bankHeight = tileConfig.info.bankHeight; - key.fields.macroAspectRatio = tileConfig.info.macroAspectRatio; - key.fields.prt = ((m_chipFamily == ADDR_CHIP_FAMILY_SI) && - ((1 << tileIndex) & SiPrtTileIndexMask)) ? 1 : 0; - - // Find in the table if the equation has been built based on the key - for (UINT_32 i = 0; i < m_numEquations; i++) - { - if (key.value == equationKeyTable[i].value) - { - equationIndex = i; - break; - } - } - - // If found, just fill the index into the lookup table and no need - // to generate the equation again. Otherwise, generate the equation. - if (equationIndex == ADDR_INVALID_EQUATION_INDEX) - { - ADDR_EQUATION equation; - ADDR_E_RETURNCODE retCode; - - memset(&equation, 0, sizeof(ADDR_EQUATION)); - - // Generate the equation - if (IsMicroTiled(tileConfig.mode)) - { - retCode = ComputeMicroTileEquation(log2ElementBytes, - tileConfig.mode, - tileConfig.type, - &equation); - } - else - { - retCode = ComputeMacroTileEquation(log2ElementBytes, - tileConfig.mode, - tileConfig.type, - &tileConfig.info, - &equation); - } - // Only fill the equation into the table if the return code is ADDR_OK, - // otherwise if the return code is not ADDR_OK, it indicates this is not - // a valid input, we do nothing but just fill invalid equation index - // into the lookup table. 
- if (retCode == ADDR_OK) - { - equationIndex = m_numEquations; - ADDR_ASSERT(equationIndex < EquationTableSize); - - m_blockSlices[equationIndex] = Thickness(tileConfig.mode); - - if (IsMicroTiled(tileConfig.mode)) - { - m_blockWidth[equationIndex] = MicroTileWidth; - m_blockHeight[equationIndex] = MicroTileHeight; - } - else - { - const ADDR_TILEINFO* pTileInfo = &tileConfig.info; - - m_blockWidth[equationIndex] = - HwlGetPipes(pTileInfo) * MicroTileWidth * pTileInfo->bankWidth * - pTileInfo->macroAspectRatio; - m_blockHeight[equationIndex] = - MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks / - pTileInfo->macroAspectRatio; - - if (key.fields.prt) - { - UINT_32 macroTileSize = - m_blockWidth[equationIndex] * m_blockHeight[equationIndex] * - bpp / 8; - - if (macroTileSize < PrtTileSize) - { - UINT_32 numMacroTiles = PrtTileSize / macroTileSize; - - ADDR_ASSERT(macroTileSize == (1u << equation.numBits)); - ADDR_ASSERT((PrtTileSize % macroTileSize) == 0); - - UINT_32 numBits = Log2(numMacroTiles); - - UINT_32 xStart = Log2(m_blockWidth[equationIndex]) + - log2ElementBytes; - - m_blockWidth[equationIndex] *= numMacroTiles; - - for (UINT_32 i = 0; i < numBits; i++) - { - equation.addr[equation.numBits + i].valid = 1; - equation.addr[equation.numBits + i].index = xStart + i; - } - - equation.numBits += numBits; - } - } - } - - equationKeyTable[equationIndex] = key; - m_equationTable[equationIndex] = equation; - - m_numEquations++; - } - } - } - - // Fill the index into the lookup table, if the combination is not supported - // fill the invalid equation index - m_equationLookupTable[log2ElementBytes][tileIndex] = equationIndex; - } - - if (m_chipFamily == ADDR_CHIP_FAMILY_SI) - { - // For tile index 3 which is shared between PRT depth and uncompressed depth - m_uncompressDepthEqIndex = m_numEquations; - - for (UINT_32 log2ElemBytes = 0; log2ElemBytes < MaxNumElementBytes; log2ElemBytes++) - { - TileConfig tileConfig = m_tileTable[3]; - ADDR_EQUATION equation; 
- ADDR_E_RETURNCODE retCode; - - memset(&equation, 0, sizeof(ADDR_EQUATION)); - - retCode = ComputeMacroTileEquation(log2ElemBytes, - tileConfig.mode, - tileConfig.type, - &tileConfig.info, - &equation); - - if (retCode == ADDR_OK) - { - UINT_32 equationIndex = m_numEquations; - ADDR_ASSERT(equationIndex < EquationTableSize); - - m_blockSlices[equationIndex] = 1; - - const ADDR_TILEINFO* pTileInfo = &tileConfig.info; - - m_blockWidth[equationIndex] = - HwlGetPipes(pTileInfo) * MicroTileWidth * pTileInfo->bankWidth * - pTileInfo->macroAspectRatio; - m_blockHeight[equationIndex] = - MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks / - pTileInfo->macroAspectRatio; - - m_equationTable[equationIndex] = equation; - - m_numEquations++; - } - } - } - } -} - -/** -**************************************************************************************************** -* SiLib::IsEquationSupported -* -* @brief -* Check if it is supported for given bpp and tile config to generate a equation. 
-* -* @return -* TRUE if supported -**************************************************************************************************** -*/ -BOOL_32 SiLib::IsEquationSupported( - UINT_32 bpp, ///< Bits per pixel - TileConfig tileConfig, ///< Tile config - INT_32 tileIndex, ///< Tile index - UINT_32 elementBytesLog2 ///< Log2 of element bytes - ) const -{ - BOOL_32 supported = TRUE; - - // Linear tile mode is not supported in swizzle pattern equation - if (IsLinear(tileConfig.mode)) - { - supported = FALSE; - } - // These tile modes are for Tex2DArray and Tex3D which has depth (num_slice > 1) use, - // which is not supported in swizzle pattern equation due to slice rotation - else if ((tileConfig.mode == ADDR_TM_2D_TILED_THICK) || - (tileConfig.mode == ADDR_TM_2D_TILED_XTHICK) || - (tileConfig.mode == ADDR_TM_3D_TILED_THIN1) || - (tileConfig.mode == ADDR_TM_3D_TILED_THICK) || - (tileConfig.mode == ADDR_TM_3D_TILED_XTHICK)) - { - supported = FALSE; - } - // Only 8bpp(stencil), 16bpp and 32bpp is supported for depth - else if ((tileConfig.type == ADDR_DEPTH_SAMPLE_ORDER) && (bpp > 32)) - { - supported = FALSE; - } - // Tile split is not supported in swizzle pattern equation - else if (IsMacroTiled(tileConfig.mode)) - { - UINT_32 thickness = Thickness(tileConfig.mode); - if (((bpp >> 3) * MicroTilePixels * thickness) > tileConfig.info.tileSplitBytes) - { - supported = FALSE; - } - - if ((supported == TRUE) && (m_chipFamily == ADDR_CHIP_FAMILY_SI)) - { - supported = m_EquationSupport[tileIndex][elementBytesLog2]; - } - } - - return supported; -} - -} // V1 -} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/r800/siaddrlib.h mesa-19.0.1/src/amd/addrlib/r800/siaddrlib.h --- mesa-18.3.3/src/amd/addrlib/r800/siaddrlib.h 2018-04-19 04:33:31.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/r800/siaddrlib.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,348 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file siaddrlib.h -* @brief Contains the R800Lib class definition. 
-**************************************************************************************************** -*/ - -#ifndef __SI_ADDR_LIB_H__ -#define __SI_ADDR_LIB_H__ - -#include "addrlib1.h" -#include "egbaddrlib.h" - -namespace Addr -{ -namespace V1 -{ - -/** -**************************************************************************************************** -* @brief Describes the information in tile mode table -**************************************************************************************************** -*/ -struct TileConfig -{ - AddrTileMode mode; - AddrTileType type; - ADDR_TILEINFO info; -}; - -/** -**************************************************************************************************** -* @brief SI specific settings structure. -**************************************************************************************************** -*/ -struct SiChipSettings -{ - UINT_32 isSouthernIsland : 1; - UINT_32 isTahiti : 1; - UINT_32 isPitCairn : 1; - UINT_32 isCapeVerde : 1; - // Oland/Hainan are of GFXIP 6.0, similar with SI - UINT_32 isOland : 1; - UINT_32 isHainan : 1; - - // CI - UINT_32 isSeaIsland : 1; - UINT_32 isBonaire : 1; - UINT_32 isKaveri : 1; - UINT_32 isSpectre : 1; - UINT_32 isSpooky : 1; - UINT_32 isKalindi : 1; - // Hawaii is GFXIP 7.2 - UINT_32 isHawaii : 1; - - // VI - UINT_32 isVolcanicIslands : 1; - UINT_32 isIceland : 1; - UINT_32 isTonga : 1; - UINT_32 isFiji : 1; - UINT_32 isPolaris10 : 1; - UINT_32 isPolaris11 : 1; - UINT_32 isPolaris12 : 1; - UINT_32 isVegaM : 1; - // VI fusion - UINT_32 isCarrizo : 1; -}; - -/** -**************************************************************************************************** -* @brief This class is the SI specific address library -* function set. 
-**************************************************************************************************** -*/ -class SiLib : public EgBasedLib -{ -public: - /// Creates SiLib object - static Addr::Lib* CreateObj(const Client* pClient) - { - VOID* pMem = Object::ClientAlloc(sizeof(SiLib), pClient); - return (pMem != NULL) ? new (pMem) SiLib(pClient) : NULL; - } - -protected: - SiLib(const Client* pClient); - virtual ~SiLib(); - - // Hwl interface - defined in AddrLib1 - virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW( - const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, - ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const; - - virtual UINT_64 HwlComputeXmaskAddrFromCoord( - UINT_32 pitch, UINT_32 height, UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 numSlices, - UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8, - ADDR_TILEINFO* pTileInfo, UINT_32* pBitPosition) const; - - virtual VOID HwlComputeXmaskCoordFromAddr( - UINT_64 addr, UINT_32 bitPosition, UINT_32 pitch, UINT_32 height, UINT_32 numSlices, - UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8, - ADDR_TILEINFO* pTileInfo, UINT_32* pX, UINT_32* pY, UINT_32* pSlice) const; - - virtual ADDR_E_RETURNCODE HwlGetTileIndex( - const ADDR_GET_TILEINDEX_INPUT* pIn, - ADDR_GET_TILEINDEX_OUTPUT* pOut) const; - - virtual BOOL_32 HwlComputeMipLevel( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const; - - virtual ChipFamily HwlConvertChipFamily( - UINT_32 uChipFamily, UINT_32 uChipRevision); - - virtual BOOL_32 HwlInitGlobalParams( - const ADDR_CREATE_INPUT* pCreateIn); - - virtual ADDR_E_RETURNCODE HwlSetupTileCfg( - UINT_32 bpp, INT_32 index, INT_32 macroModeIndex, - ADDR_TILEINFO* pInfo, AddrTileMode* pMode = 0, AddrTileType* pType = 0) const; - - virtual VOID HwlComputeTileDataWidthAndHeightLinear( - UINT_32* pMacroWidth, UINT_32* pMacroHeight, - UINT_32 
bpp, ADDR_TILEINFO* pTileInfo) const; - - virtual UINT_64 HwlComputeHtileBytes( - UINT_32 pitch, UINT_32 height, UINT_32 bpp, - BOOL_32 isLinear, UINT_32 numSlices, UINT_64* pSliceBytes, UINT_32 baseAlign) const; - - virtual ADDR_E_RETURNCODE ComputeBankEquation( - UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, - ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const; - - virtual ADDR_E_RETURNCODE ComputePipeEquation( - UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, - ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const; - - virtual UINT_32 ComputePipeFromCoord( - UINT_32 x, UINT_32 y, UINT_32 slice, - AddrTileMode tileMode, UINT_32 pipeSwizzle, BOOL_32 ignoreSE, - ADDR_TILEINFO* pTileInfo) const; - - virtual UINT_32 HwlGetPipes(const ADDR_TILEINFO* pTileInfo) const; - - /// Pre-handler of 3x pitch (96 bit) adjustment - virtual UINT_32 HwlPreHandleBaseLvl3xPitch( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const; - /// Post-handler of 3x pitch adjustment - virtual UINT_32 HwlPostHandleBaseLvl3xPitch( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const; - - /// Dummy function to finalize the inheritance - virtual UINT_32 HwlComputeXmaskCoordYFrom8Pipe( - UINT_32 pipe, UINT_32 x) const; - - // Sub-hwl interface - defined in EgBasedLib - virtual VOID HwlSetupTileInfo( - AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags, - UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, - ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo, - AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - virtual UINT_32 HwlGetPitchAlignmentMicroTiled( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples) const; - - virtual UINT_64 HwlGetSizeAdjustmentMicroTiled( - UINT_32 thickness, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples, - UINT_32 baseAlign, UINT_32 pitchAlign, - UINT_32 *pPitch, UINT_32 *pHeight) const; - - virtual VOID 
HwlCheckLastMacroTiledLvl( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - virtual BOOL_32 HwlTileInfoEqual( - const ADDR_TILEINFO* pLeft, const ADDR_TILEINFO* pRight) const; - - virtual AddrTileMode HwlDegradeThickTileMode( - AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const; - - virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; - - virtual VOID HwlOptimizeTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; - - virtual VOID HwlSelectTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; - - /// Overwrite tile setting to PRT - virtual VOID HwlSetPrtTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; - - virtual BOOL_32 HwlSanityCheckMacroTiled( - ADDR_TILEINFO* pTileInfo) const - { - return TRUE; - } - - virtual UINT_32 HwlGetPitchAlignmentLinear(UINT_32 bpp, ADDR_SURFACE_FLAGS flags) const; - - virtual UINT_64 HwlGetSizeAdjustmentLinear( - AddrTileMode tileMode, - UINT_32 bpp, UINT_32 numSamples, UINT_32 baseAlign, UINT_32 pitchAlign, - UINT_32 *pPitch, UINT_32 *pHeight, UINT_32 *pHeightAlign) const; - - virtual VOID HwlComputeSurfaceCoord2DFromBankPipe( - AddrTileMode tileMode, UINT_32* pX, UINT_32* pY, UINT_32 slice, - UINT_32 bank, UINT_32 pipe, - UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices, - BOOL_32 ignoreSE, - ADDR_TILEINFO* pTileInfo) const; - - virtual UINT_32 HwlPreAdjustBank( - UINT_32 tileX, UINT_32 bank, ADDR_TILEINFO* pTileInfo) const; - - virtual INT_32 HwlPostCheckTileIndex( - const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type, - INT curIndex = TileIndexInvalid) const; - - virtual VOID HwlFmaskPreThunkSurfInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn, - const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut, - ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const; - - virtual VOID HwlFmaskPostThunkSurfInfo( - const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut, - 
ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const; - - virtual UINT_32 HwlComputeFmaskBits( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, - UINT_32* pNumSamples) const; - - virtual BOOL_32 HwlReduceBankWidthHeight( - UINT_32 tileSize, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples, - UINT_32 bankHeightAlign, UINT_32 pipes, - ADDR_TILEINFO* pTileInfo) const - { - return TRUE; - } - - virtual UINT_32 HwlComputeMaxBaseAlignments() const; - - virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const; - - virtual VOID HwlComputeSurfaceAlignmentsMacroTiled( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, - UINT_32 mipLevel, UINT_32 numSamples, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - // Get equation table pointer and number of equations - virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const - { - *ppEquationTable = m_equationTable; - - return m_numEquations; - } - - // Check if it is supported for given bpp and tile config to generate an equation - BOOL_32 IsEquationSupported( - UINT_32 bpp, TileConfig tileConfig, INT_32 tileIndex, UINT_32 elementBytesLog2) const; - - // Protected non-virtual functions - VOID ComputeTileCoordFromPipeAndElemIdx( - UINT_32 elemIdx, UINT_32 pipe, AddrPipeCfg pipeCfg, UINT_32 pitchInMacroTile, - UINT_32 x, UINT_32 y, UINT_32* pX, UINT_32* pY) const; - - UINT_32 TileCoordToMaskElementIndex( - UINT_32 tx, UINT_32 ty, AddrPipeCfg pipeConfig, - UINT_32 *macroShift, UINT_32 *elemIdxBits) const; - - BOOL_32 DecodeGbRegs( - const ADDR_REGISTER_VALUE* pRegValue); - - const TileConfig* GetTileSetting( - UINT_32 index) const; - - // Initialize equation table - VOID InitEquationTable(); - - UINT_32 GetPipePerSurf(AddrPipeCfg pipeConfig) const; - - static const UINT_32 TileTableSize = 32; - TileConfig m_tileTable[TileTableSize]; - UINT_32 m_noOfEntries; - - // Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp) - static const UINT_32 MaxNumElementBytes = 5; - - static const BOOL_32 
m_EquationSupport[TileTableSize][MaxNumElementBytes]; - - // Prt tile mode index mask - static const UINT_32 SiPrtTileIndexMask = ((1 << 3) | (1 << 5) | (1 << 6) | (1 << 7) | - (1 << 21) | (1 << 22) | (1 << 23) | (1 << 24) | - (1 << 25) | (1 << 30)); - - // More than half slots in tile mode table can't support equation - static const UINT_32 EquationTableSize = (MaxNumElementBytes * TileTableSize) / 2; - // Equation table - ADDR_EQUATION m_equationTable[EquationTableSize]; - UINT_32 m_numMacroBits[EquationTableSize]; - UINT_32 m_blockWidth[EquationTableSize]; - UINT_32 m_blockHeight[EquationTableSize]; - UINT_32 m_blockSlices[EquationTableSize]; - // Number of equation entries in the table - UINT_32 m_numEquations; - // Equation lookup table according to bpp and tile index - UINT_32 m_equationLookupTable[MaxNumElementBytes][TileTableSize]; - - UINT_32 m_uncompressDepthEqIndex; - - SiChipSettings m_settings; - -private: - - VOID ReadGbTileMode(UINT_32 regValue, TileConfig* pCfg) const; - BOOL_32 InitTileSettingTable(const UINT_32 *pSetting, UINT_32 noOfEntries); -}; - -} // V1 -} // Addr - -#endif - diff -Nru mesa-18.3.3/src/amd/addrlib/src/addrinterface.cpp mesa-19.0.1/src/amd/addrlib/src/addrinterface.cpp --- mesa-18.3.3/src/amd/addrlib/src/addrinterface.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/addrinterface.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,1740 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ */ + +/** +**************************************************************************************************** +* @file addrinterface.cpp +* @brief Contains the addrlib interface functions +**************************************************************************************************** +*/ +#include "addrinterface.h" +#include "addrlib1.h" +#include "addrlib2.h" + +#include "addrcommon.h" + +#include "util/macros.h" + +using namespace Addr; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Create/Destroy/Config functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* AddrCreate +* +* @brief +* Create address lib object +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrCreate( + const ADDR_CREATE_INPUT* pAddrCreateIn, ///< [in] infomation for creating address lib object + ADDR_CREATE_OUTPUT* pAddrCreateOut) ///< [out] address lib handle +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + { + returnCode = Lib::Create(pAddrCreateIn, pAddrCreateOut); + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrDestroy +* +* @brief +* Destroy address lib object +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrDestroy( + ADDR_HANDLE hLib) ///< address lib handle +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (hLib) + { + Lib* pLib = Lib::GetLib(hLib); + pLib->Destroy(); + } + else + { + returnCode = ADDR_ERROR; + } + 
+ return returnCode; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Surface functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* AddrComputeSurfaceInfo +* +* @brief +* Calculate surface width/height/depth/alignments and suitable tiling mode +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceInfo( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] surface information + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) ///< [out] surface parameters and alignments +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeSurfaceInfo(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrComputeSurfaceAddrFromCoord +* +* @brief +* Compute surface address according to coordinates +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceAddrFromCoord( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] surface info and coordinates + ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] surface address +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = 
pLib->ComputeSurfaceAddrFromCoord(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrComputeSurfaceCoordFromAddr +* +* @brief +* Compute coordinates according to surface address +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceCoordFromAddr( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] surface info and address + ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) ///< [out] coordinates +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeSurfaceCoordFromAddr(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// HTile functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* AddrComputeHtileInfo +* +* @brief +* Compute Htile pitch, height, base alignment and size in bytes +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeHtileInfo( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] Htile information + ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut) ///< [out] Htile pitch, height and size in bytes +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = 
ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeHtileInfo(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrComputeHtileAddrFromCoord +* +* @brief +* Compute Htile address according to coordinates (of depth buffer) +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeHtileAddrFromCoord( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] Htile info and coordinates + ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Htile address +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeHtileAddrFromCoord(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrComputeHtileCoordFromAddr +* +* @brief +* Compute coordinates within depth buffer (1st pixel of a micro tile) according to +* Htile address +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeHtileCoordFromAddr( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] Htile info and address + ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] Htile coordinates +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeHtileCoordFromAddr(pIn, pOut); + } + else + { + returnCode = 
ADDR_ERROR; + } + + return returnCode; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// C-mask functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* AddrComputeCmaskInfo +* +* @brief +* Compute Cmask pitch, height, base alignment and size in bytes from color buffer +* info +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskInfo( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] Cmask pitch and height + ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut) ///< [out] Cmask pitch, height and size in bytes +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeCmaskInfo(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrComputeCmaskAddrFromCoord +* +* @brief +* Compute Cmask address according to coordinates (of MSAA color buffer) +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskAddrFromCoord( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] Cmask info and coordinates + ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Cmask address +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + 
returnCode = pLib->ComputeCmaskAddrFromCoord(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrComputeCmaskCoordFromAddr +* +* @brief +* Compute coordinates within color buffer (1st pixel of a micro tile) according to +* Cmask address +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskCoordFromAddr( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, ///< [in] Cmask info and address + ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) ///< [out] Cmask coordinates +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeCmaskCoordFromAddr(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// F-mask functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* AddrComputeFmaskInfo +* +* @brief +* Compute Fmask pitch/height/depth/alignments and size in bytes +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskInfo( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] Fmask information + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut) ///< [out] Fmask pitch and height +{ + V1::Lib* pLib = 
V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeFmaskInfo(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrComputeFmaskAddrFromCoord +* +* @brief +* Compute Fmask address according to coordinates (x,y,slice,sample,plane) +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskAddrFromCoord( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] Fmask info and coordinates + ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Fmask address +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeFmaskAddrFromCoord(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrComputeFmaskCoordFromAddr +* +* @brief +* Compute coordinates (x,y,slice,sample,plane) according to Fmask address +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskCoordFromAddr( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] Fmask info and address + ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) ///< [out] Fmask coordinates +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = 
pLib->ComputeFmaskCoordFromAddr(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// DCC key functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* AddrComputeDccInfo +* +* @brief +* Compute DCC key size, base alignment based on color surface size, tile info or tile index +* +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo( + ADDR_HANDLE hLib, ///< handle of addrlib + const ADDR_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input + ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) ///< [out] output +{ + ADDR_E_RETURNCODE returnCode; + + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->ComputeDccInfo(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/////////////////////////////////////////////////////////////////////////////// +// Below functions are element related or helper functions +/////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* AddrGetVersion +* +* @brief +* Get AddrLib version number. Client may check this return value against ADDRLIB_VERSION +* defined in addrinterface.h to see if there is a mismatch. 
+**************************************************************************************************** +*/ +UINT_32 ADDR_API AddrGetVersion(ADDR_HANDLE hLib) +{ + UINT_32 version = 0; + + Addr::Lib* pLib = Lib::GetLib(hLib); + + ADDR_ASSERT(pLib != NULL); + + if (pLib) + { + version = pLib->GetVersion(); + } + + return version; +} + +/** +**************************************************************************************************** +* AddrUseTileIndex +* +* @brief +* Return TRUE if tileIndex is enabled in this address library +**************************************************************************************************** +*/ +BOOL_32 ADDR_API AddrUseTileIndex(ADDR_HANDLE hLib) +{ + BOOL_32 useTileIndex = FALSE; + + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_ASSERT(pLib != NULL); + + if (pLib) + { + useTileIndex = pLib->UseTileIndex(0); + } + + return useTileIndex; +} + +/** +**************************************************************************************************** +* AddrUseCombinedSwizzle +* +* @brief +* Return TRUE if combined swizzle is enabled in this address library +**************************************************************************************************** +*/ +BOOL_32 ADDR_API AddrUseCombinedSwizzle(ADDR_HANDLE hLib) +{ + BOOL_32 useCombinedSwizzle = FALSE; + + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_ASSERT(pLib != NULL); + + if (pLib) + { + useCombinedSwizzle = pLib->UseCombinedSwizzle(); + } + + return useCombinedSwizzle; +} + +/** +**************************************************************************************************** +* AddrExtractBankPipeSwizzle +* +* @brief +* Extract Bank and Pipe swizzle from base256b +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrExtractBankPipeSwizzle( + ADDR_HANDLE hLib, ///< addrlib handle + const 
ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, ///< [in] input structure + ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) ///< [out] output structure +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->ExtractBankPipeSwizzle(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrCombineBankPipeSwizzle +* +* @brief +* Combine Bank and Pipe swizzle +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrCombineBankPipeSwizzle( + ADDR_HANDLE hLib, + const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn, + ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut) +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->CombineBankPipeSwizzle(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrComputeSliceSwizzle +* +* @brief +* Compute a swizzle for slice from a base swizzle +* @return +* ADDR_OK if no error +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeSliceSwizzle( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, + ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->ComputeSliceTileSwizzle(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrComputeBaseSwizzle 
+* +* @brief +* Return a Combined Bank and Pipe swizzle base on surface based on surface type/index +* @return +* ADDR_OK if no error +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeBaseSwizzle( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, + ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->ComputeBaseSwizzle(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* ElemFlt32ToDepthPixel +* +* @brief +* Convert a FLT_32 value to a depth/stencil pixel value +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +* +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API ElemFlt32ToDepthPixel( + ADDR_HANDLE hLib, ///< addrlib handle + const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn, ///< [in] per-component value + ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) ///< [out] final pixel value +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + Lib* pLib = Lib::GetLib(hLib); + + if (pLib != NULL) + { + pLib->Flt32ToDepthPixel(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* ElemFlt32ToColorPixel +* +* @brief +* Convert a FLT_32 value to a red/green/blue/alpha pixel value +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +* +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API ElemFlt32ToColorPixel( + ADDR_HANDLE hLib, ///< addrlib handle + const 
ELEM_FLT32TOCOLORPIXEL_INPUT* pIn, ///< [in] format, surface number and swap value + ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) ///< [out] final pixel value +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + Lib* pLib = Lib::GetLib(hLib); + + if (pLib != NULL) + { + pLib->Flt32ToColorPixel(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* ElemGetExportNorm +* +* @brief +* Helper function to check one format can be EXPORT_NUM, +* which is a register CB_COLOR_INFO.SURFACE_FORMAT. +* FP16 can be reported as EXPORT_NORM for rv770 in r600 +* family +* +**************************************************************************************************** +*/ +BOOL_32 ADDR_API ElemGetExportNorm( + ADDR_HANDLE hLib, ///< addrlib handle + const ELEM_GETEXPORTNORM_INPUT* pIn) ///< [in] input structure +{ + Addr::Lib* pLib = Lib::GetLib(hLib); + BOOL_32 enabled = FALSE; + + MAYBE_UNUSED ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + enabled = pLib->GetExportNorm(pIn); + } + else + { + returnCode = ADDR_ERROR; + } + + ADDR_ASSERT(returnCode == ADDR_OK); + + return enabled; +} + +/** +**************************************************************************************************** +* ElemSize +* +* @brief +* Get bits-per-element for specified format +* +* @return +* Bits-per-element of specified format +* +**************************************************************************************************** +*/ +UINT_32 ADDR_API ElemSize( + ADDR_HANDLE hLib, + AddrFormat format) +{ + UINT_32 bpe = 0; + + Addr::Lib* pLib = Lib::GetLib(hLib); + + if (pLib != NULL) + { + bpe = pLib->GetBpe(format); + } + + return bpe; +} + +/** +**************************************************************************************************** +* AddrConvertTileInfoToHW +* +* @brief +* Convert tile info from real value to hardware register 
value +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrConvertTileInfoToHW( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] tile info with real value + ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) ///< [out] tile info with HW register value +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ConvertTileInfoToHW(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrConvertTileIndex +* +* @brief +* Convert tile index to tile mode/type/info +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_CONVERT_TILEINDEX_INPUT* pIn, ///< [in] input - tile index + ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) ///< [out] tile mode/type/info +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ConvertTileIndex(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrGetMacroModeIndex +* +* @brief +* Get macro mode index based on input parameters +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrGetMacroModeIndex( + 
ADDR_HANDLE hLib, ///< address lib handle + const ADDR_GET_MACROMODEINDEX_INPUT* pIn, ///< [in] input + ADDR_GET_MACROMODEINDEX_OUTPUT* pOut) ///< [out] macro mode index +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode; + + if (pLib != NULL) + { + returnCode = pLib->GetMacroModeIndex(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrConvertTileIndex1 +* +* @brief +* Convert tile index to tile mode/type/info +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex1( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_CONVERT_TILEINDEX1_INPUT* pIn, ///< [in] input - tile index + ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) ///< [out] tile mode/type/info +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ConvertTileIndex1(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrGetTileIndex +* +* @brief +* Get tile index from tile mode/type/info +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +* +* @note +* Only meaningful for SI (and above) +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrGetTileIndex( + ADDR_HANDLE hLib, + const ADDR_GET_TILEINDEX_INPUT* pIn, + ADDR_GET_TILEINDEX_OUTPUT* pOut) +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->GetTileIndex(pIn, pOut); + } + else + { + 
returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrComputePrtInfo +* +* @brief +* Interface function for ComputePrtInfo +* +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputePrtInfo( + ADDR_HANDLE hLib, + const ADDR_PRT_INFO_INPUT* pIn, + ADDR_PRT_INFO_OUTPUT* pOut) +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->ComputePrtInfo(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrGetMaxAlignments +* +* @brief +* Convert maximum alignments +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrGetMaxAlignments( + ADDR_HANDLE hLib, ///< address lib handle + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) ///< [out] output structure +{ + Addr::Lib* pLib = Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->GetMaxAlignments(pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrGetMaxMetaAlignments +* +* @brief +* Convert maximum alignments for metadata +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrGetMaxMetaAlignments( + ADDR_HANDLE hLib, ///< address lib handle + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) ///< 
[out] output structure +{ + Addr::Lib* pLib = Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->GetMaxMetaAlignments(pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Surface functions for Addr2 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Addr2ComputeSurfaceInfo +* +* @brief +* Calculate surface width/height/depth/alignments and suitable tiling mode +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceInfo( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] surface information + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) ///< [out] surface parameters and alignments +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeSurfaceInfo(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputeSurfaceAddrFromCoord +* +* @brief +* Compute surface address according to coordinates +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceAddrFromCoord( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] surface info and 
coordinates + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] surface address +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeSurfaceAddrFromCoord(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputeSurfaceCoordFromAddr +* +* @brief +* Compute coordinates according to surface address +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceCoordFromAddr( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] surface info and address + ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) ///< [out] coordinates +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeSurfaceCoordFromAddr(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// HTile functions for Addr2 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Addr2ComputeHtileInfo +* +* @brief +* Compute Htile pitch, height, base alignment and size in bytes +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileInfo( + ADDR_HANDLE hLib, ///< address lib handle + const 
ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] Htile information + ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) ///< [out] Htile pitch, height and size in bytes +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeHtileInfo(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputeHtileAddrFromCoord +* +* @brief +* Compute Htile address according to coordinates (of depth buffer) +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileAddrFromCoord( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] Htile info and coordinates + ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Htile address +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeHtileAddrFromCoord(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputeHtileCoordFromAddr +* +* @brief +* Compute coordinates within depth buffer (1st pixel of a micro tile) according to +* Htile address +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileCoordFromAddr( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] Htile info and address + 
ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] Htile coordinates +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeHtileCoordFromAddr(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// C-mask functions for Addr2 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Addr2ComputeCmaskInfo +* +* @brief +* Compute Cmask pitch, height, base alignment and size in bytes from color buffer +* info +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskInfo( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] Cmask pitch and height + ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) ///< [out] Cmask pitch, height and size in bytes +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeCmaskInfo(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputeCmaskAddrFromCoord +* +* @brief +* Compute Cmask address according to coordinates (of MSAA color buffer) +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskAddrFromCoord( + ADDR_HANDLE hLib, ///< address 
lib handle + const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] Cmask info and coordinates + ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Cmask address +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeCmaskAddrFromCoord(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputeCmaskCoordFromAddr +* +* @brief +* Compute coordinates within color buffer (1st pixel of a micro tile) according to +* Cmask address +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskCoordFromAddr( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, ///< [in] Cmask info and address + ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) ///< [out] Cmask coordinates +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeCmaskCoordFromAddr(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// F-mask functions for Addr2 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Addr2ComputeFmaskInfo +* +* @brief +* Compute Fmask pitch/height/depth/alignments and size in bytes +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE 
+**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskInfo( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] Fmask information + ADDR2_COMPUTE_FMASK_INFO_OUTPUT* pOut) ///< [out] Fmask pitch and height +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeFmaskInfo(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputeFmaskAddrFromCoord +* +* @brief +* Compute Fmask address according to coordinates (x,y,slice,sample,plane) +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskAddrFromCoord( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] Fmask info and coordinates + ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Fmask address +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeFmaskAddrFromCoord(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputeFmaskCoordFromAddr +* +* @brief +* Compute coordinates (x,y,slice,sample,plane) according to Fmask address +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API 
Addr2ComputeFmaskCoordFromAddr( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] Fmask info and address + ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) ///< [out] Fmask coordinates +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeFmaskCoordFromAddr(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// DCC key functions for Addr2 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Addr2ComputeDccInfo +* +* @brief +* Compute DCC key size, base alignment based on color surface size, tile info or tile index +* +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccInfo( + ADDR_HANDLE hLib, ///< handle of addrlib + const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input + ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) ///< [out] output +{ + ADDR_E_RETURNCODE returnCode; + + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->ComputeDccInfo(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputeDccAddrFromCoord +* +* @brief +* Compute DCC key address according to coordinates +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccAddrFromCoord( + ADDR_HANDLE hLib, ///< address lib 
handle + const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] Dcc info and coordinates + ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Dcc address +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeDccAddrFromCoord(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputePipeBankXor +* +* @brief +* Calculate a valid bank pipe xor value for client to use. +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputePipeBankXor( + ADDR_HANDLE hLib, ///< handle of addrlib + const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input + ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) ///< [out] output +{ + ADDR_E_RETURNCODE returnCode; + + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->ComputePipeBankXor(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputeSlicePipeBankXor +* +* @brief +* Calculate slice pipe bank xor value based on base pipe bank xor and slice id. 
+**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeSlicePipeBankXor( + ADDR_HANDLE hLib, ///< handle of addrlib + const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input + ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) ///< [out] output +{ + ADDR_E_RETURNCODE returnCode; + + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->ComputeSlicePipeBankXor(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputeSubResourceOffsetForSwizzlePattern +* +* @brief +* Calculate sub resource offset for swizzle pattern. +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeSubResourceOffsetForSwizzlePattern( + ADDR_HANDLE hLib, ///< handle of addrlib + const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input + ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) ///< [out] output +{ + ADDR_E_RETURNCODE returnCode; + + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->ComputeSubResourceOffsetForSwizzlePattern(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2GetPreferredSurfaceSetting +* +* @brief +* Suggest a preferred setting for client driver to program HW register +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2GetPreferredSurfaceSetting( + ADDR_HANDLE hLib, ///< handle of addrlib + const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input + ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* 
pOut) ///< [out] output +{ + ADDR_E_RETURNCODE returnCode; + + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->Addr2GetPreferredSurfaceSetting(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2IsValidDisplaySwizzleMode +* +* @brief +* Return whether the swizzle mode is supported by DCE / DCN. +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2IsValidDisplaySwizzleMode( + ADDR_HANDLE hLib, + AddrSwizzleMode swizzleMode, + UINT_32 bpp, + bool *result) +{ + ADDR_E_RETURNCODE returnCode; + + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + if (pLib != NULL) + { + ADDR2_COMPUTE_SURFACE_INFO_INPUT in; + in.swizzleMode = swizzleMode; + in.bpp = bpp; + + *result = pLib->IsValidDisplaySwizzleMode(&in); + returnCode = ADDR_OK; + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} diff -Nru mesa-18.3.3/src/amd/addrlib/src/amdgpu_asic_addr.h mesa-19.0.1/src/amd/addrlib/src/amdgpu_asic_addr.h --- mesa-18.3.3/src/amd/addrlib/src/amdgpu_asic_addr.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/amdgpu_asic_addr.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,137 @@ +/* + * Copyright © 2017-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ */ + +#ifndef _AMDGPU_ASIC_ADDR_H +#define _AMDGPU_ASIC_ADDR_H + +#define ATI_VENDOR_ID 0x1002 +#define AMD_VENDOR_ID 0x1022 + +// AMDGPU_VENDOR_IS_AMD(vendorId) +#define AMDGPU_VENDOR_IS_AMD(v) ((v == ATI_VENDOR_ID) || (v == AMD_VENDOR_ID)) + +#define FAMILY_UNKNOWN 0x00 +#define FAMILY_TN 0x69 +#define FAMILY_SI 0x6E +#define FAMILY_CI 0x78 +#define FAMILY_KV 0x7D +#define FAMILY_VI 0x82 +#define FAMILY_POLARIS 0x82 +#define FAMILY_CZ 0x87 +#define FAMILY_AI 0x8D +#define FAMILY_RV 0x8E + +// AMDGPU_FAMILY_IS(familyId, familyName) +#define FAMILY_IS(f, fn) (f == FAMILY_##fn) +#define FAMILY_IS_TN(f) FAMILY_IS(f, TN) +#define FAMILY_IS_SI(f) FAMILY_IS(f, SI) +#define FAMILY_IS_CI(f) FAMILY_IS(f, CI) +#define FAMILY_IS_KV(f) FAMILY_IS(f, KV) +#define FAMILY_IS_VI(f) FAMILY_IS(f, VI) +#define FAMILY_IS_POLARIS(f) FAMILY_IS(f, POLARIS) +#define FAMILY_IS_CZ(f) FAMILY_IS(f, CZ) +#define FAMILY_IS_AI(f) FAMILY_IS(f, AI) +#define FAMILY_IS_RV(f) FAMILY_IS(f, RV) + +#define AMDGPU_UNKNOWN 0xFF + +#define AMDGPU_TAHITI_RANGE 0x05, 0x14 +#define AMDGPU_PITCAIRN_RANGE 0x15, 0x28 +#define AMDGPU_CAPEVERDE_RANGE 0x29, 0x3C +#define AMDGPU_OLAND_RANGE 0x3C, 0x46 +#define AMDGPU_HAINAN_RANGE 0x46, 0xFF + +#define AMDGPU_BONAIRE_RANGE 0x14, 0x28 +#define AMDGPU_HAWAII_RANGE 0x28, 0x3C + +#define AMDGPU_SPECTRE_RANGE 0x01, 0x41 +#define AMDGPU_SPOOKY_RANGE 0x41, 0x81 +#define AMDGPU_KALINDI_RANGE 0x81, 0xA1 +#define AMDGPU_GODAVARI_RANGE 0xA1, 0xFF + +#define AMDGPU_ICELAND_RANGE 0x01, 0x14 +#define AMDGPU_TONGA_RANGE 0x14, 0x28 +#define AMDGPU_FIJI_RANGE 0x3C, 0x50 + +#define AMDGPU_POLARIS10_RANGE 0x50, 0x5A +#define AMDGPU_POLARIS11_RANGE 0x5A, 0x64 +#define AMDGPU_POLARIS12_RANGE 0x64, 0x6E +#define AMDGPU_VEGAM_RANGE 0x6E, 0xFF + +#define AMDGPU_CARRIZO_RANGE 0x01, 0x21 +#define AMDGPU_BRISTOL_RANGE 0x10, 0x21 +#define AMDGPU_STONEY_RANGE 0x61, 0xFF + +#define AMDGPU_VEGA10_RANGE 0x01, 0x14 +#define AMDGPU_VEGA12_RANGE 0x14, 0x28 +#define AMDGPU_VEGA20_RANGE 0x28, 0xFF + 
+#define AMDGPU_RAVEN_RANGE 0x01, 0x81 +#define AMDGPU_RAVEN2_RANGE 0x81, 0xFF + +#define AMDGPU_EXPAND_FIX(x) x +#define AMDGPU_RANGE_HELPER(val, min, max) ((val >= min) && (val < max)) +#define AMDGPU_IN_RANGE(val, ...) AMDGPU_EXPAND_FIX(AMDGPU_RANGE_HELPER(val, __VA_ARGS__)) + +// ASICREV_IS(eRevisionId, revisionName) +#define ASICREV_IS(r, rn) AMDGPU_IN_RANGE(r, AMDGPU_##rn##_RANGE) +#define ASICREV_IS_TAHITI_P(r) ASICREV_IS(r, TAHITI) +#define ASICREV_IS_PITCAIRN_PM(r) ASICREV_IS(r, PITCAIRN) +#define ASICREV_IS_CAPEVERDE_M(r) ASICREV_IS(r, CAPEVERDE) +#define ASICREV_IS_OLAND_M(r) ASICREV_IS(r, OLAND) +#define ASICREV_IS_HAINAN_V(r) ASICREV_IS(r, HAINAN) + +#define ASICREV_IS_BONAIRE_M(r) ASICREV_IS(r, BONAIRE) +#define ASICREV_IS_HAWAII_P(r) ASICREV_IS(r, HAWAII) + +#define ASICREV_IS_SPECTRE(r) ASICREV_IS(r, SPECTRE) +#define ASICREV_IS_SPOOKY(r) ASICREV_IS(r, SPOOKY) +#define ASICREV_IS_KALINDI(r) ASICREV_IS(r, KALINDI) +#define ASICREV_IS_KALINDI_GODAVARI(r) ASICREV_IS(r, GODAVARI) + +#define ASICREV_IS_ICELAND_M(r) ASICREV_IS(r, ICELAND) +#define ASICREV_IS_TONGA_P(r) ASICREV_IS(r, TONGA) +#define ASICREV_IS_FIJI_P(r) ASICREV_IS(r, FIJI) + +#define ASICREV_IS_POLARIS10_P(r) ASICREV_IS(r, POLARIS10) +#define ASICREV_IS_POLARIS11_M(r) ASICREV_IS(r, POLARIS11) +#define ASICREV_IS_POLARIS12_V(r) ASICREV_IS(r, POLARIS12) +#define ASICREV_IS_VEGAM_P(r) ASICREV_IS(r, VEGAM) + +#define ASICREV_IS_CARRIZO(r) ASICREV_IS(r, CARRIZO) +#define ASICREV_IS_CARRIZO_BRISTOL(r) ASICREV_IS(r, BRISTOL) +#define ASICREV_IS_STONEY(r) ASICREV_IS(r, STONEY) + +#define ASICREV_IS_VEGA10_M(r) ASICREV_IS(r, VEGA10) +#define ASICREV_IS_VEGA10_P(r) ASICREV_IS(r, VEGA10) +#define ASICREV_IS_VEGA12_P(r) ASICREV_IS(r, VEGA12) +#define ASICREV_IS_VEGA12_p(r) ASICREV_IS(r, VEGA12) +#define ASICREV_IS_VEGA20_P(r) ASICREV_IS(r, VEGA20) + +#define ASICREV_IS_RAVEN(r) ASICREV_IS(r, RAVEN) +#define ASICREV_IS_RAVEN2(r) ASICREV_IS(r, RAVEN2) + +#endif // _AMDGPU_ASIC_ADDR_H diff -Nru 
mesa-18.3.3/src/amd/addrlib/src/chip/gfx9/gfx9_gb_reg.h mesa-19.0.1/src/amd/addrlib/src/chip/gfx9/gfx9_gb_reg.h --- mesa-18.3.3/src/amd/addrlib/src/chip/gfx9/gfx9_gb_reg.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/chip/gfx9/gfx9_gb_reg.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,89 @@ +#if !defined (__GFX9_GB_REG_H__) +#define __GFX9_GB_REG_H__ + +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +#include "util/u_endian.h" + +#if defined(PIPE_ARCH_LITTLE_ENDIAN) +#define LITTLEENDIAN_CPU +#elif defined(PIPE_ARCH_BIG_ENDIAN) +#define BIGENDIAN_CPU +#endif + +// +// Make sure the necessary endian defines are there. 
+// +#if defined(LITTLEENDIAN_CPU) +#elif defined(BIGENDIAN_CPU) +#else +#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined" +#endif + +union GB_ADDR_CONFIG { + struct { +#if defined(LITTLEENDIAN_CPU) + unsigned int NUM_PIPES : 3; + unsigned int PIPE_INTERLEAVE_SIZE : 3; + unsigned int MAX_COMPRESSED_FRAGS : 2; + unsigned int BANK_INTERLEAVE_SIZE : 3; + unsigned int : 1; + unsigned int NUM_BANKS : 3; + unsigned int : 1; + unsigned int SHADER_ENGINE_TILE_SIZE : 3; + unsigned int NUM_SHADER_ENGINES : 2; + unsigned int NUM_GPUS : 3; + unsigned int MULTI_GPU_TILE_SIZE : 2; + unsigned int NUM_RB_PER_SE : 2; + unsigned int ROW_SIZE : 2; + unsigned int NUM_LOWER_PIPES : 1; + unsigned int SE_ENABLE : 1; +#elif defined(BIGENDIAN_CPU) + unsigned int SE_ENABLE : 1; + unsigned int NUM_LOWER_PIPES : 1; + unsigned int ROW_SIZE : 2; + unsigned int NUM_RB_PER_SE : 2; + unsigned int MULTI_GPU_TILE_SIZE : 2; + unsigned int NUM_GPUS : 3; + unsigned int NUM_SHADER_ENGINES : 2; + unsigned int SHADER_ENGINE_TILE_SIZE : 3; + unsigned int : 1; + unsigned int NUM_BANKS : 3; + unsigned int : 1; + unsigned int BANK_INTERLEAVE_SIZE : 3; + unsigned int MAX_COMPRESSED_FRAGS : 2; + unsigned int PIPE_INTERLEAVE_SIZE : 3; + unsigned int NUM_PIPES : 3; +#endif + } bitfields, bits; + unsigned int u32All; + signed int i32All; + float f32All; +}; + +#endif + diff -Nru mesa-18.3.3/src/amd/addrlib/src/chip/r800/si_gb_reg.h mesa-19.0.1/src/amd/addrlib/src/chip/r800/si_gb_reg.h --- mesa-18.3.3/src/amd/addrlib/src/chip/r800/si_gb_reg.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/chip/r800/si_gb_reg.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,163 @@ +#if !defined (__SI_GB_REG_H__) +#define __SI_GB_REG_H__ + +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +#include "util/u_endian.h" + +#if defined(PIPE_ARCH_LITTLE_ENDIAN) +#define LITTLEENDIAN_CPU +#elif defined(PIPE_ARCH_BIG_ENDIAN) +#define BIGENDIAN_CPU +#endif + +// +// Make sure the necessary endian defines are there. 
+// +#if defined(LITTLEENDIAN_CPU) +#elif defined(BIGENDIAN_CPU) +#else +#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined" +#endif + +/* + * GB_ADDR_CONFIG struct + */ + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _GB_ADDR_CONFIG_T { + unsigned int num_pipes : 3; + unsigned int : 1; + unsigned int pipe_interleave_size : 3; + unsigned int : 1; + unsigned int bank_interleave_size : 3; + unsigned int : 1; + unsigned int num_shader_engines : 2; + unsigned int : 2; + unsigned int shader_engine_tile_size : 3; + unsigned int : 1; + unsigned int num_gpus : 3; + unsigned int : 1; + unsigned int multi_gpu_tile_size : 2; + unsigned int : 2; + unsigned int row_size : 2; + unsigned int num_lower_pipes : 1; + unsigned int : 1; + } GB_ADDR_CONFIG_T; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _GB_ADDR_CONFIG_T { + unsigned int : 1; + unsigned int num_lower_pipes : 1; + unsigned int row_size : 2; + unsigned int : 2; + unsigned int multi_gpu_tile_size : 2; + unsigned int : 1; + unsigned int num_gpus : 3; + unsigned int : 1; + unsigned int shader_engine_tile_size : 3; + unsigned int : 2; + unsigned int num_shader_engines : 2; + unsigned int : 1; + unsigned int bank_interleave_size : 3; + unsigned int : 1; + unsigned int pipe_interleave_size : 3; + unsigned int : 1; + unsigned int num_pipes : 3; + } GB_ADDR_CONFIG_T; + +#endif + +typedef union { + unsigned int val : 32; + GB_ADDR_CONFIG_T f; +} GB_ADDR_CONFIG; + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _GB_TILE_MODE_T { + unsigned int micro_tile_mode : 2; + unsigned int array_mode : 4; + unsigned int pipe_config : 5; + unsigned int tile_split : 3; + unsigned int bank_width : 2; + unsigned int bank_height : 2; + unsigned int macro_tile_aspect : 2; + unsigned int num_banks : 2; + unsigned int micro_tile_mode_new : 3; + unsigned int sample_split : 2; + unsigned int : 5; + } GB_TILE_MODE_T; + + typedef struct _GB_MACROTILE_MODE_T { + unsigned int bank_width : 2; + unsigned int bank_height : 2; + unsigned 
int macro_tile_aspect : 2; + unsigned int num_banks : 2; + unsigned int : 24; + } GB_MACROTILE_MODE_T; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _GB_TILE_MODE_T { + unsigned int : 5; + unsigned int sample_split : 2; + unsigned int micro_tile_mode_new : 3; + unsigned int num_banks : 2; + unsigned int macro_tile_aspect : 2; + unsigned int bank_height : 2; + unsigned int bank_width : 2; + unsigned int tile_split : 3; + unsigned int pipe_config : 5; + unsigned int array_mode : 4; + unsigned int micro_tile_mode : 2; + } GB_TILE_MODE_T; + + typedef struct _GB_MACROTILE_MODE_T { + unsigned int : 24; + unsigned int num_banks : 2; + unsigned int macro_tile_aspect : 2; + unsigned int bank_height : 2; + unsigned int bank_width : 2; + } GB_MACROTILE_MODE_T; + +#endif + +typedef union { + unsigned int val : 32; + GB_TILE_MODE_T f; +} GB_TILE_MODE; + +typedef union { + unsigned int val : 32; + GB_MACROTILE_MODE_T f; +} GB_MACROTILE_MODE; + +#endif + diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/addrcommon.h mesa-19.0.1/src/amd/addrlib/src/core/addrcommon.h --- mesa-18.3.3/src/amd/addrlib/src/core/addrcommon.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/addrcommon.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,922 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file addrcommon.h +* @brief Contains the helper function and constants. 
+**************************************************************************************************** +*/ + +#ifndef __ADDR_COMMON_H__ +#define __ADDR_COMMON_H__ + +#include "addrinterface.h" + +#include +#include +#include + +#if !defined(DEBUG) +#ifdef NDEBUG +#define DEBUG 0 +#else +#define DEBUG 1 +#endif +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Platform specific debug break defines +//////////////////////////////////////////////////////////////////////////////////////////////////// +#if DEBUG + #if defined(__GNUC__) + #define ADDR_DBG_BREAK() assert(false) + #elif defined(__APPLE__) + #define ADDR_DBG_BREAK() { IOPanic("");} + #else + #define ADDR_DBG_BREAK() { __debugbreak(); } + #endif +#else + #define ADDR_DBG_BREAK() +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Debug assertions used in AddrLib +//////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(_WIN32) && (_MSC_VER >= 1400) + #define ADDR_ANALYSIS_ASSUME(expr) __analysis_assume(expr) +#else + #define ADDR_ANALYSIS_ASSUME(expr) do { (void)(expr); } while (0) +#endif + +#define ADDR_ASSERT(__e) assert(__e) +#define ADDR_ASSERT_ALWAYS() ADDR_DBG_BREAK() +#define ADDR_UNHANDLED_CASE() ADDR_ASSERT(!"Unhandled case") +#define ADDR_NOT_IMPLEMENTED() ADDR_ASSERT(!"Not implemented"); +//////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Debug print macro from legacy address library +//////////////////////////////////////////////////////////////////////////////////////////////////// +#if DEBUG + +#define ADDR_PRNT(a) Object::DebugPrint a + +/// @brief Macro 
for reporting informational messages +/// @ingroup util +/// +/// This macro optionally prints an informational message to stdout. +/// The first parameter is a condition -- if it is true, nothing is done. +/// The second pararmeter MUST be a parenthesis-enclosed list of arguments, +/// starting with a string. This is passed to printf() or an equivalent +/// in order to format the informational message. For example, +/// ADDR_INFO(0, ("test %d",3) ); prints out "test 3". +/// +#define ADDR_INFO(cond, a) \ +{ if (!(cond)) { ADDR_PRNT(a); } } + +/// @brief Macro for reporting error warning messages +/// @ingroup util +/// +/// This macro optionally prints an error warning message to stdout, +/// followed by the file name and line number where the macro was called. +/// The first parameter is a condition -- if it is true, nothing is done. +/// The second pararmeter MUST be a parenthesis-enclosed list of arguments, +/// starting with a string. This is passed to printf() or an equivalent +/// in order to format the informational message. For example, +/// ADDR_WARN(0, ("test %d",3) ); prints out "test 3" followed by +/// a second line with the file name and line number. +/// +#define ADDR_WARN(cond, a) \ +{ if (!(cond)) \ + { ADDR_PRNT(a); \ + ADDR_PRNT((" WARNING in file %s, line %d\n", __FILE__, __LINE__)); \ +} } + +/// @brief Macro for reporting fatal error conditions +/// @ingroup util +/// +/// This macro optionally stops execution of the current routine +/// after printing an error warning message to stdout, +/// followed by the file name and line number where the macro was called. +/// The first parameter is a condition -- if it is true, nothing is done. +/// The second pararmeter MUST be a parenthesis-enclosed list of arguments, +/// starting with a string. This is passed to printf() or an equivalent +/// in order to format the informational message. 
For example, +/// ADDR_EXIT(0, ("test %d",3) ); prints out "test 3" followed by +/// a second line with the file name and line number, then stops execution. +/// +#define ADDR_EXIT(cond, a) \ +{ if (!(cond)) \ + { ADDR_PRNT(a); ADDR_DBG_BREAK();\ +} } + +#else // DEBUG + +#define ADDRDPF 1 ? (void)0 : (void) + +#define ADDR_PRNT(a) + +#define ADDR_DBG_BREAK() + +#define ADDR_INFO(cond, a) + +#define ADDR_WARN(cond, a) + +#define ADDR_EXIT(cond, a) + +#endif // DEBUG +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#define ADDR_C_ASSERT(__e) typedef char __ADDR_C_ASSERT__[(__e) ? 1 : -1] + +namespace Addr +{ + +namespace V1 +{ +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Common constants +//////////////////////////////////////////////////////////////////////////////////////////////////// +static const UINT_32 MicroTileWidth = 8; ///< Micro tile width, for 1D and 2D tiling +static const UINT_32 MicroTileHeight = 8; ///< Micro tile height, for 1D and 2D tiling +static const UINT_32 ThickTileThickness = 4; ///< Micro tile thickness, for THICK modes +static const UINT_32 XThickTileThickness = 8; ///< Extra thick tiling thickness +static const UINT_32 PowerSaveTileBytes = 64; ///< Nuber of bytes per tile for power save 64 +static const UINT_32 CmaskCacheBits = 1024; ///< Number of bits for CMASK cache +static const UINT_32 CmaskElemBits = 4; ///< Number of bits for CMASK element +static const UINT_32 HtileCacheBits = 16384; ///< Number of bits for HTILE cache 512*32 + +static const UINT_32 MicroTilePixels = MicroTileWidth * MicroTileHeight; + +static const INT_32 TileIndexInvalid = TILEINDEX_INVALID; +static const INT_32 TileIndexLinearGeneral = TILEINDEX_LINEAR_GENERAL; +static const INT_32 TileIndexNoMacroIndex = -3; + +} // V1 + +namespace V2 +{ +//////////////////////////////////////////////////////////////////////////////////////////////////// +// 
Common constants +//////////////////////////////////////////////////////////////////////////////////////////////////// +static const UINT_32 MaxSurfaceHeight = 16384; + +} // V2 + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Common macros +//////////////////////////////////////////////////////////////////////////////////////////////////// +#define BITS_PER_BYTE 8 +#define BITS_TO_BYTES(x) ( ((x) + (BITS_PER_BYTE-1)) / BITS_PER_BYTE ) +#define BYTES_TO_BITS(x) ( (x) * BITS_PER_BYTE ) + +/// Helper macros to select a single bit from an int (undefined later in section) +#define _BIT(v,b) (((v) >> (b) ) & 1) + +/** +**************************************************************************************************** +* @brief Enums to identify AddrLib type +**************************************************************************************************** +*/ +enum LibClass +{ + BASE_ADDRLIB = 0x0, + R600_ADDRLIB = 0x6, + R800_ADDRLIB = 0x8, + SI_ADDRLIB = 0xa, + CI_ADDRLIB = 0xb, + AI_ADDRLIB = 0xd, +}; + +/** +**************************************************************************************************** +* ChipFamily +* +* @brief +* Neutral enums that specifies chip family. +* +**************************************************************************************************** +*/ +enum ChipFamily +{ + ADDR_CHIP_FAMILY_IVLD, ///< Invalid family + ADDR_CHIP_FAMILY_R6XX, + ADDR_CHIP_FAMILY_R7XX, + ADDR_CHIP_FAMILY_R8XX, + ADDR_CHIP_FAMILY_NI, + ADDR_CHIP_FAMILY_SI, + ADDR_CHIP_FAMILY_CI, + ADDR_CHIP_FAMILY_VI, + ADDR_CHIP_FAMILY_AI, +}; + +/** +**************************************************************************************************** +* ConfigFlags +* +* @brief +* This structure is used to set configuration flags. 
+**************************************************************************************************** +*/ +union ConfigFlags +{ + struct + { + /// These flags are set up internally thru AddrLib::Create() based on ADDR_CREATE_FLAGS + UINT_32 optimalBankSwap : 1; ///< New bank tiling for RV770 only + UINT_32 noCubeMipSlicesPad : 1; ///< Disables faces padding for cubemap mipmaps + UINT_32 fillSizeFields : 1; ///< If clients fill size fields in all input and + /// output structure + UINT_32 ignoreTileInfo : 1; ///< Don't use tile info structure + UINT_32 useTileIndex : 1; ///< Make tileIndex field in input valid + UINT_32 useCombinedSwizzle : 1; ///< Use combined swizzle + UINT_32 checkLast2DLevel : 1; ///< Check the last 2D mip sub level + UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment + UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize + UINT_32 disableLinearOpt : 1; ///< Disallow tile modes to be optimized to linear + UINT_32 use32bppFor422Fmt : 1; ///< View 422 formats as 32 bits per pixel element + UINT_32 reserved : 21; ///< Reserved bits for future use + }; + + UINT_32 value; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Misc helper functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* AddrXorReduce +* +* @brief +* Xor the right-side numberOfBits bits of x. 
+**************************************************************************************************** +*/ +static inline UINT_32 XorReduce( + UINT_32 x, + UINT_32 numberOfBits) +{ + UINT_32 i; + UINT_32 result = x & 1; + + for (i=1; i>i) & 1); + } + + return result; +} + +/** +**************************************************************************************************** +* IsPow2 +* +* @brief +* Check if the size (UINT_32) is pow 2 +**************************************************************************************************** +*/ +static inline UINT_32 IsPow2( + UINT_32 dim) ///< [in] dimension of miplevel +{ + ADDR_ASSERT(dim > 0); + return !(dim & (dim - 1)); +} + +/** +**************************************************************************************************** +* IsPow2 +* +* @brief +* Check if the size (UINT_64) is pow 2 +**************************************************************************************************** +*/ +static inline UINT_64 IsPow2( + UINT_64 dim) ///< [in] dimension of miplevel +{ + ADDR_ASSERT(dim > 0); + return !(dim & (dim - 1)); +} + +/** +**************************************************************************************************** +* ByteAlign +* +* @brief +* Align UINT_32 "x" to "align" alignment, "align" should be power of 2 +**************************************************************************************************** +*/ +static inline UINT_32 PowTwoAlign( + UINT_32 x, + UINT_32 align) +{ + // + // Assert that x is a power of two. 
+ // + ADDR_ASSERT(IsPow2(align)); + return (x + (align - 1)) & (~(align - 1)); +} + +/** +**************************************************************************************************** +* ByteAlign +* +* @brief +* Align UINT_64 "x" to "align" alignment, "align" should be power of 2 +**************************************************************************************************** +*/ +static inline UINT_64 PowTwoAlign( + UINT_64 x, + UINT_64 align) +{ + // + // Assert that x is a power of two. + // + ADDR_ASSERT(IsPow2(align)); + return (x + (align - 1)) & (~(align - 1)); +} + +/** +**************************************************************************************************** +* Min +* +* @brief +* Get the min value between two unsigned values +**************************************************************************************************** +*/ +static inline UINT_32 Min( + UINT_32 value1, + UINT_32 value2) +{ + return ((value1 < (value2)) ? (value1) : value2); +} + +/** +**************************************************************************************************** +* Min +* +* @brief +* Get the min value between two signed values +**************************************************************************************************** +*/ +static inline INT_32 Min( + INT_32 value1, + INT_32 value2) +{ + return ((value1 < (value2)) ? (value1) : value2); +} + +/** +**************************************************************************************************** +* Max +* +* @brief +* Get the max value between two unsigned values +**************************************************************************************************** +*/ +static inline UINT_32 Max( + UINT_32 value1, + UINT_32 value2) +{ + return ((value1 > (value2)) ? 
(value1) : value2); +} + +/** +**************************************************************************************************** +* Max +* +* @brief +* Get the max value between two signed values +**************************************************************************************************** +*/ +static inline INT_32 Max( + INT_32 value1, + INT_32 value2) +{ + return ((value1 > (value2)) ? (value1) : value2); +} + +/** +**************************************************************************************************** +* NextPow2 +* +* @brief +* Compute the mipmap's next level dim size +**************************************************************************************************** +*/ +static inline UINT_32 NextPow2( + UINT_32 dim) ///< [in] dimension of miplevel +{ + UINT_32 newDim = 1; + + if (dim > 0x7fffffff) + { + ADDR_ASSERT_ALWAYS(); + newDim = 0x80000000; + } + else + { + while (newDim < dim) + { + newDim <<= 1; + } + } + + return newDim; +} + +/** +**************************************************************************************************** +* Log2NonPow2 +* +* @brief +* Compute log of base 2 no matter the target is power of 2 or not +**************************************************************************************************** +*/ +static inline UINT_32 Log2NonPow2( + UINT_32 x) ///< [in] the value should calculate log based 2 +{ + UINT_32 y; + + y = 0; + while (x > 1) + { + x >>= 1; + y++; + } + + return y; +} + +/** +**************************************************************************************************** +* Log2 +* +* @brief +* Compute log of base 2 +**************************************************************************************************** +*/ +static inline UINT_32 Log2( + UINT_32 x) ///< [in] the value should calculate log based 2 +{ + // Assert that x is a power of two. 
+ ADDR_ASSERT(IsPow2(x)); + + return Log2NonPow2(x); +} + +/** +**************************************************************************************************** +* QLog2 +* +* @brief +* Compute log of base 2 quickly (<= 16) +**************************************************************************************************** +*/ +static inline UINT_32 QLog2( + UINT_32 x) ///< [in] the value should calculate log based 2 +{ + ADDR_ASSERT(x <= 16); + + UINT_32 y = 0; + + switch (x) + { + case 1: + y = 0; + break; + case 2: + y = 1; + break; + case 4: + y = 2; + break; + case 8: + y = 3; + break; + case 16: + y = 4; + break; + default: + ADDR_ASSERT_ALWAYS(); + } + + return y; +} + +/** +**************************************************************************************************** +* SafeAssign +* +* @brief +* NULL pointer safe assignment +**************************************************************************************************** +*/ +static inline VOID SafeAssign( + UINT_32* pLVal, ///< [in] Pointer to left val + UINT_32 rVal) ///< [in] Right value +{ + if (pLVal) + { + *pLVal = rVal; + } +} + +/** +**************************************************************************************************** +* SafeAssign +* +* @brief +* NULL pointer safe assignment for 64bit values +**************************************************************************************************** +*/ +static inline VOID SafeAssign( + UINT_64* pLVal, ///< [in] Pointer to left val + UINT_64 rVal) ///< [in] Right value +{ + if (pLVal) + { + *pLVal = rVal; + } +} + +/** +**************************************************************************************************** +* SafeAssign +* +* @brief +* NULL pointer safe assignment for AddrTileMode +**************************************************************************************************** +*/ +static inline VOID SafeAssign( + AddrTileMode* pLVal, ///< [in] Pointer to left val + AddrTileMode rVal) ///< [in] Right value +{ 
+ if (pLVal) + { + *pLVal = rVal; + } +} + +/** +**************************************************************************************************** +* RoundHalf +* +* @brief +* return (x + 1) / 2 +**************************************************************************************************** +*/ +static inline UINT_32 RoundHalf( + UINT_32 x) ///< [in] input value +{ + ADDR_ASSERT(x != 0); + +#if 1 + return (x >> 1) + (x & 1); +#else + return (x + 1) >> 1; +#endif +} + +/** +**************************************************************************************************** +* SumGeo +* +* @brief +* Calculate sum of a geometric progression whose ratio is 1/2 +**************************************************************************************************** +*/ +static inline UINT_32 SumGeo( + UINT_32 base, ///< [in] First term in the geometric progression + UINT_32 num) ///< [in] Number of terms to be added into sum +{ + ADDR_ASSERT(base > 0); + + UINT_32 sum = 0; + UINT_32 i = 0; + for (; (i < num) && (base > 1); i++) + { + sum += base; + base = RoundHalf(base); + } + sum += num - i; + + return sum; +} + +/** +**************************************************************************************************** +* GetBit +* +* @brief +* Extract bit N value (0 or 1) of a UINT32 value. +**************************************************************************************************** +*/ +static inline UINT_32 GetBit( + UINT_32 u32, ///< [in] UINT32 value + UINT_32 pos) ///< [in] bit position from LSB, valid range is [0..31] +{ + ADDR_ASSERT(pos <= 31); + + return (u32 >> pos) & 0x1; +} + +/** +**************************************************************************************************** +* GetBits +* +* @brief +* Copy 'bitsNum' bits from src start from srcStartPos into destination from dstStartPos +* srcStartPos: 0~31 for UINT_32 +* bitsNum : 1~32 for UINT_32 +* srcStartPos: 0~31 for UINT_32 +* src start position +* | +* src : b[31] b[30] b[29] ... 
... ... ... ... ... ... ... b[end]..b[beg] ... b[1] b[0] +* || Bits num || copy length || Bits num || +* dst : b[31] b[30] b[29] ... b[end]..b[beg] ... ... ... ... ... ... ... ... b[1] b[0] +* | +* dst start position +**************************************************************************************************** +*/ +static inline UINT_32 GetBits( + UINT_32 src, + UINT_32 srcStartPos, + UINT_32 bitsNum, + UINT_32 dstStartPos) +{ + ADDR_ASSERT((srcStartPos < 32) && (dstStartPos < 32) && (bitsNum > 0)); + ADDR_ASSERT((bitsNum + dstStartPos <= 32) && (bitsNum + srcStartPos <= 32)); + + return ((src >> srcStartPos) << (32 - bitsNum)) >> (32 - bitsNum - dstStartPos); +} + +/** +**************************************************************************************************** +* MortonGen2d +* +* @brief +* Generate 2D Morton interleave code with num lowest bits in each channel +**************************************************************************************************** +*/ +static inline UINT_32 MortonGen2d( + UINT_32 x, ///< [in] First channel + UINT_32 y, ///< [in] Second channel + UINT_32 num) ///< [in] Number of bits extracted from each channel +{ + UINT_32 mort = 0; + + for (UINT_32 i = 0; i < num; i++) + { + mort |= (GetBit(y, i) << (2 * i)); + mort |= (GetBit(x, i) << (2 * i + 1)); + } + + return mort; +} + +/** +**************************************************************************************************** +* MortonGen3d +* +* @brief +* Generate 3D Morton interleave code with num lowest bits in each channel +**************************************************************************************************** +*/ +static inline UINT_32 MortonGen3d( + UINT_32 x, ///< [in] First channel + UINT_32 y, ///< [in] Second channel + UINT_32 z, ///< [in] Third channel + UINT_32 num) ///< [in] Number of bits extracted from each channel +{ + UINT_32 mort = 0; + + for (UINT_32 i = 0; i < num; i++) + { + mort |= (GetBit(z, i) << (3 * i)); + mort |= (GetBit(y, 
i) << (3 * i + 1)); + mort |= (GetBit(x, i) << (3 * i + 2)); + } + + return mort; +} + +/** +**************************************************************************************************** +* ReverseBitVector +* +* @brief +* Return reversed lowest num bits of v: v[0]v[1]...v[num-2]v[num-1] +**************************************************************************************************** +*/ +static inline UINT_32 ReverseBitVector( + UINT_32 v, ///< [in] Reverse operation base value + UINT_32 num) ///< [in] Number of bits used in reverse operation +{ + UINT_32 reverse = 0; + + for (UINT_32 i = 0; i < num; i++) + { + reverse |= (GetBit(v, num - 1 - i) << i); + } + + return reverse; +} + +/** +**************************************************************************************************** +* FoldXor2d +* +* @brief +* Xor bit vector v[num-1]v[num-2]...v[1]v[0] with v[num]v[num+1]...v[2*num-2]v[2*num-1] +**************************************************************************************************** +*/ +static inline UINT_32 FoldXor2d( + UINT_32 v, ///< [in] Xor operation base value + UINT_32 num) ///< [in] Number of bits used in fold xor operation +{ + return (v & ((1 << num) - 1)) ^ ReverseBitVector(v >> num, num); +} + +/** +**************************************************************************************************** +* DeMort +* +* @brief +* Return v[0] | v[2] | v[4] | v[6]... 
| v[2*num - 2] +**************************************************************************************************** +*/ +static inline UINT_32 DeMort( + UINT_32 v, ///< [in] DeMort operation base value + UINT_32 num) ///< [in] Number of bits used in fold DeMort operation +{ + UINT_32 d = 0; + + for (UINT_32 i = 0; i < num; i++) + { + d |= ((v & (1 << (i << 1))) >> i); + } + + return d; +} + +/** +**************************************************************************************************** +* FoldXor3d +* +* @brief +* v[0]...v[num-1] ^ v[3*num-1]v[3*num-3]...v[num+2]v[num] ^ v[3*num-2]...v[num+1]v[num-1] +**************************************************************************************************** +*/ +static inline UINT_32 FoldXor3d( + UINT_32 v, ///< [in] Xor operation base value + UINT_32 num) ///< [in] Number of bits used in fold xor operation +{ + UINT_32 t = v & ((1 << num) - 1); + t ^= ReverseBitVector(DeMort(v >> num, num), num); + t ^= ReverseBitVector(DeMort(v >> (num + 1), num), num); + + return t; +} + +/** +**************************************************************************************************** +* InitChannel +* +* @brief +* Set channel initialization value via a return value +**************************************************************************************************** +*/ +static inline ADDR_CHANNEL_SETTING InitChannel( + UINT_32 valid, ///< [in] valid setting + UINT_32 channel, ///< [in] channel setting + UINT_32 index) ///< [in] index setting +{ + ADDR_CHANNEL_SETTING t; + t.valid = valid; + t.channel = channel; + t.index = index; + + return t; +} + +/** +**************************************************************************************************** +* InitChannel +* +* @brief +* Set channel initialization value via channel pointer +**************************************************************************************************** +*/ +static inline VOID InitChannel( + UINT_32 valid, ///< [in] valid setting + 
UINT_32 channel, ///< [in] channel setting + UINT_32 index, ///< [in] index setting + ADDR_CHANNEL_SETTING *pChanSet) ///< [out] channel setting to be initialized +{ + pChanSet->valid = valid; + pChanSet->channel = channel; + pChanSet->index = index; +} + +/** +**************************************************************************************************** +* InitChannel +* +* @brief +* Set channel initialization value via another channel +**************************************************************************************************** +*/ +static inline VOID InitChannel( + ADDR_CHANNEL_SETTING *pChanDst, ///< [in] channel setting to be copied from + ADDR_CHANNEL_SETTING *pChanSrc) ///< [out] channel setting to be initialized +{ + pChanDst->valid = pChanSrc->valid; + pChanDst->channel = pChanSrc->channel; + pChanDst->index = pChanSrc->index; +} + +/** +**************************************************************************************************** +* GetMaxValidChannelIndex +* +* @brief +* Get max valid index for a specific channel +**************************************************************************************************** +*/ +static inline UINT_32 GetMaxValidChannelIndex( + const ADDR_CHANNEL_SETTING *pChanSet, ///< [in] channel setting to be initialized + UINT_32 searchCount,///< [in] number of channel setting to be searched + UINT_32 channel) ///< [in] channel to be searched +{ + UINT_32 index = 0; + + for (UINT_32 i = 0; i < searchCount; i++) + { + if (pChanSet[i].valid && (pChanSet[i].channel == channel)) + { + index = Max(index, static_cast(pChanSet[i].index)); + } + } + + return index; +} + +/** +**************************************************************************************************** +* GetCoordActiveMask +* +* @brief +* Get bit mask which indicates which positions in the equation match the target coord +**************************************************************************************************** +*/ +static inline 
UINT_32 GetCoordActiveMask( + const ADDR_CHANNEL_SETTING *pChanSet, ///< [in] channel setting to be initialized + UINT_32 searchCount,///< [in] number of channel setting to be searched + UINT_32 channel, ///< [in] channel to be searched + UINT_32 index) ///< [in] index to be searched +{ + UINT_32 mask = 0; + + for (UINT_32 i = 0; i < searchCount; i++) + { + if ((pChanSet[i].valid == TRUE) && + (pChanSet[i].channel == channel) && + (pChanSet[i].index == index)) + { + mask |= (1 << i); + } + } + + return mask; +} + +} // Addr + +#endif // __ADDR_COMMON_H__ + diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/addrelemlib.cpp mesa-19.0.1/src/amd/addrlib/src/core/addrelemlib.cpp --- mesa-18.3.3/src/amd/addrlib/src/core/addrelemlib.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/addrelemlib.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,1830 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file addrelemlib.cpp +* @brief Contains the class implementation for element/pixel related functions. +**************************************************************************************************** +*/ + +#include "addrelemlib.h" +#include "addrlib.h" + +namespace Addr +{ + +/** +**************************************************************************************************** +* ElemLib::ElemLib +* +* @brief +* constructor +* +* @return +* N/A +**************************************************************************************************** +*/ +ElemLib::ElemLib( + Lib* pAddrLib) ///< [in] Parent addrlib instance pointer + : + Object(pAddrLib->GetClient()), + m_pAddrLib(pAddrLib) +{ + switch (m_pAddrLib->GetChipFamily()) + { + case ADDR_CHIP_FAMILY_R6XX: + m_depthPlanarType = ADDR_DEPTH_PLANAR_R600; + m_fp16ExportNorm = 0; + break; + case ADDR_CHIP_FAMILY_R7XX: + m_depthPlanarType = ADDR_DEPTH_PLANAR_R600; + m_fp16ExportNorm = 1; + break; + case ADDR_CHIP_FAMILY_R8XX: + case ADDR_CHIP_FAMILY_NI: // Same as 8xx + m_depthPlanarType = ADDR_DEPTH_PLANAR_R800; + m_fp16ExportNorm = 1; + break; + default: + m_fp16ExportNorm = 1; + m_depthPlanarType = ADDR_DEPTH_PLANAR_R800; + break; + } + + m_configFlags.value = 0; +} + +/** +**************************************************************************************************** +* ElemLib::~ElemLib +* +* @brief +* destructor +* +* @return +* N/A +**************************************************************************************************** +*/ +ElemLib::~ElemLib() +{ +} + +/** +**************************************************************************************************** +* ElemLib::Create +* +* @brief +* Creates 
and initializes AddrLib object. +* +* @return +* Returns point to ADDR_CREATEINFO if successful. +**************************************************************************************************** +*/ +ElemLib* ElemLib::Create( + const Lib* pAddrLib) ///< [in] Pointer of parent AddrLib instance +{ + ElemLib* pElemLib = NULL; + + if (pAddrLib) + { + VOID* pObj = Object::ClientAlloc(sizeof(ElemLib), pAddrLib->GetClient()); + if (pObj) + { + pElemLib = new(pObj) ElemLib(const_cast(pAddrLib)); + } + } + + return pElemLib; +} + +/************************************************************************************************** +* ElemLib::Flt32sToInt32s +* +* @brief +* Convert a ADDR_FLT_32 value to Int32 value +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID ElemLib::Flt32sToInt32s( + ADDR_FLT_32 value, ///< [in] ADDR_FLT_32 value + UINT_32 bits, ///< [in] nubmer of bits in value + NumberType numberType, ///< [in] the type of number + UINT_32* pResult) ///< [out] Int32 value +{ + UINT_8 round = 128; //ADDR_ROUND_BY_HALF + UINT_32 uscale; + UINT_32 sign; + + //convert each component to an INT_32 + switch ( numberType ) + { + case ADDR_NO_NUMBER: //fall through + case ADDR_ZERO: //fall through + case ADDR_ONE: //fall through + case ADDR_EPSILON: //fall through + return; // these are zero-bit components, so don't set result + + case ADDR_UINT_BITS: // unsigned integer bit field, clamped to range + uscale = (1< uscale)) + { + *pResult = uscale; + } + else + { + *pResult = value.i; + } + return; + } + + // The algorithm used in the DB and TX differs at one value for 24-bit unorms + case ADDR_UNORM_R6XXDB: // unsigned repeating fraction + if ((bits==24) && (value.i == 0x33000000)) + { + *pResult = 1; + return; + } // Else treat like ADDR_UNORM_R6XX + + case ADDR_UNORM_R6XX: // unsigned repeating fraction + if (value.f <= 0) + { + *pResult = 0; // first clamp to [0..1] + } + else + { + if 
(value.f >= 1) + { + *pResult = (1<(f + (round/256.0f)); + } + #endif + else + { + ADDR_FLT_32 scaled; + ADDR_FLT_32 shifted; + UINT_64 truncated, rounded; + UINT_32 altShift; + UINT_32 mask = (1 << bits) - 1; + UINT_32 half = 1 << (bits - 1); + UINT_32 mant24 = (value.i & 0x7FFFFF) + 0x800000; + UINT_64 temp = mant24 - (mant24>>bits) - + static_cast((mant24 & mask) > half); + UINT_32 exp8 = value.i >> 23; + UINT_32 shift = 126 - exp8 + 24 - bits; + UINT_64 final; + + if (shift >= 32) // This is zero, even with maximum dither add + { + final = 0; + } + else + { + final = ((temp<<8) + (static_cast(round)<> (shift+8); + } + //ADDR_EXIT( *pResult == final, + // ("Float %x converted to %d-bit Unorm %x != bitwise %x", + // value.u, bits, (UINT_32)*pResult, (UINT_32)final) ); + if (final > mask) + { + final = mask; + } + + scaled.f = value.f * ((1<>23)&0xFF); + truncated = (altShift > 60) ? 0 : truncated >> altShift; + rounded = static_cast((round + truncated) >> 8); + //if (rounded > ((1<(rounded); //(INT_32)final; + } + } + } + + return; + + case ADDR_S8FLOAT32: // 32-bit IEEE float, passes through NaN values + *pResult = value.i; + return; + + // @@ FIX ROUNDING in this code, fix the denorm case + case ADDR_U4FLOATC: // Unsigned float, 4-bit exponent. 
bias 15, clamped [0..1] + sign = (value.i >> 31) & 1; + if ((value.i&0x7F800000) == 0x7F800000) // If NaN or INF: + { + if ((value.i&0x007FFFFF) != 0) // then if NaN + { + *pResult = 0; // return 0 + } + else + { + *pResult = (sign)?0:0xF00000; // else +INF->+1, -INF->0 + } + return; + } + if (value.f <= 0) + { + *pResult = 0; + } + else + { + if (value.f>=1) + { + *pResult = 0xF << (bits-4); + } + else + { + if ((value.i>>23) > 112 ) + { + // 24-bit float: normalized + // value.i += 1 << (22-bits+4); + // round the IEEE mantissa to mantissa size + // @@ NOTE: add code to support rounding + value.u &= 0x7FFFFFF; // mask off high 4 exponent bits + *pResult = value.i >> (23-bits+4);// shift off unused mantissa bits + } + else + { + // 24-bit float: denormalized + value.f = value.f / (1<<28) / (1<<28); + value.f = value.f / (1<<28) / (1<<28); // convert to IEEE denorm + // value.i += 1 << (22-bits+4); + // round the IEEE mantissa to mantissa size + // @@ NOTE: add code to support rounding + *pResult = value.i >> (23-bits+4); // shift off unused mantissa bits + } + } + } + + return; + + default: // invalid number mode + //ADDR_EXIT(0, ("Invalid AddrNumber %d", numberType) ); + break; + + } +} + +/** +**************************************************************************************************** +* ElemLib::Int32sToPixel +* +* @brief +* Pack 32-bit integer values into an uncompressed pixel, +* in the proper order +* +* @return +* N/A +* +* @note +* This entry point packes four 32-bit integer values into +* an uncompressed pixel. The pixel values are specifies in +* standard order, e.g. depth/stencil. This routine asserts +* if called on compressed pixel. 
+**************************************************************************************************** +*/ +VOID ElemLib::Int32sToPixel( + UINT_32 numComps, ///< [in] number of components + UINT_32* pComps, ///< [in] compnents + UINT_32* pCompBits, ///< [in] total bits in each component + UINT_32* pCompStart, ///< [in] the first bit position of each component + ComponentFlags properties, ///< [in] properties about byteAligned, exportNorm + UINT_32 resultBits, ///< [in] result bits: total bpp after decompression + UINT_8* pPixel) ///< [out] a depth/stencil pixel value +{ + UINT_32 i; + UINT_32 j; + UINT_32 start; + UINT_32 size; + UINT_32 byte; + UINT_32 value = 0; + UINT_32 compMask; + UINT_32 elemMask=0; + UINT_32 elementXor = 0; // address xor when reading bytes from elements + + // @@ NOTE: assert if called on a compressed format! + + if (properties.byteAligned) // Components are all byte-sized + { + for (i = 0; i < numComps; i++) // Then for each component + { + // Copy the bytes of the component into the element + start = pCompStart[i] / 8; + size = pCompBits[i] / 8; + for (j = 0; j < size; j++) + { + pPixel[(j+start)^elementXor] = static_cast(pComps[i] >> (8*j)); + } + } + } + else // Element is 32-bits or less, components are bit fields + { + // First, extract each component in turn and combine it into a 32-bit value + for (i = 0; i < numComps; i++) + { + compMask = (1 << pCompBits[i]) - 1; + elemMask |= compMask << pCompStart[i]; + value |= (pComps[i] & compMask) << pCompStart[i]; + } + + // Mext, copy the masked value into the element + size = (resultBits + 7) / 8; + for (i = 0; i < size; i++) + { + byte = pPixel[i^elementXor] & ~(elemMask >> (8*i)); + pPixel[i^elementXor] = static_cast(byte | ((elemMask & value) >> (8*i))); + } + } +} + +/** +**************************************************************************************************** +* Flt32ToDepthPixel +* +* @brief +* Convert a FLT_32 value to a depth/stencil pixel value +* +* @return +* N/A 
+**************************************************************************************************** +*/ +VOID ElemLib::Flt32ToDepthPixel( + AddrDepthFormat format, ///< [in] Depth format + const ADDR_FLT_32 comps[2], ///< [in] two components of depth + UINT_8* pPixel ///< [out] depth pixel value + ) const +{ + UINT_32 i; + UINT_32 values[2]; + ComponentFlags properties; // byteAligned, exportNorm + UINT_32 resultBits = 0; // result bits: total bits per pixel after decompression + + PixelFormatInfo fmt; + + // get type for each component + PixGetDepthCompInfo(format, &fmt); + + //initialize properties + properties.byteAligned = TRUE; + properties.exportNorm = TRUE; + properties.floatComp = FALSE; + + //set properties and result bits + for (i = 0; i < 2; i++) + { + if ((fmt.compBit[i] & 7) || (fmt.compStart[i] & 7)) + { + properties.byteAligned = FALSE; + } + + if (resultBits < fmt.compStart[i] + fmt.compBit[i]) + { + resultBits = fmt.compStart[i] + fmt.compBit[i]; + } + + // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format + if (fmt.compBit[i] > 11 || fmt.numType[i] >= ADDR_USCALED) + { + properties.exportNorm = FALSE; + } + + // Mark if there are any floating point components + if ((fmt.numType[i] == ADDR_U4FLOATC) || (fmt.numType[i] >= ADDR_S8FLOAT) ) + { + properties.floatComp = TRUE; + } + } + + // Convert the two input floats to integer values + for (i = 0; i < 2; i++) + { + Flt32sToInt32s(comps[i], fmt.compBit[i], fmt.numType[i], &values[i]); + } + + // Then pack the two integer components, in the proper order + Int32sToPixel(2, values, fmt.compBit, fmt.compStart, properties, resultBits, pPixel ); + +} + +/** +**************************************************************************************************** +* Flt32ToColorPixel +* +* @brief +* Convert a FLT_32 value to a red/green/blue/alpha pixel value +* +* @return +* N/A 
+**************************************************************************************************** +*/ +VOID ElemLib::Flt32ToColorPixel( + AddrColorFormat format, ///< [in] Color format + AddrSurfaceNumber surfNum, ///< [in] Surface number + AddrSurfaceSwap surfSwap, ///< [in] Surface swap + const ADDR_FLT_32 comps[4], ///< [in] four components of color + UINT_8* pPixel ///< [out] a red/green/blue/alpha pixel value + ) const +{ + PixelFormatInfo pixelInfo; + + UINT_32 i; + UINT_32 values[4]; + ComponentFlags properties; // byteAligned, exportNorm + UINT_32 resultBits = 0; // result bits: total bits per pixel after decompression + + memset(&pixelInfo, 0, sizeof(PixelFormatInfo)); + + PixGetColorCompInfo(format, surfNum, surfSwap, &pixelInfo); + + //initialize properties + properties.byteAligned = TRUE; + properties.exportNorm = TRUE; + properties.floatComp = FALSE; + + //set properties and result bits + for (i = 0; i < 4; i++) + { + if ( (pixelInfo.compBit[i] & 7) || (pixelInfo.compStart[i] & 7) ) + { + properties.byteAligned = FALSE; + } + + if (resultBits < pixelInfo.compStart[i] + pixelInfo.compBit[i]) + { + resultBits = pixelInfo.compStart[i] + pixelInfo.compBit[i]; + } + + if (m_fp16ExportNorm) + { + // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format + // or if it's not FP and <=16 bits + if (((pixelInfo.compBit[i] > 11) || (pixelInfo.numType[i] >= ADDR_USCALED)) + && (pixelInfo.numType[i] !=ADDR_U4FLOATC)) + { + properties.exportNorm = FALSE; + } + } + else + { + // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format + if (pixelInfo.compBit[i] > 11 || pixelInfo.numType[i] >= ADDR_USCALED) + { + properties.exportNorm = FALSE; + } + } + + // Mark if there are any floating point components + if ( (pixelInfo.numType[i] == ADDR_U4FLOATC) || + (pixelInfo.numType[i] >= ADDR_S8FLOAT) ) + { + properties.floatComp = TRUE; + } + } + + // Convert the four input floats to integer values + for (i = 0; i 
< 4; i++) + { + Flt32sToInt32s(comps[i], pixelInfo.compBit[i], pixelInfo.numType[i], &values[i]); + } + + // Then pack the four integer components, in the proper order + Int32sToPixel(4, values, &pixelInfo.compBit[0], &pixelInfo.compStart[0], + properties, resultBits, pPixel); +} + +/** +**************************************************************************************************** +* ElemLib::GetCompType +* +* @brief +* Fill per component info +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID ElemLib::GetCompType( + AddrColorFormat format, ///< [in] surface format + AddrSurfaceNumber numType, ///< [in] number type + PixelFormatInfo* pInfo) ///< [in][out] per component info out +{ + BOOL_32 handled = FALSE; + + // Floating point formats override the number format + switch (format) + { + case ADDR_COLOR_16_FLOAT: // fall through for all pure floating point format + case ADDR_COLOR_16_16_FLOAT: + case ADDR_COLOR_16_16_16_16_FLOAT: + case ADDR_COLOR_32_FLOAT: + case ADDR_COLOR_32_32_FLOAT: + case ADDR_COLOR_32_32_32_32_FLOAT: + case ADDR_COLOR_10_11_11_FLOAT: + case ADDR_COLOR_11_11_10_FLOAT: + numType = ADDR_NUMBER_FLOAT; + break; + // Special handling for the depth formats + case ADDR_COLOR_8_24: // fall through for these 2 similar format + case ADDR_COLOR_24_8: + for (UINT_32 c = 0; c < 4; c++) + { + if (pInfo->compBit[c] == 8) + { + pInfo->numType[c] = ADDR_UINT_BITS; + } + else if (pInfo->compBit[c] == 24) + { + pInfo->numType[c] = ADDR_UNORM_R6XX; + } + else + { + pInfo->numType[c] = ADDR_NO_NUMBER; + } + } + handled = TRUE; + break; + case ADDR_COLOR_8_24_FLOAT: // fall through for these 3 similar format + case ADDR_COLOR_24_8_FLOAT: + case ADDR_COLOR_X24_8_32_FLOAT: + for (UINT_32 c = 0; c < 4; c++) + { + if (pInfo->compBit[c] == 8) + { + pInfo->numType[c] = ADDR_UINT_BITS; + } + else if (pInfo->compBit[c] == 24) + { + pInfo->numType[c] = ADDR_U4FLOATC; + } + else if 
(pInfo->compBit[c] == 32) + { + pInfo->numType[c] = ADDR_S8FLOAT32; + } + else + { + pInfo->numType[c] = ADDR_NO_NUMBER; + } + } + handled = TRUE; + break; + default: + break; + } + + if (!handled) + { + for (UINT_32 c = 0; c < 4; c++) + { + // Assign a number type for each component + AddrSurfaceNumber cnum; + + // First handle default component values + if (pInfo->compBit[c] == 0) + { + if (c < 3) + { + pInfo->numType[c] = ADDR_ZERO; // Default is zero for RGB + } + else if (numType == ADDR_NUMBER_UINT || numType == ADDR_NUMBER_SINT) + { + pInfo->numType[c] = ADDR_EPSILON; // Alpha INT_32 bits default is 0x01 + } + else + { + pInfo->numType[c] = ADDR_ONE; // Alpha normal default is float 1.0 + } + continue; + } + // Now handle small components + else if (pInfo->compBit[c] == 1) + { + if (numType == ADDR_NUMBER_UINT || numType == ADDR_NUMBER_SINT) + { + cnum = ADDR_NUMBER_UINT; + } + else + { + cnum = ADDR_NUMBER_UNORM; + } + } + else + { + cnum = numType; + } + + // If no default, set the number type fom num, compbits, and architecture + switch (cnum) + { + case ADDR_NUMBER_SRGB: + pInfo->numType[c] = (c < 3) ? ADDR_GAMMA8_R6XX : ADDR_UNORM_R6XX; + break; + case ADDR_NUMBER_UNORM: + pInfo->numType[c] = ADDR_UNORM_R6XX; + break; + case ADDR_NUMBER_SNORM: + pInfo->numType[c] = ADDR_SNORM_R6XX; + break; + case ADDR_NUMBER_USCALED: + pInfo->numType[c] = ADDR_USCALED; // @@ Do we need separate Pele routine? + break; + case ADDR_NUMBER_SSCALED: + pInfo->numType[c] = ADDR_SSCALED; // @@ Do we need separate Pele routine? 
+ break; + case ADDR_NUMBER_FLOAT: + if (pInfo->compBit[c] == 32) + { + pInfo->numType[c] = ADDR_S8FLOAT32; + } + else if (pInfo->compBit[c] == 16) + { + pInfo->numType[c] = ADDR_S5FLOAT; + } + else if (pInfo->compBit[c] >= 10) + { + pInfo->numType[c] = ADDR_U5FLOAT; + } + else + { + ADDR_ASSERT_ALWAYS(); + } + break; + case ADDR_NUMBER_SINT: + pInfo->numType[c] = ADDR_SINT_BITS; + break; + case ADDR_NUMBER_UINT: + pInfo->numType[c] = ADDR_UINT_BITS; + break; + + default: + ADDR_ASSERT(!"Invalid number type"); + pInfo->numType[c] = ADDR_NO_NUMBER; + break; + } + } + } +} + +/** +**************************************************************************************************** +* ElemLib::GetCompSwap +* +* @brief +* Get components swapped for color surface +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID ElemLib::GetCompSwap( + AddrSurfaceSwap swap, ///< [in] swap mode + PixelFormatInfo* pInfo) ///< [in,out] output per component info +{ + switch (pInfo->comps) + { + case 4: + switch (swap) + { + case ADDR_SWAP_ALT: + SwapComps( 0, 2, pInfo ); + break; // BGRA + case ADDR_SWAP_STD_REV: + SwapComps( 0, 3, pInfo ); + SwapComps( 1, 2, pInfo ); + break; // ABGR + case ADDR_SWAP_ALT_REV: + SwapComps( 0, 3, pInfo ); + SwapComps( 0, 2, pInfo ); + SwapComps( 0, 1, pInfo ); + break; // ARGB + default: + break; + } + break; + case 3: + switch (swap) + { + case ADDR_SWAP_ALT_REV: + SwapComps( 0, 3, pInfo ); + SwapComps( 0, 2, pInfo ); + break; // AGR + case ADDR_SWAP_STD_REV: + SwapComps( 0, 2, pInfo ); + break; // BGR + case ADDR_SWAP_ALT: + SwapComps( 2, 3, pInfo ); + break; // RGA + default: + break; // RGB + } + break; + case 2: + switch (swap) + { + case ADDR_SWAP_ALT_REV: + SwapComps( 0, 1, pInfo ); + SwapComps( 1, 3, pInfo ); + break; // AR + case ADDR_SWAP_STD_REV: + SwapComps( 0, 1, pInfo ); + break; // GR + case ADDR_SWAP_ALT: + SwapComps( 1, 3, pInfo ); + break; // RA + default: 
+ break; // RG + } + break; + case 1: + switch (swap) + { + case ADDR_SWAP_ALT_REV: + SwapComps( 0, 3, pInfo ); + break; // A + case ADDR_SWAP_STD_REV: + SwapComps( 0, 2, pInfo ); + break; // B + case ADDR_SWAP_ALT: + SwapComps( 0, 1, pInfo ); + break; // G + default: + break; // R + } + break; + } +} + +/** +**************************************************************************************************** +* ElemLib::GetCompSwap +* +* @brief +* Get components swapped for color surface +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID ElemLib::SwapComps( + UINT_32 c0, ///< [in] component index 0 + UINT_32 c1, ///< [in] component index 1 + PixelFormatInfo* pInfo) ///< [in,out] output per component info +{ + UINT_32 start; + UINT_32 bits; + + start = pInfo->compStart[c0]; + pInfo->compStart[c0] = pInfo->compStart[c1]; + pInfo->compStart[c1] = start; + + bits = pInfo->compBit[c0]; + pInfo->compBit[c0] = pInfo->compBit[c1]; + pInfo->compBit[c1] = bits; +} + +/** +**************************************************************************************************** +* ElemLib::PixGetColorCompInfo +* +* @brief +* Get per component info for color surface +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID ElemLib::PixGetColorCompInfo( + AddrColorFormat format, ///< [in] surface format, read from register + AddrSurfaceNumber number, ///< [in] pixel number type + AddrSurfaceSwap swap, ///< [in] component swap mode + PixelFormatInfo* pInfo ///< [out] output per component info + ) const +{ + // 1. 
Get componet bits + switch (format) + { + case ADDR_COLOR_8: + GetCompBits(8, 0, 0, 0, pInfo); + break; + case ADDR_COLOR_1_5_5_5: + GetCompBits(5, 5, 5, 1, pInfo); + break; + case ADDR_COLOR_5_6_5: + GetCompBits(8, 6, 5, 0, pInfo); + break; + case ADDR_COLOR_6_5_5: + GetCompBits(5, 5, 6, 0, pInfo); + break; + case ADDR_COLOR_8_8: + GetCompBits(8, 8, 0, 0, pInfo); + break; + case ADDR_COLOR_4_4_4_4: + GetCompBits(4, 4, 4, 4, pInfo); + break; + case ADDR_COLOR_16: + GetCompBits(16, 0, 0, 0, pInfo); + break; + case ADDR_COLOR_8_8_8_8: + GetCompBits(8, 8, 8, 8, pInfo); + break; + case ADDR_COLOR_2_10_10_10: + GetCompBits(10, 10, 10, 2, pInfo); + break; + case ADDR_COLOR_10_11_11: + GetCompBits(11, 11, 10, 0, pInfo); + break; + case ADDR_COLOR_11_11_10: + GetCompBits(10, 11, 11, 0, pInfo); + break; + case ADDR_COLOR_16_16: + GetCompBits(16, 16, 0, 0, pInfo); + break; + case ADDR_COLOR_16_16_16_16: + GetCompBits(16, 16, 16, 16, pInfo); + break; + case ADDR_COLOR_16_FLOAT: + GetCompBits(16, 0, 0, 0, pInfo); + break; + case ADDR_COLOR_16_16_FLOAT: + GetCompBits(16, 16, 0, 0, pInfo); + break; + case ADDR_COLOR_32_FLOAT: + GetCompBits(32, 0, 0, 0, pInfo); + break; + case ADDR_COLOR_32_32_FLOAT: + GetCompBits(32, 32, 0, 0, pInfo); + break; + case ADDR_COLOR_16_16_16_16_FLOAT: + GetCompBits(16, 16, 16, 16, pInfo); + break; + case ADDR_COLOR_32_32_32_32_FLOAT: + GetCompBits(32, 32, 32, 32, pInfo); + break; + + case ADDR_COLOR_32: + GetCompBits(32, 0, 0, 0, pInfo); + break; + case ADDR_COLOR_32_32: + GetCompBits(32, 32, 0, 0, pInfo); + break; + case ADDR_COLOR_32_32_32_32: + GetCompBits(32, 32, 32, 32, pInfo); + break; + case ADDR_COLOR_10_10_10_2: + GetCompBits(2, 10, 10, 10, pInfo); + break; + case ADDR_COLOR_10_11_11_FLOAT: + GetCompBits(11, 11, 10, 0, pInfo); + break; + case ADDR_COLOR_11_11_10_FLOAT: + GetCompBits(10, 11, 11, 0, pInfo); + break; + case ADDR_COLOR_5_5_5_1: + GetCompBits(1, 5, 5, 5, pInfo); + break; + case ADDR_COLOR_3_3_2: + GetCompBits(2, 3, 3, 0, pInfo); 
+ break; + case ADDR_COLOR_4_4: + GetCompBits(4, 4, 0, 0, pInfo); + break; + case ADDR_COLOR_8_24: + case ADDR_COLOR_8_24_FLOAT: // same bit count, fall through + GetCompBits(24, 8, 0, 0, pInfo); + break; + case ADDR_COLOR_24_8: + case ADDR_COLOR_24_8_FLOAT: // same bit count, fall through + GetCompBits(8, 24, 0, 0, pInfo); + break; + case ADDR_COLOR_X24_8_32_FLOAT: + GetCompBits(32, 8, 0, 0, pInfo); + break; + + case ADDR_COLOR_INVALID: + GetCompBits(0, 0, 0, 0, pInfo); + break; + default: + ADDR_ASSERT(0); + GetCompBits(0, 0, 0, 0, pInfo); + break; + } + + // 2. Get component number type + + GetCompType(format, number, pInfo); + + // 3. Swap components if needed + + GetCompSwap(swap, pInfo); +} + +/** +**************************************************************************************************** +* ElemLib::PixGetDepthCompInfo +* +* @brief +* Get per component info for depth surface +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID ElemLib::PixGetDepthCompInfo( + AddrDepthFormat format, ///< [in] surface format, read from register + PixelFormatInfo* pInfo ///< [out] output per component bits and type + ) const +{ + if (m_depthPlanarType == ADDR_DEPTH_PLANAR_R800) + { + if (format == ADDR_DEPTH_8_24_FLOAT) + { + format = ADDR_DEPTH_X24_8_32_FLOAT; // Use this format to represent R800's D24FS8 + } + + if (format == ADDR_DEPTH_X8_24_FLOAT) + { + format = ADDR_DEPTH_32_FLOAT; + } + } + + switch (format) + { + case ADDR_DEPTH_16: + GetCompBits(16, 0, 0, 0, pInfo); + break; + case ADDR_DEPTH_8_24: + case ADDR_DEPTH_8_24_FLOAT: // similar format, fall through + GetCompBits(24, 8, 0, 0, pInfo); + break; + case ADDR_DEPTH_X8_24: + case ADDR_DEPTH_X8_24_FLOAT: // similar format, fall through + GetCompBits(24, 0, 0, 0, pInfo); + break; + case ADDR_DEPTH_32_FLOAT: + GetCompBits(32, 0, 0, 0, pInfo); + break; + case ADDR_DEPTH_X24_8_32_FLOAT: + GetCompBits(32, 8, 0, 0, pInfo); + break; 
+ case ADDR_DEPTH_INVALID: + GetCompBits(0, 0, 0, 0, pInfo); + break; + default: + ADDR_ASSERT(0); + GetCompBits(0, 0, 0, 0, pInfo); + break; + } + + switch (format) + { + case ADDR_DEPTH_16: + pInfo->numType [0] = ADDR_UNORM_R6XX; + pInfo->numType [1] = ADDR_ZERO; + break; + case ADDR_DEPTH_8_24: + pInfo->numType [0] = ADDR_UNORM_R6XXDB; + pInfo->numType [1] = ADDR_UINT_BITS; + break; + case ADDR_DEPTH_8_24_FLOAT: + pInfo->numType [0] = ADDR_U4FLOATC; + pInfo->numType [1] = ADDR_UINT_BITS; + break; + case ADDR_DEPTH_X8_24: + pInfo->numType [0] = ADDR_UNORM_R6XXDB; + pInfo->numType [1] = ADDR_ZERO; + break; + case ADDR_DEPTH_X8_24_FLOAT: + pInfo->numType [0] = ADDR_U4FLOATC; + pInfo->numType [1] = ADDR_ZERO; + break; + case ADDR_DEPTH_32_FLOAT: + pInfo->numType [0] = ADDR_S8FLOAT32; + pInfo->numType [1] = ADDR_ZERO; + break; + case ADDR_DEPTH_X24_8_32_FLOAT: + pInfo->numType [0] = ADDR_S8FLOAT32; + pInfo->numType [1] = ADDR_UINT_BITS; + break; + default: + pInfo->numType [0] = ADDR_NO_NUMBER; + pInfo->numType [1] = ADDR_NO_NUMBER; + break; + } + + pInfo->numType [2] = ADDR_NO_NUMBER; + pInfo->numType [3] = ADDR_NO_NUMBER; +} + +/** +**************************************************************************************************** +* ElemLib::PixGetExportNorm +* +* @brief +* Check if fp16 export norm can be enabled. +* +* @return +* TRUE if this can be enabled. 
+* +**************************************************************************************************** +*/ +BOOL_32 ElemLib::PixGetExportNorm( + AddrColorFormat colorFmt, ///< [in] surface format, read from register + AddrSurfaceNumber numberFmt, ///< [in] pixel number type + AddrSurfaceSwap swap ///< [in] components swap type + ) const +{ + BOOL_32 enabled = TRUE; + + PixelFormatInfo formatInfo; + + PixGetColorCompInfo(colorFmt, numberFmt, swap, &formatInfo); + + for (UINT_32 c = 0; c < 4; c++) + { + if (m_fp16ExportNorm) + { + if (((formatInfo.compBit[c] > 11) || (formatInfo.numType[c] > ADDR_USCALED)) && + (formatInfo.numType[c] != ADDR_U4FLOATC) && + (formatInfo.numType[c] != ADDR_S5FLOAT) && + (formatInfo.numType[c] != ADDR_S5FLOATM) && + (formatInfo.numType[c] != ADDR_U5FLOAT) && + (formatInfo.numType[c] != ADDR_U3FLOATM)) + { + enabled = FALSE; + break; + } + } + else + { + if ((formatInfo.compBit[c] > 11) || (formatInfo.numType[c] > ADDR_USCALED)) + { + enabled = FALSE; + break; + } + } + } + + return enabled; +} + +/** +**************************************************************************************************** +* ElemLib::AdjustSurfaceInfo +* +* @brief +* Adjust bpp/base pitch/width/height according to elemMode and expandX/Y +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID ElemLib::AdjustSurfaceInfo( + ElemMode elemMode, ///< [in] element mode + UINT_32 expandX, ///< [in] decompression expansion factor in X + UINT_32 expandY, ///< [in] decompression expansion factor in Y + UINT_32* pBpp, ///< [in,out] bpp + UINT_32* pBasePitch, ///< [in,out] base pitch + UINT_32* pWidth, ///< [in,out] width + UINT_32* pHeight) ///< [in,out] height +{ + UINT_32 packedBits; + UINT_32 basePitch; + UINT_32 width; + UINT_32 height; + UINT_32 bpp; + BOOL_32 bBCnFormat = FALSE; + + ADDR_ASSERT(pBpp != NULL); + ADDR_ASSERT(pWidth != NULL && pHeight != NULL && pBasePitch != NULL); + + if 
(pBpp) + { + bpp = *pBpp; + + switch (elemMode) + { + case ADDR_EXPANDED: + packedBits = bpp / expandX / expandY; + break; + case ADDR_PACKED_STD: // Different bit order + case ADDR_PACKED_REV: + packedBits = bpp * expandX * expandY; + break; + case ADDR_PACKED_GBGR: + case ADDR_PACKED_BGRG: + packedBits = bpp; // 32-bit packed ==> 2 32-bit result + break; + case ADDR_PACKED_BC1: // Fall through + case ADDR_PACKED_BC4: + packedBits = 64; + bBCnFormat = TRUE; + break; + case ADDR_PACKED_BC2: // Fall through + case ADDR_PACKED_BC3: // Fall through + case ADDR_PACKED_BC5: // Fall through + bBCnFormat = TRUE; + // fall through + case ADDR_PACKED_ASTC: + case ADDR_PACKED_ETC2_128BPP: + packedBits = 128; + break; + case ADDR_PACKED_ETC2_64BPP: + packedBits = 64; + break; + case ADDR_ROUND_BY_HALF: // Fall through + case ADDR_ROUND_TRUNCATE: // Fall through + case ADDR_ROUND_DITHER: // Fall through + case ADDR_UNCOMPRESSED: + packedBits = bpp; + break; + default: + packedBits = bpp; + ADDR_ASSERT_ALWAYS(); + break; + } + + *pBpp = packedBits; + } + + if (pWidth && pHeight && pBasePitch) + { + basePitch = *pBasePitch; + width = *pWidth; + height = *pHeight; + + if ((expandX > 1) || (expandY > 1)) + { + if (elemMode == ADDR_EXPANDED) + { + basePitch *= expandX; + width *= expandX; + height *= expandY; + } + else + { + // Evergreen family workaround + if (bBCnFormat && (m_pAddrLib->GetChipFamily() == ADDR_CHIP_FAMILY_R8XX)) + { + // For BCn we now pad it to POW2 at the beginning so it is safe to + // divide by 4 directly + basePitch = basePitch / expandX; + width = width / expandX; + height = height / expandY; +#if DEBUG + width = (width == 0) ? 1 : width; + height = (height == 0) ? 
1 : height; + + if ((*pWidth > PowTwoAlign(width, 8) * expandX) || + (*pHeight > PowTwoAlign(height, 8) * expandY)) // 8 is 1D tiling alignment + { + // if this assertion is hit we may have issues if app samples + // rightmost/bottommost pixels + ADDR_ASSERT_ALWAYS(); + } +#endif + } + else // Not BCn format we still keep old way (FMT_1? No real test yet) + { + basePitch = (basePitch + expandX - 1) / expandX; + width = (width + expandX - 1) / expandX; + height = (height + expandY - 1) / expandY; + } + } + + *pBasePitch = basePitch; // 0 is legal value for base pitch. + *pWidth = (width == 0) ? 1 : width; + *pHeight = (height == 0) ? 1 : height; + } //if (pWidth && pHeight && pBasePitch) + } +} + +/** +**************************************************************************************************** +* ElemLib::RestoreSurfaceInfo +* +* @brief +* Reverse operation of AdjustSurfaceInfo +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID ElemLib::RestoreSurfaceInfo( + ElemMode elemMode, ///< [in] element mode + UINT_32 expandX, ///< [in] decompression expansion factor in X + UINT_32 expandY, ///< [out] decompression expansion factor in Y + UINT_32* pBpp, ///< [in,out] bpp + UINT_32* pWidth, ///< [in,out] width + UINT_32* pHeight) ///< [in,out] height +{ + UINT_32 originalBits; + UINT_32 width; + UINT_32 height; + UINT_32 bpp; + + BOOL_32 bBCnFormat = FALSE; + (void)bBCnFormat; + + ADDR_ASSERT(pBpp != NULL); + ADDR_ASSERT(pWidth != NULL && pHeight != NULL); + + if (pBpp) + { + bpp = *pBpp; + + switch (elemMode) + { + case ADDR_EXPANDED: + originalBits = bpp * expandX * expandY; + break; + case ADDR_PACKED_STD: // Different bit order + case ADDR_PACKED_REV: + originalBits = bpp / expandX / expandY; + break; + case ADDR_PACKED_GBGR: + case ADDR_PACKED_BGRG: + originalBits = bpp; // 32-bit packed ==> 2 32-bit result + break; + case ADDR_PACKED_BC1: // Fall through + case ADDR_PACKED_BC4: + 
originalBits = 64; + bBCnFormat = TRUE; + break; + case ADDR_PACKED_BC2: // Fall through + case ADDR_PACKED_BC3: // Fall through + case ADDR_PACKED_BC5: + bBCnFormat = TRUE; + // fall through + case ADDR_PACKED_ASTC: + case ADDR_PACKED_ETC2_128BPP: + originalBits = 128; + break; + case ADDR_PACKED_ETC2_64BPP: + originalBits = 64; + break; + case ADDR_ROUND_BY_HALF: // Fall through + case ADDR_ROUND_TRUNCATE: // Fall through + case ADDR_ROUND_DITHER: // Fall through + case ADDR_UNCOMPRESSED: + originalBits = bpp; + break; + default: + originalBits = bpp; + ADDR_ASSERT_ALWAYS(); + break; + } + + *pBpp = originalBits; + } + + if (pWidth && pHeight) + { + width = *pWidth; + height = *pHeight; + + if ((expandX > 1) || (expandY > 1)) + { + if (elemMode == ADDR_EXPANDED) + { + width /= expandX; + height /= expandY; + } + else + { + width *= expandX; + height *= expandY; + } + } + + *pWidth = (width == 0) ? 1 : width; + *pHeight = (height == 0) ? 1 : height; + } +} + +/** +**************************************************************************************************** +* ElemLib::GetBitsPerPixel +* +* @brief +* Compute the total bits per element according to a format +* code. For compressed formats, this is not the same as +* the number of bits per decompressed element. 
+* +* @return +* Bits per pixel +**************************************************************************************************** +*/ +UINT_32 ElemLib::GetBitsPerPixel( + AddrFormat format, ///< [in] surface format code + ElemMode* pElemMode, ///< [out] element mode + UINT_32* pExpandX, ///< [out] decompression expansion factor in X + UINT_32* pExpandY, ///< [out] decompression expansion factor in Y + UINT_32* pUnusedBits) ///< [out] bits unused +{ + UINT_32 bpp; + UINT_32 expandX = 1; + UINT_32 expandY = 1; + UINT_32 bitUnused = 0; + ElemMode elemMode = ADDR_UNCOMPRESSED; // default value + + switch (format) + { + case ADDR_FMT_8: + bpp = 8; + break; + case ADDR_FMT_1_5_5_5: + case ADDR_FMT_5_6_5: + case ADDR_FMT_6_5_5: + case ADDR_FMT_8_8: + case ADDR_FMT_4_4_4_4: + case ADDR_FMT_16: + bpp = 16; + break; + case ADDR_FMT_GB_GR: + elemMode = ADDR_PACKED_GBGR; + bpp = m_configFlags.use32bppFor422Fmt ? 32 : 16; + expandX = m_configFlags.use32bppFor422Fmt ? 2 : 1; + break; + case ADDR_FMT_BG_RG: + elemMode = ADDR_PACKED_BGRG; + bpp = m_configFlags.use32bppFor422Fmt ? 32 : 16; + expandX = m_configFlags.use32bppFor422Fmt ? 
2 : 1; + break; + case ADDR_FMT_8_8_8_8: + case ADDR_FMT_2_10_10_10: + case ADDR_FMT_10_11_11: + case ADDR_FMT_11_11_10: + case ADDR_FMT_16_16: + case ADDR_FMT_32: + case ADDR_FMT_24_8: + bpp = 32; + break; + case ADDR_FMT_16_16_16_16: + case ADDR_FMT_32_32: + case ADDR_FMT_CTX1: + bpp = 64; + break; + case ADDR_FMT_32_32_32_32: + bpp = 128; + break; + case ADDR_FMT_INVALID: + bpp = 0; + break; + case ADDR_FMT_1_REVERSED: + elemMode = ADDR_PACKED_REV; + expandX = 8; + bpp = 1; + break; + case ADDR_FMT_1: + elemMode = ADDR_PACKED_STD; + expandX = 8; + bpp = 1; + break; + case ADDR_FMT_4_4: + case ADDR_FMT_3_3_2: + bpp = 8; + break; + case ADDR_FMT_5_5_5_1: + bpp = 16; + break; + case ADDR_FMT_32_AS_8: + case ADDR_FMT_32_AS_8_8: + case ADDR_FMT_8_24: + case ADDR_FMT_10_10_10_2: + case ADDR_FMT_5_9_9_9_SHAREDEXP: + bpp = 32; + break; + case ADDR_FMT_X24_8_32_FLOAT: + bpp = 64; + bitUnused = 24; + break; + case ADDR_FMT_8_8_8: + elemMode = ADDR_EXPANDED; + bpp = 24;//@@ 8; // read 3 elements per pixel + expandX = 3; + break; + case ADDR_FMT_16_16_16: + elemMode = ADDR_EXPANDED; + bpp = 48;//@@ 16; // read 3 elements per pixel + expandX = 3; + break; + case ADDR_FMT_32_32_32: + elemMode = ADDR_EXPANDED; + expandX = 3; + bpp = 96;//@@ 32; // read 3 elements per pixel + break; + case ADDR_FMT_BC1: + elemMode = ADDR_PACKED_BC1; + expandX = 4; + expandY = 4; + bpp = 64; + break; + case ADDR_FMT_BC4: + elemMode = ADDR_PACKED_BC4; + expandX = 4; + expandY = 4; + bpp = 64; + break; + case ADDR_FMT_BC2: + elemMode = ADDR_PACKED_BC2; + expandX = 4; + expandY = 4; + bpp = 128; + break; + case ADDR_FMT_BC3: + elemMode = ADDR_PACKED_BC3; + expandX = 4; + expandY = 4; + bpp = 128; + break; + case ADDR_FMT_BC5: + case ADDR_FMT_BC6: // reuse ADDR_PACKED_BC5 + case ADDR_FMT_BC7: // reuse ADDR_PACKED_BC5 + elemMode = ADDR_PACKED_BC5; + expandX = 4; + expandY = 4; + bpp = 128; + break; + + case ADDR_FMT_ETC2_64BPP: + elemMode = ADDR_PACKED_ETC2_64BPP; + expandX = 4; + expandY = 4; + bpp 
= 64; + break; + + case ADDR_FMT_ETC2_128BPP: + elemMode = ADDR_PACKED_ETC2_128BPP; + expandX = 4; + expandY = 4; + bpp = 128; + break; + + case ADDR_FMT_ASTC_4x4: + elemMode = ADDR_PACKED_ASTC; + expandX = 4; + expandY = 4; + bpp = 128; + break; + + case ADDR_FMT_ASTC_5x4: + elemMode = ADDR_PACKED_ASTC; + expandX = 5; + expandY = 4; + bpp = 128; + break; + + case ADDR_FMT_ASTC_5x5: + elemMode = ADDR_PACKED_ASTC; + expandX = 5; + expandY = 5; + bpp = 128; + break; + + case ADDR_FMT_ASTC_6x5: + elemMode = ADDR_PACKED_ASTC; + expandX = 6; + expandY = 5; + bpp = 128; + break; + + case ADDR_FMT_ASTC_6x6: + elemMode = ADDR_PACKED_ASTC; + expandX = 6; + expandY = 6; + bpp = 128; + break; + + case ADDR_FMT_ASTC_8x5: + elemMode = ADDR_PACKED_ASTC; + expandX = 8; + expandY = 5; + bpp = 128; + break; + + case ADDR_FMT_ASTC_8x6: + elemMode = ADDR_PACKED_ASTC; + expandX = 8; + expandY = 6; + bpp = 128; + break; + + case ADDR_FMT_ASTC_8x8: + elemMode = ADDR_PACKED_ASTC; + expandX = 8; + expandY = 8; + bpp = 128; + break; + + case ADDR_FMT_ASTC_10x5: + elemMode = ADDR_PACKED_ASTC; + expandX = 10; + expandY = 5; + bpp = 128; + break; + + case ADDR_FMT_ASTC_10x6: + elemMode = ADDR_PACKED_ASTC; + expandX = 10; + expandY = 6; + bpp = 128; + break; + + case ADDR_FMT_ASTC_10x8: + elemMode = ADDR_PACKED_ASTC; + expandX = 10; + expandY = 8; + bpp = 128; + break; + + case ADDR_FMT_ASTC_10x10: + elemMode = ADDR_PACKED_ASTC; + expandX = 10; + expandY = 10; + bpp = 128; + break; + + case ADDR_FMT_ASTC_12x10: + elemMode = ADDR_PACKED_ASTC; + expandX = 12; + expandY = 10; + bpp = 128; + break; + + case ADDR_FMT_ASTC_12x12: + elemMode = ADDR_PACKED_ASTC; + expandX = 12; + expandY = 12; + bpp = 128; + break; + + default: + bpp = 0; + ADDR_ASSERT_ALWAYS(); + break; + // @@ or should this be an error? 
+ } + + SafeAssign(pExpandX, expandX); + SafeAssign(pExpandY, expandY); + SafeAssign(pUnusedBits, bitUnused); + SafeAssign(reinterpret_cast(pElemMode), elemMode); + + return bpp; +} + +/** +**************************************************************************************************** +* ElemLib::GetCompBits +* +* @brief +* Set each component's bit size and bit start. And set element mode and number type +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID ElemLib::GetCompBits( + UINT_32 c0, ///< [in] bits of component 0 + UINT_32 c1, ///< [in] bits of component 1 + UINT_32 c2, ///< [in] bits of component 2 + UINT_32 c3, ///< [in] bits of component 3 + PixelFormatInfo* pInfo, ///< [out] per component info out + ElemMode elemMode) ///< [in] element mode +{ + pInfo->comps = 0; + + pInfo->compBit[0] = c0; + pInfo->compBit[1] = c1; + pInfo->compBit[2] = c2; + pInfo->compBit[3] = c3; + + pInfo->compStart[0] = 0; + pInfo->compStart[1] = c0; + pInfo->compStart[2] = c0+c1; + pInfo->compStart[3] = c0+c1+c2; + + pInfo->elemMode = elemMode; + // still needed since component swap may depend on number of components + for (INT i=0; i<4; i++) + { + if (pInfo->compBit[i] == 0) + { + pInfo->compStart[i] = 0; // all null components start at bit 0 + pInfo->numType[i] = ADDR_NO_NUMBER; // and have no number type + } + else + { + pInfo->comps++; + } + } +} + +/** +**************************************************************************************************** +* ElemLib::GetCompBits +* +* @brief +* Set the clear color (or clear depth/stencil) for a surface +* +* @note +* If clearColor is zero, a default clear value is used in place of comps[4]. 
+* If float32 is set, full precision is used, else the mantissa is reduced to 12-bits +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID ElemLib::SetClearComps( + ADDR_FLT_32 comps[4], ///< [in,out] components + BOOL_32 clearColor, ///< [in] TRUE if clear color is set (CLEAR_COLOR) + BOOL_32 float32) ///< [in] TRUE if float32 component (BLEND_FLOAT32) +{ + INT_32 i; + + // Use default clearvalues if clearColor is disabled + if (clearColor == FALSE) + { + for (i=0; i<3; i++) + { + comps[i].f = 0.0; + } + comps[3].f = 1.0; + } + + // Otherwise use the (modified) clear value + else + { + for (i=0; i<4; i++) + { // If full precision, use clear value unchanged + if (float32) + { + // Do nothing + //comps[i] = comps[i]; + } + // Else if it is a NaN, use the standard NaN value + else if ((comps[i].u & 0x7FFFFFFF) > 0x7F800000) + { + comps[i].u = 0xFFC00000; + } + // Else reduce the mantissa precision + else + { + comps[i].u = comps[i].u & 0xFFFFF000; + } + } + } +} + +/** +**************************************************************************************************** +* ElemLib::IsBlockCompressed +* +* @brief +* TRUE if this is block compressed format +* +* @note +* +* @return +* BOOL_32 +**************************************************************************************************** +*/ +BOOL_32 ElemLib::IsBlockCompressed( + AddrFormat format) ///< [in] Format +{ + return (((format >= ADDR_FMT_BC1) && (format <= ADDR_FMT_BC7)) || + ((format >= ADDR_FMT_ASTC_4x4) && (format <= ADDR_FMT_ETC2_128BPP))); +} + +/** +**************************************************************************************************** +* ElemLib::IsCompressed +* +* @brief +* TRUE if this is block compressed format or 1 bit format +* +* @note +* +* @return +* BOOL_32 +**************************************************************************************************** +*/ +BOOL_32 ElemLib::IsCompressed( + 
AddrFormat format) ///< [in] Format +{ + return IsBlockCompressed(format) || format == ADDR_FMT_BC1 || format == ADDR_FMT_BC7; +} + +/** +**************************************************************************************************** +* ElemLib::IsExpand3x +* +* @brief +* TRUE if this is 3x expand format +* +* @note +* +* @return +* BOOL_32 +**************************************************************************************************** +*/ +BOOL_32 ElemLib::IsExpand3x( + AddrFormat format) ///< [in] Format +{ + BOOL_32 is3x = FALSE; + + switch (format) + { + case ADDR_FMT_8_8_8: + case ADDR_FMT_16_16_16: + case ADDR_FMT_32_32_32: + is3x = TRUE; + break; + default: + break; + } + + return is3x; +} + +/** +**************************************************************************************************** +* ElemLib::IsMacroPixelPacked +* +* @brief +* TRUE if this is a macro-pixel-packed format. +* +* @note +* +* @return +* BOOL_32 +**************************************************************************************************** +*/ +BOOL_32 ElemLib::IsMacroPixelPacked( + AddrFormat format) ///< [in] Format +{ + BOOL_32 isMacroPixelPacked = FALSE; + + switch (format) + { + case ADDR_FMT_BG_RG: + case ADDR_FMT_GB_GR: + isMacroPixelPacked = TRUE; + break; + default: + break; + } + + return isMacroPixelPacked; +} + +} diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/addrelemlib.h mesa-19.0.1/src/amd/addrlib/src/core/addrelemlib.h --- mesa-18.3.3/src/amd/addrlib/src/core/addrelemlib.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/addrelemlib.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,279 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file addrelemlib.h +* @brief Contains the class for element/pixel related functions. 
+**************************************************************************************************** +*/ + +#ifndef __ELEM_LIB_H__ +#define __ELEM_LIB_H__ + +#include "addrinterface.h" +#include "addrobject.h" +#include "addrcommon.h" + +namespace Addr +{ + +class Lib; + +// The masks for property bits within the Properties INT_32 +union ComponentFlags +{ + struct + { + UINT_32 byteAligned : 1; ///< all components are byte aligned + UINT_32 exportNorm : 1; ///< components support R6xx NORM compression + UINT_32 floatComp : 1; ///< there is at least one floating point component + }; + + UINT_32 value; +}; + +// Copy from legacy lib's NumberType +enum NumberType +{ + // The following number types have the range [-1..1] + ADDR_NO_NUMBER, // This component doesn't exist and has no default value + ADDR_EPSILON, // Force component value to integer 0x00000001 + ADDR_ZERO, // Force component value to integer 0x00000000 + ADDR_ONE, // Force component value to floating point 1.0 + // Above values don't have any bits per component (keep ADDR_ONE the last of these) + + ADDR_UNORM, // Unsigned normalized (repeating fraction) full precision + ADDR_SNORM, // Signed normalized (repeating fraction) full precision + ADDR_GAMMA, // Gamma-corrected, full precision + + ADDR_UNORM_R5XXRB, // Unsigned normalized (repeating fraction) for r5xx RB + ADDR_SNORM_R5XXRB, // Signed normalized (repeating fraction) for r5xx RB + ADDR_GAMMA_R5XXRB, // Gamma-corrected for r5xx RB (note: unnormalized value) + ADDR_UNORM_R5XXBC, // Unsigned normalized (repeating fraction) for r5xx BC + ADDR_SNORM_R5XXBC, // Signed normalized (repeating fraction) for r5xx BC + ADDR_GAMMA_R5XXBC, // Gamma-corrected for r5xx BC (note: unnormalized value) + + ADDR_UNORM_R6XX, // Unsigned normalized (repeating fraction) for R6xx + ADDR_UNORM_R6XXDB, // Unorms for 24-bit depth: one value differs from ADDR_UNORM_R6XX + ADDR_SNORM_R6XX, // Signed normalized (repeating fraction) for R6xx + ADDR_GAMMA8_R6XX, // 
Gamma-corrected for r6xx + ADDR_GAMMA8_R7XX_TP, // Gamma-corrected for r7xx TP 12bit unorm 8.4. + + ADDR_U4FLOATC, // Unsigned float: 4-bit exponent, bias=15, no NaN, clamp [0..1] + ADDR_GAMMA_4SEG, // Gamma-corrected, four segment approximation + ADDR_U0FIXED, // Unsigned 0.N-bit fixed point + + // The following number types have large ranges (LEAVE ADDR_USCALED first or fix Finish routine) + ADDR_USCALED, // Unsigned integer converted to/from floating point + ADDR_SSCALED, // Signed integer converted to/from floating point + ADDR_USCALED_R5XXRB, // Unsigned integer to/from floating point for r5xx RB + ADDR_SSCALED_R5XXRB, // Signed integer to/from floating point for r5xx RB + ADDR_UINT_BITS, // Keep in unsigned integer form, clamped to specified range + ADDR_SINT_BITS, // Keep in signed integer form, clamped to specified range + ADDR_UINTBITS, // @@ remove Keep in unsigned integer form, use modulus to reduce bits + ADDR_SINTBITS, // @@ remove Keep in signed integer form, use modulus to reduce bits + + // The following number types and ADDR_U4FLOATC have exponents + // (LEAVE ADDR_S8FLOAT first or fix Finish routine) + ADDR_S8FLOAT, // Signed floating point with 8-bit exponent, bias=127 + ADDR_S8FLOAT32, // 32-bit IEEE float, passes through NaN values + ADDR_S5FLOAT, // Signed floating point with 5-bit exponent, bias=15 + ADDR_S5FLOATM, // Signed floating point with 5-bit exponent, bias=15, no NaN/Inf + ADDR_U5FLOAT, // Signed floating point with 5-bit exponent, bias=15 + ADDR_U3FLOATM, // Unsigned floating point with 3-bit exponent, bias=3 + + ADDR_S5FIXED, // Signed 5.N-bit fixed point, with rounding + + ADDR_END_NUMBER // Used for range comparisons +}; + +// Copy from legacy lib's AddrElement +enum ElemMode +{ + // These formats allow both packing an unpacking + ADDR_ROUND_BY_HALF, // add 1/2 and truncate when packing this element + ADDR_ROUND_TRUNCATE, // truncate toward 0 for sign/mag, else toward neg + ADDR_ROUND_DITHER, // Pack by dithering -- requires 
(x,y) position + + // These formats only allow unpacking, no packing + ADDR_UNCOMPRESSED, // Elements are not compressed: one data element per pixel/texel + ADDR_EXPANDED, // Elements are split up and stored in multiple data elements + ADDR_PACKED_STD, // Elements are compressed into ExpandX by ExpandY data elements + ADDR_PACKED_REV, // Like ADDR_PACKED, but X order of pixels is reverved + ADDR_PACKED_GBGR, // Elements are compressed 4:2:2 in G1B_G0R order (high to low) + ADDR_PACKED_BGRG, // Elements are compressed 4:2:2 in BG1_RG0 order (high to low) + ADDR_PACKED_BC1, // Each data element is uncompressed to a 4x4 pixel/texel array + ADDR_PACKED_BC2, // Each data element is uncompressed to a 4x4 pixel/texel array + ADDR_PACKED_BC3, // Each data element is uncompressed to a 4x4 pixel/texel array + ADDR_PACKED_BC4, // Each data element is uncompressed to a 4x4 pixel/texel array + ADDR_PACKED_BC5, // Each data element is uncompressed to a 4x4 pixel/texel array + ADDR_PACKED_ETC2_64BPP, // ETC2 formats that use 64bpp to represent each 4x4 block + ADDR_PACKED_ETC2_128BPP, // ETC2 formats that use 128bpp to represent each 4x4 block + ADDR_PACKED_ASTC, // Various ASTC formats, all are 128bpp with varying block sizes + + // These formats provide various kinds of compression + ADDR_ZPLANE_R5XX, // Compressed Zplane using r5xx architecture format + ADDR_ZPLANE_R6XX, // Compressed Zplane using r6xx architecture format + //@@ Fill in the compression modes + + ADDR_END_ELEMENT // Used for range comparisons +}; + +enum DepthPlanarType +{ + ADDR_DEPTH_PLANAR_NONE = 0, // No plane z/stencl + ADDR_DEPTH_PLANAR_R600 = 1, // R600 z and stencil planes are store within a tile + ADDR_DEPTH_PLANAR_R800 = 2, // R800 has separate z and stencil planes +}; + +/** +**************************************************************************************************** +* PixelFormatInfo +* +* @brief +* Per component info +* 
+**************************************************************************************************** +*/ +struct PixelFormatInfo +{ + UINT_32 compBit[4]; + NumberType numType[4]; + UINT_32 compStart[4]; + ElemMode elemMode; + UINT_32 comps; ///< Number of components +}; + +/** +**************************************************************************************************** +* @brief This class contains asic indepentent element related attributes and operations +**************************************************************************************************** +*/ +class ElemLib : public Object +{ +protected: + ElemLib(Lib* pAddrLib); + +public: + + /// Makes this class virtual + virtual ~ElemLib(); + + static ElemLib* Create( + const Lib* pAddrLib); + + /// The implementation is only for R6xx/R7xx, so make it virtual in case we need for R8xx + BOOL_32 PixGetExportNorm( + AddrColorFormat colorFmt, + AddrSurfaceNumber numberFmt, AddrSurfaceSwap swap) const; + + /// Below method are asic independent, so make them just static. + /// Remove static if we need different operation in hwl. 
+ + VOID Flt32ToDepthPixel( + AddrDepthFormat format, const ADDR_FLT_32 comps[2], UINT_8 *pPixel) const; + + VOID Flt32ToColorPixel( + AddrColorFormat format, AddrSurfaceNumber surfNum, AddrSurfaceSwap surfSwap, + const ADDR_FLT_32 comps[4], UINT_8 *pPixel) const; + + static VOID Flt32sToInt32s( + ADDR_FLT_32 value, UINT_32 bits, NumberType numberType, UINT_32* pResult); + + static VOID Int32sToPixel( + UINT_32 numComps, UINT_32* pComps, UINT_32* pCompBits, UINT_32* pCompStart, + ComponentFlags properties, UINT_32 resultBits, UINT_8* pPixel); + + VOID PixGetColorCompInfo( + AddrColorFormat format, AddrSurfaceNumber number, AddrSurfaceSwap swap, + PixelFormatInfo* pInfo) const; + + VOID PixGetDepthCompInfo( + AddrDepthFormat format, PixelFormatInfo* pInfo) const; + + UINT_32 GetBitsPerPixel( + AddrFormat format, ElemMode* pElemMode = NULL, + UINT_32* pExpandX = NULL, UINT_32* pExpandY = NULL, UINT_32* pBitsUnused = NULL); + + static VOID SetClearComps( + ADDR_FLT_32 comps[4], BOOL_32 clearColor, BOOL_32 float32); + + VOID AdjustSurfaceInfo( + ElemMode elemMode, UINT_32 expandX, UINT_32 expandY, + UINT_32* pBpp, UINT_32* pBasePitch, UINT_32* pWidth, UINT_32* pHeight); + + VOID RestoreSurfaceInfo( + ElemMode elemMode, UINT_32 expandX, UINT_32 expandY, + UINT_32* pBpp, UINT_32* pWidth, UINT_32* pHeight); + + /// Checks if depth and stencil are planar inside a tile + BOOL_32 IsDepthStencilTilePlanar() + { + return (m_depthPlanarType == ADDR_DEPTH_PLANAR_R600) ? 
TRUE : FALSE; + } + + /// Sets m_configFlags, copied from AddrLib + VOID SetConfigFlags(ConfigFlags flags) + { + m_configFlags = flags; + } + + static BOOL_32 IsCompressed(AddrFormat format); + static BOOL_32 IsBlockCompressed(AddrFormat format); + static BOOL_32 IsExpand3x(AddrFormat format); + static BOOL_32 IsMacroPixelPacked(AddrFormat format); + +protected: + + static VOID GetCompBits( + UINT_32 c0, UINT_32 c1, UINT_32 c2, UINT_32 c3, + PixelFormatInfo* pInfo, + ElemMode elemMode = ADDR_ROUND_BY_HALF); + + static VOID GetCompType( + AddrColorFormat format, AddrSurfaceNumber numType, + PixelFormatInfo* pInfo); + + static VOID GetCompSwap( + AddrSurfaceSwap swap, PixelFormatInfo* pInfo); + + static VOID SwapComps( + UINT_32 c0, UINT_32 c1, PixelFormatInfo* pInfo); + +private: + + UINT_32 m_fp16ExportNorm; ///< If allow FP16 to be reported as EXPORT_NORM + DepthPlanarType m_depthPlanarType; + + ConfigFlags m_configFlags; ///< Copy of AddrLib's configFlags + Addr::Lib* const m_pAddrLib; ///< Pointer to parent addrlib instance +}; + +} //Addr + +#endif + diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/addrlib1.cpp mesa-19.0.1/src/amd/addrlib/src/core/addrlib1.cpp --- mesa-18.3.3/src/amd/addrlib/src/core/addrlib1.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/addrlib1.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,4061 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file addr1lib.cpp +* @brief Contains the implementation for the Addr::V1::Lib base class. 
+**************************************************************************************************** +*/ + +#include "addrinterface.h" +#include "addrlib1.h" +#include "addrcommon.h" + +namespace Addr +{ +namespace V1 +{ + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Static Const Member +//////////////////////////////////////////////////////////////////////////////////////////////////// + +const TileModeFlags Lib::ModeFlags[ADDR_TM_COUNT] = +{// T L 1 2 3 P Pr B + {1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_TM_LINEAR_GENERAL + {1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_TM_LINEAR_ALIGNED + {1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_TM_1D_TILED_THIN1 + {4, 0, 1, 0, 0, 0, 0, 0}, // ADDR_TM_1D_TILED_THICK + {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN1 + {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN2 + {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN4 + {4, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THICK + {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN1 + {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN2 + {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN4 + {4, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THICK + {1, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_THIN1 + {4, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_THICK + {1, 0, 0, 1, 1, 0, 0, 1}, // ADDR_TM_3B_TILED_THIN1 + {4, 0, 0, 1, 1, 0, 0, 1}, // ADDR_TM_3B_TILED_THICK + {8, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_XTHICK + {8, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_XTHICK + {1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_TM_POWER_SAVE + {1, 0, 0, 1, 0, 1, 1, 0}, // ADDR_TM_PRT_TILED_THIN1 + {1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_TM_PRT_2D_TILED_THIN1 + {1, 0, 0, 1, 1, 1, 0, 0}, // ADDR_TM_PRT_3D_TILED_THIN1 + {4, 0, 0, 1, 0, 1, 1, 0}, // ADDR_TM_PRT_TILED_THICK + {4, 0, 0, 1, 0, 1, 0, 0}, // ADDR_TM_PRT_2D_TILED_THICK + {4, 0, 0, 1, 1, 1, 0, 0}, // ADDR_TM_PRT_3D_TILED_THICK + {0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_TM_UNKNOWN +}; + 
+//////////////////////////////////////////////////////////////////////////////////////////////////// +// Constructor/Destructor +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Lib::AddrLib1 +* +* @brief +* Constructor for the AddrLib1 class +* +**************************************************************************************************** +*/ +Lib::Lib() + : + Addr::Lib() +{ +} + +/** +**************************************************************************************************** +* Lib::Lib +* +* @brief +* Constructor for the Addr::V1::Lib class with hClient as parameter +* +**************************************************************************************************** +*/ +Lib::Lib(const Client* pClient) + : + Addr::Lib(pClient) +{ +} + +/** +**************************************************************************************************** +* Lib::~AddrLib1 +* +* @brief +* Destructor for the AddrLib1 class +* +**************************************************************************************************** +*/ +Lib::~Lib() +{ +} + +/** +**************************************************************************************************** +* Lib::GetLib +* +* @brief +* Get AddrLib1 pointer +* +* @return +* An Addr::V1::Lib class pointer +**************************************************************************************************** +*/ +Lib* Lib::GetLib( + ADDR_HANDLE hLib) ///< [in] handle of ADDR_HANDLE +{ + Addr::Lib* pAddrLib = Addr::Lib::GetLib(hLib); + if ((pAddrLib != NULL) && + ((pAddrLib->GetChipFamily() == ADDR_CHIP_FAMILY_IVLD) || + (pAddrLib->GetChipFamily() > ADDR_CHIP_FAMILY_VI))) + { + // only valid and pre-VI ASIC can use AddrLib1 function. 
+ ADDR_ASSERT_ALWAYS(); + hLib = NULL; + } + return static_cast(hLib); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Surface Methods +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Lib::ComputeSurfaceInfo +* +* @brief +* Interface function stub of AddrComputeSurfaceInfo. +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + // We suggest client do sanity check but a check here is also good + if (pIn->bpp > 128) + { + returnCode = ADDR_INVALIDPARAMS; + } + + if ((pIn->tileMode == ADDR_TM_UNKNOWN) && (pIn->mipLevel > 0)) + { + returnCode = ADDR_INVALIDPARAMS; + } + + // Thick modes don't support multisample + if ((Thickness(pIn->tileMode) > 1) && (pIn->numSamples > 1)) + { + returnCode = ADDR_INVALIDPARAMS; + } + + if (returnCode == ADDR_OK) + { + // Get a local copy of input structure and only reference pIn for unadjusted values + ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn; + ADDR_TILEINFO tileInfoNull = {0}; + + if (UseTileInfo()) + { + // If the original input has a valid ADDR_TILEINFO pointer then copy its contents. + // Otherwise the default 0's in tileInfoNull are used. 
+ if (pIn->pTileInfo) + { + tileInfoNull = *pIn->pTileInfo; + } + localIn.pTileInfo = &tileInfoNull; + } + + localIn.numSamples = (pIn->numSamples == 0) ? 1 : pIn->numSamples; + + // Do mipmap check first + // If format is BCn, pre-pad dimension to power-of-two according to HWL + ComputeMipLevel(&localIn); + + if (m_configFlags.checkLast2DLevel) + { + // Save this level's original height in pixels + pOut->height = pIn->height; + } + + UINT_32 expandX = 1; + UINT_32 expandY = 1; + ElemMode elemMode; + + // Save outputs that may not go through HWL + pOut->pixelBits = localIn.bpp; + pOut->numSamples = localIn.numSamples; + pOut->last2DLevel = FALSE; + pOut->tcCompatible = FALSE; + +#if !ALT_TEST + if (localIn.numSamples > 1) + { + ADDR_ASSERT(localIn.mipLevel == 0); + } +#endif + + if (localIn.format != ADDR_FMT_INVALID) // Set format to INVALID will skip this conversion + { + // Get compression/expansion factors and element mode + // (which indicates compression/expansion + localIn.bpp = GetElemLib()->GetBitsPerPixel(localIn.format, + &elemMode, + &expandX, + &expandY); + + // Special flag for 96 bit surface. 96 (or 48 if we support) bit surface's width is + // pre-multiplied by 3 and bpp is divided by 3. So pitch alignment for linear- + // aligned does not meet 64-pixel in real. We keep special handling in hwl since hw + // restrictions are different. + // Also Mip 1+ needs an element pitch of 32 bits so we do not need this workaround + // but we use this flag to skip RestoreSurfaceInfo below + + if ((elemMode == ADDR_EXPANDED) && (expandX > 1)) + { + ADDR_ASSERT(IsLinear(localIn.tileMode)); + } + + GetElemLib()->AdjustSurfaceInfo(elemMode, + expandX, + expandY, + &localIn.bpp, + &localIn.basePitch, + &localIn.width, + &localIn.height); + + // Overwrite these parameters if we have a valid format + } + else if (localIn.bpp != 0) + { + localIn.width = (localIn.width != 0) ? localIn.width : 1; + localIn.height = (localIn.height != 0) ? 
localIn.height : 1; + } + else // Rule out some invalid parameters + { + ADDR_ASSERT_ALWAYS(); + + returnCode = ADDR_INVALIDPARAMS; + } + + // Check mipmap after surface expansion + if (returnCode == ADDR_OK) + { + returnCode = PostComputeMipLevel(&localIn, pOut); + } + + if (returnCode == ADDR_OK) + { + if (UseTileIndex(localIn.tileIndex)) + { + // Make sure pTileInfo is not NULL + ADDR_ASSERT(localIn.pTileInfo); + + UINT_32 numSamples = GetNumFragments(localIn.numSamples, localIn.numFrags); + + INT_32 macroModeIndex = TileIndexNoMacroIndex; + + if (localIn.tileIndex != TileIndexLinearGeneral) + { + // Try finding a macroModeIndex + macroModeIndex = HwlComputeMacroModeIndex(localIn.tileIndex, + localIn.flags, + localIn.bpp, + numSamples, + localIn.pTileInfo, + &localIn.tileMode, + &localIn.tileType); + } + + // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info + if (macroModeIndex == TileIndexNoMacroIndex) + { + returnCode = HwlSetupTileCfg(localIn.bpp, + localIn.tileIndex, macroModeIndex, + localIn.pTileInfo, + &localIn.tileMode, &localIn.tileType); + } + // If macroModeIndex is invalid, then assert this is not macro tiled + else if (macroModeIndex == TileIndexInvalid) + { + ADDR_ASSERT(!IsMacroTiled(localIn.tileMode)); + } + + pOut->macroModeIndex = macroModeIndex; + } + } + + if (returnCode == ADDR_OK) + { + localIn.flags.dccPipeWorkaround = localIn.flags.dccCompatible; + + if (localIn.tileMode == ADDR_TM_UNKNOWN) + { + // HWL layer may override tile mode if necessary + HwlSelectTileMode(&localIn); + } + else + { + // HWL layer may override tile mode if necessary + HwlOverrideTileMode(&localIn); + + // Optimize tile mode if possible + OptimizeTileMode(&localIn); + } + } + + // Call main function to compute surface info + if (returnCode == ADDR_OK) + { + returnCode = HwlComputeSurfaceInfo(&localIn, pOut); + } + + if (returnCode == ADDR_OK) + { + // Since bpp might be changed we just pass it through + pOut->bpp = localIn.bpp; + + // Also 
original width/height/bpp + pOut->pixelPitch = pOut->pitch; + pOut->pixelHeight = pOut->height; + +#if DEBUG + if (localIn.flags.display) + { + ADDR_ASSERT((pOut->pitchAlign % 32) == 0); + } +#endif //DEBUG + + if (localIn.format != ADDR_FMT_INVALID) + { + // + // Note: For 96 bit surface, the pixelPitch returned might be an odd number, but it + // is okay to program texture pitch as HW's mip calculator would multiply 3 first, + // then do the appropriate paddings (linear alignment requirement and possible the + // nearest power-of-two for mipmaps), which results in the original pitch. + // + GetElemLib()->RestoreSurfaceInfo(elemMode, + expandX, + expandY, + &localIn.bpp, + &pOut->pixelPitch, + &pOut->pixelHeight); + } + + if (localIn.flags.qbStereo) + { + if (pOut->pStereoInfo) + { + ComputeQbStereoInfo(pOut); + } + } + + if (localIn.flags.volume) // For volume sliceSize equals to all z-slices + { + pOut->sliceSize = pOut->surfSize; + } + else // For array: sliceSize is likely to have slice-padding (the last one) + { + pOut->sliceSize = pOut->surfSize / pOut->depth; + + // array or cubemap + if (pIn->numSlices > 1) + { + // If this is the last slice then add the padding size to this slice + if (pIn->slice == (pIn->numSlices - 1)) + { + pOut->sliceSize += pOut->sliceSize * (pOut->depth - pIn->numSlices); + } + else if (m_configFlags.checkLast2DLevel) + { + // Reset last2DLevel flag if this is not the last array slice + pOut->last2DLevel = FALSE; + } + } + } + + pOut->pitchTileMax = pOut->pitch / 8 - 1; + pOut->heightTileMax = pOut->height / 8 - 1; + pOut->sliceTileMax = pOut->pitch * pOut->height / 64 - 1; + } + } + + ValidBaseAlignments(pOut->baseAlign); + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ComputeSurfaceInfo +* +* @brief +* Interface function stub of AddrComputeSurfaceInfo. 
+* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoord( + const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + const ADDR_SURFACE_FLAGS flags = {{0}}; + UINT_32 numSamples = GetNumFragments(pIn->numSamples, pIn->numFrags); + + // Try finding a macroModeIndex + INT_32 macroModeIndex = HwlComputeMacroModeIndex(input.tileIndex, + flags, + input.bpp, + numSamples, + input.pTileInfo, + &input.tileMode, + &input.tileType); + + // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info + if (macroModeIndex == TileIndexNoMacroIndex) + { + returnCode = HwlSetupTileCfg(input.bpp, input.tileIndex, macroModeIndex, + input.pTileInfo, &input.tileMode, &input.tileType); + } + // If macroModeIndex is invalid, then assert this is not macro tiled + else if (macroModeIndex == TileIndexInvalid) + { + ADDR_ASSERT(!IsMacroTiled(input.tileMode)); + } + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + returnCode = HwlComputeSurfaceAddrFromCoord(pIn, pOut); + + if (returnCode == ADDR_OK) + { + pOut->prtBlockIndex = static_cast(pOut->addr / (64 * 1024)); + } + } + } + + return returnCode; +} + +/** 
+**************************************************************************************************** +* Lib::ComputeSurfaceCoordFromAddr +* +* @brief +* Interface function stub of ComputeSurfaceCoordFromAddr. +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddr( + const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + const ADDR_SURFACE_FLAGS flags = {{0}}; + UINT_32 numSamples = GetNumFragments(pIn->numSamples, pIn->numFrags); + + // Try finding a macroModeIndex + INT_32 macroModeIndex = HwlComputeMacroModeIndex(input.tileIndex, + flags, + input.bpp, + numSamples, + input.pTileInfo, + &input.tileMode, + &input.tileType); + + // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info + if (macroModeIndex == TileIndexNoMacroIndex) + { + returnCode = HwlSetupTileCfg(input.bpp, input.tileIndex, macroModeIndex, + input.pTileInfo, &input.tileMode, &input.tileType); + } + // If macroModeIndex is invalid, then assert this is not macro tiled + else if (macroModeIndex == TileIndexInvalid) + { + ADDR_ASSERT(!IsMacroTiled(input.tileMode)); + } + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + returnCode = HwlComputeSurfaceCoordFromAddr(pIn, 
pOut); + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ComputeSliceTileSwizzle +* +* @brief +* Interface function stub of ComputeSliceTileSwizzle. +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeSliceTileSwizzle( + const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_SLICESWIZZLE_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_SLICESWIZZLE_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_SLICESWIZZLE_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, + input.pTileInfo, &input.tileMode); + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + returnCode = HwlComputeSliceTileSwizzle(pIn, pOut); + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ExtractBankPipeSwizzle +* +* @brief +* Interface function stub of AddrExtractBankPipeSwizzle. 
+* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ExtractBankPipeSwizzle( + const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, ///< [in] input structure + ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT)) || + (pOut->size != sizeof(ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + returnCode = HwlExtractBankPipeSwizzle(pIn, pOut); + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::CombineBankPipeSwizzle +* +* @brief +* Interface function stub of AddrCombineBankPipeSwizzle. 
+* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::CombineBankPipeSwizzle( + const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn, ///< [in] input structure + ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_INFO_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + returnCode = HwlCombineBankPipeSwizzle(pIn->bankSwizzle, + pIn->pipeSwizzle, + pIn->pTileInfo, + pIn->baseAddr, + &pOut->tileSwizzle); + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ComputeBaseSwizzle +* +* @brief +* Interface function stub of AddrCompueBaseSwizzle. 
+* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeBaseSwizzle( + const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, + ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_BASE_SWIZZLE_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + if (IsMacroTiled(pIn->tileMode)) + { + returnCode = HwlComputeBaseSwizzle(pIn, pOut); + } + else + { + pOut->tileSwizzle = 0; + } + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ComputeFmaskInfo +* +* @brief +* Interface function stub of ComputeFmaskInfo. 
+* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeFmaskInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut ///< [out] output structure + ) +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_INFO_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + // No thick MSAA + if (Thickness(pIn->tileMode) > 1) + { + returnCode = ADDR_INVALIDPARAMS; + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_FMASK_INFO_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + + if (pOut->pTileInfo) + { + // Use temp tile info for calcalation + input.pTileInfo = pOut->pTileInfo; + } + else + { + input.pTileInfo = &tileInfoNull; + } + + ADDR_SURFACE_FLAGS flags = {{0}}; + flags.fmask = 1; + + // Try finding a macroModeIndex + INT_32 macroModeIndex = HwlComputeMacroModeIndex(pIn->tileIndex, + flags, + HwlComputeFmaskBits(pIn, NULL), + pIn->numSamples, + input.pTileInfo, + &input.tileMode); + + // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info + if (macroModeIndex == TileIndexNoMacroIndex) + { + returnCode = HwlSetupTileCfg(0, input.tileIndex, macroModeIndex, + input.pTileInfo, &input.tileMode); + } + + ADDR_ASSERT(macroModeIndex != TileIndexInvalid); + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + if (pIn->numSamples > 1) + { + returnCode = HwlComputeFmaskInfo(pIn, pOut); + } + else + { + memset(pOut, 0, sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT)); + + returnCode = ADDR_INVALIDPARAMS; + } + } + } + + ValidBaseAlignments(pOut->baseAlign); + + return returnCode; +} + +/** 
+**************************************************************************************************** +* Lib::ComputeFmaskAddrFromCoord +* +* @brief +* Interface function stub of ComputeFmaskAddrFromCoord. +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeFmaskAddrFromCoord( + const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_ASSERT(pIn->numSamples > 1); + + if (pIn->numSamples > 1) + { + returnCode = HwlComputeFmaskAddrFromCoord(pIn, pOut); + } + else + { + returnCode = ADDR_INVALIDPARAMS; + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ComputeFmaskCoordFromAddr +* +* @brief +* Interface function stub of ComputeFmaskAddrFromCoord. 
+* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeFmaskCoordFromAddr( + const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_ASSERT(pIn->numSamples > 1); + + if (pIn->numSamples > 1) + { + returnCode = HwlComputeFmaskCoordFromAddr(pIn, pOut); + } + else + { + returnCode = ADDR_INVALIDPARAMS; + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ConvertTileInfoToHW +* +* @brief +* Convert tile info from real value to HW register value in HW layer +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ConvertTileInfoToHW( + const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] input structure + ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_CONVERT_TILEINFOTOHW_INPUT)) || + (pOut->size != sizeof(ADDR_CONVERT_TILEINFOTOHW_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_CONVERT_TILEINFOTOHW_INPUT input; + // if pIn->reverse is TRUE, indices are ignored + if (pIn->reverse == FALSE && UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + input.pTileInfo = &tileInfoNull; + + returnCode = 
HwlSetupTileCfg(input.bpp, input.tileIndex, + input.macroModeIndex, input.pTileInfo); + + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + returnCode = HwlConvertTileInfoToHW(pIn, pOut); + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ConvertTileIndex +* +* @brief +* Convert tile index to tile mode/type/info +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ConvertTileIndex( + const ADDR_CONVERT_TILEINDEX_INPUT* pIn, ///< [in] input structure + ADDR_CONVERT_TILEINDEX_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_CONVERT_TILEINDEX_INPUT)) || + (pOut->size != sizeof(ADDR_CONVERT_TILEINDEX_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + + returnCode = HwlSetupTileCfg(pIn->bpp, pIn->tileIndex, pIn->macroModeIndex, + pOut->pTileInfo, &pOut->tileMode, &pOut->tileType); + + if (returnCode == ADDR_OK && pIn->tileInfoHw) + { + ADDR_CONVERT_TILEINFOTOHW_INPUT hwInput = {0}; + ADDR_CONVERT_TILEINFOTOHW_OUTPUT hwOutput = {0}; + + hwInput.pTileInfo = pOut->pTileInfo; + hwInput.tileIndex = -1; + hwOutput.pTileInfo = pOut->pTileInfo; + + returnCode = HwlConvertTileInfoToHW(&hwInput, &hwOutput); + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::GetMacroModeIndex +* +* @brief +* Get macro mode index based on input info +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::GetMacroModeIndex( + const ADDR_GET_MACROMODEINDEX_INPUT* pIn, ///< [in] input structure + 
ADDR_GET_MACROMODEINDEX_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags()) + { + if ((pIn->size != sizeof(ADDR_GET_MACROMODEINDEX_INPUT)) || + (pOut->size != sizeof(ADDR_GET_MACROMODEINDEX_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfo = {0}; + pOut->macroModeIndex = HwlComputeMacroModeIndex(pIn->tileIndex, pIn->flags, pIn->bpp, + pIn->numFrags, &tileInfo); + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ConvertTileIndex1 +* +* @brief +* Convert tile index to tile mode/type/info +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ConvertTileIndex1( + const ADDR_CONVERT_TILEINDEX1_INPUT* pIn, ///< [in] input structure + ADDR_CONVERT_TILEINDEX_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_CONVERT_TILEINDEX1_INPUT)) || + (pOut->size != sizeof(ADDR_CONVERT_TILEINDEX_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_SURFACE_FLAGS flags = {{0}}; + + HwlComputeMacroModeIndex(pIn->tileIndex, flags, pIn->bpp, pIn->numSamples, + pOut->pTileInfo, &pOut->tileMode, &pOut->tileType); + + if (pIn->tileInfoHw) + { + ADDR_CONVERT_TILEINFOTOHW_INPUT hwInput = {0}; + ADDR_CONVERT_TILEINFOTOHW_OUTPUT hwOutput = {0}; + + hwInput.pTileInfo = pOut->pTileInfo; + hwInput.tileIndex = -1; + hwOutput.pTileInfo = pOut->pTileInfo; + + returnCode = HwlConvertTileInfoToHW(&hwInput, &hwOutput); + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::GetTileIndex +* +* 
@brief +* Get tile index from tile mode/type/info +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::GetTileIndex( + const ADDR_GET_TILEINDEX_INPUT* pIn, ///< [in] input structure + ADDR_GET_TILEINDEX_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_GET_TILEINDEX_INPUT)) || + (pOut->size != sizeof(ADDR_GET_TILEINDEX_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + returnCode = HwlGetTileIndex(pIn, pOut); + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::Thickness +* +* @brief +* Get tile mode thickness +* +* @return +* Tile mode thickness +**************************************************************************************************** +*/ +UINT_32 Lib::Thickness( + AddrTileMode tileMode) ///< [in] tile mode +{ + return ModeFlags[tileMode].thickness; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// CMASK/HTILE +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Lib::ComputeHtileInfo +* +* @brief +* Interface function stub of AddrComputeHtilenfo +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeHtileInfo( + const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + BOOL_32 isWidth8 = (pIn->blockWidth == 8) ? 
TRUE : FALSE; + BOOL_32 isHeight8 = (pIn->blockHeight == 8) ? TRUE : FALSE; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_HTILE_INFO_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + if (pIn->flags.tcCompatible) + { + const UINT_32 sliceSize = pIn->pitch * pIn->height * 4 / (8 * 8); + const UINT_32 align = HwlGetPipes(pIn->pTileInfo) * pIn->pTileInfo->banks * m_pipeInterleaveBytes; + + if (pIn->numSlices > 1) + { + const UINT_32 surfBytes = (sliceSize * pIn->numSlices); + + pOut->sliceSize = sliceSize; + pOut->htileBytes = pIn->flags.skipTcCompatSizeAlign ? + surfBytes : PowTwoAlign(surfBytes, align); + pOut->sliceInterleaved = ((sliceSize % align) != 0) ? TRUE : FALSE; + } + else + { + pOut->sliceSize = pIn->flags.skipTcCompatSizeAlign ? + sliceSize : PowTwoAlign(sliceSize, align); + pOut->htileBytes = pOut->sliceSize; + pOut->sliceInterleaved = FALSE; + } + + pOut->nextMipLevelCompressible = ((sliceSize % align) == 0) ? 
TRUE : FALSE; + + pOut->pitch = pIn->pitch; + pOut->height = pIn->height; + pOut->baseAlign = align; + pOut->macroWidth = 0; + pOut->macroHeight = 0; + pOut->bpp = 32; + } + else + { + pOut->bpp = ComputeHtileInfo(pIn->flags, + pIn->pitch, + pIn->height, + pIn->numSlices, + pIn->isLinear, + isWidth8, + isHeight8, + pIn->pTileInfo, + &pOut->pitch, + &pOut->height, + &pOut->htileBytes, + &pOut->macroWidth, + &pOut->macroHeight, + &pOut->sliceSize, + &pOut->baseAlign); + } + } + } + + ValidMetaBaseAlignments(pOut->baseAlign); + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ComputeCmaskInfo +* +* @brief +* Interface function stub of AddrComputeCmaskInfo +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeCmaskInfo( + const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_INFO_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_CMASK_INFO_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_CMASK_INFO_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + returnCode = ComputeCmaskInfo(pIn->flags, + pIn->pitch, + pIn->height, + pIn->numSlices, + pIn->isLinear, + pIn->pTileInfo, + &pOut->pitch, + &pOut->height, + &pOut->cmaskBytes, + &pOut->macroWidth, + &pOut->macroHeight, + 
&pOut->sliceSize, + &pOut->baseAlign, + &pOut->blockMax); + } + } + + ValidMetaBaseAlignments(pOut->baseAlign); + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ComputeDccInfo +* +* @brief +* Interface function to compute DCC key info +* +* @return +* return code of HwlComputeDccInfo +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeDccInfo( + const ADDR_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE ret = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_DCCINFO_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT))) + { + ret = ADDR_PARAMSIZEMISMATCH; + } + } + + if (ret == ADDR_OK) + { + ADDR_COMPUTE_DCCINFO_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + + ret = HwlSetupTileCfg(input.bpp, input.tileIndex, input.macroModeIndex, + &input.tileInfo, &input.tileMode); + + pIn = &input; + } + + if (ret == ADDR_OK) + { + ret = HwlComputeDccInfo(pIn, pOut); + + ValidMetaBaseAlignments(pOut->dccRamBaseAlign); + } + } + + return ret; +} + +/** +**************************************************************************************************** +* Lib::ComputeHtileAddrFromCoord +* +* @brief +* Interface function stub of AddrComputeHtileAddrFromCoord +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeHtileAddrFromCoord( + const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + BOOL_32 isWidth8 = (pIn->blockWidth == 8) ? 
TRUE : FALSE; + BOOL_32 isHeight8 = (pIn->blockHeight == 8) ? TRUE : FALSE; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + if (pIn->flags.tcCompatible) + { + HwlComputeHtileAddrFromCoord(pIn, pOut); + } + else + { + pOut->addr = HwlComputeXmaskAddrFromCoord(pIn->pitch, + pIn->height, + pIn->x, + pIn->y, + pIn->slice, + pIn->numSlices, + 1, + pIn->isLinear, + isWidth8, + isHeight8, + pIn->pTileInfo, + &pOut->bitPosition); + } + } + } + + return returnCode; + +} + +/** +**************************************************************************************************** +* Lib::ComputeHtileCoordFromAddr +* +* @brief +* Interface function stub of AddrComputeHtileCoordFromAddr +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeHtileCoordFromAddr( + const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + BOOL_32 isWidth8 = (pIn->blockWidth == 8) ? TRUE : FALSE; + BOOL_32 isHeight8 = (pIn->blockHeight == 8) ? 
TRUE : FALSE; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + HwlComputeXmaskCoordFromAddr(pIn->addr, + pIn->bitPosition, + pIn->pitch, + pIn->height, + pIn->numSlices, + 1, + pIn->isLinear, + isWidth8, + isHeight8, + pIn->pTileInfo, + &pOut->x, + &pOut->y, + &pOut->slice); + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ComputeCmaskAddrFromCoord +* +* @brief +* Interface function stub of AddrComputeCmaskAddrFromCoord +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeCmaskAddrFromCoord( + const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + 
input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + if (pIn->flags.tcCompatible == TRUE) + { + returnCode = HwlComputeCmaskAddrFromCoord(pIn, pOut); + } + else + { + pOut->addr = HwlComputeXmaskAddrFromCoord(pIn->pitch, + pIn->height, + pIn->x, + pIn->y, + pIn->slice, + pIn->numSlices, + 2, + pIn->isLinear, + FALSE, //this is cmask, isWidth8 is not needed + FALSE, //this is cmask, isHeight8 is not needed + pIn->pTileInfo, + &pOut->bitPosition); + } + + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ComputeCmaskCoordFromAddr +* +* @brief +* Interface function stub of AddrComputeCmaskCoordFromAddr +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeCmaskCoordFromAddr( + const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + HwlComputeXmaskCoordFromAddr(pIn->addr, + pIn->bitPosition, + 
pIn->pitch, + pIn->height, + pIn->numSlices, + 2, + pIn->isLinear, + FALSE, + FALSE, + pIn->pTileInfo, + &pOut->x, + &pOut->y, + &pOut->slice); + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ComputeTileDataWidthAndHeight +* +* @brief +* Compute the squared cache shape for per-tile data (CMASK and HTILE) +* +* @return +* N/A +* +* @note +* MacroWidth and macroHeight are measured in pixels +**************************************************************************************************** +*/ +VOID Lib::ComputeTileDataWidthAndHeight( + UINT_32 bpp, ///< [in] bits per pixel + UINT_32 cacheBits, ///< [in] bits of cache + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + UINT_32* pMacroWidth, ///< [out] macro tile width + UINT_32* pMacroHeight ///< [out] macro tile height + ) const +{ + UINT_32 height = 1; + UINT_32 width = cacheBits / bpp; + UINT_32 pipes = HwlGetPipes(pTileInfo); + + // Double height until the macro-tile is close to square + // Height can only be doubled if width is even + + while ((width > height * 2 * pipes) && !(width & 1)) + { + width /= 2; + height *= 2; + } + + *pMacroWidth = 8 * width; + *pMacroHeight = 8 * height * pipes; + + // Note: The above iterative comptuation is equivalent to the following + // + //int log2_height = ((log2(cacheBits)-log2(bpp)-log2(pipes))/2); + //int macroHeight = pow2( 3+log2(pipes)+log2_height ); +} + +/** +**************************************************************************************************** +* Lib::HwlComputeTileDataWidthAndHeightLinear +* +* @brief +* Compute the squared cache shape for per-tile data (CMASK and HTILE) for linear layout +* +* @return +* N/A +* +* @note +* MacroWidth and macroHeight are measured in pixels +**************************************************************************************************** +*/ +VOID Lib::HwlComputeTileDataWidthAndHeightLinear( + UINT_32* pMacroWidth, 
///< [out] macro tile width + UINT_32* pMacroHeight, ///< [out] macro tile height + UINT_32 bpp, ///< [in] bits per pixel + ADDR_TILEINFO* pTileInfo ///< [in] tile info + ) const +{ + ADDR_ASSERT(bpp != 4); // Cmask does not support linear layout prior to SI + *pMacroWidth = 8 * 512 / bpp; // Align width to 512-bit memory accesses + *pMacroHeight = 8 * m_pipes; // Align height to number of pipes +} + +/** +**************************************************************************************************** +* Lib::ComputeHtileInfo +* +* @brief +* Compute htile pitch,width, bytes per 2D slice +* +* @return +* Htile bpp i.e. How many bits for an 8x8 tile +* Also returns by output parameters: +* *Htile pitch, height, total size in bytes, macro-tile dimensions and slice size* +**************************************************************************************************** +*/ +UINT_32 Lib::ComputeHtileInfo( + ADDR_HTILE_FLAGS flags, ///< [in] htile flags + UINT_32 pitchIn, ///< [in] pitch input + UINT_32 heightIn, ///< [in] height input + UINT_32 numSlices, ///< [in] number of slices + BOOL_32 isLinear, ///< [in] if it is linear mode + BOOL_32 isWidth8, ///< [in] if htile block width is 8 + BOOL_32 isHeight8, ///< [in] if htile block height is 8 + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + UINT_32* pPitchOut, ///< [out] pitch output + UINT_32* pHeightOut, ///< [out] height output + UINT_64* pHtileBytes, ///< [out] bytes per 2D slice + UINT_32* pMacroWidth, ///< [out] macro-tile width in pixels + UINT_32* pMacroHeight, ///< [out] macro-tile width in pixels + UINT_64* pSliceSize, ///< [out] slice size in bytes + UINT_32* pBaseAlign ///< [out] base alignment + ) const +{ + + UINT_32 macroWidth; + UINT_32 macroHeight; + UINT_32 baseAlign; + UINT_64 surfBytes; + UINT_64 sliceBytes; + + numSlices = Max(1u, numSlices); + + const UINT_32 bpp = HwlComputeHtileBpp(isWidth8, isHeight8); + const UINT_32 cacheBits = HtileCacheBits; + + if (isLinear) + { + 
HwlComputeTileDataWidthAndHeightLinear(¯oWidth, + ¯oHeight, + bpp, + pTileInfo); + } + else + { + ComputeTileDataWidthAndHeight(bpp, + cacheBits, + pTileInfo, + ¯oWidth, + ¯oHeight); + } + + *pPitchOut = PowTwoAlign(pitchIn, macroWidth); + *pHeightOut = PowTwoAlign(heightIn, macroHeight); + + baseAlign = HwlComputeHtileBaseAlign(flags.tcCompatible, isLinear, pTileInfo); + + surfBytes = HwlComputeHtileBytes(*pPitchOut, + *pHeightOut, + bpp, + isLinear, + numSlices, + &sliceBytes, + baseAlign); + + *pHtileBytes = surfBytes; + + // + // Use SafeAssign since they are optional + // + SafeAssign(pMacroWidth, macroWidth); + + SafeAssign(pMacroHeight, macroHeight); + + SafeAssign(pSliceSize, sliceBytes); + + SafeAssign(pBaseAlign, baseAlign); + + return bpp; +} + +/** +**************************************************************************************************** +* Lib::ComputeCmaskBaseAlign +* +* @brief +* Compute cmask base alignment +* +* @return +* Cmask base alignment +**************************************************************************************************** +*/ +UINT_32 Lib::ComputeCmaskBaseAlign( + ADDR_CMASK_FLAGS flags, ///< [in] Cmask flags + ADDR_TILEINFO* pTileInfo ///< [in] Tile info + ) const +{ + UINT_32 baseAlign = m_pipeInterleaveBytes * HwlGetPipes(pTileInfo); + + if (flags.tcCompatible) + { + ADDR_ASSERT(pTileInfo != NULL); + if (pTileInfo) + { + baseAlign *= pTileInfo->banks; + } + } + + return baseAlign; +} + +/** +**************************************************************************************************** +* Lib::ComputeCmaskBytes +* +* @brief +* Compute cmask size in bytes +* +* @return +* Cmask size in bytes +**************************************************************************************************** +*/ +UINT_64 Lib::ComputeCmaskBytes( + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 numSlices ///< [in] number of slices + ) const +{ + return BITS_TO_BYTES(static_cast(pitch) * height * 
numSlices * CmaskElemBits) / + MicroTilePixels; +} + +/** +**************************************************************************************************** +* Lib::ComputeCmaskInfo +* +* @brief +* Compute cmask pitch,width, bytes per 2D slice +* +* @return +* BlockMax. Also by output parameters: Cmask pitch,height, total size in bytes, +* macro-tile dimensions +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeCmaskInfo( + ADDR_CMASK_FLAGS flags, ///< [in] cmask flags + UINT_32 pitchIn, ///< [in] pitch input + UINT_32 heightIn, ///< [in] height input + UINT_32 numSlices, ///< [in] number of slices + BOOL_32 isLinear, ///< [in] is linear mode + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + UINT_32* pPitchOut, ///< [out] pitch output + UINT_32* pHeightOut, ///< [out] height output + UINT_64* pCmaskBytes, ///< [out] bytes per 2D slice + UINT_32* pMacroWidth, ///< [out] macro-tile width in pixels + UINT_32* pMacroHeight, ///< [out] macro-tile width in pixels + UINT_64* pSliceSize, ///< [out] slice size in bytes + UINT_32* pBaseAlign, ///< [out] base alignment + UINT_32* pBlockMax ///< [out] block max == slice / 128 / 128 - 1 + ) const +{ + UINT_32 macroWidth; + UINT_32 macroHeight; + UINT_32 baseAlign; + UINT_64 surfBytes; + UINT_64 sliceBytes; + + numSlices = Max(1u, numSlices); + + const UINT_32 bpp = CmaskElemBits; + const UINT_32 cacheBits = CmaskCacheBits; + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (isLinear) + { + HwlComputeTileDataWidthAndHeightLinear(¯oWidth, + ¯oHeight, + bpp, + pTileInfo); + } + else + { + ComputeTileDataWidthAndHeight(bpp, + cacheBits, + pTileInfo, + ¯oWidth, + ¯oHeight); + } + + *pPitchOut = (pitchIn + macroWidth - 1) & ~(macroWidth - 1); + *pHeightOut = (heightIn + macroHeight - 1) & ~(macroHeight - 1); + + sliceBytes = ComputeCmaskBytes(*pPitchOut, + *pHeightOut, + 1); + + baseAlign = ComputeCmaskBaseAlign(flags, pTileInfo); + + while 
(sliceBytes % baseAlign) + { + *pHeightOut += macroHeight; + + sliceBytes = ComputeCmaskBytes(*pPitchOut, + *pHeightOut, + 1); + } + + surfBytes = sliceBytes * numSlices; + + *pCmaskBytes = surfBytes; + + // + // Use SafeAssign since they are optional + // + SafeAssign(pMacroWidth, macroWidth); + + SafeAssign(pMacroHeight, macroHeight); + + SafeAssign(pBaseAlign, baseAlign); + + SafeAssign(pSliceSize, sliceBytes); + + UINT_32 slice = (*pPitchOut) * (*pHeightOut); + UINT_32 blockMax = slice / 128 / 128 - 1; + +#if DEBUG + if (slice % (64*256) != 0) + { + ADDR_ASSERT_ALWAYS(); + } +#endif //DEBUG + + UINT_32 maxBlockMax = HwlGetMaxCmaskBlockMax(); + + if (blockMax > maxBlockMax) + { + blockMax = maxBlockMax; + returnCode = ADDR_INVALIDPARAMS; + } + + SafeAssign(pBlockMax, blockMax); + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ComputeXmaskCoordYFromPipe +* +* @brief +* Compute the Y coord from pipe number for cmask/htile +* +* @return +* Y coordinate +* +**************************************************************************************************** +*/ +UINT_32 Lib::ComputeXmaskCoordYFromPipe( + UINT_32 pipe, ///< [in] pipe number + UINT_32 x ///< [in] x coordinate + ) const +{ + UINT_32 pipeBit0; + UINT_32 pipeBit1; + UINT_32 xBit0; + UINT_32 xBit1; + UINT_32 yBit0; + UINT_32 yBit1; + + UINT_32 y = 0; + + UINT_32 numPipes = m_pipes; // SI has its implementation + // + // Convert pipe + x to y coordinate. 
+ // + switch (numPipes) + { + case 1: + // + // 1 pipe + // + // p0 = 0 + // + y = 0; + break; + case 2: + // + // 2 pipes + // + // p0 = x0 ^ y0 + // + // y0 = p0 ^ x0 + // + pipeBit0 = pipe & 0x1; + + xBit0 = x & 0x1; + + yBit0 = pipeBit0 ^ xBit0; + + y = yBit0; + break; + case 4: + // + // 4 pipes + // + // p0 = x1 ^ y0 + // p1 = x0 ^ y1 + // + // y0 = p0 ^ x1 + // y1 = p1 ^ x0 + // + pipeBit0 = pipe & 0x1; + pipeBit1 = (pipe & 0x2) >> 1; + + xBit0 = x & 0x1; + xBit1 = (x & 0x2) >> 1; + + yBit0 = pipeBit0 ^ xBit1; + yBit1 = pipeBit1 ^ xBit0; + + y = (yBit0 | + (yBit1 << 1)); + break; + case 8: + // + // 8 pipes + // + // r600 and r800 have different method + // + y = HwlComputeXmaskCoordYFrom8Pipe(pipe, x); + break; + default: + break; + } + return y; +} + +/** +**************************************************************************************************** +* Lib::HwlComputeXmaskCoordFromAddr +* +* @brief +* Compute the coord from an address of a cmask/htile +* +* @return +* N/A +* +* @note +* This method is reused by htile, so rename to Xmask +**************************************************************************************************** +*/ +VOID Lib::HwlComputeXmaskCoordFromAddr( + UINT_64 addr, ///< [in] address + UINT_32 bitPosition, ///< [in] bitPosition in a byte + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 numSlices, ///< [in] number of slices + UINT_32 factor, ///< [in] factor that indicates cmask or htile + BOOL_32 isLinear, ///< [in] linear or tiled HTILE layout + BOOL_32 isWidth8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value + BOOL_32 isHeight8, ///< [in] TRUE if width is 8, FALSE means 4. 
It's register value + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + UINT_32* pX, ///< [out] x coord + UINT_32* pY, ///< [out] y coord + UINT_32* pSlice ///< [out] slice index + ) const +{ + UINT_32 pipe; + UINT_32 numPipes; + UINT_32 numGroupBits; + (void)numGroupBits; + UINT_32 numPipeBits; + UINT_32 macroTilePitch; + UINT_32 macroTileHeight; + + UINT_64 bitAddr; + + UINT_32 microTileCoordY; + + UINT_32 elemBits; + + UINT_32 pitchAligned = pitch; + UINT_32 heightAligned = height; + UINT_64 totalBytes; + + UINT_64 elemOffset; + + UINT_64 macroIndex; + UINT_32 microIndex; + + UINT_64 macroNumber; + UINT_32 microNumber; + + UINT_32 macroX; + UINT_32 macroY; + UINT_32 macroZ; + + UINT_32 microX; + UINT_32 microY; + + UINT_32 tilesPerMacro; + UINT_32 macrosPerPitch; + UINT_32 macrosPerSlice; + + // + // Extract pipe. + // + numPipes = HwlGetPipes(pTileInfo); + pipe = ComputePipeFromAddr(addr, numPipes); + + // + // Compute the number of group and pipe bits. + // + numGroupBits = Log2(m_pipeInterleaveBytes); + numPipeBits = Log2(numPipes); + + UINT_32 groupBits = 8 * m_pipeInterleaveBytes; + UINT_32 pipes = numPipes; + + // + // Compute the micro tile size, in bits. And macro tile pitch and height. + // + if (factor == 2) //CMASK + { + ADDR_CMASK_FLAGS flags = {{0}}; + + elemBits = CmaskElemBits; + + ComputeCmaskInfo(flags, + pitch, + height, + numSlices, + isLinear, + pTileInfo, + &pitchAligned, + &heightAligned, + &totalBytes, + ¯oTilePitch, + ¯oTileHeight); + } + else //HTILE + { + ADDR_HTILE_FLAGS flags = {{0}}; + + if (factor != 1) + { + factor = 1; + } + + elemBits = HwlComputeHtileBpp(isWidth8, isHeight8); + + ComputeHtileInfo(flags, + pitch, + height, + numSlices, + isLinear, + isWidth8, + isHeight8, + pTileInfo, + &pitchAligned, + &heightAligned, + &totalBytes, + ¯oTilePitch, + ¯oTileHeight); + } + + // Should use aligned dims + // + pitch = pitchAligned; + height = heightAligned; + + // + // Convert byte address to bit address. 
+ // + bitAddr = BYTES_TO_BITS(addr) + bitPosition; + + // + // Remove pipe bits from address. + // + + bitAddr = (bitAddr % groupBits) + ((bitAddr/groupBits/pipes)*groupBits); + + elemOffset = bitAddr / elemBits; + + tilesPerMacro = (macroTilePitch/factor) * macroTileHeight / MicroTilePixels >> numPipeBits; + + macrosPerPitch = pitch / (macroTilePitch/factor); + macrosPerSlice = macrosPerPitch * height / macroTileHeight; + + macroIndex = elemOffset / factor / tilesPerMacro; + microIndex = static_cast(elemOffset % (tilesPerMacro * factor)); + + macroNumber = macroIndex * factor + microIndex % factor; + microNumber = microIndex / factor; + + macroX = static_cast((macroNumber % macrosPerPitch)); + macroY = static_cast((macroNumber % macrosPerSlice) / macrosPerPitch); + macroZ = static_cast((macroNumber / macrosPerSlice)); + + microX = microNumber % (macroTilePitch / factor / MicroTileWidth); + microY = (microNumber / (macroTilePitch / factor / MicroTileHeight)); + + *pX = macroX * (macroTilePitch/factor) + microX * MicroTileWidth; + *pY = macroY * macroTileHeight + (microY * MicroTileHeight << numPipeBits); + *pSlice = macroZ; + + microTileCoordY = ComputeXmaskCoordYFromPipe(pipe, + *pX/MicroTileWidth); + + // + // Assemble final coordinates. 
+ // + *pY += microTileCoordY * MicroTileHeight; + +} + +/** +**************************************************************************************************** +* Lib::HwlComputeXmaskAddrFromCoord +* +* @brief +* Compute the address from an address of cmask (prior to si) +* +* @return +* Address in bytes +* +**************************************************************************************************** +*/ +UINT_64 Lib::HwlComputeXmaskAddrFromCoord( + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 x, ///< [in] x coord + UINT_32 y, ///< [in] y coord + UINT_32 slice, ///< [in] slice/depth index + UINT_32 numSlices, ///< [in] number of slices + UINT_32 factor, ///< [in] factor that indicates cmask(2) or htile(1) + BOOL_32 isLinear, ///< [in] linear or tiled HTILE layout + BOOL_32 isWidth8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value + BOOL_32 isHeight8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + UINT_32* pBitPosition ///< [out] bit position inside a byte + ) const +{ + UINT_64 addr; + UINT_32 numGroupBits; + UINT_32 numPipeBits; + UINT_32 newPitch = 0; + UINT_32 newHeight = 0; + UINT_64 sliceBytes = 0; + UINT_64 totalBytes = 0; + UINT_64 sliceOffset; + UINT_32 pipe; + UINT_32 macroTileWidth; + UINT_32 macroTileHeight; + UINT_32 macroTilesPerRow; + UINT_32 macroTileBytes; + UINT_32 macroTileIndexX; + UINT_32 macroTileIndexY; + UINT_64 macroTileOffset; + UINT_32 pixelBytesPerRow; + UINT_32 pixelOffsetX; + UINT_32 pixelOffsetY; + UINT_32 pixelOffset; + UINT_64 totalOffset; + UINT_64 offsetLo; + UINT_64 offsetHi; + UINT_64 groupMask; + + UINT_32 elemBits = 0; + + UINT_32 numPipes = m_pipes; // This function is accessed prior to si only + + if (factor == 2) //CMASK + { + elemBits = CmaskElemBits; + + // For asics before SI, cmask is always tiled + isLinear = FALSE; + } + else //HTILE + { + if (factor != 1) // Fix compile warning + { + factor = 
1; + } + + elemBits = HwlComputeHtileBpp(isWidth8, isHeight8); + } + + // + // Compute the number of group bits and pipe bits. + // + numGroupBits = Log2(m_pipeInterleaveBytes); + numPipeBits = Log2(numPipes); + + // + // Compute macro tile dimensions. + // + if (factor == 2) // CMASK + { + ADDR_CMASK_FLAGS flags = {{0}}; + + ComputeCmaskInfo(flags, + pitch, + height, + numSlices, + isLinear, + pTileInfo, + &newPitch, + &newHeight, + &totalBytes, + ¯oTileWidth, + ¯oTileHeight); + + sliceBytes = totalBytes / numSlices; + } + else // HTILE + { + ADDR_HTILE_FLAGS flags = {{0}}; + + ComputeHtileInfo(flags, + pitch, + height, + numSlices, + isLinear, + isWidth8, + isHeight8, + pTileInfo, + &newPitch, + &newHeight, + &totalBytes, + ¯oTileWidth, + ¯oTileHeight, + &sliceBytes); + } + + sliceOffset = slice * sliceBytes; + + // + // Get the pipe. Note that neither slice rotation nor pipe swizzling apply for CMASK. + // + pipe = ComputePipeFromCoord(x, + y, + 0, + ADDR_TM_2D_TILED_THIN1, + 0, + FALSE, + pTileInfo); + + // + // Compute the number of macro tiles per row. + // + macroTilesPerRow = newPitch / macroTileWidth; + + // + // Compute the number of bytes per macro tile. + // + macroTileBytes = BITS_TO_BYTES((macroTileWidth * macroTileHeight * elemBits) / MicroTilePixels); + + // + // Compute the offset to the macro tile containing the specified coordinate. + // + macroTileIndexX = x / macroTileWidth; + macroTileIndexY = y / macroTileHeight; + macroTileOffset = ((macroTileIndexY * macroTilesPerRow) + macroTileIndexX) * macroTileBytes; + + // + // Compute the pixel offset within the macro tile. + // + pixelBytesPerRow = BITS_TO_BYTES(macroTileWidth * elemBits) / MicroTileWidth; + + // + // The nibbles are interleaved (see below), so the part of the offset relative to the x + // coordinate repeats halfway across the row. 
(Not for HTILE) + // + if (factor == 2) + { + pixelOffsetX = (x % (macroTileWidth / 2)) / MicroTileWidth; + } + else + { + pixelOffsetX = (x % (macroTileWidth)) / MicroTileWidth * BITS_TO_BYTES(elemBits); + } + + // + // Compute the y offset within the macro tile. + // + pixelOffsetY = (((y % macroTileHeight) / MicroTileHeight) / numPipes) * pixelBytesPerRow; + + pixelOffset = pixelOffsetX + pixelOffsetY; + + // + // Combine the slice offset and macro tile offset with the pixel offset, accounting for the + // pipe bits in the middle of the address. + // + totalOffset = ((sliceOffset + macroTileOffset) >> numPipeBits) + pixelOffset; + + // + // Split the offset to put some bits below the pipe bits and some above. + // + groupMask = (1 << numGroupBits) - 1; + offsetLo = totalOffset & groupMask; + offsetHi = (totalOffset & ~groupMask) << numPipeBits; + + // + // Assemble the address from its components. + // + addr = offsetLo; + addr |= offsetHi; + // This is to remove warning with /analyze option + UINT_32 pipeBits = pipe << numGroupBits; + addr |= pipeBits; + + // + // Compute the bit position. The lower nibble is used when the x coordinate within the macro + // tile is less than half of the macro tile width, and the upper nibble is used when the x + // coordinate within the macro tile is greater than or equal to half the macro tile width. + // + *pBitPosition = ((x % macroTileWidth) < (macroTileWidth / factor)) ? 
0 : 4; + + return addr; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Surface Addressing Shared +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Lib::ComputeSurfaceAddrFromCoordLinear +* +* @brief +* Compute address from coord for linear surface +* +* @return +* Address in bytes +* +**************************************************************************************************** +*/ +UINT_64 Lib::ComputeSurfaceAddrFromCoordLinear( + UINT_32 x, ///< [in] x coord + UINT_32 y, ///< [in] y coord + UINT_32 slice, ///< [in] slice/depth index + UINT_32 sample, ///< [in] sample index + UINT_32 bpp, ///< [in] bits per pixel + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 numSlices, ///< [in] number of slices + UINT_32* pBitPosition ///< [out] bit position inside a byte + ) const +{ + const UINT_64 sliceSize = static_cast(pitch) * height; + + UINT_64 sliceOffset = (slice + sample * numSlices)* sliceSize; + UINT_64 rowOffset = static_cast(y) * pitch; + UINT_64 pixOffset = x; + + UINT_64 addr = (sliceOffset + rowOffset + pixOffset) * bpp; + + *pBitPosition = static_cast(addr % 8); + addr /= 8; + + return addr; +} + +/** +**************************************************************************************************** +* Lib::ComputeSurfaceCoordFromAddrLinear +* +* @brief +* Compute the coord from an address of a linear surface +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID Lib::ComputeSurfaceCoordFromAddrLinear( + UINT_64 addr, ///< [in] address + UINT_32 bitPosition, ///< [in] bitPosition in a byte + UINT_32 bpp, ///< [in] bits per pixel + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 numSlices, ///< 
[in] number of slices + UINT_32* pX, ///< [out] x coord + UINT_32* pY, ///< [out] y coord + UINT_32* pSlice, ///< [out] slice/depth index + UINT_32* pSample ///< [out] sample index + ) const +{ + const UINT_64 sliceSize = static_cast(pitch) * height; + const UINT_64 linearOffset = (BYTES_TO_BITS(addr) + bitPosition) / bpp; + + *pX = static_cast((linearOffset % sliceSize) % pitch); + *pY = static_cast((linearOffset % sliceSize) / pitch % height); + *pSlice = static_cast((linearOffset / sliceSize) % numSlices); + *pSample = static_cast((linearOffset / sliceSize) / numSlices); +} + +/** +**************************************************************************************************** +* Lib::ComputeSurfaceCoordFromAddrMicroTiled +* +* @brief +* Compute the coord from an address of a micro tiled surface +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID Lib::ComputeSurfaceCoordFromAddrMicroTiled( + UINT_64 addr, ///< [in] address + UINT_32 bitPosition, ///< [in] bitPosition in a byte + UINT_32 bpp, ///< [in] bits per pixel + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 numSamples, ///< [in] number of samples + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 tileBase, ///< [in] base offset within a tile + UINT_32 compBits, ///< [in] component bits actually needed(for planar surface) + UINT_32* pX, ///< [out] x coord + UINT_32* pY, ///< [out] y coord + UINT_32* pSlice, ///< [out] slice/depth index + UINT_32* pSample, ///< [out] sample index, + AddrTileType microTileType, ///< [in] micro tiling order + BOOL_32 isDepthSampleOrder ///< [in] TRUE if in depth sample order + ) const +{ + UINT_64 bitAddr; + UINT_32 microTileThickness; + UINT_32 microTileBits; + UINT_64 sliceBits; + UINT_64 rowBits; + UINT_32 sliceIndex; + UINT_32 microTileCoordX; + UINT_32 microTileCoordY; + UINT_32 pixelOffset; + UINT_32 pixelCoordX = 0; + UINT_32 pixelCoordY = 0; + UINT_32 
pixelCoordZ = 0; + UINT_32 pixelCoordS = 0; + + // + // Convert byte address to bit address. + // + bitAddr = BYTES_TO_BITS(addr) + bitPosition; + + // + // Compute the micro tile size, in bits. + // + switch (tileMode) + { + case ADDR_TM_1D_TILED_THICK: + microTileThickness = ThickTileThickness; + break; + default: + microTileThickness = 1; + break; + } + + microTileBits = MicroTilePixels * microTileThickness * bpp * numSamples; + + // + // Compute number of bits per slice and number of bits per row of micro tiles. + // + sliceBits = static_cast(pitch) * height * microTileThickness * bpp * numSamples; + + rowBits = (pitch / MicroTileWidth) * microTileBits; + + // + // Extract the slice index. + // + sliceIndex = static_cast(bitAddr / sliceBits); + bitAddr -= sliceIndex * sliceBits; + + // + // Extract the y coordinate of the micro tile. + // + microTileCoordY = static_cast(bitAddr / rowBits) * MicroTileHeight; + bitAddr -= (microTileCoordY / MicroTileHeight) * rowBits; + + // + // Extract the x coordinate of the micro tile. + // + microTileCoordX = static_cast(bitAddr / microTileBits) * MicroTileWidth; + + // + // Compute the pixel offset within the micro tile. + // + pixelOffset = static_cast(bitAddr % microTileBits); + + // + // Extract pixel coordinates from the offset. + // + HwlComputePixelCoordFromOffset(pixelOffset, + bpp, + numSamples, + tileMode, + tileBase, + compBits, + &pixelCoordX, + &pixelCoordY, + &pixelCoordZ, + &pixelCoordS, + microTileType, + isDepthSampleOrder); + + // + // Assemble final coordinates. 
+ // + *pX = microTileCoordX + pixelCoordX; + *pY = microTileCoordY + pixelCoordY; + *pSlice = (sliceIndex * microTileThickness) + pixelCoordZ; + *pSample = pixelCoordS; + + if (microTileThickness > 1) + { + *pSample = 0; + } +} + +/** +**************************************************************************************************** +* Lib::ComputePipeFromAddr +* +* @brief +* Compute the pipe number from an address +* +* @return +* Pipe number +* +**************************************************************************************************** +*/ +UINT_32 Lib::ComputePipeFromAddr( + UINT_64 addr, ///< [in] address + UINT_32 numPipes ///< [in] number of banks + ) const +{ + UINT_32 pipe; + + UINT_32 groupBytes = m_pipeInterleaveBytes; //just different terms + + // R600 + // The LSBs of the address are arranged as follows: + // bank | pipe | group + // + // To get the pipe number, shift off the group bits and mask the pipe bits. + // + + // R800 + // The LSBs of the address are arranged as follows: + // bank | bankInterleave | pipe | pipeInterleave + // + // To get the pipe number, shift off the pipe interleave bits and mask the pipe bits. 
+ // + + pipe = static_cast(addr >> Log2(groupBytes)) & (numPipes - 1); + + return pipe; +} + +/** +**************************************************************************************************** +* Lib::ComputeMicroTileEquation +* +* @brief +* Compute micro tile equation +* +* @return +* If equation can be computed +* +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeMicroTileEquation( + UINT_32 log2BytesPP, ///< [in] log2 of bytes per pixel + AddrTileMode tileMode, ///< [in] tile mode + AddrTileType microTileType, ///< [in] pixel order in display/non-display mode + ADDR_EQUATION* pEquation ///< [out] equation + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + for (UINT_32 i = 0; i < log2BytesPP; i++) + { + pEquation->addr[i].valid = 1; + pEquation->addr[i].channel = 0; + pEquation->addr[i].index = i; + } + + ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[log2BytesPP]; + + ADDR_CHANNEL_SETTING x0 = InitChannel(1, 0, log2BytesPP + 0); + ADDR_CHANNEL_SETTING x1 = InitChannel(1, 0, log2BytesPP + 1); + ADDR_CHANNEL_SETTING x2 = InitChannel(1, 0, log2BytesPP + 2); + ADDR_CHANNEL_SETTING y0 = InitChannel(1, 1, 0); + ADDR_CHANNEL_SETTING y1 = InitChannel(1, 1, 1); + ADDR_CHANNEL_SETTING y2 = InitChannel(1, 1, 2); + ADDR_CHANNEL_SETTING z0 = InitChannel(1, 2, 0); + ADDR_CHANNEL_SETTING z1 = InitChannel(1, 2, 1); + ADDR_CHANNEL_SETTING z2 = InitChannel(1, 2, 2); + + UINT_32 thickness = Thickness(tileMode); + UINT_32 bpp = 1 << (log2BytesPP + 3); + + if (microTileType != ADDR_THICK) + { + if (microTileType == ADDR_DISPLAYABLE) + { + switch (bpp) + { + case 8: + pixelBit[0] = x0; + pixelBit[1] = x1; + pixelBit[2] = x2; + pixelBit[3] = y1; + pixelBit[4] = y0; + pixelBit[5] = y2; + break; + case 16: + pixelBit[0] = x0; + pixelBit[1] = x1; + pixelBit[2] = x2; + pixelBit[3] = y0; + pixelBit[4] = y1; + pixelBit[5] = y2; + break; + case 32: + pixelBit[0] = x0; + pixelBit[1] = x1; + 
pixelBit[2] = y0; + pixelBit[3] = x2; + pixelBit[4] = y1; + pixelBit[5] = y2; + break; + case 64: + pixelBit[0] = x0; + pixelBit[1] = y0; + pixelBit[2] = x1; + pixelBit[3] = x2; + pixelBit[4] = y1; + pixelBit[5] = y2; + break; + case 128: + pixelBit[0] = y0; + pixelBit[1] = x0; + pixelBit[2] = x1; + pixelBit[3] = x2; + pixelBit[4] = y1; + pixelBit[5] = y2; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + } + else if (microTileType == ADDR_NON_DISPLAYABLE || microTileType == ADDR_DEPTH_SAMPLE_ORDER) + { + pixelBit[0] = x0; + pixelBit[1] = y0; + pixelBit[2] = x1; + pixelBit[3] = y1; + pixelBit[4] = x2; + pixelBit[5] = y2; + } + else if (microTileType == ADDR_ROTATED) + { + ADDR_ASSERT(thickness == 1); + + switch (bpp) + { + case 8: + pixelBit[0] = y0; + pixelBit[1] = y1; + pixelBit[2] = y2; + pixelBit[3] = x1; + pixelBit[4] = x0; + pixelBit[5] = x2; + break; + case 16: + pixelBit[0] = y0; + pixelBit[1] = y1; + pixelBit[2] = y2; + pixelBit[3] = x0; + pixelBit[4] = x1; + pixelBit[5] = x2; + break; + case 32: + pixelBit[0] = y0; + pixelBit[1] = y1; + pixelBit[2] = x0; + pixelBit[3] = y2; + pixelBit[4] = x1; + pixelBit[5] = x2; + break; + case 64: + pixelBit[0] = y0; + pixelBit[1] = x0; + pixelBit[2] = y1; + pixelBit[3] = x1; + pixelBit[4] = x2; + pixelBit[5] = y2; + break; + default: + retCode = ADDR_NOTSUPPORTED; + break; + } + } + + if (thickness > 1) + { + pixelBit[6] = z0; + pixelBit[7] = z1; + pEquation->numBits = 8 + log2BytesPP; + } + else + { + pEquation->numBits = 6 + log2BytesPP; + } + } + else // ADDR_THICK + { + ADDR_ASSERT(thickness > 1); + + switch (bpp) + { + case 8: + case 16: + pixelBit[0] = x0; + pixelBit[1] = y0; + pixelBit[2] = x1; + pixelBit[3] = y1; + pixelBit[4] = z0; + pixelBit[5] = z1; + break; + case 32: + pixelBit[0] = x0; + pixelBit[1] = y0; + pixelBit[2] = x1; + pixelBit[3] = z0; + pixelBit[4] = y1; + pixelBit[5] = z1; + break; + case 64: + case 128: + pixelBit[0] = x0; + pixelBit[1] = y0; + pixelBit[2] = z0; + pixelBit[3] = x1; + 
pixelBit[4] = y1; + pixelBit[5] = z1; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + pixelBit[6] = x2; + pixelBit[7] = y2; + pEquation->numBits = 8 + log2BytesPP; + } + + if (thickness == 8) + { + pixelBit[8] = z2; + pEquation->numBits = 9 + log2BytesPP; + } + + // stackedDepthSlices is used for addressing mode that a tile block contains multiple slices, + // which is not supported by our address lib + pEquation->stackedDepthSlices = FALSE; + + return retCode; +} + +/** +**************************************************************************************************** +* Lib::ComputePixelIndexWithinMicroTile +* +* @brief +* Compute the pixel index inside a micro tile of surface +* +* @return +* Pixel index +* +**************************************************************************************************** +*/ +UINT_32 Lib::ComputePixelIndexWithinMicroTile( + UINT_32 x, ///< [in] x coord + UINT_32 y, ///< [in] y coord + UINT_32 z, ///< [in] slice/depth index + UINT_32 bpp, ///< [in] bits per pixel + AddrTileMode tileMode, ///< [in] tile mode + AddrTileType microTileType ///< [in] pixel order in display/non-display mode + ) const +{ + UINT_32 pixelBit0 = 0; + UINT_32 pixelBit1 = 0; + UINT_32 pixelBit2 = 0; + UINT_32 pixelBit3 = 0; + UINT_32 pixelBit4 = 0; + UINT_32 pixelBit5 = 0; + UINT_32 pixelBit6 = 0; + UINT_32 pixelBit7 = 0; + UINT_32 pixelBit8 = 0; + UINT_32 pixelNumber; + + UINT_32 x0 = _BIT(x, 0); + UINT_32 x1 = _BIT(x, 1); + UINT_32 x2 = _BIT(x, 2); + UINT_32 y0 = _BIT(y, 0); + UINT_32 y1 = _BIT(y, 1); + UINT_32 y2 = _BIT(y, 2); + UINT_32 z0 = _BIT(z, 0); + UINT_32 z1 = _BIT(z, 1); + UINT_32 z2 = _BIT(z, 2); + + UINT_32 thickness = Thickness(tileMode); + + // Compute the pixel number within the micro tile. 
+ + if (microTileType != ADDR_THICK) + { + if (microTileType == ADDR_DISPLAYABLE) + { + switch (bpp) + { + case 8: + pixelBit0 = x0; + pixelBit1 = x1; + pixelBit2 = x2; + pixelBit3 = y1; + pixelBit4 = y0; + pixelBit5 = y2; + break; + case 16: + pixelBit0 = x0; + pixelBit1 = x1; + pixelBit2 = x2; + pixelBit3 = y0; + pixelBit4 = y1; + pixelBit5 = y2; + break; + case 32: + pixelBit0 = x0; + pixelBit1 = x1; + pixelBit2 = y0; + pixelBit3 = x2; + pixelBit4 = y1; + pixelBit5 = y2; + break; + case 64: + pixelBit0 = x0; + pixelBit1 = y0; + pixelBit2 = x1; + pixelBit3 = x2; + pixelBit4 = y1; + pixelBit5 = y2; + break; + case 128: + pixelBit0 = y0; + pixelBit1 = x0; + pixelBit2 = x1; + pixelBit3 = x2; + pixelBit4 = y1; + pixelBit5 = y2; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + } + else if (microTileType == ADDR_NON_DISPLAYABLE || microTileType == ADDR_DEPTH_SAMPLE_ORDER) + { + pixelBit0 = x0; + pixelBit1 = y0; + pixelBit2 = x1; + pixelBit3 = y1; + pixelBit4 = x2; + pixelBit5 = y2; + } + else if (microTileType == ADDR_ROTATED) + { + ADDR_ASSERT(thickness == 1); + + switch (bpp) + { + case 8: + pixelBit0 = y0; + pixelBit1 = y1; + pixelBit2 = y2; + pixelBit3 = x1; + pixelBit4 = x0; + pixelBit5 = x2; + break; + case 16: + pixelBit0 = y0; + pixelBit1 = y1; + pixelBit2 = y2; + pixelBit3 = x0; + pixelBit4 = x1; + pixelBit5 = x2; + break; + case 32: + pixelBit0 = y0; + pixelBit1 = y1; + pixelBit2 = x0; + pixelBit3 = y2; + pixelBit4 = x1; + pixelBit5 = x2; + break; + case 64: + pixelBit0 = y0; + pixelBit1 = x0; + pixelBit2 = y1; + pixelBit3 = x1; + pixelBit4 = x2; + pixelBit5 = y2; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + } + + if (thickness > 1) + { + pixelBit6 = z0; + pixelBit7 = z1; + } + } + else // ADDR_THICK + { + ADDR_ASSERT(thickness > 1); + + switch (bpp) + { + case 8: + case 16: + pixelBit0 = x0; + pixelBit1 = y0; + pixelBit2 = x1; + pixelBit3 = y1; + pixelBit4 = z0; + pixelBit5 = z1; + break; + case 32: + pixelBit0 = x0; + pixelBit1 = y0; 
+ pixelBit2 = x1; + pixelBit3 = z0; + pixelBit4 = y1; + pixelBit5 = z1; + break; + case 64: + case 128: + pixelBit0 = x0; + pixelBit1 = y0; + pixelBit2 = z0; + pixelBit3 = x1; + pixelBit4 = y1; + pixelBit5 = z1; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + pixelBit6 = x2; + pixelBit7 = y2; + } + + if (thickness == 8) + { + pixelBit8 = z2; + } + + pixelNumber = ((pixelBit0 ) | + (pixelBit1 << 1) | + (pixelBit2 << 2) | + (pixelBit3 << 3) | + (pixelBit4 << 4) | + (pixelBit5 << 5) | + (pixelBit6 << 6) | + (pixelBit7 << 7) | + (pixelBit8 << 8)); + + return pixelNumber; +} + +/** +**************************************************************************************************** +* Lib::AdjustPitchAlignment +* +* @brief +* Adjusts pitch alignment for flipping surface +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID Lib::AdjustPitchAlignment( + ADDR_SURFACE_FLAGS flags, ///< [in] Surface flags + UINT_32* pPitchAlign ///< [out] Pointer to pitch alignment + ) const +{ + // Display engine hardwires lower 5 bit of GRPH_PITCH to ZERO which means 32 pixel alignment + // Maybe it will be fixed in future but let's make it general for now. 
+ if (flags.display || flags.overlay) + { + *pPitchAlign = PowTwoAlign(*pPitchAlign, 32); + + if(flags.display) + { + *pPitchAlign = Max(m_minPitchAlignPixels, *pPitchAlign); + } + } +} + +/** +**************************************************************************************************** +* Lib::PadDimensions +* +* @brief +* Helper function to pad dimensions +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID Lib::PadDimensions( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 numSamples, ///< [in] number of samples + ADDR_TILEINFO* pTileInfo, ///< [in,out] bank structure. + UINT_32 padDims, ///< [in] Dimensions to pad valid value 1,2,3 + UINT_32 mipLevel, ///< [in] MipLevel + UINT_32* pPitch, ///< [in,out] pitch in pixels + UINT_32* pPitchAlign, ///< [in,out] pitch align could be changed in HwlPadDimensions + UINT_32* pHeight, ///< [in,out] height in pixels + UINT_32 heightAlign, ///< [in] height alignment + UINT_32* pSlices, ///< [in,out] number of slices + UINT_32 sliceAlign ///< [in] number of slice alignment + ) const +{ + UINT_32 pitchAlign = *pPitchAlign; + UINT_32 thickness = Thickness(tileMode); + + ADDR_ASSERT(padDims <= 3); + + // + // Override padding for mip levels + // + if (mipLevel > 0) + { + if (flags.cube) + { + // for cubemap, we only pad when client call with 6 faces as an identity + if (*pSlices > 1) + { + padDims = 3; // we should pad cubemap sub levels when we treat it as 3d texture + } + else + { + padDims = 2; + } + } + } + + // Any possibilities that padDims is 0? 
+ if (padDims == 0) + { + padDims = 3; + } + + if (IsPow2(pitchAlign)) + { + *pPitch = PowTwoAlign((*pPitch), pitchAlign); + } + else // add this code to pass unit test, r600 linear mode is not align bpp to pow2 for linear + { + *pPitch += pitchAlign - 1; + *pPitch /= pitchAlign; + *pPitch *= pitchAlign; + } + + if (padDims > 1) + { + if (IsPow2(heightAlign)) + { + *pHeight = PowTwoAlign((*pHeight), heightAlign); + } + else + { + *pHeight += heightAlign - 1; + *pHeight /= heightAlign; + *pHeight *= heightAlign; + } + } + + if (padDims > 2 || thickness > 1) + { + // for cubemap single face, we do not pad slices. + // if we pad it, the slice number should be set to 6 and current mip level > 1 + if (flags.cube && (!m_configFlags.noCubeMipSlicesPad || flags.cubeAsArray)) + { + *pSlices = NextPow2(*pSlices); + } + + // normal 3D texture or arrays or cubemap has a thick mode? (Just pass unit test) + if (thickness > 1) + { + *pSlices = PowTwoAlign((*pSlices), sliceAlign); + } + + } + + HwlPadDimensions(tileMode, + bpp, + flags, + numSamples, + pTileInfo, + mipLevel, + pPitch, + pPitchAlign, + *pHeight, + heightAlign); +} + +/** +**************************************************************************************************** +* Lib::HwlPreHandleBaseLvl3xPitch +* +* @brief +* Pre-handler of 3x pitch (96 bit) adjustment +* +* @return +* Expected pitch +**************************************************************************************************** +*/ +UINT_32 Lib::HwlPreHandleBaseLvl3xPitch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input + UINT_32 expPitch ///< [in] pitch + ) const +{ + ADDR_ASSERT(pIn->width == expPitch); + // + // If pitch is pre-multiplied by 3, we retrieve original one here to get correct miplevel size + // + if (ElemLib::IsExpand3x(pIn->format) && + pIn->mipLevel == 0 && + pIn->tileMode == ADDR_TM_LINEAR_ALIGNED) + { + expPitch /= 3; + expPitch = NextPow2(expPitch); + } + + return expPitch; +} + +/** 
+**************************************************************************************************** +* Lib::HwlPostHandleBaseLvl3xPitch +* +* @brief +* Post-handler of 3x pitch adjustment +* +* @return +* Expected pitch +**************************************************************************************************** +*/ +UINT_32 Lib::HwlPostHandleBaseLvl3xPitch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input + UINT_32 expPitch ///< [in] pitch + ) const +{ + // + // 96 bits surface of sub levels require element pitch of 32 bits instead + // So we just return pitch in 32 bit pixels without timing 3 + // + if (ElemLib::IsExpand3x(pIn->format) && + pIn->mipLevel == 0 && + pIn->tileMode == ADDR_TM_LINEAR_ALIGNED) + { + expPitch *= 3; + } + + return expPitch; +} + +/** +**************************************************************************************************** +* Lib::IsMacroTiled +* +* @brief +* Check if the tile mode is macro tiled +* +* @return +* TRUE if it is macro tiled (2D/2B/3D/3B) +**************************************************************************************************** +*/ +BOOL_32 Lib::IsMacroTiled( + AddrTileMode tileMode) ///< [in] tile mode +{ + return ModeFlags[tileMode].isMacro; +} + +/** +**************************************************************************************************** +* Lib::IsMacro3dTiled +* +* @brief +* Check if the tile mode is 3D macro tiled +* +* @return +* TRUE if it is 3D macro tiled +**************************************************************************************************** +*/ +BOOL_32 Lib::IsMacro3dTiled( + AddrTileMode tileMode) ///< [in] tile mode +{ + return ModeFlags[tileMode].isMacro3d; +} + +/** +**************************************************************************************************** +* Lib::IsMicroTiled +* +* @brief +* Check if the tile mode is micro tiled +* +* @return +* TRUE if micro tiled 
+**************************************************************************************************** +*/ +BOOL_32 Lib::IsMicroTiled( + AddrTileMode tileMode) ///< [in] tile mode +{ + return ModeFlags[tileMode].isMicro; +} + +/** +**************************************************************************************************** +* Lib::IsLinear +* +* @brief +* Check if the tile mode is linear +* +* @return +* TRUE if linear +**************************************************************************************************** +*/ +BOOL_32 Lib::IsLinear( + AddrTileMode tileMode) ///< [in] tile mode +{ + return ModeFlags[tileMode].isLinear; +} + +/** +**************************************************************************************************** +* Lib::IsPrtNoRotationTileMode +* +* @brief +* Return TRUE if it is prt tile without rotation +* @note +* This function just used by CI +**************************************************************************************************** +*/ +BOOL_32 Lib::IsPrtNoRotationTileMode( + AddrTileMode tileMode) +{ + return ModeFlags[tileMode].isPrtNoRotation; +} + +/** +**************************************************************************************************** +* Lib::IsPrtTileMode +* +* @brief +* Return TRUE if it is prt tile +* @note +* This function just used by CI +**************************************************************************************************** +*/ +BOOL_32 Lib::IsPrtTileMode( + AddrTileMode tileMode) +{ + return ModeFlags[tileMode].isPrt; +} + +/** +**************************************************************************************************** +* Lib::ComputeMipLevel +* +* @brief +* Compute mipmap level width/height/slices +* @return +* N/A +**************************************************************************************************** +*/ +VOID Lib::ComputeMipLevel( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in,out] Input structure + ) const +{ + // Check if HWL has handled + 
BOOL_32 hwlHandled = FALSE; + (void)hwlHandled; + + if (ElemLib::IsBlockCompressed(pIn->format)) + { + if (pIn->mipLevel == 0) + { + // DXTn's level 0 must be multiple of 4 + // But there are exceptions: + // 1. Internal surface creation in hostblt/vsblt/etc... + // 2. Runtime doesn't reject ATI1/ATI2 whose width/height are not multiple of 4 + pIn->width = PowTwoAlign(pIn->width, 4); + pIn->height = PowTwoAlign(pIn->height, 4); + } + } + + hwlHandled = HwlComputeMipLevel(pIn); +} + +/** +**************************************************************************************************** +* Lib::DegradeTo1D +* +* @brief +* Check if surface can be degraded to 1D +* @return +* TRUE if degraded +**************************************************************************************************** +*/ +BOOL_32 Lib::DegradeTo1D( + UINT_32 width, ///< surface width + UINT_32 height, ///< surface height + UINT_32 macroTilePitchAlign, ///< macro tile pitch align + UINT_32 macroTileHeightAlign ///< macro tile height align + ) +{ + BOOL_32 degrade = ((width < macroTilePitchAlign) || (height < macroTileHeightAlign)); + + // Check whether 2D tiling still has too much footprint + if (degrade == FALSE) + { + // Only check width and height as slices are aligned to thickness + UINT_64 unalignedSize = width * height; + + UINT_32 alignedPitch = PowTwoAlign(width, macroTilePitchAlign); + UINT_32 alignedHeight = PowTwoAlign(height, macroTileHeightAlign); + UINT_64 alignedSize = alignedPitch * alignedHeight; + + // alignedSize > 1.5 * unalignedSize + if (2 * alignedSize > 3 * unalignedSize) + { + degrade = TRUE; + } + } + + return degrade; +} + +/** +**************************************************************************************************** +* Lib::OptimizeTileMode +* +* @brief +* Check if base level's tile mode can be optimized (degraded) +* @return +* N/A +**************************************************************************************************** +*/ +VOID 
Lib::OptimizeTileMode( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in, out] structure for surface info + ) const +{ + AddrTileMode tileMode = pInOut->tileMode; + + BOOL_32 doOpt = (pInOut->flags.opt4Space == TRUE) || + (pInOut->flags.minimizeAlignment == TRUE) || + (pInOut->maxBaseAlign != 0); + + BOOL_32 convertToPrt = FALSE; + + // Optimization can only be done on level 0 and samples <= 1 + if ((doOpt == TRUE) && + (pInOut->mipLevel == 0) && + (IsPrtTileMode(tileMode) == FALSE) && + (pInOut->flags.prt == FALSE)) + { + UINT_32 width = pInOut->width; + UINT_32 height = pInOut->height; + UINT_32 thickness = Thickness(tileMode); + BOOL_32 macroTiledOK = TRUE; + UINT_32 macroWidthAlign = 0; + UINT_32 macroHeightAlign = 0; + UINT_32 macroSizeAlign = 0; + + if (IsMacroTiled(tileMode)) + { + macroTiledOK = HwlGetAlignmentInfoMacroTiled(pInOut, + ¯oWidthAlign, + ¯oHeightAlign, + ¯oSizeAlign); + } + + if (macroTiledOK) + { + if ((pInOut->flags.display == FALSE) && + (pInOut->flags.opt4Space == TRUE) && + (pInOut->numSamples <= 1)) + { + // Check if linear mode is optimal + if ((pInOut->height == 1) && + (IsLinear(tileMode) == FALSE) && + (ElemLib::IsBlockCompressed(pInOut->format) == FALSE) && + (pInOut->flags.depth == FALSE) && + (pInOut->flags.stencil == FALSE) && + (m_configFlags.disableLinearOpt == FALSE) && + (pInOut->flags.disableLinearOpt == FALSE)) + { + tileMode = ADDR_TM_LINEAR_ALIGNED; + } + else if (IsMacroTiled(tileMode) && (pInOut->flags.tcCompatible == FALSE)) + { + if (DegradeTo1D(width, height, macroWidthAlign, macroHeightAlign)) + { + tileMode = (thickness == 1) ? + ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK; + } + else if ((thickness > 1) && (pInOut->flags.disallowLargeThickDegrade == 0)) + { + // As in the following HwlComputeSurfaceInfo, thick modes may be degraded to + // thinner modes, we should re-evaluate whether the corresponding + // thinner modes should be degraded. If so, we choose 1D thick mode instead. 
+ tileMode = DegradeLargeThickTile(pInOut->tileMode, pInOut->bpp); + + if (tileMode != pInOut->tileMode) + { + // Get thickness again after large thick degrade + thickness = Thickness(tileMode); + + ADDR_COMPUTE_SURFACE_INFO_INPUT input = *pInOut; + input.tileMode = tileMode; + + macroTiledOK = HwlGetAlignmentInfoMacroTiled(&input, + ¯oWidthAlign, + ¯oHeightAlign, + ¯oSizeAlign); + + if (macroTiledOK && + DegradeTo1D(width, height, macroWidthAlign, macroHeightAlign)) + { + tileMode = ADDR_TM_1D_TILED_THICK; + } + } + } + } + } + + if (macroTiledOK) + { + if ((pInOut->flags.minimizeAlignment == TRUE) && + (pInOut->numSamples <= 1) && + (IsMacroTiled(tileMode) == TRUE)) + { + UINT_32 macroSize = PowTwoAlign(width, macroWidthAlign) * + PowTwoAlign(height, macroHeightAlign); + UINT_32 microSize = PowTwoAlign(width, MicroTileWidth) * + PowTwoAlign(height, MicroTileHeight); + + if (macroSize > microSize) + { + tileMode = (thickness == 1) ? + ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK; + } + } + + if ((pInOut->maxBaseAlign != 0) && + (IsMacroTiled(tileMode) == TRUE)) + { + if (macroSizeAlign > pInOut->maxBaseAlign) + { + if (pInOut->numSamples > 1) + { + ADDR_ASSERT(pInOut->maxBaseAlign >= Block64K); + + convertToPrt = TRUE; + } + else if (pInOut->maxBaseAlign < Block64K) + { + tileMode = (thickness == 1) ? 
+ ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK; + } + else + { + convertToPrt = TRUE; + } + } + } + } + } + } + + if (convertToPrt) + { + if ((pInOut->flags.matchStencilTileCfg == TRUE) && (pInOut->numSamples <= 1)) + { + pInOut->tileMode = ADDR_TM_1D_TILED_THIN1; + } + else + { + HwlSetPrtTileMode(pInOut); + } + } + else if (tileMode != pInOut->tileMode) + { + pInOut->tileMode = tileMode; + } + + HwlOptimizeTileMode(pInOut); +} + +/** +**************************************************************************************************** +* Lib::DegradeLargeThickTile +* +* @brief +* Check if the thickness needs to be reduced if a tile is too large +* @return +* The degraded tile mode (unchanged if not degraded) +**************************************************************************************************** +*/ +AddrTileMode Lib::DegradeLargeThickTile( + AddrTileMode tileMode, + UINT_32 bpp) const +{ + // Override tilemode + // When tile_width (8) * tile_height (8) * thickness * element_bytes is > row_size, + // it is better to just use THIN mode in this case + UINT_32 thickness = Thickness(tileMode); + + if (thickness > 1 && m_configFlags.allowLargeThickTile == 0) + { + UINT_32 tileSize = MicroTilePixels * thickness * (bpp >> 3); + + if (tileSize > m_rowSize) + { + switch (tileMode) + { + case ADDR_TM_2D_TILED_XTHICK: + if ((tileSize >> 1) <= m_rowSize) + { + tileMode = ADDR_TM_2D_TILED_THICK; + break; + } + // else fall through + case ADDR_TM_2D_TILED_THICK: + tileMode = ADDR_TM_2D_TILED_THIN1; + break; + + case ADDR_TM_3D_TILED_XTHICK: + if ((tileSize >> 1) <= m_rowSize) + { + tileMode = ADDR_TM_3D_TILED_THICK; + break; + } + // else fall through + case ADDR_TM_3D_TILED_THICK: + tileMode = ADDR_TM_3D_TILED_THIN1; + break; + + case ADDR_TM_PRT_TILED_THICK: + tileMode = ADDR_TM_PRT_TILED_THIN1; + break; + + case ADDR_TM_PRT_2D_TILED_THICK: + tileMode = ADDR_TM_PRT_2D_TILED_THIN1; + break; + + case ADDR_TM_PRT_3D_TILED_THICK: + tileMode = 
ADDR_TM_PRT_3D_TILED_THIN1; + break; + + default: + break; + } + } + } + + return tileMode; +} + +/** +**************************************************************************************************** +* Lib::PostComputeMipLevel +* @brief +* Compute MipLevel info (including level 0) after surface adjustment +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::PostComputeMipLevel( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in,out] Input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] Output structure + ) const +{ + // Mipmap including level 0 must be pow2 padded since either SI hw expects so or it is + // required by CFX for Hw Compatibility between NI and SI. Otherwise it is only needed for + // mipLevel > 0. Any h/w has different requirement should implement its own virtual function + + if (pIn->flags.pow2Pad) + { + pIn->width = NextPow2(pIn->width); + pIn->height = NextPow2(pIn->height); + pIn->numSlices = NextPow2(pIn->numSlices); + } + else if (pIn->mipLevel > 0) + { + pIn->width = NextPow2(pIn->width); + pIn->height = NextPow2(pIn->height); + + if (!pIn->flags.cube) + { + pIn->numSlices = NextPow2(pIn->numSlices); + } + + // for cubemap, we keep its value at first + } + + return ADDR_OK; +} + +/** +**************************************************************************************************** +* Lib::HwlSetupTileCfg +* +* @brief +* Map tile index to tile setting. 
+* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::HwlSetupTileCfg( + UINT_32 bpp, ///< Bits per pixel + INT_32 index, ///< [in] Tile index + INT_32 macroModeIndex, ///< [in] Index in macro tile mode table(CI) + ADDR_TILEINFO* pInfo, ///< [out] Tile Info + AddrTileMode* pMode, ///< [out] Tile mode + AddrTileType* pType ///< [out] Tile type + ) const +{ + return ADDR_NOTSUPPORTED; +} + +/** +**************************************************************************************************** +* Lib::HwlGetPipes +* +* @brief +* Get number pipes +* @return +* num pipes +**************************************************************************************************** +*/ +UINT_32 Lib::HwlGetPipes( + const ADDR_TILEINFO* pTileInfo ///< [in] Tile info + ) const +{ + //pTileInfo can be NULL when asic is 6xx and 8xx. + return m_pipes; +} + +/** +**************************************************************************************************** +* Lib::ComputeQbStereoInfo +* +* @brief +* Get quad buffer stereo information +* @return +* N/A +**************************************************************************************************** +*/ +VOID Lib::ComputeQbStereoInfo( + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] updated pOut+pStereoInfo + ) const +{ + ADDR_ASSERT(pOut->bpp >= 8); + ADDR_ASSERT((pOut->surfSize % pOut->baseAlign) == 0); + + // Save original height + pOut->pStereoInfo->eyeHeight = pOut->height; + + // Right offset + pOut->pStereoInfo->rightOffset = static_cast(pOut->surfSize); + + pOut->pStereoInfo->rightSwizzle = HwlComputeQbStereoRightSwizzle(pOut); + // Double height + pOut->height <<= 1; + pOut->pixelHeight <<= 1; + + // Double size + pOut->surfSize <<= 1; + + // Right start address meets the base align since it is guaranteed by AddrLib1 + + // 1D surface on SI may break this rule, but we can force it to meet by checking 
.qbStereo. +} + +/** +**************************************************************************************************** +* Lib::ComputePrtInfo +* +* @brief +* Compute prt surface related info +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputePrtInfo( + const ADDR_PRT_INFO_INPUT* pIn, + ADDR_PRT_INFO_OUTPUT* pOut) const +{ + ADDR_ASSERT(pOut != NULL); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + UINT_32 expandX = 1; + UINT_32 expandY = 1; + ElemMode elemMode; + + UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, + &elemMode, + &expandX, + &expandY); + + if (bpp <8 || bpp == 24 || bpp == 48 || bpp == 96) + { + returnCode = ADDR_INVALIDPARAMS; + } + + UINT_32 numFrags = pIn->numFrags; + ADDR_ASSERT(numFrags <= 8); + + UINT_32 tileWidth = 0; + UINT_32 tileHeight = 0; + if (returnCode == ADDR_OK) + { + // 3D texture without depth or 2d texture + if (pIn->baseMipDepth > 1 || pIn->baseMipHeight > 1) + { + if (bpp == 8) + { + tileWidth = 256; + tileHeight = 256; + } + else if (bpp == 16) + { + tileWidth = 256; + tileHeight = 128; + } + else if (bpp == 32) + { + tileWidth = 128; + tileHeight = 128; + } + else if (bpp == 64) + { + // assume it is BC1/4 + tileWidth = 512; + tileHeight = 256; + + if (elemMode == ADDR_UNCOMPRESSED) + { + tileWidth = 128; + tileHeight = 64; + } + } + else if (bpp == 128) + { + // assume it is BC2/3/5/6H/7 + tileWidth = 256; + tileHeight = 256; + + if (elemMode == ADDR_UNCOMPRESSED) + { + tileWidth = 64; + tileHeight = 64; + } + } + + if (numFrags == 2) + { + tileWidth = tileWidth / 2; + } + else if (numFrags == 4) + { + tileWidth = tileWidth / 2; + tileHeight = tileHeight / 2; + } + else if (numFrags == 8) + { + tileWidth = tileWidth / 4; + tileHeight = tileHeight / 2; + } + } + else // 1d + { + tileHeight = 1; + if (bpp == 8) + { + tileWidth = 65536; + } + else if (bpp == 16) + { + tileWidth = 32768; + } + else 
if (bpp == 32) + { + tileWidth = 16384; + } + else if (bpp == 64) + { + tileWidth = 8192; + } + else if (bpp == 128) + { + tileWidth = 4096; + } + } + } + + pOut->prtTileWidth = tileWidth; + pOut->prtTileHeight = tileHeight; + + return returnCode; +} + +} // V1 +} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/addrlib1.h mesa-19.0.1/src/amd/addrlib/src/core/addrlib1.h --- mesa-18.3.3/src/amd/addrlib/src/core/addrlib1.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/addrlib1.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,544 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ */ + +/** +**************************************************************************************************** +* @file addrlib1.h +* @brief Contains the Addr::V1::Lib class definition. +**************************************************************************************************** +*/ + +#ifndef __ADDR_LIB1_H__ +#define __ADDR_LIB1_H__ + +#include "addrlib.h" + +namespace Addr +{ +namespace V1 +{ + +/** +**************************************************************************************************** +* @brief Neutral enums that define bank swap size +**************************************************************************************************** +*/ +enum SampleSplitSize +{ + ADDR_SAMPLESPLIT_1KB = 1024, + ADDR_SAMPLESPLIT_2KB = 2048, + ADDR_SAMPLESPLIT_4KB = 4096, + ADDR_SAMPLESPLIT_8KB = 8192, +}; + +/** +**************************************************************************************************** +* @brief Flags for AddrTileMode +**************************************************************************************************** +*/ +struct TileModeFlags +{ + UINT_32 thickness : 4; + UINT_32 isLinear : 1; + UINT_32 isMicro : 1; + UINT_32 isMacro : 1; + UINT_32 isMacro3d : 1; + UINT_32 isPrt : 1; + UINT_32 isPrtNoRotation : 1; + UINT_32 isBankSwapped : 1; +}; + +static const UINT_32 Block64K = 0x10000; +static const UINT_32 PrtTileSize = Block64K; + +/** +**************************************************************************************************** +* @brief This class contains asic independent address lib functionalities +**************************************************************************************************** +*/ +class Lib : public Addr::Lib +{ +public: + virtual ~Lib(); + + static Lib* GetLib( + ADDR_HANDLE hLib); + + /// Returns tileIndex support + BOOL_32 UseTileIndex(INT_32 index) const + { + return m_configFlags.useTileIndex && (index != TileIndexInvalid); + } + + /// Returns combined swizzle support + BOOL_32 
UseCombinedSwizzle() const + { + return m_configFlags.useCombinedSwizzle; + } + + // + // Interface stubs + // + ADDR_E_RETURNCODE ComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoord( + const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddr( + const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSliceTileSwizzle( + const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, + ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ExtractBankPipeSwizzle( + const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, + ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE CombineBankPipeSwizzle( + const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn, + ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeBaseSwizzle( + const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, + ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeFmaskInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut); + + ADDR_E_RETURNCODE ComputeFmaskAddrFromCoord( + const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeFmaskCoordFromAddr( + const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ConvertTileInfoToHW( + const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, + ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ConvertTileIndex( + const ADDR_CONVERT_TILEINDEX_INPUT* pIn, + ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE GetMacroModeIndex( + const ADDR_GET_MACROMODEINDEX_INPUT* pIn, + ADDR_GET_MACROMODEINDEX_OUTPUT* pOut) const; + + 
ADDR_E_RETURNCODE ConvertTileIndex1( + const ADDR_CONVERT_TILEINDEX1_INPUT* pIn, + ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE GetTileIndex( + const ADDR_GET_TILEINDEX_INPUT* pIn, + ADDR_GET_TILEINDEX_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeHtileInfo( + const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn, + ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeCmaskInfo( + const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn, + ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeDccInfo( + const ADDR_COMPUTE_DCCINFO_INPUT* pIn, + ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeHtileAddrFromCoord( + const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeCmaskAddrFromCoord( + const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeHtileCoordFromAddr( + const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeCmaskCoordFromAddr( + const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputePrtInfo( + const ADDR_PRT_INFO_INPUT* pIn, + ADDR_PRT_INFO_OUTPUT* pOut) const; +protected: + Lib(); // Constructor is protected + Lib(const Client* pClient); + + /// Pure Virtual function for Hwl computing surface info + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl computing surface address from coord + virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoord( + const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl computing surface coord from address + virtual 
ADDR_E_RETURNCODE HwlComputeSurfaceCoordFromAddr( + const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl computing surface tile swizzle + virtual ADDR_E_RETURNCODE HwlComputeSliceTileSwizzle( + const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, + ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl extracting bank/pipe swizzle from base256b + virtual ADDR_E_RETURNCODE HwlExtractBankPipeSwizzle( + const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, + ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl combining bank/pipe swizzle + virtual ADDR_E_RETURNCODE HwlCombineBankPipeSwizzle( + UINT_32 bankSwizzle, UINT_32 pipeSwizzle, ADDR_TILEINFO* pTileInfo, + UINT_64 baseAddr, UINT_32* pTileSwizzle) const = 0; + + /// Pure Virtual function for Hwl computing base swizzle + virtual ADDR_E_RETURNCODE HwlComputeBaseSwizzle( + const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, + ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl computing HTILE base align + virtual UINT_32 HwlComputeHtileBaseAlign( + BOOL_32 isTcCompatible, BOOL_32 isLinear, ADDR_TILEINFO* pTileInfo) const = 0; + + /// Pure Virtual function for Hwl computing HTILE bpp + virtual UINT_32 HwlComputeHtileBpp( + BOOL_32 isWidth8, BOOL_32 isHeight8) const = 0; + + /// Pure Virtual function for Hwl computing HTILE bytes + virtual UINT_64 HwlComputeHtileBytes( + UINT_32 pitch, UINT_32 height, UINT_32 bpp, + BOOL_32 isLinear, UINT_32 numSlices, UINT_64* pSliceBytes, UINT_32 baseAlign) const = 0; + + /// Pure Virtual function for Hwl computing FMASK info + virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut) = 0; + + /// Pure Virtual function for Hwl FMASK address from coord + virtual ADDR_E_RETURNCODE HwlComputeFmaskAddrFromCoord( + const 
ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl FMASK coord from address + virtual ADDR_E_RETURNCODE HwlComputeFmaskCoordFromAddr( + const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl convert tile info from real value to HW value + virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW( + const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, + ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl compute mipmap info + virtual BOOL_32 HwlComputeMipLevel( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const = 0; + + /// Pure Virtual function for Hwl compute max cmask blockMax value + virtual BOOL_32 HwlGetMaxCmaskBlockMax() const = 0; + + /// Pure Virtual function for Hwl compute fmask bits + virtual UINT_32 HwlComputeFmaskBits( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, + UINT_32* pNumSamples) const = 0; + + /// Virtual function to get index (not pure then no need to implement this in all hwls + virtual ADDR_E_RETURNCODE HwlGetTileIndex( + const ADDR_GET_TILEINDEX_INPUT* pIn, + ADDR_GET_TILEINDEX_OUTPUT* pOut) const + { + return ADDR_NOTSUPPORTED; + } + + /// Virtual function for Hwl to compute Dcc info + virtual ADDR_E_RETURNCODE HwlComputeDccInfo( + const ADDR_COMPUTE_DCCINFO_INPUT* pIn, + ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const + { + return ADDR_NOTSUPPORTED; + } + + /// Virtual function to get cmask address for tc compatible cmask + virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord( + const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const + { + return ADDR_NOTSUPPORTED; + } + + /// Virtual function to get htile address for tc compatible htile + virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord( + const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) 
const + { + return ADDR_NOTSUPPORTED; + } + + // Compute attributes + + // HTILE + UINT_32 ComputeHtileInfo( + ADDR_HTILE_FLAGS flags, + UINT_32 pitchIn, UINT_32 heightIn, UINT_32 numSlices, + BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8, + ADDR_TILEINFO* pTileInfo, + UINT_32* pPitchOut, UINT_32* pHeightOut, UINT_64* pHtileBytes, + UINT_32* pMacroWidth = NULL, UINT_32* pMacroHeight = NULL, + UINT_64* pSliceSize = NULL, UINT_32* pBaseAlign = NULL) const; + + // CMASK + ADDR_E_RETURNCODE ComputeCmaskInfo( + ADDR_CMASK_FLAGS flags, + UINT_32 pitchIn, UINT_32 heightIn, UINT_32 numSlices, BOOL_32 isLinear, + ADDR_TILEINFO* pTileInfo, UINT_32* pPitchOut, UINT_32* pHeightOut, UINT_64* pCmaskBytes, + UINT_32* pMacroWidth, UINT_32* pMacroHeight, UINT_64* pSliceSize = NULL, + UINT_32* pBaseAlign = NULL, UINT_32* pBlockMax = NULL) const; + + virtual VOID HwlComputeTileDataWidthAndHeightLinear( + UINT_32* pMacroWidth, UINT_32* pMacroHeight, + UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const; + + // CMASK & HTILE addressing + virtual UINT_64 HwlComputeXmaskAddrFromCoord( + UINT_32 pitch, UINT_32 height, UINT_32 x, UINT_32 y, UINT_32 slice, + UINT_32 numSlices, UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, + BOOL_32 isHeight8, ADDR_TILEINFO* pTileInfo, + UINT_32* bitPosition) const; + + virtual VOID HwlComputeXmaskCoordFromAddr( + UINT_64 addr, UINT_32 bitPosition, UINT_32 pitch, UINT_32 height, UINT_32 numSlices, + UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8, + ADDR_TILEINFO* pTileInfo, UINT_32* pX, UINT_32* pY, UINT_32* pSlice) const; + + // Surface mipmap + VOID ComputeMipLevel( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const; + + /// Pure Virtual function for Hwl to get macro tiled alignment info + virtual BOOL_32 HwlGetAlignmentInfoMacroTiled( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + UINT_32* pPitchAlign, UINT_32* pHeightAlign, UINT_32* pSizeAlign) const = 0; + + virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* 
pInOut) const + { + // not supported in hwl layer + } + + virtual VOID HwlOptimizeTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const + { + // not supported in hwl layer + } + + virtual VOID HwlSelectTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const + { + // not supported in hwl layer + } + + AddrTileMode DegradeLargeThickTile(AddrTileMode tileMode, UINT_32 bpp) const; + + VOID PadDimensions( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, + UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 padDims, UINT_32 mipLevel, + UINT_32* pPitch, UINT_32* pPitchAlign, UINT_32* pHeight, UINT_32 heightAlign, + UINT_32* pSlices, UINT_32 sliceAlign) const; + + virtual VOID HwlPadDimensions( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, + UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 mipLevel, + UINT_32* pPitch, UINT_32* pPitchAlign, UINT_32 height, UINT_32 heightAlign) const + { + } + + // + // Addressing shared for linear/1D tiling + // + UINT_64 ComputeSurfaceAddrFromCoordLinear( + UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, + UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSlices, + UINT_32* pBitPosition) const; + + VOID ComputeSurfaceCoordFromAddrLinear( + UINT_64 addr, UINT_32 bitPosition, UINT_32 bpp, + UINT_32 pitch, UINT_32 height, UINT_32 numSlices, + UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample) const; + + VOID ComputeSurfaceCoordFromAddrMicroTiled( + UINT_64 addr, UINT_32 bitPosition, + UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, + AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits, + UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, + AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const; + + ADDR_E_RETURNCODE ComputeMicroTileEquation( + UINT_32 bpp, AddrTileMode tileMode, + AddrTileType microTileType, ADDR_EQUATION* pEquation) const; + + UINT_32 ComputePixelIndexWithinMicroTile( + UINT_32 x, UINT_32 y, UINT_32 z, + UINT_32 bpp, 
AddrTileMode tileMode, AddrTileType microTileType) const; + + /// Pure Virtual function for Hwl computing coord from offset inside micro tile + virtual VOID HwlComputePixelCoordFromOffset( + UINT_32 offset, UINT_32 bpp, UINT_32 numSamples, + AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits, + UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, + AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const = 0; + + // + // Addressing shared by all + // + virtual UINT_32 HwlGetPipes( + const ADDR_TILEINFO* pTileInfo) const; + + UINT_32 ComputePipeFromAddr( + UINT_64 addr, UINT_32 numPipes) const; + + virtual ADDR_E_RETURNCODE ComputePipeEquation( + UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const + { + return ADDR_NOTSUPPORTED; + } + + /// Pure Virtual function for Hwl computing pipe from coord + virtual UINT_32 ComputePipeFromCoord( + UINT_32 x, UINT_32 y, UINT_32 slice, AddrTileMode tileMode, + UINT_32 pipeSwizzle, BOOL_32 flags, ADDR_TILEINFO* pTileInfo) const = 0; + + /// Pure Virtual function for Hwl computing coord Y for 8 pipe cmask/htile + virtual UINT_32 HwlComputeXmaskCoordYFrom8Pipe( + UINT_32 pipe, UINT_32 x) const = 0; + + // + // Misc helper + // + static const TileModeFlags ModeFlags[ADDR_TM_COUNT]; + + static UINT_32 Thickness( + AddrTileMode tileMode); + + // Checking tile mode + static BOOL_32 IsMacroTiled(AddrTileMode tileMode); + static BOOL_32 IsMacro3dTiled(AddrTileMode tileMode); + static BOOL_32 IsLinear(AddrTileMode tileMode); + static BOOL_32 IsMicroTiled(AddrTileMode tileMode); + static BOOL_32 IsPrtTileMode(AddrTileMode tileMode); + static BOOL_32 IsPrtNoRotationTileMode(AddrTileMode tileMode); + + /// Return TRUE if tile info is needed + BOOL_32 UseTileInfo() const + { + return !m_configFlags.ignoreTileInfo; + } + + /// Adjusts pitch alignment for flipping surface + VOID AdjustPitchAlignment( + ADDR_SURFACE_FLAGS flags, UINT_32* pPitchAlign) const; + + /// 
Overwrite tile config according to tile index + virtual ADDR_E_RETURNCODE HwlSetupTileCfg( + UINT_32 bpp, INT_32 index, INT_32 macroModeIndex, + ADDR_TILEINFO* pInfo, AddrTileMode* mode = NULL, AddrTileType* type = NULL) const; + + /// Overwrite macro tile config according to tile index + virtual INT_32 HwlComputeMacroModeIndex( + INT_32 index, ADDR_SURFACE_FLAGS flags, UINT_32 bpp, UINT_32 numSamples, + ADDR_TILEINFO* pTileInfo, AddrTileMode *pTileMode = NULL, AddrTileType *pTileType = NULL + ) const + { + return TileIndexNoMacroIndex; + } + + /// Pre-handler of 3x pitch (96 bit) adjustment + virtual UINT_32 HwlPreHandleBaseLvl3xPitch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const; + /// Post-handler of 3x pitch adjustment + virtual UINT_32 HwlPostHandleBaseLvl3xPitch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const; + /// Check miplevel after surface adjustment + ADDR_E_RETURNCODE PostComputeMipLevel( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + /// Quad buffer stereo support, has its implementation in ind. 
layer + VOID ComputeQbStereoInfo( + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + /// Pure virutual function to compute stereo bank swizzle for right eye + virtual UINT_32 HwlComputeQbStereoRightSwizzle( + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0; + + VOID OptimizeTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; + + /// Overwrite tile setting to PRT + virtual VOID HwlSetPrtTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const + { + } + + static BOOL_32 DegradeTo1D( + UINT_32 width, UINT_32 height, + UINT_32 macroTilePitchAlign, UINT_32 macroTileHeightAlign); + +private: + // Disallow the copy constructor + Lib(const Lib& a); + + // Disallow the assignment operator + Lib& operator=(const Lib& a); + + UINT_32 ComputeCmaskBaseAlign( + ADDR_CMASK_FLAGS flags, ADDR_TILEINFO* pTileInfo) const; + + UINT_64 ComputeCmaskBytes( + UINT_32 pitch, UINT_32 height, UINT_32 numSlices) const; + + // + // CMASK/HTILE shared methods + // + VOID ComputeTileDataWidthAndHeight( + UINT_32 bpp, UINT_32 cacheBits, ADDR_TILEINFO* pTileInfo, + UINT_32* pMacroWidth, UINT_32* pMacroHeight) const; + + UINT_32 ComputeXmaskCoordYFromPipe( + UINT_32 pipe, UINT_32 x) const; +}; + +} // V1 +} // Addr + +#endif + diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/addrlib2.cpp mesa-19.0.1/src/amd/addrlib/src/core/addrlib2.cpp --- mesa-18.3.3/src/amd/addrlib/src/core/addrlib2.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/addrlib2.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,1883 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +************************************************************************************************************************ +* @file addrlib2.cpp +* @brief Contains the implementation for the AddrLib2 base class. 
+************************************************************************************************************************ +*/ + +#include "addrinterface.h" +#include "addrlib2.h" +#include "addrcommon.h" + +namespace Addr +{ +namespace V2 +{ + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Static Const Member +//////////////////////////////////////////////////////////////////////////////////////////////////// + +const Dim2d Lib::Block256_2d[] = {{16, 16}, {16, 8}, {8, 8}, {8, 4}, {4, 4}}; + +const Dim3d Lib::Block1K_3d[] = {{16, 8, 8}, {8, 8, 8}, {8, 8, 4}, {8, 4, 4}, {4, 4, 4}}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Constructor/Destructor +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +************************************************************************************************************************ +* Lib::Lib +* +* @brief +* Constructor for the Addr::V2::Lib class +* +************************************************************************************************************************ +*/ +Lib::Lib() + : + Addr::Lib() +{ +} + +/** +************************************************************************************************************************ +* Lib::Lib +* +* @brief +* Constructor for the AddrLib2 class with hClient as parameter +* +************************************************************************************************************************ +*/ +Lib::Lib(const Client* pClient) + : + Addr::Lib(pClient) +{ +} + +/** +************************************************************************************************************************ +* Lib::~Lib +* +* @brief +* Destructor for the AddrLib2 class +* +************************************************************************************************************************ +*/ +Lib::~Lib() +{ +} + +/** 
+************************************************************************************************************************ +* Lib::GetLib +* +* @brief +* Get Addr::V2::Lib pointer +* +* @return +* An Addr::V2::Lib class pointer +************************************************************************************************************************ +*/ +Lib* Lib::GetLib( + ADDR_HANDLE hLib) ///< [in] handle of ADDR_HANDLE +{ + Addr::Lib* pAddrLib = Addr::Lib::GetLib(hLib); + if ((pAddrLib != NULL) && + (pAddrLib->GetChipFamily() <= ADDR_CHIP_FAMILY_VI)) + { + // only valid and GFX9+ ASIC can use AddrLib2 function. + ADDR_ASSERT_ALWAYS(); + hLib = NULL; + } + return static_cast(hLib); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Surface Methods +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceInfo +* +* @brief +* Interface function stub of AddrComputeSurfaceInfo. +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + // Adjust coming parameters. 
+ ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn; + localIn.width = Max(pIn->width, 1u); + localIn.height = Max(pIn->height, 1u); + localIn.numMipLevels = Max(pIn->numMipLevels, 1u); + localIn.numSlices = Max(pIn->numSlices, 1u); + localIn.numSamples = Max(pIn->numSamples, 1u); + localIn.numFrags = (localIn.numFrags == 0) ? localIn.numSamples : pIn->numFrags; + + UINT_32 expandX = 1; + UINT_32 expandY = 1; + ElemMode elemMode = ADDR_UNCOMPRESSED; + + if (returnCode == ADDR_OK) + { + // Set format to INVALID will skip this conversion + if (localIn.format != ADDR_FMT_INVALID) + { + // Get compression/expansion factors and element mode which indicates compression/expansion + localIn.bpp = GetElemLib()->GetBitsPerPixel(localIn.format, + &elemMode, + &expandX, + &expandY); + + // Special flag for 96 bit surface. 96 (or 48 if we support) bit surface's width is + // pre-multiplied by 3 and bpp is divided by 3. So pitch alignment for linear- + // aligned does not meet 64-pixel in real. We keep special handling in hwl since hw + // restrictions are different. 
+ // Also Mip 1+ needs an element pitch of 32 bits so we do not need this workaround + // but we use this flag to skip RestoreSurfaceInfo below + + if ((elemMode == ADDR_EXPANDED) && (expandX > 1)) + { + ADDR_ASSERT(IsLinear(localIn.swizzleMode)); + } + + UINT_32 basePitch = 0; + GetElemLib()->AdjustSurfaceInfo(elemMode, + expandX, + expandY, + &localIn.bpp, + &basePitch, + &localIn.width, + &localIn.height); + + // Overwrite these parameters if we have a valid format + } + + if (localIn.bpp != 0) + { + localIn.width = Max(localIn.width, 1u); + localIn.height = Max(localIn.height, 1u); + } + else // Rule out some invalid parameters + { + ADDR_ASSERT_ALWAYS(); + + returnCode = ADDR_INVALIDPARAMS; + } + } + + if (returnCode == ADDR_OK) + { + returnCode = ComputeSurfaceInfoSanityCheck(&localIn); + } + + if (returnCode == ADDR_OK) + { + VerifyMipLevelInfo(pIn); + + if (IsLinear(pIn->swizzleMode)) + { + // linear mode + returnCode = ComputeSurfaceInfoLinear(&localIn, pOut); + } + else + { + // tiled mode + returnCode = ComputeSurfaceInfoTiled(&localIn, pOut); + } + + if (returnCode == ADDR_OK) + { + pOut->bpp = localIn.bpp; + pOut->pixelPitch = pOut->pitch; + pOut->pixelHeight = pOut->height; + pOut->pixelMipChainPitch = pOut->mipChainPitch; + pOut->pixelMipChainHeight = pOut->mipChainHeight; + pOut->pixelBits = localIn.bpp; + + if (localIn.format != ADDR_FMT_INVALID) + { + UINT_32 pixelBits = pOut->pixelBits; + + GetElemLib()->RestoreSurfaceInfo(elemMode, + expandX, + expandY, + &pOut->pixelBits, + &pOut->pixelPitch, + &pOut->pixelHeight); + + GetElemLib()->RestoreSurfaceInfo(elemMode, + expandX, + expandY, + &pixelBits, + &pOut->pixelMipChainPitch, + &pOut->pixelMipChainHeight); + + if ((localIn.numMipLevels > 1) && (pOut->pMipInfo != NULL)) + { + for (UINT_32 i = 0; i < localIn.numMipLevels; i++) + { + pOut->pMipInfo[i].pixelPitch = pOut->pMipInfo[i].pitch; + pOut->pMipInfo[i].pixelHeight = pOut->pMipInfo[i].height; + + GetElemLib()->RestoreSurfaceInfo(elemMode, + 
expandX, + expandY, + &pixelBits, + &pOut->pMipInfo[i].pixelPitch, + &pOut->pMipInfo[i].pixelHeight); + } + } + } + + if (localIn.flags.needEquation && (Log2(localIn.numFrags) == 0)) + { + pOut->equationIndex = GetEquationIndex(&localIn, pOut); + } + + if (localIn.flags.qbStereo) + { + if (pOut->pStereoInfo != NULL) + { + ComputeQbStereoInfo(pOut); + } + } + } + } + + ADDR_ASSERT(pOut->surfSize != 0); + + ValidBaseAlignments(pOut->baseAlign); + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceInfo +* +* @brief +* Interface function stub of AddrComputeSurfaceInfo. +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoord( + const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT localIn = *pIn; + localIn.unalignedWidth = Max(pIn->unalignedWidth, 1u); + localIn.unalignedHeight = Max(pIn->unalignedHeight, 1u); + localIn.numMipLevels = Max(pIn->numMipLevels, 1u); + localIn.numSlices = Max(pIn->numSlices, 1u); + localIn.numSamples = Max(pIn->numSamples, 1u); + localIn.numFrags = Max(pIn->numFrags, 1u); + + if ((localIn.bpp < 8) || + (localIn.bpp > 128) || + ((localIn.bpp % 8) != 0) || + (localIn.sample >= localIn.numSamples) || + (localIn.slice >= localIn.numSlices) || + (localIn.mipId >= localIn.numMipLevels) || + (IsTex3d(localIn.resourceType) && + 
(Valid3DMipSliceIdConstraint(localIn.numSlices, localIn.mipId, localIn.slice) == FALSE))) + { + returnCode = ADDR_INVALIDPARAMS; + } + + if (returnCode == ADDR_OK) + { + if (IsLinear(localIn.swizzleMode)) + { + returnCode = ComputeSurfaceAddrFromCoordLinear(&localIn, pOut); + } + else + { + returnCode = ComputeSurfaceAddrFromCoordTiled(&localIn, pOut); + } + + if (returnCode == ADDR_OK) + { + pOut->prtBlockIndex = static_cast(pOut->addr / (64 * 1024)); + } + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceCoordFromAddr +* +* @brief +* Interface function stub of ComputeSurfaceCoordFromAddr. +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddr( + const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if ((pIn->bpp < 8) || + (pIn->bpp > 128) || + ((pIn->bpp % 8) != 0) || + (pIn->bitPosition >= 8)) + { + returnCode = ADDR_INVALIDPARAMS; + } + + if (returnCode == ADDR_OK) + { + if (IsLinear(pIn->swizzleMode)) + { + returnCode = ComputeSurfaceCoordFromAddrLinear(pIn, pOut); + } + else + { + returnCode = ComputeSurfaceCoordFromAddrTiled(pIn, pOut); + } + } + + return returnCode; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// CMASK/HTILE 
+//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +************************************************************************************************************************ +* Lib::ComputeHtileInfo +* +* @brief +* Interface function stub of AddrComputeHtilenfo +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeHtileInfo( + const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputeHtileInfo(pIn, pOut); + + ValidMetaBaseAlignments(pOut->baseAlign); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeHtileAddrFromCoord +* +* @brief +* Interface function stub of AddrComputeHtileAddrFromCoord +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeHtileAddrFromCoord( + const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputeHtileAddrFromCoord(pIn, pOut); + } + + return 
returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeHtileCoordFromAddr +* +* @brief +* Interface function stub of AddrComputeHtileCoordFromAddr +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeHtileCoordFromAddr( + const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputeHtileCoordFromAddr(pIn, pOut); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeCmaskInfo +* +* @brief +* Interface function stub of AddrComputeCmaskInfo +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeCmaskInfo( + const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else if (pIn->cMaskFlags.linear) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputeCmaskInfo(pIn, pOut); + + ValidMetaBaseAlignments(pOut->baseAlign); + } + + return 
returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeCmaskAddrFromCoord +* +* @brief +* Interface function stub of AddrComputeCmaskAddrFromCoord +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeCmaskAddrFromCoord( + const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputeCmaskAddrFromCoord(pIn, pOut); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeCmaskCoordFromAddr +* +* @brief +* Interface function stub of AddrComputeCmaskCoordFromAddr +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeCmaskCoordFromAddr( + const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_NOTIMPLEMENTED; + + ADDR_NOT_IMPLEMENTED(); + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeFmaskInfo +* +* @brief +* Interface function stub of ComputeFmaskInfo. 
+* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeFmaskInfo( + const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_FMASK_INFO_OUTPUT* pOut ///< [out] output structure + ) +{ + ADDR_E_RETURNCODE returnCode; + + BOOL_32 valid = (IsZOrderSwizzle(pIn->swizzleMode) == TRUE) && + ((pIn->numSamples > 0) || (pIn->numFrags > 0)); + + if (GetFillSizeFieldsFlags()) + { + if ((pIn->size != sizeof(ADDR2_COMPUTE_FMASK_INFO_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT))) + { + valid = FALSE; + } + } + + if (valid == FALSE) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0}; + + localIn.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT); + localOut.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT); + + localIn.swizzleMode = pIn->swizzleMode; + localIn.numSlices = Max(pIn->numSlices, 1u); + localIn.width = Max(pIn->unalignedWidth, 1u); + localIn.height = Max(pIn->unalignedHeight, 1u); + localIn.bpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags); + localIn.flags.fmask = 1; + localIn.numFrags = 1; + localIn.numSamples = 1; + localIn.resourceType = ADDR_RSRC_TEX_2D; + + if (localIn.bpp == 8) + { + localIn.format = ADDR_FMT_8; + } + else if (localIn.bpp == 16) + { + localIn.format = ADDR_FMT_16; + } + else if (localIn.bpp == 32) + { + localIn.format = ADDR_FMT_32; + } + else + { + localIn.format = ADDR_FMT_32_32; + } + + returnCode = ComputeSurfaceInfo(&localIn, &localOut); + + if (returnCode == ADDR_OK) + { + pOut->pitch = localOut.pitch; + pOut->height = localOut.height; + pOut->baseAlign = localOut.baseAlign; + pOut->numSlices = localOut.numSlices; + pOut->fmaskBytes = static_cast(localOut.surfSize); + pOut->sliceSize = static_cast(localOut.sliceSize); + pOut->bpp = localIn.bpp; + 
pOut->numSamples = 1; + } + } + + ValidBaseAlignments(pOut->baseAlign); + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeFmaskAddrFromCoord +* +* @brief +* Interface function stub of ComputeFmaskAddrFromCoord. +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeFmaskAddrFromCoord( + const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_NOTIMPLEMENTED; + + ADDR_NOT_IMPLEMENTED(); + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeFmaskCoordFromAddr +* +* @brief +* Interface function stub of ComputeFmaskAddrFromCoord. 
+* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeFmaskCoordFromAddr( + const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_NOTIMPLEMENTED; + + ADDR_NOT_IMPLEMENTED(); + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeDccInfo +* +* @brief +* Interface function to compute DCC key info +* +* @return +* return code of HwlComputeDccInfo +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeDccInfo( + const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_COMPUTE_DCCINFO_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputeDccInfo(pIn, pOut); + + ValidMetaBaseAlignments(pOut->dccRamBaseAlign); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeDccAddrFromCoord +* +* @brief +* Interface function stub of ComputeDccAddrFromCoord +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeDccAddrFromCoord( + const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) 
///< [out] output structure +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputeDccAddrFromCoord(pIn, pOut); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputePipeBankXor +* +* @brief +* Interface function stub of Addr2ComputePipeBankXor. +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputePipeBankXor( + const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputePipeBankXor(pIn, pOut); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSlicePipeBankXor +* +* @brief +* Interface function stub of Addr2ComputeSlicePipeBankXor. 
+* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSlicePipeBankXor( + const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else if ((IsThin(pIn->resourceType, pIn->swizzleMode) == FALSE) || + (IsNonPrtXor(pIn->swizzleMode) == FALSE) || + (pIn->numSamples > 1)) + { + returnCode = ADDR_NOTSUPPORTED; + } + else + { + returnCode = HwlComputeSlicePipeBankXor(pIn, pOut); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSubResourceOffsetForSwizzlePattern +* +* @brief +* Interface function stub of Addr2ComputeSubResourceOffsetForSwizzlePattern. 
+* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSubResourceOffsetForSwizzlePattern( + const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, + ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputeSubResourceOffsetForSwizzlePattern(pIn, pOut); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ExtractPipeBankXor +* +* @brief +* Internal function to extract bank and pipe xor bits from combined xor bits. 
+* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ExtractPipeBankXor( + UINT_32 pipeBankXor, + UINT_32 bankBits, + UINT_32 pipeBits, + UINT_32* pBankX, + UINT_32* pPipeX) +{ + ADDR_E_RETURNCODE returnCode; + + if (pipeBankXor < (1u << (pipeBits + bankBits))) + { + *pPipeX = pipeBankXor % (1 << pipeBits); + *pBankX = pipeBankXor >> pipeBits; + returnCode = ADDR_OK; + } + else + { + ADDR_ASSERT_ALWAYS(); + returnCode = ADDR_INVALIDPARAMS; + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceInfoSanityCheck +* +* @brief +* Internal function to do basic sanity check before compute surface info +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceInfoSanityCheck( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure + ) const +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + (pIn->size != sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputeSurfaceInfoSanityCheck(pIn); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ApplyCustomizedPitchHeight +* +* @brief +* Helper function to override hw required row pitch/slice pitch by customrized one +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ApplyCustomizedPitchHeight( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + UINT_32 
elementBytes, ///< [in] element bytes per element + UINT_32 pitchAlignInElement, ///< [in] pitch alignment in element + UINT_32* pPitch, ///< [in/out] pitch + UINT_32* pHeight ///< [in/out] height + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pIn->numMipLevels <= 1) + { + if (pIn->pitchInElement > 0) + { + if ((pIn->pitchInElement % pitchAlignInElement) != 0) + { + returnCode = ADDR_INVALIDPARAMS; + } + else if (pIn->pitchInElement < (*pPitch)) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + *pPitch = pIn->pitchInElement; + } + } + + if (returnCode == ADDR_OK) + { + if (pIn->sliceAlign > 0) + { + UINT_32 customizedHeight = pIn->sliceAlign / elementBytes / (*pPitch); + + if (customizedHeight * elementBytes * (*pPitch) != pIn->sliceAlign) + { + returnCode = ADDR_INVALIDPARAMS; + } + else if ((pIn->numSlices > 1) && ((*pHeight) != customizedHeight)) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + *pHeight = customizedHeight; + } + } + } + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceInfoLinear +* +* @brief +* Internal function to calculate alignment for linear swizzle surface +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceInfoLinear( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + return HwlComputeSurfaceInfoLinear(pIn, pOut); +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceInfoTiled +* +* @brief +* Internal function to calculate alignment for tiled swizzle surface +* +* @return +* ADDR_E_RETURNCODE 
+************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceInfoTiled( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + return HwlComputeSurfaceInfoTiled(pIn, pOut); +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceAddrFromCoordLinear +* +* @brief +* Internal function to calculate address from coord for linear swizzle surface +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoordLinear( + const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + BOOL_32 valid = (pIn->numSamples <= 1) && (pIn->numFrags <= 1) && (pIn->pipeBankXor == 0); + + if (valid) + { + if (IsTex1d(pIn->resourceType)) + { + valid = (pIn->y == 0); + } + } + + if (valid) + { + ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0}; + ADDR2_MIP_INFO mipInfo[MaxMipLevels]; + + localIn.bpp = pIn->bpp; + localIn.flags = pIn->flags; + localIn.width = Max(pIn->unalignedWidth, 1u); + localIn.height = Max(pIn->unalignedHeight, 1u); + localIn.numSlices = Max(pIn->numSlices, 1u); + localIn.numMipLevels = Max(pIn->numMipLevels, 1u); + localIn.resourceType = pIn->resourceType; + + if (localIn.numMipLevels <= 1) + { + localIn.pitchInElement = pIn->pitchInElement; + } + + localOut.pMipInfo = mipInfo; + + returnCode = ComputeSurfaceInfoLinear(&localIn, &localOut); + + if (returnCode == ADDR_OK) + { + pOut->addr = (localOut.sliceSize * pIn->slice) + 
+ mipInfo[pIn->mipId].offset + + (pIn->y * mipInfo[pIn->mipId].pitch + pIn->x) * (pIn->bpp >> 3); + pOut->bitPosition = 0; + } + else + { + valid = FALSE; + } + } + + if (valid == FALSE) + { + returnCode = ADDR_INVALIDPARAMS; + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceAddrFromCoordTiled +* +* @brief +* Internal function to calculate address from coord for tiled swizzle surface +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoordTiled( + const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + return HwlComputeSurfaceAddrFromCoordTiled(pIn, pOut); +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceCoordFromAddrLinear +* +* @brief +* Internal function to calculate coord from address for linear swizzle surface +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddrLinear( + const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + BOOL_32 valid = (pIn->numSamples <= 1) && (pIn->numFrags <= 1); + + if (valid) + { + if (IsTex1d(pIn->resourceType)) + { + valid = (pIn->unalignedHeight == 1); + } + } + + if (valid) + { + ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0}; + localIn.bpp = pIn->bpp; + 
localIn.flags = pIn->flags; + localIn.width = Max(pIn->unalignedWidth, 1u); + localIn.height = Max(pIn->unalignedHeight, 1u); + localIn.numSlices = Max(pIn->numSlices, 1u); + localIn.numMipLevels = Max(pIn->numMipLevels, 1u); + localIn.resourceType = pIn->resourceType; + if (localIn.numMipLevels <= 1) + { + localIn.pitchInElement = pIn->pitchInElement; + } + returnCode = ComputeSurfaceInfoLinear(&localIn, &localOut); + + if (returnCode == ADDR_OK) + { + pOut->slice = static_cast(pIn->addr / localOut.sliceSize); + pOut->sample = 0; + + UINT_32 offsetInSlice = static_cast(pIn->addr % localOut.sliceSize); + UINT_32 elementBytes = pIn->bpp >> 3; + UINT_32 mipOffsetInSlice = 0; + UINT_32 mipSize = 0; + UINT_32 mipId = 0; + for (; mipId < pIn->numMipLevels ; mipId++) + { + if (IsTex1d(pIn->resourceType)) + { + mipSize = localOut.pitch * elementBytes; + } + else + { + UINT_32 currentMipHeight = (PowTwoAlign(localIn.height, (1 << mipId))) >> mipId; + mipSize = currentMipHeight * localOut.pitch * elementBytes; + } + + if (mipSize == 0) + { + valid = FALSE; + break; + } + else if ((mipSize + mipOffsetInSlice) > offsetInSlice) + { + break; + } + else + { + mipOffsetInSlice += mipSize; + if ((mipId == (pIn->numMipLevels - 1)) || + (mipOffsetInSlice >= localOut.sliceSize)) + { + valid = FALSE; + } + } + } + + if (valid) + { + pOut->mipId = mipId; + + UINT_32 elemOffsetInMip = (offsetInSlice - mipOffsetInSlice) / elementBytes; + if (IsTex1d(pIn->resourceType)) + { + if (elemOffsetInMip < localOut.pitch) + { + pOut->x = elemOffsetInMip; + pOut->y = 0; + } + else + { + valid = FALSE; + } + } + else + { + pOut->y = elemOffsetInMip / localOut.pitch; + pOut->x = elemOffsetInMip % localOut.pitch; + } + + if ((pOut->slice >= pIn->numSlices) || + (pOut->mipId >= pIn->numMipLevels) || + (pOut->x >= Max((pIn->unalignedWidth >> pOut->mipId), 1u)) || + (pOut->y >= Max((pIn->unalignedHeight >> pOut->mipId), 1u)) || + (IsTex3d(pIn->resourceType) && + (FALSE == 
Valid3DMipSliceIdConstraint(pIn->numSlices, + pOut->mipId, + pOut->slice)))) + { + valid = FALSE; + } + } + } + else + { + valid = FALSE; + } + } + + if (valid == FALSE) + { + returnCode = ADDR_INVALIDPARAMS; + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceCoordFromAddrTiled +* +* @brief +* Internal function to calculate coord from address for tiled swizzle surface +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddrTiled( + const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_NOTIMPLEMENTED; + + ADDR_NOT_IMPLEMENTED(); + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeBlockDimensionForSurf +* +* @brief +* Internal function to get block width/height/depth in element from surface input params. 
+* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeBlockDimensionForSurf( + UINT_32* pWidth, + UINT_32* pHeight, + UINT_32* pDepth, + UINT_32 bpp, + UINT_32 numSamples, + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const +{ + ADDR_E_RETURNCODE returnCode = ComputeBlockDimension(pWidth, + pHeight, + pDepth, + bpp, + resourceType, + swizzleMode); + + if ((returnCode == ADDR_OK) && (numSamples > 1) && IsThin(resourceType, swizzleMode)) + { + const UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode); + const UINT_32 log2sample = Log2(numSamples); + const UINT_32 q = log2sample >> 1; + const UINT_32 r = log2sample & 1; + + if (log2blkSize & 1) + { + *pWidth >>= q; + *pHeight >>= (q + r); + } + else + { + *pWidth >>= (q + r); + *pHeight >>= q; + } + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeBlockDimension +* +* @brief +* Internal function to get block width/height/depth in element without considering MSAA case +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeBlockDimension( + UINT_32* pWidth, + UINT_32* pHeight, + UINT_32* pDepth, + UINT_32 bpp, + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + UINT_32 eleBytes = bpp >> 3; + UINT_32 microBlockSizeTableIndex = Log2(eleBytes); + UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode); + + if (IsThin(resourceType, swizzleMode)) + { + UINT_32 log2blkSizeIn256B = log2blkSize - 8; + UINT_32 widthAmp = log2blkSizeIn256B / 2; + UINT_32 heightAmp = log2blkSizeIn256B - widthAmp; + + ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block256_2d) / 
sizeof(Block256_2d[0])); + + *pWidth = (Block256_2d[microBlockSizeTableIndex].w << widthAmp); + *pHeight = (Block256_2d[microBlockSizeTableIndex].h << heightAmp); + *pDepth = 1; + } + else if (IsThick(resourceType, swizzleMode)) + { + UINT_32 log2blkSizeIn1KB = log2blkSize - 10; + UINT_32 averageAmp = log2blkSizeIn1KB / 3; + UINT_32 restAmp = log2blkSizeIn1KB % 3; + + ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block1K_3d) / sizeof(Block1K_3d[0])); + + *pWidth = Block1K_3d[microBlockSizeTableIndex].w << averageAmp; + *pHeight = Block1K_3d[microBlockSizeTableIndex].h << (averageAmp + (restAmp / 2)); + *pDepth = Block1K_3d[microBlockSizeTableIndex].d << (averageAmp + ((restAmp != 0) ? 1 : 0)); + } + else + { + ADDR_ASSERT_ALWAYS(); + returnCode = ADDR_INVALIDPARAMS; + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::GetMipTailDim +* +* @brief +* Internal function to get out max dimension of first level in mip tail +* +* @return +* Max Width/Height/Depth value of the first mip fitted in mip tail +************************************************************************************************************************ +*/ +Dim3d Lib::GetMipTailDim( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode, + UINT_32 blockWidth, + UINT_32 blockHeight, + UINT_32 blockDepth) const +{ + Dim3d out = {blockWidth, blockHeight, blockDepth}; + UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode); + + if (IsThick(resourceType, swizzleMode)) + { + UINT_32 dim = log2blkSize % 3; + + if (dim == 0) + { + out.h >>= 1; + } + else if (dim == 1) + { + out.w >>= 1; + } + else + { + out.d >>= 1; + } + } + else + { + if (log2blkSize & 1) + { + out.h >>= 1; + } + else + { + out.w >>= 1; + } + } + + return out; +} + +/** +************************************************************************************************************************ +* 
Lib::ComputeSurface2DMicroBlockOffset +* +* @brief +* Internal function to calculate micro block (256B) offset from coord for 2D resource +* +* @return +* micro block (256B) offset for 2D resource +************************************************************************************************************************ +*/ +UINT_32 Lib::ComputeSurface2DMicroBlockOffset( + const _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn) const +{ + ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode)); + + UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3); + UINT_32 microBlockOffset = 0; + if (IsStandardSwizzle(pIn->resourceType, pIn->swizzleMode)) + { + UINT_32 xBits = pIn->x << log2ElementBytes; + microBlockOffset = (xBits & 0xf) | ((pIn->y & 0x3) << 4); + if (log2ElementBytes < 3) + { + microBlockOffset |= (pIn->y & 0x4) << 4; + if (log2ElementBytes == 0) + { + microBlockOffset |= (pIn->y & 0x8) << 4; + } + else + { + microBlockOffset |= (xBits & 0x10) << 3; + } + } + else + { + microBlockOffset |= (xBits & 0x30) << 2; + } + } + else if (IsDisplaySwizzle(pIn->resourceType, pIn->swizzleMode)) + { + if (log2ElementBytes == 4) + { + microBlockOffset = (GetBit(pIn->x, 0) << 4) | + (GetBit(pIn->y, 0) << 5) | + (GetBit(pIn->x, 1) << 6) | + (GetBit(pIn->y, 1) << 7); + } + else + { + microBlockOffset = GetBits(pIn->x, 0, 3, log2ElementBytes) | + GetBits(pIn->y, 1, 2, 3 + log2ElementBytes) | + GetBits(pIn->x, 3, 1, 5 + log2ElementBytes) | + GetBits(pIn->y, 3, 1, 6 + log2ElementBytes); + microBlockOffset = GetBits(microBlockOffset, 0, 4, 0) | + (GetBit(pIn->y, 0) << 4) | + GetBits(microBlockOffset, 4, 3, 5); + } + } + else if (IsRotateSwizzle(pIn->swizzleMode)) + { + microBlockOffset = GetBits(pIn->y, 0, 3, log2ElementBytes) | + GetBits(pIn->x, 1, 2, 3 + log2ElementBytes) | + GetBits(pIn->x, 3, 1, 5 + log2ElementBytes) | + GetBits(pIn->y, 3, 1, 6 + log2ElementBytes); + microBlockOffset = GetBits(microBlockOffset, 0, 4, 0) | + (GetBit(pIn->x, 0) << 4) | + GetBits(microBlockOffset, 4, 3, 
5); + if (log2ElementBytes == 3) + { + microBlockOffset = GetBits(microBlockOffset, 0, 6, 0) | + GetBits(pIn->x, 1, 2, 6); + } + } + + return microBlockOffset; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurface3DMicroBlockOffset +* +* @brief +* Internal function to calculate micro block (1KB) offset from coord for 3D resource +* +* @return +* micro block (1KB) offset for 3D resource +************************************************************************************************************************ +*/ +UINT_32 Lib::ComputeSurface3DMicroBlockOffset( + const _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn) const +{ + ADDR_ASSERT(IsThick(pIn->resourceType, pIn->swizzleMode)); + + UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3); + UINT_32 microBlockOffset = 0; + if (IsStandardSwizzle(pIn->resourceType, pIn->swizzleMode)) + { + if (log2ElementBytes == 0) + { + microBlockOffset = ((pIn->slice & 4) >> 2) | ((pIn->y & 4) >> 1); + } + else if (log2ElementBytes == 1) + { + microBlockOffset = ((pIn->slice & 4) >> 2) | ((pIn->y & 4) >> 1); + } + else if (log2ElementBytes == 2) + { + microBlockOffset = ((pIn->y & 4) >> 2) | ((pIn->x & 4) >> 1); + } + else if (log2ElementBytes == 3) + { + microBlockOffset = (pIn->x & 6) >> 1; + } + else + { + microBlockOffset = pIn->x & 3; + } + + microBlockOffset <<= 8; + + UINT_32 xBits = pIn->x << log2ElementBytes; + microBlockOffset |= (xBits & 0xf) | ((pIn->y & 0x3) << 4) | ((pIn->slice & 0x3) << 6); + } + else if (IsZOrderSwizzle(pIn->swizzleMode)) + { + UINT_32 xh, yh, zh; + + if (log2ElementBytes == 0) + { + microBlockOffset = + (pIn->x & 1) | ((pIn->y & 1) << 1) | ((pIn->x & 2) << 1) | ((pIn->y & 2) << 2); + microBlockOffset = microBlockOffset | ((pIn->slice & 3) << 4) | ((pIn->x & 4) << 4); + + xh = pIn->x >> 3; + yh = pIn->y >> 2; + zh = pIn->slice >> 2; + } + else if (log2ElementBytes == 1) + { + microBlockOffset = + (pIn->x & 
1) | ((pIn->y & 1) << 1) | ((pIn->x & 2) << 1) | ((pIn->y & 2) << 2); + microBlockOffset = (microBlockOffset << 1) | ((pIn->slice & 3) << 5); + + xh = pIn->x >> 2; + yh = pIn->y >> 2; + zh = pIn->slice >> 2; + } + else if (log2ElementBytes == 2) + { + microBlockOffset = + (pIn->x & 1) | ((pIn->y & 1) << 1) | ((pIn->x & 2) << 1) | ((pIn->slice & 1) << 3); + microBlockOffset = (microBlockOffset << 2) | ((pIn->y & 2) << 5); + + xh = pIn->x >> 2; + yh = pIn->y >> 2; + zh = pIn->slice >> 1; + } + else if (log2ElementBytes == 3) + { + microBlockOffset = + (pIn->x & 1) | ((pIn->y & 1) << 1) | ((pIn->slice & 1) << 2) | ((pIn->x & 2) << 2); + microBlockOffset <<= 3; + + xh = pIn->x >> 2; + yh = pIn->y >> 1; + zh = pIn->slice >> 1; + } + else + { + microBlockOffset = + (((pIn->x & 1) | ((pIn->y & 1) << 1) | ((pIn->slice & 1) << 2)) << 4); + + xh = pIn->x >> 1; + yh = pIn->y >> 1; + zh = pIn->slice >> 1; + } + + microBlockOffset |= ((MortonGen3d(xh, yh, zh, 1) << 7) & 0x380); + } + + return microBlockOffset; +} + +/** +************************************************************************************************************************ +* Lib::GetPipeXorBits +* +* @brief +* Internal function to get bits number for pipe/se xor operation +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +UINT_32 Lib::GetPipeXorBits( + UINT_32 macroBlockBits) const +{ + ADDR_ASSERT(macroBlockBits >= m_pipeInterleaveLog2); + + // Total available xor bits + UINT_32 xorBits = macroBlockBits - m_pipeInterleaveLog2; + + // Pipe/Se xor bits + UINT_32 pipeBits = Min(xorBits, m_pipesLog2 + m_seLog2); + + return pipeBits; +} + +/** +************************************************************************************************************************ +* Lib::GetBankXorBits +* +* @brief +* Internal function to get bits number for pipe/se xor operation +* +* @return +* ADDR_E_RETURNCODE 
+************************************************************************************************************************ +*/ +UINT_32 Lib::GetBankXorBits( + UINT_32 macroBlockBits) const +{ + UINT_32 pipeBits = GetPipeXorBits(macroBlockBits); + + // Bank xor bits + UINT_32 bankBits = Min(macroBlockBits - pipeBits - m_pipeInterleaveLog2, m_banksLog2); + + return bankBits; +} + +/** +************************************************************************************************************************ +* Lib::Addr2GetPreferredSurfaceSetting +* +* @brief +* Internal function to get suggested surface information for cliet to use +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::Addr2GetPreferredSurfaceSetting( + const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, + ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT)) || + (pOut->size != sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlGetPreferredSurfaceSetting(pIn, pOut); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeBlock256Equation +* +* @brief +* Compute equation for block 256B +* +* @return +* If equation computed successfully +* +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeBlock256Equation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const +{ + ADDR_E_RETURNCODE ret; + + if (IsBlock256b(swMode)) + { + ret = HwlComputeBlock256Equation(rsrcType, swMode, 
elementBytesLog2, pEquation); + } + else + { + ADDR_ASSERT_ALWAYS(); + ret = ADDR_INVALIDPARAMS; + } + + return ret; +} + +/** +************************************************************************************************************************ +* Lib::ComputeThinEquation +* +* @brief +* Compute equation for 2D/3D resource which use THIN mode +* +* @return +* If equation computed successfully +* +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeThinEquation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const +{ + ADDR_E_RETURNCODE ret; + + if (IsThin(rsrcType, swMode)) + { + ret = HwlComputeThinEquation(rsrcType, swMode, elementBytesLog2, pEquation); + } + else + { + ADDR_ASSERT_ALWAYS(); + ret = ADDR_INVALIDPARAMS; + } + + return ret; +} + +/** +************************************************************************************************************************ +* Lib::ComputeThickEquation +* +* @brief +* Compute equation for 3D resource which use THICK mode +* +* @return +* If equation computed successfully +* +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeThickEquation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const +{ + ADDR_E_RETURNCODE ret; + + if (IsThick(rsrcType, swMode)) + { + ret = HwlComputeThickEquation(rsrcType, swMode, elementBytesLog2, pEquation); + } + else + { + ADDR_ASSERT_ALWAYS(); + ret = ADDR_INVALIDPARAMS; + } + + return ret; +} + +/** +************************************************************************************************************************ +* Lib::ComputeQbStereoInfo +* +* @brief +* Get quad buffer stereo information +* @return +* N/A 
+************************************************************************************************************************ +*/ +VOID Lib::ComputeQbStereoInfo( + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] updated pOut+pStereoInfo + ) const +{ + ADDR_ASSERT(pOut->bpp >= 8); + ADDR_ASSERT((pOut->surfSize % pOut->baseAlign) == 0); + + // Save original height + pOut->pStereoInfo->eyeHeight = pOut->height; + + // Right offset + pOut->pStereoInfo->rightOffset = static_cast(pOut->surfSize); + + // Double height + pOut->height <<= 1; + + ADDR_ASSERT(pOut->height <= MaxSurfaceHeight); + + pOut->pixelHeight <<= 1; + + // Double size + pOut->surfSize <<= 1; +} + +} // V2 +} // Addr + diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/addrlib2.h mesa-19.0.1/src/amd/addrlib/src/core/addrlib2.h --- mesa-18.3.3/src/amd/addrlib/src/core/addrlib2.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/addrlib2.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,836 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +************************************************************************************************************************ +* @file addrlib2.h +* @brief Contains the Addr::V2::Lib class definition. +************************************************************************************************************************ +*/ + +#ifndef __ADDR2_LIB2_H__ +#define __ADDR2_LIB2_H__ + +#include "addrlib.h" + +namespace Addr +{ +namespace V2 +{ + +/** +************************************************************************************************************************ +* @brief Flags for SwizzleModeTable +************************************************************************************************************************ +*/ +struct SwizzleModeFlags +{ + // Swizzle mode + UINT_32 isLinear : 1; // Linear + + // Block size + UINT_32 is256b : 1; // Block size is 256B + UINT_32 is4kb : 1; // Block size is 4KB + UINT_32 is64kb : 1; // Block size is 64KB + UINT_32 isVar : 1; // Block size is variable + + UINT_32 isZ : 1; // Z order swizzle mode + UINT_32 isStd : 1; // Standard swizzle mode + UINT_32 isDisp : 1; // Display swizzle mode + UINT_32 isRot : 1; // Rotate swizzle mode + + // XOR mode + UINT_32 isXor : 1; // XOR after swizzle if set + + UINT_32 isT : 1; // T mode + + UINT_32 isRtOpt : 1; // mode opt for render target +}; + +struct Dim2d +{ + UINT_32 w; + UINT_32 h; +}; + +struct Dim3d +{ + UINT_32 w; + UINT_32 h; + UINT_32 d; +}; + +// Macro define resource block type +enum AddrBlockType +{ + 
AddrBlockMicro = 0, // Resource uses 256B block + AddrBlock4KB = 1, // Resource uses 4KB block + AddrBlock64KB = 2, // Resource uses 64KB block + AddrBlockVar = 3, // Resource uses var block, only valid for GFX9 + AddrBlockLinear = 4, // Resource uses linear swizzle mode + + AddrBlockMaxTiledType = AddrBlock64KB + 1, +}; + +enum AddrBlockSet +{ + AddrBlockSetMicro = 1 << AddrBlockMicro, + AddrBlockSetMacro4KB = 1 << AddrBlock4KB, + AddrBlockSetMacro64KB = 1 << AddrBlock64KB, + AddrBlockSetVar = 1 << AddrBlockVar, + AddrBlockSetLinear = 1 << AddrBlockLinear, + + AddrBlockSetMacro = AddrBlockSetMacro4KB | AddrBlockSetMacro64KB, +}; + +enum AddrSwSet +{ + AddrSwSetZ = 1 << ADDR_SW_Z, + AddrSwSetS = 1 << ADDR_SW_S, + AddrSwSetD = 1 << ADDR_SW_D, + AddrSwSetR = 1 << ADDR_SW_R, + + AddrSwSetAll = AddrSwSetZ | AddrSwSetS | AddrSwSetD | AddrSwSetR, +}; + +/** +************************************************************************************************************************ +* @brief This class contains asic independent address lib functionalities +************************************************************************************************************************ +*/ +class Lib : public Addr::Lib +{ +public: + virtual ~Lib(); + + static Lib* GetLib( + ADDR_HANDLE hLib); + + // + // Interface stubs + // + + // For data surface + ADDR_E_RETURNCODE ComputeSurfaceInfo( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoord( + const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddr( + const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; + + // For HTile + ADDR_E_RETURNCODE ComputeHtileInfo( + const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, + ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE 
ComputeHtileAddrFromCoord( + const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut); + + ADDR_E_RETURNCODE ComputeHtileCoordFromAddr( + const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut); + + // For CMask + ADDR_E_RETURNCODE ComputeCmaskInfo( + const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, + ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeCmaskAddrFromCoord( + const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut); + + ADDR_E_RETURNCODE ComputeCmaskCoordFromAddr( + const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) const; + + // For FMask + ADDR_E_RETURNCODE ComputeFmaskInfo( + const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn, + ADDR2_COMPUTE_FMASK_INFO_OUTPUT* pOut); + + ADDR_E_RETURNCODE ComputeFmaskAddrFromCoord( + const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeFmaskCoordFromAddr( + const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const; + + // For DCC key + ADDR_E_RETURNCODE ComputeDccInfo( + const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, + ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeDccAddrFromCoord( + const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut); + + // Misc + ADDR_E_RETURNCODE ComputePipeBankXor( + const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut); + + ADDR_E_RETURNCODE ComputeSlicePipeBankXor( + const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut); + + ADDR_E_RETURNCODE ComputeSubResourceOffsetForSwizzlePattern( + const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, + ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut); + + 
ADDR_E_RETURNCODE Addr2GetPreferredSurfaceSetting( + const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, + ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const; + + virtual BOOL_32 IsValidDisplaySwizzleMode( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTIMPLEMENTED; + } + +protected: + Lib(); // Constructor is protected + Lib(const Client* pClient); + + static const UINT_32 MaxNumOfBpp = 5; + static const UINT_32 MaxNumOfAA = 4; + + static const Dim2d Block256_2d[MaxNumOfBpp]; + static const Dim3d Block1K_3d[MaxNumOfBpp]; + + static const UINT_32 PrtAlignment = 64 * 1024; + static const UINT_32 MaxMacroBits = 20; + + static const UINT_32 MaxMipLevels = 16; + + // Checking block size + BOOL_32 IsBlock256b(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].is256b; + } + + BOOL_32 IsBlock4kb(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].is4kb; + } + + BOOL_32 IsBlock64kb(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].is64kb; + } + + BOOL_32 IsBlockVariable(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].isVar; + } + + // Checking swizzle mode + BOOL_32 IsLinear(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].isLinear; + } + + BOOL_32 IsRtOptSwizzle(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].isRtOpt; + } + + BOOL_32 IsZOrderSwizzle(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].isZ; + } + + BOOL_32 IsStandardSwizzle(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].isStd; + } + + BOOL_32 IsDisplaySwizzle(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].isDisp; + } + + BOOL_32 IsRotateSwizzle(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].isRot; + } + + BOOL_32 IsStandardSwizzle(AddrResourceType 
resourceType, AddrSwizzleMode swizzleMode) const + { + return HwlIsStandardSwizzle(resourceType, swizzleMode); + } + + BOOL_32 IsDisplaySwizzle(AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const + { + return HwlIsDisplaySwizzle(resourceType, swizzleMode); + } + + BOOL_32 IsXor(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].isXor; + } + + BOOL_32 IsPrt(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].isT; + } + + BOOL_32 IsNonPrtXor(AddrSwizzleMode swizzleMode) const + { + return (IsXor(swizzleMode) && (IsPrt(swizzleMode) == FALSE)); + } + + // Checking resource type + static BOOL_32 IsTex1d(AddrResourceType resourceType) + { + return (resourceType == ADDR_RSRC_TEX_1D); + } + + static BOOL_32 IsTex2d(AddrResourceType resourceType) + { + return (resourceType == ADDR_RSRC_TEX_2D); + } + + static BOOL_32 IsTex3d(AddrResourceType resourceType) + { + return (resourceType == ADDR_RSRC_TEX_3D); + } + + BOOL_32 IsThick(AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const + { + return HwlIsThick(resourceType, swizzleMode); + } + + BOOL_32 IsThin(AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const + { + return HwlIsThin(resourceType, swizzleMode); + } + + UINT_32 GetBlockSizeLog2(AddrSwizzleMode swizzleMode) const + { + UINT_32 blockSizeLog2 = 0; + + if (IsBlock256b(swizzleMode) || IsLinear(swizzleMode)) + { + blockSizeLog2 = 8; + } + else if (IsBlock4kb(swizzleMode)) + { + blockSizeLog2 = 12; + } + else if (IsBlock64kb(swizzleMode)) + { + blockSizeLog2 = 16; + } + else if (IsBlockVariable(swizzleMode)) + { + blockSizeLog2 = m_blockVarSizeLog2; + } + else + { + ADDR_ASSERT_ALWAYS(); + } + + return blockSizeLog2; + } + + UINT_32 GetBlockSize(AddrSwizzleMode swizzleMode) const + { + return (1 << GetBlockSizeLog2(swizzleMode)); + } + + static UINT_32 GetFmaskBpp(UINT_32 sample, UINT_32 frag) + { + sample = (sample == 0) ? 1 : sample; + frag = (frag == 0) ? 
sample : frag; + + UINT_32 fmaskBpp = QLog2(frag); + + if (sample > frag) + { + fmaskBpp++; + } + + if (fmaskBpp == 3) + { + fmaskBpp = 4; + } + + fmaskBpp = Max(8u, fmaskBpp * sample); + + return fmaskBpp; + } + + virtual BOOL_32 HwlIsStandardSwizzle( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const + { + ADDR_NOT_IMPLEMENTED(); + return FALSE; + } + + virtual BOOL_32 HwlIsDisplaySwizzle( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const + { + ADDR_NOT_IMPLEMENTED(); + return FALSE; + } + + virtual BOOL_32 HwlIsThin( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const + { + ADDR_NOT_IMPLEMENTED(); + return FALSE; + } + + virtual BOOL_32 HwlIsThick( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const + { + ADDR_NOT_IMPLEMENTED(); + return FALSE; + } + + virtual ADDR_E_RETURNCODE HwlComputeHtileInfo( + const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, + ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeCmaskInfo( + const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, + ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeDccInfo( + const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, + ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeDccAddrFromCoord( + const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord( + const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord( + const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* 
pIn, + ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeHtileCoordFromAddr( + const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeBlock256Equation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeThinEquation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeThickEquation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual UINT_32 HwlGetEquationIndex( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_INVALID_EQUATION_INDEX; + } + + UINT_32 GetEquationIndex( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const + { + return HwlGetEquationIndex(pIn, pOut); + } + + virtual ADDR_E_RETURNCODE HwlComputePipeBankXor( + const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeSlicePipeBankXor( + const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeSubResourceOffsetForSwizzlePattern( + const 
ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, + ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlGetPreferredSurfaceSetting( + const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, + ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoSanityCheck( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoTiled( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTIMPLEMENTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoLinear( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTIMPLEMENTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled( + const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTIMPLEMENTED; + } + + ADDR_E_RETURNCODE ComputeBlock256Equation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const; + + ADDR_E_RETURNCODE ComputeThinEquation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const; + + ADDR_E_RETURNCODE ComputeThickEquation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const; + + ADDR_E_RETURNCODE ComputeSurfaceInfoSanityCheck( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; + + ADDR_E_RETURNCODE ComputeSurfaceInfoLinear( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + 
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSurfaceInfoTiled( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordLinear( + const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordTiled( + const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddrLinear( + const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddrTiled( + const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; + + UINT_32 ComputeSurface2DMicroBlockOffset( + const _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn) const; + + UINT_32 ComputeSurface3DMicroBlockOffset( + const _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn) const; + + // Misc + ADDR_E_RETURNCODE ComputeBlockDimensionForSurf( + UINT_32* pWidth, + UINT_32* pHeight, + UINT_32* pDepth, + UINT_32 bpp, + UINT_32 numSamples, + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const; + + ADDR_E_RETURNCODE ComputeBlockDimension( + UINT_32* pWidth, + UINT_32* pHeight, + UINT_32* pDepth, + UINT_32 bpp, + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const; + + static UINT_64 ComputePadSize( + const Dim3d* pBlkDim, + UINT_32 width, + UINT_32 height, + UINT_32 numSlices, + Dim3d* pPadDim) + { + pPadDim->w = PowTwoAlign(width ,pBlkDim->w); + pPadDim->h = PowTwoAlign(height ,pBlkDim->h); + pPadDim->d = PowTwoAlign(numSlices, pBlkDim->d); + return static_cast(pPadDim->w) * pPadDim->h * pPadDim->d; + } + + static ADDR_E_RETURNCODE ExtractPipeBankXor( + UINT_32 pipeBankXor, + UINT_32 bankBits, + UINT_32 pipeBits, + UINT_32* pBankX, + 
UINT_32* pPipeX); + + static BOOL_32 Valid3DMipSliceIdConstraint( + UINT_32 numSlices, + UINT_32 mipId, + UINT_32 slice) + { + return (Max((numSlices >> mipId), 1u) > slice); + } + + Dim3d GetMipTailDim( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode, + UINT_32 blockWidth, + UINT_32 blockHeight, + UINT_32 blockDepth) const; + + BOOL_32 IsInMipTail( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode, + Dim3d mipTailDim, + UINT_32 width, + UINT_32 height, + UINT_32 depth) const + { + BOOL_32 inTail = ((width <= mipTailDim.w) && + (height <= mipTailDim.h) && + (IsThin(resourceType, swizzleMode) || (depth <= mipTailDim.d))); + + return inTail; + } + + static BOOL_32 IsLocalHeap(AddrResrouceLocation resourceType) + { + return ((resourceType == ADDR_RSRC_LOC_LOCAL) || + (resourceType == ADDR_RSRC_LOC_INVIS)); + } + + static BOOL_32 IsInvisibleHeap(AddrResrouceLocation resourceType) + { + return (resourceType == ADDR_RSRC_LOC_INVIS); + } + + static BOOL_32 IsNonlocalHeap(AddrResrouceLocation resourceType) + { + return ((resourceType == ADDR_RSRC_LOC_USWC) || + (resourceType == ADDR_RSRC_LOC_CACHED)); + } + + UINT_32 GetPipeLog2ForMetaAddressing(BOOL_32 pipeAligned, AddrSwizzleMode swizzleMode) const + { + UINT_32 numPipeLog2 = pipeAligned ? 
Min(m_pipesLog2 + m_seLog2, 5u) : 0; + + if (IsXor(swizzleMode)) + { + UINT_32 maxPipeLog2 = GetBlockSizeLog2(swizzleMode) - m_pipeInterleaveLog2; + + numPipeLog2 = Min(numPipeLog2, maxPipeLog2); + } + + return numPipeLog2; + } + + UINT_32 GetPipeNumForMetaAddressing(BOOL_32 pipeAligned, AddrSwizzleMode swizzleMode) const + { + return (1 << GetPipeLog2ForMetaAddressing(pipeAligned, swizzleMode)); + } + + VOID VerifyMipLevelInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const + { +#if DEBUG + if (pIn->numMipLevels > 1) + { + UINT_32 actualMipLevels = 1; + switch (pIn->resourceType) + { + case ADDR_RSRC_TEX_3D: + // Fall through to share 2D case + actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->numSlices) + 1); + case ADDR_RSRC_TEX_2D: + // Fall through to share 1D case + actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->height) + 1); + case ADDR_RSRC_TEX_1D: + // Base 1D case + actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->width) + 1); + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + // Client pass wrong number of MipLevels to addrlib and result will be bad. + // Not sure if we should fail this calling instead of putting an assertion here. 
+ ADDR_ASSERT(actualMipLevels >= pIn->numMipLevels); + } +#endif + } + + ADDR_E_RETURNCODE ApplyCustomerPipeBankXor( + AddrSwizzleMode swizzleMode, + UINT_32 pipeBankXor, + UINT_32 bankBits, + UINT_32 pipeBits, + UINT_32* pBlockOffset) const + { + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (IsXor(swizzleMode)) + { + // Apply driver set bankPipeXor + UINT_32 bankX = 0; + UINT_32 pipeX = 0; + returnCode = ExtractPipeBankXor(pipeBankXor, bankBits, pipeBits, &bankX, &pipeX); + *pBlockOffset ^= (pipeX << m_pipeInterleaveLog2); + *pBlockOffset ^= (bankX << (m_pipeInterleaveLog2 + pipeBits)); + } + + return returnCode; + } + + UINT_32 GetPipeXorBits(UINT_32 macroBlockBits) const; + UINT_32 GetBankXorBits(UINT_32 macroBlockBits) const; + + ADDR_E_RETURNCODE ApplyCustomizedPitchHeight( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + UINT_32 elementBytes, + UINT_32 pitchAlignInElement, + UINT_32* pPitch, + UINT_32* pHeight) const; + + VOID ComputeQbStereoInfo(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + UINT_32 m_se; ///< Number of shader engine + UINT_32 m_rbPerSe; ///< Number of render backend per shader engine + UINT_32 m_maxCompFrag; ///< Number of max compressed fragment + + UINT_32 m_banksLog2; ///< Number of bank Log2 + UINT_32 m_pipesLog2; ///< Number of pipe per shader engine Log2 + UINT_32 m_seLog2; ///< Number of shader engine Log2 + UINT_32 m_rbPerSeLog2; ///< Number of render backend per shader engine Log2 + UINT_32 m_maxCompFragLog2; ///< Number of max compressed fragment Log2 + + UINT_32 m_pipeInterleaveLog2; ///< Log2 of pipe interleave bytes + + UINT_32 m_blockVarSizeLog2; ///< Log2 of block var size + + SwizzleModeFlags m_swizzleModeTable[ADDR_SW_MAX_TYPE]; ///< Swizzle mode table + +private: + // Disallow the copy constructor + Lib(const Lib& a); + + // Disallow the assignment operator + Lib& operator=(const Lib& a); +}; + +} // V2 +} // Addr + +#endif + diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/addrlib.cpp 
mesa-19.0.1/src/amd/addrlib/src/core/addrlib.cpp --- mesa-18.3.3/src/amd/addrlib/src/core/addrlib.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/addrlib.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,655 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file addrlib.cpp +* @brief Contains the implementation for the Addr::Lib class. 
+**************************************************************************************************** +*/ + +#include "addrinterface.h" +#include "addrlib.h" +#include "addrcommon.h" + +#if defined(__APPLE__) + +UINT_32 div64_32(UINT_64 n, UINT_32 base) +{ + UINT_64 rem = n; + UINT_64 b = base; + UINT_64 res, d = 1; + UINT_32 high = rem >> 32; + + res = 0; + if (high >= base) + { + high /= base; + res = (UINT_64) high << 32; + rem -= (UINT_64) (high * base) << 32; + } + + while (((INT_64)b > 0) && (b < rem)) + { + b = b + b; + d = d + d; + } + + do + { + if (rem >= b) + { + rem -= b; + res += d; + } + b >>= 1; + d >>= 1; + } while (d); + + n = res; + return rem; +} + +extern "C" +UINT_32 __umoddi3(UINT_64 n, UINT_32 base) +{ + return div64_32(n, base); +} + +#endif // __APPLE__ + +namespace Addr +{ + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Constructor/Destructor +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Lib::Lib +* +* @brief +* Constructor for the AddrLib class +* +**************************************************************************************************** +*/ +Lib::Lib() : + m_class(BASE_ADDRLIB), + m_chipFamily(ADDR_CHIP_FAMILY_IVLD), + m_chipRevision(0), + m_version(ADDRLIB_VERSION), + m_pipes(0), + m_banks(0), + m_pipeInterleaveBytes(0), + m_rowSize(0), + m_minPitchAlignPixels(1), + m_maxSamples(8), + m_pElemLib(NULL) +{ + m_configFlags.value = 0; +} + +/** +**************************************************************************************************** +* Lib::Lib +* +* @brief +* Constructor for the AddrLib class with hClient as parameter +* +**************************************************************************************************** +*/ +Lib::Lib(const Client* pClient) : + Object(pClient), + 
m_class(BASE_ADDRLIB), + m_chipFamily(ADDR_CHIP_FAMILY_IVLD), + m_chipRevision(0), + m_version(ADDRLIB_VERSION), + m_pipes(0), + m_banks(0), + m_pipeInterleaveBytes(0), + m_rowSize(0), + m_minPitchAlignPixels(1), + m_maxSamples(8), + m_pElemLib(NULL) +{ + m_configFlags.value = 0; +} + +/** +**************************************************************************************************** +* Lib::~AddrLib +* +* @brief +* Destructor for the AddrLib class +* +**************************************************************************************************** +*/ +Lib::~Lib() +{ + if (m_pElemLib) + { + delete m_pElemLib; + m_pElemLib = NULL; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Initialization/Helper +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Lib::Create +* +* @brief +* Creates and initializes AddrLib object. 
+* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::Create( + const ADDR_CREATE_INPUT* pCreateIn, ///< [in] pointer to ADDR_CREATE_INPUT + ADDR_CREATE_OUTPUT* pCreateOut) ///< [out] pointer to ADDR_CREATE_OUTPUT +{ + Lib* pLib = NULL; + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pCreateIn->createFlags.fillSizeFields == TRUE) + { + if ((pCreateIn->size != sizeof(ADDR_CREATE_INPUT)) || + (pCreateOut->size != sizeof(ADDR_CREATE_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if ((returnCode == ADDR_OK) && + (pCreateIn->callbacks.allocSysMem != NULL) && + (pCreateIn->callbacks.freeSysMem != NULL)) + { + Client client = { + pCreateIn->hClient, + pCreateIn->callbacks + }; + + switch (pCreateIn->chipEngine) + { + case CIASICIDGFXENGINE_SOUTHERNISLAND: + switch (pCreateIn->chipFamily) + { + case FAMILY_SI: + pLib = SiHwlInit(&client); + break; + case FAMILY_VI: + case FAMILY_CZ: + case FAMILY_CI: + case FAMILY_KV: // CI based fusion + pLib = CiHwlInit(&client); + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + break; + case CIASICIDGFXENGINE_ARCTICISLAND: + switch (pCreateIn->chipFamily) + { + case FAMILY_AI: + case FAMILY_RV: + pLib = Gfx9HwlInit(&client); + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + } + + if (pLib != NULL) + { + BOOL_32 initValid; + + // Pass createFlags to configFlags first since these flags may be overwritten + pLib->m_configFlags.noCubeMipSlicesPad = pCreateIn->createFlags.noCubeMipSlicesPad; + pLib->m_configFlags.fillSizeFields = pCreateIn->createFlags.fillSizeFields; + pLib->m_configFlags.useTileIndex = pCreateIn->createFlags.useTileIndex; + pLib->m_configFlags.useCombinedSwizzle = pCreateIn->createFlags.useCombinedSwizzle; + pLib->m_configFlags.checkLast2DLevel = pCreateIn->createFlags.checkLast2DLevel; + 
pLib->m_configFlags.useHtileSliceAlign = pCreateIn->createFlags.useHtileSliceAlign; + pLib->m_configFlags.allowLargeThickTile = pCreateIn->createFlags.allowLargeThickTile; + pLib->m_configFlags.disableLinearOpt = FALSE; + + pLib->SetChipFamily(pCreateIn->chipFamily, pCreateIn->chipRevision); + + pLib->SetMinPitchAlignPixels(pCreateIn->minPitchAlignPixels); + + // Global parameters initialized and remaining configFlags bits are set as well + initValid = pLib->HwlInitGlobalParams(pCreateIn); + + if (initValid) + { + pLib->m_pElemLib = ElemLib::Create(pLib); + } + else + { + pLib->m_pElemLib = NULL; // Don't go on allocating element lib + returnCode = ADDR_INVALIDGBREGVALUES; + } + + if (pLib->m_pElemLib == NULL) + { + delete pLib; + pLib = NULL; + ADDR_ASSERT_ALWAYS(); + } + else + { + pLib->m_pElemLib->SetConfigFlags(pLib->m_configFlags); + } + } + + pCreateOut->hLib = pLib; + + if ((pLib != NULL) && + (returnCode == ADDR_OK)) + { + pCreateOut->numEquations = + pLib->HwlGetEquationTableInfo(&pCreateOut->pEquationTable); + + pLib->SetMaxAlignments(); + + } + else if ((pLib == NULL) && + (returnCode == ADDR_OK)) + { + // Unknown failures, we return the general error code + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::SetChipFamily +* +* @brief +* Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision +* @return +* N/A +**************************************************************************************************** +*/ +VOID Lib::SetChipFamily( + UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h + UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h +{ + ChipFamily family = HwlConvertChipFamily(uChipFamily, uChipRevision); + + ADDR_ASSERT(family != ADDR_CHIP_FAMILY_IVLD); + + m_chipFamily = family; + m_chipRevision = uChipRevision; +} + +/** 
+**************************************************************************************************** +* Lib::SetMinPitchAlignPixels +* +* @brief +* Set m_minPitchAlignPixels with input param +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID Lib::SetMinPitchAlignPixels( + UINT_32 minPitchAlignPixels) ///< [in] minmum pitch alignment in pixels +{ + m_minPitchAlignPixels = (minPitchAlignPixels == 0) ? 1 : minPitchAlignPixels; +} + +/** +**************************************************************************************************** +* Lib::SetMaxAlignments +* +* @brief +* Set max alignments +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID Lib::SetMaxAlignments() +{ + m_maxBaseAlign = HwlComputeMaxBaseAlignments(); + m_maxMetaBaseAlign = HwlComputeMaxMetaBaseAlignments(); +} + +/** +**************************************************************************************************** +* Lib::GetLib +* +* @brief +* Get AddrLib pointer +* +* @return +* An AddrLib class pointer +**************************************************************************************************** +*/ +Lib* Lib::GetLib( + ADDR_HANDLE hLib) ///< [in] handle of ADDR_HANDLE +{ + return static_cast(hLib); +} + +/** +**************************************************************************************************** +* Lib::GetMaxAlignments +* +* @brief +* Gets maximum alignments for data surface (include FMask) +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::GetMaxAlignments( + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if (pOut->size != sizeof(ADDR_GET_MAX_ALINGMENTS_OUTPUT)) + { + 
returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + if (m_maxBaseAlign != 0) + { + pOut->baseAlign = m_maxBaseAlign; + } + else + { + returnCode = ADDR_NOTIMPLEMENTED; + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::GetMaxMetaAlignments +* +* @brief +* Gets maximum alignments for metadata (CMask, DCC and HTile) +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::GetMaxMetaAlignments( + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if (pOut->size != sizeof(ADDR_GET_MAX_ALINGMENTS_OUTPUT)) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + if (m_maxMetaBaseAlign != 0) + { + pOut->baseAlign = m_maxMetaBaseAlign; + } + else + { + returnCode = ADDR_NOTIMPLEMENTED; + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::Bits2Number +* +* @brief +* Cat a array of binary bit to a number +* +* @return +* The number combined with the array of bits +**************************************************************************************************** +*/ +UINT_32 Lib::Bits2Number( + UINT_32 bitNum, ///< [in] how many bits + ...) 
///< [in] varaible bits value starting from MSB +{ + UINT_32 number = 0; + UINT_32 i; + va_list bits_ptr; + + va_start(bits_ptr, bitNum); + + for(i = 0; i < bitNum; i++) + { + number |= va_arg(bits_ptr, UINT_32); + number <<= 1; + } + + number >>= 1; + + va_end(bits_ptr); + + return number; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Element lib +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Lib::Flt32ToColorPixel +* +* @brief +* Convert a FLT_32 value to a depth/stencil pixel value +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::Flt32ToDepthPixel( + const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn, + ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ELEM_FLT32TODEPTHPIXEL_INPUT)) || + (pOut->size != sizeof(ELEM_FLT32TODEPTHPIXEL_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + GetElemLib()->Flt32ToDepthPixel(pIn->format, pIn->comps, pOut->pPixel); + + UINT_32 depthBase = 0; + UINT_32 stencilBase = 0; + UINT_32 depthBits = 0; + UINT_32 stencilBits = 0; + + switch (pIn->format) + { + case ADDR_DEPTH_16: + depthBits = 16; + break; + case ADDR_DEPTH_X8_24: + case ADDR_DEPTH_8_24: + case ADDR_DEPTH_X8_24_FLOAT: + case ADDR_DEPTH_8_24_FLOAT: + depthBase = 8; + depthBits = 24; + stencilBits = 8; + break; + case ADDR_DEPTH_32_FLOAT: + depthBits = 32; + break; + case ADDR_DEPTH_X24_8_32_FLOAT: + depthBase = 8; + depthBits = 32; + stencilBits = 8; + break; + default: + break; + } + + // Overwrite base since R800 has no "tileBase" + if (GetElemLib()->IsDepthStencilTilePlanar() == 
FALSE) + { + depthBase = 0; + stencilBase = 0; + } + + depthBase *= 64; + stencilBase *= 64; + + pOut->stencilBase = stencilBase; + pOut->depthBase = depthBase; + pOut->depthBits = depthBits; + pOut->stencilBits = stencilBits; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::Flt32ToColorPixel +* +* @brief +* Convert a FLT_32 value to a red/green/blue/alpha pixel value +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::Flt32ToColorPixel( + const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn, + ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ELEM_FLT32TOCOLORPIXEL_INPUT)) || + (pOut->size != sizeof(ELEM_FLT32TOCOLORPIXEL_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + GetElemLib()->Flt32ToColorPixel(pIn->format, + pIn->surfNum, + pIn->surfSwap, + pIn->comps, + pOut->pPixel); + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::GetExportNorm +* +* @brief +* Check one format can be EXPORT_NUM +* @return +* TRUE if EXPORT_NORM can be used +**************************************************************************************************** +*/ +BOOL_32 Lib::GetExportNorm( + const ELEM_GETEXPORTNORM_INPUT* pIn) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + BOOL_32 enabled = FALSE; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if (pIn->size != sizeof(ELEM_GETEXPORTNORM_INPUT)) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + enabled = GetElemLib()->PixGetExportNorm(pIn->format, pIn->num, pIn->swap); + } + + return enabled; +} + +/** 
+**************************************************************************************************** +* Lib::GetBpe +* +* @brief +* Get bits-per-element for specified format +* @return +* bits-per-element of specified format +**************************************************************************************************** +*/ +UINT_32 Lib::GetBpe(AddrFormat format) const +{ + return GetElemLib()->GetBitsPerPixel(format); +} + +} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/addrlib.h mesa-19.0.1/src/amd/addrlib/src/core/addrlib.h --- mesa-18.3.3/src/amd/addrlib/src/core/addrlib.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/addrlib.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,415 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ */ + +/** +**************************************************************************************************** +* @file addrlib.h +* @brief Contains the Addr::Lib base class definition. +**************************************************************************************************** +*/ + +#ifndef __ADDR_LIB_H__ +#define __ADDR_LIB_H__ + +#include "addrinterface.h" +#include "addrobject.h" +#include "addrelemlib.h" + +#include "amdgpu_asic_addr.h" + +#ifndef CIASICIDGFXENGINE_R600 +#define CIASICIDGFXENGINE_R600 0x00000006 +#endif + +#ifndef CIASICIDGFXENGINE_R800 +#define CIASICIDGFXENGINE_R800 0x00000008 +#endif + +#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND +#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A +#endif + +#ifndef CIASICIDGFXENGINE_ARCTICISLAND +#define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D +#endif + +namespace Addr +{ + +/** +**************************************************************************************************** +* @brief Neutral enums that define pipeinterleave +**************************************************************************************************** +*/ +enum PipeInterleave +{ + ADDR_PIPEINTERLEAVE_256B = 256, + ADDR_PIPEINTERLEAVE_512B = 512, + ADDR_PIPEINTERLEAVE_1KB = 1024, + ADDR_PIPEINTERLEAVE_2KB = 2048, +}; + +/** +**************************************************************************************************** +* @brief Neutral enums that define DRAM row size +**************************************************************************************************** +*/ +enum RowSize +{ + ADDR_ROWSIZE_1KB = 1024, + ADDR_ROWSIZE_2KB = 2048, + ADDR_ROWSIZE_4KB = 4096, + ADDR_ROWSIZE_8KB = 8192, +}; + +/** +**************************************************************************************************** +* @brief Neutral enums that define bank interleave +**************************************************************************************************** +*/ +enum BankInterleave +{ + ADDR_BANKINTERLEAVE_1 = 1, 
+ ADDR_BANKINTERLEAVE_2 = 2, + ADDR_BANKINTERLEAVE_4 = 4, + ADDR_BANKINTERLEAVE_8 = 8, +}; + +/** +**************************************************************************************************** +* @brief Neutral enums that define shader engine tile size +**************************************************************************************************** +*/ +enum ShaderEngineTileSize +{ + ADDR_SE_TILESIZE_16 = 16, + ADDR_SE_TILESIZE_32 = 32, +}; + +/** +**************************************************************************************************** +* @brief Neutral enums that define bank swap size +**************************************************************************************************** +*/ +enum BankSwapSize +{ + ADDR_BANKSWAP_128B = 128, + ADDR_BANKSWAP_256B = 256, + ADDR_BANKSWAP_512B = 512, + ADDR_BANKSWAP_1KB = 1024, +}; + +/** +**************************************************************************************************** +* @brief Enums that define max compressed fragments config +**************************************************************************************************** +*/ +enum NumMaxCompressedFragmentsConfig +{ + ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS = 0x00000000, + ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS = 0x00000001, + ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS = 0x00000002, + ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS = 0x00000003, +}; + +/** +**************************************************************************************************** +* @brief Enums that define num pipes config +**************************************************************************************************** +*/ +enum NumPipesConfig +{ + ADDR_CONFIG_1_PIPE = 0x00000000, + ADDR_CONFIG_2_PIPE = 0x00000001, + ADDR_CONFIG_4_PIPE = 0x00000002, + ADDR_CONFIG_8_PIPE = 0x00000003, + ADDR_CONFIG_16_PIPE = 0x00000004, + ADDR_CONFIG_32_PIPE = 0x00000005, + ADDR_CONFIG_64_PIPE = 0x00000006, +}; + +/** 
+**************************************************************************************************** +* @brief Enums that define num banks config +**************************************************************************************************** +*/ +enum NumBanksConfig +{ + ADDR_CONFIG_1_BANK = 0x00000000, + ADDR_CONFIG_2_BANK = 0x00000001, + ADDR_CONFIG_4_BANK = 0x00000002, + ADDR_CONFIG_8_BANK = 0x00000003, + ADDR_CONFIG_16_BANK = 0x00000004, +}; + +/** +**************************************************************************************************** +* @brief Enums that define num rb per shader engine config +**************************************************************************************************** +*/ +enum NumRbPerShaderEngineConfig +{ + ADDR_CONFIG_1_RB_PER_SHADER_ENGINE = 0x00000000, + ADDR_CONFIG_2_RB_PER_SHADER_ENGINE = 0x00000001, + ADDR_CONFIG_4_RB_PER_SHADER_ENGINE = 0x00000002, +}; + +/** +**************************************************************************************************** +* @brief Enums that define num shader engines config +**************************************************************************************************** +*/ +enum NumShaderEnginesConfig +{ + ADDR_CONFIG_1_SHADER_ENGINE = 0x00000000, + ADDR_CONFIG_2_SHADER_ENGINE = 0x00000001, + ADDR_CONFIG_4_SHADER_ENGINE = 0x00000002, + ADDR_CONFIG_8_SHADER_ENGINE = 0x00000003, +}; + +/** +**************************************************************************************************** +* @brief Enums that define pipe interleave size config +**************************************************************************************************** +*/ +enum PipeInterleaveSizeConfig +{ + ADDR_CONFIG_PIPE_INTERLEAVE_256B = 0x00000000, + ADDR_CONFIG_PIPE_INTERLEAVE_512B = 0x00000001, + ADDR_CONFIG_PIPE_INTERLEAVE_1KB = 0x00000002, + ADDR_CONFIG_PIPE_INTERLEAVE_2KB = 0x00000003, +}; + +/** 
+**************************************************************************************************** +* @brief Enums that define row size config +**************************************************************************************************** +*/ +enum RowSizeConfig +{ + ADDR_CONFIG_1KB_ROW = 0x00000000, + ADDR_CONFIG_2KB_ROW = 0x00000001, + ADDR_CONFIG_4KB_ROW = 0x00000002, +}; + +/** +**************************************************************************************************** +* @brief Enums that define bank interleave size config +**************************************************************************************************** +*/ +enum BankInterleaveSizeConfig +{ + ADDR_CONFIG_BANK_INTERLEAVE_1 = 0x00000000, + ADDR_CONFIG_BANK_INTERLEAVE_2 = 0x00000001, + ADDR_CONFIG_BANK_INTERLEAVE_4 = 0x00000002, + ADDR_CONFIG_BANK_INTERLEAVE_8 = 0x00000003, +}; + +/** +**************************************************************************************************** +* @brief Enums that define engine tile size config +**************************************************************************************************** +*/ +enum ShaderEngineTileSizeConfig +{ + ADDR_CONFIG_SE_TILE_16 = 0x00000000, + ADDR_CONFIG_SE_TILE_32 = 0x00000001, +}; + +/** +**************************************************************************************************** +* @brief This class contains asic independent address lib functionalities +**************************************************************************************************** +*/ +class Lib : public Object +{ +public: + virtual ~Lib(); + + static ADDR_E_RETURNCODE Create( + const ADDR_CREATE_INPUT* pCreateInfo, ADDR_CREATE_OUTPUT* pCreateOut); + + /// Pair of Create + VOID Destroy() + { + delete this; + } + + static Lib* GetLib(ADDR_HANDLE hLib); + + /// Returns AddrLib version (from compiled binary instead include file) + UINT_32 GetVersion() + { + return m_version; + } + + /// Returns asic chip family name defined 
by AddrLib + ChipFamily GetChipFamily() + { + return m_chipFamily; + } + + ADDR_E_RETURNCODE Flt32ToDepthPixel( + const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn, + ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE Flt32ToColorPixel( + const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn, + ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) const; + + BOOL_32 GetExportNorm(const ELEM_GETEXPORTNORM_INPUT* pIn) const; + + ADDR_E_RETURNCODE GetMaxAlignments(ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE GetMaxMetaAlignments(ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) const; + + UINT_32 GetBpe(AddrFormat format) const; + +protected: + Lib(); // Constructor is protected + Lib(const Client* pClient); + + /// Pure virtual function to get max base alignments + virtual UINT_32 HwlComputeMaxBaseAlignments() const = 0; + + /// Gets maximum alignements for metadata + virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const + { + ADDR_NOT_IMPLEMENTED(); + + return 0; + } + + VOID ValidBaseAlignments(UINT_32 alignment) const + { +#if DEBUG + ADDR_ASSERT(alignment <= m_maxBaseAlign); +#endif + } + + VOID ValidMetaBaseAlignments(UINT_32 metaAlignment) const + { +#if DEBUG + ADDR_ASSERT(metaAlignment <= m_maxMetaBaseAlign); +#endif + } + + // + // Initialization + // + /// Pure Virtual function for Hwl computing internal global parameters from h/w registers + virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn) = 0; + + /// Pure Virtual function for Hwl converting chip family + virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision) = 0; + + /// Get equation table pointer and number of equations + virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const + { + *ppEquationTable = NULL; + + return 0; + } + + // + // Misc helper + // + static UINT_32 Bits2Number(UINT_32 bitNum, ...); + + static UINT_32 GetNumFragments(UINT_32 numSamples, UINT_32 numFrags) + { + return (numFrags != 0) ? 
numFrags : Max(1u, numSamples); + } + + /// Returns pointer of ElemLib + ElemLib* GetElemLib() const + { + return m_pElemLib; + } + + /// Returns fillSizeFields flag + UINT_32 GetFillSizeFieldsFlags() const + { + return m_configFlags.fillSizeFields; + } + +private: + // Disallow the copy constructor + Lib(const Lib& a); + + // Disallow the assignment operator + Lib& operator=(const Lib& a); + + VOID SetChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision); + + VOID SetMinPitchAlignPixels(UINT_32 minPitchAlignPixels); + + VOID SetMaxAlignments(); + +protected: + LibClass m_class; ///< Store class type (HWL type) + + ChipFamily m_chipFamily; ///< Chip family translated from the one in atiid.h + + UINT_32 m_chipRevision; ///< Revision id from xxx_id.h + + UINT_32 m_version; ///< Current version + + // + // Global parameters + // + ConfigFlags m_configFlags; ///< Global configuration flags. Note this is setup by + /// AddrLib instead of Client except forceLinearAligned + + UINT_32 m_pipes; ///< Number of pipes + UINT_32 m_banks; ///< Number of banks + /// For r800 this is MC_ARB_RAMCFG.NOOFBANK + /// Keep it here to do default parameter calculation + + UINT_32 m_pipeInterleaveBytes; + ///< Specifies the size of contiguous address space + /// within each tiling pipe when making linear + /// accesses. 
(Formerly Group Size) + + UINT_32 m_rowSize; ///< DRAM row size, in bytes + + UINT_32 m_minPitchAlignPixels; ///< Minimum pitch alignment in pixels + UINT_32 m_maxSamples; ///< Max numSamples + + UINT_32 m_maxBaseAlign; ///< Max base alignment for data surface + UINT_32 m_maxMetaBaseAlign; ///< Max base alignment for metadata + +private: + ElemLib* m_pElemLib; ///< Element Lib pointer +}; + +Lib* SiHwlInit (const Client* pClient); +Lib* CiHwlInit (const Client* pClient); +Lib* Gfx9HwlInit (const Client* pClient); + +} // Addr + +#endif diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/addrobject.cpp mesa-19.0.1/src/amd/addrlib/src/core/addrobject.cpp --- mesa-18.3.3/src/amd/addrlib/src/core/addrobject.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/addrobject.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,233 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file addrobject.cpp +* @brief Contains the Object base class implementation. +**************************************************************************************************** +*/ + +#include "addrinterface.h" +#include "addrobject.h" + +namespace Addr +{ + +/** +**************************************************************************************************** +* Object::Object +* +* @brief +* Constructor for the Object class. +**************************************************************************************************** +*/ +Object::Object() +{ + m_client.handle = NULL; + m_client.callbacks.allocSysMem = NULL; + m_client.callbacks.freeSysMem = NULL; + m_client.callbacks.debugPrint = NULL; +} + +/** +**************************************************************************************************** +* Object::Object +* +* @brief +* Constructor for the Object class. +**************************************************************************************************** +*/ +Object::Object(const Client* pClient) +{ + m_client = *pClient; +} + +/** +**************************************************************************************************** +* Object::~Object +* +* @brief +* Destructor for the Object class. 
+**************************************************************************************************** +*/ +Object::~Object() +{ +} + +/** +**************************************************************************************************** +* Object::ClientAlloc +* +* @brief +* Calls instanced allocSysMem inside Client +**************************************************************************************************** +*/ +VOID* Object::ClientAlloc( + size_t objSize, ///< [in] Size to allocate + const Client* pClient) ///< [in] Client pointer +{ + VOID* pObjMem = NULL; + + if (pClient->callbacks.allocSysMem != NULL) + { + ADDR_ALLOCSYSMEM_INPUT allocInput = {0}; + + allocInput.size = sizeof(ADDR_ALLOCSYSMEM_INPUT); + allocInput.flags.value = 0; + allocInput.sizeInBytes = static_cast(objSize); + allocInput.hClient = pClient->handle; + + pObjMem = pClient->callbacks.allocSysMem(&allocInput); + } + + return pObjMem; +} + +/** +**************************************************************************************************** +* Object::Alloc +* +* @brief +* A wrapper of ClientAlloc +**************************************************************************************************** +*/ +VOID* Object::Alloc( + size_t objSize ///< [in] Size to allocate + ) const +{ + return ClientAlloc(objSize, &m_client); +} + +/** +**************************************************************************************************** +* Object::ClientFree +* +* @brief +* Calls freeSysMem inside Client +**************************************************************************************************** +*/ +VOID Object::ClientFree( + VOID* pObjMem, ///< [in] User virtual address to free. 
+ const Client* pClient) ///< [in] Client pointer +{ + if (pClient->callbacks.freeSysMem != NULL) + { + if (pObjMem != NULL) + { + ADDR_FREESYSMEM_INPUT freeInput = {0}; + + freeInput.size = sizeof(ADDR_FREESYSMEM_INPUT); + freeInput.hClient = pClient->handle; + freeInput.pVirtAddr = pObjMem; + + pClient->callbacks.freeSysMem(&freeInput); + } + } +} + +/** +**************************************************************************************************** +* Object::Free +* +* @brief +* A wrapper of ClientFree +**************************************************************************************************** +*/ +VOID Object::Free( + VOID* pObjMem ///< [in] User virtual address to free. + ) const +{ + ClientFree(pObjMem, &m_client); +} + +/** +**************************************************************************************************** +* Object::operator new +* +* @brief +* Placement new operator. (with pre-allocated memory pointer) +* +* @return +* Returns pre-allocated memory pointer. +**************************************************************************************************** +*/ +VOID* Object::operator new( + size_t objSize, ///< [in] Size to allocate + VOID* pMem) ///< [in] Pre-allocated pointer +{ + return pMem; +} + +/** +**************************************************************************************************** +* Object::operator delete +* +* @brief +* Frees Object object memory. +**************************************************************************************************** +*/ +VOID Object::operator delete( + VOID* pObjMem) ///< [in] User virtual address to free. 
+{ + Object* pObj = static_cast(pObjMem); + ClientFree(pObjMem, &pObj->m_client); +} + +/** +**************************************************************************************************** +* Object::DebugPrint +* +* @brief +* Print debug message +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID Object::DebugPrint( + const CHAR* pDebugString, ///< [in] Debug string + ... + ) const +{ +#if DEBUG + if (m_client.callbacks.debugPrint != NULL) + { + ADDR_DEBUGPRINT_INPUT debugPrintInput = {0}; + + debugPrintInput.size = sizeof(ADDR_DEBUGPRINT_INPUT); + debugPrintInput.pDebugString = const_cast(pDebugString); + debugPrintInput.hClient = m_client.handle; + va_start(debugPrintInput.ap, pDebugString); + + m_client.callbacks.debugPrint(&debugPrintInput); + + va_end(debugPrintInput.ap); + } +#endif +} + +} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/addrobject.h mesa-19.0.1/src/amd/addrlib/src/core/addrobject.h --- mesa-18.3.3/src/amd/addrlib/src/core/addrobject.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/addrobject.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,95 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file addrobject.h +* @brief Contains the Object base class definition. +**************************************************************************************************** +*/ + +#ifndef __ADDR_OBJECT_H__ +#define __ADDR_OBJECT_H__ + +#include "addrtypes.h" +#include "addrcommon.h" + +namespace Addr +{ + +/** +**************************************************************************************************** +* @brief This structure contains client specific data +**************************************************************************************************** +*/ +struct Client +{ + ADDR_CLIENT_HANDLE handle; + ADDR_CALLBACKS callbacks; +}; +/** +**************************************************************************************************** +* @brief This class is the base class for all ADDR class objects. +**************************************************************************************************** +*/ +class Object +{ +public: + Object(); + Object(const Client* pClient); + virtual ~Object(); + + VOID* operator new(size_t size, VOID* pMem); + VOID operator delete(VOID* pObj); + /// Microsoft compiler requires a matching delete implementation, which seems to be called when + /// bad_alloc is thrown. But currently C++ exception isn't allowed so a dummy implementation is + /// added to eliminate the warning. 
+ VOID operator delete(VOID* pObj, VOID* pMem) { ADDR_ASSERT_ALWAYS(); } + + VOID* Alloc(size_t size) const; + VOID Free(VOID* pObj) const; + + VOID DebugPrint(const CHAR* pDebugString, ...) const; + + const Client* GetClient() const {return &m_client;} + +protected: + Client m_client; + + static VOID* ClientAlloc(size_t size, const Client* pClient); + static VOID ClientFree(VOID* pObj, const Client* pClient); + +private: + // disallow the copy constructor + Object(const Object& a); + + // disallow the assignment operator + Object& operator=(const Object& a); +}; + +} // Addr +#endif + diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/coord.cpp mesa-19.0.1/src/amd/addrlib/src/core/coord.cpp --- mesa-18.3.3/src/amd/addrlib/src/core/coord.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/coord.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,714 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +// Coordinate class implementation +#include "addrcommon.h" +#include "coord.h" + +namespace Addr +{ +namespace V2 +{ + +Coordinate::Coordinate() +{ + dim = 'x'; + ord = 0; +} + +Coordinate::Coordinate(INT_8 c, INT_32 n) +{ + set(c, n); +} + +VOID Coordinate::set(INT_8 c, INT_32 n) +{ + dim = c; + ord = static_cast(n); +} + +UINT_32 Coordinate::ison(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const +{ + UINT_32 bit = static_cast(1ull << static_cast(ord)); + UINT_32 out = 0; + + switch (dim) + { + case 'm': out = m & bit; break; + case 's': out = s & bit; break; + case 'x': out = x & bit; break; + case 'y': out = y & bit; break; + case 'z': out = z & bit; break; + } + return (out != 0) ? 1 : 0; +} + +INT_8 Coordinate::getdim() +{ + return dim; +} + +INT_8 Coordinate::getord() +{ + return ord; +} + +BOOL_32 Coordinate::operator==(const Coordinate& b) +{ + return (dim == b.dim) && (ord == b.ord); +} + +BOOL_32 Coordinate::operator<(const Coordinate& b) +{ + BOOL_32 ret; + + if (dim == b.dim) + { + ret = ord < b.ord; + } + else + { + if (dim == 's' || b.dim == 'm') + { + ret = TRUE; + } + else if (b.dim == 's' || dim == 'm') + { + ret = FALSE; + } + else if (ord == b.ord) + { + ret = dim < b.dim; + } + else + { + ret = ord < b.ord; + } + } + + return ret; +} + +BOOL_32 Coordinate::operator>(const Coordinate& b) +{ + BOOL_32 lt = *this < b; + BOOL_32 eq = *this == b; + return !lt && !eq; +} + +BOOL_32 Coordinate::operator<=(const Coordinate& b) +{ + return (*this < b) || (*this == b); +} + +BOOL_32 Coordinate::operator>=(const Coordinate& b) +{ + return !(*this < b); +} + +BOOL_32 Coordinate::operator!=(const Coordinate& b) +{ + return !(*this == b); +} + +Coordinate& Coordinate::operator++(INT_32) +{ + ord++; + return *this; +} + +// CoordTerm + +CoordTerm::CoordTerm() +{ + 
num_coords = 0; +} + +VOID CoordTerm::Clear() +{ + num_coords = 0; +} + +VOID CoordTerm::add(Coordinate& co) +{ + // This function adds a coordinate INT_32o the list + // It will prevent the same coordinate from appearing, + // and will keep the list ordered from smallest to largest + UINT_32 i; + + for (i = 0; i < num_coords; i++) + { + if (m_coord[i] == co) + { + break; + } + if (m_coord[i] > co) + { + for (UINT_32 j = num_coords; j > i; j--) + { + m_coord[j] = m_coord[j - 1]; + } + m_coord[i] = co; + num_coords++; + break; + } + } + + if (i == num_coords) + { + m_coord[num_coords] = co; + num_coords++; + } +} + +VOID CoordTerm::add(CoordTerm& cl) +{ + for (UINT_32 i = 0; i < cl.num_coords; i++) + { + add(cl.m_coord[i]); + } +} + +BOOL_32 CoordTerm::remove(Coordinate& co) +{ + BOOL_32 remove = FALSE; + for (UINT_32 i = 0; i < num_coords; i++) + { + if (m_coord[i] == co) + { + remove = TRUE; + num_coords--; + } + + if (remove) + { + m_coord[i] = m_coord[i + 1]; + } + } + return remove; +} + +BOOL_32 CoordTerm::Exists(Coordinate& co) +{ + BOOL_32 exists = FALSE; + for (UINT_32 i = 0; i < num_coords; i++) + { + if (m_coord[i] == co) + { + exists = TRUE; + break; + } + } + return exists; +} + +VOID CoordTerm::copyto(CoordTerm& cl) +{ + cl.num_coords = num_coords; + for (UINT_32 i = 0; i < num_coords; i++) + { + cl.m_coord[i] = m_coord[i]; + } +} + +UINT_32 CoordTerm::getsize() +{ + return num_coords; +} + +UINT_32 CoordTerm::getxor(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const +{ + UINT_32 out = 0; + for (UINT_32 i = 0; i < num_coords; i++) + { + out = out ^ m_coord[i].ison(x, y, z, s, m); + } + return out; +} + +VOID CoordTerm::getsmallest(Coordinate& co) +{ + co = m_coord[0]; +} + +UINT_32 CoordTerm::Filter(INT_8 f, Coordinate& co, UINT_32 start, INT_8 axis) +{ + for (UINT_32 i = start; i < num_coords;) + { + if (((f == '<' && m_coord[i] < co) || + (f == '>' && m_coord[i] > co) || + (f == '=' && m_coord[i] == co)) && + (axis == '\0' || axis == 
m_coord[i].getdim())) + { + for (UINT_32 j = i; j < num_coords - 1; j++) + { + m_coord[j] = m_coord[j + 1]; + } + num_coords--; + } + else + { + i++; + } + } + return num_coords; +} + +Coordinate& CoordTerm::operator[](UINT_32 i) +{ + return m_coord[i]; +} + +BOOL_32 CoordTerm::operator==(const CoordTerm& b) +{ + BOOL_32 ret = TRUE; + + if (num_coords != b.num_coords) + { + ret = FALSE; + } + else + { + for (UINT_32 i = 0; i < num_coords; i++) + { + // Note: the lists will always be in order, so we can compare the two lists at time + if (m_coord[i] != b.m_coord[i]) + { + ret = FALSE; + break; + } + } + } + return ret; +} + +BOOL_32 CoordTerm::operator!=(const CoordTerm& b) +{ + return !(*this == b); +} + +BOOL_32 CoordTerm::exceedRange(UINT_32 xRange, UINT_32 yRange, UINT_32 zRange, UINT_32 sRange) +{ + BOOL_32 exceed = FALSE; + for (UINT_32 i = 0; (i < num_coords) && (exceed == FALSE); i++) + { + UINT_32 subject; + switch (m_coord[i].getdim()) + { + case 'x': + subject = xRange; + break; + case 'y': + subject = yRange; + break; + case 'z': + subject = zRange; + break; + case 's': + subject = sRange; + break; + case 'm': + subject = 0; + break; + default: + // Invalid input! 
+ ADDR_ASSERT_ALWAYS(); + subject = 0; + break; + } + + exceed = ((1u << m_coord[i].getord()) <= subject); + } + + return exceed; +} + +// coordeq +CoordEq::CoordEq() +{ + m_numBits = 0; +} + +VOID CoordEq::remove(Coordinate& co) +{ + for (UINT_32 i = 0; i < m_numBits; i++) + { + m_eq[i].remove(co); + } +} + +BOOL_32 CoordEq::Exists(Coordinate& co) +{ + BOOL_32 exists = FALSE; + + for (UINT_32 i = 0; i < m_numBits; i++) + { + if (m_eq[i].Exists(co)) + { + exists = TRUE; + } + } + return exists; +} + +VOID CoordEq::resize(UINT_32 n) +{ + if (n > m_numBits) + { + for (UINT_32 i = m_numBits; i < n; i++) + { + m_eq[i].Clear(); + } + } + m_numBits = n; +} + +UINT_32 CoordEq::getsize() +{ + return m_numBits; +} + +UINT_64 CoordEq::solve(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const +{ + UINT_64 out = 0; + for (UINT_32 i = 0; i < m_numBits; i++) + { + if (m_eq[i].getxor(x, y, z, s, m) != 0) + { + out |= (1ULL << i); + } + } + return out; +} + +VOID CoordEq::solveAddr( + UINT_64 addr, UINT_32 sliceInM, + UINT_32& x, UINT_32& y, UINT_32& z, UINT_32& s, UINT_32& m) const +{ + UINT_32 xBitsValid = 0; + UINT_32 yBitsValid = 0; + UINT_32 zBitsValid = 0; + UINT_32 sBitsValid = 0; + UINT_32 mBitsValid = 0; + + CoordEq temp = *this; + + x = y = z = s = m = 0; + + UINT_32 bitsLeft = 0; + + for (UINT_32 i = 0; i < temp.m_numBits; i++) + { + UINT_32 termSize = temp.m_eq[i].getsize(); + + if (termSize == 1) + { + INT_8 bit = (addr >> i) & 1; + INT_8 dim = temp.m_eq[i][0].getdim(); + INT_8 ord = temp.m_eq[i][0].getord(); + + ADDR_ASSERT((ord < 32) || (bit == 0)); + + switch (dim) + { + case 'x': + xBitsValid |= (1 << ord); + x |= (bit << ord); + break; + case 'y': + yBitsValid |= (1 << ord); + y |= (bit << ord); + break; + case 'z': + zBitsValid |= (1 << ord); + z |= (bit << ord); + break; + case 's': + sBitsValid |= (1 << ord); + s |= (bit << ord); + break; + case 'm': + mBitsValid |= (1 << ord); + m |= (bit << ord); + break; + default: + break; + } + + 
temp.m_eq[i].Clear(); + } + else if (termSize > 1) + { + bitsLeft++; + } + } + + if (bitsLeft > 0) + { + if (sliceInM != 0) + { + z = m / sliceInM; + zBitsValid = 0xffffffff; + } + + do + { + bitsLeft = 0; + + for (UINT_32 i = 0; i < temp.m_numBits; i++) + { + UINT_32 termSize = temp.m_eq[i].getsize(); + + if (termSize == 1) + { + INT_8 bit = (addr >> i) & 1; + INT_8 dim = temp.m_eq[i][0].getdim(); + INT_8 ord = temp.m_eq[i][0].getord(); + + ADDR_ASSERT((ord < 32) || (bit == 0)); + + switch (dim) + { + case 'x': + xBitsValid |= (1 << ord); + x |= (bit << ord); + break; + case 'y': + yBitsValid |= (1 << ord); + y |= (bit << ord); + break; + case 'z': + zBitsValid |= (1 << ord); + z |= (bit << ord); + break; + case 's': + ADDR_ASSERT_ALWAYS(); + break; + case 'm': + ADDR_ASSERT_ALWAYS(); + break; + default: + break; + } + + temp.m_eq[i].Clear(); + } + else if (termSize > 1) + { + CoordTerm tmpTerm = temp.m_eq[i]; + + for (UINT_32 j = 0; j < termSize; j++) + { + INT_8 dim = temp.m_eq[i][j].getdim(); + INT_8 ord = temp.m_eq[i][j].getord(); + + switch (dim) + { + case 'x': + if (xBitsValid & (1 << ord)) + { + UINT_32 v = (((x >> ord) & 1) << i); + addr ^= static_cast(v); + tmpTerm.remove(temp.m_eq[i][j]); + } + break; + case 'y': + if (yBitsValid & (1 << ord)) + { + UINT_32 v = (((y >> ord) & 1) << i); + addr ^= static_cast(v); + tmpTerm.remove(temp.m_eq[i][j]); + } + break; + case 'z': + if (zBitsValid & (1 << ord)) + { + UINT_32 v = (((z >> ord) & 1) << i); + addr ^= static_cast(v); + tmpTerm.remove(temp.m_eq[i][j]); + } + break; + case 's': + ADDR_ASSERT_ALWAYS(); + break; + case 'm': + ADDR_ASSERT_ALWAYS(); + break; + default: + break; + } + } + + temp.m_eq[i] = tmpTerm; + + bitsLeft++; + } + } + } while (bitsLeft > 0); + } +} + +VOID CoordEq::copy(CoordEq& o, UINT_32 start, UINT_32 num) +{ + o.m_numBits = (num == 0xFFFFFFFF) ? 
m_numBits : num; + for (UINT_32 i = 0; i < o.m_numBits; i++) + { + m_eq[start + i].copyto(o.m_eq[i]); + } +} + +VOID CoordEq::reverse(UINT_32 start, UINT_32 num) +{ + UINT_32 n = (num == 0xFFFFFFFF) ? m_numBits : num; + + for (UINT_32 i = 0; i < n / 2; i++) + { + CoordTerm temp; + m_eq[start + i].copyto(temp); + m_eq[start + n - 1 - i].copyto(m_eq[start + i]); + temp.copyto(m_eq[start + n - 1 - i]); + } +} + +VOID CoordEq::xorin(CoordEq& x, UINT_32 start) +{ + UINT_32 n = ((m_numBits - start) < x.m_numBits) ? (m_numBits - start) : x.m_numBits; + for (UINT_32 i = 0; i < n; i++) + { + m_eq[start + i].add(x.m_eq[i]); + } +} + +UINT_32 CoordEq::Filter(INT_8 f, Coordinate& co, UINT_32 start, INT_8 axis) +{ + for (UINT_32 i = start; i < m_numBits;) + { + UINT_32 m = m_eq[i].Filter(f, co, 0, axis); + if (m == 0) + { + for (UINT_32 j = i; j < m_numBits - 1; j++) + { + m_eq[j] = m_eq[j + 1]; + } + m_numBits--; + } + else + { + i++; + } + } + return m_numBits; +} + +VOID CoordEq::shift(INT_32 amount, INT_32 start) +{ + if (amount != 0) + { + INT_32 numBits = static_cast(m_numBits); + amount = -amount; + INT_32 inc = (amount < 0) ? -1 : 1; + INT_32 i = (amount < 0) ? numBits - 1 : start; + INT_32 end = (amount < 0) ? start - 1 : numBits; + for (; (inc > 0) ? i < end : i > end; i += inc) + { + if ((i + amount < start) || (i + amount >= numBits)) + { + m_eq[i].Clear(); + } + else + { + m_eq[i + amount].copyto(m_eq[i]); + } + } + } +} + +CoordTerm& CoordEq::operator[](UINT_32 i) +{ + return m_eq[i]; +} + +VOID CoordEq::mort2d(Coordinate& c0, Coordinate& c1, UINT_32 start, UINT_32 end) +{ + if (end == 0) + { + ADDR_ASSERT(m_numBits > 0); + end = m_numBits - 1; + } + for (UINT_32 i = start; i <= end; i++) + { + UINT_32 select = (i - start) % 2; + Coordinate& c = (select == 0) ? 
c0 : c1; + m_eq[i].add(c); + c++; + } +} + +VOID CoordEq::mort3d(Coordinate& c0, Coordinate& c1, Coordinate& c2, UINT_32 start, UINT_32 end) +{ + if (end == 0) + { + ADDR_ASSERT(m_numBits > 0); + end = m_numBits - 1; + } + for (UINT_32 i = start; i <= end; i++) + { + UINT_32 select = (i - start) % 3; + Coordinate& c = (select == 0) ? c0 : ((select == 1) ? c1 : c2); + m_eq[i].add(c); + c++; + } +} + +BOOL_32 CoordEq::operator==(const CoordEq& b) +{ + BOOL_32 ret = TRUE; + + if (m_numBits != b.m_numBits) + { + ret = FALSE; + } + else + { + for (UINT_32 i = 0; i < m_numBits; i++) + { + if (m_eq[i] != b.m_eq[i]) + { + ret = FALSE; + break; + } + } + } + return ret; +} + +BOOL_32 CoordEq::operator!=(const CoordEq& b) +{ + return !(*this == b); +} + +} // V2 +} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/coord.h mesa-19.0.1/src/amd/addrlib/src/core/coord.h --- mesa-18.3.3/src/amd/addrlib/src/core/coord.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/coord.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,122 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +// Class used to define a coordinate bit + +#ifndef __COORD_H +#define __COORD_H + +namespace Addr +{ +namespace V2 +{ + +class Coordinate +{ +public: + Coordinate(); + Coordinate(INT_8 c, INT_32 n); + + VOID set(INT_8 c, INT_32 n); + UINT_32 ison(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const; + INT_8 getdim(); + INT_8 getord(); + + BOOL_32 operator==(const Coordinate& b); + BOOL_32 operator<(const Coordinate& b); + BOOL_32 operator>(const Coordinate& b); + BOOL_32 operator<=(const Coordinate& b); + BOOL_32 operator>=(const Coordinate& b); + BOOL_32 operator!=(const Coordinate& b); + Coordinate& operator++(INT_32); + +private: + INT_8 dim; + INT_8 ord; +}; + +class CoordTerm +{ +public: + CoordTerm(); + VOID Clear(); + VOID add(Coordinate& co); + VOID add(CoordTerm& cl); + BOOL_32 remove(Coordinate& co); + BOOL_32 Exists(Coordinate& co); + VOID copyto(CoordTerm& cl); + UINT_32 getsize(); + UINT_32 getxor(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const; + + VOID getsmallest(Coordinate& co); + UINT_32 Filter(INT_8 f, Coordinate& co, UINT_32 start = 0, INT_8 axis = '\0'); + Coordinate& operator[](UINT_32 i); + BOOL_32 operator==(const CoordTerm& b); + BOOL_32 operator!=(const CoordTerm& b); + BOOL_32 exceedRange(UINT_32 xRange, UINT_32 yRange = 0, UINT_32 zRange = 0, UINT_32 sRange = 0); + +private: + static const UINT_32 MaxCoords = 8; + UINT_32 num_coords; + Coordinate m_coord[MaxCoords]; +}; + +class CoordEq +{ +public: + CoordEq(); + VOID 
remove(Coordinate& co); + BOOL_32 Exists(Coordinate& co); + VOID resize(UINT_32 n); + UINT_32 getsize(); + virtual UINT_64 solve(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const; + virtual VOID solveAddr(UINT_64 addr, UINT_32 sliceInM, + UINT_32& x, UINT_32& y, UINT_32& z, UINT_32& s, UINT_32& m) const; + + VOID copy(CoordEq& o, UINT_32 start = 0, UINT_32 num = 0xFFFFFFFF); + VOID reverse(UINT_32 start = 0, UINT_32 num = 0xFFFFFFFF); + VOID xorin(CoordEq& x, UINT_32 start = 0); + UINT_32 Filter(INT_8 f, Coordinate& co, UINT_32 start = 0, INT_8 axis = '\0'); + VOID shift(INT_32 amount, INT_32 start = 0); + virtual CoordTerm& operator[](UINT_32 i); + VOID mort2d(Coordinate& c0, Coordinate& c1, UINT_32 start = 0, UINT_32 end = 0); + VOID mort3d(Coordinate& c0, Coordinate& c1, Coordinate& c2, UINT_32 start = 0, UINT_32 end = 0); + + BOOL_32 operator==(const CoordEq& b); + BOOL_32 operator!=(const CoordEq& b); + +private: + static const UINT_32 MaxEqBits = 64; + UINT_32 m_numBits; + + CoordTerm m_eq[MaxEqBits]; +}; + +} // V2 +} // Addr + +#endif + diff -Nru mesa-18.3.3/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp mesa-19.0.1/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp --- mesa-18.3.3/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,4871 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +************************************************************************************************************************ +* @file gfx9addrlib.cpp +* @brief Contgfx9ns the implementation for the Gfx9Lib class. 
+************************************************************************************************************************ +*/ + +#include "gfx9addrlib.h" + +#include "gfx9_gb_reg.h" + +#include "amdgpu_asic_addr.h" + +#include "util/macros.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace Addr +{ + +/** +************************************************************************************************************************ +* Gfx9HwlInit +* +* @brief +* Creates an Gfx9Lib object. +* +* @return +* Returns an Gfx9Lib object pointer. +************************************************************************************************************************ +*/ +Addr::Lib* Gfx9HwlInit(const Client* pClient) +{ + return V2::Gfx9Lib::CreateObj(pClient); +} + +namespace V2 +{ + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Static Const Member +//////////////////////////////////////////////////////////////////////////////////////////////////// + +const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] = +{//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt + {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR + {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S + {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D + {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_256B_R + + {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z + {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S + {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D + {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_R + + {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z + {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S + {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D + {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0}, // 
ADDR_SW_64KB_R + + {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_Z + {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_S + {0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_VAR_D + {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_R + + {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_Z_T + {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T + {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T + {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0}, // ADDR_SW_64KB_R_T + + {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_Z_x + {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_x + {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_x + {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_4KB_R_x + + {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X + {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X + {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X + {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_R_X + + {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_Z_X + {0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_S_X + {0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_VAR_D_X + {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_VAR_R_X + {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL +}; + +const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, + 8, 6, 5, 4, 3, 2, 1, 0}; + +const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}}; + +const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}}; + +/** +************************************************************************************************************************ +* Gfx9Lib::Gfx9Lib +* +* @brief +* Constructor +* +************************************************************************************************************************ +*/ +Gfx9Lib::Gfx9Lib(const Client* pClient) + : + Lib(pClient), + m_numEquations(0) +{ + m_class = 
AI_ADDRLIB; + memset(&m_settings, 0, sizeof(m_settings)); + memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable)); +} + +/** +************************************************************************************************************************ +* Gfx9Lib::~Gfx9Lib +* +* @brief +* Destructor +************************************************************************************************************************ +*/ +Gfx9Lib::~Gfx9Lib() +{ +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeHtileInfo +* +* @brief +* Interface function stub of AddrComputeHtilenfo +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo( + const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned, + pIn->swizzleMode); + + UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1; + + UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2; + + if ((numPipeTotal == 1) && (numRbTotal == 1)) + { + numCompressBlkPerMetaBlkLog2 = 10; + } + else + { + if (m_settings.applyAliasFix) + { + numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2); + } + else + { + numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10; + } + } + + numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2; + + Dim3d metaBlkDim = {8, 8, 1}; + UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2; + UINT_32 widthAmp = (pIn->numMipLevels > 1) ? 
(totalAmpBits >> 1) : RoundHalf(totalAmpBits); + UINT_32 heightAmp = totalAmpBits - widthAmp; + metaBlkDim.w <<= widthAmp; + metaBlkDim.h <<= heightAmp; + +#if DEBUG + Dim3d metaBlkDimDbg = {8, 8, 1}; + for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++) + { + if ((metaBlkDimDbg.h < metaBlkDimDbg.w) || + ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w))) + { + metaBlkDimDbg.h <<= 1; + } + else + { + metaBlkDimDbg.w <<= 1; + } + } + ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h)); +#endif + + UINT_32 numMetaBlkX; + UINT_32 numMetaBlkY; + UINT_32 numMetaBlkZ; + + GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo, + pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices, + &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ); + + const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2; + UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; + + if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2)) + { + align *= (numPipeTotal >> 1); + } + + align = Max(align, metaBlkSize); + + if (m_settings.metaBaseAlignFix) + { + align = Max(align, GetBlockSize(pIn->swizzleMode)); + } + + if (m_settings.htileAlignFix) + { + const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2; + const INT_32 htileCachelineSizeLog2 = 11; + const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal); + + INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits)); + + align <<= rbMaskPadding; + } + + pOut->pitch = numMetaBlkX * metaBlkDim.w; + pOut->height = numMetaBlkY * metaBlkDim.h; + pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize; + + pOut->metaBlkWidth = metaBlkDim.w; + pOut->metaBlkHeight = metaBlkDim.h; + pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY; + + pOut->baseAlign = align; + pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align); + + return ADDR_OK; +} + +/** 
+************************************************************************************************************************ +* Gfx9Lib::HwlComputeCmaskInfo +* +* @brief +* Interface function stub of AddrComputeCmaskInfo +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo( + const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ +// TODO: Clarify with AddrLib team +// ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D); + + UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned, + pIn->swizzleMode); + + UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1; + + UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk; + + if ((numPipeTotal == 1) && (numRbTotal == 1)) + { + numCompressBlkPerMetaBlkLog2 = 13; + } + else + { + if (m_settings.applyAliasFix) + { + numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2); + } + else + { + numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10; + } + + numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u); + } + + numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2; + + Dim2d metaBlkDim = {8, 8}; + UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2; + UINT_32 heightAmp = totalAmpBits >> 1; + UINT_32 widthAmp = totalAmpBits - heightAmp; + metaBlkDim.w <<= widthAmp; + metaBlkDim.h <<= heightAmp; + +#if DEBUG + Dim2d metaBlkDimDbg = {8, 8}; + for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++) + { + if (metaBlkDimDbg.h < metaBlkDimDbg.w) + { + metaBlkDimDbg.h <<= 1; + } + else + { + metaBlkDimDbg.w <<= 1; + } + } + ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h)); +#endif + + UINT_32 numMetaBlkX = (pIn->unalignedWidth + 
metaBlkDim.w - 1) / metaBlkDim.w; + UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h; + UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u); + + UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; + + if (m_settings.metaBaseAlignFix) + { + sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode)); + } + + pOut->pitch = numMetaBlkX * metaBlkDim.w; + pOut->height = numMetaBlkY * metaBlkDim.h; + pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1; + pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign); + pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign); + + pOut->metaBlkWidth = metaBlkDim.w; + pOut->metaBlkHeight = metaBlkDim.h; + + pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY; + + return ADDR_OK; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::GetMetaMipInfo +* +* @brief +* Get meta mip info +* +* @return +* N/A +************************************************************************************************************************ +*/ +VOID Gfx9Lib::GetMetaMipInfo( + UINT_32 numMipLevels, ///< [in] number of mip levels + Dim3d* pMetaBlkDim, ///< [in] meta block dimension + BOOL_32 dataThick, ///< [in] data surface is thick + ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info + UINT_32 mip0Width, ///< [in] mip0 width + UINT_32 mip0Height, ///< [in] mip0 height + UINT_32 mip0Depth, ///< [in] mip0 depth + UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain + UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain + UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain + const +{ + UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w; + UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h; + UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d; + UINT_32 tailWidth 
= pMetaBlkDim->w; + UINT_32 tailHeight = pMetaBlkDim->h >> 1; + UINT_32 tailDepth = pMetaBlkDim->d; + BOOL_32 inTail = FALSE; + AddrMajorMode major = ADDR_MAJOR_MAX_TYPE; + + if (numMipLevels > 1) + { + if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY)) + { + // Z major + major = ADDR_MAJOR_Z; + } + else if (numMetaBlkX >= numMetaBlkY) + { + // X major + major = ADDR_MAJOR_X; + } + else + { + // Y major + major = ADDR_MAJOR_Y; + } + + inTail = ((mip0Width <= tailWidth) && + (mip0Height <= tailHeight) && + ((dataThick == FALSE) || (mip0Depth <= tailDepth))); + + if (inTail == FALSE) + { + UINT_32 orderLimit; + UINT_32 *pMipDim; + UINT_32 *pOrderDim; + + if (major == ADDR_MAJOR_Z) + { + // Z major + pMipDim = &numMetaBlkY; + pOrderDim = &numMetaBlkZ; + orderLimit = 4; + } + else if (major == ADDR_MAJOR_X) + { + // X major + pMipDim = &numMetaBlkY; + pOrderDim = &numMetaBlkX; + orderLimit = 4; + } + else + { + // Y major + pMipDim = &numMetaBlkX; + pOrderDim = &numMetaBlkY; + orderLimit = 2; + } + + if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3)) + { + *pMipDim += 2; + } + else + { + *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1)); + } + } + } + + if (pInfo != NULL) + { + UINT_32 mipWidth = mip0Width; + UINT_32 mipHeight = mip0Height; + UINT_32 mipDepth = mip0Depth; + Dim3d mipCoord = {0}; + + for (UINT_32 mip = 0; mip < numMipLevels; mip++) + { + if (inTail) + { + GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip, + pMetaBlkDim); + break; + } + else + { + mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w); + mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h); + mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d); + + pInfo[mip].inMiptail = FALSE; + pInfo[mip].startX = mipCoord.w; + pInfo[mip].startY = mipCoord.h; + pInfo[mip].startZ = mipCoord.d; + pInfo[mip].width = mipWidth; + pInfo[mip].height = mipHeight; + pInfo[mip].depth = dataThick ? 
mipDepth : 1; + + if ((mip >= 3) || (mip & 1)) + { + switch (major) + { + case ADDR_MAJOR_X: + mipCoord.w += mipWidth; + break; + case ADDR_MAJOR_Y: + mipCoord.h += mipHeight; + break; + case ADDR_MAJOR_Z: + mipCoord.d += mipDepth; + break; + default: + break; + } + } + else + { + switch (major) + { + case ADDR_MAJOR_X: + mipCoord.h += mipHeight; + break; + case ADDR_MAJOR_Y: + mipCoord.w += mipWidth; + break; + case ADDR_MAJOR_Z: + mipCoord.h += mipHeight; + break; + default: + break; + } + } + + mipWidth = Max(mipWidth >> 1, 1u); + mipHeight = Max(mipHeight >> 1, 1u); + mipDepth = Max(mipDepth >> 1, 1u); + + inTail = ((mipWidth <= tailWidth) && + (mipHeight <= tailHeight) && + ((dataThick == FALSE) || (mipDepth <= tailDepth))); + } + } + } + + *pNumMetaBlkX = numMetaBlkX; + *pNumMetaBlkY = numMetaBlkY; + *pNumMetaBlkZ = numMetaBlkZ; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeDccInfo +* +* @brief +* Interface function to compute DCC key info +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo( + const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + BOOL_32 dataLinear = IsLinear(pIn->swizzleMode); + BOOL_32 metaLinear = pIn->dccKeyFlags.linear; + BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned; + + if (dataLinear) + { + metaLinear = TRUE; + } + else if (metaLinear == TRUE) + { + pipeAligned = FALSE; + } + + UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode); + + if (metaLinear) + { + // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9. 
+ ADDR_ASSERT_ALWAYS(); + + pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes; + pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign); + } + else + { + BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode); + + UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096; + + UINT_32 numFrags = Max(pIn->numFrags, 1u); + UINT_32 numSlices = Max(pIn->numSlices, 1u); + + minMetaBlkSize /= numFrags; + + UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize; + + UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1; + + if ((numPipeTotal > 1) || (numRbTotal > 1)) + { + const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10); + + numCompressBlkPerMetaBlk = + Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize)); + + if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp) + { + numCompressBlkPerMetaBlk = 65536 * pIn->bpp; + } + } + + Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp); + Dim3d metaBlkDim = compressBlkDim; + + for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1) + { + if ((metaBlkDim.h < metaBlkDim.w) || + ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w))) + { + if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d)) + { + metaBlkDim.h <<= 1; + } + else + { + metaBlkDim.d <<= 1; + } + } + else + { + if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d)) + { + metaBlkDim.w <<= 1; + } + else + { + metaBlkDim.d <<= 1; + } + } + } + + UINT_32 numMetaBlkX; + UINT_32 numMetaBlkY; + UINT_32 numMetaBlkZ; + + GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo, + pIn->unalignedWidth, pIn->unalignedHeight, numSlices, + &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ); + + UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; + + if (numFrags > m_maxCompFrag) + { + sizeAlign *= (numFrags / m_maxCompFrag); + } + + if (m_settings.metaBaseAlignFix) 
+ { + sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode)); + } + + pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ * + numCompressBlkPerMetaBlk * numFrags; + pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign); + pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign); + + pOut->pitch = numMetaBlkX * metaBlkDim.w; + pOut->height = numMetaBlkY * metaBlkDim.h; + pOut->depth = numMetaBlkZ * metaBlkDim.d; + + pOut->compressBlkWidth = compressBlkDim.w; + pOut->compressBlkHeight = compressBlkDim.h; + pOut->compressBlkDepth = compressBlkDim.d; + + pOut->metaBlkWidth = metaBlkDim.w; + pOut->metaBlkHeight = metaBlkDim.h; + pOut->metaBlkDepth = metaBlkDim.d; + + pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY; + pOut->fastClearSizePerSlice = + pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag); + } + + return ADDR_OK; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeMaxBaseAlignments +* +* @brief +* Gets maximum alignments +* @return +* maximum alignments +************************************************************************************************************************ +*/ +UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const +{ + return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB); +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeMaxMetaBaseAlignments +* +* @brief +* Gets maximum alignments for metadata +* @return +* maximum alignments for metadata +************************************************************************************************************************ +*/ +UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const +{ + // Max base alignment for Htile + const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z); + const UINT_32 maxNumRbTotal = m_se * 
m_rbPerSe; + + // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2), + // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic. + ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u)); + const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u); + + UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes; + + if (maxNumPipeTotal > 2) + { + maxBaseAlignHtile *= (maxNumPipeTotal >> 1); + } + + maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile); + + if (m_settings.metaBaseAlignFix) + { + maxBaseAlignHtile = Max(maxBaseAlignHtile, GetBlockSize(ADDR_SW_64KB)); + } + + if (m_settings.htileAlignFix) + { + maxBaseAlignHtile *= maxNumPipeTotal; + } + + // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate + + // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate + UINT_32 maxBaseAlignDcc3D = 65536; + + if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1)) + { + maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u); + } + + // Max base alignment for Msaa Dcc + UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag); + + if (m_settings.metaBaseAlignFix) + { + maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, GetBlockSize(ADDR_SW_64KB)); + } + + return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D)); +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeCmaskAddrFromCoord +* +* @brief +* Interface function stub of AddrComputeCmaskAddrFromCoord +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE 
Gfx9Lib::HwlComputeCmaskAddrFromCoord( + const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure +{ + ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0}; + input.size = sizeof(input); + input.cMaskFlags = pIn->cMaskFlags; + input.colorFlags = pIn->colorFlags; + input.unalignedWidth = Max(pIn->unalignedWidth, 1u); + input.unalignedHeight = Max(pIn->unalignedHeight, 1u); + input.numSlices = Max(pIn->numSlices, 1u); + input.swizzleMode = pIn->swizzleMode; + input.resourceType = pIn->resourceType; + + ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0}; + output.size = sizeof(output); + + ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output); + + if (returnCode == ADDR_OK) + { + UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags); + UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3); + UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); + UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); + + MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags, + Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType, + metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}; + + const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); + + UINT_32 xb = pIn->x / output.metaBlkWidth; + UINT_32 yb = pIn->y / output.metaBlkHeight; + UINT_32 zb = pIn->slice; + + UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth; + UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; + UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; + + UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex); + + pOut->addr = address >> 1; + pOut->bitPosition = static_cast((address & 1) << 2); + + UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned, + pIn->swizzleMode); + + UINT_64 pipeXor = static_cast(pIn->pipeXor & ((1 << numPipeBits) - 1)); + + pOut->addr ^= (pipeXor << m_pipeInterleaveLog2); + 
} + + return returnCode; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeHtileAddrFromCoord +* +* @brief +* Interface function stub of AddrComputeHtileAddrFromCoord +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord( + const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pIn->numMipLevels > 1) + { + returnCode = ADDR_NOTIMPLEMENTED; + } + else + { + ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0}; + input.size = sizeof(input); + input.hTileFlags = pIn->hTileFlags; + input.depthFlags = pIn->depthflags; + input.swizzleMode = pIn->swizzleMode; + input.unalignedWidth = Max(pIn->unalignedWidth, 1u); + input.unalignedHeight = Max(pIn->unalignedHeight, 1u); + input.numSlices = Max(pIn->numSlices, 1u); + input.numMipLevels = Max(pIn->numMipLevels, 1u); + + ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0}; + output.size = sizeof(output); + + returnCode = ComputeHtileInfo(&input, &output); + + if (returnCode == ADDR_OK) + { + UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); + UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); + UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); + UINT_32 numSamplesLog2 = Log2(pIn->numSamples); + + MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, + Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, + metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}; + + const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); + + UINT_32 xb = pIn->x / output.metaBlkWidth; + UINT_32 yb = pIn->y / output.metaBlkHeight; + UINT_32 zb = pIn->slice; + + UINT_32 pitchInBlock = output.pitch / 
output.metaBlkWidth; + UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; + UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; + + UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex); + + pOut->addr = address >> 1; + + UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned, + pIn->swizzleMode); + + UINT_64 pipeXor = static_cast(pIn->pipeXor & ((1 << numPipeBits) - 1)); + + pOut->addr ^= (pipeXor << m_pipeInterleaveLog2); + } + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeHtileCoordFromAddr +* +* @brief +* Interface function stub of AddrComputeHtileCoordFromAddr +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr( + const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pIn->numMipLevels > 1) + { + returnCode = ADDR_NOTIMPLEMENTED; + } + else + { + ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0}; + input.size = sizeof(input); + input.hTileFlags = pIn->hTileFlags; + input.swizzleMode = pIn->swizzleMode; + input.unalignedWidth = Max(pIn->unalignedWidth, 1u); + input.unalignedHeight = Max(pIn->unalignedHeight, 1u); + input.numSlices = Max(pIn->numSlices, 1u); + input.numMipLevels = Max(pIn->numMipLevels, 1u); + + ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0}; + output.size = sizeof(output); + + returnCode = ComputeHtileInfo(&input, &output); + + if (returnCode == ADDR_OK) + { + UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); + UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); + UINT_32 metaBlkHeightLog2 = 
Log2(output.metaBlkHeight); + UINT_32 numSamplesLog2 = Log2(pIn->numSamples); + + MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, + Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, + metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}; + + const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); + + UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned, + pIn->swizzleMode); + + UINT_64 pipeXor = static_cast(pIn->pipeXor & ((1 << numPipeBits) - 1)); + + UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1; + + UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth; + UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; + + UINT_32 x, y, z, s, m; + pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m); + + pOut->slice = m / sliceSizeInBlock; + pOut->y = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y; + pOut->x = (m % pitchInBlock) * output.metaBlkWidth + x; + } + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeDccAddrFromCoord +* +* @brief +* Interface function stub of AddrComputeDccAddrFromCoord +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord( + const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear) + { + returnCode = ADDR_NOTIMPLEMENTED; + } + else + { + ADDR2_COMPUTE_DCCINFO_INPUT input = {0}; + input.size = sizeof(input); + input.dccKeyFlags = pIn->dccKeyFlags; + input.colorFlags = pIn->colorFlags; + input.swizzleMode = pIn->swizzleMode; + 
input.resourceType = pIn->resourceType; + input.bpp = pIn->bpp; + input.unalignedWidth = Max(pIn->unalignedWidth, 1u); + input.unalignedHeight = Max(pIn->unalignedHeight, 1u); + input.numSlices = Max(pIn->numSlices, 1u); + input.numFrags = Max(pIn->numFrags, 1u); + input.numMipLevels = Max(pIn->numMipLevels, 1u); + + ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0}; + output.size = sizeof(output); + + returnCode = ComputeDccInfo(&input, &output); + + if (returnCode == ADDR_OK) + { + UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); + UINT_32 numSamplesLog2 = Log2(pIn->numFrags); + UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); + UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); + UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth); + UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth); + UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight); + UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth); + + MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags, + Gfx9DataColor, pIn->swizzleMode, pIn->resourceType, + metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2, + compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2}; + + const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); + + UINT_32 xb = pIn->x / output.metaBlkWidth; + UINT_32 yb = pIn->y / output.metaBlkHeight; + UINT_32 zb = pIn->slice / output.metaBlkDepth; + + UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth; + UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; + UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; + + UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex); + + pOut->addr = address >> 1; + + UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned, + pIn->swizzleMode); + + UINT_64 pipeXor = static_cast(pIn->pipeXor & ((1 << numPipeBits) - 1)); + + pOut->addr ^= (pipeXor << m_pipeInterleaveLog2); + } + } + + return 
returnCode; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlInitGlobalParams +* +* @brief +* Initializes global parameters +* +* @return +* TRUE if all settings are valid +* +************************************************************************************************************************ +*/ +BOOL_32 Gfx9Lib::HwlInitGlobalParams( + const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input +{ + BOOL_32 valid = TRUE; + + if (m_settings.isArcticIsland) + { + GB_ADDR_CONFIG gbAddrConfig; + + gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig; + + // These values are copied from CModel code + switch (gbAddrConfig.bits.NUM_PIPES) + { + case ADDR_CONFIG_1_PIPE: + m_pipes = 1; + m_pipesLog2 = 0; + break; + case ADDR_CONFIG_2_PIPE: + m_pipes = 2; + m_pipesLog2 = 1; + break; + case ADDR_CONFIG_4_PIPE: + m_pipes = 4; + m_pipesLog2 = 2; + break; + case ADDR_CONFIG_8_PIPE: + m_pipes = 8; + m_pipesLog2 = 3; + break; + case ADDR_CONFIG_16_PIPE: + m_pipes = 16; + m_pipesLog2 = 4; + break; + case ADDR_CONFIG_32_PIPE: + m_pipes = 32; + m_pipesLog2 = 5; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE) + { + case ADDR_CONFIG_PIPE_INTERLEAVE_256B: + m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B; + m_pipeInterleaveLog2 = 8; + break; + case ADDR_CONFIG_PIPE_INTERLEAVE_512B: + m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B; + m_pipeInterleaveLog2 = 9; + break; + case ADDR_CONFIG_PIPE_INTERLEAVE_1KB: + m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB; + m_pipeInterleaveLog2 = 10; + break; + case ADDR_CONFIG_PIPE_INTERLEAVE_2KB: + m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB; + m_pipeInterleaveLog2 = 11; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, + // and any larger value requires a 
post-process (left shift) on the output pipeBankXor bits. + ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B); + + switch (gbAddrConfig.bits.NUM_BANKS) + { + case ADDR_CONFIG_1_BANK: + m_banks = 1; + m_banksLog2 = 0; + break; + case ADDR_CONFIG_2_BANK: + m_banks = 2; + m_banksLog2 = 1; + break; + case ADDR_CONFIG_4_BANK: + m_banks = 4; + m_banksLog2 = 2; + break; + case ADDR_CONFIG_8_BANK: + m_banks = 8; + m_banksLog2 = 3; + break; + case ADDR_CONFIG_16_BANK: + m_banks = 16; + m_banksLog2 = 4; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + switch (gbAddrConfig.bits.NUM_SHADER_ENGINES) + { + case ADDR_CONFIG_1_SHADER_ENGINE: + m_se = 1; + m_seLog2 = 0; + break; + case ADDR_CONFIG_2_SHADER_ENGINE: + m_se = 2; + m_seLog2 = 1; + break; + case ADDR_CONFIG_4_SHADER_ENGINE: + m_se = 4; + m_seLog2 = 2; + break; + case ADDR_CONFIG_8_SHADER_ENGINE: + m_se = 8; + m_seLog2 = 3; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + switch (gbAddrConfig.bits.NUM_RB_PER_SE) + { + case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE: + m_rbPerSe = 1; + m_rbPerSeLog2 = 0; + break; + case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE: + m_rbPerSe = 2; + m_rbPerSeLog2 = 1; + break; + case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE: + m_rbPerSe = 4; + m_rbPerSeLog2 = 2; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS) + { + case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS: + m_maxCompFrag = 1; + m_maxCompFragLog2 = 0; + break; + case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS: + m_maxCompFrag = 2; + m_maxCompFragLog2 = 1; + break; + case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS: + m_maxCompFrag = 4; + m_maxCompFragLog2 = 2; + break; + case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS: + m_maxCompFrag = 8; + m_maxCompFragLog2 = 3; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2; + ADDR_ASSERT((m_blockVarSizeLog2 == 0) || + ((m_blockVarSizeLog2 >= 17u) && 
(m_blockVarSizeLog2 <= 20u))); + m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u); + + if ((m_rbPerSeLog2 == 1) && + (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) || + ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2))))) + { + ADDR_ASSERT(m_settings.isVega10 == FALSE); + ADDR_ASSERT(m_settings.isRaven == FALSE); + ADDR_ASSERT(m_settings.isVega20 == FALSE); + + if (m_settings.isVega12) + { + m_settings.htileCacheRbConflict = 1; + } + } + } + else + { + valid = FALSE; + ADDR_NOT_IMPLEMENTED(); + } + + if (valid) + { + InitEquationTable(); + } + + return valid; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlConvertChipFamily +* +* @brief +* Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision +* @return +* ChipFamily +************************************************************************************************************************ +*/ +ChipFamily Gfx9Lib::HwlConvertChipFamily( + UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h + UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h +{ + ChipFamily family = ADDR_CHIP_FAMILY_AI; + + switch (uChipFamily) + { + case FAMILY_AI: + m_settings.isArcticIsland = 1; + m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision); + m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision); + m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision); + m_settings.isDce12 = 1; + + if (m_settings.isVega10 == 0) + { + m_settings.htileAlignFix = 1; + m_settings.applyAliasFix = 1; + } + + m_settings.metaBaseAlignFix = 1; + + m_settings.depthPipeXorDisable = 1; + break; + case FAMILY_RV: + m_settings.isArcticIsland = 1; + + if (ASICREV_IS_RAVEN(uChipRevision)) + { + m_settings.isRaven = 1; + + m_settings.depthPipeXorDisable = 1; + } + + if (ASICREV_IS_RAVEN2(uChipRevision)) + { + m_settings.isRaven = 1; + } + + if (m_settings.isRaven 
== 0) + { + m_settings.htileAlignFix = 1; + m_settings.applyAliasFix = 1; + } + + m_settings.isDcn1 = m_settings.isRaven; + + m_settings.metaBaseAlignFix = 1; + break; + + default: + ADDR_ASSERT(!"This should be a Fusion"); + break; + } + + return family; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::InitRbEquation +* +* @brief +* Init RB equation +* @return +* N/A +************************************************************************************************************************ +*/ +VOID Gfx9Lib::GetRbEquation( + CoordEq* pRbEq, ///< [out] rb equation + UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine + UINT_32 numSeLog2) ///< [in] number of shader engine + const +{ + // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32 + UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4; + Coordinate cx('x', rbRegion); + Coordinate cy('y', rbRegion); + + UINT_32 start = 0; + UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2; + + // Clear the rb equation + pRbEq->resize(0); + pRbEq->resize(numRbTotalLog2); + + if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1)) + { + // Special case when more than 1 SE, and 2 RB per SE + (*pRbEq)[0].add(cx); + (*pRbEq)[0].add(cy); + cx++; + cy++; + + if (m_settings.applyAliasFix == false) + { + (*pRbEq)[0].add(cy); + } + + (*pRbEq)[0].add(cy); + start++; + } + + UINT_32 numBits = 2 * (numRbTotalLog2 - start); + + for (UINT_32 i = 0; i < numBits; i++) + { + UINT_32 idx = + start + (((start + i) >= numRbTotalLog2) ? 
(2 * (numRbTotalLog2 - start) - i - 1) : i); + + if ((i % 2) == 1) + { + (*pRbEq)[idx].add(cx); + cx++; + } + else + { + (*pRbEq)[idx].add(cy); + cy++; + } + } +} + +/** +************************************************************************************************************************ +* Gfx9Lib::GetDataEquation +* +* @brief +* Get data equation for fmask and Z +* @return +* N/A +************************************************************************************************************************ +*/ +VOID Gfx9Lib::GetDataEquation( + CoordEq* pDataEq, ///< [out] data surface equation + Gfx9DataType dataSurfaceType, ///< [in] data surface type + AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode + AddrResourceType resourceType, ///< [in] data surface resource type + UINT_32 elementBytesLog2, ///< [in] data surface element bytes + UINT_32 numSamplesLog2) ///< [in] data surface sample count + const +{ + Coordinate cx('x', 0); + Coordinate cy('y', 0); + Coordinate cz('z', 0); + Coordinate cs('s', 0); + + // Clear the equation + pDataEq->resize(0); + pDataEq->resize(27); + + if (dataSurfaceType == Gfx9DataColor) + { + if (IsLinear(swizzleMode)) + { + Coordinate cm('m', 0); + + pDataEq->resize(49); + + for (UINT_32 i = 0; i < 49; i++) + { + (*pDataEq)[i].add(cm); + cm++; + } + } + else if (IsThick(resourceType, swizzleMode)) + { + // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d + UINT_32 i; + if (IsStandardSwizzle(resourceType, swizzleMode)) + { + // Standard 3d swizzle + // Fill in bottom x bits + for (i = elementBytesLog2; i < 4; i++) + { + (*pDataEq)[i].add(cx); + cx++; + } + // Fill in 2 bits of y and then z + for (i = 4; i < 6; i++) + { + (*pDataEq)[i].add(cy); + cy++; + } + for (i = 6; i < 8; i++) + { + (*pDataEq)[i].add(cz); + cz++; + } + if (elementBytesLog2 < 2) + { + // fill in z & y bit + (*pDataEq)[8].add(cz); + (*pDataEq)[9].add(cy); + cz++; + cy++; + } + else if (elementBytesLog2 == 2) + { + // fill in y and x bit + 
(*pDataEq)[8].add(cy); + (*pDataEq)[9].add(cx); + cy++; + cx++; + } + else + { + // fill in 2 x bits + (*pDataEq)[8].add(cx); + cx++; + (*pDataEq)[9].add(cx); + cx++; + } + } + else + { + // Z 3d swizzle + UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5); + UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ? + 2 : ((elementBytesLog2 == 1) ? 3 : 1); + pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd); + for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++) + { + (*pDataEq)[i].add(cz); + cz++; + } + if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3)) + { + // add an x and z + (*pDataEq)[6].add(cx); + (*pDataEq)[7].add(cz); + cx++; + cz++; + } + else if (elementBytesLog2 == 2) + { + // add a y and z + (*pDataEq)[6].add(cy); + (*pDataEq)[7].add(cz); + cy++; + cz++; + } + // add y and x + (*pDataEq)[8].add(cy); + (*pDataEq)[9].add(cx); + cy++; + cx++; + } + // Fill in bit 10 and up + pDataEq->mort3d( cz, cy, cx, 10 ); + } + else if (IsThin(resourceType, swizzleMode)) + { + UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode); + // Color 2D + UINT_32 microYBits = (8 - elementBytesLog2) / 2; + UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2; + UINT_32 i; + // Fill in bottom x bits + for (i = elementBytesLog2; i < 4; i++) + { + (*pDataEq)[i].add(cx); + cx++; + } + // Fill in bottom y bits + for (i = 4; i < 4 + microYBits; i++) + { + (*pDataEq)[i].add(cy); + cy++; + } + // Fill in last of the micro_x bits + for (i = 4 + microYBits; i < 8; i++) + { + (*pDataEq)[i].add(cx); + cx++; + } + // Fill in x/y bits below sample split + pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1); + // Fill in sample bits + for (i = 0; i < numSamplesLog2; i++) + { + cs.set('s', i); + (*pDataEq)[tileSplitStart + i].add(cs); + } + // Fill in x/y bits above sample split + if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1)) + { + pDataEq->mort2d(cx, cy, blockSizeLog2); + } + else + { + pDataEq->mort2d(cy, cx, blockSizeLog2); + } + } + else + { + 
ADDR_ASSERT_ALWAYS(); + } + } + else + { + // Fmask or depth + UINT_32 sampleStart = elementBytesLog2; + UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2; + UINT_32 ymajStart = 6 + numSamplesLog2; + + for (UINT_32 s = 0; s < numSamplesLog2; s++) + { + cs.set('s', s); + (*pDataEq)[sampleStart + s].add(cs); + } + + // Put in the x-major order pixel bits + pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1); + // Put in the y-major order pixel bits + pDataEq->mort2d(cy, cx, ymajStart); + } +} + +/** +************************************************************************************************************************ +* Gfx9Lib::GetPipeEquation +* +* @brief +* Get pipe equation +* @return +* N/A +************************************************************************************************************************ +*/ +VOID Gfx9Lib::GetPipeEquation( + CoordEq* pPipeEq, ///< [out] pipe equation + CoordEq* pDataEq, ///< [in] data equation + UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave + UINT_32 numPipeLog2, ///< [in] number of pipes + UINT_32 numSamplesLog2, ///< [in] data surface sample count + Gfx9DataType dataSurfaceType, ///< [in] data surface type + AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode + AddrResourceType resourceType ///< [in] data surface resource type + ) const +{ + UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode); + CoordEq dataEq; + + pDataEq->copy(dataEq); + + if (dataSurfaceType == Gfx9DataColor) + { + INT_32 shift = static_cast(numSamplesLog2); + dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2); + } + + dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2); + + // This section should only apply to z/stencil, maybe fmask + // If the pipe bit is below the comp block size, + // then keep moving up the address until we find a bit that is above + UINT_32 pipeStart = 0; + + if (dataSurfaceType != Gfx9DataColor) + { + Coordinate tileMin('x', 3); + + while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin) + 
{ + pipeStart++; + } + + // if pipe is 0, then the first pipe bit is above the comp block size, + // so we don't need to do anything + // Note, this if condition is not necessary, since if we execute the loop when pipe==0, + // we will get the same pipe equation + if (pipeStart != 0) + { + for (UINT_32 i = 0; i < numPipeLog2; i++) + { + // Copy the jth bit above pipe interleave to the current pipe equation bit + dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]); + } + } + } + + if (IsPrt(swizzleMode)) + { + // Clear out bits above the block size if prt's are enabled + dataEq.resize(blockSizeLog2); + dataEq.resize(48); + } + + if (IsXor(swizzleMode)) + { + CoordEq xorMask; + + if (IsThick(resourceType, swizzleMode)) + { + CoordEq xorMask2; + + dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2); + + xorMask.resize(numPipeLog2); + + for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++) + { + xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]); + xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]); + } + } + else + { + // Xor in the bits above the pipe+gpu bits + dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2); + + if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE)) + { + Coordinate co; + CoordEq xorMask2; + // if 1xaa and not prt, then xor in the z bits + xorMask2.resize(0); + xorMask2.resize(numPipeLog2); + for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++) + { + co.set('z', numPipeLog2 - 1 - pipeIdx); + xorMask2[pipeIdx].add(co); + } + + pPipeEq->xorin(xorMask2); + } + } + + xorMask.reverse(); + pPipeEq->xorin(xorMask); + } +} +/** +************************************************************************************************************************ +* Gfx9Lib::GetMetaEquation +* +* @brief +* Get meta equation for cmask/htile/DCC +* @return +* Pointer to a calculated meta equation 
+************************************************************************************************************************ +*/ +const CoordEq* Gfx9Lib::GetMetaEquation( + const MetaEqParams& metaEqParams) +{ + UINT_32 cachedMetaEqIndex; + + for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++) + { + if (memcmp(&metaEqParams, + &m_cachedMetaEqKey[cachedMetaEqIndex], + static_cast(sizeof(metaEqParams))) == 0) + { + break; + } + } + + CoordEq* pMetaEq = NULL; + + if (cachedMetaEqIndex < MaxCachedMetaEq) + { + pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex]; + } + else + { + m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams; + + pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++]; + + m_metaEqOverrideIndex %= MaxCachedMetaEq; + + GenMetaEquation(pMetaEq, + metaEqParams.maxMip, + metaEqParams.elementBytesLog2, + metaEqParams.numSamplesLog2, + metaEqParams.metaFlag, + metaEqParams.dataSurfaceType, + metaEqParams.swizzleMode, + metaEqParams.resourceType, + metaEqParams.metaBlkWidthLog2, + metaEqParams.metaBlkHeightLog2, + metaEqParams.metaBlkDepthLog2, + metaEqParams.compBlkWidthLog2, + metaEqParams.compBlkHeightLog2, + metaEqParams.compBlkDepthLog2); + } + + return pMetaEq; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::GenMetaEquation +* +* @brief +* Get meta equation for cmask/htile/DCC +* @return +* N/A +************************************************************************************************************************ +*/ +VOID Gfx9Lib::GenMetaEquation( + CoordEq* pMetaEq, ///< [out] meta equation + UINT_32 maxMip, ///< [in] max mip Id + UINT_32 elementBytesLog2, ///< [in] data surface element bytes + UINT_32 numSamplesLog2, ///< [in] data surface sample count + ADDR2_META_FLAGS metaFlag, ///< [in] meta falg + Gfx9DataType dataSurfaceType, ///< [in] data surface type + AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode + 
AddrResourceType resourceType, ///< [in] data surface resource type + UINT_32 metaBlkWidthLog2, ///< [in] meta block width + UINT_32 metaBlkHeightLog2, ///< [in] meta block height + UINT_32 metaBlkDepthLog2, ///< [in] meta block depth + UINT_32 compBlkWidthLog2, ///< [in] compress block width + UINT_32 compBlkHeightLog2, ///< [in] compress block height + UINT_32 compBlkDepthLog2) ///< [in] compress block depth + const +{ + UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode); + UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2; + + // Get the correct data address and rb equation + CoordEq dataEq; + GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType, + elementBytesLog2, numSamplesLog2); + + // Get pipe and rb equations + CoordEq pipeEquation; + GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2, + numSamplesLog2, dataSurfaceType, swizzleMode, resourceType); + numPipeTotalLog2 = pipeEquation.getsize(); + + if (metaFlag.linear) + { + // Linear metadata supporting was removed for GFX9! No one can use this feature. + ADDR_ASSERT_ALWAYS(); + + ADDR_ASSERT(dataSurfaceType == Gfx9DataColor); + + dataEq.copy(*pMetaEq); + + if (IsLinear(swizzleMode)) + { + if (metaFlag.pipeAligned) + { + // Remove the pipe bits + INT_32 shift = static_cast(numPipeTotalLog2); + pMetaEq->shift(-shift, pipeInterleaveLog2); + } + // Divide by comp block size, which for linear (which is always color) is 256 B + pMetaEq->shift(-8); + + if (metaFlag.pipeAligned) + { + // Put pipe bits back in + pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2); + + for (UINT_32 i = 0; i < numPipeTotalLog2; i++) + { + pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]); + } + } + } + + pMetaEq->shift(1); + } + else + { + UINT_32 maxCompFragLog2 = static_cast(m_maxCompFragLog2); + UINT_32 compFragLog2 = + ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ? 
+ maxCompFragLog2 : numSamplesLog2; + + UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2; + + // Make sure the metaaddr is cleared + pMetaEq->resize(0); + pMetaEq->resize(27); + + if (IsThick(resourceType, swizzleMode)) + { + Coordinate cx('x', 0); + Coordinate cy('y', 0); + Coordinate cz('z', 0); + + if (maxMip > 0) + { + pMetaEq->mort3d(cy, cx, cz); + } + else + { + pMetaEq->mort3d(cx, cy, cz); + } + } + else + { + Coordinate cx('x', 0); + Coordinate cy('y', 0); + Coordinate cs; + + if (maxMip > 0) + { + pMetaEq->mort2d(cy, cx, compFragLog2); + } + else + { + pMetaEq->mort2d(cx, cy, compFragLog2); + } + + //------------------------------------------------------------------------------------------------------------------------ + // Put the compressible fragments at the lsb + // the uncompressible frags will be at the msb of the micro address + //------------------------------------------------------------------------------------------------------------------------ + for (UINT_32 s = 0; s < compFragLog2; s++) + { + cs.set('s', s); + (*pMetaEq)[s].add(cs); + } + } + + // Keep a copy of the pipe equations + CoordEq origPipeEquation; + pipeEquation.copy(origPipeEquation); + + Coordinate co; + // filter out everything under the compressed block size + co.set('x', compBlkWidthLog2); + pMetaEq->Filter('<', co, 0, 'x'); + co.set('y', compBlkHeightLog2); + pMetaEq->Filter('<', co, 0, 'y'); + co.set('z', compBlkDepthLog2); + pMetaEq->Filter('<', co, 0, 'z'); + + // For non-color, filter out sample bits + if (dataSurfaceType != Gfx9DataColor) + { + co.set('x', 0); + pMetaEq->Filter('<', co, 0, 's'); + } + + // filter out everything above the metablock size + co.set('x', metaBlkWidthLog2 - 1); + pMetaEq->Filter('>', co, 0, 'x'); + co.set('y', metaBlkHeightLog2 - 1); + pMetaEq->Filter('>', co, 0, 'y'); + co.set('z', metaBlkDepthLog2 - 1); + pMetaEq->Filter('>', co, 0, 'z'); + + // filter out everything above the metablock size for the channel bits + co.set('x', 
metaBlkWidthLog2 - 1); + pipeEquation.Filter('>', co, 0, 'x'); + co.set('y', metaBlkHeightLog2 - 1); + pipeEquation.Filter('>', co, 0, 'y'); + co.set('z', metaBlkDepthLog2 - 1); + pipeEquation.Filter('>', co, 0, 'z'); + + // Make sure we still have the same number of channel bits + if (pipeEquation.getsize() != numPipeTotalLog2) + { + ADDR_ASSERT_ALWAYS(); + } + + // Loop through all channel and rb bits, + // and make sure these components exist in the metadata address + for (UINT_32 i = 0; i < numPipeTotalLog2; i++) + { + for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--) + { + if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE) + { + ADDR_ASSERT_ALWAYS(); + } + } + } + + const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0; + const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0; + const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2; + CoordEq origRbEquation; + + GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2); + + CoordEq rbEquation = origRbEquation; + + for (UINT_32 i = 0; i < numRbTotalLog2; i++) + { + for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--) + { + if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE) + { + ADDR_ASSERT_ALWAYS(); + } + } + } + + if (m_settings.applyAliasFix) + { + co.set('z', -1); + } + + // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it + for (UINT_32 i = 0; i < numRbTotalLog2; i++) + { + for (UINT_32 j = 0; j < numPipeTotalLog2; j++) + { + BOOL_32 isRbEquationInPipeEquation = FALSE; + + if (m_settings.applyAliasFix) + { + CoordTerm filteredPipeEq; + filteredPipeEq = pipeEquation[j]; + + filteredPipeEq.Filter('>', co, 0, 'z'); + + isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq); + } + else + { + isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]); + } + + if (isRbEquationInPipeEquation) + { + rbEquation[i].Clear(); + } + } + } + + bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {}; + + // Loop through 
each bit of the channel, get the smallest coordinate, + // and remove it from the metaaddr, and rb_equation + for (UINT_32 i = 0; i < numPipeTotalLog2; i++) + { + pipeEquation[i].getsmallest(co); + + UINT_32 old_size = pMetaEq->getsize(); + pMetaEq->Filter('=', co); + UINT_32 new_size = pMetaEq->getsize(); + if (new_size != old_size-1) + { + ADDR_ASSERT_ALWAYS(); + } + pipeEquation.remove(co); + for (UINT_32 j = 0; j < numRbTotalLog2; j++) + { + if (rbEquation[j].remove(co)) + { + // if we actually removed something from this bit, then add the remaining + // channel bits, as these can be removed for this bit + for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++) + { + if (pipeEquation[i][k] != co) + { + rbEquation[j].add(pipeEquation[i][k]); + rbAppendedWithPipeBits[j] = true; + } + } + } + } + } + + // Loop through the rb bits and see what remain; + // filter out the smallest coordinate if it remains + UINT_32 rbBitsLeft = 0; + for (UINT_32 i = 0; i < numRbTotalLog2; i++) + { + BOOL_32 isRbEqAppended = FALSE; + + if (m_settings.applyAliasFix) + { + isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 
1 : 0)); + } + else + { + isRbEqAppended = (rbEquation[i].getsize() > 0); + } + + if (isRbEqAppended) + { + rbBitsLeft++; + rbEquation[i].getsmallest(co); + UINT_32 old_size = pMetaEq->getsize(); + pMetaEq->Filter('=', co); + UINT_32 new_size = pMetaEq->getsize(); + if (new_size != old_size - 1) + { + // assert warning + } + for (UINT_32 j = i + 1; j < numRbTotalLog2; j++) + { + if (rbEquation[j].remove(co)) + { + // if we actually removed something from this bit, then add the remaining + // rb bits, as these can be removed for this bit + for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++) + { + if (rbEquation[i][k] != co) + { + rbEquation[j].add(rbEquation[i][k]); + rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i]; + } + } + } + } + } + } + + // capture the size of the metaaddr + UINT_32 metaSize = pMetaEq->getsize(); + // resize to 49 bits...make this a nibble address + pMetaEq->resize(49); + // Concatenate the macro address above the current address + for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++) + { + co.set('m', j); + (*pMetaEq)[i].add(co); + } + + // Multiply by meta element size (in nibbles) + if (dataSurfaceType == Gfx9DataColor) + { + pMetaEq->shift(1); + } + else if (dataSurfaceType == Gfx9DataDepthStencil) + { + pMetaEq->shift(3); + } + + //------------------------------------------------------------------------------------------ + // Note the pipeInterleaveLog2+1 is because address is a nibble address + // Shift up from pipe interleave number of channel + // and rb bits left, and uncompressed fragments + //------------------------------------------------------------------------------------------ + + pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1); + + // Put in the channel bits + for (UINT_32 i = 0; i < numPipeTotalLog2; i++) + { + origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]); + } + + // Put in remaining rb bits + for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % 
numRbTotalLog2) + { + BOOL_32 isRbEqAppended = FALSE; + + if (m_settings.applyAliasFix) + { + isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0)); + } + else + { + isRbEqAppended = (rbEquation[i].getsize() > 0); + } + + if (isRbEqAppended) + { + origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]); + // Mark any rb bit we add in to the rb mask + j++; + } + } + + //------------------------------------------------------------------------------------------ + // Put in the uncompressed fragment bits + //------------------------------------------------------------------------------------------ + for (UINT_32 i = 0; i < uncompFragLog2; i++) + { + co.set('s', compFragLog2 + i); + (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co); + } + } +} + +/** +************************************************************************************************************************ +* Gfx9Lib::IsEquationSupported +* +* @brief +* Check if equation is supported for given swizzle mode and resource type. +* +* @return +* TRUE if supported +************************************************************************************************************************ +*/ +BOOL_32 Gfx9Lib::IsEquationSupported( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2) const +{ + BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) && + (IsLinear(swMode) == FALSE) && + (((IsTex2d(rsrcType) == TRUE) && + ((elementBytesLog2 < 4) || + ((IsRotateSwizzle(swMode) == FALSE) && + (IsZOrderSwizzle(swMode) == FALSE)))) || + ((IsTex3d(rsrcType) == TRUE) && + (IsRotateSwizzle(swMode) == FALSE) && + (IsBlock256b(swMode) == FALSE))); + + return supported; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::InitEquationTable +* +* @brief +* Initialize Equation table. 
+* +* @return +* N/A +************************************************************************************************************************ +*/ +VOID Gfx9Lib::InitEquationTable() +{ + memset(m_equationTable, 0, sizeof(m_equationTable)); + + // Loop all possible resource type (2D/3D) + for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++) + { + AddrResourceType rsrcType = static_cast(rsrcTypeIdx + ADDR_RSRC_TEX_2D); + + // Loop all possible swizzle mode + for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++) + { + AddrSwizzleMode swMode = static_cast(swModeIdx); + + // Loop all possible bpp + for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++) + { + UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX; + + // Check if the input is supported + if (IsEquationSupported(rsrcType, swMode, bppIdx)) + { + ADDR_EQUATION equation; + ADDR_E_RETURNCODE retCode; + + memset(&equation, 0, sizeof(ADDR_EQUATION)); + + // Generate the equation + if (IsBlock256b(swMode) && IsTex2d(rsrcType)) + { + retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation); + } + else if (IsThin(rsrcType, swMode)) + { + retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation); + } + else + { + retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation); + } + + // Only fill the equation into the table if the return code is ADDR_OK, + // otherwise if the return code is not ADDR_OK, it indicates this is not + // a valid input, we do nothing but just fill invalid equation index + // into the lookup table. 
+ if (retCode == ADDR_OK) + { + equationIndex = m_numEquations; + ADDR_ASSERT(equationIndex < EquationTableSize); + + m_equationTable[equationIndex] = equation; + + m_numEquations++; + } + else + { + ADDR_ASSERT_ALWAYS(); + } + } + + // Fill the index into the lookup table, if the combination is not supported + // fill the invalid equation index + m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex; + } + } + } +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlGetEquationIndex +* +* @brief +* Interface function stub of GetEquationIndex +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +UINT_32 Gfx9Lib::HwlGetEquationIndex( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut + ) const +{ + AddrResourceType rsrcType = pIn->resourceType; + AddrSwizzleMode swMode = pIn->swizzleMode; + UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); + UINT_32 index = ADDR_INVALID_EQUATION_INDEX; + + if (IsEquationSupported(rsrcType, swMode, elementBytesLog2)) + { + UINT_32 rsrcTypeIdx = static_cast(rsrcType) - 1; + UINT_32 swModeIdx = static_cast(swMode); + + index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2]; + } + + if (pOut->pMipInfo != NULL) + { + for (UINT_32 i = 0; i < pIn->numMipLevels; i++) + { + pOut->pMipInfo[i].equationIndex = index; + } + } + + return index; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeBlock256Equation +* +* @brief +* Interface function stub of ComputeBlock256Equation +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE 
Gfx9Lib::HwlComputeBlock256Equation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const +{ + ADDR_E_RETURNCODE ret = ADDR_OK; + + pEquation->numBits = 8; + + UINT_32 i = 0; + for (; i < elementBytesLog2; i++) + { + InitChannel(1, 0 , i, &pEquation->addr[i]); + } + + ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2]; + + const UINT_32 maxBitsUsed = 4; + ADDR_CHANNEL_SETTING x[maxBitsUsed] = {}; + ADDR_CHANNEL_SETTING y[maxBitsUsed] = {}; + + for (i = 0; i < maxBitsUsed; i++) + { + InitChannel(1, 0, elementBytesLog2 + i, &x[i]); + InitChannel(1, 1, i, &y[i]); + } + + if (IsStandardSwizzle(rsrcType, swMode)) + { + switch (elementBytesLog2) + { + case 0: + pixelBit[0] = x[0]; + pixelBit[1] = x[1]; + pixelBit[2] = x[2]; + pixelBit[3] = x[3]; + pixelBit[4] = y[0]; + pixelBit[5] = y[1]; + pixelBit[6] = y[2]; + pixelBit[7] = y[3]; + break; + case 1: + pixelBit[0] = x[0]; + pixelBit[1] = x[1]; + pixelBit[2] = x[2]; + pixelBit[3] = y[0]; + pixelBit[4] = y[1]; + pixelBit[5] = y[2]; + pixelBit[6] = x[3]; + break; + case 2: + pixelBit[0] = x[0]; + pixelBit[1] = x[1]; + pixelBit[2] = y[0]; + pixelBit[3] = y[1]; + pixelBit[4] = y[2]; + pixelBit[5] = x[2]; + break; + case 3: + pixelBit[0] = x[0]; + pixelBit[1] = y[0]; + pixelBit[2] = y[1]; + pixelBit[3] = x[1]; + pixelBit[4] = x[2]; + break; + case 4: + pixelBit[0] = y[0]; + pixelBit[1] = y[1]; + pixelBit[2] = x[0]; + pixelBit[3] = x[1]; + break; + default: + ADDR_ASSERT_ALWAYS(); + ret = ADDR_INVALIDPARAMS; + break; + } + } + else if (IsDisplaySwizzle(rsrcType, swMode)) + { + switch (elementBytesLog2) + { + case 0: + pixelBit[0] = x[0]; + pixelBit[1] = x[1]; + pixelBit[2] = x[2]; + pixelBit[3] = y[1]; + pixelBit[4] = y[0]; + pixelBit[5] = y[2]; + pixelBit[6] = x[3]; + pixelBit[7] = y[3]; + break; + case 1: + pixelBit[0] = x[0]; + pixelBit[1] = x[1]; + pixelBit[2] = x[2]; + pixelBit[3] = y[0]; + pixelBit[4] = y[1]; + pixelBit[5] = y[2]; + 
pixelBit[6] = x[3]; + break; + case 2: + pixelBit[0] = x[0]; + pixelBit[1] = x[1]; + pixelBit[2] = y[0]; + pixelBit[3] = x[2]; + pixelBit[4] = y[1]; + pixelBit[5] = y[2]; + break; + case 3: + pixelBit[0] = x[0]; + pixelBit[1] = y[0]; + pixelBit[2] = x[1]; + pixelBit[3] = x[2]; + pixelBit[4] = y[1]; + break; + case 4: + pixelBit[0] = x[0]; + pixelBit[1] = y[0]; + pixelBit[2] = x[1]; + pixelBit[3] = y[1]; + break; + default: + ADDR_ASSERT_ALWAYS(); + ret = ADDR_INVALIDPARAMS; + break; + } + } + else if (IsRotateSwizzle(swMode)) + { + switch (elementBytesLog2) + { + case 0: + pixelBit[0] = y[0]; + pixelBit[1] = y[1]; + pixelBit[2] = y[2]; + pixelBit[3] = x[1]; + pixelBit[4] = x[0]; + pixelBit[5] = x[2]; + pixelBit[6] = x[3]; + pixelBit[7] = y[3]; + break; + case 1: + pixelBit[0] = y[0]; + pixelBit[1] = y[1]; + pixelBit[2] = y[2]; + pixelBit[3] = x[0]; + pixelBit[4] = x[1]; + pixelBit[5] = x[2]; + pixelBit[6] = x[3]; + break; + case 2: + pixelBit[0] = y[0]; + pixelBit[1] = y[1]; + pixelBit[2] = x[0]; + pixelBit[3] = y[2]; + pixelBit[4] = x[1]; + pixelBit[5] = x[2]; + break; + case 3: + pixelBit[0] = y[0]; + pixelBit[1] = x[0]; + pixelBit[2] = y[1]; + pixelBit[3] = x[1]; + pixelBit[4] = x[2]; + break; + default: + ADDR_ASSERT_ALWAYS(); + case 4: + ret = ADDR_INVALIDPARAMS; + break; + } + } + else + { + ADDR_ASSERT_ALWAYS(); + ret = ADDR_INVALIDPARAMS; + } + + // Post validation + if (ret == ADDR_OK) + { + MAYBE_UNUSED Dim2d microBlockDim = Block256_2d[elementBytesLog2]; + ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) == + (microBlockDim.w * (1 << elementBytesLog2))); + ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h); + } + + return ret; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeThinEquation +* +* @brief +* Interface function stub of ComputeThinEquation +* +* @return +* ADDR_E_RETURNCODE 
+************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const +{ + ADDR_E_RETURNCODE ret = ADDR_OK; + + UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode); + + UINT_32 maxXorBits = blockSizeLog2; + if (IsNonPrtXor(swMode)) + { + // For non-prt-xor, maybe need to initialize some more bits for xor + // The highest xor bit used in equation will be max the following 3 items: + // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits + // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits + // 3. blockSizeLog2 + + maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2)); + maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + + GetPipeXorBits(blockSizeLog2) + + 2 * GetBankXorBits(blockSizeLog2)); + } + + const UINT_32 maxBitsUsed = 14; + ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits); + ADDR_CHANNEL_SETTING x[maxBitsUsed] = {}; + ADDR_CHANNEL_SETTING y[maxBitsUsed] = {}; + + const UINT_32 extraXorBits = 16; + ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2); + ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {}; + + for (UINT_32 i = 0; i < maxBitsUsed; i++) + { + InitChannel(1, 0, elementBytesLog2 + i, &x[i]); + InitChannel(1, 1, i, &y[i]); + } + + ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr; + + for (UINT_32 i = 0; i < elementBytesLog2; i++) + { + InitChannel(1, 0 , i, &pixelBit[i]); + } + + UINT_32 xIdx = 0; + UINT_32 yIdx = 0; + UINT_32 lowBits = 0; + + if (IsZOrderSwizzle(swMode)) + { + if (elementBytesLog2 <= 3) + { + for (UINT_32 i = elementBytesLog2; i < 6; i++) + { + pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? 
x[xIdx++] : y[yIdx++]; + } + + lowBits = 6; + } + else + { + ret = ADDR_INVALIDPARAMS; + } + } + else + { + ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation); + + if (ret == ADDR_OK) + { + Dim2d microBlockDim = Block256_2d[elementBytesLog2]; + xIdx = Log2(microBlockDim.w); + yIdx = Log2(microBlockDim.h); + lowBits = 8; + } + } + + if (ret == ADDR_OK) + { + for (UINT_32 i = lowBits; i < blockSizeLog2; i++) + { + pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++]; + } + + for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++) + { + xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++]; + } + + if (IsXor(swMode)) + { + // Fill XOR bits + UINT_32 pipeStart = m_pipeInterleaveLog2; + UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2); + + UINT_32 bankStart = pipeStart + pipeXorBits; + UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2); + + for (UINT_32 i = 0; i < pipeXorBits; i++) + { + UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i; + ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ? + &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2]; + + InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src); + } + + for (UINT_32 i = 0; i < bankXorBits; i++) + { + UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i; + ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ? 
+ &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2]; + + InitChannel(&pEquation->xor1[bankStart + i], pXor1Src); + } + + if (IsPrt(swMode) == FALSE) + { + for (UINT_32 i = 0; i < pipeXorBits; i++) + { + InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]); + } + + for (UINT_32 i = 0; i < bankXorBits; i++) + { + InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]); + } + } + } + + pEquation->numBits = blockSizeLog2; + } + + return ret; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeThickEquation +* +* @brief +* Interface function stub of ComputeThickEquation +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const +{ + ADDR_E_RETURNCODE ret = ADDR_OK; + + ADDR_ASSERT(IsTex3d(rsrcType)); + + UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode); + + UINT_32 maxXorBits = blockSizeLog2; + if (IsNonPrtXor(swMode)) + { + // For non-prt-xor, maybe need to initialize some more bits for xor + // The highest xor bit used in equation will be max the following 3: + // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits + // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits + // 3. 
blockSizeLog2 + + maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2)); + maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + + GetPipeXorBits(blockSizeLog2) + + 3 * GetBankXorBits(blockSizeLog2)); + } + + for (UINT_32 i = 0; i < elementBytesLog2; i++) + { + InitChannel(1, 0 , i, &pEquation->addr[i]); + } + + ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2]; + + const UINT_32 maxBitsUsed = 12; + ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits); + ADDR_CHANNEL_SETTING x[maxBitsUsed] = {}; + ADDR_CHANNEL_SETTING y[maxBitsUsed] = {}; + ADDR_CHANNEL_SETTING z[maxBitsUsed] = {}; + + const UINT_32 extraXorBits = 24; + ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2); + ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {}; + + for (UINT_32 i = 0; i < maxBitsUsed; i++) + { + InitChannel(1, 0, elementBytesLog2 + i, &x[i]); + InitChannel(1, 1, i, &y[i]); + InitChannel(1, 2, i, &z[i]); + } + + if (IsZOrderSwizzle(swMode)) + { + switch (elementBytesLog2) + { + case 0: + pixelBit[0] = x[0]; + pixelBit[1] = y[0]; + pixelBit[2] = x[1]; + pixelBit[3] = y[1]; + pixelBit[4] = z[0]; + pixelBit[5] = z[1]; + pixelBit[6] = x[2]; + pixelBit[7] = z[2]; + pixelBit[8] = y[2]; + pixelBit[9] = x[3]; + break; + case 1: + pixelBit[0] = x[0]; + pixelBit[1] = y[0]; + pixelBit[2] = x[1]; + pixelBit[3] = y[1]; + pixelBit[4] = z[0]; + pixelBit[5] = z[1]; + pixelBit[6] = z[2]; + pixelBit[7] = y[2]; + pixelBit[8] = x[2]; + break; + case 2: + pixelBit[0] = x[0]; + pixelBit[1] = y[0]; + pixelBit[2] = x[1]; + pixelBit[3] = z[0]; + pixelBit[4] = y[1]; + pixelBit[5] = z[1]; + pixelBit[6] = y[2]; + pixelBit[7] = x[2]; + break; + case 3: + pixelBit[0] = x[0]; + pixelBit[1] = y[0]; + pixelBit[2] = z[0]; + pixelBit[3] = x[1]; + pixelBit[4] = z[1]; + pixelBit[5] = y[1]; + pixelBit[6] = x[2]; + break; + case 4: + pixelBit[0] = x[0]; + pixelBit[1] = y[0]; + pixelBit[2] = z[0]; + pixelBit[3] = z[1]; + pixelBit[4] = y[1]; + pixelBit[5] = x[1]; + break; + default: 
+ ADDR_ASSERT_ALWAYS(); + ret = ADDR_INVALIDPARAMS; + break; + } + } + else if (IsStandardSwizzle(rsrcType, swMode)) + { + switch (elementBytesLog2) + { + case 0: + pixelBit[0] = x[0]; + pixelBit[1] = x[1]; + pixelBit[2] = x[2]; + pixelBit[3] = x[3]; + pixelBit[4] = y[0]; + pixelBit[5] = y[1]; + pixelBit[6] = z[0]; + pixelBit[7] = z[1]; + pixelBit[8] = z[2]; + pixelBit[9] = y[2]; + break; + case 1: + pixelBit[0] = x[0]; + pixelBit[1] = x[1]; + pixelBit[2] = x[2]; + pixelBit[3] = y[0]; + pixelBit[4] = y[1]; + pixelBit[5] = z[0]; + pixelBit[6] = z[1]; + pixelBit[7] = z[2]; + pixelBit[8] = y[2]; + break; + case 2: + pixelBit[0] = x[0]; + pixelBit[1] = x[1]; + pixelBit[2] = y[0]; + pixelBit[3] = y[1]; + pixelBit[4] = z[0]; + pixelBit[5] = z[1]; + pixelBit[6] = y[2]; + pixelBit[7] = x[2]; + break; + case 3: + pixelBit[0] = x[0]; + pixelBit[1] = y[0]; + pixelBit[2] = y[1]; + pixelBit[3] = z[0]; + pixelBit[4] = z[1]; + pixelBit[5] = x[1]; + pixelBit[6] = x[2]; + break; + case 4: + pixelBit[0] = y[0]; + pixelBit[1] = y[1]; + pixelBit[2] = z[0]; + pixelBit[3] = z[1]; + pixelBit[4] = x[0]; + pixelBit[5] = x[1]; + break; + default: + ADDR_ASSERT_ALWAYS(); + ret = ADDR_INVALIDPARAMS; + break; + } + } + else + { + ADDR_ASSERT_ALWAYS(); + ret = ADDR_INVALIDPARAMS; + } + + if (ret == ADDR_OK) + { + Dim3d microBlockDim = Block1K_3d[elementBytesLog2]; + UINT_32 xIdx = Log2(microBlockDim.w); + UINT_32 yIdx = Log2(microBlockDim.h); + UINT_32 zIdx = Log2(microBlockDim.d); + + pixelBit = pEquation->addr; + + const UINT_32 lowBits = 10; + ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1); + ADDR_ASSERT(pEquation->addr[lowBits].valid == 0); + + for (UINT_32 i = lowBits; i < blockSizeLog2; i++) + { + if ((i % 3) == 0) + { + pixelBit[i] = x[xIdx++]; + } + else if ((i % 3) == 1) + { + pixelBit[i] = z[zIdx++]; + } + else + { + pixelBit[i] = y[yIdx++]; + } + } + + for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++) + { + if ((i % 3) == 0) + { + xorExtra[i - blockSizeLog2] = x[xIdx++]; + } 
+ else if ((i % 3) == 1) + { + xorExtra[i - blockSizeLog2] = z[zIdx++]; + } + else + { + xorExtra[i - blockSizeLog2] = y[yIdx++]; + } + } + + if (IsXor(swMode)) + { + // Fill XOR bits + UINT_32 pipeStart = m_pipeInterleaveLog2; + UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2); + for (UINT_32 i = 0; i < pipeXorBits; i++) + { + UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i); + ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ? + &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2]; + + InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src); + + UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i); + ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ? + &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2]; + + InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src); + } + + UINT_32 bankStart = pipeStart + pipeXorBits; + UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2); + for (UINT_32 i = 0; i < bankXorBits; i++) + { + UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i); + ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ? + &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2]; + + InitChannel(&pEquation->xor1[bankStart + i], pXor1Src); + + UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i); + ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ? 
+ &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2]; + + InitChannel(&pEquation->xor2[bankStart + i], pXor2Src); + } + } + + pEquation->numBits = blockSizeLog2; + } + + return ret; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::IsValidDisplaySwizzleMode +* +* @brief +* Check if a swizzle mode is supported by display engine +* +* @return +* TRUE is swizzle mode is supported by display engine +************************************************************************************************************************ +*/ +BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const +{ + BOOL_32 support = FALSE; + + const AddrResourceType resourceType = pIn->resourceType; + (void)resourceType; + const AddrSwizzleMode swizzleMode = pIn->swizzleMode; + + if (m_settings.isDce12) + { + switch (swizzleMode) + { + case ADDR_SW_256B_D: + case ADDR_SW_256B_R: + support = (pIn->bpp == 32); + break; + + case ADDR_SW_LINEAR: + case ADDR_SW_4KB_D: + case ADDR_SW_4KB_R: + case ADDR_SW_64KB_D: + case ADDR_SW_64KB_R: + case ADDR_SW_VAR_D: + case ADDR_SW_VAR_R: + case ADDR_SW_4KB_D_X: + case ADDR_SW_4KB_R_X: + case ADDR_SW_64KB_D_X: + case ADDR_SW_64KB_R_X: + case ADDR_SW_VAR_D_X: + case ADDR_SW_VAR_R_X: + support = (pIn->bpp <= 64); + break; + + default: + break; + } + } + else if (m_settings.isDcn1) + { + switch (swizzleMode) + { + case ADDR_SW_4KB_D: + case ADDR_SW_64KB_D: + case ADDR_SW_VAR_D: + case ADDR_SW_64KB_D_T: + case ADDR_SW_4KB_D_X: + case ADDR_SW_64KB_D_X: + case ADDR_SW_VAR_D_X: + support = (pIn->bpp == 64); + break; + + case ADDR_SW_LINEAR: + case ADDR_SW_4KB_S: + case ADDR_SW_64KB_S: + case ADDR_SW_VAR_S: + case ADDR_SW_64KB_S_T: + case ADDR_SW_4KB_S_X: + case ADDR_SW_64KB_S_X: + case ADDR_SW_VAR_S_X: + support = (pIn->bpp <= 64); + break; + + default: + break; + } + } + else + { + ADDR_NOT_IMPLEMENTED(); + } + + return 
support; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputePipeBankXor +* +* @brief +* Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address +* +* @return +* PipeBankXor value +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor( + const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const +{ + if (IsXor(pIn->swizzleMode)) + { + UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode); + UINT_32 pipeBits = GetPipeXorBits(macroBlockBits); + UINT_32 bankBits = GetBankXorBits(macroBlockBits); + + UINT_32 pipeXor = 0; + UINT_32 bankXor = 0; + + const UINT_32 bankMask = (1 << bankBits) - 1; + const UINT_32 index = pIn->surfIndex & bankMask; + + const UINT_32 bpp = pIn->flags.fmask ? + GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format); + if (bankBits == 4) + { + static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10}; + static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10}; + + bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index]; + } + else if (bankBits > 0) + { + UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1; + bankIncrease = (bankIncrease == 0) ? 
1 : bankIncrease; + bankXor = (index * bankIncrease) & bankMask; + } + + pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor; + } + else + { + pOut->pipeBankXor = 0; + } + + return ADDR_OK; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeSlicePipeBankXor +* +* @brief +* Generate slice PipeBankXor value based on base PipeBankXor value and slice id +* +* @return +* PipeBankXor value +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor( + const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const +{ + UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode); + UINT_32 pipeBits = GetPipeXorBits(macroBlockBits); + UINT_32 bankBits = GetBankXorBits(macroBlockBits); + + UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits); + UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits); + + pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits)); + + return ADDR_OK; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern +* +* @brief +* Compute sub resource offset to support swizzle pattern +* +* @return +* Offset +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern( + const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, + ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const +{ + ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode)); + + UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode); + UINT_32 pipeBits = GetPipeXorBits(macroBlockBits); + 
UINT_32 bankBits = GetBankXorBits(macroBlockBits); + UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits); + UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits); + UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2; + + pOut->offset = pIn->slice * pIn->sliceSize + + pIn->macroBlockOffset + + (pIn->mipTailOffset ^ pipeBankXor) - + static_cast(pipeBankXor); + return ADDR_OK; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeSurfaceInfoSanityCheck +* +* @brief +* Compute surface info sanity check +* +* @return +* Offset +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const +{ + BOOL_32 invalid = FALSE; + + if ((pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16)) + { + invalid = TRUE; + } + else if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || + (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)) + { + invalid = TRUE; + } + + BOOL_32 mipmap = (pIn->numMipLevels > 1); + BOOL_32 msaa = (pIn->numFrags > 1); + + ADDR2_SURFACE_FLAGS flags = pIn->flags; + BOOL_32 zbuffer = (flags.depth || flags.stencil); + BOOL_32 color = flags.color; + BOOL_32 display = flags.display || flags.rotated; + + AddrResourceType rsrcType = pIn->resourceType; + BOOL_32 tex3d = IsTex3d(rsrcType); + BOOL_32 thin3d = tex3d && flags.view3dAs2dArray; + AddrSwizzleMode swizzle = pIn->swizzleMode; + BOOL_32 linear = IsLinear(swizzle); + BOOL_32 blk256B = IsBlock256b(swizzle); + BOOL_32 blkVar = IsBlockVariable(swizzle); + BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle); + BOOL_32 prt = flags.prt; + BOOL_32 stereo = flags.qbStereo; + + if (invalid == FALSE) + { + if ((pIn->numFrags > 1) && + (GetBlockSize(swizzle) < 
(m_pipeInterleaveBytes * pIn->numFrags))) + { + // MSAA surface must have blk_bytes/pipe_interleave >= num_samples + invalid = TRUE; + } + } + + if (invalid == FALSE) + { + switch (rsrcType) + { + case ADDR_RSRC_TEX_1D: + invalid = msaa || zbuffer || display || (linear == FALSE) || stereo; + break; + case ADDR_RSRC_TEX_2D: + invalid = (msaa && mipmap) || (stereo && msaa) || (stereo && mipmap); + break; + case ADDR_RSRC_TEX_3D: + invalid = msaa || zbuffer || display || stereo; + break; + default: + invalid = TRUE; + break; + } + } + + if (invalid == FALSE) + { + if (display) + { + invalid = (IsValidDisplaySwizzleMode(pIn) == FALSE); + } + } + + if (invalid == FALSE) + { + if (linear) + { + invalid = ((ADDR_RSRC_TEX_1D != rsrcType) && prt) || + zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0); + } + else + { + if (blk256B || blkVar || isNonPrtXor) + { + invalid = prt; + if (blk256B) + { + invalid = invalid || zbuffer || tex3d || mipmap || msaa; + } + } + + if (invalid == FALSE) + { + if (IsZOrderSwizzle(swizzle)) + { + invalid = (color && msaa) || thin3d; + } + else if (IsStandardSwizzle(swizzle)) + { + invalid = zbuffer || thin3d; + } + else if (IsDisplaySwizzle(swizzle)) + { + invalid = zbuffer || (prt && (ADDR_RSRC_TEX_3D == rsrcType)); + } + else if (IsRotateSwizzle(swizzle)) + { + invalid = zbuffer || (pIn->bpp > 64) || tex3d; + } + else + { + ADDR_ASSERT(!"invalid swizzle mode"); + invalid = TRUE; + } + } + } + } + + ADDR_ASSERT(invalid == FALSE); + + return invalid ? 
ADDR_INVALIDPARAMS : ADDR_OK; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlGetPreferredSurfaceSetting +* +* @brief +* Internal function to get suggested surface information for cliet to use +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( + const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, + ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + ElemLib* pElemLib = GetElemLib(); + + UINT_32 bpp = pIn->bpp; + UINT_32 width = pIn->width; + UINT_32 height = pIn->height; + UINT_32 numSamples = Max(pIn->numSamples, 1u); + UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags; + + if (pIn->flags.fmask) + { + bpp = GetFmaskBpp(numSamples, numFrags); + numFrags = 1; + numSamples = 1; + pOut->resourceType = ADDR_RSRC_TEX_2D; + } + else + { + // Set format to INVALID will skip this conversion + if (pIn->format != ADDR_FMT_INVALID) + { + UINT_32 expandX, expandY; + + // Don't care for this case + ElemMode elemMode = ADDR_UNCOMPRESSED; + + // Get compression/expansion factors and element mode which indicates compression/expansion + bpp = pElemLib->GetBitsPerPixel(pIn->format, + &elemMode, + &expandX, + &expandY); + + UINT_32 basePitch = 0; + GetElemLib()->AdjustSurfaceInfo(elemMode, + expandX, + expandY, + &bpp, + &basePitch, + &width, + &height); + } + + // The output may get changed for volume(3D) texture resource in future + pOut->resourceType = pIn->resourceType; + } + + const UINT_32 numSlices = Max(pIn->numSlices, 1u); + const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u); + const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1); + const BOOL_32 displayRsrc = pIn->flags.display || pIn->flags.rotated; + + // Forbid swizzle mode(s) by 
client setting, for simplicity we never allow VAR swizzle mode for GFX9 + ADDR2_SWMODE_SET allowedSwModeSet = {}; + allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask; + allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx9Blk256BSwModeMask; + allowedSwModeSet.value |= pIn->forbiddenBlock.macro4KB ? 0 : Gfx9Blk4KBSwModeMask; + allowedSwModeSet.value |= pIn->forbiddenBlock.macro64KB ? 0 : Gfx9Blk64KBSwModeMask; + + if (pIn->preferredSwSet.value != 0) + { + allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask; + allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask; + allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask; + allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask; + } + + if (pIn->noXor) + { + allowedSwModeSet.value &= ~Gfx9XorSwModeMask; + } + + if (pIn->maxAlign > 0) + { + if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB)) + { + allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask; + } + + if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB)) + { + allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask; + } + + if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B)) + { + allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask; + } + } + + // Filter out invalid swizzle mode(s) by image attributes and HW restrictions + switch (pOut->resourceType) + { + case ADDR_RSRC_TEX_1D: + allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask; + break; + + case ADDR_RSRC_TEX_2D: + allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask; + + if (bpp > 64) + { + allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask); + } + break; + + case ADDR_RSRC_TEX_3D: + allowedSwModeSet.value &= pIn->flags.prt ? 
Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask; + + if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height)) + { + // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap + // When depth (Z) is the maximum dimension then must use one of the SW_*_S + // or SW_*_Z modes if mipmapping is desired on a 3D surface + allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask; + } + + if ((bpp == 128) && pIn->flags.color) + { + allowedSwModeSet.value &= ~Gfx9StandardSwModeMask; + } + + if (pIn->flags.view3dAs2dArray) + { + allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask; + } + break; + + default: + ADDR_ASSERT_ALWAYS(); + allowedSwModeSet.value = 0; + break; + } + + if (pIn->format == ADDR_FMT_32_32_32) + { + allowedSwModeSet.value &= Gfx9LinearSwModeMask; + } + + if (ElemLib::IsBlockCompressed(pIn->format)) + { + if (pIn->flags.texture) + { + allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask; + } + else + { + allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask; + } + } + + if (ElemLib::IsMacroPixelPacked(pIn->format) || + (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered))) + { + allowedSwModeSet.value &= ~Gfx9ZSwModeMask; + } + + if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil) + { + allowedSwModeSet.value &= Gfx9ZSwModeMask; + + if (pIn->flags.noMetadata == FALSE) + { + if (pIn->flags.depth && + pIn->flags.texture && + (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2)))) + { + // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane + // equation from wrong address within memory range a tile covered and use the + // garbage data for compressed Z reading which finally leads to corruption. 
+ allowedSwModeSet.value &= ~Gfx9XorSwModeMask; + } + + if (m_settings.htileCacheRbConflict && + (pIn->flags.depth || pIn->flags.stencil) && + (numSlices > 1) && + (pIn->flags.metaRbUnaligned == FALSE) && + (pIn->flags.metaPipeUnaligned == FALSE)) + { + // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency + allowedSwModeSet.value &= ~Gfx9XSwModeMask; + } + } + } + + if (msaa) + { + allowedSwModeSet.value &= Gfx9MsaaSwModeMask; + } + + if ((numFrags > 1) && + (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags))) + { + // MSAA surface must have blk_bytes/pipe_interleave >= num_samples + allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask; + } + + if (numMipLevels > 1) + { + allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask; + } + + if (displayRsrc) + { + if (m_settings.isDce12) + { + allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask; + } + else if (m_settings.isDcn1) + { + allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask; + } + else + { + ADDR_NOT_IMPLEMENTED(); + } + } + + if (allowedSwModeSet.value != 0) + { +#if DEBUG + // Post sanity check, at least AddrLib should accept the output generated by its own + ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {}; + localIn.flags = pIn->flags; + localIn.resourceType = pOut->resourceType; + localIn.format = pIn->format; + localIn.bpp = bpp; + localIn.width = width; + localIn.height = height; + localIn.numSlices = numSlices; + localIn.numMipLevels = numMipLevels; + localIn.numSamples = numSamples; + localIn.numFrags = numFrags; + + UINT_32 validateSwModeSet = allowedSwModeSet.value; + for (UINT_32 i = 0; validateSwModeSet != 0; i++) + { + if (validateSwModeSet & 1) + { + localIn.swizzleMode = static_cast(i); + HwlComputeSurfaceInfoSanityCheck(&localIn); + } + + validateSwModeSet >>= 1; + } +#endif + + pOut->validSwModeSet = allowedSwModeSet; + pOut->canXor = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? 
TRUE : FALSE; + pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet); + pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet); + + pOut->clientPreferredSwSet = pIn->preferredSwSet; + + if (pOut->clientPreferredSwSet.value == 0) + { + pOut->clientPreferredSwSet.value = AddrSwSetAll; + } + + if (allowedSwModeSet.value == Gfx9LinearSwModeMask) + { + pOut->swizzleMode = ADDR_SW_LINEAR; + } + else + { + // Always ignore linear swizzle mode if there is other choice. + allowedSwModeSet.swLinear = 0; + + ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet); + + // Determine block size if there is 2 or more block type candidates + if (IsPow2(allowedBlockSet.value) == FALSE) + { + const AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {ADDR_SW_256B, ADDR_SW_4KB, ADDR_SW_64KB}; + Dim3d blkDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}}; + Dim3d padDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}}; + UINT_64 padSize[AddrBlockMaxTiledType] = {0}; + + const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2); + const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 
2 : 1); + const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u); + UINT_32 minSizeBlk = AddrBlockMicro; + UINT_64 minSize = 0; + + for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++) + { + if (allowedBlockSet.value & (1 << i)) + { + ComputeBlockDimensionForSurf(&blkDim[i].w, + &blkDim[i].h, + &blkDim[i].d, + bpp, + numFrags, + pOut->resourceType, + swMode[i]); + + if (displayRsrc) + { + blkDim[i].w = PowTwoAlign(blkDim[i].w, 32); + } + + padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]); + padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement); + + if ((minSize == 0) || + ((padSize[i] * ratioHi) <= (minSize * ratioLow))) + { + minSize = padSize[i]; + minSizeBlk = i; + } + } + } + + if ((allowedBlockSet.micro == TRUE) && + (width <= blkDim[AddrBlockMicro].w) && + (height <= blkDim[AddrBlockMicro].h) && + (NextPow2(pIn->minSizeAlign) <= GetBlockSize(ADDR_SW_256B))) + { + minSizeBlk = AddrBlockMicro; + } + + if (minSizeBlk == AddrBlockMicro) + { + allowedSwModeSet.value &= Gfx9Blk256BSwModeMask; + } + else if (minSizeBlk == AddrBlock4KB) + { + allowedSwModeSet.value &= Gfx9Blk4KBSwModeMask; + } + else + { + ADDR_ASSERT(minSizeBlk == AddrBlock64KB); + allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask; + } + } + + // Block type should be determined. 
+ ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet).value)); + + ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet); + + // Determine swizzle type if there is 2 or more swizzle type candidates + if (IsPow2(allowedSwSet.value) == FALSE) + { + if (ElemLib::IsBlockCompressed(pIn->format)) + { + if (allowedSwSet.sw_D) + { + allowedSwModeSet.value &= Gfx9DisplaySwModeMask; + } + else + { + ADDR_ASSERT(allowedSwSet.sw_S); + allowedSwModeSet.value &= Gfx9StandardSwModeMask; + } + } + else if (ElemLib::IsMacroPixelPacked(pIn->format)) + { + if (allowedSwSet.sw_S) + { + allowedSwModeSet.value &= Gfx9StandardSwModeMask; + } + else if (allowedSwSet.sw_D) + { + allowedSwModeSet.value &= Gfx9DisplaySwModeMask; + } + else + { + ADDR_ASSERT(allowedSwSet.sw_R); + allowedSwModeSet.value &= Gfx9RotateSwModeMask; + } + } + else if (pOut->resourceType == ADDR_RSRC_TEX_3D) + { + if (pIn->flags.color && allowedSwSet.sw_D) + { + allowedSwModeSet.value &= Gfx9DisplaySwModeMask; + } + else if (allowedSwSet.sw_Z) + { + allowedSwModeSet.value &= Gfx9ZSwModeMask; + } + else + { + ADDR_ASSERT(allowedSwSet.sw_S); + allowedSwModeSet.value &= Gfx9StandardSwModeMask; + } + } + else + { + if (pIn->flags.rotated && allowedSwSet.sw_R) + { + allowedSwModeSet.value &= Gfx9RotateSwModeMask; + } + else if (displayRsrc && allowedSwSet.sw_D) + { + allowedSwModeSet.value &= Gfx9DisplaySwModeMask; + } + else if (allowedSwSet.sw_S) + { + allowedSwModeSet.value &= Gfx9StandardSwModeMask; + } + else + { + ADDR_ASSERT(allowedSwSet.sw_Z); + allowedSwModeSet.value &= Gfx9ZSwModeMask; + } + } + } + + // Swizzle type should be determined. + ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value)); + + // Determine swizzle mode now - always select the "largest" swizzle mode for a given block type + + // swizzle type combination. 
For example, for AddrBlock64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's + // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9). + pOut->swizzleMode = static_cast(Log2NonPow2(allowedSwModeSet.value)); + } + } + else + { + // Invalid combination... + ADDR_ASSERT_ALWAYS(); + returnCode = ADDR_INVALIDPARAMS; + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::ComputeStereoInfo +* +* @brief +* Compute height alignment and right eye pipeBankXor for stereo surface +* +* @return +* Error code +* +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut, + UINT_32* pHeightAlign + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut); + + if (eqIndex < m_numEquations) + { + if (IsXor(pIn->swizzleMode)) + { + const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode); + const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2); + const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2); + const UINT_32 bppLog2 = Log2(pIn->bpp >> 3); + const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1; + MAYBE_UNUSED const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex]; + + ADDR_ASSERT(maxYCoordBlock256 == + GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1)); + + const UINT_32 maxYCoordInBaseEquation = + (blkSizeLog2 - GetBlockSizeLog2(ADDR_SW_256B)) / 2 + maxYCoordBlock256; + + ADDR_ASSERT(maxYCoordInBaseEquation == + GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1)); + + const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 
0 : maxYCoordBlock256 + numPipeBits; + + ADDR_ASSERT(maxYCoordInPipeXor == + GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1)); + + const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ? + 0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits; + + ADDR_ASSERT(maxYCoordInBankXor == + GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1)); + + const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor); + + if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation) + { + *pHeightAlign = 1u << maxYCoordInPipeBankXor; + + if (pOut->pStereoInfo != NULL) + { + pOut->pStereoInfo->rightSwizzle = 0; + + if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0) + { + if (maxYCoordInPipeXor == maxYCoordInPipeBankXor) + { + pOut->pStereoInfo->rightSwizzle |= (1u << 1); + } + + if (maxYCoordInBankXor == maxYCoordInPipeBankXor) + { + pOut->pStereoInfo->rightSwizzle |= + 1u << ((numPipeBits % 2) ? 
numPipeBits : numPipeBits + 1); + } + + ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle == + GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2], + numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor)); + } + } + } + } + } + else + { + ADDR_ASSERT_ALWAYS(); + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeSurfaceInfoTiled +* +* @brief +* Internal function to calculate alignment for tiled surface +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth, + &pOut->blockHeight, + &pOut->blockSlices, + pIn->bpp, + pIn->numFrags, + pIn->resourceType, + pIn->swizzleMode); + + if (returnCode == ADDR_OK) + { + UINT_32 pitchAlignInElement = pOut->blockWidth; + + if ((IsTex2d(pIn->resourceType) == TRUE) && + (pIn->flags.display || pIn->flags.rotated) && + (pIn->numMipLevels <= 1) && + (pIn->numSamples <= 1) && + (pIn->numFrags <= 1)) + { + // Display engine needs pitch align to be at least 32 pixels. 
+ pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32); + } + + pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement); + + if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0)) + { + if ((pIn->pitchInElement % pitchAlignInElement) != 0) + { + returnCode = ADDR_INVALIDPARAMS; + } + else if (pIn->pitchInElement < pOut->pitch) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + pOut->pitch = pIn->pitchInElement; + } + } + + UINT_32 heightAlign = 0; + + if (pIn->flags.qbStereo) + { + returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign); + } + + if (returnCode == ADDR_OK) + { + pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight); + + if (heightAlign > 1) + { + pOut->height = PowTwoAlign(pOut->height, heightAlign); + } + + pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices); + + pOut->epitchIsHeight = FALSE; + pOut->mipChainInTail = FALSE; + pOut->firstMipIdInTail = pIn->numMipLevels; + + pOut->mipChainPitch = pOut->pitch; + pOut->mipChainHeight = pOut->height; + pOut->mipChainSlice = pOut->numSlices; + + if (pIn->numMipLevels > 1) + { + pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType, + pIn->swizzleMode, + pIn->bpp, + pIn->width, + pIn->height, + pIn->numSlices, + pOut->blockWidth, + pOut->blockHeight, + pOut->blockSlices, + pIn->numMipLevels, + pOut->pMipInfo); + + const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1); + + if (endingMipId == 0) + { + const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType, + pIn->swizzleMode, + pOut->blockWidth, + pOut->blockHeight, + pOut->blockSlices); + + pOut->epitchIsHeight = TRUE; + pOut->pitch = tailMaxDim.w; + pOut->height = tailMaxDim.h; + pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ? 
+ tailMaxDim.d : pIn->numSlices; + pOut->mipChainInTail = TRUE; + } + else + { + UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth; + UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight; + + AddrMajorMode majorMode = GetMajorMode(pIn->resourceType, + pIn->swizzleMode, + mip0WidthInBlk, + mip0HeightInBlk, + pOut->numSlices / pOut->blockSlices); + if (majorMode == ADDR_MAJOR_Y) + { + UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk); + + if ((mip1WidthInBlk == 1) && (endingMipId > 2)) + { + mip1WidthInBlk++; + } + + pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth); + + pOut->epitchIsHeight = FALSE; + } + else + { + UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk); + + if ((mip1HeightInBlk == 1) && (endingMipId > 2)) + { + mip1HeightInBlk++; + } + + pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight); + + pOut->epitchIsHeight = TRUE; + } + } + + if (pOut->pMipInfo != NULL) + { + UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); + + for (UINT_32 i = 0; i < pIn->numMipLevels; i++) + { + Dim3d mipStartPos = {0}; + UINT_32 mipTailOffsetInBytes = 0; + + mipStartPos = GetMipStartPos(pIn->resourceType, + pIn->swizzleMode, + pOut->pitch, + pOut->height, + pOut->numSlices, + pOut->blockWidth, + pOut->blockHeight, + pOut->blockSlices, + i, + elementBytesLog2, + &mipTailOffsetInBytes); + + UINT_32 pitchInBlock = + pOut->mipChainPitch / pOut->blockWidth; + UINT_32 sliceInBlock = + (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock; + UINT_64 blockIndex = + mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w; + UINT_64 macroBlockOffset = + blockIndex << GetBlockSizeLog2(pIn->swizzleMode); + + pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset; + pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes; + } + } + } + else if (pOut->pMipInfo != NULL) + { + pOut->pMipInfo[0].pitch = pOut->pitch; + pOut->pMipInfo[0].height = pOut->height; + pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? 
pOut->numSlices : 1; + pOut->pMipInfo[0].offset = 0; + } + + pOut->sliceSize = static_cast(pOut->mipChainPitch) * pOut->mipChainHeight * + (pIn->bpp >> 3) * pIn->numFrags; + pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice; + pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode); + + if ((IsBlock256b(pIn->swizzleMode) == FALSE) && + (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) && + (pIn->flags.texture == TRUE) && + (pIn->flags.noMetadata == FALSE) && + (pIn->flags.metaPipeUnaligned == FALSE)) + { + // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC... + // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will + // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of + // them, which may cause invalid metadata to be fetched. + pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes); + } + + if (pIn->flags.prt) + { + pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment); + } + } + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeSurfaceInfoLinear +* +* @brief +* Internal function to calculate alignment for linear surface +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + UINT_32 pitch = 0; + UINT_32 actualHeight = 0; + UINT_32 elementBytes = pIn->bpp >> 3; + const UINT_32 alignment = pIn->flags.prt ? 
PrtAlignment : 256; + + if (IsTex1d(pIn->resourceType)) + { + if (pIn->height > 1) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + const UINT_32 pitchAlignInElement = alignment / elementBytes; + + pitch = PowTwoAlign(pIn->width, pitchAlignInElement); + actualHeight = pIn->numMipLevels; + + if (pIn->flags.prt == FALSE) + { + returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement, + &pitch, &actualHeight); + } + + if (returnCode == ADDR_OK) + { + if (pOut->pMipInfo != NULL) + { + for (UINT_32 i = 0; i < pIn->numMipLevels; i++) + { + pOut->pMipInfo[i].offset = pitch * elementBytes * i; + pOut->pMipInfo[i].pitch = pitch; + pOut->pMipInfo[i].height = 1; + pOut->pMipInfo[i].depth = 1; + } + } + } + } + } + else + { + returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo); + } + + if ((pitch == 0) || (actualHeight == 0)) + { + returnCode = ADDR_INVALIDPARAMS; + } + + if (returnCode == ADDR_OK) + { + pOut->pitch = pitch; + pOut->height = pIn->height; + pOut->numSlices = pIn->numSlices; + pOut->mipChainPitch = pitch; + pOut->mipChainHeight = actualHeight; + pOut->mipChainSlice = pOut->numSlices; + pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE; + pOut->sliceSize = static_cast(pOut->pitch) * actualHeight * elementBytes; + pOut->surfSize = pOut->sliceSize * pOut->numSlices; + pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment; + pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 
1 : (256 / elementBytes); + pOut->blockHeight = 1; + pOut->blockSlices = 1; + } + + // Post calculation validate + ADDR_ASSERT(pOut->sliceSize > 0); + + return returnCode; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::GetMipChainInfo +* +* @brief +* Internal function to get out information about mip chain +* +* @return +* Smaller value between Id of first mip fitted in mip tail and max Id of mip being created +************************************************************************************************************************ +*/ +UINT_32 Gfx9Lib::GetMipChainInfo( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode, + UINT_32 bpp, + UINT_32 mip0Width, + UINT_32 mip0Height, + UINT_32 mip0Depth, + UINT_32 blockWidth, + UINT_32 blockHeight, + UINT_32 blockDepth, + UINT_32 numMipLevel, + ADDR2_MIP_INFO* pMipInfo) const +{ + const Dim3d tailMaxDim = + GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth); + + UINT_32 mipPitch = mip0Width; + UINT_32 mipHeight = mip0Height; + UINT_32 mipDepth = IsTex3d(resourceType) ? 
mip0Depth : 1; + UINT_32 offset = 0; + UINT_32 firstMipIdInTail = numMipLevel; + BOOL_32 inTail = FALSE; + BOOL_32 finalDim = FALSE; + BOOL_32 is3dThick = IsThick(resourceType, swizzleMode); + BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE); + + for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++) + { + if (inTail) + { + if (finalDim == FALSE) + { + UINT_32 mipSize; + + if (is3dThick) + { + mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3); + } + else + { + mipSize = mipPitch * mipHeight * (bpp >> 3); + } + + if (mipSize <= 256) + { + UINT_32 index = Log2(bpp >> 3); + + if (is3dThick) + { + mipPitch = Block256_3dZ[index].w; + mipHeight = Block256_3dZ[index].h; + mipDepth = Block256_3dZ[index].d; + } + else + { + mipPitch = Block256_2d[index].w; + mipHeight = Block256_2d[index].h; + } + + finalDim = TRUE; + } + } + } + else + { + inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, + mipPitch, mipHeight, mipDepth); + + if (inTail) + { + firstMipIdInTail = mipId; + mipPitch = tailMaxDim.w; + mipHeight = tailMaxDim.h; + + if (is3dThick) + { + mipDepth = tailMaxDim.d; + } + } + else + { + mipPitch = PowTwoAlign(mipPitch, blockWidth); + mipHeight = PowTwoAlign(mipHeight, blockHeight); + + if (is3dThick) + { + mipDepth = PowTwoAlign(mipDepth, blockDepth); + } + } + } + + if (pMipInfo != NULL) + { + pMipInfo[mipId].pitch = mipPitch; + pMipInfo[mipId].height = mipHeight; + pMipInfo[mipId].depth = mipDepth; + pMipInfo[mipId].offset = offset; + } + + offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3)); + + if (finalDim) + { + if (is3dThin) + { + mipDepth = Max(mipDepth >> 1, 1u); + } + } + else + { + mipPitch = Max(mipPitch >> 1, 1u); + mipHeight = Max(mipHeight >> 1, 1u); + + if (is3dThick || is3dThin) + { + mipDepth = Max(mipDepth >> 1, 1u); + } + } + } + + return firstMipIdInTail; +} + +/** +************************************************************************************************************************ +* 
Gfx9Lib::GetMetaMiptailInfo +* +* @brief +* Get mip tail coordinate information. +* +* @return +* N/A +************************************************************************************************************************ +*/ +VOID Gfx9Lib::GetMetaMiptailInfo( + ADDR2_META_MIP_INFO* pInfo, ///< [out] output structure to store per mip coord + Dim3d mipCoord, ///< [in] mip tail base coord + UINT_32 numMipInTail, ///< [in] number of mips in tail + Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth + ) const +{ + BOOL_32 isThick = (pMetaBlkDim->d > 1); + UINT_32 mipWidth = pMetaBlkDim->w; + UINT_32 mipHeight = pMetaBlkDim->h >> 1; + UINT_32 mipDepth = pMetaBlkDim->d; + UINT_32 minInc; + + if (isThick) + { + minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32); + } + else if (pMetaBlkDim->h >= 1024) + { + minInc = 256; + } + else if (pMetaBlkDim->h == 512) + { + minInc = 128; + } + else + { + minInc = 64; + } + + UINT_32 blk32MipId = 0xFFFFFFFF; + + for (UINT_32 mip = 0; mip < numMipInTail; mip++) + { + pInfo[mip].inMiptail = TRUE; + pInfo[mip].startX = mipCoord.w; + pInfo[mip].startY = mipCoord.h; + pInfo[mip].startZ = mipCoord.d; + pInfo[mip].width = mipWidth; + pInfo[mip].height = mipHeight; + pInfo[mip].depth = mipDepth; + + if (mipWidth <= 32) + { + if (blk32MipId == 0xFFFFFFFF) + { + blk32MipId = mip; + } + + mipCoord.w = pInfo[blk32MipId].startX; + mipCoord.h = pInfo[blk32MipId].startY; + mipCoord.d = pInfo[blk32MipId].startZ; + + switch (mip - blk32MipId) + { + case 0: + mipCoord.w += 32; // 16x16 + break; + case 1: + mipCoord.h += 32; // 8x8 + break; + case 2: + mipCoord.h += 32; // 4x4 + mipCoord.w += 16; + break; + case 3: + mipCoord.h += 32; // 2x2 + mipCoord.w += 32; + break; + case 4: + mipCoord.h += 32; // 1x1 + mipCoord.w += 48; + break; + // The following are for BC/ASTC formats + case 5: + mipCoord.h += 48; // 1/2 x 1/2 + break; + case 6: + mipCoord.h += 48; // 1/4 x 1/4 + mipCoord.w += 16; + break; + case 7: + 
mipCoord.h += 48; // 1/8 x 1/8 + mipCoord.w += 32; + break; + case 8: + mipCoord.h += 48; // 1/16 x 1/16 + mipCoord.w += 48; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8; + mipHeight = mipWidth; + + if (isThick) + { + mipDepth = mipWidth; + } + } + else + { + if (mipWidth <= minInc) + { + // if we're below the minimal increment... + if (isThick) + { + // For 3d, just go in z direction + mipCoord.d += mipDepth; + } + else + { + // For 2d, first go across, then down + if ((mipWidth * 2) == minInc) + { + // if we're 2 mips below, that's when we go back in x, and down in y + mipCoord.w -= minInc; + mipCoord.h += minInc; + } + else + { + // otherwise, just go across in x + mipCoord.w += minInc; + } + } + } + else + { + // On even mip, go down, otherwise, go across + if (mip & 1) + { + mipCoord.w += mipWidth; + } + else + { + mipCoord.h += mipHeight; + } + } + // Divide the width by 2 + mipWidth >>= 1; + // After the first mip in tail, the mip is always a square + mipHeight = mipWidth; + // ...or for 3d, a cube + if (isThick) + { + mipDepth = mipWidth; + } + } + } +} + +/** +************************************************************************************************************************ +* Gfx9Lib::GetMipStartPos +* +* @brief +* Internal function to get out information about mip logical start position +* +* @return +* logical start position in macro block width/heith/depth of one mip level within one slice +************************************************************************************************************************ +*/ +Dim3d Gfx9Lib::GetMipStartPos( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode, + UINT_32 width, + UINT_32 height, + UINT_32 depth, + UINT_32 blockWidth, + UINT_32 blockHeight, + UINT_32 blockDepth, + UINT_32 mipId, + UINT_32 log2ElementBytes, + UINT_32* pMipTailBytesOffset) const +{ + Dim3d mipStartPos = {0}; + const Dim3d tailMaxDim = 
GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth); + + // Report mip in tail if Mip0 is already in mip tail + BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth); + UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode); + UINT_32 mipIndexInTail = mipId; + + if (inMipTail == FALSE) + { + // Mip 0 dimension, unit in block + UINT_32 mipWidthInBlk = width / blockWidth; + UINT_32 mipHeightInBlk = height / blockHeight; + UINT_32 mipDepthInBlk = depth / blockDepth; + AddrMajorMode majorMode = GetMajorMode(resourceType, + swizzleMode, + mipWidthInBlk, + mipHeightInBlk, + mipDepthInBlk); + + UINT_32 endingMip = mipId + 1; + + for (UINT_32 i = 1; i <= mipId; i++) + { + if ((i == 1) || (i == 3)) + { + if (majorMode == ADDR_MAJOR_Y) + { + mipStartPos.w += mipWidthInBlk; + } + else + { + mipStartPos.h += mipHeightInBlk; + } + } + else + { + if (majorMode == ADDR_MAJOR_X) + { + mipStartPos.w += mipWidthInBlk; + } + else if (majorMode == ADDR_MAJOR_Y) + { + mipStartPos.h += mipHeightInBlk; + } + else + { + mipStartPos.d += mipDepthInBlk; + } + } + + BOOL_32 inTail = FALSE; + + if (IsThick(resourceType, swizzleMode)) + { + UINT_32 dim = log2blkSize % 3; + + if (dim == 0) + { + inTail = + (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2); + } + else if (dim == 1) + { + inTail = + (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2); + } + else + { + inTail = + (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1); + } + } + else + { + if (log2blkSize & 1) + { + inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1); + } + else + { + inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2); + } + } + + if (inTail) + { + endingMip = i; + break; + } + + mipWidthInBlk = RoundHalf(mipWidthInBlk); + mipHeightInBlk = RoundHalf(mipHeightInBlk); + mipDepthInBlk = RoundHalf(mipDepthInBlk); + } + + if (mipId >= endingMip) + { + inMipTail = TRUE; + mipIndexInTail = mipId - 
endingMip; + } + } + + if (inMipTail) + { + UINT_32 index = mipIndexInTail + MaxMacroBits - log2blkSize; + ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32)); + *pMipTailBytesOffset = MipTailOffset256B[index] << 8; + } + + return mipStartPos; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled +* +* @brief +* Internal function to calculate address from coord for tiled swizzle surface +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled( + const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; + localIn.swizzleMode = pIn->swizzleMode; + localIn.flags = pIn->flags; + localIn.resourceType = pIn->resourceType; + localIn.bpp = pIn->bpp; + localIn.width = Max(pIn->unalignedWidth, 1u); + localIn.height = Max(pIn->unalignedHeight, 1u); + localIn.numSlices = Max(pIn->numSlices, 1u); + localIn.numMipLevels = Max(pIn->numMipLevels, 1u); + localIn.numSamples = Max(pIn->numSamples, 1u); + localIn.numFrags = Max(pIn->numFrags, 1u); + if (localIn.numMipLevels <= 1) + { + localIn.pitchInElement = pIn->pitchInElement; + } + + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0}; + ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut); + + BOOL_32 valid = (returnCode == ADDR_OK) && + (IsThin(pIn->resourceType, pIn->swizzleMode) || + IsThick(pIn->resourceType, pIn->swizzleMode)) && + ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode))); + + if (valid) + { + UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3); + Dim3d mipStartPos = {0}; + UINT_32 mipTailBytesOffset = 0; + + if (pIn->numMipLevels > 
1) + { + // Mip-map chain cannot be MSAA surface + ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1)); + + mipStartPos = GetMipStartPos(pIn->resourceType, + pIn->swizzleMode, + localOut.pitch, + localOut.height, + localOut.numSlices, + localOut.blockWidth, + localOut.blockHeight, + localOut.blockSlices, + pIn->mipId, + log2ElementBytes, + &mipTailBytesOffset); + } + + UINT_32 interleaveOffset = 0; + UINT_32 pipeBits = 0; + UINT_32 pipeXor = 0; + UINT_32 bankBits = 0; + UINT_32 bankXor = 0; + + if (IsThin(pIn->resourceType, pIn->swizzleMode)) + { + UINT_32 blockOffset = 0; + UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode); + + if (IsZOrderSwizzle(pIn->swizzleMode)) + { + // Morton generation + if ((log2ElementBytes == 0) || (log2ElementBytes == 2)) + { + UINT_32 totalLowBits = 6 - log2ElementBytes; + UINT_32 mortBits = totalLowBits / 2; + UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits); + // Are 9 bits enough? + UINT_32 highBitsValue = + MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits; + blockOffset = lowBitsValue | highBitsValue; + ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue); + } + else + { + blockOffset = MortonGen2d(pIn->y, pIn->x, 13); + } + + // Fill LSBs with sample bits + if (pIn->numSamples > 1) + { + blockOffset *= pIn->numSamples; + blockOffset |= pIn->sample; + } + + // Shift according to BytesPP + blockOffset <<= log2ElementBytes; + } + else + { + // Micro block offset + UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn); + blockOffset = microBlockOffset; + + // Micro block dimension + ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp); + Dim2d microBlockDim = Block256_2d[log2ElementBytes]; + // Morton generation, does 12 bit enough? 
+ blockOffset |= + MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8; + + // Sample bits start location + UINT_32 sampleStart = log2blkSize - Log2(pIn->numSamples); + // Join sample bits information to the highest Macro block bits + if (IsNonPrtXor(pIn->swizzleMode)) + { + // Non-prt-Xor : xor highest Macro block bits with sample bits + blockOffset = blockOffset ^ (pIn->sample << sampleStart); + } + else + { + // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits + // after this op, the blockOffset only contains log2 Macro block size bits + blockOffset %= (1 << sampleStart); + blockOffset |= (pIn->sample << sampleStart); + ADDR_ASSERT((blockOffset >> log2blkSize) == 0); + } + } + + if (IsXor(pIn->swizzleMode)) + { + // Mask off bits above Macro block bits to keep page synonyms working for prt + if (IsPrt(pIn->swizzleMode)) + { + blockOffset &= ((1 << log2blkSize) - 1); + } + + // Preserve offset inside pipe interleave + interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1); + blockOffset >>= m_pipeInterleaveLog2; + + // Pipe/Se xor bits + pipeBits = GetPipeXorBits(log2blkSize); + // Pipe xor + pipeXor = FoldXor2d(blockOffset, pipeBits); + blockOffset >>= pipeBits; + + // Bank xor bits + bankBits = GetBankXorBits(log2blkSize); + // Bank Xor + bankXor = FoldXor2d(blockOffset, bankBits); + blockOffset >>= bankBits; + + // Put all the part back together + blockOffset <<= bankBits; + blockOffset |= bankXor; + blockOffset <<= pipeBits; + blockOffset |= pipeXor; + blockOffset <<= m_pipeInterleaveLog2; + blockOffset |= interleaveOffset; + } + + ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset)); + ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize))); + + blockOffset |= mipTailBytesOffset; + + if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1)) + { + // Apply slice xor if not MSAA/PRT + blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << 
m_pipeInterleaveLog2); + blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) << + (m_pipeInterleaveLog2 + pipeBits)); + } + + returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor, + bankBits, pipeBits, &blockOffset); + + blockOffset %= (1 << log2blkSize); + + UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth; + UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight; + UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock; + UINT_64 macroBlockIndex = + (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock + + ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock + + ((pIn->x / localOut.blockWidth) + mipStartPos.w); + + pOut->addr = blockOffset | (macroBlockIndex << log2blkSize); + } + else + { + UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode); + + Dim3d microBlockDim = Block1K_3d[log2ElementBytes]; + + UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w), + (pIn->y / microBlockDim.h), + (pIn->slice / microBlockDim.d), + 8); + + blockOffset <<= 10; + blockOffset |= ComputeSurface3DMicroBlockOffset(pIn); + + if (IsXor(pIn->swizzleMode)) + { + // Mask off bits above Macro block bits to keep page synonyms working for prt + if (IsPrt(pIn->swizzleMode)) + { + blockOffset &= ((1 << log2blkSize) - 1); + } + + // Preserve offset inside pipe interleave + interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1); + blockOffset >>= m_pipeInterleaveLog2; + + // Pipe/Se xor bits + pipeBits = GetPipeXorBits(log2blkSize); + // Pipe xor + pipeXor = FoldXor3d(blockOffset, pipeBits); + blockOffset >>= pipeBits; + + // Bank xor bits + bankBits = GetBankXorBits(log2blkSize); + // Bank Xor + bankXor = FoldXor3d(blockOffset, bankBits); + blockOffset >>= bankBits; + + // Put all the part back together + blockOffset <<= bankBits; + blockOffset |= bankXor; + blockOffset <<= pipeBits; + blockOffset |= pipeXor; + blockOffset <<= 
m_pipeInterleaveLog2; + blockOffset |= interleaveOffset; + } + + ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset)); + ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize))); + blockOffset |= mipTailBytesOffset; + + returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor, + bankBits, pipeBits, &blockOffset); + + blockOffset %= (1 << log2blkSize); + + UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w; + UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h; + UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d; + + UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth; + UINT_32 sliceSizeInBlock = + (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock; + UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; + + pOut->addr = blockOffset | (blockIndex << log2blkSize); + } + } + else + { + returnCode = ADDR_INVALIDPARAMS; + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::ComputeSurfaceInfoLinear +* +* @brief +* Internal function to calculate padding for linear swizzle 2D/3D surface +* +* @return +* N/A +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture + UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element + UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW + ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + UINT_32 elementBytes = pIn->bpp >> 3; + UINT_32 pitchAlignInElement = 0; + + if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) + { + ADDR_ASSERT(pIn->numMipLevels <= 1); + ADDR_ASSERT(pIn->numSlices <= 1); + 
pitchAlignInElement = 1; + } + else + { + pitchAlignInElement = (256 / elementBytes); + } + + UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement); + UINT_32 slice0PaddedHeight = pIn->height; + + returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement, + &mipChainWidth, &slice0PaddedHeight); + + if (returnCode == ADDR_OK) + { + UINT_32 mipChainHeight = 0; + UINT_32 mipHeight = pIn->height; + UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1; + + for (UINT_32 i = 0; i < pIn->numMipLevels; i++) + { + if (pMipInfo != NULL) + { + pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes; + pMipInfo[i].pitch = mipChainWidth; + pMipInfo[i].height = mipHeight; + pMipInfo[i].depth = mipDepth; + } + + mipChainHeight += mipHeight; + mipHeight = RoundHalf(mipHeight); + mipHeight = Max(mipHeight, 1u); + } + + *pMipmap0PaddedWidth = mipChainWidth; + *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight; + } + + return returnCode; +} + +} // V2 +} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/src/gfx9/gfx9addrlib.h mesa-19.0.1/src/amd/addrlib/src/gfx9/gfx9addrlib.h --- mesa-18.3.3/src/amd/addrlib/src/gfx9/gfx9addrlib.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/gfx9/gfx9addrlib.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,629 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +************************************************************************************************************************ +* @file gfx9addrlib.h +* @brief Contgfx9ns the Gfx9Lib class definition. +************************************************************************************************************************ +*/ + +#ifndef __GFX9_ADDR_LIB_H__ +#define __GFX9_ADDR_LIB_H__ + +#include "addrlib2.h" +#include "coord.h" + +namespace Addr +{ +namespace V2 +{ + +/** +************************************************************************************************************************ +* @brief GFX9 specific settings structure. 
+************************************************************************************************************************ +*/ +struct Gfx9ChipSettings +{ + struct + { + // Asic/Generation name + UINT_32 isArcticIsland : 1; + UINT_32 isVega10 : 1; + UINT_32 isRaven : 1; + UINT_32 isVega12 : 1; + UINT_32 isVega20 : 1; + UINT_32 reserved0 : 27; + + // Display engine IP version name + UINT_32 isDce12 : 1; + UINT_32 isDcn1 : 1; + + // Misc configuration bits + UINT_32 metaBaseAlignFix : 1; + UINT_32 depthPipeXorDisable : 1; + UINT_32 htileAlignFix : 1; + UINT_32 applyAliasFix : 1; + UINT_32 htileCacheRbConflict: 1; + UINT_32 reserved2 : 27; + }; +}; + +/** +************************************************************************************************************************ +* @brief GFX9 data surface type. +************************************************************************************************************************ +*/ +enum Gfx9DataType +{ + Gfx9DataColor, + Gfx9DataDepthStencil, + Gfx9DataFmask +}; + +const UINT_32 Gfx9LinearSwModeMask = (1u << ADDR_SW_LINEAR); + +const UINT_32 Gfx9Blk256BSwModeMask = (1u << ADDR_SW_256B_S) | + (1u << ADDR_SW_256B_D) | + (1u << ADDR_SW_256B_R); + +const UINT_32 Gfx9Blk4KBSwModeMask = (1u << ADDR_SW_4KB_Z) | + (1u << ADDR_SW_4KB_S) | + (1u << ADDR_SW_4KB_D) | + (1u << ADDR_SW_4KB_R) | + (1u << ADDR_SW_4KB_Z_X) | + (1u << ADDR_SW_4KB_S_X) | + (1u << ADDR_SW_4KB_D_X) | + (1u << ADDR_SW_4KB_R_X); + +const UINT_32 Gfx9Blk64KBSwModeMask = (1u << ADDR_SW_64KB_Z) | + (1u << ADDR_SW_64KB_S) | + (1u << ADDR_SW_64KB_D) | + (1u << ADDR_SW_64KB_R) | + (1u << ADDR_SW_64KB_Z_T) | + (1u << ADDR_SW_64KB_S_T) | + (1u << ADDR_SW_64KB_D_T) | + (1u << ADDR_SW_64KB_R_T) | + (1u << ADDR_SW_64KB_Z_X) | + (1u << ADDR_SW_64KB_S_X) | + (1u << ADDR_SW_64KB_D_X) | + (1u << ADDR_SW_64KB_R_X); + +const UINT_32 Gfx9BlkVarSwModeMask = (1u << ADDR_SW_VAR_Z) | + (1u << ADDR_SW_VAR_S) | + (1u << ADDR_SW_VAR_D) | + (1u << ADDR_SW_VAR_R) | + (1u << 
ADDR_SW_VAR_Z_X) | + (1u << ADDR_SW_VAR_S_X) | + (1u << ADDR_SW_VAR_D_X) | + (1u << ADDR_SW_VAR_R_X); + +const UINT_32 Gfx9ZSwModeMask = (1u << ADDR_SW_4KB_Z) | + (1u << ADDR_SW_64KB_Z) | + (1u << ADDR_SW_VAR_Z) | + (1u << ADDR_SW_64KB_Z_T) | + (1u << ADDR_SW_4KB_Z_X) | + (1u << ADDR_SW_64KB_Z_X) | + (1u << ADDR_SW_VAR_Z_X); + +const UINT_32 Gfx9StandardSwModeMask = (1u << ADDR_SW_256B_S) | + (1u << ADDR_SW_4KB_S) | + (1u << ADDR_SW_64KB_S) | + (1u << ADDR_SW_VAR_S) | + (1u << ADDR_SW_64KB_S_T) | + (1u << ADDR_SW_4KB_S_X) | + (1u << ADDR_SW_64KB_S_X) | + (1u << ADDR_SW_VAR_S_X); + +const UINT_32 Gfx9DisplaySwModeMask = (1u << ADDR_SW_256B_D) | + (1u << ADDR_SW_4KB_D) | + (1u << ADDR_SW_64KB_D) | + (1u << ADDR_SW_VAR_D) | + (1u << ADDR_SW_64KB_D_T) | + (1u << ADDR_SW_4KB_D_X) | + (1u << ADDR_SW_64KB_D_X) | + (1u << ADDR_SW_VAR_D_X); + +const UINT_32 Gfx9RotateSwModeMask = (1u << ADDR_SW_256B_R) | + (1u << ADDR_SW_4KB_R) | + (1u << ADDR_SW_64KB_R) | + (1u << ADDR_SW_VAR_R) | + (1u << ADDR_SW_64KB_R_T) | + (1u << ADDR_SW_4KB_R_X) | + (1u << ADDR_SW_64KB_R_X) | + (1u << ADDR_SW_VAR_R_X); + +const UINT_32 Gfx9XSwModeMask = (1u << ADDR_SW_4KB_Z_X) | + (1u << ADDR_SW_4KB_S_X) | + (1u << ADDR_SW_4KB_D_X) | + (1u << ADDR_SW_4KB_R_X) | + (1u << ADDR_SW_64KB_Z_X) | + (1u << ADDR_SW_64KB_S_X) | + (1u << ADDR_SW_64KB_D_X) | + (1u << ADDR_SW_64KB_R_X) | + (1u << ADDR_SW_VAR_Z_X) | + (1u << ADDR_SW_VAR_S_X) | + (1u << ADDR_SW_VAR_D_X) | + (1u << ADDR_SW_VAR_R_X); + +const UINT_32 Gfx9TSwModeMask = (1u << ADDR_SW_64KB_Z_T) | + (1u << ADDR_SW_64KB_S_T) | + (1u << ADDR_SW_64KB_D_T) | + (1u << ADDR_SW_64KB_R_T); + +const UINT_32 Gfx9XorSwModeMask = Gfx9XSwModeMask | + Gfx9TSwModeMask; + +const UINT_32 Gfx9AllSwModeMask = Gfx9LinearSwModeMask | + Gfx9ZSwModeMask | + Gfx9StandardSwModeMask | + Gfx9DisplaySwModeMask | + Gfx9RotateSwModeMask; + +const UINT_32 Gfx9Rsrc1dSwModeMask = Gfx9LinearSwModeMask; + +const UINT_32 Gfx9Rsrc2dSwModeMask = Gfx9AllSwModeMask; + +const UINT_32 
Gfx9Rsrc3dSwModeMask = Gfx9AllSwModeMask & ~Gfx9Blk256BSwModeMask & ~Gfx9RotateSwModeMask; + +const UINT_32 Gfx9Rsrc2dPrtSwModeMask = (Gfx9Blk4KBSwModeMask | Gfx9Blk64KBSwModeMask) & ~Gfx9XSwModeMask; + +const UINT_32 Gfx9Rsrc3dPrtSwModeMask = Gfx9Rsrc2dPrtSwModeMask & ~Gfx9RotateSwModeMask & ~Gfx9DisplaySwModeMask; + +const UINT_32 Gfx9Rsrc3dThinSwModeMask = Gfx9DisplaySwModeMask & ~Gfx9Blk256BSwModeMask; + +const UINT_32 Gfx9MsaaSwModeMask = Gfx9AllSwModeMask & ~Gfx9Blk256BSwModeMask & ~Gfx9LinearSwModeMask; + +const UINT_32 Dce12NonBpp32SwModeMask = (1u << ADDR_SW_LINEAR) | + (1u << ADDR_SW_4KB_D) | + (1u << ADDR_SW_4KB_R) | + (1u << ADDR_SW_64KB_D) | + (1u << ADDR_SW_64KB_R) | + (1u << ADDR_SW_VAR_D) | + (1u << ADDR_SW_VAR_R) | + (1u << ADDR_SW_4KB_D_X) | + (1u << ADDR_SW_4KB_R_X) | + (1u << ADDR_SW_64KB_D_X) | + (1u << ADDR_SW_64KB_R_X) | + (1u << ADDR_SW_VAR_D_X) | + (1u << ADDR_SW_VAR_R_X); + +const UINT_32 Dce12Bpp32SwModeMask = (1u << ADDR_SW_256B_D) | + (1u << ADDR_SW_256B_R) | + Dce12NonBpp32SwModeMask; + +const UINT_32 Dcn1NonBpp64SwModeMask = (1u << ADDR_SW_LINEAR) | + (1u << ADDR_SW_4KB_S) | + (1u << ADDR_SW_64KB_S) | + (1u << ADDR_SW_VAR_S) | + (1u << ADDR_SW_64KB_S_T) | + (1u << ADDR_SW_4KB_S_X) | + (1u << ADDR_SW_64KB_S_X) | + (1u << ADDR_SW_VAR_S_X); + +const UINT_32 Dcn1Bpp64SwModeMask = (1u << ADDR_SW_4KB_D) | + (1u << ADDR_SW_64KB_D) | + (1u << ADDR_SW_VAR_D) | + (1u << ADDR_SW_64KB_D_T) | + (1u << ADDR_SW_4KB_D_X) | + (1u << ADDR_SW_64KB_D_X) | + (1u << ADDR_SW_VAR_D_X) | + Dcn1NonBpp64SwModeMask; + +/** +************************************************************************************************************************ +* @brief GFX9 meta equation parameters +************************************************************************************************************************ +*/ +struct MetaEqParams +{ + UINT_32 maxMip; + UINT_32 elementBytesLog2; + UINT_32 numSamplesLog2; + ADDR2_META_FLAGS metaFlag; + Gfx9DataType dataSurfaceType; + 
AddrSwizzleMode swizzleMode; + AddrResourceType resourceType; + UINT_32 metaBlkWidthLog2; + UINT_32 metaBlkHeightLog2; + UINT_32 metaBlkDepthLog2; + UINT_32 compBlkWidthLog2; + UINT_32 compBlkHeightLog2; + UINT_32 compBlkDepthLog2; +}; + +/** +************************************************************************************************************************ +* @brief This class is the GFX9 specific address library +* function set. +************************************************************************************************************************ +*/ +class Gfx9Lib : public Lib +{ +public: + /// Creates Gfx9Lib object + static Addr::Lib* CreateObj(const Client* pClient) + { + VOID* pMem = Object::ClientAlloc(sizeof(Gfx9Lib), pClient); + return (pMem != NULL) ? new (pMem) Gfx9Lib(pClient) : NULL; + } + + virtual BOOL_32 IsValidDisplaySwizzleMode( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; + +protected: + Gfx9Lib(const Client* pClient); + virtual ~Gfx9Lib(); + + virtual BOOL_32 HwlIsStandardSwizzle( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].isStd || + (IsTex3d(resourceType) && m_swizzleModeTable[swizzleMode].isDisp); + } + + virtual BOOL_32 HwlIsDisplaySwizzle( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const + { + return IsTex2d(resourceType) && m_swizzleModeTable[swizzleMode].isDisp; + } + + virtual BOOL_32 HwlIsThin( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const + { + return ((IsTex2d(resourceType) == TRUE) || + ((IsTex3d(resourceType) == TRUE) && + (m_swizzleModeTable[swizzleMode].isZ == FALSE) && + (m_swizzleModeTable[swizzleMode].isStd == FALSE))); + } + + virtual BOOL_32 HwlIsThick( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const + { + return (IsTex3d(resourceType) && + (m_swizzleModeTable[swizzleMode].isZ || m_swizzleModeTable[swizzleMode].isStd)); + } + + virtual ADDR_E_RETURNCODE HwlComputeHtileInfo( 
+ const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, + ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeCmaskInfo( + const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, + ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeDccInfo( + const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, + ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord( + const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut); + + virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord( + const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut); + + virtual ADDR_E_RETURNCODE HwlComputeHtileCoordFromAddr( + const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut); + + virtual ADDR_E_RETURNCODE HwlComputeDccAddrFromCoord( + const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut); + + virtual UINT_32 HwlGetEquationIndex( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeBlock256Equation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const; + + virtual ADDR_E_RETURNCODE HwlComputeThinEquation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const; + + virtual ADDR_E_RETURNCODE HwlComputeThickEquation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const; + + // Get equation table pointer and number of equations + virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const + { + *ppEquationTable = m_equationTable; + + return m_numEquations; + } + + virtual BOOL_32 IsEquationSupported( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, 
+ UINT_32 elementBytesLog2) const; + + UINT_32 ComputeSurfaceBaseAlignTiled(AddrSwizzleMode swizzleMode) const + { + UINT_32 baseAlign; + + if (IsXor(swizzleMode)) + { + baseAlign = GetBlockSize(swizzleMode); + } + else + { + baseAlign = 256; + } + + return baseAlign; + } + + virtual ADDR_E_RETURNCODE HwlComputePipeBankXor( + const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeSlicePipeBankXor( + const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeSubResourceOffsetForSwizzlePattern( + const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, + ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlGetPreferredSurfaceSetting( + const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, + ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoSanityCheck( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoTiled( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoLinear( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled( + const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; + + // Initialize equation table + VOID InitEquationTable(); + + ADDR_E_RETURNCODE ComputeStereoInfo( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut, + UINT_32* pHeightAlign) const; + + UINT_32 GetMipChainInfo( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode, + UINT_32 bpp, + UINT_32 mip0Width, + UINT_32 mip0Height, + UINT_32 mip0Depth, + UINT_32 
blockWidth, + UINT_32 blockHeight, + UINT_32 blockDepth, + UINT_32 numMipLevel, + ADDR2_MIP_INFO* pMipInfo) const; + + VOID GetMetaMiptailInfo( + ADDR2_META_MIP_INFO* pInfo, + Dim3d mipCoord, + UINT_32 numMipInTail, + Dim3d* pMetaBlkDim) const; + + Dim3d GetMipStartPos( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode, + UINT_32 width, + UINT_32 height, + UINT_32 depth, + UINT_32 blockWidth, + UINT_32 blockHeight, + UINT_32 blockDepth, + UINT_32 mipId, + UINT_32 log2ElementBytes, + UINT_32* pMipTailBytesOffset) const; + + AddrMajorMode GetMajorMode( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode, + UINT_32 mip0WidthInBlk, + UINT_32 mip0HeightInBlk, + UINT_32 mip0DepthInBlk) const + { + BOOL_32 yMajor = (mip0WidthInBlk < mip0HeightInBlk); + BOOL_32 xMajor = (yMajor == FALSE); + + if (IsThick(resourceType, swizzleMode)) + { + yMajor = yMajor && (mip0HeightInBlk >= mip0DepthInBlk); + xMajor = xMajor && (mip0WidthInBlk >= mip0DepthInBlk); + } + + AddrMajorMode majorMode; + if (xMajor) + { + majorMode = ADDR_MAJOR_X; + } + else if (yMajor) + { + majorMode = ADDR_MAJOR_Y; + } + else + { + majorMode = ADDR_MAJOR_Z; + } + + return majorMode; + } + + Dim3d GetDccCompressBlk( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode, + UINT_32 bpp) const + { + UINT_32 index = Log2(bpp >> 3); + Dim3d compressBlkDim; + + if (IsThin(resourceType, swizzleMode)) + { + compressBlkDim.w = Block256_2d[index].w; + compressBlkDim.h = Block256_2d[index].h; + compressBlkDim.d = 1; + } + else if (IsStandardSwizzle(resourceType, swizzleMode)) + { + compressBlkDim = Block256_3dS[index]; + } + else + { + compressBlkDim = Block256_3dZ[index]; + } + + return compressBlkDim; + } + + static const UINT_32 MaxSeLog2 = 3; + static const UINT_32 MaxRbPerSeLog2 = 2; + + static const Dim3d Block256_3dS[MaxNumOfBpp]; + static const Dim3d Block256_3dZ[MaxNumOfBpp]; + + static const UINT_32 MipTailOffset256B[]; + + static const SwizzleModeFlags 
SwizzleModeTable[ADDR_SW_MAX_TYPE]; + + // Max number of swizzle mode supported for equation + static const UINT_32 MaxSwMode = 32; + // Max number of resource type (2D/3D) supported for equation + static const UINT_32 MaxRsrcType = 2; + // Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp) + static const UINT_32 MaxElementBytesLog2 = 5; + // Almost all swizzle mode + resource type support equation + static const UINT_32 EquationTableSize = MaxElementBytesLog2 * MaxSwMode * MaxRsrcType; + // Equation table + ADDR_EQUATION m_equationTable[EquationTableSize]; + + // Number of equation entries in the table + UINT_32 m_numEquations; + // Equation lookup table according to bpp and tile index + UINT_32 m_equationLookupTable[MaxRsrcType][MaxSwMode][MaxElementBytesLog2]; + + static const UINT_32 MaxCachedMetaEq = 2; + +private: + virtual UINT_32 HwlComputeMaxBaseAlignments() const; + + virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const; + + virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn); + + VOID GetRbEquation(CoordEq* pRbEq, UINT_32 rbPerSeLog2, UINT_32 seLog2) const; + + VOID GetDataEquation(CoordEq* pDataEq, Gfx9DataType dataSurfaceType, + AddrSwizzleMode swizzleMode, AddrResourceType resourceType, + UINT_32 elementBytesLog2, UINT_32 numSamplesLog2) const; + + VOID GetPipeEquation(CoordEq* pPipeEq, CoordEq* pDataEq, + UINT_32 pipeInterleaveLog2, UINT_32 numPipesLog2, + UINT_32 numSamplesLog2, Gfx9DataType dataSurfaceType, + AddrSwizzleMode swizzleMode, AddrResourceType resourceType) const; + + VOID GenMetaEquation(CoordEq* pMetaEq, UINT_32 maxMip, + UINT_32 elementBytesLog2, UINT_32 numSamplesLog2, + ADDR2_META_FLAGS metaFlag, Gfx9DataType dataSurfaceType, + AddrSwizzleMode swizzleMode, AddrResourceType resourceType, + UINT_32 metaBlkWidthLog2, UINT_32 metaBlkHeightLog2, + UINT_32 metaBlkDepthLog2, UINT_32 compBlkWidthLog2, + UINT_32 compBlkHeightLog2, UINT_32 compBlkDepthLog2) const; + + const CoordEq* GetMetaEquation(const MetaEqParams& 
metaEqParams); + + virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision); + + VOID GetMetaMipInfo(UINT_32 numMipLevels, Dim3d* pMetaBlkDim, + BOOL_32 dataThick, ADDR2_META_MIP_INFO* pInfo, + UINT_32 mip0Width, UINT_32 mip0Height, UINT_32 mip0Depth, + UINT_32* pNumMetaBlkX, UINT_32* pNumMetaBlkY, UINT_32* pNumMetaBlkZ) const; + + ADDR_E_RETURNCODE ComputeSurfaceLinearPadding( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + UINT_32* pMipmap0PaddedWidth, + UINT_32* pSlice0PaddedHeight, + ADDR2_MIP_INFO* pMipInfo = NULL) const; + + static ADDR2_BLOCK_SET GetAllowedBlockSet(ADDR2_SWMODE_SET allowedSwModeSet) + { + ADDR2_BLOCK_SET allowedBlockSet = {}; + + allowedBlockSet.micro = (allowedSwModeSet.value & Gfx9Blk256BSwModeMask) ? TRUE : FALSE; + allowedBlockSet.macro4KB = (allowedSwModeSet.value & Gfx9Blk4KBSwModeMask) ? TRUE : FALSE; + allowedBlockSet.macro64KB = (allowedSwModeSet.value & Gfx9Blk64KBSwModeMask) ? TRUE : FALSE; + allowedBlockSet.var = (allowedSwModeSet.value & Gfx9BlkVarSwModeMask) ? TRUE : FALSE; + allowedBlockSet.linear = (allowedSwModeSet.value & Gfx9LinearSwModeMask) ? TRUE : FALSE; + + return allowedBlockSet; + } + + static ADDR2_SWTYPE_SET GetAllowedSwSet(ADDR2_SWMODE_SET allowedSwModeSet) + { + ADDR2_SWTYPE_SET allowedSwSet = {}; + + allowedSwSet.sw_Z = (allowedSwModeSet.value & Gfx9ZSwModeMask) ? TRUE : FALSE; + allowedSwSet.sw_S = (allowedSwModeSet.value & Gfx9StandardSwModeMask) ? TRUE : FALSE; + allowedSwSet.sw_D = (allowedSwModeSet.value & Gfx9DisplaySwModeMask) ? TRUE : FALSE; + allowedSwSet.sw_R = (allowedSwModeSet.value & Gfx9RotateSwModeMask) ? 
TRUE : FALSE; + + return allowedSwSet; + } + + Gfx9ChipSettings m_settings; + + CoordEq m_cachedMetaEq[MaxCachedMetaEq]; + MetaEqParams m_cachedMetaEqKey[MaxCachedMetaEq]; + UINT_32 m_metaEqOverrideIndex; +}; + +} // V2 +} // Addr + +#endif + diff -Nru mesa-18.3.3/src/amd/addrlib/src/r800/ciaddrlib.cpp mesa-19.0.1/src/amd/addrlib/src/r800/ciaddrlib.cpp --- mesa-18.3.3/src/amd/addrlib/src/r800/ciaddrlib.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/r800/ciaddrlib.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,2339 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file ciaddrlib.cpp +* @brief Contains the implementation for the CiLib class. 
+**************************************************************************************************** +*/ + +#include "ciaddrlib.h" + +#include "si_gb_reg.h" + +#include "amdgpu_asic_addr.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace Addr +{ + +/** +**************************************************************************************************** +* CiHwlInit +* +* @brief +* Creates an CiLib object. +* +* @return +* Returns an CiLib object pointer. +**************************************************************************************************** +*/ +Lib* CiHwlInit(const Client* pClient) +{ + return V1::CiLib::CreateObj(pClient); +} + +namespace V1 +{ + +/** +**************************************************************************************************** +* Mask +* +* @brief +* Gets a mask of "width" +* @return +* Bit mask +**************************************************************************************************** +*/ +static UINT_64 Mask( + UINT_32 width) ///< Width of bits +{ + UINT_64 ret; + + if (width >= sizeof(UINT_64)*8) + { + ret = ~((UINT_64) 0); + } + else + { + return (((UINT_64) 1) << width) - 1; + } + return ret; +} + +/** +**************************************************************************************************** +* GetBits +* +* @brief +* Gets bits within a range of [msb, lsb] +* @return +* Bits of this range +**************************************************************************************************** +*/ +static UINT_64 GetBits( + UINT_64 bits, ///< Source bits + UINT_32 msb, ///< Most signicant bit + UINT_32 lsb) ///< Least signicant bit +{ + UINT_64 ret = 0; + + if (msb >= lsb) + { + ret = (bits >> lsb) & (Mask(1 + msb - lsb)); + } + return ret; +} + +/** 
+**************************************************************************************************** +* RemoveBits +* +* @brief +* Removes bits within the range of [msb, lsb] +* @return +* Modified bits +**************************************************************************************************** +*/ +static UINT_64 RemoveBits( + UINT_64 bits, ///< Source bits + UINT_32 msb, ///< Most signicant bit + UINT_32 lsb) ///< Least signicant bit +{ + UINT_64 ret = bits; + + if (msb >= lsb) + { + ret = GetBits(bits, lsb - 1, 0) // low bits + | (GetBits(bits, 8 * sizeof(bits) - 1, msb + 1) << lsb); //high bits + } + return ret; +} + +/** +**************************************************************************************************** +* InsertBits +* +* @brief +* Inserts new bits into the range of [msb, lsb] +* @return +* Modified bits +**************************************************************************************************** +*/ +static UINT_64 InsertBits( + UINT_64 bits, ///< Source bits + UINT_64 newBits, ///< New bits to be inserted + UINT_32 msb, ///< Most signicant bit + UINT_32 lsb) ///< Least signicant bit +{ + UINT_64 ret = bits; + + if (msb >= lsb) + { + ret = GetBits(bits, lsb - 1, 0) // old low bitss + | (GetBits(newBits, msb - lsb, 0) << lsb) //new bits + | (GetBits(bits, 8 * sizeof(bits) - 1, lsb) << (msb + 1)); //old high bits + } + return ret; +} + +/** +**************************************************************************************************** +* CiLib::CiLib +* +* @brief +* Constructor +* +**************************************************************************************************** +*/ +CiLib::CiLib(const Client* pClient) + : + SiLib(pClient), + m_noOfMacroEntries(0), + m_allowNonDispThickModes(FALSE) +{ + m_class = CI_ADDRLIB; +} + +/** +**************************************************************************************************** +* CiLib::~CiLib +* +* @brief +* Destructor 
+**************************************************************************************************** +*/ +CiLib::~CiLib() +{ +} + +/** +**************************************************************************************************** +* CiLib::HwlComputeDccInfo +* +* @brief +* Compute DCC key size, base alignment +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE CiLib::HwlComputeDccInfo( + const ADDR_COMPUTE_DCCINFO_INPUT* pIn, + ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (m_settings.isVolcanicIslands && IsMacroTiled(pIn->tileMode)) + { + UINT_64 dccFastClearSize = pIn->colorSurfSize >> 8; + + ADDR_ASSERT(0 == (pIn->colorSurfSize & 0xff)); + + if (pIn->numSamples > 1) + { + UINT_32 tileSizePerSample = BITS_TO_BYTES(pIn->bpp * MicroTileWidth * MicroTileHeight); + UINT_32 samplesPerSplit = pIn->tileInfo.tileSplitBytes / tileSizePerSample; + + if (samplesPerSplit < pIn->numSamples) + { + UINT_32 numSplits = pIn->numSamples / samplesPerSplit; + UINT_32 fastClearBaseAlign = HwlGetPipes(&pIn->tileInfo) * m_pipeInterleaveBytes; + + ADDR_ASSERT(IsPow2(fastClearBaseAlign)); + + dccFastClearSize /= numSplits; + + if (0 != (dccFastClearSize & (fastClearBaseAlign - 1))) + { + // Disable dcc fast clear + // if key size of fisrt sample split is not pipe*interleave aligned + dccFastClearSize = 0; + } + } + } + + pOut->dccRamSize = pIn->colorSurfSize >> 8; + pOut->dccRamBaseAlign = pIn->tileInfo.banks * + HwlGetPipes(&pIn->tileInfo) * + m_pipeInterleaveBytes; + pOut->dccFastClearSize = dccFastClearSize; + pOut->dccRamSizeAligned = TRUE; + + ADDR_ASSERT(IsPow2(pOut->dccRamBaseAlign)); + + if (0 == (pOut->dccRamSize & (pOut->dccRamBaseAlign - 1))) + { + pOut->subLvlCompressible = TRUE; + } + else + { + UINT_64 dccRamSizeAlign = HwlGetPipes(&pIn->tileInfo) * m_pipeInterleaveBytes; + + if (pOut->dccRamSize == 
pOut->dccFastClearSize) + { + pOut->dccFastClearSize = PowTwoAlign(pOut->dccRamSize, dccRamSizeAlign); + } + if ((pOut->dccRamSize & (dccRamSizeAlign - 1)) != 0) + { + pOut->dccRamSizeAligned = FALSE; + } + pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, dccRamSizeAlign); + pOut->subLvlCompressible = FALSE; + } + } + else + { + returnCode = ADDR_NOTSUPPORTED; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* CiLib::HwlComputeCmaskAddrFromCoord +* +* @brief +* Compute tc compatible Cmask address from fmask ram address +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE CiLib::HwlComputeCmaskAddrFromCoord( + const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] fmask addr/bpp/tile input + ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] cmask address + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_NOTSUPPORTED; + + if ((m_settings.isVolcanicIslands == TRUE) && + (pIn->flags.tcCompatible == TRUE)) + { + UINT_32 numOfPipes = HwlGetPipes(pIn->pTileInfo); + UINT_32 numOfBanks = pIn->pTileInfo->banks; + UINT_64 fmaskAddress = pIn->fmaskAddr; + UINT_32 elemBits = pIn->bpp; + UINT_32 blockByte = 64 * elemBits / 8; + UINT_64 metaNibbleAddress = HwlComputeMetadataNibbleAddress(fmaskAddress, + 0, + 0, + 4, // cmask 4 bits + elemBits, + blockByte, + m_pipeInterleaveBytes, + numOfPipes, + numOfBanks, + 1); + pOut->addr = (metaNibbleAddress >> 1); + pOut->bitPosition = (metaNibbleAddress % 2) ? 
4 : 0; + returnCode = ADDR_OK; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* CiLib::HwlComputeHtileAddrFromCoord +* +* @brief +* Compute tc compatible Htile address from depth/stencil address +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE CiLib::HwlComputeHtileAddrFromCoord( + const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] depth/stencil addr/bpp/tile input + ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] htile address + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_NOTSUPPORTED; + + if ((m_settings.isVolcanicIslands == TRUE) && + (pIn->flags.tcCompatible == TRUE)) + { + UINT_32 numOfPipes = HwlGetPipes(pIn->pTileInfo); + UINT_32 numOfBanks = pIn->pTileInfo->banks; + UINT_64 zStencilAddr = pIn->zStencilAddr; + UINT_32 elemBits = pIn->bpp; + UINT_32 blockByte = 64 * elemBits / 8; + UINT_64 metaNibbleAddress = HwlComputeMetadataNibbleAddress(zStencilAddr, + 0, + 0, + 32, // htile 32 bits + elemBits, + blockByte, + m_pipeInterleaveBytes, + numOfPipes, + numOfBanks, + 1); + pOut->addr = (metaNibbleAddress >> 1); + pOut->bitPosition = 0; + returnCode = ADDR_OK; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* CiLib::HwlConvertChipFamily +* +* @brief +* Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision +* @return +* ChipFamily +**************************************************************************************************** +*/ +ChipFamily CiLib::HwlConvertChipFamily( + UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h + UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h +{ + ChipFamily family = ADDR_CHIP_FAMILY_CI; + + switch (uChipFamily) + { + case FAMILY_CI: + 
m_settings.isSeaIsland = 1; + m_settings.isBonaire = ASICREV_IS_BONAIRE_M(uChipRevision); + m_settings.isHawaii = ASICREV_IS_HAWAII_P(uChipRevision); + break; + case FAMILY_KV: + m_settings.isKaveri = 1; + m_settings.isSpectre = ASICREV_IS_SPECTRE(uChipRevision); + m_settings.isSpooky = ASICREV_IS_SPOOKY(uChipRevision); + m_settings.isKalindi = ASICREV_IS_KALINDI(uChipRevision); + break; + case FAMILY_VI: + m_settings.isVolcanicIslands = 1; + m_settings.isIceland = ASICREV_IS_ICELAND_M(uChipRevision); + m_settings.isTonga = ASICREV_IS_TONGA_P(uChipRevision); + m_settings.isFiji = ASICREV_IS_FIJI_P(uChipRevision); + m_settings.isPolaris10 = ASICREV_IS_POLARIS10_P(uChipRevision); + m_settings.isPolaris11 = ASICREV_IS_POLARIS11_M(uChipRevision); + m_settings.isPolaris12 = ASICREV_IS_POLARIS12_V(uChipRevision); + m_settings.isVegaM = ASICREV_IS_VEGAM_P(uChipRevision); + family = ADDR_CHIP_FAMILY_VI; + break; + case FAMILY_CZ: + m_settings.isCarrizo = 1; + m_settings.isVolcanicIslands = 1; + family = ADDR_CHIP_FAMILY_VI; + break; + default: + ADDR_ASSERT(!"This should be a unexpected Fusion"); + break; + } + + return family; +} + +/** +**************************************************************************************************** +* CiLib::HwlInitGlobalParams +* +* @brief +* Initializes global parameters +* +* @return +* TRUE if all settings are valid +* +**************************************************************************************************** +*/ +BOOL_32 CiLib::HwlInitGlobalParams( + const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input +{ + BOOL_32 valid = TRUE; + + const ADDR_REGISTER_VALUE* pRegValue = &pCreateIn->regValue; + + valid = DecodeGbRegs(pRegValue); + + // The following assignments for m_pipes is only for fail-safe, InitTileSettingTable should + // read the correct pipes from tile mode table + if (m_settings.isHawaii) + { + m_pipes = 16; + } + else if (m_settings.isBonaire || m_settings.isSpectre) + { + m_pipes = 4; + } + else // 
Treat other KV asics to be 2-pipe + { + m_pipes = 2; + } + + // @todo: VI + // Move this to VI code path once created + if (m_settings.isTonga || m_settings.isPolaris10) + { + m_pipes = 8; + } + else if (m_settings.isIceland) + { + m_pipes = 2; + } + else if (m_settings.isFiji) + { + m_pipes = 16; + } + else if (m_settings.isPolaris11 || m_settings.isPolaris12) + { + m_pipes = 4; + } + else if (m_settings.isVegaM) + { + m_pipes = 16; + } + + if (valid) + { + valid = InitTileSettingTable(pRegValue->pTileConfig, pRegValue->noOfEntries); + } + if (valid) + { + valid = InitMacroTileCfgTable(pRegValue->pMacroTileConfig, pRegValue->noOfMacroEntries); + } + + if (valid) + { + InitEquationTable(); + } + + return valid; +} + +/** +**************************************************************************************************** +* CiLib::HwlPostCheckTileIndex +* +* @brief +* Map a tile setting to index if curIndex is invalid, otherwise check if curIndex matches +* tile mode/type/info and change the index if needed +* @return +* Tile index. +**************************************************************************************************** +*/ +INT_32 CiLib::HwlPostCheckTileIndex( + const ADDR_TILEINFO* pInfo, ///< [in] Tile Info + AddrTileMode mode, ///< [in] Tile mode + AddrTileType type, ///< [in] Tile type + INT curIndex ///< [in] Current index assigned in HwlSetupTileInfo + ) const +{ + INT_32 index = curIndex; + + if (mode == ADDR_TM_LINEAR_GENERAL) + { + index = TileIndexLinearGeneral; + } + else + { + BOOL_32 macroTiled = IsMacroTiled(mode); + + // We need to find a new index if either of them is true + // 1. curIndex is invalid + // 2. tile mode is changed + // 3. 
tile info does not match for macro tiled + if ((index == TileIndexInvalid) || + (mode != m_tileTable[index].mode) || + (macroTiled && pInfo->pipeConfig != m_tileTable[index].info.pipeConfig)) + { + for (index = 0; index < static_cast(m_noOfEntries); index++) + { + if (macroTiled) + { + // macro tile modes need all to match + if ((pInfo->pipeConfig == m_tileTable[index].info.pipeConfig) && + (mode == m_tileTable[index].mode) && + (type == m_tileTable[index].type)) + { + // tileSplitBytes stored in m_tileTable is only valid for depth entries + if (type == ADDR_DEPTH_SAMPLE_ORDER) + { + if (Min(m_tileTable[index].info.tileSplitBytes, + m_rowSize) == pInfo->tileSplitBytes) + { + break; + } + } + else // other entries are determined by other 3 fields + { + break; + } + } + } + else if (mode == ADDR_TM_LINEAR_ALIGNED) + { + // linear mode only needs tile mode to match + if (mode == m_tileTable[index].mode) + { + break; + } + } + else + { + // micro tile modes only need tile mode and tile type to match + if (mode == m_tileTable[index].mode && + type == m_tileTable[index].type) + { + break; + } + } + } + } + } + + ADDR_ASSERT(index < static_cast(m_noOfEntries)); + + if (index >= static_cast(m_noOfEntries)) + { + index = TileIndexInvalid; + } + + return index; +} + +/** +**************************************************************************************************** +* CiLib::HwlSetupTileCfg +* +* @brief +* Map tile index to tile setting. 
+* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE CiLib::HwlSetupTileCfg( + UINT_32 bpp, ///< Bits per pixel + INT_32 index, ///< Tile index + INT_32 macroModeIndex, ///< Index in macro tile mode table(CI) + ADDR_TILEINFO* pInfo, ///< [out] Tile Info + AddrTileMode* pMode, ///< [out] Tile mode + AddrTileType* pType ///< [out] Tile type + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + // Global flag to control usage of tileIndex + if (UseTileIndex(index)) + { + if (index == TileIndexLinearGeneral) + { + pInfo->banks = 2; + pInfo->bankWidth = 1; + pInfo->bankHeight = 1; + pInfo->macroAspectRatio = 1; + pInfo->tileSplitBytes = 64; + pInfo->pipeConfig = ADDR_PIPECFG_P2; + } + else if (static_cast(index) >= m_noOfEntries) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + const TileConfig* pCfgTable = GetTileSetting(index); + + if (pInfo != NULL) + { + if (IsMacroTiled(pCfgTable->mode)) + { + ADDR_ASSERT((macroModeIndex != TileIndexInvalid) && + (macroModeIndex != TileIndexNoMacroIndex)); + + UINT_32 tileSplit; + + *pInfo = m_macroTileTable[macroModeIndex]; + + if (pCfgTable->type == ADDR_DEPTH_SAMPLE_ORDER) + { + tileSplit = pCfgTable->info.tileSplitBytes; + } + else + { + if (bpp > 0) + { + UINT_32 thickness = Thickness(pCfgTable->mode); + UINT_32 tileBytes1x = BITS_TO_BYTES(bpp * MicroTilePixels * thickness); + // Non-depth entries store a split factor + UINT_32 sampleSplit = m_tileTable[index].info.tileSplitBytes; + tileSplit = Max(256u, sampleSplit * tileBytes1x); + } + else + { + // Return tileBytes instead if not enough info + tileSplit = pInfo->tileSplitBytes; + } + } + + // Clamp to row_size + pInfo->tileSplitBytes = Min(m_rowSize, tileSplit); + + pInfo->pipeConfig = pCfgTable->info.pipeConfig; + } + else // 1D and linear modes, we return default value stored in table + { + *pInfo = pCfgTable->info; + } + } + + if (pMode != NULL) + { + 
*pMode = pCfgTable->mode; + } + + if (pType != NULL) + { + *pType = pCfgTable->type; + } + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* CiLib::HwlComputeSurfaceInfo +* +* @brief +* Entry of CI's ComputeSurfaceInfo +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE CiLib::HwlComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + // If tileIndex is invalid, force macroModeIndex to be invalid, too + if (pIn->tileIndex == TileIndexInvalid) + { + pOut->macroModeIndex = TileIndexInvalid; + } + + ADDR_E_RETURNCODE retCode = SiLib::HwlComputeSurfaceInfo(pIn, pOut); + + if ((pIn->mipLevel > 0) && + (pOut->tcCompatible == TRUE) && + (pOut->tileMode != pIn->tileMode) && + (m_settings.isVolcanicIslands == TRUE)) + { + pOut->tcCompatible = CheckTcCompatibility(pOut->pTileInfo, pIn->bpp, pOut->tileMode, pOut->tileType, pOut); + } + + if (pOut->macroModeIndex == TileIndexNoMacroIndex) + { + pOut->macroModeIndex = TileIndexInvalid; + } + + if ((pIn->flags.matchStencilTileCfg == TRUE) && + (pIn->flags.depth == TRUE)) + { + pOut->stencilTileIdx = TileIndexInvalid; + + if ((MinDepth2DThinIndex <= pOut->tileIndex) && + (MaxDepth2DThinIndex >= pOut->tileIndex)) + { + BOOL_32 depthStencil2DTileConfigMatch = DepthStencilTileCfgMatch(pIn, pOut); + + if ((depthStencil2DTileConfigMatch == FALSE) && + (pOut->tcCompatible == TRUE)) + { + pOut->macroModeIndex = TileIndexInvalid; + + ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn; + localIn.tileIndex = TileIndexInvalid; + localIn.pTileInfo = NULL; + localIn.flags.tcCompatible = FALSE; + + SiLib::HwlComputeSurfaceInfo(&localIn, pOut); + + ADDR_ASSERT((MinDepth2DThinIndex <= pOut->tileIndex) && (MaxDepth2DThinIndex >= 
pOut->tileIndex)); + + depthStencil2DTileConfigMatch = DepthStencilTileCfgMatch(pIn, pOut); + } + + if ((depthStencil2DTileConfigMatch == FALSE) && + (pIn->numSamples <= 1)) + { + pOut->macroModeIndex = TileIndexInvalid; + + ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn; + localIn.tileMode = ADDR_TM_1D_TILED_THIN1; + localIn.tileIndex = TileIndexInvalid; + localIn.pTileInfo = NULL; + + retCode = SiLib::HwlComputeSurfaceInfo(&localIn, pOut); + } + } + + if (pOut->tileIndex == Depth1DThinIndex) + { + pOut->stencilTileIdx = Depth1DThinIndex; + } + } + + return retCode; +} + +/** +**************************************************************************************************** +* CiLib::HwlFmaskSurfaceInfo +* @brief +* Entry of r800's ComputeFmaskInfo +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE CiLib::HwlComputeFmaskInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut ///< [out] output structure + ) +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + ADDR_TILEINFO tileInfo = {0}; + ADDR_COMPUTE_FMASK_INFO_INPUT fmaskIn; + fmaskIn = *pIn; + + AddrTileMode tileMode = pIn->tileMode; + + // Use internal tile info if pOut does not have a valid pTileInfo + if (pOut->pTileInfo == NULL) + { + pOut->pTileInfo = &tileInfo; + } + + ADDR_ASSERT(tileMode == ADDR_TM_2D_TILED_THIN1 || + tileMode == ADDR_TM_3D_TILED_THIN1 || + tileMode == ADDR_TM_PRT_TILED_THIN1 || + tileMode == ADDR_TM_PRT_2D_TILED_THIN1 || + tileMode == ADDR_TM_PRT_3D_TILED_THIN1); + + ADDR_ASSERT(m_tileTable[14].mode == ADDR_TM_2D_TILED_THIN1); + ADDR_ASSERT(m_tileTable[15].mode == ADDR_TM_3D_TILED_THIN1); + + // The only valid tile modes for fmask are 2D_THIN1 and 3D_THIN1 plus non-displayable + INT_32 tileIndex = tileMode == ADDR_TM_2D_TILED_THIN1 ? 
14 : 15; + ADDR_SURFACE_FLAGS flags = {{0}}; + flags.fmask = 1; + + INT_32 macroModeIndex = TileIndexInvalid; + + UINT_32 numSamples = pIn->numSamples; + UINT_32 numFrags = pIn->numFrags == 0 ? numSamples : pIn->numFrags; + + UINT_32 bpp = QLog2(numFrags); + + // EQAA needs one more bit + if (numSamples > numFrags) + { + bpp++; + } + + if (bpp == 3) + { + bpp = 4; + } + + bpp = Max(8u, bpp * numSamples); + + macroModeIndex = HwlComputeMacroModeIndex(tileIndex, flags, bpp, numSamples, pOut->pTileInfo); + + fmaskIn.tileIndex = tileIndex; + fmaskIn.pTileInfo = pOut->pTileInfo; + pOut->macroModeIndex = macroModeIndex; + pOut->tileIndex = tileIndex; + + retCode = DispatchComputeFmaskInfo(&fmaskIn, pOut); + + if (retCode == ADDR_OK) + { + pOut->tileIndex = + HwlPostCheckTileIndex(pOut->pTileInfo, pIn->tileMode, ADDR_NON_DISPLAYABLE, + pOut->tileIndex); + } + + // Resets pTileInfo to NULL if the internal tile info is used + if (pOut->pTileInfo == &tileInfo) + { + pOut->pTileInfo = NULL; + } + + return retCode; +} + +/** +**************************************************************************************************** +* CiLib::HwlFmaskPreThunkSurfInfo +* +* @brief +* Some preparation before thunking a ComputeSurfaceInfo call for Fmask +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +VOID CiLib::HwlFmaskPreThunkSurfInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn, ///< [in] Input of fmask info + const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut, ///< [in] Output of fmask info + ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn, ///< [out] Input of thunked surface info + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut ///< [out] Output of thunked surface info + ) const +{ + pSurfIn->tileIndex = pFmaskIn->tileIndex; + pSurfOut->macroModeIndex = pFmaskOut->macroModeIndex; +} + +/** +**************************************************************************************************** +* 
CiLib::HwlFmaskPostThunkSurfInfo +* +* @brief +* Copy hwl extra field after calling thunked ComputeSurfaceInfo +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +VOID CiLib::HwlFmaskPostThunkSurfInfo( + const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut, ///< [in] Output of surface info + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut ///< [out] Output of fmask info + ) const +{ + pFmaskOut->tileIndex = pSurfOut->tileIndex; + pFmaskOut->macroModeIndex = pSurfOut->macroModeIndex; +} + +/** +**************************************************************************************************** +* CiLib::HwlDegradeThickTileMode +* +* @brief +* Degrades valid tile mode for thick modes if needed +* +* @return +* Suitable tile mode +**************************************************************************************************** +*/ +AddrTileMode CiLib::HwlDegradeThickTileMode( + AddrTileMode baseTileMode, ///< [in] base tile mode + UINT_32 numSlices, ///< [in] current number of slices + UINT_32* pBytesPerTile ///< [in,out] pointer to bytes per slice + ) const +{ + return baseTileMode; +} + +/** +**************************************************************************************************** +* CiLib::HwlOptimizeTileMode +* +* @brief +* Optimize tile mode on CI +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID CiLib::HwlOptimizeTileMode( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure + ) const +{ + AddrTileMode tileMode = pInOut->tileMode; + + // Override 2D/3D macro tile mode to PRT_* tile mode if + // client driver requests this surface is equation compatible + if (IsMacroTiled(tileMode) == TRUE) + { + if ((pInOut->flags.needEquation == TRUE) && + (pInOut->numSamples <= 1) && + (IsPrtTileMode(tileMode) == FALSE)) + { + if ((pInOut->numSlices > 1) && 
((pInOut->maxBaseAlign == 0) || (pInOut->maxBaseAlign >= Block64K))) + { + UINT_32 thickness = Thickness(tileMode); + + if (thickness == 1) + { + tileMode = ADDR_TM_PRT_TILED_THIN1; + } + else + { + static const UINT_32 PrtTileBytes = 0x10000; + // First prt thick tile index in the tile mode table + static const UINT_32 PrtThickTileIndex = 22; + ADDR_TILEINFO tileInfo = {0}; + + HwlComputeMacroModeIndex(PrtThickTileIndex, + pInOut->flags, + pInOut->bpp, + pInOut->numSamples, + &tileInfo); + + UINT_32 macroTileBytes = ((pInOut->bpp) >> 3) * 64 * pInOut->numSamples * + thickness * HwlGetPipes(&tileInfo) * + tileInfo.banks * tileInfo.bankWidth * + tileInfo.bankHeight; + + if (macroTileBytes <= PrtTileBytes) + { + tileMode = ADDR_TM_PRT_TILED_THICK; + } + else + { + tileMode = ADDR_TM_PRT_TILED_THIN1; + } + } + } + } + + if (pInOut->maxBaseAlign != 0) + { + pInOut->flags.dccPipeWorkaround = FALSE; + } + } + + if (tileMode != pInOut->tileMode) + { + pInOut->tileMode = tileMode; + } +} + +/** +**************************************************************************************************** +* CiLib::HwlOverrideTileMode +* +* @brief +* Override THICK to THIN, for specific formats on CI +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID CiLib::HwlOverrideTileMode( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure + ) const +{ + AddrTileMode tileMode = pInOut->tileMode; + AddrTileType tileType = pInOut->tileType; + + // currently, all CI/VI family do not + // support ADDR_TM_PRT_2D_TILED_THICK,ADDR_TM_PRT_3D_TILED_THICK and + // ADDR_TM_PRT_2D_TILED_THIN1, ADDR_TM_PRT_3D_TILED_THIN1 + switch (tileMode) + { + case ADDR_TM_PRT_2D_TILED_THICK: + case ADDR_TM_PRT_3D_TILED_THICK: + tileMode = ADDR_TM_PRT_TILED_THICK; + break; + case ADDR_TM_PRT_2D_TILED_THIN1: + case ADDR_TM_PRT_3D_TILED_THIN1: + tileMode = ADDR_TM_PRT_TILED_THIN1; + break; + default: + break; + } 
+ + // UBTS#404321, we do not need such overriding, as THICK+THICK entries removed from the tile-mode table + if (!m_settings.isBonaire) + { + UINT_32 thickness = Thickness(tileMode); + + // tile_thickness = (array_mode == XTHICK) ? 8 : ((array_mode == THICK) ? 4 : 1) + if (thickness > 1) + { + switch (pInOut->format) + { + // tcpError("Thick micro tiling is not supported for format... + case ADDR_FMT_X24_8_32_FLOAT: + case ADDR_FMT_32_AS_8: + case ADDR_FMT_32_AS_8_8: + case ADDR_FMT_32_AS_32_32_32_32: + + // packed formats + case ADDR_FMT_GB_GR: + case ADDR_FMT_BG_RG: + case ADDR_FMT_1_REVERSED: + case ADDR_FMT_1: + case ADDR_FMT_BC1: + case ADDR_FMT_BC2: + case ADDR_FMT_BC3: + case ADDR_FMT_BC4: + case ADDR_FMT_BC5: + case ADDR_FMT_BC6: + case ADDR_FMT_BC7: + switch (tileMode) + { + case ADDR_TM_1D_TILED_THICK: + tileMode = ADDR_TM_1D_TILED_THIN1; + break; + + case ADDR_TM_2D_TILED_XTHICK: + case ADDR_TM_2D_TILED_THICK: + tileMode = ADDR_TM_2D_TILED_THIN1; + break; + + case ADDR_TM_3D_TILED_XTHICK: + case ADDR_TM_3D_TILED_THICK: + tileMode = ADDR_TM_3D_TILED_THIN1; + break; + + case ADDR_TM_PRT_TILED_THICK: + tileMode = ADDR_TM_PRT_TILED_THIN1; + break; + + case ADDR_TM_PRT_2D_TILED_THICK: + tileMode = ADDR_TM_PRT_2D_TILED_THIN1; + break; + + case ADDR_TM_PRT_3D_TILED_THICK: + tileMode = ADDR_TM_PRT_3D_TILED_THIN1; + break; + + default: + break; + + } + + // Switch tile type from thick to thin + if (tileMode != pInOut->tileMode) + { + // see tileIndex: 13-18 + tileType = ADDR_NON_DISPLAYABLE; + } + + break; + default: + break; + } + } + } + + if (tileMode != pInOut->tileMode) + { + pInOut->tileMode = tileMode; + pInOut->tileType = tileType; + } +} + +/** +**************************************************************************************************** +* CiLib::HwlSelectTileMode +* +* @brief +* Select tile modes. 
*
* @return
*   N/A
*
****************************************************************************************************
*/
VOID CiLib::HwlSelectTileMode(
    ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut     ///< [in,out] input output structure
    ) const
{
    AddrTileMode tileMode;
    AddrTileType tileType;

    if (pInOut->flags.rotateDisplay)
    {
        tileMode = ADDR_TM_2D_TILED_THIN1;
        tileType = ADDR_ROTATED;
    }
    else if (pInOut->flags.volume)
    {
        // Volume (thick) modes normally use the THICK micro tile type; on Bonaire, or when
        // non-displayable thick modes are allowed for color surfaces, use NON_DISPLAYABLE instead.
        BOOL_32 bThin = (m_settings.isBonaire == TRUE) ||
                        ((m_allowNonDispThickModes == TRUE) && (pInOut->flags.color == TRUE));

        // 8+ slices -> extra-thick, 4..7 slices -> thick, otherwise thin tiling.
        if (pInOut->numSlices >= 8)
        {
            tileMode = ADDR_TM_2D_TILED_XTHICK;
            tileType = (bThin == TRUE) ? ADDR_NON_DISPLAYABLE : ADDR_THICK;
        }
        else if (pInOut->numSlices >= 4)
        {
            tileMode = ADDR_TM_2D_TILED_THICK;
            tileType = (bThin == TRUE) ? ADDR_NON_DISPLAYABLE : ADDR_THICK;
        }
        else
        {
            tileMode = ADDR_TM_2D_TILED_THIN1;
            tileType = ADDR_NON_DISPLAYABLE;
        }
    }
    else
    {
        tileMode = ADDR_TM_2D_TILED_THIN1;

        if (pInOut->flags.depth || pInOut->flags.stencil)
        {
            tileType = ADDR_DEPTH_SAMPLE_ORDER;
        }
        else if ((pInOut->bpp <= 32) ||
                 (pInOut->flags.display == TRUE) ||
                 (pInOut->flags.overlay == TRUE))
        {
            tileType = ADDR_DISPLAYABLE;
        }
        else
        {
            tileType = ADDR_NON_DISPLAYABLE;
        }
    }

    // PRT surfaces must use a PRT tile mode; thick PRT is NON_DISPLAYABLE on Bonaire.
    if (pInOut->flags.prt)
    {
        if (Thickness(tileMode) > 1)
        {
            tileMode = ADDR_TM_PRT_TILED_THICK;
            tileType = (m_settings.isBonaire == TRUE) ? ADDR_NON_DISPLAYABLE : ADDR_THICK;
        }
        else
        {
            tileMode = ADDR_TM_PRT_TILED_THIN1;
        }
    }

    pInOut->tileMode = tileMode;
    pInOut->tileType = tileType;

    // No DCC/TC compatibility requested: optimize for space and cap max base alignment at 64KB.
    if ((pInOut->flags.dccCompatible == FALSE) &&
        (pInOut->flags.tcCompatible == FALSE))
    {
        pInOut->flags.opt4Space = TRUE;
        pInOut->maxBaseAlign = Block64K;
    }

    // Optimize tile mode if possible
    OptimizeTileMode(pInOut);

    HwlOverrideTileMode(pInOut);
}

/**
****************************************************************************************************
* CiLib::HwlSetPrtTileMode
*
* @brief
*   Set PRT tile mode.
*
* @return
*   N/A
*
****************************************************************************************************
*/
VOID CiLib::HwlSetPrtTileMode(
    ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut     ///< [in,out] input output structure
    ) const
{
    AddrTileMode tileMode = pInOut->tileMode;
    AddrTileType tileType = pInOut->tileType;

    if (Thickness(tileMode) > 1)
    {
        // Thick PRT: Bonaire has no PRT_THICK+THICK entry, so it uses NON_DISPLAYABLE.
        tileMode = ADDR_TM_PRT_TILED_THICK;
        tileType = (m_settings.isBonaire == TRUE) ? ADDR_NON_DISPLAYABLE : ADDR_THICK;
    }
    else
    {
        // Thin PRT keeps the incoming tile type unless it was THICK.
        tileMode = ADDR_TM_PRT_TILED_THIN1;
        tileType = (tileType == ADDR_THICK) ? ADDR_NON_DISPLAYABLE : tileType;
    }

    pInOut->tileMode = tileMode;
    pInOut->tileType = tileType;
}

/**
****************************************************************************************************
* CiLib::HwlSetupTileInfo
*
* @brief
*   Setup default value of tile info for SI
****************************************************************************************************
*/
VOID CiLib::HwlSetupTileInfo(
    AddrTileMode                        tileMode,       ///< [in] Tile mode
    ADDR_SURFACE_FLAGS                  flags,          ///< [in] Surface type flags
    UINT_32                             bpp,            ///< [in] Bits per pixel
    UINT_32                             pitch,          ///< [in] Pitch in pixels
    UINT_32                             height,         ///< [in] Height in pixels
    UINT_32                             numSamples,     ///< [in] Number of samples
    ADDR_TILEINFO*                      pTileInfoIn,    ///< [in] Tile info input: NULL for default
    ADDR_TILEINFO*                      pTileInfoOut,   ///< [out] Tile info output
    AddrTileType                        inTileType,     ///< [in] Tile type
    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*   pOut            ///< [out] Output
    ) const
{
    // NOTE(review): pitch, height and pTileInfoIn are not referenced in this implementation.
    UINT_32 thickness = Thickness(tileMode);
    ADDR_TILEINFO* pTileInfo = pTileInfoOut;
    INT index = TileIndexInvalid;
    INT macroModeIndex = TileIndexInvalid;

    // Fail-safe code
    if (IsLinear(tileMode) == FALSE)
    {
        // Thick tile modes must use thick micro tile mode but Bonaire does not support due to
        // old derived netlists (UBTS 404321)
        if (thickness > 1)
        {
            if (m_settings.isBonaire)
            {
                inTileType = ADDR_NON_DISPLAYABLE;
            }
            else if ((m_allowNonDispThickModes == FALSE) ||
                     (inTileType != ADDR_NON_DISPLAYABLE) ||
                     // There is no PRT_THICK + THIN entry in tile mode table except Bonaire
                     (IsPrtTileMode(tileMode) == TRUE))
            {
                inTileType = ADDR_THICK;
            }
        }
        // 128 bpp tiling must be non-displayable.
        // Fmask reuse color buffer's entry but bank-height field can be from another entry
        // To simplify the logic, fmask entry should be picked from non-displayable ones
        else if (bpp == 128 || flags.fmask)
        {
            inTileType = ADDR_NON_DISPLAYABLE;
        }
        // These two modes only have non-disp entries though they can be other micro tile modes
        else if (tileMode == ADDR_TM_3D_TILED_THIN1 || tileMode == ADDR_TM_PRT_3D_TILED_THIN1)
        {
            inTileType = ADDR_NON_DISPLAYABLE;
        }

        // Depth/stencil always overrides the micro tile type.
        if (flags.depth || flags.stencil)
        {
            inTileType = ADDR_DEPTH_SAMPLE_ORDER;
        }
    }

    // tcCompatible flag is only meaningful for gfx8.
    if (m_settings.isVolcanicIslands == FALSE)
    {
        flags.tcCompatible = FALSE;
    }

    if (IsTileInfoAllZero(pTileInfo))
    {
        // No client-provided tile info: pick a tile mode table index from the surface properties.

        // See table entries 0-4
        if (flags.depth || flags.stencil)
        {
            // tileSize = thickness * bpp * numSamples * 8 * 8 / 8
            UINT_32 tileSize = thickness * bpp * numSamples * 8;

            // Turn off tc compatible if row_size is smaller than tile size (tile split occurs).
            if (m_rowSize < tileSize)
            {
                flags.tcCompatible = FALSE;
            }

            // Bitwise OR is intentional here: these are single-bit flag fields.
            if (flags.nonSplit | flags.tcCompatible | flags.needEquation)
            {
                // Texture readable depth surface should not be split
                switch (tileSize)
                {
                    case 64:
                        index = 0;
                        break;
                    case 128:
                        index = 1;
                        break;
                    case 256:
                        index = 2;
                        break;
                    case 512:
                        index = 3;
                        break;
                    default:
                        index = 4;
                        break;
                }
            }
            else
            {
                // Depth and stencil need to use the same index, thus the pre-defined tile_split
                // can meet the requirement to choose the same macro mode index
                // uncompressed depth/stencil are not supported for now
                switch (numSamples)
                {
                    case 1:
                        index = 0;
                        break;
                    case 2:
                    case 4:
                        index = 1;
                        break;
                    case 8:
                        index = 2;
                        break;
                    default:
                        break;
                }
            }
        }

        // See table entries 5-6
        if (inTileType == ADDR_DEPTH_SAMPLE_ORDER)
        {
            switch (tileMode)
            {
                case ADDR_TM_1D_TILED_THIN1:
                    index = 5;
                    break;
                case ADDR_TM_PRT_TILED_THIN1:
                    index = 6;
                    break;
                default:
                    break;
            }
        }

        // See table entries 8-12
        if (inTileType == ADDR_DISPLAYABLE)
        {
            switch (tileMode)
            {
                case ADDR_TM_1D_TILED_THIN1:
                    index = 9;
                    break;
                case ADDR_TM_2D_TILED_THIN1:
                    index = 10;
                    break;
                case ADDR_TM_PRT_TILED_THIN1:
                    index = 11;
                    break;
                default:
                    break;
            }
        }

        // See table entries 13-18
        if (inTileType == ADDR_NON_DISPLAYABLE)
        {
            switch (tileMode)
            {
                case ADDR_TM_1D_TILED_THIN1:
                    index = 13;
                    break;
                case ADDR_TM_2D_TILED_THIN1:
                    index = 14;
                    break;
                case ADDR_TM_3D_TILED_THIN1:
                    index = 15;
                    break;
                case ADDR_TM_PRT_TILED_THIN1:
                    index = 16;
                    break;
                default:
                    break;
            }
        }

        // See table entries 19-26
        if (thickness > 1)
        {
            switch (tileMode)
            {
                case ADDR_TM_1D_TILED_THICK:
                    // special check for bonaire, for the compatibility between old KMD and new UMD
                    index = ((inTileType == ADDR_THICK) || m_settings.isBonaire) ? 19 : 18;
                    break;
                case ADDR_TM_2D_TILED_THICK:
                    // special check for bonaire, for the compatibility between old KMD and new UMD
                    index = ((inTileType == ADDR_THICK) || m_settings.isBonaire) ? 20 : 24;
                    break;
                case ADDR_TM_3D_TILED_THICK:
                    index = 21;
                    break;
                case ADDR_TM_PRT_TILED_THICK:
                    index = 22;
                    break;
                case ADDR_TM_2D_TILED_XTHICK:
                    index = 25;
                    break;
                case ADDR_TM_3D_TILED_XTHICK:
                    index = 26;
                    break;
                default:
                    break;
            }
        }

        // See table entries 27-30
        if (inTileType == ADDR_ROTATED)
        {
            switch (tileMode)
            {
                case ADDR_TM_1D_TILED_THIN1:
                    index = 27;
                    break;
                case ADDR_TM_2D_TILED_THIN1:
                    index = 28;
                    break;
                case ADDR_TM_PRT_TILED_THIN1:
                    index = 29;
                    break;
                case ADDR_TM_PRT_2D_TILED_THIN1:
                    index = 30;
                    break;
                default:
                    break;
            }
        }

        if (m_pipes >= 8)
        {
            ADDR_ASSERT((index + 1) < static_cast<INT>(m_noOfEntries));
            // Only do this when tile mode table is updated.
            if (((tileMode == ADDR_TM_PRT_TILED_THIN1) || (tileMode == ADDR_TM_PRT_TILED_THICK)) &&
                (m_tileTable[index + 1].mode == tileMode))
            {
                // A PRT macro tile must be exactly 64KB.
                static const UINT_32 PrtTileBytes = 0x10000;
                ADDR_TILEINFO tileInfo = {0};

                HwlComputeMacroModeIndex(index, flags, bpp, numSamples, &tileInfo);

                UINT_32 macroTileBytes = (bpp >> 3) * 64 * numSamples * thickness *
                                         HwlGetPipes(&tileInfo) * tileInfo.banks *
                                         tileInfo.bankWidth * tileInfo.bankHeight;

                if (macroTileBytes != PrtTileBytes)
                {
                    // Switching to next tile mode entry to make sure macro tile size is 64KB
                    index += 1;

                    tileInfo.pipeConfig = m_tileTable[index].info.pipeConfig;

                    macroTileBytes = (bpp >> 3) * 64 * numSamples * thickness *
                                     HwlGetPipes(&tileInfo) * tileInfo.banks *
                                     tileInfo.bankWidth * tileInfo.bankHeight;

                    ADDR_ASSERT(macroTileBytes == PrtTileBytes);

                    flags.tcCompatible = FALSE;
                    pOut->dccUnsupport = TRUE;
                }
            }
        }
    }
    else
    {
        // A pre-filled tile info is ready
        index = pOut->tileIndex;
        macroModeIndex = pOut->macroModeIndex;

        // pass tile type back for post tile index compute
        pOut->tileType = inTileType;

        if (flags.depth || flags.stencil)
        {
            // tileSize = thickness * bpp * numSamples * 8 * 8 / 8
            UINT_32 tileSize = thickness * bpp * numSamples * 8;

            // Turn off tc compatible if row_size is smaller than tile size (tile split occurs).
            if (m_rowSize < tileSize)
            {
                flags.tcCompatible = FALSE;
            }
        }

        // DCC cannot be used if the surface's pipe config disagrees with the device's pipe count.
        UINT_32 numPipes = GetPipePerSurf(pTileInfo->pipeConfig);

        if (m_pipes != numPipes)
        {
            pOut->dccUnsupport = TRUE;
        }
    }

    // We only need to set up tile info if there is a valid index but macroModeIndex is invalid
    if ((index != TileIndexInvalid) && (macroModeIndex == TileIndexInvalid))
    {
        macroModeIndex = HwlComputeMacroModeIndex(index, flags, bpp, numSamples, pTileInfo);

        // Copy to pOut->tileType/tileIndex/macroModeIndex
        pOut->tileIndex = index;
        pOut->tileType = m_tileTable[index].type; // Or inTileType, the same
        pOut->macroModeIndex = macroModeIndex;
    }
    else if (tileMode == ADDR_TM_LINEAR_GENERAL)
    {
        pOut->tileIndex = TileIndexLinearGeneral;

        // Copy linear-aligned entry??
        *pTileInfo = m_tileTable[8].info;
    }
    else if (tileMode == ADDR_TM_LINEAR_ALIGNED)
    {
        pOut->tileIndex = 8;
        *pTileInfo = m_tileTable[8].info;
    }

    if (flags.tcCompatible)
    {
        flags.tcCompatible = CheckTcCompatibility(pTileInfo, bpp, tileMode, inTileType, pOut);
    }

    pOut->tcCompatible = flags.tcCompatible;
}

/**
****************************************************************************************************
* CiLib::ReadGbTileMode
*
* @brief
*   Convert GB_TILE_MODE HW value to ADDR_TILE_CONFIG.
+**************************************************************************************************** +*/ +VOID CiLib::ReadGbTileMode( + UINT_32 regValue, ///< [in] GB_TILE_MODE register + TileConfig* pCfg ///< [out] output structure + ) const +{ + GB_TILE_MODE gbTileMode; + gbTileMode.val = regValue; + + pCfg->type = static_cast(gbTileMode.f.micro_tile_mode_new); + pCfg->info.pipeConfig = static_cast(gbTileMode.f.pipe_config + 1); + + if (pCfg->type == ADDR_DEPTH_SAMPLE_ORDER) + { + pCfg->info.tileSplitBytes = 64 << gbTileMode.f.tile_split; + } + else + { + pCfg->info.tileSplitBytes = 1 << gbTileMode.f.sample_split; + } + + UINT_32 regArrayMode = gbTileMode.f.array_mode; + + pCfg->mode = static_cast(regArrayMode); + + switch (regArrayMode) + { + case 5: + pCfg->mode = ADDR_TM_PRT_TILED_THIN1; + break; + case 6: + pCfg->mode = ADDR_TM_PRT_2D_TILED_THIN1; + break; + case 8: + pCfg->mode = ADDR_TM_2D_TILED_XTHICK; + break; + case 9: + pCfg->mode = ADDR_TM_PRT_TILED_THICK; + break; + case 0xa: + pCfg->mode = ADDR_TM_PRT_2D_TILED_THICK; + break; + case 0xb: + pCfg->mode = ADDR_TM_PRT_3D_TILED_THIN1; + break; + case 0xe: + pCfg->mode = ADDR_TM_3D_TILED_XTHICK; + break; + case 0xf: + pCfg->mode = ADDR_TM_PRT_3D_TILED_THICK; + break; + default: + break; + } + + // Fail-safe code for these always convert tile info, as the non-macro modes + // return the entry of tile mode table directly without looking up macro mode table + if (!IsMacroTiled(pCfg->mode)) + { + pCfg->info.banks = 2; + pCfg->info.bankWidth = 1; + pCfg->info.bankHeight = 1; + pCfg->info.macroAspectRatio = 1; + pCfg->info.tileSplitBytes = 64; + } +} + +/** +**************************************************************************************************** +* CiLib::InitTileSettingTable +* +* @brief +* Initialize the ADDR_TILE_CONFIG table. 
+* @return +* TRUE if tile table is correctly initialized +**************************************************************************************************** +*/ +BOOL_32 CiLib::InitTileSettingTable( + const UINT_32* pCfg, ///< [in] Pointer to table of tile configs + UINT_32 noOfEntries ///< [in] Numbe of entries in the table above + ) +{ + BOOL_32 initOk = TRUE; + + ADDR_ASSERT(noOfEntries <= TileTableSize); + + memset(m_tileTable, 0, sizeof(m_tileTable)); + + if (noOfEntries != 0) + { + m_noOfEntries = noOfEntries; + } + else + { + m_noOfEntries = TileTableSize; + } + + if (pCfg) // From Client + { + for (UINT_32 i = 0; i < m_noOfEntries; i++) + { + ReadGbTileMode(*(pCfg + i), &m_tileTable[i]); + } + } + else + { + ADDR_ASSERT_ALWAYS(); + initOk = FALSE; + } + + if (initOk) + { + ADDR_ASSERT(m_tileTable[TILEINDEX_LINEAR_ALIGNED].mode == ADDR_TM_LINEAR_ALIGNED); + + if (m_settings.isBonaire == FALSE) + { + // Check if entry 18 is "thick+thin" combination + if ((m_tileTable[18].mode == ADDR_TM_1D_TILED_THICK) && + (m_tileTable[18].type == ADDR_NON_DISPLAYABLE)) + { + m_allowNonDispThickModes = TRUE; + ADDR_ASSERT(m_tileTable[24].mode == ADDR_TM_2D_TILED_THICK); + } + } + else + { + m_allowNonDispThickModes = TRUE; + } + + // Assume the first entry is always programmed with full pipes + m_pipes = HwlGetPipes(&m_tileTable[0].info); + } + + return initOk; +} + +/** +**************************************************************************************************** +* CiLib::ReadGbMacroTileCfg +* +* @brief +* Convert GB_MACRO_TILE_CFG HW value to ADDR_TILE_CONFIG. 
+**************************************************************************************************** +*/ +VOID CiLib::ReadGbMacroTileCfg( + UINT_32 regValue, ///< [in] GB_MACRO_TILE_MODE register + ADDR_TILEINFO* pCfg ///< [out] output structure + ) const +{ + GB_MACROTILE_MODE gbTileMode; + gbTileMode.val = regValue; + + pCfg->bankHeight = 1 << gbTileMode.f.bank_height; + pCfg->bankWidth = 1 << gbTileMode.f.bank_width; + pCfg->banks = 1 << (gbTileMode.f.num_banks + 1); + pCfg->macroAspectRatio = 1 << gbTileMode.f.macro_tile_aspect; +} + +/** +**************************************************************************************************** +* CiLib::InitMacroTileCfgTable +* +* @brief +* Initialize the ADDR_MACRO_TILE_CONFIG table. +* @return +* TRUE if macro tile table is correctly initialized +**************************************************************************************************** +*/ +BOOL_32 CiLib::InitMacroTileCfgTable( + const UINT_32* pCfg, ///< [in] Pointer to table of tile configs + UINT_32 noOfMacroEntries ///< [in] Numbe of entries in the table above + ) +{ + BOOL_32 initOk = TRUE; + + ADDR_ASSERT(noOfMacroEntries <= MacroTileTableSize); + + memset(m_macroTileTable, 0, sizeof(m_macroTileTable)); + + if (noOfMacroEntries != 0) + { + m_noOfMacroEntries = noOfMacroEntries; + } + else + { + m_noOfMacroEntries = MacroTileTableSize; + } + + if (pCfg) // From Client + { + for (UINT_32 i = 0; i < m_noOfMacroEntries; i++) + { + ReadGbMacroTileCfg(*(pCfg + i), &m_macroTileTable[i]); + + m_macroTileTable[i].tileSplitBytes = 64 << (i % 8); + } + } + else + { + ADDR_ASSERT_ALWAYS(); + initOk = FALSE; + } + return initOk; +} + +/** +**************************************************************************************************** +* CiLib::HwlComputeMacroModeIndex +* +* @brief +* Computes macro tile mode index +* @return +* TRUE if macro tile table is correctly initialized 
****************************************************************************************************
*/
INT_32 CiLib::HwlComputeMacroModeIndex(
    INT_32              tileIndex,      ///< [in] Tile mode index
    ADDR_SURFACE_FLAGS  flags,          ///< [in] Surface flags
    UINT_32             bpp,            ///< [in] Bit per pixel
    UINT_32             numSamples,     ///< [in] Number of samples
    ADDR_TILEINFO*      pTileInfo,      ///< [out] Pointer to ADDR_TILEINFO
    AddrTileMode*       pTileMode,      ///< [out] Pointer to AddrTileMode
    AddrTileType*       pTileType       ///< [out] Pointer to AddrTileType
    ) const
{
    INT_32 macroModeIndex = TileIndexInvalid;

    AddrTileMode tileMode = m_tileTable[tileIndex].mode;
    AddrTileType tileType = m_tileTable[tileIndex].type;
    UINT_32 thickness = Thickness(tileMode);

    if (!IsMacroTiled(tileMode))
    {
        // Non-macro modes copy the tile mode table entry directly; no macro table lookup.
        *pTileInfo = m_tileTable[tileIndex].info;
        macroModeIndex = TileIndexNoMacroIndex;
    }
    else
    {
        // Bytes in one micro tile for a single sample.
        UINT_32 tileBytes1x = BITS_TO_BYTES(bpp * MicroTilePixels * thickness);
        UINT_32 tileSplit;

        if (m_tileTable[tileIndex].type == ADDR_DEPTH_SAMPLE_ORDER)
        {
            // Depth entries store real tileSplitBytes
            tileSplit = m_tileTable[tileIndex].info.tileSplitBytes;
        }
        else
        {
            // Non-depth entries store a split factor
            UINT_32 sampleSplit = m_tileTable[tileIndex].info.tileSplitBytes;
            UINT_32 colorTileSplit = Max(256u, sampleSplit * tileBytes1x);

            tileSplit = colorTileSplit;
        }

        // The effective split can never exceed the row size.
        UINT_32 tileSplitC = Min(m_rowSize, tileSplit);
        UINT_32 tileBytes;

        if (flags.fmask)
        {
            // Fmask ignores the sample count for this computation.
            tileBytes = Min(tileSplitC, tileBytes1x);
        }
        else
        {
            tileBytes = Min(tileSplitC, numSamples * tileBytes1x);
        }

        if (tileBytes < 64)
        {
            tileBytes = 64;
        }

        // Index is log2 of the 64-byte multiple: 64B -> 0, 128B -> 1, ...
        macroModeIndex = Log2(tileBytes / 64);

        if (flags.prt || IsPrtTileMode(tileMode))
        {
            // PRT modes index a separate range of the macro table, offset by PrtMacroModeOffset.
            macroModeIndex += PrtMacroModeOffset;
            *pTileInfo = m_macroTileTable[macroModeIndex];
        }
        else
        {
            // Same lookup as above, without the PRT offset.
            *pTileInfo = m_macroTileTable[macroModeIndex];
        }

        // Pipe config and the clamped tile split always come from the tile mode entry/computation.
        pTileInfo->pipeConfig = m_tileTable[tileIndex].info.pipeConfig;

        pTileInfo->tileSplitBytes = tileSplitC;
    }

    if (NULL != pTileMode)
    {
        *pTileMode = tileMode;
    }

    if (NULL != pTileType)
    {
        *pTileType = tileType;
    }

    return macroModeIndex;
}

/**
****************************************************************************************************
* CiLib::HwlComputeTileDataWidthAndHeightLinear
*
* @brief
*   Compute the squared cache shape for per-tile data (CMASK and HTILE) for linear layout
*
* @note
*   MacroWidth and macroHeight are measured in pixels
****************************************************************************************************
*/
VOID CiLib::HwlComputeTileDataWidthAndHeightLinear(
    UINT_32*        pMacroWidth,    ///< [out] macro tile width
    UINT_32*        pMacroHeight,   ///< [out] macro tile height
    UINT_32         bpp,            ///< [in] bits per pixel
    ADDR_TILEINFO*  pTileInfo       ///< [in] tile info
    ) const
{
    // NOTE(review): bpp is not referenced in this implementation.
    ADDR_ASSERT(pTileInfo != NULL);

    UINT_32 numTiles;

    // 8-tile macro for the wider pipe configs listed, 4-tile macro otherwise.
    switch (pTileInfo->pipeConfig)
    {
        case ADDR_PIPECFG_P16_32x32_8x16:
        case ADDR_PIPECFG_P16_32x32_16x16:
        case ADDR_PIPECFG_P8_32x64_32x32:
        case ADDR_PIPECFG_P8_32x32_16x32:
        case ADDR_PIPECFG_P8_32x32_16x16:
        case ADDR_PIPECFG_P8_32x32_8x16:
        case ADDR_PIPECFG_P4_32x32:
            numTiles = 8;
            break;
        default:
            numTiles = 4;
            break;
    }

    *pMacroWidth = numTiles * MicroTileWidth;
    *pMacroHeight = numTiles * MicroTileHeight;
}

/**
****************************************************************************************************
* CiLib::HwlComputeMetadataNibbleAddress
*
* @brief
*   calculate meta data address based on input information
*
* &parameter
*   uncompressedDataByteAddress - address of a pixel in color surface
*   dataBaseByteAddress         - base address of color surface
*   metadataBaseByteAddress     - base address of meta ram
*   metadataBitSize             - meta key size, 8 for DCC, 4 for cmask
*   elementBitSize              - element size of color surface
*   blockByteSize               - compression block size, 256 for DCC
*   pipeInterleaveBytes         - pipe interleave size
*   numOfPipes                  - number of pipes
*
numOfBanks - number of banks +* numOfSamplesPerSplit - number of samples per tile split +* @return +* meta data nibble address (nibble address is used to support DCC compatible cmask) +* +**************************************************************************************************** +*/ +UINT_64 CiLib::HwlComputeMetadataNibbleAddress( + UINT_64 uncompressedDataByteAddress, + UINT_64 dataBaseByteAddress, + UINT_64 metadataBaseByteAddress, + UINT_32 metadataBitSize, + UINT_32 elementBitSize, + UINT_32 blockByteSize, + UINT_32 pipeInterleaveBytes, + UINT_32 numOfPipes, + UINT_32 numOfBanks, + UINT_32 numOfSamplesPerSplit) const +{ + ///-------------------------------------------------------------------------------------------- + /// Get pipe interleave, bank and pipe bits + ///-------------------------------------------------------------------------------------------- + UINT_32 pipeInterleaveBits = Log2(pipeInterleaveBytes); + UINT_32 pipeBits = Log2(numOfPipes); + UINT_32 bankBits = Log2(numOfBanks); + + ///-------------------------------------------------------------------------------------------- + /// Clear pipe and bank swizzles + ///-------------------------------------------------------------------------------------------- + UINT_32 dataMacrotileBits = pipeInterleaveBits + pipeBits + bankBits; + UINT_32 metadataMacrotileBits = pipeInterleaveBits + pipeBits + bankBits; + + UINT_64 dataMacrotileClearMask = ~((1L << dataMacrotileBits) - 1); + UINT_64 metadataMacrotileClearMask = ~((1L << metadataMacrotileBits) - 1); + + UINT_64 dataBaseByteAddressNoSwizzle = dataBaseByteAddress & dataMacrotileClearMask; + UINT_64 metadataBaseByteAddressNoSwizzle = metadataBaseByteAddress & metadataMacrotileClearMask; + + ///-------------------------------------------------------------------------------------------- + /// Modify metadata base before adding in so that when final address is divided by data ratio, + /// the base address returns to where it should be + 
///-------------------------------------------------------------------------------------------- + ADDR_ASSERT((0 != metadataBitSize)); + UINT_64 metadataBaseShifted = metadataBaseByteAddressNoSwizzle * blockByteSize * 8 / + metadataBitSize; + UINT_64 offset = uncompressedDataByteAddress - + dataBaseByteAddressNoSwizzle + + metadataBaseShifted; + + ///-------------------------------------------------------------------------------------------- + /// Save bank data bits + ///-------------------------------------------------------------------------------------------- + UINT_32 lsb = pipeBits + pipeInterleaveBits; + UINT_32 msb = bankBits - 1 + lsb; + + UINT_64 bankDataBits = GetBits(offset, msb, lsb); + + ///-------------------------------------------------------------------------------------------- + /// Save pipe data bits + ///-------------------------------------------------------------------------------------------- + lsb = pipeInterleaveBits; + msb = pipeBits - 1 + lsb; + + UINT_64 pipeDataBits = GetBits(offset, msb, lsb); + + ///-------------------------------------------------------------------------------------------- + /// Remove pipe and bank bits + ///-------------------------------------------------------------------------------------------- + lsb = pipeInterleaveBits; + msb = dataMacrotileBits - 1; + + UINT_64 offsetWithoutPipeBankBits = RemoveBits(offset, msb, lsb); + + ADDR_ASSERT((0 != blockByteSize)); + UINT_64 blockInBankpipe = offsetWithoutPipeBankBits / blockByteSize; + + UINT_32 tileSize = 8 * 8 * elementBitSize/8 * numOfSamplesPerSplit; + UINT_32 blocksInTile = tileSize / blockByteSize; + + if (0 == blocksInTile) + { + lsb = 0; + } + else + { + lsb = Log2(blocksInTile); + } + msb = bankBits - 1 + lsb; + + UINT_64 blockInBankpipeWithBankBits = InsertBits(blockInBankpipe, bankDataBits, msb, lsb); + + /// NOTE *2 because we are converting to Nibble address in this step + UINT_64 metaAddressInPipe = blockInBankpipeWithBankBits * 2 * metadataBitSize / 
8; + + ///-------------------------------------------------------------------------------------------- + /// Reinsert pipe bits back into the final address + ///-------------------------------------------------------------------------------------------- + lsb = pipeInterleaveBits + 1; ///<+1 due to Nibble address now gives interleave bits extra lsb. + msb = pipeBits - 1 + lsb; + UINT_64 metadataAddress = InsertBits(metaAddressInPipe, pipeDataBits, msb, lsb); + + return metadataAddress; +} + +/** +**************************************************************************************************** +* CiLib::HwlComputeSurfaceAlignmentsMacroTiled +* +* @brief +* Hardware layer function to compute alignment request for macro tile mode +* +**************************************************************************************************** +*/ +VOID CiLib::HwlComputeSurfaceAlignmentsMacroTiled( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 mipLevel, ///< [in] mip level + UINT_32 numSamples, ///< [in] number of samples + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] Surface output + ) const +{ + // This is to workaround a H/W limitation that DCC doesn't work when pipe config is switched to + // P4. In theory, all asics that have such switching should be patched but we now only know what + // to pad for Fiji. + if ((m_settings.isFiji == TRUE) && + (flags.dccPipeWorkaround == TRUE) && + (flags.prt == FALSE) && + (mipLevel == 0) && + (tileMode == ADDR_TM_PRT_TILED_THIN1) && + (pOut->dccUnsupport == TRUE)) + { + pOut->pitchAlign = PowTwoAlign(pOut->pitchAlign, 256); + // In case the client still requests DCC usage. 
+ pOut->dccUnsupport = FALSE; + } +} + +/** +**************************************************************************************************** +* CiLib::HwlPadDimensions +* +* @brief +* Helper function to pad dimensions +* +**************************************************************************************************** +*/ +VOID CiLib::HwlPadDimensions( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 numSamples, ///< [in] number of samples + ADDR_TILEINFO* pTileInfo, ///< [in] tile info + UINT_32 mipLevel, ///< [in] mip level + UINT_32* pPitch, ///< [in,out] pitch in pixels + UINT_32* pPitchAlign, ///< [in,out] pitch alignment + UINT_32 height, ///< [in] height in pixels + UINT_32 heightAlign ///< [in] height alignment + ) const +{ + if ((m_settings.isVolcanicIslands == TRUE) && + (flags.dccCompatible == TRUE) && + (numSamples > 1) && + (mipLevel == 0) && + (IsMacroTiled(tileMode) == TRUE)) + { + UINT_32 tileSizePerSample = BITS_TO_BYTES(bpp * MicroTileWidth * MicroTileHeight); + UINT_32 samplesPerSplit = pTileInfo->tileSplitBytes / tileSizePerSample; + + if (samplesPerSplit < numSamples) + { + UINT_32 dccFastClearByteAlign = HwlGetPipes(pTileInfo) * m_pipeInterleaveBytes * 256; + UINT_32 bytesPerSplit = BITS_TO_BYTES((*pPitch) * height * bpp * samplesPerSplit); + + ADDR_ASSERT(IsPow2(dccFastClearByteAlign)); + + if (0 != (bytesPerSplit & (dccFastClearByteAlign - 1))) + { + UINT_32 dccFastClearPixelAlign = dccFastClearByteAlign / + BITS_TO_BYTES(bpp) / + samplesPerSplit; + UINT_32 macroTilePixelAlign = (*pPitchAlign) * heightAlign; + + if ((dccFastClearPixelAlign >= macroTilePixelAlign) && + ((dccFastClearPixelAlign % macroTilePixelAlign) == 0)) + { + UINT_32 dccFastClearPitchAlignInMacroTile = + dccFastClearPixelAlign / macroTilePixelAlign; + UINT_32 heightInMacroTile = height / heightAlign; + + while ((heightInMacroTile > 1) && + ((heightInMacroTile % 2) == 
0) && + (dccFastClearPitchAlignInMacroTile > 1) && + ((dccFastClearPitchAlignInMacroTile % 2) == 0)) + { + heightInMacroTile >>= 1; + dccFastClearPitchAlignInMacroTile >>= 1; + } + + UINT_32 dccFastClearPitchAlignInPixels = + (*pPitchAlign) * dccFastClearPitchAlignInMacroTile; + + if (IsPow2(dccFastClearPitchAlignInPixels)) + { + *pPitch = PowTwoAlign((*pPitch), dccFastClearPitchAlignInPixels); + } + else + { + *pPitch += (dccFastClearPitchAlignInPixels - 1); + *pPitch /= dccFastClearPitchAlignInPixels; + *pPitch *= dccFastClearPitchAlignInPixels; + } + + *pPitchAlign = dccFastClearPitchAlignInPixels; + } + } + } + } +} + +/** +**************************************************************************************************** +* CiLib::HwlComputeMaxBaseAlignments +* +* @brief +* Gets maximum alignments +* @return +* maximum alignments +**************************************************************************************************** +*/ +UINT_32 CiLib::HwlComputeMaxBaseAlignments() const +{ + const UINT_32 pipes = HwlGetPipes(&m_tileTable[0].info); + + // Initial size is 64 KiB for PRT. + UINT_32 maxBaseAlign = 64 * 1024; + + for (UINT_32 i = 0; i < m_noOfMacroEntries; i++) + { + // The maximum tile size is 16 byte-per-pixel and either 8-sample or 8-slice. 
+ UINT_32 tileSize = m_macroTileTable[i].tileSplitBytes; + + UINT_32 baseAlign = tileSize * pipes * m_macroTileTable[i].banks * + m_macroTileTable[i].bankWidth * m_macroTileTable[i].bankHeight; + + if (baseAlign > maxBaseAlign) + { + maxBaseAlign = baseAlign; + } + } + + return maxBaseAlign; +} + +/** +**************************************************************************************************** +* CiLib::HwlComputeMaxMetaBaseAlignments +* +* @brief +* Gets maximum alignments for metadata +* @return +* maximum alignments for metadata +**************************************************************************************************** +*/ +UINT_32 CiLib::HwlComputeMaxMetaBaseAlignments() const +{ + UINT_32 maxBank = 1; + + for (UINT_32 i = 0; i < m_noOfMacroEntries; i++) + { + if ((m_settings.isVolcanicIslands) && IsMacroTiled(m_tileTable[i].mode)) + { + maxBank = Max(maxBank, m_macroTileTable[i].banks); + } + } + + return SiLib::HwlComputeMaxMetaBaseAlignments() * maxBank; +} + +/** +**************************************************************************************************** +* CiLib::DepthStencilTileCfgMatch +* +* @brief +* Try to find a tile index for stencil which makes its tile config parameters matches to depth +* @return +* TRUE if such tile index for stencil can be found +**************************************************************************************************** +*/ +BOOL_32 CiLib::DepthStencilTileCfgMatch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + BOOL_32 depthStencil2DTileConfigMatch = FALSE; + + for (INT_32 stencilTileIndex = MinDepth2DThinIndex; + stencilTileIndex <= MaxDepth2DThinIndex; + stencilTileIndex++) + { + ADDR_TILEINFO tileInfo = {0}; + INT_32 stencilMacroIndex = HwlComputeMacroModeIndex(stencilTileIndex, + pIn->flags, + 8, + pIn->numSamples, + &tileInfo); + + if (stencilMacroIndex != TileIndexNoMacroIndex) + 
{ + if ((m_macroTileTable[stencilMacroIndex].banks == + m_macroTileTable[pOut->macroModeIndex].banks) && + (m_macroTileTable[stencilMacroIndex].bankWidth == + m_macroTileTable[pOut->macroModeIndex].bankWidth) && + (m_macroTileTable[stencilMacroIndex].bankHeight == + m_macroTileTable[pOut->macroModeIndex].bankHeight) && + (m_macroTileTable[stencilMacroIndex].macroAspectRatio == + m_macroTileTable[pOut->macroModeIndex].macroAspectRatio) && + (m_macroTileTable[stencilMacroIndex].pipeConfig == + m_macroTileTable[pOut->macroModeIndex].pipeConfig)) + { + if ((pOut->tcCompatible == FALSE) || + (tileInfo.tileSplitBytes >= MicroTileWidth * MicroTileHeight * pIn->numSamples)) + { + depthStencil2DTileConfigMatch = TRUE; + pOut->stencilTileIdx = stencilTileIndex; + break; + } + } + } + else + { + ADDR_ASSERT_ALWAYS(); + } + } + + return depthStencil2DTileConfigMatch; +} + +/** +**************************************************************************************************** +* CiLib::DepthStencilTileCfgMatch +* +* @brief +* Check if tc compatibility is available +* @return +* If tc compatibility is not available +**************************************************************************************************** +*/ +BOOL_32 CiLib::CheckTcCompatibility( + const ADDR_TILEINFO* pTileInfo, ///< [in] input tile info + UINT_32 bpp, ///< [in] Bits per pixel + AddrTileMode tileMode, ///< [in] input tile mode + AddrTileType tileType, ///< [in] input tile type + const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in] output surf info + ) const +{ + BOOL_32 tcCompatible = TRUE; + + if (IsMacroTiled(tileMode)) + { + if (tileType != ADDR_DEPTH_SAMPLE_ORDER) + { + // Turn off tcCompatible for color surface if tileSplit happens. Depth/stencil + // tileSplit case was handled at tileIndex selecting time. 
+ INT_32 tileIndex = pOut->tileIndex; + + if ((tileIndex == TileIndexInvalid) && (IsTileInfoAllZero(pTileInfo) == FALSE)) + { + tileIndex = HwlPostCheckTileIndex(pTileInfo, tileMode, tileType, tileIndex); + } + + if (tileIndex != TileIndexInvalid) + { + UINT_32 thickness = Thickness(tileMode); + + ADDR_ASSERT(static_cast(tileIndex) < TileTableSize); + // Non-depth entries store a split factor + UINT_32 sampleSplit = m_tileTable[tileIndex].info.tileSplitBytes; + UINT_32 tileBytes1x = BITS_TO_BYTES(bpp * MicroTilePixels * thickness); + UINT_32 colorTileSplit = Max(256u, sampleSplit * tileBytes1x); + + if (m_rowSize < colorTileSplit) + { + tcCompatible = FALSE; + } + } + } + } + else + { + // Client should not enable tc compatible for linear and 1D tile modes. + tcCompatible = FALSE; + } + + return tcCompatible; +} + +} // V1 +} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/src/r800/ciaddrlib.h mesa-19.0.1/src/amd/addrlib/src/r800/ciaddrlib.h --- mesa-18.3.3/src/amd/addrlib/src/r800/ciaddrlib.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/r800/ciaddrlib.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,201 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file ciaddrlib.h +* @brief Contains the CiLib class definition. +**************************************************************************************************** +*/ + +#ifndef __CI_ADDR_LIB_H__ +#define __CI_ADDR_LIB_H__ + +#include "addrlib1.h" +#include "siaddrlib.h" + +namespace Addr +{ +namespace V1 +{ + +/** +**************************************************************************************************** +* @brief This class is the CI specific address library +* function set. +**************************************************************************************************** +*/ +class CiLib : public SiLib +{ +public: + /// Creates CiLib object + static Addr::Lib* CreateObj(const Client* pClient) + { + VOID* pMem = Object::ClientAlloc(sizeof(CiLib), pClient); + return (pMem != NULL) ? 
new (pMem) CiLib(pClient) : NULL; + } + +private: + CiLib(const Client* pClient); + virtual ~CiLib(); + +protected: + + // Hwl interface - defined in AddrLib1 + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut); + + virtual ChipFamily HwlConvertChipFamily( + UINT_32 uChipFamily, UINT_32 uChipRevision); + + virtual BOOL_32 HwlInitGlobalParams( + const ADDR_CREATE_INPUT* pCreateIn); + + virtual ADDR_E_RETURNCODE HwlSetupTileCfg( + UINT_32 bpp, INT_32 index, INT_32 macroModeIndex, ADDR_TILEINFO* pInfo, + AddrTileMode* pMode = 0, AddrTileType* pType = 0) const; + + virtual VOID HwlComputeTileDataWidthAndHeightLinear( + UINT_32* pMacroWidth, UINT_32* pMacroHeight, + UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const; + + virtual INT_32 HwlComputeMacroModeIndex( + INT_32 tileIndex, ADDR_SURFACE_FLAGS flags, UINT_32 bpp, UINT_32 numSamples, + ADDR_TILEINFO* pTileInfo, AddrTileMode* pTileMode = NULL, AddrTileType* pTileType = NULL + ) const; + + // Sub-hwl interface - defined in EgBasedLib + virtual VOID HwlSetupTileInfo( + AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags, + UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, + ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo, + AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + virtual INT_32 HwlPostCheckTileIndex( + const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type, + INT curIndex = TileIndexInvalid) const; + + virtual VOID HwlFmaskPreThunkSurfInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn, + const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut, + ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const; + + virtual VOID HwlFmaskPostThunkSurfInfo( + const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut, + 
ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const; + + virtual AddrTileMode HwlDegradeThickTileMode( + AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const; + + virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; + + virtual VOID HwlOptimizeTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; + + virtual VOID HwlSelectTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; + + /// Overwrite tile setting to PRT + virtual VOID HwlSetPrtTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeDccInfo( + const ADDR_COMPUTE_DCCINFO_INPUT* pIn, + ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord( + const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord( + const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const; + + virtual UINT_32 HwlComputeMaxBaseAlignments() const; + + virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const; + + virtual VOID HwlPadDimensions( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, + UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 mipLevel, + UINT_32* pPitch, UINT_32 *PitchAlign, UINT_32 height, UINT_32 heightAlign) const; + + virtual VOID HwlComputeSurfaceAlignmentsMacroTiled( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, + UINT_32 mipLevel, UINT_32 numSamples, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + +private: + + VOID ReadGbTileMode( + UINT_32 regValue, TileConfig* pCfg) const; + + VOID ReadGbMacroTileCfg( + UINT_32 regValue, ADDR_TILEINFO* pCfg) const; + +private: + BOOL_32 InitTileSettingTable( + const UINT_32 *pSetting, UINT_32 noOfEntries); + + BOOL_32 InitMacroTileCfgTable( + const UINT_32 *pSetting, UINT_32 noOfEntries); + + UINT_64 HwlComputeMetadataNibbleAddress( + UINT_64 
uncompressedDataByteAddress, + UINT_64 dataBaseByteAddress, + UINT_64 metadataBaseByteAddress, + UINT_32 metadataBitSize, + UINT_32 elementBitSize, + UINT_32 blockByteSize, + UINT_32 pipeInterleaveBytes, + UINT_32 numOfPipes, + UINT_32 numOfBanks, + UINT_32 numOfSamplesPerSplit) const; + + BOOL_32 DepthStencilTileCfgMatch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + BOOL_32 CheckTcCompatibility(const ADDR_TILEINFO* pTileInfo, UINT_32 bpp, AddrTileMode tileMode, + AddrTileType tileType, const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + static const UINT_32 MacroTileTableSize = 16; + static const UINT_32 PrtMacroModeOffset = MacroTileTableSize / 2; + static const INT_32 MinDepth2DThinIndex = 0; + static const INT_32 MaxDepth2DThinIndex = 4; + static const INT_32 Depth1DThinIndex = 5; + + ADDR_TILEINFO m_macroTileTable[MacroTileTableSize]; + UINT_32 m_noOfMacroEntries; + BOOL_32 m_allowNonDispThickModes; +}; + +} // V1 +} // Addr + +#endif diff -Nru mesa-18.3.3/src/amd/addrlib/src/r800/egbaddrlib.cpp mesa-19.0.1/src/amd/addrlib/src/r800/egbaddrlib.cpp --- mesa-18.3.3/src/amd/addrlib/src/r800/egbaddrlib.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/r800/egbaddrlib.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,4156 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file egbaddrlib.cpp +* @brief Contains the EgBasedLib class implementation. 
+**************************************************************************************************** +*/ + +#include "egbaddrlib.h" + +#include "util/macros.h" + +namespace Addr +{ +namespace V1 +{ + +/** +**************************************************************************************************** +* EgBasedLib::EgBasedLib +* +* @brief +* Constructor +* +* @note +* +**************************************************************************************************** +*/ +EgBasedLib::EgBasedLib(const Client* pClient) + : + Lib(pClient), + m_ranks(0), + m_logicalBanks(0), + m_bankInterleave(1) +{ +} + +/** +**************************************************************************************************** +* EgBasedLib::~EgBasedLib +* +* @brief +* Destructor +**************************************************************************************************** +*/ +EgBasedLib::~EgBasedLib() +{ +} + +/** +**************************************************************************************************** +* EgBasedLib::DispatchComputeSurfaceInfo +* +* @brief +* Compute surface sizes include padded pitch,height,slices,total size in bytes, +* meanwhile output suitable tile mode and base alignment might be changed in this +* call as well. Results are returned through output parameters. +* +* @return +* TRUE if no error occurs +**************************************************************************************************** +*/ +BOOL_32 EgBasedLib::DispatchComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + AddrTileMode tileMode = pIn->tileMode; + UINT_32 bpp = pIn->bpp; + UINT_32 numSamples = pIn->numSamples; + UINT_32 numFrags = ((pIn->numFrags == 0) ? 
numSamples : pIn->numFrags); + UINT_32 pitch = pIn->width; + UINT_32 height = pIn->height; + UINT_32 numSlices = pIn->numSlices; + UINT_32 mipLevel = pIn->mipLevel; + ADDR_SURFACE_FLAGS flags = pIn->flags; + + ADDR_TILEINFO tileInfoDef = {0}; + ADDR_TILEINFO* pTileInfo = &tileInfoDef; + UINT_32 padDims = 0; + BOOL_32 valid; + + if (pIn->flags.disallowLargeThickDegrade == 0) + { + tileMode = DegradeLargeThickTile(tileMode, bpp); + } + + // Only override numSamples for NI above + if (m_chipFamily >= ADDR_CHIP_FAMILY_NI) + { + if (numFrags != numSamples) // This means EQAA + { + // The real surface size needed is determined by number of fragments + numSamples = numFrags; + } + + // Save altered numSamples in pOut + pOut->numSamples = numSamples; + } + + // Caller makes sure pOut->pTileInfo is not NULL, see HwlComputeSurfaceInfo + ADDR_ASSERT(pOut->pTileInfo); + + if (pOut->pTileInfo != NULL) + { + pTileInfo = pOut->pTileInfo; + } + + // Set default values + if (pIn->pTileInfo != NULL) + { + if (pTileInfo != pIn->pTileInfo) + { + *pTileInfo = *pIn->pTileInfo; + } + } + else + { + memset(pTileInfo, 0, sizeof(ADDR_TILEINFO)); + } + + // For macro tile mode, we should calculate default tiling parameters + HwlSetupTileInfo(tileMode, + flags, + bpp, + pitch, + height, + numSamples, + pIn->pTileInfo, + pTileInfo, + pIn->tileType, + pOut); + + if (flags.cube) + { + if (mipLevel == 0) + { + padDims = 2; + } + + if (numSlices == 1) + { + // This is calculating one face, remove cube flag + flags.cube = 0; + } + } + + switch (tileMode) + { + case ADDR_TM_LINEAR_GENERAL://fall through + case ADDR_TM_LINEAR_ALIGNED: + valid = ComputeSurfaceInfoLinear(pIn, pOut, padDims); + break; + + case ADDR_TM_1D_TILED_THIN1://fall through + case ADDR_TM_1D_TILED_THICK: + valid = ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, tileMode); + break; + + case ADDR_TM_2D_TILED_THIN1: //fall through + case ADDR_TM_2D_TILED_THICK: //fall through + case ADDR_TM_3D_TILED_THIN1: //fall through + case 
ADDR_TM_3D_TILED_THICK: //fall through + case ADDR_TM_2D_TILED_XTHICK: //fall through + case ADDR_TM_3D_TILED_XTHICK: //fall through + case ADDR_TM_PRT_TILED_THIN1: //fall through + case ADDR_TM_PRT_2D_TILED_THIN1://fall through + case ADDR_TM_PRT_3D_TILED_THIN1://fall through + case ADDR_TM_PRT_TILED_THICK: //fall through + case ADDR_TM_PRT_2D_TILED_THICK://fall through + case ADDR_TM_PRT_3D_TILED_THICK: + valid = ComputeSurfaceInfoMacroTiled(pIn, pOut, padDims, tileMode); + break; + + default: + valid = FALSE; + ADDR_ASSERT_ALWAYS(); + break; + } + + return valid; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeSurfaceInfoLinear +* +* @brief +* Compute linear surface sizes include padded pitch, height, slices, total size in +* bytes, meanwhile alignments as well. Since it is linear mode, so output tile mode +* will not be changed here. Results are returned through output parameters. +* +* @return +* TRUE if no error occurs +**************************************************************************************************** +*/ +BOOL_32 EgBasedLib::ComputeSurfaceInfoLinear( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, ///< [out] Output structure + UINT_32 padDims ///< [in] Dimensions to padd + ) const +{ + UINT_32 expPitch = pIn->width; + UINT_32 expHeight = pIn->height; + UINT_32 expNumSlices = pIn->numSlices; + + // No linear MSAA on real H/W, keep this for TGL + UINT_32 numSamples = pOut->numSamples; + + const UINT_32 microTileThickness = 1; + + // + // Compute the surface alignments. 
+ // + ComputeSurfaceAlignmentsLinear(pIn->tileMode, + pIn->bpp, + pIn->flags, + &pOut->baseAlign, + &pOut->pitchAlign, + &pOut->heightAlign); + + if ((pIn->tileMode == ADDR_TM_LINEAR_GENERAL) && pIn->flags.color && (pIn->height > 1)) + { +#if !ALT_TEST + // When linear_general surface is accessed in multiple lines, it requires 8 pixels in pitch + // alignment since PITCH_TILE_MAX is in unit of 8 pixels. + // It is OK if it is accessed per line. + ADDR_ASSERT((pIn->width % 8) == 0); +#endif + } + + pOut->depthAlign = microTileThickness; + + expPitch = HwlPreHandleBaseLvl3xPitch(pIn, expPitch); + + // + // Pad pitch and height to the required granularities. + // + PadDimensions(pIn->tileMode, + pIn->bpp, + pIn->flags, + numSamples, + pOut->pTileInfo, + padDims, + pIn->mipLevel, + &expPitch, &pOut->pitchAlign, + &expHeight, pOut->heightAlign, + &expNumSlices, microTileThickness); + + expPitch = HwlPostHandleBaseLvl3xPitch(pIn, expPitch); + + // + // Adjust per HWL + // + + UINT_64 logicalSliceSize; + + logicalSliceSize = HwlGetSizeAdjustmentLinear(pIn->tileMode, + pIn->bpp, + numSamples, + pOut->baseAlign, + pOut->pitchAlign, + &expPitch, + &expHeight, + &pOut->heightAlign); + + if ((pIn->pitchAlign != 0) || (pIn->heightAlign != 0)) + { + if (pIn->pitchAlign != 0) + { + ADDR_ASSERT((pIn->pitchAlign % pOut->pitchAlign) == 0); + pOut->pitchAlign = pIn->pitchAlign; + + if (IsPow2(pOut->pitchAlign)) + { + expPitch = PowTwoAlign(expPitch, pOut->pitchAlign); + } + else + { + expPitch += pOut->pitchAlign - 1; + expPitch /= pOut->pitchAlign; + expPitch *= pOut->pitchAlign; + } + } + + if (pIn->heightAlign != 0) + { + ADDR_ASSERT((pIn->heightAlign % pOut->heightAlign) == 0); + pOut->heightAlign = pIn->heightAlign; + + if (IsPow2(pOut->heightAlign)) + { + expHeight = PowTwoAlign(expHeight, pOut->heightAlign); + } + else + { + expHeight += pOut->heightAlign - 1; + expHeight /= pOut->heightAlign; + expHeight *= pOut->heightAlign; + } + } + + logicalSliceSize = 
BITS_TO_BYTES(expPitch * expHeight * pIn->bpp); + } + + pOut->pitch = expPitch; + pOut->height = expHeight; + pOut->depth = expNumSlices; + + pOut->surfSize = logicalSliceSize * expNumSlices; + + pOut->tileMode = pIn->tileMode; + + return TRUE; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeSurfaceInfoMicroTiled +* +* @brief +* Compute 1D/Micro Tiled surface sizes include padded pitch, height, slices, total +* size in bytes, meanwhile alignments as well. Results are returned through output +* parameters. +* +* @return +* TRUE if no error occurs +**************************************************************************************************** +*/ +BOOL_32 EgBasedLib::ComputeSurfaceInfoMicroTiled( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, ///< [out] Output structure + UINT_32 padDims, ///< [in] Dimensions to padd + AddrTileMode expTileMode ///< [in] Expected tile mode + ) const +{ + BOOL_32 valid = TRUE; + + UINT_32 microTileThickness; + UINT_32 expPitch = pIn->width; + UINT_32 expHeight = pIn->height; + UINT_32 expNumSlices = pIn->numSlices; + + // No 1D MSAA on real H/W, keep this for TGL + UINT_32 numSamples = pOut->numSamples; + + // + // Compute the micro tile thickness. + // + microTileThickness = Thickness(expTileMode); + + // + // Extra override for mip levels + // + if (pIn->mipLevel > 0) + { + // + // Reduce tiling mode from thick to thin if the number of slices is less than the + // micro tile thickness. + // + if ((expTileMode == ADDR_TM_1D_TILED_THICK) && + (expNumSlices < ThickTileThickness)) + { + expTileMode = HwlDegradeThickTileMode(ADDR_TM_1D_TILED_THICK, expNumSlices, NULL); + if (expTileMode != ADDR_TM_1D_TILED_THICK) + { + microTileThickness = 1; + } + } + } + + // + // Compute the surface restrictions. 
+ // + ComputeSurfaceAlignmentsMicroTiled(expTileMode, + pIn->bpp, + pIn->flags, + pIn->mipLevel, + numSamples, + &pOut->baseAlign, + &pOut->pitchAlign, + &pOut->heightAlign); + + pOut->depthAlign = microTileThickness; + + // + // Pad pitch and height to the required granularities. + // Compute surface size. + // Return parameters. + // + PadDimensions(expTileMode, + pIn->bpp, + pIn->flags, + numSamples, + pOut->pTileInfo, + padDims, + pIn->mipLevel, + &expPitch, &pOut->pitchAlign, + &expHeight, pOut->heightAlign, + &expNumSlices, microTileThickness); + + // + // Get HWL specific pitch adjustment + // + UINT_64 logicalSliceSize = HwlGetSizeAdjustmentMicroTiled(microTileThickness, + pIn->bpp, + pIn->flags, + numSamples, + pOut->baseAlign, + pOut->pitchAlign, + &expPitch, + &expHeight); + + pOut->pitch = expPitch; + pOut->height = expHeight; + pOut->depth = expNumSlices; + + pOut->surfSize = logicalSliceSize * expNumSlices; + + pOut->tileMode = expTileMode; + + return valid; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeSurfaceInfoMacroTiled +* +* @brief +* Compute 2D/macro tiled surface sizes include padded pitch, height, slices, total +* size in bytes, meanwhile output suitable tile mode and alignments might be changed +* in this call as well. Results are returned through output parameters. 
+* +* @return +* TRUE if no error occurs +**************************************************************************************************** +*/ +BOOL_32 EgBasedLib::ComputeSurfaceInfoMacroTiled( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, ///< [out] Output structure + UINT_32 padDims, ///< [in] Dimensions to padd + AddrTileMode expTileMode ///< [in] Expected tile mode + ) const +{ + BOOL_32 valid = TRUE; + + AddrTileMode origTileMode = expTileMode; + UINT_32 microTileThickness; + + UINT_32 paddedPitch; + UINT_32 paddedHeight; + UINT_64 bytesPerSlice; + + UINT_32 expPitch = pIn->width; + UINT_32 expHeight = pIn->height; + UINT_32 expNumSlices = pIn->numSlices; + + UINT_32 numSamples = pOut->numSamples; + + // + // Compute the surface restrictions as base + // SanityCheckMacroTiled is called in ComputeSurfaceAlignmentsMacroTiled + // + valid = ComputeSurfaceAlignmentsMacroTiled(expTileMode, + pIn->bpp, + pIn->flags, + pIn->mipLevel, + numSamples, + pOut); + + if (valid) + { + // + // Compute the micro tile thickness. + // + microTileThickness = Thickness(expTileMode); + + // + // Find the correct tiling mode for mip levels + // + if (pIn->mipLevel > 0) + { + // + // Try valid tile mode + // + expTileMode = ComputeSurfaceMipLevelTileMode(expTileMode, + pIn->bpp, + expPitch, + expHeight, + expNumSlices, + numSamples, + pOut->blockWidth, + pOut->blockHeight, + pOut->pTileInfo); + + if (!IsMacroTiled(expTileMode)) // Downgraded to micro-tiled + { + return ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, expTileMode); + } + else if (microTileThickness != Thickness(expTileMode)) + { + // + // Re-compute if thickness changed since bank-height may be changed! 
+ // + return ComputeSurfaceInfoMacroTiled(pIn, pOut, padDims, expTileMode); + } + } + + paddedPitch = expPitch; + paddedHeight = expHeight; + + // + // Re-cal alignment + // + if (expTileMode != origTileMode) // Tile mode is changed but still macro-tiled + { + valid = ComputeSurfaceAlignmentsMacroTiled(expTileMode, + pIn->bpp, + pIn->flags, + pIn->mipLevel, + numSamples, + pOut); + } + + // + // Do padding + // + PadDimensions(expTileMode, + pIn->bpp, + pIn->flags, + numSamples, + pOut->pTileInfo, + padDims, + pIn->mipLevel, + &paddedPitch, &pOut->pitchAlign, + &paddedHeight, pOut->heightAlign, + &expNumSlices, microTileThickness); + + if (pIn->flags.qbStereo && + (pOut->pStereoInfo != NULL)) + { + UINT_32 stereoHeightAlign = HwlStereoCheckRightOffsetPadding(pOut->pTileInfo); + + if (stereoHeightAlign != 0) + { + paddedHeight = PowTwoAlign(paddedHeight, stereoHeightAlign); + } + } + + if ((pIn->flags.needEquation == TRUE) && + (m_chipFamily == ADDR_CHIP_FAMILY_SI) && + (pIn->numMipLevels > 1) && + (pIn->mipLevel == 0)) + { + BOOL_32 convertTo1D = FALSE; + + ADDR_ASSERT(Thickness(expTileMode) == 1); + + for (UINT_32 i = 1; i < pIn->numMipLevels; i++) + { + UINT_32 mipPitch = Max(1u, paddedPitch >> i); + UINT_32 mipHeight = Max(1u, pIn->height >> i); + UINT_32 mipSlices = pIn->flags.volume ? 
+ Max(1u, pIn->numSlices >> i) : pIn->numSlices; + expTileMode = ComputeSurfaceMipLevelTileMode(expTileMode, + pIn->bpp, + mipPitch, + mipHeight, + mipSlices, + numSamples, + pOut->blockWidth, + pOut->blockHeight, + pOut->pTileInfo); + + if (IsMacroTiled(expTileMode)) + { + if (PowTwoAlign(mipPitch, pOut->blockWidth) != + PowTwoAlign(mipPitch, pOut->pitchAlign)) + { + convertTo1D = TRUE; + break; + } + } + else + { + break; + } + } + + if (convertTo1D) + { + return ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, ADDR_TM_1D_TILED_THIN1); + } + } + + pOut->pitch = paddedPitch; + // Put this check right here to workaround special mipmap cases which the original height + // is needed. + // The original height is pre-stored in pOut->height in PostComputeMipLevel and + // pOut->pitch is needed in HwlCheckLastMacroTiledLvl, too. + if (m_configFlags.checkLast2DLevel && (numSamples == 1)) // Don't check MSAA + { + // Set a TRUE in pOut if next Level is the first 1D sub level + HwlCheckLastMacroTiledLvl(pIn, pOut); + } + pOut->height = paddedHeight; + + pOut->depth = expNumSlices; + + // + // Compute the size of a slice. + // + bytesPerSlice = BITS_TO_BYTES(static_cast(paddedPitch) * + paddedHeight * NextPow2(pIn->bpp) * numSamples); + + pOut->surfSize = bytesPerSlice * expNumSlices; + + pOut->tileMode = expTileMode; + + pOut->depthAlign = microTileThickness; + + } // if (valid) + + return valid; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeSurfaceAlignmentsLinear +* +* @brief +* Compute linear surface alignment, calculation results are returned through +* output parameters. 
+* +* @return +* TRUE if no error occurs +**************************************************************************************************** +*/ +BOOL_32 EgBasedLib::ComputeSurfaceAlignmentsLinear( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32* pBaseAlign, ///< [out] base address alignment in bytes + UINT_32* pPitchAlign, ///< [out] pitch alignment in pixels + UINT_32* pHeightAlign ///< [out] height alignment in pixels + ) const +{ + BOOL_32 valid = TRUE; + + switch (tileMode) + { + case ADDR_TM_LINEAR_GENERAL: + // + // The required base alignment and pitch and height granularities is to 1 element. + // + *pBaseAlign = (bpp > 8) ? bpp / 8 : 1; + *pPitchAlign = 1; + *pHeightAlign = 1; + break; + case ADDR_TM_LINEAR_ALIGNED: + // + // The required alignment for base is the pipe interleave size. + // The required granularity for pitch is hwl dependent. + // The required granularity for height is one row. + // + *pBaseAlign = m_pipeInterleaveBytes; + *pPitchAlign = HwlGetPitchAlignmentLinear(bpp, flags); + *pHeightAlign = 1; + break; + default: + *pBaseAlign = 1; + *pPitchAlign = 1; + *pHeightAlign = 1; + ADDR_UNHANDLED_CASE(); + break; + } + + AdjustPitchAlignment(flags, pPitchAlign); + + return valid; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeSurfaceAlignmentsMicroTiled +* +* @brief +* Compute 1D tiled surface alignment, calculation results are returned through +* output parameters. 
+* +* @return +* TRUE if no error occurs +**************************************************************************************************** +*/ +BOOL_32 EgBasedLib::ComputeSurfaceAlignmentsMicroTiled( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 mipLevel, ///< [in] mip level + UINT_32 numSamples, ///< [in] number of samples + UINT_32* pBaseAlign, ///< [out] base address alignment in bytes + UINT_32* pPitchAlign, ///< [out] pitch alignment in pixels + UINT_32* pHeightAlign ///< [out] height alignment in pixels + ) const +{ + BOOL_32 valid = TRUE; + + // + // The required alignment for base is the pipe interleave size. + // + *pBaseAlign = m_pipeInterleaveBytes; + + *pPitchAlign = HwlGetPitchAlignmentMicroTiled(tileMode, bpp, flags, numSamples); + + *pHeightAlign = MicroTileHeight; + + AdjustPitchAlignment(flags, pPitchAlign); + + // Workaround 2 for 1D tiling - There is HW bug for Carrizo, + // where it requires the following alignments for 1D tiling. 
+ if (flags.czDispCompatible && (mipLevel == 0)) + { + *pBaseAlign = PowTwoAlign(*pBaseAlign, 4096); //Base address MOD 4096 = 0 + *pPitchAlign = PowTwoAlign(*pPitchAlign, 512 / (BITS_TO_BYTES(bpp))); //(8 lines * pitch * bytes per pixel) MOD 4096 = 0 + } + // end Carrizo workaround for 1D tilling + + return valid; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlReduceBankWidthHeight +* +* @brief +* Additional checks, reduce bankHeight/bankWidth if needed and possible +* tileSize*BANK_WIDTH*BANK_HEIGHT <= ROW_SIZE +* +* @return +* TRUE if no error occurs +**************************************************************************************************** +*/ +BOOL_32 EgBasedLib::HwlReduceBankWidthHeight( + UINT_32 tileSize, ///< [in] tile size + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 numSamples, ///< [in] number of samples + UINT_32 bankHeightAlign, ///< [in] bank height alignment + UINT_32 pipes, ///< [in] pipes + ADDR_TILEINFO* pTileInfo ///< [in,out] bank structure. + ) const +{ + UINT_32 macroAspectAlign; + BOOL_32 valid = TRUE; + + if (tileSize * pTileInfo->bankWidth * pTileInfo->bankHeight > m_rowSize) + { + BOOL_32 stillGreater = TRUE; + + // Try reducing bankWidth first + if (stillGreater && pTileInfo->bankWidth > 1) + { + while (stillGreater && pTileInfo->bankWidth > 0) + { + pTileInfo->bankWidth >>= 1; + + if (pTileInfo->bankWidth == 0) + { + pTileInfo->bankWidth = 1; + break; + } + + stillGreater = + tileSize * pTileInfo->bankWidth * pTileInfo->bankHeight > m_rowSize; + } + + // bankWidth is reduced above, so we need to recalculate bankHeight and ratio + bankHeightAlign = Max(1u, + m_pipeInterleaveBytes * m_bankInterleave / + (tileSize * pTileInfo->bankWidth) + ); + + // We cannot increase bankHeight so just assert this case. 
+ ADDR_ASSERT((pTileInfo->bankHeight % bankHeightAlign) == 0); + + if (numSamples == 1) + { + macroAspectAlign = Max(1u, + m_pipeInterleaveBytes * m_bankInterleave / + (tileSize * pipes * pTileInfo->bankWidth) + ); + pTileInfo->macroAspectRatio = PowTwoAlign(pTileInfo->macroAspectRatio, + macroAspectAlign); + } + } + + // Early quit bank_height degradation for "64" bit z buffer + if (flags.depth && bpp >= 64) + { + stillGreater = FALSE; + } + + // Then try reducing bankHeight + if (stillGreater && pTileInfo->bankHeight > bankHeightAlign) + { + while (stillGreater && pTileInfo->bankHeight > bankHeightAlign) + { + pTileInfo->bankHeight >>= 1; + + if (pTileInfo->bankHeight < bankHeightAlign) + { + pTileInfo->bankHeight = bankHeightAlign; + break; + } + + stillGreater = + tileSize * pTileInfo->bankWidth * pTileInfo->bankHeight > m_rowSize; + } + } + + valid = !stillGreater; + + // Generate a warning if we still fail to meet this constraint + if (valid == FALSE) + { + ADDR_WARN( + 0, ("TILE_SIZE(%d)*BANK_WIDTH(%d)*BANK_HEIGHT(%d) <= ROW_SIZE(%d)", + tileSize, pTileInfo->bankWidth, pTileInfo->bankHeight, m_rowSize)); + } + } + + return valid; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeSurfaceAlignmentsMacroTiled +* +* @brief +* Compute 2D tiled surface alignment, calculation results are returned through +* output parameters. 
+* +* @return +* TRUE if no error occurs +**************************************************************************************************** +*/ +BOOL_32 EgBasedLib::ComputeSurfaceAlignmentsMacroTiled( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 mipLevel, ///< [in] mip level + UINT_32 numSamples, ///< [in] number of samples + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] Surface output + ) const +{ + ADDR_TILEINFO* pTileInfo = pOut->pTileInfo; + + BOOL_32 valid = SanityCheckMacroTiled(pTileInfo); + + if (valid) + { + UINT_32 macroTileWidth; + UINT_32 macroTileHeight; + + UINT_32 tileSize; + UINT_32 bankHeightAlign; + UINT_32 macroAspectAlign; + + UINT_32 thickness = Thickness(tileMode); + UINT_32 pipes = HwlGetPipes(pTileInfo); + + // + // Align bank height first according to latest h/w spec + // + + // tile_size = MIN(tile_split, 64 * tile_thickness * element_bytes * num_samples) + tileSize = Min(pTileInfo->tileSplitBytes, + BITS_TO_BYTES(64 * thickness * bpp * numSamples)); + + // bank_height_align = + // MAX(1, (pipe_interleave_bytes * bank_interleave)/(tile_size*bank_width)) + bankHeightAlign = Max(1u, + m_pipeInterleaveBytes * m_bankInterleave / + (tileSize * pTileInfo->bankWidth) + ); + + pTileInfo->bankHeight = PowTwoAlign(pTileInfo->bankHeight, bankHeightAlign); + + // num_pipes * bank_width * macro_tile_aspect >= + // (pipe_interleave_size * bank_interleave) / tile_size + if (numSamples == 1) + { + // this restriction is only for mipmap (mipmap's numSamples must be 1) + macroAspectAlign = Max(1u, + m_pipeInterleaveBytes * m_bankInterleave / + (tileSize * pipes * pTileInfo->bankWidth) + ); + pTileInfo->macroAspectRatio = PowTwoAlign(pTileInfo->macroAspectRatio, macroAspectAlign); + } + + valid = HwlReduceBankWidthHeight(tileSize, + bpp, + flags, + numSamples, + bankHeightAlign, + pipes, + pTileInfo); + + // + // The required granularity for pitch 
is the macro tile width. + // + macroTileWidth = MicroTileWidth * pTileInfo->bankWidth * pipes * + pTileInfo->macroAspectRatio; + + pOut->pitchAlign = macroTileWidth; + pOut->blockWidth = macroTileWidth; + + AdjustPitchAlignment(flags, &pOut->pitchAlign); + + // + // The required granularity for height is the macro tile height. + // + macroTileHeight = MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks / + pTileInfo->macroAspectRatio; + + pOut->heightAlign = macroTileHeight; + pOut->blockHeight = macroTileHeight; + + // + // Compute base alignment + // + pOut->baseAlign = + pipes * pTileInfo->bankWidth * pTileInfo->banks * pTileInfo->bankHeight * tileSize; + + HwlComputeSurfaceAlignmentsMacroTiled(tileMode, bpp, flags, mipLevel, numSamples, pOut); + } + + return valid; +} + +/** +**************************************************************************************************** +* EgBasedLib::SanityCheckMacroTiled +* +* @brief +* Check if macro-tiled parameters are valid +* @return +* TRUE if valid +**************************************************************************************************** +*/ +BOOL_32 EgBasedLib::SanityCheckMacroTiled( + ADDR_TILEINFO* pTileInfo ///< [in] macro-tiled parameters + ) const +{ + BOOL_32 valid = TRUE; + MAYBE_UNUSED UINT_32 numPipes = HwlGetPipes(pTileInfo); + + switch (pTileInfo->banks) + { + case 2: //fall through + case 4: //fall through + case 8: //fall through + case 16: + break; + default: + valid = FALSE; + break; + + } + + if (valid) + { + switch (pTileInfo->bankWidth) + { + case 1: //fall through + case 2: //fall through + case 4: //fall through + case 8: + break; + default: + valid = FALSE; + break; + } + } + + if (valid) + { + switch (pTileInfo->bankHeight) + { + case 1: //fall through + case 2: //fall through + case 4: //fall through + case 8: + break; + default: + valid = FALSE; + break; + } + } + + if (valid) + { + switch (pTileInfo->macroAspectRatio) + { + case 1: //fall through + case 2: //fall 
through + case 4: //fall through + case 8: + break; + default: + valid = FALSE; + break; + } + } + + if (valid) + { + if (pTileInfo->banks < pTileInfo->macroAspectRatio) + { + // This will generate macro tile height <= 1 + valid = FALSE; + } + } + + if (valid) + { + if (pTileInfo->tileSplitBytes > m_rowSize) + { + ADDR_WARN(0, ("tileSplitBytes is bigger than row size")); + } + } + + if (valid) + { + valid = HwlSanityCheckMacroTiled(pTileInfo); + } + + ADDR_ASSERT(valid == TRUE); + + // Add this assert for guidance + ADDR_ASSERT(numPipes * pTileInfo->banks >= 4); + + return valid; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeSurfaceMipLevelTileMode +* +* @brief +* Compute valid tile mode for surface mipmap sub-levels +* +* @return +* Suitable tile mode +**************************************************************************************************** +*/ +AddrTileMode EgBasedLib::ComputeSurfaceMipLevelTileMode( + AddrTileMode baseTileMode, ///< [in] base tile mode + UINT_32 bpp, ///< [in] bits per pixels + UINT_32 pitch, ///< [in] current level pitch + UINT_32 height, ///< [in] current level height + UINT_32 numSlices, ///< [in] current number of slices + UINT_32 numSamples, ///< [in] number of samples + UINT_32 pitchAlign, ///< [in] pitch alignment + UINT_32 heightAlign, ///< [in] height alignment + ADDR_TILEINFO* pTileInfo ///< [in] ptr to bank structure + ) const +{ + UINT_64 bytesPerSlice; + (void)bytesPerSlice; + UINT_32 bytesPerTile; + + AddrTileMode expTileMode = baseTileMode; + UINT_32 microTileThickness = Thickness(expTileMode); + UINT_32 interleaveSize = m_pipeInterleaveBytes * m_bankInterleave; + + // + // Compute the size of a slice. 
+ // + bytesPerSlice = BITS_TO_BYTES(static_cast(pitch) * height * bpp * numSamples); + bytesPerTile = BITS_TO_BYTES(MicroTilePixels * microTileThickness * NextPow2(bpp) * numSamples); + + // + // Reduce tiling mode from thick to thin if the number of slices is less than the + // micro tile thickness. + // + if (numSlices < microTileThickness) + { + expTileMode = HwlDegradeThickTileMode(expTileMode, numSlices, &bytesPerTile); + } + + if (bytesPerTile > pTileInfo->tileSplitBytes) + { + bytesPerTile = pTileInfo->tileSplitBytes; + } + + UINT_32 threshold1 = + bytesPerTile * HwlGetPipes(pTileInfo) * pTileInfo->bankWidth * pTileInfo->macroAspectRatio; + + UINT_32 threshold2 = + bytesPerTile * pTileInfo->bankWidth * pTileInfo->bankHeight; + + // + // Reduce the tile mode from 2D/3D to 1D in following conditions + // + switch (expTileMode) + { + case ADDR_TM_2D_TILED_THIN1: //fall through + case ADDR_TM_3D_TILED_THIN1: + case ADDR_TM_PRT_TILED_THIN1: + case ADDR_TM_PRT_2D_TILED_THIN1: + case ADDR_TM_PRT_3D_TILED_THIN1: + if ((pitch < pitchAlign) || + (height < heightAlign) || + (interleaveSize > threshold1) || + (interleaveSize > threshold2)) + { + expTileMode = ADDR_TM_1D_TILED_THIN1; + } + break; + case ADDR_TM_2D_TILED_THICK: //fall through + case ADDR_TM_3D_TILED_THICK: + case ADDR_TM_2D_TILED_XTHICK: + case ADDR_TM_3D_TILED_XTHICK: + case ADDR_TM_PRT_TILED_THICK: + case ADDR_TM_PRT_2D_TILED_THICK: + case ADDR_TM_PRT_3D_TILED_THICK: + if ((pitch < pitchAlign) || + (height < heightAlign)) + { + expTileMode = ADDR_TM_1D_TILED_THICK; + } + break; + default: + break; + } + + return expTileMode; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlGetAlignmentInfoMacroTiled +* @brief +* Get alignment info for giving tile mode +* @return +* TRUE if getting alignment is OK +**************************************************************************************************** +*/ +BOOL_32 
EgBasedLib::HwlGetAlignmentInfoMacroTiled( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] create surface info + UINT_32* pPitchAlign, ///< [out] pitch alignment + UINT_32* pHeightAlign, ///< [out] height alignment + UINT_32* pSizeAlign ///< [out] size alignment + ) const +{ + BOOL_32 valid = TRUE; + + ADDR_ASSERT(IsMacroTiled(pIn->tileMode)); + + UINT_32 numSamples = (pIn->numFrags == 0) ? pIn->numSamples : pIn->numFrags; + + ADDR_ASSERT(pIn->pTileInfo); + ADDR_TILEINFO tileInfo = *pIn->pTileInfo; + ADDR_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; + out.pTileInfo = &tileInfo; + + if (UseTileIndex(pIn->tileIndex)) + { + out.tileIndex = pIn->tileIndex; + out.macroModeIndex = TileIndexInvalid; + } + + HwlSetupTileInfo(pIn->tileMode, + pIn->flags, + pIn->bpp, + pIn->width, + pIn->height, + numSamples, + &tileInfo, + &tileInfo, + pIn->tileType, + &out); + + valid = ComputeSurfaceAlignmentsMacroTiled(pIn->tileMode, + pIn->bpp, + pIn->flags, + pIn->mipLevel, + numSamples, + &out); + + if (valid) + { + *pPitchAlign = out.pitchAlign; + *pHeightAlign = out.heightAlign; + *pSizeAlign = out.baseAlign; + } + + return valid; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlDegradeThickTileMode +* +* @brief +* Degrades valid tile mode for thick modes if needed +* +* @return +* Suitable tile mode +**************************************************************************************************** +*/ +AddrTileMode EgBasedLib::HwlDegradeThickTileMode( + AddrTileMode baseTileMode, ///< [in] base tile mode + UINT_32 numSlices, ///< [in] current number of slices + UINT_32* pBytesPerTile ///< [in,out] pointer to bytes per slice + ) const +{ + ADDR_ASSERT(numSlices < Thickness(baseTileMode)); + // if pBytesPerTile is NULL, this is a don't-care.... + UINT_32 bytesPerTile = pBytesPerTile != NULL ? 
*pBytesPerTile : 64; + + AddrTileMode expTileMode = baseTileMode; + switch (baseTileMode) + { + case ADDR_TM_1D_TILED_THICK: + expTileMode = ADDR_TM_1D_TILED_THIN1; + bytesPerTile >>= 2; + break; + case ADDR_TM_2D_TILED_THICK: + expTileMode = ADDR_TM_2D_TILED_THIN1; + bytesPerTile >>= 2; + break; + case ADDR_TM_3D_TILED_THICK: + expTileMode = ADDR_TM_3D_TILED_THIN1; + bytesPerTile >>= 2; + break; + case ADDR_TM_2D_TILED_XTHICK: + if (numSlices < ThickTileThickness) + { + expTileMode = ADDR_TM_2D_TILED_THIN1; + bytesPerTile >>= 3; + } + else + { + expTileMode = ADDR_TM_2D_TILED_THICK; + bytesPerTile >>= 1; + } + break; + case ADDR_TM_3D_TILED_XTHICK: + if (numSlices < ThickTileThickness) + { + expTileMode = ADDR_TM_3D_TILED_THIN1; + bytesPerTile >>= 3; + } + else + { + expTileMode = ADDR_TM_3D_TILED_THICK; + bytesPerTile >>= 1; + } + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + if (pBytesPerTile != NULL) + { + *pBytesPerTile = bytesPerTile; + } + + return expTileMode; +} + +/** +**************************************************************************************************** +* EgBasedLib::DispatchComputeSurfaceAddrFromCoord +* +* @brief +* Compute surface address from given coord (x, y, slice,sample) +* +* @return +* Address in bytes +**************************************************************************************************** +*/ +UINT_64 EgBasedLib::DispatchComputeSurfaceAddrFromCoord( + const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + UINT_32 x = pIn->x; + UINT_32 y = pIn->y; + UINT_32 slice = pIn->slice; + UINT_32 sample = pIn->sample; + UINT_32 bpp = pIn->bpp; + UINT_32 pitch = pIn->pitch; + UINT_32 height = pIn->height; + UINT_32 numSlices = pIn->numSlices; + UINT_32 numSamples = ((pIn->numSamples == 0) ? 1 : pIn->numSamples); + UINT_32 numFrags = ((pIn->numFrags == 0) ? 
numSamples : pIn->numFrags); + AddrTileMode tileMode = pIn->tileMode; + AddrTileType microTileType = pIn->tileType; + BOOL_32 ignoreSE = pIn->ignoreSE; + BOOL_32 isDepthSampleOrder = pIn->isDepth; + ADDR_TILEINFO* pTileInfo = pIn->pTileInfo; + + UINT_32* pBitPosition = &pOut->bitPosition; + UINT_64 addr; + + // ADDR_DEPTH_SAMPLE_ORDER = non-disp + depth-sample-order + if (microTileType == ADDR_DEPTH_SAMPLE_ORDER) + { + isDepthSampleOrder = TRUE; + } + + if (m_chipFamily >= ADDR_CHIP_FAMILY_NI) + { + if (numFrags != numSamples) + { + numSamples = numFrags; + ADDR_ASSERT(sample < numSamples); + } + + /// @note + /// 128 bit/thick tiled surface doesn't support display tiling and + /// mipmap chain must have the same tileType, so please fill tileType correctly + if (IsLinear(pIn->tileMode) == FALSE) + { + if (bpp >= 128 || Thickness(tileMode) > 1) + { + ADDR_ASSERT(microTileType != ADDR_DISPLAYABLE); + } + } + } + + switch (tileMode) + { + case ADDR_TM_LINEAR_GENERAL://fall through + case ADDR_TM_LINEAR_ALIGNED: + addr = ComputeSurfaceAddrFromCoordLinear(x, + y, + slice, + sample, + bpp, + pitch, + height, + numSlices, + pBitPosition); + break; + case ADDR_TM_1D_TILED_THIN1://fall through + case ADDR_TM_1D_TILED_THICK: + addr = ComputeSurfaceAddrFromCoordMicroTiled(x, + y, + slice, + sample, + bpp, + pitch, + height, + numSamples, + tileMode, + microTileType, + isDepthSampleOrder, + pBitPosition); + break; + case ADDR_TM_2D_TILED_THIN1: //fall through + case ADDR_TM_2D_TILED_THICK: //fall through + case ADDR_TM_3D_TILED_THIN1: //fall through + case ADDR_TM_3D_TILED_THICK: //fall through + case ADDR_TM_2D_TILED_XTHICK: //fall through + case ADDR_TM_3D_TILED_XTHICK: //fall through + case ADDR_TM_PRT_TILED_THIN1: //fall through + case ADDR_TM_PRT_2D_TILED_THIN1://fall through + case ADDR_TM_PRT_3D_TILED_THIN1://fall through + case ADDR_TM_PRT_TILED_THICK: //fall through + case ADDR_TM_PRT_2D_TILED_THICK://fall through + case ADDR_TM_PRT_3D_TILED_THICK: + UINT_32 
pipeSwizzle; + UINT_32 bankSwizzle; + + if (m_configFlags.useCombinedSwizzle) + { + ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo, + &bankSwizzle, &pipeSwizzle); + } + else + { + pipeSwizzle = pIn->pipeSwizzle; + bankSwizzle = pIn->bankSwizzle; + } + + addr = ComputeSurfaceAddrFromCoordMacroTiled(x, + y, + slice, + sample, + bpp, + pitch, + height, + numSamples, + tileMode, + microTileType, + ignoreSE, + isDepthSampleOrder, + pipeSwizzle, + bankSwizzle, + pTileInfo, + pBitPosition); + break; + default: + addr = 0; + ADDR_ASSERT_ALWAYS(); + break; + } + + return addr; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeMacroTileEquation +* +* @brief +* Computes the address equation in macro tile +* @return +* If equation can be computed +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::ComputeMacroTileEquation( + UINT_32 log2BytesPP, ///< [in] log2 of bytes per pixel + AddrTileMode tileMode, ///< [in] tile mode + AddrTileType microTileType, ///< [in] micro tiling type + ADDR_TILEINFO* pTileInfo, ///< [in] bank structure + ADDR_EQUATION* pEquation ///< [out] Equation for addressing in macro tile + ) const +{ + ADDR_E_RETURNCODE retCode; + + // Element equation within a tile + retCode = ComputeMicroTileEquation(log2BytesPP, tileMode, microTileType, pEquation); + + if (retCode == ADDR_OK) + { + // Tile equesiton with signle pipe bank + UINT_32 numPipes = HwlGetPipes(pTileInfo); + UINT_32 numPipeBits = Log2(numPipes); + + for (UINT_32 i = 0; i < Log2(pTileInfo->bankWidth); i++) + { + pEquation->addr[pEquation->numBits].valid = 1; + pEquation->addr[pEquation->numBits].channel = 0; + pEquation->addr[pEquation->numBits].index = i + log2BytesPP + 3 + numPipeBits; + pEquation->numBits++; + } + + for (UINT_32 i = 0; i < Log2(pTileInfo->bankHeight); i++) + { + pEquation->addr[pEquation->numBits].valid 
= 1; + pEquation->addr[pEquation->numBits].channel = 1; + pEquation->addr[pEquation->numBits].index = i + 3; + pEquation->numBits++; + } + + ADDR_EQUATION equation; + memset(&equation, 0, sizeof(ADDR_EQUATION)); + + UINT_32 thresholdX = 32; + UINT_32 thresholdY = 32; + + if (IsPrtNoRotationTileMode(tileMode)) + { + UINT_32 macroTilePitch = + (MicroTileWidth * pTileInfo->bankWidth * numPipes) * pTileInfo->macroAspectRatio; + UINT_32 macroTileHeight = + (MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks) / + pTileInfo->macroAspectRatio; + thresholdX = Log2(macroTilePitch); + thresholdY = Log2(macroTileHeight); + } + + // Pipe equation + retCode = ComputePipeEquation(log2BytesPP, thresholdX, thresholdY, pTileInfo, &equation); + + if (retCode == ADDR_OK) + { + UINT_32 pipeBitStart = Log2(m_pipeInterleaveBytes); + + if (pEquation->numBits > pipeBitStart) + { + UINT_32 numLeftShift = pEquation->numBits - pipeBitStart; + + for (UINT_32 i = 0; i < numLeftShift; i++) + { + pEquation->addr[pEquation->numBits + equation.numBits - i - 1] = + pEquation->addr[pEquation->numBits - i - 1]; + pEquation->xor1[pEquation->numBits + equation.numBits - i - 1] = + pEquation->xor1[pEquation->numBits - i - 1]; + pEquation->xor2[pEquation->numBits + equation.numBits - i - 1] = + pEquation->xor2[pEquation->numBits - i - 1]; + } + } + + for (UINT_32 i = 0; i < equation.numBits; i++) + { + pEquation->addr[pipeBitStart + i] = equation.addr[i]; + pEquation->xor1[pipeBitStart + i] = equation.xor1[i]; + pEquation->xor2[pipeBitStart + i] = equation.xor2[i]; + pEquation->numBits++; + } + + // Bank equation + memset(&equation, 0, sizeof(ADDR_EQUATION)); + + retCode = ComputeBankEquation(log2BytesPP, thresholdX, thresholdY, + pTileInfo, &equation); + + if (retCode == ADDR_OK) + { + UINT_32 bankBitStart = pipeBitStart + numPipeBits + Log2(m_bankInterleave); + + if (pEquation->numBits > bankBitStart) + { + UINT_32 numLeftShift = pEquation->numBits - bankBitStart; + + for (UINT_32 i = 0; i < 
numLeftShift; i++) + { + pEquation->addr[pEquation->numBits + equation.numBits - i - 1] = + pEquation->addr[pEquation->numBits - i - 1]; + pEquation->xor1[pEquation->numBits + equation.numBits - i - 1] = + pEquation->xor1[pEquation->numBits - i - 1]; + pEquation->xor2[pEquation->numBits + equation.numBits - i - 1] = + pEquation->xor2[pEquation->numBits - i - 1]; + } + } + + for (UINT_32 i = 0; i < equation.numBits; i++) + { + pEquation->addr[bankBitStart + i] = equation.addr[i]; + pEquation->xor1[bankBitStart + i] = equation.xor1[i]; + pEquation->xor2[bankBitStart + i] = equation.xor2[i]; + pEquation->numBits++; + } + } + } + } + + return retCode; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeSurfaceAddrFromCoordMicroTiled +* +* @brief +* Computes the surface address and bit position from a +* coordinate for 2D tilied (macro tiled) +* @return +* The byte address +**************************************************************************************************** +*/ +UINT_64 EgBasedLib::ComputeSurfaceAddrFromCoordMacroTiled( + UINT_32 x, ///< [in] x coordinate + UINT_32 y, ///< [in] y coordinate + UINT_32 slice, ///< [in] slice index + UINT_32 sample, ///< [in] sample index + UINT_32 bpp, ///< [in] bits per pixel + UINT_32 pitch, ///< [in] surface pitch, in pixels + UINT_32 height, ///< [in] surface height, in pixels + UINT_32 numSamples, ///< [in] number of samples + AddrTileMode tileMode, ///< [in] tile mode + AddrTileType microTileType, ///< [in] micro tiling type + BOOL_32 ignoreSE, ///< [in] TRUE if shader enginers can be ignored + BOOL_32 isDepthSampleOrder, ///< [in] TRUE if it depth sample ordering is used + UINT_32 pipeSwizzle, ///< [in] pipe swizzle + UINT_32 bankSwizzle, ///< [in] bank swizzle + ADDR_TILEINFO* pTileInfo, ///< [in] bank structure + /// **All fields to be valid on entry** + UINT_32* pBitPosition ///< [out] bit position, e.g. 
FMT_1 will use this + ) const +{ + UINT_64 addr; + + UINT_32 microTileBytes; + UINT_32 microTileBits; + UINT_32 sampleOffset; + UINT_32 pixelIndex; + UINT_32 pixelOffset; + UINT_32 elementOffset; + UINT_32 tileSplitSlice; + UINT_32 pipe; + UINT_32 bank; + UINT_64 sliceBytes; + UINT_64 sliceOffset; + UINT_32 macroTilePitch; + UINT_32 macroTileHeight; + UINT_32 macroTilesPerRow; + UINT_32 macroTilesPerSlice; + UINT_64 macroTileBytes; + UINT_32 macroTileIndexX; + UINT_32 macroTileIndexY; + UINT_64 macroTileOffset; + UINT_64 totalOffset; + UINT_64 pipeInterleaveMask; + UINT_64 bankInterleaveMask; + UINT_64 pipeInterleaveOffset; + UINT_32 bankInterleaveOffset; + UINT_64 offset; + UINT_32 tileRowIndex; + UINT_32 tileColumnIndex; + UINT_32 tileIndex; + UINT_32 tileOffset; + + UINT_32 microTileThickness = Thickness(tileMode); + + // + // Compute the number of group, pipe, and bank bits. + // + UINT_32 numPipes = HwlGetPipes(pTileInfo); + UINT_32 numPipeInterleaveBits = Log2(m_pipeInterleaveBytes); + UINT_32 numPipeBits = Log2(numPipes); + UINT_32 numBankInterleaveBits = Log2(m_bankInterleave); + UINT_32 numBankBits = Log2(pTileInfo->banks); + + // + // Compute the micro tile size. + // + microTileBits = MicroTilePixels * microTileThickness * bpp * numSamples; + + microTileBytes = microTileBits / 8; + // + // Compute the pixel index within the micro tile. + // + pixelIndex = ComputePixelIndexWithinMicroTile(x, + y, + slice, + bpp, + tileMode, + microTileType); + + // + // Compute the sample offset and pixel offset. + // + if (isDepthSampleOrder) + { + // + // For depth surfaces, samples are stored contiguously for each element, so the sample + // offset is the sample number times the element size. 
+ // + sampleOffset = sample * bpp; + pixelOffset = pixelIndex * bpp * numSamples; + } + else + { + // + // For color surfaces, all elements for a particular sample are stored contiguously, so + // the sample offset is the sample number times the micro tile size divided yBit the number + // of samples. + // + sampleOffset = sample * (microTileBits / numSamples); + pixelOffset = pixelIndex * bpp; + } + + // + // Compute the element offset. + // + elementOffset = pixelOffset + sampleOffset; + + *pBitPosition = static_cast(elementOffset % 8); + + elementOffset /= 8; //bit-to-byte + + // + // Determine if tiles need to be split across slices. + // + // If the size of the micro tile is larger than the tile split size, then the tile will be + // split across multiple slices. + // + UINT_32 slicesPerTile = 1; + + if ((microTileBytes > pTileInfo->tileSplitBytes) && (microTileThickness == 1)) + { //don't support for thick mode + + // + // Compute the number of slices per tile. + // + slicesPerTile = microTileBytes / pTileInfo->tileSplitBytes; + + // + // Compute the tile split slice number for use in rotating the bank. + // + tileSplitSlice = elementOffset / pTileInfo->tileSplitBytes; + + // + // Adjust the element offset to account for the portion of the tile that is being moved to + // a new slice.. + // + elementOffset %= pTileInfo->tileSplitBytes; + + // + // Adjust the microTileBytes size to tileSplitBytes size since + // a new slice.. + // + microTileBytes = pTileInfo->tileSplitBytes; + } + else + { + tileSplitSlice = 0; + } + + // + // Compute macro tile pitch and height. + // + macroTilePitch = + (MicroTileWidth * pTileInfo->bankWidth * numPipes) * pTileInfo->macroAspectRatio; + macroTileHeight = + (MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks) / pTileInfo->macroAspectRatio; + + // + // Compute the number of bytes per macro tile. 
Note: bytes of the same bank/pipe actually + // + macroTileBytes = + static_cast(microTileBytes) * + (macroTilePitch / MicroTileWidth) * (macroTileHeight / MicroTileHeight) / + (numPipes * pTileInfo->banks); + + // + // Compute the number of macro tiles per row. + // + macroTilesPerRow = pitch / macroTilePitch; + + // + // Compute the offset to the macro tile containing the specified coordinate. + // + macroTileIndexX = x / macroTilePitch; + macroTileIndexY = y / macroTileHeight; + macroTileOffset = ((macroTileIndexY * macroTilesPerRow) + macroTileIndexX) * macroTileBytes; + + // + // Compute the number of macro tiles per slice. + // + macroTilesPerSlice = macroTilesPerRow * (height / macroTileHeight); + + // + // Compute the slice size. + // + sliceBytes = macroTilesPerSlice * macroTileBytes; + + // + // Compute the slice offset. + // + sliceOffset = sliceBytes * (tileSplitSlice + slicesPerTile * (slice / microTileThickness)); + + // + // Compute tile offest + // + tileRowIndex = (y / MicroTileHeight) % pTileInfo->bankHeight; + tileColumnIndex = ((x / MicroTileWidth) / numPipes) % pTileInfo->bankWidth; + tileIndex = (tileRowIndex * pTileInfo->bankWidth) + tileColumnIndex; + tileOffset = tileIndex * microTileBytes; + + // + // Combine the slice offset and macro tile offset with the pixel and sample offsets, accounting + // for the pipe and bank bits in the middle of the address. + // + totalOffset = sliceOffset + macroTileOffset + elementOffset + tileOffset; + + // + // Get the pipe and bank. + // + + // when the tileMode is PRT type, then adjust x and y coordinates + if (IsPrtNoRotationTileMode(tileMode)) + { + x = x % macroTilePitch; + y = y % macroTileHeight; + } + + pipe = ComputePipeFromCoord(x, + y, + slice, + tileMode, + pipeSwizzle, + ignoreSE, + pTileInfo); + + bank = ComputeBankFromCoord(x, + y, + slice, + tileMode, + bankSwizzle, + tileSplitSlice, + pTileInfo); + + // + // Split the offset to put some bits below the pipe+bank bits and some above. 
+ // + pipeInterleaveMask = (1 << numPipeInterleaveBits) - 1; + bankInterleaveMask = (1 << numBankInterleaveBits) - 1; + pipeInterleaveOffset = totalOffset & pipeInterleaveMask; + bankInterleaveOffset = static_cast((totalOffset >> numPipeInterleaveBits) & + bankInterleaveMask); + offset = totalOffset >> (numPipeInterleaveBits + numBankInterleaveBits); + + // + // Assemble the address from its components. + // + addr = pipeInterleaveOffset; + // This is to remove /analyze warnings + UINT_32 pipeBits = pipe << numPipeInterleaveBits; + UINT_32 bankInterleaveBits = bankInterleaveOffset << (numPipeInterleaveBits + numPipeBits); + UINT_32 bankBits = bank << (numPipeInterleaveBits + numPipeBits + + numBankInterleaveBits); + UINT_64 offsetBits = offset << (numPipeInterleaveBits + numPipeBits + + numBankInterleaveBits + numBankBits); + + addr |= pipeBits; + addr |= bankInterleaveBits; + addr |= bankBits; + addr |= offsetBits; + + return addr; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeSurfaceAddrFromCoordMicroTiled +* +* @brief +* Computes the surface address and bit position from a coordinate for 1D tilied +* (micro tiled) +* @return +* The byte address +**************************************************************************************************** +*/ +UINT_64 EgBasedLib::ComputeSurfaceAddrFromCoordMicroTiled( + UINT_32 x, ///< [in] x coordinate + UINT_32 y, ///< [in] y coordinate + UINT_32 slice, ///< [in] slice index + UINT_32 sample, ///< [in] sample index + UINT_32 bpp, ///< [in] bits per pixel + UINT_32 pitch, ///< [in] pitch, in pixels + UINT_32 height, ///< [in] height, in pixels + UINT_32 numSamples, ///< [in] number of samples + AddrTileMode tileMode, ///< [in] tile mode + AddrTileType microTileType, ///< [in] micro tiling type + BOOL_32 isDepthSampleOrder, ///< [in] TRUE if depth sample ordering is used + UINT_32* pBitPosition ///< [out] bit position, e.g. 
FMT_1 will use this + ) const +{ + UINT_64 addr = 0; + + UINT_32 microTileBytes; + UINT_64 sliceBytes; + UINT_32 microTilesPerRow; + UINT_32 microTileIndexX; + UINT_32 microTileIndexY; + UINT_32 microTileIndexZ; + UINT_64 sliceOffset; + UINT_64 microTileOffset; + UINT_32 sampleOffset; + UINT_32 pixelIndex; + UINT_32 pixelOffset; + + UINT_32 microTileThickness = Thickness(tileMode); + + // + // Compute the micro tile size. + // + microTileBytes = BITS_TO_BYTES(MicroTilePixels * microTileThickness * bpp * numSamples); + + // + // Compute the slice size. + // + sliceBytes = + BITS_TO_BYTES(static_cast(pitch) * height * microTileThickness * bpp * numSamples); + + // + // Compute the number of micro tiles per row. + // + microTilesPerRow = pitch / MicroTileWidth; + + // + // Compute the micro tile index. + // + microTileIndexX = x / MicroTileWidth; + microTileIndexY = y / MicroTileHeight; + microTileIndexZ = slice / microTileThickness; + + // + // Compute the slice offset. + // + sliceOffset = static_cast(microTileIndexZ) * sliceBytes; + + // + // Compute the offset to the micro tile containing the specified coordinate. + // + microTileOffset = (static_cast(microTileIndexY) * microTilesPerRow + microTileIndexX) * + microTileBytes; + + // + // Compute the pixel index within the micro tile. + // + pixelIndex = ComputePixelIndexWithinMicroTile(x, + y, + slice, + bpp, + tileMode, + microTileType); + + // Compute the sample offset. + // + if (isDepthSampleOrder) + { + // + // For depth surfaces, samples are stored contiguously for each element, so the sample + // offset is the sample number times the element size. + // + sampleOffset = sample * bpp; + pixelOffset = pixelIndex * bpp * numSamples; + } + else + { + // + // For color surfaces, all elements for a particular sample are stored contiguously, so + // the sample offset is the sample number times the micro tile size divided yBit the number + // of samples. 
+ // + sampleOffset = sample * (microTileBytes*8 / numSamples); + pixelOffset = pixelIndex * bpp; + } + + // + // Compute the bit position of the pixel. Each element is stored with one bit per sample. + // + + UINT_32 elemOffset = sampleOffset + pixelOffset; + + *pBitPosition = elemOffset % 8; + elemOffset /= 8; + + // + // Combine the slice offset, micro tile offset, sample offset, and pixel offsets. + // + addr = sliceOffset + microTileOffset + elemOffset; + + return addr; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlComputePixelCoordFromOffset +* +* @brief +* Compute pixel coordinate from offset inside a micro tile +* @return +* N/A +**************************************************************************************************** +*/ +VOID EgBasedLib::HwlComputePixelCoordFromOffset( + UINT_32 offset, ///< [in] offset inside micro tile in bits + UINT_32 bpp, ///< [in] bits per pixel + UINT_32 numSamples, ///< [in] number of samples + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 tileBase, ///< [in] base offset within a tile + UINT_32 compBits, ///< [in] component bits actually needed(for planar surface) + UINT_32* pX, ///< [out] x coordinate + UINT_32* pY, ///< [out] y coordinate + UINT_32* pSlice, ///< [out] slice index + UINT_32* pSample, ///< [out] sample index + AddrTileType microTileType, ///< [in] micro tiling type + BOOL_32 isDepthSampleOrder ///< [in] TRUE if depth sample order in microtile is used + ) const +{ + UINT_32 x = 0; + UINT_32 y = 0; + UINT_32 z = 0; + UINT_32 thickness = Thickness(tileMode); + + // For planar surface, we adjust offset acoording to tile base + if ((bpp != compBits) && (compBits != 0) && isDepthSampleOrder) + { + offset -= tileBase; + + ADDR_ASSERT(microTileType == ADDR_NON_DISPLAYABLE || + microTileType == ADDR_DEPTH_SAMPLE_ORDER); + + bpp = compBits; + } + + UINT_32 sampleTileBits; + UINT_32 samplePixelBits; + UINT_32 pixelIndex; + + 
if (isDepthSampleOrder) + { + samplePixelBits = bpp * numSamples; + pixelIndex = offset / samplePixelBits; + *pSample = (offset % samplePixelBits) / bpp; + } + else + { + sampleTileBits = MicroTilePixels * bpp * thickness; + *pSample = offset / sampleTileBits; + pixelIndex = (offset % sampleTileBits) / bpp; + } + + if (microTileType != ADDR_THICK) + { + if (microTileType == ADDR_DISPLAYABLE) // displayable + { + switch (bpp) + { + case 8: + x = pixelIndex & 0x7; + y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,3),_BIT(pixelIndex,4)); + break; + case 16: + x = pixelIndex & 0x7; + y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,3)); + break; + case 32: + x = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,1),_BIT(pixelIndex,0)); + y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,2)); + break; + case 64: + x = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,2),_BIT(pixelIndex,0)); + y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,1)); + break; + case 128: + x = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,2),_BIT(pixelIndex,1)); + y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,0)); + break; + default: + break; + } + } + else if (microTileType == ADDR_NON_DISPLAYABLE || microTileType == ADDR_DEPTH_SAMPLE_ORDER) + { + x = Bits2Number(3, _BIT(pixelIndex,4),_BIT(pixelIndex,2),_BIT(pixelIndex,0)); + y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,3),_BIT(pixelIndex,1)); + } + else if (microTileType == ADDR_ROTATED) + { + /* + 8-Bit Elements + element_index[5:0] = { x[2], x[0], x[1], y[2], y[1], y[0] } + + 16-Bit Elements + element_index[5:0] = { x[2], x[1], x[0], y[2], y[1], y[0] } + + 32-Bit Elements + element_index[5:0] = { x[2], x[1], y[2], x[0], y[1], y[0] } + + 64-Bit Elements + element_index[5:0] = { y[2], x[2], x[1], y[1], x[0], y[0] } + */ + switch(bpp) + { + case 8: + x = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,3),_BIT(pixelIndex,4)); 
+ y = pixelIndex & 0x7; + break; + case 16: + x = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,3)); + y = pixelIndex & 0x7; + break; + case 32: + x = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,2)); + y = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,1),_BIT(pixelIndex,0)); + break; + case 64: + x = Bits2Number(3, _BIT(pixelIndex,4),_BIT(pixelIndex,3),_BIT(pixelIndex,1)); + y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,2),_BIT(pixelIndex,0)); + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + } + + if (thickness > 1) // thick + { + z = Bits2Number(3, _BIT(pixelIndex,8),_BIT(pixelIndex,7),_BIT(pixelIndex,6)); + } + } + else + { + ADDR_ASSERT((m_chipFamily >= ADDR_CHIP_FAMILY_CI) && (thickness > 1)); + /* + 8-Bit Elements and 16-Bit Elements + element_index[7:0] = { y[2], x[2], z[1], z[0], y[1], x[1], y[0], x[0] } + + 32-Bit Elements + element_index[7:0] = { y[2], x[2], z[1], y[1], z[0], x[1], y[0], x[0] } + + 64-Bit Elements and 128-Bit Elements + element_index[7:0] = { y[2], x[2], z[1], y[1], x[1], z[0], y[0], x[0] } + + The equation to compute the element index for the extra thick tile: + element_index[8] = z[2] + */ + switch (bpp) + { + case 8: + case 16: // fall-through + x = Bits2Number(3, _BIT(pixelIndex,6),_BIT(pixelIndex,2),_BIT(pixelIndex,0)); + y = Bits2Number(3, _BIT(pixelIndex,7),_BIT(pixelIndex,3),_BIT(pixelIndex,1)); + z = Bits2Number(2, _BIT(pixelIndex,5),_BIT(pixelIndex,4)); + break; + case 32: + x = Bits2Number(3, _BIT(pixelIndex,6),_BIT(pixelIndex,2),_BIT(pixelIndex,0)); + y = Bits2Number(3, _BIT(pixelIndex,7),_BIT(pixelIndex,4),_BIT(pixelIndex,1)); + z = Bits2Number(2, _BIT(pixelIndex,5),_BIT(pixelIndex,3)); + break; + case 64: + case 128: // fall-through + x = Bits2Number(3, _BIT(pixelIndex,6),_BIT(pixelIndex,3),_BIT(pixelIndex,0)); + y = Bits2Number(3, _BIT(pixelIndex,7),_BIT(pixelIndex,4),_BIT(pixelIndex,1)); + z = Bits2Number(2, _BIT(pixelIndex,5),_BIT(pixelIndex,2)); + 
break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + if (thickness == 8) + { + z += Bits2Number(3,_BIT(pixelIndex,8),0,0); + } + } + + *pX = x; + *pY = y; + *pSlice += z; +} + +/** +**************************************************************************************************** +* EgBasedLib::DispatchComputeSurfaceCoordFromAddrDispatch +* +* @brief +* Compute (x,y,slice,sample) coordinates from surface address +* @return +* N/A +**************************************************************************************************** +*/ +VOID EgBasedLib::DispatchComputeSurfaceCoordFromAddr( + const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + UINT_64 addr = pIn->addr; + UINT_32 bitPosition = pIn->bitPosition; + UINT_32 bpp = pIn->bpp; + UINT_32 pitch = pIn->pitch; + UINT_32 height = pIn->height; + UINT_32 numSlices = pIn->numSlices; + UINT_32 numSamples = ((pIn->numSamples == 0) ? 1 : pIn->numSamples); + UINT_32 numFrags = ((pIn->numFrags == 0) ? 
numSamples : pIn->numFrags); + AddrTileMode tileMode = pIn->tileMode; + UINT_32 tileBase = pIn->tileBase; + UINT_32 compBits = pIn->compBits; + AddrTileType microTileType = pIn->tileType; + BOOL_32 ignoreSE = pIn->ignoreSE; + BOOL_32 isDepthSampleOrder = pIn->isDepth; + ADDR_TILEINFO* pTileInfo = pIn->pTileInfo; + + UINT_32* pX = &pOut->x; + UINT_32* pY = &pOut->y; + UINT_32* pSlice = &pOut->slice; + UINT_32* pSample = &pOut->sample; + + if (microTileType == ADDR_DEPTH_SAMPLE_ORDER) + { + isDepthSampleOrder = TRUE; + } + + if (m_chipFamily >= ADDR_CHIP_FAMILY_NI) + { + if (numFrags != numSamples) + { + numSamples = numFrags; + } + + /// @note + /// 128 bit/thick tiled surface doesn't support display tiling and + /// mipmap chain must have the same tileType, so please fill tileType correctly + if (IsLinear(pIn->tileMode) == FALSE) + { + if (bpp >= 128 || Thickness(tileMode) > 1) + { + ADDR_ASSERT(microTileType != ADDR_DISPLAYABLE); + } + } + } + + switch (tileMode) + { + case ADDR_TM_LINEAR_GENERAL://fall through + case ADDR_TM_LINEAR_ALIGNED: + ComputeSurfaceCoordFromAddrLinear(addr, + bitPosition, + bpp, + pitch, + height, + numSlices, + pX, + pY, + pSlice, + pSample); + break; + case ADDR_TM_1D_TILED_THIN1://fall through + case ADDR_TM_1D_TILED_THICK: + ComputeSurfaceCoordFromAddrMicroTiled(addr, + bitPosition, + bpp, + pitch, + height, + numSamples, + tileMode, + tileBase, + compBits, + pX, + pY, + pSlice, + pSample, + microTileType, + isDepthSampleOrder); + break; + case ADDR_TM_2D_TILED_THIN1: //fall through + case ADDR_TM_2D_TILED_THICK: //fall through + case ADDR_TM_3D_TILED_THIN1: //fall through + case ADDR_TM_3D_TILED_THICK: //fall through + case ADDR_TM_2D_TILED_XTHICK: //fall through + case ADDR_TM_3D_TILED_XTHICK: //fall through + case ADDR_TM_PRT_TILED_THIN1: //fall through + case ADDR_TM_PRT_2D_TILED_THIN1://fall through + case ADDR_TM_PRT_3D_TILED_THIN1://fall through + case ADDR_TM_PRT_TILED_THICK: //fall through + case 
ADDR_TM_PRT_2D_TILED_THICK://fall through + case ADDR_TM_PRT_3D_TILED_THICK: + UINT_32 pipeSwizzle; + UINT_32 bankSwizzle; + + if (m_configFlags.useCombinedSwizzle) + { + ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo, + &bankSwizzle, &pipeSwizzle); + } + else + { + pipeSwizzle = pIn->pipeSwizzle; + bankSwizzle = pIn->bankSwizzle; + } + + ComputeSurfaceCoordFromAddrMacroTiled(addr, + bitPosition, + bpp, + pitch, + height, + numSamples, + tileMode, + tileBase, + compBits, + microTileType, + ignoreSE, + isDepthSampleOrder, + pipeSwizzle, + bankSwizzle, + pTileInfo, + pX, + pY, + pSlice, + pSample); + break; + default: + ADDR_ASSERT_ALWAYS(); + } +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeSurfaceCoordFromAddrMacroTiled +* +* @brief +* Compute surface coordinates from address for macro tiled surface +* @return +* N/A +**************************************************************************************************** +*/ +VOID EgBasedLib::ComputeSurfaceCoordFromAddrMacroTiled( + UINT_64 addr, ///< [in] byte address + UINT_32 bitPosition, ///< [in] bit position + UINT_32 bpp, ///< [in] bits per pixel + UINT_32 pitch, ///< [in] pitch in pixels + UINT_32 height, ///< [in] height in pixels + UINT_32 numSamples, ///< [in] number of samples + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 tileBase, ///< [in] tile base offset + UINT_32 compBits, ///< [in] component bits (for planar surface) + AddrTileType microTileType, ///< [in] micro tiling type + BOOL_32 ignoreSE, ///< [in] TRUE if shader engines can be ignored + BOOL_32 isDepthSampleOrder, ///< [in] TRUE if depth sample order is used + UINT_32 pipeSwizzle, ///< [in] pipe swizzle + UINT_32 bankSwizzle, ///< [in] bank swizzle + ADDR_TILEINFO* pTileInfo, ///< [in] bank structure. 
+ /// **All fields to be valid on entry** + UINT_32* pX, ///< [out] X coord + UINT_32* pY, ///< [out] Y coord + UINT_32* pSlice, ///< [out] slice index + UINT_32* pSample ///< [out] sample index + ) const +{ + UINT_32 mx; + UINT_32 my; + UINT_64 tileBits; + UINT_64 macroTileBits; + UINT_32 slices; + UINT_32 tileSlices; + UINT_64 elementOffset; + UINT_64 macroTileIndex; + UINT_32 tileIndex; + UINT_64 totalOffset; + + UINT_32 bank; + UINT_32 pipe; + UINT_32 groupBits = m_pipeInterleaveBytes << 3; + UINT_32 pipes = HwlGetPipes(pTileInfo); + UINT_32 banks = pTileInfo->banks; + + UINT_32 bankInterleave = m_bankInterleave; + + UINT_64 addrBits = BYTES_TO_BITS(addr) + bitPosition; + + // + // remove bits for bank and pipe + // + totalOffset = (addrBits % groupBits) + + (((addrBits / groupBits / pipes) % bankInterleave) * groupBits) + + (((addrBits / groupBits / pipes) / bankInterleave) / banks) * groupBits * bankInterleave; + + UINT_32 microTileThickness = Thickness(tileMode); + + UINT_32 microTileBits = bpp * microTileThickness * MicroTilePixels * numSamples; + + UINT_32 microTileBytes = BITS_TO_BYTES(microTileBits); + // + // Determine if tiles need to be split across slices. + // + // If the size of the micro tile is larger than the tile split size, then the tile will be + // split across multiple slices. + // + UINT_32 slicesPerTile = 1; //_State->TileSlices + + if ((microTileBytes > pTileInfo->tileSplitBytes) && (microTileThickness == 1)) + { //don't support for thick mode + + // + // Compute the number of slices per tile. + // + slicesPerTile = microTileBytes / pTileInfo->tileSplitBytes; + } + + tileBits = microTileBits / slicesPerTile; // micro tile bits + + // in micro tiles because not MicroTileWidth timed. 
+ UINT_32 macroWidth = pTileInfo->bankWidth * pipes * pTileInfo->macroAspectRatio; + // in micro tiles as well + UINT_32 macroHeight = pTileInfo->bankHeight * banks / pTileInfo->macroAspectRatio; + + UINT_32 pitchInMacroTiles = pitch / MicroTileWidth / macroWidth; + + macroTileBits = (macroWidth * macroHeight) * tileBits / (banks * pipes); + + macroTileIndex = totalOffset / macroTileBits; + + // pitchMacros * height / heightMacros; macroTilesPerSlice == _State->SliceMacros + UINT_32 macroTilesPerSlice = (pitch / (macroWidth * MicroTileWidth)) * height / + (macroHeight * MicroTileWidth); + + slices = static_cast(macroTileIndex / macroTilesPerSlice); + + *pSlice = static_cast(slices / slicesPerTile * microTileThickness); + + // + // calculate element offset and x[2:0], y[2:0], z[1:0] for thick + // + tileSlices = slices % slicesPerTile; + + elementOffset = tileSlices * tileBits; + elementOffset += totalOffset % tileBits; + + UINT_32 coordZ = 0; + + HwlComputePixelCoordFromOffset(static_cast(elementOffset), + bpp, + numSamples, + tileMode, + tileBase, + compBits, + pX, + pY, + &coordZ, + pSample, + microTileType, + isDepthSampleOrder); + + macroTileIndex = macroTileIndex % macroTilesPerSlice; + *pY += static_cast(macroTileIndex / pitchInMacroTiles * macroHeight * MicroTileHeight); + *pX += static_cast(macroTileIndex % pitchInMacroTiles * macroWidth * MicroTileWidth); + + *pSlice += coordZ; + + tileIndex = static_cast((totalOffset % macroTileBits) / tileBits); + + my = (tileIndex / pTileInfo->bankWidth) % pTileInfo->bankHeight * MicroTileHeight; + mx = (tileIndex % pTileInfo->bankWidth) * pipes * MicroTileWidth; + + *pY += my; + *pX += mx; + + bank = ComputeBankFromAddr(addr, banks, pipes); + pipe = ComputePipeFromAddr(addr, pipes); + + HwlComputeSurfaceCoord2DFromBankPipe(tileMode, + pX, + pY, + *pSlice, + bank, + pipe, + bankSwizzle, + pipeSwizzle, + tileSlices, + ignoreSE, + pTileInfo); +} + +/** 
+**************************************************************************************************** +* EgBasedLib::ComputeSurfaceCoord2DFromBankPipe +* +* @brief +* Compute surface x,y coordinates from bank/pipe info +* @return +* N/A +**************************************************************************************************** +*/ +VOID EgBasedLib::ComputeSurfaceCoord2DFromBankPipe( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 x, ///< [in] x coordinate + UINT_32 y, ///< [in] y coordinate + UINT_32 slice, ///< [in] slice index + UINT_32 bank, ///< [in] bank number + UINT_32 pipe, ///< [in] pipe number + UINT_32 bankSwizzle,///< [in] bank swizzle + UINT_32 pipeSwizzle,///< [in] pipe swizzle + UINT_32 tileSlices, ///< [in] slices in a micro tile + ADDR_TILEINFO* pTileInfo, ///< [in] bank structure. **All fields to be valid on entry** + CoordFromBankPipe* pOutput ///< [out] pointer to extracted x/y bits + ) const +{ + UINT_32 yBit3 = 0; + UINT_32 yBit4 = 0; + UINT_32 yBit5 = 0; + UINT_32 yBit6 = 0; + + UINT_32 xBit3 = 0; + UINT_32 xBit4 = 0; + UINT_32 xBit5 = 0; + + UINT_32 tileSplitRotation; + + UINT_32 numPipes = HwlGetPipes(pTileInfo); + + UINT_32 bankRotation = ComputeBankRotation(tileMode, + pTileInfo->banks, numPipes); + + UINT_32 pipeRotation = ComputePipeRotation(tileMode, numPipes); + + UINT_32 xBit = x / (MicroTileWidth * pTileInfo->bankWidth * numPipes); + UINT_32 yBit = y / (MicroTileHeight * pTileInfo->bankHeight); + + //calculate the bank and pipe before rotation and swizzle + + switch (tileMode) + { + case ADDR_TM_2D_TILED_THIN1: //fall through + case ADDR_TM_2D_TILED_THICK: //fall through + case ADDR_TM_2D_TILED_XTHICK: //fall through + case ADDR_TM_3D_TILED_THIN1: //fall through + case ADDR_TM_3D_TILED_THICK: //fall through + case ADDR_TM_3D_TILED_XTHICK: + tileSplitRotation = ((pTileInfo->banks / 2) + 1); + break; + default: + tileSplitRotation = 0; + break; + } + + UINT_32 microTileThickness = Thickness(tileMode); + + bank ^= 
tileSplitRotation * tileSlices; + if (pipeRotation == 0) + { + bank ^= bankRotation * (slice / microTileThickness) + bankSwizzle; + bank %= pTileInfo->banks; + pipe ^= pipeSwizzle; + } + else + { + bank ^= bankRotation * (slice / microTileThickness) / numPipes + bankSwizzle; + bank %= pTileInfo->banks; + pipe ^= pipeRotation * (slice / microTileThickness) + pipeSwizzle; + } + + if (pTileInfo->macroAspectRatio == 1) + { + switch (pTileInfo->banks) + { + case 2: + yBit3 = _BIT(bank, 0) ^ _BIT(xBit,0); + break; + case 4: + yBit4 = _BIT(bank, 0) ^ _BIT(xBit,0); + yBit3 = _BIT(bank, 1) ^ _BIT(xBit,1); + break; + case 8: + yBit3 = _BIT(bank, 2) ^ _BIT(xBit,2); + yBit5 = _BIT(bank, 0) ^ _BIT(xBit,0); + yBit4 = _BIT(bank, 1) ^ _BIT(xBit,1) ^ yBit5; + break; + case 16: + yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); + yBit4 = _BIT(bank, 2) ^ _BIT(xBit, 2); + yBit6 = _BIT(bank, 0) ^ _BIT(xBit, 0); + yBit5 = _BIT(bank, 1) ^ _BIT(xBit, 1) ^ yBit6; + break; + default: + break; + } + + } + else if (pTileInfo->macroAspectRatio == 2) + { + switch (pTileInfo->banks) + { + case 2: //xBit3 = yBit3^b0 + xBit3 = _BIT(bank, 0) ^ _BIT(yBit,0); + break; + case 4: //xBit3=yBit4^b0; yBit3=xBit4^b1 + xBit3 = _BIT(bank, 0) ^ _BIT(yBit,1); + yBit3 = _BIT(bank, 1) ^ _BIT(xBit,1); + break; + case 8: //xBit4, xBit5, yBit5 are known + xBit3 = _BIT(bank, 0) ^ _BIT(yBit,2); + yBit3 = _BIT(bank, 2) ^ _BIT(xBit,2); + yBit4 = _BIT(bank, 1) ^ _BIT(xBit,1) ^ _BIT(yBit, 2); + break; + case 16://x4,x5,x6,y6 are known + xBit3 = _BIT(bank, 0) ^ _BIT(yBit, 3); //x3 = y6 ^ b0 + yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); //y3 = x6 ^ b3 + yBit4 = _BIT(bank, 2) ^ _BIT(xBit, 2); //y4 = x5 ^ b2 + yBit5 = _BIT(bank, 1) ^ _BIT(xBit, 1) ^ _BIT(yBit, 3); //y5=x4^y6^b1 + break; + default: + break; + } + } + else if (pTileInfo->macroAspectRatio == 4) + { + switch (pTileInfo->banks) + { + case 4: //yBit3, yBit4 + xBit3 = _BIT(bank, 0) ^ _BIT(yBit,1); + xBit4 = _BIT(bank, 1) ^ _BIT(yBit,0); + break; + case 8: //xBit5, yBit4, yBit5 + 
xBit3 = _BIT(bank, 0) ^ _BIT(yBit,2); + yBit3 = _BIT(bank, 2) ^ _BIT(xBit,2); + xBit4 = _BIT(bank, 1) ^ _BIT(yBit,1) ^ _BIT(yBit,2); + break; + case 16: //xBit5, xBit6, yBit5, yBit6 + xBit3 = _BIT(bank, 0) ^ _BIT(yBit, 3);//x3 = b0 ^ y6 + xBit4 = _BIT(bank, 1) ^ _BIT(yBit, 2) ^ _BIT(yBit, 3);//x4 = b1 ^ y5 ^ y6; + yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); //y3 = b3 ^ x6; + yBit4 = _BIT(bank, 2) ^ _BIT(xBit, 2); //y4 = b2 ^ x5; + break; + default: + break; + } + } + else if (pTileInfo->macroAspectRatio == 8) + { + switch (pTileInfo->banks) + { + case 8: //yBit3, yBit4, yBit5 + xBit3 = _BIT(bank, 0) ^ _BIT(yBit,2); //x3 = b0 ^ y5; + xBit4 = _BIT(bank, 1) ^ _BIT(yBit,1) ^ _BIT(yBit, 2);//x4 = b1 ^ y4 ^ y5; + xBit5 = _BIT(bank, 2) ^ _BIT(yBit,0); + break; + case 16: //xBit6, yBit4, yBit5, yBit6 + xBit3 = _BIT(bank, 0) ^ _BIT(yBit, 3);//x3 = y6 ^ b0 + xBit4 = _BIT(bank, 1) ^ _BIT(yBit, 2) ^ _BIT(yBit, 3);//x4 = y5 ^ y6 ^ b1 + xBit5 = _BIT(bank, 2) ^ _BIT(yBit, 1);//x5 = y4 ^ b2 + yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); //y3 = x6 ^ b3 + break; + default: + break; + } + } + + pOutput->xBits = xBit; + pOutput->yBits = yBit; + + pOutput->xBit3 = xBit3; + pOutput->xBit4 = xBit4; + pOutput->xBit5 = xBit5; + pOutput->yBit3 = yBit3; + pOutput->yBit4 = yBit4; + pOutput->yBit5 = yBit5; + pOutput->yBit6 = yBit6; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlExtractBankPipeSwizzle +* @brief +* Entry of EgBasedLib ExtractBankPipeSwizzle +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::HwlExtractBankPipeSwizzle( + const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, ///< [in] input structure + ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut ///< [out] output structure + ) const +{ + ExtractBankPipeSwizzle(pIn->base256b, + pIn->pTileInfo, + &pOut->bankSwizzle, + &pOut->pipeSwizzle); + + return 
ADDR_OK; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlCombineBankPipeSwizzle +* @brief +* Combine bank/pipe swizzle +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::HwlCombineBankPipeSwizzle( + UINT_32 bankSwizzle, ///< [in] bank swizzle + UINT_32 pipeSwizzle, ///< [in] pipe swizzle + ADDR_TILEINFO* pTileInfo, ///< [in] tile info + UINT_64 baseAddr, ///< [in] base address + UINT_32* pTileSwizzle ///< [out] combined swizzle + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + if (pTileSwizzle) + { + *pTileSwizzle = GetBankPipeSwizzle(bankSwizzle, pipeSwizzle, baseAddr, pTileInfo); + } + else + { + retCode = ADDR_INVALIDPARAMS; + } + + return retCode; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlComputeBaseSwizzle +* @brief +* Compute base swizzle +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::HwlComputeBaseSwizzle( + const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, + ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut + ) const +{ + UINT_32 bankSwizzle = 0; + UINT_32 pipeSwizzle = 0; + ADDR_TILEINFO* pTileInfo = pIn->pTileInfo; + + ADDR_ASSERT(IsMacroTiled(pIn->tileMode)); + ADDR_ASSERT(pIn->pTileInfo); + + /// This is a legacy misreading of h/w doc, use it as it doesn't hurt. 
+ static const UINT_8 bankRotationArray[4][16] = { + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // ADDR_SURF_2_BANK + { 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // ADDR_SURF_4_BANK + { 0, 3, 6, 1, 4, 7, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0 }, // ADDR_SURF_8_BANK + { 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 }, // ADDR_SURF_16_BANK + }; + + UINT_32 pipes = HwlGetPipes(pTileInfo); + (void)pipes; + UINT_32 banks = pTileInfo ? pTileInfo->banks : 2; + UINT_32 hwNumBanks; + + // Uses less bank swizzle bits + if (pIn->option.reduceBankBit && banks > 2) + { + banks >>= 1; + } + + switch (banks) + { + case 2: + hwNumBanks = 0; + break; + case 4: + hwNumBanks = 1; + break; + case 8: + hwNumBanks = 2; + break; + case 16: + hwNumBanks = 3; + break; + default: + ADDR_ASSERT_ALWAYS(); + hwNumBanks = 0; + break; + } + + if (pIn->option.genOption == ADDR_SWIZZLE_GEN_LINEAR) + { + bankSwizzle = pIn->surfIndex & (banks - 1); + } + else // (pIn->option.genOption == ADDR_SWIZZLE_GEN_DEFAULT) + { + bankSwizzle = bankRotationArray[hwNumBanks][pIn->surfIndex & (banks - 1)]; + } + + if (IsMacro3dTiled(pIn->tileMode)) + { + pipeSwizzle = pIn->surfIndex & (HwlGetPipes(pTileInfo) - 1); + } + + return HwlCombineBankPipeSwizzle(bankSwizzle, pipeSwizzle, pTileInfo, 0, &pOut->tileSwizzle); +} + +/** +**************************************************************************************************** +* EgBasedLib::ExtractBankPipeSwizzle +* @brief +* Extract bank/pipe swizzle from base256b +* @return +* N/A +**************************************************************************************************** +*/ +VOID EgBasedLib::ExtractBankPipeSwizzle( + UINT_32 base256b, ///< [in] input base256b register value + ADDR_TILEINFO* pTileInfo, ///< [in] 2D tile parameters. 
Client must provide all data + UINT_32* pBankSwizzle, ///< [out] bank swizzle + UINT_32* pPipeSwizzle ///< [out] pipe swizzle + ) const +{ + UINT_32 bankSwizzle = 0; + UINT_32 pipeSwizzle = 0; + + if (base256b != 0) + { + UINT_32 numPipes = HwlGetPipes(pTileInfo); + UINT_32 bankBits = QLog2(pTileInfo->banks); + UINT_32 pipeBits = QLog2(numPipes); + UINT_32 groupBytes = m_pipeInterleaveBytes; + UINT_32 bankInterleave = m_bankInterleave; + + pipeSwizzle = + (base256b / (groupBytes >> 8)) & ((1<> 8) / numPipes / bankInterleave) & ((1 << bankBits) - 1); + } + + *pPipeSwizzle = pipeSwizzle; + *pBankSwizzle = bankSwizzle; +} + +/** +**************************************************************************************************** +* EgBasedLib::GetBankPipeSwizzle +* @brief +* Combine bank/pipe swizzle +* @return +* Base256b bits (only filled bank/pipe bits) +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::GetBankPipeSwizzle( + UINT_32 bankSwizzle, ///< [in] bank swizzle + UINT_32 pipeSwizzle, ///< [in] pipe swizzle + UINT_64 baseAddr, ///< [in] base address + ADDR_TILEINFO* pTileInfo ///< [in] tile info + ) const +{ + UINT_32 pipeBits = QLog2(HwlGetPipes(pTileInfo)); + UINT_32 bankInterleaveBits = QLog2(m_bankInterleave); + UINT_32 tileSwizzle = pipeSwizzle + ((bankSwizzle << bankInterleaveBits) << pipeBits); + + baseAddr ^= tileSwizzle * m_pipeInterleaveBytes; + baseAddr >>= 8; + + return static_cast(baseAddr); +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeSliceTileSwizzle +* @brief +* Compute cubemap/3d texture faces/slices tile swizzle +* @return +* Tile swizzle +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::ComputeSliceTileSwizzle( + AddrTileMode tileMode, ///< [in] Tile mode + UINT_32 baseSwizzle, ///< [in] Base swizzle + 
UINT_32 slice, ///< [in] Slice index, Cubemap face index, 0 means +X + UINT_64 baseAddr, ///< [in] Base address + ADDR_TILEINFO* pTileInfo ///< [in] Bank structure + ) const +{ + UINT_32 tileSwizzle = 0; + + if (IsMacroTiled(tileMode)) // Swizzle only for macro tile mode + { + UINT_32 firstSlice = slice / Thickness(tileMode); + + UINT_32 numPipes = HwlGetPipes(pTileInfo); + UINT_32 numBanks = pTileInfo->banks; + + UINT_32 pipeRotation; + UINT_32 bankRotation; + + UINT_32 bankSwizzle = 0; + UINT_32 pipeSwizzle = 0; + + pipeRotation = ComputePipeRotation(tileMode, numPipes); + bankRotation = ComputeBankRotation(tileMode, numBanks, numPipes); + + if (baseSwizzle != 0) + { + ExtractBankPipeSwizzle(baseSwizzle, + pTileInfo, + &bankSwizzle, + &pipeSwizzle); + } + + if (pipeRotation == 0) //2D mode + { + bankSwizzle += firstSlice * bankRotation; + bankSwizzle %= numBanks; + } + else //3D mode + { + pipeSwizzle += firstSlice * pipeRotation; + pipeSwizzle %= numPipes; + bankSwizzle += firstSlice * bankRotation / numPipes; + bankSwizzle %= numBanks; + } + + tileSwizzle = GetBankPipeSwizzle(bankSwizzle, + pipeSwizzle, + baseAddr, + pTileInfo); + } + + return tileSwizzle; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlComputeQbStereoRightSwizzle +* +* @brief +* Compute right eye swizzle +* @return +* swizzle +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::HwlComputeQbStereoRightSwizzle( + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pInfo ///< [in] Surface info, must be valid + ) const +{ + UINT_32 bankBits = 0; + UINT_32 swizzle = 0; + + // The assumption is default swizzle for left eye is 0 + if (IsMacroTiled(pInfo->tileMode) && pInfo->pStereoInfo && pInfo->pTileInfo) + { + bankBits = ComputeBankFromCoord(0, pInfo->height, 0, + pInfo->tileMode, 0, 0, pInfo->pTileInfo); + + if (bankBits) + { + 
HwlCombineBankPipeSwizzle(bankBits, 0, pInfo->pTileInfo, 0, &swizzle); + } + } + + return swizzle; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeBankFromCoord +* +* @brief +* Compute bank number from coordinates +* @return +* Bank number +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::ComputeBankFromCoord( + UINT_32 x, ///< [in] x coordinate + UINT_32 y, ///< [in] y coordinate + UINT_32 slice, ///< [in] slice index + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bankSwizzle, ///< [in] bank swizzle + UINT_32 tileSplitSlice, ///< [in] If the size of the pixel offset is larger than the + /// tile split size, then the pixel will be moved to a separate + /// slice. This value equals pixelOffset / tileSplitBytes + /// in this case. Otherwise this is 0. + ADDR_TILEINFO* pTileInfo ///< [in] tile info + ) const +{ + UINT_32 pipes = HwlGetPipes(pTileInfo); + UINT_32 bankBit0 = 0; + UINT_32 bankBit1 = 0; + UINT_32 bankBit2 = 0; + UINT_32 bankBit3 = 0; + UINT_32 sliceRotation; + UINT_32 tileSplitRotation; + UINT_32 bank; + UINT_32 numBanks = pTileInfo->banks; + UINT_32 bankWidth = pTileInfo->bankWidth; + UINT_32 bankHeight = pTileInfo->bankHeight; + + UINT_32 tx = x / MicroTileWidth / (bankWidth * pipes); + UINT_32 ty = y / MicroTileHeight / bankHeight; + + UINT_32 x3 = _BIT(tx,0); + UINT_32 x4 = _BIT(tx,1); + UINT_32 x5 = _BIT(tx,2); + UINT_32 x6 = _BIT(tx,3); + UINT_32 y3 = _BIT(ty,0); + UINT_32 y4 = _BIT(ty,1); + UINT_32 y5 = _BIT(ty,2); + UINT_32 y6 = _BIT(ty,3); + + switch (numBanks) + { + case 16: + bankBit0 = x3 ^ y6; + bankBit1 = x4 ^ y5 ^ y6; + bankBit2 = x5 ^ y4; + bankBit3 = x6 ^ y3; + break; + case 8: + bankBit0 = x3 ^ y5; + bankBit1 = x4 ^ y4 ^ y5; + bankBit2 = x5 ^ y3; + break; + case 4: + bankBit0 = x3 ^ y4; + bankBit1 = x4 ^ y3; + break; + case 2: + bankBit0 = x3 ^ y3; + break; + default: + 
ADDR_ASSERT_ALWAYS(); + break; + } + + bank = bankBit0 | (bankBit1 << 1) | (bankBit2 << 2) | (bankBit3 << 3); + + //Bits2Number(4, bankBit3, bankBit2, bankBit1, bankBit0); + + bank = HwlPreAdjustBank((x / MicroTileWidth), bank, pTileInfo); + // + // Compute bank rotation for the slice. + // + UINT_32 microTileThickness = Thickness(tileMode); + + switch (tileMode) + { + case ADDR_TM_2D_TILED_THIN1: // fall through + case ADDR_TM_2D_TILED_THICK: // fall through + case ADDR_TM_2D_TILED_XTHICK: + sliceRotation = ((numBanks / 2) - 1) * (slice / microTileThickness); + break; + case ADDR_TM_3D_TILED_THIN1: // fall through + case ADDR_TM_3D_TILED_THICK: // fall through + case ADDR_TM_3D_TILED_XTHICK: + sliceRotation = + Max(1u, (pipes / 2) - 1) * (slice / microTileThickness) / pipes; + break; + default: + sliceRotation = 0; + break; + } + + // + // Compute bank rotation for the tile split slice. + // + // The sample slice will be non-zero if samples must be split across multiple slices. + // This situation arises when the micro tile size multiplied yBit the number of samples exceeds + // the split size (set in GB_ADDR_CONFIG). + // + switch (tileMode) + { + case ADDR_TM_2D_TILED_THIN1: //fall through + case ADDR_TM_3D_TILED_THIN1: //fall through + case ADDR_TM_PRT_2D_TILED_THIN1: //fall through + case ADDR_TM_PRT_3D_TILED_THIN1: //fall through + tileSplitRotation = ((numBanks / 2) + 1) * tileSplitSlice; + break; + default: + tileSplitRotation = 0; + break; + } + + // + // Apply bank rotation for the slice and tile split slice. 
+ // + bank ^= bankSwizzle + sliceRotation; + bank ^= tileSplitRotation; + + bank &= (numBanks - 1); + + return bank; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeBankFromAddr +* +* @brief +* Compute the bank number from an address +* @return +* Bank number +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::ComputeBankFromAddr( + UINT_64 addr, ///< [in] address + UINT_32 numBanks, ///< [in] number of banks + UINT_32 numPipes ///< [in] number of pipes + ) const +{ + UINT_32 bank; + + // + // The LSBs of the address are arranged as follows: + // bank | bankInterleave | pipe | pipeInterleave + // + // To get the bank number, shift off the pipe interleave, pipe, and bank interlave bits and + // mask the bank bits. + // + bank = static_cast( + (addr >> Log2(m_pipeInterleaveBytes * numPipes * m_bankInterleave)) & + (numBanks - 1) + ); + + return bank; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputePipeRotation +* +* @brief +* Compute pipe rotation value +* @return +* Pipe rotation +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::ComputePipeRotation( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 numPipes ///< [in] number of pipes + ) const +{ + UINT_32 rotation; + + switch (tileMode) + { + case ADDR_TM_3D_TILED_THIN1: //fall through + case ADDR_TM_3D_TILED_THICK: //fall through + case ADDR_TM_3D_TILED_XTHICK: //fall through + case ADDR_TM_PRT_3D_TILED_THIN1: //fall through + case ADDR_TM_PRT_3D_TILED_THICK: + rotation = (numPipes < 4) ? 
1 : (numPipes / 2 - 1); + break; + default: + rotation = 0; + } + + return rotation; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeBankRotation +* +* @brief +* Compute bank rotation value +* @return +* Bank rotation +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::ComputeBankRotation( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 numBanks, ///< [in] number of banks + UINT_32 numPipes ///< [in] number of pipes + ) const +{ + UINT_32 rotation; + + switch (tileMode) + { + case ADDR_TM_2D_TILED_THIN1: // fall through + case ADDR_TM_2D_TILED_THICK: // fall through + case ADDR_TM_2D_TILED_XTHICK: + case ADDR_TM_PRT_2D_TILED_THIN1: + case ADDR_TM_PRT_2D_TILED_THICK: + // Rotate banks per Z-slice yBit 1 for 4-bank or 3 for 8-bank + rotation = numBanks / 2 - 1; + break; + case ADDR_TM_3D_TILED_THIN1: // fall through + case ADDR_TM_3D_TILED_THICK: // fall through + case ADDR_TM_3D_TILED_XTHICK: + case ADDR_TM_PRT_3D_TILED_THIN1: + case ADDR_TM_PRT_3D_TILED_THICK: + rotation = (numPipes < 4) ? 
1 : (numPipes / 2 - 1); // rotate pipes & banks + break; + default: + rotation = 0; + } + + return rotation; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeHtileBytes +* +* @brief +* Compute htile size in bytes +* +* @return +* Htile size in bytes +**************************************************************************************************** +*/ +UINT_64 EgBasedLib::ComputeHtileBytes( + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 bpp, ///< [in] bits per pixel + BOOL_32 isLinear, ///< [in] if it is linear mode + UINT_32 numSlices, ///< [in] number of slices + UINT_64* sliceBytes, ///< [out] bytes per slice + UINT_32 baseAlign ///< [in] base alignments + ) const +{ + UINT_64 surfBytes; + + const UINT_64 HtileCacheLineSize = BITS_TO_BYTES(HtileCacheBits); + + *sliceBytes = BITS_TO_BYTES(static_cast(pitch) * height * bpp / 64); + + if (m_configFlags.useHtileSliceAlign) + { + // Align the sliceSize to htilecachelinesize * pipes at first + *sliceBytes = PowTwoAlign(*sliceBytes, HtileCacheLineSize * m_pipes); + surfBytes = *sliceBytes * numSlices; + } + else + { + // Align the surfSize to htilecachelinesize * pipes at last + surfBytes = *sliceBytes * numSlices; + surfBytes = PowTwoAlign(surfBytes, HtileCacheLineSize * m_pipes); + } + + return surfBytes; +} + +/** +**************************************************************************************************** +* EgBasedLib::DispatchComputeFmaskInfo +* +* @brief +* Compute fmask sizes include padded pitch, height, slices, total size in bytes, +* meanwhile output suitable tile mode and alignments as well. Results are returned +* through output parameters. 
+* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::DispatchComputeFmaskInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut) ///< [out] output structure +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + ADDR_COMPUTE_SURFACE_INFO_INPUT surfIn = {0}; + ADDR_COMPUTE_SURFACE_INFO_OUTPUT surfOut = {0}; + + // Setup input structure + surfIn.tileMode = pIn->tileMode; + surfIn.width = pIn->pitch; + surfIn.height = pIn->height; + surfIn.numSlices = pIn->numSlices; + surfIn.pTileInfo = pIn->pTileInfo; + surfIn.tileType = ADDR_NON_DISPLAYABLE; + surfIn.flags.fmask = 1; + + // Setup output structure + surfOut.pTileInfo = pOut->pTileInfo; + + // Setup hwl specific fields + HwlFmaskPreThunkSurfInfo(pIn, pOut, &surfIn, &surfOut); + + surfIn.bpp = HwlComputeFmaskBits(pIn, &surfIn.numSamples); + + // ComputeSurfaceInfo needs numSamples in surfOut as surface routines need adjusted numSamples + surfOut.numSamples = surfIn.numSamples; + + retCode = HwlComputeSurfaceInfo(&surfIn, &surfOut); + + // Save bpp field for surface dump support + surfOut.bpp = surfIn.bpp; + + if (retCode == ADDR_OK) + { + pOut->bpp = surfOut.bpp; + pOut->pitch = surfOut.pitch; + pOut->height = surfOut.height; + pOut->numSlices = surfOut.depth; + pOut->fmaskBytes = surfOut.surfSize; + pOut->baseAlign = surfOut.baseAlign; + pOut->pitchAlign = surfOut.pitchAlign; + pOut->heightAlign = surfOut.heightAlign; + + if (surfOut.depth > 1) + { + // For fmask, expNumSlices is stored in depth. 
+ pOut->sliceSize = surfOut.surfSize / surfOut.depth; + } + else + { + pOut->sliceSize = surfOut.surfSize; + } + + // Save numSamples field for surface dump support + pOut->numSamples = surfOut.numSamples; + + HwlFmaskPostThunkSurfInfo(&surfOut, pOut); + } + + return retCode; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlFmaskSurfaceInfo +* @brief +* Entry of EgBasedLib ComputeFmaskInfo +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::HwlComputeFmaskInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut ///< [out] output structure + ) +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + ADDR_TILEINFO tileInfo = {0}; + + // Use internal tile info if pOut does not have a valid pTileInfo + if (pOut->pTileInfo == NULL) + { + pOut->pTileInfo = &tileInfo; + } + + retCode = DispatchComputeFmaskInfo(pIn, pOut); + + if (retCode == ADDR_OK) + { + pOut->tileIndex = + HwlPostCheckTileIndex(pOut->pTileInfo, pIn->tileMode, ADDR_NON_DISPLAYABLE, + pOut->tileIndex); + } + + // Resets pTileInfo to NULL if the internal tile info is used + if (pOut->pTileInfo == &tileInfo) + { + pOut->pTileInfo = NULL; + } + + return retCode; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlComputeFmaskAddrFromCoord +* @brief +* Entry of EgBasedLib ComputeFmaskAddrFromCoord +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::HwlComputeFmaskAddrFromCoord( + const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE retCode = 
ADDR_OK; + + return retCode; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlComputeFmaskCoordFromAddr +* @brief +* Entry of EgBasedLib ComputeFmaskCoordFromAddr +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::HwlComputeFmaskCoordFromAddr( + const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + return retCode; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeFmaskNumPlanesFromNumSamples +* +* @brief +* Compute fmask number of planes from number of samples +* +* @return +* Number of planes +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::ComputeFmaskNumPlanesFromNumSamples( + UINT_32 numSamples) ///< [in] number of samples +{ + UINT_32 numPlanes; + + // + // FMASK is stored such that each micro tile is composed of elements containing N bits, where + // N is the number of samples. There is a micro tile for each bit in the FMASK address, and + // micro tiles for each address bit, sometimes referred to as a plane, are stored sequentially. + // The FMASK for a 2-sample surface looks like a general surface with 2 bits per element. + // The FMASK for a 4-sample surface looks like a general surface with 4 bits per element and + // 2 samples. The FMASK for an 8-sample surface looks like a general surface with 8 bits per + // element and 4 samples. R6xx and R7xx only stored 3 planes for 8-sample FMASK surfaces. + // This was changed for R8xx to simplify the logic in the CB. 
+ // + switch (numSamples) + { + case 2: + numPlanes = 1; + break; + case 4: + numPlanes = 2; + break; + case 8: + numPlanes = 4; + break; + default: + ADDR_UNHANDLED_CASE(); + numPlanes = 0; + break; + } + return numPlanes; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeFmaskResolvedBppFromNumSamples +* +* @brief +* Compute resolved fmask effective bpp based on number of samples +* +* @return +* bpp +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::ComputeFmaskResolvedBppFromNumSamples( + UINT_32 numSamples) ///< number of samples +{ + UINT_32 bpp; + + // + // Resolved FMASK surfaces are generated yBit the CB and read yBit the texture unit + // so that the texture unit can read compressed multi-sample color data. + // These surfaces store each index value packed per element. + // Each element contains at least num_samples * log2(num_samples) bits. + // Resolved FMASK surfaces are addressed as follows: + // 2-sample Addressed similarly to a color surface with 8 bits per element and 1 sample. + // 4-sample Addressed similarly to a color surface with 8 bits per element and 1 sample. + // 8-sample Addressed similarly to a color surface with 32 bits per element and 1 sample. 
+ + switch (numSamples) + { + case 2: + bpp = 8; + break; + case 4: + bpp = 8; + break; + case 8: + bpp = 32; + break; + default: + ADDR_UNHANDLED_CASE(); + bpp = 0; + break; + } + return bpp; +} + +/** +**************************************************************************************************** +* EgBasedLib::IsTileInfoAllZero +* +* @brief +* Return TRUE if all field are zero +* @note +* Since NULL input is consider to be all zero +**************************************************************************************************** +*/ +BOOL_32 EgBasedLib::IsTileInfoAllZero( + const ADDR_TILEINFO* pTileInfo) +{ + BOOL_32 allZero = TRUE; + + if (pTileInfo) + { + if ((pTileInfo->banks != 0) || + (pTileInfo->bankWidth != 0) || + (pTileInfo->bankHeight != 0) || + (pTileInfo->macroAspectRatio != 0) || + (pTileInfo->tileSplitBytes != 0) || + (pTileInfo->pipeConfig != 0) + ) + { + allZero = FALSE; + } + } + + return allZero; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlTileInfoEqual +* +* @brief +* Return TRUE if all field are equal +* @note +* Only takes care of current HWL's data +**************************************************************************************************** +*/ +BOOL_32 EgBasedLib::HwlTileInfoEqual( + const ADDR_TILEINFO* pLeft, ///<[in] Left compare operand + const ADDR_TILEINFO* pRight ///<[in] Right compare operand + ) const +{ + BOOL_32 equal = FALSE; + + if (pLeft->banks == pRight->banks && + pLeft->bankWidth == pRight->bankWidth && + pLeft->bankHeight == pRight->bankHeight && + pLeft->macroAspectRatio == pRight->macroAspectRatio && + pLeft->tileSplitBytes == pRight->tileSplitBytes) + { + equal = TRUE; + } + + return equal; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlConvertTileInfoToHW +* @brief +* Entry of EgBasedLib ConvertTileInfoToHW +* @return +* 
ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::HwlConvertTileInfoToHW( + const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] input structure + ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + ADDR_TILEINFO *pTileInfoIn = pIn->pTileInfo; + ADDR_TILEINFO *pTileInfoOut = pOut->pTileInfo; + + if ((pTileInfoIn != NULL) && (pTileInfoOut != NULL)) + { + if (pIn->reverse == FALSE) + { + switch (pTileInfoIn->banks) + { + case 2: + pTileInfoOut->banks = 0; + break; + case 4: + pTileInfoOut->banks = 1; + break; + case 8: + pTileInfoOut->banks = 2; + break; + case 16: + pTileInfoOut->banks = 3; + break; + default: + ADDR_ASSERT_ALWAYS(); + retCode = ADDR_INVALIDPARAMS; + pTileInfoOut->banks = 0; + break; + } + + switch (pTileInfoIn->bankWidth) + { + case 1: + pTileInfoOut->bankWidth = 0; + break; + case 2: + pTileInfoOut->bankWidth = 1; + break; + case 4: + pTileInfoOut->bankWidth = 2; + break; + case 8: + pTileInfoOut->bankWidth = 3; + break; + default: + ADDR_ASSERT_ALWAYS(); + retCode = ADDR_INVALIDPARAMS; + pTileInfoOut->bankWidth = 0; + break; + } + + switch (pTileInfoIn->bankHeight) + { + case 1: + pTileInfoOut->bankHeight = 0; + break; + case 2: + pTileInfoOut->bankHeight = 1; + break; + case 4: + pTileInfoOut->bankHeight = 2; + break; + case 8: + pTileInfoOut->bankHeight = 3; + break; + default: + ADDR_ASSERT_ALWAYS(); + retCode = ADDR_INVALIDPARAMS; + pTileInfoOut->bankHeight = 0; + break; + } + + switch (pTileInfoIn->macroAspectRatio) + { + case 1: + pTileInfoOut->macroAspectRatio = 0; + break; + case 2: + pTileInfoOut->macroAspectRatio = 1; + break; + case 4: + pTileInfoOut->macroAspectRatio = 2; + break; + case 8: + pTileInfoOut->macroAspectRatio = 3; + break; + default: + ADDR_ASSERT_ALWAYS(); + retCode = ADDR_INVALIDPARAMS; + pTileInfoOut->macroAspectRatio = 0; + break; + } + + 
switch (pTileInfoIn->tileSplitBytes) + { + case 64: + pTileInfoOut->tileSplitBytes = 0; + break; + case 128: + pTileInfoOut->tileSplitBytes = 1; + break; + case 256: + pTileInfoOut->tileSplitBytes = 2; + break; + case 512: + pTileInfoOut->tileSplitBytes = 3; + break; + case 1024: + pTileInfoOut->tileSplitBytes = 4; + break; + case 2048: + pTileInfoOut->tileSplitBytes = 5; + break; + case 4096: + pTileInfoOut->tileSplitBytes = 6; + break; + default: + ADDR_ASSERT_ALWAYS(); + retCode = ADDR_INVALIDPARAMS; + pTileInfoOut->tileSplitBytes = 0; + break; + } + } + else + { + switch (pTileInfoIn->banks) + { + case 0: + pTileInfoOut->banks = 2; + break; + case 1: + pTileInfoOut->banks = 4; + break; + case 2: + pTileInfoOut->banks = 8; + break; + case 3: + pTileInfoOut->banks = 16; + break; + default: + ADDR_ASSERT_ALWAYS(); + retCode = ADDR_INVALIDPARAMS; + pTileInfoOut->banks = 2; + break; + } + + switch (pTileInfoIn->bankWidth) + { + case 0: + pTileInfoOut->bankWidth = 1; + break; + case 1: + pTileInfoOut->bankWidth = 2; + break; + case 2: + pTileInfoOut->bankWidth = 4; + break; + case 3: + pTileInfoOut->bankWidth = 8; + break; + default: + ADDR_ASSERT_ALWAYS(); + retCode = ADDR_INVALIDPARAMS; + pTileInfoOut->bankWidth = 1; + break; + } + + switch (pTileInfoIn->bankHeight) + { + case 0: + pTileInfoOut->bankHeight = 1; + break; + case 1: + pTileInfoOut->bankHeight = 2; + break; + case 2: + pTileInfoOut->bankHeight = 4; + break; + case 3: + pTileInfoOut->bankHeight = 8; + break; + default: + ADDR_ASSERT_ALWAYS(); + retCode = ADDR_INVALIDPARAMS; + pTileInfoOut->bankHeight = 1; + break; + } + + switch (pTileInfoIn->macroAspectRatio) + { + case 0: + pTileInfoOut->macroAspectRatio = 1; + break; + case 1: + pTileInfoOut->macroAspectRatio = 2; + break; + case 2: + pTileInfoOut->macroAspectRatio = 4; + break; + case 3: + pTileInfoOut->macroAspectRatio = 8; + break; + default: + ADDR_ASSERT_ALWAYS(); + retCode = ADDR_INVALIDPARAMS; + pTileInfoOut->macroAspectRatio = 1; + break; + } 
+ + switch (pTileInfoIn->tileSplitBytes) + { + case 0: + pTileInfoOut->tileSplitBytes = 64; + break; + case 1: + pTileInfoOut->tileSplitBytes = 128; + break; + case 2: + pTileInfoOut->tileSplitBytes = 256; + break; + case 3: + pTileInfoOut->tileSplitBytes = 512; + break; + case 4: + pTileInfoOut->tileSplitBytes = 1024; + break; + case 5: + pTileInfoOut->tileSplitBytes = 2048; + break; + case 6: + pTileInfoOut->tileSplitBytes = 4096; + break; + default: + ADDR_ASSERT_ALWAYS(); + retCode = ADDR_INVALIDPARAMS; + pTileInfoOut->tileSplitBytes = 64; + break; + } + } + + if (pTileInfoIn != pTileInfoOut) + { + pTileInfoOut->pipeConfig = pTileInfoIn->pipeConfig; + } + } + else + { + ADDR_ASSERT_ALWAYS(); + retCode = ADDR_INVALIDPARAMS; + } + + return retCode; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlComputeSurfaceInfo +* @brief +* Entry of EgBasedLib ComputeSurfaceInfo +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::HwlComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + if (pIn->numSamples < pIn->numFrags) + { + retCode = ADDR_INVALIDPARAMS; + } + + ADDR_TILEINFO tileInfo = {0}; + + if (retCode == ADDR_OK) + { + // Uses internal tile info if pOut does not have a valid pTileInfo + if (pOut->pTileInfo == NULL) + { + pOut->pTileInfo = &tileInfo; + } + + if (DispatchComputeSurfaceInfo(pIn, pOut) == FALSE) + { + retCode = ADDR_INVALIDPARAMS; + } + + // In case client uses tile info as input and would like to calculate a correct size and + // alignment together with tile info as output when the tile info is not suppose to have any + // matching indices in tile mode tables. 
+ if (pIn->flags.skipIndicesOutput == FALSE) + { + // Returns an index + pOut->tileIndex = HwlPostCheckTileIndex(pOut->pTileInfo, + pOut->tileMode, + pOut->tileType, + pOut->tileIndex); + + if (IsMacroTiled(pOut->tileMode) && (pOut->macroModeIndex == TileIndexInvalid)) + { + pOut->macroModeIndex = HwlComputeMacroModeIndex(pOut->tileIndex, + pIn->flags, + pIn->bpp, + pIn->numSamples, + pOut->pTileInfo); + } + } + + // Resets pTileInfo to NULL if the internal tile info is used + if (pOut->pTileInfo == &tileInfo) + { +#if DEBUG + // Client does not pass in a valid pTileInfo + if (IsMacroTiled(pOut->tileMode)) + { + // If a valid index is returned, then no pTileInfo is okay + ADDR_ASSERT((m_configFlags.useTileIndex == FALSE) || + (pOut->tileIndex != TileIndexInvalid)); + + if (IsTileInfoAllZero(pIn->pTileInfo) == FALSE) + { + // The initial value of pIn->pTileInfo is copied to tileInfo + // We do not expect any of these value to be changed nor any 0 of inputs + ADDR_ASSERT(tileInfo.banks == pIn->pTileInfo->banks); + ADDR_ASSERT(tileInfo.bankWidth == pIn->pTileInfo->bankWidth); + ADDR_ASSERT(tileInfo.bankHeight == pIn->pTileInfo->bankHeight); + ADDR_ASSERT(tileInfo.macroAspectRatio == pIn->pTileInfo->macroAspectRatio); + ADDR_ASSERT(tileInfo.tileSplitBytes == pIn->pTileInfo->tileSplitBytes); + } + } +#endif + pOut->pTileInfo = NULL; + } + } + + return retCode; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlComputeSurfaceAddrFromCoord +* @brief +* Entry of EgBasedLib ComputeSurfaceAddrFromCoord +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::HwlComputeSurfaceAddrFromCoord( + const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE retCode = 
ADDR_OK; + + if ( +#if !ALT_TEST // Overflow test needs this out-of-boundary coord + (pIn->x > pIn->pitch) || + (pIn->y > pIn->height) || +#endif + (pIn->numSamples > m_maxSamples)) + { + retCode = ADDR_INVALIDPARAMS; + } + else + { + pOut->addr = DispatchComputeSurfaceAddrFromCoord(pIn, pOut); + } + + return retCode; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlComputeSurfaceCoordFromAddr +* @brief +* Entry of EgBasedLib ComputeSurfaceCoordFromAddr +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::HwlComputeSurfaceCoordFromAddr( + const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + if ((pIn->bitPosition >= 8) || + (pIn->numSamples > m_maxSamples)) + { + retCode = ADDR_INVALIDPARAMS; + } + else + { + DispatchComputeSurfaceCoordFromAddr(pIn, pOut); + } + return retCode; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlComputeSliceTileSwizzle +* @brief +* Entry of EgBasedLib ComputeSurfaceCoordFromAddr +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::HwlComputeSliceTileSwizzle( + const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + if (pIn->pTileInfo && (pIn->pTileInfo->banks > 0)) + { + + pOut->tileSwizzle = ComputeSliceTileSwizzle(pIn->tileMode, + pIn->baseSwizzle, + pIn->slice, + pIn->baseAddr, + pIn->pTileInfo); + } + else + { + retCode = ADDR_INVALIDPARAMS; + } + + 
return retCode; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlComputeHtileBpp +* +* @brief +* Compute htile bpp +* +* @return +* Htile bpp +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::HwlComputeHtileBpp( + BOOL_32 isWidth8, ///< [in] TRUE if block width is 8 + BOOL_32 isHeight8 ///< [in] TRUE if block height is 8 + ) const +{ + // only support 8x8 mode + ADDR_ASSERT(isWidth8 && isHeight8); + return 32; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlComputeHtileBaseAlign +* +* @brief +* Compute htile base alignment +* +* @return +* Htile base alignment +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::HwlComputeHtileBaseAlign( + BOOL_32 isTcCompatible, ///< [in] if TC compatible + BOOL_32 isLinear, ///< [in] if it is linear mode + ADDR_TILEINFO* pTileInfo ///< [in] Tile info + ) const +{ + UINT_32 baseAlign = m_pipeInterleaveBytes * HwlGetPipes(pTileInfo); + + if (isTcCompatible) + { + ADDR_ASSERT(pTileInfo != NULL); + if (pTileInfo) + { + baseAlign *= pTileInfo->banks; + } + } + + return baseAlign; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlGetPitchAlignmentMicroTiled +* +* @brief +* Compute 1D tiled surface pitch alignment, calculation results are returned through +* output parameters. 
+* +* @return +* pitch alignment +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::HwlGetPitchAlignmentMicroTiled( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 numSamples ///< [in] number of samples + ) const +{ + UINT_32 pitchAlign; + + UINT_32 microTileThickness = Thickness(tileMode); + + UINT_32 pixelsPerMicroTile; + UINT_32 pixelsPerPipeInterleave; + UINT_32 microTilesPerPipeInterleave; + + // + // Special workaround for depth/stencil buffer, use 8 bpp to meet larger requirement for + // stencil buffer since pitch alignment is related to bpp. + // For a depth only buffer do not set this. + // + // Note: this actually does not work for mipmap but mipmap depth texture is not really + // sampled with mipmap. + // + if (flags.depth && (flags.noStencil == FALSE)) + { + bpp = 8; + } + + pixelsPerMicroTile = MicroTilePixels * microTileThickness; + pixelsPerPipeInterleave = BYTES_TO_BITS(m_pipeInterleaveBytes) / (bpp * numSamples); + microTilesPerPipeInterleave = pixelsPerPipeInterleave / pixelsPerMicroTile; + + pitchAlign = Max(MicroTileWidth, microTilesPerPipeInterleave * MicroTileWidth); + + return pitchAlign; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlGetSizeAdjustmentMicroTiled +* +* @brief +* Adjust 1D tiled surface pitch and slice size +* +* @return +* Logical slice size in bytes +**************************************************************************************************** +*/ +UINT_64 EgBasedLib::HwlGetSizeAdjustmentMicroTiled( + UINT_32 thickness, ///< [in] thickness + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 numSamples, ///< [in] number of samples + UINT_32 baseAlign, ///< [in] base alignment + UINT_32 pitchAlign, ///< [in] pitch 
alignment + UINT_32* pPitch, ///< [in,out] pointer to pitch + UINT_32* pHeight ///< [in,out] pointer to height + ) const +{ + UINT_64 logicalSliceSize; + MAYBE_UNUSED UINT_64 physicalSliceSize; + + UINT_32 pitch = *pPitch; + UINT_32 height = *pHeight; + + // Logical slice: pitch * height * bpp * numSamples (no 1D MSAA so actually numSamples == 1) + logicalSliceSize = BITS_TO_BYTES(static_cast(pitch) * height * bpp * numSamples); + + // Physical slice: multiplied by thickness + physicalSliceSize = logicalSliceSize * thickness; + + // + // R800 will always pad physical slice size to baseAlign which is pipe_interleave_bytes + // + ADDR_ASSERT((physicalSliceSize % baseAlign) == 0); + + return logicalSliceSize; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlStereoCheckRightOffsetPadding +* +* @brief +* check if the height needs extra padding for stereo right eye offset, to avoid swizzling +* +* @return +* TRUE is the extra padding is needed +* +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::HwlStereoCheckRightOffsetPadding( + ADDR_TILEINFO* pTileInfo ///< Tiling info + ) const +{ + UINT_32 stereoHeightAlign = 0; + + if (pTileInfo->macroAspectRatio > 2) + { + // Since 3D rendering treats right eye surface starting from y == "eye height" while + // display engine treats it to be 0, so the bank bits may be different. + // Additional padding in height is required to make sure it's possible + // to achieve synonym by adjusting bank swizzle of right eye surface. 
+ + static const UINT_32 StereoAspectRatio = 2; + stereoHeightAlign = pTileInfo->banks * + pTileInfo->bankHeight * + MicroTileHeight / + StereoAspectRatio; + } + + return stereoHeightAlign; +} + +} // V1 +} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/src/r800/egbaddrlib.h mesa-19.0.1/src/amd/addrlib/src/r800/egbaddrlib.h --- mesa-18.3.3/src/amd/addrlib/src/r800/egbaddrlib.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/r800/egbaddrlib.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,430 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file egbaddrlib.h +* @brief Contains the EgBasedLib class definition. 
+**************************************************************************************************** +*/ + +#ifndef __EG_BASED_ADDR_LIB_H__ +#define __EG_BASED_ADDR_LIB_H__ + +#include "addrlib1.h" + +namespace Addr +{ +namespace V1 +{ +/// Structures for functions +struct CoordFromBankPipe +{ + UINT_32 xBits : 3; + UINT_32 yBits : 4; + + UINT_32 xBit3 : 1; + UINT_32 xBit4 : 1; + UINT_32 xBit5 : 1; + UINT_32 yBit3 : 1; + UINT_32 yBit4 : 1; + UINT_32 yBit5 : 1; + UINT_32 yBit6 : 1; +}; + +/** +**************************************************************************************************** +* @brief This class is the Evergreen based address library +* @note Abstract class +**************************************************************************************************** +*/ +class EgBasedLib : public Lib +{ +protected: + EgBasedLib(const Client* pClient); + virtual ~EgBasedLib(); + +public: + + /// Surface info functions + + // NOTE: DispatchComputeSurfaceInfo using TileInfo takes both an input and an output. + // On input: + // One or more fields may be 0 to be calculated/defaulted - pre-SI h/w. + // H/W using tile mode index only accepts none or all 0's - SI and newer h/w. + // It then returns the actual tiling configuration used. 
+ // Other methods' TileInfo must be valid on entry + BOOL_32 DispatchComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE DispatchComputeFmaskInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut); + +protected: + // Hwl interface + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoord( + const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceCoordFromAddr( + const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeSliceTileSwizzle( + const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, + ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlExtractBankPipeSwizzle( + const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, + ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlCombineBankPipeSwizzle( + UINT_32 bankSwizzle, UINT_32 pipeSwizzle, ADDR_TILEINFO* pTileInfo, + UINT_64 baseAddr, UINT_32* pTileSwizzle) const; + + virtual ADDR_E_RETURNCODE HwlComputeBaseSwizzle( + const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, + ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW( + const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, + ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const; + + virtual UINT_32 HwlComputeHtileBpp( + BOOL_32 isWidth8, BOOL_32 isHeight8) const; + + virtual UINT_32 HwlComputeHtileBaseAlign( + BOOL_32 isTcCompatible, BOOL_32 isLinear, ADDR_TILEINFO* pTileInfo) const; + + virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut); + + virtual 
ADDR_E_RETURNCODE HwlComputeFmaskAddrFromCoord( + const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeFmaskCoordFromAddr( + const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const; + + virtual BOOL_32 HwlGetAlignmentInfoMacroTiled( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + UINT_32* pPitchAlign, UINT_32* pHeightAlign, UINT_32* pSizeAlign) const; + + virtual UINT_32 HwlComputeQbStereoRightSwizzle( + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pInfo) const; + + virtual VOID HwlComputePixelCoordFromOffset( + UINT_32 offset, UINT_32 bpp, UINT_32 numSamples, + AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits, + UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, + AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const; + + /// Return Cmask block max + virtual BOOL_32 HwlGetMaxCmaskBlockMax() const + { + return 0x3FFF; // 14 bits, 0n16383 + } + + // Sub-hwl interface + /// Pure virtual function to setup tile info (indices) if client requests to do so + virtual VOID HwlSetupTileInfo( + AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags, + UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, + ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo, + AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0; + + /// Pure virtual function to get pitch alignment for linear modes + virtual UINT_32 HwlGetPitchAlignmentLinear(UINT_32 bpp, ADDR_SURFACE_FLAGS flags) const = 0; + + /// Pure virtual function to get size adjustment for linear modes + virtual UINT_64 HwlGetSizeAdjustmentLinear( + AddrTileMode tileMode, + UINT_32 bpp, UINT_32 numSamples, UINT_32 baseAlign, UINT_32 pitchAlign, + UINT_32 *pPitch, UINT_32 *pHeight, UINT_32 *pHeightAlign) const = 0; + + virtual UINT_32 HwlGetPitchAlignmentMicroTiled( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 
numSamples) const; + + virtual UINT_64 HwlGetSizeAdjustmentMicroTiled( + UINT_32 thickness, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples, + UINT_32 baseAlign, UINT_32 pitchAlign, + UINT_32 *pPitch, UINT_32 *pHeight) const; + + /// Pure virtual function to do extra sanity check + virtual BOOL_32 HwlSanityCheckMacroTiled( + ADDR_TILEINFO* pTileInfo) const = 0; + + /// Pure virtual function to check current level to be the last macro tiled one + virtual VOID HwlCheckLastMacroTiledLvl( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0; + + /// Adjusts bank before bank is modified by rotation + virtual UINT_32 HwlPreAdjustBank( + UINT_32 tileX, UINT_32 bank, ADDR_TILEINFO* pTileInfo) const = 0; + + virtual VOID HwlComputeSurfaceCoord2DFromBankPipe( + AddrTileMode tileMode, UINT_32* pX, UINT_32* pY, UINT_32 slice, + UINT_32 bank, UINT_32 pipe, + UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices, + BOOL_32 ignoreSE, + ADDR_TILEINFO* pTileInfo) const = 0; + + virtual BOOL_32 HwlTileInfoEqual( + const ADDR_TILEINFO* pLeft, const ADDR_TILEINFO* pRight) const; + + virtual AddrTileMode HwlDegradeThickTileMode( + AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const; + + virtual INT_32 HwlPostCheckTileIndex( + const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type, + INT curIndex = TileIndexInvalid) const + { + return TileIndexInvalid; + } + + virtual VOID HwlFmaskPreThunkSurfInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn, + const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut, + ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const + { + } + + virtual VOID HwlFmaskPostThunkSurfInfo( + const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut, + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const + { + } + + virtual UINT_32 HwlStereoCheckRightOffsetPadding(ADDR_TILEINFO* pTileInfo) const; + + virtual BOOL_32 HwlReduceBankWidthHeight( + UINT_32 
tileSize, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples, + UINT_32 bankHeightAlign, UINT_32 pipes, + ADDR_TILEINFO* pTileInfo) const; + + // Protected non-virtual functions + + /// Mip level functions + AddrTileMode ComputeSurfaceMipLevelTileMode( + AddrTileMode baseTileMode, UINT_32 bpp, + UINT_32 pitch, UINT_32 height, UINT_32 numSlices, UINT_32 numSamples, + UINT_32 pitchAlign, UINT_32 heightAlign, + ADDR_TILEINFO* pTileInfo) const; + + /// Swizzle functions + VOID ExtractBankPipeSwizzle( + UINT_32 base256b, ADDR_TILEINFO* pTileInfo, + UINT_32* pBankSwizzle, UINT_32* pPipeSwizzle) const; + + UINT_32 GetBankPipeSwizzle( + UINT_32 bankSwizzle, UINT_32 pipeSwizzle, + UINT_64 baseAddr, ADDR_TILEINFO* pTileInfo) const; + + UINT_32 ComputeSliceTileSwizzle( + AddrTileMode tileMode, UINT_32 baseSwizzle, UINT_32 slice, UINT_64 baseAddr, + ADDR_TILEINFO* pTileInfo) const; + + /// Addressing functions + virtual ADDR_E_RETURNCODE ComputeBankEquation( + UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, + ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const + { + return ADDR_NOTSUPPORTED; + } + + UINT_32 ComputeBankFromCoord( + UINT_32 x, UINT_32 y, UINT_32 slice, + AddrTileMode tileMode, UINT_32 bankSwizzle, UINT_32 tileSpitSlice, + ADDR_TILEINFO* pTileInfo) const; + + UINT_32 ComputeBankFromAddr( + UINT_64 addr, UINT_32 numBanks, UINT_32 numPipes) const; + + UINT_32 ComputePipeRotation( + AddrTileMode tileMode, UINT_32 numPipes) const; + + UINT_32 ComputeBankRotation( + AddrTileMode tileMode, UINT_32 numBanks, + UINT_32 numPipes) const; + + VOID ComputeSurfaceCoord2DFromBankPipe( + AddrTileMode tileMode, UINT_32 x, UINT_32 y, UINT_32 slice, + UINT_32 bank, UINT_32 pipe, + UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices, + ADDR_TILEINFO* pTileInfo, + CoordFromBankPipe *pOutput) const; + + /// Htile/Cmask functions + UINT_64 ComputeHtileBytes( + UINT_32 pitch, UINT_32 height, UINT_32 bpp, + BOOL_32 isLinear, UINT_32 numSlices, UINT_64* 
sliceBytes, UINT_32 baseAlign) const; + + ADDR_E_RETURNCODE ComputeMacroTileEquation( + UINT_32 log2BytesPP, AddrTileMode tileMode, AddrTileType microTileType, + ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const; + + // Static functions + static BOOL_32 IsTileInfoAllZero(const ADDR_TILEINFO* pTileInfo); + static UINT_32 ComputeFmaskNumPlanesFromNumSamples(UINT_32 numSamples); + static UINT_32 ComputeFmaskResolvedBppFromNumSamples(UINT_32 numSamples); + + virtual VOID HwlComputeSurfaceAlignmentsMacroTiled( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, + UINT_32 mipLevel, UINT_32 numSamples, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const + { + } + +private: + + BOOL_32 ComputeSurfaceInfoLinear( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, + UINT_32 padDims) const; + + BOOL_32 ComputeSurfaceInfoMicroTiled( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, + UINT_32 padDims, + AddrTileMode expTileMode) const; + + BOOL_32 ComputeSurfaceInfoMacroTiled( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, + UINT_32 padDims, + AddrTileMode expTileMode) const; + + BOOL_32 ComputeSurfaceAlignmentsLinear( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, + UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const; + + BOOL_32 ComputeSurfaceAlignmentsMicroTiled( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, + UINT_32 mipLevel, UINT_32 numSamples, + UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const; + + BOOL_32 ComputeSurfaceAlignmentsMacroTiled( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, + UINT_32 mipLevel, UINT_32 numSamples, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + /// Surface addressing functions + UINT_64 DispatchComputeSurfaceAddrFromCoord( + const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + 
ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; + + VOID DispatchComputeSurfaceCoordFromAddr( + const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; + + UINT_64 ComputeSurfaceAddrFromCoordMicroTiled( + UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, + UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, + AddrTileMode tileMode, + AddrTileType microTileType, BOOL_32 isDepthSampleOrder, + UINT_32* pBitPosition) const; + + UINT_64 ComputeSurfaceAddrFromCoordMacroTiled( + UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, + UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, + AddrTileMode tileMode, + AddrTileType microTileType, BOOL_32 ignoreSE, BOOL_32 isDepthSampleOrder, + UINT_32 pipeSwizzle, UINT_32 bankSwizzle, + ADDR_TILEINFO* pTileInfo, + UINT_32* pBitPosition) const; + + VOID ComputeSurfaceCoordFromAddrMacroTiled( + UINT_64 addr, UINT_32 bitPosition, + UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, + AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits, + AddrTileType microTileType, BOOL_32 ignoreSE, BOOL_32 isDepthSampleOrder, + UINT_32 pipeSwizzle, UINT_32 bankSwizzle, + ADDR_TILEINFO* pTileInfo, + UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample) const; + + /// Fmask functions + UINT_64 DispatchComputeFmaskAddrFromCoord( + const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const; + + VOID DispatchComputeFmaskCoordFromAddr( + const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const; + + // FMASK related methods - private + UINT_64 ComputeFmaskAddrFromCoordMicroTiled( + UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, UINT_32 plane, + UINT_32 pitch, UINT_32 height, UINT_32 numSamples, AddrTileMode tileMode, + BOOL_32 resolved, UINT_32* pBitPosition) const; + + VOID ComputeFmaskCoordFromAddrMicroTiled( + UINT_64 addr, UINT_32 
bitPosition, + UINT_32 pitch, UINT_32 height, UINT_32 numSamples, + AddrTileMode tileMode, BOOL_32 resolved, + UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, UINT_32* pPlane) const; + + VOID ComputeFmaskCoordFromAddrMacroTiled( + UINT_64 addr, UINT_32 bitPosition, + UINT_32 pitch, UINT_32 height, UINT_32 numSamples, AddrTileMode tileMode, + UINT_32 pipeSwizzle, UINT_32 bankSwizzle, + BOOL_32 ignoreSE, + ADDR_TILEINFO* pTileInfo, + BOOL_32 resolved, + UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, UINT_32* pPlane) const; + + UINT_64 ComputeFmaskAddrFromCoordMacroTiled( + UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, UINT_32 plane, + UINT_32 pitch, UINT_32 height, UINT_32 numSamples, + AddrTileMode tileMode, UINT_32 pipeSwizzle, UINT_32 bankSwizzle, + BOOL_32 ignoreSE, + ADDR_TILEINFO* pTileInfo, + BOOL_32 resolved, + UINT_32* pBitPosition) const; + + /// Sanity check functions + BOOL_32 SanityCheckMacroTiled( + ADDR_TILEINFO* pTileInfo) const; + +protected: + UINT_32 m_ranks; ///< Number of ranks - MC_ARB_RAMCFG.NOOFRANK + UINT_32 m_logicalBanks; ///< Logical banks = m_banks * m_ranks if m_banks != 16 + UINT_32 m_bankInterleave; ///< Bank interleave, as a multiple of pipe interleave size +}; + +} // V1 +} // Addr + +#endif + diff -Nru mesa-18.3.3/src/amd/addrlib/src/r800/siaddrlib.cpp mesa-19.0.1/src/amd/addrlib/src/r800/siaddrlib.cpp --- mesa-18.3.3/src/amd/addrlib/src/r800/siaddrlib.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/r800/siaddrlib.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,3872 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file siaddrlib.cpp +* @brief Contains the implementation for the SiLib class. +**************************************************************************************************** +*/ + +#include "siaddrlib.h" +#include "si_gb_reg.h" + +#include "amdgpu_asic_addr.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// +namespace Addr +{ + +/** +**************************************************************************************************** +* SiHwlInit +* +* @brief +* Creates an SiLib object. +* +* @return +* Returns an SiLib object pointer. 
+**************************************************************************************************** +*/ +Lib* SiHwlInit(const Client* pClient) +{ + return V1::SiLib::CreateObj(pClient); +} + +namespace V1 +{ + +// We don't support MSAA for equation +const BOOL_32 SiLib::m_EquationSupport[SiLib::TileTableSize][SiLib::MaxNumElementBytes] = +{ + {TRUE, TRUE, TRUE, FALSE, FALSE}, // 0, non-AA compressed depth or any stencil + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 1, 2xAA/4xAA compressed depth with or without stencil + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 2, 8xAA compressed depth with or without stencil + {FALSE, TRUE, FALSE, FALSE, FALSE}, // 3, 16 bpp depth PRT (non-MSAA), don't support uncompressed depth + {TRUE, TRUE, TRUE, FALSE, FALSE}, // 4, 1D depth + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 5, 16 bpp depth PRT (4xMSAA) + {FALSE, FALSE, TRUE, FALSE, FALSE}, // 6, 32 bpp depth PRT (non-MSAA) + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 7, 32 bpp depth PRT (4xMSAA) + {TRUE, TRUE, TRUE, TRUE, TRUE }, // 8, Linear + {TRUE, TRUE, TRUE, TRUE, TRUE }, // 9, 1D display + {TRUE, FALSE, FALSE, FALSE, FALSE}, // 10, 8 bpp color (displayable) + {FALSE, TRUE, FALSE, FALSE, FALSE}, // 11, 16 bpp color (displayable) + {FALSE, FALSE, TRUE, TRUE, FALSE}, // 12, 32/64 bpp color (displayable) + {TRUE, TRUE, TRUE, TRUE, TRUE }, // 13, 1D thin + {TRUE, FALSE, FALSE, FALSE, FALSE}, // 14, 8 bpp color non-displayable + {FALSE, TRUE, FALSE, FALSE, FALSE}, // 15, 16 bpp color non-displayable + {FALSE, FALSE, TRUE, FALSE, FALSE}, // 16, 32 bpp color non-displayable + {FALSE, FALSE, FALSE, TRUE, TRUE }, // 17, 64/128 bpp color non-displayable + {TRUE, TRUE, TRUE, TRUE, TRUE }, // 18, 1D THICK + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 19, 2D XTHICK + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 20, 2D THICK + {TRUE, FALSE, FALSE, FALSE, FALSE}, // 21, 8 bpp 2D PRTs (non-MSAA) + {FALSE, TRUE, FALSE, FALSE, FALSE}, // 22, 16 bpp 2D PRTs (non-MSAA) + {FALSE, FALSE, TRUE, FALSE, FALSE}, // 23, 
32 bpp 2D PRTs (non-MSAA) + {FALSE, FALSE, FALSE, TRUE, FALSE}, // 24, 64 bpp 2D PRTs (non-MSAA) + {FALSE, FALSE, FALSE, FALSE, TRUE }, // 25, 128bpp 2D PRTs (non-MSAA) + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 26, none + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 27, none + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 28, none + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 29, none + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 30, 64bpp 2D PRTs (4xMSAA) + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 31, none +}; + +/** +**************************************************************************************************** +* SiLib::SiLib +* +* @brief +* Constructor +* +**************************************************************************************************** +*/ +SiLib::SiLib(const Client* pClient) + : + EgBasedLib(pClient), + m_noOfEntries(0), + m_numEquations(0) +{ + m_class = SI_ADDRLIB; + memset(&m_settings, 0, sizeof(m_settings)); +} + +/** +**************************************************************************************************** +* SiLib::~SiLib +* +* @brief +* Destructor +**************************************************************************************************** +*/ +SiLib::~SiLib() +{ +} + +/** +**************************************************************************************************** +* SiLib::HwlGetPipes +* +* @brief +* Get number pipes +* @return +* num pipes +**************************************************************************************************** +*/ +UINT_32 SiLib::HwlGetPipes( + const ADDR_TILEINFO* pTileInfo ///< [in] Tile info + ) const +{ + UINT_32 numPipes; + + if (pTileInfo) + { + numPipes = GetPipePerSurf(pTileInfo->pipeConfig); + } + else + { + ADDR_ASSERT_ALWAYS(); + numPipes = m_pipes; // Suppose we should still have a global pipes + } + + return numPipes; +} + +/** +**************************************************************************************************** +* SiLib::GetPipePerSurf +* @brief +* get pipe 
num base on inputing tileinfo->pipeconfig +* @return +* pipe number +**************************************************************************************************** +*/ +UINT_32 SiLib::GetPipePerSurf( + AddrPipeCfg pipeConfig ///< [in] pipe config + ) const +{ + UINT_32 numPipes = 0; + + switch (pipeConfig) + { + case ADDR_PIPECFG_P2: + numPipes = 2; + break; + case ADDR_PIPECFG_P4_8x16: + case ADDR_PIPECFG_P4_16x16: + case ADDR_PIPECFG_P4_16x32: + case ADDR_PIPECFG_P4_32x32: + numPipes = 4; + break; + case ADDR_PIPECFG_P8_16x16_8x16: + case ADDR_PIPECFG_P8_16x32_8x16: + case ADDR_PIPECFG_P8_32x32_8x16: + case ADDR_PIPECFG_P8_16x32_16x16: + case ADDR_PIPECFG_P8_32x32_16x16: + case ADDR_PIPECFG_P8_32x32_16x32: + case ADDR_PIPECFG_P8_32x64_32x32: + numPipes = 8; + break; + case ADDR_PIPECFG_P16_32x32_8x16: + case ADDR_PIPECFG_P16_32x32_16x16: + numPipes = 16; + break; + default: + ADDR_ASSERT(!"Invalid pipe config"); + numPipes = m_pipes; + } + return numPipes; +} + +/** +**************************************************************************************************** +* SiLib::ComputeBankEquation +* +* @brief +* Compute bank equation +* +* @return +* If equation can be computed +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE SiLib::ComputeBankEquation( + UINT_32 log2BytesPP, ///< [in] log2 of bytes per pixel + UINT_32 threshX, ///< [in] threshold for x channel + UINT_32 threshY, ///< [in] threshold for y channel + ADDR_TILEINFO* pTileInfo, ///< [in] tile info + ADDR_EQUATION* pEquation ///< [out] bank equation + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + UINT_32 pipes = HwlGetPipes(pTileInfo); + UINT_32 bankXStart = 3 + Log2(pipes) + Log2(pTileInfo->bankWidth); + UINT_32 bankYStart = 3 + Log2(pTileInfo->bankHeight); + + ADDR_CHANNEL_SETTING x3 = InitChannel(1, 0, log2BytesPP + bankXStart); + ADDR_CHANNEL_SETTING x4 = InitChannel(1, 0, log2BytesPP + bankXStart + 1); + 
ADDR_CHANNEL_SETTING x5 = InitChannel(1, 0, log2BytesPP + bankXStart + 2); + ADDR_CHANNEL_SETTING x6 = InitChannel(1, 0, log2BytesPP + bankXStart + 3); + ADDR_CHANNEL_SETTING y3 = InitChannel(1, 1, bankYStart); + ADDR_CHANNEL_SETTING y4 = InitChannel(1, 1, bankYStart + 1); + ADDR_CHANNEL_SETTING y5 = InitChannel(1, 1, bankYStart + 2); + ADDR_CHANNEL_SETTING y6 = InitChannel(1, 1, bankYStart + 3); + + x3.value = (threshX > bankXStart) ? x3.value : 0; + x4.value = (threshX > bankXStart + 1) ? x4.value : 0; + x5.value = (threshX > bankXStart + 2) ? x5.value : 0; + x6.value = (threshX > bankXStart + 3) ? x6.value : 0; + y3.value = (threshY > bankYStart) ? y3.value : 0; + y4.value = (threshY > bankYStart + 1) ? y4.value : 0; + y5.value = (threshY > bankYStart + 2) ? y5.value : 0; + y6.value = (threshY > bankYStart + 3) ? y6.value : 0; + + switch (pTileInfo->banks) + { + case 16: + if (pTileInfo->macroAspectRatio == 1) + { + pEquation->addr[0] = y6; + pEquation->xor1[0] = x3; + pEquation->addr[1] = y5; + pEquation->xor1[1] = y6; + pEquation->xor2[1] = x4; + pEquation->addr[2] = y4; + pEquation->xor1[2] = x5; + pEquation->addr[3] = y3; + pEquation->xor1[3] = x6; + } + else if (pTileInfo->macroAspectRatio == 2) + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y6; + pEquation->addr[1] = y5; + pEquation->xor1[1] = y6; + pEquation->xor2[1] = x4; + pEquation->addr[2] = y4; + pEquation->xor1[2] = x5; + pEquation->addr[3] = y3; + pEquation->xor1[3] = x6; + } + else if (pTileInfo->macroAspectRatio == 4) + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y6; + pEquation->addr[1] = x4; + pEquation->xor1[1] = y5; + pEquation->xor2[1] = y6; + pEquation->addr[2] = y4; + pEquation->xor1[2] = x5; + pEquation->addr[3] = y3; + pEquation->xor1[3] = x6; + } + else if (pTileInfo->macroAspectRatio == 8) + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y6; + pEquation->addr[1] = x4; + pEquation->xor1[1] = y5; + pEquation->xor2[1] = y6; + pEquation->addr[2] = x5; + 
pEquation->xor1[2] = y4; + pEquation->addr[3] = y3; + pEquation->xor1[3] = x6; + } + else + { + ADDR_ASSERT_ALWAYS(); + } + pEquation->numBits = 4; + break; + case 8: + if (pTileInfo->macroAspectRatio == 1) + { + pEquation->addr[0] = y5; + pEquation->xor1[0] = x3; + pEquation->addr[1] = y4; + pEquation->xor1[1] = y5; + pEquation->xor2[1] = x4; + pEquation->addr[2] = y3; + pEquation->xor1[2] = x5; + } + else if (pTileInfo->macroAspectRatio == 2) + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y5; + pEquation->addr[1] = y4; + pEquation->xor1[1] = y5; + pEquation->xor2[1] = x4; + pEquation->addr[2] = y3; + pEquation->xor1[2] = x5; + } + else if (pTileInfo->macroAspectRatio == 4) + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y5; + pEquation->addr[1] = x4; + pEquation->xor1[1] = y4; + pEquation->xor2[1] = y5; + pEquation->addr[2] = y3; + pEquation->xor1[2] = x5; + } + else + { + ADDR_ASSERT_ALWAYS(); + } + pEquation->numBits = 3; + break; + case 4: + if (pTileInfo->macroAspectRatio == 1) + { + pEquation->addr[0] = y4; + pEquation->xor1[0] = x3; + pEquation->addr[1] = y3; + pEquation->xor1[1] = x4; + } + else if (pTileInfo->macroAspectRatio == 2) + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y4; + pEquation->addr[1] = y3; + pEquation->xor1[1] = x4; + } + else + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y4; + pEquation->addr[1] = x4; + pEquation->xor1[1] = y3; + } + pEquation->numBits = 2; + break; + case 2: + if (pTileInfo->macroAspectRatio == 1) + { + pEquation->addr[0] = y3; + pEquation->xor1[0] = x3; + } + else + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y3; + } + pEquation->numBits = 1; + break; + default: + pEquation->numBits = 0; + retCode = ADDR_NOTSUPPORTED; + ADDR_ASSERT_ALWAYS(); + break; + } + + for (UINT_32 i = 0; i < pEquation->numBits; i++) + { + if (pEquation->addr[i].value == 0) + { + if (pEquation->xor1[i].value == 0) + { + // 00X -> X00 + pEquation->addr[i].value = pEquation->xor2[i].value; + 
pEquation->xor2[i].value = 0; + } + else + { + pEquation->addr[i].value = pEquation->xor1[i].value; + + if (pEquation->xor2[i].value != 0) + { + // 0XY -> XY0 + pEquation->xor1[i].value = pEquation->xor2[i].value; + pEquation->xor2[i].value = 0; + } + else + { + // 0X0 -> X00 + pEquation->xor1[i].value = 0; + } + } + } + else if (pEquation->xor1[i].value == 0) + { + if (pEquation->xor2[i].value != 0) + { + // X0Y -> XY0 + pEquation->xor1[i].value = pEquation->xor2[i].value; + pEquation->xor2[i].value = 0; + } + } + } + + if ((pTileInfo->bankWidth == 1) && + ((pTileInfo->pipeConfig == ADDR_PIPECFG_P4_32x32) || + (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32))) + { + retCode = ADDR_NOTSUPPORTED; + } + + return retCode; +} + +/** +**************************************************************************************************** +* SiLib::ComputePipeEquation +* +* @brief +* Compute pipe equation +* +* @return +* If equation can be computed +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE SiLib::ComputePipeEquation( + UINT_32 log2BytesPP, ///< [in] Log2 of bytes per pixel + UINT_32 threshX, ///< [in] Threshold for X channel + UINT_32 threshY, ///< [in] Threshold for Y channel + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + ADDR_EQUATION* pEquation ///< [out] Pipe configure + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + ADDR_CHANNEL_SETTING* pAddr = pEquation->addr; + ADDR_CHANNEL_SETTING* pXor1 = pEquation->xor1; + ADDR_CHANNEL_SETTING* pXor2 = pEquation->xor2; + + ADDR_CHANNEL_SETTING x3 = InitChannel(1, 0, 3 + log2BytesPP); + ADDR_CHANNEL_SETTING x4 = InitChannel(1, 0, 4 + log2BytesPP); + ADDR_CHANNEL_SETTING x5 = InitChannel(1, 0, 5 + log2BytesPP); + ADDR_CHANNEL_SETTING x6 = InitChannel(1, 0, 6 + log2BytesPP); + ADDR_CHANNEL_SETTING y3 = InitChannel(1, 1, 3); + ADDR_CHANNEL_SETTING y4 = InitChannel(1, 1, 4); + ADDR_CHANNEL_SETTING y5 = InitChannel(1, 1, 5); + 
ADDR_CHANNEL_SETTING y6 = InitChannel(1, 1, 6); + + x3.value = (threshX > 3) ? x3.value : 0; + x4.value = (threshX > 4) ? x4.value : 0; + x5.value = (threshX > 5) ? x5.value : 0; + x6.value = (threshX > 6) ? x6.value : 0; + y3.value = (threshY > 3) ? y3.value : 0; + y4.value = (threshY > 4) ? y4.value : 0; + y5.value = (threshY > 5) ? y5.value : 0; + y6.value = (threshY > 6) ? y6.value : 0; + + switch (pTileInfo->pipeConfig) + { + case ADDR_PIPECFG_P2: + pAddr[0] = x3; + pXor1[0] = y3; + pEquation->numBits = 1; + break; + case ADDR_PIPECFG_P4_8x16: + pAddr[0] = x4; + pXor1[0] = y3; + pAddr[1] = x3; + pXor1[1] = y4; + pEquation->numBits = 2; + break; + case ADDR_PIPECFG_P4_16x16: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x4; + pXor1[1] = y4; + pEquation->numBits = 2; + break; + case ADDR_PIPECFG_P4_16x32: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x4; + pXor1[1] = y5; + pEquation->numBits = 2; + break; + case ADDR_PIPECFG_P4_32x32: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x5; + pAddr[1] = x5; + pXor1[1] = y5; + pEquation->numBits = 2; + break; + case ADDR_PIPECFG_P8_16x16_8x16: + pAddr[0] = x4; + pXor1[0] = y3; + pXor2[0] = x5; + pAddr[1] = x3; + pXor1[1] = y5; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_16x32_8x16: + pAddr[0] = x4; + pXor1[0] = y3; + pXor2[0] = x5; + pAddr[1] = x3; + pXor1[1] = y4; + pAddr[2] = x4; + pXor1[2] = y5; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_16x32_16x16: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x5; + pXor1[1] = y4; + pAddr[2] = x4; + pXor1[2] = y5; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_32x32_8x16: + pAddr[0] = x4; + pXor1[0] = y3; + pXor2[0] = x5; + pAddr[1] = x3; + pXor1[1] = y4; + pAddr[2] = x5; + pXor1[2] = y5; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_32x32_16x16: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x4; + pXor1[1] = y4; + pAddr[2] = x5; + pXor1[2] = y5; + 
pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_32x32_16x32: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x4; + pXor1[1] = y6; + pAddr[2] = x5; + pXor1[2] = y5; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_32x64_32x32: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x5; + pAddr[1] = x6; + pXor1[1] = y5; + pAddr[2] = x5; + pXor1[2] = y6; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P16_32x32_8x16: + pAddr[0] = x4; + pXor1[0] = y3; + pAddr[1] = x3; + pXor1[1] = y4; + pAddr[2] = x5; + pXor1[2] = y6; + pAddr[3] = x6; + pXor1[3] = y5; + pEquation->numBits = 4; + break; + case ADDR_PIPECFG_P16_32x32_16x16: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x4; + pXor1[1] = y4; + pAddr[2] = x5; + pXor1[2] = y6; + pAddr[3] = x6; + pXor1[3] = y5; + pEquation->numBits = 4; + break; + default: + ADDR_UNHANDLED_CASE(); + pEquation->numBits = 0; + retCode = ADDR_NOTSUPPORTED; + break; + } + + if (m_settings.isVegaM && (pEquation->numBits == 4)) + { + ADDR_CHANNEL_SETTING addeMsb = pAddr[0]; + ADDR_CHANNEL_SETTING xor1Msb = pXor1[0]; + ADDR_CHANNEL_SETTING xor2Msb = pXor2[0]; + + pAddr[0] = pAddr[1]; + pXor1[0] = pXor1[1]; + pXor2[0] = pXor2[1]; + + pAddr[1] = pAddr[2]; + pXor1[1] = pXor1[2]; + pXor2[1] = pXor2[2]; + + pAddr[2] = pAddr[3]; + pXor1[2] = pXor1[3]; + pXor2[2] = pXor2[3]; + + pAddr[3] = addeMsb; + pXor1[3] = xor1Msb; + pXor2[3] = xor2Msb; + } + + for (UINT_32 i = 0; i < pEquation->numBits; i++) + { + if (pAddr[i].value == 0) + { + if (pXor1[i].value == 0) + { + pAddr[i].value = pXor2[i].value; + } + else + { + pAddr[i].value = pXor1[i].value; + pXor1[i].value = 0; + } + } + } + + return retCode; +} + +/** +**************************************************************************************************** +* SiLib::ComputePipeFromCoord +* +* @brief +* Compute pipe number from coordinates +* @return +* Pipe number 
+**************************************************************************************************** +*/ +UINT_32 SiLib::ComputePipeFromCoord( + UINT_32 x, ///< [in] x coordinate + UINT_32 y, ///< [in] y coordinate + UINT_32 slice, ///< [in] slice index + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 pipeSwizzle, ///< [in] pipe swizzle + BOOL_32 ignoreSE, ///< [in] TRUE if shader engines are ignored + ADDR_TILEINFO* pTileInfo ///< [in] Tile info + ) const +{ + UINT_32 pipe; + UINT_32 pipeBit0 = 0; + UINT_32 pipeBit1 = 0; + UINT_32 pipeBit2 = 0; + UINT_32 pipeBit3 = 0; + UINT_32 sliceRotation; + UINT_32 numPipes = 0; + + UINT_32 tx = x / MicroTileWidth; + UINT_32 ty = y / MicroTileHeight; + UINT_32 x3 = _BIT(tx,0); + UINT_32 x4 = _BIT(tx,1); + UINT_32 x5 = _BIT(tx,2); + UINT_32 x6 = _BIT(tx,3); + UINT_32 y3 = _BIT(ty,0); + UINT_32 y4 = _BIT(ty,1); + UINT_32 y5 = _BIT(ty,2); + UINT_32 y6 = _BIT(ty,3); + + switch (pTileInfo->pipeConfig) + { + case ADDR_PIPECFG_P2: + pipeBit0 = x3 ^ y3; + numPipes = 2; + break; + case ADDR_PIPECFG_P4_8x16: + pipeBit0 = x4 ^ y3; + pipeBit1 = x3 ^ y4; + numPipes = 4; + break; + case ADDR_PIPECFG_P4_16x16: + pipeBit0 = x3 ^ y3 ^ x4; + pipeBit1 = x4 ^ y4; + numPipes = 4; + break; + case ADDR_PIPECFG_P4_16x32: + pipeBit0 = x3 ^ y3 ^ x4; + pipeBit1 = x4 ^ y5; + numPipes = 4; + break; + case ADDR_PIPECFG_P4_32x32: + pipeBit0 = x3 ^ y3 ^ x5; + pipeBit1 = x5 ^ y5; + numPipes = 4; + break; + case ADDR_PIPECFG_P8_16x16_8x16: + pipeBit0 = x4 ^ y3 ^ x5; + pipeBit1 = x3 ^ y5; + numPipes = 8; + break; + case ADDR_PIPECFG_P8_16x32_8x16: + pipeBit0 = x4 ^ y3 ^ x5; + pipeBit1 = x3 ^ y4; + pipeBit2 = x4 ^ y5; + numPipes = 8; + break; + case ADDR_PIPECFG_P8_16x32_16x16: + pipeBit0 = x3 ^ y3 ^ x4; + pipeBit1 = x5 ^ y4; + pipeBit2 = x4 ^ y5; + numPipes = 8; + break; + case ADDR_PIPECFG_P8_32x32_8x16: + pipeBit0 = x4 ^ y3 ^ x5; + pipeBit1 = x3 ^ y4; + pipeBit2 = x5 ^ y5; + numPipes = 8; + break; + case ADDR_PIPECFG_P8_32x32_16x16: + pipeBit0 = x3 ^ 
y3 ^ x4; + pipeBit1 = x4 ^ y4; + pipeBit2 = x5 ^ y5; + numPipes = 8; + break; + case ADDR_PIPECFG_P8_32x32_16x32: + pipeBit0 = x3 ^ y3 ^ x4; + pipeBit1 = x4 ^ y6; + pipeBit2 = x5 ^ y5; + numPipes = 8; + break; + case ADDR_PIPECFG_P8_32x64_32x32: + pipeBit0 = x3 ^ y3 ^ x5; + pipeBit1 = x6 ^ y5; + pipeBit2 = x5 ^ y6; + numPipes = 8; + break; + case ADDR_PIPECFG_P16_32x32_8x16: + pipeBit0 = x4 ^ y3; + pipeBit1 = x3 ^ y4; + pipeBit2 = x5 ^ y6; + pipeBit3 = x6 ^ y5; + numPipes = 16; + break; + case ADDR_PIPECFG_P16_32x32_16x16: + pipeBit0 = x3 ^ y3 ^ x4; + pipeBit1 = x4 ^ y4; + pipeBit2 = x5 ^ y6; + pipeBit3 = x6 ^ y5; + numPipes = 16; + break; + default: + ADDR_UNHANDLED_CASE(); + break; + } + + if (m_settings.isVegaM && (numPipes == 16)) + { + UINT_32 pipeMsb = pipeBit0; + pipeBit0 = pipeBit1; + pipeBit1 = pipeBit2; + pipeBit2 = pipeBit3; + pipeBit3 = pipeMsb; + } + + pipe = pipeBit0 | (pipeBit1 << 1) | (pipeBit2 << 2) | (pipeBit3 << 3); + + UINT_32 microTileThickness = Thickness(tileMode); + + // + // Apply pipe rotation for the slice. 
+ // + switch (tileMode) + { + case ADDR_TM_3D_TILED_THIN1: //fall through thin + case ADDR_TM_3D_TILED_THICK: //fall through thick + case ADDR_TM_3D_TILED_XTHICK: + sliceRotation = + Max(1, static_cast(numPipes / 2) - 1) * (slice / microTileThickness); + break; + default: + sliceRotation = 0; + break; + } + pipeSwizzle += sliceRotation; + pipeSwizzle &= (numPipes - 1); + + pipe = pipe ^ pipeSwizzle; + + return pipe; +} + +/** +**************************************************************************************************** +* SiLib::ComputeTileCoordFromPipeAndElemIdx +* +* @brief +* Compute (x,y) of a tile within a macro tile from address +* @return +* Pipe number +**************************************************************************************************** +*/ +VOID SiLib::ComputeTileCoordFromPipeAndElemIdx( + UINT_32 elemIdx, ///< [in] per pipe element index within a macro tile + UINT_32 pipe, ///< [in] pipe index + AddrPipeCfg pipeCfg, ///< [in] pipe config + UINT_32 pitchInMacroTile, ///< [in] surface pitch in macro tile + UINT_32 x, ///< [in] x coordinate of the (0,0) tile in a macro tile + UINT_32 y, ///< [in] y coordinate of the (0,0) tile in a macro tile + UINT_32* pX, ///< [out] x coordinate + UINT_32* pY ///< [out] y coordinate + ) const +{ + UINT_32 pipebit0 = _BIT(pipe,0); + UINT_32 pipebit1 = _BIT(pipe,1); + UINT_32 pipebit2 = _BIT(pipe,2); + UINT_32 pipebit3 = _BIT(pipe,3); + UINT_32 elemIdx0 = _BIT(elemIdx,0); + UINT_32 elemIdx1 = _BIT(elemIdx,1); + UINT_32 elemIdx2 = _BIT(elemIdx,2); + UINT_32 x3 = 0; + UINT_32 x4 = 0; + UINT_32 x5 = 0; + UINT_32 x6 = 0; + UINT_32 y3 = 0; + UINT_32 y4 = 0; + UINT_32 y5 = 0; + UINT_32 y6 = 0; + + switch(pipeCfg) + { + case ADDR_PIPECFG_P2: + x4 = elemIdx2; + y4 = elemIdx1 ^ x4; + y3 = elemIdx0 ^ x4; + x3 = pipebit0 ^ y3; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(2, x4, x3); + break; + case ADDR_PIPECFG_P4_8x16: + x4 = elemIdx1; + y4 = elemIdx0 ^ x4; + x3 = pipebit1 ^ y4; + y3 = pipebit0 ^ x4; + 
*pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(2, x4, x3); + break; + case ADDR_PIPECFG_P4_16x16: + x4 = elemIdx1; + y3 = elemIdx0 ^ x4; + y4 = pipebit1 ^ x4; + x3 = pipebit0 ^ y3 ^ x4; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(2, x4, x3); + break; + case ADDR_PIPECFG_P4_16x32: + x3 = elemIdx0 ^ pipebit0; + y5 = _BIT(y,5); + x4 = pipebit1 ^ y5; + y3 = pipebit0 ^ x3 ^ x4; + y4 = elemIdx1 ^ x4; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(2, x4, x3); + break; + case ADDR_PIPECFG_P4_32x32: + x4 = elemIdx2; + y3 = elemIdx0 ^ x4; + y4 = elemIdx1 ^ x4; + if((pitchInMacroTile % 2) == 0) + { //even + y5 = _BIT(y,5); + x5 = pipebit1 ^ y5; + x3 = pipebit0 ^ y3 ^ x5; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(3, x5, x4, x3); + } + else + { //odd + x5 = _BIT(x,5); + x3 = pipebit0 ^ y3 ^ x5; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(2, x4, x3); + } + break; + case ADDR_PIPECFG_P8_16x16_8x16: + x4 = elemIdx0; + y5 = _BIT(y,5); + x5 = _BIT(x,5); + x3 = pipebit1 ^ y5; + y4 = pipebit2 ^ x4; + y3 = pipebit0 ^ x5 ^ x4; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(2, x4, x3); + break; + case ADDR_PIPECFG_P8_16x32_8x16: + x3 = elemIdx0; + y4 = pipebit1 ^ x3; + y5 = _BIT(y,5); + x5 = _BIT(x,5); + x4 = pipebit2 ^ y5; + y3 = pipebit0 ^ x4 ^ x5; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(2, x4, x3); + break; + case ADDR_PIPECFG_P8_32x32_8x16: + x4 = elemIdx1; + y4 = elemIdx0 ^ x4; + x3 = pipebit1 ^ y4; + if((pitchInMacroTile % 2) == 0) + { //even + y5 = _BIT(y,5); + x5 = _BIT(x,5); + x5 = pipebit2 ^ y5; + y3 = pipebit0 ^ x4 ^ x5; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(3, x5, x4, x3); + } + else + { //odd + x5 = _BIT(x,5); + y3 = pipebit0 ^ x4 ^ x5; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(2, x4, x3); + } + break; + case ADDR_PIPECFG_P8_16x32_16x16: + x3 = elemIdx0; + x5 = _BIT(x,5); + y5 = _BIT(y,5); + x4 = pipebit2 ^ y5; + y4 = pipebit1 ^ x5; + y3 = pipebit0 ^ x3 ^ x4; + *pY = Bits2Number(2, y4, y3); + *pX = 
Bits2Number(2, x4, x3); + break; + case ADDR_PIPECFG_P8_32x32_16x16: + x4 = elemIdx1; + y3 = elemIdx0 ^ x4; + x3 = y3^x4^pipebit0; + y4 = pipebit1 ^ x4; + if((pitchInMacroTile % 2) == 0) + { //even + y5 = _BIT(y,5); + x5 = pipebit2 ^ y5; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(3, x5, x4, x3); + } + else + { //odd + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(2, x4, x3); + } + break; + case ADDR_PIPECFG_P8_32x32_16x32: + if((pitchInMacroTile % 2) == 0) + { //even + y5 = _BIT(y,5); + y6 = _BIT(y,6); + x4 = pipebit1 ^ y6; + y3 = elemIdx0 ^ x4; + y4 = elemIdx1 ^ x4; + x3 = pipebit0 ^ y3 ^ x4; + x5 = pipebit2 ^ y5; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(3, x5, x4, x3); + } + else + { //odd + y6 = _BIT(y,6); + x4 = pipebit1 ^ y6; + y3 = elemIdx0 ^ x4; + y4 = elemIdx1 ^ x4; + x3 = pipebit0 ^ y3 ^ x4; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(2, x4, x3); + } + break; + case ADDR_PIPECFG_P8_32x64_32x32: + x4 = elemIdx2; + y3 = elemIdx0 ^ x4; + y4 = elemIdx1 ^ x4; + if((pitchInMacroTile % 4) == 0) + { //multiple of 4 + y5 = _BIT(y,5); + y6 = _BIT(y,6); + x5 = pipebit2 ^ y6; + x6 = pipebit1 ^ y5; + x3 = pipebit0 ^ y3 ^ x5; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(4, x6, x5, x4, x3); + } + else + { + y6 = _BIT(y,6); + x5 = pipebit2 ^ y6; + x3 = pipebit0 ^ y3 ^ x5; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(3, x5, x4, x3); + } + break; + case ADDR_PIPECFG_P16_32x32_8x16: + x4 = elemIdx1; + y4 = elemIdx0 ^ x4; + y3 = pipebit0 ^ x4; + x3 = pipebit1 ^ y4; + if((pitchInMacroTile % 4) == 0) + { //multiple of 4 + y5 = _BIT(y,5); + y6 = _BIT(y,6); + x5 = pipebit2 ^ y6; + x6 = pipebit3 ^ y5; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(4, x6, x5,x4, x3); + } + else + { + y6 = _BIT(y,6); + x5 = pipebit2 ^ y6; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(3, x5, x4, x3); + } + break; + case ADDR_PIPECFG_P16_32x32_16x16: + x4 = elemIdx1; + y3 = elemIdx0 ^ x4; + y4 = pipebit1 ^ x4; + x3 = pipebit0 ^ y3 ^ x4; + 
if((pitchInMacroTile % 4) == 0) + { //multiple of 4 + y5 = _BIT(y,5); + y6 = _BIT(y,6); + x5 = pipebit2 ^ y6; + x6 = pipebit3 ^ y5; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(4, x6, x5, x4, x3); + } + else + { + y6 = _BIT(y,6); + x5 = pipebit2 ^ y6; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(3, x5, x4, x3); + } + break; + default: + ADDR_UNHANDLED_CASE(); + } +} + +/** +**************************************************************************************************** +* SiLib::TileCoordToMaskElementIndex +* +* @brief +* Compute element index from coordinates in tiles +* @return +* Element index +**************************************************************************************************** +*/ +UINT_32 SiLib::TileCoordToMaskElementIndex( + UINT_32 tx, ///< [in] x coord, in Tiles + UINT_32 ty, ///< [in] y coord, in Tiles + AddrPipeCfg pipeConfig, ///< [in] pipe config + UINT_32* macroShift, ///< [out] macro shift + UINT_32* elemIdxBits ///< [out] tile offset bits + ) const +{ + UINT_32 elemIdx = 0; + UINT_32 elemIdx0, elemIdx1, elemIdx2; + UINT_32 tx0, tx1; + UINT_32 ty0, ty1; + + tx0 = _BIT(tx,0); + tx1 = _BIT(tx,1); + ty0 = _BIT(ty,0); + ty1 = _BIT(ty,1); + + switch(pipeConfig) + { + case ADDR_PIPECFG_P2: + *macroShift = 3; + *elemIdxBits =3; + elemIdx2 = tx1; + elemIdx1 = tx1 ^ ty1; + elemIdx0 = tx1 ^ ty0; + elemIdx = Bits2Number(3,elemIdx2,elemIdx1,elemIdx0); + break; + case ADDR_PIPECFG_P4_8x16: + *macroShift = 2; + *elemIdxBits =2; + elemIdx1 = tx1; + elemIdx0 = tx1 ^ ty1; + elemIdx = Bits2Number(2,elemIdx1,elemIdx0); + break; + case ADDR_PIPECFG_P4_16x16: + *macroShift = 2; + *elemIdxBits =2; + elemIdx0 = tx1^ty0; + elemIdx1 = tx1; + elemIdx = Bits2Number(2, elemIdx1, elemIdx0); + break; + case ADDR_PIPECFG_P4_16x32: + *macroShift = 2; + *elemIdxBits =2; + elemIdx0 = tx1^ty0; + elemIdx1 = tx1^ty1; + elemIdx = Bits2Number(2, elemIdx1, elemIdx0); + break; + case ADDR_PIPECFG_P4_32x32: + *macroShift = 2; + *elemIdxBits =3; + elemIdx0 = 
tx1^ty0; + elemIdx1 = tx1^ty1; + elemIdx2 = tx1; + elemIdx = Bits2Number(3, elemIdx2, elemIdx1, elemIdx0); + break; + case ADDR_PIPECFG_P8_16x16_8x16: + *macroShift = 1; + *elemIdxBits =1; + elemIdx0 = tx1; + elemIdx = elemIdx0; + break; + case ADDR_PIPECFG_P8_16x32_8x16: + *macroShift = 1; + *elemIdxBits =1; + elemIdx0 = tx0; + elemIdx = elemIdx0; + break; + case ADDR_PIPECFG_P8_32x32_8x16: + *macroShift = 1; + *elemIdxBits =2; + elemIdx1 = tx1; + elemIdx0 = tx1^ty1; + elemIdx = Bits2Number(2, elemIdx1, elemIdx0); + break; + case ADDR_PIPECFG_P8_16x32_16x16: + *macroShift = 1; + *elemIdxBits =1; + elemIdx0 = tx0; + elemIdx = elemIdx0; + break; + case ADDR_PIPECFG_P8_32x32_16x16: + *macroShift = 1; + *elemIdxBits =2; + elemIdx0 = tx1^ty0; + elemIdx1 = tx1; + elemIdx = Bits2Number(2, elemIdx1, elemIdx0); + break; + case ADDR_PIPECFG_P8_32x32_16x32: + *macroShift = 1; + *elemIdxBits =2; + elemIdx0 = tx1^ty0; + elemIdx1 = tx1^ty1; + elemIdx = Bits2Number(2, elemIdx1, elemIdx0); + break; + case ADDR_PIPECFG_P8_32x64_32x32: + *macroShift = 1; + *elemIdxBits =3; + elemIdx0 = tx1^ty0; + elemIdx1 = tx1^ty1; + elemIdx2 = tx1; + elemIdx = Bits2Number(3, elemIdx2, elemIdx1, elemIdx0); + break; + case ADDR_PIPECFG_P16_32x32_8x16: + *macroShift = 0; + *elemIdxBits =2; + elemIdx0 = tx1^ty1; + elemIdx1 = tx1; + elemIdx = Bits2Number(2, elemIdx1, elemIdx0); + break; + case ADDR_PIPECFG_P16_32x32_16x16: + *macroShift = 0; + *elemIdxBits =2; + elemIdx0 = tx1^ty0; + elemIdx1 = tx1; + elemIdx = Bits2Number(2, elemIdx1, elemIdx0); + break; + default: + ADDR_UNHANDLED_CASE(); + break; + } + + return elemIdx; +} + +/** +**************************************************************************************************** +* SiLib::HwlComputeTileDataWidthAndHeightLinear +* +* @brief +* Compute the squared cache shape for per-tile data (CMASK and HTILE) for linear layout +* +* @return +* N/A +* +* @note +* MacroWidth and macroHeight are measured in pixels 
+**************************************************************************************************** +*/ +VOID SiLib::HwlComputeTileDataWidthAndHeightLinear( + UINT_32* pMacroWidth, ///< [out] macro tile width + UINT_32* pMacroHeight, ///< [out] macro tile height + UINT_32 bpp, ///< [in] bits per pixel + ADDR_TILEINFO* pTileInfo ///< [in] tile info + ) const +{ + ADDR_ASSERT(pTileInfo != NULL); + UINT_32 macroWidth; + UINT_32 macroHeight; + + /// In linear mode, the htile or cmask buffer must be padded out to 4 tiles + /// but for P8_32x64_32x32, it must be padded out to 8 tiles + /// Actually there are more pipe configs which need 8-tile padding but SI family + /// has a bug which is fixed in CI family + if ((pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32) || + (pTileInfo->pipeConfig == ADDR_PIPECFG_P16_32x32_8x16) || + (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x32_16x16)) + { + macroWidth = 8*MicroTileWidth; + macroHeight = 8*MicroTileHeight; + } + else + { + macroWidth = 4*MicroTileWidth; + macroHeight = 4*MicroTileHeight; + } + + *pMacroWidth = macroWidth; + *pMacroHeight = macroHeight; +} + +/** +**************************************************************************************************** +* SiLib::HwlComputeHtileBytes +* +* @brief +* Compute htile size in bytes +* +* @return +* Htile size in bytes +**************************************************************************************************** +*/ +UINT_64 SiLib::HwlComputeHtileBytes( + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 bpp, ///< [in] bits per pixel + BOOL_32 isLinear, ///< [in] if it is linear mode + UINT_32 numSlices, ///< [in] number of slices + UINT_64* pSliceBytes, ///< [out] bytes per slice + UINT_32 baseAlign ///< [in] base alignments + ) const +{ + return ComputeHtileBytes(pitch, height, bpp, isLinear, numSlices, pSliceBytes, baseAlign); +} + +/** 
+**************************************************************************************************** +* SiLib::HwlComputeXmaskAddrFromCoord +* +* @brief +* Compute address from coordinates for htile/cmask +* @return +* Byte address +**************************************************************************************************** +*/ +UINT_64 SiLib::HwlComputeXmaskAddrFromCoord( + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 x, ///< [in] x coord + UINT_32 y, ///< [in] y coord + UINT_32 slice, ///< [in] slice/depth index + UINT_32 numSlices, ///< [in] number of slices + UINT_32 factor, ///< [in] factor that indicates cmask(2) or htile(1) + BOOL_32 isLinear, ///< [in] linear or tiled HTILE layout + BOOL_32 isWidth8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value + BOOL_32 isHeight8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + UINT_32* pBitPosition ///< [out] bit position inside a byte + ) const +{ + UINT_32 tx = x / MicroTileWidth; + UINT_32 ty = y / MicroTileHeight; + UINT_32 newPitch; + UINT_32 newHeight; + UINT_64 totalBytes; + UINT_32 macroWidth; + UINT_32 macroHeight; + UINT_64 pSliceBytes; + UINT_32 pBaseAlign; + UINT_32 tileNumPerPipe; + UINT_32 elemBits; + + if (factor == 2) //CMASK + { + ADDR_CMASK_FLAGS flags = {{0}}; + + tileNumPerPipe = 256; + + ComputeCmaskInfo(flags, + pitch, + height, + numSlices, + isLinear, + pTileInfo, + &newPitch, + &newHeight, + &totalBytes, + ¯oWidth, + ¯oHeight); + elemBits = CmaskElemBits; + } + else //HTile + { + ADDR_HTILE_FLAGS flags = {{0}}; + + tileNumPerPipe = 512; + + ComputeHtileInfo(flags, + pitch, + height, + numSlices, + isLinear, + TRUE, + TRUE, + pTileInfo, + &newPitch, + &newHeight, + &totalBytes, + ¯oWidth, + ¯oHeight, + &pSliceBytes, + &pBaseAlign); + elemBits = 32; + } + + const UINT_32 pitchInTile = newPitch / MicroTileWidth; + const UINT_32 heightInTile = newHeight / MicroTileWidth; + 
UINT_64 macroOffset; // Per pipe starting offset of the macro tile in which this tile lies. + UINT_64 microNumber; // Per pipe starting offset of the macro tile in which this tile lies. + UINT_32 microX; + UINT_32 microY; + UINT_64 microOffset; + UINT_32 microShift; + UINT_64 totalOffset; + UINT_32 elemIdxBits; + UINT_32 elemIdx = + TileCoordToMaskElementIndex(tx, ty, pTileInfo->pipeConfig, µShift, &elemIdxBits); + + UINT_32 numPipes = HwlGetPipes(pTileInfo); + + if (isLinear) + { //linear addressing + // Linear addressing is extremelly wasting memory if slice > 1, since each pipe has the full + // slice memory foot print instead of divided by numPipes. + microX = tx / 4; // Macro Tile is 4x4 + microY = ty / 4 ; + microNumber = static_cast(microX + microY * (pitchInTile / 4)) << microShift; + + UINT_32 sliceBits = pitchInTile * heightInTile; + + // do htile single slice alignment if the flag is true + if (m_configFlags.useHtileSliceAlign && (factor == 1)) //Htile + { + sliceBits = PowTwoAlign(sliceBits, BITS_TO_BYTES(HtileCacheBits) * numPipes / elemBits); + } + macroOffset = slice * (sliceBits / numPipes) * elemBits ; + } + else + { //tiled addressing + const UINT_32 macroWidthInTile = macroWidth / MicroTileWidth; // Now in unit of Tiles + const UINT_32 macroHeightInTile = macroHeight / MicroTileHeight; + const UINT_32 pitchInCL = pitchInTile / macroWidthInTile; + const UINT_32 heightInCL = heightInTile / macroHeightInTile; + + const UINT_32 macroX = x / macroWidth; + const UINT_32 macroY = y / macroHeight; + const UINT_32 macroNumber = macroX + macroY * pitchInCL + slice * pitchInCL * heightInCL; + + // Per pipe starting offset of the cache line in which this tile lies. 
+ microX = (x % macroWidth) / MicroTileWidth / 4; // Macro Tile is 4x4 + microY = (y % macroHeight) / MicroTileHeight / 4 ; + microNumber = static_cast(microX + microY * (macroWidth / MicroTileWidth / 4)) << microShift; + + macroOffset = macroNumber * tileNumPerPipe * elemBits; + } + + if(elemIdxBits == microShift) + { + microNumber += elemIdx; + } + else + { + microNumber >>= elemIdxBits; + microNumber <<= elemIdxBits; + microNumber += elemIdx; + } + + microOffset = elemBits * microNumber; + totalOffset = microOffset + macroOffset; + + UINT_32 pipe = ComputePipeFromCoord(x, y, 0, ADDR_TM_2D_TILED_THIN1, 0, FALSE, pTileInfo); + UINT_64 addrInBits = totalOffset % (m_pipeInterleaveBytes * 8) + + pipe * (m_pipeInterleaveBytes * 8) + + totalOffset / (m_pipeInterleaveBytes * 8) * (m_pipeInterleaveBytes * 8) * numPipes; + *pBitPosition = static_cast(addrInBits) % 8; + UINT_64 addr = addrInBits / 8; + + return addr; +} + +/** +**************************************************************************************************** +* SiLib::HwlComputeXmaskCoordFromAddr +* +* @brief +* Compute the coord from an address of a cmask/htile +* +* @return +* N/A +* +* @note +* This method is reused by htile, so rename to Xmask +**************************************************************************************************** +*/ +VOID SiLib::HwlComputeXmaskCoordFromAddr( + UINT_64 addr, ///< [in] address + UINT_32 bitPosition, ///< [in] bitPosition in a byte + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 numSlices, ///< [in] number of slices + UINT_32 factor, ///< [in] factor that indicates cmask or htile + BOOL_32 isLinear, ///< [in] linear or tiled HTILE layout + BOOL_32 isWidth8, ///< [in] Not used by SI + BOOL_32 isHeight8, ///< [in] Not used by SI + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + UINT_32* pX, ///< [out] x coord + UINT_32* pY, ///< [out] y coord + UINT_32* pSlice ///< [out] slice index + ) const +{ + UINT_32 newPitch; + UINT_32 
newHeight; + UINT_64 totalBytes; + UINT_32 clWidth; + UINT_32 clHeight; + UINT_32 tileNumPerPipe; + UINT_64 sliceBytes; + + *pX = 0; + *pY = 0; + *pSlice = 0; + + if (factor == 2) //CMASK + { + ADDR_CMASK_FLAGS flags = {{0}}; + + tileNumPerPipe = 256; + + ComputeCmaskInfo(flags, + pitch, + height, + numSlices, + isLinear, + pTileInfo, + &newPitch, + &newHeight, + &totalBytes, + &clWidth, + &clHeight); + } + else //HTile + { + ADDR_HTILE_FLAGS flags = {{0}}; + + tileNumPerPipe = 512; + + ComputeHtileInfo(flags, + pitch, + height, + numSlices, + isLinear, + TRUE, + TRUE, + pTileInfo, + &newPitch, + &newHeight, + &totalBytes, + &clWidth, + &clHeight, + &sliceBytes); + } + + const UINT_32 pitchInTile = newPitch / MicroTileWidth; + const UINT_32 heightInTile = newHeight / MicroTileWidth; + const UINT_32 pitchInMacroTile = pitchInTile / 4; + UINT_32 macroShift; + UINT_32 elemIdxBits; + // get macroShift and elemIdxBits + TileCoordToMaskElementIndex(0, 0, pTileInfo->pipeConfig, ¯oShift, &elemIdxBits); + + const UINT_32 numPipes = HwlGetPipes(pTileInfo); + const UINT_32 pipe = (UINT_32)((addr / m_pipeInterleaveBytes) % numPipes); + // per pipe + UINT_64 localOffset = (addr % m_pipeInterleaveBytes) + + (addr / m_pipeInterleaveBytes / numPipes)* m_pipeInterleaveBytes; + + UINT_32 tileIndex; + if (factor == 2) //CMASK + { + tileIndex = (UINT_32)(localOffset * 2 + (bitPosition != 0)); + } + else + { + tileIndex = (UINT_32)(localOffset / 4); + } + + UINT_32 macroOffset; + if (isLinear) + { + UINT_32 sliceSizeInTile = pitchInTile * heightInTile; + + // do htile single slice alignment if the flag is true + if (m_configFlags.useHtileSliceAlign && (factor == 1)) //Htile + { + sliceSizeInTile = PowTwoAlign(sliceSizeInTile, static_cast(sliceBytes) / 64); + } + *pSlice = tileIndex / (sliceSizeInTile / numPipes); + macroOffset = tileIndex % (sliceSizeInTile / numPipes); + } + else + { + const UINT_32 clWidthInTile = clWidth / MicroTileWidth; // Now in unit of Tiles + const UINT_32 
clHeightInTile = clHeight / MicroTileHeight; + const UINT_32 pitchInCL = pitchInTile / clWidthInTile; + const UINT_32 heightInCL = heightInTile / clHeightInTile; + const UINT_32 clIndex = tileIndex / tileNumPerPipe; + + UINT_32 clX = clIndex % pitchInCL; + UINT_32 clY = (clIndex % (heightInCL * pitchInCL)) / pitchInCL; + + *pX = clX * clWidthInTile * MicroTileWidth; + *pY = clY * clHeightInTile * MicroTileHeight; + *pSlice = clIndex / (heightInCL * pitchInCL); + + macroOffset = tileIndex % tileNumPerPipe; + } + + UINT_32 elemIdx = macroOffset & 7; + macroOffset >>= elemIdxBits; + + if (elemIdxBits != macroShift) + { + macroOffset <<= (elemIdxBits - macroShift); + + UINT_32 pipebit1 = _BIT(pipe,1); + UINT_32 pipebit2 = _BIT(pipe,2); + UINT_32 pipebit3 = _BIT(pipe,3); + if (pitchInMacroTile % 2) + { //odd + switch (pTileInfo->pipeConfig) + { + case ADDR_PIPECFG_P4_32x32: + macroOffset |= pipebit1; + break; + case ADDR_PIPECFG_P8_32x32_8x16: + case ADDR_PIPECFG_P8_32x32_16x16: + case ADDR_PIPECFG_P8_32x32_16x32: + macroOffset |= pipebit2; + break; + default: + break; + } + + } + + if (pitchInMacroTile % 4) + { + if (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32) + { + macroOffset |= (pipebit1<<1); + } + if((pTileInfo->pipeConfig == ADDR_PIPECFG_P16_32x32_8x16) || + (pTileInfo->pipeConfig == ADDR_PIPECFG_P16_32x32_16x16)) + { + macroOffset |= (pipebit3<<1); + } + } + } + + UINT_32 macroX; + UINT_32 macroY; + + if (isLinear) + { + macroX = macroOffset % pitchInMacroTile; + macroY = macroOffset / pitchInMacroTile; + } + else + { + const UINT_32 clWidthInMacroTile = clWidth / (MicroTileWidth * 4); + macroX = macroOffset % clWidthInMacroTile; + macroY = macroOffset / clWidthInMacroTile; + } + + *pX += macroX * 4 * MicroTileWidth; + *pY += macroY * 4 * MicroTileHeight; + + UINT_32 microX; + UINT_32 microY; + ComputeTileCoordFromPipeAndElemIdx(elemIdx, pipe, pTileInfo->pipeConfig, pitchInMacroTile, + *pX, *pY, µX, µY); + + *pX += microX * MicroTileWidth; + *pY += 
microY * MicroTileWidth; +} + +/** +**************************************************************************************************** +* SiLib::HwlGetPitchAlignmentLinear +* @brief +* Get pitch alignment +* @return +* pitch alignment +**************************************************************************************************** +*/ +UINT_32 SiLib::HwlGetPitchAlignmentLinear( + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags ///< [in] surface flags + ) const +{ + UINT_32 pitchAlign; + + // Interleaved access requires a 256B aligned pitch, so fall back to pre-SI alignment + if (flags.interleaved) + { + pitchAlign = Max(64u, m_pipeInterleaveBytes / BITS_TO_BYTES(bpp)); + + } + else + { + pitchAlign = Max(8u, 64 / BITS_TO_BYTES(bpp)); + } + + return pitchAlign; +} + +/** +**************************************************************************************************** +* SiLib::HwlGetSizeAdjustmentLinear +* +* @brief +* Adjust linear surface pitch and slice size +* +* @return +* Logical slice size in bytes +**************************************************************************************************** +*/ +UINT_64 SiLib::HwlGetSizeAdjustmentLinear( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bpp, ///< [in] bits per pixel + UINT_32 numSamples, ///< [in] number of samples + UINT_32 baseAlign, ///< [in] base alignment + UINT_32 pitchAlign, ///< [in] pitch alignment + UINT_32* pPitch, ///< [in,out] pointer to pitch + UINT_32* pHeight, ///< [in,out] pointer to height + UINT_32* pHeightAlign ///< [in,out] pointer to height align + ) const +{ + UINT_64 sliceSize; + if (tileMode == ADDR_TM_LINEAR_GENERAL) + { + sliceSize = BITS_TO_BYTES(static_cast(*pPitch) * (*pHeight) * bpp * numSamples); + } + else + { + UINT_32 pitch = *pPitch; + UINT_32 height = *pHeight; + + UINT_32 pixelsPerPipeInterleave = m_pipeInterleaveBytes / BITS_TO_BYTES(bpp); + UINT_32 sliceAlignInPixel = pixelsPerPipeInterleave < 64 ? 
64 : pixelsPerPipeInterleave; + + // numSamples should be 1 in real cases (no MSAA for linear but TGL may pass non 1 value) + UINT_64 pixelPerSlice = static_cast(pitch) * height * numSamples; + + while (pixelPerSlice % sliceAlignInPixel) + { + pitch += pitchAlign; + pixelPerSlice = static_cast(pitch) * height * numSamples; + } + + *pPitch = pitch; + + UINT_32 heightAlign = 1; + + while ((pitch * heightAlign) % sliceAlignInPixel) + { + heightAlign++; + } + + *pHeightAlign = heightAlign; + + sliceSize = BITS_TO_BYTES(pixelPerSlice * bpp); + } + + return sliceSize; +} + +/** +**************************************************************************************************** +* SiLib::HwlPreHandleBaseLvl3xPitch +* +* @brief +* Pre-handler of 3x pitch (96 bit) adjustment +* +* @return +* Expected pitch +**************************************************************************************************** +*/ +UINT_32 SiLib::HwlPreHandleBaseLvl3xPitch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input + UINT_32 expPitch ///< [in] pitch + ) const +{ + ADDR_ASSERT(pIn->width == expPitch); + + // From SI, if pow2Pad is 1 the pitch is expanded 3x first, then padded to pow2, so nothing to + // do here + if (pIn->flags.pow2Pad == FALSE) + { + Addr::V1::Lib::HwlPreHandleBaseLvl3xPitch(pIn, expPitch); + } + else + { + ADDR_ASSERT(IsPow2(expPitch)); + } + + return expPitch; +} + +/** +**************************************************************************************************** +* SiLib::HwlPostHandleBaseLvl3xPitch +* +* @brief +* Post-handler of 3x pitch adjustment +* +* @return +* Expected pitch +**************************************************************************************************** +*/ +UINT_32 SiLib::HwlPostHandleBaseLvl3xPitch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input + UINT_32 expPitch ///< [in] pitch + ) const +{ + /** + * @note The pitch will be divided by 3 in the end so the value will look odd but h/w should + * be able 
to compute a correct pitch from it as h/w address library is doing the job. + */ + // From SI, the pitch is expanded 3x first, then padded to pow2, so no special handler here + if (pIn->flags.pow2Pad == FALSE) + { + Addr::V1::Lib::HwlPostHandleBaseLvl3xPitch(pIn, expPitch); + } + + return expPitch; +} + +/** +**************************************************************************************************** +* SiLib::HwlGetPitchAlignmentMicroTiled +* +* @brief +* Compute 1D tiled surface pitch alignment +* +* @return +* pitch alignment +**************************************************************************************************** +*/ +UINT_32 SiLib::HwlGetPitchAlignmentMicroTiled( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 numSamples ///< [in] number of samples + ) const +{ + UINT_32 pitchAlign; + + if (flags.qbStereo) + { + pitchAlign = EgBasedLib::HwlGetPitchAlignmentMicroTiled(tileMode,bpp,flags,numSamples); + } + else + { + pitchAlign = 8; + } + + return pitchAlign; +} + +/** +**************************************************************************************************** +* SiLib::HwlGetSizeAdjustmentMicroTiled +* +* @brief +* Adjust 1D tiled surface pitch and slice size +* +* @return +* Logical slice size in bytes +**************************************************************************************************** +*/ +UINT_64 SiLib::HwlGetSizeAdjustmentMicroTiled( + UINT_32 thickness, ///< [in] thickness + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 numSamples, ///< [in] number of samples + UINT_32 baseAlign, ///< [in] base alignment + UINT_32 pitchAlign, ///< [in] pitch alignment + UINT_32* pPitch, ///< [in,out] pointer to pitch + UINT_32* pHeight ///< [in,out] pointer to height + ) const +{ + UINT_64 logicalSliceSize; + UINT_64 physicalSliceSize; + + UINT_32 pitch = *pPitch; + UINT_32 
height = *pHeight; + + // Logical slice: pitch * height * bpp * numSamples (no 1D MSAA so actually numSamples == 1) + logicalSliceSize = BITS_TO_BYTES(static_cast(pitch) * height * bpp * numSamples); + + // Physical slice: multiplied by thickness + physicalSliceSize = logicalSliceSize * thickness; + + // Pitch alignment is always 8, so if slice size is not padded to base alignment + // (pipe_interleave_size), we need to increase pitch + while ((physicalSliceSize % baseAlign) != 0) + { + pitch += pitchAlign; + + logicalSliceSize = BITS_TO_BYTES(static_cast(pitch) * height * bpp * numSamples); + + physicalSliceSize = logicalSliceSize * thickness; + } + +#if !ALT_TEST + // + // Special workaround for depth/stencil buffer, use 8 bpp to align depth buffer again since + // the stencil plane may have larger pitch if the slice size is smaller than base alignment. + // + // Note: this actually does not work for mipmap but mipmap depth texture is not really + // sampled with mipmap. + // + if (flags.depth && (flags.noStencil == FALSE)) + { + ADDR_ASSERT(numSamples == 1); + + UINT_64 logicalSiceSizeStencil = static_cast(pitch) * height; // 1 byte stencil + + while ((logicalSiceSizeStencil % baseAlign) != 0) + { + pitch += pitchAlign; // Stencil plane's pitch alignment is the same as depth plane's + + logicalSiceSizeStencil = static_cast(pitch) * height; + } + + if (pitch != *pPitch) + { + // If this is a mipmap, this padded one cannot be sampled as a whole mipmap! 
+ logicalSliceSize = logicalSiceSizeStencil * BITS_TO_BYTES(bpp); + } + } +#endif + *pPitch = pitch; + + // No adjust for pHeight + + return logicalSliceSize; +} + +/** +**************************************************************************************************** +* SiLib::HwlConvertChipFamily +* +* @brief +* Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision +* @return +* ChipFamily +**************************************************************************************************** +*/ +ChipFamily SiLib::HwlConvertChipFamily( + UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h + UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h +{ + ChipFamily family = ADDR_CHIP_FAMILY_SI; + + switch (uChipFamily) + { + case FAMILY_SI: + m_settings.isSouthernIsland = 1; + m_settings.isTahiti = ASICREV_IS_TAHITI_P(uChipRevision); + m_settings.isPitCairn = ASICREV_IS_PITCAIRN_PM(uChipRevision); + m_settings.isCapeVerde = ASICREV_IS_CAPEVERDE_M(uChipRevision); + m_settings.isOland = ASICREV_IS_OLAND_M(uChipRevision); + m_settings.isHainan = ASICREV_IS_HAINAN_V(uChipRevision); + break; + default: + ADDR_ASSERT(!"This should be a Fusion"); + break; + } + + return family; +} + +/** +**************************************************************************************************** +* SiLib::HwlSetupTileInfo +* +* @brief +* Setup default value of tile info for SI +**************************************************************************************************** +*/ +VOID SiLib::HwlSetupTileInfo( + AddrTileMode tileMode, ///< [in] Tile mode + ADDR_SURFACE_FLAGS flags, ///< [in] Surface type flags + UINT_32 bpp, ///< [in] Bits per pixel + UINT_32 pitch, ///< [in] Pitch in pixels + UINT_32 height, ///< [in] Height in pixels + UINT_32 numSamples, ///< [in] Number of samples + ADDR_TILEINFO* pTileInfoIn, ///< [in] Tile info input: NULL for default + ADDR_TILEINFO* pTileInfoOut, ///< [out] Tile info output + 
AddrTileType inTileType, ///< [in] Tile type + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] Output + ) const +{ + UINT_32 thickness = Thickness(tileMode); + ADDR_TILEINFO* pTileInfo = pTileInfoOut; + INT index = TileIndexInvalid; + + // Fail-safe code + if (IsLinear(tileMode) == FALSE) + { + // 128 bpp/thick tiling must be non-displayable. + // Fmask reuse color buffer's entry but bank-height field can be from another entry + // To simplify the logic, fmask entry should be picked from non-displayable ones + if (bpp == 128 || thickness > 1 || flags.fmask || flags.prt) + { + inTileType = ADDR_NON_DISPLAYABLE; + } + + if (flags.depth || flags.stencil) + { + inTileType = ADDR_DEPTH_SAMPLE_ORDER; + } + } + + // Partial valid fields are not allowed for SI. + if (IsTileInfoAllZero(pTileInfo)) + { + if (IsMacroTiled(tileMode)) + { + if (flags.prt) + { + if (numSamples == 1) + { + if (flags.depth) + { + switch (bpp) + { + case 16: + index = 3; + break; + case 32: + index = 6; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + } + else + { + switch (bpp) + { + case 8: + index = 21; + break; + case 16: + index = 22; + break; + case 32: + index = 23; + break; + case 64: + index = 24; + break; + case 128: + index = 25; + break; + default: + break; + } + + if (thickness > 1) + { + ADDR_ASSERT(bpp != 128); + index += 5; + } + } + } + else + { + ADDR_ASSERT(numSamples == 4); + + if (flags.depth) + { + switch (bpp) + { + case 16: + index = 5; + break; + case 32: + index = 7; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + } + else + { + switch (bpp) + { + case 8: + index = 23; + break; + case 16: + index = 24; + break; + case 32: + index = 25; + break; + case 64: + index = 30; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + } + } + }//end of PRT part + // See table entries 0-7 + else if (flags.depth || flags.stencil) + { + if (flags.compressZ) + { + if (flags.stencil) + { + index = 0; + } + else + { + // optimal tile index for compressed 
depth/stencil. + switch (numSamples) + { + case 1: + index = 0; + break; + case 2: + case 4: + index = 1; + break; + case 8: + index = 2; + break; + default: + break; + } + } + } + else // unCompressZ + { + index = 3; + } + } + else //non PRT & non Depth & non Stencil + { + // See table entries 9-12 + if (inTileType == ADDR_DISPLAYABLE) + { + switch (bpp) + { + case 8: + index = 10; + break; + case 16: + index = 11; + break; + case 32: + index = 12; + break; + case 64: + index = 12; + break; + default: + break; + } + } + else + { + // See table entries 13-17 + if (thickness == 1) + { + if (flags.fmask) + { + UINT_32 fmaskPixelSize = bpp * numSamples; + + switch (fmaskPixelSize) + { + case 8: + index = 14; + break; + case 16: + index = 15; + break; + case 32: + index = 16; + break; + case 64: + index = 17; + break; + default: + ADDR_ASSERT_ALWAYS(); + } + } + else + { + switch (bpp) + { + case 8: + index = 14; + break; + case 16: + index = 15; + break; + case 32: + index = 16; + break; + case 64: + index = 17; + break; + case 128: + index = 17; + break; + default: + break; + } + } + } + else // thick tiling - entries 18-20 + { + switch (thickness) + { + case 4: + index = 20; + break; + case 8: + index = 19; + break; + default: + break; + } + } + } + } + } + else + { + if (tileMode == ADDR_TM_LINEAR_ALIGNED) + { + index = 8; + } + else if (tileMode == ADDR_TM_LINEAR_GENERAL) + { + index = TileIndexLinearGeneral; + } + else + { + if (flags.depth || flags.stencil) + { + index = 4; + } + else if (inTileType == ADDR_DISPLAYABLE) + { + index = 9; + } + else if (thickness == 1) + { + index = 13; + } + else + { + index = 18; + } + } + } + + if (index >= 0 && index <= 31) + { + *pTileInfo = m_tileTable[index].info; + pOut->tileType = m_tileTable[index].type; + } + + if (index == TileIndexLinearGeneral) + { + *pTileInfo = m_tileTable[8].info; + pOut->tileType = m_tileTable[8].type; + } + } + else + { + if (pTileInfoIn) + { + if (flags.stencil && pTileInfoIn->tileSplitBytes == 
0) + { + // Stencil always uses index 0 + *pTileInfo = m_tileTable[0].info; + } + } + // Pass through tile type + pOut->tileType = inTileType; + } + + pOut->tileIndex = index; + pOut->prtTileIndex = flags.prt; +} + +/** +**************************************************************************************************** +* SiLib::DecodeGbRegs +* +* @brief +* Decodes GB_ADDR_CONFIG and noOfBanks/noOfRanks +* +* @return +* TRUE if all settings are valid +* +**************************************************************************************************** +*/ +BOOL_32 SiLib::DecodeGbRegs( + const ADDR_REGISTER_VALUE* pRegValue) ///< [in] create input +{ + GB_ADDR_CONFIG reg; + BOOL_32 valid = TRUE; + + reg.val = pRegValue->gbAddrConfig; + + switch (reg.f.pipe_interleave_size) + { + case ADDR_CONFIG_PIPE_INTERLEAVE_256B: + m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B; + break; + case ADDR_CONFIG_PIPE_INTERLEAVE_512B: + m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B; + break; + default: + valid = FALSE; + ADDR_UNHANDLED_CASE(); + break; + } + + switch (reg.f.row_size) + { + case ADDR_CONFIG_1KB_ROW: + m_rowSize = ADDR_ROWSIZE_1KB; + break; + case ADDR_CONFIG_2KB_ROW: + m_rowSize = ADDR_ROWSIZE_2KB; + break; + case ADDR_CONFIG_4KB_ROW: + m_rowSize = ADDR_ROWSIZE_4KB; + break; + default: + valid = FALSE; + ADDR_UNHANDLED_CASE(); + break; + } + + switch (pRegValue->noOfBanks) + { + case 0: + m_banks = 4; + break; + case 1: + m_banks = 8; + break; + case 2: + m_banks = 16; + break; + default: + valid = FALSE; + ADDR_UNHANDLED_CASE(); + break; + } + + switch (pRegValue->noOfRanks) + { + case 0: + m_ranks = 1; + break; + case 1: + m_ranks = 2; + break; + default: + valid = FALSE; + ADDR_UNHANDLED_CASE(); + break; + } + + m_logicalBanks = m_banks * m_ranks; + + ADDR_ASSERT(m_logicalBanks <= 16); + + return valid; +} + +/** +**************************************************************************************************** +* SiLib::HwlInitGlobalParams +* +* @brief +* 
Initializes global parameters +* +* @return +* TRUE if all settings are valid +* +**************************************************************************************************** +*/ +BOOL_32 SiLib::HwlInitGlobalParams( + const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input +{ + BOOL_32 valid = TRUE; + const ADDR_REGISTER_VALUE* pRegValue = &pCreateIn->regValue; + + valid = DecodeGbRegs(pRegValue); + + if (valid) + { + if (m_settings.isTahiti || m_settings.isPitCairn) + { + m_pipes = 8; + } + else if (m_settings.isCapeVerde || m_settings.isOland) + { + m_pipes = 4; + } + else + { + // Hainan is 2-pipe (m_settings.isHainan == 1) + m_pipes = 2; + } + + valid = InitTileSettingTable(pRegValue->pTileConfig, pRegValue->noOfEntries); + + if (valid) + { + InitEquationTable(); + } + + m_maxSamples = 16; + } + + return valid; +} + +/** +**************************************************************************************************** +* SiLib::HwlConvertTileInfoToHW +* @brief +* Entry of si's ConvertTileInfoToHW +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE SiLib::HwlConvertTileInfoToHW( + const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] input structure + ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + retCode = EgBasedLib::HwlConvertTileInfoToHW(pIn, pOut); + + if (retCode == ADDR_OK) + { + if (pIn->reverse == FALSE) + { + if (pIn->pTileInfo->pipeConfig == ADDR_PIPECFG_INVALID) + { + retCode = ADDR_INVALIDPARAMS; + } + else + { + pOut->pTileInfo->pipeConfig = + static_cast(pIn->pTileInfo->pipeConfig - 1); + } + } + else + { + pOut->pTileInfo->pipeConfig = + static_cast(pIn->pTileInfo->pipeConfig + 1); + } + } + + return retCode; +} + +/** +**************************************************************************************************** +* 
SiLib::HwlComputeXmaskCoordYFrom8Pipe +* +* @brief +* Compute the Y coord which will be added to Xmask Y +* coord. +* @return +* Y coord +**************************************************************************************************** +*/ +UINT_32 SiLib::HwlComputeXmaskCoordYFrom8Pipe( + UINT_32 pipe, ///< [in] pipe id + UINT_32 x ///< [in] tile coord x, which is original x coord / 8 + ) const +{ + // This function should never be called since it is 6xx/8xx specfic. + // Keep this empty implementation to avoid any mis-use. + ADDR_ASSERT_ALWAYS(); + + return 0; +} + +/** +**************************************************************************************************** +* SiLib::HwlComputeSurfaceCoord2DFromBankPipe +* +* @brief +* Compute surface x,y coordinates from bank/pipe info +* @return +* N/A +**************************************************************************************************** +*/ +VOID SiLib::HwlComputeSurfaceCoord2DFromBankPipe( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32* pX, ///< [in,out] x coordinate + UINT_32* pY, ///< [in,out] y coordinate + UINT_32 slice, ///< [in] slice index + UINT_32 bank, ///< [in] bank number + UINT_32 pipe, ///< [in] pipe number + UINT_32 bankSwizzle,///< [in] bank swizzle + UINT_32 pipeSwizzle,///< [in] pipe swizzle + UINT_32 tileSlices, ///< [in] slices in a micro tile + BOOL_32 ignoreSE, ///< [in] TRUE if shader engines are ignored + ADDR_TILEINFO* pTileInfo ///< [in] bank structure. 
**All fields to be valid on entry** + ) const +{ + UINT_32 xBit; + UINT_32 yBit; + UINT_32 yBit3 = 0; + UINT_32 yBit4 = 0; + UINT_32 yBit5 = 0; + UINT_32 yBit6 = 0; + + UINT_32 xBit3 = 0; + UINT_32 xBit4 = 0; + UINT_32 xBit5 = 0; + + UINT_32 numPipes = GetPipePerSurf(pTileInfo->pipeConfig); + + CoordFromBankPipe xyBits = {0}; + ComputeSurfaceCoord2DFromBankPipe(tileMode, *pX, *pY, slice, bank, pipe, + bankSwizzle, pipeSwizzle, tileSlices, pTileInfo, + &xyBits); + yBit3 = xyBits.yBit3; + yBit4 = xyBits.yBit4; + yBit5 = xyBits.yBit5; + yBit6 = xyBits.yBit6; + + xBit3 = xyBits.xBit3; + xBit4 = xyBits.xBit4; + xBit5 = xyBits.xBit5; + + yBit = xyBits.yBits; + + UINT_32 yBitTemp = 0; + + if ((pTileInfo->pipeConfig == ADDR_PIPECFG_P4_32x32) || + (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32)) + { + ADDR_ASSERT(pTileInfo->bankWidth == 1 && pTileInfo->macroAspectRatio > 1); + UINT_32 yBitToCheck = QLog2(pTileInfo->banks) - 1; + + ADDR_ASSERT(yBitToCheck <= 3); + + yBitTemp = _BIT(yBit, yBitToCheck); + + xBit3 = 0; + } + + yBit = Bits2Number(4, yBit6, yBit5, yBit4, yBit3); + xBit = Bits2Number(3, xBit5, xBit4, xBit3); + + *pY += yBit * pTileInfo->bankHeight * MicroTileHeight; + *pX += xBit * numPipes * pTileInfo->bankWidth * MicroTileWidth; + + //calculate the bank and pipe bits in x, y + UINT_32 xTile; //x in micro tile + UINT_32 x3 = 0; + UINT_32 x4 = 0; + UINT_32 x5 = 0; + UINT_32 x6 = 0; + UINT_32 y = *pY; + + UINT_32 pipeBit0 = _BIT(pipe,0); + UINT_32 pipeBit1 = _BIT(pipe,1); + UINT_32 pipeBit2 = _BIT(pipe,2); + + UINT_32 y3 = _BIT(y, 3); + UINT_32 y4 = _BIT(y, 4); + UINT_32 y5 = _BIT(y, 5); + UINT_32 y6 = _BIT(y, 6); + + // bankbit0 after ^x4^x5 + UINT_32 bankBit00 = _BIT(bank,0); + UINT_32 bankBit0 = 0; + + switch (pTileInfo->pipeConfig) + { + case ADDR_PIPECFG_P2: + x3 = pipeBit0 ^ y3; + break; + case ADDR_PIPECFG_P4_8x16: + x4 = pipeBit0 ^ y3; + x3 = pipeBit0 ^ y4; + break; + case ADDR_PIPECFG_P4_16x16: + x4 = pipeBit1 ^ y4; + x3 = pipeBit0 ^ y3 ^ x4; + 
break; + case ADDR_PIPECFG_P4_16x32: + x4 = pipeBit1 ^ y4; + x3 = pipeBit0 ^ y3 ^ x4; + break; + case ADDR_PIPECFG_P4_32x32: + x5 = pipeBit1 ^ y5; + x3 = pipeBit0 ^ y3 ^ x5; + bankBit0 = yBitTemp ^ x5; + x4 = bankBit00 ^ x5 ^ bankBit0; + *pX += x5 * 4 * 1 * 8; // x5 * num_pipes * bank_width * 8; + break; + case ADDR_PIPECFG_P8_16x16_8x16: + x3 = pipeBit1 ^ y5; + x4 = pipeBit2 ^ y4; + x5 = pipeBit0 ^ y3 ^ x4; + break; + case ADDR_PIPECFG_P8_16x32_8x16: + x3 = pipeBit1 ^ y4; + x4 = pipeBit2 ^ y5; + x5 = pipeBit0 ^ y3 ^ x4; + break; + case ADDR_PIPECFG_P8_32x32_8x16: + x3 = pipeBit1 ^ y4; + x5 = pipeBit2 ^ y5; + x4 = pipeBit0 ^ y3 ^ x5; + break; + case ADDR_PIPECFG_P8_16x32_16x16: + x4 = pipeBit2 ^ y5; + x5 = pipeBit1 ^ y4; + x3 = pipeBit0 ^ y3 ^ x4; + break; + case ADDR_PIPECFG_P8_32x32_16x16: + x5 = pipeBit2 ^ y5; + x4 = pipeBit1 ^ y4; + x3 = pipeBit0 ^ y3 ^ x4; + break; + case ADDR_PIPECFG_P8_32x32_16x32: + x5 = pipeBit2 ^ y5; + x4 = pipeBit1 ^ y6; + x3 = pipeBit0 ^ y3 ^ x4; + break; + case ADDR_PIPECFG_P8_32x64_32x32: + x6 = pipeBit1 ^ y5; + x5 = pipeBit2 ^ y6; + x3 = pipeBit0 ^ y3 ^ x5; + bankBit0 = yBitTemp ^ x6; + x4 = bankBit00 ^ x5 ^ bankBit0; + *pX += x6 * 8 * 1 * 8; // x6 * num_pipes * bank_width * 8; + break; + default: + ADDR_ASSERT_ALWAYS(); + } + + xTile = Bits2Number(3, x5, x4, x3); + + *pX += xTile << 3; +} + +/** +**************************************************************************************************** +* SiLib::HwlPreAdjustBank +* +* @brief +* Adjust bank before calculating address acoording to bank/pipe +* @return +* Adjusted bank +**************************************************************************************************** +*/ +UINT_32 SiLib::HwlPreAdjustBank( + UINT_32 tileX, ///< [in] x coordinate in unit of tile + UINT_32 bank, ///< [in] bank + ADDR_TILEINFO* pTileInfo ///< [in] tile info + ) const +{ + if (((pTileInfo->pipeConfig == ADDR_PIPECFG_P4_32x32) || + (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32)) && 
(pTileInfo->bankWidth == 1)) + { + UINT_32 bankBit0 = _BIT(bank, 0); + UINT_32 x4 = _BIT(tileX, 1); + UINT_32 x5 = _BIT(tileX, 2); + + bankBit0 = bankBit0 ^ x4 ^ x5; + bank |= bankBit0; + + ADDR_ASSERT(pTileInfo->macroAspectRatio > 1); + } + + return bank; +} + +/** +**************************************************************************************************** +* SiLib::HwlComputeSurfaceInfo +* +* @brief +* Entry of si's ComputeSurfaceInfo +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE SiLib::HwlComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + pOut->tileIndex = pIn->tileIndex; + + ADDR_E_RETURNCODE retCode = EgBasedLib::HwlComputeSurfaceInfo(pIn, pOut); + + UINT_32 tileIndex = static_cast(pOut->tileIndex); + + if (((pIn->flags.needEquation == TRUE) || + (pIn->flags.preferEquation == TRUE)) && + (pIn->numSamples <= 1) && + (tileIndex < TileTableSize)) + { + static const UINT_32 SiUncompressDepthTileIndex = 3; + + if ((pIn->numSlices > 1) && + (IsMacroTiled(pOut->tileMode) == TRUE) && + ((m_chipFamily == ADDR_CHIP_FAMILY_SI) || + (IsPrtTileMode(pOut->tileMode) == FALSE))) + { + pOut->equationIndex = ADDR_INVALID_EQUATION_INDEX; + } + else if ((pIn->flags.prt == FALSE) && + (m_uncompressDepthEqIndex != 0) && + (tileIndex == SiUncompressDepthTileIndex)) + { + pOut->equationIndex = m_uncompressDepthEqIndex + Log2(pIn->bpp >> 3); + } + else + { + + pOut->equationIndex = m_equationLookupTable[Log2(pIn->bpp >> 3)][tileIndex]; + } + + if (pOut->equationIndex != ADDR_INVALID_EQUATION_INDEX) + { + pOut->blockWidth = m_blockWidth[pOut->equationIndex]; + + pOut->blockHeight = m_blockHeight[pOut->equationIndex]; + + pOut->blockSlices = m_blockSlices[pOut->equationIndex]; + } + } + else + { + pOut->equationIndex = 
ADDR_INVALID_EQUATION_INDEX; + } + + return retCode; +} + +/** +**************************************************************************************************** +* SiLib::HwlComputeMipLevel +* @brief +* Compute MipLevel info (including level 0) +* @return +* TRUE if HWL's handled +**************************************************************************************************** +*/ +BOOL_32 SiLib::HwlComputeMipLevel( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in,out] Input structure + ) const +{ + // basePitch is calculated from level 0 so we only check this for mipLevel > 0 + if (pIn->mipLevel > 0) + { + // Note: Don't check expand 3x formats(96 bit) as the basePitch is not pow2 even if + // we explicity set pow2Pad flag. The 3x base pitch is padded to pow2 but after being + // divided by expandX factor (3) - to program texture pitch, the basePitch is never pow2. + if (ElemLib::IsExpand3x(pIn->format) == FALSE) + { + // Sublevel pitches are generated from base level pitch instead of width on SI + // If pow2Pad is 0, we don't assert - as this is not really used for a mip chain + ADDR_ASSERT((pIn->flags.pow2Pad == FALSE) || + ((pIn->basePitch != 0) && IsPow2(pIn->basePitch))); + } + + if (pIn->basePitch != 0) + { + pIn->width = Max(1u, pIn->basePitch >> pIn->mipLevel); + } + } + + // pow2Pad is done in PostComputeMipLevel + + return TRUE; +} + +/** +**************************************************************************************************** +* SiLib::HwlCheckLastMacroTiledLvl +* +* @brief +* Sets pOut->last2DLevel to TRUE if it is +* @note +* +**************************************************************************************************** +*/ +VOID SiLib::HwlCheckLastMacroTiledLvl( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] Output structure (used as input, too) + ) const +{ + // pow2Pad covers all mipmap cases + if (pIn->flags.pow2Pad) + { + 
ADDR_ASSERT(IsMacroTiled(pIn->tileMode)); + + UINT_32 nextPitch; + UINT_32 nextHeight; + UINT_32 nextSlices; + + AddrTileMode nextTileMode; + + if (pIn->mipLevel == 0 || pIn->basePitch == 0) + { + // Base level or fail-safe case (basePitch == 0) + nextPitch = pOut->pitch >> 1; + } + else + { + // Sub levels + nextPitch = pIn->basePitch >> (pIn->mipLevel + 1); + } + + // nextHeight must be shifted from this level's original height rather than a pow2 padded + // one but this requires original height stored somewhere (pOut->height) + ADDR_ASSERT(pOut->height != 0); + + // next level's height is just current level's >> 1 in pixels + nextHeight = pOut->height >> 1; + // Special format such as FMT_1 and FMT_32_32_32 can be linear only so we consider block + // compressed foramts + if (ElemLib::IsBlockCompressed(pIn->format)) + { + nextHeight = (nextHeight + 3) / 4; + } + nextHeight = NextPow2(nextHeight); + + // nextSlices may be 0 if this level's is 1 + if (pIn->flags.volume) + { + nextSlices = Max(1u, pIn->numSlices >> 1); + } + else + { + nextSlices = pIn->numSlices; + } + + nextTileMode = ComputeSurfaceMipLevelTileMode(pIn->tileMode, + pIn->bpp, + nextPitch, + nextHeight, + nextSlices, + pIn->numSamples, + pOut->blockWidth, + pOut->blockHeight, + pOut->pTileInfo); + + pOut->last2DLevel = IsMicroTiled(nextTileMode); + } +} + +/** +**************************************************************************************************** +* SiLib::HwlDegradeThickTileMode +* +* @brief +* Degrades valid tile mode for thick modes if needed +* +* @return +* Suitable tile mode +**************************************************************************************************** +*/ +AddrTileMode SiLib::HwlDegradeThickTileMode( + AddrTileMode baseTileMode, ///< base tile mode + UINT_32 numSlices, ///< current number of slices + UINT_32* pBytesPerTile ///< [in,out] pointer to bytes per slice + ) const +{ + return EgBasedLib::HwlDegradeThickTileMode(baseTileMode, numSlices, 
pBytesPerTile); +} + +/** +**************************************************************************************************** +* SiLib::HwlTileInfoEqual +* +* @brief +* Return TRUE if all field are equal +* @note +* Only takes care of current HWL's data +**************************************************************************************************** +*/ +BOOL_32 SiLib::HwlTileInfoEqual( + const ADDR_TILEINFO* pLeft, ///<[in] Left compare operand + const ADDR_TILEINFO* pRight ///<[in] Right compare operand + ) const +{ + BOOL_32 equal = FALSE; + + if (pLeft->pipeConfig == pRight->pipeConfig) + { + equal = EgBasedLib::HwlTileInfoEqual(pLeft, pRight); + } + + return equal; +} + +/** +**************************************************************************************************** +* SiLib::GetTileSettings +* +* @brief +* Get tile setting infos by index. +* @return +* Tile setting info. +**************************************************************************************************** +*/ +const TileConfig* SiLib::GetTileSetting( + UINT_32 index ///< [in] Tile index + ) const +{ + ADDR_ASSERT(index < m_noOfEntries); + return &m_tileTable[index]; +} + +/** +**************************************************************************************************** +* SiLib::HwlPostCheckTileIndex +* +* @brief +* Map a tile setting to index if curIndex is invalid, otherwise check if curIndex matches +* tile mode/type/info and change the index if needed +* @return +* Tile index. 
+**************************************************************************************************** +*/ +INT_32 SiLib::HwlPostCheckTileIndex( + const ADDR_TILEINFO* pInfo, ///< [in] Tile Info + AddrTileMode mode, ///< [in] Tile mode + AddrTileType type, ///< [in] Tile type + INT curIndex ///< [in] Current index assigned in HwlSetupTileInfo + ) const +{ + INT_32 index = curIndex; + + if (mode == ADDR_TM_LINEAR_GENERAL) + { + index = TileIndexLinearGeneral; + } + else + { + BOOL_32 macroTiled = IsMacroTiled(mode); + + // We need to find a new index if either of them is true + // 1. curIndex is invalid + // 2. tile mode is changed + // 3. tile info does not match for macro tiled + if ((index == TileIndexInvalid || + (mode != m_tileTable[index].mode) || + (macroTiled && (HwlTileInfoEqual(pInfo, &m_tileTable[index].info) == FALSE)))) + { + for (index = 0; index < static_cast(m_noOfEntries); index++) + { + if (macroTiled) + { + // macro tile modes need all to match + if (HwlTileInfoEqual(pInfo, &m_tileTable[index].info) && + (mode == m_tileTable[index].mode) && + (type == m_tileTable[index].type)) + { + break; + } + } + else if (mode == ADDR_TM_LINEAR_ALIGNED) + { + // linear mode only needs tile mode to match + if (mode == m_tileTable[index].mode) + { + break; + } + } + else + { + // micro tile modes only need tile mode and tile type to match + if (mode == m_tileTable[index].mode && + type == m_tileTable[index].type) + { + break; + } + } + } + } + } + + ADDR_ASSERT(index < static_cast(m_noOfEntries)); + + if (index >= static_cast(m_noOfEntries)) + { + index = TileIndexInvalid; + } + + return index; +} + +/** +**************************************************************************************************** +* SiLib::HwlSetupTileCfg +* +* @brief +* Map tile index to tile setting. 
+* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE SiLib::HwlSetupTileCfg( + UINT_32 bpp, ///< Bits per pixel + INT_32 index, ///< Tile index + INT_32 macroModeIndex, ///< Index in macro tile mode table(CI) + ADDR_TILEINFO* pInfo, ///< [out] Tile Info + AddrTileMode* pMode, ///< [out] Tile mode + AddrTileType* pType ///< [out] Tile type + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + // Global flag to control usage of tileIndex + if (UseTileIndex(index)) + { + if (index == TileIndexLinearGeneral) + { + if (pMode) + { + *pMode = ADDR_TM_LINEAR_GENERAL; + } + + if (pType) + { + *pType = ADDR_DISPLAYABLE; + } + + if (pInfo) + { + pInfo->banks = 2; + pInfo->bankWidth = 1; + pInfo->bankHeight = 1; + pInfo->macroAspectRatio = 1; + pInfo->tileSplitBytes = 64; + pInfo->pipeConfig = ADDR_PIPECFG_P2; + } + } + else if (static_cast(index) >= m_noOfEntries) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + const TileConfig* pCfgTable = GetTileSetting(index); + + if (pInfo) + { + *pInfo = pCfgTable->info; + } + else + { + if (IsMacroTiled(pCfgTable->mode)) + { + returnCode = ADDR_INVALIDPARAMS; + } + } + + if (pMode) + { + *pMode = pCfgTable->mode; + } + + if (pType) + { + *pType = pCfgTable->type; + } + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* SiLib::ReadGbTileMode +* +* @brief +* Convert GB_TILE_MODE HW value to TileConfig. +* @return +* NA. 
+**************************************************************************************************** +*/ +VOID SiLib::ReadGbTileMode( + UINT_32 regValue, ///< [in] GB_TILE_MODE register + TileConfig* pCfg ///< [out] output structure + ) const +{ + GB_TILE_MODE gbTileMode; + gbTileMode.val = regValue; + + pCfg->type = static_cast(gbTileMode.f.micro_tile_mode); + pCfg->info.bankHeight = 1 << gbTileMode.f.bank_height; + pCfg->info.bankWidth = 1 << gbTileMode.f.bank_width; + pCfg->info.banks = 1 << (gbTileMode.f.num_banks + 1); + pCfg->info.macroAspectRatio = 1 << gbTileMode.f.macro_tile_aspect; + pCfg->info.tileSplitBytes = 64 << gbTileMode.f.tile_split; + pCfg->info.pipeConfig = static_cast(gbTileMode.f.pipe_config + 1); + + UINT_32 regArrayMode = gbTileMode.f.array_mode; + + pCfg->mode = static_cast(regArrayMode); + + if (regArrayMode == 8) //ARRAY_2D_TILED_XTHICK + { + pCfg->mode = ADDR_TM_2D_TILED_XTHICK; + } + else if (regArrayMode >= 14) //ARRAY_3D_TILED_XTHICK + { + pCfg->mode = static_cast(pCfg->mode + 3); + } +} + +/** +**************************************************************************************************** +* SiLib::InitTileSettingTable +* +* @brief +* Initialize the ADDR_TILE_CONFIG table. 
+* @return +* TRUE if tile table is correctly initialized +**************************************************************************************************** +*/ +BOOL_32 SiLib::InitTileSettingTable( + const UINT_32* pCfg, ///< [in] Pointer to table of tile configs + UINT_32 noOfEntries ///< [in] Numbe of entries in the table above + ) +{ + BOOL_32 initOk = TRUE; + + ADDR_ASSERT(noOfEntries <= TileTableSize); + + memset(m_tileTable, 0, sizeof(m_tileTable)); + + if (noOfEntries != 0) + { + m_noOfEntries = noOfEntries; + } + else + { + m_noOfEntries = TileTableSize; + } + + if (pCfg) // From Client + { + for (UINT_32 i = 0; i < m_noOfEntries; i++) + { + ReadGbTileMode(*(pCfg + i), &m_tileTable[i]); + } + } + else + { + ADDR_ASSERT_ALWAYS(); + initOk = FALSE; + } + + if (initOk) + { + ADDR_ASSERT(m_tileTable[TILEINDEX_LINEAR_ALIGNED].mode == ADDR_TM_LINEAR_ALIGNED); + } + + return initOk; +} + +/** +**************************************************************************************************** +* SiLib::HwlGetTileIndex +* +* @brief +* Return the virtual/real index for given mode/type/info +* @return +* ADDR_OK if successful. 
+**************************************************************************************************** +*/ +ADDR_E_RETURNCODE SiLib::HwlGetTileIndex( + const ADDR_GET_TILEINDEX_INPUT* pIn, + ADDR_GET_TILEINDEX_OUTPUT* pOut) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + pOut->index = HwlPostCheckTileIndex(pIn->pTileInfo, pIn->tileMode, pIn->tileType); + + return returnCode; +} + +/** +**************************************************************************************************** +* SiLib::HwlFmaskPreThunkSurfInfo +* +* @brief +* Some preparation before thunking a ComputeSurfaceInfo call for Fmask +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +VOID SiLib::HwlFmaskPreThunkSurfInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn, ///< [in] Input of fmask info + const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut, ///< [in] Output of fmask info + ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn, ///< [out] Input of thunked surface info + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut ///< [out] Output of thunked surface info + ) const +{ + pSurfIn->tileIndex = pFmaskIn->tileIndex; +} + +/** +**************************************************************************************************** +* SiLib::HwlFmaskPostThunkSurfInfo +* +* @brief +* Copy hwl extra field after calling thunked ComputeSurfaceInfo +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +VOID SiLib::HwlFmaskPostThunkSurfInfo( + const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut, ///< [in] Output of surface info + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut ///< [out] Output of fmask info + ) const +{ + pFmaskOut->macroModeIndex = TileIndexInvalid; + pFmaskOut->tileIndex = pSurfOut->tileIndex; +} + +/** +**************************************************************************************************** +* SiLib::HwlComputeFmaskBits +* 
@brief +* Computes fmask bits +* @return +* Fmask bits +**************************************************************************************************** +*/ +UINT_32 SiLib::HwlComputeFmaskBits( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, + UINT_32* pNumSamples + ) const +{ + UINT_32 numSamples = pIn->numSamples; + UINT_32 numFrags = GetNumFragments(numSamples, pIn->numFrags); + UINT_32 bpp; + + if (numFrags != numSamples) // EQAA + { + ADDR_ASSERT(numFrags <= 8); + + if (pIn->resolved == FALSE) + { + if (numFrags == 1) + { + bpp = 1; + numSamples = numSamples == 16 ? 16 : 8; + } + else if (numFrags == 2) + { + ADDR_ASSERT(numSamples >= 4); + + bpp = 2; + numSamples = numSamples; + } + else if (numFrags == 4) + { + ADDR_ASSERT(numSamples >= 4); + + bpp = 4; + numSamples = numSamples; + } + else // numFrags == 8 + { + ADDR_ASSERT(numSamples == 16); + + bpp = 4; + numSamples = numSamples; + } + } + else + { + if (numFrags == 1) + { + bpp = (numSamples == 16) ? 16 : 8; + numSamples = 1; + } + else if (numFrags == 2) + { + ADDR_ASSERT(numSamples >= 4); + + bpp = numSamples*2; + numSamples = 1; + } + else if (numFrags == 4) + { + ADDR_ASSERT(numSamples >= 4); + + bpp = numSamples*4; + numSamples = 1; + } + else // numFrags == 8 + { + ADDR_ASSERT(numSamples >= 16); + + bpp = 16*4; + numSamples = 1; + } + } + } + else // Normal AA + { + if (pIn->resolved == FALSE) + { + bpp = ComputeFmaskNumPlanesFromNumSamples(numSamples); + numSamples = numSamples == 2 ? 
8 : numSamples; + } + else + { + // The same as 8XX + bpp = ComputeFmaskResolvedBppFromNumSamples(numSamples); + numSamples = 1; // 1x sample + } + } + + SafeAssign(pNumSamples, numSamples); + + return bpp; +} + +/** +**************************************************************************************************** +* SiLib::HwlOptimizeTileMode +* +* @brief +* Optimize tile mode on SI +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID SiLib::HwlOptimizeTileMode( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure + ) const +{ + AddrTileMode tileMode = pInOut->tileMode; + + if ((pInOut->flags.needEquation == TRUE) && + (IsMacroTiled(tileMode) == TRUE) && + (pInOut->numSamples <= 1)) + { + UINT_32 thickness = Thickness(tileMode); + + if (thickness > 1) + { + tileMode = ADDR_TM_1D_TILED_THICK; + } + else if (pInOut->numSlices > 1) + { + tileMode = ADDR_TM_1D_TILED_THIN1; + } + else + { + tileMode = ADDR_TM_2D_TILED_THIN1; + } + } + + if (tileMode != pInOut->tileMode) + { + pInOut->tileMode = tileMode; + } +} + +/** +**************************************************************************************************** +* SiLib::HwlOverrideTileMode +* +* @brief +* Override tile modes (for PRT only, avoid client passes in an invalid PRT mode for SI. 
+* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID SiLib::HwlOverrideTileMode( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure + ) const +{ + AddrTileMode tileMode = pInOut->tileMode; + + switch (tileMode) + { + case ADDR_TM_PRT_TILED_THIN1: + tileMode = ADDR_TM_2D_TILED_THIN1; + break; + + case ADDR_TM_PRT_TILED_THICK: + tileMode = ADDR_TM_2D_TILED_THICK; + break; + + case ADDR_TM_PRT_2D_TILED_THICK: + tileMode = ADDR_TM_2D_TILED_THICK; + break; + + case ADDR_TM_PRT_3D_TILED_THICK: + tileMode = ADDR_TM_3D_TILED_THICK; + break; + + default: + break; + } + + if (tileMode != pInOut->tileMode) + { + pInOut->tileMode = tileMode; + // Only PRT tile modes are overridden for now. Revisit this once new modes are added above. + pInOut->flags.prt = TRUE; + } +} + +/** +**************************************************************************************************** +* SiLib::HwlSetPrtTileMode +* +* @brief +* Set prt tile modes. +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID SiLib::HwlSetPrtTileMode( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure + ) const +{ + pInOut->tileMode = ADDR_TM_2D_TILED_THIN1; + pInOut->tileType = (pInOut->tileType == ADDR_DEPTH_SAMPLE_ORDER) ? + ADDR_DEPTH_SAMPLE_ORDER : ADDR_NON_DISPLAYABLE; + pInOut->flags.prt = TRUE; +} + +/** +**************************************************************************************************** +* SiLib::HwlSelectTileMode +* +* @brief +* Select tile modes. 
+* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID SiLib::HwlSelectTileMode( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure + ) const +{ + AddrTileMode tileMode; + AddrTileType tileType; + + if (pInOut->flags.volume) + { + if (pInOut->numSlices >= 8) + { + tileMode = ADDR_TM_2D_TILED_XTHICK; + } + else if (pInOut->numSlices >= 4) + { + tileMode = ADDR_TM_2D_TILED_THICK; + } + else + { + tileMode = ADDR_TM_2D_TILED_THIN1; + } + tileType = ADDR_NON_DISPLAYABLE; + } + else + { + tileMode = ADDR_TM_2D_TILED_THIN1; + + if (pInOut->flags.depth || pInOut->flags.stencil) + { + tileType = ADDR_DEPTH_SAMPLE_ORDER; + } + else if ((pInOut->bpp <= 32) || + (pInOut->flags.display == TRUE) || + (pInOut->flags.overlay == TRUE)) + { + tileType = ADDR_DISPLAYABLE; + } + else + { + tileType = ADDR_NON_DISPLAYABLE; + } + } + + if (pInOut->flags.prt) + { + tileMode = ADDR_TM_2D_TILED_THIN1; + tileType = (tileType == ADDR_DISPLAYABLE) ? ADDR_NON_DISPLAYABLE : tileType; + } + + pInOut->tileMode = tileMode; + pInOut->tileType = tileType; + + // Optimize tile mode if possible + pInOut->flags.opt4Space = TRUE; + + // Optimize tile mode if possible + OptimizeTileMode(pInOut); + + HwlOverrideTileMode(pInOut); +} + +/** +**************************************************************************************************** +* SiLib::HwlComputeMaxBaseAlignments +* +* @brief +* Gets maximum alignments +* @return +* maximum alignments +**************************************************************************************************** +*/ +UINT_32 SiLib::HwlComputeMaxBaseAlignments() const +{ + const UINT_32 pipes = HwlGetPipes(&m_tileTable[0].info); + + // Initial size is 64 KiB for PRT. 
+ UINT_32 maxBaseAlign = 64 * 1024; + + for (UINT_32 i = 0; i < m_noOfEntries; i++) + { + if ((IsMacroTiled(m_tileTable[i].mode) == TRUE) && + (IsPrtTileMode(m_tileTable[i].mode) == FALSE)) + { + // The maximum tile size is 16 byte-per-pixel and either 8-sample or 8-slice. + UINT_32 tileSize = Min(m_tileTable[i].info.tileSplitBytes, + MicroTilePixels * 8 * 16); + + UINT_32 baseAlign = tileSize * pipes * m_tileTable[i].info.banks * + m_tileTable[i].info.bankWidth * m_tileTable[i].info.bankHeight; + + if (baseAlign > maxBaseAlign) + { + maxBaseAlign = baseAlign; + } + } + } + + return maxBaseAlign; +} + +/** +**************************************************************************************************** +* SiLib::HwlComputeMaxMetaBaseAlignments +* +* @brief +* Gets maximum alignments for metadata +* @return +* maximum alignments for metadata +**************************************************************************************************** +*/ +UINT_32 SiLib::HwlComputeMaxMetaBaseAlignments() const +{ + UINT_32 maxPipe = 1; + + for (UINT_32 i = 0; i < m_noOfEntries; i++) + { + maxPipe = Max(maxPipe, HwlGetPipes(&m_tileTable[i].info)); + } + + return m_pipeInterleaveBytes * maxPipe; +} + +/** +**************************************************************************************************** +* SiLib::HwlComputeSurfaceAlignmentsMacroTiled +* +* @brief +* Hardware layer function to compute alignment request for macro tile mode +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID SiLib::HwlComputeSurfaceAlignmentsMacroTiled( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 mipLevel, ///< [in] mip level + UINT_32 numSamples, ///< [in] number of samples + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] Surface output + ) const +{ + if ((mipLevel == 0) && (flags.prt)) + { + UINT_32 
macroTileSize = pOut->blockWidth * pOut->blockHeight * numSamples * bpp / 8; + + if (macroTileSize < PrtTileSize) + { + UINT_32 numMacroTiles = PrtTileSize / macroTileSize; + + ADDR_ASSERT((PrtTileSize % macroTileSize) == 0); + + pOut->pitchAlign *= numMacroTiles; + pOut->baseAlign *= numMacroTiles; + } + } +} + +/** +**************************************************************************************************** +* SiLib::InitEquationTable +* +* @brief +* Initialize Equation table. +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID SiLib::InitEquationTable() +{ + ADDR_EQUATION_KEY equationKeyTable[EquationTableSize]; + memset(equationKeyTable, 0, sizeof(equationKeyTable)); + + memset(m_equationTable, 0, sizeof(m_equationTable)); + + memset(m_blockWidth, 0, sizeof(m_blockWidth)); + + memset(m_blockHeight, 0, sizeof(m_blockHeight)); + + memset(m_blockSlices, 0, sizeof(m_blockSlices)); + + // Loop all possible bpp + for (UINT_32 log2ElementBytes = 0; log2ElementBytes < MaxNumElementBytes; log2ElementBytes++) + { + // Get bits per pixel + UINT_32 bpp = 1 << (log2ElementBytes + 3); + + // Loop all possible tile index + for (INT_32 tileIndex = 0; tileIndex < static_cast(m_noOfEntries); tileIndex++) + { + UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX; + + TileConfig tileConfig = m_tileTable[tileIndex]; + + ADDR_SURFACE_FLAGS flags = {{0}}; + + // Compute tile info, hardcode numSamples to 1 because MSAA is not supported + // in swizzle pattern equation + HwlComputeMacroModeIndex(tileIndex, flags, bpp, 1, &tileConfig.info, NULL, NULL); + + // Check if the input is supported + if (IsEquationSupported(bpp, tileConfig, tileIndex, log2ElementBytes) == TRUE) + { + ADDR_EQUATION_KEY key = {{0}}; + + // Generate swizzle equation key from bpp and tile config + key.fields.log2ElementBytes = log2ElementBytes; + key.fields.tileMode = tileConfig.mode; + // Treat depth micro tile type and 
non-display micro tile type as the same key + // because they have the same equation actually + key.fields.microTileType = (tileConfig.type == ADDR_DEPTH_SAMPLE_ORDER) ? + ADDR_NON_DISPLAYABLE : tileConfig.type; + key.fields.pipeConfig = tileConfig.info.pipeConfig; + key.fields.numBanksLog2 = Log2(tileConfig.info.banks); + key.fields.bankWidth = tileConfig.info.bankWidth; + key.fields.bankHeight = tileConfig.info.bankHeight; + key.fields.macroAspectRatio = tileConfig.info.macroAspectRatio; + key.fields.prt = ((m_chipFamily == ADDR_CHIP_FAMILY_SI) && + ((1 << tileIndex) & SiPrtTileIndexMask)) ? 1 : 0; + + // Find in the table if the equation has been built based on the key + for (UINT_32 i = 0; i < m_numEquations; i++) + { + if (key.value == equationKeyTable[i].value) + { + equationIndex = i; + break; + } + } + + // If found, just fill the index into the lookup table and no need + // to generate the equation again. Otherwise, generate the equation. + if (equationIndex == ADDR_INVALID_EQUATION_INDEX) + { + ADDR_EQUATION equation; + ADDR_E_RETURNCODE retCode; + + memset(&equation, 0, sizeof(ADDR_EQUATION)); + + // Generate the equation + if (IsMicroTiled(tileConfig.mode)) + { + retCode = ComputeMicroTileEquation(log2ElementBytes, + tileConfig.mode, + tileConfig.type, + &equation); + } + else + { + retCode = ComputeMacroTileEquation(log2ElementBytes, + tileConfig.mode, + tileConfig.type, + &tileConfig.info, + &equation); + } + // Only fill the equation into the table if the return code is ADDR_OK, + // otherwise if the return code is not ADDR_OK, it indicates this is not + // a valid input, we do nothing but just fill invalid equation index + // into the lookup table. 
+ if (retCode == ADDR_OK) + { + equationIndex = m_numEquations; + ADDR_ASSERT(equationIndex < EquationTableSize); + + m_blockSlices[equationIndex] = Thickness(tileConfig.mode); + + if (IsMicroTiled(tileConfig.mode)) + { + m_blockWidth[equationIndex] = MicroTileWidth; + m_blockHeight[equationIndex] = MicroTileHeight; + } + else + { + const ADDR_TILEINFO* pTileInfo = &tileConfig.info; + + m_blockWidth[equationIndex] = + HwlGetPipes(pTileInfo) * MicroTileWidth * pTileInfo->bankWidth * + pTileInfo->macroAspectRatio; + m_blockHeight[equationIndex] = + MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks / + pTileInfo->macroAspectRatio; + + if (key.fields.prt) + { + UINT_32 macroTileSize = + m_blockWidth[equationIndex] * m_blockHeight[equationIndex] * + bpp / 8; + + if (macroTileSize < PrtTileSize) + { + UINT_32 numMacroTiles = PrtTileSize / macroTileSize; + + ADDR_ASSERT(macroTileSize == (1u << equation.numBits)); + ADDR_ASSERT((PrtTileSize % macroTileSize) == 0); + + UINT_32 numBits = Log2(numMacroTiles); + + UINT_32 xStart = Log2(m_blockWidth[equationIndex]) + + log2ElementBytes; + + m_blockWidth[equationIndex] *= numMacroTiles; + + for (UINT_32 i = 0; i < numBits; i++) + { + equation.addr[equation.numBits + i].valid = 1; + equation.addr[equation.numBits + i].index = xStart + i; + } + + equation.numBits += numBits; + } + } + } + + equationKeyTable[equationIndex] = key; + m_equationTable[equationIndex] = equation; + + m_numEquations++; + } + } + } + + // Fill the index into the lookup table, if the combination is not supported + // fill the invalid equation index + m_equationLookupTable[log2ElementBytes][tileIndex] = equationIndex; + } + + if (m_chipFamily == ADDR_CHIP_FAMILY_SI) + { + // For tile index 3 which is shared between PRT depth and uncompressed depth + m_uncompressDepthEqIndex = m_numEquations; + + for (UINT_32 log2ElemBytes = 0; log2ElemBytes < MaxNumElementBytes; log2ElemBytes++) + { + TileConfig tileConfig = m_tileTable[3]; + ADDR_EQUATION equation; 
+ ADDR_E_RETURNCODE retCode; + + memset(&equation, 0, sizeof(ADDR_EQUATION)); + + retCode = ComputeMacroTileEquation(log2ElemBytes, + tileConfig.mode, + tileConfig.type, + &tileConfig.info, + &equation); + + if (retCode == ADDR_OK) + { + UINT_32 equationIndex = m_numEquations; + ADDR_ASSERT(equationIndex < EquationTableSize); + + m_blockSlices[equationIndex] = 1; + + const ADDR_TILEINFO* pTileInfo = &tileConfig.info; + + m_blockWidth[equationIndex] = + HwlGetPipes(pTileInfo) * MicroTileWidth * pTileInfo->bankWidth * + pTileInfo->macroAspectRatio; + m_blockHeight[equationIndex] = + MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks / + pTileInfo->macroAspectRatio; + + m_equationTable[equationIndex] = equation; + + m_numEquations++; + } + } + } + } +} + +/** +**************************************************************************************************** +* SiLib::IsEquationSupported +* +* @brief +* Check if it is supported for given bpp and tile config to generate a equation. 
+* +* @return +* TRUE if supported +**************************************************************************************************** +*/ +BOOL_32 SiLib::IsEquationSupported( + UINT_32 bpp, ///< Bits per pixel + TileConfig tileConfig, ///< Tile config + INT_32 tileIndex, ///< Tile index + UINT_32 elementBytesLog2 ///< Log2 of element bytes + ) const +{ + BOOL_32 supported = TRUE; + + // Linear tile mode is not supported in swizzle pattern equation + if (IsLinear(tileConfig.mode)) + { + supported = FALSE; + } + // These tile modes are for Tex2DArray and Tex3D which has depth (num_slice > 1) use, + // which is not supported in swizzle pattern equation due to slice rotation + else if ((tileConfig.mode == ADDR_TM_2D_TILED_THICK) || + (tileConfig.mode == ADDR_TM_2D_TILED_XTHICK) || + (tileConfig.mode == ADDR_TM_3D_TILED_THIN1) || + (tileConfig.mode == ADDR_TM_3D_TILED_THICK) || + (tileConfig.mode == ADDR_TM_3D_TILED_XTHICK)) + { + supported = FALSE; + } + // Only 8bpp(stencil), 16bpp and 32bpp is supported for depth + else if ((tileConfig.type == ADDR_DEPTH_SAMPLE_ORDER) && (bpp > 32)) + { + supported = FALSE; + } + // Tile split is not supported in swizzle pattern equation + else if (IsMacroTiled(tileConfig.mode)) + { + UINT_32 thickness = Thickness(tileConfig.mode); + if (((bpp >> 3) * MicroTilePixels * thickness) > tileConfig.info.tileSplitBytes) + { + supported = FALSE; + } + + if ((supported == TRUE) && (m_chipFamily == ADDR_CHIP_FAMILY_SI)) + { + supported = m_EquationSupport[tileIndex][elementBytesLog2]; + } + } + + return supported; +} + +} // V1 +} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/src/r800/siaddrlib.h mesa-19.0.1/src/amd/addrlib/src/r800/siaddrlib.h --- mesa-18.3.3/src/amd/addrlib/src/r800/siaddrlib.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/r800/siaddrlib.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,347 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file siaddrlib.h +* @brief Contains the R800Lib class definition. 
+**************************************************************************************************** +*/ + +#ifndef __SI_ADDR_LIB_H__ +#define __SI_ADDR_LIB_H__ + +#include "addrlib1.h" +#include "egbaddrlib.h" + +namespace Addr +{ +namespace V1 +{ + +/** +**************************************************************************************************** +* @brief Describes the information in tile mode table +**************************************************************************************************** +*/ +struct TileConfig +{ + AddrTileMode mode; + AddrTileType type; + ADDR_TILEINFO info; +}; + +/** +**************************************************************************************************** +* @brief SI specific settings structure. +**************************************************************************************************** +*/ +struct SiChipSettings +{ + UINT_32 isSouthernIsland : 1; + UINT_32 isTahiti : 1; + UINT_32 isPitCairn : 1; + UINT_32 isCapeVerde : 1; + // Oland/Hainan are of GFXIP 6.0, similar with SI + UINT_32 isOland : 1; + UINT_32 isHainan : 1; + + // CI + UINT_32 isSeaIsland : 1; + UINT_32 isBonaire : 1; + UINT_32 isKaveri : 1; + UINT_32 isSpectre : 1; + UINT_32 isSpooky : 1; + UINT_32 isKalindi : 1; + // Hawaii is GFXIP 7.2 + UINT_32 isHawaii : 1; + + // VI + UINT_32 isVolcanicIslands : 1; + UINT_32 isIceland : 1; + UINT_32 isTonga : 1; + UINT_32 isFiji : 1; + UINT_32 isPolaris10 : 1; + UINT_32 isPolaris11 : 1; + UINT_32 isPolaris12 : 1; + UINT_32 isVegaM : 1; + UINT_32 isCarrizo : 1; +}; + +/** +**************************************************************************************************** +* @brief This class is the SI specific address library +* function set. 
+**************************************************************************************************** +*/ +class SiLib : public EgBasedLib +{ +public: + /// Creates SiLib object + static Addr::Lib* CreateObj(const Client* pClient) + { + VOID* pMem = Object::ClientAlloc(sizeof(SiLib), pClient); + return (pMem != NULL) ? new (pMem) SiLib(pClient) : NULL; + } + +protected: + SiLib(const Client* pClient); + virtual ~SiLib(); + + // Hwl interface - defined in AddrLib1 + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW( + const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, + ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const; + + virtual UINT_64 HwlComputeXmaskAddrFromCoord( + UINT_32 pitch, UINT_32 height, UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 numSlices, + UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8, + ADDR_TILEINFO* pTileInfo, UINT_32* pBitPosition) const; + + virtual VOID HwlComputeXmaskCoordFromAddr( + UINT_64 addr, UINT_32 bitPosition, UINT_32 pitch, UINT_32 height, UINT_32 numSlices, + UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8, + ADDR_TILEINFO* pTileInfo, UINT_32* pX, UINT_32* pY, UINT_32* pSlice) const; + + virtual ADDR_E_RETURNCODE HwlGetTileIndex( + const ADDR_GET_TILEINDEX_INPUT* pIn, + ADDR_GET_TILEINDEX_OUTPUT* pOut) const; + + virtual BOOL_32 HwlComputeMipLevel( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const; + + virtual ChipFamily HwlConvertChipFamily( + UINT_32 uChipFamily, UINT_32 uChipRevision); + + virtual BOOL_32 HwlInitGlobalParams( + const ADDR_CREATE_INPUT* pCreateIn); + + virtual ADDR_E_RETURNCODE HwlSetupTileCfg( + UINT_32 bpp, INT_32 index, INT_32 macroModeIndex, + ADDR_TILEINFO* pInfo, AddrTileMode* pMode = 0, AddrTileType* pType = 0) const; + + virtual VOID HwlComputeTileDataWidthAndHeightLinear( + UINT_32* pMacroWidth, UINT_32* pMacroHeight, + UINT_32 
bpp, ADDR_TILEINFO* pTileInfo) const; + + virtual UINT_64 HwlComputeHtileBytes( + UINT_32 pitch, UINT_32 height, UINT_32 bpp, + BOOL_32 isLinear, UINT_32 numSlices, UINT_64* pSliceBytes, UINT_32 baseAlign) const; + + virtual ADDR_E_RETURNCODE ComputeBankEquation( + UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, + ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const; + + virtual ADDR_E_RETURNCODE ComputePipeEquation( + UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, + ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const; + + virtual UINT_32 ComputePipeFromCoord( + UINT_32 x, UINT_32 y, UINT_32 slice, + AddrTileMode tileMode, UINT_32 pipeSwizzle, BOOL_32 ignoreSE, + ADDR_TILEINFO* pTileInfo) const; + + virtual UINT_32 HwlGetPipes(const ADDR_TILEINFO* pTileInfo) const; + + /// Pre-handler of 3x pitch (96 bit) adjustment + virtual UINT_32 HwlPreHandleBaseLvl3xPitch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const; + /// Post-handler of 3x pitch adjustment + virtual UINT_32 HwlPostHandleBaseLvl3xPitch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const; + + /// Dummy function to finalize the inheritance + virtual UINT_32 HwlComputeXmaskCoordYFrom8Pipe( + UINT_32 pipe, UINT_32 x) const; + + // Sub-hwl interface - defined in EgBasedLib + virtual VOID HwlSetupTileInfo( + AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags, + UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, + ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo, + AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + virtual UINT_32 HwlGetPitchAlignmentMicroTiled( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples) const; + + virtual UINT_64 HwlGetSizeAdjustmentMicroTiled( + UINT_32 thickness, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples, + UINT_32 baseAlign, UINT_32 pitchAlign, + UINT_32 *pPitch, UINT_32 *pHeight) const; + + virtual VOID 
HwlCheckLastMacroTiledLvl( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + virtual BOOL_32 HwlTileInfoEqual( + const ADDR_TILEINFO* pLeft, const ADDR_TILEINFO* pRight) const; + + virtual AddrTileMode HwlDegradeThickTileMode( + AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const; + + virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; + + virtual VOID HwlOptimizeTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; + + virtual VOID HwlSelectTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; + + /// Overwrite tile setting to PRT + virtual VOID HwlSetPrtTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; + + virtual BOOL_32 HwlSanityCheckMacroTiled( + ADDR_TILEINFO* pTileInfo) const + { + return TRUE; + } + + virtual UINT_32 HwlGetPitchAlignmentLinear(UINT_32 bpp, ADDR_SURFACE_FLAGS flags) const; + + virtual UINT_64 HwlGetSizeAdjustmentLinear( + AddrTileMode tileMode, + UINT_32 bpp, UINT_32 numSamples, UINT_32 baseAlign, UINT_32 pitchAlign, + UINT_32 *pPitch, UINT_32 *pHeight, UINT_32 *pHeightAlign) const; + + virtual VOID HwlComputeSurfaceCoord2DFromBankPipe( + AddrTileMode tileMode, UINT_32* pX, UINT_32* pY, UINT_32 slice, + UINT_32 bank, UINT_32 pipe, + UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices, + BOOL_32 ignoreSE, + ADDR_TILEINFO* pTileInfo) const; + + virtual UINT_32 HwlPreAdjustBank( + UINT_32 tileX, UINT_32 bank, ADDR_TILEINFO* pTileInfo) const; + + virtual INT_32 HwlPostCheckTileIndex( + const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type, + INT curIndex = TileIndexInvalid) const; + + virtual VOID HwlFmaskPreThunkSurfInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn, + const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut, + ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const; + + virtual VOID HwlFmaskPostThunkSurfInfo( + const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut, + 
ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const; + + virtual UINT_32 HwlComputeFmaskBits( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, + UINT_32* pNumSamples) const; + + virtual BOOL_32 HwlReduceBankWidthHeight( + UINT_32 tileSize, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples, + UINT_32 bankHeightAlign, UINT_32 pipes, + ADDR_TILEINFO* pTileInfo) const + { + return TRUE; + } + + virtual UINT_32 HwlComputeMaxBaseAlignments() const; + + virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const; + + virtual VOID HwlComputeSurfaceAlignmentsMacroTiled( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, + UINT_32 mipLevel, UINT_32 numSamples, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + // Get equation table pointer and number of equations + virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const + { + *ppEquationTable = m_equationTable; + + return m_numEquations; + } + + // Check if it is supported for given bpp and tile config to generate an equation + BOOL_32 IsEquationSupported( + UINT_32 bpp, TileConfig tileConfig, INT_32 tileIndex, UINT_32 elementBytesLog2) const; + + // Protected non-virtual functions + VOID ComputeTileCoordFromPipeAndElemIdx( + UINT_32 elemIdx, UINT_32 pipe, AddrPipeCfg pipeCfg, UINT_32 pitchInMacroTile, + UINT_32 x, UINT_32 y, UINT_32* pX, UINT_32* pY) const; + + UINT_32 TileCoordToMaskElementIndex( + UINT_32 tx, UINT_32 ty, AddrPipeCfg pipeConfig, + UINT_32 *macroShift, UINT_32 *elemIdxBits) const; + + BOOL_32 DecodeGbRegs( + const ADDR_REGISTER_VALUE* pRegValue); + + const TileConfig* GetTileSetting( + UINT_32 index) const; + + // Initialize equation table + VOID InitEquationTable(); + + UINT_32 GetPipePerSurf(AddrPipeCfg pipeConfig) const; + + static const UINT_32 TileTableSize = 32; + TileConfig m_tileTable[TileTableSize]; + UINT_32 m_noOfEntries; + + // Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp) + static const UINT_32 MaxNumElementBytes = 5; + + static const BOOL_32 
m_EquationSupport[TileTableSize][MaxNumElementBytes]; + + // Prt tile mode index mask + static const UINT_32 SiPrtTileIndexMask = ((1 << 3) | (1 << 5) | (1 << 6) | (1 << 7) | + (1 << 21) | (1 << 22) | (1 << 23) | (1 << 24) | + (1 << 25) | (1 << 30)); + + // More than half slots in tile mode table can't support equation + static const UINT_32 EquationTableSize = (MaxNumElementBytes * TileTableSize) / 2; + // Equation table + ADDR_EQUATION m_equationTable[EquationTableSize]; + UINT_32 m_numMacroBits[EquationTableSize]; + UINT_32 m_blockWidth[EquationTableSize]; + UINT_32 m_blockHeight[EquationTableSize]; + UINT_32 m_blockSlices[EquationTableSize]; + // Number of equation entries in the table + UINT_32 m_numEquations; + // Equation lookup table according to bpp and tile index + UINT_32 m_equationLookupTable[MaxNumElementBytes][TileTableSize]; + + UINT_32 m_uncompressDepthEqIndex; + + SiChipSettings m_settings; + +private: + + VOID ReadGbTileMode(UINT_32 regValue, TileConfig* pCfg) const; + BOOL_32 InitTileSettingTable(const UINT_32 *pSetting, UINT_32 noOfEntries); +}; + +} // V1 +} // Addr + +#endif + diff -Nru mesa-18.3.3/src/amd/Android.addrlib.mk mesa-19.0.1/src/amd/Android.addrlib.mk --- mesa-18.3.3/src/amd/Android.addrlib.mk 2017-12-02 01:35:56.000000000 +0000 +++ mesa-19.0.1/src/amd/Android.addrlib.mk 2019-03-31 23:16:37.000000000 +0000 @@ -33,12 +33,11 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/src \ $(MESA_TOP)/src/amd/common \ - $(MESA_TOP)/src/amd/addrlib \ - $(MESA_TOP)/src/amd/addrlib/core \ - $(MESA_TOP)/src/amd/addrlib/inc/chip/gfx9 \ - $(MESA_TOP)/src/amd/addrlib/inc/chip/r800 \ - $(MESA_TOP)/src/amd/addrlib/gfx9/chip \ - $(MESA_TOP)/src/amd/addrlib/r800/chip + $(MESA_TOP)/src/amd/addrlib/inc \ + $(MESA_TOP)/src/amd/addrlib/src \ + $(MESA_TOP)/src/amd/addrlib/src/core \ + $(MESA_TOP)/src/amd/addrlib/src/chip/gfx9 \ + $(MESA_TOP)/src/amd/addrlib/src/chip/r800 LOCAL_EXPORT_C_INCLUDE_DIRS := \ $(LOCAL_PATH) \ diff -Nru mesa-18.3.3/src/amd/common/ac_debug.c 
mesa-19.0.1/src/amd/common/ac_debug.c --- mesa-18.3.3/src/amd/common/ac_debug.c 2018-04-11 19:02:35.000000000 +0000 +++ mesa-19.0.1/src/amd/common/ac_debug.c 2019-03-31 23:16:37.000000000 +0000 @@ -233,6 +233,7 @@ if (op == PKT3_SET_CONTEXT_REG || op == PKT3_SET_CONFIG_REG || op == PKT3_SET_UCONFIG_REG || + op == PKT3_SET_UCONFIG_REG_INDEX || op == PKT3_SET_SH_REG) fprintf(f, COLOR_CYAN "%s%s" COLOR_CYAN ":\n", name, predicate); @@ -252,6 +253,7 @@ ac_parse_set_reg_packet(f, count, SI_CONFIG_REG_OFFSET, ib); break; case PKT3_SET_UCONFIG_REG: + case PKT3_SET_UCONFIG_REG_INDEX: ac_parse_set_reg_packet(f, count, CIK_UCONFIG_REG_OFFSET, ib); break; case PKT3_SET_SH_REG: diff -Nru mesa-18.3.3/src/amd/common/ac_gpu_info.c mesa-19.0.1/src/amd/common/ac_gpu_info.c --- mesa-18.3.3/src/amd/common/ac_gpu_info.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/common/ac_gpu_info.c 2019-03-31 23:16:37.000000000 +0000 @@ -455,7 +455,7 @@ ib_align = MAX2(ib_align, vcn_dec.ib_start_alignment); ib_align = MAX2(ib_align, vcn_enc.ib_start_alignment); ib_align = MAX2(ib_align, vcn_jpeg.ib_start_alignment); - assert(ib_align); + assert(ib_align); info->ib_start_alignment = ib_align; return true; diff -Nru mesa-18.3.3/src/amd/common/ac_llvm_build.c mesa-19.0.1/src/amd/common/ac_llvm_build.c --- mesa-18.3.3/src/amd/common/ac_llvm_build.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/amd/common/ac_llvm_build.c 2019-03-31 23:16:37.000000000 +0000 @@ -75,7 +75,7 @@ ctx->i16 = LLVMIntTypeInContext(ctx->context, 16); ctx->i32 = LLVMIntTypeInContext(ctx->context, 32); ctx->i64 = LLVMIntTypeInContext(ctx->context, 64); - ctx->intptr = HAVE_32BIT_POINTERS ? 
ctx->i32 : ctx->i64; + ctx->intptr = ctx->i32; ctx->f16 = LLVMHalfTypeInContext(ctx->context); ctx->f32 = LLVMFloatTypeInContext(ctx->context); ctx->f64 = LLVMDoubleTypeInContext(ctx->context); @@ -229,6 +229,15 @@ return LLVMBuildBitCast(ctx->builder, v, ac_to_integer_type(ctx, type), ""); } +LLVMValueRef +ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v) +{ + LLVMTypeRef type = LLVMTypeOf(v); + if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) + return v; + return ac_to_integer(ctx, v); +} + static LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t) { if (t == ctx->i16 || t == ctx->f16) @@ -897,6 +906,37 @@ } LLVMValueRef +ac_build_fs_interp_f16(struct ac_llvm_context *ctx, + LLVMValueRef llvm_chan, + LLVMValueRef attr_number, + LLVMValueRef params, + LLVMValueRef i, + LLVMValueRef j) +{ + LLVMValueRef args[6]; + LLVMValueRef p1; + + args[0] = i; + args[1] = llvm_chan; + args[2] = attr_number; + args[3] = ctx->i1false; + args[4] = params; + + p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16", + ctx->f32, args, 5, AC_FUNC_ATTR_READNONE); + + args[0] = p1; + args[1] = j; + args[2] = llvm_chan; + args[3] = attr_number; + args[4] = ctx->i1false; + args[5] = params; + + return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16", + ctx->f16, args, 6, AC_FUNC_ATTR_READNONE); +} + +LLVMValueRef ac_build_fs_interp_mov(struct ac_llvm_context *ctx, LLVMValueRef parameter, LLVMValueRef llvm_chan, @@ -915,6 +955,14 @@ } LLVMValueRef +ac_build_gep_ptr(struct ac_llvm_context *ctx, + LLVMValueRef base_ptr, + LLVMValueRef index) +{ + return LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, ""); +} + +LLVMValueRef ac_build_gep0(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index) @@ -1161,6 +1209,47 @@ ac_get_load_intr_attribs(can_speculate)); } +static LLVMValueRef +ac_build_llvm8_buffer_load_common(struct ac_llvm_context *ctx, + LLVMValueRef rsrc, + LLVMValueRef vindex, + LLVMValueRef voffset, + LLVMValueRef 
soffset, + unsigned num_channels, + bool glc, + bool slc, + bool can_speculate, + bool use_format, + bool structurized) +{ + LLVMValueRef args[5]; + int idx = 0; + args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); + if (structurized) + args[idx++] = vindex ? vindex : ctx->i32_0; + args[idx++] = voffset ? voffset : ctx->i32_0; + args[idx++] = soffset ? soffset : ctx->i32_0; + args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0); + unsigned func = CLAMP(num_channels, 1, 3) - 1; + + LLVMTypeRef types[] = {ctx->f32, ctx->v2f32, ctx->v4f32}; + const char *type_names[] = {"f32", "v2f32", "v4f32"}; + const char *indexing_kind = structurized ? "struct" : "raw"; + char name[256]; + + if (use_format) { + snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.format.%s", + indexing_kind, type_names[func]); + } else { + snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.%s", + indexing_kind, type_names[func]); + } + + return ac_build_intrinsic(ctx, name, types[func], args, + idx, + ac_get_load_intr_attribs(can_speculate)); +} + LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, @@ -1180,8 +1269,8 @@ if (soffset) offset = LLVMBuildAdd(ctx->builder, offset, soffset, ""); - /* TODO: VI and later generations can use SMEM with GLC=1.*/ - if (allow_smem && !glc && !slc) { + if (allow_smem && !slc && + (!glc || (HAVE_LLVM >= 0x0800 && ctx->chip_class >= VI))) { assert(vindex == NULL); LLVMValueRef result[8]; @@ -1191,11 +1280,19 @@ offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, 4, 0), ""); } - LLVMValueRef args[2] = {rsrc, offset}; - result[i] = ac_build_intrinsic(ctx, "llvm.SI.load.const.v4i32", - ctx->f32, args, 2, + const char *intrname = + HAVE_LLVM >= 0x0800 ? "llvm.amdgcn.s.buffer.load.f32" + : "llvm.SI.load.const.v4i32"; + unsigned num_args = HAVE_LLVM >= 0x0800 ? 3 : 2; + LLVMValueRef args[3] = { + rsrc, + offset, + glc ? 
ctx->i32_1 : ctx->i32_0, + }; + result[i] = ac_build_intrinsic(ctx, intrname, + ctx->f32, args, num_args, AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_LEGACY); + (HAVE_LLVM < 0x0800 ? AC_FUNC_ATTR_LEGACY : 0)); } if (num_channels == 1) return result[0]; @@ -1218,6 +1315,11 @@ bool glc, bool can_speculate) { + if (HAVE_LLVM >= 0x800) { + return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0, + num_channels, glc, false, + can_speculate, true, true); + } return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset, num_channels, glc, false, can_speculate, true); @@ -1231,6 +1333,12 @@ bool glc, bool can_speculate) { + if (HAVE_LLVM >= 0x800) { + return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0, + num_channels, glc, false, + can_speculate, true, true); + } + LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 2, 0), ""); LLVMValueRef stride = LLVMBuildExtractElement(ctx->builder, rsrc, ctx->i32_1, ""); stride = LLVMBuildLShr(ctx->builder, stride, LLVMConstInt(ctx->i32, 16, 0), ""); @@ -1342,99 +1450,28 @@ int idx, LLVMValueRef val) { - LLVMValueRef tl, trbl, args[2]; + unsigned tl_lanes[4], trbl_lanes[4]; + LLVMValueRef tl, trbl; LLVMValueRef result; - if (HAVE_LLVM >= 0x0700) { - unsigned tl_lanes[4], trbl_lanes[4]; - - for (unsigned i = 0; i < 4; ++i) { - tl_lanes[i] = i & mask; - trbl_lanes[i] = (i & mask) + idx; - } - - tl = ac_build_quad_swizzle(ctx, val, - tl_lanes[0], tl_lanes[1], - tl_lanes[2], tl_lanes[3]); - trbl = ac_build_quad_swizzle(ctx, val, - trbl_lanes[0], trbl_lanes[1], - trbl_lanes[2], trbl_lanes[3]); - } else if (ctx->chip_class >= VI) { - LLVMValueRef thread_id, tl_tid, trbl_tid; - thread_id = ac_get_thread_id(ctx); - - tl_tid = LLVMBuildAnd(ctx->builder, thread_id, - LLVMConstInt(ctx->i32, mask, false), ""); - - trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid, - LLVMConstInt(ctx->i32, idx, false), ""); - - args[0] = LLVMBuildMul(ctx->builder, tl_tid, - 
LLVMConstInt(ctx->i32, 4, false), ""); - args[1] = val; - tl = ac_build_intrinsic(ctx, - "llvm.amdgcn.ds.bpermute", ctx->i32, - args, 2, - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_CONVERGENT); - - args[0] = LLVMBuildMul(ctx->builder, trbl_tid, - LLVMConstInt(ctx->i32, 4, false), ""); - trbl = ac_build_intrinsic(ctx, - "llvm.amdgcn.ds.bpermute", ctx->i32, - args, 2, - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_CONVERGENT); - } else { - uint32_t masks[2] = {}; - - switch (mask) { - case AC_TID_MASK_TOP_LEFT: - masks[0] = 0x8000; - if (idx == 1) - masks[1] = 0x8055; - else - masks[1] = 0x80aa; - - break; - case AC_TID_MASK_TOP: - masks[0] = 0x8044; - masks[1] = 0x80ee; - break; - case AC_TID_MASK_LEFT: - masks[0] = 0x80a0; - masks[1] = 0x80f5; - break; - default: - assert(0); - } - - args[0] = val; - args[1] = LLVMConstInt(ctx->i32, masks[0], false); - - tl = ac_build_intrinsic(ctx, - "llvm.amdgcn.ds.swizzle", ctx->i32, - args, 2, - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_CONVERGENT); - - args[1] = LLVMConstInt(ctx->i32, masks[1], false); - trbl = ac_build_intrinsic(ctx, - "llvm.amdgcn.ds.swizzle", ctx->i32, - args, 2, - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_CONVERGENT); + for (unsigned i = 0; i < 4; ++i) { + tl_lanes[i] = i & mask; + trbl_lanes[i] = (i & mask) + idx; } + tl = ac_build_quad_swizzle(ctx, val, + tl_lanes[0], tl_lanes[1], + tl_lanes[2], tl_lanes[3]); + trbl = ac_build_quad_swizzle(ctx, val, + trbl_lanes[0], trbl_lanes[1], + trbl_lanes[2], trbl_lanes[3]); + tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, ""); trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, ""); result = LLVMBuildFSub(ctx->builder, trbl, tl, ""); - if (HAVE_LLVM >= 0x0700) { - result = ac_build_intrinsic(ctx, - "llvm.amdgcn.wqm.f32", ctx->f32, - &result, 1, 0); - } + result = ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.f32", ctx->f32, + &result, 1, 0); return result; } @@ -1679,171 +1716,6 @@ unreachable("bad atomic op"); } -/* LLVM 6 and older */ -static LLVMValueRef 
ac_build_image_opcode_llvm6(struct ac_llvm_context *ctx, - struct ac_image_args *a) -{ - LLVMValueRef args[16]; - LLVMTypeRef retty = ctx->v4f32; - const char *name = NULL; - const char *atomic_subop = ""; - char intr_name[128], coords_type[64]; - - bool sample = a->opcode == ac_image_sample || - a->opcode == ac_image_gather4 || - a->opcode == ac_image_get_lod; - bool atomic = a->opcode == ac_image_atomic || - a->opcode == ac_image_atomic_cmpswap; - bool da = a->dim == ac_image_cube || - a->dim == ac_image_1darray || - a->dim == ac_image_2darray || - a->dim == ac_image_2darraymsaa; - if (a->opcode == ac_image_get_lod) - da = false; - - unsigned num_coords = - a->opcode != ac_image_get_resinfo ? ac_num_coords(a->dim) : 0; - LLVMValueRef addr; - unsigned num_addr = 0; - - if (a->opcode == ac_image_get_lod) { - switch (a->dim) { - case ac_image_1darray: - num_coords = 1; - break; - case ac_image_2darray: - case ac_image_cube: - num_coords = 2; - break; - default: - break; - } - } - - if (a->offset) - args[num_addr++] = ac_to_integer(ctx, a->offset); - if (a->bias) - args[num_addr++] = ac_to_integer(ctx, a->bias); - if (a->compare) - args[num_addr++] = ac_to_integer(ctx, a->compare); - if (a->derivs[0]) { - unsigned num_derivs = ac_num_derivs(a->dim); - for (unsigned i = 0; i < num_derivs; ++i) - args[num_addr++] = ac_to_integer(ctx, a->derivs[i]); - } - for (unsigned i = 0; i < num_coords; ++i) - args[num_addr++] = ac_to_integer(ctx, a->coords[i]); - if (a->lod) - args[num_addr++] = ac_to_integer(ctx, a->lod); - - unsigned pad_goal = util_next_power_of_two(num_addr); - while (num_addr < pad_goal) - args[num_addr++] = LLVMGetUndef(ctx->i32); - - addr = ac_build_gather_values(ctx, args, num_addr); - - unsigned num_args = 0; - if (atomic || a->opcode == ac_image_store || a->opcode == ac_image_store_mip) { - args[num_args++] = a->data[0]; - if (a->opcode == ac_image_atomic_cmpswap) - args[num_args++] = a->data[1]; - } - - unsigned coords_arg = num_args; - if (sample) - 
args[num_args++] = ac_to_float(ctx, addr); - else - args[num_args++] = ac_to_integer(ctx, addr); - - args[num_args++] = a->resource; - if (sample) - args[num_args++] = a->sampler; - if (!atomic) { - args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0); - if (sample) - args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, 0); - args[num_args++] = a->cache_policy & ac_glc ? ctx->i1true : ctx->i1false; - args[num_args++] = a->cache_policy & ac_slc ? ctx->i1true : ctx->i1false; - args[num_args++] = ctx->i1false; /* lwe */ - args[num_args++] = LLVMConstInt(ctx->i1, da, 0); - } else { - args[num_args++] = ctx->i1false; /* r128 */ - args[num_args++] = LLVMConstInt(ctx->i1, da, 0); - args[num_args++] = a->cache_policy & ac_slc ? ctx->i1true : ctx->i1false; - } - - switch (a->opcode) { - case ac_image_sample: - name = "llvm.amdgcn.image.sample"; - break; - case ac_image_gather4: - name = "llvm.amdgcn.image.gather4"; - break; - case ac_image_load: - name = "llvm.amdgcn.image.load"; - break; - case ac_image_load_mip: - name = "llvm.amdgcn.image.load.mip"; - break; - case ac_image_store: - name = "llvm.amdgcn.image.store"; - retty = ctx->voidt; - break; - case ac_image_store_mip: - name = "llvm.amdgcn.image.store.mip"; - retty = ctx->voidt; - break; - case ac_image_atomic: - case ac_image_atomic_cmpswap: - name = "llvm.amdgcn.image.atomic."; - retty = ctx->i32; - if (a->opcode == ac_image_atomic_cmpswap) { - atomic_subop = "cmpswap"; - } else { - atomic_subop = get_atomic_name(a->atomic); - } - break; - case ac_image_get_lod: - name = "llvm.amdgcn.image.getlod"; - break; - case ac_image_get_resinfo: - name = "llvm.amdgcn.image.getresinfo"; - break; - default: - unreachable("invalid image opcode"); - } - - ac_build_type_name_for_intr(LLVMTypeOf(args[coords_arg]), coords_type, - sizeof(coords_type)); - - if (atomic) { - snprintf(intr_name, sizeof(intr_name), "llvm.amdgcn.image.atomic.%s.%s", - atomic_subop, coords_type); - } else { - bool lod_suffix = - a->lod && (a->opcode == 
ac_image_sample || a->opcode == ac_image_gather4); - - snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.v4f32.%s.v8i32", - name, - a->compare ? ".c" : "", - a->bias ? ".b" : - lod_suffix ? ".l" : - a->derivs[0] ? ".d" : - a->level_zero ? ".lz" : "", - a->offset ? ".o" : "", - coords_type); - } - - LLVMValueRef result = - ac_build_intrinsic(ctx, intr_name, retty, args, num_args, - a->attributes); - if (!sample && retty == ctx->v4f32) { - result = LLVMBuildBitCast(ctx->builder, result, - ctx->v4i32, ""); - } - return result; -} - LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_args *a) { @@ -1868,9 +1740,6 @@ (a->level_zero ? 1 : 0) + (a->derivs[0] ? 1 : 0) <= 1); - if (HAVE_LLVM < 0x0700) - return ac_build_image_opcode_llvm6(ctx, a); - if (a->opcode == ac_image_get_lod) { switch (dim) { case ac_image_1darray: @@ -2659,9 +2528,6 @@ LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type) { - if (!HAVE_32BIT_POINTERS) - return ac_array_in_const_addr_space(elem_type); - return LLVMPointerType(LLVMArrayType(elem_type, 0), AC_ADDR_SPACE_CONST_32BIT); } @@ -2807,8 +2673,7 @@ ctx->flow_depth--; } -static void if_cond_emit(struct ac_llvm_context *ctx, LLVMValueRef cond, - int label_id) +void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id) { struct ac_llvm_flow *flow = push_flow(ctx); LLVMBasicBlockRef if_block; @@ -2825,7 +2690,7 @@ { LLVMValueRef cond = LLVMBuildFCmp(ctx->builder, LLVMRealUNE, value, ctx->f32_0, ""); - if_cond_emit(ctx, cond, label_id); + ac_build_ifcc(ctx, cond, label_id); } void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value, @@ -2834,7 +2699,7 @@ LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, ac_to_integer(ctx, value), ctx->i32_0, ""); - if_cond_emit(ctx, cond, label_id); + ac_build_ifcc(ctx, cond, label_id); } LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type, @@ -3294,24 +3159,44 @@ } } -/* TODO: add inclusive and excluse scan 
functions for SI chip class. */ +/** + * \param maxprefix specifies that the result only needs to be correct for a + * prefix of this many threads + * + * TODO: add inclusive and excluse scan functions for SI chip class. + */ static LLVMValueRef -ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, LLVMValueRef identity) +ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, LLVMValueRef identity, + unsigned maxprefix) { LLVMValueRef result, tmp; result = src; + if (maxprefix <= 1) + return result; tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false); result = ac_build_alu_op(ctx, result, tmp, op); + if (maxprefix <= 2) + return result; tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(2), 0xf, 0xf, false); result = ac_build_alu_op(ctx, result, tmp, op); + if (maxprefix <= 3) + return result; tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(3), 0xf, 0xf, false); result = ac_build_alu_op(ctx, result, tmp, op); + if (maxprefix <= 4) + return result; tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(4), 0xf, 0xe, false); result = ac_build_alu_op(ctx, result, tmp, op); + if (maxprefix <= 8) + return result; tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(8), 0xf, 0xc, false); result = ac_build_alu_op(ctx, result, tmp, op); + if (maxprefix <= 16) + return result; tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false); result = ac_build_alu_op(ctx, result, tmp, op); + if (maxprefix <= 32) + return result; tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false); result = ac_build_alu_op(ctx, result, tmp, op); return result; @@ -3320,14 +3205,24 @@ LLVMValueRef ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op) { - ac_build_optimization_barrier(ctx, &src); LLVMValueRef result; - LLVMValueRef identity = get_reduction_identity(ctx, op, - ac_get_type_size(LLVMTypeOf(src))); - result = LLVMBuildBitCast(ctx->builder, - 
ac_build_set_inactive(ctx, src, identity), - LLVMTypeOf(identity), ""); - result = ac_build_scan(ctx, op, result, identity); + + if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) { + LLVMBuilderRef builder = ctx->builder; + src = LLVMBuildZExt(builder, src, ctx->i32, ""); + result = ac_build_ballot(ctx, src); + result = ac_build_mbcnt(ctx, result); + result = LLVMBuildAdd(builder, result, src, ""); + return result; + } + + ac_build_optimization_barrier(ctx, &src); + + LLVMValueRef identity = + get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src))); + result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity), + LLVMTypeOf(identity), ""); + result = ac_build_scan(ctx, op, result, identity, 64); return ac_build_wwm(ctx, result); } @@ -3335,15 +3230,24 @@ LLVMValueRef ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op) { - ac_build_optimization_barrier(ctx, &src); LLVMValueRef result; - LLVMValueRef identity = get_reduction_identity(ctx, op, - ac_get_type_size(LLVMTypeOf(src))); - result = LLVMBuildBitCast(ctx->builder, - ac_build_set_inactive(ctx, src, identity), - LLVMTypeOf(identity), ""); + + if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) { + LLVMBuilderRef builder = ctx->builder; + src = LLVMBuildZExt(builder, src, ctx->i32, ""); + result = ac_build_ballot(ctx, src); + result = ac_build_mbcnt(ctx, result); + return result; + } + + ac_build_optimization_barrier(ctx, &src); + + LLVMValueRef identity = + get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src))); + result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity), + LLVMTypeOf(identity), ""); result = ac_build_dpp(ctx, identity, result, dpp_wf_sr1, 0xf, 0xf, false); - result = ac_build_scan(ctx, op, result, identity); + result = ac_build_scan(ctx, op, result, identity, 64); return ac_build_wwm(ctx, result); } @@ -3401,6 +3305,175 @@ } } +/** + * "Top half" of a scan that reduces per-wave values across 
an entire + * workgroup. + * + * The source value must be present in the highest lane of the wave, and the + * highest lane must be live. + */ +void +ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) +{ + if (ws->maxwaves <= 1) + return; + + const LLVMValueRef i32_63 = LLVMConstInt(ctx->i32, 63, false); + LLVMBuilderRef builder = ctx->builder; + LLVMValueRef tid = ac_get_thread_id(ctx); + LLVMValueRef tmp; + + tmp = LLVMBuildICmp(builder, LLVMIntEQ, tid, i32_63, ""); + ac_build_ifcc(ctx, tmp, 1000); + LLVMBuildStore(builder, ws->src, LLVMBuildGEP(builder, ws->scratch, &ws->waveidx, 1, "")); + ac_build_endif(ctx, 1000); +} + +/** + * "Bottom half" of a scan that reduces per-wave values across an entire + * workgroup. + * + * The caller must place a barrier between the top and bottom halves. + */ +void +ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) +{ + const LLVMTypeRef type = LLVMTypeOf(ws->src); + const LLVMValueRef identity = + get_reduction_identity(ctx, ws->op, ac_get_type_size(type)); + + if (ws->maxwaves <= 1) { + ws->result_reduce = ws->src; + ws->result_inclusive = ws->src; + ws->result_exclusive = identity; + return; + } + assert(ws->maxwaves <= 32); + + LLVMBuilderRef builder = ctx->builder; + LLVMValueRef tid = ac_get_thread_id(ctx); + LLVMBasicBlockRef bbs[2]; + LLVMValueRef phivalues_scan[2]; + LLVMValueRef tmp, tmp2; + + bbs[0] = LLVMGetInsertBlock(builder); + phivalues_scan[0] = LLVMGetUndef(type); + + if (ws->enable_reduce) + tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->numwaves, ""); + else if (ws->enable_inclusive) + tmp = LLVMBuildICmp(builder, LLVMIntULE, tid, ws->waveidx, ""); + else + tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->waveidx, ""); + ac_build_ifcc(ctx, tmp, 1001); + { + tmp = LLVMBuildLoad(builder, LLVMBuildGEP(builder, ws->scratch, &tid, 1, ""), ""); + + ac_build_optimization_barrier(ctx, &tmp); + + bbs[1] = LLVMGetInsertBlock(builder); + phivalues_scan[1] = 
ac_build_scan(ctx, ws->op, tmp, identity, ws->maxwaves); + } + ac_build_endif(ctx, 1001); + + const LLVMValueRef scan = ac_build_phi(ctx, type, 2, phivalues_scan, bbs); + + if (ws->enable_reduce) { + tmp = LLVMBuildSub(builder, ws->numwaves, ctx->i32_1, ""); + ws->result_reduce = ac_build_readlane(ctx, scan, tmp); + } + if (ws->enable_inclusive) + ws->result_inclusive = ac_build_readlane(ctx, scan, ws->waveidx); + if (ws->enable_exclusive) { + tmp = LLVMBuildSub(builder, ws->waveidx, ctx->i32_1, ""); + tmp = ac_build_readlane(ctx, scan, tmp); + tmp2 = LLVMBuildICmp(builder, LLVMIntEQ, ws->waveidx, ctx->i32_0, ""); + ws->result_exclusive = LLVMBuildSelect(builder, tmp2, identity, tmp, ""); + } +} + +/** + * Inclusive scan of a per-wave value across an entire workgroup. + * + * This implies an s_barrier instruction. + * + * Unlike ac_build_inclusive_scan, the caller \em must ensure that all threads + * of the workgroup are live. (This requirement cannot easily be relaxed in a + * useful manner because of the barrier in the algorithm.) + */ +void +ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) +{ + ac_build_wg_wavescan_top(ctx, ws); + ac_build_s_barrier(ctx); + ac_build_wg_wavescan_bottom(ctx, ws); +} + +/** + * "Top half" of a scan that reduces per-thread values across an entire + * workgroup. + * + * All lanes must be active when this code runs. 
+ */ +void +ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) +{ + if (ws->enable_exclusive) { + ws->extra = ac_build_exclusive_scan(ctx, ws->src, ws->op); + if (LLVMTypeOf(ws->src) == ctx->i1 && ws->op == nir_op_iadd) + ws->src = LLVMBuildZExt(ctx->builder, ws->src, ctx->i32, ""); + ws->src = ac_build_alu_op(ctx, ws->extra, ws->src, ws->op); + } else { + ws->src = ac_build_inclusive_scan(ctx, ws->src, ws->op); + } + + bool enable_inclusive = ws->enable_inclusive; + bool enable_exclusive = ws->enable_exclusive; + ws->enable_inclusive = false; + ws->enable_exclusive = ws->enable_exclusive || enable_inclusive; + ac_build_wg_wavescan_top(ctx, ws); + ws->enable_inclusive = enable_inclusive; + ws->enable_exclusive = enable_exclusive; +} + +/** + * "Bottom half" of a scan that reduces per-thread values across an entire + * workgroup. + * + * The caller must place a barrier between the top and bottom halves. + */ +void +ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) +{ + bool enable_inclusive = ws->enable_inclusive; + bool enable_exclusive = ws->enable_exclusive; + ws->enable_inclusive = false; + ws->enable_exclusive = ws->enable_exclusive || enable_inclusive; + ac_build_wg_wavescan_bottom(ctx, ws); + ws->enable_inclusive = enable_inclusive; + ws->enable_exclusive = enable_exclusive; + + /* ws->result_reduce is already the correct value */ + if (ws->enable_inclusive) + ws->result_inclusive = ac_build_alu_op(ctx, ws->result_exclusive, ws->src, ws->op); + if (ws->enable_exclusive) + ws->result_exclusive = ac_build_alu_op(ctx, ws->result_exclusive, ws->extra, ws->op); +} + +/** + * A scan that reduces per-thread values across an entire workgroup. + * + * The caller must ensure that all lanes are active when this code runs + * (WWM is insufficient!), because there is an implied barrier. 
+ */ +void +ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) +{ + ac_build_wg_scan_top(ctx, ws); + ac_build_s_barrier(ctx); + ac_build_wg_scan_bottom(ctx, ws); +} + LLVMValueRef ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3) diff -Nru mesa-18.3.3/src/amd/common/ac_llvm_build.h mesa-19.0.1/src/amd/common/ac_llvm_build.h --- mesa-18.3.3/src/amd/common/ac_llvm_build.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/common/ac_llvm_build.h 2019-03-31 23:16:37.000000000 +0000 @@ -34,14 +34,12 @@ extern "C" { #endif -#define HAVE_32BIT_POINTERS (HAVE_LLVM >= 0x0700) - enum { - AC_ADDR_SPACE_FLAT = HAVE_LLVM >= 0x0700 ? 0 : 4, /* Slower than global. */ + AC_ADDR_SPACE_FLAT = 0, /* Slower than global. */ AC_ADDR_SPACE_GLOBAL = 1, - AC_ADDR_SPACE_GDS = HAVE_LLVM >= 0x0700 ? 2 : 5, + AC_ADDR_SPACE_GDS = 2, AC_ADDR_SPACE_LDS = 3, - AC_ADDR_SPACE_CONST = HAVE_LLVM >= 0x0700 ? 4 : 2, /* Global allowing SMEM. */ + AC_ADDR_SPACE_CONST = 4, /* Global allowing SMEM. 
*/ AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */ }; @@ -128,6 +126,7 @@ LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t); LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v); +LLVMValueRef ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v); LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t); LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v); @@ -218,6 +217,14 @@ LLVMValueRef j); LLVMValueRef +ac_build_fs_interp_f16(struct ac_llvm_context *ctx, + LLVMValueRef llvm_chan, + LLVMValueRef attr_number, + LLVMValueRef params, + LLVMValueRef i, + LLVMValueRef j); + +LLVMValueRef ac_build_fs_interp_mov(struct ac_llvm_context *ctx, LLVMValueRef parameter, LLVMValueRef llvm_chan, @@ -225,6 +232,11 @@ LLVMValueRef params); LLVMValueRef +ac_build_gep_ptr(struct ac_llvm_context *ctx, + LLVMValueRef base_ptr, + LLVMValueRef index); + +LLVMValueRef ac_build_gep0(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index); @@ -481,6 +493,7 @@ void ac_build_else(struct ac_llvm_context *ctx, int lable_id); void ac_build_endif(struct ac_llvm_context *ctx, int lable_id); void ac_build_endloop(struct ac_llvm_context *ctx, int lable_id); +void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id); void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value, int lable_id); void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value, @@ -524,6 +537,42 @@ LLVMValueRef ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size); +/** + * Common arguments for a scan/reduce operation that accumulates per-wave + * values across an entire workgroup, while respecting the order of waves. + */ +struct ac_wg_scan { + bool enable_reduce; + bool enable_exclusive; + bool enable_inclusive; + nir_op op; + LLVMValueRef src; /* clobbered! 
*/ + LLVMValueRef result_reduce; + LLVMValueRef result_exclusive; + LLVMValueRef result_inclusive; + LLVMValueRef extra; + LLVMValueRef waveidx; + LLVMValueRef numwaves; /* only needed for "reduce" operations */ + + /* T addrspace(LDS) pointer to the same type as value, at least maxwaves entries */ + LLVMValueRef scratch; + unsigned maxwaves; +}; + +void +ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); +void +ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); +void +ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); + +void +ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); +void +ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); +void +ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); + LLVMValueRef ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3); diff -Nru mesa-18.3.3/src/amd/common/ac_llvm_helper.cpp mesa-19.0.1/src/amd/common/ac_llvm_helper.cpp --- mesa-18.3.3/src/amd/common/ac_llvm_helper.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/common/ac_llvm_helper.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -39,9 +39,6 @@ #include #include -#if HAVE_LLVM < 0x0700 -#include "llvm/Support/raw_ostream.h" -#endif void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes) { @@ -132,9 +129,7 @@ llvm::TargetMachine *TM = reinterpret_cast(tm); if (TM->addPassesToEmitFile(p->passmgr, p->ostream, -#if HAVE_LLVM >= 0x0700 nullptr, -#endif llvm::TargetMachine::CGFT_ObjectFile)) { fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n"); delete p; @@ -170,7 +165,5 @@ void ac_enable_global_isel(LLVMTargetMachineRef tm) { -#if HAVE_LLVM >= 0x0700 reinterpret_cast(tm)->setGlobalISel(true); -#endif } diff -Nru mesa-18.3.3/src/amd/common/ac_llvm_util.c mesa-19.0.1/src/amd/common/ac_llvm_util.c --- 
mesa-18.3.3/src/amd/common/ac_llvm_util.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/common/ac_llvm_util.c 2019-03-31 23:16:37.000000000 +0000 @@ -30,9 +30,7 @@ #include #include #include -#if HAVE_LLVM >= 0x0700 #include -#endif #include "c11/threads.h" #include "gallivm/lp_bld_misc.h" #include "util/u_math.h" @@ -132,11 +130,11 @@ case CHIP_RAVEN: return "gfx902"; case CHIP_VEGA12: - return HAVE_LLVM >= 0x0700 ? "gfx904" : "gfx902"; + return "gfx904"; case CHIP_VEGA20: - return HAVE_LLVM >= 0x0700 ? "gfx906" : "gfx902"; + return "gfx906"; case CHIP_RAVEN2: - return "gfx902"; /* TODO: use gfx909 when it's available */ + return HAVE_LLVM >= 0x0800 ? "gfx909" : "gfx902"; default: return ""; } @@ -153,7 +151,8 @@ LLVMTargetRef target = ac_get_llvm_target(triple); snprintf(features, sizeof(features), - "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s%s", + "+DumpCode,-fp32-denormals,+fp64-denormals%s%s%s%s%s", + HAVE_LLVM >= 0x0800 ? "" : ",+vgpr-spilling", tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "", tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "", tm_options & AC_TM_FORCE_DISABLE_XNACK ? 
",-xnack" : "", @@ -302,7 +301,6 @@ bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, - bool okay_to_leak_target_library_info, enum radeon_family family, enum ac_target_machine_options tm_options) { @@ -323,12 +321,10 @@ goto fail; } - if (okay_to_leak_target_library_info || (HAVE_LLVM >= 0x0700)) { - compiler->target_library_info = - ac_create_target_library_info(triple); - if (!compiler->target_library_info) - goto fail; - } + compiler->target_library_info = + ac_create_target_library_info(triple); + if (!compiler->target_library_info) + goto fail; compiler->passmgr = ac_create_passmgr(compiler->target_library_info, tm_options & AC_TM_CHECK_IR); @@ -346,11 +342,8 @@ { if (compiler->passmgr) LLVMDisposePassManager(compiler->passmgr); -#if HAVE_LLVM >= 0x0700 - /* This crashes on LLVM 5.0 and 6.0 and Ubuntu 18.04, so leak it there. */ if (compiler->target_library_info) ac_dispose_target_library_info(compiler->target_library_info); -#endif if (compiler->low_opt_tm) LLVMDisposeTargetMachine(compiler->low_opt_tm); if (compiler->tm) diff -Nru mesa-18.3.3/src/amd/common/ac_llvm_util.h mesa-19.0.1/src/amd/common/ac_llvm_util.h --- mesa-18.3.3/src/amd/common/ac_llvm_util.h 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/common/ac_llvm_util.h 2019-03-31 23:16:37.000000000 +0000 @@ -134,7 +134,6 @@ bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, - bool okay_to_leak_target_library_info, enum radeon_family family, enum ac_target_machine_options tm_options); void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler); diff -Nru mesa-18.3.3/src/amd/common/ac_nir_to_llvm.c mesa-19.0.1/src/amd/common/ac_nir_to_llvm.c --- mesa-18.3.3/src/amd/common/ac_nir_to_llvm.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/amd/common/ac_nir_to_llvm.c 2019-03-31 23:16:37.000000000 +0000 @@ -270,8 +270,9 @@ { LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, ctx->i32_0, ""); - return LLVMBuildSelect(ctx->builder, v, 
ac_to_integer(ctx, src1), - ac_to_integer(ctx, src2), ""); + return LLVMBuildSelect(ctx->builder, v, + ac_to_integer_or_pointer(ctx, src1), + ac_to_integer_or_pointer(ctx, src2), ""); } static LLVMValueRef emit_minmax_int(struct ac_llvm_context *ctx, @@ -428,12 +429,12 @@ { LLVMValueRef result; - if (HAVE_LLVM < 0x0700) { + if (HAVE_LLVM >= 0x0800) { LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), ""); result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed); result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, ""); } else { - /* FIXME: LLVM 7 returns incorrect result when count is 0. + /* FIXME: LLVM 7+ returns incorrect result when count is 0. * https://bugs.freedesktop.org/show_bug.cgi?id=107276 */ LLVMValueRef zero = ctx->i32_0; @@ -686,34 +687,34 @@ LLVMTypeOf(src[0]), ""), ""); break; - case nir_op_ilt: + case nir_op_ilt32: result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]); break; - case nir_op_ine: + case nir_op_ine32: result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]); break; - case nir_op_ieq: + case nir_op_ieq32: result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]); break; - case nir_op_ige: + case nir_op_ige32: result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]); break; - case nir_op_ult: + case nir_op_ult32: result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]); break; - case nir_op_uge: + case nir_op_uge32: result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]); break; - case nir_op_feq: + case nir_op_feq32: result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]); break; - case nir_op_fne: + case nir_op_fne32: result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]); break; - case nir_op_flt: + case nir_op_flt32: result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]); break; - case nir_op_fge: + case nir_op_fge32: result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]); break; case nir_op_fabs: @@ -915,7 
+916,7 @@ else result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, ""); break; - case nir_op_bcsel: + case nir_op_b32csel: result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]); break; case nir_op_find_lsb: @@ -940,16 +941,20 @@ src[1] = ac_to_integer(&ctx->ac, src[1]); result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]); break; - case nir_op_b2f: + case nir_op_b2f16: + case nir_op_b2f32: + case nir_op_b2f64: result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); break; - case nir_op_f2b: + case nir_op_f2b32: result = emit_f2b(&ctx->ac, src[0]); break; - case nir_op_b2i: + case nir_op_b2i16: + case nir_op_b2i32: + case nir_op_b2i64: result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); break; - case nir_op_i2b: + case nir_op_i2b32: src[0] = ac_to_integer(&ctx->ac, src[0]); result = emit_i2b(&ctx->ac, src[0]); break; @@ -1095,7 +1100,7 @@ if (result) { assert(instr->dest.dest.is_ssa); - result = ac_to_integer(&ctx->ac, result); + result = ac_to_integer_or_pointer(&ctx->ac, result); ctx->ssa_defs[instr->dest.dest.ssa.index] = result; } } @@ -1458,6 +1463,30 @@ } } +static unsigned get_cache_policy(struct ac_nir_context *ctx, + enum gl_access_qualifier access, + bool may_store_unaligned, + bool writeonly_memory) +{ + unsigned cache_policy = 0; + + /* SI has a TC L1 bug causing corruption of 8bit/16bit stores. All + * store opcodes not aligned to a dword are affected. The only way to + * get unaligned stores is through shader images. + */ + if (((may_store_unaligned && ctx->ac.chip_class == SI) || + /* If this is write-only, don't keep data in L1 to prevent + * evicting L1 cache lines that may be needed by other + * instructions. 
+ */ + writeonly_memory || + access & (ACCESS_COHERENT | ACCESS_VOLATILE))) { + cache_policy |= ac_glc; + } + + return cache_policy; +} + static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { @@ -1466,10 +1495,9 @@ int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8; unsigned writemask = nir_intrinsic_write_mask(instr); enum gl_access_qualifier access = nir_intrinsic_access(instr); - LLVMValueRef glc = ctx->ac.i1false; - - if (access & (ACCESS_VOLATILE | ACCESS_COHERENT)) - glc = ctx->ac.i1true; + bool writeonly_memory = access & ACCESS_NON_READABLE; + unsigned cache_policy = get_cache_policy(ctx, access, false, writeonly_memory); + LLVMValueRef glc = (cache_policy & ac_glc) ? ctx->ac.i1true : ctx->ac.i1false; LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, get_src(ctx, instr->src[1]), true); @@ -1625,10 +1653,8 @@ int elem_size_bytes = instr->dest.ssa.bit_size / 8; int num_components = instr->num_components; enum gl_access_qualifier access = nir_intrinsic_access(instr); - LLVMValueRef glc = ctx->ac.i1false; - - if (access & (ACCESS_VOLATILE | ACCESS_COHERENT)) - glc = ctx->ac.i1true; + unsigned cache_policy = get_cache_policy(ctx, access, false, false); + LLVMValueRef glc = (cache_policy & ac_glc) ? 
ctx->ac.i1true : ctx->ac.i1false; LLVMValueRef offset = get_src(ctx, instr->src[1]); LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, @@ -1641,7 +1667,7 @@ LLVMValueRef results[4]; for (int i = 0; i < num_components;) { int num_elems = num_components - i; - if (elem_size_bytes < 4) + if (elem_size_bytes < 4 && nir_intrinsic_align(instr) % 4 != 0) num_elems = 1; if (num_elems * elem_size_bytes > 16) num_elems = 16 / elem_size_bytes; @@ -1858,23 +1884,36 @@ nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); LLVMValueRef values[8]; - int idx = var->data.driver_location; + int idx = 0; int ve = instr->dest.ssa.num_components; - unsigned comp = var->data.location_frac; + unsigned comp = 0; LLVMValueRef indir_index; LLVMValueRef ret; unsigned const_index; - unsigned stride = var->data.compact ? 1 : 4; - bool vs_in = ctx->stage == MESA_SHADER_VERTEX && - var->data.mode == nir_var_shader_in; - - get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), vs_in, NULL, NULL, - &const_index, &indir_index); + unsigned stride = 4; + int mode = nir_var_mem_shared; + + if (var) { + bool vs_in = ctx->stage == MESA_SHADER_VERTEX && + var->data.mode == nir_var_shader_in; + idx = var->data.driver_location; + comp = var->data.location_frac; + mode = var->data.mode; + + get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), vs_in, NULL, NULL, + &const_index, &indir_index); + + if (var->data.compact) { + stride = 1; + const_index += comp; + comp = 0; + } + } if (instr->dest.ssa.bit_size == 64) ve *= 2; - switch (var->data.mode) { + switch (mode) { case nir_var_shader_in: if (ctx->stage == MESA_SHADER_TESS_CTRL || ctx->stage == MESA_SHADER_TESS_EVAL) { @@ -1911,7 +1950,7 @@ values[chan] = ctx->abi->inputs[idx + chan + const_index * stride]; } break; - case nir_var_local: + case nir_var_function_temp: for (unsigned chan = 0; chan < ve; chan++) { if (indir_index) { unsigned count = 
glsl_count_attribute_slots( @@ -1929,7 +1968,7 @@ } } break; - case nir_var_shared: { + case nir_var_mem_shared: { LLVMValueRef address = get_src(ctx, instr->src[0]); LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, ""); return LLVMBuildBitCast(ctx->ac.builder, val, @@ -1971,18 +2010,28 @@ visit_store_var(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { - nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); + nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr); + nir_variable *var = nir_deref_instr_get_variable(deref); LLVMValueRef temp_ptr, value; - int idx = var->data.driver_location; - unsigned comp = var->data.location_frac; + int idx = 0; + unsigned comp = 0; LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[1])); int writemask = instr->const_index[0]; LLVMValueRef indir_index; unsigned const_index; - get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), false, - NULL, NULL, &const_index, &indir_index); + if (var) { + get_deref_offset(ctx, deref, false, + NULL, NULL, &const_index, &indir_index); + idx = var->data.driver_location; + comp = var->data.location_frac; + + if (var->data.compact) { + const_index += comp; + comp = 0; + } + } if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) { @@ -1995,7 +2044,7 @@ writemask = writemask << comp; - switch (var->data.mode) { + switch (deref->mode) { case nir_var_shader_out: if (ctx->stage == MESA_SHADER_TESS_CTRL) { @@ -2004,8 +2053,8 @@ unsigned const_index = 0; const bool is_patch = var->data.patch; - get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), - false, NULL, is_patch ? NULL : &vertex_index, + get_deref_offset(ctx, deref, false, NULL, + is_patch ? 
NULL : &vertex_index, &const_index, &indir_index); ctx->abi->store_tcs_outputs(ctx->abi, var, @@ -2043,7 +2092,7 @@ } } break; - case nir_var_local: + case nir_var_function_temp: for (unsigned chan = 0; chan < 8; chan++) { if (!(writemask & (1 << chan))) continue; @@ -2068,11 +2117,11 @@ } } break; - case nir_var_shared: { + case nir_var_mem_shared: { int writemask = instr->const_index[0]; LLVMValueRef address = get_src(ctx, instr->src[0]); LLVMValueRef val = get_src(ctx, instr->src[1]); - if (util_is_power_of_two_nonzero(writemask)) { + if (writemask == (1u << ac_get_llvm_num_components(val)) - 1) { val = LLVMBuildBitCast( ctx->ac.builder, val, LLVMGetElementType(LLVMTypeOf(address)), ""); @@ -2198,10 +2247,10 @@ return sample_index; } -static nir_variable *get_image_variable(const nir_intrinsic_instr *instr) +static nir_deref_instr *get_image_deref(const nir_intrinsic_instr *instr) { assert(instr->src[0].is_ssa); - return nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); + return nir_instr_as_deref(instr->src[0].ssa->parent_instr); } static LLVMValueRef get_image_descriptor(struct ac_nir_context *ctx, @@ -2216,7 +2265,7 @@ const nir_intrinsic_instr *instr, struct ac_image_args *args) { - const struct glsl_type *type = glsl_without_array(get_image_variable(instr)->type); + const struct glsl_type *type = get_image_deref(instr)->type; LLVMValueRef src0 = get_src(ctx, instr->src[1]); LLVMValueRef masks[] = { @@ -2235,7 +2284,7 @@ bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D; count = image_type_to_components_count(dim, is_array); - if (is_ms) { + if (is_ms && instr->intrinsic == nir_intrinsic_image_deref_load) { LLVMValueRef fmask_load_address[3]; int chan; @@ -2325,10 +2374,13 @@ const nir_intrinsic_instr *instr) { LLVMValueRef res; - const nir_variable *var = get_image_variable(instr); - const struct glsl_type *type = var->type; + const nir_deref_instr *image_deref = get_image_deref(instr); + const struct 
glsl_type *type = image_deref->type; + const nir_variable *var = nir_deref_instr_get_variable(image_deref); + struct ac_image_args args = {}; - type = glsl_without_array(type); + args.cache_policy = + get_cache_policy(ctx, var->data.image.access, false, false); const enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); if (dim == GLSL_SAMPLER_DIM_BUF) { @@ -2340,16 +2392,16 @@ vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, ""); - /* TODO: set "glc" and "can_speculate" when OpenGL needs it. */ + /* TODO: set "can_speculate" when OpenGL needs it. */ res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex, ctx->ac.i32_0, num_channels, - false, false); + !!(args.cache_policy & ac_glc), + false); res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels); res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components); res = ac_to_integer(&ctx->ac, res); } else { - struct ac_image_args args = {}; args.opcode = ac_image_load; get_image_coords(ctx, instr, &args); args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, false); @@ -2357,8 +2409,6 @@ glsl_sampler_type_is_array(type)); args.dmask = 15; args.attributes = AC_FUNC_ATTR_READONLY; - if (var->data.image.access & (ACCESS_VOLATILE | ACCESS_COHERENT)) - args.cache_policy |= ac_glc; res = ac_build_image_opcode(&ctx->ac, &args); } @@ -2369,13 +2419,15 @@ nir_intrinsic_instr *instr) { LLVMValueRef params[8]; - const nir_variable *var = get_image_variable(instr); - const struct glsl_type *type = glsl_without_array(var->type); + const nir_deref_instr *image_deref = get_image_deref(instr); + const struct glsl_type *type = image_deref->type; + const nir_variable *var = nir_deref_instr_get_variable(image_deref); const enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); - LLVMValueRef glc = ctx->ac.i1false; - bool force_glc = ctx->ac.chip_class == SI; - if (force_glc) - glc = ctx->ac.i1true; + bool writeonly_memory = var->data.image.access & 
ACCESS_NON_READABLE; + struct ac_image_args args = {}; + + args.cache_policy = get_cache_policy(ctx, var->data.image.access, true, + writeonly_memory); if (dim == GLSL_SAMPLER_DIM_BUF) { char name[48]; @@ -2393,14 +2445,19 @@ ctx->ac.i32_0, ""); /* vindex */ params[3] = ctx->ac.i32_0; /* voffset */ snprintf(name, sizeof(name), "%s.%s", - "llvm.amdgcn.buffer.store.format", + HAVE_LLVM >= 0x800 ? "llvm.amdgcn.struct.buffer.store.format" + : "llvm.amdgcn.buffer.store.format", types[CLAMP(src_channels, 1, 3) - 1]); - params[4] = glc; /* glc */ - params[5] = ctx->ac.i1false; /* slc */ + if (HAVE_LLVM >= 0x800) { + params[4] = ctx->ac.i32_0; /* soffset */ + params[5] = (args.cache_policy & ac_glc) ? ctx->ac.i32_1 : ctx->ac.i32_0; + } else { + params[4] = LLVMConstInt(ctx->ac.i1, !!(args.cache_policy & ac_glc), 0); + params[5] = ctx->ac.i1false; /* slc */ + } ac_build_intrinsic(&ctx->ac, name, ctx->ac.voidt, params, 6, 0); } else { - struct ac_image_args args = {}; args.opcode = ac_image_store; args.data[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3])); get_image_coords(ctx, instr, &args); @@ -2408,8 +2465,6 @@ args.dim = get_ac_image_dim(&ctx->ac, glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type)); args.dmask = 15; - if (force_glc || (var->data.image.access & (ACCESS_VOLATILE | ACCESS_COHERENT))) - args.cache_policy |= ac_glc; ac_build_image_opcode(&ctx->ac, &args); } @@ -2421,13 +2476,12 @@ { LLVMValueRef params[7]; int param_count = 0; - const nir_variable *var = get_image_variable(instr); + const struct glsl_type *type = get_image_deref(instr)->type; bool cmpswap = instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap; const char *atomic_name; - char intrinsic_name[41]; + char intrinsic_name[64]; enum ac_atomic_op atomic_subop; - const struct glsl_type *type = glsl_without_array(var->type); MAYBE_UNUSED int length; bool is_unsigned = glsl_get_sampler_result_type(type) == GLSL_TYPE_UINT; @@ -2478,10 +2532,18 @@ params[param_count++] = 
LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, ""); /* vindex */ params[param_count++] = ctx->ac.i32_0; /* voffset */ - params[param_count++] = ctx->ac.i1false; /* slc */ + if (HAVE_LLVM >= 0x800) { + params[param_count++] = ctx->ac.i32_0; /* soffset */ + params[param_count++] = ctx->ac.i32_0; /* slc */ + + length = snprintf(intrinsic_name, sizeof(intrinsic_name), + "llvm.amdgcn.struct.buffer.atomic.%s.i32", atomic_name); + } else { + params[param_count++] = ctx->ac.i1false; /* slc */ - length = snprintf(intrinsic_name, sizeof(intrinsic_name), - "llvm.amdgcn.buffer.atomic.%s", atomic_name); + length = snprintf(intrinsic_name, sizeof(intrinsic_name), + "llvm.amdgcn.buffer.atomic.%s", atomic_name); + } assert(length < sizeof(intrinsic_name)); return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32, @@ -2505,8 +2567,7 @@ static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) { - const nir_variable *var = get_image_variable(instr); - const struct glsl_type *type = glsl_without_array(var->type); + const struct glsl_type *type = get_image_deref(instr)->type; struct ac_image_args args = { 0 }; args.dim = get_ac_sampler_dim(&ctx->ac, glsl_get_sampler_dim(type), @@ -2524,8 +2585,7 @@ const nir_intrinsic_instr *instr) { LLVMValueRef res; - const nir_variable *var = get_image_variable(instr); - const struct glsl_type *type = glsl_without_array(var->type); + const struct glsl_type *type = get_image_deref(instr)->type; if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) return get_buffer_size(ctx, get_image_descriptor(ctx, instr, AC_DESC_BUFFER, false), true); @@ -2878,12 +2938,10 @@ } - LLVMValueRef array_idx = ctx->ac.i32_0; + LLVMValueRef attrib_idx = ctx->ac.i32_0; while(deref_instr->deref_type != nir_deref_type_var) { if (deref_instr->deref_type == nir_deref_type_array) { - unsigned array_size = glsl_get_aoa_size(deref_instr->type); - if (!array_size) - array_size = 1; + 
unsigned array_size = glsl_count_attribute_slots(deref_instr->type, false); LLVMValueRef offset; nir_const_value *const_value = nir_src_as_const_value(deref_instr->arr.index); @@ -2896,23 +2954,26 @@ LLVMConstInt(ctx->ac.i32, array_size, false), ""); } - array_idx = LLVMBuildAdd(ctx->ac.builder, array_idx, offset, ""); + attrib_idx = LLVMBuildAdd(ctx->ac.builder, attrib_idx, offset, ""); + deref_instr = nir_src_as_deref(deref_instr->parent); + } else if (deref_instr->deref_type == nir_deref_type_struct) { + LLVMValueRef offset; + unsigned sidx = deref_instr->strct.index; deref_instr = nir_src_as_deref(deref_instr->parent); + offset = LLVMConstInt(ctx->ac.i32, glsl_get_record_location_offset(deref_instr->type, sidx), false); + attrib_idx = LLVMBuildAdd(ctx->ac.builder, attrib_idx, offset, ""); } else { unreachable("Unsupported deref type"); } } - unsigned input_array_size = glsl_get_aoa_size(var->type); - if (!input_array_size) - input_array_size = 1; - + unsigned attrib_size = glsl_count_attribute_slots(var->type, false); for (chan = 0; chan < 4; chan++) { - LLVMValueRef gather = LLVMGetUndef(LLVMVectorType(ctx->ac.f32, input_array_size)); + LLVMValueRef gather = LLVMGetUndef(LLVMVectorType(ctx->ac.f32, attrib_size)); LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); - for (unsigned idx = 0; idx < input_array_size; ++idx) { + for (unsigned idx = 0; idx < attrib_size; ++idx) { LLVMValueRef v, attr_number; attr_number = LLVMConstInt(ctx->ac.i32, input_base + idx, false); @@ -2935,7 +2996,7 @@ LLVMConstInt(ctx->ac.i32, idx, false), ""); } - result[chan] = LLVMBuildExtractElement(ctx->ac.builder, gather, array_idx, ""); + result[chan] = LLVMBuildExtractElement(ctx->ac.builder, gather, attrib_idx, ""); } return ac_build_varying_gather_values(&ctx->ac, result, instr->num_components, @@ -3032,7 +3093,8 @@ ctx->abi->frag_pos[2], ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ctx->abi->frag_pos[3]) }; - result = ac_build_gather_values(&ctx->ac, values, 4); + result = 
ac_to_integer(&ctx->ac, + ac_build_gather_values(&ctx->ac, values, 4)); break; } case nir_intrinsic_load_front_face: @@ -3257,6 +3319,27 @@ } } +static LLVMValueRef get_bindless_index_from_uniform(struct ac_nir_context *ctx, + unsigned base_index, + unsigned constant_index, + LLVMValueRef dynamic_index) +{ + LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, base_index * 4, 0); + LLVMValueRef index = LLVMBuildAdd(ctx->ac.builder, dynamic_index, + LLVMConstInt(ctx->ac.i32, constant_index, 0), ""); + + /* Bindless uniforms are 64bit so multiple index by 8 */ + index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 8, 0), ""); + offset = LLVMBuildAdd(ctx->ac.builder, offset, index, ""); + + LLVMValueRef ubo_index = ctx->abi->load_ubo(ctx->abi, ctx->ac.i32_0); + + LLVMValueRef ret = ac_build_buffer_load(&ctx->ac, ubo_index, 1, NULL, offset, + NULL, 0, false, false, true, true); + + return LLVMBuildBitCast(ctx->ac.builder, ret, ctx->ac.i32, ""); +} + static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, nir_deref_instr *deref_instr, enum ac_descriptor_type desc_type, @@ -3275,30 +3358,49 @@ base_index = tex_instr->sampler_index; } else { while(deref_instr->deref_type != nir_deref_type_var) { - unsigned array_size = glsl_get_aoa_size(deref_instr->type); - if (!array_size) - array_size = 1; - - assert(deref_instr->deref_type == nir_deref_type_array); - nir_const_value *const_value = nir_src_as_const_value(deref_instr->arr.index); - if (const_value) { - constant_index += array_size * const_value->u32[0]; + if (deref_instr->deref_type == nir_deref_type_array) { + unsigned array_size = glsl_get_aoa_size(deref_instr->type); + if (!array_size) + array_size = 1; + + nir_const_value *const_value = nir_src_as_const_value(deref_instr->arr.index); + if (const_value) { + constant_index += array_size * const_value->u32[0]; + } else { + LLVMValueRef indirect = get_src(ctx, deref_instr->arr.index); + + indirect = LLVMBuildMul(ctx->ac.builder, indirect, + 
LLVMConstInt(ctx->ac.i32, array_size, false), ""); + + if (!index) + index = indirect; + else + index = LLVMBuildAdd(ctx->ac.builder, index, indirect, ""); + } + + deref_instr = nir_src_as_deref(deref_instr->parent); + } else if (deref_instr->deref_type == nir_deref_type_struct) { + unsigned sidx = deref_instr->strct.index; + deref_instr = nir_src_as_deref(deref_instr->parent); + constant_index += glsl_get_record_location_offset(deref_instr->type, sidx); } else { - LLVMValueRef indirect = get_src(ctx, deref_instr->arr.index); - - indirect = LLVMBuildMul(ctx->ac.builder, indirect, - LLVMConstInt(ctx->ac.i32, array_size, false), ""); - - if (!index) - index = indirect; - else - index = LLVMBuildAdd(ctx->ac.builder, index, indirect, ""); + unreachable("Unsupported deref type"); } - - deref_instr = nir_src_as_deref(deref_instr->parent); } descriptor_set = deref_instr->var->data.descriptor_set; - base_index = deref_instr->var->data.binding; + + if (deref_instr->var->data.bindless) { + /* For now just assert on unhandled variable types */ + assert(deref_instr->var->data.mode == nir_var_uniform); + + base_index = deref_instr->var->data.driver_location; + bindless = true; + + index = index ? 
index : ctx->ac.i32_0; + index = get_bindless_index_from_uniform(ctx, base_index, + constant_index, index); + } else + base_index = deref_instr->var->data.binding; } return ctx->abi->load_sampler_desc(ctx->abi, @@ -3731,10 +3833,77 @@ } } +static LLVMTypeRef +glsl_base_to_llvm_type(struct ac_llvm_context *ac, + enum glsl_base_type type) +{ + switch (type) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_SUBROUTINE: + return ac->i32; + case GLSL_TYPE_INT16: + case GLSL_TYPE_UINT16: + return ac->i16; + case GLSL_TYPE_FLOAT: + return ac->f32; + case GLSL_TYPE_FLOAT16: + return ac->f16; + case GLSL_TYPE_INT64: + case GLSL_TYPE_UINT64: + return ac->i64; + case GLSL_TYPE_DOUBLE: + return ac->f64; + default: + unreachable("unknown GLSL type"); + } +} + +static LLVMTypeRef +glsl_to_llvm_type(struct ac_llvm_context *ac, + const struct glsl_type *type) +{ + if (glsl_type_is_scalar(type)) { + return glsl_base_to_llvm_type(ac, glsl_get_base_type(type)); + } + + if (glsl_type_is_vector(type)) { + return LLVMVectorType( + glsl_base_to_llvm_type(ac, glsl_get_base_type(type)), + glsl_get_vector_elements(type)); + } + + if (glsl_type_is_matrix(type)) { + return LLVMArrayType( + glsl_to_llvm_type(ac, glsl_get_column_type(type)), + glsl_get_matrix_columns(type)); + } + + if (glsl_type_is_array(type)) { + return LLVMArrayType( + glsl_to_llvm_type(ac, glsl_get_array_element(type)), + glsl_get_length(type)); + } + + assert(glsl_type_is_struct(type)); + + LLVMTypeRef member_types[glsl_get_length(type)]; + + for (unsigned i = 0; i < glsl_get_length(type); i++) { + member_types[i] = + glsl_to_llvm_type(ac, + glsl_get_struct_field(type, i)); + } + + return LLVMStructTypeInContext(ac->context, member_types, + glsl_get_length(type), false); +} + static void visit_deref(struct ac_nir_context *ctx, nir_deref_instr *instr) { - if (instr->mode != nir_var_shared) + if (instr->mode != nir_var_mem_shared) return; LLVMValueRef result = NULL; @@ -3752,6 +3921,27 
@@ result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index)); break; + case nir_deref_type_ptr_as_array: + result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), + get_src(ctx, instr->arr.index)); + break; + case nir_deref_type_cast: { + result = get_src(ctx, instr->parent); + + LLVMTypeRef pointee_type = glsl_to_llvm_type(&ctx->ac, instr->type); + LLVMTypeRef type = LLVMPointerType(pointee_type, AC_ADDR_SPACE_LDS); + + if (LLVMTypeOf(result) != type) { + if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) { + result = LLVMBuildBitCast(ctx->ac.builder, result, + type, ""); + } else { + result = LLVMBuildIntToPtr(ctx->ac.builder, result, + type, ""); + } + } + break; + } default: unreachable("Unhandled deref_instr deref type"); } @@ -3900,68 +4090,6 @@ } } -static LLVMTypeRef -glsl_base_to_llvm_type(struct ac_llvm_context *ac, - enum glsl_base_type type) -{ - switch (type) { - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_SUBROUTINE: - return ac->i32; - case GLSL_TYPE_FLOAT: /* TODO handle mediump */ - return ac->f32; - case GLSL_TYPE_INT64: - case GLSL_TYPE_UINT64: - return ac->i64; - case GLSL_TYPE_DOUBLE: - return ac->f64; - default: - unreachable("unknown GLSL type"); - } -} - -static LLVMTypeRef -glsl_to_llvm_type(struct ac_llvm_context *ac, - const struct glsl_type *type) -{ - if (glsl_type_is_scalar(type)) { - return glsl_base_to_llvm_type(ac, glsl_get_base_type(type)); - } - - if (glsl_type_is_vector(type)) { - return LLVMVectorType( - glsl_base_to_llvm_type(ac, glsl_get_base_type(type)), - glsl_get_vector_elements(type)); - } - - if (glsl_type_is_matrix(type)) { - return LLVMArrayType( - glsl_to_llvm_type(ac, glsl_get_column_type(type)), - glsl_get_matrix_columns(type)); - } - - if (glsl_type_is_array(type)) { - return LLVMArrayType( - glsl_to_llvm_type(ac, glsl_get_array_element(type)), - glsl_get_length(type)); - } - - assert(glsl_type_is_struct(type)); - - 
LLVMTypeRef member_types[glsl_get_length(type)]; - - for (unsigned i = 0; i < glsl_get_length(type); i++) { - member_types[i] = - glsl_to_llvm_type(ac, - glsl_get_struct_field(type, i)); - } - - return LLVMStructTypeInContext(ac->context, member_types, - glsl_get_length(type), false); -} - static void setup_locals(struct ac_nir_context *ctx, struct nir_function *func) @@ -4031,13 +4159,13 @@ setup_locals(&ctx, func); - if (nir->info.stage == MESA_SHADER_COMPUTE) + if (gl_shader_stage_is_compute(nir->info.stage)) setup_shared(&ctx, nir); visit_cf_list(&ctx, &func->impl->body); phi_post_pass(&ctx); - if (nir->info.stage != MESA_SHADER_COMPUTE) + if (!gl_shader_stage_is_compute(nir->info.stage)) ctx.abi->emit_outputs(ctx.abi, AC_LLVM_MAX_OUTPUTS, ctx.abi->outputs); @@ -4080,7 +4208,168 @@ * See the following thread for more details of the problem: * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html */ - indirect_mask |= nir_var_local; + indirect_mask |= nir_var_function_temp; nir_lower_indirect_derefs(nir, indirect_mask); } + +static unsigned +get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin) +{ + if (intrin->intrinsic != nir_intrinsic_store_deref) + return 0; + + nir_variable *var = + nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[0])); + + if (var->data.mode != nir_var_shader_out) + return 0; + + unsigned writemask = 0; + const int location = var->data.location; + unsigned first_component = var->data.location_frac; + unsigned num_comps = intrin->dest.ssa.num_components; + + if (location == VARYING_SLOT_TESS_LEVEL_INNER) + writemask = ((1 << (num_comps + 1)) - 1) << first_component; + else if (location == VARYING_SLOT_TESS_LEVEL_OUTER) + writemask = (((1 << (num_comps + 1)) - 1) << first_component) << 4; + + return writemask; +} + +static void +scan_tess_ctrl(nir_cf_node *cf_node, unsigned *upper_block_tf_writemask, + unsigned *cond_block_tf_writemask, + bool *tessfactors_are_def_in_all_invocs, bool is_nested_cf) +{ + switch 
(cf_node->type) { + case nir_cf_node_block: { + nir_block *block = nir_cf_node_as_block(cf_node); + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic == nir_intrinsic_barrier) { + + /* If we find a barrier in nested control flow put this in the + * too hard basket. In GLSL this is not possible but it is in + * SPIR-V. + */ + if (is_nested_cf) { + *tessfactors_are_def_in_all_invocs = false; + return; + } + + /* The following case must be prevented: + * gl_TessLevelInner = ...; + * barrier(); + * if (gl_InvocationID == 1) + * gl_TessLevelInner = ...; + * + * If you consider disjoint code segments separated by barriers, each + * such segment that writes tess factor channels should write the same + * channels in all codepaths within that segment. + */ + if (upper_block_tf_writemask || cond_block_tf_writemask) { + /* Accumulate the result: */ + *tessfactors_are_def_in_all_invocs &= + !(*cond_block_tf_writemask & ~(*upper_block_tf_writemask)); + + /* Analyze the next code segment from scratch. 
*/ + *upper_block_tf_writemask = 0; + *cond_block_tf_writemask = 0; + } + } else + *upper_block_tf_writemask |= get_inst_tessfactor_writemask(intrin); + } + + break; + } + case nir_cf_node_if: { + unsigned then_tessfactor_writemask = 0; + unsigned else_tessfactor_writemask = 0; + + nir_if *if_stmt = nir_cf_node_as_if(cf_node); + foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list) { + scan_tess_ctrl(nested_node, &then_tessfactor_writemask, + cond_block_tf_writemask, + tessfactors_are_def_in_all_invocs, true); + } + + foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list) { + scan_tess_ctrl(nested_node, &else_tessfactor_writemask, + cond_block_tf_writemask, + tessfactors_are_def_in_all_invocs, true); + } + + if (then_tessfactor_writemask || else_tessfactor_writemask) { + /* If both statements write the same tess factor channels, + * we can say that the upper block writes them too. + */ + *upper_block_tf_writemask |= then_tessfactor_writemask & + else_tessfactor_writemask; + *cond_block_tf_writemask |= then_tessfactor_writemask | + else_tessfactor_writemask; + } + + break; + } + case nir_cf_node_loop: { + nir_loop *loop = nir_cf_node_as_loop(cf_node); + foreach_list_typed(nir_cf_node, nested_node, node, &loop->body) { + scan_tess_ctrl(nested_node, cond_block_tf_writemask, + cond_block_tf_writemask, + tessfactors_are_def_in_all_invocs, true); + } + + break; + } + default: + unreachable("unknown cf node type"); + } +} + +bool +ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir) +{ + assert(nir->info.stage == MESA_SHADER_TESS_CTRL); + + /* The pass works as follows: + * If all codepaths write tess factors, we can say that all + * invocations define tess factors. + * + * Each tess factor channel is tracked separately. + */ + unsigned main_block_tf_writemask = 0; /* if main block writes tess factors */ + unsigned cond_block_tf_writemask = 0; /* if cond block writes tess factors */ + + /* Initial value = true. 
Here the pass will accumulate results from + * multiple segments surrounded by barriers. If tess factors aren't + * written at all, it's a shader bug and we don't care if this will be + * true. + */ + bool tessfactors_are_def_in_all_invocs = true; + + nir_foreach_function(function, nir) { + if (function->impl) { + foreach_list_typed(nir_cf_node, node, node, &function->impl->body) { + scan_tess_ctrl(node, &main_block_tf_writemask, + &cond_block_tf_writemask, + &tessfactors_are_def_in_all_invocs, + false); + } + } + } + + /* Accumulate the result for the last code segment separated by a + * barrier. + */ + if (main_block_tf_writemask || cond_block_tf_writemask) { + tessfactors_are_def_in_all_invocs &= + !(cond_block_tf_writemask & ~main_block_tf_writemask); + } + + return tessfactors_are_def_in_all_invocs; +} diff -Nru mesa-18.3.3/src/amd/common/ac_nir_to_llvm.h mesa-19.0.1/src/amd/common/ac_nir_to_llvm.h --- mesa-18.3.3/src/amd/common/ac_nir_to_llvm.h 2018-03-13 20:41:43.000000000 +0000 +++ mesa-19.0.1/src/amd/common/ac_nir_to_llvm.h 2019-03-31 23:16:37.000000000 +0000 @@ -47,6 +47,8 @@ void ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class); +bool ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir); + void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, struct nir_shader *nir); diff -Nru mesa-18.3.3/src/amd/common/ac_surface.c mesa-19.0.1/src/amd/common/ac_surface.c --- mesa-18.3.3/src/amd/common/ac_surface.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/common/ac_surface.c 2019-03-31 23:16:37.000000000 +0000 @@ -27,7 +27,7 @@ #include "ac_surface.h" #include "amd_family.h" -#include "addrlib/amdgpu_asic_addr.h" +#include "addrlib/src/amdgpu_asic_addr.h" #include "ac_gpu_info.h" #include "util/macros.h" #include "util/u_atomic.h" @@ -39,7 +39,7 @@ #include #include -#include "addrlib/addrinterface.h" +#include "addrlib/inc/addrinterface.h" #ifndef CIASICIDGFXENGINE_SOUTHERNISLAND #define 
CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A @@ -1038,8 +1038,7 @@ static int gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib, ADDR2_COMPUTE_SURFACE_INFO_INPUT *in, - bool is_fmask, unsigned flags, - AddrSwizzleMode *swizzle_mode) + bool is_fmask, AddrSwizzleMode *swizzle_mode) { ADDR_E_RETURNCODE ret; ADDR2_GET_PREFERRED_SURF_SETTING_INPUT sin = {0}; @@ -1064,16 +1063,6 @@ sin.numSamples = in->numSamples; sin.numFrags = in->numFrags; - if (flags & RADEON_SURF_SCANOUT) { - sin.preferredSwSet.sw_D = 1; - /* Raven only allows S for displayable surfaces with < 64 bpp, so - * allow it as fallback */ - sin.preferredSwSet.sw_S = 1; - } else if (in->flags.depth || in->flags.stencil || is_fmask) - sin.preferredSwSet.sw_Z = 1; - else - sin.preferredSwSet.sw_S = 1; - if (is_fmask) { sin.flags.display = 0; sin.flags.color = 0; @@ -1273,8 +1262,7 @@ fout.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT); ret = gfx9_get_preferred_swizzle_mode(addrlib, in, - true, surf->flags, - &fin.swizzleMode); + true, &fin.swizzleMode); if (ret != ADDR_OK) return ret; @@ -1424,11 +1412,13 @@ AddrSurfInfoIn.bpp = surf->bpe * 8; } - AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER); + bool is_color_surface = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER); + AddrSurfInfoIn.flags.color = is_color_surface && + !(surf->flags & RADEON_SURF_NO_RENDER_TARGET); AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0; AddrSurfInfoIn.flags.display = get_display_flag(config, surf); /* flags.texture currently refers to TC-compatible HTILE */ - AddrSurfInfoIn.flags.texture = AddrSurfInfoIn.flags.color || + AddrSurfInfoIn.flags.texture = is_color_surface || surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE; AddrSurfInfoIn.flags.opt4space = 1; @@ -1476,8 +1466,7 @@ } r = gfx9_get_preferred_swizzle_mode(addrlib, &AddrSurfInfoIn, - false, surf->flags, - &AddrSurfInfoIn.swizzleMode); + false, &AddrSurfInfoIn.swizzleMode); if (r) return r; break; @@ -1513,8 +1502,7 @@ if 
(!AddrSurfInfoIn.flags.depth) { r = gfx9_get_preferred_swizzle_mode(addrlib, &AddrSurfInfoIn, - false, surf->flags, - &AddrSurfInfoIn.swizzleMode); + false, &AddrSurfInfoIn.swizzleMode); if (r) return r; } else @@ -1530,10 +1518,12 @@ /* Query whether the surface is displayable. */ bool displayable = false; - r = Addr2IsValidDisplaySwizzleMode(addrlib, surf->u.gfx9.surf.swizzle_mode, + if (!config->is_3d && !config->is_cube) { + r = Addr2IsValidDisplaySwizzleMode(addrlib, surf->u.gfx9.surf.swizzle_mode, surf->bpe * 8, &displayable); - if (r) - return r; + if (r) + return r; + } surf->is_displayable = displayable; switch (surf->u.gfx9.surf.swizzle_mode) { @@ -1594,10 +1584,6 @@ assert(0); } - /* Temporary workaround to prevent VM faults and hangs. */ - if (info->family == CHIP_VEGA12) - surf->fmask_size *= 8; - return 0; } diff -Nru mesa-18.3.3/src/amd/common/ac_surface.h mesa-19.0.1/src/amd/common/ac_surface.h --- mesa-18.3.3/src/amd/common/ac_surface.h 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/common/ac_surface.h 2019-03-31 23:16:37.000000000 +0000 @@ -68,6 +68,7 @@ #define RADEON_SURF_IMPORTED (1 << 24) #define RADEON_SURF_OPTIMIZE_FOR_SPACE (1 << 25) #define RADEON_SURF_SHAREABLE (1 << 26) +#define RADEON_SURF_NO_RENDER_TARGET (1 << 27) struct legacy_surf_level { uint64_t offset; diff -Nru mesa-18.3.3/src/amd/common/sid.h mesa-19.0.1/src/amd/common/sid.h --- mesa-18.3.3/src/amd/common/sid.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/common/sid.h 2019-03-31 23:16:37.000000000 +0000 @@ -133,11 +133,11 @@ #define S_370_WR_ONE_ADDR(x) (((unsigned)(x) & 0x1) << 16) #define S_370_DST_SEL(x) (((unsigned)(x) & 0xf) << 8) #define V_370_MEM_MAPPED_REGISTER 0 -#define V_370_MEMORY_SYNC 1 +#define V_370_MEM_GRBM 1 /* sync across GRBM */ #define V_370_TC_L2 2 #define V_370_GDS 3 #define V_370_RESERVED 4 -#define V_370_MEM_ASYNC 5 +#define V_370_MEM 5 /* not on SI */ #define R_371_DST_ADDR_LO 0x371 #define R_372_DST_ADDR_HI 0x372 #define 
PKT3_DRAW_INDEX_INDIRECT_MULTI 0x38 @@ -211,12 +211,14 @@ #define PKT3_SET_SH_REG 0x76 #define PKT3_SET_SH_REG_OFFSET 0x77 #define PKT3_SET_UCONFIG_REG 0x79 /* new for CIK */ +#define PKT3_SET_UCONFIG_REG_INDEX 0x7A /* new for GFX9, CP ucode version >= 26 */ #define PKT3_LOAD_CONST_RAM 0x80 #define PKT3_WRITE_CONST_RAM 0x81 #define PKT3_DUMP_CONST_RAM 0x83 #define PKT3_INCREMENT_CE_COUNTER 0x84 #define PKT3_INCREMENT_DE_COUNTER 0x85 #define PKT3_WAIT_ON_CE_COUNTER 0x86 +#define PKT3_LOAD_CONTEXT_REG 0x9F /* new for VI */ #define PKT_TYPE_S(x) (((unsigned)(x) & 0x3) << 30) #define PKT_TYPE_G(x) (((x) >> 30) & 0x3) @@ -2435,6 +2437,9 @@ #define S_008F30_FILTER_MODE(x) (((unsigned)(x) & 0x03) << 29) #define G_008F30_FILTER_MODE(x) (((x) >> 29) & 0x03) #define C_008F30_FILTER_MODE 0x9FFFFFFF +#define V_008F30_SQ_IMG_FILTER_MODE_BLEND 0x00 +#define V_008F30_SQ_IMG_FILTER_MODE_MIN 0x01 +#define V_008F30_SQ_IMG_FILTER_MODE_MAX 0x02 /* VI */ #define S_008F30_COMPAT_MODE(x) (((unsigned)(x) & 0x1) << 31) #define G_008F30_COMPAT_MODE(x) (((x) >> 31) & 0x1) diff -Nru mesa-18.3.3/src/amd/common/sid_tables.py mesa-19.0.1/src/amd/common/sid_tables.py --- mesa-18.3.3/src/amd/common/sid_tables.py 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/common/sid_tables.py 2019-03-31 23:16:37.000000000 +0000 @@ -1,4 +1,4 @@ -from __future__ import print_function +from __future__ import print_function, division, unicode_literals CopyRight = ''' /* diff -Nru mesa-18.3.3/src/amd/Makefile.addrlib.am mesa-19.0.1/src/amd/Makefile.addrlib.am --- mesa-18.3.3/src/amd/Makefile.addrlib.am 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/Makefile.addrlib.am 2019-03-31 23:16:37.000000000 +0000 @@ -26,12 +26,11 @@ -I$(top_srcdir)/src/ \ -I$(top_srcdir)/include \ -I$(srcdir)/common \ - -I$(srcdir)/addrlib \ - -I$(srcdir)/addrlib/core \ - -I$(srcdir)/addrlib/inc/chip/gfx9 \ - -I$(srcdir)/addrlib/inc/chip/r800 \ - -I$(srcdir)/addrlib/gfx9/chip \ - -I$(srcdir)/addrlib/r800/chip + 
-I$(srcdir)/addrlib/inc \ + -I$(srcdir)/addrlib/src \ + -I$(srcdir)/addrlib/src/core \ + -I$(srcdir)/addrlib/src/chip/gfx9 \ + -I$(srcdir)/addrlib/src/chip/r800 addrlib_libamdgpu_addrlib_la_CXXFLAGS = \ $(VISIBILITY_CXXFLAGS) $(CXX11_CXXFLAGS) diff -Nru mesa-18.3.3/src/amd/Makefile.sources mesa-19.0.1/src/amd/Makefile.sources --- mesa-18.3.3/src/amd/Makefile.sources 2018-03-13 20:41:43.000000000 +0000 +++ mesa-19.0.1/src/amd/Makefile.sources 2019-03-31 23:16:37.000000000 +0000 @@ -5,35 +5,33 @@ common/amd_kernel_code_t.h ADDRLIB_FILES = \ - addrlib/addrinterface.cpp \ - addrlib/addrinterface.h \ - addrlib/addrtypes.h \ - addrlib/amdgpu_asic_addr.h \ - addrlib/core/addrcommon.h \ - addrlib/core/addrelemlib.cpp \ - addrlib/core/addrelemlib.h \ - addrlib/core/addrlib.cpp \ - addrlib/core/addrlib.h \ - addrlib/core/addrlib1.cpp \ - addrlib/core/addrlib1.h \ - addrlib/core/addrlib2.cpp \ - addrlib/core/addrlib2.h \ - addrlib/core/addrobject.cpp \ - addrlib/core/addrobject.h \ - addrlib/gfx9/chip/gfx9_enum.h \ - addrlib/gfx9/coord.cpp \ - addrlib/gfx9/coord.h \ - addrlib/gfx9/gfx9addrlib.cpp \ - addrlib/gfx9/gfx9addrlib.h \ - addrlib/inc/chip/gfx9/gfx9_gb_reg.h \ - addrlib/inc/chip/r800/si_gb_reg.h \ - addrlib/r800/chip/si_ci_vi_merged_enum.h \ - addrlib/r800/ciaddrlib.cpp \ - addrlib/r800/ciaddrlib.h \ - addrlib/r800/egbaddrlib.cpp \ - addrlib/r800/egbaddrlib.h \ - addrlib/r800/siaddrlib.cpp \ - addrlib/r800/siaddrlib.h + addrlib/inc/addrinterface.h \ + addrlib/inc/addrtypes.h \ + addrlib/src/addrinterface.cpp \ + addrlib/src/amdgpu_asic_addr.h \ + addrlib/src/core/addrcommon.h \ + addrlib/src/core/addrelemlib.cpp \ + addrlib/src/core/addrelemlib.h \ + addrlib/src/core/addrlib.cpp \ + addrlib/src/core/addrlib.h \ + addrlib/src/core/addrlib1.cpp \ + addrlib/src/core/addrlib1.h \ + addrlib/src/core/addrlib2.cpp \ + addrlib/src/core/addrlib2.h \ + addrlib/src/core/addrobject.cpp \ + addrlib/src/core/addrobject.h \ + addrlib/src/core/coord.cpp \ + addrlib/src/core/coord.h \ 
+ addrlib/src/gfx9/gfx9addrlib.cpp \ + addrlib/src/gfx9/gfx9addrlib.h \ + addrlib/src/chip/gfx9/gfx9_gb_reg.h \ + addrlib/src/chip/r800/si_gb_reg.h \ + addrlib/src/r800/ciaddrlib.cpp \ + addrlib/src/r800/ciaddrlib.h \ + addrlib/src/r800/egbaddrlib.cpp \ + addrlib/src/r800/egbaddrlib.h \ + addrlib/src/r800/siaddrlib.cpp \ + addrlib/src/r800/siaddrlib.h AMD_COMPILER_FILES = \ common/ac_binary.c \ diff -Nru mesa-18.3.3/src/amd/vulkan/Makefile.sources mesa-19.0.1/src/amd/vulkan/Makefile.sources --- mesa-18.3.3/src/amd/vulkan/Makefile.sources 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/Makefile.sources 2019-03-31 23:16:37.000000000 +0000 @@ -50,6 +50,7 @@ radv_meta_copy.c \ radv_meta_decompress.c \ radv_meta_fast_clear.c \ + radv_meta_fmask_expand.c \ radv_meta_resolve.c \ radv_meta_resolve_cs.c \ radv_meta_resolve_fs.c \ diff -Nru mesa-18.3.3/src/amd/vulkan/meson.build mesa-19.0.1/src/amd/vulkan/meson.build --- mesa-18.3.3/src/amd/vulkan/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -78,6 +78,7 @@ 'radv_meta_copy.c', 'radv_meta_decompress.c', 'radv_meta_fast_clear.c', + 'radv_meta_fmask_expand.c', 'radv_meta_resolve.c', 'radv_meta_resolve_cs.c', 'radv_meta_resolve_fs.c', diff -Nru mesa-18.3.3/src/amd/vulkan/radv_android.c mesa-19.0.1/src/amd/vulkan/radv_android.c --- mesa-18.3.3/src/amd/vulkan/radv_android.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_android.c 2019-03-31 23:16:37.000000000 +0000 @@ -111,7 +111,7 @@ VkResult result; if (gralloc_info->handle->numFds != 1) { - return vk_errorf(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, + return vk_errorf(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE, "VkNativeBufferANDROID::handle::numFds is %d, " "expected 1", gralloc_info->handle->numFds); } @@ -126,7 +126,7 @@ const VkImportMemoryFdInfoKHR import_info = { .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR, - 
.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, .fd = dup(dma_buf), }; @@ -230,16 +230,16 @@ * dEQP-VK.wsi.android.swapchain.*.image_usage to fail. */ - const VkPhysicalDeviceImageFormatInfo2KHR image_format_info = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR, + const VkPhysicalDeviceImageFormatInfo2 image_format_info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, .format = format, .type = VK_IMAGE_TYPE_2D, .tiling = VK_IMAGE_TILING_OPTIMAL, .usage = imageUsage, }; - VkImageFormatProperties2KHR image_format_props = { - .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR, + VkImageFormatProperties2 image_format_props = { + .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, }; /* Check that requested format and usage are supported. */ @@ -303,7 +303,7 @@ semaphore_result = radv_ImportSemaphoreFdKHR(device, &(VkImportSemaphoreFdInfoKHR) { .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR, - .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR, + .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT, .fd = semaphore_fd, .semaphore = semaphore, }); @@ -314,7 +314,7 @@ fence_result = radv_ImportFenceFdKHR(device, &(VkImportFenceFdInfoKHR) { .sType = VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR, - .flags = VK_FENCE_IMPORT_TEMPORARY_BIT_KHR, + .flags = VK_FENCE_IMPORT_TEMPORARY_BIT, .fd = fence_fd, .fence = fence, }); @@ -351,7 +351,7 @@ result = radv_GetSemaphoreFdKHR(radv_device_to_handle(queue->device), &(VkSemaphoreGetFdInfoKHR) { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, - .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR, + .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, .semaphore = pWaitSemaphores[i], }, &tmp_fd); if (result != VK_SUCCESS) { diff -Nru mesa-18.3.3/src/amd/vulkan/radv_cmd_buffer.c mesa-19.0.1/src/amd/vulkan/radv_cmd_buffer.c --- mesa-18.3.3/src/amd/vulkan/radv_cmd_buffer.c 2019-02-01 
12:03:20.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_cmd_buffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -57,8 +57,7 @@ VkImageLayout dst_layout, uint32_t src_family, uint32_t dst_family, - const VkImageSubresourceRange *range, - VkImageAspectFlags pending_clears); + const VkImageSubresourceRange *range); const struct radv_dynamic_state default_dynamic_state = { .viewport = { @@ -333,18 +332,21 @@ cmd_buffer->descriptors[i].push_dirty = false; } - if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 && + cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) { unsigned num_db = cmd_buffer->device->physical_device->rad_info.num_render_backends; - unsigned eop_bug_offset; + unsigned fence_offset, eop_bug_offset; void *fence_ptr; - radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 0, - &cmd_buffer->gfx9_fence_offset, + radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 8, &fence_offset, &fence_ptr); - cmd_buffer->gfx9_fence_bo = cmd_buffer->upload.upload_bo; + + cmd_buffer->gfx9_fence_va = + radv_buffer_get_va(cmd_buffer->upload.upload_bo); + cmd_buffer->gfx9_fence_va += fence_offset; /* Allocate a buffer for the EOP bug on GFX9. 
*/ - radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 0, + radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 8, &eop_bug_offset, &fence_ptr); cmd_buffer->gfx9_eop_bug_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); @@ -373,7 +375,8 @@ RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS| RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_32BIT); + RADEON_FLAG_32BIT, + RADV_BO_PRIORITY_UPLOAD_BUFFER); if (!bo) { cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY; @@ -414,6 +417,8 @@ unsigned *out_offset, void **ptr) { + assert(util_is_power_of_two_nonzero(alignment)); + uint64_t offset = align(cmd_buffer->upload.offset, alignment); if (offset + size > cmd_buffer->upload.size) { if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size)) @@ -454,7 +459,7 @@ radeon_check_space(cmd_buffer->device->ws, cs, 4 + count); radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); radeon_emit(cs, va); @@ -487,24 +492,16 @@ enum radv_cmd_flush_bits flags) { if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_SYNC_SHADERS) { - uint32_t *ptr = NULL; - uint64_t va = 0; - assert(flags & (RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)); - if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) { - va = radv_buffer_get_va(cmd_buffer->gfx9_fence_bo) + - cmd_buffer->gfx9_fence_offset; - ptr = &cmd_buffer->gfx9_fence_idx; - } - radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4); /* Force wait for graphics or compute engines to be idle. */ si_cs_emit_cache_flush(cmd_buffer->cs, cmd_buffer->device->physical_device->rad_info.chip_class, - ptr, va, + &cmd_buffer->gfx9_fence_idx, + cmd_buffer->gfx9_fence_va, radv_cmd_buffer_uses_mec(cmd_buffer), flags, cmd_buffer->gfx9_eop_bug_va); } @@ -595,8 +592,7 @@ if (loc->sgpr_idx == -1) return; - assert(loc->num_sgprs == (HAVE_32BIT_POINTERS ? 
1 : 2)); - assert(!loc->indirect); + assert(loc->num_sgprs == 1); radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va, false); @@ -625,14 +621,12 @@ struct radv_userdata_info *loc = &locs->descriptor_sets[start]; unsigned sh_offset = sh_base + loc->sgpr_idx * 4; - radv_emit_shader_pointer_head(cs, sh_offset, count, - HAVE_32BIT_POINTERS); + radv_emit_shader_pointer_head(cs, sh_offset, count, true); for (int i = 0; i < count; i++) { struct radv_descriptor_set *set = descriptors_state->sets[start + i]; - radv_emit_shader_pointer_body(device, cs, set->va, - HAVE_32BIT_POINTERS); + radv_emit_shader_pointer_body(device, cs, set->va, true); } } } @@ -664,6 +658,8 @@ radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0)); } + + cmd_buffer->state.context_roll_without_scissor_emitted = true; } static void @@ -860,10 +856,13 @@ sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); } + /* TODO: avoid redundantly setting context registers */ radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3); radeon_emit(cmd_buffer->cs, sx_ps_downconvert); radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon); radeon_emit(cmd_buffer->cs, sx_blend_opt_control); + + cmd_buffer->state.context_roll_without_scissor_emitted = true; } static void @@ -887,6 +886,15 @@ radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw); + if (!cmd_buffer->state.emitted_pipeline || + cmd_buffer->state.emitted_pipeline->ctx_cs.cdw != pipeline->ctx_cs.cdw || + cmd_buffer->state.emitted_pipeline->ctx_cs_hash != pipeline->ctx_cs_hash || + memcmp(cmd_buffer->state.emitted_pipeline->ctx_cs.buf, + pipeline->ctx_cs.buf, pipeline->ctx_cs.cdw * 4)) { + radeon_emit_array(cmd_buffer->cs, pipeline->ctx_cs.buf, pipeline->ctx_cs.cdw); + 
cmd_buffer->state.context_roll_without_scissor_emitted = true; + } + for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) { if (!pipeline->shaders[i]) continue; @@ -923,6 +931,8 @@ cmd_buffer->state.dynamic.scissor.scissors, cmd_buffer->state.dynamic.viewport.viewports, cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband); + + cmd_buffer->state.context_roll_without_scissor_emitted = false; } static void @@ -1062,6 +1072,11 @@ radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, cb->cb_dcc_base); } } + + if (radv_image_has_dcc(image)) { + /* Drawing with DCC enabled also compresses colorbuffers. */ + radv_update_dcc_metadata(cmd_buffer, image, true); + } } static void @@ -1215,6 +1230,8 @@ radv_update_zrange_precision(cmd_buffer, &att->ds, image, layout, false); } + + cmd_buffer->state.context_roll_without_scissor_emitted = true; } /** @@ -1242,7 +1259,7 @@ ++reg_count; radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); radeon_emit(cs, va); @@ -1266,7 +1283,7 @@ va += image->offset + image->tc_compat_zrange_offset; radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); radeon_emit(cs, va); @@ -1279,7 +1296,6 @@ struct radv_image *image, VkClearDepthStencilValue ds_clear_value) { - struct radeon_cmdbuf *cs = cmd_buffer->cs; uint64_t va = radv_buffer_get_va(image->bo); va += image->offset + image->tc_compat_zrange_offset; uint32_t cond_val; @@ -1341,17 +1357,27 @@ if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ++reg_count; - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | - COPY_DATA_DST_SEL(COPY_DATA_REG) | - (reg_count == 2 ? 
COPY_DATA_COUNT_SEL : 0)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, (R_028028_DB_STENCIL_CLEAR + 4 * reg_offset) >> 2); - radeon_emit(cs, 0); + uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset; - radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); - radeon_emit(cs, 0); + if (cmd_buffer->device->physical_device->has_load_ctx_reg_pkt) { + radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG, 3, 0)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2); + radeon_emit(cs, reg_count); + } else { + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | + COPY_DATA_DST_SEL(COPY_DATA_REG) | + (reg_count == 2 ? COPY_DATA_COUNT_SEL : 0)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, reg >> 2); + radeon_emit(cs, 0); + + radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); + radeon_emit(cs, 0); + } } /* @@ -1360,9 +1386,31 @@ * cmask eliminate is required. */ void -radv_set_dcc_need_cmask_elim_pred(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - bool value) +radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, bool value) +{ + uint64_t pred_val = value; + uint64_t va = radv_buffer_get_va(image->bo); + va += image->offset + image->fce_pred_offset; + + assert(radv_image_has_dcc(image)); + + radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 4, 0)); + radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) | + S_370_WR_CONFIRM(1) | + S_370_ENGINE_SEL(V_370_PFP)); + radeon_emit(cmd_buffer->cs, va); + radeon_emit(cmd_buffer->cs, va >> 32); + radeon_emit(cmd_buffer->cs, pred_val); + radeon_emit(cmd_buffer->cs, pred_val >> 32); +} + +/** + * Update the DCC predicate to reflect the compression state. 
+ */ +void +radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, bool value) { uint64_t pred_val = value; uint64_t va = radv_buffer_get_va(image->bo); @@ -1371,7 +1419,7 @@ assert(radv_image_has_dcc(image)); radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 4, 0)); - radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); radeon_emit(cmd_buffer->cs, va); @@ -1409,6 +1457,8 @@ radeon_set_context_reg_seq(cs, R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c, 2); radeon_emit(cs, color_values[0]); radeon_emit(cs, color_values[1]); + + cmd_buffer->state.context_roll_without_scissor_emitted = true; } /** @@ -1427,7 +1477,7 @@ assert(radv_image_has_cmask(image) || radv_image_has_dcc(image)); radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); radeon_emit(cs, va); @@ -1471,17 +1521,25 @@ uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c; - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | - COPY_DATA_DST_SEL(COPY_DATA_REG) | - COPY_DATA_COUNT_SEL); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, reg >> 2); - radeon_emit(cs, 0); + if (cmd_buffer->device->physical_device->has_load_ctx_reg_pkt) { + radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG, 3, cmd_buffer->state.predicating)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2); + radeon_emit(cs, 2); + } else { + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | + COPY_DATA_DST_SEL(COPY_DATA_REG) | + COPY_DATA_COUNT_SEL); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, reg >> 
2); + radeon_emit(cs, 0); - radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating)); - radeon_emit(cs, 0); + radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating)); + radeon_emit(cs, 0); + } } static void @@ -1490,6 +1548,7 @@ int i; struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer; const struct radv_subpass *subpass = cmd_buffer->state.subpass; + unsigned num_bpp64_colorbufs = 0; /* this may happen for inherited secondary recording */ if (!framebuffer) @@ -1513,6 +1572,9 @@ radv_emit_fb_color_state(cmd_buffer, i, att, image, layout); radv_load_color_clear_metadata(cmd_buffer, image, i); + + if (image->surface.bpe >= 8) + num_bpp64_colorbufs++; } if(subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) { @@ -1548,6 +1610,23 @@ S_028208_BR_X(framebuffer->width) | S_028208_BR_Y(framebuffer->height)); + if (cmd_buffer->device->physical_device->rad_info.chip_class >= VI) { + uint8_t watermark = 4; /* Default value for VI. */ + + /* For optimal DCC performance. */ + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { + if (num_bpp64_colorbufs >= 5) { + watermark = 8; + } else { + watermark = 6; + } + } + + radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_DCC_CONTROL, + S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) | + S_028424_OVERWRITE_COMBINER_WATERMARK(watermark)); + } + if (cmd_buffer->device->dfsm_allowed) { radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); @@ -1641,6 +1720,8 @@ } radeon_set_context_reg(cmd_buffer->cs, R_028004_DB_COUNT_CONTROL, db_count_control); + + cmd_buffer->state.context_roll_without_scissor_emitted = true; } static void @@ -1702,8 +1783,7 @@ { struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point); - uint8_t ptr_size = HAVE_32BIT_POINTERS ? 
1 : 2; - uint32_t size = MAX_SETS * 4 * ptr_size; + uint32_t size = MAX_SETS * 4; uint32_t offset; void *ptr; @@ -1712,14 +1792,12 @@ return; for (unsigned i = 0; i < MAX_SETS; i++) { - uint32_t *uptr = ((uint32_t *)ptr) + i * ptr_size; + uint32_t *uptr = ((uint32_t *)ptr) + i; uint64_t set_va = 0; struct radv_descriptor_set *set = descriptors_state->sets[i]; if (descriptors_state->valid & (1u << i)) set_va = set->va; uptr[0] = set_va & 0xffffffff; - if (ptr_size == 2) - uptr[1] = set_va >> 32; } uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); @@ -2014,10 +2092,60 @@ radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS); } +struct radv_draw_info { + /** + * Number of vertices. + */ + uint32_t count; + + /** + * Index of the first vertex. + */ + int32_t vertex_offset; + + /** + * First instance id. + */ + uint32_t first_instance; + + /** + * Number of instances. + */ + uint32_t instance_count; + + /** + * First index (indexed draws only). + */ + uint32_t first_index; + + /** + * Whether it's an indexed draw. + */ + bool indexed; + + /** + * Indirect draw parameters resource. + */ + struct radv_buffer *indirect; + uint64_t indirect_offset; + uint32_t stride; + + /** + * Draw count parameters resource. + */ + struct radv_buffer *count_buffer; + uint64_t count_buffer_offset; + + /** + * Stream output parameters resource. + */ + struct radv_buffer *strmout_buffer; + uint64_t strmout_buffer_offset; +}; + static void -radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, bool indexed_draw, - bool instanced_draw, bool indirect_draw, - uint32_t draw_vertex_count) +radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, + const struct radv_draw_info *draw_info) { struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info; struct radv_cmd_state *state = &cmd_buffer->state; @@ -2027,8 +2155,9 @@ /* Draw state. 
*/ ia_multi_vgt_param = - si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw, - indirect_draw, draw_vertex_count); + si_get_ia_multi_vgt_param(cmd_buffer, draw_info->instance_count > 1, + draw_info->indirect, + draw_info->indirect ? 0 : draw_info->count); if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) { if (info->chip_class >= GFX9) { @@ -2048,7 +2177,7 @@ /* Primitive restart. */ primitive_reset_en = - indexed_draw && state->pipeline->graphics.prim_restart_enable; + draw_info->indexed && state->pipeline->graphics.prim_restart_enable; if (primitive_reset_en != state->last_primitive_reset_en) { state->last_primitive_reset_en = primitive_reset_en; @@ -2074,6 +2203,27 @@ state->last_primitive_reset_index = primitive_reset_index; } } + + if (draw_info->strmout_buffer) { + uint64_t va = radv_buffer_get_va(draw_info->strmout_buffer->bo); + + va += draw_info->strmout_buffer->offset + + draw_info->strmout_buffer_offset; + + radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, + draw_info->stride); + + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | + COPY_DATA_DST_SEL(COPY_DATA_REG) | + COPY_DATA_WR_CONFIRM); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2); + radeon_emit(cs, 0); /* unused */ + + radv_cs_add_buffer(cmd_buffer->device->ws, cs, draw_info->strmout_buffer->bo); + } } static void radv_stage_flush(struct radv_cmd_buffer *cmd_buffer, @@ -2254,11 +2404,21 @@ range.baseArrayLayer = view->base_layer; range.layerCount = cmd_buffer->state.framebuffer->layers; + if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask) { + /* If the current subpass uses multiview, the driver might have + * performed a fast color/depth clear to the whole image + * (including all layers). 
To make sure the driver will + * decompress the image correctly (if needed), we have to + * account for the "real" number of layers. If the view mask is + * sparse, this will decompress more layers than needed. + */ + range.layerCount = util_last_bit(cmd_buffer->state.subpass->view_mask); + } + radv_handle_image_transition(cmd_buffer, view->image, cmd_buffer->state.attachments[idx].current_layout, - att.layout, 0, 0, &range, - cmd_buffer->state.attachments[idx].pending_clear_aspects); + att.layout, 0, 0, &range); cmd_buffer->state.attachments[idx].current_layout = att.layout; @@ -2717,7 +2877,7 @@ void radv_CmdPushDescriptorSetWithTemplateKHR( VkCommandBuffer commandBuffer, - VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, VkPipelineLayout _layout, uint32_t set, const void* pData) @@ -2789,6 +2949,8 @@ if (!pipeline || pipeline == cmd_buffer->state.emitted_compute_pipeline) return; + assert(!pipeline->ctx_cs.cdw); + cmd_buffer->state.emitted_compute_pipeline = pipeline; radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->cs.cdw); @@ -2880,6 +3042,11 @@ assert(firstViewport < MAX_VIEWPORTS); assert(total_count >= 1 && total_count <= MAX_VIEWPORTS); + if (!memcmp(state->dynamic.viewport.viewports + firstViewport, + pViewports, viewportCount * sizeof(*pViewports))) { + return; + } + memcpy(state->dynamic.viewport.viewports + firstViewport, pViewports, viewportCount * sizeof(*pViewports)); @@ -2899,6 +3066,11 @@ assert(firstScissor < MAX_SCISSORS); assert(total_count >= 1 && total_count <= MAX_SCISSORS); + if (!memcmp(state->dynamic.scissor.scissors + firstScissor, pScissors, + scissorCount * sizeof(*pScissors))) { + return; + } + memcpy(state->dynamic.scissor.scissors + firstScissor, pScissors, scissorCount * sizeof(*pScissors)); @@ -2910,6 +3082,10 @@ float lineWidth) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + + if (cmd_buffer->state.dynamic.line_width == lineWidth) 
+ return; + cmd_buffer->state.dynamic.line_width = lineWidth; cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH; } @@ -2921,12 +3097,19 @@ float depthBiasSlopeFactor) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + + if (state->dynamic.depth_bias.bias == depthBiasConstantFactor && + state->dynamic.depth_bias.clamp == depthBiasClamp && + state->dynamic.depth_bias.slope == depthBiasSlopeFactor) { + return; + } - cmd_buffer->state.dynamic.depth_bias.bias = depthBiasConstantFactor; - cmd_buffer->state.dynamic.depth_bias.clamp = depthBiasClamp; - cmd_buffer->state.dynamic.depth_bias.slope = depthBiasSlopeFactor; + state->dynamic.depth_bias.bias = depthBiasConstantFactor; + state->dynamic.depth_bias.clamp = depthBiasClamp; + state->dynamic.depth_bias.slope = depthBiasSlopeFactor; - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; } void radv_CmdSetBlendConstants( @@ -2934,11 +3117,14 @@ const float blendConstants[4]) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + + if (!memcmp(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4)) + return; - memcpy(cmd_buffer->state.dynamic.blend_constants, - blendConstants, sizeof(float) * 4); + memcpy(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4); - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; } void radv_CmdSetDepthBounds( @@ -2947,11 +3133,17 @@ float maxDepthBounds) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; - cmd_buffer->state.dynamic.depth_bounds.min = minDepthBounds; - cmd_buffer->state.dynamic.depth_bounds.max = maxDepthBounds; + if (state->dynamic.depth_bounds.min == minDepthBounds && + state->dynamic.depth_bounds.max == 
maxDepthBounds) { + return; + } + + state->dynamic.depth_bounds.min = minDepthBounds; + state->dynamic.depth_bounds.max = maxDepthBounds; - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS; + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS; } void radv_CmdSetStencilCompareMask( @@ -2960,13 +3152,21 @@ uint32_t compareMask) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + bool front_same = state->dynamic.stencil_compare_mask.front == compareMask; + bool back_same = state->dynamic.stencil_compare_mask.back == compareMask; + + if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) && + (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) { + return; + } if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd_buffer->state.dynamic.stencil_compare_mask.front = compareMask; + state->dynamic.stencil_compare_mask.front = compareMask; if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmd_buffer->state.dynamic.stencil_compare_mask.back = compareMask; + state->dynamic.stencil_compare_mask.back = compareMask; - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; } void radv_CmdSetStencilWriteMask( @@ -2975,13 +3175,21 @@ uint32_t writeMask) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + bool front_same = state->dynamic.stencil_write_mask.front == writeMask; + bool back_same = state->dynamic.stencil_write_mask.back == writeMask; + + if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) && + (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) { + return; + } if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd_buffer->state.dynamic.stencil_write_mask.front = writeMask; + state->dynamic.stencil_write_mask.front = writeMask; if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmd_buffer->state.dynamic.stencil_write_mask.back = writeMask; + 
state->dynamic.stencil_write_mask.back = writeMask; - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; } void radv_CmdSetStencilReference( @@ -2990,6 +3198,14 @@ uint32_t reference) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + bool front_same = state->dynamic.stencil_reference.front == reference; + bool back_same = state->dynamic.stencil_reference.back == reference; + + if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) && + (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) { + return; + } if (faceMask & VK_STENCIL_FACE_FRONT_BIT) cmd_buffer->state.dynamic.stencil_reference.front = reference; @@ -3012,6 +3228,11 @@ assert(firstDiscardRectangle < MAX_DISCARD_RECTANGLES); assert(total_count >= 1 && total_count <= MAX_DISCARD_RECTANGLES); + if (!memcmp(state->dynamic.discard_rectangle.rectangles + firstDiscardRectangle, + pDiscardRectangles, discardRectangleCount * sizeof(*pDiscardRectangles))) { + return; + } + typed_memcpy(&state->dynamic.discard_rectangle.rectangles[firstDiscardRectangle], pDiscardRectangles, discardRectangleCount); @@ -3177,7 +3398,7 @@ void radv_TrimCommandPool( VkDevice device, VkCommandPool commandPool, - VkCommandPoolTrimFlagsKHR flags) + VkCommandPoolTrimFlags flags) { RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool); @@ -3341,57 +3562,6 @@ } } -struct radv_draw_info { - /** - * Number of vertices. - */ - uint32_t count; - - /** - * Index of the first vertex. - */ - int32_t vertex_offset; - - /** - * First instance id. - */ - uint32_t first_instance; - - /** - * Number of instances. - */ - uint32_t instance_count; - - /** - * First index (indexed draws only). - */ - uint32_t first_index; - - /** - * Whether it's an indexed draw. - */ - bool indexed; - - /** - * Indirect draw parameters resource. 
- */ - struct radv_buffer *indirect; - uint64_t indirect_offset; - uint32_t stride; - - /** - * Draw count parameters resource. - */ - struct radv_buffer *count_buffer; - uint64_t count_buffer_offset; - - /** - * Stream output parameters resource. - */ - struct radv_buffer *strmout_buffer; - uint64_t strmout_buffer_offset; -}; - static void radv_emit_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info) @@ -3400,27 +3570,6 @@ struct radeon_winsys *ws = cmd_buffer->device->ws; struct radeon_cmdbuf *cs = cmd_buffer->cs; - if (info->strmout_buffer) { - uint64_t va = radv_buffer_get_va(info->strmout_buffer->bo); - - va += info->strmout_buffer->offset + - info->strmout_buffer_offset; - - radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, - info->stride); - - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | - COPY_DATA_DST_SEL(COPY_DATA_REG) | - COPY_DATA_WR_CONFIRM); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2); - radeon_emit(cs, 0); /* unused */ - - radv_cs_add_buffer(ws, cs, info->strmout_buffer->bo); - } - if (info->indirect) { uint64_t va = radv_buffer_get_va(info->indirect->bo); uint64_t count_va = 0; @@ -3539,31 +3688,30 @@ * any context registers. */ static bool radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer, - bool indexed_draw) + const struct radv_draw_info *info) { struct radv_cmd_state *state = &cmd_buffer->state; if (!cmd_buffer->device->physical_device->has_scissor_bug) return false; + if (cmd_buffer->state.context_roll_without_scissor_emitted || info->strmout_buffer) + return true; + uint32_t used_states = cmd_buffer->state.pipeline->graphics.needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL; /* Index, vertex and streamout buffers don't change context regs, and - * pipeline is handled later. + * pipeline is already handled. 
*/ used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_VERTEX_BUFFER | RADV_CMD_DIRTY_STREAMOUT_BUFFER | RADV_CMD_DIRTY_PIPELINE); - /* Assume all state changes except these two can imply context rolls. */ if (cmd_buffer->state.dirty & used_states) return true; - if (cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline) - return true; - - if (indexed_draw && state->pipeline->graphics.prim_restart_enable && + if (info->indexed && state->pipeline->graphics.prim_restart_enable && (state->index_type ? 0xffffffffu : 0xffffu) != state->last_primitive_reset_index) return true; @@ -3574,7 +3722,7 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info) { - bool late_scissor_emission = radv_need_late_scissor_emission(cmd_buffer, info->indexed); + bool late_scissor_emission; if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) || cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline) @@ -3583,6 +3731,12 @@ if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) radv_emit_graphics_pipeline(cmd_buffer); + /* This should be before the cmd_buffer->state.dirty is cleared + * (excluding RADV_CMD_DIRTY_PIPELINE) and after + * cmd_buffer->state.context_roll_without_scissor_emitted is set. */ + late_scissor_emission = + radv_need_late_scissor_emission(cmd_buffer, info); + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) radv_emit_framebuffer_state(cmd_buffer); @@ -3602,9 +3756,7 @@ radv_cmd_buffer_flush_dynamic_state(cmd_buffer); - radv_emit_draw_registers(cmd_buffer, info->indexed, - info->instance_count > 1, info->indirect, - info->indirect ? 0 : info->count); + radv_emit_draw_registers(cmd_buffer, info); if (late_scissor_emission) radv_emit_scissor(cmd_buffer); @@ -3626,6 +3778,19 @@ radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4096); + if (likely(!info->indirect)) { + /* SI-CI treat instance_count==0 as instance_count==1. 
There is + * no workaround for indirect draws, but we can at least skip + * direct draws. + */ + if (unlikely(!info->instance_count)) + return; + + /* Handle count == 0. */ + if (unlikely(!info->count && !info->strmout_buffer)) + return; + } + /* Use optimal packet order based on whether we need to sync the * pipeline. */ @@ -3995,7 +4160,6 @@ } if (loc->sgpr_idx != -1) { - assert(!loc->indirect); assert(loc->num_sgprs == 3); radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 + @@ -4237,14 +4401,12 @@ VkImageLayout dst_layout, unsigned src_queue_mask, unsigned dst_queue_mask, - const VkImageSubresourceRange *range, - VkImageAspectFlags pending_clears) + const VkImageSubresourceRange *range) { if (!radv_image_has_htile(image)) return; - if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED && - radv_layout_has_htile(image, dst_layout, dst_queue_mask)) { + if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) { /* TODO: merge with the clear if applicable */ radv_initialize_htile(cmd_buffer, image, range, 0); } else if (!radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) && @@ -4281,6 +4443,27 @@ state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; } +void radv_initialize_fmask(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image) +{ + struct radv_cmd_state *state = &cmd_buffer->state; + static const uint32_t fmask_clear_values[4] = { + 0x00000000, + 0x02020202, + 0xE4E4E4E4, + 0x76543210 + }; + uint32_t log2_samples = util_logbase2(image->info.samples); + uint32_t value = fmask_clear_values[log2_samples]; + + state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | + RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; + + state->flush_bits |= radv_clear_fmask(cmd_buffer, image, value); + + state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; +} + void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, uint32_t value) { @@ -4316,6 +4499,10 @@ radv_initialise_cmask(cmd_buffer, image, value); } + if (radv_image_has_fmask(image)) { + 
radv_initialize_fmask(cmd_buffer, image); + } + if (radv_image_has_dcc(image)) { uint32_t value = 0xffffffffu; /* Fully expanded mode. */ bool need_decompress_pass = false; @@ -4328,8 +4515,8 @@ radv_initialize_dcc(cmd_buffer, image, value); - radv_set_dcc_need_cmask_elim_pred(cmd_buffer, image, - need_decompress_pass); + radv_update_fce_metadata(cmd_buffer, image, + need_decompress_pass); } if (radv_image_has_cmask(image) || radv_image_has_dcc(image)) { @@ -4371,6 +4558,13 @@ !radv_layout_can_fast_clear(image, dst_layout, dst_queue_mask)) { radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); } + + if (radv_image_has_fmask(image)) { + if (src_layout != VK_IMAGE_LAYOUT_GENERAL && + dst_layout == VK_IMAGE_LAYOUT_GENERAL) { + radv_expand_fmask_image_inplace(cmd_buffer, image, range); + } + } } } @@ -4380,8 +4574,7 @@ VkImageLayout dst_layout, uint32_t src_family, uint32_t dst_family, - const VkImageSubresourceRange *range, - VkImageAspectFlags pending_clears) + const VkImageSubresourceRange *range) { if (image->exclusive && src_family != dst_family) { /* This is an acquire or a release operation and there will be @@ -4411,7 +4604,7 @@ radv_handle_depth_image_transition(cmd_buffer, image, src_layout, dst_layout, src_queue_mask, dst_queue_mask, - range, pending_clears); + range); } else { radv_handle_color_image_transition(cmd_buffer, image, src_layout, dst_layout, @@ -4448,7 +4641,7 @@ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 7); - si_emit_wait_fence(cs, va, 1, 0xffffffff); + radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, va, 1, 0xffffffff); assert(cmd_buffer->cs->cdw <= cdw_max); } @@ -4485,14 +4678,15 @@ pImageMemoryBarriers[i].newLayout, pImageMemoryBarriers[i].srcQueueFamilyIndex, pImageMemoryBarriers[i].dstQueueFamilyIndex, - &pImageMemoryBarriers[i].subresourceRange, - 0); + &pImageMemoryBarriers[i].subresourceRange); } /* Make sure CP DMA is idle because the driver might have performed a * DMA operation for copying or 
filling buffers/images. */ - si_cp_dma_wait_for_idle(cmd_buffer); + if (info->srcStageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT | + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)) + si_cp_dma_wait_for_idle(cmd_buffer); cmd_buffer->state.flush_bits |= dst_flush_bits; } @@ -4549,14 +4743,16 @@ /* Make sure CP DMA is idle because the driver might have performed a * DMA operation for copying or filling buffers/images. */ - si_cp_dma_wait_for_idle(cmd_buffer); + if (stageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT | + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)) + si_cp_dma_wait_for_idle(cmd_buffer); /* TODO: Emit EOS events for syncing PS/CS stages. */ if (!(stageMask & ~top_of_pipe_flags)) { /* Just need to sync the PFP engine. */ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); radeon_emit(cs, va); @@ -4565,7 +4761,7 @@ } else if (!(stageMask & ~post_index_fetch_flags)) { /* Sync ME because PFP reads index and indirect buffers. 
*/ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); radeon_emit(cs, va); @@ -4577,7 +4773,7 @@ cmd_buffer->device->physical_device->rad_info.chip_class, radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, - EOP_DATA_SEL_VALUE_32BIT, va, 2, value, + EOP_DATA_SEL_VALUE_32BIT, va, value, cmd_buffer->gfx9_eop_bug_va); } @@ -4709,7 +4905,7 @@ enabled_mask |= 1 << idx; } - cmd_buffer->state.streamout.enabled_mask = enabled_mask; + cmd_buffer->state.streamout.enabled_mask |= enabled_mask; cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER; } @@ -4729,6 +4925,8 @@ S_028B94_STREAMOUT_3_EN(so->streamout_enabled)); radeon_emit(cs, so->hw_enabled_mask & so->enabled_stream_buffers_mask); + + cmd_buffer->state.context_roll_without_scissor_emitted = true; } static void @@ -4805,6 +5003,8 @@ radeon_emit(cs, sb[i].size >> 2); /* BUFFER_SIZE (in DW) */ radeon_emit(cs, so->stride_in_dw[i]); /* VTX_STRIDE (in DW) */ + cmd_buffer->state.context_roll_without_scissor_emitted = true; + if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) { /* The array of counter buffers is optional. */ RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]); @@ -4885,6 +5085,8 @@ * that the primitives-emitted query won't increment. 
*/ radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0); + + cmd_buffer->state.context_roll_without_scissor_emitted = true; } radv_set_streamout_enable(cmd_buffer, false); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_debug.c mesa-19.0.1/src/amd/vulkan/radv_debug.c --- mesa-18.3.3/src/amd/vulkan/radv_debug.c 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_debug.c 2019-03-31 23:16:37.000000000 +0000 @@ -63,7 +63,8 @@ device->trace_bo = ws->buffer_create(ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS| - RADEON_FLAG_NO_INTERPROCESS_SHARING); + RADEON_FLAG_NO_INTERPROCESS_SHARING, + RADV_BO_PRIORITY_UPLOAD_BUFFER); if (!device->trace_bo) return false; diff -Nru mesa-18.3.3/src/amd/vulkan/radv_debug.h mesa-19.0.1/src/amd/vulkan/radv_debug.h --- mesa-18.3.3/src/amd/vulkan/radv_debug.h 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_debug.h 2019-03-31 23:16:37.000000000 +0000 @@ -50,15 +50,15 @@ RADV_DEBUG_STARTUP = 0x100000, RADV_DEBUG_CHECKIR = 0x200000, RADV_DEBUG_NOTHREADLLVM = 0x400000, + RADV_DEBUG_NOBINNING = 0x800000, }; enum { RADV_PERFTEST_NO_BATCHCHAIN = 0x1, RADV_PERFTEST_SISCHED = 0x2, RADV_PERFTEST_LOCAL_BOS = 0x4, - RADV_PERFTEST_BINNING = 0x8, - RADV_PERFTEST_OUT_OF_ORDER = 0x10, - RADV_PERFTEST_DCC_MSAA = 0x20, + RADV_PERFTEST_OUT_OF_ORDER = 0x8, + RADV_PERFTEST_DCC_MSAA = 0x10, }; bool diff -Nru mesa-18.3.3/src/amd/vulkan/radv_descriptor_set.c mesa-19.0.1/src/amd/vulkan/radv_descriptor_set.c --- mesa-18.3.3/src/amd/vulkan/radv_descriptor_set.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_descriptor_set.c 2019-03-31 23:16:37.000000000 +0000 @@ -84,7 +84,9 @@ uint32_t immutable_sampler_count = 0; for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding); - if (pCreateInfo->pBindings[j].pImmutableSamplers) + if ((pCreateInfo->pBindings[j].descriptorType == 
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER || + pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) && + pCreateInfo->pBindings[j].pImmutableSamplers) immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount; } @@ -182,7 +184,9 @@ set_layout->has_variable_descriptors = true; } - if (binding->pImmutableSamplers) { + if ((binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER || + binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) && + binding->pImmutableSamplers) { set_layout->binding[b].immutable_samplers_offset = samplers_offset; set_layout->binding[b].immutable_samplers_equal = has_equal_immutable_samplers(binding->pImmutableSamplers, binding->descriptorCount); @@ -345,6 +349,7 @@ layout->num_sets = pCreateInfo->setLayoutCount; unsigned dynamic_offset_count = 0; + uint16_t dynamic_shader_stages = 0; _mesa_sha1_init(&ctx); @@ -356,6 +361,7 @@ layout->set[set].dynamic_offset_start = dynamic_offset_count; for (uint32_t b = 0; b < set_layout->binding_count; b++) { dynamic_offset_count += set_layout->binding[b].array_size * set_layout->binding[b].dynamic_offset_count; + dynamic_shader_stages |= set_layout->dynamic_shader_stages; if (set_layout->binding[b].immutable_samplers_offset) _mesa_sha1_update(&ctx, radv_immutable_samplers(set_layout, set_layout->binding + b), set_layout->binding[b].array_size * 4 * sizeof(uint32_t)); @@ -365,6 +371,7 @@ } layout->dynamic_offset_count = dynamic_offset_count; + layout->dynamic_shader_stages = dynamic_shader_stages; layout->push_constant_size = 0; for (unsigned i = 0; i < pCreateInfo->pushConstantRangeCount; ++i) { @@ -412,7 +419,7 @@ if (pool->host_memory_base) { if (pool->host_memory_end - pool->host_memory_ptr < mem_size) - return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY_KHR); + return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY); set = (struct radv_descriptor_set*)pool->host_memory_ptr; pool->host_memory_ptr += mem_size; @@ -437,7 +444,7 @@ if 
(!pool->host_memory_base && pool->entry_count == pool->max_entry_count) { vk_free2(&device->alloc, NULL, set); - return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY_KHR); + return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY); } /* try to allocate linearly first, so that we don't spend @@ -466,7 +473,7 @@ if (pool->size - offset < layout_size) { vk_free2(&device->alloc, NULL, set); - return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY_KHR); + return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY); } set->bo = pool->bo; set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + offset); @@ -478,7 +485,7 @@ pool->entries[index].set = set; pool->entry_count++; } else - return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY_KHR); + return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY); } if (layout->has_immutable_samplers) { @@ -595,7 +602,8 @@ RADEON_DOMAIN_VRAM, RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | - RADEON_FLAG_32BIT); + RADEON_FLAG_32BIT, + RADV_BO_PRIORITY_DESCRIPTOR); pool->mapped_ptr = (uint8_t*)device->ws->buffer_map(pool->bo); } pool->size = bo_size; @@ -961,9 +969,11 @@ } src_ptr += src_binding_layout->size / 4; dst_ptr += dst_binding_layout->size / 4; - dst_buffer_list[j] = src_buffer_list[j]; - ++src_buffer_list; - ++dst_buffer_list; + + if (src_binding_layout->type != VK_DESCRIPTOR_TYPE_SAMPLER) { + /* Sampler descriptors don't have a buffer list. 
*/ + dst_buffer_list[j] = src_buffer_list[j]; + } } } } @@ -982,9 +992,9 @@ } VkResult radv_CreateDescriptorUpdateTemplate(VkDevice _device, - const VkDescriptorUpdateTemplateCreateInfoKHR *pCreateInfo, + const VkDescriptorUpdateTemplateCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, - VkDescriptorUpdateTemplateKHR *pDescriptorUpdateTemplate) + VkDescriptorUpdateTemplate *pDescriptorUpdateTemplate) { RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, pCreateInfo->descriptorSetLayout); @@ -1002,7 +1012,7 @@ templ->bind_point = pCreateInfo->pipelineBindPoint; for (i = 0; i < entry_count; i++) { - const VkDescriptorUpdateTemplateEntryKHR *entry = &pCreateInfo->pDescriptorUpdateEntries[i]; + const VkDescriptorUpdateTemplateEntry *entry = &pCreateInfo->pDescriptorUpdateEntries[i]; const struct radv_descriptor_set_binding_layout *binding_layout = set_layout->binding + entry->dstBinding; const uint32_t buffer_offset = binding_layout->buffer_offset + entry->dstArrayElement; @@ -1015,7 +1025,7 @@ switch (entry->descriptorType) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - assert(pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR); + assert(pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET); dst_offset = binding_layout->dynamic_offset_offset + entry->dstArrayElement; dst_stride = 0; /* Not used */ break; @@ -1055,7 +1065,7 @@ } void radv_DestroyDescriptorUpdateTemplate(VkDevice _device, - VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, const VkAllocationCallbacks *pAllocator) { RADV_FROM_HANDLE(radv_device, device, _device); @@ -1070,7 +1080,7 @@ void radv_update_descriptor_set_with_template(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, struct radv_descriptor_set *set, - VkDescriptorUpdateTemplateKHR 
descriptorUpdateTemplate, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *pData) { RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate); @@ -1137,7 +1147,7 @@ void radv_UpdateDescriptorSetWithTemplate(VkDevice _device, VkDescriptorSet descriptorSet, - VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *pData) { RADV_FROM_HANDLE(radv_device, device, _device); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_descriptor_set.h mesa-19.0.1/src/amd/vulkan/radv_descriptor_set.h --- mesa-18.3.3/src/amd/vulkan/radv_descriptor_set.h 2018-04-24 14:37:08.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_descriptor_set.h 2019-03-31 23:16:37.000000000 +0000 @@ -85,6 +85,7 @@ uint32_t num_sets; uint32_t push_constant_size; uint32_t dynamic_offset_count; + uint16_t dynamic_shader_stages; unsigned char sha1[20]; }; diff -Nru mesa-18.3.3/src/amd/vulkan/radv_device.c mesa-19.0.1/src/amd/vulkan/radv_device.c --- mesa-18.3.3/src/amd/vulkan/radv_device.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_device.c 2019-03-31 23:16:37.000000000 +0000 @@ -45,10 +45,10 @@ #include "sid.h" #include "git_sha1.h" #include "gfx9d.h" -#include "addrlib/gfx9/chip/gfx9_enum.h" #include "util/build_id.h" #include "util/debug.h" #include "util/mesa-sha1.h" +#include "compiler/glsl_types.h" static int radv_device_get_cache_uuid(enum radeon_family family, void *uuid) @@ -123,19 +123,30 @@ snprintf(name, name_len, "%s%s", chip_string, llvm_string); } +static uint64_t +radv_get_visible_vram_size(struct radv_physical_device *device) +{ + return MIN2(device->rad_info.vram_size, device->rad_info.vram_vis_size); +} + +static uint64_t +radv_get_vram_size(struct radv_physical_device *device) +{ + return device->rad_info.vram_size - radv_get_visible_vram_size(device); +} + static void radv_physical_device_init_mem_types(struct radv_physical_device *device) { 
STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS); - uint64_t visible_vram_size = MIN2(device->rad_info.vram_size, - device->rad_info.vram_vis_size); - + uint64_t visible_vram_size = radv_get_visible_vram_size(device); + uint64_t vram_size = radv_get_vram_size(device); int vram_index = -1, visible_vram_index = -1, gart_index = -1; device->memory_properties.memoryHeapCount = 0; - if (device->rad_info.vram_size - visible_vram_size > 0) { + if (vram_size > 0) { vram_index = device->memory_properties.memoryHeapCount++; device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) { - .size = device->rad_info.vram_size - visible_vram_size, + .size = vram_size, .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, }; } @@ -271,8 +282,6 @@ device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; device->instance = instance; - assert(strlen(path) < ARRAY_SIZE(device->path)); - strncpy(device->path, path, ARRAY_SIZE(device->path)); device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags); @@ -329,7 +338,7 @@ device->rad_info.chip_class > GFX9) fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n"); - radv_get_driver_uuid(&device->device_uuid); + radv_get_driver_uuid(&device->driver_uuid); radv_get_device_uuid(&device->rad_info, &device->device_uuid); if (device->rad_info.family == CHIP_STONEY || @@ -361,6 +370,11 @@ device->dcc_msaa_allowed = (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA); + /* TODO: Figure out how to use LOAD_CONTEXT_REG on SI/CIK. 
*/ + device->has_load_ctx_reg_pkt = device->rad_info.chip_class >= GFX9 || + (device->rad_info.chip_class >= VI && + device->rad_info.me_fw_feature >= 41); + radv_physical_device_init_mem_types(device); radv_fill_device_extension_table(device, &device->supported_extensions); @@ -451,6 +465,7 @@ {"startup", RADV_DEBUG_STARTUP}, {"checkir", RADV_DEBUG_CHECKIR}, {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM}, + {"nobinning", RADV_DEBUG_NOBINNING}, {NULL, 0} }; @@ -465,7 +480,6 @@ {"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN}, {"sisched", RADV_PERFTEST_SISCHED}, {"localbos", RADV_PERFTEST_LOCAL_BOS}, - {"binning", RADV_PERFTEST_BINNING}, {"dccmsaa", RADV_PERFTEST_DCC_MSAA}, {NULL, 0} }; @@ -597,6 +611,7 @@ VG(VALGRIND_DESTROY_MEMPOOL(instance)); + _mesa_glsl_release_types(); _mesa_locale_fini(); vk_debug_report_instance_destroy(&instance->debug_report_callbacks); @@ -726,8 +741,7 @@ .alphaToOne = true, .multiViewport = true, .samplerAnisotropy = true, - .textureCompressionETC2 = pdevice->rad_info.chip_class >= GFX9 || - pdevice->rad_info.family == CHIP_STONEY, + .textureCompressionETC2 = radv_device_supports_etc(pdevice), .textureCompressionASTC_LDR = false, .textureCompressionBC = true, .occlusionQueryPrecise = true, @@ -737,7 +751,7 @@ .shaderTessellationAndGeometryPointSize = true, .shaderImageGatherExtended = true, .shaderStorageImageExtendedFormats = true, - .shaderStorageImageMultisample = false, + .shaderStorageImageMultisample = pdevice->rad_info.chip_class >= VI, .shaderUniformBufferArrayDynamicIndexing = true, .shaderSampledImageArrayDynamicIndexing = true, .shaderStorageBufferArrayDynamicIndexing = true, @@ -748,7 +762,7 @@ .shaderCullDistance = true, .shaderFloat64 = true, .shaderInt64 = true, - .shaderInt16 = pdevice->rad_info.chip_class >= GFX9 && HAVE_LLVM >= 0x700, + .shaderInt16 = pdevice->rad_info.chip_class >= GFX9, .sparseBinding = true, .variableMultisampleRate = true, .inheritedQueries = true, @@ -757,19 +771,19 @@ void radv_GetPhysicalDeviceFeatures2( 
VkPhysicalDevice physicalDevice, - VkPhysicalDeviceFeatures2KHR *pFeatures) + VkPhysicalDeviceFeatures2 *pFeatures) { RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); vk_foreach_struct(ext, pFeatures->pNext) { switch (ext->sType) { - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: { - VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES: { + VkPhysicalDeviceVariablePointerFeatures *features = (void *)ext; features->variablePointersStorageBuffer = true; - features->variablePointers = false; + features->variablePointers = true; break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR: { - VkPhysicalDeviceMultiviewFeaturesKHR *features = (VkPhysicalDeviceMultiviewFeaturesKHR*)ext; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: { + VkPhysicalDeviceMultiviewFeatures *features = (VkPhysicalDeviceMultiviewFeatures*)ext; features->multiview = true; features->multiviewGeometryShader = true; features->multiviewTessellationShader = true; @@ -790,11 +804,11 @@ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: { VkPhysicalDevice16BitStorageFeatures *features = (VkPhysicalDevice16BitStorageFeatures*)ext; - bool enabled = HAVE_LLVM >= 0x0700 && pdevice->rad_info.chip_class >= VI; + bool enabled = pdevice->rad_info.chip_class >= VI; features->storageBuffer16BitAccess = enabled; features->uniformAndStorageBuffer16BitAccess = enabled; features->storagePushConstant16 = enabled; - features->storageInputOutput16 = enabled; + features->storageInputOutput16 = enabled && HAVE_LLVM >= 0x900; break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: { @@ -849,6 +863,18 @@ features->geometryStreams = true; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT: { + VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *features = + (VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *)ext; + 
features->scalarBlockLayout = pdevice->rad_info.chip_class >= CIK; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: { + VkPhysicalDeviceMemoryPriorityFeaturesEXT *features = + (VkPhysicalDeviceMemoryPriorityFeaturesEXT *)ext; + features->memoryPriority = VK_TRUE; + break; + } default: break; } @@ -970,7 +996,7 @@ .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT, .sampledImageDepthSampleCounts = sample_counts, .sampledImageStencilSampleCounts = sample_counts, - .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, + .storageImageSampleCounts = pdevice->rad_info.chip_class >= VI ? sample_counts : VK_SAMPLE_COUNT_1_BIT, .maxSampleMaskWords = 1, .timestampComputeAndGraphics = true, .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq, @@ -978,7 +1004,7 @@ .maxCullDistances = 8, .maxCombinedClipAndCullDistances = 8, .discreteQueuePriorities = 2, - .pointSizeRange = { 0.125, 255.875 }, + .pointSizeRange = { 0.0, 8192.0 }, .lineWidthRange = { 0.0, 7.9921875 }, .pointSizeGranularity = (1.0 / 8.0), .lineWidthGranularity = (1.0 / 128.0), @@ -1005,7 +1031,7 @@ void radv_GetPhysicalDeviceProperties2( VkPhysicalDevice physicalDevice, - VkPhysicalDeviceProperties2KHR *pProperties) + VkPhysicalDeviceProperties2 *pProperties) { RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties); @@ -1018,23 +1044,23 @@ properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS; break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: { - VkPhysicalDeviceIDPropertiesKHR *properties = (VkPhysicalDeviceIDPropertiesKHR*)ext; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: { + VkPhysicalDeviceIDProperties *properties = (VkPhysicalDeviceIDProperties*)ext; memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE); memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE); properties->deviceLUIDValid = false; break; } - case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHR: { - VkPhysicalDeviceMultiviewPropertiesKHR *properties = (VkPhysicalDeviceMultiviewPropertiesKHR*)ext; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: { + VkPhysicalDeviceMultiviewProperties *properties = (VkPhysicalDeviceMultiviewProperties*)ext; properties->maxMultiviewViewCount = MAX_VIEWS; properties->maxMultiviewInstanceIndex = INT_MAX; break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: { - VkPhysicalDevicePointClippingPropertiesKHR *properties = - (VkPhysicalDevicePointClippingPropertiesKHR*)ext; - properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: { + VkPhysicalDevicePointClippingProperties *properties = + (VkPhysicalDevicePointClippingProperties*)ext; + properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES; break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: { @@ -1296,7 +1322,7 @@ { RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); if (!pQueueFamilyProperties) { - return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL); + radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL); return; } VkQueueFamilyProperties *properties[] = { @@ -1311,11 +1337,11 @@ void radv_GetPhysicalDeviceQueueFamilyProperties2( VkPhysicalDevice physicalDevice, uint32_t* pCount, - VkQueueFamilyProperties2KHR *pQueueFamilyProperties) + VkQueueFamilyProperties2 *pQueueFamilyProperties) { RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); if (!pQueueFamilyProperties) { - return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL); + radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL); return; } VkQueueFamilyProperties *properties[] = { @@ -1336,17 +1362,89 @@ *pMemoryProperties = physical_device->memory_properties; } +static 
void +radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget) +{ + RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice); + VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties; + uint64_t visible_vram_size = radv_get_visible_vram_size(device); + uint64_t vram_size = radv_get_vram_size(device); + uint64_t gtt_size = device->rad_info.gart_size; + uint64_t heap_budget, heap_usage; + + /* For all memory heaps, the computation of budget is as follow: + * heap_budget = heap_size - global_heap_usage + app_heap_usage + * + * The Vulkan spec 1.1.97 says that the budget should include any + * currently allocated device memory. + * + * Note that the application heap usages are not really accurate (eg. + * in presence of shared buffers). + */ + if (vram_size) { + heap_usage = device->ws->query_value(device->ws, + RADEON_ALLOCATED_VRAM); + + heap_budget = vram_size - + device->ws->query_value(device->ws, RADEON_VRAM_USAGE) + + heap_usage; + + memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM] = heap_budget; + memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM] = heap_usage; + } + + if (visible_vram_size) { + heap_usage = device->ws->query_value(device->ws, + RADEON_ALLOCATED_VRAM_VIS); + + heap_budget = visible_vram_size - + device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) + + heap_usage; + + memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = heap_budget; + memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = heap_usage; + } + + if (gtt_size) { + heap_usage = device->ws->query_value(device->ws, + RADEON_ALLOCATED_GTT); + + heap_budget = gtt_size - + device->ws->query_value(device->ws, RADEON_GTT_USAGE) + + heap_usage; + + memoryBudget->heapBudget[RADV_MEM_HEAP_GTT] = heap_budget; + memoryBudget->heapUsage[RADV_MEM_HEAP_GTT] = heap_usage; + } + + /* The heapBudget and heapUsage values must be zero for array elements + * greater than or equal to + * 
VkPhysicalDeviceMemoryProperties::memoryHeapCount. + */ + for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) { + memoryBudget->heapBudget[i] = 0; + memoryBudget->heapUsage[i] = 0; + } +} + void radv_GetPhysicalDeviceMemoryProperties2( VkPhysicalDevice physicalDevice, - VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties) + VkPhysicalDeviceMemoryProperties2 *pMemoryProperties) { - return radv_GetPhysicalDeviceMemoryProperties(physicalDevice, - &pMemoryProperties->memoryProperties); + radv_GetPhysicalDeviceMemoryProperties(physicalDevice, + &pMemoryProperties->memoryProperties); + + VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget = + vk_find_struct(pMemoryProperties->pNext, + PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT); + if (memory_budget) + radv_get_memory_budget_properties(physicalDevice, memory_budget); } VkResult radv_GetMemoryHostPointerPropertiesEXT( VkDevice _device, - VkExternalMemoryHandleTypeFlagBitsKHR handleType, + VkExternalMemoryHandleTypeFlagBits handleType, const void *pHostPointer, VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties) { @@ -1367,7 +1465,7 @@ return VK_SUCCESS; } default: - return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR; + return VK_ERROR_INVALID_EXTERNAL_HANDLE; } } @@ -1632,9 +1730,7 @@ } device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 && - ((device->instance->perftest_flags & RADV_PERFTEST_BINNING) || - device->physical_device->rad_info.family == CHIP_RAVEN || - device->physical_device->rad_info.family == CHIP_RAVEN2); + !(device->instance->debug_flags & RADV_DEBUG_NOBINNING); /* Disabled and not implemented for now. 
*/ device->dfsm_allowed = device->pbb_allowed && @@ -1881,136 +1977,138 @@ uint32_t tess_offchip_ring_size, struct radeon_winsys_bo *tess_rings_bo) { - uint64_t esgs_va = 0, gsvs_va = 0; - uint64_t tess_va = 0, tess_offchip_va = 0; uint32_t *desc = &map[4]; - if (esgs_ring_bo) - esgs_va = radv_buffer_get_va(esgs_ring_bo); - if (gsvs_ring_bo) - gsvs_va = radv_buffer_get_va(gsvs_ring_bo); + if (esgs_ring_bo) { + uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo); + + /* stride 0, num records - size, add tid, swizzle, elsize4, + index stride 64 */ + desc[0] = esgs_va; + desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) | + S_008F04_STRIDE(0) | + S_008F04_SWIZZLE_ENABLE(true); + desc[2] = esgs_ring_size; + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(1) | + S_008F0C_INDEX_STRIDE(3) | + S_008F0C_ADD_TID_ENABLE(true); + + /* GS entry for ES->GS ring */ + /* stride 0, num records - size, elsize0, + index stride 0 */ + desc[4] = esgs_va; + desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)| + S_008F04_STRIDE(0) | + S_008F04_SWIZZLE_ENABLE(false); + desc[6] = esgs_ring_size; + desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(0) | + S_008F0C_INDEX_STRIDE(0) | + S_008F0C_ADD_TID_ENABLE(false); + } + + desc += 8; + + if (gsvs_ring_bo) { + uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo); + + /* VS entry for GS->VS ring */ + /* stride 0, num records - size, elsize0, + index stride 0 */ + desc[0] = gsvs_va; + desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)| + 
S_008F04_STRIDE(0) | + S_008F04_SWIZZLE_ENABLE(false); + desc[2] = gsvs_ring_size; + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(0) | + S_008F0C_INDEX_STRIDE(0) | + S_008F0C_ADD_TID_ENABLE(false); + + /* stride gsvs_itemsize, num records 64 + elsize 4, index stride 16 */ + /* shader will patch stride and desc[2] */ + desc[4] = gsvs_va; + desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)| + S_008F04_STRIDE(0) | + S_008F04_SWIZZLE_ENABLE(true); + desc[6] = 0; + desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(1) | + S_008F0C_INDEX_STRIDE(1) | + S_008F0C_ADD_TID_ENABLE(true); + } + + desc += 8; + if (tess_rings_bo) { - tess_va = radv_buffer_get_va(tess_rings_bo); - tess_offchip_va = tess_va + tess_offchip_ring_offset; + uint64_t tess_va = radv_buffer_get_va(tess_rings_bo); + uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset; + + desc[0] = tess_va; + desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32) | + S_008F04_STRIDE(0) | + S_008F04_SWIZZLE_ENABLE(false); + desc[2] = tess_factor_ring_size; + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(0) | + S_008F0C_INDEX_STRIDE(0) | + S_008F0C_ADD_TID_ENABLE(false); + + desc[4] = tess_offchip_va; + desc[5] = 
S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) | + S_008F04_STRIDE(0) | + S_008F04_SWIZZLE_ENABLE(false); + desc[6] = tess_offchip_ring_size; + desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(0) | + S_008F0C_INDEX_STRIDE(0) | + S_008F0C_ADD_TID_ENABLE(false); } - /* stride 0, num records - size, add tid, swizzle, elsize4, - index stride 64 */ - desc[0] = esgs_va; - desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) | - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(true); - desc[2] = esgs_ring_size; - desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | - S_008F0C_ELEMENT_SIZE(1) | - S_008F0C_INDEX_STRIDE(3) | - S_008F0C_ADD_TID_ENABLE(true); - - desc += 4; - /* GS entry for ES->GS ring */ - /* stride 0, num records - size, elsize0, - index stride 0 */ - desc[0] = esgs_va; - desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)| - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(false); - desc[2] = esgs_ring_size; - desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | - S_008F0C_ELEMENT_SIZE(0) | - S_008F0C_INDEX_STRIDE(0) | - S_008F0C_ADD_TID_ENABLE(false); - - desc += 4; - /* VS entry for GS->VS ring */ - /* stride 0, num records - size, elsize0, - index stride 0 */ - desc[0] = gsvs_va; - desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)| - 
S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(false); - desc[2] = gsvs_ring_size; - desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | - S_008F0C_ELEMENT_SIZE(0) | - S_008F0C_INDEX_STRIDE(0) | - S_008F0C_ADD_TID_ENABLE(false); - desc += 4; - - /* stride gsvs_itemsize, num records 64 - elsize 4, index stride 16 */ - /* shader will patch stride and desc[2] */ - desc[0] = gsvs_va; - desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)| - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(true); - desc[2] = 0; - desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | - S_008F0C_ELEMENT_SIZE(1) | - S_008F0C_INDEX_STRIDE(1) | - S_008F0C_ADD_TID_ENABLE(true); - desc += 4; - - desc[0] = tess_va; - desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32) | - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(false); - desc[2] = tess_factor_ring_size; - desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | - S_008F0C_ELEMENT_SIZE(0) | - S_008F0C_INDEX_STRIDE(0) | - S_008F0C_ADD_TID_ENABLE(false); - desc += 4; - - desc[0] = tess_offchip_va; - desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) | - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(false); - desc[2] = tess_offchip_ring_size; - desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - 
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | - S_008F0C_ELEMENT_SIZE(0) | - S_008F0C_INDEX_STRIDE(0) | - S_008F0C_ADD_TID_ENABLE(false); - desc += 4; - - /* add sample positions after all rings */ - memcpy(desc, queue->device->sample_locations_1x, 8); - desc += 2; - memcpy(desc, queue->device->sample_locations_2x, 16); - desc += 4; - memcpy(desc, queue->device->sample_locations_4x, 32); desc += 8; - memcpy(desc, queue->device->sample_locations_8x, 64); - desc += 16; - memcpy(desc, queue->device->sample_locations_16x, 128); + + if (add_sample_positions) { + /* add sample positions after all rings */ + memcpy(desc, queue->device->sample_locations_1x, 8); + desc += 2; + memcpy(desc, queue->device->sample_locations_2x, 16); + desc += 4; + memcpy(desc, queue->device->sample_locations_4x, 32); + desc += 8; + memcpy(desc, queue->device->sample_locations_8x, 64); + } } static unsigned @@ -2044,16 +2142,15 @@ max_offchip_buffers = max_offchip_buffers_per_se * device->physical_device->rad_info.max_se; - switch (device->tess_offchip_block_dw_size) { - default: - assert(0); - /* fall through */ - case 8192: - offchip_granularity = V_03093C_X_8K_DWORDS; - break; - case 4096: + /* Hawaii has a bug with offchip buffers > 256 that can be worked + * around by setting 4K granularity. 
+ */ + if (device->tess_offchip_block_dw_size == 4096) { + assert(device->physical_device->rad_info.family == CHIP_HAWAII); offchip_granularity = V_03093C_X_4K_DWORDS; - break; + } else { + assert(device->tess_offchip_block_dw_size == 8192); + offchip_granularity = V_03093C_X_8K_DWORDS; } switch (device->physical_device->rad_info.chip_class) { @@ -2288,7 +2385,8 @@ scratch_size, 4096, RADEON_DOMAIN_VRAM, - ring_bo_flags); + ring_bo_flags, + RADV_BO_PRIORITY_SCRATCH); if (!scratch_bo) goto fail; } else @@ -2299,7 +2397,8 @@ compute_scratch_size, 4096, RADEON_DOMAIN_VRAM, - ring_bo_flags); + ring_bo_flags, + RADV_BO_PRIORITY_SCRATCH); if (!compute_scratch_bo) goto fail; @@ -2311,7 +2410,8 @@ esgs_ring_size, 4096, RADEON_DOMAIN_VRAM, - ring_bo_flags); + ring_bo_flags, + RADV_BO_PRIORITY_SCRATCH); if (!esgs_ring_bo) goto fail; } else { @@ -2324,7 +2424,8 @@ gsvs_ring_size, 4096, RADEON_DOMAIN_VRAM, - ring_bo_flags); + ring_bo_flags, + RADV_BO_PRIORITY_SCRATCH); if (!gsvs_ring_bo) goto fail; } else { @@ -2337,7 +2438,8 @@ tess_offchip_ring_offset + tess_offchip_ring_size, 256, RADEON_DOMAIN_VRAM, - ring_bo_flags); + ring_bo_flags, + RADV_BO_PRIORITY_SCRATCH); if (!tess_rings_bo) goto fail; } else { @@ -2354,7 +2456,7 @@ tess_rings_bo || add_sample_positions) { size = 112; /* 2 dword + 2 padding + 4 dword * 6 */ if (add_sample_positions) - size += 256; /* 32+16+8+4+2+1 samples * 4 * 2 = 248 bytes. 
*/ + size += 128; /* 64+32+16+8 = 120 bytes */ } else if (scratch_bo) size = 8; /* 2 dword */ @@ -2365,12 +2467,36 @@ RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_READ_ONLY); + RADEON_FLAG_READ_ONLY, + RADV_BO_PRIORITY_DESCRIPTOR); if (!descriptor_bo) goto fail; } else descriptor_bo = queue->descriptor_bo; + if (descriptor_bo != queue->descriptor_bo) { + uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo); + + if (scratch_bo) { + uint64_t scratch_va = radv_buffer_get_va(scratch_bo); + uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | + S_008F04_SWIZZLE_ENABLE(1); + map[0] = scratch_va; + map[1] = rsrc1; + } + + if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || add_sample_positions) + fill_geom_tess_rings(queue, map, add_sample_positions, + esgs_ring_size, esgs_ring_bo, + gsvs_ring_size, gsvs_ring_bo, + tess_factor_ring_size, + tess_offchip_ring_offset, + tess_offchip_ring_size, + tess_rings_bo); + + queue->device->ws->buffer_unmap(descriptor_bo); + } + for(int i = 0; i < 3; ++i) { struct radeon_cmdbuf *cs = NULL; cs = queue->device->ws->cs_create(queue->device->ws, @@ -2395,30 +2521,6 @@ break; } - if (descriptor_bo != queue->descriptor_bo) { - uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo); - - if (scratch_bo) { - uint64_t scratch_va = radv_buffer_get_va(scratch_bo); - uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | - S_008F04_SWIZZLE_ENABLE(1); - map[0] = scratch_va; - map[1] = rsrc1; - } - - if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || - add_sample_positions) - fill_geom_tess_rings(queue, map, add_sample_positions, - esgs_ring_size, esgs_ring_bo, - gsvs_ring_size, gsvs_ring_bo, - tess_factor_ring_size, - tess_offchip_ring_offset, - tess_offchip_ring_size, - tess_rings_bo); - - queue->device->ws->buffer_unmap(descriptor_bo); - } - if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) { radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 
radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); @@ -2694,7 +2796,7 @@ struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL; struct radeon_winsys_ctx *ctx = queue->hw_ctx; int ret; - uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX; + uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT; uint32_t scratch_size = 0; uint32_t compute_scratch_size = 0; uint32_t esgs_ring_size = 0, gsvs_ring_size = 0; @@ -2970,10 +3072,10 @@ const VkImportMemoryFdInfoKHR *import_info = vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR); - const VkMemoryDedicatedAllocateInfoKHR *dedicate_info = - vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR); - const VkExportMemoryAllocateInfoKHR *export_info = - vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR); + const VkMemoryDedicatedAllocateInfo *dedicate_info = + vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO); + const VkExportMemoryAllocateInfo *export_info = + vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO); const VkImportMemoryHostPointerInfoEXT *host_ptr_info = vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT); @@ -2996,17 +3098,27 @@ mem->buffer = NULL; } + float priority_float = 0.5; + const struct VkMemoryPriorityAllocateInfoEXT *priority_ext = + vk_find_struct_const(pAllocateInfo->pNext, + MEMORY_PRIORITY_ALLOCATE_INFO_EXT); + if (priority_ext) + priority_float = priority_ext->priority; + + unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1, + (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX)); + mem->user_ptr = NULL; if (import_info) { assert(import_info->handleType == - VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR || + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || import_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); mem->bo = 
device->ws->buffer_from_fd(device->ws, import_info->fd, - NULL, NULL); + priority, NULL, NULL); if (!mem->bo) { - result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR; + result = VK_ERROR_INVALID_EXTERNAL_HANDLE; goto fail; } else { close(import_info->fd); @@ -3015,9 +3127,10 @@ assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT); assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED); mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer, - pAllocateInfo->allocationSize); + pAllocateInfo->allocationSize, + priority); if (!mem->bo) { - result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR; + result = VK_ERROR_INVALID_EXTERNAL_HANDLE; goto fail; } else { mem->user_ptr = host_ptr_info->pHostPointer; @@ -3042,7 +3155,7 @@ flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING; mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment, - domain, flags); + domain, flags, priority); if (!mem->bo) { result = VK_ERROR_OUT_OF_DEVICE_MEMORY; @@ -3174,17 +3287,17 @@ void radv_GetBufferMemoryRequirements2( VkDevice device, - const VkBufferMemoryRequirementsInfo2KHR* pInfo, - VkMemoryRequirements2KHR* pMemoryRequirements) + const VkBufferMemoryRequirementsInfo2 *pInfo, + VkMemoryRequirements2 *pMemoryRequirements) { radv_GetBufferMemoryRequirements(device, pInfo->buffer, &pMemoryRequirements->memoryRequirements); RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer); vk_foreach_struct(ext, pMemoryRequirements->pNext) { switch (ext->sType) { - case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: { - VkMemoryDedicatedRequirementsKHR *req = - (VkMemoryDedicatedRequirementsKHR *) ext; + case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { + VkMemoryDedicatedRequirements *req = + (VkMemoryDedicatedRequirements *) ext; req->requiresDedicatedAllocation = buffer->shareable; req->prefersDedicatedAllocation = req->requiresDedicatedAllocation; break; @@ -3211,8 +3324,8 @@ void 
radv_GetImageMemoryRequirements2( VkDevice device, - const VkImageMemoryRequirementsInfo2KHR* pInfo, - VkMemoryRequirements2KHR* pMemoryRequirements) + const VkImageMemoryRequirementsInfo2 *pInfo, + VkMemoryRequirements2 *pMemoryRequirements) { radv_GetImageMemoryRequirements(device, pInfo->image, &pMemoryRequirements->memoryRequirements); @@ -3221,9 +3334,9 @@ vk_foreach_struct(ext, pMemoryRequirements->pNext) { switch (ext->sType) { - case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: { - VkMemoryDedicatedRequirementsKHR *req = - (VkMemoryDedicatedRequirementsKHR *) ext; + case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { + VkMemoryDedicatedRequirements *req = + (VkMemoryDedicatedRequirements *) ext; req->requiresDedicatedAllocation = image->shareable; req->prefersDedicatedAllocation = req->requiresDedicatedAllocation; break; @@ -3245,9 +3358,9 @@ void radv_GetImageSparseMemoryRequirements2( VkDevice device, - const VkImageSparseMemoryRequirementsInfo2KHR* pInfo, + const VkImageSparseMemoryRequirementsInfo2 *pInfo, uint32_t* pSparseMemoryRequirementCount, - VkSparseImageMemoryRequirements2KHR* pSparseMemoryRequirements) + VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements) { stub(); } @@ -3262,7 +3375,7 @@ VkResult radv_BindBufferMemory2(VkDevice device, uint32_t bindInfoCount, - const VkBindBufferMemoryInfoKHR *pBindInfos) + const VkBindBufferMemoryInfo *pBindInfos) { for (uint32_t i = 0; i < bindInfoCount; ++i) { RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory); @@ -3284,8 +3397,8 @@ VkDeviceMemory memory, VkDeviceSize memoryOffset) { - const VkBindBufferMemoryInfoKHR info = { - .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR, + const VkBindBufferMemoryInfo info = { + .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO, .buffer = buffer, .memory = memory, .memoryOffset = memoryOffset @@ -3296,7 +3409,7 @@ VkResult radv_BindImageMemory2(VkDevice device, uint32_t bindInfoCount, - const VkBindImageMemoryInfoKHR 
*pBindInfos) + const VkBindImageMemoryInfo *pBindInfos) { for (uint32_t i = 0; i < bindInfoCount; ++i) { RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory); @@ -3320,8 +3433,8 @@ VkDeviceMemory memory, VkDeviceSize memoryOffset) { - const VkBindImageMemoryInfoKHR info = { - .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR, + const VkBindImageMemoryInfo info = { + .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO, .image = image, .memory = memory, .memoryOffset = memoryOffset @@ -3446,9 +3559,9 @@ VkFence* pFence) { RADV_FROM_HANDLE(radv_device, device, _device); - const VkExportFenceCreateInfoKHR *export = - vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO_KHR); - VkExternalFenceHandleTypeFlagsKHR handleTypes = + const VkExportFenceCreateInfo *export = + vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO); + VkExternalFenceHandleTypeFlags handleTypes = export ? export->handleTypes : 0; struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator, @@ -3735,9 +3848,9 @@ VkSemaphore* pSemaphore) { RADV_FROM_HANDLE(radv_device, device, _device); - const VkExportSemaphoreCreateInfoKHR *export = - vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO_KHR); - VkExternalSemaphoreHandleTypeFlagsKHR handleTypes = + const VkExportSemaphoreCreateInfo *export = + vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO); + VkExternalSemaphoreHandleTypeFlags handleTypes = export ? 
export->handleTypes : 0; struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator, @@ -3802,7 +3915,8 @@ event->bo = device->ws->buffer_create(device->ws, 8, 8, RADEON_DOMAIN_GTT, - RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING); + RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING, + RADV_BO_PRIORITY_FENCE); if (!event->bo) { vk_free2(&device->alloc, pAllocator, event); return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); @@ -3883,12 +3997,13 @@ buffer->flags = pCreateInfo->flags; buffer->shareable = vk_find_struct_const(pCreateInfo->pNext, - EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR) != NULL; + EXTERNAL_MEMORY_BUFFER_CREATE_INFO) != NULL; if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) { buffer->bo = device->ws->buffer_create(device->ws, align64(buffer->size, 4096), - 4096, 0, RADEON_FLAG_VIRTUAL); + 4096, 0, RADEON_FLAG_VIRTUAL, + RADV_BO_PRIORITY_VIRTUAL); if (!buffer->bo) { vk_free2(&device->alloc, pAllocator, buffer); return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); @@ -4529,11 +4644,11 @@ { switch (mode) { case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT: - return SQ_IMG_FILTER_MODE_BLEND; + return V_008F30_SQ_IMG_FILTER_MODE_BLEND; case VK_SAMPLER_REDUCTION_MODE_MIN_EXT: - return SQ_IMG_FILTER_MODE_MIN; + return V_008F30_SQ_IMG_FILTER_MODE_MIN; case VK_SAMPLER_REDUCTION_MODE_MAX_EXT: - return SQ_IMG_FILTER_MODE_MAX; + return V_008F30_SQ_IMG_FILTER_MODE_MAX; default: break; } @@ -4562,7 +4677,7 @@ uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo); uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso); bool is_vi = (device->physical_device->rad_info.chip_class >= VI); - unsigned filter_mode = SQ_IMG_FILTER_MODE_BLEND; + unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND; const struct VkSamplerReductionModeCreateInfoEXT *sampler_reduction = vk_find_struct_const(pCreateInfo->pNext, @@ -4686,7 +4801,7 @@ /* 
At the moment, we support only the below handle types. */ assert(pGetFdInfo->handleType == - VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR || + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); @@ -4697,7 +4812,7 @@ } VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device, - VkExternalMemoryHandleTypeFlagBitsKHR handleType, + VkExternalMemoryHandleTypeFlagBits handleType, int fd, VkMemoryFdPropertiesKHR *pMemoryFdProperties) { @@ -4716,7 +4831,7 @@ * * So opaque handle types fall into the default "unsupported" case. */ - return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); + return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); } } @@ -4727,7 +4842,7 @@ uint32_t syncobj_handle = 0; int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle); if (ret != 0) - return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); + return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); if (*syncobj) device->ws->destroy_syncobj(device->ws, *syncobj); @@ -4748,7 +4863,7 @@ if (!syncobj_handle) { int ret = device->ws->create_syncobj(device->ws, &syncobj_handle); if (ret) { - return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); + return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); } } @@ -4757,7 +4872,7 @@ } else { int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd); if (ret != 0) - return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); + return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); } *syncobj = syncobj_handle; @@ -4774,16 +4889,16 @@ RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore); uint32_t *syncobj_dst = NULL; - if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) { + if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) { syncobj_dst = &sem->temp_syncobj; } else 
{ syncobj_dst = &sem->syncobj; } switch(pImportSemaphoreFdInfo->handleType) { - case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR: + case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: return radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst); - case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR: + case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: return radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst); default: unreachable("Unhandled semaphore handle type"); @@ -4805,10 +4920,10 @@ syncobj_handle = sem->syncobj; switch(pGetFdInfo->handleType) { - case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR: + case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd); break; - case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR: + case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd); if (!ret) { if (sem->temp_syncobj) { @@ -4824,30 +4939,30 @@ } if (ret) - return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); + return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); return VK_SUCCESS; } void radv_GetPhysicalDeviceExternalSemaphoreProperties( VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo, - VkExternalSemaphorePropertiesKHR* pExternalSemaphoreProperties) + const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo, + VkExternalSemaphoreProperties *pExternalSemaphoreProperties) { RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); /* Require has_syncobj_wait_for_submit for the syncobj signal ioctl introduced at virtually the same time */ if (pdevice->rad_info.has_syncobj_wait_for_submit && - (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR || - pExternalSemaphoreInfo->handleType == 
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) { - pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR; - pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR; - pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR | - VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR; - } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) { - pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; - pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; - pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR | - VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR; + (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT || + pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)) { + pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; + pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; + pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT | + VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT; + } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) { + pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; + pExternalSemaphoreProperties->compatibleHandleTypes = 
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; + pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT | + VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT; } else { pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0; pExternalSemaphoreProperties->compatibleHandleTypes = 0; @@ -4863,16 +4978,16 @@ uint32_t *syncobj_dst = NULL; - if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT_KHR) { + if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) { syncobj_dst = &fence->temp_syncobj; } else { syncobj_dst = &fence->syncobj; } switch(pImportFenceFdInfo->handleType) { - case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR: + case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: return radv_import_opaque_fd(device, pImportFenceFdInfo->fd, syncobj_dst); - case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR: + case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: return radv_import_sync_fd(device, pImportFenceFdInfo->fd, syncobj_dst); default: unreachable("Unhandled fence handle type"); @@ -4894,10 +5009,10 @@ syncobj_handle = fence->syncobj; switch(pGetFdInfo->handleType) { - case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR: + case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd); break; - case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR: + case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd); if (!ret) { if (fence->temp_syncobj) { @@ -4913,24 +5028,24 @@ } if (ret) - return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); + return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); return VK_SUCCESS; } void radv_GetPhysicalDeviceExternalFenceProperties( VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceExternalFenceInfoKHR* pExternalFenceInfo, - VkExternalFencePropertiesKHR* pExternalFenceProperties) + const 
VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo, + VkExternalFenceProperties *pExternalFenceProperties) { RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); if (pdevice->rad_info.has_syncobj_wait_for_submit && - (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR || - pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) { - pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR; - pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR; - pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT_KHR | - VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR; + (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT || + pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT)) { + pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT; + pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT; + pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT | + VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT; } else { pExternalFenceProperties->exportFromImportedHandleTypes = 0; pExternalFenceProperties->compatibleHandleTypes = 0; diff -Nru mesa-18.3.3/src/amd/vulkan/radv_extensions.py mesa-19.0.1/src/amd/vulkan/radv_extensions.py --- mesa-18.3.3/src/amd/vulkan/radv_extensions.py 2019-01-13 21:16:37.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_extensions.py 2019-03-31 23:16:37.000000000 +0000 @@ -31,7 +31,7 @@ from mako.template import Template -MAX_API_VERSION = '1.1.70' +MAX_API_VERSION = '1.1.90' class Extension: def 
__init__(self, name, ext_version, enable): @@ -51,7 +51,7 @@ # and dEQP-VK.api.info.device fail due to the duplicated strings. EXTENSIONS = [ Extension('VK_ANDROID_native_buffer', 5, 'ANDROID && device->rad_info.has_syncobj_wait_for_submit'), - Extension('VK_KHR_16bit_storage', 1, 'HAVE_LLVM >= 0x0700'), + Extension('VK_KHR_16bit_storage', 1, True), Extension('VK_KHR_bind_memory2', 1, True), Extension('VK_KHR_create_renderpass2', 1, True), Extension('VK_KHR_dedicated_allocation', 1, True), @@ -105,8 +105,11 @@ Extension('VK_EXT_external_memory_dma_buf', 1, True), Extension('VK_EXT_external_memory_host', 1, 'device->rad_info.has_userptr'), Extension('VK_EXT_global_priority', 1, 'device->rad_info.has_ctx_priority'), - Extension('VK_EXT_pci_bus_info', 1, False), + Extension('VK_EXT_memory_budget', 1, True), + Extension('VK_EXT_memory_priority', 1, True), + Extension('VK_EXT_pci_bus_info', 2, True), Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= CIK'), + Extension('VK_EXT_scalar_block_layout', 1, 'device->rad_info.chip_class >= CIK'), Extension('VK_EXT_shader_viewport_index_layer', 1, True), Extension('VK_EXT_shader_stencil_export', 1, True), Extension('VK_EXT_transform_feedback', 1, True), @@ -182,6 +185,32 @@ ext = ext_name_map[ext_name] ext.type = ext_elem.attrib['type'] + ext.promotedto = ext_elem.attrib.get('promotedto', None) + try: + ext.requires = ext_elem.attrib['requires'].split(',') + except KeyError: + ext.requires = [] + + def extra_deps(ext): + if ext.type == 'instance': + check = 'instance->enabled_extensions.{}'.format(ext.name[3:]) + if ext.promotedto is not None: + # the xml contains values like VK_VERSION_1_1, but we need to + # translate them to VK_API_VERSION_1_1 for the apiVersion check + api_ver = ext.promotedto.replace('VK_VER', 'VK_API_VER') + check = '({} || instance->apiVersion >= {})'.format(check, api_ver) + return set([check]) + + deps = set() + for dep in ext.requires: + deps |= extra_deps(ext_name_map[dep]) 
+ + return deps + + for ext in EXTENSIONS: + if ext.type == 'device': + for dep in extra_deps(ext): + ext.enable += ' && ' + dep _TEMPLATE_H = Template(COPYRIGHT + """ #ifndef RADV_EXTENSIONS_H @@ -276,6 +305,7 @@ void radv_fill_device_extension_table(const struct radv_physical_device *device, struct radv_device_extension_table* table) { + const struct radv_instance *instance = device->instance; %for ext in device_extensions: table->${ext.name[3:]} = ${ext.enable}; %endfor @@ -292,7 +322,7 @@ radv_physical_device_api_version(struct radv_physical_device *dev) { if (!ANDROID && dev->rad_info.has_syncobj_wait_for_submit) - return VK_MAKE_VERSION(1, 1, 70); + return ${MAX_API_VERSION.c_vk_version()}; return VK_MAKE_VERSION(1, 0, 68); } """) diff -Nru mesa-18.3.3/src/amd/vulkan/radv_formats.c mesa-19.0.1/src/amd/vulkan/radv_formats.c --- mesa-18.3.3/src/amd/vulkan/radv_formats.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_formats.c 2019-03-31 23:16:37.000000000 +0000 @@ -595,6 +595,14 @@ } } +bool +radv_device_supports_etc(struct radv_physical_device *physical_device) +{ + return physical_device->rad_info.family == CHIP_VEGA10 || + physical_device->rad_info.family == CHIP_RAVEN || + physical_device->rad_info.family == CHIP_STONEY; +} + static void radv_physical_device_get_format_properties(struct radv_physical_device *physical_device, VkFormat format, @@ -612,9 +620,7 @@ } if (desc->layout == VK_FORMAT_LAYOUT_ETC && - physical_device->rad_info.family != CHIP_VEGA10 && - physical_device->rad_info.family != CHIP_RAVEN && - physical_device->rad_info.family != CHIP_STONEY) { + !radv_device_supports_etc(physical_device)) { out_properties->linearTilingFeatures = linear; out_properties->optimalTilingFeatures = tiled; out_properties->bufferFeatures = buffer; @@ -639,8 +645,8 @@ tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; - tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR | - 
VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR; + tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_TRANSFER_DST_BIT; if (radv_is_filter_minmax_format_supported(format)) tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT_EXT; @@ -684,8 +690,8 @@ } } if (tiled && !scaled) { - tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR | - VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR; + tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_TRANSFER_DST_BIT; } /* Tiled formatting does not support NPOT pixel sizes */ @@ -694,8 +700,8 @@ } if (linear && !scaled) { - linear |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR | - VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR; + linear |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_TRANSFER_DST_BIT; } if (format == VK_FORMAT_R32_UINT || format == VK_FORMAT_R32_SINT) { @@ -1026,7 +1032,7 @@ void radv_GetPhysicalDeviceFormatProperties2( VkPhysicalDevice physicalDevice, VkFormat format, - VkFormatProperties2KHR* pFormatProperties) + VkFormatProperties2* pFormatProperties) { RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice); @@ -1036,7 +1042,7 @@ } static VkResult radv_get_image_format_properties(struct radv_physical_device *physical_device, - const VkPhysicalDeviceImageFormatInfo2KHR *info, + const VkPhysicalDeviceImageFormatInfo2 *info, VkImageFormatProperties *pImageFormatProperties) { @@ -1093,8 +1099,7 @@ info->type == VK_IMAGE_TYPE_2D && (format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) && - !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && - !(info->usage & VK_IMAGE_USAGE_STORAGE_BIT)) { + !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)) { sampleCounts |= VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT; } @@ -1112,6 +1117,18 @@ maxMipLevels = 1; } + + /* We can't create 3d compressed 128bpp images that can be rendered to on GFX9 */ + if (physical_device->rad_info.chip_class >= GFX9 && + 
info->type == VK_IMAGE_TYPE_3D && + vk_format_get_blocksizebits(info->format) == 128 && + vk_format_is_compressed(info->format) && + (info->flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT) && + ((info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) || + (info->usage & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT))) { + goto unsupported; + } + if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { goto unsupported; @@ -1191,8 +1208,8 @@ { RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice); - const VkPhysicalDeviceImageFormatInfo2KHR info = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR, + const VkPhysicalDeviceImageFormatInfo2 info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, .pNext = NULL, .format = format, .type = type, @@ -1206,20 +1223,20 @@ } static void -get_external_image_format_properties(const VkPhysicalDeviceImageFormatInfo2KHR *pImageFormatInfo, - VkExternalMemoryHandleTypeFlagBitsKHR handleType, - VkExternalMemoryPropertiesKHR *external_properties) -{ - VkExternalMemoryFeatureFlagBitsKHR flags = 0; - VkExternalMemoryHandleTypeFlagsKHR export_flags = 0; - VkExternalMemoryHandleTypeFlagsKHR compat_flags = 0; +get_external_image_format_properties(const VkPhysicalDeviceImageFormatInfo2 *pImageFormatInfo, + VkExternalMemoryHandleTypeFlagBits handleType, + VkExternalMemoryProperties *external_properties) +{ + VkExternalMemoryFeatureFlagBits flags = 0; + VkExternalMemoryHandleTypeFlags export_flags = 0; + VkExternalMemoryHandleTypeFlags compat_flags = 0; switch (handleType) { - case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR: + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: switch (pImageFormatInfo->type) { case VK_IMAGE_TYPE_2D: - flags = 
VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT_KHR|VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT_KHR|VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT_KHR; - compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | + flags = VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT|VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT|VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; + compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; break; default: @@ -1227,14 +1244,14 @@ } break; case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: - flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT_KHR; + flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; break; default: break; } - *external_properties = (VkExternalMemoryPropertiesKHR) { + *external_properties = (VkExternalMemoryProperties) { .externalMemoryFeatures = flags, .exportFromImportedHandleTypes = export_flags, .compatibleHandleTypes = compat_flags, @@ -1243,12 +1260,12 @@ VkResult radv_GetPhysicalDeviceImageFormatProperties2( VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceImageFormatInfo2KHR *base_info, - VkImageFormatProperties2KHR *base_props) + const VkPhysicalDeviceImageFormatInfo2 *base_info, + VkImageFormatProperties2 *base_props) { RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice); - const VkPhysicalDeviceExternalImageFormatInfoKHR *external_info = NULL; - VkExternalImageFormatPropertiesKHR *external_props = NULL; + const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL; + VkExternalImageFormatProperties *external_props = NULL; VkResult result; result = radv_get_image_format_properties(physical_device, base_info, @@ -1259,7 +1276,7 @@ /* Extract input structs */ vk_foreach_struct_const(s, base_info->pNext) { switch (s->sType) { - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO_KHR: + case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO: external_info = (const void *) s; break; default: @@ -1270,7 +1287,7 @@ /* Extract output structs */ vk_foreach_struct(s, base_props->pNext) { switch (s->sType) { - case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR: + case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES: external_props = (void *) s; break; default: @@ -1278,26 +1295,26 @@ } } - /* From the Vulkan 1.0.42 spec: + /* From the Vulkan 1.0.97 spec: * - * If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2KHR will - * behave as if VkPhysicalDeviceExternalImageFormatInfoKHR was not - * present and VkExternalImageFormatPropertiesKHR will be ignored. + * If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2 will + * behave as if VkPhysicalDeviceExternalImageFormatInfo was not + * present and VkExternalImageFormatProperties will be ignored. */ if (external_info && external_info->handleType != 0) { switch (external_info->handleType) { - case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR: + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: get_external_image_format_properties(base_info, external_info->handleType, &external_props->externalMemoryProperties); break; default: - /* From the Vulkan 1.0.42 spec: + /* From the Vulkan 1.0.97 spec: * * If handleType is not compatible with the [parameters] specified - * in VkPhysicalDeviceImageFormatInfo2KHR, then - * vkGetPhysicalDeviceImageFormatProperties2KHR returns + * in VkPhysicalDeviceImageFormatInfo2, then + * vkGetPhysicalDeviceImageFormatProperties2 returns * VK_ERROR_FORMAT_NOT_SUPPORTED. 
*/ result = vk_errorf(physical_device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED, @@ -1311,10 +1328,10 @@ fail: if (result == VK_ERROR_FORMAT_NOT_SUPPORTED) { - /* From the Vulkan 1.0.42 spec: + /* From the Vulkan 1.0.97 spec: * * If the combination of parameters to - * vkGetPhysicalDeviceImageFormatProperties2KHR is not supported by + * vkGetPhysicalDeviceImageFormatProperties2 is not supported by * the implementation for use in vkCreateImage, then all members of * imageFormatProperties will be filled with zero. */ @@ -1340,9 +1357,9 @@ void radv_GetPhysicalDeviceSparseImageFormatProperties2( VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceSparseImageFormatInfo2KHR* pFormatInfo, + const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo, uint32_t *pPropertyCount, - VkSparseImageFormatProperties2KHR* pProperties) + VkSparseImageFormatProperties2 *pProperties) { /* Sparse images are not yet supported. */ *pPropertyCount = 0; @@ -1350,28 +1367,28 @@ void radv_GetPhysicalDeviceExternalBufferProperties( VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceExternalBufferInfoKHR *pExternalBufferInfo, - VkExternalBufferPropertiesKHR *pExternalBufferProperties) + const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo, + VkExternalBufferProperties *pExternalBufferProperties) { - VkExternalMemoryFeatureFlagBitsKHR flags = 0; - VkExternalMemoryHandleTypeFlagsKHR export_flags = 0; - VkExternalMemoryHandleTypeFlagsKHR compat_flags = 0; + VkExternalMemoryFeatureFlagBits flags = 0; + VkExternalMemoryHandleTypeFlags export_flags = 0; + VkExternalMemoryHandleTypeFlags compat_flags = 0; switch(pExternalBufferInfo->handleType) { - case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR: + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: - flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT_KHR | - VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT_KHR; - compat_flags = export_flags = 
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | + flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | + VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; + compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; break; case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: - flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT_KHR; + flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; break; default: break; } - pExternalBufferProperties->externalMemoryProperties = (VkExternalMemoryPropertiesKHR) { + pExternalBufferProperties->externalMemoryProperties = (VkExternalMemoryProperties) { .externalMemoryFeatures = flags, .exportFromImportedHandleTypes = export_flags, .compatibleHandleTypes = compat_flags, diff -Nru mesa-18.3.3/src/amd/vulkan/radv_image.c mesa-19.0.1/src/amd/vulkan/radv_image.c --- mesa-18.3.3/src/amd/vulkan/radv_image.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_image.c 2019-03-31 23:16:37.000000000 +0000 @@ -73,7 +73,7 @@ return false; if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) || - (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR)) + (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT)) return false; if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) @@ -122,13 +122,12 @@ static bool radv_use_dcc_for_image(struct radv_device *device, + const struct radv_image *image, const struct radv_image_create_info *create_info, const VkImageCreateInfo *pCreateInfo) { bool dcc_compatible_formats; bool blendable; - bool shareable = vk_find_struct_const(pCreateInfo->pNext, - EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR) != NULL; /* DCC (Delta Color Compression) is only available for GFX8+. 
*/ if (device->physical_device->rad_info.chip_class < VI) @@ -139,12 +138,12 @@ /* FIXME: DCC is broken for shareable images starting with GFX9 */ if (device->physical_device->rad_info.chip_class >= GFX9 && - shareable) + image->shareable) return false; /* TODO: Enable DCC for storage images. */ if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) || - (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR)) + (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT)) return false; if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) @@ -198,6 +197,7 @@ static int radv_init_surface(struct radv_device *device, + const struct radv_image *image, struct radeon_surf *surface, const struct radv_image_create_info *create_info) { @@ -249,9 +249,15 @@ if (is_stencil) surface->flags |= RADEON_SURF_SBUFFER; + if (device->physical_device->rad_info.chip_class >= GFX9 && + pCreateInfo->imageType == VK_IMAGE_TYPE_3D && + vk_format_get_blocksizebits(pCreateInfo->format) == 128 && + vk_format_is_compressed(pCreateInfo->format)) + surface->flags |= RADEON_SURF_NO_RENDER_TARGET; + surface->flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE; - if (!radv_use_dcc_for_image(device, create_info, pCreateInfo)) + if (!radv_use_dcc_for_image(device, image, create_info, pCreateInfo)) surface->flags |= RADEON_SURF_DISABLE_DCC; if (create_info->scanout) @@ -857,8 +863,9 @@ image->dcc_offset = align64(image->size, image->surface.dcc_alignment); /* + 16 for storing the clear values + dcc pred */ image->clear_value_offset = image->dcc_offset + image->surface.dcc_size; - image->dcc_pred_offset = image->clear_value_offset + 8; - image->size = image->dcc_offset + image->surface.dcc_size + 16; + image->fce_pred_offset = image->clear_value_offset + 8; + image->dcc_pred_offset = image->clear_value_offset + 16; + image->size = image->dcc_offset + image->surface.dcc_size + 24; image->alignment = MAX2(image->alignment, image->surface.dcc_alignment); } @@ -931,8 +938,8 @@ static inline bool 
radv_image_can_enable_htile(struct radv_image *image) { - return image->info.levels == 1 && - vk_format_is_depth(image->vk_format) && + return radv_image_has_htile(image) && + image->info.levels == 1 && image->info.width * image->info.height >= 8 * 8; } @@ -977,19 +984,19 @@ image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE; if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) { for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i) - if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL_KHR) + if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL) image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u; else image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i]; } image->shareable = vk_find_struct_const(pCreateInfo->pNext, - EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR) != NULL; + EXTERNAL_MEMORY_IMAGE_CREATE_INFO) != NULL; if (!vk_format_is_depth_or_stencil(pCreateInfo->format) && !create_info->scanout && !image->shareable) { image->info.surf_index = &device->image_mrt_offset_counter; } - radv_init_surface(device, &image->surface, create_info); + radv_init_surface(device, image, &image->surface, create_info); device->ws->surface_init(device->ws, &image->info, &image->surface); @@ -1039,7 +1046,7 @@ image->offset = 0; image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment, - 0, RADEON_FLAG_VIRTUAL); + 0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL); if (!image->bo) { vk_free2(&device->alloc, alloc, image); return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); @@ -1257,7 +1264,7 @@ { if (!image->exclusive) return image->queue_family_mask; - if (family == VK_QUEUE_FAMILY_EXTERNAL_KHR) + if (family == VK_QUEUE_FAMILY_EXTERNAL) return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u; if (family == VK_QUEUE_FAMILY_IGNORED) return 1u << queue_family; diff -Nru mesa-18.3.3/src/amd/vulkan/radv_llvm_helper.cpp mesa-19.0.1/src/amd/vulkan/radv_llvm_helper.cpp --- 
mesa-18.3.3/src/amd/vulkan/radv_llvm_helper.cpp 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_llvm_helper.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -40,7 +40,6 @@ bool init(void) { if (!ac_init_llvm_compiler(&llvm_info, - true, family, tm_options)) return false; @@ -99,7 +98,6 @@ } bool radv_init_llvm_compiler(struct ac_llvm_compiler *info, - bool okay_to_leak_target_library_info, bool thread_compiler, enum radeon_family family, enum ac_target_machine_options tm_options) @@ -125,7 +123,6 @@ } if (!ac_init_llvm_compiler(info, - okay_to_leak_target_library_info, family, tm_options)) return false; diff -Nru mesa-18.3.3/src/amd/vulkan/radv_meta_blit.c mesa-19.0.1/src/amd/vulkan/radv_meta_blit.c --- mesa-18.3.3/src/amd/vulkan/radv_meta_blit.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta_blit.c 2019-03-31 23:16:37.000000000 +0000 @@ -849,54 +849,60 @@ .subpass = 0, }; - switch(aspect) { - case VK_IMAGE_ASPECT_COLOR_BIT: - vk_pipeline_info.pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkPipelineColorBlendAttachmentState []) { - { .colorWriteMask = - VK_COLOR_COMPONENT_A_BIT | - VK_COLOR_COMPONENT_R_BIT | - VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT }, + VkPipelineColorBlendStateCreateInfo color_blend_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { + .colorWriteMask = VK_COLOR_COMPONENT_A_BIT | + VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT }, } }; + + VkPipelineDepthStencilStateCreateInfo depth_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = true, + .depthWriteEnable = true, + .depthCompareOp = VK_COMPARE_OP_ALWAYS, + }; + + VkPipelineDepthStencilStateCreateInfo 
stencil_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = false, + .depthWriteEnable = false, + .stencilTestEnable = true, + .front = { + .failOp = VK_STENCIL_OP_REPLACE, + .passOp = VK_STENCIL_OP_REPLACE, + .depthFailOp = VK_STENCIL_OP_REPLACE, + .compareOp = VK_COMPARE_OP_ALWAYS, + .compareMask = 0xff, + .writeMask = 0xff, + .reference = 0 + }, + .back = { + .failOp = VK_STENCIL_OP_REPLACE, + .passOp = VK_STENCIL_OP_REPLACE, + .depthFailOp = VK_STENCIL_OP_REPLACE, + .compareOp = VK_COMPARE_OP_ALWAYS, + .compareMask = 0xff, + .writeMask = 0xff, + .reference = 0 + }, + .depthCompareOp = VK_COMPARE_OP_ALWAYS, + }; + + switch(aspect) { + case VK_IMAGE_ASPECT_COLOR_BIT: + vk_pipeline_info.pColorBlendState = &color_blend_info; break; case VK_IMAGE_ASPECT_DEPTH_BIT: - vk_pipeline_info.pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .depthTestEnable = true, - .depthWriteEnable = true, - .depthCompareOp = VK_COMPARE_OP_ALWAYS, - }; + vk_pipeline_info.pDepthStencilState = &depth_info; break; case VK_IMAGE_ASPECT_STENCIL_BIT: - vk_pipeline_info.pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .depthTestEnable = false, - .depthWriteEnable = false, - .stencilTestEnable = true, - .front = { - .failOp = VK_STENCIL_OP_REPLACE, - .passOp = VK_STENCIL_OP_REPLACE, - .depthFailOp = VK_STENCIL_OP_REPLACE, - .compareOp = VK_COMPARE_OP_ALWAYS, - .compareMask = 0xff, - .writeMask = 0xff, - .reference = 0 - }, - .back = { - .failOp = VK_STENCIL_OP_REPLACE, - .passOp = VK_STENCIL_OP_REPLACE, - .depthFailOp = VK_STENCIL_OP_REPLACE, - .compareOp = VK_COMPARE_OP_ALWAYS, - .compareMask = 0xff, - .writeMask = 0xff, - .reference = 0 - }, - .depthCompareOp = VK_COMPARE_OP_ALWAYS, - }; + vk_pipeline_info.pDepthStencilState = &stencil_info; break; default: 
unreachable("Unhandled aspect"); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_meta_buffer.c mesa-19.0.1/src/amd/vulkan/radv_meta_buffer.c --- mesa-18.3.3/src/amd/vulkan/radv_meta_buffer.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta_buffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -15,8 +15,8 @@ b.shader->info.cs.local_size[1] = 1; b.shader->info.cs.local_size[2] = 1; - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -67,8 +67,8 @@ b.shader->info.cs.local_size[1] = 1; b.shader->info.cs.local_size[2] = 1; - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -522,7 +522,7 @@ radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0)); radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ? 
- V_370_MEM_ASYNC : V_370_MEMORY_SYNC) | + V_370_MEM : V_370_MEM_GRBM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); radeon_emit(cmd_buffer->cs, va); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_meta_bufimage.c mesa-19.0.1/src/amd/vulkan/radv_meta_bufimage.c --- mesa-18.3.3/src/amd/vulkan/radv_meta_bufimage.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta_bufimage.c 2019-03-31 23:16:37.000000000 +0000 @@ -60,8 +60,8 @@ output_img->data.descriptor_set = 0; output_img->data.binding = 1; - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -289,8 +289,8 @@ output_img->data.descriptor_set = 0; output_img->data.binding = 1; - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -511,8 +511,8 @@ output_img->data.descriptor_set = 0; output_img->data.binding = 1; - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -719,8 +719,8 @@ output_img->data.descriptor_set = 0; output_img->data.binding = 1; - 
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -932,8 +932,8 @@ output_img->data.descriptor_set = 0; output_img->data.binding = 1; - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -1139,8 +1139,8 @@ output_img->data.descriptor_set = 0; output_img->data.binding = 0; - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -1331,8 +1331,8 @@ output_img->data.descriptor_set = 0; output_img->data.binding = 0; - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -1593,7 +1593,7 @@ }, NULL, buffer); radv_BindBufferMemory2(radv_device_to_handle(device), 1, - 
(VkBindBufferMemoryInfoKHR[]) { + (VkBindBufferMemoryInfo[]) { { .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO, .buffer = *buffer, diff -Nru mesa-18.3.3/src/amd/vulkan/radv_meta.c mesa-19.0.1/src/amd/vulkan/radv_meta.c --- mesa-18.3.3/src/amd/vulkan/radv_meta.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta.c 2019-03-31 23:16:37.000000000 +0000 @@ -389,8 +389,15 @@ result = radv_device_init_meta_resolve_fragment_state(device, on_demand); if (result != VK_SUCCESS) goto fail_resolve_fragment; + + result = radv_device_init_meta_fmask_expand_state(device); + if (result != VK_SUCCESS) + goto fail_fmask_expand; + return VK_SUCCESS; +fail_fmask_expand: + radv_device_finish_meta_resolve_fragment_state(device); fail_resolve_fragment: radv_device_finish_meta_resolve_compute_state(device); fail_resolve_compute: @@ -431,6 +438,7 @@ radv_device_finish_meta_fast_clear_flush_state(device); radv_device_finish_meta_resolve_compute_state(device); radv_device_finish_meta_resolve_fragment_state(device); + radv_device_finish_meta_fmask_expand_state(device); radv_store_meta_pipeline(device); radv_pipeline_cache_finish(&device->meta_state.cache); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_meta_clear.c mesa-19.0.1/src/amd/vulkan/radv_meta_clear.c --- mesa-18.3.3/src/amd/vulkan/radv_meta_clear.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta_clear.c 2019-03-31 23:16:37.000000000 +0000 @@ -81,8 +81,8 @@ "v_layer"); vs_out_layer->data.location = VARYING_SLOT_LAYER; vs_out_layer->data.interpolation = INTERP_MODE_FLAT; - nir_ssa_def *inst_id = nir_load_system_value(&vs_b, nir_intrinsic_load_instance_id, 0); - nir_ssa_def *base_instance = nir_load_system_value(&vs_b, nir_intrinsic_load_base_instance, 0); + nir_ssa_def *inst_id = nir_load_instance_id(&vs_b); + nir_ssa_def *base_instance = nir_load_base_instance(&vs_b); nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance); nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1); 
@@ -303,6 +303,22 @@ return result; } +static void +finish_meta_clear_htile_mask_state(struct radv_device *device) +{ + struct radv_meta_state *state = &device->meta_state; + + radv_DestroyPipeline(radv_device_to_handle(device), + state->clear_htile_mask_pipeline, + &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), + state->clear_htile_mask_p_layout, + &state->alloc); + radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), + state->clear_htile_mask_ds_layout, + &state->alloc); +} + void radv_device_finish_meta_clear_state(struct radv_device *device) { @@ -339,6 +355,8 @@ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_depth_p_layout, &state->alloc); + + finish_meta_clear_htile_mask_state(device); } static void @@ -352,14 +370,29 @@ const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer; const uint32_t subpass_att = clear_att->colorAttachment; const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment; - const struct radv_image_view *iview = fb->attachments[pass_att].attachment; - const uint32_t samples = iview->image->info.samples; - const uint32_t samples_log2 = ffs(samples) - 1; - unsigned fs_key = radv_format_meta_fs_key(iview->vk_format); + const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL; + uint32_t samples, samples_log2; + VkFormat format; + unsigned fs_key; VkClearColorValue clear_value = clear_att->clearValue.color; VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); VkPipeline pipeline; + /* When a framebuffer is bound to the current command buffer, get the + * number of samples from it. Otherwise, get the number of samples from + * the render pass because it's likely a secondary command buffer. 
+ */ + if (iview) { + samples = iview->image->info.samples; + format = iview->vk_format; + } else { + samples = cmd_buffer->state.pass->attachments[pass_att].samples; + format = cmd_buffer->state.pass->attachments[pass_att].format; + } + + samples_log2 = ffs(samples) - 1; + fs_key = radv_format_meta_fs_key(format); + if (fs_key == -1) { radv_finishme("color clears incomplete"); return; @@ -470,8 +503,8 @@ "v_layer"); vs_out_layer->data.location = VARYING_SLOT_LAYER; vs_out_layer->data.interpolation = INTERP_MODE_FLAT; - nir_ssa_def *inst_id = nir_load_system_value(&vs_b, nir_intrinsic_load_instance_id, 0); - nir_ssa_def *base_instance = nir_load_system_value(&vs_b, nir_intrinsic_load_base_instance, 0); + nir_ssa_def *inst_id = nir_load_instance_id(&vs_b); + nir_ssa_def *base_instance = nir_load_base_instance(&vs_b); nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance); nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1); @@ -599,6 +632,9 @@ const VkClearRect *clear_rect, VkClearDepthStencilValue clear_value) { + if (!iview) + return false; + uint32_t queue_mask = radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index); @@ -615,7 +651,7 @@ iview->base_mip == 0 && iview->base_layer == 0 && radv_layout_is_htile_compressed(iview->image, layout, queue_mask) && - !radv_image_extent_compare(iview->image, &iview->extent)) + radv_image_extent_compare(iview->image, &iview->extent)) return true; return false; } @@ -677,7 +713,8 @@ static void emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, - const VkClearRect *clear_rect) + const VkClearRect *clear_rect, + uint32_t view_mask) { struct radv_device *device = cmd_buffer->device; struct radv_meta_state *meta_state = &device->meta_state; @@ -686,11 +723,22 @@ const uint32_t pass_att = subpass->depth_stencil_attachment.attachment; VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil; VkImageAspectFlags 
aspects = clear_att->aspectMask; - const struct radv_image_view *iview = fb->attachments[pass_att].attachment; - const uint32_t samples = iview->image->info.samples; - const uint32_t samples_log2 = ffs(samples) - 1; + const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL; + uint32_t samples, samples_log2; VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); + /* When a framebuffer is bound to the current command buffer, get the + * number of samples from it. Otherwise, get the number of samples from + * the render pass because it's likely a secondary command buffer. + */ + if (iview) { + samples = iview->image->info.samples; + } else { + samples = cmd_buffer->state.pass->attachments[pass_att].samples; + } + + samples_log2 = ffs(samples) - 1; + assert(pass_att != VK_ATTACHMENT_UNUSED); if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) @@ -738,7 +786,13 @@ radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect); - radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer); + if (view_mask) { + unsigned i; + for_each_bit(i, view_mask) + radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i); + } else { + radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer); + } if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, @@ -746,94 +800,396 @@ } } +static uint32_t +clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, + struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size, + uint32_t htile_value, uint32_t htile_mask) +{ + struct radv_device *device = cmd_buffer->device; + struct radv_meta_state *state = &device->meta_state; + uint64_t block_count = round_up_u64(size, 1024); + struct radv_meta_saved_state saved_state; + + radv_meta_save(&saved_state, cmd_buffer, + RADV_META_SAVE_COMPUTE_PIPELINE | + RADV_META_SAVE_CONSTANTS | + RADV_META_SAVE_DESCRIPTORS); + + struct radv_buffer dst_buffer = { + .bo 
= bo, + .offset = offset, + .size = size + }; + + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_COMPUTE, + state->clear_htile_mask_pipeline); + + radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, + state->clear_htile_mask_p_layout, + 0, /* set */ + 1, /* descriptorWriteCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .pBufferInfo = &(VkDescriptorBufferInfo) { + .buffer = radv_buffer_to_handle(&dst_buffer), + .offset = 0, + .range = size + } + } + }); + + const unsigned constants[2] = { + htile_value & htile_mask, + ~htile_mask, + }; + + radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), + state->clear_htile_mask_p_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, 8, + constants); + + radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1); + + radv_meta_restore(&saved_state, cmd_buffer); + + return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | + RADV_CMD_FLAG_INV_VMEM_L1 | + RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2; +} + +static uint32_t +radv_get_htile_fast_clear_value(const struct radv_image *image, + VkClearDepthStencilValue value) +{ + uint32_t clear_value; + + if (!image->surface.has_stencil) { + clear_value = value.depth ? 0xfffffff0 : 0; + } else { + clear_value = value.depth ? 0xfffc0000 : 0; + } + + return clear_value; +} + +static uint32_t +radv_get_htile_mask(const struct radv_image *image, VkImageAspectFlags aspects) +{ + uint32_t mask = 0; + + if (!image->surface.has_stencil) { + /* All the HTILE buffer is used when there is no stencil. 
*/ + mask = UINT32_MAX; + } else { + if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) + mask |= 0xfffffc0f; + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) + mask |= 0x000003f0; + } + + return mask; +} + static bool -emit_fast_htile_clear(struct radv_cmd_buffer *cmd_buffer, - const VkClearAttachment *clear_att, - const VkClearRect *clear_rect, - enum radv_cmd_flush_bits *pre_flush, - enum radv_cmd_flush_bits *post_flush) +radv_is_fast_clear_depth_allowed(VkClearDepthStencilValue value) { - const struct radv_subpass *subpass = cmd_buffer->state.subpass; - const uint32_t pass_att = subpass->depth_stencil_attachment.attachment; - VkImageLayout image_layout = subpass->depth_stencil_attachment.layout; - const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct radv_image_view *iview = fb->attachments[pass_att].attachment; - VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil; - VkImageAspectFlags aspects = clear_att->aspectMask; - uint32_t clear_word, flush_bits; + return value.depth == 1.0f || value.depth == 0.0f; +} + +static bool +radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value) +{ + return value.stencil == 0; +} + +/** + * Determine if the given image can be fast cleared. + */ +static bool +radv_image_can_fast_clear(struct radv_device *device, struct radv_image *image) +{ + if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS) + return false; + + if (vk_format_is_color(image->vk_format)) { + if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image)) + return false; + + /* RB+ doesn't work with CMASK fast clear on Stoney. */ + if (!radv_image_has_dcc(image) && + device->physical_device->rad_info.family == CHIP_STONEY) + return false; + } else { + if (!radv_image_has_htile(image)) + return false; + } - if (!radv_image_has_htile(iview->image)) + /* Do not fast clears 3D images. 
*/ + if (image->type == VK_IMAGE_TYPE_3D) return false; - if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS) + return true; +} + +/** + * Determine if the given image view can be fast cleared. + */ +static bool +radv_image_view_can_fast_clear(struct radv_device *device, + const struct radv_image_view *iview) +{ + struct radv_image *image; + + if (!iview) return false; + image = iview->image; - if (!radv_layout_is_htile_compressed(iview->image, image_layout, radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index))) - goto fail; + /* Only fast clear if the image itself can be fast cleared. */ + if (!radv_image_can_fast_clear(device, image)) + return false; - /* don't fast clear 3D */ - if (iview->image->type == VK_IMAGE_TYPE_3D) - goto fail; + /* Only fast clear if all layers are bound. */ + if (iview->base_layer > 0 || + iview->layer_count != image->info.array_size) + return false; - /* all layers are bound */ - if (iview->base_layer > 0) - goto fail; - if (iview->image->info.array_size != iview->layer_count) - goto fail; + /* Only fast clear if the view covers the whole image. 
*/ + if (!radv_image_extent_compare(image, &iview->extent)) + return false; - if (!radv_image_extent_compare(iview->image, &iview->extent)) - goto fail; + return true; +} + +static bool +radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, + const struct radv_image_view *iview, + VkImageLayout image_layout, + VkImageAspectFlags aspects, + const VkClearRect *clear_rect, + const VkClearDepthStencilValue clear_value, + uint32_t view_mask) +{ + if (!radv_image_view_can_fast_clear(cmd_buffer->device, iview)) + return false; + + if (!radv_layout_is_htile_compressed(iview->image, image_layout, radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index))) + return false; if (clear_rect->rect.offset.x || clear_rect->rect.offset.y || clear_rect->rect.extent.width != iview->image->info.width || clear_rect->rect.extent.height != iview->image->info.height) - goto fail; + return false; - if (clear_rect->baseArrayLayer != 0) - goto fail; - if (clear_rect->layerCount != iview->image->info.array_size) - goto fail; + if (view_mask && (iview->image->info.array_size >= 32 || + (1u << iview->image->info.array_size) - 1u != view_mask)) + return false; + if (!view_mask && clear_rect->baseArrayLayer != 0) + return false; + if (!view_mask && clear_rect->layerCount != iview->image->info.array_size) + return false; - if ((clear_value.depth != 0.0 && clear_value.depth != 1.0) || !(aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) - goto fail; + if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 && + (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT) || + ((vk_format_aspects(iview->image->vk_format) & VK_IMAGE_ASPECT_STENCIL_BIT) && + !(aspects & VK_IMAGE_ASPECT_STENCIL_BIT)))) + return false; - /* GFX8 only supports 32-bit depth surfaces but we can enable TC-compat - * HTILE for 16-bit surfaces if no Z planes are compressed. Though, - * fast HTILE clears don't seem to work. 
- */ - if (cmd_buffer->device->physical_device->rad_info.chip_class == VI && - iview->image->vk_format == VK_FORMAT_D16_UNORM) - goto fail; + if (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && + !radv_is_fast_clear_depth_allowed(clear_value)) || + ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && + !radv_is_fast_clear_stencil_allowed(clear_value))) + return false; - if (vk_format_aspects(iview->image->vk_format) & VK_IMAGE_ASPECT_STENCIL_BIT) { - if (clear_value.stencil != 0 || !(aspects & VK_IMAGE_ASPECT_STENCIL_BIT)) - goto fail; - clear_word = clear_value.depth ? 0xfffc0000 : 0; - } else - clear_word = clear_value.depth ? 0xfffffff0 : 0; + return true; +} + +static void +radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, + const struct radv_image_view *iview, + const VkClearAttachment *clear_att, + enum radv_cmd_flush_bits *pre_flush, + enum radv_cmd_flush_bits *post_flush) +{ + VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil; + VkImageAspectFlags aspects = clear_att->aspectMask; + uint32_t clear_word, flush_bits; + uint32_t htile_mask; + + clear_word = radv_get_htile_fast_clear_value(iview->image, clear_value); + htile_mask = radv_get_htile_mask(iview->image, aspects); if (pre_flush) { cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META) & ~ *pre_flush; *pre_flush |= cmd_buffer->state.flush_bits; - } else - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB | - RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; + } - flush_bits = radv_fill_buffer(cmd_buffer, iview->image->bo, - iview->image->offset + iview->image->htile_offset, - iview->image->surface.htile_size, clear_word); + if (htile_mask == UINT_MAX) { + /* Clear the whole HTILE buffer. */ + flush_bits = radv_fill_buffer(cmd_buffer, iview->image->bo, + iview->image->offset + iview->image->htile_offset, + iview->image->surface.htile_size, clear_word); + } else { + /* Only clear depth or stencil bytes in the HTILE buffer. 
*/ + assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9); + flush_bits = clear_htile_mask(cmd_buffer, iview->image->bo, + iview->image->offset + iview->image->htile_offset, + iview->image->surface.htile_size, clear_word, + htile_mask); + } radv_update_ds_clear_metadata(cmd_buffer, iview->image, clear_value, aspects); if (post_flush) { *post_flush |= flush_bits; - } else { - cmd_buffer->state.flush_bits |= flush_bits; } +} - return true; +static nir_shader * +build_clear_htile_mask_shader() +{ + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_clear_htile_mask"); + b.shader->info.cs.local_size[0] = 64; + b.shader->info.cs.local_size[1] = 1; + b.shader->info.cs.local_size[2] = 1; + + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *block_size = nir_imm_ivec4(&b, + b.shader->info.cs.local_size[0], + b.shader->info.cs.local_size[1], + b.shader->info.cs.local_size[2], 0); + + nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); + + nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16)); + offset = nir_channel(&b, offset, 0); + + nir_intrinsic_instr *buf = + nir_intrinsic_instr_create(b.shader, + nir_intrinsic_vulkan_resource_index); + + buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); + nir_intrinsic_set_desc_set(buf, 0); + nir_intrinsic_set_binding(buf, 0); + nir_ssa_dest_init(&buf->instr, &buf->dest, 1, 32, NULL); + nir_builder_instr_insert(&b, &buf->instr); + + nir_intrinsic_instr *constants = + nir_intrinsic_instr_create(b.shader, + nir_intrinsic_load_push_constant); + nir_intrinsic_set_base(constants, 0); + nir_intrinsic_set_range(constants, 8); + constants->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); + constants->num_components = 2; + nir_ssa_dest_init(&constants->instr, &constants->dest, 2, 32, "constants"); + 
nir_builder_instr_insert(&b, &constants->instr); + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo); + load->src[0] = nir_src_for_ssa(&buf->dest.ssa); + load->src[1] = nir_src_for_ssa(offset); + nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL); + load->num_components = 4; + nir_builder_instr_insert(&b, &load->instr); + + /* data = (data & ~htile_mask) | (htile_value & htile_mask) */ + nir_ssa_def *data = + nir_iand(&b, &load->dest.ssa, + nir_channel(&b, &constants->dest.ssa, 1)); + data = nir_ior(&b, data, nir_channel(&b, &constants->dest.ssa, 0)); + + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); + store->src[0] = nir_src_for_ssa(data); + store->src[1] = nir_src_for_ssa(&buf->dest.ssa); + store->src[2] = nir_src_for_ssa(offset); + nir_intrinsic_set_write_mask(store, 0xf); + store->num_components = 4; + nir_builder_instr_insert(&b, &store->instr); + + return b.shader; +} + +static VkResult +init_meta_clear_htile_mask_state(struct radv_device *device) +{ + struct radv_meta_state *state = &device->meta_state; + struct radv_shader_module cs = { .nir = NULL }; + VkResult result; + + cs.nir = build_clear_htile_mask_shader(); + + VkDescriptorSetLayoutCreateInfo ds_layout_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL + }, + } + }; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), + &ds_layout_info, &state->alloc, + &state->clear_htile_mask_ds_layout); + if (result != VK_SUCCESS) + goto fail; + + VkPipelineLayoutCreateInfo p_layout_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount 
= 1, + .pSetLayouts = &state->clear_htile_mask_ds_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &(VkPushConstantRange){ + VK_SHADER_STAGE_COMPUTE_BIT, 0, 8, + }, + }; + + result = radv_CreatePipelineLayout(radv_device_to_handle(device), + &p_layout_info, &state->alloc, + &state->clear_htile_mask_p_layout); + if (result != VK_SUCCESS) + goto fail; + + VkPipelineShaderStageCreateInfo shader_stage = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = radv_shader_module_to_handle(&cs), + .pName = "main", + .pSpecializationInfo = NULL, + }; + + VkComputePipelineCreateInfo pipeline_info = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = shader_stage, + .flags = 0, + .layout = state->clear_htile_mask_p_layout, + }; + + result = radv_CreateComputePipelines(radv_device_to_handle(device), + radv_pipeline_cache_to_handle(&state->cache), + 1, &pipeline_info, NULL, + &state->clear_htile_mask_pipeline); + + ralloc_free(cs.nir); + return result; fail: - return false; + ralloc_free(cs.nir); + return result; } VkResult @@ -870,6 +1226,10 @@ if (res != VK_SUCCESS) goto fail; + res = init_meta_clear_htile_mask_state(device); + if (res != VK_SUCCESS) + goto fail; + if (on_demand) return VK_SUCCESS; @@ -961,9 +1321,21 @@ } uint32_t +radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, uint32_t value) +{ + return radv_fill_buffer(cmd_buffer, image->bo, + image->offset + image->fmask.offset, + image->fmask.size, value); +} + +uint32_t radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, uint32_t value) { + /* Mark the image as being compressed. 
*/ + radv_update_dcc_metadata(cmd_buffer, image, true); + return radv_fill_buffer(cmd_buffer, image->bo, image->offset + image->dcc_offset, image->surface.dcc_size, value); @@ -1047,88 +1419,42 @@ } static bool -emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer, - const VkClearAttachment *clear_att, - const VkClearRect *clear_rect, - enum radv_cmd_flush_bits *pre_flush, - enum radv_cmd_flush_bits *post_flush, - uint32_t view_mask) +radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, + const struct radv_image_view *iview, + VkImageLayout image_layout, + const VkClearRect *clear_rect, + VkClearColorValue clear_value, + uint32_t view_mask) { - const struct radv_subpass *subpass = cmd_buffer->state.subpass; - const uint32_t subpass_att = clear_att->colorAttachment; - const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment; - VkImageLayout image_layout = subpass->color_attachments[subpass_att].layout; - const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct radv_image_view *iview = fb->attachments[pass_att].attachment; - VkClearColorValue clear_value = clear_att->clearValue.color; - uint32_t clear_color[2], flush_bits = 0; - uint32_t cmask_clear_value; - bool ret; + uint32_t clear_color[2]; - if (!radv_image_has_cmask(iview->image) && !radv_image_has_dcc(iview->image)) - return false; - - if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS) + if (!radv_image_view_can_fast_clear(cmd_buffer->device, iview)) return false; if (!radv_layout_can_fast_clear(iview->image, image_layout, radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index))) - goto fail; - - /* don't fast clear 3D */ - if (iview->image->type == VK_IMAGE_TYPE_3D) - goto fail; - - /* all layers are bound */ - if (iview->base_layer > 0) - goto fail; - if (iview->image->info.array_size != iview->layer_count) - goto fail; - - if (iview->image->info.levels > 1) - goto fail; - - if 
(!radv_image_extent_compare(iview->image, &iview->extent)) - goto fail; + return false; if (clear_rect->rect.offset.x || clear_rect->rect.offset.y || clear_rect->rect.extent.width != iview->image->info.width || clear_rect->rect.extent.height != iview->image->info.height) - goto fail; + return false; if (view_mask && (iview->image->info.array_size >= 32 || (1u << iview->image->info.array_size) - 1u != view_mask)) - goto fail; + return false; if (!view_mask && clear_rect->baseArrayLayer != 0) - goto fail; + return false; if (!view_mask && clear_rect->layerCount != iview->image->info.array_size) - goto fail; - - /* RB+ doesn't work with CMASK fast clear on Stoney. */ - if (!radv_image_has_dcc(iview->image) && - cmd_buffer->device->physical_device->rad_info.family == CHIP_STONEY) - goto fail; + return false; /* DCC */ - ret = radv_format_pack_clear_color(iview->vk_format, - clear_color, &clear_value); - if (ret == false) - goto fail; - - if (pre_flush) { - cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_CB | - RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) & ~ *pre_flush; - *pre_flush |= cmd_buffer->state.flush_bits; - } else - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | - RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; - - cmask_clear_value = radv_get_cmask_fast_clear_value(iview->image); + if (!radv_format_pack_clear_color(iview->vk_format, + clear_color, &clear_value)) + return false; - /* clear cmask buffer */ if (radv_image_has_dcc(iview->image)) { - uint32_t reset_value; bool can_avoid_fast_clear_elim; - bool need_decompress_pass = false; + uint32_t reset_value; vi_get_fast_clear_parameters(iview->vk_format, &clear_value, &reset_value, @@ -1143,10 +1469,48 @@ * CB flushes but that shouldn't matter. 
*/ if (!can_avoid_fast_clear_elim) - goto fail; + return false; + } + } + + return true; +} + + +static void +radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, + const struct radv_image_view *iview, + const VkClearAttachment *clear_att, + uint32_t subpass_att, + enum radv_cmd_flush_bits *pre_flush, + enum radv_cmd_flush_bits *post_flush) +{ + VkClearColorValue clear_value = clear_att->clearValue.color; + uint32_t clear_color[2], flush_bits = 0; + uint32_t cmask_clear_value; - assert(radv_image_has_cmask(iview->image)); + if (pre_flush) { + cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_CB | + RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) & ~ *pre_flush; + *pre_flush |= cmd_buffer->state.flush_bits; + } + + /* DCC */ + radv_format_pack_clear_color(iview->vk_format, clear_color, &clear_value); + + cmask_clear_value = radv_get_cmask_fast_clear_value(iview->image); + /* clear cmask buffer */ + if (radv_image_has_dcc(iview->image)) { + uint32_t reset_value; + bool can_avoid_fast_clear_elim; + bool need_decompress_pass = false; + + vi_get_fast_clear_parameters(iview->vk_format, + &clear_value, &reset_value, + &can_avoid_fast_clear_elim); + + if (radv_image_has_cmask(iview->image)) { flush_bits = radv_clear_cmask(cmd_buffer, iview->image, cmask_clear_value); @@ -1158,8 +1522,8 @@ flush_bits |= radv_clear_dcc(cmd_buffer, iview->image, reset_value); - radv_set_dcc_need_cmask_elim_pred(cmd_buffer, iview->image, - need_decompress_pass); + radv_update_fce_metadata(cmd_buffer, iview->image, + need_decompress_pass); } else { flush_bits = radv_clear_cmask(cmd_buffer, iview->image, cmask_clear_value); @@ -1167,16 +1531,10 @@ if (post_flush) { *post_flush |= flush_bits; - } else { - cmd_buffer->state.flush_bits |= flush_bits; } radv_update_color_clear_metadata(cmd_buffer, iview->image, subpass_att, clear_color); - - return true; -fail: - return false; } /** @@ -1190,16 +1548,46 @@ enum radv_cmd_flush_bits *post_flush, uint32_t view_mask) { - if (clear_att->aspectMask & 
VK_IMAGE_ASPECT_COLOR_BIT) { - if (!emit_fast_color_clear(cmd_buffer, clear_att, clear_rect, - pre_flush, post_flush, view_mask)) + const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer; + const struct radv_subpass *subpass = cmd_buffer->state.subpass; + VkImageAspectFlags aspects = clear_att->aspectMask; + + if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) { + const uint32_t subpass_att = clear_att->colorAttachment; + const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment; + VkImageLayout image_layout = subpass->color_attachments[subpass_att].layout; + const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL; + VkClearColorValue clear_value = clear_att->clearValue.color; + + if (radv_can_fast_clear_color(cmd_buffer, iview, image_layout, + clear_rect, clear_value, view_mask)) { + radv_fast_clear_color(cmd_buffer, iview, clear_att, + subpass_att, pre_flush, + post_flush); + } else { emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask); + } } else { - assert(clear_att->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT)); - if (!emit_fast_htile_clear(cmd_buffer, clear_att, clear_rect, - pre_flush, post_flush)) - emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect); + const uint32_t pass_att = subpass->depth_stencil_attachment.attachment; + if (pass_att == VK_ATTACHMENT_UNUSED) + return; + + VkImageLayout image_layout = subpass->depth_stencil_attachment.layout; + const struct radv_image_view *iview = fb ? 
fb->attachments[pass_att].attachment : NULL; + VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil; + + assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + + if (radv_can_fast_clear_depth(cmd_buffer, iview, image_layout, + aspects, clear_rect, clear_value, + view_mask)) { + radv_fast_clear_depth(cmd_buffer, iview, clear_att, + pre_flush, post_flush); + } else { + emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect, + view_mask); + } } } @@ -1441,6 +1829,75 @@ radv_DestroyFramebuffer(device_h, fb, &cmd_buffer->pool->alloc); } + +/** + * Return TRUE if a fast color or depth clear has been performed. + */ +static bool +radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + VkFormat format, + VkImageLayout image_layout, + const VkImageSubresourceRange *range, + const VkClearValue *clear_val) +{ + struct radv_image_view iview; + + radv_image_view_init(&iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = radv_image_to_handle(image), + .viewType = radv_meta_get_view_type(image), + .format = image->vk_format, + .subresourceRange = { + .aspectMask = range->aspectMask, + .baseMipLevel = range->baseMipLevel, + .levelCount = range->levelCount, + .baseArrayLayer = range->baseArrayLayer, + .layerCount = range->layerCount, + }, + }); + + VkClearRect clear_rect = { + .rect = { + .offset = { 0, 0 }, + .extent = { + radv_minify(image->info.width, range->baseMipLevel), + radv_minify(image->info.height, range->baseMipLevel), + }, + }, + .baseArrayLayer = range->baseArrayLayer, + .layerCount = range->layerCount, + }; + + VkClearAttachment clear_att = { + .aspectMask = range->aspectMask, + .colorAttachment = 0, + .clearValue = *clear_val, + }; + + if (vk_format_is_color(format)) { + if (radv_can_fast_clear_color(cmd_buffer, &iview, + image_layout, &clear_rect, + clear_att.clearValue.color, 0)) { + radv_fast_clear_color(cmd_buffer, 
&iview, &clear_att, + clear_att.colorAttachment, + NULL, NULL); + return true; + } + } else { + if (radv_can_fast_clear_depth(cmd_buffer, &iview, image_layout, + range->aspectMask, &clear_rect, + clear_att.clearValue.depthStencil, 0)) { + radv_fast_clear_depth(cmd_buffer, &iview, &clear_att, + NULL, NULL); + return true; + } + } + + return false; +} + static void radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, @@ -1468,18 +1925,31 @@ internal_clear_value.color.uint32[0] = (r << 4) | (g & 0xf); } + if (format == VK_FORMAT_R32G32B32_UINT || + format == VK_FORMAT_R32G32B32_SINT || + format == VK_FORMAT_R32G32B32_SFLOAT) + cs = true; + for (uint32_t r = 0; r < range_count; r++) { const VkImageSubresourceRange *range = &ranges[r]; + + /* Try to perform a fast clear first, otherwise fallback to + * the legacy path. + */ + if (!cs && + radv_fast_clear_range(cmd_buffer, image, format, + image_layout, range, + &internal_clear_value)) { + continue; + } + for (uint32_t l = 0; l < radv_get_levelCount(image, range); ++l) { const uint32_t layer_count = image->type == VK_IMAGE_TYPE_3D ? 
radv_minify(image->info.depth, range->baseMipLevel + l) : radv_get_layerCount(image, range); for (uint32_t s = 0; s < layer_count; ++s) { - if (cs || - (format == VK_FORMAT_R32G32B32_UINT || - format == VK_FORMAT_R32G32B32_SINT || - format == VK_FORMAT_R32G32B32_SFLOAT)) { + if (cs) { struct radv_meta_blit2d_surf surf; surf.format = format; surf.image = image; diff -Nru mesa-18.3.3/src/amd/vulkan/radv_meta_copy.c mesa-19.0.1/src/amd/vulkan/radv_meta_copy.c --- mesa-18.3.3/src/amd/vulkan/radv_meta_copy.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta_copy.c 2019-03-31 23:16:37.000000000 +0000 @@ -107,6 +107,22 @@ }; } +static bool +image_is_renderable(struct radv_device *device, struct radv_image *image) +{ + if (image->vk_format == VK_FORMAT_R32G32B32_UINT || + image->vk_format == VK_FORMAT_R32G32B32_SINT || + image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) + return false; + + if (device->physical_device->rad_info.chip_class >= GFX9 && + image->type == VK_IMAGE_TYPE_3D && + vk_format_get_blocksizebits(image->vk_format) == 128 && + vk_format_is_compressed(image->vk_format)) + return false; + return true; +} + static void meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer* buffer, @@ -196,9 +212,7 @@ /* Perform Blit */ if (cs || - (img_bsurf.image->vk_format == VK_FORMAT_R32G32B32_UINT || - img_bsurf.image->vk_format == VK_FORMAT_R32G32B32_SINT || - img_bsurf.image->vk_format == VK_FORMAT_R32G32B32_SFLOAT)) { + !image_is_renderable(cmd_buffer->device, img_bsurf.image)) { radv_meta_buffer_to_image_cs(cmd_buffer, &buf_bsurf, &img_bsurf, 1, &rect); } else { radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect); @@ -483,9 +497,7 @@ /* Perform Blit */ if (cs || - (b_src.format == VK_FORMAT_R32G32B32_UINT || - b_src.format == VK_FORMAT_R32G32B32_SINT || - b_src.format == VK_FORMAT_R32G32B32_SFLOAT)) { + !image_is_renderable(cmd_buffer->device, b_dst.image)) { radv_meta_image_to_image_cs(cmd_buffer, 
&b_src, &b_dst, 1, &rect); } else { radv_meta_blit2d(cmd_buffer, &b_src, NULL, &b_dst, 1, &rect); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_meta_decompress.c mesa-19.0.1/src/amd/vulkan/radv_meta_decompress.c --- mesa-18.3.3/src/amd/vulkan/radv_meta_decompress.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta_decompress.c 2019-03-31 23:16:37.000000000 +0000 @@ -308,34 +308,6 @@ return res; } -static void -emit_depth_decomp(struct radv_cmd_buffer *cmd_buffer, - const VkExtent2D *depth_decomp_extent, - VkPipeline pipeline_h) -{ - VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); - - radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_h); - - radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) { - .x = 0, - .y = 0, - .width = depth_decomp_extent->width, - .height = depth_decomp_extent->height, - .minDepth = 0.0f, - .maxDepth = 1.0f - }); - - radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) { - .offset = { 0, 0 }, - .extent = *depth_decomp_extent, - }); - - radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0); -} - - enum radv_depth_op { DEPTH_DECOMPRESS, DEPTH_RESUMMARIZE, @@ -388,6 +360,23 @@ unreachable("unknown operation"); } + radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_h); + + radv_CmdSetViewport(cmd_buffer_h, 0, 1, &(VkViewport) { + .x = 0, + .y = 0, + .width = width, + .height = height, + .minDepth = 0.0f, + .maxDepth = 1.0f + }); + + radv_CmdSetScissor(cmd_buffer_h, 0, 1, &(VkRect2D) { + .offset = { 0, 0 }, + .extent = { width, height }, + }); + for (uint32_t layer = 0; layer < radv_get_layerCount(image, subresourceRange); layer++) { struct radv_image_view iview; @@ -442,7 +431,7 @@ }, VK_SUBPASS_CONTENTS_INLINE); - emit_depth_decomp(cmd_buffer, &(VkExtent2D){width, height}, pipeline_h); + radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0); radv_CmdEndRenderPass(cmd_buffer_h); radv_DestroyFramebuffer(device_h, fb_h, diff 
-Nru mesa-18.3.3/src/amd/vulkan/radv_meta_fast_clear.c mesa-19.0.1/src/amd/vulkan/radv_meta_fast_clear.c --- mesa-18.3.3/src/amd/vulkan/radv_meta_fast_clear.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta_fast_clear.c 2019-03-31 23:16:37.000000000 +0000 @@ -58,8 +58,8 @@ output_img->data.descriptor_set = 0; output_img->data.binding = 1; - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -550,43 +550,15 @@ } static void -emit_fast_clear_flush(struct radv_cmd_buffer *cmd_buffer, - const VkExtent2D *resolve_extent, - VkPipeline pipeline) -{ - VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); - - radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline); - - radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) { - .x = 0, - .y = 0, - .width = resolve_extent->width, - .height = resolve_extent->height, - .minDepth = 0.0f, - .maxDepth = 1.0f - }); - - radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) { - .offset = (VkOffset2D) { 0, 0 }, - .extent = (VkExtent2D) { resolve_extent->width, resolve_extent->height }, - }); - - radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0); - cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_CB | - RADV_CMD_FLAG_FLUSH_AND_INV_CB_META); -} - -static void radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, bool value) + struct radv_image *image, + uint64_t pred_offset, bool value) { uint64_t va = 0; if (value) { va = radv_buffer_get_va(image->bo) + image->offset; - va += image->dcc_pred_offset; + va += pred_offset; } 
si_emit_set_predication_state(cmd_buffer, true, va); @@ -629,12 +601,33 @@ pipeline = cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline; } - if (!decompress_dcc && radv_image_has_dcc(image)) { + if (radv_image_has_dcc(image)) { + uint64_t pred_offset = decompress_dcc ? image->dcc_pred_offset : + image->fce_pred_offset; + old_predicating = cmd_buffer->state.predicating; - radv_emit_set_predication_state_from_image(cmd_buffer, image, true); + radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, true); cmd_buffer->state.predicating = true; } + + radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline); + + radv_CmdSetViewport(cmd_buffer_h, 0, 1, &(VkViewport) { + .x = 0, + .y = 0, + .width = image->info.width, + .height = image->info.height, + .minDepth = 0.0f, + .maxDepth = 1.0f + }); + + radv_CmdSetScissor(cmd_buffer_h, 0, 1, &(VkRect2D) { + .offset = (VkOffset2D) { 0, 0 }, + .extent = (VkExtent2D) { image->info.width, image->info.height }, + }); + for (uint32_t layer = 0; layer < layer_count; ++layer) { struct radv_image_view iview; @@ -688,24 +681,24 @@ }, VK_SUBPASS_CONTENTS_INLINE); - emit_fast_clear_flush(cmd_buffer, - &(VkExtent2D) { image->info.width, image->info.height }, - pipeline); + radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0); + + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | + RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; + radv_CmdEndRenderPass(cmd_buffer_h); radv_DestroyFramebuffer(device_h, fb_h, &cmd_buffer->pool->alloc); } - if (!decompress_dcc && radv_image_has_dcc(image)) { - cmd_buffer->state.predicating = old_predicating; + if (radv_image_has_dcc(image)) { + uint64_t pred_offset = decompress_dcc ? 
image->dcc_pred_offset : + image->fce_pred_offset; - radv_emit_set_predication_state_from_image(cmd_buffer, image, false); + cmd_buffer->state.predicating = old_predicating; - /* Clear the image's fast-clear eliminate predicate because - * FMASK and DCC also imply a fast-clear eliminate. - */ - radv_set_dcc_need_cmask_elim_pred(cmd_buffer, image, false); + radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, false); if (cmd_buffer->state.predication_type != -1) { /* Restore previous conditional rendering user state. */ @@ -714,6 +707,18 @@ cmd_buffer->state.predication_va); } } + + if (radv_image_has_dcc(image)) { + /* Clear the image's fast-clear eliminate predicate because + * FMASK and DCC also imply a fast-clear eliminate. + */ + radv_update_fce_metadata(cmd_buffer, image, false); + + /* Mark the image as being decompressed. */ + if (decompress_dcc) + radv_update_dcc_metadata(cmd_buffer, image, false); + } + radv_meta_restore(&saved_state, cmd_buffer); } @@ -808,6 +813,9 @@ radv_unaligned_dispatch(cmd_buffer, image->info.width, image->info.height, 1); + /* Mark this image as actually being decompressed. 
*/ + radv_update_dcc_metadata(cmd_buffer, image, false); + /* The fill buffer below does its own saving */ radv_meta_restore(&saved_state, cmd_buffer); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_meta_fmask_expand.c mesa-19.0.1/src/amd/vulkan/radv_meta_fmask_expand.c --- mesa-18.3.3/src/amd/vulkan/radv_meta_fmask_expand.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta_fmask_expand.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,306 @@ +/* + * Copyright © 2019 Valve Corporation + * Copyright © 2018 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "radv_meta.h" +#include "radv_private.h" + +static nir_shader * +build_fmask_expand_compute_shader(struct radv_device *device, int samples) +{ + nir_builder b; + char name[64]; + const struct glsl_type *input_img_type = + glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, + GLSL_TYPE_FLOAT); + const struct glsl_type *output_img_type = + glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, + GLSL_TYPE_FLOAT); + + snprintf(name, 64, "meta_fmask_expand_cs-%d", samples); + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); + b.shader->info.name = ralloc_strdup(b.shader, name); + b.shader->info.cs.local_size[0] = 16; + b.shader->info.cs.local_size[1] = 16; + b.shader->info.cs.local_size[2] = 1; + + nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, + input_img_type, "s_tex"); + input_img->data.descriptor_set = 0; + input_img->data.binding = 0; + + nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, + output_img_type, "out_img"); + output_img->data.descriptor_set = 0; + output_img->data.binding = 1; + + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *block_size = nir_imm_ivec4(&b, + b.shader->info.cs.local_size[0], + b.shader->info.cs.local_size[1], + b.shader->info.cs.local_size[2], 0); + + nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); + + nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa; + nir_ssa_def *output_img_deref = &nir_build_deref_var(&b, output_img)->dest.ssa; + + nir_tex_instr *tex_instr[8]; + for (uint32_t i = 0; i < samples; i++) { + tex_instr[i] = nir_tex_instr_create(b.shader, 3); + + nir_tex_instr *tex = tex_instr[i]; + tex->sampler_dim = GLSL_SAMPLER_DIM_MS; + tex->op = nir_texop_txf_ms; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(nir_channels(&b, global_id, 0x3)); + tex->src[1].src_type = 
nir_tex_src_ms_index; + tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i)); + tex->src[2].src_type = nir_tex_src_texture_deref; + tex->src[2].src = nir_src_for_ssa(input_img_deref); + tex->dest_type = nir_type_float; + tex->is_array = false; + tex->coord_components = 2; + + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); + nir_builder_instr_insert(&b, &tex->instr); + } + + for (uint32_t i = 0; i < samples; i++) { + nir_ssa_def *outval = &tex_instr[i]->dest.ssa; + + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b.shader, + nir_intrinsic_image_deref_store); + store->num_components = 4; + store->src[0] = nir_src_for_ssa(output_img_deref); + store->src[1] = nir_src_for_ssa(global_id); + store->src[2] = nir_src_for_ssa(nir_imm_int(&b, i)); + store->src[3] = nir_src_for_ssa(outval); + nir_builder_instr_insert(&b, &store->instr); + } + + return b.shader; +} + +void +radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + const VkImageSubresourceRange *subresourceRange) +{ + struct radv_device *device = cmd_buffer->device; + struct radv_meta_saved_state saved_state; + const uint32_t samples = image->info.samples; + const uint32_t samples_log2 = ffs(samples) - 1; + + radv_meta_save(&saved_state, cmd_buffer, + RADV_META_SAVE_COMPUTE_PIPELINE | + RADV_META_SAVE_DESCRIPTORS); + + VkPipeline pipeline = device->meta_state.fmask_expand.pipeline[samples_log2]; + + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + + for (unsigned l = 0; l < subresourceRange->layerCount; l++) { + struct radv_image_view iview; + + radv_image_view_init(&iview, device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = radv_image_to_handle(image), + .viewType = radv_meta_get_view_type(image), + .format = image->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + 
.baseArrayLayer = subresourceRange->baseArrayLayer + l, + .layerCount = 1, + }, + }); + + radv_meta_push_descriptor_set(cmd_buffer, + VK_PIPELINE_BIND_POINT_COMPUTE, + cmd_buffer->device->meta_state.fmask_expand.p_layout, + 0, /* set */ + 2, /* descriptorWriteCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(&iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL + }, + } + }, + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(&iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL + }, + } + } + }); + + radv_unaligned_dispatch(cmd_buffer, image->info.width, image->info.height, 1); + } + + radv_meta_restore(&saved_state, cmd_buffer); + + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | + RADV_CMD_FLAG_INV_GLOBAL_L2; + + /* Re-initialize FMASK in fully expanded mode. 
*/ + radv_initialize_fmask(cmd_buffer, image); +} + +void radv_device_finish_meta_fmask_expand_state(struct radv_device *device) +{ + struct radv_meta_state *state = &device->meta_state; + + for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) { + radv_DestroyPipeline(radv_device_to_handle(device), + state->fmask_expand.pipeline[i], + &state->alloc); + } + radv_DestroyPipelineLayout(radv_device_to_handle(device), + state->fmask_expand.p_layout, + &state->alloc); + + radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), + state->fmask_expand.ds_layout, + &state->alloc); +} + +static VkResult +create_fmask_expand_pipeline(struct radv_device *device, + int samples, + VkPipeline *pipeline) +{ + struct radv_meta_state *state = &device->meta_state; + struct radv_shader_module cs = { .nir = NULL }; + VkResult result; + + cs.nir = build_fmask_expand_compute_shader(device, samples); + + VkPipelineShaderStageCreateInfo pipeline_shader_stage = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = radv_shader_module_to_handle(&cs), + .pName = "main", + .pSpecializationInfo = NULL, + }; + + VkComputePipelineCreateInfo vk_pipeline_info = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = pipeline_shader_stage, + .flags = 0, + .layout = state->fmask_expand.p_layout, + }; + + result = radv_CreateComputePipelines(radv_device_to_handle(device), + radv_pipeline_cache_to_handle(&state->cache), + 1, &vk_pipeline_info, NULL, + pipeline); + + ralloc_free(cs.nir); + return result; +} + +VkResult +radv_device_init_meta_fmask_expand_state(struct radv_device *device) +{ + struct radv_meta_state *state = &device->meta_state; + VkResult result; + + VkDescriptorSetLayoutCreateInfo ds_create_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 2, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + 
.binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL + }, + } + }; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), + &ds_create_info, &state->alloc, + &state->fmask_expand.ds_layout); + if (result != VK_SUCCESS) + goto fail; + + VkPipelineLayoutCreateInfo color_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &state->fmask_expand.ds_layout, + .pushConstantRangeCount = 0, + .pPushConstantRanges = NULL, + }; + + result = radv_CreatePipelineLayout(radv_device_to_handle(device), + &color_create_info, &state->alloc, + &state->fmask_expand.p_layout); + if (result != VK_SUCCESS) + goto fail; + + for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) { + uint32_t samples = 1 << i; + result = create_fmask_expand_pipeline(device, samples, + &state->fmask_expand.pipeline[i]); + if (result != VK_SUCCESS) + goto fail; + } + + return VK_SUCCESS; +fail: + radv_device_finish_meta_fmask_expand_state(device); + return result; +} diff -Nru mesa-18.3.3/src/amd/vulkan/radv_meta.h mesa-19.0.1/src/amd/vulkan/radv_meta.h --- mesa-18.3.3/src/amd/vulkan/radv_meta.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta.h 2019-03-31 23:16:37.000000000 +0000 @@ -88,6 +88,9 @@ VkResult radv_device_init_meta_resolve_fragment_state(struct radv_device *device, bool on_demand); void radv_device_finish_meta_resolve_fragment_state(struct radv_device *device); +VkResult radv_device_init_meta_fmask_expand_state(struct radv_device *device); +void radv_device_finish_meta_fmask_expand_state(struct radv_device *device); + void radv_meta_save(struct radv_meta_saved_state *saved_state, struct radv_cmd_buffer 
*cmd_buffer, uint32_t flags); @@ -174,6 +177,9 @@ void radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *subresourceRange); +void radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + const VkImageSubresourceRange *subresourceRange); void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, @@ -201,6 +207,8 @@ uint32_t radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, uint32_t value); +uint32_t radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, uint32_t value); uint32_t radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, uint32_t value); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_meta_resolve.c mesa-19.0.1/src/amd/vulkan/radv_meta_resolve.c --- mesa-18.3.3/src/amd/vulkan/radv_meta_resolve.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta_resolve.c 2019-03-31 23:16:37.000000000 +0000 @@ -456,14 +456,6 @@ } assert(dest_image->info.samples == 1); - if (src_image->info.samples >= 16) { - /* See commit aa3f9aaf31e9056a255f9e0472ebdfdaa60abe54 for the - * glBlitFramebuffer workaround for samples >= 16. 
- */ - radv_finishme("vkCmdResolveImage: need interpolation workaround when " - "samples >= 16"); - } - if (src_image->info.array_size > 1) radv_finishme("vkCmdResolveImage: multisample array images"); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_meta_resolve_cs.c mesa-19.0.1/src/amd/vulkan/radv_meta_resolve_cs.c --- mesa-18.3.3/src/amd/vulkan/radv_meta_resolve_cs.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta_resolve_cs.c 2019-03-31 23:16:37.000000000 +0000 @@ -99,8 +99,8 @@ img_type, "out_img"); output_img->data.descriptor_set = 0; output_img->data.binding = 1; - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], diff -Nru mesa-18.3.3/src/amd/vulkan/radv_nir_to_llvm.c mesa-19.0.1/src/amd/vulkan/radv_nir_to_llvm.c --- mesa-18.3.3/src/amd/vulkan/radv_nir_to_llvm.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_nir_to_llvm.c 2019-03-31 23:16:37.000000000 +0000 @@ -33,9 +33,7 @@ #include #include #include -#if HAVE_LLVM >= 0x0700 #include -#endif #include "sid.h" #include "gfx9d.h" @@ -94,6 +92,7 @@ gl_shader_stage stage; LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4]; + uint64_t float16_shaded_mask; uint64_t input_mask; uint64_t output_mask; @@ -435,7 +434,6 @@ struct arg_info { LLVMTypeRef types[MAX_ARGS]; LLVMValueRef *assign[MAX_ARGS]; - unsigned array_params_mask; uint8_t count; uint8_t sgpr_count; uint8_t num_sgprs_used; @@ -466,13 +464,6 @@ } } -static inline void -add_array_arg(struct arg_info *info, LLVMTypeRef type, LLVMValueRef *param_ptr) -{ - info->array_params_mask |= (1 << info->count); - add_arg(info, ARG_SGPR, type, param_ptr); -} - static void 
assign_arguments(LLVMValueRef main_function, struct arg_info *info) { @@ -511,10 +502,11 @@ LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS); for (unsigned i = 0; i < args->sgpr_count; ++i) { + LLVMValueRef P = LLVMGetParam(main_function, i); + ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_INREG); - if (args->array_params_mask & (1 << i)) { - LLVMValueRef P = LLVMGetParam(main_function, i); + if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) { ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_NOALIAS); ac_add_attr_dereferenceable(P, UINT64_MAX); } @@ -555,11 +547,10 @@ static void set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx, - uint8_t num_sgprs, bool indirect) + uint8_t num_sgprs) { ud_info->sgpr_idx = *sgpr_idx; ud_info->num_sgprs = num_sgprs; - ud_info->indirect = indirect; *sgpr_idx += num_sgprs; } @@ -571,31 +562,28 @@ &ctx->shader_info->user_sgprs_locs.shader_data[idx]; assert(ud_info); - set_loc(ud_info, sgpr_idx, num_sgprs, false); + set_loc(ud_info, sgpr_idx, num_sgprs); } static void set_loc_shader_ptr(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx) { - bool use_32bit_pointers = HAVE_32BIT_POINTERS && - idx != AC_UD_SCRATCH_RING_OFFSETS; + bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS; set_loc_shader(ctx, idx, sgpr_idx, use_32bit_pointers ? 1 : 2); } static void -set_loc_desc(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx, - bool indirect) +set_loc_desc(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx) { struct radv_userdata_locations *locs = &ctx->shader_info->user_sgprs_locs; struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx]; assert(ud_info); - set_loc(ud_info, sgpr_idx, HAVE_32BIT_POINTERS ? 
1 : 2, indirect); + set_loc(ud_info, sgpr_idx, 1); - if (!indirect) - locs->descriptor_sets_enabled |= 1 << idx; + locs->descriptor_sets_enabled |= 1 << idx; } struct user_sgpr_info { @@ -633,7 +621,7 @@ uint8_t count = 0; if (ctx->shader_info->info.vs.has_vertex_buffers) - count += HAVE_32BIT_POINTERS ? 1 : 2; + count++; count += ctx->shader_info->info.vs.needs_draw_id ? 3 : 2; return count; @@ -702,51 +690,46 @@ user_sgpr_count++; if (ctx->shader_info->info.loads_push_constants) - user_sgpr_count += HAVE_32BIT_POINTERS ? 1 : 2; + user_sgpr_count++; + + if (ctx->streamout_buffers) + user_sgpr_count++; uint32_t available_sgprs = ctx->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16; uint32_t remaining_sgprs = available_sgprs - user_sgpr_count; uint32_t num_desc_set = util_bitcount(ctx->shader_info->info.desc_set_used_mask); - if (remaining_sgprs / (HAVE_32BIT_POINTERS ? 1 : 2) < num_desc_set) { + if (remaining_sgprs < num_desc_set) { user_sgpr_info->indirect_all_descriptor_sets = true; } } static void declare_global_input_sgprs(struct radv_shader_context *ctx, - gl_shader_stage stage, - bool has_previous_stage, - gl_shader_stage previous_stage, const struct user_sgpr_info *user_sgpr_info, struct arg_info *args, LLVMValueRef *desc_sets) { LLVMTypeRef type = ac_array_in_const32_addr_space(ctx->ac.i8); - unsigned num_sets = ctx->options->layout ? 
- ctx->options->layout->num_sets : 0; - unsigned stage_mask = 1 << stage; - - if (has_previous_stage) - stage_mask |= 1 << previous_stage; /* 1 for each descriptor set */ if (!user_sgpr_info->indirect_all_descriptor_sets) { - for (unsigned i = 0; i < num_sets; ++i) { - if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) && - ctx->options->layout->set[i].layout->shader_stages & stage_mask) { - add_array_arg(args, type, - &ctx->descriptor_sets[i]); - } + uint32_t mask = ctx->shader_info->info.desc_set_used_mask; + + while (mask) { + int i = u_bit_scan(&mask); + + add_arg(args, ARG_SGPR, type, &ctx->descriptor_sets[i]); } } else { - add_array_arg(args, ac_array_in_const32_addr_space(type), desc_sets); + add_arg(args, ARG_SGPR, ac_array_in_const32_addr_space(type), + desc_sets); } if (ctx->shader_info->info.loads_push_constants) { /* 1 for push constants and dynamic descriptors */ - add_array_arg(args, type, &ctx->abi.push_constants); + add_arg(args, ARG_SGPR, type, &ctx->abi.push_constants); } if (ctx->shader_info->info.so.num_outputs) { @@ -835,41 +818,31 @@ } static void -set_global_input_locs(struct radv_shader_context *ctx, gl_shader_stage stage, - bool has_previous_stage, gl_shader_stage previous_stage, +set_global_input_locs(struct radv_shader_context *ctx, const struct user_sgpr_info *user_sgpr_info, LLVMValueRef desc_sets, uint8_t *user_sgpr_idx) { - unsigned num_sets = ctx->options->layout ? 
- ctx->options->layout->num_sets : 0; - unsigned stage_mask = 1 << stage; - - if (has_previous_stage) - stage_mask |= 1 << previous_stage; + uint32_t mask = ctx->shader_info->info.desc_set_used_mask; if (!user_sgpr_info->indirect_all_descriptor_sets) { - for (unsigned i = 0; i < num_sets; ++i) { - if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) && - ctx->options->layout->set[i].layout->shader_stages & stage_mask) { - set_loc_desc(ctx, i, user_sgpr_idx, false); - } else - ctx->descriptor_sets[i] = NULL; + while (mask) { + int i = u_bit_scan(&mask); + + set_loc_desc(ctx, i, user_sgpr_idx); } } else { set_loc_shader_ptr(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS, user_sgpr_idx); - for (unsigned i = 0; i < num_sets; ++i) { - if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) && - ctx->options->layout->set[i].layout->shader_stages & stage_mask) { - ctx->descriptor_sets[i] = - ac_build_load_to_sgpr(&ctx->ac, - desc_sets, - LLVMConstInt(ctx->ac.i32, i, false)); + while (mask) { + int i = u_bit_scan(&mask); + + ctx->descriptor_sets[i] = + ac_build_load_to_sgpr(&ctx->ac, desc_sets, + LLVMConstInt(ctx->ac.i32, i, false)); - } else - ctx->descriptor_sets[i] = NULL; } + ctx->shader_info->need_indirect_descriptor_sets = true; } @@ -955,9 +928,8 @@ switch (stage) { case MESA_SHADER_COMPUTE: - declare_global_input_sgprs(ctx, stage, has_previous_stage, - previous_stage, &user_sgpr_info, - &args, &desc_sets); + declare_global_input_sgprs(ctx, &user_sgpr_info, &args, + &desc_sets); if (ctx->shader_info->info.cs.uses_grid_size) { add_arg(&args, ARG_SGPR, ctx->ac.v3i32, @@ -978,9 +950,9 @@ &ctx->abi.local_invocation_ids); break; case MESA_SHADER_VERTEX: - declare_global_input_sgprs(ctx, stage, has_previous_stage, - previous_stage, &user_sgpr_info, - &args, &desc_sets); + declare_global_input_sgprs(ctx, &user_sgpr_info, &args, + &desc_sets); + declare_vs_specific_input_sgprs(ctx, stage, has_previous_stage, previous_stage, &args); @@ -1011,11 +983,9 @@ add_arg(&args, 
ARG_SGPR, ctx->ac.i32, NULL); // unknown add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown - declare_global_input_sgprs(ctx, stage, - has_previous_stage, - previous_stage, - &user_sgpr_info, &args, + declare_global_input_sgprs(ctx, &user_sgpr_info, &args, &desc_sets); + declare_vs_specific_input_sgprs(ctx, stage, has_previous_stage, previous_stage, &args); @@ -1031,10 +1001,7 @@ declare_vs_input_vgprs(ctx, &args); } else { - declare_global_input_sgprs(ctx, stage, - has_previous_stage, - previous_stage, - &user_sgpr_info, &args, + declare_global_input_sgprs(ctx, &user_sgpr_info, &args, &desc_sets); if (needs_view_index) @@ -1051,9 +1018,8 @@ } break; case MESA_SHADER_TESS_EVAL: - declare_global_input_sgprs(ctx, stage, has_previous_stage, - previous_stage, &user_sgpr_info, - &args, &desc_sets); + declare_global_input_sgprs(ctx, &user_sgpr_info, &args, + &desc_sets); if (needs_view_index) add_arg(&args, ARG_SGPR, ctx->ac.i32, @@ -1084,10 +1050,7 @@ add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown - declare_global_input_sgprs(ctx, stage, - has_previous_stage, - previous_stage, - &user_sgpr_info, &args, + declare_global_input_sgprs(ctx, &user_sgpr_info, &args, &desc_sets); if (previous_stage != MESA_SHADER_TESS_EVAL) { @@ -1118,10 +1081,7 @@ declare_tes_input_vgprs(ctx, &args); } } else { - declare_global_input_sgprs(ctx, stage, - has_previous_stage, - previous_stage, - &user_sgpr_info, &args, + declare_global_input_sgprs(ctx, &user_sgpr_info, &args, &desc_sets); if (needs_view_index) @@ -1149,9 +1109,8 @@ } break; case MESA_SHADER_FRAGMENT: - declare_global_input_sgprs(ctx, stage, has_previous_stage, - previous_stage, &user_sgpr_info, - &args, &desc_sets); + declare_global_input_sgprs(ctx, &user_sgpr_info, &args, + &desc_sets); add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->abi.prim_mask); add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_sample); @@ -1210,8 +1169,7 @@ if (has_previous_stage) 
user_sgpr_idx = 0; - set_global_input_locs(ctx, stage, has_previous_stage, previous_stage, - &user_sgpr_info, desc_sets, &user_sgpr_idx); + set_global_input_locs(ctx, &user_sgpr_info, desc_sets, &user_sgpr_idx); switch (stage) { case MESA_SHADER_COMPUTE: @@ -1484,7 +1442,7 @@ { struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); const unsigned location = var->data.location; - const unsigned component = var->data.location_frac; + unsigned component = var->data.location_frac; const bool is_patch = var->data.patch; const bool is_compact = var->data.compact; LLVMValueRef dw_addr; @@ -1502,10 +1460,14 @@ } param = shader_io_get_unique_index(location); - if (location == VARYING_SLOT_CLIP_DIST0 && - is_compact && const_index > 3) { - const_index -= 3; - param++; + if ((location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1) && is_compact) { + const_index += component; + component = 0; + + if (const_index >= 4) { + const_index -= 4; + param++; + } } if (!is_patch) { @@ -1572,9 +1534,13 @@ LLVMValueRef result; unsigned param = shader_io_get_unique_index(location); - if (location == VARYING_SLOT_CLIP_DIST0 && is_compact && const_index > 3) { - const_index -= 3; - param++; + if ((location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1) && is_compact) { + const_index += component; + component = 0; + if (const_index >= 4) { + const_index -= 4; + param++; + } } buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, @@ -1693,9 +1659,6 @@ case 8: sample_pos_offset = 7; break; - case 16: - sample_pos_offset = 15; - break; default: break; } @@ -2097,6 +2060,7 @@ unsigned attr, LLVMValueRef interp_param, LLVMValueRef prim_mask, + bool float16, LLVMValueRef result[4]) { LLVMValueRef attr_number; @@ -2129,7 +2093,12 @@ for (chan = 0; chan < 4; chan++) { LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); - if (interp) { + if (interp && float16) { + result[chan] = ac_build_fs_interp_f16(&ctx->ac, + 
llvm_chan, + attr_number, + prim_mask, i, j); + } else if (interp) { result[chan] = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number, @@ -2141,7 +2110,30 @@ attr_number, prim_mask); result[chan] = LLVMBuildBitCast(ctx->ac.builder, result[chan], ctx->ac.i32, ""); - result[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, result[chan], LLVMTypeOf(interp_param), ""); + result[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, result[chan], float16 ? ctx->ac.i16 : ctx->ac.i32, ""); + } + } +} + +static void mark_16bit_fs_input(struct radv_shader_context *ctx, + const struct glsl_type *type, + int location) +{ + if (glsl_type_is_scalar(type) || glsl_type_is_vector(type) || glsl_type_is_matrix(type)) { + unsigned attrib_count = glsl_count_attribute_slots(type, false); + if (glsl_type_is_16bit(type)) { + ctx->float16_shaded_mask |= ((1ull << attrib_count) - 1) << location; + } + } else if (glsl_type_is_array(type)) { + unsigned stride = glsl_count_attribute_slots(glsl_get_array_element(type), false); + for (unsigned i = 0; i < glsl_get_length(type); ++i) { + mark_16bit_fs_input(ctx, glsl_get_array_element(type), location + i * stride); + } + } else { + assert(glsl_type_is_struct(type)); + for (unsigned i = 0; i < glsl_get_length(type); i++) { + mark_16bit_fs_input(ctx, glsl_get_struct_field(type, i), location); + location += glsl_count_attribute_slots(glsl_get_struct_field(type, i), false); } } } @@ -2156,9 +2148,20 @@ uint64_t mask; variable->data.driver_location = idx * 4; + + + if (variable->data.compact) { + unsigned component_count = variable->data.location_frac + + glsl_get_length(variable->type); + attrib_count = (component_count + 3) / 4; + } else + mark_16bit_fs_input(ctx, variable->type, idx); + mask = ((1ull << attrib_count) - 1) << variable->data.location; - if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) { + if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT || + 
glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT16 || + glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_STRUCT) { unsigned interp_type; if (variable->data.sample) interp_type = INTERP_SAMPLE; @@ -2169,22 +2172,12 @@ interp = lookup_interp_param(&ctx->abi, variable->data.interpolation, interp_type); } - bool is_16bit = glsl_type_is_16bit(glsl_without_array(variable->type)); - LLVMTypeRef type = is_16bit ? ctx->ac.i16 : ctx->ac.i32; if (interp == NULL) - interp = LLVMGetUndef(type); + interp = LLVMGetUndef(ctx->ac.i32); for (unsigned i = 0; i < attrib_count; ++i) ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp; - if (idx == VARYING_SLOT_CLIP_DIST0) { - /* Do not account for the number of components inside the array - * of clip/cull distances because this might wrongly set other - * bits like primitive ID or layer. - */ - mask = 1ull << VARYING_SLOT_CLIP_DIST0; - } - ctx->input_mask |= mask; } @@ -2246,11 +2239,14 @@ if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC || i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) { interp_param = *inputs; - interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask, + bool float16 = (ctx->float16_shaded_mask >> i) & 1; + interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask, float16, inputs); if (LLVMIsUndef(interp_param)) ctx->shader_info->fs.flat_shaded_mask |= 1u << index; + if (float16) + ctx->shader_info->fs.float16_shaded_mask |= 1u << index; if (i >= VARYING_SLOT_VAR0) ctx->abi.fs_input_attr_indices[i - VARYING_SLOT_VAR0] = index; ++index; @@ -2262,7 +2258,7 @@ interp_param = *inputs; interp_fs_input(ctx, index, interp_param, - ctx->abi.prim_mask, inputs); + ctx->abi.prim_mask, false, inputs); ++index; } } else if (i == VARYING_SLOT_POS) { @@ -2296,6 +2292,12 @@ if (stage == MESA_SHADER_TESS_CTRL) return; + if (variable->data.compact) { + unsigned component_count = variable->data.location_frac + + glsl_get_length(variable->type); + attrib_count = 
(component_count + 3) / 4; + } + mask_attribs = ((1ull << attrib_count) - 1) << idx; if (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL || @@ -2311,8 +2313,6 @@ ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1; ctx->shader_info->tes.outinfo.cull_dist_mask <<= shader->info.clip_distance_array_size; } - - mask_attribs = 1ull << idx; } } @@ -2411,7 +2411,7 @@ if (is_16bit) { for (unsigned chan = 0; chan < 4; chan++) values[chan] = LLVMBuildZExt(ctx->ac.builder, - values[chan], + ac_to_integer(&ctx->ac, values[chan]), ctx->ac.i32, ""); } break; @@ -2422,7 +2422,7 @@ if (is_16bit) { for (unsigned chan = 0; chan < 4; chan++) values[chan] = LLVMBuildSExt(ctx->ac.builder, - values[chan], + ac_to_integer(&ctx->ac, values[chan]), ctx->ac.i32, ""); } break; @@ -2475,12 +2475,8 @@ } else memcpy(&args->out[0], values, sizeof(values[0]) * 4); - for (unsigned i = 0; i < 4; ++i) { - if (!(args->enabled_channels & (1 << i))) - continue; - + for (unsigned i = 0; i < 4; ++i) args->out[i] = ac_to_float(&ctx->ac, args->out[i]); - } } static void @@ -2661,51 +2657,41 @@ memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED, sizeof(outinfo->vs_output_param_offset)); - if (ctx->output_mask & (1ull << VARYING_SLOT_CLIP_DIST0)) { - unsigned output_usage_mask, length; - LLVMValueRef slots[8]; - unsigned j; - - if (ctx->stage == MESA_SHADER_VERTEX && - !ctx->is_gs_copy_shader) { - output_usage_mask = - ctx->shader_info->info.vs.output_usage_mask[VARYING_SLOT_CLIP_DIST0]; - } else if (ctx->stage == MESA_SHADER_TESS_EVAL) { - output_usage_mask = - ctx->shader_info->info.tes.output_usage_mask[VARYING_SLOT_CLIP_DIST0]; - } else { - assert(ctx->is_gs_copy_shader); - output_usage_mask = - ctx->shader_info->info.gs.output_usage_mask[VARYING_SLOT_CLIP_DIST0]; - } + for(unsigned location = VARYING_SLOT_CLIP_DIST0; location <= VARYING_SLOT_CLIP_DIST1; ++location) { + if (ctx->output_mask & (1ull << location)) { + unsigned 
output_usage_mask, length; + LLVMValueRef slots[4]; + unsigned j; + + if (ctx->stage == MESA_SHADER_VERTEX && + !ctx->is_gs_copy_shader) { + output_usage_mask = + ctx->shader_info->info.vs.output_usage_mask[location]; + } else if (ctx->stage == MESA_SHADER_TESS_EVAL) { + output_usage_mask = + ctx->shader_info->info.tes.output_usage_mask[location]; + } else { + assert(ctx->is_gs_copy_shader); + output_usage_mask = + ctx->shader_info->info.gs.output_usage_mask[location]; + } - length = util_last_bit(output_usage_mask); + length = util_last_bit(output_usage_mask); - i = VARYING_SLOT_CLIP_DIST0; - for (j = 0; j < length; j++) - slots[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j)); + for (j = 0; j < length; j++) + slots[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, location, j)); - for (i = length; i < 8; i++) - slots[i] = LLVMGetUndef(ctx->ac.f32); + for (i = length; i < 4; i++) + slots[i] = LLVMGetUndef(ctx->ac.f32); - if (length > 4) { - target = V_008DFC_SQ_EXP_POS + 3; - si_llvm_init_export_args(ctx, &slots[4], 0xf, target, &args); + target = V_008DFC_SQ_EXP_POS + 2 + (location - VARYING_SLOT_CLIP_DIST0); + si_llvm_init_export_args(ctx, &slots[0], 0xf, target, &args); memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], - &args, sizeof(args)); - } + &args, sizeof(args)); - target = V_008DFC_SQ_EXP_POS + 2; - si_llvm_init_export_args(ctx, &slots[0], 0xf, target, &args); - memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], - &args, sizeof(args)); - - /* Export the clip/cull distances values to the next stage. */ - radv_export_param(ctx, param_count, &slots[0], 0xf); - outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0] = param_count++; - if (length > 4) { - radv_export_param(ctx, param_count, &slots[4], 0xf); - outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1] = param_count++; + /* Export the clip/cull distances values to the next stage. 
*/ + radv_export_param(ctx, param_count, &slots[0], 0xf); + outinfo->vs_output_param_offset[location] = param_count++; } } @@ -2866,28 +2852,14 @@ LLVMValueRef lds_base = NULL; for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - unsigned output_usage_mask; int param_index; - int length = 4; if (!(ctx->output_mask & (1ull << i))) continue; - if (ctx->stage == MESA_SHADER_VERTEX) { - output_usage_mask = - ctx->shader_info->info.vs.output_usage_mask[i]; - } else { - assert(ctx->stage == MESA_SHADER_TESS_EVAL); - output_usage_mask = - ctx->shader_info->info.tes.output_usage_mask[i]; - } - - if (i == VARYING_SLOT_CLIP_DIST0) - length = util_last_bit(output_usage_mask); - param_index = shader_io_get_unique_index(i); - max_output_written = MAX2(param_index + (length > 4), max_output_written); + max_output_written = MAX2(param_index, max_output_written); } outinfo->esgs_itemsize = (max_output_written + 1) * 16; @@ -2908,7 +2880,6 @@ LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4]; unsigned output_usage_mask; int param_index; - int length = 4; if (!(ctx->output_mask & (1ull << i))) continue; @@ -2922,9 +2893,6 @@ ctx->shader_info->info.tes.output_usage_mask[i]; } - if (i == VARYING_SLOT_CLIP_DIST0) - length = util_last_bit(output_usage_mask); - param_index = shader_io_get_unique_index(i); if (lds_base) { @@ -2933,7 +2901,7 @@ ""); } - for (j = 0; j < length; j++) { + for (j = 0; j < 4; j++) { if (!(output_usage_mask & (1 << j))) continue; @@ -2970,22 +2938,16 @@ vertex_dw_stride, ""); for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - unsigned output_usage_mask = - ctx->shader_info->info.vs.output_usage_mask[i]; LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4]; - int length = 4; if (!(ctx->output_mask & (1ull << i))) continue; - if (i == VARYING_SLOT_CLIP_DIST0) - length = util_last_bit(output_usage_mask); - int param = shader_io_get_unique_index(i); LLVMValueRef dw_addr = LLVMBuildAdd(ctx->ac.builder, base_dw_addr, LLVMConstInt(ctx->ac.i32, param * 4, false), ""); 
- for (unsigned j = 0; j < length; j++) { + for (unsigned j = 0; j < 4; j++) { LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, out_ptr[j], ""); value = ac_to_integer(&ctx->ac, value); value = LLVMBuildZExtOrBitCast(ctx->ac.builder, value, ctx->ac.i32, ""); @@ -3511,7 +3473,7 @@ ctx.abi.load_sampler_desc = radv_get_sampler_desc; ctx.abi.load_resource = radv_load_resource; ctx.abi.clamp_shadow_reference = false; - ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9; + ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9 && HAVE_LLVM < 0x800; if (shader_count >= 2) ac_init_exec_full_mask(&ctx.ac); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_pass.c mesa-19.0.1/src/amd/vulkan/radv_pass.c --- mesa-18.3.3/src/amd/vulkan/radv_pass.c 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_pass.c 2019-03-31 23:16:37.000000000 +0000 @@ -38,7 +38,7 @@ struct radv_render_pass *pass; size_t size; size_t attachments_offset; - VkRenderPassMultiviewCreateInfoKHR *multiview_info = NULL; + VkRenderPassMultiviewCreateInfo *multiview_info = NULL; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); @@ -59,8 +59,8 @@ vk_foreach_struct(ext, pCreateInfo->pNext) { switch(ext->sType) { - case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHR: - multiview_info = ( VkRenderPassMultiviewCreateInfoKHR*)ext; + case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO: + multiview_info = (VkRenderPassMultiviewCreateInfo*)ext; break; default: break; @@ -180,7 +180,17 @@ } for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) { + uint32_t src = pCreateInfo->pDependencies[i].srcSubpass; uint32_t dst = pCreateInfo->pDependencies[i].dstSubpass; + + /* Ignore subpass self-dependencies as they allow the app to + * call vkCmdPipelineBarrier() inside the render pass and the + * driver should only do the barrier when called, not when + * starting the render pass. 
+ */ + if (src == dst) + continue; + if (dst == VK_SUBPASS_EXTERNAL) { pass->end_barrier.src_stage_mask = pCreateInfo->pDependencies[i].srcStageMask; pass->end_barrier.src_access_mask = pCreateInfo->pDependencies[i].srcAccessMask; @@ -337,7 +347,17 @@ } for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) { + uint32_t src = pCreateInfo->pDependencies[i].srcSubpass; uint32_t dst = pCreateInfo->pDependencies[i].dstSubpass; + + /* Ignore subpass self-dependencies as they allow the app to + * call vkCmdPipelineBarrier() inside the render pass and the + * driver should only do the barrier when called, not when + * starting the render pass. + */ + if (src == dst) + continue; + if (dst == VK_SUBPASS_EXTERNAL) { pass->end_barrier.src_stage_mask = pCreateInfo->pDependencies[i].srcStageMask; pass->end_barrier.src_access_mask = pCreateInfo->pDependencies[i].srcAccessMask; diff -Nru mesa-18.3.3/src/amd/vulkan/radv_pipeline.c mesa-19.0.1/src/amd/vulkan/radv_pipeline.c --- mesa-18.3.3/src/amd/vulkan/radv_pipeline.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_pipeline.c 2019-03-31 23:16:37.000000000 +0000 @@ -524,6 +524,14 @@ col_format |= cf << (4 * i); } + if (!col_format && blend->need_src_alpha & (1 << 0)) { + /* When a subpass doesn't have any color attachments, write the + * alpha channel of MRT0 when alpha coverage is enabled because + * the depth attachment needs it. + */ + col_format |= V_028714_SPI_SHADER_32_ABGR; + } + /* If the i-th target format is set, all previous target formats must * be non-zero to avoid hangs. 
*/ @@ -681,13 +689,15 @@ else blend.cb_color_control |= S_028808_ROP3(V_028808_ROP3_COPY); - blend.db_alpha_to_mask = S_028B70_ALPHA_TO_MASK_OFFSET0(2) | - S_028B70_ALPHA_TO_MASK_OFFSET1(2) | - S_028B70_ALPHA_TO_MASK_OFFSET2(2) | - S_028B70_ALPHA_TO_MASK_OFFSET3(2); + blend.db_alpha_to_mask = S_028B70_ALPHA_TO_MASK_OFFSET0(3) | + S_028B70_ALPHA_TO_MASK_OFFSET1(1) | + S_028B70_ALPHA_TO_MASK_OFFSET2(0) | + S_028B70_ALPHA_TO_MASK_OFFSET3(2) | + S_028B70_OFFSET_ROUND(1); if (vkms && vkms->alphaToCoverageEnable) { blend.db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1); + blend.need_src_alpha |= 0x1; } blend.cb_target_mask = 0; @@ -1705,11 +1715,11 @@ } bool ccw = tes->info.tes.ccw; - const VkPipelineTessellationDomainOriginStateCreateInfoKHR *domain_origin_state = + const VkPipelineTessellationDomainOriginStateCreateInfo *domain_origin_state = vk_find_struct_const(pCreateInfo->pTessellationState, - PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO_KHR); + PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO); - if (domain_origin_state && domain_origin_state->domainOrigin != VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT_KHR) + if (domain_origin_state && domain_origin_state->domainOrigin != VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT) ccw = !ccw; if (tes->info.tes.point_mode) @@ -1814,6 +1824,10 @@ nir_lower_io_arrays_to_elements(ordered_shaders[i], ordered_shaders[i - 1]); + if (nir_link_opt_varyings(ordered_shaders[i], + ordered_shaders[i - 1])) + radv_optimize_nir(ordered_shaders[i - 1], false, false); + nir_remove_dead_variables(ordered_shaders[i], nir_var_shader_out); nir_remove_dead_variables(ordered_shaders[i - 1], @@ -2083,6 +2097,10 @@ radv_link_shaders(pipeline, nir); for (int i = 0; i < MESA_SHADER_STAGES; ++i) { + if (nir[i]) { + NIR_PASS_V(nir[i], nir_lower_bool_to_int32); + } + if (radv_can_dump_shader(device, modules[i], false)) nir_print_shader(nir[i], stderr); } @@ -2517,7 +2535,7 @@ } static void -radv_pipeline_generate_binning_state(struct 
radeon_cmdbuf *cs, +radv_pipeline_generate_binning_state(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo) { @@ -2567,15 +2585,15 @@ S_028C44_OPTIMAL_BIN_SELECTION(1); } - radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0, + radeon_set_context_reg(ctx_cs, R_028C44_PA_SC_BINNER_CNTL_0, pa_sc_binner_cntl_0); - radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL, + radeon_set_context_reg(ctx_cs, R_028060_DB_DFSM_CONTROL, db_dfsm_control); } static void -radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *cs, +radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct radv_graphics_pipeline_create_info *extra) @@ -2648,35 +2666,35 @@ db_render_override |= S_02800C_DISABLE_VIEWPORT_CLAMP(1); } - radeon_set_context_reg(cs, R_028800_DB_DEPTH_CONTROL, db_depth_control); - radeon_set_context_reg(cs, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); + radeon_set_context_reg(ctx_cs, R_028800_DB_DEPTH_CONTROL, db_depth_control); + radeon_set_context_reg(ctx_cs, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); - radeon_set_context_reg(cs, R_028000_DB_RENDER_CONTROL, db_render_control); - radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override); - radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, db_render_override2); + radeon_set_context_reg(ctx_cs, R_028000_DB_RENDER_CONTROL, db_render_control); + radeon_set_context_reg(ctx_cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override); + radeon_set_context_reg(ctx_cs, R_028010_DB_RENDER_OVERRIDE2, db_render_override2); } static void -radv_pipeline_generate_blend_state(struct radeon_cmdbuf *cs, +radv_pipeline_generate_blend_state(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline, const struct radv_blend_state *blend) { - radeon_set_context_reg_seq(cs, R_028780_CB_BLEND0_CONTROL, 8); - radeon_emit_array(cs, 
blend->cb_blend_control, + radeon_set_context_reg_seq(ctx_cs, R_028780_CB_BLEND0_CONTROL, 8); + radeon_emit_array(ctx_cs, blend->cb_blend_control, 8); - radeon_set_context_reg(cs, R_028808_CB_COLOR_CONTROL, blend->cb_color_control); - radeon_set_context_reg(cs, R_028B70_DB_ALPHA_TO_MASK, blend->db_alpha_to_mask); + radeon_set_context_reg(ctx_cs, R_028808_CB_COLOR_CONTROL, blend->cb_color_control); + radeon_set_context_reg(ctx_cs, R_028B70_DB_ALPHA_TO_MASK, blend->db_alpha_to_mask); if (pipeline->device->physical_device->has_rbplus) { - radeon_set_context_reg_seq(cs, R_028760_SX_MRT0_BLEND_OPT, 8); - radeon_emit_array(cs, blend->sx_mrt_blend_opt, 8); + radeon_set_context_reg_seq(ctx_cs, R_028760_SX_MRT0_BLEND_OPT, 8); + radeon_emit_array(ctx_cs, blend->sx_mrt_blend_opt, 8); } - radeon_set_context_reg(cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format); + radeon_set_context_reg(ctx_cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format); - radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask); - radeon_set_context_reg(cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask); + radeon_set_context_reg(ctx_cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask); + radeon_set_context_reg(ctx_cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask); pipeline->graphics.col_format = blend->spi_shader_col_format; pipeline->graphics.cb_target_mask = blend->cb_target_mask; @@ -2694,23 +2712,23 @@ } static void -radv_pipeline_generate_raster_state(struct radeon_cmdbuf *cs, +radv_pipeline_generate_raster_state(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo) { const VkPipelineRasterizationStateCreateInfo *vkraster = pCreateInfo->pRasterizationState; const VkConservativeRasterizationModeEXT mode = radv_get_conservative_raster_mode(vkraster); - uint32_t pa_sc_conservative_rast = 0; + uint32_t pa_sc_conservative_rast = S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1); - radeon_set_context_reg(cs, 
R_028810_PA_CL_CLIP_CNTL, + radeon_set_context_reg(ctx_cs, R_028810_PA_CL_CLIP_CNTL, S_028810_DX_CLIP_SPACE_DEF(1) | // vulkan uses DX conventions. S_028810_ZCLIP_NEAR_DISABLE(vkraster->depthClampEnable ? 1 : 0) | S_028810_ZCLIP_FAR_DISABLE(vkraster->depthClampEnable ? 1 : 0) | S_028810_DX_RASTERIZATION_KILL(vkraster->rasterizerDiscardEnable ? 1 : 0) | S_028810_DX_LINEAR_ATTR_CLIP_ENA(1)); - radeon_set_context_reg(cs, R_0286D4_SPI_INTERP_CONTROL_0, + radeon_set_context_reg(ctx_cs, R_0286D4_SPI_INTERP_CONTROL_0, S_0286D4_FLAT_SHADE_ENA(1) | S_0286D4_PNT_SPRITE_ENA(1) | S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | @@ -2719,12 +2737,12 @@ S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | S_0286D4_PNT_SPRITE_TOP_1(0)); /* vulkan is top to bottom - 1.0 at bottom */ - radeon_set_context_reg(cs, R_028BE4_PA_SU_VTX_CNTL, + radeon_set_context_reg(ctx_cs, R_028BE4_PA_SU_VTX_CNTL, S_028BE4_PIX_CENTER(1) | // TODO verify S_028BE4_ROUND_MODE(V_028BE4_X_ROUND_TO_EVEN) | S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH)); - radeon_set_context_reg(cs, R_028814_PA_SU_SC_MODE_CNTL, + radeon_set_context_reg(ctx_cs, R_028814_PA_SU_SC_MODE_CNTL, S_028814_FACE(vkraster->frontFace) | S_028814_CULL_FRONT(!!(vkraster->cullMode & VK_CULL_MODE_FRONT_BIT)) | S_028814_CULL_BACK(!!(vkraster->cullMode & VK_CULL_MODE_BACK_BIT)) | @@ -2765,37 +2783,37 @@ } } - radeon_set_context_reg(cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, + radeon_set_context_reg(ctx_cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, pa_sc_conservative_rast); } static void -radv_pipeline_generate_multisample_state(struct radeon_cmdbuf *cs, +radv_pipeline_generate_multisample_state(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline) { struct radv_multisample_state *ms = &pipeline->graphics.ms; - radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); - radeon_emit(cs, ms->pa_sc_aa_mask[0]); - radeon_emit(cs, ms->pa_sc_aa_mask[1]); + 
radeon_set_context_reg_seq(ctx_cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); + radeon_emit(ctx_cs, ms->pa_sc_aa_mask[0]); + radeon_emit(ctx_cs, ms->pa_sc_aa_mask[1]); - radeon_set_context_reg(cs, R_028804_DB_EQAA, ms->db_eqaa); - radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1); + radeon_set_context_reg(ctx_cs, R_028804_DB_EQAA, ms->db_eqaa); + radeon_set_context_reg(ctx_cs, R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1); /* The exclusion bits can be set to improve rasterization efficiency * if no sample lies on the pixel boundary (-8 sample offset). It's * currently always TRUE because the driver doesn't support 16 samples. */ bool exclusion = pipeline->device->physical_device->rad_info.chip_class >= CIK; - radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, + radeon_set_context_reg(ctx_cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion)); } static void -radv_pipeline_generate_vgt_gs_mode(struct radeon_cmdbuf *cs, - const struct radv_pipeline *pipeline) +radv_pipeline_generate_vgt_gs_mode(struct radeon_cmdbuf *ctx_cs, + struct radv_pipeline *pipeline) { const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline); @@ -2813,12 +2831,13 @@ vgt_primitiveid_en = true; } - radeon_set_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, vgt_primitiveid_en); - radeon_set_context_reg(cs, R_028A40_VGT_GS_MODE, vgt_gs_mode); + radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN, vgt_primitiveid_en); + radeon_set_context_reg(ctx_cs, R_028A40_VGT_GS_MODE, vgt_gs_mode); } static void -radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *cs, +radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, + struct radeon_cmdbuf *cs, struct radv_pipeline *pipeline, struct radv_shader_variant *shader) { @@ -2839,10 +2858,10 @@ outinfo->writes_layer || outinfo->writes_viewport_index; - radeon_set_context_reg(cs, R_0286C4_SPI_VS_OUT_CONFIG, + radeon_set_context_reg(ctx_cs, 
R_0286C4_SPI_VS_OUT_CONFIG, S_0286C4_VS_EXPORT_COUNT(MAX2(1, outinfo->param_exports) - 1)); - radeon_set_context_reg(cs, R_02870C_SPI_SHADER_POS_FORMAT, + radeon_set_context_reg(ctx_cs, R_02870C_SPI_SHADER_POS_FORMAT, S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) | S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP : @@ -2854,13 +2873,13 @@ V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE)); - radeon_set_context_reg(cs, R_028818_PA_CL_VTE_CNTL, + radeon_set_context_reg(ctx_cs, R_028818_PA_CL_VTE_CNTL, S_028818_VTX_W0_FMT(1) | S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) | S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) | S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1)); - radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL, + radeon_set_context_reg(ctx_cs, R_02881C_PA_CL_VS_OUT_CNTL, S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) | S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) | S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) | @@ -2872,7 +2891,7 @@ clip_dist_mask); if (pipeline->device->physical_device->rad_info.chip_class <= VI) - radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF, + radeon_set_context_reg(ctx_cs, R_028AB4_VGT_REUSE_OFF, outinfo->writes_viewport_index); } @@ -2940,7 +2959,8 @@ } static void -radv_pipeline_generate_vertex_shader(struct radeon_cmdbuf *cs, +radv_pipeline_generate_vertex_shader(struct radeon_cmdbuf *ctx_cs, + struct radeon_cmdbuf *cs, struct radv_pipeline *pipeline, const struct radv_tessellation_state *tess) { @@ -2956,11 +2976,12 @@ else if (vs->info.vs.as_es) radv_pipeline_generate_hw_es(cs, pipeline, vs); else - radv_pipeline_generate_hw_vs(cs, pipeline, vs); + radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, vs); } static void -radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *cs, +radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *ctx_cs, + struct radeon_cmdbuf *cs, struct 
radv_pipeline *pipeline, const struct radv_tessellation_state *tess) { @@ -2976,24 +2997,25 @@ if (tes->info.tes.as_es) radv_pipeline_generate_hw_es(cs, pipeline, tes); else - radv_pipeline_generate_hw_vs(cs, pipeline, tes); + radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, tes); } radv_pipeline_generate_hw_hs(cs, pipeline, tcs, tess); - radeon_set_context_reg(cs, R_028B6C_VGT_TF_PARAM, + radeon_set_context_reg(ctx_cs, R_028B6C_VGT_TF_PARAM, tess->tf_param); if (pipeline->device->physical_device->rad_info.chip_class >= CIK) - radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2, + radeon_set_context_reg_idx(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, 2, tess->ls_hs_config); else - radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, + radeon_set_context_reg(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, tess->ls_hs_config); } static void -radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *cs, +radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs, + struct radeon_cmdbuf *cs, struct radv_pipeline *pipeline, const struct radv_gs_state *gs_state) { @@ -3014,32 +3036,32 @@ offset = num_components[0] * gs_max_out_vertices; - radeon_set_context_reg_seq(cs, R_028A60_VGT_GSVS_RING_OFFSET_1, 3); - radeon_emit(cs, offset); + radeon_set_context_reg_seq(ctx_cs, R_028A60_VGT_GSVS_RING_OFFSET_1, 3); + radeon_emit(ctx_cs, offset); if (max_stream >= 1) offset += num_components[1] * gs_max_out_vertices; - radeon_emit(cs, offset); + radeon_emit(ctx_cs, offset); if (max_stream >= 2) offset += num_components[2] * gs_max_out_vertices; - radeon_emit(cs, offset); + radeon_emit(ctx_cs, offset); if (max_stream >= 3) offset += num_components[3] * gs_max_out_vertices; - radeon_set_context_reg(cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, offset); + radeon_set_context_reg(ctx_cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, offset); - radeon_set_context_reg(cs, R_028B38_VGT_GS_MAX_VERT_OUT, gs->info.gs.vertices_out); + radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT, 
gs->info.gs.vertices_out); - radeon_set_context_reg_seq(cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, 4); - radeon_emit(cs, num_components[0]); - radeon_emit(cs, (max_stream >= 1) ? num_components[1] : 0); - radeon_emit(cs, (max_stream >= 2) ? num_components[2] : 0); - radeon_emit(cs, (max_stream >= 3) ? num_components[3] : 0); + radeon_set_context_reg_seq(ctx_cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, 4); + radeon_emit(ctx_cs, num_components[0]); + radeon_emit(ctx_cs, (max_stream >= 1) ? num_components[1] : 0); + radeon_emit(ctx_cs, (max_stream >= 2) ? num_components[2] : 0); + radeon_emit(ctx_cs, (max_stream >= 3) ? num_components[3] : 0); uint32_t gs_num_invocations = gs->info.gs.invocations; - radeon_set_context_reg(cs, R_028B90_VGT_GS_INSTANCE_CNT, + radeon_set_context_reg(ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT, S_028B90_CNT(MIN2(gs_num_invocations, 127)) | S_028B90_ENABLE(gs_num_invocations > 0)); - radeon_set_context_reg(cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, + radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, gs_state->vgt_esgs_ring_itemsize); va = radv_buffer_get_va(gs->bo) + gs->bo_offset; @@ -3053,8 +3075,8 @@ radeon_emit(cs, gs->rsrc1); radeon_emit(cs, gs->rsrc2 | S_00B22C_LDS_SIZE(gs_state->lds_size)); - radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL, gs_state->vgt_gs_onchip_cntl); - radeon_set_context_reg(cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, gs_state->vgt_gs_max_prims_per_subgroup); + radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, gs_state->vgt_gs_onchip_cntl); + radeon_set_context_reg(ctx_cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, gs_state->vgt_gs_max_prims_per_subgroup); } else { radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4); radeon_emit(cs, va >> 8); @@ -3063,16 +3085,20 @@ radeon_emit(cs, gs->rsrc2); } - radv_pipeline_generate_hw_vs(cs, pipeline, pipeline->gs_copy_shader); + radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, pipeline->gs_copy_shader); } -static uint32_t offset_to_ps_input(uint32_t offset, bool 
flat_shade) +static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade, bool float16) { uint32_t ps_input_cntl; if (offset <= AC_EXP_PARAM_OFFSET_31) { ps_input_cntl = S_028644_OFFSET(offset); if (flat_shade) ps_input_cntl |= S_028644_FLAT_SHADE(1); + if (float16) { + ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) | + S_028644_ATTR0_VALID(1); + } } else { /* The input is a DEFAULT_VAL constant. */ assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 && @@ -3085,8 +3111,8 @@ } static void -radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *cs, - struct radv_pipeline *pipeline) +radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs, + struct radv_pipeline *pipeline) { struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline); @@ -3097,7 +3123,7 @@ if (ps->info.info.ps.prim_id_input) { unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID]; if (vs_offset != AC_EXP_PARAM_UNDEFINED) { - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true); + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false); ++ps_offset; } } @@ -3107,9 +3133,9 @@ ps->info.info.needs_multiview_view_index) { unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER]; if (vs_offset != AC_EXP_PARAM_UNDEFINED) - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true); + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false); else - ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true); + ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false); ++ps_offset; } @@ -3125,14 +3151,14 @@ vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0]; if (vs_offset != AC_EXP_PARAM_UNDEFINED) { - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false); + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false); ++ps_offset; } vs_offset 
= outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1]; if (vs_offset != AC_EXP_PARAM_UNDEFINED && ps->info.info.ps.num_input_clips_culls > 4) { - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false); + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false); ++ps_offset; } } @@ -3140,6 +3166,7 @@ for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) { unsigned vs_offset; bool flat_shade; + bool float16; if (!(ps->info.fs.input_mask & (1u << i))) continue; @@ -3151,15 +3178,16 @@ } flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset)); + float16 = !!(ps->info.fs.float16_shaded_mask & (1u << ps_offset)); - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade); + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, float16); ++ps_offset; } if (ps_offset) { - radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, ps_offset); + radeon_set_context_reg_seq(ctx_cs, R_028644_SPI_PS_INPUT_CNTL_0, ps_offset); for (unsigned i = 0; i < ps_offset; i++) { - radeon_emit(cs, ps_input_cntl[i]); + radeon_emit(ctx_cs, ps_input_cntl[i]); } } } @@ -3179,11 +3207,11 @@ bool disable_rbplus = device->physical_device->has_rbplus && !device->physical_device->rbplus_allowed; - /* Do not enable the gl_SampleMask fragment shader output if MSAA is - * disabled. + /* It shouldn't be needed to export gl_SampleMask when MSAA is disabled + * but this appears to break Project Cars (DXVK). 
See + * https://bugs.freedesktop.org/show_bug.cgi?id=109401 */ - bool mask_export_enable = ms->num_samples > 1 && - ps->info.info.ps.writes_sample_mask; + bool mask_export_enable = ps->info.info.ps.writes_sample_mask; return S_02880C_Z_EXPORT_ENABLE(ps->info.info.ps.writes_z) | S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.info.ps.writes_stencil) | @@ -3197,7 +3225,8 @@ } static void -radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *cs, +radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *ctx_cs, + struct radeon_cmdbuf *cs, struct radv_pipeline *pipeline) { struct radv_shader_variant *ps; @@ -3213,22 +3242,22 @@ radeon_emit(cs, ps->rsrc1); radeon_emit(cs, ps->rsrc2); - radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, + radeon_set_context_reg(ctx_cs, R_02880C_DB_SHADER_CONTROL, radv_compute_db_shader_control(pipeline->device, pipeline, ps)); - radeon_set_context_reg(cs, R_0286CC_SPI_PS_INPUT_ENA, + radeon_set_context_reg(ctx_cs, R_0286CC_SPI_PS_INPUT_ENA, ps->config.spi_ps_input_ena); - radeon_set_context_reg(cs, R_0286D0_SPI_PS_INPUT_ADDR, + radeon_set_context_reg(ctx_cs, R_0286D0_SPI_PS_INPUT_ADDR, ps->config.spi_ps_input_addr); - radeon_set_context_reg(cs, R_0286D8_SPI_PS_IN_CONTROL, + radeon_set_context_reg(ctx_cs, R_0286D8_SPI_PS_IN_CONTROL, S_0286D8_NUM_INTERP(ps->info.fs.num_interp)); - radeon_set_context_reg(cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl); + radeon_set_context_reg(ctx_cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl); - radeon_set_context_reg(cs, R_028710_SPI_SHADER_Z_FORMAT, + radeon_set_context_reg(ctx_cs, R_028710_SPI_SHADER_Z_FORMAT, ac_get_spi_shader_z_format(ps->info.info.ps.writes_z, ps->info.info.ps.writes_stencil, ps->info.info.ps.writes_sample_mask)); @@ -3241,7 +3270,7 @@ } static void -radv_pipeline_generate_vgt_vertex_reuse(struct radeon_cmdbuf *cs, +radv_pipeline_generate_vgt_vertex_reuse(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline) { if 
(pipeline->device->physical_device->rad_info.family < CHIP_POLARIS10) @@ -3252,7 +3281,7 @@ radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.tes.spacing == TESS_SPACING_FRACTIONAL_ODD) { vtx_reuse_depth = 14; } - radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, + radeon_set_context_reg(ctx_cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, S_028C58_VTX_REUSE_DEPTH(vtx_reuse_depth)); } @@ -3322,38 +3351,46 @@ const struct radv_gs_state *gs, unsigned prim, unsigned gs_out) { - pipeline->cs.buf = malloc(4 * 256); - pipeline->cs.max_dw = 256; + struct radeon_cmdbuf *ctx_cs = &pipeline->ctx_cs; + struct radeon_cmdbuf *cs = &pipeline->cs; - radv_pipeline_generate_depth_stencil_state(&pipeline->cs, pipeline, pCreateInfo, extra); - radv_pipeline_generate_blend_state(&pipeline->cs, pipeline, blend); - radv_pipeline_generate_raster_state(&pipeline->cs, pipeline, pCreateInfo); - radv_pipeline_generate_multisample_state(&pipeline->cs, pipeline); - radv_pipeline_generate_vgt_gs_mode(&pipeline->cs, pipeline); - radv_pipeline_generate_vertex_shader(&pipeline->cs, pipeline, tess); - radv_pipeline_generate_tess_shaders(&pipeline->cs, pipeline, tess); - radv_pipeline_generate_geometry_shader(&pipeline->cs, pipeline, gs); - radv_pipeline_generate_fragment_shader(&pipeline->cs, pipeline); - radv_pipeline_generate_ps_inputs(&pipeline->cs, pipeline); - radv_pipeline_generate_vgt_vertex_reuse(&pipeline->cs, pipeline); - radv_pipeline_generate_binning_state(&pipeline->cs, pipeline, pCreateInfo); + cs->max_dw = 64; + ctx_cs->max_dw = 256; + cs->buf = malloc(4 * (cs->max_dw + ctx_cs->max_dw)); + ctx_cs->buf = cs->buf + cs->max_dw; + + radv_pipeline_generate_depth_stencil_state(ctx_cs, pipeline, pCreateInfo, extra); + radv_pipeline_generate_blend_state(ctx_cs, pipeline, blend); + radv_pipeline_generate_raster_state(ctx_cs, pipeline, pCreateInfo); + radv_pipeline_generate_multisample_state(ctx_cs, pipeline); + radv_pipeline_generate_vgt_gs_mode(ctx_cs, pipeline); + 
radv_pipeline_generate_vertex_shader(ctx_cs, cs, pipeline, tess); + radv_pipeline_generate_tess_shaders(ctx_cs, cs, pipeline, tess); + radv_pipeline_generate_geometry_shader(ctx_cs, cs, pipeline, gs); + radv_pipeline_generate_fragment_shader(ctx_cs, cs, pipeline); + radv_pipeline_generate_ps_inputs(ctx_cs, pipeline); + radv_pipeline_generate_vgt_vertex_reuse(ctx_cs, pipeline); + radv_pipeline_generate_binning_state(ctx_cs, pipeline, pCreateInfo); - radeon_set_context_reg(&pipeline->cs, R_0286E8_SPI_TMPRING_SIZE, + radeon_set_context_reg(ctx_cs, R_0286E8_SPI_TMPRING_SIZE, S_0286E8_WAVES(pipeline->max_waves) | S_0286E8_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10)); - radeon_set_context_reg(&pipeline->cs, R_028B54_VGT_SHADER_STAGES_EN, radv_compute_vgt_shader_stages_en(pipeline)); + radeon_set_context_reg(ctx_cs, R_028B54_VGT_SHADER_STAGES_EN, radv_compute_vgt_shader_stages_en(pipeline)); if (pipeline->device->physical_device->rad_info.chip_class >= CIK) { - radeon_set_uconfig_reg_idx(&pipeline->cs, R_030908_VGT_PRIMITIVE_TYPE, 1, prim); + radeon_set_uconfig_reg_idx(cs, R_030908_VGT_PRIMITIVE_TYPE, 1, prim); } else { - radeon_set_config_reg(&pipeline->cs, R_008958_VGT_PRIMITIVE_TYPE, prim); + radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim); } - radeon_set_context_reg(&pipeline->cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out); + radeon_set_context_reg(ctx_cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out); + + radeon_set_context_reg(ctx_cs, R_02820C_PA_SC_CLIPRECT_RULE, radv_compute_cliprect_rule(pCreateInfo)); - radeon_set_context_reg(&pipeline->cs, R_02820C_PA_SC_CLIPRECT_RULE, radv_compute_cliprect_rule(pCreateInfo)); + pipeline->ctx_cs_hash = _mesa_hash_data(ctx_cs->buf, ctx_cs->cdw * 4); - assert(pipeline->cs.cdw <= pipeline->cs.max_dw); + assert(ctx_cs->cdw <= ctx_cs->max_dw); + assert(cs->cdw <= cs->max_dw); } static struct radv_ia_multi_vgt_param_helpers diff -Nru mesa-18.3.3/src/amd/vulkan/radv_private.h mesa-19.0.1/src/amd/vulkan/radv_private.h --- 
mesa-18.3.3/src/amd/vulkan/radv_private.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_private.h 2019-03-31 23:16:37.000000000 +0000 @@ -285,7 +285,6 @@ struct radeon_winsys *ws; struct radeon_info rad_info; - char path[20]; char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE]; uint8_t driver_uuid[VK_UUID_SIZE]; uint8_t device_uuid[VK_UUID_SIZE]; @@ -307,6 +306,9 @@ /* Whether DCC should be enabled for MSAA textures. */ bool dcc_msaa_allowed; + /* Whether LOAD_CONTEXT_REG packets are supported. */ + bool has_load_ctx_reg_pkt; + /* This is the drivers on-disk cache used as a fallback as opposed to * the pipeline cache defined by apps. */ @@ -457,6 +459,12 @@ VkPipelineLayout clear_color_p_layout; VkPipelineLayout clear_depth_p_layout; + + /* Optimized compute fast HTILE clear for stencil or depth only. */ + VkPipeline clear_htile_mask_pipeline; + VkPipelineLayout clear_htile_mask_p_layout; + VkDescriptorSetLayout clear_htile_mask_ds_layout; + struct { VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT]; @@ -597,6 +605,12 @@ VkPipeline pipeline_statistics_query_pipeline; VkPipeline tfb_query_pipeline; } query; + + struct { + VkDescriptorSetLayout ds_layout; + VkPipelineLayout p_layout; + VkPipeline pipeline[MAX_SAMPLES_LOG2]; + } fmask_expand; }; /* queue types */ @@ -1044,6 +1058,8 @@ /* Conditional rendering info. 
*/ int predication_type; /* -1: disabled, 0: normal, 1: inverted */ uint64_t predication_va; + + bool context_roll_without_scissor_emitted; }; struct radv_cmd_pool { @@ -1103,8 +1119,7 @@ VkResult record_result; - uint32_t gfx9_fence_offset; - struct radeon_winsys_bo *gfx9_fence_bo; + uint64_t gfx9_fence_va; uint32_t gfx9_fence_idx; uint64_t gfx9_eop_bug_va; @@ -1139,13 +1154,11 @@ unsigned event, unsigned event_flags, unsigned data_sel, uint64_t va, - uint32_t old_fence, uint32_t new_fence, uint64_t gfx9_eop_bug_va); -void si_emit_wait_fence(struct radeon_cmdbuf *cs, - uint64_t va, uint32_t ref, - uint32_t mask); +void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, + uint32_t ref, uint32_t mask); void si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class, uint32_t *fence_ptr, uint64_t va, @@ -1198,9 +1211,12 @@ int cb_idx, uint32_t color_values[2]); -void radv_set_dcc_need_cmask_elim_pred(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - bool value); +void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, bool value); + +void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, bool value); + uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size, uint32_t value); @@ -1238,7 +1254,7 @@ struct radeon_cmdbuf *cs, uint32_t sh_offset, uint64_t va, bool global) { - bool use_32bit_pointers = HAVE_32BIT_POINTERS && !global; + bool use_32bit_pointers = !global; radv_emit_shader_pointer_head(cs, sh_offset, 1, use_32bit_pointers); radv_emit_shader_pointer_body(device, cs, va, use_32bit_pointers); @@ -1352,6 +1368,8 @@ VkShaderStageFlags active_stages; struct radeon_cmdbuf cs; + uint32_t ctx_cs_hash; + struct radeon_cmdbuf ctx_cs; struct radv_vertex_elements_info vertex_elements; @@ -1447,6 +1465,7 @@ bool radv_is_colorbuffer_format_supported(VkFormat format, bool *blendable); 
bool radv_dcc_formats_compatible(VkFormat format1, VkFormat format2); +bool radv_device_supports_etc(struct radv_physical_device *physical_device); struct radv_fmask_info { uint64_t offset; @@ -1496,6 +1515,7 @@ struct radv_fmask_info fmask; struct radv_cmask_info cmask; uint64_t clear_value_offset; + uint64_t fce_pred_offset; uint64_t dcc_pred_offset; /* @@ -1873,7 +1893,7 @@ radv_update_descriptor_set_with_template(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, struct radv_descriptor_set *set, - VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *pData); void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, @@ -1886,6 +1906,9 @@ void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, uint32_t value); +void radv_initialize_fmask(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image); + struct radv_fence { struct radeon_winsys_fence *fence; struct wsi_fence *fence_wsi; @@ -1967,7 +1990,7 @@ RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, VkDescriptorPool) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set, VkDescriptorSet) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set_layout, VkDescriptorSetLayout) -RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, VkDescriptorUpdateTemplateKHR) +RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, VkDescriptorUpdateTemplate) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_device_memory, VkDeviceMemory) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_fence, VkFence) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_event, VkEvent) diff -Nru mesa-18.3.3/src/amd/vulkan/radv_query.c mesa-19.0.1/src/amd/vulkan/radv_query.c --- mesa-18.3.3/src/amd/vulkan/radv_query.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_query.c 2019-03-31 23:16:37.000000000 +0000 @@ -51,6 +51,12 @@ return num_db; } + +static nir_ssa_def *nir_test_flag(nir_builder *b, nir_ssa_def 
*flags, uint32_t flag) +{ + return nir_i2b(b, nir_iand(b, flags, nir_imm_int(b, flag))); +} + static void radv_break_on_count(nir_builder *b, nir_variable *var, nir_ssa_def *count) { nir_ssa_def *counter = nir_load_var(b, var); @@ -132,7 +138,7 @@ nir_variable *outer_counter = nir_local_variable_create(b.impl, glsl_int_type(), "outer_counter"); nir_variable *start = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "start"); nir_variable *end = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "end"); - nir_variable *available = nir_local_variable_create(b.impl, glsl_int_type(), "available"); + nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available"); unsigned db_count = get_max_db(device); nir_ssa_def *flags = radv_load_push_int(&b, 0, "flags"); @@ -153,8 +159,8 @@ nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL); nir_builder_instr_insert(&b, &src_buf->instr); - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -170,7 +176,7 @@ nir_store_var(&b, result, nir_imm_int64(&b, 0), 0x1); nir_store_var(&b, outer_counter, nir_imm_int(&b, 0), 0x1); - nir_store_var(&b, available, nir_imm_int(&b, 1), 0x1); + nir_store_var(&b, available, nir_imm_true(&b), 0x1); nir_loop *outer_loop = nir_loop_create(b.shader); nir_builder_cf_insert(&b, &outer_loop->cf_node); @@ -208,18 +214,17 @@ b.cursor = nir_after_cf_list(&update_if->else_list); - nir_store_var(&b, available, nir_imm_int(&b, 0), 0x1); + nir_store_var(&b, available, nir_imm_false(&b), 0x1); b.cursor = nir_after_cf_node(&outer_loop->cf_node); /* Store the result if complete or if partial results have been requested. 
*/ - nir_ssa_def *result_is_64bit = nir_iand(&b, flags, - nir_imm_int(&b, VK_QUERY_RESULT_64_BIT)); + nir_ssa_def *result_is_64bit = nir_test_flag(&b, flags, VK_QUERY_RESULT_64_BIT); nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4)); nir_if *store_if = nir_if_create(b.shader); - store_if->condition = nir_src_for_ssa(nir_ior(&b, nir_iand(&b, flags, nir_imm_int(&b, VK_QUERY_RESULT_PARTIAL_BIT)), nir_load_var(&b, available))); + store_if->condition = nir_src_for_ssa(nir_ior(&b, nir_test_flag(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), nir_load_var(&b, available))); nir_cf_node_insert(b.cursor, &store_if->cf_node); b.cursor = nir_after_cf_list(&store_if->then_list); @@ -253,13 +258,13 @@ /* Store the availability bit if requested. */ nir_if *availability_if = nir_if_create(b.shader); - availability_if->condition = nir_src_for_ssa(nir_iand(&b, flags, nir_imm_int(&b, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT))); + availability_if->condition = nir_src_for_ssa(nir_test_flag(&b, flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)); nir_cf_node_insert(b.cursor, &availability_if->cf_node); b.cursor = nir_after_cf_list(&availability_if->then_list); store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); - store->src[0] = nir_src_for_ssa(nir_load_var(&b, available)); + store->src[0] = nir_src_for_ssa(nir_b2i32(&b, nir_load_var(&b, available))); store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); store->src[2] = nir_src_for_ssa(nir_iadd(&b, result_size, output_base)); nir_intrinsic_set_write_mask(store, 0x1); @@ -291,11 +296,11 @@ * uint64_t dst_offset = dst_base; * uint32_t elem_size = flags & VK_QUERY_RESULT_64_BIT ? 
8 : 4; * uint32_t elem_count = stats_mask >> 16; - * uint32_t available = src_buf[avail_offset + 4 * global_id.x]; + * uint32_t available32 = src_buf[avail_offset + 4 * global_id.x]; * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - * dst_buf[dst_offset + elem_count * elem_size] = available; + * dst_buf[dst_offset + elem_count * elem_size] = available32; * } - * if (available) { + * if ((bool)available32) { * // repeat 11 times: * if (stats_mask & (1 << 0)) { * uint64_t start = src_buf[src_offset + 8 * indices[0]]; @@ -343,8 +348,8 @@ nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL); nir_builder_instr_insert(&b, &src_buf->instr); - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -367,23 +372,22 @@ nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL); load->num_components = 1; nir_builder_instr_insert(&b, &load->instr); - nir_ssa_def *available = &load->dest.ssa; + nir_ssa_def *available32 = &load->dest.ssa; - nir_ssa_def *result_is_64bit = nir_iand(&b, flags, - nir_imm_int(&b, VK_QUERY_RESULT_64_BIT)); + nir_ssa_def *result_is_64bit = nir_test_flag(&b, flags, VK_QUERY_RESULT_64_BIT); nir_ssa_def *elem_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4)); nir_ssa_def *elem_count = nir_ushr(&b, stats_mask, nir_imm_int(&b, 16)); /* Store the availability bit if requested. 
*/ nir_if *availability_if = nir_if_create(b.shader); - availability_if->condition = nir_src_for_ssa(nir_iand(&b, flags, nir_imm_int(&b, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT))); + availability_if->condition = nir_src_for_ssa(nir_test_flag(&b, flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)); nir_cf_node_insert(b.cursor, &availability_if->cf_node); b.cursor = nir_after_cf_list(&availability_if->then_list); nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); - store->src[0] = nir_src_for_ssa(available); + store->src[0] = nir_src_for_ssa(available32); store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); store->src[2] = nir_src_for_ssa(nir_iadd(&b, output_base, nir_imul(&b, elem_count, elem_size))); nir_intrinsic_set_write_mask(store, 0x1); @@ -393,7 +397,7 @@ b.cursor = nir_after_cf_node(&availability_if->cf_node); nir_if *available_if = nir_if_create(b.shader); - available_if->condition = nir_src_for_ssa(available); + available_if->condition = nir_src_for_ssa(nir_i2b(&b, available32)); nir_cf_node_insert(b.cursor, &available_if->cf_node); b.cursor = nir_after_cf_list(&available_if->then_list); @@ -401,7 +405,7 @@ nir_store_var(&b, output_offset, output_base, 0x1); for (int i = 0; i < 11; ++i) { nir_if *store_if = nir_if_create(b.shader); - store_if->condition = nir_src_for_ssa(nir_iand(&b, stats_mask, nir_imm_int(&b, 1u << i))); + store_if->condition = nir_src_for_ssa(nir_test_flag(&b, stats_mask, 1u << i)); nir_cf_node_insert(b.cursor, &store_if->cf_node); b.cursor = nir_after_cf_list(&store_if->then_list); @@ -463,8 +467,7 @@ b.cursor = nir_after_cf_list(&available_if->else_list); available_if = nir_if_create(b.shader); - available_if->condition = nir_src_for_ssa(nir_iand(&b, flags, - nir_imm_int(&b, VK_QUERY_RESULT_PARTIAL_BIT))); + available_if->condition = nir_src_for_ssa(nir_test_flag(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT)); nir_cf_node_insert(b.cursor, &available_if->cf_node); b.cursor = 
nir_after_cf_list(&available_if->then_list); @@ -563,12 +566,12 @@ glsl_vector_type(GLSL_TYPE_UINT64, 2), "result"); nir_variable *available = - nir_local_variable_create(b.impl, glsl_int_type(), "available"); + nir_local_variable_create(b.impl, glsl_bool_type(), "available"); nir_store_var(&b, result, nir_vec2(&b, nir_imm_int64(&b, 0), nir_imm_int64(&b, 0)), 0x3); - nir_store_var(&b, available, nir_imm_int(&b, 0), 0x1); + nir_store_var(&b, available, nir_imm_false(&b), 0x1); nir_ssa_def *flags = radv_load_push_int(&b, 0, "flags"); @@ -590,8 +593,8 @@ nir_builder_instr_insert(&b, &src_buf->instr); /* Compute global ID. */ - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -627,8 +630,8 @@ avails[1] = nir_iand(&b, nir_channel(&b, &load2->dest.ssa, 1), nir_channel(&b, &load2->dest.ssa, 3)); nir_ssa_def *result_is_available = - nir_iand(&b, nir_iand(&b, avails[0], avails[1]), - nir_imm_int(&b, 0x80000000)); + nir_i2b(&b, nir_iand(&b, nir_iand(&b, avails[0], avails[1]), + nir_imm_int(&b, 0x80000000))); /* Only compute result if available. */ nir_if *available_if = nir_if_create(b.shader); @@ -661,13 +664,13 @@ nir_store_var(&b, result, nir_vec2(&b, num_primitive_written, primitive_storage_needed), 0x3); - nir_store_var(&b, available, nir_imm_int(&b, 1), 0x1); + nir_store_var(&b, available, nir_imm_true(&b), 0x1); b.cursor = nir_after_cf_node(&available_if->cf_node); /* Determine if result is 64 or 32 bit. 
*/ nir_ssa_def *result_is_64bit = - nir_iand(&b, flags, nir_imm_int(&b, VK_QUERY_RESULT_64_BIT)); + nir_test_flag(&b, flags, VK_QUERY_RESULT_64_BIT); nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 16), nir_imm_int(&b, 8)); @@ -675,8 +678,7 @@ /* Store the result if complete or partial results have been requested. */ nir_if *store_if = nir_if_create(b.shader); store_if->condition = - nir_src_for_ssa(nir_ior(&b, nir_iand(&b, flags, - nir_imm_int(&b, VK_QUERY_RESULT_PARTIAL_BIT)), + nir_src_for_ssa(nir_ior(&b, nir_test_flag(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), nir_load_var(&b, available))); nir_cf_node_insert(b.cursor, &store_if->cf_node); @@ -714,14 +716,13 @@ /* Store the availability bit if requested. */ nir_if *availability_if = nir_if_create(b.shader); availability_if->condition = - nir_src_for_ssa(nir_iand(&b, flags, - nir_imm_int(&b, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT))); + nir_src_for_ssa(nir_test_flag(&b, flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)); nir_cf_node_insert(b.cursor, &availability_if->cf_node); b.cursor = nir_after_cf_list(&availability_if->then_list); store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); - store->src[0] = nir_src_for_ssa(nir_load_var(&b, available)); + store->src[0] = nir_src_for_ssa(nir_b2i32(&b, nir_load_var(&b, available))); store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); store->src[2] = nir_src_for_ssa(nir_iadd(&b, result_size, output_base)); nir_intrinsic_set_write_mask(store, 0x1); @@ -1012,9 +1013,6 @@ radv_unaligned_dispatch(cmd_buffer, count, 1, 1); - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2 | - RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_CS_PARTIAL_FLUSH; /* Restore conditional rendering. 
*/ cmd_buffer->state.predicating = old_predicating; @@ -1063,7 +1061,8 @@ pool->size += 4 * pCreateInfo->queryCount; pool->bo = device->ws->buffer_create(device->ws, pool->size, - 64, RADEON_DOMAIN_GTT, RADEON_FLAG_NO_INTERPROCESS_SHARING); + 64, RADEON_DOMAIN_GTT, RADEON_FLAG_NO_INTERPROCESS_SHARING, + RADV_BO_PRIORITY_QUERY_POOL); if (!pool->bo) { vk_free2(&device->alloc, pAllocator, pool); @@ -1296,14 +1295,11 @@ unsigned query = firstQuery + i; uint64_t src_va = va + query * pool->stride + pool->stride - 4; + radeon_check_space(cmd_buffer->device->ws, cs, 7); + /* Waits on the upper word of the last DB entry */ - radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit(cs, WAIT_REG_MEM_GREATER_OR_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); - radeon_emit(cs, src_va); - radeon_emit(cs, src_va >> 32); - radeon_emit(cs, 0x80000000); /* reference value */ - radeon_emit(cs, 0xffffffff); /* mask */ - radeon_emit(cs, 4); /* poll interval */ + radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL, + src_va, 0x80000000, 0xffffffff); } } radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.occlusion_query_pipeline, @@ -1322,7 +1318,8 @@ uint64_t avail_va = va + pool->availability_offset + 4 * query; /* This waits on the ME. All copies below are done on the ME */ - si_emit_wait_fence(cs, avail_va, 1, 0xffffffff); + radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, + avail_va, 1, 0xffffffff); } } radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline, @@ -1344,13 +1341,10 @@ /* Wait on the high 32 bits of the timestamp in * case the low part is 0xffffffff. 
*/ - radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, false)); - radeon_emit(cs, WAIT_REG_MEM_NOT_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); - radeon_emit(cs, local_src_va + 4); - radeon_emit(cs, (local_src_va + 4) >> 32); - radeon_emit(cs, TIMESTAMP_NOT_READY >> 32); - radeon_emit(cs, 0xffffffff); - radeon_emit(cs, 4); + radv_cp_wait_mem(cs, WAIT_REG_MEM_NOT_EQUAL, + local_src_va + 4, + TIMESTAMP_NOT_READY >> 32, + 0xffffffff); } if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { uint64_t avail_dest_va = dest_va + elem_size; @@ -1383,16 +1377,13 @@ unsigned query = firstQuery + i; uint64_t src_va = va + query * pool->stride; + radeon_check_space(cmd_buffer->device->ws, cs, 7 * 4); + /* Wait on the upper word of all results. */ for (unsigned j = 0; j < 4; j++, src_va += 8) { - radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit(cs, WAIT_REG_MEM_GREATER_OR_EQUAL | - WAIT_REG_MEM_MEM_SPACE(1)); - radeon_emit(cs, (src_va + 4)); - radeon_emit(cs, (src_va + 4) >> 32); - radeon_emit(cs, 0x80000000); /* reference value */ - radeon_emit(cs, 0xffffffff); /* mask */ - radeon_emit(cs, 4); /* poll interval */ + radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL, + src_va + 4, 0x80000000, + 0xffffffff); } } } @@ -1461,7 +1452,6 @@ * because we use a CP dma clear. 
*/ si_emit_cache_flush(cmd_buffer); - cmd_buffer->pending_reset_query = false; } } } @@ -1580,7 +1570,7 @@ radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DATA_SEL_VALUE_32BIT, - avail_va, 0, 1, + avail_va, 1, cmd_buffer->gfx9_eop_bug_va); break; case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: @@ -1703,7 +1693,7 @@ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); radeon_emit(cs, COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM | COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) | - COPY_DATA_DST_SEL(V_370_MEM_ASYNC)); + COPY_DATA_DST_SEL(V_370_MEM)); radeon_emit(cs, 0); radeon_emit(cs, 0); radeon_emit(cs, query_va); @@ -1715,7 +1705,7 @@ mec, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DATA_SEL_TIMESTAMP, - query_va, 0, 0, + query_va, 0, cmd_buffer->gfx9_eop_bug_va); break; } diff -Nru mesa-18.3.3/src/amd/vulkan/radv_radeon_winsys.h mesa-19.0.1/src/amd/vulkan/radv_radeon_winsys.h --- mesa-18.3.3/src/amd/vulkan/radv_radeon_winsys.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_radeon_winsys.h 2019-03-31 23:16:37.000000000 +0000 @@ -84,6 +84,9 @@ }; enum radeon_value_id { + RADEON_ALLOCATED_VRAM, + RADEON_ALLOCATED_VRAM_VIS, + RADEON_ALLOCATED_GTT, RADEON_TIMESTAMP, RADEON_NUM_BYTES_MOVED, RADEON_NUM_EVICTIONS, @@ -164,6 +167,7 @@ struct radeon_winsys_bo { uint64_t va; bool is_local; + bool vram_cpu_access; }; struct radv_winsys_sem_counts { uint32_t syncobj_count; @@ -184,6 +188,27 @@ unsigned count; }; +/* Kernel effectively allows 0-31. This sets some priorities for fixed + * functionality buffers */ +enum { + RADV_BO_PRIORITY_APPLICATION_MAX = 28, + + /* virtual buffers have 0 priority since the priority is not used. */ + RADV_BO_PRIORITY_VIRTUAL = 0, + + /* This should be considerably lower than most of the stuff below, + * but how much lower is hard to say since we don't know application + * assignments. Put it pretty high since it is GTT anyway. 
*/ + RADV_BO_PRIORITY_QUERY_POOL = 29, + + RADV_BO_PRIORITY_DESCRIPTOR = 30, + RADV_BO_PRIORITY_UPLOAD_BUFFER = 30, + RADV_BO_PRIORITY_FENCE = 30, + RADV_BO_PRIORITY_SHADER = 31, + RADV_BO_PRIORITY_SCRATCH = 31, + RADV_BO_PRIORITY_CS = 31, +}; + struct radeon_winsys { void (*destroy)(struct radeon_winsys *ws); @@ -202,17 +227,20 @@ uint64_t size, unsigned alignment, enum radeon_bo_domain domain, - enum radeon_bo_flag flags); + enum radeon_bo_flag flags, + unsigned priority); void (*buffer_destroy)(struct radeon_winsys_bo *bo); void *(*buffer_map)(struct radeon_winsys_bo *bo); struct radeon_winsys_bo *(*buffer_from_ptr)(struct radeon_winsys *ws, void *pointer, - uint64_t size); + uint64_t size, + unsigned priority); struct radeon_winsys_bo *(*buffer_from_fd)(struct radeon_winsys *ws, int fd, + unsigned priority, unsigned *stride, unsigned *offset); bool (*buffer_get_fd)(struct radeon_winsys *ws, diff -Nru mesa-18.3.3/src/amd/vulkan/radv_shader.c mesa-19.0.1/src/amd/vulkan/radv_shader.c --- mesa-18.3.3/src/amd/vulkan/radv_shader.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_shader.c 2019-03-31 23:16:37.000000000 +0000 @@ -126,8 +126,8 @@ do { progress = false; - NIR_PASS(progress, shader, nir_split_array_vars, nir_var_local); - NIR_PASS(progress, shader, nir_shrink_vec_array_vars, nir_var_local); + NIR_PASS(progress, shader, nir_split_array_vars, nir_var_function_temp); + NIR_PASS(progress, shader, nir_shrink_vec_array_vars, nir_var_function_temp); NIR_PASS_V(shader, nir_lower_vars_to_ssa); NIR_PASS_V(shader, nir_lower_pack); @@ -159,7 +159,7 @@ NIR_PASS(progress, shader, nir_opt_if); NIR_PASS(progress, shader, nir_opt_dead_cf); NIR_PASS(progress, shader, nir_opt_cse); - NIR_PASS(progress, shader, nir_opt_peephole_select, 8); + NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true); NIR_PASS(progress, shader, nir_opt_algebraic); NIR_PASS(progress, shader, nir_opt_constant_folding); NIR_PASS(progress, shader, nir_opt_undef); @@ 
-219,33 +219,39 @@ } } const struct spirv_to_nir_options spirv_options = { + .lower_ubo_ssbo_access_to_offsets = true, .caps = { + .descriptor_array_dynamic_indexing = true, .device_group = true, .draw_parameters = true, .float64 = true, + .gcn_shader = true, + .geometry_streams = true, .image_read_without_format = true, .image_write_without_format = true, - .tessellation = true, - .int64 = true, .int16 = true, + .int64 = true, .multiview = true, + .runtime_descriptor_array = true, + .shader_viewport_index_layer = true, + .stencil_export = true, + .storage_16bit = true, + .storage_image_ms = true, .subgroup_arithmetic = true, .subgroup_ballot = true, .subgroup_basic = true, .subgroup_quad = true, .subgroup_shuffle = true, .subgroup_vote = true, - .variable_pointers = true, - .gcn_shader = true, - .trinary_minmax = true, - .shader_viewport_index_layer = true, - .descriptor_array_dynamic_indexing = true, - .runtime_descriptor_array = true, - .stencil_export = true, - .storage_16bit = true, - .geometry_streams = true, + .tessellation = true, .transform_feedback = true, + .trinary_minmax = true, + .variable_pointers = true, }, + .ubo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2), + .ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2), + .push_const_ptr_type = glsl_uint_type(), + .shared_ptr_type = glsl_uint_type(), }; entry_point = spirv_to_nir(spirv, module->size / 4, spec_entries, num_spec_entries, @@ -261,10 +267,10 @@ * inline functions. That way they get properly initialized at the top * of the function and not at the top of its caller. 
*/ - NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local); + NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_function_temp); NIR_PASS_V(nir, nir_lower_returns); NIR_PASS_V(nir, nir_inline_functions); - NIR_PASS_V(nir, nir_copy_prop); + NIR_PASS_V(nir, nir_opt_deref); /* Pick off the single entrypoint that we want */ foreach_list_typed_safe(nir_function, func, node, &nir->functions) { @@ -323,7 +329,7 @@ nir_split_var_copies(nir); nir_lower_global_vars_to_local(nir); - nir_remove_dead_variables(nir, nir_var_local); + nir_remove_dead_variables(nir, nir_var_function_temp); nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) { .subgroup_size = 64, .ballot_bit_size = 64, @@ -389,7 +395,8 @@ RADEON_DOMAIN_VRAM, RADEON_FLAG_NO_INTERPROCESS_SHARING | (device->physical_device->cpdma_prefetch_writes_memory ? - 0 : RADEON_FLAG_READ_ONLY)); + 0 : RADEON_FLAG_READ_ONLY), + RADV_BO_PRIORITY_SHADER); slab->ptr = (char*)device->ws->buffer_map(slab->bo); list_inithead(&slab->shaders); @@ -548,9 +555,15 @@ * * "mesa" is the prefix for error messages. 
*/ - const char *argv[3] = { "mesa", "-simplifycfg-sink-common=false", - "-amdgpu-skip-threshold=1" }; - LLVMParseCommandLineOptions(3, argv, NULL); + if (HAVE_LLVM >= 0x0800) { + const char *argv[2] = { "mesa", "-simplifycfg-sink-common=false" }; + LLVMParseCommandLineOptions(2, argv, NULL); + + } else { + const char *argv[3] = { "mesa", "-simplifycfg-sink-common=false", + "-amdgpu-skip-threshold=1" }; + LLVMParseCommandLineOptions(3, argv, NULL); + } } static once_flag radv_init_llvm_target_once_flag = ONCE_FLAG_INIT; @@ -600,7 +613,7 @@ thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM); radv_init_llvm_once(); - radv_init_llvm_compiler(&ac_llvm, false, + radv_init_llvm_compiler(&ac_llvm, thread_compiler, chip_family, tm_options); if (gs_copy_shader) { @@ -860,6 +873,7 @@ buf = _mesa_string_buffer_create(NULL, 1024); _mesa_string_buffer_printf(buf, "%s:\n", radv_get_shader_name(variant, stage)); + _mesa_string_buffer_printf(buf, "%s\n\n", variant->llvm_ir_string); _mesa_string_buffer_printf(buf, "%s\n\n", variant->disasm_string); generate_shader_stats(device, variant, stage, buf); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_shader.h mesa-19.0.1/src/amd/vulkan/radv_shader.h --- mesa-18.3.3/src/amd/vulkan/radv_shader.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_shader.h 2019-03-31 23:16:37.000000000 +0000 @@ -213,7 +213,6 @@ struct radv_userdata_info { int8_t sgpr_idx; uint8_t num_sgprs; - bool indirect; }; struct radv_userdata_locations { @@ -258,6 +257,7 @@ unsigned num_interp; uint32_t input_mask; uint32_t flat_shaded_mask; + uint32_t float16_shaded_mask; bool can_discard; bool early_fragment_test; } fs; @@ -402,6 +402,8 @@ return 1; if (slot == VARYING_SLOT_CLIP_DIST0) return 2; + if (slot == VARYING_SLOT_CLIP_DIST1) + return 3; /* 3 is reserved for clip dist as well */ if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31) return 4 + (slot - VARYING_SLOT_VAR0); diff -Nru 
mesa-18.3.3/src/amd/vulkan/radv_shader_helper.h mesa-19.0.1/src/amd/vulkan/radv_shader_helper.h --- mesa-18.3.3/src/amd/vulkan/radv_shader_helper.h 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_shader_helper.h 2019-03-31 23:16:37.000000000 +0000 @@ -27,7 +27,6 @@ #endif bool radv_init_llvm_compiler(struct ac_llvm_compiler *info, - bool okay_to_leak_target_library_info, bool thread_compiler, enum radeon_family family, enum ac_target_machine_options tm_options); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_shader_info.c mesa-19.0.1/src/amd/vulkan/radv_shader_info.c --- mesa-18.3.3/src/amd/vulkan/radv_shader_info.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_shader_info.c 2019-03-31 23:16:37.000000000 +0000 @@ -101,7 +101,7 @@ case MESA_SHADER_VERTEX: { nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); - if (var->data.mode == nir_var_shader_in) { + if (var && var->data.mode == nir_var_shader_in) { unsigned idx = var->data.location; uint8_t mask = nir_ssa_def_components_read(&instr->dest.ssa); @@ -129,11 +129,9 @@ get_deref_offset(deref_instr, &const_offset); - if (idx == VARYING_SLOT_CLIP_DIST0) { - /* Special case for clip/cull distances because there are - * combined into a single array that contains both. - */ - output_usage_mask[idx] |= 1 << const_offset; + if (var->data.compact) { + const_offset += comp; + output_usage_mask[idx + const_offset / 4] |= 1 << (const_offset % 4); return; } @@ -150,7 +148,7 @@ { nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); - if (var->data.mode == nir_var_shader_out) { + if (var && var->data.mode == nir_var_shader_out) { unsigned idx = var->data.location; switch (nir->info.stage) { @@ -174,13 +172,9 @@ type = glsl_get_array_element(var->type); unsigned slots = - var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4) + var->data.compact ? 
DIV_ROUND_UP(var->data.location_frac + glsl_get_length(type), 4) : glsl_count_attribute_slots(type, false); - if (idx == VARYING_SLOT_CLIP_DIST0) - slots = (nir->info.clip_distance_array_size + - nir->info.cull_distance_array_size > 4) ? 2 : 1; - mark_tess_output(info, var->data.patch, param, slots); break; } @@ -270,15 +264,15 @@ } mark_sampler_desc(var, info); - if (nir_intrinsic_image_deref_store || - nir_intrinsic_image_deref_atomic_add || - nir_intrinsic_image_deref_atomic_min || - nir_intrinsic_image_deref_atomic_max || - nir_intrinsic_image_deref_atomic_and || - nir_intrinsic_image_deref_atomic_or || - nir_intrinsic_image_deref_atomic_xor || - nir_intrinsic_image_deref_atomic_exchange || - nir_intrinsic_image_deref_atomic_comp_swap) { + if (instr->intrinsic == nir_intrinsic_image_deref_store || + instr->intrinsic == nir_intrinsic_image_deref_atomic_add || + instr->intrinsic == nir_intrinsic_image_deref_atomic_min || + instr->intrinsic == nir_intrinsic_image_deref_atomic_max || + instr->intrinsic == nir_intrinsic_image_deref_atomic_and || + instr->intrinsic == nir_intrinsic_image_deref_atomic_or || + instr->intrinsic == nir_intrinsic_image_deref_atomic_xor || + instr->intrinsic == nir_intrinsic_image_deref_atomic_exchange || + instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) { if (nir->info.stage == MESA_SHADER_FRAGMENT) info->ps.writes_memory = true; } @@ -374,7 +368,8 @@ info->ps.layer_input = true; break; case VARYING_SLOT_CLIP_DIST0: - info->ps.num_input_clips_culls = attrib_count; + case VARYING_SLOT_CLIP_DIST1: + info->ps.num_input_clips_culls += attrib_count; break; default: break; @@ -409,8 +404,8 @@ int idx = var->data.location; unsigned param = shader_io_get_unique_index(idx); int num_slots = glsl_count_attribute_slots(var->type, false); - if (idx == VARYING_SLOT_CLIP_DIST0) - num_slots = (nir->info.clip_distance_array_size + nir->info.cull_distance_array_size > 4) ? 
2 : 1; + if (var->data.compact) + num_slots = DIV_ROUND_UP(var->data.location_frac + glsl_get_length(var->type), 4); mark_ls_output(info, param, num_slots); } @@ -512,8 +507,10 @@ struct nir_function *func = (struct nir_function *)exec_list_get_head_const(&nir->functions); - if (options->layout && options->layout->dynamic_offset_count) + if (options->layout && options->layout->dynamic_offset_count && + (options->layout->dynamic_shader_stages & mesa_to_vk_shader_stage(nir->info.stage))) { info->loads_push_constants = true; + } nir_foreach_variable(variable, &nir->inputs) gather_info_input_decl(nir, variable, info); diff -Nru mesa-18.3.3/src/amd/vulkan/si_cmd_buffer.c mesa-19.0.1/src/amd/vulkan/si_cmd_buffer.c --- mesa-18.3.3/src/amd/vulkan/si_cmd_buffer.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/si_cmd_buffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -278,8 +278,7 @@ radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F)); - if (physical_device->rad_info.num_good_compute_units / - (physical_device->rad_info.max_se * physical_device->rad_info.max_sh_per_se) <= 4) { + if (physical_device->rad_info.num_good_cu_per_sh <= 4) { /* Too few available compute units per SH. Disallowing * VS to run on CU0 could hurt us more than late VS * allocation would help. 
@@ -306,9 +305,6 @@ if (physical_device->rad_info.chip_class >= VI) { uint32_t vgt_tess_distribution; - radeon_set_context_reg(cs, R_028424_CB_DCC_CONTROL, - S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) | - S_028424_OVERWRITE_COMBINER_WATERMARK(4)); vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | S_028B50_ACCUM_TRI(11) | @@ -403,7 +399,8 @@ RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS| RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_READ_ONLY); + RADEON_FLAG_READ_ONLY, + RADV_BO_PRIORITY_CS); if (!device->gfx_init) goto fail; @@ -664,7 +661,6 @@ unsigned event, unsigned event_flags, unsigned data_sel, uint64_t va, - uint32_t old_fence, uint32_t new_fence, uint64_t gfx9_eop_bug_va) { @@ -711,7 +707,7 @@ radeon_emit(cs, op); radeon_emit(cs, va); radeon_emit(cs, ((va >> 32) & 0xffff) | sel); - radeon_emit(cs, old_fence); /* immediate data */ + radeon_emit(cs, 0); /* immediate data */ radeon_emit(cs, 0); /* unused */ } @@ -725,12 +721,15 @@ } void -si_emit_wait_fence(struct radeon_cmdbuf *cs, - uint64_t va, uint32_t ref, - uint32_t mask) +radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, + uint32_t ref, uint32_t mask) { + assert(op == WAIT_REG_MEM_EQUAL || + op == WAIT_REG_MEM_NOT_EQUAL || + op == WAIT_REG_MEM_GREATER_OR_EQUAL); + radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, false)); - radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); + radeon_emit(cs, op | WAIT_REG_MEM_MEM_SPACE(1)); radeon_emit(cs, va); radeon_emit(cs, va >> 32); radeon_emit(cs, ref); /* reference value */ @@ -802,7 +801,7 @@ V_028A90_FLUSH_AND_INV_CB_DATA_TS, 0, EOP_DATA_SEL_DISCARD, - 0, 0, 0, + 0, 0, gfx9_eop_bug_va); } } @@ -869,13 +868,14 @@ RADV_CMD_FLAG_INV_VMEM_L1); } assert(flush_cnt); - uint32_t old_fence = (*flush_cnt)++; + (*flush_cnt)++; si_cs_emit_write_event_eop(cs, chip_class, false, cb_db_event, tc_flags, EOP_DATA_SEL_VALUE_32BIT, - flush_va, old_fence, *flush_cnt, + flush_va, *flush_cnt, gfx9_eop_bug_va); - si_emit_wait_fence(cs, flush_va, 
*flush_cnt, 0xffffffff); + radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va, + *flush_cnt, 0xffffffff); } /* VGT state sync */ @@ -971,18 +971,12 @@ if (!cmd_buffer->state.flush_bits) return; - enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class; radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 128); - uint32_t *ptr = NULL; - uint64_t va = 0; - if (chip_class == GFX9) { - va = radv_buffer_get_va(cmd_buffer->gfx9_fence_bo) + cmd_buffer->gfx9_fence_offset; - ptr = &cmd_buffer->gfx9_fence_idx; - } si_cs_emit_cache_flush(cmd_buffer->cs, cmd_buffer->device->physical_device->rad_info.chip_class, - ptr, va, + &cmd_buffer->gfx9_fence_idx, + cmd_buffer->gfx9_fence_va, radv_cmd_buffer_uses_mec(cmd_buffer), cmd_buffer->state.flush_bits, cmd_buffer->gfx9_eop_bug_va); @@ -992,6 +986,11 @@ radv_cmd_buffer_trace_emit(cmd_buffer); cmd_buffer->state.flush_bits = 0; + + /* If the driver used a compute shader for resetting a query pool, it + * should be finished at this point. 
+ */ + cmd_buffer->pending_reset_query = false; } /* sets the CP predication state using a boolean stored at va */ diff -Nru mesa-18.3.3/src/amd/vulkan/vk_format_table.py mesa-19.0.1/src/amd/vulkan/vk_format_table.py --- mesa-18.3.3/src/amd/vulkan/vk_format_table.py 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/vk_format_table.py 2019-03-31 23:16:37.000000000 +0000 @@ -146,10 +146,6 @@ print("const struct vk_format_description *") print("vk_format_description(VkFormat format)") print("{") - print(" if (format > VK_FORMAT_END_RANGE) {") - print(" return NULL;") - print(" }") - print() print(" switch (format) {") for format in formats: print(" case %s:" % format.name) diff -Nru mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c --- mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c 2019-03-31 23:16:37.000000000 +0000 @@ -249,6 +249,7 @@ static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo) { struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo); + struct radv_amdgpu_winsys *ws = bo->ws; if (p_atomic_dec_return(&bo->ref_count)) return; @@ -269,6 +270,17 @@ 0, AMDGPU_VA_OP_UNMAP); amdgpu_bo_free(bo->bo); } + + if (bo->initial_domain & RADEON_DOMAIN_VRAM) + p_atomic_add(&ws->allocated_vram, + -align64(bo->size, ws->info.gart_page_size)); + if (bo->base.vram_cpu_access) + p_atomic_add(&ws->allocated_vram_vis, + -align64(bo->size, ws->info.gart_page_size)); + if (bo->initial_domain & RADEON_DOMAIN_GTT) + p_atomic_add(&ws->allocated_gtt, + -align64(bo->size, ws->info.gart_page_size)); + amdgpu_va_range_free(bo->va_handle); FREE(bo); } @@ -290,7 +302,8 @@ uint64_t size, unsigned alignment, enum radeon_bo_domain initial_domain, - unsigned flags) + unsigned flags, + unsigned priority) { struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); struct 
radv_amdgpu_winsys_bo *bo; @@ -344,8 +357,10 @@ if (initial_domain & RADEON_DOMAIN_GTT) request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT; - if (flags & RADEON_FLAG_CPU_ACCESS) + if (flags & RADEON_FLAG_CPU_ACCESS) { + bo->base.vram_cpu_access = initial_domain & RADEON_DOMAIN_VRAM; request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + } if (flags & RADEON_FLAG_NO_CPU_ACCESS) request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; if (flags & RADEON_FLAG_GTT_WC) @@ -378,6 +393,21 @@ bo->bo = buf_handle; bo->initial_domain = initial_domain; bo->is_shared = false; + bo->priority = priority; + + r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle); + assert(!r); + + if (initial_domain & RADEON_DOMAIN_VRAM) + p_atomic_add(&ws->allocated_vram, + align64(bo->size, ws->info.gart_page_size)); + if (bo->base.vram_cpu_access) + p_atomic_add(&ws->allocated_vram_vis, + align64(bo->size, ws->info.gart_page_size)); + if (initial_domain & RADEON_DOMAIN_GTT) + p_atomic_add(&ws->allocated_gtt, + align64(bo->size, ws->info.gart_page_size)); + radv_amdgpu_add_buffer_to_global_list(bo); return (struct radeon_winsys_bo *)bo; error_va_map: @@ -410,16 +440,40 @@ amdgpu_bo_cpu_unmap(bo->bo); } +static uint64_t +radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws, + uint64_t size, unsigned alignment) +{ + uint64_t vm_alignment = alignment; + + /* Increase the VM alignment for faster address translation. */ + if (size >= ws->info.pte_fragment_size) + vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size); + + /* Gfx9: Increase the VM alignment to the most significant bit set + * in the size for faster address translation. + */ + if (ws->info.chip_class >= GFX9) { + unsigned msb = util_last_bit64(size); /* 0 = no bit is set */ + uint64_t msb_alignment = msb ? 
1ull << (msb - 1) : 0; + + vm_alignment = MAX2(vm_alignment, msb_alignment); + } + return vm_alignment; +} + static struct radeon_winsys_bo * radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws, void *pointer, - uint64_t size) + uint64_t size, + unsigned priority) { struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); amdgpu_bo_handle buf_handle; struct radv_amdgpu_winsys_bo *bo; uint64_t va; amdgpu_va_handle va_handle; + uint64_t vm_alignment; bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo); if (!bo) @@ -428,8 +482,14 @@ if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle)) goto error; + /* Using the optimal VM alignment also fixes GPU hangs for buffers that + * are imported. + */ + vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size, + ws->info.gart_page_size); + if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, - size, 1 << 12, 0, &va, &va_handle, + size, vm_alignment, 0, &va, &va_handle, AMDGPU_VA_RANGE_HIGH)) goto error_va_alloc; @@ -444,6 +504,13 @@ bo->ws = ws; bo->bo = buf_handle; bo->initial_domain = RADEON_DOMAIN_GTT; + bo->priority = priority; + + MAYBE_UNUSED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle); + assert(!r); + + p_atomic_add(&ws->allocated_gtt, + align64(bo->size, ws->info.gart_page_size)); radv_amdgpu_add_buffer_to_global_list(bo); return (struct radeon_winsys_bo *)bo; @@ -461,7 +528,8 @@ static struct radeon_winsys_bo * radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, - int fd, unsigned *stride, + int fd, unsigned priority, + unsigned *stride, unsigned *offset) { struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); @@ -508,7 +576,19 @@ bo->size = result.alloc_size; bo->is_shared = true; bo->ws = ws; + bo->priority = priority; bo->ref_count = 1; + + r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle); + assert(!r); + + if (bo->initial_domain & RADEON_DOMAIN_VRAM) + p_atomic_add(&ws->allocated_vram, + align64(bo->size, 
ws->info.gart_page_size)); + if (bo->initial_domain & RADEON_DOMAIN_GTT) + p_atomic_add(&ws->allocated_gtt, + align64(bo->size, ws->info.gart_page_size)); + radv_amdgpu_add_buffer_to_global_list(bo); return (struct radeon_winsys_bo *)bo; error_va_map: diff -Nru mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h --- mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h 2017-11-07 20:47:52.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h 2019-03-31 23:16:37.000000000 +0000 @@ -45,6 +45,7 @@ uint64_t size; struct radv_amdgpu_winsys *ws; bool is_virtual; + uint8_t priority; int ref_count; union { @@ -53,6 +54,7 @@ amdgpu_bo_handle bo; enum radeon_bo_domain initial_domain; bool is_shared; + uint32_t bo_handle; struct list_head global_list_item; }; /* virtual bo */ diff -Nru mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c --- mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c 2019-03-31 23:16:37.000000000 +0000 @@ -50,7 +50,7 @@ uint8_t *ib_mapped; unsigned max_num_buffers; unsigned num_buffers; - amdgpu_bo_handle *handles; + struct drm_amdgpu_bo_list_entry *handles; struct radeon_winsys_bo **old_ib_buffers; unsigned num_old_ib_buffers; @@ -92,17 +92,71 @@ } } +struct radv_amdgpu_cs_request { + /** Specify flags with additional information */ + uint64_t flags; + + /** Specify HW IP block type to which to send the IB. */ + unsigned ip_type; + + /** IP instance index if there are several IPs of the same type. */ + unsigned ip_instance; + + /** + * Specify ring index of the IP. We could have several rings + * in the same IP. E.g. 0 for SDMA0 and 1 for SDMA1. + */ + uint32_t ring; + + /** + * List handle with resources used by this request. This is a raw + * bo list handle used by the kernel. 
+ */ + uint32_t resources; + + /** + * Number of dependencies this Command submission needs to + * wait for before starting execution. + */ + uint32_t number_of_dependencies; + + /** + * Array of dependencies which need to be met before + * execution can start. + */ + struct amdgpu_cs_fence *dependencies; + + /** Number of IBs to submit in the field ibs. */ + uint32_t number_of_ibs; + + /** + * IBs to submit. Those IBs will be submit together as single entity + */ + struct amdgpu_cs_ib_info *ibs; + + /** + * The returned sequence number for the command submission + */ + uint64_t seq_no; + + /** + * The fence information + */ + struct amdgpu_cs_fence_info fence_info; +}; + + static int radv_amdgpu_signal_sems(struct radv_amdgpu_ctx *ctx, uint32_t ip_type, uint32_t ring, struct radv_winsys_sem_info *sem_info); static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, - struct amdgpu_cs_request *request, + struct radv_amdgpu_cs_request *request, struct radv_winsys_sem_info *sem_info); static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_fence *fence, - struct amdgpu_cs_request *req) + struct radv_amdgpu_cs_request *req) { fence->fence.context = ctx->ctx; fence->fence.ip_type = req->ip_type; @@ -243,7 +297,8 @@ RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_READ_ONLY); + RADEON_FLAG_READ_ONLY, + RADV_BO_PRIORITY_CS); if (!cs->ib_buffer) { free(cs); return NULL; @@ -295,15 +350,6 @@ /* The maximum size in dwords has been reached, * try to allocate a new one. */ - if (cs->num_old_cs_buffers + 1 >= AMDGPU_CS_MAX_IBS_PER_SUBMIT) { - /* TODO: Allow to submit more than 4 IBs. 
*/ - fprintf(stderr, "amdgpu: Maximum number of IBs " - "per submit reached.\n"); - cs->failed = true; - cs->base.cdw = 0; - return; - } - cs->old_cs_buffers = realloc(cs->old_cs_buffers, (cs->num_old_cs_buffers + 1) * sizeof(*cs->old_cs_buffers)); @@ -367,7 +413,8 @@ RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_READ_ONLY); + RADEON_FLAG_READ_ONLY, + RADV_BO_PRIORITY_CS); if (!cs->ib_buffer) { cs->base.cdw = 0; @@ -421,8 +468,8 @@ cs->failed = false; for (unsigned i = 0; i < cs->num_buffers; ++i) { - unsigned hash = ((uintptr_t)cs->handles[i] >> 6) & - (ARRAY_SIZE(cs->buffer_hash_table) - 1); + unsigned hash = cs->handles[i].bo_handle & + (ARRAY_SIZE(cs->buffer_hash_table) - 1); cs->buffer_hash_table[hash] = -1; } @@ -457,19 +504,19 @@ } static int radv_amdgpu_cs_find_buffer(struct radv_amdgpu_cs *cs, - amdgpu_bo_handle bo) + uint32_t bo) { - unsigned hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1); + unsigned hash = bo & (ARRAY_SIZE(cs->buffer_hash_table) - 1); int index = cs->buffer_hash_table[hash]; if (index == -1) return -1; - if (cs->handles[index] == bo) + if (cs->handles[index].bo_handle == bo) return index; for (unsigned i = 0; i < cs->num_buffers; ++i) { - if (cs->handles[i] == bo) { + if (cs->handles[i].bo_handle == bo) { cs->buffer_hash_table[hash] = i; return i; } @@ -479,7 +526,7 @@ } static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs, - amdgpu_bo_handle bo) + uint32_t bo, uint8_t priority) { unsigned hash; int index = radv_amdgpu_cs_find_buffer(cs, bo); @@ -489,13 +536,14 @@ if (cs->num_buffers == cs->max_num_buffers) { unsigned new_count = MAX2(1, cs->max_num_buffers * 2); - cs->handles = realloc(cs->handles, new_count * sizeof(amdgpu_bo_handle)); + cs->handles = realloc(cs->handles, new_count * sizeof(struct drm_amdgpu_bo_list_entry)); cs->max_num_buffers = new_count; } - cs->handles[cs->num_buffers] = bo; + cs->handles[cs->num_buffers].bo_handle = bo; + 
cs->handles[cs->num_buffers].bo_priority = priority; - hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1); + hash = bo & (ARRAY_SIZE(cs->buffer_hash_table) - 1); cs->buffer_hash_table[hash] = cs->num_buffers; ++cs->num_buffers; @@ -553,7 +601,7 @@ if (bo->base.is_local) return; - radv_amdgpu_cs_add_buffer_internal(cs, bo->bo); + radv_amdgpu_cs_add_buffer_internal(cs, bo->bo_handle, bo->priority); } static void radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf *_parent, @@ -563,7 +611,9 @@ struct radv_amdgpu_cs *child = radv_amdgpu_cs(_child); for (unsigned i = 0; i < child->num_buffers; ++i) { - radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i]); + radv_amdgpu_cs_add_buffer_internal(parent, + child->handles[i].bo_handle, + child->handles[i].bo_priority); } for (unsigned i = 0; i < child->num_virtual_buffers; ++i) { @@ -594,13 +644,13 @@ unsigned num_extra_bo, struct radeon_cmdbuf *extra_cs, const struct radv_winsys_bo_list *radv_bo_list, - amdgpu_bo_list_handle *bo_list) + uint32_t *bo_list) { int r = 0; if (ws->debug_all_bos) { struct radv_amdgpu_winsys_bo *bo; - amdgpu_bo_handle *handles; + struct drm_amdgpu_bo_list_entry *handles; unsigned num = 0; pthread_mutex_lock(&ws->global_bo_list_lock); @@ -613,12 +663,13 @@ LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, global_list_item) { assert(num < ws->num_buffers); - handles[num++] = bo->bo; + handles[num].bo_handle = bo->bo_handle; + handles[num].bo_priority = bo->priority; + num++; } - r = amdgpu_bo_list_create(ws->dev, ws->num_buffers, - handles, NULL, - bo_list); + r = amdgpu_bo_list_create_raw(ws->dev, ws->num_buffers, + handles, bo_list); free(handles); pthread_mutex_unlock(&ws->global_bo_list_lock); } else if (count == 1 && !num_extra_bo && !extra_cs && !radv_bo_list && @@ -628,8 +679,8 @@ *bo_list = 0; return 0; } - r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles, - NULL, bo_list); + r = amdgpu_bo_list_create_raw(ws->dev, cs->num_buffers, cs->handles, + 
bo_list); } else { unsigned total_buffer_count = num_extra_bo; unsigned unique_bo_count = num_extra_bo; @@ -652,14 +703,15 @@ *bo_list = 0; return 0; } - amdgpu_bo_handle *handles = malloc(sizeof(amdgpu_bo_handle) * total_buffer_count); + struct drm_amdgpu_bo_list_entry *handles = malloc(sizeof(struct drm_amdgpu_bo_list_entry) * total_buffer_count); if (!handles) { free(handles); return -ENOMEM; } for (unsigned i = 0; i < num_extra_bo; i++) { - handles[i] = extra_bo_array[i]->bo; + handles[i].bo_handle = extra_bo_array[i]->bo_handle; + handles[i].bo_priority = extra_bo_array[i]->priority; } for (unsigned i = 0; i < count + !!extra_cs; ++i) { @@ -674,7 +726,7 @@ continue; if (unique_bo_count == 0 && !cs->num_virtual_buffers) { - memcpy(handles, cs->handles, cs->num_buffers * sizeof(amdgpu_bo_handle)); + memcpy(handles, cs->handles, cs->num_buffers * sizeof(struct drm_amdgpu_bo_list_entry)); unique_bo_count = cs->num_buffers; continue; } @@ -682,7 +734,7 @@ for (unsigned j = 0; j < cs->num_buffers; ++j) { bool found = false; for (unsigned k = 0; k < unique_bo_so_far; ++k) { - if (handles[k] == cs->handles[j]) { + if (handles[k].bo_handle == cs->handles[j].bo_handle) { found = true; break; } @@ -698,13 +750,14 @@ struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k]; bool found = false; for (unsigned m = 0; m < unique_bo_count; ++m) { - if (handles[m] == bo->bo) { + if (handles[m].bo_handle == bo->bo_handle) { found = true; break; } } if (!found) { - handles[unique_bo_count] = bo->bo; + handles[unique_bo_count].bo_handle = bo->bo_handle; + handles[unique_bo_count].bo_priority = bo->priority; ++unique_bo_count; } } @@ -717,21 +770,22 @@ struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(radv_bo_list->bos[i]); bool found = false; for (unsigned j = 0; j < unique_bo_so_far; ++j) { - if (bo->bo == handles[j]) { + if (bo->bo_handle == handles[j].bo_handle) { found = true; break; } } if (!found) { - handles[unique_bo_count] = bo->bo; + 
handles[unique_bo_count].bo_handle = bo->bo_handle; + handles[unique_bo_count].bo_priority = bo->priority; ++unique_bo_count; } } } if (unique_bo_count > 0) { - r = amdgpu_bo_list_create(ws->dev, unique_bo_count, handles, - NULL, bo_list); + r = amdgpu_bo_list_create_raw(ws->dev, unique_bo_count, handles, + bo_list); } else { *bo_list = 0; } @@ -753,7 +807,7 @@ } static void radv_assign_last_submit(struct radv_amdgpu_ctx *ctx, - struct amdgpu_cs_request *request) + struct radv_amdgpu_cs_request *request) { radv_amdgpu_request_to_fence(ctx, &ctx->last_submission[request->ip_type][request->ring], @@ -774,8 +828,8 @@ struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx); struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence; struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]); - amdgpu_bo_list_handle bo_list; - struct amdgpu_cs_request request = {0}; + uint32_t bo_list; + struct radv_amdgpu_cs_request request = {0}; struct amdgpu_cs_ib_info ibs[2]; unsigned number_of_ibs = 1; @@ -837,8 +891,7 @@ "see dmesg for more information.\n"); } - if (bo_list) - amdgpu_bo_list_destroy(bo_list); + amdgpu_bo_list_destroy_raw(ctx->ws->dev, bo_list); if (r) return r; @@ -864,67 +917,72 @@ int r; struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx); struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence; - amdgpu_bo_list_handle bo_list; - struct amdgpu_cs_request request; - bool emit_signal_sem = sem_info->cs_emit_signal; + uint32_t bo_list; + struct radv_amdgpu_cs_request request = {}; + struct amdgpu_cs_ib_info *ibs; + struct radv_amdgpu_cs *cs0; + unsigned number_of_ibs; + assert(cs_count); + cs0 = radv_amdgpu_cs(cs_array[0]); - for (unsigned i = 0; i < cs_count;) { - struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]); - struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT]; - struct radeon_cmdbuf *preamble_cs = i ? 
continue_preamble_cs : initial_preamble_cs; - unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT - !!preamble_cs, - cs_count - i); + /* Compute the number of IBs for this submit. */ + number_of_ibs = cs_count + !!initial_preamble_cs; - memset(&request, 0, sizeof(request)); + /* Create a buffer object list. */ + r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[0], cs_count, NULL, 0, + initial_preamble_cs, radv_bo_list, + &bo_list); + if (r) { + fprintf(stderr, "amdgpu: buffer list creation failed " + "for the fallback submission (%d)\n", r); + return r; + } - r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, 0, - preamble_cs, radv_bo_list, &bo_list); - if (r) { - fprintf(stderr, "amdgpu: buffer list creation failed " - "for the fallback submission (%d)\n", r); - return r; - } + ibs = malloc(number_of_ibs * sizeof(*ibs)); + if (!ibs) { + amdgpu_bo_list_destroy_raw(ctx->ws->dev, bo_list); + return -ENOMEM; + } - request.ip_type = cs0->hw_ip; - request.ring = queue_idx; - request.resources = bo_list; - request.number_of_ibs = cnt + !!preamble_cs; - request.ibs = ibs; - request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx); + /* Configure the CS request. */ + if (initial_preamble_cs) + ibs[0] = radv_amdgpu_cs(initial_preamble_cs)->ib; - if (preamble_cs) { - ibs[0] = radv_amdgpu_cs(preamble_cs)->ib; - } + for (unsigned i = 0; i < cs_count; i++) { + struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]); - for (unsigned j = 0; j < cnt; ++j) { - struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]); - ibs[j + !!preamble_cs] = cs->ib; + ibs[i + !!initial_preamble_cs] = cs->ib; - if (cs->is_chained) { - *cs->ib_size_ptr -= 4; - cs->is_chained = false; - } + if (cs->is_chained) { + *cs->ib_size_ptr -= 4; + cs->is_chained = false; } + } - sem_info->cs_emit_signal = (i == cs_count - cnt) ? 
emit_signal_sem : false; - r = radv_amdgpu_cs_submit(ctx, &request, sem_info); - if (r) { - if (r == -ENOMEM) - fprintf(stderr, "amdgpu: Not enough memory for command submission.\n"); - else - fprintf(stderr, "amdgpu: The CS has been rejected, " - "see dmesg for more information.\n"); - } + request.ip_type = cs0->hw_ip; + request.ring = queue_idx; + request.resources = bo_list; + request.number_of_ibs = number_of_ibs; + request.ibs = ibs; + request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx); + + /* Submit the CS. */ + r = radv_amdgpu_cs_submit(ctx, &request, sem_info); + if (r) { + if (r == -ENOMEM) + fprintf(stderr, "amdgpu: Not enough memory for command submission.\n"); + else + fprintf(stderr, "amdgpu: The CS has been rejected, " + "see dmesg for more information.\n"); + } - if (bo_list) - amdgpu_bo_list_destroy(bo_list); + amdgpu_bo_list_destroy_raw(ctx->ws->dev, bo_list); + free(ibs); - if (r) - return r; + if (r) + return r; - i += cnt; - } if (fence) radv_amdgpu_request_to_fence(ctx, fence, &request); @@ -948,8 +1006,8 @@ struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence; struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]); struct radeon_winsys *ws = (struct radeon_winsys*)cs0->ws; - amdgpu_bo_list_handle bo_list; - struct amdgpu_cs_request request; + uint32_t bo_list; + struct radv_amdgpu_cs_request request; uint32_t pad_word = 0xffff1000U; bool emit_signal_sem = sem_info->cs_emit_signal; @@ -959,30 +1017,46 @@ assert(cs_count); for (unsigned i = 0; i < cs_count;) { - struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT] = {0}; - unsigned number_of_ibs = 1; - struct radeon_winsys_bo *bos[AMDGPU_CS_MAX_IBS_PER_SUBMIT] = {0}; + struct amdgpu_cs_ib_info *ibs; + struct radeon_winsys_bo **bos; struct radeon_cmdbuf *preamble_cs = i ? 
continue_preamble_cs : initial_preamble_cs; struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]); + unsigned number_of_ibs; uint32_t *ptr; unsigned cnt = 0; unsigned size = 0; unsigned pad_words = 0; - if (cs->num_old_cs_buffers > 0) { + /* Compute the number of IBs for this submit. */ + number_of_ibs = cs->num_old_cs_buffers + 1; + + ibs = malloc(number_of_ibs * sizeof(*ibs)); + if (!ibs) + return -ENOMEM; + + bos = malloc(number_of_ibs * sizeof(*bos)); + if (!bos) { + free(ibs); + return -ENOMEM; + } + + if (number_of_ibs > 1) { /* Special path when the maximum size in dwords has * been reached because we need to handle more than one * IB per submit. */ - unsigned new_cs_count = cs->num_old_cs_buffers + 1; - struct radeon_cmdbuf *new_cs_array[AMDGPU_CS_MAX_IBS_PER_SUBMIT]; + struct radeon_cmdbuf **new_cs_array; unsigned idx = 0; + new_cs_array = malloc(cs->num_old_cs_buffers * + sizeof(*new_cs_array)); + assert(new_cs_array); + for (unsigned j = 0; j < cs->num_old_cs_buffers; j++) new_cs_array[idx++] = &cs->old_cs_buffers[j]; new_cs_array[idx++] = cs_array[i]; - for (unsigned j = 0; j < new_cs_count; j++) { + for (unsigned j = 0; j < number_of_ibs; j++) { struct radeon_cmdbuf *rcs = new_cs_array[j]; bool needs_preamble = preamble_cs && j == 0; unsigned size = 0; @@ -1002,7 +1076,8 @@ RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_READ_ONLY); + RADEON_FLAG_READ_ONLY, + RADV_BO_PRIORITY_CS); ptr = ws->buffer_map(bos[j]); if (needs_preamble) { @@ -1020,8 +1095,8 @@ ibs[j].ib_mc_address = radv_buffer_get_va(bos[j]); } - number_of_ibs = new_cs_count; cnt++; + free(new_cs_array); } else { if (preamble_cs) size += preamble_cs->cdw; @@ -1041,7 +1116,8 @@ RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_READ_ONLY); + RADEON_FLAG_READ_ONLY, + RADV_BO_PRIORITY_CS); ptr = ws->buffer_map(bos[0]); if (preamble_cs) { @@ -1070,6 +1146,8 @@ if (r) { fprintf(stderr, "amdgpu: buffer 
list creation failed " "for the sysmem submission (%d)\n", r); + free(ibs); + free(bos); return r; } @@ -1092,13 +1170,15 @@ "see dmesg for more information.\n"); } - if (bo_list) - amdgpu_bo_list_destroy(bo_list); + amdgpu_bo_list_destroy_raw(ctx->ws->dev, bo_list); for (unsigned j = 0; j < number_of_ibs; j++) { ws->buffer_destroy(bos[j]); } + free(ibs); + free(bos); + if (r) return r; @@ -1131,7 +1211,7 @@ if (!cs->ws->use_ib_bos) { ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, sem_info, bo_list, cs_array, cs_count, initial_preamble_cs, continue_preamble_cs, _fence); - } else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && cs->ws->batchchain) { + } else if (can_patch && cs->ws->batchchain) { ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, sem_info, bo_list, cs_array, cs_count, initial_preamble_cs, continue_preamble_cs, _fence); } else { @@ -1230,8 +1310,9 @@ assert(AMDGPU_HW_IP_NUM * MAX_RINGS_PER_TYPE * sizeof(uint64_t) <= 4096); ctx->fence_bo = ws->base.buffer_create(&ws->base, 4096, 8, RADEON_DOMAIN_GTT, - RADEON_FLAG_CPU_ACCESS| - RADEON_FLAG_NO_INTERPROCESS_SHARING); + RADEON_FLAG_CPU_ACCESS | + RADEON_FLAG_NO_INTERPROCESS_SHARING, + RADV_BO_PRIORITY_CS); if (ctx->fence_bo) ctx->fence_map = (uint64_t*)ws->base.buffer_map(ctx->fence_bo); if (ctx->fence_map) @@ -1318,7 +1399,7 @@ } static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, - struct amdgpu_cs_request *request, + struct radv_amdgpu_cs_request *request, struct radv_winsys_sem_info *sem_info) { int r; @@ -1420,7 +1501,7 @@ num_chunks++; } - r = amdgpu_cs_submit_raw(ctx->ws->dev, + r = amdgpu_cs_submit_raw2(ctx->ws->dev, ctx->ctx, request->resources, num_chunks, diff -Nru mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c --- mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c 2019-03-31 23:16:37.000000000 +0000 @@ -29,7 +29,6 @@ #include #include "radv_private.h" -#include "addrlib/addrinterface.h" #include "util/bitset.h" #include "radv_amdgpu_winsys.h" #include "radv_amdgpu_surface.h" diff -Nru mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c --- mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c 2019-03-31 23:16:37.000000000 +0000 @@ -72,6 +72,12 @@ uint64_t retval = 0; switch (value) { + case RADEON_ALLOCATED_VRAM: + return ws->allocated_vram; + case RADEON_ALLOCATED_VRAM_VIS: + return ws->allocated_vram_vis; + case RADEON_ALLOCATED_GTT: + return ws->allocated_gtt; case RADEON_TIMESTAMP: amdgpu_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval); return retval; diff -Nru mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h --- mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h 2018-07-29 21:30:58.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h 2019-03-31 23:16:37.000000000 +0000 @@ -30,7 +30,7 @@ #include "radv_radeon_winsys.h" #include "ac_gpu_info.h" -#include "addrlib/addrinterface.h" +#include "addrlib/inc/addrinterface.h" #include #include "util/list.h" #include @@ -52,6 +52,10 @@ pthread_mutex_t global_bo_list_lock; struct list_head global_bo_list; + + uint64_t allocated_vram; + uint64_t allocated_vram_vis; + uint64_t allocated_gtt; }; static inline struct radv_amdgpu_winsys * diff -Nru mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h --- mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h 2017-11-05 00:14:08.000000000 +0000 +++ 
mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h 2019-03-31 23:16:37.000000000 +0000 @@ -29,6 +29,13 @@ #ifndef RADV_AMDGPU_WINSYS_PUBLIC_H #define RADV_AMDGPU_WINSYS_PUBLIC_H +/* The number of IBs per submit isn't infinite, it depends on the ring type + * (ie. some initial setup needed for a submit) and the number of IBs (4 DW). + * This limit is arbitrary but should be safe for now. Ideally, we should get + * this limit from the KMD. +*/ +#define RADV_MAX_IBS_PER_SUBMIT 192 + struct radeon_winsys *radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags); diff -Nru mesa-18.3.3/src/broadcom/cle/v3d_packet_v33.xml mesa-19.0.1/src/broadcom/cle/v3d_packet_v33.xml --- mesa-18.3.3/src/broadcom/cle/v3d_packet_v33.xml 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/broadcom/cle/v3d_packet_v33.xml 2019-03-31 23:16:37.000000000 +0000 @@ -99,12 +99,12 @@ - - - - - - + + + + + + @@ -174,6 +174,16 @@ + + + + + + + + + @@ -147,6 +149,7 @@ + - - + + @@ -909,6 +940,12 @@ EGLint *num_config + EGLBoolean eglClientSignalSyncEXT + EGLDisplay dpy + EGLSync sync + const EGLAttrib *attrib_list + + EGLint eglClientWaitSync EGLDisplay dpy EGLSync sync @@ -1191,9 +1228,21 @@ EGLNativeDisplayType display_id + char *eglGetDisplayDriverConfig + EGLDisplay dpy + + + const char *eglGetDisplayDriverName + EGLDisplay dpy + + EGLint eglGetError + EGLClientBuffer eglGetNativeClientBufferANDROID + const struct AHardwareBuffer *buffer + + EGLBoolean eglGetOutputLayersEXT EGLDisplay dpy const EGLAttrib *attrib_list @@ -1312,6 +1361,41 @@ EGLnsecsANDROID time + EGLBoolean eglGetCompositorTimingSupportedANDROID + EGLDisplay dpy + EGLSurface surface + EGLint name + + + EGLBoolean eglGetCompositorTimingANDROID + EGLDisplay dpy + EGLSurface surface + EGLint numTimestamps + const EGLint *names + EGLnsecsANDROID *values + + + EGLBoolean eglGetNextFrameIdANDROID + EGLDisplay dpy + EGLSurface surface + EGLuint64KHR *frameId + + + EGLBoolean 
eglGetFrameTimestampSupportedANDROID + EGLDisplay dpy + EGLSurface surface + EGLint timestamp + + + EGLBoolean eglGetFrameTimestampsANDROID + EGLDisplay dpy + EGLSurface surface + EGLuint64KHR frameId + EGLint numTimestamps + const EGLint *timestamps + EGLnsecsANDROID *values + + EGLenum eglQueryAPI @@ -1567,7 +1651,7 @@ EGLBoolean eglStreamConsumerGLTextureExternalAttribsNV EGLDisplay dpy EGLStreamKHR stream - EGLAttrib *attrib_list + const EGLAttrib *attrib_list EGLBoolean eglStreamConsumerOutputEXT @@ -1587,6 +1671,11 @@ const EGLAttrib *attrib_list + EGLBoolean eglStreamFlushNV + EGLDisplay dpy + EGLStreamKHR stream + + EGLBoolean eglSurfaceAttrib EGLDisplay dpy EGLSurface surface @@ -1641,6 +1730,12 @@ EGLSurface surface + EGLBoolean eglUnsignalSyncEXT + EGLDisplay dpy + EGLSync sync + const EGLAttrib *attrib_list + + EGLBoolean eglWaitClient @@ -1986,6 +2081,11 @@ + + + + + @@ -2010,6 +2110,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + @@ -2057,6 +2181,13 @@ + + + + + + + @@ -2079,6 +2210,7 @@ + @@ -2131,6 +2263,11 @@ + + + + + @@ -2174,6 +2311,12 @@ + + + + + + @@ -2276,6 +2419,11 @@ + + + + + @@ -2690,6 +2838,7 @@ + @@ -2711,6 +2860,12 @@ + + + + + + @@ -2737,6 +2892,11 @@ + + + + + @@ -2831,6 +2991,11 @@ + + + + + @@ -2965,12 +3130,12 @@ - - - - - - + + + + + + diff -Nru mesa-18.3.3/src/egl/generate/genCommon.py mesa-19.0.1/src/egl/generate/genCommon.py --- mesa-18.3.3/src/egl/generate/genCommon.py 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/egl/generate/genCommon.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,223 +0,0 @@ -#!/usr/bin/env python - -# (C) Copyright 2015, NVIDIA CORPORATION. -# All Rights Reserved. 
-# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# on the rights to use, copy, modify, merge, publish, distribute, sub -# license, and/or sell copies of the Software, and to permit persons to whom -# the Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL -# IBM AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. -# -# Authors: -# Kyle Brenneman - -import collections -import re -import sys -import xml.etree.cElementTree as etree - -MAPI_TABLE_NUM_DYNAMIC = 4096 - -_LIBRARY_FEATURE_NAMES = { - # libGL and libGLdiapatch both include every function. 
- "gl" : None, - "gldispatch" : None, - "opengl" : frozenset(( "GL_VERSION_1_0", "GL_VERSION_1_1", - "GL_VERSION_1_2", "GL_VERSION_1_3", "GL_VERSION_1_4", "GL_VERSION_1_5", - "GL_VERSION_2_0", "GL_VERSION_2_1", "GL_VERSION_3_0", "GL_VERSION_3_1", - "GL_VERSION_3_2", "GL_VERSION_3_3", "GL_VERSION_4_0", "GL_VERSION_4_1", - "GL_VERSION_4_2", "GL_VERSION_4_3", "GL_VERSION_4_4", "GL_VERSION_4_5", - )), - "glesv1" : frozenset(("GL_VERSION_ES_CM_1_0", "GL_OES_point_size_array")), - "glesv2" : frozenset(("GL_ES_VERSION_2_0", "GL_ES_VERSION_3_0", - "GL_ES_VERSION_3_1" "GL_ES_VERSION_3_2", - )), -} - -def getFunctions(xmlFiles): - """ - Reads an XML file and returns all of the functions defined in it. - - xmlFile should be the path to Khronos's gl.xml file. The return value is a - sequence of FunctionDesc objects, ordered by slot number. - """ - roots = [ etree.parse(xmlFile).getroot() for xmlFile in xmlFiles ] - return getFunctionsFromRoots(roots) - -def getFunctionsFromRoots(roots): - functions = {} - for root in roots: - for func in _getFunctionList(root): - functions[func.name] = func - functions = functions.values() - - # Sort the function list by name. - functions = sorted(functions, key=lambda f: f.name) - - # Assign a slot number to each function. This isn't strictly necessary, - # since you can just look at the index in the list, but it makes it easier - # to include the slot when formatting output. - for i in range(len(functions)): - functions[i] = functions[i]._replace(slot=i) - - return functions - -def getExportNamesFromRoots(target, roots): - """ - Goes through the tags from gl.xml and returns a set of OpenGL - functions that a library should export. - - target should be one of "gl", "gldispatch", "opengl", "glesv1", or - "glesv2". 
- """ - featureNames = _LIBRARY_FEATURE_NAMES[target] - if featureNames is None: - return set(func.name for func in getFunctionsFromRoots(roots)) - - names = set() - for root in roots: - features = [] - for featElem in root.findall("feature"): - if featElem.get("name") in featureNames: - features.append(featElem) - for featElem in root.findall("extensions/extension"): - if featElem.get("name") in featureNames: - features.append(featElem) - for featElem in features: - for commandElem in featElem.findall("require/command"): - names.add(commandElem.get("name")) - return names - -class FunctionArg(collections.namedtuple("FunctionArg", "type name")): - @property - def dec(self): - """ - Returns a "TYPE NAME" string, suitable for a function prototype. - """ - rv = str(self.type) - if not rv.endswith("*"): - rv += " " - rv += self.name - return rv - -class FunctionDesc(collections.namedtuple("FunctionDesc", "name rt args slot")): - def hasReturn(self): - """ - Returns true if the function returns a value. - """ - return (self.rt != "void") - - @property - def decArgs(self): - """ - Returns a string with the types and names of the arguments, as you - would use in a function declaration. - """ - if not self.args: - return "void" - else: - return ", ".join(arg.dec for arg in self.args) - - @property - def callArgs(self): - """ - Returns a string with the names of the arguments, as you would use in a - function call. - """ - return ", ".join(arg.name for arg in self.args) - - @property - def basename(self): - assert self.name.startswith("gl") - return self.name[2:] - -def _getFunctionList(root): - for elem in root.findall("commands/command"): - yield _parseCommandElem(elem) - -def _parseCommandElem(elem): - protoElem = elem.find("proto") - (rt, name) = _parseProtoElem(protoElem) - - args = [] - for ch in elem.findall("param"): - # tags have the same format as a tag. 
- args.append(FunctionArg(*_parseProtoElem(ch))) - func = FunctionDesc(name, rt, tuple(args), slot=None) - - return func - -def _parseProtoElem(elem): - # If I just remove the tags and string the text together, I'll get valid C code. - text = _flattenText(elem) - text = text.strip() - m = re.match(r"^(.+)\b(\w+)(?:\s*\[\s*(\d*)\s*\])?$", text, re.S) - if m: - typename = _fixupTypeName(m.group(1)) - name = m.group(2) - if m.group(3): - # HACK: glPathGlyphIndexRangeNV defines an argument like this: - # GLuint baseAndCount[2] - # Convert it to a pointer and hope for the best. - typename += "*" - return (typename, name) - else: - raise ValueError("Can't parse element %r -> %r" % (elem, text)) - -def _flattenText(elem): - """ - Returns the text in an element and all child elements, with the tags - removed. - """ - text = "" - if elem.text is not None: - text = elem.text - for ch in elem: - text += _flattenText(ch) - if ch.tail is not None: - text += ch.tail - return text - -def _fixupTypeName(typeName): - """ - Converts a typename into a more consistent format. - """ - - rv = typeName.strip() - - # Replace "GLvoid" with just plain "void". - rv = re.sub(r"\bGLvoid\b", "void", rv) - - # Remove the vendor suffixes from types that have a suffix-less version. - rv = re.sub(r"\b(GLhalf|GLintptr|GLsizeiptr|GLint64|GLuint64)(?:ARB|EXT|NV|ATI)\b", r"\1", rv) - - rv = re.sub(r"\bGLvoid\b", "void", rv) - - # Clear out any leading and trailing whitespace. - rv = rv.strip() - - # Remove any whitespace before a '*' - rv = re.sub(r"\s+\*", r"*", rv) - - # Change "foo*" to "foo *" - rv = re.sub(r"([^\*])\*", r"\1 *", rv) - - # Condense all whitespace into a single space. 
- rv = re.sub(r"\s+", " ", rv) - - return rv - diff -Nru mesa-18.3.3/src/egl/generate/gen_egl_dispatch.py mesa-19.0.1/src/egl/generate/gen_egl_dispatch.py --- mesa-18.3.3/src/egl/generate/gen_egl_dispatch.py 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/egl/generate/gen_egl_dispatch.py 2019-03-31 23:16:37.000000000 +0000 @@ -34,25 +34,23 @@ import argparse import collections -import imp +import eglFunctionList import sys import textwrap +import os +NEWAPI = os.path.join(os.path.dirname(__file__), "..", "..", "mapi", "new") +sys.path.insert(0, NEWAPI) import genCommon def main(): parser = argparse.ArgumentParser() parser.add_argument("target", choices=("header", "source"), help="Whether to build the source or header file.") - parser.add_argument("func_list_file", help="The function list .py file.") parser.add_argument("xml_files", nargs="+", help="The XML files with the EGL function lists.") args = parser.parse_args() - # The function list is a Python module, but it's specified on the command - # line. 
- eglFunctionList = imp.load_source("eglFunctionList", args.func_list_file) - xmlFunctions = genCommon.getFunctions(args.xml_files) xmlByName = dict((f.name, f) for f in xmlFunctions) functions = [] diff -Nru mesa-18.3.3/src/egl/main/eglapi.c mesa-19.0.1/src/egl/main/eglapi.c --- mesa-18.3.3/src/egl/main/eglapi.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/egl/main/eglapi.c 2019-03-31 23:16:37.000000000 +0000 @@ -90,6 +90,8 @@ #include "c11/threads.h" #include "util/macros.h" +#include "eglapi.h" +#include "egldefines.h" #include "eglglobals.h" #include "eglcontext.h" #include "egldisplay.h" @@ -526,6 +528,7 @@ _eglAppendExtension(&exts, "EGL_MESA_configless_context"); _EGL_CHECK_EXTENSION(MESA_drm_image); _EGL_CHECK_EXTENSION(MESA_image_dma_buf_export); + _EGL_CHECK_EXTENSION(MESA_query_driver); _EGL_CHECK_EXTENSION(NOK_swap_region); _EGL_CHECK_EXTENSION(NOK_texture_from_pixmap); @@ -2647,6 +2650,38 @@ RETURN_EGL_SUCCESS(disp, EGL_TRUE); } +static char * EGLAPIENTRY +eglGetDisplayDriverConfig(EGLDisplay dpy) +{ + _EGLDisplay *disp = _eglLockDisplay(dpy); + _EGLDriver *drv; + char *ret; + + _EGL_FUNC_START(disp, EGL_NONE, NULL, NULL); + _EGL_CHECK_DISPLAY(disp, NULL, drv); + + assert(disp->Extensions.MESA_query_driver); + + ret = drv->API.QueryDriverConfig(disp); + RETURN_EGL_EVAL(disp, ret); +} + +static const char * EGLAPIENTRY +eglGetDisplayDriverName(EGLDisplay dpy) +{ + _EGLDisplay *disp = _eglLockDisplay(dpy); + _EGLDriver *drv; + const char *ret; + + _EGL_FUNC_START(disp, EGL_NONE, NULL, NULL); + _EGL_CHECK_DISPLAY(disp, NULL, drv); + + assert(disp->Extensions.MESA_query_driver); + + ret = drv->API.QueryDriverName(disp); + RETURN_EGL_EVAL(disp, ret); +} + __eglMustCastToProperFunctionPointerType EGLAPIENTRY eglGetProcAddress(const char *procname) { diff -Nru mesa-18.3.3/src/egl/main/eglapi.h mesa-19.0.1/src/egl/main/eglapi.h --- mesa-18.3.3/src/egl/main/eglapi.h 2018-02-08 14:40:56.000000000 +0000 +++ mesa-19.0.1/src/egl/main/eglapi.h 2019-03-31 
23:16:37.000000000 +0000 @@ -31,6 +31,7 @@ #ifndef EGLAPI_INCLUDED #define EGLAPI_INCLUDED +#include "egltypedefs.h" #ifdef __cplusplus extern "C" { @@ -54,6 +55,8 @@ /* driver funcs */ EGLBoolean (*Initialize)(_EGLDriver *, _EGLDisplay *dpy); EGLBoolean (*Terminate)(_EGLDriver *, _EGLDisplay *dpy); + const char *(*QueryDriverName)(_EGLDisplay *dpy); + char *(*QueryDriverConfig)(_EGLDisplay *dpy); /* config funcs */ EGLBoolean (*GetConfigs)(_EGLDriver *drv, _EGLDisplay *dpy, diff -Nru mesa-18.3.3/src/egl/main/eglcontext.c mesa-19.0.1/src/egl/main/eglcontext.c --- mesa-18.3.3/src/egl/main/eglcontext.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/egl/main/eglcontext.c 2019-03-31 23:16:37.000000000 +0000 @@ -37,6 +37,7 @@ #include "eglcurrent.h" #include "eglsurface.h" #include "egllog.h" +#include "util/macros.h" /** diff -Nru mesa-18.3.3/src/egl/main/eglcurrent.c mesa-19.0.1/src/egl/main/eglcurrent.c --- mesa-18.3.3/src/egl/main/eglcurrent.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/egl/main/eglcurrent.c 2019-03-31 23:16:37.000000000 +0000 @@ -310,20 +310,28 @@ mtx_unlock(_eglGlobal.Mutex); - if (callback != NULL) { - char *buf = NULL; + char *message_buf = NULL; + if (message != NULL) { + va_start(args, message); + if (vasprintf(&message_buf, message, args) < 0) + message_buf = NULL; + va_end(args); + } - if (message != NULL) { - va_start(args, message); - if (vasprintf(&buf, message, args) < 0) - buf = NULL; + if (callback != NULL) { + callback(error, funcName, type, thr->Label, thr->CurrentObjectLabel, + message_buf); + } - va_end(args); + if (type == EGL_DEBUG_MSG_CRITICAL_KHR || type == EGL_DEBUG_MSG_ERROR_KHR) { + char *func_message_buf = NULL; + /* Note: _eglError() is often called with msg == thr->currentFuncName */ + if (message_buf && funcName && strcmp(message_buf, funcName) != 0) { + if (asprintf(&func_message_buf, "%s: %s", funcName, message_buf) < 0) + func_message_buf = NULL; } - callback(error, funcName, type, thr->Label, 
thr->CurrentObjectLabel, buf); - free(buf); + _eglInternalError(error, func_message_buf ? func_message_buf : funcName); + free(func_message_buf); } - - if (type == EGL_DEBUG_MSG_CRITICAL_KHR || type == EGL_DEBUG_MSG_ERROR_KHR) - _eglInternalError(error, funcName); + free(message_buf); } diff -Nru mesa-18.3.3/src/egl/main/egldefines.h mesa-19.0.1/src/egl/main/egldefines.h --- mesa-18.3.3/src/egl/main/egldefines.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/egl/main/egldefines.h 2019-03-31 23:16:37.000000000 +0000 @@ -34,8 +34,6 @@ #ifndef EGLDEFINES_INCLUDED #define EGLDEFINES_INCLUDED -#include "util/macros.h" - #ifdef __cplusplus extern "C" { #endif diff -Nru mesa-18.3.3/src/egl/main/egldevice.c mesa-19.0.1/src/egl/main/egldevice.c --- mesa-18.3.3/src/egl/main/egldevice.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/egl/main/egldevice.c 2019-03-31 23:16:37.000000000 +0000 @@ -202,18 +202,6 @@ }; } -/* Ideally we'll have an extension which passes the render node, - * instead of the card one + magic. - * - * Then we can move this in _eglQueryDeviceStringEXT below. Until then - * keep it separate. 
- */ -const char * -_eglGetDRMDeviceRenderNode(_EGLDevice *dev) -{ - return dev->device->nodes[DRM_NODE_RENDER]; -} - EGLBoolean _eglQueryDeviceAttribEXT(_EGLDevice *dev, EGLint attribute, EGLAttrib *value) diff -Nru mesa-18.3.3/src/egl/main/egldevice.h mesa-19.0.1/src/egl/main/egldevice.h --- mesa-18.3.3/src/egl/main/egldevice.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/egl/main/egldevice.h 2019-03-31 23:16:37.000000000 +0000 @@ -31,9 +31,9 @@ #include +#include #include "egltypedefs.h" - #ifdef __cplusplus extern "C" { #endif @@ -68,9 +68,6 @@ EGLBoolean _eglDeviceSupports(_EGLDevice *dev, _EGLDeviceExtension ext); -const char * -_eglGetDRMDeviceRenderNode(_EGLDevice *dev); - EGLBoolean _eglQueryDeviceAttribEXT(_EGLDevice *dev, EGLint attribute, EGLAttrib *value); diff -Nru mesa-18.3.3/src/egl/main/egldisplay.h mesa-19.0.1/src/egl/main/egldisplay.h --- mesa-18.3.3/src/egl/main/egldisplay.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/egl/main/egldisplay.h 2019-03-31 23:16:37.000000000 +0000 @@ -138,6 +138,7 @@ EGLBoolean MESA_drm_image; EGLBoolean MESA_image_dma_buf_export; + EGLBoolean MESA_query_driver; EGLBoolean NOK_swap_region; EGLBoolean NOK_texture_from_pixmap; diff -Nru mesa-18.3.3/src/egl/main/eglentrypoint.h mesa-19.0.1/src/egl/main/eglentrypoint.h --- mesa-18.3.3/src/egl/main/eglentrypoint.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/egl/main/eglentrypoint.h 2019-03-31 23:16:37.000000000 +0000 @@ -42,6 +42,8 @@ EGL_ENTRYPOINT(eglGetCurrentDisplay) EGL_ENTRYPOINT(eglGetCurrentSurface) EGL_ENTRYPOINT(eglGetDisplay) +EGL_ENTRYPOINT(eglGetDisplayDriverConfig) +EGL_ENTRYPOINT(eglGetDisplayDriverName) EGL_ENTRYPOINT(eglGetError) EGL_ENTRYPOINT(eglGetPlatformDisplay) EGL_ENTRYPOINT(eglGetPlatformDisplayEXT) diff -Nru mesa-18.3.3/src/egl/main/eglglobals.c mesa-19.0.1/src/egl/main/eglglobals.c --- mesa-18.3.3/src/egl/main/eglglobals.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/egl/main/eglglobals.c 2019-03-31 
23:16:37.000000000 +0000 @@ -40,6 +40,8 @@ #include "egldriver.h" #include "egllog.h" +#include "util/macros.h" + #ifdef HAVE_MINCORE #include #include diff -Nru mesa-18.3.3/src/egl/main/eglsurface.c mesa-19.0.1/src/egl/main/eglsurface.c --- mesa-18.3.3/src/egl/main/eglsurface.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/egl/main/eglsurface.c 2019-03-31 23:16:37.000000000 +0000 @@ -36,6 +36,7 @@ #include #include #include +#include "egldefines.h" #include "egldisplay.h" #include "egldriver.h" #include "eglcontext.h" @@ -44,6 +45,7 @@ #include "egllog.h" #include "eglsurface.h" +#include "util/macros.h" /** * Parse the list of surface attributes and return the proper error code. diff -Nru mesa-18.3.3/src/egl/Makefile.am mesa-19.0.1/src/egl/Makefile.am --- mesa-18.3.3/src/egl/Makefile.am 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/egl/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -119,8 +119,7 @@ -I$(top_srcdir)/src/egl/drivers/dri2 \ -I$(top_srcdir)/src/gbm/backends/dri \ -I$(top_builddir)/src/egl/wayland/wayland-drm \ - -I$(top_srcdir)/src/egl/wayland/wayland-drm \ - -DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" + -I$(top_srcdir)/src/egl/wayland/wayland-drm nodist_libEGL_common_la_SOURCES = \ $(dri2_backend_GENERATED_FILES) @@ -137,20 +136,26 @@ $(LIBDRM_LIBS) \ $(CLOCK_LIB) -GLVND_GEN_DEPS = generate/gen_egl_dispatch.py \ - generate/egl.xml generate/eglFunctionList.py generate/genCommon.py \ +# dummy rule to keep dist happy +$(top_scrdir)/src/mapi/new/genCommon.py: + +GLVND_GEN_EGL_DEPS = \ + generate/gen_egl_dispatch.py \ + generate/eglFunctionList.py \ + generate/egl.xml \ generate/egl_other.xml +GLVND_GEN_DEPS = $(top_scrdir)/src/mapi/new/genCommon.py \ + $(GLVND_GEN_EGL_DEPS) + PYTHON_GEN = $(AM_V_GEN)$(PYTHON) $(PYTHON_FLAGS) g_egldispatchstubs.c: $(GLVND_GEN_DEPS) $(PYTHON_GEN) $(top_srcdir)/src/egl/generate/gen_egl_dispatch.py source \ - $(top_srcdir)/src/egl/generate/eglFunctionList.py \ 
$(top_srcdir)/src/egl/generate/egl.xml \ $(top_srcdir)/src/egl/generate/egl_other.xml > $@ g_egldispatchstubs.h: $(GLVND_GEN_DEPS) $(PYTHON_GEN) $(top_srcdir)/src/egl/generate/gen_egl_dispatch.py header \ - $(top_srcdir)/src/egl/generate/eglFunctionList.py \ $(top_srcdir)/src/egl/generate/egl.xml \ $(top_srcdir)/src/egl/generate/egl_other.xml > $@ @@ -229,6 +234,6 @@ drivers/haiku \ main/egl.def \ main/README.txt \ - $(GLVND_GEN_DEPS) \ + $(GLVND_GEN_EGL_DEPS) \ main/50_mesa.json \ meson.build diff -Nru mesa-18.3.3/src/egl/meson.build mesa-19.0.1/src/egl/meson.build --- mesa-18.3.3/src/egl/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/egl/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -1,4 +1,4 @@ -# Copyright © 2017 Intel Corporation +# Copyright © 2017 Intel Corporation # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -62,28 +62,28 @@ g_egldispatchstubs_c = custom_target( 'g_egldispatchstubs.c', input : [ - 'generate/gen_egl_dispatch.py', 'generate/eglFunctionList.py', + 'generate/gen_egl_dispatch.py', 'generate/egl.xml', 'generate/egl_other.xml' ], output : 'g_egldispatchstubs.c', command : [ - prog_python, '@INPUT0@', 'source', '@INPUT1@', '@INPUT2@', '@INPUT3@' + prog_python, '@INPUT0@', 'source', '@INPUT1@', '@INPUT2@', ], - depend_files : files('generate/genCommon.py'), + depend_files : [ files('generate/eglFunctionList.py'), genCommon_py, ], capture : true, ) g_egldispatchstubs_h = custom_target( 'g_egldispatchstubs.h', input : [ - 'generate/gen_egl_dispatch.py', 'generate/eglFunctionList.py', + 'generate/gen_egl_dispatch.py', 'generate/egl.xml', 'generate/egl_other.xml' ], output : 'g_egldispatchstubs.h', command : [ - prog_python, '@INPUT0@', 'header', '@INPUT1@', '@INPUT2@', '@INPUT3@' + prog_python, '@INPUT0@', 'header', '@INPUT1@', '@INPUT2@', ], - depend_files : files('generate/genCommon.py'), + depend_files : [ 
files('generate/eglFunctionList.py'), genCommon_py, ], capture : true, ) @@ -93,13 +93,11 @@ 'drivers/dri2/egl_dri2.h', 'drivers/dri2/egl_dri2_fallbacks.h', ) - c_args_for_egl += [ - '-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path), - ] + link_for_egl += [libloader, libxmlconfig] + incs_for_egl += inc_loader if with_platform_x11 files_egl += files('drivers/dri2/platform_x11.c') - incs_for_egl += inc_loader if with_dri3 files_egl += files('drivers/dri2/platform_x11_dri3.c') link_for_egl += libloader_dri3_helper @@ -108,13 +106,12 @@ endif if with_platform_drm files_egl += files('drivers/dri2/platform_drm.c') - link_for_egl += [libloader, libgbm, libxmlconfig] - incs_for_egl += [inc_loader, inc_gbm, include_directories('../gbm/main')] + link_for_egl += libgbm + incs_for_egl += [inc_gbm, include_directories('../gbm/main')] deps_for_egl += dep_libdrm endif if with_platform_surfaceless files_egl += files('drivers/dri2/platform_surfaceless.c') - incs_for_egl += [inc_loader] endif if with_platform_wayland deps_for_egl += [dep_wayland_client, dep_wayland_server, dep_wayland_egl_headers] @@ -130,7 +127,6 @@ if with_platform_android deps_for_egl += dep_android files_egl += files('drivers/dri2/platform_android.c') - incs_for_egl += [inc_loader] endif elif with_platform_haiku incs_for_egl += inc_haikugl @@ -169,7 +165,7 @@ '-D_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_@0@'.format(egl_native_platform.to_upper()), ], include_directories : incs_for_egl, - link_with : [link_for_egl, libloader, libxmlconfig, libglapi, libmesa_util], + link_with : [link_for_egl, libglapi, libmesa_util], link_args : [ld_args_bsymbolic, ld_args_gc_sections], dependencies : [deps_for_egl, dep_dl, dep_libdrm, dep_clock, dep_thread], install : true, @@ -203,11 +199,13 @@ test('egl-symbols-check', find_program('egl-symbols-check'), env : env_test, - args : libegl + args : libegl, + suite : ['egl'], ) endif test('egl-entrypoint-check', find_program('egl-entrypoint-check'), - env : [ 'srcdir=' + 
meson.current_source_dir() ] + env : ['srcdir=' + meson.current_source_dir()], + suite : ['egl'], ) endif diff -Nru mesa-18.3.3/src/egl/wayland/wayland-drm/wayland-drm.c mesa-19.0.1/src/egl/wayland/wayland-drm/wayland-drm.c --- mesa-18.3.3/src/egl/wayland/wayland-drm/wayland-drm.c 2018-01-06 23:02:18.000000000 +0000 +++ mesa-19.0.1/src/egl/wayland/wayland-drm/wayland-drm.c 2019-03-31 23:16:37.000000000 +0000 @@ -111,6 +111,8 @@ uint32_t stride, uint32_t format) { switch (format) { + case WL_DRM_FORMAT_ABGR2101010: + case WL_DRM_FORMAT_XBGR2101010: case WL_DRM_FORMAT_ARGB2101010: case WL_DRM_FORMAT_XRGB2101010: case WL_DRM_FORMAT_ARGB8888: @@ -210,10 +212,31 @@ wl_resource_set_implementation(resource, &drm_interface, data, NULL); wl_resource_post_event(resource, WL_DRM_DEVICE, drm->device_name); - wl_resource_post_event(resource, WL_DRM_FORMAT, - WL_DRM_FORMAT_ARGB2101010); - wl_resource_post_event(resource, WL_DRM_FORMAT, - WL_DRM_FORMAT_XRGB2101010); + + if (drm->callbacks.is_format_supported(drm->user_data, + WL_DRM_FORMAT_ARGB2101010)) { + wl_resource_post_event(resource, WL_DRM_FORMAT, + WL_DRM_FORMAT_ARGB2101010); + } + + if (drm->callbacks.is_format_supported(drm->user_data, + WL_DRM_FORMAT_XRGB2101010)) { + wl_resource_post_event(resource, WL_DRM_FORMAT, + WL_DRM_FORMAT_XRGB2101010); + } + + if (drm->callbacks.is_format_supported(drm->user_data, + WL_DRM_FORMAT_ABGR2101010)) { + wl_resource_post_event(resource, WL_DRM_FORMAT, + WL_DRM_FORMAT_ABGR2101010); + } + + if (drm->callbacks.is_format_supported(drm->user_data, + WL_DRM_FORMAT_XBGR2101010)) { + wl_resource_post_event(resource, WL_DRM_FORMAT, + WL_DRM_FORMAT_XBGR2101010); + } + wl_resource_post_event(resource, WL_DRM_FORMAT, WL_DRM_FORMAT_ARGB8888); wl_resource_post_event(resource, WL_DRM_FORMAT, diff -Nru mesa-18.3.3/src/egl/wayland/wayland-drm/wayland-drm.h mesa-19.0.1/src/egl/wayland/wayland-drm/wayland-drm.h --- mesa-18.3.3/src/egl/wayland/wayland-drm/wayland-drm.h 2017-11-14 18:46:21.000000000 
+0000 +++ mesa-19.0.1/src/egl/wayland/wayland-drm/wayland-drm.h 2019-03-31 23:16:37.000000000 +0000 @@ -14,6 +14,8 @@ struct wl_drm_buffer *buffer); void (*release_buffer)(void *user_data, struct wl_drm_buffer *buffer); + + bool (*is_format_supported)(void *user_data, uint32_t format); }; diff -Nru mesa-18.3.3/src/freedreno/.dir-locals.el mesa-19.0.1/src/freedreno/.dir-locals.el --- mesa-18.3.3/src/freedreno/.dir-locals.el 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/.dir-locals.el 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,8 @@ +((prog-mode + (indent-tabs-mode . t) + (tab-width . 4) + (c-basic-offset . 4) + (c-file-style . "k&r") + (fill-column . 78) + ) + ) diff -Nru mesa-18.3.3/src/freedreno/drm/freedreno_bo.c mesa-19.0.1/src/freedreno/drm/freedreno_bo.c --- mesa-18.3.3/src/freedreno/drm/freedreno_bo.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/freedreno_bo.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,368 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "os/os_mman.h" + +#include "freedreno_drmif.h" +#include "freedreno_priv.h" + +pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER; +void bo_del(struct fd_bo *bo); + +/* set buffer name, and add to table, call w/ table_lock held: */ +static void set_name(struct fd_bo *bo, uint32_t name) +{ + bo->name = name; + /* add ourself into the handle table: */ + _mesa_hash_table_insert(bo->dev->name_table, &bo->name, bo); +} + +/* lookup a buffer, call w/ table_lock held: */ +static struct fd_bo * lookup_bo(struct hash_table *tbl, uint32_t key) +{ + struct fd_bo *bo = NULL; + struct hash_entry *entry = _mesa_hash_table_search(tbl, &key); + if (entry) { + /* found, incr refcnt and return: */ + bo = fd_bo_ref(entry->data); + + /* don't break the bucket if this bo was found in one */ + list_delinit(&bo->list); + } + return bo; +} + +/* allocate a new buffer object, call w/ table_lock held */ +static struct fd_bo * bo_from_handle(struct fd_device *dev, + uint32_t size, uint32_t handle) +{ + struct fd_bo *bo; + + bo = dev->funcs->bo_from_handle(dev, size, handle); + if (!bo) { + struct drm_gem_close req = { + .handle = handle, + }; + drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req); + return NULL; + } + bo->dev = fd_device_ref(dev); + bo->size = size; + bo->handle = handle; + p_atomic_set(&bo->refcnt, 1); + list_inithead(&bo->list); + /* add ourself into the handle table: */ + _mesa_hash_table_insert(dev->handle_table, &bo->handle, bo); + return bo; +} + +static struct fd_bo * +bo_new(struct fd_device *dev, uint32_t size, uint32_t flags, + struct fd_bo_cache *cache) +{ + struct fd_bo *bo = NULL; + uint32_t handle; + int ret; + + bo = fd_bo_cache_alloc(cache, 
&size, flags); + if (bo) + return bo; + + ret = dev->funcs->bo_new_handle(dev, size, flags, &handle); + if (ret) + return NULL; + + pthread_mutex_lock(&table_lock); + bo = bo_from_handle(dev, size, handle); + pthread_mutex_unlock(&table_lock); + + VG_BO_ALLOC(bo); + + return bo; +} + +struct fd_bo * +_fd_bo_new(struct fd_device *dev, uint32_t size, uint32_t flags) +{ + struct fd_bo *bo = bo_new(dev, size, flags, &dev->bo_cache); + if (bo) + bo->bo_reuse = BO_CACHE; + return bo; +} + +void +_fd_bo_set_name(struct fd_bo *bo, const char *fmt, va_list ap) +{ + bo->funcs->set_name(bo, fmt, ap); +} + +/* internal function to allocate bo's that use the ringbuffer cache + * instead of the normal bo_cache. The purpose is, because cmdstream + * bo's get vmap'd on the kernel side, and that is expensive, we want + * to re-use cmdstream bo's for cmdstream and not unrelated purposes. + */ +struct fd_bo * +fd_bo_new_ring(struct fd_device *dev, uint32_t size, uint32_t flags) +{ + struct fd_bo *bo = bo_new(dev, size, flags, &dev->ring_cache); + if (bo) + bo->bo_reuse = RING_CACHE; + fd_bo_set_name(bo, "cmdstream"); + return bo; +} + +struct fd_bo * +fd_bo_from_handle(struct fd_device *dev, uint32_t handle, uint32_t size) +{ + struct fd_bo *bo = NULL; + + pthread_mutex_lock(&table_lock); + + bo = lookup_bo(dev->handle_table, handle); + if (bo) + goto out_unlock; + + bo = bo_from_handle(dev, size, handle); + + VG_BO_ALLOC(bo); + +out_unlock: + pthread_mutex_unlock(&table_lock); + + return bo; +} + +struct fd_bo * +fd_bo_from_dmabuf(struct fd_device *dev, int fd) +{ + int ret, size; + uint32_t handle; + struct fd_bo *bo; + + pthread_mutex_lock(&table_lock); + ret = drmPrimeFDToHandle(dev->fd, fd, &handle); + if (ret) { + pthread_mutex_unlock(&table_lock); + return NULL; + } + + bo = lookup_bo(dev->handle_table, handle); + if (bo) + goto out_unlock; + + /* lseek() to get bo size */ + size = lseek(fd, 0, SEEK_END); + lseek(fd, 0, SEEK_CUR); + + bo = bo_from_handle(dev, size, handle); + 
+ VG_BO_ALLOC(bo); + +out_unlock: + pthread_mutex_unlock(&table_lock); + + return bo; +} + +struct fd_bo * fd_bo_from_name(struct fd_device *dev, uint32_t name) +{ + struct drm_gem_open req = { + .name = name, + }; + struct fd_bo *bo; + + pthread_mutex_lock(&table_lock); + + /* check name table first, to see if bo is already open: */ + bo = lookup_bo(dev->name_table, name); + if (bo) + goto out_unlock; + + if (drmIoctl(dev->fd, DRM_IOCTL_GEM_OPEN, &req)) { + ERROR_MSG("gem-open failed: %s", strerror(errno)); + goto out_unlock; + } + + bo = lookup_bo(dev->handle_table, req.handle); + if (bo) + goto out_unlock; + + bo = bo_from_handle(dev, req.size, req.handle); + if (bo) { + set_name(bo, name); + VG_BO_ALLOC(bo); + } + +out_unlock: + pthread_mutex_unlock(&table_lock); + + return bo; +} + +uint64_t fd_bo_get_iova(struct fd_bo *bo) +{ + if (!bo->iova) + bo->iova = bo->funcs->iova(bo); + return bo->iova; +} + +void fd_bo_put_iova(struct fd_bo *bo) +{ + /* currently a no-op */ +} + +struct fd_bo * fd_bo_ref(struct fd_bo *bo) +{ + p_atomic_inc(&bo->refcnt); + return bo; +} + +void fd_bo_del(struct fd_bo *bo) +{ + struct fd_device *dev = bo->dev; + + if (!atomic_dec_and_test(&bo->refcnt)) + return; + + pthread_mutex_lock(&table_lock); + + if ((bo->bo_reuse == BO_CACHE) && (fd_bo_cache_free(&dev->bo_cache, bo) == 0)) + goto out; + if ((bo->bo_reuse == RING_CACHE) && (fd_bo_cache_free(&dev->ring_cache, bo) == 0)) + goto out; + + bo_del(bo); + fd_device_del_locked(dev); +out: + pthread_mutex_unlock(&table_lock); +} + +/* Called under table_lock */ +void bo_del(struct fd_bo *bo) +{ + VG_BO_FREE(bo); + + if (bo->map) + os_munmap(bo->map, bo->size); + + /* TODO probably bo's in bucket list get removed from + * handle table?? 
+ */ + + if (bo->handle) { + struct drm_gem_close req = { + .handle = bo->handle, + }; + _mesa_hash_table_remove_key(bo->dev->handle_table, &bo->handle); + if (bo->name) + _mesa_hash_table_remove_key(bo->dev->name_table, &bo->name); + drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &req); + } + + bo->funcs->destroy(bo); +} + +int fd_bo_get_name(struct fd_bo *bo, uint32_t *name) +{ + if (!bo->name) { + struct drm_gem_flink req = { + .handle = bo->handle, + }; + int ret; + + ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_FLINK, &req); + if (ret) { + return ret; + } + + pthread_mutex_lock(&table_lock); + set_name(bo, req.name); + pthread_mutex_unlock(&table_lock); + bo->bo_reuse = NO_CACHE; + } + + *name = bo->name; + + return 0; +} + +uint32_t fd_bo_handle(struct fd_bo *bo) +{ + return bo->handle; +} + +int fd_bo_dmabuf(struct fd_bo *bo) +{ + int ret, prime_fd; + + ret = drmPrimeHandleToFD(bo->dev->fd, bo->handle, DRM_CLOEXEC, + &prime_fd); + if (ret) { + ERROR_MSG("failed to get dmabuf fd: %d", ret); + return ret; + } + + bo->bo_reuse = NO_CACHE; + + return prime_fd; +} + +uint32_t fd_bo_size(struct fd_bo *bo) +{ + return bo->size; +} + +void * fd_bo_map(struct fd_bo *bo) +{ + if (!bo->map) { + uint64_t offset; + int ret; + + ret = bo->funcs->offset(bo, &offset); + if (ret) { + return NULL; + } + + bo->map = os_mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, + bo->dev->fd, offset); + if (bo->map == MAP_FAILED) { + ERROR_MSG("mmap failed: %s", strerror(errno)); + bo->map = NULL; + } + } + return bo->map; +} + +/* a bit odd to take the pipe as an arg, but it's a, umm, quirk of kgsl.. 
*/ +int fd_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op) +{ + return bo->funcs->cpu_prep(bo, pipe, op); +} + +void fd_bo_cpu_fini(struct fd_bo *bo) +{ + bo->funcs->cpu_fini(bo); +} diff -Nru mesa-18.3.3/src/freedreno/drm/freedreno_bo_cache.c mesa-19.0.1/src/freedreno/drm/freedreno_bo_cache.c --- mesa-18.3.3/src/freedreno/drm/freedreno_bo_cache.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/freedreno_bo_cache.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,218 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include "freedreno_drmif.h" +#include "freedreno_priv.h" + +void bo_del(struct fd_bo *bo); +extern pthread_mutex_t table_lock; + +static void +add_bucket(struct fd_bo_cache *cache, int size) +{ + unsigned int i = cache->num_buckets; + + assert(i < ARRAY_SIZE(cache->cache_bucket)); + + list_inithead(&cache->cache_bucket[i].list); + cache->cache_bucket[i].size = size; + cache->num_buckets++; +} + +/** + * @coarse: if true, only power-of-two bucket sizes, otherwise + * fill in for a bit smoother size curve.. + */ +void +fd_bo_cache_init(struct fd_bo_cache *cache, int coarse) +{ + unsigned long size, cache_max_size = 64 * 1024 * 1024; + + /* OK, so power of two buckets was too wasteful of memory. + * Give 3 other sizes between each power of two, to hopefully + * cover things accurately enough. (The alternative is + * probably to just go for exact matching of sizes, and assume + * that for things like composited window resize the tiled + * width/height alignment and rounding of sizes to pages will + * get us useful cache hit rates anyway) + */ + add_bucket(cache, 4096); + add_bucket(cache, 4096 * 2); + if (!coarse) + add_bucket(cache, 4096 * 3); + + /* Initialize the linked lists for BO reuse cache. */ + for (size = 4 * 4096; size <= cache_max_size; size *= 2) { + add_bucket(cache, size); + if (!coarse) { + add_bucket(cache, size + size * 1 / 4); + add_bucket(cache, size + size * 2 / 4); + add_bucket(cache, size + size * 3 / 4); + } + } +} + +/* Frees older cached buffers. 
Called under table_lock */ +void +fd_bo_cache_cleanup(struct fd_bo_cache *cache, time_t time) +{ + int i; + + if (cache->time == time) + return; + + for (i = 0; i < cache->num_buckets; i++) { + struct fd_bo_bucket *bucket = &cache->cache_bucket[i]; + struct fd_bo *bo; + + while (!LIST_IS_EMPTY(&bucket->list)) { + bo = LIST_ENTRY(struct fd_bo, bucket->list.next, list); + + /* keep things in cache for at least 1 second: */ + if (time && ((time - bo->free_time) <= 1)) + break; + + VG_BO_OBTAIN(bo); + list_del(&bo->list); + bo_del(bo); + } + } + + cache->time = time; +} + +static struct fd_bo_bucket * get_bucket(struct fd_bo_cache *cache, uint32_t size) +{ + int i; + + /* hmm, this is what intel does, but I suppose we could calculate our + * way to the correct bucket size rather than looping.. + */ + for (i = 0; i < cache->num_buckets; i++) { + struct fd_bo_bucket *bucket = &cache->cache_bucket[i]; + if (bucket->size >= size) { + return bucket; + } + } + + return NULL; +} + +static int is_idle(struct fd_bo *bo) +{ + return fd_bo_cpu_prep(bo, NULL, + DRM_FREEDRENO_PREP_READ | + DRM_FREEDRENO_PREP_WRITE | + DRM_FREEDRENO_PREP_NOSYNC) == 0; +} + +static struct fd_bo *find_in_bucket(struct fd_bo_bucket *bucket, uint32_t flags) +{ + struct fd_bo *bo = NULL; + + /* TODO .. if we had an ALLOC_FOR_RENDER flag like intel, we could + * skip the busy check.. if it is only going to be a render target + * then we probably don't need to stall.. + * + * NOTE that intel takes ALLOC_FOR_RENDER bo's from the list tail + * (MRU, since likely to be in GPU cache), rather than head (LRU).. + */ + pthread_mutex_lock(&table_lock); + if (!LIST_IS_EMPTY(&bucket->list)) { + bo = LIST_ENTRY(struct fd_bo, bucket->list.next, list); + /* TODO check for compatible flags? 
*/ + if (is_idle(bo)) { + list_del(&bo->list); + } else { + bo = NULL; + } + } + pthread_mutex_unlock(&table_lock); + + return bo; +} + +/* NOTE: size is potentially rounded up to bucket size: */ +struct fd_bo * +fd_bo_cache_alloc(struct fd_bo_cache *cache, uint32_t *size, uint32_t flags) +{ + struct fd_bo *bo = NULL; + struct fd_bo_bucket *bucket; + + *size = align(*size, 4096); + bucket = get_bucket(cache, *size); + + /* see if we can be green and recycle: */ +retry: + if (bucket) { + *size = bucket->size; + bo = find_in_bucket(bucket, flags); + if (bo) { + VG_BO_OBTAIN(bo); + if (bo->funcs->madvise(bo, TRUE) <= 0) { + /* we've lost the backing pages, delete and try again: */ + pthread_mutex_lock(&table_lock); + bo_del(bo); + pthread_mutex_unlock(&table_lock); + goto retry; + } + p_atomic_set(&bo->refcnt, 1); + fd_device_ref(bo->dev); + return bo; + } + } + + return NULL; +} + +int +fd_bo_cache_free(struct fd_bo_cache *cache, struct fd_bo *bo) +{ + struct fd_bo_bucket *bucket = get_bucket(cache, bo->size); + + /* see if we can be green and recycle: */ + if (bucket) { + struct timespec time; + + bo->funcs->madvise(bo, FALSE); + + clock_gettime(CLOCK_MONOTONIC, &time); + + bo->free_time = time.tv_sec; + VG_BO_RELEASE(bo); + list_addtail(&bo->list, &bucket->list); + fd_bo_cache_cleanup(cache, time.tv_sec); + + /* bo's in the bucket cache don't have a ref and + * don't hold a ref to the dev: + */ + fd_device_del_locked(bo->dev); + + return 0; + } + + return -1; +} diff -Nru mesa-18.3.3/src/freedreno/drm/freedreno_device.c mesa-19.0.1/src/freedreno/drm/freedreno_device.c --- mesa-18.3.3/src/freedreno/drm/freedreno_device.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/freedreno_device.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,156 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to 
deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include +#include +#include + +#include "freedreno_drmif.h" +#include "freedreno_priv.h" + +static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER; + +static uint32_t +u32_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(uint32_t)); +} + +static bool +u32_equals(const void *key1, const void *key2) +{ + return *(const uint32_t *)key1 == *(const uint32_t *)key2; +} + + +struct fd_device * kgsl_device_new(int fd); +struct fd_device * msm_device_new(int fd); + +struct fd_device * fd_device_new(int fd) +{ + struct fd_device *dev; + drmVersionPtr version; + + /* figure out if we are kgsl or msm drm driver: */ + version = drmGetVersion(fd); + if (!version) { + ERROR_MSG("cannot get version: %s", strerror(errno)); + return NULL; + } + + if (!strcmp(version->name, "msm")) { + DEBUG_MSG("msm DRM device"); + if (version->version_major != 1) { + ERROR_MSG("unsupported version: %u.%u.%u", version->version_major, + version->version_minor, version->version_patchlevel); + dev = 
NULL; + goto out; + } + + dev = msm_device_new(fd); + dev->version = version->version_minor; +#if HAVE_FREEDRENO_KGSL + } else if (!strcmp(version->name, "kgsl")) { + DEBUG_MSG("kgsl DRM device"); + dev = kgsl_device_new(fd); +#endif + } else { + ERROR_MSG("unknown device: %s", version->name); + dev = NULL; + } + +out: + drmFreeVersion(version); + + if (!dev) + return NULL; + + p_atomic_set(&dev->refcnt, 1); + dev->fd = fd; + dev->handle_table = _mesa_hash_table_create(NULL, u32_hash, u32_equals); + dev->name_table = _mesa_hash_table_create(NULL, u32_hash, u32_equals); + fd_bo_cache_init(&dev->bo_cache, FALSE); + fd_bo_cache_init(&dev->ring_cache, TRUE); + + return dev; +} + +/* like fd_device_new() but creates it's own private dup() of the fd + * which is close()d when the device is finalized. + */ +struct fd_device * fd_device_new_dup(int fd) +{ + int dup_fd = dup(fd); + struct fd_device *dev = fd_device_new(dup_fd); + if (dev) + dev->closefd = 1; + else + close(dup_fd); + return dev; +} + +struct fd_device * fd_device_ref(struct fd_device *dev) +{ + p_atomic_inc(&dev->refcnt); + return dev; +} + +static void fd_device_del_impl(struct fd_device *dev) +{ + int close_fd = dev->closefd ? 
dev->fd : -1; + fd_bo_cache_cleanup(&dev->bo_cache, 0); + _mesa_hash_table_destroy(dev->handle_table, NULL); + _mesa_hash_table_destroy(dev->name_table, NULL); + dev->funcs->destroy(dev); + if (close_fd >= 0) + close(close_fd); +} + +void fd_device_del_locked(struct fd_device *dev) +{ + if (!atomic_dec_and_test(&dev->refcnt)) + return; + fd_device_del_impl(dev); +} + +void fd_device_del(struct fd_device *dev) +{ + if (!atomic_dec_and_test(&dev->refcnt)) + return; + pthread_mutex_lock(&table_lock); + fd_device_del_impl(dev); + pthread_mutex_unlock(&table_lock); +} + +int fd_device_fd(struct fd_device *dev) +{ + return dev->fd; +} + +enum fd_version fd_device_version(struct fd_device *dev) +{ + return dev->version; +} diff -Nru mesa-18.3.3/src/freedreno/drm/freedreno_drmif.h mesa-19.0.1/src/freedreno/drm/freedreno_drmif.h --- mesa-18.3.3/src/freedreno/drm/freedreno_drmif.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/freedreno_drmif.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,166 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef FREEDRENO_DRMIF_H_ +#define FREEDRENO_DRMIF_H_ + +#include + +#include "util/u_debug.h" + +struct fd_bo; +struct fd_pipe; +struct fd_device; + +enum fd_pipe_id { + FD_PIPE_3D = 1, + FD_PIPE_2D = 2, + /* some devices have two 2d blocks.. not really sure how to + * use that yet, so just ignoring the 2nd 2d pipe for now + */ + FD_PIPE_MAX +}; + +enum fd_param_id { + FD_DEVICE_ID, + FD_GMEM_SIZE, + FD_GPU_ID, + FD_CHIP_ID, + FD_MAX_FREQ, + FD_TIMESTAMP, + FD_NR_RINGS, /* # of rings == # of distinct priority levels */ +}; + +/* bo flags: */ +#define DRM_FREEDRENO_GEM_TYPE_SMI 0x00000001 +#define DRM_FREEDRENO_GEM_TYPE_KMEM 0x00000002 +#define DRM_FREEDRENO_GEM_TYPE_MEM_MASK 0x0000000f +#define DRM_FREEDRENO_GEM_CACHE_NONE 0x00000000 +#define DRM_FREEDRENO_GEM_CACHE_WCOMBINE 0x00100000 +#define DRM_FREEDRENO_GEM_CACHE_WTHROUGH 0x00200000 +#define DRM_FREEDRENO_GEM_CACHE_WBACK 0x00400000 +#define DRM_FREEDRENO_GEM_CACHE_WBACKWA 0x00800000 +#define DRM_FREEDRENO_GEM_CACHE_MASK 0x00f00000 +#define DRM_FREEDRENO_GEM_GPUREADONLY 0x01000000 +#define DRM_FREEDRENO_GEM_SCANOUT 0x02000000 + +/* bo access flags: (keep aligned to MSM_PREP_x) */ +#define DRM_FREEDRENO_PREP_READ 0x01 +#define DRM_FREEDRENO_PREP_WRITE 0x02 +#define DRM_FREEDRENO_PREP_NOSYNC 0x04 + +/* device functions: + */ + +struct fd_device * fd_device_new(int fd); +struct fd_device * fd_device_new_dup(int fd); +struct fd_device * fd_device_ref(struct fd_device *dev); +void fd_device_del(struct fd_device *dev); +int fd_device_fd(struct fd_device *dev); + +enum fd_version { + FD_VERSION_MADVISE = 1, /* kernel supports madvise */ + FD_VERSION_UNLIMITED_CMDS = 1, /* submits w/ >4 cmd buffers (growable 
ringbuffer) */ + FD_VERSION_FENCE_FD = 2, /* submit command supports in/out fences */ + FD_VERSION_SUBMIT_QUEUES = 3, /* submit queues and multiple priority levels */ + FD_VERSION_BO_IOVA = 3, /* supports fd_bo_get/put_iova() */ + FD_VERSION_SOFTPIN = 4, /* adds softpin, bo name, and dump flag */ +}; +enum fd_version fd_device_version(struct fd_device *dev); + +/* pipe functions: + */ + +struct fd_pipe * fd_pipe_new(struct fd_device *dev, enum fd_pipe_id id); +struct fd_pipe * fd_pipe_new2(struct fd_device *dev, enum fd_pipe_id id, uint32_t prio); +struct fd_pipe * fd_pipe_ref(struct fd_pipe *pipe); +void fd_pipe_del(struct fd_pipe *pipe); +int fd_pipe_get_param(struct fd_pipe *pipe, enum fd_param_id param, + uint64_t *value); +int fd_pipe_wait(struct fd_pipe *pipe, uint32_t timestamp); +/* timeout in nanosec */ +int fd_pipe_wait_timeout(struct fd_pipe *pipe, uint32_t timestamp, + uint64_t timeout); + + +/* buffer-object functions: + */ + +struct fd_bo * _fd_bo_new(struct fd_device *dev, + uint32_t size, uint32_t flags); +void _fd_bo_set_name(struct fd_bo *bo, const char *fmt, va_list ap); + +static inline void +fd_bo_set_name(struct fd_bo *bo, const char *fmt, ...) _util_printf_format(2, 3); + +static inline void +fd_bo_set_name(struct fd_bo *bo, const char *fmt, ...) +{ +#ifndef NDEBUG + va_list ap; + va_start(ap, fmt); + _fd_bo_set_name(bo, fmt, ap); + va_end(ap); +#endif +} + +static inline struct fd_bo * +fd_bo_new(struct fd_device *dev, uint32_t size, uint32_t flags, + const char *fmt, ...) _util_printf_format(4, 5); + +static inline struct fd_bo * +fd_bo_new(struct fd_device *dev, uint32_t size, uint32_t flags, + const char *fmt, ...) 
+{ + struct fd_bo *bo = _fd_bo_new(dev, size, flags); +#ifndef NDEBUG + if (fmt) { + va_list ap; + va_start(ap, fmt); + _fd_bo_set_name(bo, fmt, ap); + va_end(ap); + } +#endif + return bo; +} + +struct fd_bo *fd_bo_from_handle(struct fd_device *dev, + uint32_t handle, uint32_t size); +struct fd_bo * fd_bo_from_name(struct fd_device *dev, uint32_t name); +struct fd_bo * fd_bo_from_dmabuf(struct fd_device *dev, int fd); +uint64_t fd_bo_get_iova(struct fd_bo *bo); +void fd_bo_put_iova(struct fd_bo *bo); +struct fd_bo * fd_bo_ref(struct fd_bo *bo); +void fd_bo_del(struct fd_bo *bo); +int fd_bo_get_name(struct fd_bo *bo, uint32_t *name); +uint32_t fd_bo_handle(struct fd_bo *bo); +int fd_bo_dmabuf(struct fd_bo *bo); +uint32_t fd_bo_size(struct fd_bo *bo); +void * fd_bo_map(struct fd_bo *bo); +int fd_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op); +void fd_bo_cpu_fini(struct fd_bo *bo); + +#endif /* FREEDRENO_DRMIF_H_ */ diff -Nru mesa-18.3.3/src/freedreno/drm/freedreno_pipe.c mesa-19.0.1/src/freedreno/drm/freedreno_pipe.c --- mesa-18.3.3/src/freedreno/drm/freedreno_pipe.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/freedreno_pipe.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "freedreno_drmif.h" +#include "freedreno_priv.h" + +/** + * priority of zero is highest priority, and higher numeric values are + * lower priorities + */ +struct fd_pipe * +fd_pipe_new2(struct fd_device *dev, enum fd_pipe_id id, uint32_t prio) +{ + struct fd_pipe *pipe; + uint64_t val; + + if (id > FD_PIPE_MAX) { + ERROR_MSG("invalid pipe id: %d", id); + return NULL; + } + + if ((prio != 1) && (fd_device_version(dev) < FD_VERSION_SUBMIT_QUEUES)) { + ERROR_MSG("invalid priority!"); + return NULL; + } + + pipe = dev->funcs->pipe_new(dev, id, prio); + if (!pipe) { + ERROR_MSG("allocation failed"); + return NULL; + } + + pipe->dev = dev; + pipe->id = id; + p_atomic_set(&pipe->refcnt, 1); + + fd_pipe_get_param(pipe, FD_GPU_ID, &val); + pipe->gpu_id = val; + + return pipe; +} + +struct fd_pipe * +fd_pipe_new(struct fd_device *dev, enum fd_pipe_id id) +{ + return fd_pipe_new2(dev, id, 1); +} + +struct fd_pipe * fd_pipe_ref(struct fd_pipe *pipe) +{ + p_atomic_inc(&pipe->refcnt); + return pipe; +} + +void fd_pipe_del(struct fd_pipe *pipe) +{ + if (!atomic_dec_and_test(&pipe->refcnt)) + return; + pipe->funcs->destroy(pipe); +} + +int fd_pipe_get_param(struct fd_pipe *pipe, + enum fd_param_id param, uint64_t *value) +{ + return pipe->funcs->get_param(pipe, param, value); +} + +int fd_pipe_wait(struct fd_pipe *pipe, uint32_t timestamp) +{ + return fd_pipe_wait_timeout(pipe, timestamp, ~0); +} + +int fd_pipe_wait_timeout(struct fd_pipe *pipe, 
uint32_t timestamp, + uint64_t timeout) +{ + return pipe->funcs->wait(pipe, timestamp, timeout); +} diff -Nru mesa-18.3.3/src/freedreno/drm/freedreno_priv.h mesa-19.0.1/src/freedreno/drm/freedreno_priv.h --- mesa-18.3.3/src/freedreno/drm/freedreno_priv.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/freedreno_priv.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,259 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#ifndef FREEDRENO_PRIV_H_ +#define FREEDRENO_PRIV_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "util/hash_table.h" +#include "util/list.h" +#include "util/u_debug.h" +#include "util/u_atomic.h" +#include "util/u_math.h" +#include "util/u_debug.h" + +#include "freedreno_drmif.h" +#include "freedreno_ringbuffer.h" + +#define atomic_dec_and_test(x) (__sync_add_and_fetch (x, -1) == 0) + +struct fd_device_funcs { + int (*bo_new_handle)(struct fd_device *dev, uint32_t size, + uint32_t flags, uint32_t *handle); + struct fd_bo * (*bo_from_handle)(struct fd_device *dev, + uint32_t size, uint32_t handle); + struct fd_pipe * (*pipe_new)(struct fd_device *dev, enum fd_pipe_id id, + unsigned prio); + void (*destroy)(struct fd_device *dev); +}; + +struct fd_bo_bucket { + uint32_t size; + struct list_head list; +}; + +struct fd_bo_cache { + struct fd_bo_bucket cache_bucket[14 * 4]; + int num_buckets; + time_t time; +}; + +struct fd_device { + int fd; + enum fd_version version; + int32_t refcnt; + + /* tables to keep track of bo's, to avoid "evil-twin" fd_bo objects: + * + * handle_table: maps handle to fd_bo + * name_table: maps flink name to fd_bo + * + * We end up needing two tables, because DRM_IOCTL_GEM_OPEN always + * returns a new handle. So we need to figure out if the bo is already + * open in the process first, before calling gem-open. 
+ */ + struct hash_table *handle_table, *name_table; + + const struct fd_device_funcs *funcs; + + struct fd_bo_cache bo_cache; + struct fd_bo_cache ring_cache; + + int closefd; /* call close(fd) upon destruction */ + + /* just for valgrind: */ + int bo_size; +}; + +void fd_bo_cache_init(struct fd_bo_cache *cache, int coarse); +void fd_bo_cache_cleanup(struct fd_bo_cache *cache, time_t time); +struct fd_bo * fd_bo_cache_alloc(struct fd_bo_cache *cache, + uint32_t *size, uint32_t flags); +int fd_bo_cache_free(struct fd_bo_cache *cache, struct fd_bo *bo); + +/* for where @table_lock is already held: */ +void fd_device_del_locked(struct fd_device *dev); + +struct fd_pipe_funcs { + struct fd_ringbuffer * (*ringbuffer_new_object)(struct fd_pipe *pipe, uint32_t size); + struct fd_submit * (*submit_new)(struct fd_pipe *pipe); + int (*get_param)(struct fd_pipe *pipe, enum fd_param_id param, uint64_t *value); + int (*wait)(struct fd_pipe *pipe, uint32_t timestamp, uint64_t timeout); + void (*destroy)(struct fd_pipe *pipe); +}; + +struct fd_pipe { + struct fd_device *dev; + enum fd_pipe_id id; + uint32_t gpu_id; + int32_t refcnt; + const struct fd_pipe_funcs *funcs; +}; + +struct fd_submit_funcs { + struct fd_ringbuffer * (*new_ringbuffer)(struct fd_submit *submit, + uint32_t size, enum fd_ringbuffer_flags flags); + int (*flush)(struct fd_submit *submit, int in_fence_fd, + int *out_fence_fd, uint32_t *out_fence); + void (*destroy)(struct fd_submit *submit); +}; + +struct fd_submit { + struct fd_pipe *pipe; + const struct fd_submit_funcs *funcs; +}; + +struct fd_ringbuffer_funcs { + void (*grow)(struct fd_ringbuffer *ring, uint32_t size); + void (*emit_reloc)(struct fd_ringbuffer *ring, + const struct fd_reloc *reloc); + uint32_t (*emit_reloc_ring)(struct fd_ringbuffer *ring, + struct fd_ringbuffer *target, uint32_t cmd_idx); + uint32_t (*cmd_count)(struct fd_ringbuffer *ring); + void (*destroy)(struct fd_ringbuffer *ring); +}; + +struct fd_bo_funcs { + int (*offset)(struct 
fd_bo *bo, uint64_t *offset); + int (*cpu_prep)(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op); + void (*cpu_fini)(struct fd_bo *bo); + int (*madvise)(struct fd_bo *bo, int willneed); + uint64_t (*iova)(struct fd_bo *bo); + void (*set_name)(struct fd_bo *bo, const char *fmt, va_list ap); + void (*destroy)(struct fd_bo *bo); +}; + +struct fd_bo { + struct fd_device *dev; + uint32_t size; + uint32_t handle; + uint32_t name; + int32_t refcnt; + uint64_t iova; + void *map; + const struct fd_bo_funcs *funcs; + + enum { + NO_CACHE = 0, + BO_CACHE = 1, + RING_CACHE = 2, + } bo_reuse; + + struct list_head list; /* bucket-list entry */ + time_t free_time; /* time when added to bucket-list */ +}; + +struct fd_bo *fd_bo_new_ring(struct fd_device *dev, + uint32_t size, uint32_t flags); + +#define enable_debug 0 /* TODO make dynamic */ + +#define INFO_MSG(fmt, ...) \ + do { debug_printf("[I] "fmt " (%s:%d)\n", \ + ##__VA_ARGS__, __FUNCTION__, __LINE__); } while (0) +#define DEBUG_MSG(fmt, ...) \ + do if (enable_debug) { debug_printf("[D] "fmt " (%s:%d)\n", \ + ##__VA_ARGS__, __FUNCTION__, __LINE__); } while (0) +#define WARN_MSG(fmt, ...) \ + do { debug_printf("[W] "fmt " (%s:%d)\n", \ + ##__VA_ARGS__, __FUNCTION__, __LINE__); } while (0) +#define ERROR_MSG(fmt, ...) 
\ + do { debug_printf("[E] " fmt " (%s:%d)\n", \ + ##__VA_ARGS__, __FUNCTION__, __LINE__); } while (0) + +#define U642VOID(x) ((void *)(unsigned long)(x)) +#define VOID2U64(x) ((uint64_t)(unsigned long)(x)) + +#if HAVE_VALGRIND +# include + +/* + * For tracking the backing memory (if valgrind enabled, we force a mmap + * for the purposes of tracking) + */ +static inline void VG_BO_ALLOC(struct fd_bo *bo) +{ + if (bo && RUNNING_ON_VALGRIND) { + VALGRIND_MALLOCLIKE_BLOCK(fd_bo_map(bo), bo->size, 0, 1); + } +} + +static inline void VG_BO_FREE(struct fd_bo *bo) +{ + VALGRIND_FREELIKE_BLOCK(bo->map, 0); +} + +/* + * For tracking bo structs that are in the buffer-cache, so that valgrind + * doesn't attribute ownership to the first one to allocate the recycled + * bo. + * + * Note that the list_head in fd_bo is used to track the buffers in cache + * so disable error reporting on the range while they are in cache so + * valgrind doesn't squawk about list traversal. + * + */ +static inline void VG_BO_RELEASE(struct fd_bo *bo) +{ + if (RUNNING_ON_VALGRIND) { + VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(bo, bo->dev->bo_size); + VALGRIND_MAKE_MEM_NOACCESS(bo, bo->dev->bo_size); + VALGRIND_FREELIKE_BLOCK(bo->map, 0); + } +} +static inline void VG_BO_OBTAIN(struct fd_bo *bo) +{ + if (RUNNING_ON_VALGRIND) { + VALGRIND_MAKE_MEM_DEFINED(bo, bo->dev->bo_size); + VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(bo, bo->dev->bo_size); + VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, 1); + } +} +#else +static inline void VG_BO_ALLOC(struct fd_bo *bo) {} +static inline void VG_BO_FREE(struct fd_bo *bo) {} +static inline void VG_BO_RELEASE(struct fd_bo *bo) {} +static inline void VG_BO_OBTAIN(struct fd_bo *bo) {} +#endif + +#define FD_DEFINE_CAST(parent, child) \ +static inline struct child * to_ ## child (struct parent *x) \ +{ return (struct child *)x; } + + +#endif /* FREEDRENO_PRIV_H_ */ diff -Nru mesa-18.3.3/src/freedreno/drm/freedreno_ringbuffer.c 
mesa-19.0.1/src/freedreno/drm/freedreno_ringbuffer.c --- mesa-18.3.3/src/freedreno/drm/freedreno_ringbuffer.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/freedreno_ringbuffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include + +#include "freedreno_drmif.h" +#include "freedreno_ringbuffer.h" +#include "freedreno_priv.h" + +struct fd_submit * +fd_submit_new(struct fd_pipe *pipe) +{ + return pipe->funcs->submit_new(pipe); +} + +void +fd_submit_del(struct fd_submit *submit) +{ + return submit->funcs->destroy(submit); +} + +int +fd_submit_flush(struct fd_submit *submit, int in_fence_fd, int *out_fence_fd, + uint32_t *out_fence) +{ + return submit->funcs->flush(submit, in_fence_fd, out_fence_fd, out_fence); +} + +struct fd_ringbuffer * +fd_submit_new_ringbuffer(struct fd_submit *submit, uint32_t size, + enum fd_ringbuffer_flags flags) +{ + debug_assert(!(flags & _FD_RINGBUFFER_OBJECT)); + if (flags & FD_RINGBUFFER_STREAMING) { + debug_assert(!(flags & FD_RINGBUFFER_GROWABLE)); + debug_assert(!(flags & FD_RINGBUFFER_PRIMARY)); + } + return submit->funcs->new_ringbuffer(submit, size, flags); +} + +struct fd_ringbuffer * +fd_ringbuffer_new_object(struct fd_pipe *pipe, uint32_t size) +{ + return pipe->funcs->ringbuffer_new_object(pipe, size); +} + +void fd_ringbuffer_del(struct fd_ringbuffer *ring) +{ + if (!atomic_dec_and_test(&ring->refcnt)) + return; + + ring->funcs->destroy(ring); +} + +struct fd_ringbuffer * +fd_ringbuffer_ref(struct fd_ringbuffer *ring) +{ + p_atomic_inc(&ring->refcnt); + return ring; +} + +void fd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords) +{ + assert(ring->funcs->grow); /* unsupported on kgsl */ + + /* there is an upper bound on IB size, which appears to be 0x100000 */ + if (ring->size < 0x100000) + ring->size *= 2; + + ring->funcs->grow(ring, ring->size); +} + +void fd_ringbuffer_reloc(struct fd_ringbuffer *ring, + const struct fd_reloc *reloc) +{ + ring->funcs->emit_reloc(ring, reloc); +} + +uint32_t fd_ringbuffer_cmd_count(struct fd_ringbuffer *ring) +{ + if (!ring->funcs->cmd_count) + return 1; + return ring->funcs->cmd_count(ring); +} + +uint32_t +fd_ringbuffer_emit_reloc_ring_full(struct 
fd_ringbuffer *ring, + struct fd_ringbuffer *target, uint32_t cmd_idx) +{ + return ring->funcs->emit_reloc_ring(ring, target, cmd_idx); +} diff -Nru mesa-18.3.3/src/freedreno/drm/freedreno_ringbuffer.h mesa-19.0.1/src/freedreno/drm/freedreno_ringbuffer.h --- mesa-18.3.3/src/freedreno/drm/freedreno_ringbuffer.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/freedreno_ringbuffer.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,160 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef FREEDRENO_RINGBUFFER_H_ +#define FREEDRENO_RINGBUFFER_H_ + +#include "util/u_debug.h" + +#include "freedreno_drmif.h" + +struct fd_submit; +struct fd_ringbuffer; + +enum fd_ringbuffer_flags { + + /* Primary ringbuffer for a submit, ie. an IB1 level rb + * which kernel must setup RB->IB1 CP_INDIRECT_BRANCH + * packets. 
+ */ + FD_RINGBUFFER_PRIMARY = 0x1, + + /* Hint that the stateobj will be used for streaming state + * that is used once or a few times and then discarded. + * + * For sub-allocation, non streaming stateobj's should be + * sub-allocated from a page size buffer, so one long lived + * state obj doesn't prevent other pages from being freed. + * (Ie. it would be no worse than allocating a page sized + * bo for each small non-streaming stateobj). + * + * But streaming stateobj's could be sub-allocated from a + * larger buffer to reduce the alloc/del overhead. + */ + FD_RINGBUFFER_STREAMING = 0x2, + + /* Indicates that "growable" cmdstream can be used, + * consisting of multiple physical cmdstream buffers + */ + FD_RINGBUFFER_GROWABLE = 0x4, + + /* Internal use only: */ + _FD_RINGBUFFER_OBJECT = 0x8, +}; + +/* A submit object manages/tracks all the state buildup for a "submit" + * ioctl to the kernel. Additionally, with the exception of long-lived + * non-STREAMING stateobj rb's, rb's are allocated from the submit. + */ +struct fd_submit * fd_submit_new(struct fd_pipe *pipe); + +/* NOTE: all ringbuffer's create from the submit should be unref'd + * before destroying the submit. + */ +void fd_submit_del(struct fd_submit *submit); + +/* Allocate a new rb from the submit. */ +struct fd_ringbuffer * fd_submit_new_ringbuffer(struct fd_submit *submit, + uint32_t size, enum fd_ringbuffer_flags flags); + +/* in_fence_fd: -1 for no in-fence, else fence fd + * out_fence_fd: NULL for no output-fence requested, else ptr to return out-fence + */ +int fd_submit_flush(struct fd_submit *submit, + int in_fence_fd, int *out_fence_fd, + uint32_t *out_fence); + +struct fd_ringbuffer_funcs; + +/* the ringbuffer object is not opaque so that OUT_RING() type stuff + * can be inlined. Note that users should not make assumptions about + * the size of this struct. 
+ */ +struct fd_ringbuffer { + uint32_t *cur, *end, *start; + const struct fd_ringbuffer_funcs *funcs; + +// size or end coudl probably go away + int size; + int32_t refcnt; + enum fd_ringbuffer_flags flags; +}; + +/* Allocate a new long-lived state object, not associated with + * a submit: + */ +struct fd_ringbuffer * fd_ringbuffer_new_object(struct fd_pipe *pipe, + uint32_t size); + +struct fd_ringbuffer *fd_ringbuffer_ref(struct fd_ringbuffer *ring); +void fd_ringbuffer_del(struct fd_ringbuffer *ring); + +void fd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords); + +static inline void fd_ringbuffer_emit(struct fd_ringbuffer *ring, + uint32_t data) +{ + (*ring->cur++) = data; +} + +struct fd_reloc { + struct fd_bo *bo; +#define FD_RELOC_READ 0x0001 +#define FD_RELOC_WRITE 0x0002 +#define FD_RELOC_DUMP 0x0004 + uint32_t flags; + uint32_t offset; + uint32_t or; + int32_t shift; + uint32_t orhi; /* used for a5xx+ */ +}; + +/* NOTE: relocs are 2 dwords on a5xx+ */ + +void fd_ringbuffer_reloc(struct fd_ringbuffer *ring, const struct fd_reloc *reloc); +uint32_t fd_ringbuffer_cmd_count(struct fd_ringbuffer *ring); +uint32_t fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer *ring, + struct fd_ringbuffer *target, uint32_t cmd_idx); + +static inline uint32_t +offset_bytes(void *end, void *start) +{ + return ((char *)end) - ((char *)start); +} + +static inline uint32_t +fd_ringbuffer_size(struct fd_ringbuffer *ring) +{ + /* only really needed for stateobj ringbuffers, and won't really + * do what you expect for growable rb's.. 
so lets just restrict + * this to stateobj's for now: + */ + debug_assert(!(ring->flags & FD_RINGBUFFER_GROWABLE)); + return offset_bytes(ring->cur, ring->start); +} + + +#endif /* FREEDRENO_RINGBUFFER_H_ */ diff -Nru mesa-18.3.3/src/freedreno/drm/meson.build mesa-19.0.1/src/freedreno/drm/meson.build --- mesa-18.3.3/src/freedreno/drm/meson.build 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,54 @@ +# Copyright © 2018 Rob Clark + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +libfreedreno_drm_files = files( + 'freedreno_bo.c', + 'freedreno_bo_cache.c', + 'freedreno_device.c', + 'freedreno_drmif.h', + 'freedreno_pipe.c', + 'freedreno_priv.h', + 'freedreno_ringbuffer.c', + 'freedreno_ringbuffer.h', + 'msm_bo.c', + 'msm_device.c', + 'msm_drm.h', + 'msm_pipe.c', + 'msm_priv.h', + 'msm_ringbuffer.c', + 'msm_ringbuffer_sp.c', +) + +libfreedreno_drm = static_library( + 'freedreno_drm', + libfreedreno_drm_files, + include_directories : [ + inc_freedreno, + inc_common, + ], + c_args : [c_vis_args, no_override_init_args], + cpp_args : [cpp_vis_args], + dependencies : [ + dep_libdrm, + dep_valgrind, + ], + build_by_default : false, +) + diff -Nru mesa-18.3.3/src/freedreno/drm/msm_bo.c mesa-19.0.1/src/freedreno/drm/msm_bo.c --- mesa-18.3.3/src/freedreno/drm/msm_bo.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/msm_bo.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,197 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "msm_priv.h" + +static int bo_allocate(struct msm_bo *msm_bo) +{ + struct fd_bo *bo = &msm_bo->base; + if (!msm_bo->offset) { + struct drm_msm_gem_info req = { + .handle = bo->handle, + .info = MSM_INFO_GET_OFFSET, + }; + int ret; + + /* if the buffer is already backed by pages then this + * doesn't actually do anything (other than giving us + * the offset) + */ + ret = drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_INFO, + &req, sizeof(req)); + if (ret) { + ERROR_MSG("alloc failed: %s", strerror(errno)); + return ret; + } + + msm_bo->offset = req.value; + } + + return 0; +} + +static int msm_bo_offset(struct fd_bo *bo, uint64_t *offset) +{ + struct msm_bo *msm_bo = to_msm_bo(bo); + int ret = bo_allocate(msm_bo); + if (ret) + return ret; + *offset = msm_bo->offset; + return 0; +} + +static int msm_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op) +{ + struct drm_msm_gem_cpu_prep req = { + .handle = bo->handle, + .op = op, + }; + + get_abs_timeout(&req.timeout, 5000000000); + + return drmCommandWrite(bo->dev->fd, DRM_MSM_GEM_CPU_PREP, &req, sizeof(req)); +} + +static void msm_bo_cpu_fini(struct fd_bo *bo) +{ + struct drm_msm_gem_cpu_fini req = { + .handle = bo->handle, + }; + + drmCommandWrite(bo->dev->fd, DRM_MSM_GEM_CPU_FINI, &req, sizeof(req)); +} + +static int msm_bo_madvise(struct fd_bo *bo, int willneed) +{ + struct drm_msm_gem_madvise req = { + .handle = bo->handle, + .madv = willneed ? 
MSM_MADV_WILLNEED : MSM_MADV_DONTNEED, + }; + int ret; + + /* older kernels do not support this: */ + if (bo->dev->version < FD_VERSION_MADVISE) + return willneed; + + ret = drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_MADVISE, &req, sizeof(req)); + if (ret) + return ret; + + return req.retained; +} + +static uint64_t msm_bo_iova(struct fd_bo *bo) +{ + struct drm_msm_gem_info req = { + .handle = bo->handle, + .info = MSM_INFO_GET_IOVA, + }; + int ret; + + ret = drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req)); + debug_assert(ret == 0); + + return req.value; +} + +static void msm_bo_set_name(struct fd_bo *bo, const char *fmt, va_list ap) +{ + struct drm_msm_gem_info req = { + .handle = bo->handle, + .info = MSM_INFO_SET_NAME, + }; + char buf[32]; + int sz; + + if (bo->dev->version < FD_VERSION_SOFTPIN) + return; + + sz = vsnprintf(buf, sizeof(buf), fmt, ap); + + req.value = VOID2U64(buf); + req.len = MIN2(sz, sizeof(buf)); + + drmCommandWrite(bo->dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req)); +} + +static void msm_bo_destroy(struct fd_bo *bo) +{ + struct msm_bo *msm_bo = to_msm_bo(bo); + free(msm_bo); +} + +static const struct fd_bo_funcs funcs = { + .offset = msm_bo_offset, + .cpu_prep = msm_bo_cpu_prep, + .cpu_fini = msm_bo_cpu_fini, + .madvise = msm_bo_madvise, + .iova = msm_bo_iova, + .set_name = msm_bo_set_name, + .destroy = msm_bo_destroy, +}; + +/* allocate a buffer handle: */ +int msm_bo_new_handle(struct fd_device *dev, + uint32_t size, uint32_t flags, uint32_t *handle) +{ + struct drm_msm_gem_new req = { + .size = size, + .flags = MSM_BO_WC, // TODO figure out proper flags.. 
+ }; + int ret; + + if (flags & DRM_FREEDRENO_GEM_SCANOUT) + req.flags |= MSM_BO_SCANOUT; + + if (flags & DRM_FREEDRENO_GEM_GPUREADONLY) + req.flags |= MSM_BO_GPU_READONLY; + + ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, + &req, sizeof(req)); + if (ret) + return ret; + + *handle = req.handle; + + return 0; +} + +/* allocate a new buffer object */ +struct fd_bo * msm_bo_from_handle(struct fd_device *dev, + uint32_t size, uint32_t handle) +{ + struct msm_bo *msm_bo; + struct fd_bo *bo; + + msm_bo = calloc(1, sizeof(*msm_bo)); + if (!msm_bo) + return NULL; + + bo = &msm_bo->base; + bo->funcs = &funcs; + + return bo; +} diff -Nru mesa-18.3.3/src/freedreno/drm/msm_device.c mesa-19.0.1/src/freedreno/drm/msm_device.c --- mesa-18.3.3/src/freedreno/drm/msm_device.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/msm_device.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include +#include +#include + +#include "msm_priv.h" + +static void msm_device_destroy(struct fd_device *dev) +{ + struct msm_device *msm_dev = to_msm_device(dev); + free(msm_dev); +} + +static const struct fd_device_funcs funcs = { + .bo_new_handle = msm_bo_new_handle, + .bo_from_handle = msm_bo_from_handle, + .pipe_new = msm_pipe_new, + .destroy = msm_device_destroy, +}; + +struct fd_device * msm_device_new(int fd) +{ + struct msm_device *msm_dev; + struct fd_device *dev; + + msm_dev = calloc(1, sizeof(*msm_dev)); + if (!msm_dev) + return NULL; + + dev = &msm_dev->base; + dev->funcs = &funcs; + + dev->bo_size = sizeof(struct msm_bo); + + return dev; +} diff -Nru mesa-18.3.3/src/freedreno/drm/msm_drm.h mesa-19.0.1/src/freedreno/drm/msm_drm.h --- mesa-18.3.3/src/freedreno/drm/msm_drm.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/msm_drm.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,321 @@ +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MSM_DRM_H__ +#define __MSM_DRM_H__ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Please note that modifications to all structs defined here are + * subject to backwards-compatibility constraints: + * 1) Do not use pointers, use __u64 instead for 32 bit / 64 bit + * user/kernel compatibility + * 2) Keep fields aligned to their size + * 3) Because of how drm_ioctl() works, we can add new fields at + * the end of an ioctl if some care is taken: drm_ioctl() will + * zero out the new fields at the tail of the ioctl, so a zero + * value should have a backwards compatible meaning. And for + * output params, userspace won't see the newly added output + * fields.. so that has to be somehow ok. + */ + +#define MSM_PIPE_NONE 0x00 +#define MSM_PIPE_2D0 0x01 +#define MSM_PIPE_2D1 0x02 +#define MSM_PIPE_3D0 0x10 + +/* The pipe-id just uses the lower bits, so can be OR'd with flags in + * the upper 16 bits (which could be extended further, if needed, maybe + * we extend/overload the pipe-id some day to deal with multiple rings, + * but even then I don't think we need the full lower 16 bits). + */ +#define MSM_PIPE_ID_MASK 0xffff +#define MSM_PIPE_ID(x) ((x) & MSM_PIPE_ID_MASK) +#define MSM_PIPE_FLAGS(x) ((x) & ~MSM_PIPE_ID_MASK) + +/* timeouts are specified in clock-monotonic absolute times (to simplify + * restarting interrupted ioctls). The following struct is logically the + * same as 'struct timespec' but 32/64b ABI safe. 
+ */ +struct drm_msm_timespec { + __s64 tv_sec; /* seconds */ + __s64 tv_nsec; /* nanoseconds */ +}; + +#define MSM_PARAM_GPU_ID 0x01 +#define MSM_PARAM_GMEM_SIZE 0x02 +#define MSM_PARAM_CHIP_ID 0x03 +#define MSM_PARAM_MAX_FREQ 0x04 +#define MSM_PARAM_TIMESTAMP 0x05 +#define MSM_PARAM_GMEM_BASE 0x06 +#define MSM_PARAM_NR_RINGS 0x07 + +struct drm_msm_param { + __u32 pipe; /* in, MSM_PIPE_x */ + __u32 param; /* in, MSM_PARAM_x */ + __u64 value; /* out (get_param) or in (set_param) */ +}; + +/* + * GEM buffers: + */ + +#define MSM_BO_SCANOUT 0x00000001 /* scanout capable */ +#define MSM_BO_GPU_READONLY 0x00000002 +#define MSM_BO_CACHE_MASK 0x000f0000 +/* cache modes */ +#define MSM_BO_CACHED 0x00010000 +#define MSM_BO_WC 0x00020000 +#define MSM_BO_UNCACHED 0x00040000 + +#define MSM_BO_FLAGS (MSM_BO_SCANOUT | \ + MSM_BO_GPU_READONLY | \ + MSM_BO_CACHED | \ + MSM_BO_WC | \ + MSM_BO_UNCACHED) + +struct drm_msm_gem_new { + __u64 size; /* in */ + __u32 flags; /* in, mask of MSM_BO_x */ + __u32 handle; /* out */ +}; + +/* Get or set GEM buffer info. The requested value can be passed + * directly in 'value', or for data larger than 64b 'value' is a + * pointer to userspace buffer, with 'len' specifying the number of + * bytes copied into that buffer. 
For info returned by pointer, + * calling the GEM_INFO ioctl with null 'value' will return the + * required buffer size in 'len' + */ +#define MSM_INFO_GET_OFFSET 0x00 /* get mmap() offset, returned by value */ +#define MSM_INFO_GET_IOVA 0x01 /* get iova, returned by value */ +#define MSM_INFO_SET_NAME 0x02 /* set the debug name (by pointer) */ +#define MSM_INFO_GET_NAME 0x03 /* get debug name, returned by pointer */ + +struct drm_msm_gem_info { + __u32 handle; /* in */ + __u32 info; /* in - one of MSM_INFO_* */ + __u64 value; /* in or out */ + __u32 len; /* in or out */ + __u32 pad; +}; + +#define MSM_PREP_READ 0x01 +#define MSM_PREP_WRITE 0x02 +#define MSM_PREP_NOSYNC 0x04 + +#define MSM_PREP_FLAGS (MSM_PREP_READ | MSM_PREP_WRITE | MSM_PREP_NOSYNC) + +struct drm_msm_gem_cpu_prep { + __u32 handle; /* in */ + __u32 op; /* in, mask of MSM_PREP_x */ + struct drm_msm_timespec timeout; /* in */ +}; + +struct drm_msm_gem_cpu_fini { + __u32 handle; /* in */ +}; + +/* + * Cmdstream Submission: + */ + +/* The value written into the cmdstream is logically: + * + * ((relocbuf->gpuaddr + reloc_offset) << shift) | or + * + * When we have GPU's w/ >32bit ptrs, it should be possible to deal + * with this by emit'ing two reloc entries with appropriate shift + * values. Or a new MSM_SUBMIT_CMD_x type would also be an option. + * + * NOTE that reloc's must be sorted by order of increasing submit_offset, + * otherwise EINVAL. + */ +struct drm_msm_gem_submit_reloc { + __u32 submit_offset; /* in, offset from submit_bo */ + __u32 or; /* in, value OR'd with result */ + __s32 shift; /* in, amount of left shift (can be negative) */ + __u32 reloc_idx; /* in, index of reloc_bo buffer */ + __u64 reloc_offset; /* in, offset from start of reloc_bo */ +}; + +/* submit-types: + * BUF - this cmd buffer is executed normally. + * IB_TARGET_BUF - this cmd buffer is an IB target. 
Reloc's are + * processed normally, but the kernel does not setup an IB to + * this buffer in the first-level ringbuffer + * CTX_RESTORE_BUF - only executed if there has been a GPU context + * switch since the last SUBMIT ioctl + */ +#define MSM_SUBMIT_CMD_BUF 0x0001 +#define MSM_SUBMIT_CMD_IB_TARGET_BUF 0x0002 +#define MSM_SUBMIT_CMD_CTX_RESTORE_BUF 0x0003 +struct drm_msm_gem_submit_cmd { + __u32 type; /* in, one of MSM_SUBMIT_CMD_x */ + __u32 submit_idx; /* in, index of submit_bo cmdstream buffer */ + __u32 submit_offset; /* in, offset into submit_bo */ + __u32 size; /* in, cmdstream size */ + __u32 pad; + __u32 nr_relocs; /* in, number of submit_reloc's */ + __u64 relocs; /* in, ptr to array of submit_reloc's */ +}; + +/* Each buffer referenced elsewhere in the cmdstream submit (ie. the + * cmdstream buffer(s) themselves or reloc entries) has one (and only + * one) entry in the submit->bos[] table. + * + * As a optimization, the current buffer (gpu virtual address) can be + * passed back through the 'presumed' field. If on a subsequent reloc, + * userspace passes back a 'presumed' address that is still valid, + * then patching the cmdstream for this entry is skipped. This can + * avoid kernel needing to map/access the cmdstream bo in the common + * case. 
+ */ +#define MSM_SUBMIT_BO_READ 0x0001 +#define MSM_SUBMIT_BO_WRITE 0x0002 +#define MSM_SUBMIT_BO_DUMP 0x0004 + +#define MSM_SUBMIT_BO_FLAGS (MSM_SUBMIT_BO_READ | \ + MSM_SUBMIT_BO_WRITE | \ + MSM_SUBMIT_BO_DUMP) + +struct drm_msm_gem_submit_bo { + __u32 flags; /* in, mask of MSM_SUBMIT_BO_x */ + __u32 handle; /* in, GEM handle */ + __u64 presumed; /* in/out, presumed buffer address */ +}; + +/* Valid submit ioctl flags: */ +#define MSM_SUBMIT_NO_IMPLICIT 0x80000000 /* disable implicit sync */ +#define MSM_SUBMIT_FENCE_FD_IN 0x40000000 /* enable input fence_fd */ +#define MSM_SUBMIT_FENCE_FD_OUT 0x20000000 /* enable output fence_fd */ +#define MSM_SUBMIT_SUDO 0x10000000 /* run submitted cmds from RB */ +#define MSM_SUBMIT_FLAGS ( \ + MSM_SUBMIT_NO_IMPLICIT | \ + MSM_SUBMIT_FENCE_FD_IN | \ + MSM_SUBMIT_FENCE_FD_OUT | \ + MSM_SUBMIT_SUDO | \ + 0) + +/* Each cmdstream submit consists of a table of buffers involved, and + * one or more cmdstream buffers. This allows for conditional execution + * (context-restore), and IB buffers needed for per tile/bin draw cmds. + */ +struct drm_msm_gem_submit { + __u32 flags; /* MSM_PIPE_x | MSM_SUBMIT_x */ + __u32 fence; /* out */ + __u32 nr_bos; /* in, number of submit_bo's */ + __u32 nr_cmds; /* in, number of submit_cmd's */ + __u64 bos; /* in, ptr to array of submit_bo's */ + __u64 cmds; /* in, ptr to array of submit_cmd's */ + __s32 fence_fd; /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */ + __u32 queueid; /* in, submitqueue id */ +}; + +/* The normal way to synchronize with the GPU is just to CPU_PREP on + * a buffer if you need to access it from the CPU (other cmdstream + * submission from same or other contexts, PAGE_FLIP ioctl, etc, all + * handle the required synchronization under the hood). This ioctl + * mainly just exists as a way to implement the gallium pipe_fence + * APIs without requiring a dummy bo to synchronize on. 
+ */ +struct drm_msm_wait_fence { + __u32 fence; /* in */ + __u32 pad; + struct drm_msm_timespec timeout; /* in */ + __u32 queueid; /* in, submitqueue id */ +}; + +/* madvise provides a way to tell the kernel in case a buffers contents + * can be discarded under memory pressure, which is useful for userspace + * bo cache where we want to optimistically hold on to buffer allocate + * and potential mmap, but allow the pages to be discarded under memory + * pressure. + * + * Typical usage would involve madvise(DONTNEED) when buffer enters BO + * cache, and madvise(WILLNEED) if trying to recycle buffer from BO cache. + * In the WILLNEED case, 'retained' indicates to userspace whether the + * backing pages still exist. + */ +#define MSM_MADV_WILLNEED 0 /* backing pages are needed, status returned in 'retained' */ +#define MSM_MADV_DONTNEED 1 /* backing pages not needed */ +#define __MSM_MADV_PURGED 2 /* internal state */ + +struct drm_msm_gem_madvise { + __u32 handle; /* in, GEM handle */ + __u32 madv; /* in, MSM_MADV_x */ + __u32 retained; /* out, whether backing store still exists */ +}; + +/* + * Draw queues allow the user to set specific submission parameter. Command + * submissions specify a specific submitqueue to use. 
ID 0 is reserved for + * backwards compatibility as a "default" submitqueue + */ + +#define MSM_SUBMITQUEUE_FLAGS (0) + +struct drm_msm_submitqueue { + __u32 flags; /* in, MSM_SUBMITQUEUE_x */ + __u32 prio; /* in, Priority level */ + __u32 id; /* out, identifier */ +}; + +#define DRM_MSM_GET_PARAM 0x00 +/* placeholder: +#define DRM_MSM_SET_PARAM 0x01 + */ +#define DRM_MSM_GEM_NEW 0x02 +#define DRM_MSM_GEM_INFO 0x03 +#define DRM_MSM_GEM_CPU_PREP 0x04 +#define DRM_MSM_GEM_CPU_FINI 0x05 +#define DRM_MSM_GEM_SUBMIT 0x06 +#define DRM_MSM_WAIT_FENCE 0x07 +#define DRM_MSM_GEM_MADVISE 0x08 +/* placeholder: +#define DRM_MSM_GEM_SVM_NEW 0x09 + */ +#define DRM_MSM_SUBMITQUEUE_NEW 0x0A +#define DRM_MSM_SUBMITQUEUE_CLOSE 0x0B + +#define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param) +#define DRM_IOCTL_MSM_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new) +#define DRM_IOCTL_MSM_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_INFO, struct drm_msm_gem_info) +#define DRM_IOCTL_MSM_GEM_CPU_PREP DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_GEM_CPU_PREP, struct drm_msm_gem_cpu_prep) +#define DRM_IOCTL_MSM_GEM_CPU_FINI DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_GEM_CPU_FINI, struct drm_msm_gem_cpu_fini) +#define DRM_IOCTL_MSM_GEM_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_SUBMIT, struct drm_msm_gem_submit) +#define DRM_IOCTL_MSM_WAIT_FENCE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_WAIT_FENCE, struct drm_msm_wait_fence) +#define DRM_IOCTL_MSM_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_MADVISE, struct drm_msm_gem_madvise) +#define DRM_IOCTL_MSM_SUBMITQUEUE_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue) +#define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_CLOSE, __u32) + +#if defined(__cplusplus) +} +#endif + +#endif /* __MSM_DRM_H__ */ diff -Nru mesa-18.3.3/src/freedreno/drm/msm_pipe.c mesa-19.0.1/src/freedreno/drm/msm_pipe.c --- 
mesa-18.3.3/src/freedreno/drm/msm_pipe.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/msm_pipe.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,220 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include "util/slab.h" + +#include "msm_priv.h" + +static int query_param(struct fd_pipe *pipe, uint32_t param, + uint64_t *value) +{ + struct msm_pipe *msm_pipe = to_msm_pipe(pipe); + struct drm_msm_param req = { + .pipe = msm_pipe->pipe, + .param = param, + }; + int ret; + + ret = drmCommandWriteRead(pipe->dev->fd, DRM_MSM_GET_PARAM, + &req, sizeof(req)); + if (ret) + return ret; + + *value = req.value; + + return 0; +} + +static int msm_pipe_get_param(struct fd_pipe *pipe, + enum fd_param_id param, uint64_t *value) +{ + struct msm_pipe *msm_pipe = to_msm_pipe(pipe); + switch(param) { + case FD_DEVICE_ID: // XXX probably get rid of this.. + case FD_GPU_ID: + *value = msm_pipe->gpu_id; + return 0; + case FD_GMEM_SIZE: + *value = msm_pipe->gmem; + return 0; + case FD_CHIP_ID: + *value = msm_pipe->chip_id; + return 0; + case FD_MAX_FREQ: + return query_param(pipe, MSM_PARAM_MAX_FREQ, value); + case FD_TIMESTAMP: + return query_param(pipe, MSM_PARAM_TIMESTAMP, value); + case FD_NR_RINGS: + return query_param(pipe, MSM_PARAM_NR_RINGS, value); + default: + ERROR_MSG("invalid param id: %d", param); + return -1; + } +} + +static int msm_pipe_wait(struct fd_pipe *pipe, uint32_t timestamp, + uint64_t timeout) +{ + struct fd_device *dev = pipe->dev; + struct drm_msm_wait_fence req = { + .fence = timestamp, + .queueid = to_msm_pipe(pipe)->queue_id, + }; + int ret; + + get_abs_timeout(&req.timeout, timeout); + + ret = drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &req, sizeof(req)); + if (ret) { + ERROR_MSG("wait-fence failed! 
%d (%s)", ret, strerror(errno)); + return ret; + } + + return 0; +} + +static int open_submitqueue(struct fd_pipe *pipe, uint32_t prio) +{ + struct drm_msm_submitqueue req = { + .flags = 0, + .prio = prio, + }; + uint64_t nr_rings = 1; + int ret; + + if (fd_device_version(pipe->dev) < FD_VERSION_SUBMIT_QUEUES) { + to_msm_pipe(pipe)->queue_id = 0; + return 0; + } + + msm_pipe_get_param(pipe, FD_NR_RINGS, &nr_rings); + + req.prio = MIN2(req.prio, MAX2(nr_rings, 1) - 1); + + ret = drmCommandWriteRead(pipe->dev->fd, DRM_MSM_SUBMITQUEUE_NEW, + &req, sizeof(req)); + if (ret) { + ERROR_MSG("could not create submitqueue! %d (%s)", ret, strerror(errno)); + return ret; + } + + to_msm_pipe(pipe)->queue_id = req.id; + return 0; +} + +static void close_submitqueue(struct fd_pipe *pipe, uint32_t queue_id) +{ + if (fd_device_version(pipe->dev) < FD_VERSION_SUBMIT_QUEUES) + return; + + drmCommandWrite(pipe->dev->fd, DRM_MSM_SUBMITQUEUE_CLOSE, + &queue_id, sizeof(queue_id)); +} + +static void msm_pipe_destroy(struct fd_pipe *pipe) +{ + struct msm_pipe *msm_pipe = to_msm_pipe(pipe); + close_submitqueue(pipe, msm_pipe->queue_id); + free(msm_pipe); +} + +static const struct fd_pipe_funcs sp_funcs = { + .ringbuffer_new_object = msm_ringbuffer_sp_new_object, + .submit_new = msm_submit_sp_new, + .get_param = msm_pipe_get_param, + .wait = msm_pipe_wait, + .destroy = msm_pipe_destroy, +}; + +static const struct fd_pipe_funcs legacy_funcs = { + .ringbuffer_new_object = msm_ringbuffer_new_object, + .submit_new = msm_submit_new, + .get_param = msm_pipe_get_param, + .wait = msm_pipe_wait, + .destroy = msm_pipe_destroy, +}; + +static uint64_t get_param(struct fd_pipe *pipe, uint32_t param) +{ + uint64_t value; + int ret = query_param(pipe, param, &value); + if (ret) { + ERROR_MSG("get-param failed! 
%d (%s)", ret, strerror(errno)); + return 0; + } + return value; +} + +struct fd_pipe * msm_pipe_new(struct fd_device *dev, + enum fd_pipe_id id, uint32_t prio) +{ + static const uint32_t pipe_id[] = { + [FD_PIPE_3D] = MSM_PIPE_3D0, + [FD_PIPE_2D] = MSM_PIPE_2D0, + }; + struct msm_pipe *msm_pipe = NULL; + struct fd_pipe *pipe = NULL; + + msm_pipe = calloc(1, sizeof(*msm_pipe)); + if (!msm_pipe) { + ERROR_MSG("allocation failed"); + goto fail; + } + + pipe = &msm_pipe->base; + + if (fd_device_version(dev) >= FD_VERSION_SOFTPIN) { + pipe->funcs = &sp_funcs; + } else { + pipe->funcs = &legacy_funcs; + } + + /* initialize before get_param(): */ + pipe->dev = dev; + msm_pipe->pipe = pipe_id[id]; + + /* these params should be supported since the first version of drm/msm: */ + msm_pipe->gpu_id = get_param(pipe, MSM_PARAM_GPU_ID); + msm_pipe->gmem = get_param(pipe, MSM_PARAM_GMEM_SIZE); + msm_pipe->chip_id = get_param(pipe, MSM_PARAM_CHIP_ID); + + if (! msm_pipe->gpu_id) + goto fail; + + INFO_MSG("Pipe Info:"); + INFO_MSG(" GPU-id: %d", msm_pipe->gpu_id); + INFO_MSG(" Chip-id: 0x%08x", msm_pipe->chip_id); + INFO_MSG(" GMEM size: 0x%08x", msm_pipe->gmem); + + if (open_submitqueue(pipe, prio)) + goto fail; + + return pipe; +fail: + if (pipe) + fd_pipe_del(pipe); + return NULL; +} diff -Nru mesa-18.3.3/src/freedreno/drm/msm_priv.h mesa-19.0.1/src/freedreno/drm/msm_priv.h --- mesa-18.3.3/src/freedreno/drm/msm_priv.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/msm_priv.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * 
Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef MSM_PRIV_H_ +#define MSM_PRIV_H_ + +#include "freedreno_priv.h" + +#ifndef __user +# define __user +#endif + +#include "msm_drm.h" + +struct msm_device { + struct fd_device base; + struct fd_bo_cache ring_cache; +}; +FD_DEFINE_CAST(fd_device, msm_device); + +struct fd_device * msm_device_new(int fd); + +struct msm_pipe { + struct fd_pipe base; + uint32_t pipe; + uint32_t gpu_id; + uint32_t gmem; + uint32_t chip_id; + uint32_t queue_id; +}; +FD_DEFINE_CAST(fd_pipe, msm_pipe); + +struct fd_pipe * msm_pipe_new(struct fd_device *dev, + enum fd_pipe_id id, uint32_t prio); + +struct fd_ringbuffer * msm_ringbuffer_new_object(struct fd_pipe *pipe, uint32_t size); +struct fd_ringbuffer * msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size); + +struct fd_submit * msm_submit_new(struct fd_pipe *pipe); +struct fd_submit * msm_submit_sp_new(struct fd_pipe *pipe); + +struct msm_bo { + struct fd_bo base; + uint64_t offset; + /* to avoid excess hashtable lookups, cache the ring this bo was + * last emitted on (since that will probably also be the next ring + * it is emitted on) + */ + unsigned current_submit_seqno; + uint32_t idx; +}; +FD_DEFINE_CAST(fd_bo, msm_bo); + +int msm_bo_new_handle(struct 
fd_device *dev, + uint32_t size, uint32_t flags, uint32_t *handle); +struct fd_bo * msm_bo_from_handle(struct fd_device *dev, + uint32_t size, uint32_t handle); + +static inline void +msm_dump_submit(struct drm_msm_gem_submit *req) +{ + for (unsigned i = 0; i < req->nr_bos; i++) { + struct drm_msm_gem_submit_bo *bos = U642VOID(req->bos); + struct drm_msm_gem_submit_bo *bo = &bos[i]; + ERROR_MSG(" bos[%d]: handle=%u, flags=%x", i, bo->handle, bo->flags); + } + for (unsigned i = 0; i < req->nr_cmds; i++) { + struct drm_msm_gem_submit_cmd *cmds = U642VOID(req->cmds); + struct drm_msm_gem_submit_cmd *cmd = &cmds[i]; + struct drm_msm_gem_submit_reloc *relocs = U642VOID(cmd->relocs); + ERROR_MSG(" cmd[%d]: type=%u, submit_idx=%u, submit_offset=%u, size=%u", + i, cmd->type, cmd->submit_idx, cmd->submit_offset, cmd->size); + for (unsigned j = 0; j < cmd->nr_relocs; j++) { + struct drm_msm_gem_submit_reloc *r = &relocs[j]; + ERROR_MSG(" reloc[%d]: submit_offset=%u, or=%08x, shift=%d, reloc_idx=%u" + ", reloc_offset=%"PRIu64, j, r->submit_offset, r->or, r->shift, + r->reloc_idx, r->reloc_offset); + } + } +} + +static inline void get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns) +{ + struct timespec t; + uint32_t s = ns / 1000000000; + clock_gettime(CLOCK_MONOTONIC, &t); + tv->tv_sec = t.tv_sec + s; + tv->tv_nsec = t.tv_nsec + ns - (s * 1000000000); +} + +/* + * Stupid/simple growable array implementation: + */ + +static inline void * +grow(void *ptr, uint16_t nr, uint16_t *max, uint16_t sz) +{ + if ((nr + 1) > *max) { + if ((*max * 2) < (nr + 1)) + *max = nr + 5; + else + *max = *max * 2; + ptr = realloc(ptr, *max * sz); + } + return ptr; +} + +#define DECLARE_ARRAY(type, name) \ + unsigned short nr_ ## name, max_ ## name; \ + type * name; + +#define APPEND(x, name) ({ \ + (x)->name = grow((x)->name, (x)->nr_ ## name, &(x)->max_ ## name, sizeof((x)->name[0])); \ + (x)->nr_ ## name ++; \ +}) + +#endif /* MSM_PRIV_H_ */ diff -Nru 
mesa-18.3.3/src/freedreno/drm/msm_ringbuffer.c mesa-19.0.1/src/freedreno/drm/msm_ringbuffer.c --- mesa-18.3.3/src/freedreno/drm/msm_ringbuffer.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/msm_ringbuffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,722 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include +#include + +#include "util/hash_table.h" +#include "util/set.h" +#include "util/slab.h" + +#include "drm/freedreno_ringbuffer.h" +#include "msm_priv.h" + +/* The legacy implementation of submit/ringbuffer, which still does the + * traditional reloc and cmd tracking + */ + + +#define INIT_SIZE 0x1000 + +static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER; + + +struct msm_submit { + struct fd_submit base; + + DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos); + DECLARE_ARRAY(struct fd_bo *, bos); + + unsigned seqno; + + /* maps fd_bo to idx in bos table: */ + struct hash_table *bo_table; + + struct slab_mempool ring_pool; + + /* hash-set of associated rings: */ + struct set *ring_set; + + struct fd_ringbuffer *primary; + + /* Allow for sub-allocation of stateobj ring buffers (ie. sharing + * the same underlying bo).. + * + * We also rely on previous stateobj having been fully constructed + * so we can reclaim extra space at it's end. + */ + struct fd_ringbuffer *suballoc_ring; +}; +FD_DEFINE_CAST(fd_submit, msm_submit); + +/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers + * and sizes. Ie. a finalized buffer can have no more commands appended to + * it. 
+ */ +struct msm_cmd { + struct fd_bo *ring_bo; + unsigned size; + DECLARE_ARRAY(struct drm_msm_gem_submit_reloc, relocs); +}; + +static struct msm_cmd * +cmd_new(struct fd_bo *ring_bo) +{ + struct msm_cmd *cmd = malloc(sizeof(*cmd)); + cmd->ring_bo = fd_bo_ref(ring_bo); + cmd->size = 0; + cmd->nr_relocs = cmd->max_relocs = 0; + cmd->relocs = NULL; + return cmd; +} + +static void +cmd_free(struct msm_cmd *cmd) +{ + fd_bo_del(cmd->ring_bo); + free(cmd->relocs); + free(cmd); +} + +/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to + * later copy into the submit when the stateobj rb is later referenced by + * a regular rb: + */ +struct msm_reloc_bo { + struct fd_bo *bo; + unsigned flags; +}; + +struct msm_ringbuffer { + struct fd_ringbuffer base; + + /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */ + unsigned offset; + + union { + /* for _FD_RINGBUFFER_OBJECT case: */ + struct { + struct fd_pipe *pipe; + DECLARE_ARRAY(struct msm_reloc_bo, reloc_bos); + struct set *ring_set; + }; + /* for other cases: */ + struct { + struct fd_submit *submit; + DECLARE_ARRAY(struct msm_cmd *, cmds); + }; + } u; + + struct msm_cmd *cmd; /* current cmd */ + struct fd_bo *ring_bo; +}; +FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer); + +static void finalize_current_cmd(struct fd_ringbuffer *ring); +static struct fd_ringbuffer * msm_ringbuffer_init( + struct msm_ringbuffer *msm_ring, + uint32_t size, enum fd_ringbuffer_flags flags); + +/* add (if needed) bo to submit and return index: */ +static uint32_t +append_bo(struct msm_submit *submit, struct fd_bo *bo, uint32_t flags) +{ + struct msm_bo *msm_bo = to_msm_bo(bo); + uint32_t idx; + pthread_mutex_lock(&idx_lock); + if (likely(msm_bo->current_submit_seqno == submit->seqno)) { + idx = msm_bo->idx; + } else { + uint32_t hash = _mesa_hash_pointer(bo); + struct hash_entry *entry; + + entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo); + if (entry) { + /* found */ + idx = 
(uint32_t)(uintptr_t)entry->data; + } else { + idx = APPEND(submit, submit_bos); + idx = APPEND(submit, bos); + + submit->submit_bos[idx].flags = 0; + submit->submit_bos[idx].handle = bo->handle; + submit->submit_bos[idx].presumed = 0; + + submit->bos[idx] = fd_bo_ref(bo); + + _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo, + (void *)(uintptr_t)idx); + } + msm_bo->current_submit_seqno = submit->seqno; + msm_bo->idx = idx; + } + pthread_mutex_unlock(&idx_lock); + if (flags & FD_RELOC_READ) + submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ; + if (flags & FD_RELOC_WRITE) + submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE; + return idx; +} + +static void +append_ring(struct set *set, struct fd_ringbuffer *ring) +{ + uint32_t hash = _mesa_hash_pointer(ring); + + if (!_mesa_set_search_pre_hashed(set, hash, ring)) { + fd_ringbuffer_ref(ring); + _mesa_set_add_pre_hashed(set, hash, ring); + } +} + +static void +msm_submit_suballoc_ring_bo(struct fd_submit *submit, + struct msm_ringbuffer *msm_ring, uint32_t size) +{ + struct msm_submit *msm_submit = to_msm_submit(submit); + unsigned suballoc_offset = 0; + struct fd_bo *suballoc_bo = NULL; + + if (msm_submit->suballoc_ring) { + struct msm_ringbuffer *suballoc_ring = + to_msm_ringbuffer(msm_submit->suballoc_ring); + + suballoc_bo = suballoc_ring->ring_bo; + suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) + + suballoc_ring->offset; + + suballoc_offset = align(suballoc_offset, 0x10); + + if ((size + suballoc_offset) > suballoc_bo->size) { + suballoc_bo = NULL; + } + } + + if (!suballoc_bo) { + // TODO possibly larger size for streaming bo? 
+ msm_ring->ring_bo = fd_bo_new_ring( + submit->pipe->dev, 0x8000, 0); + msm_ring->offset = 0; + } else { + msm_ring->ring_bo = fd_bo_ref(suballoc_bo); + msm_ring->offset = suballoc_offset; + } + + struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring; + + msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base); + + if (old_suballoc_ring) + fd_ringbuffer_del(old_suballoc_ring); +} + +static struct fd_ringbuffer * +msm_submit_new_ringbuffer(struct fd_submit *submit, uint32_t size, + enum fd_ringbuffer_flags flags) +{ + struct msm_submit *msm_submit = to_msm_submit(submit); + struct msm_ringbuffer *msm_ring; + + msm_ring = slab_alloc_st(&msm_submit->ring_pool); + + msm_ring->u.submit = submit; + + /* NOTE: needs to be before _suballoc_ring_bo() since it could + * increment the refcnt of the current ring + */ + msm_ring->base.refcnt = 1; + + if (flags & FD_RINGBUFFER_STREAMING) { + msm_submit_suballoc_ring_bo(submit, msm_ring, size); + } else { + if (flags & FD_RINGBUFFER_GROWABLE) + size = INIT_SIZE; + + msm_ring->offset = 0; + msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size, 0); + } + + if (!msm_ringbuffer_init(msm_ring, size, flags)) + return NULL; + + if (flags & FD_RINGBUFFER_PRIMARY) { + debug_assert(!msm_submit->primary); + msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base); + } + + return &msm_ring->base; +} + +static struct drm_msm_gem_submit_reloc * +handle_stateobj_relocs(struct msm_submit *submit, struct msm_ringbuffer *ring) +{ + struct msm_cmd *cmd = ring->cmd; + struct drm_msm_gem_submit_reloc *relocs; + + relocs = malloc(cmd->nr_relocs * sizeof(*relocs)); + + for (unsigned i = 0; i < cmd->nr_relocs; i++) { + unsigned idx = cmd->relocs[i].reloc_idx; + struct fd_bo *bo = ring->u.reloc_bos[idx].bo; + unsigned flags = 0; + + if (ring->u.reloc_bos[idx].flags & MSM_SUBMIT_BO_READ) + flags |= FD_RELOC_READ; + if (ring->u.reloc_bos[idx].flags & MSM_SUBMIT_BO_WRITE) + flags |= FD_RELOC_WRITE; + + relocs[i] = cmd->relocs[i]; 
+ relocs[i].reloc_idx = append_bo(submit, bo, flags); + } + + return relocs; +} + +static int +msm_submit_flush(struct fd_submit *submit, int in_fence_fd, + int *out_fence_fd, uint32_t *out_fence) +{ + struct msm_submit *msm_submit = to_msm_submit(submit); + struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe); + struct drm_msm_gem_submit req = { + .flags = msm_pipe->pipe, + .queueid = msm_pipe->queue_id, + }; + int ret; + + debug_assert(msm_submit->primary); + + finalize_current_cmd(msm_submit->primary); + append_ring(msm_submit->ring_set, msm_submit->primary); + + unsigned nr_cmds = 0; + unsigned nr_objs = 0; + + set_foreach(msm_submit->ring_set, entry) { + struct fd_ringbuffer *ring = (void *)entry->key; + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + nr_cmds += 1; + nr_objs += 1; + } else { + if (ring != msm_submit->primary) + finalize_current_cmd(ring); + nr_cmds += to_msm_ringbuffer(ring)->u.nr_cmds; + } + } + + void *obj_relocs[nr_objs]; + struct drm_msm_gem_submit_cmd cmds[nr_cmds]; + unsigned i = 0, o = 0; + + set_foreach(msm_submit->ring_set, entry) { + struct fd_ringbuffer *ring = (void *)entry->key; + struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); + + debug_assert(i < nr_cmds); + + // TODO handle relocs: + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + + debug_assert(o < nr_objs); + + void *relocs = handle_stateobj_relocs(msm_submit, msm_ring); + obj_relocs[o++] = relocs; + + cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF; + cmds[i].submit_idx = + append_bo(msm_submit, msm_ring->ring_bo, FD_RELOC_READ); + cmds[i].submit_offset = msm_ring->offset; + cmds[i].size = offset_bytes(ring->cur, ring->start); + cmds[i].pad = 0; + cmds[i].nr_relocs = msm_ring->cmd->nr_relocs; + cmds[i].relocs = VOID2U64(relocs); + + i++; + } else { + for (unsigned j = 0; j < msm_ring->u.nr_cmds; j++) { + if (ring->flags & FD_RINGBUFFER_PRIMARY) { + cmds[i].type = MSM_SUBMIT_CMD_BUF; + } else { + cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF; + } + cmds[i].submit_idx = 
append_bo(msm_submit, + msm_ring->u.cmds[j]->ring_bo, FD_RELOC_READ); + cmds[i].submit_offset = msm_ring->offset; + cmds[i].size = msm_ring->u.cmds[j]->size; + cmds[i].pad = 0; + cmds[i].nr_relocs = msm_ring->u.cmds[j]->nr_relocs; + cmds[i].relocs = VOID2U64(msm_ring->u.cmds[j]->relocs); + + i++; + } + } + } + + if (in_fence_fd != -1) { + req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT; + req.fence_fd = in_fence_fd; + } + + if (out_fence_fd) { + req.flags |= MSM_SUBMIT_FENCE_FD_OUT; + } + + /* needs to be after get_cmd() as that could create bos/cmds table: */ + req.bos = VOID2U64(msm_submit->submit_bos), + req.nr_bos = msm_submit->nr_submit_bos; + req.cmds = VOID2U64(cmds), + req.nr_cmds = nr_cmds; + + DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos); + + ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT, + &req, sizeof(req)); + if (ret) { + ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno)); + msm_dump_submit(&req); + } else if (!ret) { + if (out_fence) + *out_fence = req.fence; + + if (out_fence_fd) + *out_fence_fd = req.fence_fd; + } + + for (unsigned o = 0; o < nr_objs; o++) + free(obj_relocs[o]); + + return ret; +} + +static void +unref_rings(struct set_entry *entry) +{ + struct fd_ringbuffer *ring = (void *)entry->key; + fd_ringbuffer_del(ring); +} + +static void +msm_submit_destroy(struct fd_submit *submit) +{ + struct msm_submit *msm_submit = to_msm_submit(submit); + + if (msm_submit->primary) + fd_ringbuffer_del(msm_submit->primary); + if (msm_submit->suballoc_ring) + fd_ringbuffer_del(msm_submit->suballoc_ring); + + _mesa_hash_table_destroy(msm_submit->bo_table, NULL); + _mesa_set_destroy(msm_submit->ring_set, unref_rings); + + // TODO it would be nice to have a way to debug_assert() if all + // rb's haven't been free'd back to the slab, because that is + // an indication that we are leaking bo's + slab_destroy(&msm_submit->ring_pool); + + for (unsigned i = 0; i < msm_submit->nr_bos; i++) + 
fd_bo_del(msm_submit->bos[i]); + + free(msm_submit->submit_bos); + free(msm_submit->bos); + free(msm_submit); +} + +static const struct fd_submit_funcs submit_funcs = { + .new_ringbuffer = msm_submit_new_ringbuffer, + .flush = msm_submit_flush, + .destroy = msm_submit_destroy, +}; + +struct fd_submit * +msm_submit_new(struct fd_pipe *pipe) +{ + struct msm_submit *msm_submit = calloc(1, sizeof(*msm_submit)); + struct fd_submit *submit; + static unsigned submit_cnt = 0; + + msm_submit->seqno = ++submit_cnt; + msm_submit->bo_table = _mesa_hash_table_create(NULL, + _mesa_hash_pointer, _mesa_key_pointer_equal); + msm_submit->ring_set = _mesa_set_create(NULL, + _mesa_hash_pointer, _mesa_key_pointer_equal); + // TODO tune size: + slab_create(&msm_submit->ring_pool, sizeof(struct msm_ringbuffer), 16); + + submit = &msm_submit->base; + submit->pipe = pipe; + submit->funcs = &submit_funcs; + + return submit; +} + + +static void +finalize_current_cmd(struct fd_ringbuffer *ring) +{ + struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); + + debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); + + if (!msm_ring->cmd) + return; + + debug_assert(msm_ring->cmd->ring_bo == msm_ring->ring_bo); + + unsigned idx = APPEND(&msm_ring->u, cmds); + + msm_ring->u.cmds[idx] = msm_ring->cmd; + msm_ring->cmd = NULL; + + msm_ring->u.cmds[idx]->size = offset_bytes(ring->cur, ring->start); +} + +static void +msm_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t size) +{ + struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); + struct fd_pipe *pipe = msm_ring->u.submit->pipe; + + debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE); + + finalize_current_cmd(ring); + + fd_bo_del(msm_ring->ring_bo); + msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0); + msm_ring->cmd = cmd_new(msm_ring->ring_bo); + + ring->start = fd_bo_map(msm_ring->ring_bo); + ring->end = &(ring->start[size/4]); + ring->cur = ring->start; + ring->size = size; +} + +static void +msm_ringbuffer_emit_reloc(struct 
fd_ringbuffer *ring, + const struct fd_reloc *reloc) +{ + struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); + struct fd_pipe *pipe; + unsigned reloc_idx; + + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + unsigned idx = APPEND(&msm_ring->u, reloc_bos); + + msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo); + msm_ring->u.reloc_bos[idx].flags = reloc->flags; + + /* this gets fixed up at submit->flush() time, since this state- + * object rb can be used with many different submits + */ + reloc_idx = idx; + + pipe = msm_ring->u.pipe; + } else { + struct msm_submit *msm_submit = + to_msm_submit(msm_ring->u.submit); + + reloc_idx = append_bo(msm_submit, reloc->bo, reloc->flags); + + pipe = msm_ring->u.submit->pipe; + } + + struct drm_msm_gem_submit_reloc *r; + unsigned idx = APPEND(msm_ring->cmd, relocs); + + r = &msm_ring->cmd->relocs[idx]; + + r->reloc_idx = reloc_idx; + r->reloc_offset = reloc->offset; + r->or = reloc->or; + r->shift = reloc->shift; + r->submit_offset = offset_bytes(ring->cur, ring->start) + + msm_ring->offset; + + ring->cur++; + + if (pipe->gpu_id >= 500) { + idx = APPEND(msm_ring->cmd, relocs); + r = &msm_ring->cmd->relocs[idx]; + + r->reloc_idx = reloc_idx; + r->reloc_offset = reloc->offset; + r->or = reloc->orhi; + r->shift = reloc->shift - 32; + r->submit_offset = offset_bytes(ring->cur, ring->start) + + msm_ring->offset; + + ring->cur++; + } +} + +static void +append_stateobj_rings(struct msm_submit *submit, struct fd_ringbuffer *target) +{ + struct msm_ringbuffer *msm_target = to_msm_ringbuffer(target); + + debug_assert(target->flags & _FD_RINGBUFFER_OBJECT); + + set_foreach(msm_target->u.ring_set, entry) { + struct fd_ringbuffer *ring = (void *)entry->key; + + append_ring(submit->ring_set, ring); + + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + append_stateobj_rings(submit, ring); + } + } +} + +static uint32_t +msm_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring, + struct fd_ringbuffer *target, uint32_t cmd_idx) +{ + struct 
msm_ringbuffer *msm_target = to_msm_ringbuffer(target); + struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); + struct fd_bo *bo; + uint32_t size; + + if ((target->flags & FD_RINGBUFFER_GROWABLE) && + (cmd_idx < msm_target->u.nr_cmds)) { + bo = msm_target->u.cmds[cmd_idx]->ring_bo; + size = msm_target->u.cmds[cmd_idx]->size; + } else { + bo = msm_target->ring_bo; + size = offset_bytes(target->cur, target->start); + } + + msm_ringbuffer_emit_reloc(ring, &(struct fd_reloc){ + .bo = bo, + .flags = FD_RELOC_READ, + .offset = msm_target->offset, + }); + + if ((target->flags & _FD_RINGBUFFER_OBJECT) && + !(ring->flags & _FD_RINGBUFFER_OBJECT)) { + struct msm_submit *msm_submit = to_msm_submit(msm_ring->u.submit); + + append_stateobj_rings(msm_submit, target); + } + + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + append_ring(msm_ring->u.ring_set, target); + } else { + struct msm_submit *msm_submit = to_msm_submit(msm_ring->u.submit); + append_ring(msm_submit->ring_set, target); + } + + return size; +} + +static uint32_t +msm_ringbuffer_cmd_count(struct fd_ringbuffer *ring) +{ + if (ring->flags & FD_RINGBUFFER_GROWABLE) + return to_msm_ringbuffer(ring)->u.nr_cmds + 1; + return 1; +} + +static void +msm_ringbuffer_destroy(struct fd_ringbuffer *ring) +{ + struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); + + fd_bo_del(msm_ring->ring_bo); + if (msm_ring->cmd) + cmd_free(msm_ring->cmd); + + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) { + fd_bo_del(msm_ring->u.reloc_bos[i].bo); + } + + _mesa_set_destroy(msm_ring->u.ring_set, unref_rings); + + free(msm_ring->u.reloc_bos); + free(msm_ring); + } else { + struct fd_submit *submit = msm_ring->u.submit; + + for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) { + cmd_free(msm_ring->u.cmds[i]); + } + + free(msm_ring->u.cmds); + slab_free_st(&to_msm_submit(submit)->ring_pool, msm_ring); + } +} + +static const struct fd_ringbuffer_funcs ring_funcs = { + .grow = 
msm_ringbuffer_grow, + .emit_reloc = msm_ringbuffer_emit_reloc, + .emit_reloc_ring = msm_ringbuffer_emit_reloc_ring, + .cmd_count = msm_ringbuffer_cmd_count, + .destroy = msm_ringbuffer_destroy, +}; + +static inline struct fd_ringbuffer * +msm_ringbuffer_init(struct msm_ringbuffer *msm_ring, uint32_t size, + enum fd_ringbuffer_flags flags) +{ + struct fd_ringbuffer *ring = &msm_ring->base; + + debug_assert(msm_ring->ring_bo); + + uint8_t *base = fd_bo_map(msm_ring->ring_bo); + ring->start = (void *)(base + msm_ring->offset); + ring->end = &(ring->start[size/4]); + ring->cur = ring->start; + + ring->size = size; + ring->flags = flags; + + ring->funcs = &ring_funcs; + + msm_ring->u.cmds = NULL; + msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0; + + msm_ring->cmd = cmd_new(msm_ring->ring_bo); + + return ring; +} + +struct fd_ringbuffer * +msm_ringbuffer_new_object(struct fd_pipe *pipe, uint32_t size) +{ + struct msm_ringbuffer *msm_ring = malloc(sizeof(*msm_ring)); + + msm_ring->u.pipe = pipe; + msm_ring->offset = 0; + msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0); + msm_ring->base.refcnt = 1; + + msm_ring->u.reloc_bos = NULL; + msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0; + + msm_ring->u.ring_set = _mesa_set_create(NULL, + _mesa_hash_pointer, _mesa_key_pointer_equal); + + return msm_ringbuffer_init(msm_ring, size, _FD_RINGBUFFER_OBJECT); +} diff -Nru mesa-18.3.3/src/freedreno/drm/msm_ringbuffer_sp.c mesa-19.0.1/src/freedreno/drm/msm_ringbuffer_sp.c --- mesa-18.3.3/src/freedreno/drm/msm_ringbuffer_sp.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/msm_ringbuffer_sp.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,568 @@ +/* + * Copyright (C) 2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, 
modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include +#include + +#include "util/hash_table.h" +#include "util/slab.h" + +#include "drm/freedreno_ringbuffer.h" +#include "msm_priv.h" + +/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead + * by avoiding the additional tracking necessary to build cmds/relocs tables + * (but still builds a bos table) + */ + + +#define INIT_SIZE 0x1000 + +static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER; + + +struct msm_submit_sp { + struct fd_submit base; + + DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos); + DECLARE_ARRAY(struct fd_bo *, bos); + + unsigned seqno; + + /* maps fd_bo to idx in bos table: */ + struct hash_table *bo_table; + + struct slab_mempool ring_pool; + + struct fd_ringbuffer *primary; + + /* Allow for sub-allocation of stateobj ring buffers (ie. sharing + * the same underlying bo).. + * + * We also rely on previous stateobj having been fully constructed + * so we can reclaim extra space at it's end. 
+ */ + struct fd_ringbuffer *suballoc_ring; +}; +FD_DEFINE_CAST(fd_submit, msm_submit_sp); + +/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers + * and sizes. Ie. a finalized buffer can have no more commands appended to + * it. + */ +struct msm_cmd_sp { + struct fd_bo *ring_bo; + unsigned size; +}; + +/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to + * later copy into the submit when the stateobj rb is later referenced by + * a regular rb: + */ +struct msm_reloc_bo_sp { + struct fd_bo *bo; + unsigned flags; +}; + +struct msm_ringbuffer_sp { + struct fd_ringbuffer base; + + /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */ + unsigned offset; + +// TODO check disasm.. hopefully compilers CSE can realize that +// reloc_bos and cmds are at the same offsets and optimize some +// divergent cases into single case + union { + /* for _FD_RINGBUFFER_OBJECT case: */ + struct { + struct fd_pipe *pipe; + DECLARE_ARRAY(struct msm_reloc_bo_sp, reloc_bos); + }; + /* for other cases: */ + struct { + struct fd_submit *submit; + DECLARE_ARRAY(struct msm_cmd_sp, cmds); + }; + } u; + + struct fd_bo *ring_bo; +}; +FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer_sp); + +static void finalize_current_cmd(struct fd_ringbuffer *ring); +static struct fd_ringbuffer * msm_ringbuffer_sp_init( + struct msm_ringbuffer_sp *msm_ring, + uint32_t size, enum fd_ringbuffer_flags flags); + +/* add (if needed) bo to submit and return index: */ +static uint32_t +append_bo(struct msm_submit_sp *submit, struct fd_bo *bo, uint32_t flags) +{ + struct msm_bo *msm_bo = to_msm_bo(bo); + uint32_t idx; + pthread_mutex_lock(&idx_lock); + if (likely(msm_bo->current_submit_seqno == submit->seqno)) { + idx = msm_bo->idx; + } else { + uint32_t hash = _mesa_hash_pointer(bo); + struct hash_entry *entry; + + entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo); + if (entry) { + /* found */ + idx = (uint32_t)(uintptr_t)entry->data; + } else { + 
idx = APPEND(submit, submit_bos); + idx = APPEND(submit, bos); + + submit->submit_bos[idx].flags = 0; + submit->submit_bos[idx].handle = bo->handle; + submit->submit_bos[idx].presumed = 0; + + submit->bos[idx] = fd_bo_ref(bo); + + _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo, + (void *)(uintptr_t)idx); + } + msm_bo->current_submit_seqno = submit->seqno; + msm_bo->idx = idx; + } + pthread_mutex_unlock(&idx_lock); + if (flags & FD_RELOC_READ) + submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ; + if (flags & FD_RELOC_WRITE) + submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE; + if (flags & FD_RELOC_DUMP) + submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_DUMP; + return idx; +} + +static void +msm_submit_suballoc_ring_bo(struct fd_submit *submit, + struct msm_ringbuffer_sp *msm_ring, uint32_t size) +{ + struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); + unsigned suballoc_offset = 0; + struct fd_bo *suballoc_bo = NULL; + + if (msm_submit->suballoc_ring) { + struct msm_ringbuffer_sp *suballoc_ring = + to_msm_ringbuffer_sp(msm_submit->suballoc_ring); + + suballoc_bo = suballoc_ring->ring_bo; + suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) + + suballoc_ring->offset; + + suballoc_offset = align(suballoc_offset, 0x10); + + if ((size + suballoc_offset) > suballoc_bo->size) { + suballoc_bo = NULL; + } + } + + if (!suballoc_bo) { + // TODO possibly larger size for streaming bo? 
+ msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, + 0x8000, DRM_FREEDRENO_GEM_GPUREADONLY); + msm_ring->offset = 0; + } else { + msm_ring->ring_bo = fd_bo_ref(suballoc_bo); + msm_ring->offset = suballoc_offset; + } + + struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring; + + msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base); + + if (old_suballoc_ring) + fd_ringbuffer_del(old_suballoc_ring); +} + +static struct fd_ringbuffer * +msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size, + enum fd_ringbuffer_flags flags) +{ + struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); + struct msm_ringbuffer_sp *msm_ring; + + msm_ring = slab_alloc_st(&msm_submit->ring_pool); + + msm_ring->u.submit = submit; + + /* NOTE: needs to be before _suballoc_ring_bo() since it could + * increment the refcnt of the current ring + */ + msm_ring->base.refcnt = 1; + + if (flags & FD_RINGBUFFER_STREAMING) { + msm_submit_suballoc_ring_bo(submit, msm_ring, size); + } else { + if (flags & FD_RINGBUFFER_GROWABLE) + size = INIT_SIZE; + + msm_ring->offset = 0; + msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size, + DRM_FREEDRENO_GEM_GPUREADONLY); + } + + if (!msm_ringbuffer_sp_init(msm_ring, size, flags)) + return NULL; + + if (flags & FD_RINGBUFFER_PRIMARY) { + debug_assert(!msm_submit->primary); + msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base); + } + + return &msm_ring->base; +} + +static int +msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd, + int *out_fence_fd, uint32_t *out_fence) +{ + struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); + struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe); + struct drm_msm_gem_submit req = { + .flags = msm_pipe->pipe, + .queueid = msm_pipe->queue_id, + }; + int ret; + + debug_assert(msm_submit->primary); + finalize_current_cmd(msm_submit->primary); + + struct msm_ringbuffer_sp *primary = to_msm_ringbuffer_sp(msm_submit->primary); + struct drm_msm_gem_submit_cmd 
cmds[primary->u.nr_cmds]; + + for (unsigned i = 0; i < primary->u.nr_cmds; i++) { + cmds[i].type = MSM_SUBMIT_CMD_BUF; + cmds[i].submit_idx = append_bo(msm_submit, + primary->u.cmds[i].ring_bo, FD_RELOC_READ | FD_RELOC_DUMP); + cmds[i].submit_offset = primary->offset; + cmds[i].size = primary->u.cmds[i].size; + cmds[i].pad = 0; + cmds[i].nr_relocs = 0; + } + + if (in_fence_fd != -1) { + req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT; + req.fence_fd = in_fence_fd; + } + + if (out_fence_fd) { + req.flags |= MSM_SUBMIT_FENCE_FD_OUT; + } + + /* needs to be after get_cmd() as that could create bos/cmds table: */ + req.bos = VOID2U64(msm_submit->submit_bos), + req.nr_bos = msm_submit->nr_submit_bos; + req.cmds = VOID2U64(cmds), + req.nr_cmds = primary->u.nr_cmds; + + DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos); + + ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT, + &req, sizeof(req)); + if (ret) { + ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno)); + msm_dump_submit(&req); + } else if (!ret) { + if (out_fence) + *out_fence = req.fence; + + if (out_fence_fd) + *out_fence_fd = req.fence_fd; + } + + return ret; +} + +static void +msm_submit_sp_destroy(struct fd_submit *submit) +{ + struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); + + if (msm_submit->primary) + fd_ringbuffer_del(msm_submit->primary); + if (msm_submit->suballoc_ring) + fd_ringbuffer_del(msm_submit->suballoc_ring); + + _mesa_hash_table_destroy(msm_submit->bo_table, NULL); + + // TODO it would be nice to have a way to debug_assert() if all + // rb's haven't been free'd back to the slab, because that is + // an indication that we are leaking bo's + slab_destroy(&msm_submit->ring_pool); + + for (unsigned i = 0; i < msm_submit->nr_bos; i++) + fd_bo_del(msm_submit->bos[i]); + + free(msm_submit->submit_bos); + free(msm_submit->bos); + free(msm_submit); +} + +static const struct fd_submit_funcs submit_funcs = { + .new_ringbuffer = 
msm_submit_sp_new_ringbuffer, + .flush = msm_submit_sp_flush, + .destroy = msm_submit_sp_destroy, +}; + +struct fd_submit * +msm_submit_sp_new(struct fd_pipe *pipe) +{ + struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit)); + struct fd_submit *submit; + static unsigned submit_cnt = 0; + + msm_submit->seqno = ++submit_cnt; + msm_submit->bo_table = _mesa_hash_table_create(NULL, + _mesa_hash_pointer, _mesa_key_pointer_equal); + // TODO tune size: + slab_create(&msm_submit->ring_pool, sizeof(struct msm_ringbuffer_sp), 16); + + submit = &msm_submit->base; + submit->pipe = pipe; + submit->funcs = &submit_funcs; + + return submit; +} + + +static void +finalize_current_cmd(struct fd_ringbuffer *ring) +{ + debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); + + struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); + unsigned idx = APPEND(&msm_ring->u, cmds); + + msm_ring->u.cmds[idx].ring_bo = fd_bo_ref(msm_ring->ring_bo); + msm_ring->u.cmds[idx].size = offset_bytes(ring->cur, ring->start); +} + +static void +msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size) +{ + struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); + struct fd_pipe *pipe = msm_ring->u.submit->pipe; + + debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE); + + finalize_current_cmd(ring); + + fd_bo_del(msm_ring->ring_bo); + msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, + DRM_FREEDRENO_GEM_GPUREADONLY); + + ring->start = fd_bo_map(msm_ring->ring_bo); + ring->end = &(ring->start[size/4]); + ring->cur = ring->start; + ring->size = size; +} + +static void +msm_ringbuffer_sp_emit_reloc(struct fd_ringbuffer *ring, + const struct fd_reloc *reloc) +{ + struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); + struct fd_pipe *pipe; + + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + unsigned idx = APPEND(&msm_ring->u, reloc_bos); + + msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo); + msm_ring->u.reloc_bos[idx].flags = reloc->flags; + + pipe = 
msm_ring->u.pipe; + } else { + struct msm_submit_sp *msm_submit = + to_msm_submit_sp(msm_ring->u.submit); + + append_bo(msm_submit, reloc->bo, reloc->flags); + + pipe = msm_ring->u.submit->pipe; + } + + uint64_t iova = fd_bo_get_iova(reloc->bo) + reloc->offset; + uint32_t dword = iova; + int shift = reloc->shift; + + if (shift < 0) + dword >>= -shift; + else + dword <<= shift; + + (*ring->cur++) = dword | reloc->or; + + if (pipe->gpu_id >= 500) { + dword = iova >> 32; + shift -= 32; + + if (shift < 0) + dword >>= -shift; + else + dword <<= shift; + + (*ring->cur++) = dword | reloc->orhi; + } +} + +static uint32_t +msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer *ring, + struct fd_ringbuffer *target, uint32_t cmd_idx) +{ + struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target); + struct fd_bo *bo; + uint32_t size; + + if ((target->flags & FD_RINGBUFFER_GROWABLE) && + (cmd_idx < msm_target->u.nr_cmds)) { + bo = msm_target->u.cmds[cmd_idx].ring_bo; + size = msm_target->u.cmds[cmd_idx].size; + } else { + bo = msm_target->ring_bo; + size = offset_bytes(target->cur, target->start); + } + + msm_ringbuffer_sp_emit_reloc(ring, &(struct fd_reloc){ + .bo = bo, + .flags = FD_RELOC_READ | FD_RELOC_DUMP, + .offset = msm_target->offset, + }); + + if (!(target->flags & _FD_RINGBUFFER_OBJECT)) + return size; + + struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); + + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) { + unsigned idx = APPEND(&msm_ring->u, reloc_bos); + + msm_ring->u.reloc_bos[idx].bo = + fd_bo_ref(msm_target->u.reloc_bos[i].bo); + msm_ring->u.reloc_bos[idx].flags = + msm_target->u.reloc_bos[i].flags; + } + } else { + // TODO it would be nice to know whether we have already + // seen this target before. 
But hopefully we hit the + // append_bo() fast path enough for this to not matter: + struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit); + + for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) { + append_bo(msm_submit, msm_target->u.reloc_bos[i].bo, + msm_target->u.reloc_bos[i].flags); + } + } + + return size; +} + +static uint32_t +msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring) +{ + if (ring->flags & FD_RINGBUFFER_GROWABLE) + return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1; + return 1; +} + +static void +msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring) +{ + struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); + + fd_bo_del(msm_ring->ring_bo); + + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) { + fd_bo_del(msm_ring->u.reloc_bos[i].bo); + } + + free(msm_ring); + } else { + struct fd_submit *submit = msm_ring->u.submit; + + for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) { + fd_bo_del(msm_ring->u.cmds[i].ring_bo); + } + + slab_free_st(&to_msm_submit_sp(submit)->ring_pool, msm_ring); + } +} + +static const struct fd_ringbuffer_funcs ring_funcs = { + .grow = msm_ringbuffer_sp_grow, + .emit_reloc = msm_ringbuffer_sp_emit_reloc, + .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring, + .cmd_count = msm_ringbuffer_sp_cmd_count, + .destroy = msm_ringbuffer_sp_destroy, +}; + +static inline struct fd_ringbuffer * +msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size, + enum fd_ringbuffer_flags flags) +{ + struct fd_ringbuffer *ring = &msm_ring->base; + + debug_assert(msm_ring->ring_bo); + + uint8_t *base = fd_bo_map(msm_ring->ring_bo); + ring->start = (void *)(base + msm_ring->offset); + ring->end = &(ring->start[size/4]); + ring->cur = ring->start; + + ring->size = size; + ring->flags = flags; + + ring->funcs = &ring_funcs; + + // TODO initializing these could probably be conditional on flags + // since unneed for FD_RINGBUFFER_STAGING case.. 
+ msm_ring->u.cmds = NULL; + msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0; + + msm_ring->u.reloc_bos = NULL; + msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0; + + return ring; +} + +struct fd_ringbuffer * +msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size) +{ + struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring)); + + msm_ring->u.pipe = pipe; + msm_ring->offset = 0; + msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, + DRM_FREEDRENO_GEM_GPUREADONLY); + msm_ring->base.refcnt = 1; + + return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT); +} diff -Nru mesa-18.3.3/src/freedreno/ir3/disasm-a3xx.c mesa-19.0.1/src/freedreno/ir3/disasm-a3xx.c --- mesa-18.3.3/src/freedreno/ir3/disasm-a3xx.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/disasm-a3xx.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,1100 @@ +/* + * Copyright (c) 2013 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include "instr-a3xx.h" + +/* bitmask of debug flags */ +enum debug_t { + PRINT_RAW = 0x1, /* dump raw hexdump */ + PRINT_VERBOSE = 0x2, +}; + +static enum debug_t debug; + +#define printf debug_printf + +static const char *levels[] = { + "", + "\t", + "\t\t", + "\t\t\t", + "\t\t\t\t", + "\t\t\t\t\t", + "\t\t\t\t\t\t", + "\t\t\t\t\t\t\t", + "\t\t\t\t\t\t\t\t", + "\t\t\t\t\t\t\t\t\t", + "x", + "x", + "x", + "x", + "x", + "x", +}; + +static const char *component = "xyzw"; + +static const char *type[] = { + [TYPE_F16] = "f16", + [TYPE_F32] = "f32", + [TYPE_U16] = "u16", + [TYPE_U32] = "u32", + [TYPE_S16] = "s16", + [TYPE_S32] = "s32", + [TYPE_U8] = "u8", + [TYPE_S8] = "s8", +}; + +struct disasm_ctx { + FILE *out; + int level; + unsigned gpu_id; + + /* current instruction repeat flag: */ + unsigned repeat; +}; + +static void print_reg(struct disasm_ctx *ctx, reg_t reg, bool full, bool r, + bool c, bool im, bool neg, bool abs, bool addr_rel) +{ + const char type = c ? 'c' : 'r'; + + // XXX I prefer - and || for neg/abs, but preserving format used + // by libllvm-a3xx for easy diffing.. + + if (abs && neg) + fprintf(ctx->out, "(absneg)"); + else if (neg) + fprintf(ctx->out, "(neg)"); + else if (abs) + fprintf(ctx->out, "(abs)"); + + if (r) + fprintf(ctx->out, "(r)"); + + if (im) { + fprintf(ctx->out, "%d", reg.iim_val); + } else if (addr_rel) { + /* I would just use %+d but trying to make it diff'able with + * libllvm-a3xx... + */ + if (reg.iim_val < 0) + fprintf(ctx->out, "%s%c", full ? "" : "h", type, -reg.iim_val); + else if (reg.iim_val > 0) + fprintf(ctx->out, "%s%c", full ? 
"" : "h", type, reg.iim_val); + else + fprintf(ctx->out, "%s%c", full ? "" : "h", type); + } else if ((reg.num == REG_A0) && !c) { + fprintf(ctx->out, "a0.%c", component[reg.comp]); + } else if ((reg.num == REG_P0) && !c) { + fprintf(ctx->out, "p0.%c", component[reg.comp]); + } else { + fprintf(ctx->out, "%s%c%d.%c", full ? "" : "h", type, reg.num, component[reg.comp]); + } +} + + +static void print_reg_dst(struct disasm_ctx *ctx, reg_t reg, bool full, bool addr_rel) +{ + print_reg(ctx, reg, full, false, false, false, false, false, addr_rel); +} + +static void print_reg_src(struct disasm_ctx *ctx, reg_t reg, bool full, bool r, + bool c, bool im, bool neg, bool abs, bool addr_rel) +{ + print_reg(ctx, reg, full, r, c, im, neg, abs, addr_rel); +} + +/* TODO switch to using reginfo struct everywhere, since more readable + * than passing a bunch of bools to print_reg_src + */ + +struct reginfo { + reg_t reg; + bool full; + bool r; + bool c; + bool im; + bool neg; + bool abs; + bool addr_rel; +}; + +static void print_src(struct disasm_ctx *ctx, struct reginfo *info) +{ + print_reg_src(ctx, info->reg, info->full, info->r, info->c, info->im, + info->neg, info->abs, info->addr_rel); +} + +//static void print_dst(struct disasm_ctx *ctx, struct reginfo *info) +//{ +// print_reg_dst(ctx, info->reg, info->full, info->addr_rel); +//} + +static void print_instr_cat0(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat0_t *cat0 = &instr->cat0; + + switch (cat0->opc) { + case OPC_KILL: + fprintf(ctx->out, " %sp0.%c", cat0->inv ? "!" : "", + component[cat0->comp]); + break; + case OPC_BR: + fprintf(ctx->out, " %sp0.%c, #%d", cat0->inv ? "!" 
: "", + component[cat0->comp], cat0->a3xx.immed); + break; + case OPC_JUMP: + case OPC_CALL: + fprintf(ctx->out, " #%d", cat0->a3xx.immed); + break; + } + + if ((debug & PRINT_VERBOSE) && (cat0->dummy2|cat0->dummy3|cat0->dummy4)) + fprintf(ctx->out, "\t{0: %x,%x,%x}", cat0->dummy2, cat0->dummy3, cat0->dummy4); +} + +static void print_instr_cat1(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat1_t *cat1 = &instr->cat1; + + if (cat1->ul) + fprintf(ctx->out, "(ul)"); + + if (cat1->src_type == cat1->dst_type) { + if ((cat1->src_type == TYPE_S16) && (((reg_t)cat1->dst).num == REG_A0)) { + /* special case (nmemonic?): */ + fprintf(ctx->out, "mova"); + } else { + fprintf(ctx->out, "mov.%s%s", type[cat1->src_type], type[cat1->dst_type]); + } + } else { + fprintf(ctx->out, "cov.%s%s", type[cat1->src_type], type[cat1->dst_type]); + } + + fprintf(ctx->out, " "); + + if (cat1->even) + fprintf(ctx->out, "(even)"); + + if (cat1->pos_inf) + fprintf(ctx->out, "(pos_infinity)"); + + print_reg_dst(ctx, (reg_t)(cat1->dst), type_size(cat1->dst_type) == 32, + cat1->dst_rel); + + fprintf(ctx->out, ", "); + + /* ugg, have to special case this.. vs print_reg().. */ + if (cat1->src_im) { + if (type_float(cat1->src_type)) + fprintf(ctx->out, "(%f)", cat1->fim_val); + else if (type_uint(cat1->src_type)) + fprintf(ctx->out, "0x%08x", cat1->uim_val); + else + fprintf(ctx->out, "%d", cat1->iim_val); + } else if (cat1->src_rel && !cat1->src_c) { + /* I would just use %+d but trying to make it diff'able with + * libllvm-a3xx... + */ + char type = cat1->src_rel_c ? 
'c' : 'r'; + if (cat1->off < 0) + fprintf(ctx->out, "%c", type, -cat1->off); + else if (cat1->off > 0) + fprintf(ctx->out, "%c", type, cat1->off); + else + fprintf(ctx->out, "%c", type); + } else { + print_reg_src(ctx, (reg_t)(cat1->src), type_size(cat1->src_type) == 32, + cat1->src_r, cat1->src_c, cat1->src_im, false, false, false); + } + + if ((debug & PRINT_VERBOSE) && (cat1->must_be_0)) + fprintf(ctx->out, "\t{1: %x}", cat1->must_be_0); +} + +static void print_instr_cat2(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat2_t *cat2 = &instr->cat2; + static const char *cond[] = { + "lt", + "le", + "gt", + "ge", + "eq", + "ne", + "?6?", + }; + + switch (_OPC(2, cat2->opc)) { + case OPC_CMPS_F: + case OPC_CMPS_U: + case OPC_CMPS_S: + case OPC_CMPV_F: + case OPC_CMPV_U: + case OPC_CMPV_S: + fprintf(ctx->out, ".%s", cond[cat2->cond]); + break; + } + + fprintf(ctx->out, " "); + if (cat2->ei) + fprintf(ctx->out, "(ei)"); + print_reg_dst(ctx, (reg_t)(cat2->dst), cat2->full ^ cat2->dst_half, false); + fprintf(ctx->out, ", "); + + unsigned src1_r = cat2->repeat ? cat2->src1_r : 0; + if (cat2->c1.src1_c) { + print_reg_src(ctx, (reg_t)(cat2->c1.src1), cat2->full, src1_r, + cat2->c1.src1_c, cat2->src1_im, cat2->src1_neg, + cat2->src1_abs, false); + } else if (cat2->rel1.src1_rel) { + print_reg_src(ctx, (reg_t)(cat2->rel1.src1), cat2->full, src1_r, + cat2->rel1.src1_c, cat2->src1_im, cat2->src1_neg, + cat2->src1_abs, cat2->rel1.src1_rel); + } else { + print_reg_src(ctx, (reg_t)(cat2->src1), cat2->full, src1_r, + false, cat2->src1_im, cat2->src1_neg, + cat2->src1_abs, false); + } + + unsigned src2_r = cat2->repeat ? 
cat2->src2_r : 0; + switch (_OPC(2, cat2->opc)) { + case OPC_ABSNEG_F: + case OPC_ABSNEG_S: + case OPC_CLZ_B: + case OPC_CLZ_S: + case OPC_SIGN_F: + case OPC_FLOOR_F: + case OPC_CEIL_F: + case OPC_RNDNE_F: + case OPC_RNDAZ_F: + case OPC_TRUNC_F: + case OPC_NOT_B: + case OPC_BFREV_B: + case OPC_SETRM: + case OPC_CBITS_B: + /* these only have one src reg */ + break; + default: + fprintf(ctx->out, ", "); + if (cat2->c2.src2_c) { + print_reg_src(ctx, (reg_t)(cat2->c2.src2), cat2->full, src2_r, + cat2->c2.src2_c, cat2->src2_im, cat2->src2_neg, + cat2->src2_abs, false); + } else if (cat2->rel2.src2_rel) { + print_reg_src(ctx, (reg_t)(cat2->rel2.src2), cat2->full, src2_r, + cat2->rel2.src2_c, cat2->src2_im, cat2->src2_neg, + cat2->src2_abs, cat2->rel2.src2_rel); + } else { + print_reg_src(ctx, (reg_t)(cat2->src2), cat2->full, src2_r, + false, cat2->src2_im, cat2->src2_neg, + cat2->src2_abs, false); + } + break; + } +} + +static void print_instr_cat3(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat3_t *cat3 = &instr->cat3; + bool full = instr_cat3_full(cat3); + + fprintf(ctx->out, " "); + print_reg_dst(ctx, (reg_t)(cat3->dst), full ^ cat3->dst_half, false); + fprintf(ctx->out, ", "); + unsigned src1_r = cat3->repeat ? cat3->src1_r : 0; + if (cat3->c1.src1_c) { + print_reg_src(ctx, (reg_t)(cat3->c1.src1), full, + src1_r, cat3->c1.src1_c, false, cat3->src1_neg, + false, false); + } else if (cat3->rel1.src1_rel) { + print_reg_src(ctx, (reg_t)(cat3->rel1.src1), full, + src1_r, cat3->rel1.src1_c, false, cat3->src1_neg, + false, cat3->rel1.src1_rel); + } else { + print_reg_src(ctx, (reg_t)(cat3->src1), full, + src1_r, false, false, cat3->src1_neg, + false, false); + } + fprintf(ctx->out, ", "); + unsigned src2_r = cat3->repeat ? 
cat3->src2_r : 0; + print_reg_src(ctx, (reg_t)cat3->src2, full, + src2_r, cat3->src2_c, false, cat3->src2_neg, + false, false); + fprintf(ctx->out, ", "); + if (cat3->c2.src3_c) { + print_reg_src(ctx, (reg_t)(cat3->c2.src3), full, + cat3->src3_r, cat3->c2.src3_c, false, cat3->src3_neg, + false, false); + } else if (cat3->rel2.src3_rel) { + print_reg_src(ctx, (reg_t)(cat3->rel2.src3), full, + cat3->src3_r, cat3->rel2.src3_c, false, cat3->src3_neg, + false, cat3->rel2.src3_rel); + } else { + print_reg_src(ctx, (reg_t)(cat3->src3), full, + cat3->src3_r, false, false, cat3->src3_neg, + false, false); + } +} + +static void print_instr_cat4(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat4_t *cat4 = &instr->cat4; + + fprintf(ctx->out, " "); + print_reg_dst(ctx, (reg_t)(cat4->dst), cat4->full ^ cat4->dst_half, false); + fprintf(ctx->out, ", "); + + if (cat4->c.src_c) { + print_reg_src(ctx, (reg_t)(cat4->c.src), cat4->full, + cat4->src_r, cat4->c.src_c, cat4->src_im, + cat4->src_neg, cat4->src_abs, false); + } else if (cat4->rel.src_rel) { + print_reg_src(ctx, (reg_t)(cat4->rel.src), cat4->full, + cat4->src_r, cat4->rel.src_c, cat4->src_im, + cat4->src_neg, cat4->src_abs, cat4->rel.src_rel); + } else { + print_reg_src(ctx, (reg_t)(cat4->src), cat4->full, + cat4->src_r, false, cat4->src_im, + cat4->src_neg, cat4->src_abs, false); + } + + if ((debug & PRINT_VERBOSE) && (cat4->dummy1|cat4->dummy2)) + fprintf(ctx->out, "\t{4: %x,%x}", cat4->dummy1, cat4->dummy2); +} + +static void print_instr_cat5(struct disasm_ctx *ctx, instr_t *instr) +{ + static const struct { + bool src1, src2, samp, tex; + } info[0x1f] = { + [opc_op(OPC_ISAM)] = { true, false, true, true, }, + [opc_op(OPC_ISAML)] = { true, true, true, true, }, + [opc_op(OPC_ISAMM)] = { true, false, true, true, }, + [opc_op(OPC_SAM)] = { true, false, true, true, }, + [opc_op(OPC_SAMB)] = { true, true, true, true, }, + [opc_op(OPC_SAML)] = { true, true, true, true, }, + [opc_op(OPC_SAMGQ)] = { true, false, true, true, 
}, + [opc_op(OPC_GETLOD)] = { true, false, true, true, }, + [opc_op(OPC_CONV)] = { true, true, true, true, }, + [opc_op(OPC_CONVM)] = { true, true, true, true, }, + [opc_op(OPC_GETSIZE)] = { true, false, false, true, }, + [opc_op(OPC_GETBUF)] = { false, false, false, true, }, + [opc_op(OPC_GETPOS)] = { true, false, false, true, }, + [opc_op(OPC_GETINFO)] = { false, false, false, true, }, + [opc_op(OPC_DSX)] = { true, false, false, false, }, + [opc_op(OPC_DSY)] = { true, false, false, false, }, + [opc_op(OPC_GATHER4R)] = { true, false, true, true, }, + [opc_op(OPC_GATHER4G)] = { true, false, true, true, }, + [opc_op(OPC_GATHER4B)] = { true, false, true, true, }, + [opc_op(OPC_GATHER4A)] = { true, false, true, true, }, + [opc_op(OPC_SAMGP0)] = { true, false, true, true, }, + [opc_op(OPC_SAMGP1)] = { true, false, true, true, }, + [opc_op(OPC_SAMGP2)] = { true, false, true, true, }, + [opc_op(OPC_SAMGP3)] = { true, false, true, true, }, + [opc_op(OPC_DSXPP_1)] = { true, false, false, false, }, + [opc_op(OPC_DSYPP_1)] = { true, false, false, false, }, + [opc_op(OPC_RGETPOS)] = { false, false, false, false, }, + [opc_op(OPC_RGETINFO)] = { false, false, false, false, }, + }; + instr_cat5_t *cat5 = &instr->cat5; + int i; + + if (cat5->is_3d) fprintf(ctx->out, ".3d"); + if (cat5->is_a) fprintf(ctx->out, ".a"); + if (cat5->is_o) fprintf(ctx->out, ".o"); + if (cat5->is_p) fprintf(ctx->out, ".p"); + if (cat5->is_s) fprintf(ctx->out, ".s"); + if (cat5->is_s2en) fprintf(ctx->out, ".s2en"); + + fprintf(ctx->out, " "); + + switch (_OPC(5, cat5->opc)) { + case OPC_DSXPP_1: + case OPC_DSYPP_1: + break; + default: + fprintf(ctx->out, "(%s)", type[cat5->type]); + break; + } + + fprintf(ctx->out, "("); + for (i = 0; i < 4; i++) + if (cat5->wrmask & (1 << i)) + fprintf(ctx->out, "%c", "xyzw"[i]); + fprintf(ctx->out, ")"); + + print_reg_dst(ctx, (reg_t)(cat5->dst), type_size(cat5->type) == 32, false); + + if (info[cat5->opc].src1) { + fprintf(ctx->out, ", "); + print_reg_src(ctx, 
(reg_t)(cat5->src1), cat5->full, false, false, false, + false, false, false); + } + + if (cat5->is_s2en) { + fprintf(ctx->out, ", "); + print_reg_src(ctx, (reg_t)(cat5->s2en.src2), cat5->full, false, false, false, + false, false, false); + fprintf(ctx->out, ", "); + print_reg_src(ctx, (reg_t)(cat5->s2en.src3), false, false, false, false, + false, false, false); + } else { + if (cat5->is_o || info[cat5->opc].src2) { + fprintf(ctx->out, ", "); + print_reg_src(ctx, (reg_t)(cat5->norm.src2), cat5->full, + false, false, false, false, false, false); + } + if (info[cat5->opc].samp) + fprintf(ctx->out, ", s#%d", cat5->norm.samp); + if (info[cat5->opc].tex) + fprintf(ctx->out, ", t#%d", cat5->norm.tex); + } + + if (debug & PRINT_VERBOSE) { + if (cat5->is_s2en) { + if ((debug & PRINT_VERBOSE) && (cat5->s2en.dummy1|cat5->s2en.dummy2|cat5->dummy2)) + fprintf(ctx->out, "\t{5: %x,%x,%x}", cat5->s2en.dummy1, cat5->s2en.dummy2, cat5->dummy2); + } else { + if ((debug & PRINT_VERBOSE) && (cat5->norm.dummy1|cat5->dummy2)) + fprintf(ctx->out, "\t{5: %x,%x}", cat5->norm.dummy1, cat5->dummy2); + } + } +} + +static void print_instr_cat6_a3xx(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat6_t *cat6 = &instr->cat6; + char sd = 0, ss = 0; /* dst/src address space */ + bool nodst = false; + struct reginfo dst, src1, src2; + int src1off = 0, dstoff = 0; + + memset(&dst, 0, sizeof(dst)); + memset(&src1, 0, sizeof(src1)); + memset(&src2, 0, sizeof(src2)); + + switch (_OPC(6, cat6->opc)) { + case OPC_RESINFO: + case OPC_RESFMT: + dst.full = type_size(cat6->type) == 32; + src1.full = type_size(cat6->type) == 32; + src2.full = type_size(cat6->type) == 32; + break; + case OPC_L2G: + case OPC_G2L: + dst.full = true; + src1.full = true; + src2.full = true; + break; + case OPC_STG: + case OPC_STL: + case OPC_STP: + case OPC_STI: + case OPC_STLW: + case OPC_STIB: + dst.full = true; + src1.full = type_size(cat6->type) == 32; + src2.full = type_size(cat6->type) == 32; + break; + default: + 
dst.full = type_size(cat6->type) == 32; + src1.full = true; + src2.full = true; + break; + } + + switch (_OPC(6, cat6->opc)) { + case OPC_PREFETCH: + break; + case OPC_RESINFO: + fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); + break; + case OPC_LDGB: + fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped"); + fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); + fprintf(ctx->out, ".%s", type[cat6->type]); + fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1); + break; + case OPC_STGB: + case OPC_STIB: + fprintf(ctx->out, ".%s", cat6->stgb.typed ? "typed" : "untyped"); + fprintf(ctx->out, ".%dd", cat6->stgb.d + 1); + fprintf(ctx->out, ".%s", type[cat6->type]); + fprintf(ctx->out, ".%d", cat6->stgb.type_size + 1); + break; + case OPC_ATOMIC_ADD: + case OPC_ATOMIC_SUB: + case OPC_ATOMIC_XCHG: + case OPC_ATOMIC_INC: + case OPC_ATOMIC_DEC: + case OPC_ATOMIC_CMPXCHG: + case OPC_ATOMIC_MIN: + case OPC_ATOMIC_MAX: + case OPC_ATOMIC_AND: + case OPC_ATOMIC_OR: + case OPC_ATOMIC_XOR: + ss = cat6->g ? 'g' : 'l'; + fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped"); + fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); + fprintf(ctx->out, ".%s", type[cat6->type]); + fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1); + fprintf(ctx->out, ".%c", ss); + break; + default: + dst.im = cat6->g && !cat6->dst_off; + fprintf(ctx->out, ".%s", type[cat6->type]); + break; + } + fprintf(ctx->out, " "); + + switch (_OPC(6, cat6->opc)) { + case OPC_STG: + sd = 'g'; + break; + case OPC_STP: + sd = 'p'; + break; + case OPC_STL: + case OPC_STLW: + sd = 'l'; + break; + + case OPC_LDG: + case OPC_LDC: + ss = 'g'; + break; + case OPC_LDP: + ss = 'p'; + break; + case OPC_LDL: + case OPC_LDLW: + case OPC_LDLV: + ss = 'l'; + break; + + case OPC_L2G: + ss = 'l'; + sd = 'g'; + break; + + case OPC_G2L: + ss = 'g'; + sd = 'l'; + break; + + case OPC_PREFETCH: + ss = 'g'; + nodst = true; + break; + + case OPC_STI: + dst.full = false; // XXX or inverts?? 
+ break; + } + + if ((_OPC(6, cat6->opc) == OPC_STGB) || (_OPC(6, cat6->opc) == OPC_STIB)) { + struct reginfo src3; + + memset(&src3, 0, sizeof(src3)); + + src1.reg = (reg_t)(cat6->stgb.src1); + src2.reg = (reg_t)(cat6->stgb.src2); + src2.im = cat6->stgb.src2_im; + src3.reg = (reg_t)(cat6->stgb.src3); + src3.im = cat6->stgb.src3_im; + src3.full = true; + + fprintf(ctx->out, "g[%u], ", cat6->stgb.dst_ssbo); + print_src(ctx, &src1); + fprintf(ctx->out, ", "); + print_src(ctx, &src2); + fprintf(ctx->out, ", "); + print_src(ctx, &src3); + + if (debug & PRINT_VERBOSE) + fprintf(ctx->out, " (pad0=%x, pad3=%x)", cat6->stgb.pad0, cat6->stgb.pad3); + + return; + } + + if (is_atomic(_OPC(6, cat6->opc))) { + + src1.reg = (reg_t)(cat6->ldgb.src1); + src1.im = cat6->ldgb.src1_im; + src2.reg = (reg_t)(cat6->ldgb.src2); + src2.im = cat6->ldgb.src2_im; + dst.reg = (reg_t)(cat6->ldgb.dst); + + print_src(ctx, &dst); + fprintf(ctx->out, ", "); + if (ss == 'g') { + struct reginfo src3; + memset(&src3, 0, sizeof(src3)); + + src3.reg = (reg_t)(cat6->ldgb.src3); + src3.full = true; + + /* For images, the ".typed" variant is used and src2 is + * the ivecN coordinates, ie ivec2 for 2d. + * + * For SSBOs, the ".untyped" variant is used and src2 is + * a simple dword offset.. src3 appears to be + * uvec2(offset * 4, 0). Not sure the point of that. + */ + + fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo); + print_src(ctx, &src1); /* value */ + fprintf(ctx->out, ", "); + print_src(ctx, &src2); /* offset/coords */ + fprintf(ctx->out, ", "); + print_src(ctx, &src3); /* 64b byte offset.. 
*/ + + if (debug & PRINT_VERBOSE) { + fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0, + cat6->ldgb.pad3, cat6->ldgb.mustbe0); + } + } else { /* ss == 'l' */ + fprintf(ctx->out, "l["); + print_src(ctx, &src1); /* simple byte offset */ + fprintf(ctx->out, "], "); + print_src(ctx, &src2); /* value */ + + if (debug & PRINT_VERBOSE) { + fprintf(ctx->out, " (src3=%x, pad0=%x, pad3=%x, mustbe0=%x)", + cat6->ldgb.src3, cat6->ldgb.pad0, + cat6->ldgb.pad3, cat6->ldgb.mustbe0); + } + } + + return; + } else if (_OPC(6, cat6->opc) == OPC_RESINFO) { + dst.reg = (reg_t)(cat6->ldgb.dst); + + print_src(ctx, &dst); + fprintf(ctx->out, ", "); + fprintf(ctx->out, "g[%u]", cat6->ldgb.src_ssbo); + + return; + } else if (_OPC(6, cat6->opc) == OPC_LDGB) { + + src1.reg = (reg_t)(cat6->ldgb.src1); + src1.im = cat6->ldgb.src1_im; + src2.reg = (reg_t)(cat6->ldgb.src2); + src2.im = cat6->ldgb.src2_im; + dst.reg = (reg_t)(cat6->ldgb.dst); + + print_src(ctx, &dst); + fprintf(ctx->out, ", "); + fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo); + print_src(ctx, &src1); + fprintf(ctx->out, ", "); + print_src(ctx, &src2); + + if (debug & PRINT_VERBOSE) + fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0, cat6->ldgb.pad3, cat6->ldgb.mustbe0); + + return; + } + if (cat6->dst_off) { + dst.reg = (reg_t)(cat6->c.dst); + dstoff = cat6->c.off; + } else { + dst.reg = (reg_t)(cat6->d.dst); + } + + if (cat6->src_off) { + src1.reg = (reg_t)(cat6->a.src1); + src1.im = cat6->a.src1_im; + src2.reg = (reg_t)(cat6->a.src2); + src2.im = cat6->a.src2_im; + src1off = cat6->a.off; + } else { + src1.reg = (reg_t)(cat6->b.src1); + src1.im = cat6->b.src1_im; + src2.reg = (reg_t)(cat6->b.src2); + src2.im = cat6->b.src2_im; + } + + if (!nodst) { + if (sd) + fprintf(ctx->out, "%c[", sd); + /* note: dst might actually be a src (ie. 
address to store to) */ + print_src(ctx, &dst); + if (dstoff) + fprintf(ctx->out, "%+d", dstoff); + if (sd) + fprintf(ctx->out, "]"); + fprintf(ctx->out, ", "); + } + + if (ss) + fprintf(ctx->out, "%c[", ss); + + /* can have a larger than normal immed, so hack: */ + if (src1.im) { + fprintf(ctx->out, "%u", src1.reg.dummy13); + } else { + print_src(ctx, &src1); + } + + if (src1off) + fprintf(ctx->out, "%+d", src1off); + if (ss) + fprintf(ctx->out, "]"); + + switch (_OPC(6, cat6->opc)) { + case OPC_RESINFO: + case OPC_RESFMT: + break; + default: + fprintf(ctx->out, ", "); + print_src(ctx, &src2); + break; + } +} + +static void print_instr_cat6_a6xx(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx; + struct reginfo src1, src2; + char ss = 0; + + memset(&src1, 0, sizeof(src1)); + memset(&src2, 0, sizeof(src2)); + + fprintf(ctx->out, ".%s", cat6->typed ? "typed" : "untyped"); + fprintf(ctx->out, ".%dd", cat6->d + 1); + fprintf(ctx->out, ".%s", type[cat6->type]); + fprintf(ctx->out, ".%u ", cat6->type_size + 1); + + /* NOTE: blob seems to use old encoding for ldl/stl (local memory) */ + ss = 'g'; + + fprintf(ctx->out, "%c[%u", ss, cat6->ssbo); + fprintf(ctx->out, "] + "); + src1.reg = (reg_t)(cat6->src1); + src1.full = true; // XXX + print_src(ctx, &src1); + fprintf(ctx->out, ", "); + + src2.reg = (reg_t)(cat6->src2); + src2.full = true; // XXX + print_src(ctx, &src2); + + if (debug & PRINT_VERBOSE) { + fprintf(ctx->out, " (pad1=%x, pad2=%x, pad3=%x, pad4=%x)", cat6->pad1, + cat6->pad2, cat6->pad3, cat6->pad4); + } +} + +static void print_instr_cat6(struct disasm_ctx *ctx, instr_t *instr) +{ + // TODO not sure if this is the best way to figure + // out if new vs old encoding, but it kinda seems + // to work: + if ((ctx->gpu_id >= 600) && (instr->cat6.opc == 0)) { + print_instr_cat6_a6xx(ctx, instr); + if (debug & PRINT_VERBOSE) + fprintf(ctx->out, " NEW"); + } else { + print_instr_cat6_a3xx(ctx, instr); + if (debug & PRINT_VERBOSE) 
+ fprintf(ctx->out, " LEGACY"); + } +} +static void print_instr_cat7(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat7_t *cat7 = &instr->cat7; + + if (cat7->g) + fprintf(ctx->out, ".g"); + if (cat7->l) + fprintf(ctx->out, ".l"); + + if (_OPC(7, cat7->opc) == OPC_FENCE) { + if (cat7->r) + fprintf(ctx->out, ".r"); + if (cat7->w) + fprintf(ctx->out, ".w"); + } +} + +/* size of largest OPC field of all the instruction categories: */ +#define NOPC_BITS 6 + +static const struct opc_info { + uint16_t cat; + uint16_t opc; + const char *name; + void (*print)(struct disasm_ctx *ctx, instr_t *instr); +} opcs[1 << (3+NOPC_BITS)] = { +#define OPC(cat, opc, name) [(opc)] = { (cat), (opc), #name, print_instr_cat##cat } + /* category 0: */ + OPC(0, OPC_NOP, nop), + OPC(0, OPC_BR, br), + OPC(0, OPC_JUMP, jump), + OPC(0, OPC_CALL, call), + OPC(0, OPC_RET, ret), + OPC(0, OPC_KILL, kill), + OPC(0, OPC_END, end), + OPC(0, OPC_EMIT, emit), + OPC(0, OPC_CUT, cut), + OPC(0, OPC_CHMASK, chmask), + OPC(0, OPC_CHSH, chsh), + OPC(0, OPC_FLOW_REV, flow_rev), + + /* category 1: */ + OPC(1, OPC_MOV, ), + + /* category 2: */ + OPC(2, OPC_ADD_F, add.f), + OPC(2, OPC_MIN_F, min.f), + OPC(2, OPC_MAX_F, max.f), + OPC(2, OPC_MUL_F, mul.f), + OPC(2, OPC_SIGN_F, sign.f), + OPC(2, OPC_CMPS_F, cmps.f), + OPC(2, OPC_ABSNEG_F, absneg.f), + OPC(2, OPC_CMPV_F, cmpv.f), + OPC(2, OPC_FLOOR_F, floor.f), + OPC(2, OPC_CEIL_F, ceil.f), + OPC(2, OPC_RNDNE_F, rndne.f), + OPC(2, OPC_RNDAZ_F, rndaz.f), + OPC(2, OPC_TRUNC_F, trunc.f), + OPC(2, OPC_ADD_U, add.u), + OPC(2, OPC_ADD_S, add.s), + OPC(2, OPC_SUB_U, sub.u), + OPC(2, OPC_SUB_S, sub.s), + OPC(2, OPC_CMPS_U, cmps.u), + OPC(2, OPC_CMPS_S, cmps.s), + OPC(2, OPC_MIN_U, min.u), + OPC(2, OPC_MIN_S, min.s), + OPC(2, OPC_MAX_U, max.u), + OPC(2, OPC_MAX_S, max.s), + OPC(2, OPC_ABSNEG_S, absneg.s), + OPC(2, OPC_AND_B, and.b), + OPC(2, OPC_OR_B, or.b), + OPC(2, OPC_NOT_B, not.b), + OPC(2, OPC_XOR_B, xor.b), + OPC(2, OPC_CMPV_U, cmpv.u), + OPC(2, OPC_CMPV_S, cmpv.s), 
+ OPC(2, OPC_MUL_U, mul.u), + OPC(2, OPC_MUL_S, mul.s), + OPC(2, OPC_MULL_U, mull.u), + OPC(2, OPC_BFREV_B, bfrev.b), + OPC(2, OPC_CLZ_S, clz.s), + OPC(2, OPC_CLZ_B, clz.b), + OPC(2, OPC_SHL_B, shl.b), + OPC(2, OPC_SHR_B, shr.b), + OPC(2, OPC_ASHR_B, ashr.b), + OPC(2, OPC_BARY_F, bary.f), + OPC(2, OPC_MGEN_B, mgen.b), + OPC(2, OPC_GETBIT_B, getbit.b), + OPC(2, OPC_SETRM, setrm), + OPC(2, OPC_CBITS_B, cbits.b), + OPC(2, OPC_SHB, shb), + OPC(2, OPC_MSAD, msad), + + /* category 3: */ + OPC(3, OPC_MAD_U16, mad.u16), + OPC(3, OPC_MADSH_U16, madsh.u16), + OPC(3, OPC_MAD_S16, mad.s16), + OPC(3, OPC_MADSH_M16, madsh.m16), + OPC(3, OPC_MAD_U24, mad.u24), + OPC(3, OPC_MAD_S24, mad.s24), + OPC(3, OPC_MAD_F16, mad.f16), + OPC(3, OPC_MAD_F32, mad.f32), + OPC(3, OPC_SEL_B16, sel.b16), + OPC(3, OPC_SEL_B32, sel.b32), + OPC(3, OPC_SEL_S16, sel.s16), + OPC(3, OPC_SEL_S32, sel.s32), + OPC(3, OPC_SEL_F16, sel.f16), + OPC(3, OPC_SEL_F32, sel.f32), + OPC(3, OPC_SAD_S16, sad.s16), + OPC(3, OPC_SAD_S32, sad.s32), + + /* category 4: */ + OPC(4, OPC_RCP, rcp), + OPC(4, OPC_RSQ, rsq), + OPC(4, OPC_LOG2, log2), + OPC(4, OPC_EXP2, exp2), + OPC(4, OPC_SIN, sin), + OPC(4, OPC_COS, cos), + OPC(4, OPC_SQRT, sqrt), + + /* category 5: */ + OPC(5, OPC_ISAM, isam), + OPC(5, OPC_ISAML, isaml), + OPC(5, OPC_ISAMM, isamm), + OPC(5, OPC_SAM, sam), + OPC(5, OPC_SAMB, samb), + OPC(5, OPC_SAML, saml), + OPC(5, OPC_SAMGQ, samgq), + OPC(5, OPC_GETLOD, getlod), + OPC(5, OPC_CONV, conv), + OPC(5, OPC_CONVM, convm), + OPC(5, OPC_GETSIZE, getsize), + OPC(5, OPC_GETBUF, getbuf), + OPC(5, OPC_GETPOS, getpos), + OPC(5, OPC_GETINFO, getinfo), + OPC(5, OPC_DSX, dsx), + OPC(5, OPC_DSY, dsy), + OPC(5, OPC_GATHER4R, gather4r), + OPC(5, OPC_GATHER4G, gather4g), + OPC(5, OPC_GATHER4B, gather4b), + OPC(5, OPC_GATHER4A, gather4a), + OPC(5, OPC_SAMGP0, samgp0), + OPC(5, OPC_SAMGP1, samgp1), + OPC(5, OPC_SAMGP2, samgp2), + OPC(5, OPC_SAMGP3, samgp3), + OPC(5, OPC_DSXPP_1, dsxpp.1), + OPC(5, OPC_DSYPP_1, dsypp.1), + OPC(5, 
OPC_RGETPOS, rgetpos), + OPC(5, OPC_RGETINFO, rgetinfo), + + + /* category 6: */ + OPC(6, OPC_LDG, ldg), + OPC(6, OPC_LDL, ldl), + OPC(6, OPC_LDP, ldp), + OPC(6, OPC_STG, stg), + OPC(6, OPC_STL, stl), + OPC(6, OPC_STP, stp), + OPC(6, OPC_STI, sti), + OPC(6, OPC_G2L, g2l), + OPC(6, OPC_L2G, l2g), + OPC(6, OPC_PREFETCH, prefetch), + OPC(6, OPC_LDLW, ldlw), + OPC(6, OPC_STLW, stlw), + OPC(6, OPC_RESFMT, resfmt), + OPC(6, OPC_RESINFO, resinfo), + OPC(6, OPC_ATOMIC_ADD, atomic.add), + OPC(6, OPC_ATOMIC_SUB, atomic.sub), + OPC(6, OPC_ATOMIC_XCHG, atomic.xchg), + OPC(6, OPC_ATOMIC_INC, atomic.inc), + OPC(6, OPC_ATOMIC_DEC, atomic.dec), + OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg), + OPC(6, OPC_ATOMIC_MIN, atomic.min), + OPC(6, OPC_ATOMIC_MAX, atomic.max), + OPC(6, OPC_ATOMIC_AND, atomic.and), + OPC(6, OPC_ATOMIC_OR, atomic.or), + OPC(6, OPC_ATOMIC_XOR, atomic.xor), + OPC(6, OPC_LDGB, ldgb), + OPC(6, OPC_STGB, stgb), + OPC(6, OPC_STIB, stib), + OPC(6, OPC_LDC, ldc), + OPC(6, OPC_LDLV, ldlv), + + OPC(7, OPC_BAR, bar), + OPC(7, OPC_FENCE, fence), + +#undef OPC +}; + +#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr, ctx->gpu_id)])) + +// XXX hack.. probably should move this table somewhere common: +#include "ir3.h" +const char *ir3_instr_name(struct ir3_instruction *instr) +{ + if (opc_cat(instr->opc) == -1) return "??meta??"; + return opcs[instr->opc].name; +} + +static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n) +{ + instr_t *instr = (instr_t *)dwords; + uint32_t opc = instr_opc(instr, ctx->gpu_id); + const char *name; + + if (debug & PRINT_VERBOSE) + fprintf(ctx->out, "%s%04d[%08xx_%08xx] ", levels[ctx->level], n, dwords[1], dwords[0]); + + /* NOTE: order flags are printed is a bit fugly.. but for now I + * try to match the order in llvm-a3xx disassembler for easy + * diff'ing.. 
+ */ + + ctx->repeat = instr_repeat(instr); + + if (instr->sync) + fprintf(ctx->out, "(sy)"); + if (instr->ss && ((instr->opc_cat <= 4) || (instr->opc_cat == 7))) + fprintf(ctx->out, "(ss)"); + if (instr->jmp_tgt) + fprintf(ctx->out, "(jp)"); + if (instr_sat(instr)) + fprintf(ctx->out, "(sat)"); + if (ctx->repeat) { + fprintf(ctx->out, "(rpt%d)", ctx->repeat); + } else if ((instr->opc_cat == 2) && (instr->cat2.src1_r || instr->cat2.src2_r)) { + unsigned nop = (instr->cat2.src2_r * 2) + instr->cat2.src1_r; + fprintf(ctx->out, "(nop%d)", nop); + } else if ((instr->opc_cat == 3) && (instr->cat3.src1_r || instr->cat3.src2_r)) { + unsigned nop = (instr->cat3.src2_r * 2) + instr->cat3.src1_r; + fprintf(ctx->out, "(nop%d)", nop); + } + if (instr->ul && ((2 <= instr->opc_cat) && (instr->opc_cat <= 4))) + fprintf(ctx->out, "(ul)"); + + name = GETINFO(instr)->name; + + if (name) { + fprintf(ctx->out, "%s", name); + GETINFO(instr)->print(ctx, instr); + } else { + fprintf(ctx->out, "unknown(%d,%d)", instr->opc_cat, opc); + } + + fprintf(ctx->out, "\n"); + + return (instr->opc_cat == 0) && (opc == OPC_END); +} + +int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id) +{ + struct disasm_ctx ctx; + int i; + + assert((sizedwords % 2) == 0); + + memset(&ctx, 0, sizeof(ctx)); + ctx.out = out; + ctx.level = level; + ctx.gpu_id = gpu_id; + + for (i = 0; i < sizedwords; i += 2) + print_instr(&ctx, &dwords[i], i/2); + + return 0; +} diff -Nru mesa-18.3.3/src/freedreno/ir3/instr-a3xx.h mesa-19.0.1/src/freedreno/ir3/instr-a3xx.h --- mesa-18.3.3/src/freedreno/ir3/instr-a3xx.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/instr-a3xx.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,917 @@ +/* + * Copyright (c) 2013 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, 
including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef INSTR_A3XX_H_ +#define INSTR_A3XX_H_ + +#define PACKED __attribute__((__packed__)) + +#include +#include +#include +#include + +/* size of largest OPC field of all the instruction categories: */ +#define NOPC_BITS 6 + +#define _OPC(cat, opc) (((cat) << NOPC_BITS) | opc) + +typedef enum { + /* category 0: */ + OPC_NOP = _OPC(0, 0), + OPC_BR = _OPC(0, 1), + OPC_JUMP = _OPC(0, 2), + OPC_CALL = _OPC(0, 3), + OPC_RET = _OPC(0, 4), + OPC_KILL = _OPC(0, 5), + OPC_END = _OPC(0, 6), + OPC_EMIT = _OPC(0, 7), + OPC_CUT = _OPC(0, 8), + OPC_CHMASK = _OPC(0, 9), + OPC_CHSH = _OPC(0, 10), + OPC_FLOW_REV = _OPC(0, 11), + + /* category 1: */ + OPC_MOV = _OPC(1, 0), + + /* category 2: */ + OPC_ADD_F = _OPC(2, 0), + OPC_MIN_F = _OPC(2, 1), + OPC_MAX_F = _OPC(2, 2), + OPC_MUL_F = _OPC(2, 3), + OPC_SIGN_F = _OPC(2, 4), + OPC_CMPS_F = _OPC(2, 5), + OPC_ABSNEG_F = _OPC(2, 6), + OPC_CMPV_F = _OPC(2, 7), + /* 8 - invalid */ + OPC_FLOOR_F = _OPC(2, 9), + OPC_CEIL_F = _OPC(2, 10), + OPC_RNDNE_F = _OPC(2, 11), + OPC_RNDAZ_F = _OPC(2, 12), + OPC_TRUNC_F = _OPC(2, 13), + /* 
14-15 - invalid */ + OPC_ADD_U = _OPC(2, 16), + OPC_ADD_S = _OPC(2, 17), + OPC_SUB_U = _OPC(2, 18), + OPC_SUB_S = _OPC(2, 19), + OPC_CMPS_U = _OPC(2, 20), + OPC_CMPS_S = _OPC(2, 21), + OPC_MIN_U = _OPC(2, 22), + OPC_MIN_S = _OPC(2, 23), + OPC_MAX_U = _OPC(2, 24), + OPC_MAX_S = _OPC(2, 25), + OPC_ABSNEG_S = _OPC(2, 26), + /* 27 - invalid */ + OPC_AND_B = _OPC(2, 28), + OPC_OR_B = _OPC(2, 29), + OPC_NOT_B = _OPC(2, 30), + OPC_XOR_B = _OPC(2, 31), + /* 32 - invalid */ + OPC_CMPV_U = _OPC(2, 33), + OPC_CMPV_S = _OPC(2, 34), + /* 35-47 - invalid */ + OPC_MUL_U = _OPC(2, 48), + OPC_MUL_S = _OPC(2, 49), + OPC_MULL_U = _OPC(2, 50), + OPC_BFREV_B = _OPC(2, 51), + OPC_CLZ_S = _OPC(2, 52), + OPC_CLZ_B = _OPC(2, 53), + OPC_SHL_B = _OPC(2, 54), + OPC_SHR_B = _OPC(2, 55), + OPC_ASHR_B = _OPC(2, 56), + OPC_BARY_F = _OPC(2, 57), + OPC_MGEN_B = _OPC(2, 58), + OPC_GETBIT_B = _OPC(2, 59), + OPC_SETRM = _OPC(2, 60), + OPC_CBITS_B = _OPC(2, 61), + OPC_SHB = _OPC(2, 62), + OPC_MSAD = _OPC(2, 63), + + /* category 3: */ + OPC_MAD_U16 = _OPC(3, 0), + OPC_MADSH_U16 = _OPC(3, 1), + OPC_MAD_S16 = _OPC(3, 2), + OPC_MADSH_M16 = _OPC(3, 3), /* should this be .s16? 
*/ + OPC_MAD_U24 = _OPC(3, 4), + OPC_MAD_S24 = _OPC(3, 5), + OPC_MAD_F16 = _OPC(3, 6), + OPC_MAD_F32 = _OPC(3, 7), + OPC_SEL_B16 = _OPC(3, 8), + OPC_SEL_B32 = _OPC(3, 9), + OPC_SEL_S16 = _OPC(3, 10), + OPC_SEL_S32 = _OPC(3, 11), + OPC_SEL_F16 = _OPC(3, 12), + OPC_SEL_F32 = _OPC(3, 13), + OPC_SAD_S16 = _OPC(3, 14), + OPC_SAD_S32 = _OPC(3, 15), + + /* category 4: */ + OPC_RCP = _OPC(4, 0), + OPC_RSQ = _OPC(4, 1), + OPC_LOG2 = _OPC(4, 2), + OPC_EXP2 = _OPC(4, 3), + OPC_SIN = _OPC(4, 4), + OPC_COS = _OPC(4, 5), + OPC_SQRT = _OPC(4, 6), + // 7-63 - invalid + + /* category 5: */ + OPC_ISAM = _OPC(5, 0), + OPC_ISAML = _OPC(5, 1), + OPC_ISAMM = _OPC(5, 2), + OPC_SAM = _OPC(5, 3), + OPC_SAMB = _OPC(5, 4), + OPC_SAML = _OPC(5, 5), + OPC_SAMGQ = _OPC(5, 6), + OPC_GETLOD = _OPC(5, 7), + OPC_CONV = _OPC(5, 8), + OPC_CONVM = _OPC(5, 9), + OPC_GETSIZE = _OPC(5, 10), + OPC_GETBUF = _OPC(5, 11), + OPC_GETPOS = _OPC(5, 12), + OPC_GETINFO = _OPC(5, 13), + OPC_DSX = _OPC(5, 14), + OPC_DSY = _OPC(5, 15), + OPC_GATHER4R = _OPC(5, 16), + OPC_GATHER4G = _OPC(5, 17), + OPC_GATHER4B = _OPC(5, 18), + OPC_GATHER4A = _OPC(5, 19), + OPC_SAMGP0 = _OPC(5, 20), + OPC_SAMGP1 = _OPC(5, 21), + OPC_SAMGP2 = _OPC(5, 22), + OPC_SAMGP3 = _OPC(5, 23), + OPC_DSXPP_1 = _OPC(5, 24), + OPC_DSYPP_1 = _OPC(5, 25), + OPC_RGETPOS = _OPC(5, 26), + OPC_RGETINFO = _OPC(5, 27), + + /* category 6: */ + OPC_LDG = _OPC(6, 0), /* load-global */ + OPC_LDL = _OPC(6, 1), + OPC_LDP = _OPC(6, 2), + OPC_STG = _OPC(6, 3), /* store-global */ + OPC_STL = _OPC(6, 4), + OPC_STP = _OPC(6, 5), + OPC_STI = _OPC(6, 6), + OPC_G2L = _OPC(6, 7), + OPC_L2G = _OPC(6, 8), + OPC_PREFETCH = _OPC(6, 9), + OPC_LDLW = _OPC(6, 10), + OPC_STLW = _OPC(6, 11), + OPC_RESFMT = _OPC(6, 14), + OPC_RESINFO = _OPC(6, 15), + OPC_ATOMIC_ADD = _OPC(6, 16), + OPC_ATOMIC_SUB = _OPC(6, 17), + OPC_ATOMIC_XCHG = _OPC(6, 18), + OPC_ATOMIC_INC = _OPC(6, 19), + OPC_ATOMIC_DEC = _OPC(6, 20), + OPC_ATOMIC_CMPXCHG = _OPC(6, 21), + OPC_ATOMIC_MIN = _OPC(6, 22), + 
OPC_ATOMIC_MAX = _OPC(6, 23), + OPC_ATOMIC_AND = _OPC(6, 24), + OPC_ATOMIC_OR = _OPC(6, 25), + OPC_ATOMIC_XOR = _OPC(6, 26), + OPC_LDGB = _OPC(6, 27), + OPC_STGB = _OPC(6, 28), + OPC_STIB = _OPC(6, 29), + OPC_LDC = _OPC(6, 30), + OPC_LDLV = _OPC(6, 31), + + /* category 7: */ + OPC_BAR = _OPC(7, 0), + OPC_FENCE = _OPC(7, 1), + + /* meta instructions (category -1): */ + /* placeholder instr to mark shader inputs: */ + OPC_META_INPUT = _OPC(-1, 0), + /* The "fan-in" and "fan-out" instructions are used for keeping + * track of instructions that write to multiple dst registers + * (fan-out) like texture sample instructions, or read multiple + * consecutive scalar registers (fan-in) (bary.f, texture samp) + */ + OPC_META_FO = _OPC(-1, 2), + OPC_META_FI = _OPC(-1, 3), + +} opc_t; + +#define opc_cat(opc) ((int)((opc) >> NOPC_BITS)) +#define opc_op(opc) ((unsigned)((opc) & ((1 << NOPC_BITS) - 1))) + +typedef enum { + TYPE_F16 = 0, + TYPE_F32 = 1, + TYPE_U16 = 2, + TYPE_U32 = 3, + TYPE_S16 = 4, + TYPE_S32 = 5, + TYPE_U8 = 6, + TYPE_S8 = 7, // XXX I assume? 
+} type_t; + +static inline uint32_t type_size(type_t type) +{ + switch (type) { + case TYPE_F32: + case TYPE_U32: + case TYPE_S32: + return 32; + case TYPE_F16: + case TYPE_U16: + case TYPE_S16: + return 16; + case TYPE_U8: + case TYPE_S8: + return 8; + default: + assert(0); /* invalid type */ + return 0; + } +} + +static inline int type_float(type_t type) +{ + return (type == TYPE_F32) || (type == TYPE_F16); +} + +static inline int type_uint(type_t type) +{ + return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8); +} + +static inline int type_sint(type_t type) +{ + return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8); +} + +typedef union PACKED { + /* normal gpr or const src register: */ + struct PACKED { + uint32_t comp : 2; + uint32_t num : 10; + }; + /* for immediate val: */ + int32_t iim_val : 11; + /* to make compiler happy: */ + uint32_t dummy32; + uint32_t dummy10 : 10; + int32_t idummy10 : 10; + uint32_t dummy11 : 11; + uint32_t dummy12 : 12; + uint32_t dummy13 : 13; + uint32_t dummy8 : 8; +} reg_t; + +/* special registers: */ +#define REG_A0 61 /* address register */ +#define REG_P0 62 /* predicate register */ + +static inline int reg_special(reg_t reg) +{ + return (reg.num == REG_A0) || (reg.num == REG_P0); +} + +typedef struct PACKED { + /* dword0: */ + union PACKED { + struct PACKED { + int16_t immed : 16; + uint32_t dummy1 : 16; + } a3xx; + struct PACKED { + int32_t immed : 20; + uint32_t dummy1 : 12; + } a4xx; + struct PACKED { + int32_t immed : 32; + } a5xx; + }; + + /* dword1: */ + uint32_t dummy2 : 8; + uint32_t repeat : 3; + uint32_t dummy3 : 1; + uint32_t ss : 1; + uint32_t dummy4 : 7; + uint32_t inv : 1; + uint32_t comp : 2; + uint32_t opc : 4; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; +} instr_cat0_t; + +typedef struct PACKED { + /* dword0: */ + union PACKED { + /* for normal src register: */ + struct PACKED { + uint32_t src : 11; + /* at least low bit of pad must be zero or it will + * 
look like a address relative src + */ + uint32_t pad : 21; + }; + /* for address relative: */ + struct PACKED { + int32_t off : 10; + uint32_t src_rel_c : 1; + uint32_t src_rel : 1; + uint32_t unknown : 20; + }; + /* for immediate: */ + int32_t iim_val; + uint32_t uim_val; + float fim_val; + }; + + /* dword1: */ + uint32_t dst : 8; + uint32_t repeat : 3; + uint32_t src_r : 1; + uint32_t ss : 1; + uint32_t ul : 1; + uint32_t dst_type : 3; + uint32_t dst_rel : 1; + uint32_t src_type : 3; + uint32_t src_c : 1; + uint32_t src_im : 1; + uint32_t even : 1; + uint32_t pos_inf : 1; + uint32_t must_be_0 : 2; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; +} instr_cat1_t; + +typedef struct PACKED { + /* dword0: */ + union PACKED { + struct PACKED { + uint32_t src1 : 11; + uint32_t must_be_zero1: 2; + uint32_t src1_im : 1; /* immediate */ + uint32_t src1_neg : 1; /* negate */ + uint32_t src1_abs : 1; /* absolute value */ + }; + struct PACKED { + uint32_t src1 : 10; + uint32_t src1_c : 1; /* relative-const */ + uint32_t src1_rel : 1; /* relative address */ + uint32_t must_be_zero : 1; + uint32_t dummy : 3; + } rel1; + struct PACKED { + uint32_t src1 : 12; + uint32_t src1_c : 1; /* const */ + uint32_t dummy : 3; + } c1; + }; + + union PACKED { + struct PACKED { + uint32_t src2 : 11; + uint32_t must_be_zero2: 2; + uint32_t src2_im : 1; /* immediate */ + uint32_t src2_neg : 1; /* negate */ + uint32_t src2_abs : 1; /* absolute value */ + }; + struct PACKED { + uint32_t src2 : 10; + uint32_t src2_c : 1; /* relative-const */ + uint32_t src2_rel : 1; /* relative address */ + uint32_t must_be_zero : 1; + uint32_t dummy : 3; + } rel2; + struct PACKED { + uint32_t src2 : 12; + uint32_t src2_c : 1; /* const */ + uint32_t dummy : 3; + } c2; + }; + + /* dword1: */ + uint32_t dst : 8; + uint32_t repeat : 2; + uint32_t sat : 1; + uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */ + uint32_t ss : 1; + uint32_t ul : 1; /* dunno */ + uint32_t dst_half : 1; /* or 
widen/narrow.. ie. dst hrN <-> rN */ + uint32_t ei : 1; + uint32_t cond : 3; + uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */ + uint32_t full : 1; /* not half */ + uint32_t opc : 6; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; +} instr_cat2_t; + +typedef struct PACKED { + /* dword0: */ + union PACKED { + struct PACKED { + uint32_t src1 : 11; + uint32_t must_be_zero1: 2; + uint32_t src2_c : 1; + uint32_t src1_neg : 1; + uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */ + }; + struct PACKED { + uint32_t src1 : 10; + uint32_t src1_c : 1; + uint32_t src1_rel : 1; + uint32_t must_be_zero : 1; + uint32_t dummy : 3; + } rel1; + struct PACKED { + uint32_t src1 : 12; + uint32_t src1_c : 1; + uint32_t dummy : 3; + } c1; + }; + + union PACKED { + struct PACKED { + uint32_t src3 : 11; + uint32_t must_be_zero2: 2; + uint32_t src3_r : 1; + uint32_t src2_neg : 1; + uint32_t src3_neg : 1; + }; + struct PACKED { + uint32_t src3 : 10; + uint32_t src3_c : 1; + uint32_t src3_rel : 1; + uint32_t must_be_zero : 1; + uint32_t dummy : 3; + } rel2; + struct PACKED { + uint32_t src3 : 12; + uint32_t src3_c : 1; + uint32_t dummy : 3; + } c2; + }; + + /* dword1: */ + uint32_t dst : 8; + uint32_t repeat : 2; + uint32_t sat : 1; + uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */ + uint32_t ss : 1; + uint32_t ul : 1; + uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */ + uint32_t src2 : 8; + uint32_t opc : 4; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; +} instr_cat3_t; + +static inline bool instr_cat3_full(instr_cat3_t *cat3) +{ + switch (_OPC(3, cat3->opc)) { + case OPC_MAD_F16: + case OPC_MAD_U16: + case OPC_MAD_S16: + case OPC_SEL_B16: + case OPC_SEL_S16: + case OPC_SEL_F16: + case OPC_SAD_S16: + case OPC_SAD_S32: // really?? 
+ return false; + default: + return true; + } +} + +typedef struct PACKED { + /* dword0: */ + union PACKED { + struct PACKED { + uint32_t src : 11; + uint32_t must_be_zero1: 2; + uint32_t src_im : 1; /* immediate */ + uint32_t src_neg : 1; /* negate */ + uint32_t src_abs : 1; /* absolute value */ + }; + struct PACKED { + uint32_t src : 10; + uint32_t src_c : 1; /* relative-const */ + uint32_t src_rel : 1; /* relative address */ + uint32_t must_be_zero : 1; + uint32_t dummy : 3; + } rel; + struct PACKED { + uint32_t src : 12; + uint32_t src_c : 1; /* const */ + uint32_t dummy : 3; + } c; + }; + uint32_t dummy1 : 16; /* seem to be ignored */ + + /* dword1: */ + uint32_t dst : 8; + uint32_t repeat : 2; + uint32_t sat : 1; + uint32_t src_r : 1; + uint32_t ss : 1; + uint32_t ul : 1; + uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */ + uint32_t dummy2 : 5; /* seem to be ignored */ + uint32_t full : 1; /* not half */ + uint32_t opc : 6; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; +} instr_cat4_t; + +typedef struct PACKED { + /* dword0: */ + union PACKED { + /* normal case: */ + struct PACKED { + uint32_t full : 1; /* not half */ + uint32_t src1 : 8; + uint32_t src2 : 8; + uint32_t dummy1 : 4; /* seem to be ignored */ + uint32_t samp : 4; + uint32_t tex : 7; + } norm; + /* s2en case: */ + struct PACKED { + uint32_t full : 1; /* not half */ + uint32_t src1 : 8; + uint32_t src2 : 11; + uint32_t dummy1 : 1; + uint32_t src3 : 8; + uint32_t dummy2 : 3; + } s2en; + /* same in either case: */ + // XXX I think, confirm this + struct PACKED { + uint32_t full : 1; /* not half */ + uint32_t src1 : 8; + uint32_t pad : 23; + }; + }; + + /* dword1: */ + uint32_t dst : 8; + uint32_t wrmask : 4; /* write-mask */ + uint32_t type : 3; + uint32_t dummy2 : 1; /* seems to be ignored */ + uint32_t is_3d : 1; + + uint32_t is_a : 1; + uint32_t is_s : 1; + uint32_t is_s2en : 1; + uint32_t is_o : 1; + uint32_t is_p : 1; + + uint32_t opc : 5; + uint32_t 
jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; +} instr_cat5_t; + +/* dword0 encoding for src_off: [src1 + off], src2: */ +typedef struct PACKED { + /* dword0: */ + uint32_t mustbe1 : 1; + int32_t off : 13; + uint32_t src1 : 8; + uint32_t src1_im : 1; + uint32_t src2_im : 1; + uint32_t src2 : 8; + + /* dword1: */ + uint32_t dword1; +} instr_cat6a_t; + +/* dword0 encoding for !src_off: [src1], src2 */ +typedef struct PACKED { + /* dword0: */ + uint32_t mustbe0 : 1; + uint32_t src1 : 13; + uint32_t ignore0 : 8; + uint32_t src1_im : 1; + uint32_t src2_im : 1; + uint32_t src2 : 8; + + /* dword1: */ + uint32_t dword1; +} instr_cat6b_t; + +/* dword1 encoding for dst_off: */ +typedef struct PACKED { + /* dword0: */ + uint32_t dword0; + + /* note: there is some weird stuff going on where sometimes + * cat6->a.off is involved.. but that seems like a bug in + * the blob, since it is used even if !cat6->src_off + * It would make sense for there to be some more bits to + * bring us to 11 bits worth of offset, but not sure.. + */ + int32_t off : 8; + uint32_t mustbe1 : 1; + uint32_t dst : 8; + uint32_t pad1 : 15; +} instr_cat6c_t; + +/* dword1 encoding for !dst_off: */ +typedef struct PACKED { + /* dword0: */ + uint32_t dword0; + + uint32_t dst : 8; + uint32_t mustbe0 : 1; + uint32_t idx : 8; + uint32_t pad0 : 15; +} instr_cat6d_t; + +/* ldgb and atomics.. 
+ * + * ldgb: pad0=0, pad3=1 + * atomic .g: pad0=1, pad3=1 + * .l: pad0=1, pad3=0 + */ +typedef struct PACKED { + /* dword0: */ + uint32_t pad0 : 1; + uint32_t src3 : 8; + uint32_t d : 2; + uint32_t typed : 1; + uint32_t type_size : 2; + uint32_t src1 : 8; + uint32_t src1_im : 1; + uint32_t src2_im : 1; + uint32_t src2 : 8; + + /* dword1: */ + uint32_t dst : 8; + uint32_t mustbe0 : 1; + uint32_t src_ssbo : 8; + uint32_t pad2 : 3; // type + uint32_t g : 1; + uint32_t pad3 : 1; + uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat +} instr_cat6ldgb_t; + +/* stgb, pad0=0, pad3=2 + */ +typedef struct PACKED { + /* dword0: */ + uint32_t mustbe1 : 1; // ??? + uint32_t src1 : 8; + uint32_t d : 2; + uint32_t typed : 1; + uint32_t type_size : 2; + uint32_t pad0 : 9; + uint32_t src2_im : 1; + uint32_t src2 : 8; + + /* dword1: */ + uint32_t src3 : 8; + uint32_t src3_im : 1; + uint32_t dst_ssbo : 8; + uint32_t pad2 : 3; // type + uint32_t pad3 : 2; + uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat +} instr_cat6stgb_t; + +typedef union PACKED { + instr_cat6a_t a; + instr_cat6b_t b; + instr_cat6c_t c; + instr_cat6d_t d; + instr_cat6ldgb_t ldgb; + instr_cat6stgb_t stgb; + struct PACKED { + /* dword0: */ + uint32_t src_off : 1; + uint32_t pad1 : 31; + + /* dword1: */ + uint32_t pad2 : 8; + uint32_t dst_off : 1; + uint32_t pad3 : 8; + uint32_t type : 3; + uint32_t g : 1; /* or in some cases it means dst immed */ + uint32_t pad4 : 1; + uint32_t opc : 5; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; + }; +} instr_cat6_t; + +/** + * For atomic ops (which return a value): + * + * pad1=1, pad2=c, pad3=0, pad4=3 + * src1 - vecN offset/coords + * src2.x - is actually dest register + * src2.y - is 'data' except for cmpxchg where src2.y is 'compare' + * and src2.z is 'data' + * + * For stib (which does not return a value): + * pad1=0, pad2=c, pad3=0, pad4=2 + * src1 - vecN offset/coords + * src2 - value to store + * + * for ldc (load from UBO using descriptor): + * 
pad1=0, pad2=8, pad3=0, pad4=2 + */ +typedef struct PACKED { + /* dword0: */ + uint32_t pad1 : 9; + uint32_t d : 2; + uint32_t typed : 1; + uint32_t type_size : 2; + uint32_t opc : 5; + uint32_t pad2 : 5; + uint32_t src1 : 8; /* coordinate/offset */ + + /* dword1: */ + uint32_t src2 : 8; + uint32_t pad3 : 1; //mustbe0 ?? or zero means imm vs reg for ssbo?? + uint32_t ssbo : 8; /* ssbo/image binding point */ + uint32_t type : 3; + uint32_t pad4 : 7; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; +} instr_cat6_a6xx_t; + +typedef struct PACKED { + /* dword0: */ + uint32_t pad1 : 32; + + /* dword1: */ + uint32_t pad2 : 12; + uint32_t ss : 1; /* maybe in the encoding, but blob only uses (sy) */ + uint32_t pad3 : 6; + uint32_t w : 1; /* write */ + uint32_t r : 1; /* read */ + uint32_t l : 1; /* local */ + uint32_t g : 1; /* global */ + uint32_t opc : 4; /* presumed, but only a couple known OPCs */ + uint32_t jmp_tgt : 1; /* (jp) */ + uint32_t sync : 1; /* (sy) */ + uint32_t opc_cat : 3; +} instr_cat7_t; + +typedef union PACKED { + instr_cat0_t cat0; + instr_cat1_t cat1; + instr_cat2_t cat2; + instr_cat3_t cat3; + instr_cat4_t cat4; + instr_cat5_t cat5; + instr_cat6_t cat6; + instr_cat6_a6xx_t cat6_a6xx; + instr_cat7_t cat7; + struct PACKED { + /* dword0: */ + uint32_t pad1 : 32; + + /* dword1: */ + uint32_t pad2 : 12; + uint32_t ss : 1; /* cat1-cat4 (cat0??) and cat7 (?) */ + uint32_t ul : 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) 
*/ + uint32_t pad3 : 13; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; + + }; +} instr_t; + +static inline uint32_t instr_repeat(instr_t *instr) +{ + switch (instr->opc_cat) { + case 0: return instr->cat0.repeat; + case 1: return instr->cat1.repeat; + case 2: return instr->cat2.repeat; + case 3: return instr->cat3.repeat; + case 4: return instr->cat4.repeat; + default: return 0; + } +} + +static inline bool instr_sat(instr_t *instr) +{ + switch (instr->opc_cat) { + case 2: return instr->cat2.sat; + case 3: return instr->cat3.sat; + case 4: return instr->cat4.sat; + default: return false; + } +} + +static inline uint32_t instr_opc(instr_t *instr, unsigned gpu_id) +{ + switch (instr->opc_cat) { + case 0: return instr->cat0.opc; + case 1: return 0; + case 2: return instr->cat2.opc; + case 3: return instr->cat3.opc; + case 4: return instr->cat4.opc; + case 5: return instr->cat5.opc; + case 6: + // TODO not sure if this is the best way to figure + // out if new vs old encoding, but it kinda seems + // to work: + if ((gpu_id >= 600) && (instr->cat6.opc == 0)) + return instr->cat6_a6xx.opc; + return instr->cat6.opc; + case 7: return instr->cat7.opc; + default: return 0; + } +} + +static inline bool is_mad(opc_t opc) +{ + switch (opc) { + case OPC_MAD_U16: + case OPC_MAD_S16: + case OPC_MAD_U24: + case OPC_MAD_S24: + case OPC_MAD_F16: + case OPC_MAD_F32: + return true; + default: + return false; + } +} + +static inline bool is_madsh(opc_t opc) +{ + switch (opc) { + case OPC_MADSH_U16: + case OPC_MADSH_M16: + return true; + default: + return false; + } +} + +static inline bool is_atomic(opc_t opc) +{ + switch (opc) { + case OPC_ATOMIC_ADD: + case OPC_ATOMIC_SUB: + case OPC_ATOMIC_XCHG: + case OPC_ATOMIC_INC: + case OPC_ATOMIC_DEC: + case OPC_ATOMIC_CMPXCHG: + case OPC_ATOMIC_MIN: + case OPC_ATOMIC_MAX: + case OPC_ATOMIC_AND: + case OPC_ATOMIC_OR: + case OPC_ATOMIC_XOR: + return true; + default: + return false; + } +} + +static inline bool 
is_ssbo(opc_t opc) +{ + switch (opc) { + case OPC_RESFMT: + case OPC_RESINFO: + case OPC_LDGB: + case OPC_STGB: + case OPC_STIB: + return true; + default: + return false; + } +} + +int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id); + +#endif /* INSTR_A3XX_H_ */ diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3.c mesa-19.0.1/src/freedreno/ir3/ir3.c --- mesa-18.3.3/src/freedreno/ir3/ir3.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,941 @@ +/* + * Copyright (c) 2012 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "ir3.h" + +#include +#include +#include +#include +#include +#include + +#include "util/bitscan.h" +#include "util/ralloc.h" +#include "util/u_math.h" + +#include "instr-a3xx.h" + +/* simple allocator to carve allocations out of an up-front allocated heap, + * so that we can free everything easily in one shot. + */ +void * ir3_alloc(struct ir3 *shader, int sz) +{ + return rzalloc_size(shader, sz); /* TODO: don't use rzalloc */ +} + +struct ir3 * ir3_create(struct ir3_compiler *compiler, + unsigned nin, unsigned nout) +{ + struct ir3 *shader = rzalloc(compiler, struct ir3); + + shader->compiler = compiler; + shader->ninputs = nin; + shader->inputs = ir3_alloc(shader, sizeof(shader->inputs[0]) * nin); + + shader->noutputs = nout; + shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout); + + list_inithead(&shader->block_list); + list_inithead(&shader->array_list); + + return shader; +} + +void ir3_destroy(struct ir3 *shader) +{ + ralloc_free(shader); +} + +#define iassert(cond) do { \ + if (!(cond)) { \ + debug_assert(cond); \ + return -1; \ + } } while (0) + +#define iassert_type(reg, full) do { \ + if ((full)) { \ + iassert(!((reg)->flags & IR3_REG_HALF)); \ + } else { \ + iassert((reg)->flags & IR3_REG_HALF); \ + } } while (0); + +static uint32_t reg(struct ir3_register *reg, struct ir3_info *info, + uint32_t repeat, uint32_t valid_flags) +{ + reg_t val = { .dummy32 = 0 }; + + if (reg->flags & ~valid_flags) { + debug_printf("INVALID FLAGS: %x vs %x\n", + reg->flags, valid_flags); + } + + if (!(reg->flags & IR3_REG_R)) + repeat = 0; + + if (reg->flags & IR3_REG_IMMED) { + val.iim_val = reg->iim_val; + } else { + unsigned components; + int16_t max; + + if (reg->flags & IR3_REG_RELATIV) { + components = reg->size; + val.idummy10 = reg->array.offset; + max = (reg->array.offset + repeat + components - 1) >> 2; + } else { + components = util_last_bit(reg->wrmask); + val.comp = reg->num & 0x3; + val.num = reg->num >> 2; + max = (reg->num + 
repeat + components - 1) >> 2; + } + + if (reg->flags & IR3_REG_CONST) { + info->max_const = MAX2(info->max_const, max); + } else if (val.num == 63) { + /* ignore writes to dummy register r63.x */ + } else if (max < 48) { + if (reg->flags & IR3_REG_HALF) { + if (info->gpu_id >= 600) { + /* starting w/ a6xx, half regs conflict with full regs: */ + info->max_reg = MAX2(info->max_reg, (max+1)/2); + } else { + info->max_half_reg = MAX2(info->max_half_reg, max); + } + } else { + info->max_reg = MAX2(info->max_reg, max); + } + } + } + + return val.dummy32; +} + +static int emit_cat0(struct ir3_instruction *instr, void *ptr, + struct ir3_info *info) +{ + instr_cat0_t *cat0 = ptr; + + if (info->gpu_id >= 500) { + cat0->a5xx.immed = instr->cat0.immed; + } else if (info->gpu_id >= 400) { + cat0->a4xx.immed = instr->cat0.immed; + } else { + cat0->a3xx.immed = instr->cat0.immed; + } + cat0->repeat = instr->repeat; + cat0->ss = !!(instr->flags & IR3_INSTR_SS); + cat0->inv = instr->cat0.inv; + cat0->comp = instr->cat0.comp; + cat0->opc = instr->opc; + cat0->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat0->sync = !!(instr->flags & IR3_INSTR_SY); + cat0->opc_cat = 0; + + return 0; +} + +static int emit_cat1(struct ir3_instruction *instr, void *ptr, + struct ir3_info *info) +{ + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src = instr->regs[1]; + instr_cat1_t *cat1 = ptr; + + iassert(instr->regs_count == 2); + iassert_type(dst, type_size(instr->cat1.dst_type) == 32); + if (!(src->flags & IR3_REG_IMMED)) + iassert_type(src, type_size(instr->cat1.src_type) == 32); + + if (src->flags & IR3_REG_IMMED) { + cat1->iim_val = src->iim_val; + cat1->src_im = 1; + } else if (src->flags & IR3_REG_RELATIV) { + cat1->off = reg(src, info, instr->repeat, + IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF | IR3_REG_RELATIV); + cat1->src_rel = 1; + cat1->src_rel_c = !!(src->flags & IR3_REG_CONST); + } else { + cat1->src = reg(src, info, instr->repeat, + IR3_REG_R | IR3_REG_CONST | 
IR3_REG_HALF); + cat1->src_c = !!(src->flags & IR3_REG_CONST); + } + + cat1->dst = reg(dst, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_EVEN | + IR3_REG_R | IR3_REG_POS_INF | IR3_REG_HALF); + cat1->repeat = instr->repeat; + cat1->src_r = !!(src->flags & IR3_REG_R); + cat1->ss = !!(instr->flags & IR3_INSTR_SS); + cat1->ul = !!(instr->flags & IR3_INSTR_UL); + cat1->dst_type = instr->cat1.dst_type; + cat1->dst_rel = !!(dst->flags & IR3_REG_RELATIV); + cat1->src_type = instr->cat1.src_type; + cat1->even = !!(dst->flags & IR3_REG_EVEN); + cat1->pos_inf = !!(dst->flags & IR3_REG_POS_INF); + cat1->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat1->sync = !!(instr->flags & IR3_INSTR_SY); + cat1->opc_cat = 1; + + return 0; +} + +static int emit_cat2(struct ir3_instruction *instr, void *ptr, + struct ir3_info *info) +{ + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src1 = instr->regs[1]; + struct ir3_register *src2 = instr->regs[2]; + instr_cat2_t *cat2 = ptr; + unsigned absneg = ir3_cat2_absneg(instr->opc); + + iassert((instr->regs_count == 2) || (instr->regs_count == 3)); + + if (src1->flags & IR3_REG_RELATIV) { + iassert(src1->array.offset < (1 << 10)); + cat2->rel1.src1 = reg(src1, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | + IR3_REG_HALF | absneg); + cat2->rel1.src1_c = !!(src1->flags & IR3_REG_CONST); + cat2->rel1.src1_rel = 1; + } else if (src1->flags & IR3_REG_CONST) { + iassert(src1->num < (1 << 12)); + cat2->c1.src1 = reg(src1, info, instr->repeat, + IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); + cat2->c1.src1_c = 1; + } else { + iassert(src1->num < (1 << 11)); + cat2->src1 = reg(src1, info, instr->repeat, + IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF | + absneg); + } + cat2->src1_im = !!(src1->flags & IR3_REG_IMMED); + cat2->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); + cat2->src1_abs = !!(src1->flags & (IR3_REG_FABS | IR3_REG_SABS)); + cat2->src1_r = !!(src1->flags & IR3_REG_R); + + if 
(src2) { + iassert((src2->flags & IR3_REG_IMMED) || + !((src1->flags ^ src2->flags) & IR3_REG_HALF)); + + if (src2->flags & IR3_REG_RELATIV) { + iassert(src2->array.offset < (1 << 10)); + cat2->rel2.src2 = reg(src2, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | + IR3_REG_HALF | absneg); + cat2->rel2.src2_c = !!(src2->flags & IR3_REG_CONST); + cat2->rel2.src2_rel = 1; + } else if (src2->flags & IR3_REG_CONST) { + iassert(src2->num < (1 << 12)); + cat2->c2.src2 = reg(src2, info, instr->repeat, + IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); + cat2->c2.src2_c = 1; + } else { + iassert(src2->num < (1 << 11)); + cat2->src2 = reg(src2, info, instr->repeat, + IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF | + absneg); + } + + cat2->src2_im = !!(src2->flags & IR3_REG_IMMED); + cat2->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); + cat2->src2_abs = !!(src2->flags & (IR3_REG_FABS | IR3_REG_SABS)); + cat2->src2_r = !!(src2->flags & IR3_REG_R); + } + + cat2->dst = reg(dst, info, instr->repeat, + IR3_REG_R | IR3_REG_EI | IR3_REG_HALF); + cat2->repeat = instr->repeat; + cat2->sat = !!(instr->flags & IR3_INSTR_SAT); + cat2->ss = !!(instr->flags & IR3_INSTR_SS); + cat2->ul = !!(instr->flags & IR3_INSTR_UL); + cat2->dst_half = !!((src1->flags ^ dst->flags) & IR3_REG_HALF); + cat2->ei = !!(dst->flags & IR3_REG_EI); + cat2->cond = instr->cat2.condition; + cat2->full = ! 
(src1->flags & IR3_REG_HALF); + cat2->opc = instr->opc; + cat2->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat2->sync = !!(instr->flags & IR3_INSTR_SY); + cat2->opc_cat = 2; + + return 0; +} + +static int emit_cat3(struct ir3_instruction *instr, void *ptr, + struct ir3_info *info) +{ + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src1 = instr->regs[1]; + struct ir3_register *src2 = instr->regs[2]; + struct ir3_register *src3 = instr->regs[3]; + unsigned absneg = ir3_cat3_absneg(instr->opc); + instr_cat3_t *cat3 = ptr; + uint32_t src_flags = 0; + + switch (instr->opc) { + case OPC_MAD_F16: + case OPC_MAD_U16: + case OPC_MAD_S16: + case OPC_SEL_B16: + case OPC_SEL_S16: + case OPC_SEL_F16: + case OPC_SAD_S16: + case OPC_SAD_S32: // really?? + src_flags |= IR3_REG_HALF; + break; + default: + break; + } + + iassert(instr->regs_count == 4); + iassert(!((src1->flags ^ src_flags) & IR3_REG_HALF)); + iassert(!((src2->flags ^ src_flags) & IR3_REG_HALF)); + iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF)); + + if (src1->flags & IR3_REG_RELATIV) { + iassert(src1->array.offset < (1 << 10)); + cat3->rel1.src1 = reg(src1, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | + IR3_REG_HALF | absneg); + cat3->rel1.src1_c = !!(src1->flags & IR3_REG_CONST); + cat3->rel1.src1_rel = 1; + } else if (src1->flags & IR3_REG_CONST) { + iassert(src1->num < (1 << 12)); + cat3->c1.src1 = reg(src1, info, instr->repeat, + IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); + cat3->c1.src1_c = 1; + } else { + iassert(src1->num < (1 << 11)); + cat3->src1 = reg(src1, info, instr->repeat, + IR3_REG_R | IR3_REG_HALF | absneg); + } + + cat3->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); + cat3->src1_r = !!(src1->flags & IR3_REG_R); + + cat3->src2 = reg(src2, info, instr->repeat, + IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF | absneg); + cat3->src2_c = !!(src2->flags & IR3_REG_CONST); + cat3->src2_neg = !!(src2->flags & (IR3_REG_FNEG | 
IR3_REG_SNEG | IR3_REG_BNOT)); + cat3->src2_r = !!(src2->flags & IR3_REG_R); + + + if (src3->flags & IR3_REG_RELATIV) { + iassert(src3->array.offset < (1 << 10)); + cat3->rel2.src3 = reg(src3, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | + IR3_REG_HALF | absneg); + cat3->rel2.src3_c = !!(src3->flags & IR3_REG_CONST); + cat3->rel2.src3_rel = 1; + } else if (src3->flags & IR3_REG_CONST) { + iassert(src3->num < (1 << 12)); + cat3->c2.src3 = reg(src3, info, instr->repeat, + IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); + cat3->c2.src3_c = 1; + } else { + iassert(src3->num < (1 << 11)); + cat3->src3 = reg(src3, info, instr->repeat, + IR3_REG_R | IR3_REG_HALF | absneg); + } + + cat3->src3_neg = !!(src3->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); + cat3->src3_r = !!(src3->flags & IR3_REG_R); + + cat3->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + cat3->repeat = instr->repeat; + cat3->sat = !!(instr->flags & IR3_INSTR_SAT); + cat3->ss = !!(instr->flags & IR3_INSTR_SS); + cat3->ul = !!(instr->flags & IR3_INSTR_UL); + cat3->dst_half = !!((src_flags ^ dst->flags) & IR3_REG_HALF); + cat3->opc = instr->opc; + cat3->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat3->sync = !!(instr->flags & IR3_INSTR_SY); + cat3->opc_cat = 3; + + return 0; +} + +static int emit_cat4(struct ir3_instruction *instr, void *ptr, + struct ir3_info *info) +{ + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src = instr->regs[1]; + instr_cat4_t *cat4 = ptr; + + iassert(instr->regs_count == 2); + + if (src->flags & IR3_REG_RELATIV) { + iassert(src->array.offset < (1 << 10)); + cat4->rel.src = reg(src, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_FNEG | + IR3_REG_FABS | IR3_REG_R | IR3_REG_HALF); + cat4->rel.src_c = !!(src->flags & IR3_REG_CONST); + cat4->rel.src_rel = 1; + } else if (src->flags & IR3_REG_CONST) { + iassert(src->num < (1 << 12)); + cat4->c.src = reg(src, info, instr->repeat, + IR3_REG_CONST | 
IR3_REG_FNEG | IR3_REG_FABS | + IR3_REG_R | IR3_REG_HALF); + cat4->c.src_c = 1; + } else { + iassert(src->num < (1 << 11)); + cat4->src = reg(src, info, instr->repeat, + IR3_REG_IMMED | IR3_REG_FNEG | IR3_REG_FABS | + IR3_REG_R | IR3_REG_HALF); + } + + cat4->src_im = !!(src->flags & IR3_REG_IMMED); + cat4->src_neg = !!(src->flags & IR3_REG_FNEG); + cat4->src_abs = !!(src->flags & IR3_REG_FABS); + cat4->src_r = !!(src->flags & IR3_REG_R); + + cat4->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + cat4->repeat = instr->repeat; + cat4->sat = !!(instr->flags & IR3_INSTR_SAT); + cat4->ss = !!(instr->flags & IR3_INSTR_SS); + cat4->ul = !!(instr->flags & IR3_INSTR_UL); + cat4->dst_half = !!((src->flags ^ dst->flags) & IR3_REG_HALF); + cat4->full = ! (src->flags & IR3_REG_HALF); + cat4->opc = instr->opc; + cat4->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat4->sync = !!(instr->flags & IR3_INSTR_SY); + cat4->opc_cat = 4; + + return 0; +} + +static int emit_cat5(struct ir3_instruction *instr, void *ptr, + struct ir3_info *info) +{ + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src1 = instr->regs[1]; + struct ir3_register *src2 = instr->regs[2]; + struct ir3_register *src3 = instr->regs[3]; + instr_cat5_t *cat5 = ptr; + + iassert_type(dst, type_size(instr->cat5.type) == 32) + + assume(src1 || !src2); + assume(src2 || !src3); + + if (src1) { + cat5->full = ! 
(src1->flags & IR3_REG_HALF); + cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF); + } + + if (instr->flags & IR3_INSTR_S2EN) { + if (src2) { + iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); + cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); + } + if (src3) { + iassert(src3->flags & IR3_REG_HALF); + cat5->s2en.src3 = reg(src3, info, instr->repeat, IR3_REG_HALF); + } + iassert(!(instr->cat5.samp | instr->cat5.tex)); + } else { + iassert(!src3); + if (src2) { + iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); + cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); + } + cat5->norm.samp = instr->cat5.samp; + cat5->norm.tex = instr->cat5.tex; + } + + cat5->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + cat5->wrmask = dst->wrmask; + cat5->type = instr->cat5.type; + cat5->is_3d = !!(instr->flags & IR3_INSTR_3D); + cat5->is_a = !!(instr->flags & IR3_INSTR_A); + cat5->is_s = !!(instr->flags & IR3_INSTR_S); + cat5->is_s2en = !!(instr->flags & IR3_INSTR_S2EN); + cat5->is_o = !!(instr->flags & IR3_INSTR_O); + cat5->is_p = !!(instr->flags & IR3_INSTR_P); + cat5->opc = instr->opc; + cat5->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat5->sync = !!(instr->flags & IR3_INSTR_SY); + cat5->opc_cat = 5; + + return 0; +} + +static int emit_cat6(struct ir3_instruction *instr, void *ptr, + struct ir3_info *info) +{ + struct ir3_register *dst, *src1, *src2; + instr_cat6_t *cat6 = ptr; + bool type_full = type_size(instr->cat6.type) == 32; + + cat6->type = instr->cat6.type; + cat6->opc = instr->opc; + cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat6->sync = !!(instr->flags & IR3_INSTR_SY); + cat6->g = !!(instr->flags & IR3_INSTR_G); + cat6->opc_cat = 6; + + switch (instr->opc) { + case OPC_RESINFO: + case OPC_RESFMT: + iassert_type(instr->regs[0], type_full); /* dst */ + iassert_type(instr->regs[1], type_full); /* src1 */ + break; + case OPC_L2G: + case OPC_G2L: + iassert_type(instr->regs[0], true); /* dst */ + 
iassert_type(instr->regs[1], true); /* src1 */ + break; + case OPC_STG: + case OPC_STL: + case OPC_STP: + case OPC_STI: + case OPC_STLW: + case OPC_STIB: + /* no dst, so regs[0] is dummy */ + iassert_type(instr->regs[1], true); /* dst */ + iassert_type(instr->regs[2], type_full); /* src1 */ + iassert_type(instr->regs[3], true); /* src2 */ + break; + default: + iassert_type(instr->regs[0], type_full); /* dst */ + iassert_type(instr->regs[1], true); /* src1 */ + if (instr->regs_count > 2) + iassert_type(instr->regs[2], true); /* src1 */ + break; + } + + /* the "dst" for a store instruction is (from the perspective + * of data flow in the shader, ie. register use/def, etc) in + * fact a register that is read by the instruction, rather + * than written: + */ + if (is_store(instr)) { + iassert(instr->regs_count >= 3); + + dst = instr->regs[1]; + src1 = instr->regs[2]; + src2 = (instr->regs_count >= 4) ? instr->regs[3] : NULL; + } else { + iassert(instr->regs_count >= 2); + + dst = instr->regs[0]; + src1 = instr->regs[1]; + src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL; + } + + /* TODO we need a more comprehensive list about which instructions + * can be encoded which way. Or possibly use IR3_INSTR_0 flag to + * indicate to use the src_off encoding even if offset is zero + * (but then what to do about dst_off?) + */ + if (is_atomic(instr->opc)) { + instr_cat6ldgb_t *ldgb = ptr; + + /* maybe these two bits both determine the instruction encoding? 
*/ + cat6->src_off = false; + + ldgb->d = instr->cat6.d - 1; + ldgb->typed = instr->cat6.typed; + ldgb->type_size = instr->cat6.iim_val - 1; + + ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + + if (ldgb->g) { + struct ir3_register *src3 = instr->regs[3]; + struct ir3_register *src4 = instr->regs[4]; + + /* first src is src_ssbo: */ + iassert(src1->flags & IR3_REG_IMMED); + ldgb->src_ssbo = src1->uim_val; + + ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED); + ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED); + ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED); + ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED); + + ldgb->src3 = reg(src4, info, instr->repeat, 0); + ldgb->pad0 = 0x1; + ldgb->pad3 = 0x1; + } else { + ldgb->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED); + ldgb->src1_im = !!(src1->flags & IR3_REG_IMMED); + ldgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); + ldgb->src2_im = !!(src2->flags & IR3_REG_IMMED); + ldgb->pad0 = 0x1; + ldgb->pad3 = 0x0; + } + + return 0; + } else if (instr->opc == OPC_LDGB) { + struct ir3_register *src3 = instr->regs[3]; + instr_cat6ldgb_t *ldgb = ptr; + + /* maybe these two bits both determine the instruction encoding? 
*/ + cat6->src_off = false; + + ldgb->d = instr->cat6.d - 1; + ldgb->typed = instr->cat6.typed; + ldgb->type_size = instr->cat6.iim_val - 1; + + ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + + /* first src is src_ssbo: */ + iassert(src1->flags & IR3_REG_IMMED); + ldgb->src_ssbo = src1->uim_val; + + /* then next two are src1/src2: */ + ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED); + ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED); + ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED); + ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED); + + ldgb->pad0 = 0x0; + ldgb->pad3 = 0x1; + + return 0; + } else if (instr->opc == OPC_RESINFO) { + instr_cat6ldgb_t *ldgb = ptr; + + ldgb->d = instr->cat6.d - 1; + + ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + + /* first src is src_ssbo: */ + iassert(src1->flags & IR3_REG_IMMED); + ldgb->src_ssbo = src1->uim_val; + + return 0; + } else if ((instr->opc == OPC_STGB) || (instr->opc == OPC_STIB)) { + struct ir3_register *src3 = instr->regs[4]; + instr_cat6stgb_t *stgb = ptr; + + /* maybe these two bits both determine the instruction encoding? 
*/ + cat6->src_off = true; + stgb->pad3 = 0x2; + + stgb->d = instr->cat6.d - 1; + stgb->typed = instr->cat6.typed; + stgb->type_size = instr->cat6.iim_val - 1; + + /* first src is dst_ssbo: */ + iassert(dst->flags & IR3_REG_IMMED); + stgb->dst_ssbo = dst->uim_val; + + /* then src1/src2/src3: */ + stgb->src1 = reg(src1, info, instr->repeat, 0); + stgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); + stgb->src2_im = !!(src2->flags & IR3_REG_IMMED); + stgb->src3 = reg(src3, info, instr->repeat, IR3_REG_IMMED); + stgb->src3_im = !!(src3->flags & IR3_REG_IMMED); + + return 0; + } else if (instr->cat6.src_offset || (instr->opc == OPC_LDG) || + (instr->opc == OPC_LDL)) { + instr_cat6a_t *cat6a = ptr; + + cat6->src_off = true; + + cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED); + cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED); + if (src2) { + cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); + cat6a->src2_im = !!(src2->flags & IR3_REG_IMMED); + } + cat6a->off = instr->cat6.src_offset; + } else { + instr_cat6b_t *cat6b = ptr; + + cat6->src_off = false; + + cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED | IR3_REG_HALF); + cat6b->src1_im = !!(src1->flags & IR3_REG_IMMED); + if (src2) { + cat6b->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); + cat6b->src2_im = !!(src2->flags & IR3_REG_IMMED); + } + } + + if (instr->cat6.dst_offset || (instr->opc == OPC_STG) || + (instr->opc == OPC_STL)) { + instr_cat6c_t *cat6c = ptr; + cat6->dst_off = true; + cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + cat6c->off = instr->cat6.dst_offset; + } else { + instr_cat6d_t *cat6d = ptr; + cat6->dst_off = false; + cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + } + + return 0; +} + +static int emit_cat7(struct ir3_instruction *instr, void *ptr, + struct ir3_info *info) +{ + instr_cat7_t *cat7 = ptr; + + cat7->ss = !!(instr->flags & IR3_INSTR_SS); + cat7->w = instr->cat7.w; + cat7->r = 
instr->cat7.r; + cat7->l = instr->cat7.l; + cat7->g = instr->cat7.g; + cat7->opc = instr->opc; + cat7->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat7->sync = !!(instr->flags & IR3_INSTR_SY); + cat7->opc_cat = 7; + + return 0; +} + +static int (*emit[])(struct ir3_instruction *instr, void *ptr, + struct ir3_info *info) = { + emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6, + emit_cat7, +}; + +void * ir3_assemble(struct ir3 *shader, struct ir3_info *info, + uint32_t gpu_id) +{ + uint32_t *ptr, *dwords; + + info->gpu_id = gpu_id; + info->max_reg = -1; + info->max_half_reg = -1; + info->max_const = -1; + info->instrs_count = 0; + info->sizedwords = 0; + info->ss = info->sy = 0; + + list_for_each_entry (struct ir3_block, block, &shader->block_list, node) { + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + info->sizedwords += 2; + } + } + + /* need an integer number of instruction "groups" (sets of 16 + * instructions on a4xx or sets of 4 instructions on a3xx), + * so pad out w/ NOPs if needed: (NOTE each instruction is 64bits) + */ + if (gpu_id >= 400) { + info->sizedwords = align(info->sizedwords, 16 * 2); + } else { + info->sizedwords = align(info->sizedwords, 4 * 2); + } + + ptr = dwords = calloc(4, info->sizedwords); + + list_for_each_entry (struct ir3_block, block, &shader->block_list, node) { + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + int ret = emit[opc_cat(instr->opc)](instr, dwords, info); + if (ret) + goto fail; + info->instrs_count += 1 + instr->repeat; + dwords += 2; + + if (instr->flags & IR3_INSTR_SS) + info->ss++; + + if (instr->flags & IR3_INSTR_SY) + info->sy++; + } + } + + return ptr; + +fail: + free(ptr); + return NULL; +} + +static struct ir3_register * reg_create(struct ir3 *shader, + int num, int flags) +{ + struct ir3_register *reg = + ir3_alloc(shader, sizeof(struct ir3_register)); + reg->wrmask = 1; + reg->flags = flags; + reg->num = num; + 
return reg; +} + +static void insert_instr(struct ir3_block *block, + struct ir3_instruction *instr) +{ + struct ir3 *shader = block->shader; +#ifdef DEBUG + instr->serialno = ++shader->instr_count; +#endif + list_addtail(&instr->node, &block->instr_list); + + if (is_input(instr)) + array_insert(shader, shader->baryfs, instr); +} + +struct ir3_block * ir3_block_create(struct ir3 *shader) +{ + struct ir3_block *block = ir3_alloc(shader, sizeof(*block)); +#ifdef DEBUG + block->serialno = ++shader->block_count; +#endif + block->shader = shader; + list_inithead(&block->node); + list_inithead(&block->instr_list); + return block; +} + +static struct ir3_instruction *instr_create(struct ir3_block *block, int nreg) +{ + struct ir3_instruction *instr; + unsigned sz = sizeof(*instr) + (nreg * sizeof(instr->regs[0])); + char *ptr = ir3_alloc(block->shader, sz); + + instr = (struct ir3_instruction *)ptr; + ptr += sizeof(*instr); + instr->regs = (struct ir3_register **)ptr; + +#ifdef DEBUG + instr->regs_max = nreg; +#endif + + return instr; +} + +struct ir3_instruction * ir3_instr_create2(struct ir3_block *block, + opc_t opc, int nreg) +{ + struct ir3_instruction *instr = instr_create(block, nreg); + instr->block = block; + instr->opc = opc; + insert_instr(block, instr); + return instr; +} + +struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc) +{ + /* NOTE: we could be slightly more clever, at least for non-meta, + * and choose # of regs based on category. 
+ */ + return ir3_instr_create2(block, opc, 4); +} + +struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr) +{ + struct ir3_instruction *new_instr = instr_create(instr->block, + instr->regs_count); + struct ir3_register **regs; + unsigned i; + + regs = new_instr->regs; + *new_instr = *instr; + new_instr->regs = regs; + + insert_instr(instr->block, new_instr); + + /* clone registers: */ + new_instr->regs_count = 0; + for (i = 0; i < instr->regs_count; i++) { + struct ir3_register *reg = instr->regs[i]; + struct ir3_register *new_reg = + ir3_reg_create(new_instr, reg->num, reg->flags); + *new_reg = *reg; + } + + return new_instr; +} + +/* Add a false dependency to instruction, to ensure it is scheduled first: */ +void ir3_instr_add_dep(struct ir3_instruction *instr, struct ir3_instruction *dep) +{ + array_insert(instr, instr->deps, dep); +} + +struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, + int num, int flags) +{ + struct ir3 *shader = instr->block->shader; + struct ir3_register *reg = reg_create(shader, num, flags); +#ifdef DEBUG + debug_assert(instr->regs_count < instr->regs_max); +#endif + instr->regs[instr->regs_count++] = reg; + return reg; +} + +struct ir3_register * ir3_reg_clone(struct ir3 *shader, + struct ir3_register *reg) +{ + struct ir3_register *new_reg = reg_create(shader, 0, 0); + *new_reg = *reg; + return new_reg; +} + +void +ir3_instr_set_address(struct ir3_instruction *instr, + struct ir3_instruction *addr) +{ + if (instr->address != addr) { + struct ir3 *ir = instr->block->shader; + instr->address = addr; + array_insert(ir, ir->indirects, instr); + } +} + +void +ir3_block_clear_mark(struct ir3_block *block) +{ + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) + instr->flags &= ~IR3_INSTR_MARK; +} + +void +ir3_clear_mark(struct ir3 *ir) +{ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + ir3_block_clear_mark(block); + } +} + +/* note: this will destroy 
instr->depth, don't do it until after sched! */ +unsigned +ir3_count_instructions(struct ir3 *ir) +{ + unsigned cnt = 0; + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + instr->ip = cnt++; + } + block->start_ip = list_first_entry(&block->instr_list, struct ir3_instruction, node)->ip; + block->end_ip = list_last_entry(&block->instr_list, struct ir3_instruction, node)->ip; + } + return cnt; +} + +struct ir3_array * +ir3_lookup_array(struct ir3 *ir, unsigned id) +{ + list_for_each_entry (struct ir3_array, arr, &ir->array_list, node) + if (arr->id == id) + return arr; + return NULL; +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_compiler.c mesa-19.0.1/src/freedreno/ir3/ir3_compiler.c --- mesa-18.3.3/src/freedreno/ir3/ir3_compiler.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_compiler.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2015 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "util/ralloc.h" + +#include "ir3_compiler.h" + +static const struct debug_named_value shader_debug_options[] = { + {"vs", IR3_DBG_SHADER_VS, "Print shader disasm for vertex shaders"}, + {"fs", IR3_DBG_SHADER_FS, "Print shader disasm for fragment shaders"}, + {"cs", IR3_DBG_SHADER_CS, "Print shader disasm for compute shaders"}, + {"disasm", IR3_DBG_DISASM, "Dump NIR and adreno shader disassembly"}, + {"optmsgs", IR3_DBG_OPTMSGS,"Enable optimizer debug messages"}, + DEBUG_NAMED_VALUE_END +}; + +DEBUG_GET_ONCE_FLAGS_OPTION(ir3_shader_debug, "IR3_SHADER_DEBUG", shader_debug_options, 0) + +enum ir3_shader_debug ir3_shader_debug = 0; + +struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id) +{ + struct ir3_compiler *compiler = rzalloc(NULL, struct ir3_compiler); + + ir3_shader_debug = debug_get_option_ir3_shader_debug(); + + compiler->dev = dev; + compiler->gpu_id = gpu_id; + compiler->set = ir3_ra_alloc_reg_set(compiler); + + if (compiler->gpu_id >= 400) { + /* need special handling for "flat" */ + compiler->flat_bypass = true; + compiler->levels_add_one = false; + compiler->unminify_coords = false; + compiler->txf_ms_with_isaml = false; + compiler->array_index_add_half = true; + } else { + /* no special handling for "flat" */ + compiler->flat_bypass = false; + compiler->levels_add_one = true; + compiler->unminify_coords = true; + compiler->txf_ms_with_isaml = true; + compiler->array_index_add_half = false; + } + + return compiler; +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_compiler.h mesa-19.0.1/src/freedreno/ir3/ir3_compiler.h --- mesa-18.3.3/src/freedreno/ir3/ir3_compiler.h 1970-01-01 00:00:00.000000000 +0000 +++ 
mesa-19.0.1/src/freedreno/ir3/ir3_compiler.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2013 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#ifndef IR3_COMPILER_H_ +#define IR3_COMPILER_H_ + +#include "ir3_shader.h" + +struct ir3_ra_reg_set; + +struct ir3_compiler { + struct fd_device *dev; + uint32_t gpu_id; + struct ir3_ra_reg_set *set; + uint32_t shader_count; + + /* + * Configuration options for things that are handled differently on + * different generations: + */ + + /* a4xx (and later) drops SP_FS_FLAT_SHAD_MODE_REG_* for flat-interpolate + * so we need to use ldlv.u32 to load the varying directly: + */ + bool flat_bypass; + + /* on a3xx, we need to add one to # of array levels: + */ + bool levels_add_one; + + /* on a3xx, we need to scale up integer coords for isaml based + * on LoD: + */ + bool unminify_coords; + + /* on a3xx do txf_ms w/ isaml and scaled coords: */ + bool txf_ms_with_isaml; + + /* on a4xx, for array textures we need to add 0.5 to the array + * index coordinate: + */ + bool array_index_add_half; +}; + +struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id); + +int ir3_compile_shader_nir(struct ir3_compiler *compiler, + struct ir3_shader_variant *so); + +enum ir3_shader_debug { + IR3_DBG_SHADER_VS = 0x01, + IR3_DBG_SHADER_FS = 0x02, + IR3_DBG_SHADER_CS = 0x04, + IR3_DBG_DISASM = 0x08, + IR3_DBG_OPTMSGS = 0x10, +}; + +extern enum ir3_shader_debug ir3_shader_debug; + +static inline bool +shader_debug_enabled(gl_shader_stage type) +{ + switch (type) { + case MESA_SHADER_VERTEX: return !!(ir3_shader_debug & IR3_DBG_SHADER_VS); + case MESA_SHADER_FRAGMENT: return !!(ir3_shader_debug & IR3_DBG_SHADER_FS); + case MESA_SHADER_COMPUTE: return !!(ir3_shader_debug & IR3_DBG_SHADER_CS); + default: + debug_assert(0); + return false; + } +} + +#endif /* IR3_COMPILER_H_ */ diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_compiler_nir.c mesa-19.0.1/src/freedreno/ir3/ir3_compiler_nir.c --- mesa-18.3.3/src/freedreno/ir3/ir3_compiler_nir.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_compiler_nir.c 2019-03-31 
23:16:37.000000000 +0000 @@ -0,0 +1,3217 @@ +/* + * Copyright (C) 2015 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include + +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "ir3_compiler.h" +#include "ir3_shader.h" +#include "ir3_nir.h" + +#include "instr-a3xx.h" +#include "ir3.h" +#include "ir3_context.h" + + +static struct ir3_instruction * +create_indirect_load(struct ir3_context *ctx, unsigned arrsz, int n, + struct ir3_instruction *address, struct ir3_instruction *collect) +{ + struct ir3_block *block = ctx->block; + struct ir3_instruction *mov; + struct ir3_register *src; + + mov = ir3_instr_create(block, OPC_MOV); + mov->cat1.src_type = TYPE_U32; + mov->cat1.dst_type = TYPE_U32; + ir3_reg_create(mov, 0, 0); + src = ir3_reg_create(mov, 0, IR3_REG_SSA | IR3_REG_RELATIV); + src->instr = collect; + src->size = arrsz; + src->array.offset = n; + + ir3_instr_set_address(mov, address); + + return mov; +} + +static struct ir3_instruction * +create_input_compmask(struct ir3_context *ctx, unsigned n, unsigned compmask) +{ + struct ir3_instruction *in; + + in = ir3_instr_create(ctx->in_block, OPC_META_INPUT); + in->inout.block = ctx->in_block; + ir3_reg_create(in, n, 0); + + in->regs[0]->wrmask = compmask; + + return in; +} + +static struct ir3_instruction * +create_input(struct ir3_context *ctx, unsigned n) +{ + return create_input_compmask(ctx, n, 0x1); +} + +static struct ir3_instruction * +create_frag_input(struct ir3_context *ctx, bool use_ldlv) +{ + struct ir3_block *block = ctx->block; + struct ir3_instruction *instr; + /* actual inloc is assigned and fixed up later: */ + struct ir3_instruction *inloc = create_immed(block, 0); + + if (use_ldlv) { + instr = ir3_LDLV(block, inloc, 0, create_immed(block, 1), 0); + instr->cat6.type = TYPE_U32; + instr->cat6.iim_val = 1; + } else { + instr = ir3_BARY_F(block, inloc, 0, ctx->frag_vcoord, 0); + instr->regs[2]->wrmask = 0x3; + } + + return instr; +} + +static struct ir3_instruction * +create_driver_param(struct ir3_context *ctx, enum 
ir3_driver_param dp) +{ + /* first four vec4 sysval's reserved for UBOs: */ + /* NOTE: dp is in scalar, but there can be >4 dp components: */ + unsigned n = ctx->so->constbase.driver_param; + unsigned r = regid(n + dp / 4, dp % 4); + return create_uniform(ctx->block, r); +} + +/* + * Adreno uses uint rather than having dedicated bool type, + * which (potentially) requires some conversion, in particular + * when using output of an bool instr to int input, or visa + * versa. + * + * | Adreno | NIR | + * -------+---------+-------+- + * true | 1 | ~0 | + * false | 0 | 0 | + * + * To convert from an adreno bool (uint) to nir, use: + * + * absneg.s dst, (neg)src + * + * To convert back in the other direction: + * + * absneg.s dst, (abs)arc + * + * The CP step can clean up the absneg.s that cancel each other + * out, and with a slight bit of extra cleverness (to recognize + * the instructions which produce either a 0 or 1) can eliminate + * the absneg.s's completely when an instruction that wants + * 0/1 consumes the result. For example, when a nir 'bcsel' + * consumes the result of 'feq'. So we should be able to get by + * without a boolean resolve step, and without incuring any + * extra penalty in instruction count. 
+ */ + +/* NIR bool -> native (adreno): */ +static struct ir3_instruction * +ir3_b2n(struct ir3_block *block, struct ir3_instruction *instr) +{ + return ir3_ABSNEG_S(block, instr, IR3_REG_SABS); +} + +/* native (adreno) -> NIR bool: */ +static struct ir3_instruction * +ir3_n2b(struct ir3_block *block, struct ir3_instruction *instr) +{ + return ir3_ABSNEG_S(block, instr, IR3_REG_SNEG); +} + +/* + * alu/sfu instructions: + */ + +static struct ir3_instruction * +create_cov(struct ir3_context *ctx, struct ir3_instruction *src, + unsigned src_bitsize, nir_op op) +{ + type_t src_type, dst_type; + + switch (op) { + case nir_op_f2f32: + case nir_op_f2f16_rtne: + case nir_op_f2f16_rtz: + case nir_op_f2f16: + case nir_op_f2i32: + case nir_op_f2i16: + case nir_op_f2i8: + case nir_op_f2u32: + case nir_op_f2u16: + case nir_op_f2u8: + switch (src_bitsize) { + case 32: + src_type = TYPE_F32; + break; + case 16: + src_type = TYPE_F16; + break; + default: + ir3_context_error(ctx, "invalid src bit size: %u", src_bitsize); + } + break; + + case nir_op_i2f32: + case nir_op_i2f16: + case nir_op_i2i32: + case nir_op_i2i16: + case nir_op_i2i8: + switch (src_bitsize) { + case 32: + src_type = TYPE_S32; + break; + case 16: + src_type = TYPE_S16; + break; + case 8: + src_type = TYPE_S8; + break; + default: + ir3_context_error(ctx, "invalid src bit size: %u", src_bitsize); + } + break; + + case nir_op_u2f32: + case nir_op_u2f16: + case nir_op_u2u32: + case nir_op_u2u16: + case nir_op_u2u8: + switch (src_bitsize) { + case 32: + src_type = TYPE_U32; + break; + case 16: + src_type = TYPE_U16; + break; + case 8: + src_type = TYPE_U8; + break; + default: + ir3_context_error(ctx, "invalid src bit size: %u", src_bitsize); + } + break; + + default: + ir3_context_error(ctx, "invalid conversion op: %u", op); + } + + switch (op) { + case nir_op_f2f32: + case nir_op_i2f32: + case nir_op_u2f32: + dst_type = TYPE_F32; + break; + + case nir_op_f2f16_rtne: + case nir_op_f2f16_rtz: + case nir_op_f2f16: + /* 
TODO how to handle rounding mode? */ + case nir_op_i2f16: + case nir_op_u2f16: + dst_type = TYPE_F16; + break; + + case nir_op_f2i32: + case nir_op_i2i32: + dst_type = TYPE_S32; + break; + + case nir_op_f2i16: + case nir_op_i2i16: + dst_type = TYPE_S16; + break; + + case nir_op_f2i8: + case nir_op_i2i8: + dst_type = TYPE_S8; + break; + + case nir_op_f2u32: + case nir_op_u2u32: + dst_type = TYPE_U32; + break; + + case nir_op_f2u16: + case nir_op_u2u16: + dst_type = TYPE_U16; + break; + + case nir_op_f2u8: + case nir_op_u2u8: + dst_type = TYPE_U8; + break; + + default: + ir3_context_error(ctx, "invalid conversion op: %u", op); + } + + return ir3_COV(ctx->block, src, src_type, dst_type); +} + +static void +emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) +{ + const nir_op_info *info = &nir_op_infos[alu->op]; + struct ir3_instruction **dst, *src[info->num_inputs]; + unsigned bs[info->num_inputs]; /* bit size */ + struct ir3_block *b = ctx->block; + unsigned dst_sz, wrmask; + + if (alu->dest.dest.is_ssa) { + dst_sz = alu->dest.dest.ssa.num_components; + wrmask = (1 << dst_sz) - 1; + } else { + dst_sz = alu->dest.dest.reg.reg->num_components; + wrmask = alu->dest.write_mask; + } + + dst = ir3_get_dst(ctx, &alu->dest.dest, dst_sz); + + /* Vectors are special in that they have non-scalarized writemasks, + * and just take the first swizzle channel for each argument in + * order into each writemask channel. 
+ */ + if ((alu->op == nir_op_vec2) || + (alu->op == nir_op_vec3) || + (alu->op == nir_op_vec4)) { + + for (int i = 0; i < info->num_inputs; i++) { + nir_alu_src *asrc = &alu->src[i]; + + compile_assert(ctx, !asrc->abs); + compile_assert(ctx, !asrc->negate); + + src[i] = ir3_get_src(ctx, &asrc->src)[asrc->swizzle[0]]; + if (!src[i]) + src[i] = create_immed(ctx->block, 0); + dst[i] = ir3_MOV(b, src[i], TYPE_U32); + } + + put_dst(ctx, &alu->dest.dest); + return; + } + + /* We also get mov's with more than one component for mov's so + * handle those specially: + */ + if ((alu->op == nir_op_imov) || (alu->op == nir_op_fmov)) { + type_t type = (alu->op == nir_op_imov) ? TYPE_U32 : TYPE_F32; + nir_alu_src *asrc = &alu->src[0]; + struct ir3_instruction *const *src0 = ir3_get_src(ctx, &asrc->src); + + for (unsigned i = 0; i < dst_sz; i++) { + if (wrmask & (1 << i)) { + dst[i] = ir3_MOV(b, src0[asrc->swizzle[i]], type); + } else { + dst[i] = NULL; + } + } + + put_dst(ctx, &alu->dest.dest); + return; + } + + /* General case: We can just grab the one used channel per src. 
*/ + for (int i = 0; i < info->num_inputs; i++) { + unsigned chan = ffs(alu->dest.write_mask) - 1; + nir_alu_src *asrc = &alu->src[i]; + + compile_assert(ctx, !asrc->abs); + compile_assert(ctx, !asrc->negate); + + src[i] = ir3_get_src(ctx, &asrc->src)[asrc->swizzle[chan]]; + bs[i] = nir_src_bit_size(asrc->src); + + compile_assert(ctx, src[i]); + } + + switch (alu->op) { + case nir_op_f2f32: + case nir_op_f2f16_rtne: + case nir_op_f2f16_rtz: + case nir_op_f2f16: + case nir_op_f2i32: + case nir_op_f2i16: + case nir_op_f2i8: + case nir_op_f2u32: + case nir_op_f2u16: + case nir_op_f2u8: + case nir_op_i2f32: + case nir_op_i2f16: + case nir_op_i2i32: + case nir_op_i2i16: + case nir_op_i2i8: + case nir_op_u2f32: + case nir_op_u2f16: + case nir_op_u2u32: + case nir_op_u2u16: + case nir_op_u2u8: + dst[0] = create_cov(ctx, src[0], bs[0], alu->op); + break; + case nir_op_f2b32: + dst[0] = ir3_CMPS_F(b, src[0], 0, create_immed(b, fui(0.0)), 0); + dst[0]->cat2.condition = IR3_COND_NE; + dst[0] = ir3_n2b(b, dst[0]); + break; + case nir_op_b2f16: + case nir_op_b2f32: + dst[0] = ir3_COV(b, ir3_b2n(b, src[0]), TYPE_U32, TYPE_F32); + break; + case nir_op_b2i8: + case nir_op_b2i16: + case nir_op_b2i32: + dst[0] = ir3_b2n(b, src[0]); + break; + case nir_op_i2b32: + dst[0] = ir3_CMPS_S(b, src[0], 0, create_immed(b, 0), 0); + dst[0]->cat2.condition = IR3_COND_NE; + dst[0] = ir3_n2b(b, dst[0]); + break; + + case nir_op_fneg: + dst[0] = ir3_ABSNEG_F(b, src[0], IR3_REG_FNEG); + break; + case nir_op_fabs: + dst[0] = ir3_ABSNEG_F(b, src[0], IR3_REG_FABS); + break; + case nir_op_fmax: + dst[0] = ir3_MAX_F(b, src[0], 0, src[1], 0); + break; + case nir_op_fmin: + dst[0] = ir3_MIN_F(b, src[0], 0, src[1], 0); + break; + case nir_op_fsat: + /* if there is just a single use of the src, and it supports + * (sat) bit, we can just fold the (sat) flag back to the + * src instruction and create a mov. This is easier for cp + * to eliminate. 
+ * + * TODO probably opc_cat==4 is ok too + */ + if (alu->src[0].src.is_ssa && + (list_length(&alu->src[0].src.ssa->uses) == 1) && + ((opc_cat(src[0]->opc) == 2) || (opc_cat(src[0]->opc) == 3))) { + src[0]->flags |= IR3_INSTR_SAT; + dst[0] = ir3_MOV(b, src[0], TYPE_U32); + } else { + /* otherwise generate a max.f that saturates.. blob does + * similar (generating a cat2 mov using max.f) + */ + dst[0] = ir3_MAX_F(b, src[0], 0, src[0], 0); + dst[0]->flags |= IR3_INSTR_SAT; + } + break; + case nir_op_fmul: + dst[0] = ir3_MUL_F(b, src[0], 0, src[1], 0); + break; + case nir_op_fadd: + dst[0] = ir3_ADD_F(b, src[0], 0, src[1], 0); + break; + case nir_op_fsub: + dst[0] = ir3_ADD_F(b, src[0], 0, src[1], IR3_REG_FNEG); + break; + case nir_op_ffma: + dst[0] = ir3_MAD_F32(b, src[0], 0, src[1], 0, src[2], 0); + break; + case nir_op_fddx: + dst[0] = ir3_DSX(b, src[0], 0); + dst[0]->cat5.type = TYPE_F32; + break; + case nir_op_fddy: + dst[0] = ir3_DSY(b, src[0], 0); + dst[0]->cat5.type = TYPE_F32; + break; + break; + case nir_op_flt32: + dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0); + dst[0]->cat2.condition = IR3_COND_LT; + dst[0] = ir3_n2b(b, dst[0]); + break; + case nir_op_fge32: + dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0); + dst[0]->cat2.condition = IR3_COND_GE; + dst[0] = ir3_n2b(b, dst[0]); + break; + case nir_op_feq32: + dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0); + dst[0]->cat2.condition = IR3_COND_EQ; + dst[0] = ir3_n2b(b, dst[0]); + break; + case nir_op_fne32: + dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0); + dst[0]->cat2.condition = IR3_COND_NE; + dst[0] = ir3_n2b(b, dst[0]); + break; + case nir_op_fceil: + dst[0] = ir3_CEIL_F(b, src[0], 0); + break; + case nir_op_ffloor: + dst[0] = ir3_FLOOR_F(b, src[0], 0); + break; + case nir_op_ftrunc: + dst[0] = ir3_TRUNC_F(b, src[0], 0); + break; + case nir_op_fround_even: + dst[0] = ir3_RNDNE_F(b, src[0], 0); + break; + case nir_op_fsign: + dst[0] = ir3_SIGN_F(b, src[0], 0); + break; + + case nir_op_fsin: + dst[0] = 
ir3_SIN(b, src[0], 0); + break; + case nir_op_fcos: + dst[0] = ir3_COS(b, src[0], 0); + break; + case nir_op_frsq: + dst[0] = ir3_RSQ(b, src[0], 0); + break; + case nir_op_frcp: + dst[0] = ir3_RCP(b, src[0], 0); + break; + case nir_op_flog2: + dst[0] = ir3_LOG2(b, src[0], 0); + break; + case nir_op_fexp2: + dst[0] = ir3_EXP2(b, src[0], 0); + break; + case nir_op_fsqrt: + dst[0] = ir3_SQRT(b, src[0], 0); + break; + + case nir_op_iabs: + dst[0] = ir3_ABSNEG_S(b, src[0], IR3_REG_SABS); + break; + case nir_op_iadd: + dst[0] = ir3_ADD_U(b, src[0], 0, src[1], 0); + break; + case nir_op_iand: + dst[0] = ir3_AND_B(b, src[0], 0, src[1], 0); + break; + case nir_op_imax: + dst[0] = ir3_MAX_S(b, src[0], 0, src[1], 0); + break; + case nir_op_umax: + dst[0] = ir3_MAX_U(b, src[0], 0, src[1], 0); + break; + case nir_op_imin: + dst[0] = ir3_MIN_S(b, src[0], 0, src[1], 0); + break; + case nir_op_umin: + dst[0] = ir3_MIN_U(b, src[0], 0, src[1], 0); + break; + case nir_op_imul: + /* + * dst = (al * bl) + (ah * bl << 16) + (al * bh << 16) + * mull.u tmp0, a, b ; mul low, i.e. al * bl + * madsh.m16 tmp1, a, b, tmp0 ; mul-add shift high mix, i.e. ah * bl << 16 + * madsh.m16 dst, b, a, tmp1 ; i.e. al * bh << 16 + */ + dst[0] = ir3_MADSH_M16(b, src[1], 0, src[0], 0, + ir3_MADSH_M16(b, src[0], 0, src[1], 0, + ir3_MULL_U(b, src[0], 0, src[1], 0), 0), 0); + break; + case nir_op_ineg: + dst[0] = ir3_ABSNEG_S(b, src[0], IR3_REG_SNEG); + break; + case nir_op_inot: + dst[0] = ir3_NOT_B(b, src[0], 0); + break; + case nir_op_ior: + dst[0] = ir3_OR_B(b, src[0], 0, src[1], 0); + break; + case nir_op_ishl: + dst[0] = ir3_SHL_B(b, src[0], 0, src[1], 0); + break; + case nir_op_ishr: + dst[0] = ir3_ASHR_B(b, src[0], 0, src[1], 0); + break; + case nir_op_isign: { + /* maybe this would be sane to lower in nir.. 
*/ + struct ir3_instruction *neg, *pos; + + neg = ir3_CMPS_S(b, src[0], 0, create_immed(b, 0), 0); + neg->cat2.condition = IR3_COND_LT; + + pos = ir3_CMPS_S(b, src[0], 0, create_immed(b, 0), 0); + pos->cat2.condition = IR3_COND_GT; + + dst[0] = ir3_SUB_U(b, pos, 0, neg, 0); + + break; + } + case nir_op_isub: + dst[0] = ir3_SUB_U(b, src[0], 0, src[1], 0); + break; + case nir_op_ixor: + dst[0] = ir3_XOR_B(b, src[0], 0, src[1], 0); + break; + case nir_op_ushr: + dst[0] = ir3_SHR_B(b, src[0], 0, src[1], 0); + break; + case nir_op_ilt32: + dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0); + dst[0]->cat2.condition = IR3_COND_LT; + dst[0] = ir3_n2b(b, dst[0]); + break; + case nir_op_ige32: + dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0); + dst[0]->cat2.condition = IR3_COND_GE; + dst[0] = ir3_n2b(b, dst[0]); + break; + case nir_op_ieq32: + dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0); + dst[0]->cat2.condition = IR3_COND_EQ; + dst[0] = ir3_n2b(b, dst[0]); + break; + case nir_op_ine32: + dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0); + dst[0]->cat2.condition = IR3_COND_NE; + dst[0] = ir3_n2b(b, dst[0]); + break; + case nir_op_ult32: + dst[0] = ir3_CMPS_U(b, src[0], 0, src[1], 0); + dst[0]->cat2.condition = IR3_COND_LT; + dst[0] = ir3_n2b(b, dst[0]); + break; + case nir_op_uge32: + dst[0] = ir3_CMPS_U(b, src[0], 0, src[1], 0); + dst[0]->cat2.condition = IR3_COND_GE; + dst[0] = ir3_n2b(b, dst[0]); + break; + + case nir_op_b32csel: { + struct ir3_instruction *cond = ir3_b2n(b, src[0]); + compile_assert(ctx, bs[1] == bs[2]); + /* the boolean condition is 32b even if src[1] and src[2] are + * half-precision, but sel.b16 wants all three src's to be the + * same type. 
+ */ + if (bs[1] < 32) + cond = ir3_COV(b, cond, TYPE_U32, TYPE_U16); + dst[0] = ir3_SEL_B32(b, src[1], 0, cond, 0, src[2], 0); + break; + } + case nir_op_bit_count: + dst[0] = ir3_CBITS_B(b, src[0], 0); + break; + case nir_op_ifind_msb: { + struct ir3_instruction *cmp; + dst[0] = ir3_CLZ_S(b, src[0], 0); + cmp = ir3_CMPS_S(b, dst[0], 0, create_immed(b, 0), 0); + cmp->cat2.condition = IR3_COND_GE; + dst[0] = ir3_SEL_B32(b, + ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0, + cmp, 0, dst[0], 0); + break; + } + case nir_op_ufind_msb: + dst[0] = ir3_CLZ_B(b, src[0], 0); + dst[0] = ir3_SEL_B32(b, + ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0, + src[0], 0, dst[0], 0); + break; + case nir_op_find_lsb: + dst[0] = ir3_BFREV_B(b, src[0], 0); + dst[0] = ir3_CLZ_B(b, dst[0], 0); + break; + case nir_op_bitfield_reverse: + dst[0] = ir3_BFREV_B(b, src[0], 0); + break; + + default: + ir3_context_error(ctx, "Unhandled ALU op: %s\n", + nir_op_infos[alu->op].name); + break; + } + + put_dst(ctx, &alu->dest.dest); +} + +/* handles direct/indirect UBO reads: */ +static void +emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr, + struct ir3_instruction **dst) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *base_lo, *base_hi, *addr, *src0, *src1; + nir_const_value *const_offset; + /* UBO addresses are the first driver params: */ + unsigned ubo = regid(ctx->so->constbase.ubo, 0); + const unsigned ptrsz = ir3_pointer_size(ctx); + + int off = 0; + + /* First src is ubo index, which could either be an immed or not: */ + src0 = ir3_get_src(ctx, &intr->src[0])[0]; + if (is_same_type_mov(src0) && + (src0->regs[1]->flags & IR3_REG_IMMED)) { + base_lo = create_uniform(b, ubo + (src0->regs[1]->iim_val * ptrsz)); + base_hi = create_uniform(b, ubo + (src0->regs[1]->iim_val * ptrsz) + 1); + } else { + base_lo = create_uniform_indirect(b, ubo, ir3_get_addr(ctx, src0, 4)); + base_hi = create_uniform_indirect(b, ubo + 1, ir3_get_addr(ctx, src0, 4)); + } 
+ + /* note: on 32bit gpu's base_hi is ignored and DCE'd */ + addr = base_lo; + + const_offset = nir_src_as_const_value(intr->src[1]); + if (const_offset) { + off += const_offset->u32[0]; + } else { + /* For load_ubo_indirect, second src is indirect offset: */ + src1 = ir3_get_src(ctx, &intr->src[1])[0]; + + /* and add offset to addr: */ + addr = ir3_ADD_S(b, addr, 0, src1, 0); + } + + /* if offset is to large to encode in the ldg, split it out: */ + if ((off + (intr->num_components * 4)) > 1024) { + /* split out the minimal amount to improve the odds that + * cp can fit the immediate in the add.s instruction: + */ + unsigned off2 = off + (intr->num_components * 4) - 1024; + addr = ir3_ADD_S(b, addr, 0, create_immed(b, off2), 0); + off -= off2; + } + + if (ptrsz == 2) { + struct ir3_instruction *carry; + + /* handle 32b rollover, ie: + * if (addr < base_lo) + * base_hi++ + */ + carry = ir3_CMPS_U(b, addr, 0, base_lo, 0); + carry->cat2.condition = IR3_COND_LT; + base_hi = ir3_ADD_S(b, base_hi, 0, carry, 0); + + addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){ addr, base_hi }, 2); + } + + for (int i = 0; i < intr->num_components; i++) { + struct ir3_instruction *load = + ir3_LDG(b, addr, 0, create_immed(b, 1), 0); + load->cat6.type = TYPE_U32; + load->cat6.src_offset = off + i * 4; /* byte offset */ + dst[i] = load; + } +} + +/* src[] = { buffer_index, offset }. No const_index */ +static void +emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr, + struct ir3_instruction **dst) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *ldgb, *src0, *src1, *offset; + nir_const_value *const_offset; + + /* can this be non-const buffer_index? how do we handle that? */ + const_offset = nir_src_as_const_value(intr->src[0]); + compile_assert(ctx, const_offset); + + offset = ir3_get_src(ctx, &intr->src[1])[0]; + + /* src0 is uvec2(offset*4, 0), src1 is offset.. 
nir already *= 4: */ + src0 = ir3_create_collect(ctx, (struct ir3_instruction*[]){ + offset, + create_immed(b, 0), + }, 2); + src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0); + + ldgb = ir3_LDGB(b, create_immed(b, const_offset->u32[0]), 0, + src0, 0, src1, 0); + ldgb->regs[0]->wrmask = MASK(intr->num_components); + ldgb->cat6.iim_val = intr->num_components; + ldgb->cat6.d = 4; + ldgb->cat6.type = TYPE_U32; + ldgb->barrier_class = IR3_BARRIER_BUFFER_R; + ldgb->barrier_conflict = IR3_BARRIER_BUFFER_W; + + ir3_split_dest(b, dst, ldgb, 0, intr->num_components); +} + +/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */ +static void +emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *stgb, *src0, *src1, *src2, *offset; + nir_const_value *const_offset; + /* TODO handle wrmask properly, see _store_shared().. but I think + * it is more a PITA than that, since blob ends up loading the + * masked components and writing them back out. + */ + unsigned wrmask = intr->const_index[0]; + unsigned ncomp = ffs(~wrmask) - 1; + + /* can this be non-const buffer_index? how do we handle that? */ + const_offset = nir_src_as_const_value(intr->src[1]); + compile_assert(ctx, const_offset); + + offset = ir3_get_src(ctx, &intr->src[2])[0]; + + /* src0 is value, src1 is offset, src2 is uvec2(offset*4, 0).. 
+ * nir already *= 4: + */ + src0 = ir3_create_collect(ctx, ir3_get_src(ctx, &intr->src[0]), ncomp); + src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0); + src2 = ir3_create_collect(ctx, (struct ir3_instruction*[]){ + offset, + create_immed(b, 0), + }, 2); + + stgb = ir3_STGB(b, create_immed(b, const_offset->u32[0]), 0, + src0, 0, src1, 0, src2, 0); + stgb->cat6.iim_val = ncomp; + stgb->cat6.d = 4; + stgb->cat6.type = TYPE_U32; + stgb->barrier_class = IR3_BARRIER_BUFFER_W; + stgb->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; + + array_insert(b, b->keeps, stgb); +} + +/* src[] = { block_index } */ +static void +emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, + struct ir3_instruction **dst) +{ + /* SSBO size stored as a const starting at ssbo_sizes: */ + unsigned blk_idx = nir_src_as_const_value(intr->src[0])->u32[0]; + unsigned idx = regid(ctx->so->constbase.ssbo_sizes, 0) + + ctx->so->const_layout.ssbo_size.off[blk_idx]; + + debug_assert(ctx->so->const_layout.ssbo_size.mask & (1 << blk_idx)); + + dst[0] = create_uniform(ctx->block, idx); +} + +/* + * SSBO atomic intrinsics + * + * All of the SSBO atomic memory operations read a value from memory, + * compute a new value using one of the operations below, write the new + * value to memory, and return the original value read. + * + * All operations take 3 sources except CompSwap that takes 4. These + * sources represent: + * + * 0: The SSBO buffer index. + * 1: The offset into the SSBO buffer of the variable that the atomic + * operation will operate on. + * 2: The data parameter to the atomic function (i.e. the value to add + * in ssbo_atomic_add, etc). + * 3: For CompSwap only: the second data parameter. 
+ */ +static struct ir3_instruction * +emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *atomic, *ssbo, *src0, *src1, *src2, *offset; + nir_const_value *const_offset; + type_t type = TYPE_U32; + + /* can this be non-const buffer_index? how do we handle that? */ + const_offset = nir_src_as_const_value(intr->src[0]); + compile_assert(ctx, const_offset); + ssbo = create_immed(b, const_offset->u32[0]); + + offset = ir3_get_src(ctx, &intr->src[1])[0]; + + /* src0 is data (or uvec2(data, compare)) + * src1 is offset + * src2 is uvec2(offset*4, 0) (appears to be 64b byte offset) + * + * Note that nir already multiplies the offset by four + */ + src0 = ir3_get_src(ctx, &intr->src[2])[0]; + src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0); + src2 = ir3_create_collect(ctx, (struct ir3_instruction*[]){ + offset, + create_immed(b, 0), + }, 2); + + switch (intr->intrinsic) { + case nir_intrinsic_ssbo_atomic_add: + atomic = ir3_ATOMIC_ADD_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_ssbo_atomic_imin: + atomic = ir3_ATOMIC_MIN_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + type = TYPE_S32; + break; + case nir_intrinsic_ssbo_atomic_umin: + atomic = ir3_ATOMIC_MIN_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_ssbo_atomic_imax: + atomic = ir3_ATOMIC_MAX_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + type = TYPE_S32; + break; + case nir_intrinsic_ssbo_atomic_umax: + atomic = ir3_ATOMIC_MAX_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_ssbo_atomic_and: + atomic = ir3_ATOMIC_AND_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_ssbo_atomic_or: + atomic = ir3_ATOMIC_OR_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_ssbo_atomic_xor: + atomic = ir3_ATOMIC_XOR_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_ssbo_atomic_exchange: + atomic = 
ir3_ATOMIC_XCHG_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_ssbo_atomic_comp_swap: + /* for cmpxchg, src0 is [ui]vec2(data, compare): */ + src0 = ir3_create_collect(ctx, (struct ir3_instruction*[]){ + ir3_get_src(ctx, &intr->src[3])[0], + src0, + }, 2); + atomic = ir3_ATOMIC_CMPXCHG_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + break; + default: + unreachable("boo"); + } + + atomic->cat6.iim_val = 1; + atomic->cat6.d = 4; + atomic->cat6.type = type; + atomic->barrier_class = IR3_BARRIER_BUFFER_W; + atomic->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; + + /* even if nothing consume the result, we can't DCE the instruction: */ + array_insert(b, b->keeps, atomic); + + return atomic; +} + +/* src[] = { offset }. const_index[] = { base } */ +static void +emit_intrinsic_load_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr, + struct ir3_instruction **dst) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *ldl, *offset; + unsigned base; + + offset = ir3_get_src(ctx, &intr->src[0])[0]; + base = nir_intrinsic_base(intr); + + ldl = ir3_LDL(b, offset, 0, create_immed(b, intr->num_components), 0); + ldl->cat6.src_offset = base; + ldl->cat6.type = utype_dst(intr->dest); + ldl->regs[0]->wrmask = MASK(intr->num_components); + + ldl->barrier_class = IR3_BARRIER_SHARED_R; + ldl->barrier_conflict = IR3_BARRIER_SHARED_W; + + ir3_split_dest(b, dst, ldl, 0, intr->num_components); +} + +/* src[] = { value, offset }. 
const_index[] = { base, write_mask } */ +static void +emit_intrinsic_store_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *stl, *offset; + struct ir3_instruction * const *value; + unsigned base, wrmask; + + value = ir3_get_src(ctx, &intr->src[0]); + offset = ir3_get_src(ctx, &intr->src[1])[0]; + + base = nir_intrinsic_base(intr); + wrmask = nir_intrinsic_write_mask(intr); + + /* Combine groups of consecutive enabled channels in one write + * message. We use ffs to find the first enabled channel and then ffs on + * the bit-inverse, down-shifted writemask to determine the length of + * the block of enabled bits. + * + * (trick stolen from i965's fs_visitor::nir_emit_cs_intrinsic()) + */ + while (wrmask) { + unsigned first_component = ffs(wrmask) - 1; + unsigned length = ffs(~(wrmask >> first_component)) - 1; + + stl = ir3_STL(b, offset, 0, + ir3_create_collect(ctx, &value[first_component], length), 0, + create_immed(b, length), 0); + stl->cat6.dst_offset = first_component + base; + stl->cat6.type = utype_src(intr->src[0]); + stl->barrier_class = IR3_BARRIER_SHARED_W; + stl->barrier_conflict = IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W; + + array_insert(b, b->keeps, stl); + + /* Clear the bits in the writemask that we just wrote, then try + * again to see if more channels are left. + */ + wrmask &= (15 << (first_component + length)); + } +} + +/* + * CS shared variable atomic intrinsics + * + * All of the shared variable atomic memory operations read a value from + * memory, compute a new value using one of the operations below, write the + * new value to memory, and return the original value read. + * + * All operations take 2 sources except CompSwap that takes 3. These + * sources represent: + * + * 0: The offset into the shared variable storage region that the atomic + * operation will operate on. + * 1: The data parameter to the atomic function (i.e. 
the value to add + * in shared_atomic_add, etc). + * 2: For CompSwap only: the second data parameter. + */ +static struct ir3_instruction * +emit_intrinsic_atomic_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *atomic, *src0, *src1; + type_t type = TYPE_U32; + + src0 = ir3_get_src(ctx, &intr->src[0])[0]; /* offset */ + src1 = ir3_get_src(ctx, &intr->src[1])[0]; /* value */ + + switch (intr->intrinsic) { + case nir_intrinsic_shared_atomic_add: + atomic = ir3_ATOMIC_ADD(b, src0, 0, src1, 0); + break; + case nir_intrinsic_shared_atomic_imin: + atomic = ir3_ATOMIC_MIN(b, src0, 0, src1, 0); + type = TYPE_S32; + break; + case nir_intrinsic_shared_atomic_umin: + atomic = ir3_ATOMIC_MIN(b, src0, 0, src1, 0); + break; + case nir_intrinsic_shared_atomic_imax: + atomic = ir3_ATOMIC_MAX(b, src0, 0, src1, 0); + type = TYPE_S32; + break; + case nir_intrinsic_shared_atomic_umax: + atomic = ir3_ATOMIC_MAX(b, src0, 0, src1, 0); + break; + case nir_intrinsic_shared_atomic_and: + atomic = ir3_ATOMIC_AND(b, src0, 0, src1, 0); + break; + case nir_intrinsic_shared_atomic_or: + atomic = ir3_ATOMIC_OR(b, src0, 0, src1, 0); + break; + case nir_intrinsic_shared_atomic_xor: + atomic = ir3_ATOMIC_XOR(b, src0, 0, src1, 0); + break; + case nir_intrinsic_shared_atomic_exchange: + atomic = ir3_ATOMIC_XCHG(b, src0, 0, src1, 0); + break; + case nir_intrinsic_shared_atomic_comp_swap: + /* for cmpxchg, src1 is [ui]vec2(data, compare): */ + src1 = ir3_create_collect(ctx, (struct ir3_instruction*[]){ + ir3_get_src(ctx, &intr->src[2])[0], + src1, + }, 2); + atomic = ir3_ATOMIC_CMPXCHG(b, src0, 0, src1, 0); + break; + default: + unreachable("boo"); + } + + atomic->cat6.iim_val = 1; + atomic->cat6.d = 1; + atomic->cat6.type = type; + atomic->barrier_class = IR3_BARRIER_SHARED_W; + atomic->barrier_conflict = IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W; + + /* even if nothing consume the result, we can't DCE the instruction: */ + 
array_insert(b, b->keeps, atomic); + + return atomic; +} + +/* Images get mapped into SSBO/image state (for store/atomic) and texture + * state block (for load). To simplify things, invert the image id and + * map it from end of state block, ie. image 0 becomes num-1, image 1 + * becomes num-2, etc. This potentially avoids needing to re-emit texture + * state when switching shaders. + * + * TODO is max # of samplers and SSBOs the same. This shouldn't be hard- + * coded. Also, since all the gl shader stages (ie. everything but CS) + * share the same SSBO/image state block, this might require some more + * logic if we supported images in anything other than FS.. + */ +static unsigned +get_image_slot(struct ir3_context *ctx, nir_deref_instr *deref) +{ + unsigned int loc = 0; + unsigned inner_size = 1; + + while (deref->deref_type != nir_deref_type_var) { + assert(deref->deref_type == nir_deref_type_array); + nir_const_value *const_index = nir_src_as_const_value(deref->arr.index); + assert(const_index); + + /* Go to the next instruction */ + deref = nir_deref_instr_parent(deref); + + assert(glsl_type_is_array(deref->type)); + const unsigned array_len = glsl_get_length(deref->type); + loc += MIN2(const_index->u32[0], array_len - 1) * inner_size; + + /* Update the inner size */ + inner_size *= array_len; + } + + loc += deref->var->data.driver_location; + + /* TODO figure out real limit per generation, and don't hardcode: */ + const unsigned max_samplers = 16; + return max_samplers - loc - 1; +} + +/* see tex_info() for equiv logic for texture instructions.. it would be + * nice if this could be better unified.. 
+ */ +static unsigned +get_image_coords(const nir_variable *var, unsigned *flagsp) +{ + const struct glsl_type *type = glsl_without_array(var->type); + unsigned coords, flags = 0; + + switch (glsl_get_sampler_dim(type)) { + case GLSL_SAMPLER_DIM_1D: + case GLSL_SAMPLER_DIM_BUF: + coords = 1; + break; + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_RECT: + case GLSL_SAMPLER_DIM_EXTERNAL: + case GLSL_SAMPLER_DIM_MS: + coords = 2; + break; + case GLSL_SAMPLER_DIM_3D: + case GLSL_SAMPLER_DIM_CUBE: + flags |= IR3_INSTR_3D; + coords = 3; + break; + default: + unreachable("bad sampler dim"); + return 0; + } + + if (glsl_sampler_type_is_array(type)) { + /* note: unlike tex_info(), adjust # of coords to include array idx: */ + coords++; + flags |= IR3_INSTR_A; + } + + if (flagsp) + *flagsp = flags; + + return coords; +} + +static type_t +get_image_type(const nir_variable *var) +{ + switch (glsl_get_sampler_result_type(glsl_without_array(var->type))) { + case GLSL_TYPE_UINT: + return TYPE_U32; + case GLSL_TYPE_INT: + return TYPE_S32; + case GLSL_TYPE_FLOAT: + return TYPE_F32; + default: + unreachable("bad sampler type."); + return 0; + } +} + +static struct ir3_instruction * +get_image_offset(struct ir3_context *ctx, const nir_variable *var, + struct ir3_instruction * const *coords, bool byteoff) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *offset; + unsigned ncoords = get_image_coords(var, NULL); + + /* to calculate the byte offset (yes, uggg) we need (up to) three + * const values to know the bytes per pixel, and y and z stride: + */ + unsigned cb = regid(ctx->so->constbase.image_dims, 0) + + ctx->so->const_layout.image_dims.off[var->data.driver_location]; + + debug_assert(ctx->so->const_layout.image_dims.mask & + (1 << var->data.driver_location)); + + /* offset = coords.x * bytes_per_pixel: */ + offset = ir3_MUL_S(b, coords[0], 0, create_uniform(b, cb + 0), 0); + if (ncoords > 1) { + /* offset += coords.y * y_pitch: */ + offset = ir3_MAD_S24(b, 
create_uniform(b, cb + 1), 0, + coords[1], 0, offset, 0); + } + if (ncoords > 2) { + /* offset += coords.z * z_pitch: */ + offset = ir3_MAD_S24(b, create_uniform(b, cb + 2), 0, + coords[2], 0, offset, 0); + } + + if (!byteoff) { + /* Some cases, like atomics, seem to use dword offset instead + * of byte offsets.. blob just puts an extra shr.b in there + * in those cases: + */ + offset = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0); + } + + return ir3_create_collect(ctx, (struct ir3_instruction*[]){ + offset, + create_immed(b, 0), + }, 2); +} + +/* src[] = { deref, coord, sample_index }. const_index[] = {} */ +static void +emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr, + struct ir3_instruction **dst) +{ + struct ir3_block *b = ctx->block; + const nir_variable *var = nir_intrinsic_get_var(intr, 0); + struct ir3_instruction *sam; + struct ir3_instruction * const *src0 = ir3_get_src(ctx, &intr->src[1]); + struct ir3_instruction *coords[4]; + unsigned flags, ncoords = get_image_coords(var, &flags); + unsigned tex_idx = get_image_slot(ctx, nir_src_as_deref(intr->src[0])); + type_t type = get_image_type(var); + + /* hmm, this seems a bit odd, but it is what blob does and (at least + * a5xx) just faults on bogus addresses otherwise: + */ + if (flags & IR3_INSTR_3D) { + flags &= ~IR3_INSTR_3D; + flags |= IR3_INSTR_A; + } + + for (unsigned i = 0; i < ncoords; i++) + coords[i] = src0[i]; + + if (ncoords == 1) + coords[ncoords++] = create_immed(b, 0); + + sam = ir3_SAM(b, OPC_ISAM, type, 0b1111, flags, + tex_idx, tex_idx, ir3_create_collect(ctx, coords, ncoords), NULL); + + sam->barrier_class = IR3_BARRIER_IMAGE_R; + sam->barrier_conflict = IR3_BARRIER_IMAGE_W; + + ir3_split_dest(b, dst, sam, 0, 4); +} + +/* Returns the number of components for the different image formats + * supported by the GLES 3.1 spec, plus those added by the + * GL_NV_image_formats extension. 
+ */ +static unsigned +get_num_components_for_glformat(GLuint format) +{ + switch (format) { + case GL_R32F: + case GL_R32I: + case GL_R32UI: + case GL_R16F: + case GL_R16I: + case GL_R16UI: + case GL_R16: + case GL_R16_SNORM: + case GL_R8I: + case GL_R8UI: + case GL_R8: + case GL_R8_SNORM: + return 1; + + case GL_RG32F: + case GL_RG32I: + case GL_RG32UI: + case GL_RG16F: + case GL_RG16I: + case GL_RG16UI: + case GL_RG16: + case GL_RG16_SNORM: + case GL_RG8I: + case GL_RG8UI: + case GL_RG8: + case GL_RG8_SNORM: + return 2; + + case GL_R11F_G11F_B10F: + return 3; + + case GL_RGBA32F: + case GL_RGBA32I: + case GL_RGBA32UI: + case GL_RGBA16F: + case GL_RGBA16I: + case GL_RGBA16UI: + case GL_RGBA16: + case GL_RGBA16_SNORM: + case GL_RGBA8I: + case GL_RGBA8UI: + case GL_RGBA8: + case GL_RGBA8_SNORM: + case GL_RGB10_A2UI: + case GL_RGB10_A2: + return 4; + + case GL_NONE: + /* Omitting the image format qualifier is allowed on desktop GL + * profiles. Assuming 4 components is always safe. + */ + return 4; + + default: + /* Return 4 components also for all other formats we don't know + * about. The format should have been validated already by + * the higher level API, but drop a debug message just in case. + */ + debug_printf("Unhandled GL format %u while emitting imageStore()\n", + format); + return 4; + } +} + +/* src[] = { deref, coord, sample_index, value }. 
const_index[] = {} */ +static void +emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) +{ + struct ir3_block *b = ctx->block; + const nir_variable *var = nir_intrinsic_get_var(intr, 0); + struct ir3_instruction *stib, *offset; + struct ir3_instruction * const *value = ir3_get_src(ctx, &intr->src[3]); + struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]); + unsigned ncoords = get_image_coords(var, NULL); + unsigned tex_idx = get_image_slot(ctx, nir_src_as_deref(intr->src[0])); + unsigned ncomp = get_num_components_for_glformat(var->data.image.format); + + /* src0 is value + * src1 is coords + * src2 is 64b byte offset + */ + + offset = get_image_offset(ctx, var, coords, true); + + /* NOTE: stib seems to take byte offset, but stgb.typed can be used + * too and takes a dword offset.. not quite sure yet why blob uses + * one over the other in various cases. + */ + + stib = ir3_STIB(b, create_immed(b, tex_idx), 0, + ir3_create_collect(ctx, value, ncomp), 0, + ir3_create_collect(ctx, coords, ncoords), 0, + offset, 0); + stib->cat6.iim_val = ncomp; + stib->cat6.d = ncoords; + stib->cat6.type = get_image_type(var); + stib->cat6.typed = true; + stib->barrier_class = IR3_BARRIER_IMAGE_W; + stib->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W; + + array_insert(b, b->keeps, stib); +} + +static void +emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, + struct ir3_instruction **dst) +{ + struct ir3_block *b = ctx->block; + const nir_variable *var = nir_intrinsic_get_var(intr, 0); + unsigned tex_idx = get_image_slot(ctx, nir_src_as_deref(intr->src[0])); + struct ir3_instruction *sam, *lod; + unsigned flags, ncoords = get_image_coords(var, &flags); + + lod = create_immed(b, 0); + sam = ir3_SAM(b, OPC_GETSIZE, TYPE_U32, 0b1111, flags, + tex_idx, tex_idx, lod, NULL); + + /* Array size actually ends up in .w rather than .z. 
This doesn't + * matter for miplevel 0, but for higher mips the value in z is + * minified whereas w stays. Also, the value in TEX_CONST_3_DEPTH is + * returned, which means that we have to add 1 to it for arrays for + * a3xx. + * + * Note use a temporary dst and then copy, since the size of the dst + * array that is passed in is based on nir's understanding of the + * result size, not the hardware's + */ + struct ir3_instruction *tmp[4]; + + ir3_split_dest(b, tmp, sam, 0, 4); + + /* get_size instruction returns size in bytes instead of texels + * for imageBuffer, so we need to divide it by the pixel size + * of the image format. + * + * TODO: This is at least true on a5xx. Check other gens. + */ + enum glsl_sampler_dim dim = + glsl_get_sampler_dim(glsl_without_array(var->type)); + if (dim == GLSL_SAMPLER_DIM_BUF) { + /* Since all the possible values the divisor can take are + * power-of-two (4, 8, or 16), the division is implemented + * as a shift-right. + * During shader setup, the log2 of the image format's + * bytes-per-pixel should have been emitted in 2nd slot of + * image_dims. See ir3_shader::emit_image_dims(). + */ + unsigned cb = regid(ctx->so->constbase.image_dims, 0) + + ctx->so->const_layout.image_dims.off[var->data.driver_location]; + struct ir3_instruction *aux = create_uniform(b, cb + 1); + + tmp[0] = ir3_SHR_B(b, tmp[0], 0, aux, 0); + } + + for (unsigned i = 0; i < ncoords; i++) + dst[i] = tmp[i]; + + if (flags & IR3_INSTR_A) { + if (ctx->compiler->levels_add_one) { + dst[ncoords-1] = ir3_ADD_U(b, tmp[3], 0, create_immed(b, 1), 0); + } else { + dst[ncoords-1] = ir3_MOV(b, tmp[3], TYPE_U32); + } + } +} + +/* src[] = { deref, coord, sample_index, value, compare }. 
const_index[] = {} */ +static struct ir3_instruction * +emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) +{ + struct ir3_block *b = ctx->block; + const nir_variable *var = nir_intrinsic_get_var(intr, 0); + struct ir3_instruction *atomic, *image, *src0, *src1, *src2; + struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]); + unsigned ncoords = get_image_coords(var, NULL); + + image = create_immed(b, get_image_slot(ctx, nir_src_as_deref(intr->src[0]))); + + /* src0 is value (or uvec2(value, compare)) + * src1 is coords + * src2 is 64b byte offset + */ + src0 = ir3_get_src(ctx, &intr->src[3])[0]; + src1 = ir3_create_collect(ctx, coords, ncoords); + src2 = get_image_offset(ctx, var, coords, false); + + switch (intr->intrinsic) { + case nir_intrinsic_image_deref_atomic_add: + atomic = ir3_ATOMIC_ADD_G(b, image, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_image_deref_atomic_min: + atomic = ir3_ATOMIC_MIN_G(b, image, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_image_deref_atomic_max: + atomic = ir3_ATOMIC_MAX_G(b, image, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_image_deref_atomic_and: + atomic = ir3_ATOMIC_AND_G(b, image, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_image_deref_atomic_or: + atomic = ir3_ATOMIC_OR_G(b, image, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_image_deref_atomic_xor: + atomic = ir3_ATOMIC_XOR_G(b, image, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_image_deref_atomic_exchange: + atomic = ir3_ATOMIC_XCHG_G(b, image, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_image_deref_atomic_comp_swap: + /* for cmpxchg, src0 is [ui]vec2(data, compare): */ + src0 = ir3_create_collect(ctx, (struct ir3_instruction*[]){ + ir3_get_src(ctx, &intr->src[4])[0], + src0, + }, 2); + atomic = ir3_ATOMIC_CMPXCHG_G(b, image, 0, src0, 0, src1, 0, src2, 0); + break; + default: + unreachable("boo"); + } + + 
atomic->cat6.iim_val = 1; + atomic->cat6.d = ncoords; + atomic->cat6.type = get_image_type(var); + atomic->cat6.typed = true; + atomic->barrier_class = IR3_BARRIER_IMAGE_W; + atomic->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W; + + /* even if nothing consume the result, we can't DCE the instruction: */ + array_insert(b, b->keeps, atomic); + + return atomic; +} + +static void +emit_intrinsic_barrier(struct ir3_context *ctx, nir_intrinsic_instr *intr) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *barrier; + + switch (intr->intrinsic) { + case nir_intrinsic_barrier: + barrier = ir3_BAR(b); + barrier->cat7.g = true; + barrier->cat7.l = true; + barrier->flags = IR3_INSTR_SS | IR3_INSTR_SY; + barrier->barrier_class = IR3_BARRIER_EVERYTHING; + break; + case nir_intrinsic_memory_barrier: + barrier = ir3_FENCE(b); + barrier->cat7.g = true; + barrier->cat7.r = true; + barrier->cat7.w = true; + barrier->barrier_class = IR3_BARRIER_IMAGE_W | + IR3_BARRIER_BUFFER_W; + barrier->barrier_conflict = + IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W | + IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; + break; + case nir_intrinsic_memory_barrier_atomic_counter: + case nir_intrinsic_memory_barrier_buffer: + barrier = ir3_FENCE(b); + barrier->cat7.g = true; + barrier->cat7.r = true; + barrier->cat7.w = true; + barrier->barrier_class = IR3_BARRIER_BUFFER_W; + barrier->barrier_conflict = IR3_BARRIER_BUFFER_R | + IR3_BARRIER_BUFFER_W; + break; + case nir_intrinsic_memory_barrier_image: + // TODO double check if this should have .g set + barrier = ir3_FENCE(b); + barrier->cat7.g = true; + barrier->cat7.r = true; + barrier->cat7.w = true; + barrier->barrier_class = IR3_BARRIER_IMAGE_W; + barrier->barrier_conflict = IR3_BARRIER_IMAGE_R | + IR3_BARRIER_IMAGE_W; + break; + case nir_intrinsic_memory_barrier_shared: + barrier = ir3_FENCE(b); + barrier->cat7.g = true; + barrier->cat7.l = true; + barrier->cat7.r = true; + barrier->cat7.w = true; + barrier->barrier_class 
= IR3_BARRIER_SHARED_W; + barrier->barrier_conflict = IR3_BARRIER_SHARED_R | + IR3_BARRIER_SHARED_W; + break; + case nir_intrinsic_group_memory_barrier: + barrier = ir3_FENCE(b); + barrier->cat7.g = true; + barrier->cat7.l = true; + barrier->cat7.r = true; + barrier->cat7.w = true; + barrier->barrier_class = IR3_BARRIER_SHARED_W | + IR3_BARRIER_IMAGE_W | + IR3_BARRIER_BUFFER_W; + barrier->barrier_conflict = + IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W | + IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W | + IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; + break; + default: + unreachable("boo"); + } + + /* make sure barrier doesn't get DCE'd */ + array_insert(b, b->keeps, barrier); +} + +static void add_sysval_input_compmask(struct ir3_context *ctx, + gl_system_value slot, unsigned compmask, + struct ir3_instruction *instr) +{ + struct ir3_shader_variant *so = ctx->so; + unsigned r = regid(so->inputs_count, 0); + unsigned n = so->inputs_count++; + + so->inputs[n].sysval = true; + so->inputs[n].slot = slot; + so->inputs[n].compmask = compmask; + so->inputs[n].regid = r; + so->inputs[n].interpolate = INTERP_MODE_FLAT; + so->total_in++; + + ctx->ir->ninputs = MAX2(ctx->ir->ninputs, r + 1); + ctx->ir->inputs[r] = instr; +} + +static void add_sysval_input(struct ir3_context *ctx, gl_system_value slot, + struct ir3_instruction *instr) +{ + add_sysval_input_compmask(ctx, slot, 0x1, instr); +} + +static void +emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) +{ + const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic]; + struct ir3_instruction **dst; + struct ir3_instruction * const *src; + struct ir3_block *b = ctx->block; + nir_const_value *const_offset; + int idx, comp; + + if (info->has_dest) { + unsigned n = nir_intrinsic_dest_components(intr); + dst = ir3_get_dst(ctx, &intr->dest, n); + } else { + dst = NULL; + } + + switch (intr->intrinsic) { + case nir_intrinsic_load_uniform: + idx = nir_intrinsic_base(intr); + const_offset = 
nir_src_as_const_value(intr->src[0]); + if (const_offset) { + idx += const_offset->u32[0]; + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i; + dst[i] = create_uniform(b, n); + } + } else { + src = ir3_get_src(ctx, &intr->src[0]); + for (int i = 0; i < intr->num_components; i++) { + int n = idx * 4 + i; + dst[i] = create_uniform_indirect(b, n, + ir3_get_addr(ctx, src[0], 4)); + } + /* NOTE: if relative addressing is used, we set + * constlen in the compiler (to worst-case value) + * since we don't know in the assembler what the max + * addr reg value can be: + */ + ctx->so->constlen = ctx->s->num_uniforms; + } + break; + case nir_intrinsic_load_ubo: + emit_intrinsic_load_ubo(ctx, intr, dst); + break; + case nir_intrinsic_load_input: + idx = nir_intrinsic_base(intr); + comp = nir_intrinsic_component(intr); + const_offset = nir_src_as_const_value(intr->src[0]); + if (const_offset) { + idx += const_offset->u32[0]; + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i + comp; + dst[i] = ctx->ir->inputs[n]; + } + } else { + src = ir3_get_src(ctx, &intr->src[0]); + struct ir3_instruction *collect = + ir3_create_collect(ctx, ctx->ir->inputs, ctx->ir->ninputs); + struct ir3_instruction *addr = ir3_get_addr(ctx, src[0], 4); + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i + comp; + dst[i] = create_indirect_load(ctx, ctx->ir->ninputs, + n, addr, collect); + } + } + break; + case nir_intrinsic_load_ssbo: + emit_intrinsic_load_ssbo(ctx, intr, dst); + break; + case nir_intrinsic_store_ssbo: + emit_intrinsic_store_ssbo(ctx, intr); + break; + case nir_intrinsic_get_buffer_size: + emit_intrinsic_ssbo_size(ctx, intr, dst); + break; + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_ssbo_atomic_or: + case 
nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_ssbo_atomic_comp_swap: + dst[0] = emit_intrinsic_atomic_ssbo(ctx, intr); + break; + case nir_intrinsic_load_shared: + emit_intrinsic_load_shared(ctx, intr, dst); + break; + case nir_intrinsic_store_shared: + emit_intrinsic_store_shared(ctx, intr); + break; + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_shared_atomic_xor: + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_shared_atomic_comp_swap: + dst[0] = emit_intrinsic_atomic_shared(ctx, intr); + break; + case nir_intrinsic_image_deref_load: + emit_intrinsic_load_image(ctx, intr, dst); + break; + case nir_intrinsic_image_deref_store: + emit_intrinsic_store_image(ctx, intr); + break; + case nir_intrinsic_image_deref_size: + emit_intrinsic_image_size(ctx, intr, dst); + break; + case nir_intrinsic_image_deref_atomic_add: + case nir_intrinsic_image_deref_atomic_min: + case nir_intrinsic_image_deref_atomic_max: + case nir_intrinsic_image_deref_atomic_and: + case nir_intrinsic_image_deref_atomic_or: + case nir_intrinsic_image_deref_atomic_xor: + case nir_intrinsic_image_deref_atomic_exchange: + case nir_intrinsic_image_deref_atomic_comp_swap: + dst[0] = emit_intrinsic_atomic_image(ctx, intr); + break; + case nir_intrinsic_barrier: + case nir_intrinsic_memory_barrier: + case nir_intrinsic_group_memory_barrier: + case nir_intrinsic_memory_barrier_atomic_counter: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_memory_barrier_image: + case nir_intrinsic_memory_barrier_shared: + emit_intrinsic_barrier(ctx, intr); + /* note that blk ptr no longer valid, make that obvious: */ + b = NULL; + break; + case nir_intrinsic_store_output: + idx = 
nir_intrinsic_base(intr); + comp = nir_intrinsic_component(intr); + const_offset = nir_src_as_const_value(intr->src[1]); + compile_assert(ctx, const_offset != NULL); + idx += const_offset->u32[0]; + + src = ir3_get_src(ctx, &intr->src[0]); + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i + comp; + ctx->ir->outputs[n] = src[i]; + } + break; + case nir_intrinsic_load_base_vertex: + case nir_intrinsic_load_first_vertex: + if (!ctx->basevertex) { + ctx->basevertex = create_driver_param(ctx, IR3_DP_VTXID_BASE); + add_sysval_input(ctx, SYSTEM_VALUE_FIRST_VERTEX, ctx->basevertex); + } + dst[0] = ctx->basevertex; + break; + case nir_intrinsic_load_vertex_id_zero_base: + case nir_intrinsic_load_vertex_id: + if (!ctx->vertex_id) { + gl_system_value sv = (intr->intrinsic == nir_intrinsic_load_vertex_id) ? + SYSTEM_VALUE_VERTEX_ID : SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; + ctx->vertex_id = create_input(ctx, 0); + add_sysval_input(ctx, sv, ctx->vertex_id); + } + dst[0] = ctx->vertex_id; + break; + case nir_intrinsic_load_instance_id: + if (!ctx->instance_id) { + ctx->instance_id = create_input(ctx, 0); + add_sysval_input(ctx, SYSTEM_VALUE_INSTANCE_ID, + ctx->instance_id); + } + dst[0] = ctx->instance_id; + break; + case nir_intrinsic_load_sample_id: + case nir_intrinsic_load_sample_id_no_per_sample: + if (!ctx->samp_id) { + ctx->samp_id = create_input(ctx, 0); + ctx->samp_id->regs[0]->flags |= IR3_REG_HALF; + add_sysval_input(ctx, SYSTEM_VALUE_SAMPLE_ID, + ctx->samp_id); + } + dst[0] = ir3_COV(b, ctx->samp_id, TYPE_U16, TYPE_U32); + break; + case nir_intrinsic_load_sample_mask_in: + if (!ctx->samp_mask_in) { + ctx->samp_mask_in = create_input(ctx, 0); + add_sysval_input(ctx, SYSTEM_VALUE_SAMPLE_MASK_IN, + ctx->samp_mask_in); + } + dst[0] = ctx->samp_mask_in; + break; + case nir_intrinsic_load_user_clip_plane: + idx = nir_intrinsic_ucp_id(intr); + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i; + dst[i] = 
create_driver_param(ctx, IR3_DP_UCP0_X + n); + } + break; + case nir_intrinsic_load_front_face: + if (!ctx->frag_face) { + ctx->so->frag_face = true; + ctx->frag_face = create_input(ctx, 0); + add_sysval_input(ctx, SYSTEM_VALUE_FRONT_FACE, ctx->frag_face); + ctx->frag_face->regs[0]->flags |= IR3_REG_HALF; + } + /* for fragface, we get -1 for back and 0 for front. However this is + * the inverse of what nir expects (where ~0 is true). + */ + dst[0] = ir3_COV(b, ctx->frag_face, TYPE_S16, TYPE_S32); + dst[0] = ir3_NOT_B(b, dst[0], 0); + break; + case nir_intrinsic_load_local_invocation_id: + if (!ctx->local_invocation_id) { + ctx->local_invocation_id = create_input_compmask(ctx, 0, 0x7); + add_sysval_input_compmask(ctx, SYSTEM_VALUE_LOCAL_INVOCATION_ID, + 0x7, ctx->local_invocation_id); + } + ir3_split_dest(b, dst, ctx->local_invocation_id, 0, 3); + break; + case nir_intrinsic_load_work_group_id: + if (!ctx->work_group_id) { + ctx->work_group_id = create_input_compmask(ctx, 0, 0x7); + add_sysval_input_compmask(ctx, SYSTEM_VALUE_WORK_GROUP_ID, + 0x7, ctx->work_group_id); + ctx->work_group_id->regs[0]->flags |= IR3_REG_HIGH; + } + ir3_split_dest(b, dst, ctx->work_group_id, 0, 3); + break; + case nir_intrinsic_load_num_work_groups: + for (int i = 0; i < intr->num_components; i++) { + dst[i] = create_driver_param(ctx, IR3_DP_NUM_WORK_GROUPS_X + i); + } + break; + case nir_intrinsic_load_local_group_size: + for (int i = 0; i < intr->num_components; i++) { + dst[i] = create_driver_param(ctx, IR3_DP_LOCAL_GROUP_SIZE_X + i); + } + break; + case nir_intrinsic_discard_if: + case nir_intrinsic_discard: { + struct ir3_instruction *cond, *kill; + + if (intr->intrinsic == nir_intrinsic_discard_if) { + /* conditional discard: */ + src = ir3_get_src(ctx, &intr->src[0]); + cond = ir3_b2n(b, src[0]); + } else { + /* unconditional discard: */ + cond = create_immed(b, 1); + } + + /* NOTE: only cmps.*.* can write p0.x: */ + cond = ir3_CMPS_S(b, cond, 0, create_immed(b, 0), 0); + 
cond->cat2.condition = IR3_COND_NE; + + /* condition always goes in predicate register: */ + cond->regs[0]->num = regid(REG_P0, 0); + + kill = ir3_KILL(b, cond, 0); + array_insert(ctx->ir, ctx->ir->predicates, kill); + + array_insert(b, b->keeps, kill); + ctx->so->has_kill = true; + + break; + } + default: + ir3_context_error(ctx, "Unhandled intrinsic type: %s\n", + nir_intrinsic_infos[intr->intrinsic].name); + break; + } + + if (info->has_dest) + put_dst(ctx, &intr->dest); +} + +static void +emit_load_const(struct ir3_context *ctx, nir_load_const_instr *instr) +{ + struct ir3_instruction **dst = ir3_get_dst_ssa(ctx, &instr->def, + instr->def.num_components); + type_t type = (instr->def.bit_size < 32) ? TYPE_U16 : TYPE_U32; + + for (int i = 0; i < instr->def.num_components; i++) + dst[i] = create_immed_typed(ctx->block, instr->value.u32[i], type); +} + +static void +emit_undef(struct ir3_context *ctx, nir_ssa_undef_instr *undef) +{ + struct ir3_instruction **dst = ir3_get_dst_ssa(ctx, &undef->def, + undef->def.num_components); + type_t type = (undef->def.bit_size < 32) ? TYPE_U16 : TYPE_U32; + + /* backend doesn't want undefined instructions, so just plug + * in 0.0.. + */ + for (int i = 0; i < undef->def.num_components; i++) + dst[i] = create_immed_typed(ctx->block, fui(0.0), type); +} + +/* + * texture fetch/sample instructions: + */ + +static void +tex_info(nir_tex_instr *tex, unsigned *flagsp, unsigned *coordsp) +{ + unsigned coords, flags = 0; + + /* note: would use tex->coord_components.. except txs.. 
also, + * since array index goes after shadow ref, we don't want to + * count it: + */ + switch (tex->sampler_dim) { + case GLSL_SAMPLER_DIM_1D: + case GLSL_SAMPLER_DIM_BUF: + coords = 1; + break; + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_RECT: + case GLSL_SAMPLER_DIM_EXTERNAL: + case GLSL_SAMPLER_DIM_MS: + coords = 2; + break; + case GLSL_SAMPLER_DIM_3D: + case GLSL_SAMPLER_DIM_CUBE: + coords = 3; + flags |= IR3_INSTR_3D; + break; + default: + unreachable("bad sampler_dim"); + } + + if (tex->is_shadow && tex->op != nir_texop_lod) + flags |= IR3_INSTR_S; + + if (tex->is_array && tex->op != nir_texop_lod) + flags |= IR3_INSTR_A; + + *flagsp = flags; + *coordsp = coords; +} + +static void +emit_tex(struct ir3_context *ctx, nir_tex_instr *tex) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction **dst, *sam, *src0[12], *src1[4]; + struct ir3_instruction * const *coord, * const *off, * const *ddx, * const *ddy; + struct ir3_instruction *lod, *compare, *proj, *sample_index; + bool has_bias = false, has_lod = false, has_proj = false, has_off = false; + unsigned i, coords, flags; + unsigned nsrc0 = 0, nsrc1 = 0; + type_t type; + opc_t opc = 0; + + coord = off = ddx = ddy = NULL; + lod = proj = compare = sample_index = NULL; + + /* TODO: might just be one component for gathers? 
*/ + dst = ir3_get_dst(ctx, &tex->dest, 4); + + for (unsigned i = 0; i < tex->num_srcs; i++) { + switch (tex->src[i].src_type) { + case nir_tex_src_coord: + coord = ir3_get_src(ctx, &tex->src[i].src); + break; + case nir_tex_src_bias: + lod = ir3_get_src(ctx, &tex->src[i].src)[0]; + has_bias = true; + break; + case nir_tex_src_lod: + lod = ir3_get_src(ctx, &tex->src[i].src)[0]; + has_lod = true; + break; + case nir_tex_src_comparator: /* shadow comparator */ + compare = ir3_get_src(ctx, &tex->src[i].src)[0]; + break; + case nir_tex_src_projector: + proj = ir3_get_src(ctx, &tex->src[i].src)[0]; + has_proj = true; + break; + case nir_tex_src_offset: + off = ir3_get_src(ctx, &tex->src[i].src); + has_off = true; + break; + case nir_tex_src_ddx: + ddx = ir3_get_src(ctx, &tex->src[i].src); + break; + case nir_tex_src_ddy: + ddy = ir3_get_src(ctx, &tex->src[i].src); + break; + case nir_tex_src_ms_index: + sample_index = ir3_get_src(ctx, &tex->src[i].src)[0]; + break; + default: + ir3_context_error(ctx, "Unhandled NIR tex src type: %d\n", + tex->src[i].src_type); + return; + } + } + + switch (tex->op) { + case nir_texop_tex: opc = has_lod ? OPC_SAML : OPC_SAM; break; + case nir_texop_txb: opc = OPC_SAMB; break; + case nir_texop_txl: opc = OPC_SAML; break; + case nir_texop_txd: opc = OPC_SAMGQ; break; + case nir_texop_txf: opc = OPC_ISAML; break; + case nir_texop_lod: opc = OPC_GETLOD; break; + case nir_texop_tg4: + /* NOTE: a4xx might need to emulate gather w/ txf (this is + * what blob does, seems gather is broken?), and a3xx did + * not support it (but probably could also emulate). 
+ */ + switch (tex->component) { + case 0: opc = OPC_GATHER4R; break; + case 1: opc = OPC_GATHER4G; break; + case 2: opc = OPC_GATHER4B; break; + case 3: opc = OPC_GATHER4A; break; + } + break; + case nir_texop_txf_ms: opc = OPC_ISAMM; break; + case nir_texop_txs: + case nir_texop_query_levels: + case nir_texop_texture_samples: + case nir_texop_samples_identical: + case nir_texop_txf_ms_mcs: + ir3_context_error(ctx, "Unhandled NIR tex type: %d\n", tex->op); + return; + } + + tex_info(tex, &flags, &coords); + + /* + * lay out the first argument in the proper order: + * - actual coordinates first + * - shadow reference + * - array index + * - projection w + * - starting at offset 4, dpdx.xy, dpdy.xy + * + * bias/lod go into the second arg + */ + + /* insert tex coords: */ + for (i = 0; i < coords; i++) + src0[i] = coord[i]; + + nsrc0 = i; + + /* NOTE a3xx (and possibly a4xx?) might be different, using isaml + * with scaled x coord according to requested sample: + */ + if (tex->op == nir_texop_txf_ms) { + if (ctx->compiler->txf_ms_with_isaml) { + /* the samples are laid out in x dimension as + * 0 1 2 3 + * x_ms = (x << ms) + sample_index; + */ + struct ir3_instruction *ms; + ms = create_immed(b, (ctx->samples >> (2 * tex->texture_index)) & 3); + + src0[0] = ir3_SHL_B(b, src0[0], 0, ms, 0); + src0[0] = ir3_ADD_U(b, src0[0], 0, sample_index, 0); + + opc = OPC_ISAML; + } else { + src0[nsrc0++] = sample_index; + } + } + + /* scale up integer coords for TXF based on the LOD */ + if (ctx->compiler->unminify_coords && (opc == OPC_ISAML)) { + assert(has_lod); + for (i = 0; i < coords; i++) + src0[i] = ir3_SHL_B(b, src0[i], 0, lod, 0); + } + + if (coords == 1) { + /* hw doesn't do 1d, so we treat it as 2d with + * height of 1, and patch up the y coord. + * TODO: y coord should be (int)0 in some cases.. 
+ */ + src0[nsrc0++] = create_immed(b, fui(0.5)); + } + + if (tex->is_shadow && tex->op != nir_texop_lod) + src0[nsrc0++] = compare; + + if (tex->is_array && tex->op != nir_texop_lod) { + struct ir3_instruction *idx = coord[coords]; + + /* the array coord for cube arrays needs 0.5 added to it */ + if (ctx->compiler->array_index_add_half && (opc != OPC_ISAML)) + idx = ir3_ADD_F(b, idx, 0, create_immed(b, fui(0.5)), 0); + + src0[nsrc0++] = idx; + } + + if (has_proj) { + src0[nsrc0++] = proj; + flags |= IR3_INSTR_P; + } + + /* pad to 4, then ddx/ddy: */ + if (tex->op == nir_texop_txd) { + while (nsrc0 < 4) + src0[nsrc0++] = create_immed(b, fui(0.0)); + for (i = 0; i < coords; i++) + src0[nsrc0++] = ddx[i]; + if (coords < 2) + src0[nsrc0++] = create_immed(b, fui(0.0)); + for (i = 0; i < coords; i++) + src0[nsrc0++] = ddy[i]; + if (coords < 2) + src0[nsrc0++] = create_immed(b, fui(0.0)); + } + + /* + * second argument (if applicable): + * - offsets + * - lod + * - bias + */ + if (has_off | has_lod | has_bias) { + if (has_off) { + unsigned off_coords = coords; + if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) + off_coords--; + for (i = 0; i < off_coords; i++) + src1[nsrc1++] = off[i]; + if (off_coords < 2) + src1[nsrc1++] = create_immed(b, fui(0.0)); + flags |= IR3_INSTR_O; + } + + if (has_lod | has_bias) + src1[nsrc1++] = lod; + } + + switch (tex->dest_type) { + case nir_type_invalid: + case nir_type_float: + type = TYPE_F32; + break; + case nir_type_int: + type = TYPE_S32; + break; + case nir_type_uint: + case nir_type_bool: + type = TYPE_U32; + break; + default: + unreachable("bad dest_type"); + } + + if (opc == OPC_GETLOD) + type = TYPE_U32; + + unsigned tex_idx = tex->texture_index; + + ctx->max_texture_index = MAX2(ctx->max_texture_index, tex_idx); + + struct ir3_instruction *col0 = ir3_create_collect(ctx, src0, nsrc0); + struct ir3_instruction *col1 = ir3_create_collect(ctx, src1, nsrc1); + + sam = ir3_SAM(b, opc, type, 0b1111, flags, + tex_idx, tex_idx, col0, 
col1); + + if ((ctx->astc_srgb & (1 << tex_idx)) && !nir_tex_instr_is_query(tex)) { + /* only need first 3 components: */ + sam->regs[0]->wrmask = 0x7; + ir3_split_dest(b, dst, sam, 0, 3); + + /* we need to sample the alpha separately with a non-ASTC + * texture state: + */ + sam = ir3_SAM(b, opc, type, 0b1000, flags, + tex_idx, tex_idx, col0, col1); + + array_insert(ctx->ir, ctx->ir->astc_srgb, sam); + + /* fixup .w component: */ + ir3_split_dest(b, &dst[3], sam, 3, 1); + } else { + /* normal (non-workaround) case: */ + ir3_split_dest(b, dst, sam, 0, 4); + } + + /* GETLOD returns results in 4.8 fixed point */ + if (opc == OPC_GETLOD) { + struct ir3_instruction *factor = create_immed(b, fui(1.0 / 256)); + + compile_assert(ctx, tex->dest_type == nir_type_float); + for (i = 0; i < 2; i++) { + dst[i] = ir3_MUL_F(b, ir3_COV(b, dst[i], TYPE_U32, TYPE_F32), 0, + factor, 0); + } + } + + put_dst(ctx, &tex->dest); +} + +static void +emit_tex_query_levels(struct ir3_context *ctx, nir_tex_instr *tex) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction **dst, *sam; + + dst = ir3_get_dst(ctx, &tex->dest, 1); + + sam = ir3_SAM(b, OPC_GETINFO, TYPE_U32, 0b0100, 0, + tex->texture_index, tex->texture_index, NULL, NULL); + + /* even though there is only one component, since it ends + * up in .z rather than .x, we need a split_dest() + */ + ir3_split_dest(b, dst, sam, 0, 3); + + /* The # of levels comes from getinfo.z. We need to add 1 to it, since + * the value in TEX_CONST_0 is zero-based. + */ + if (ctx->compiler->levels_add_one) + dst[0] = ir3_ADD_U(b, dst[0], 0, create_immed(b, 1), 0); + + put_dst(ctx, &tex->dest); +} + +static void +emit_tex_txs(struct ir3_context *ctx, nir_tex_instr *tex) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction **dst, *sam; + struct ir3_instruction *lod; + unsigned flags, coords; + + tex_info(tex, &flags, &coords); + + /* Actually we want the number of dimensions, not coordinates. This + * distinction only matters for cubes. 
+ */ + if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) + coords = 2; + + dst = ir3_get_dst(ctx, &tex->dest, 4); + + compile_assert(ctx, tex->num_srcs == 1); + compile_assert(ctx, tex->src[0].src_type == nir_tex_src_lod); + + lod = ir3_get_src(ctx, &tex->src[0].src)[0]; + + sam = ir3_SAM(b, OPC_GETSIZE, TYPE_U32, 0b1111, flags, + tex->texture_index, tex->texture_index, lod, NULL); + + ir3_split_dest(b, dst, sam, 0, 4); + + /* Array size actually ends up in .w rather than .z. This doesn't + * matter for miplevel 0, but for higher mips the value in z is + * minified whereas w stays. Also, the value in TEX_CONST_3_DEPTH is + * returned, which means that we have to add 1 to it for arrays. + */ + if (tex->is_array) { + if (ctx->compiler->levels_add_one) { + dst[coords] = ir3_ADD_U(b, dst[3], 0, create_immed(b, 1), 0); + } else { + dst[coords] = ir3_MOV(b, dst[3], TYPE_U32); + } + } + + put_dst(ctx, &tex->dest); +} + +static void +emit_jump(struct ir3_context *ctx, nir_jump_instr *jump) +{ + switch (jump->type) { + case nir_jump_break: + case nir_jump_continue: + case nir_jump_return: + /* I *think* we can simply just ignore this, and use the + * successor block link to figure out where we need to + * jump to for break/continue + */ + break; + default: + ir3_context_error(ctx, "Unhandled NIR jump type: %d\n", jump->type); + break; + } +} + +static void +emit_instr(struct ir3_context *ctx, nir_instr *instr) +{ + switch (instr->type) { + case nir_instr_type_alu: + emit_alu(ctx, nir_instr_as_alu(instr)); + break; + case nir_instr_type_deref: + /* ignored, handled as part of the intrinsic they are src to */ + break; + case nir_instr_type_intrinsic: + emit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); + break; + case nir_instr_type_load_const: + emit_load_const(ctx, nir_instr_as_load_const(instr)); + break; + case nir_instr_type_ssa_undef: + emit_undef(ctx, nir_instr_as_ssa_undef(instr)); + break; + case nir_instr_type_tex: { + nir_tex_instr *tex = nir_instr_as_tex(instr); + 
/* couple tex instructions get special-cased: + */ + switch (tex->op) { + case nir_texop_txs: + emit_tex_txs(ctx, tex); + break; + case nir_texop_query_levels: + emit_tex_query_levels(ctx, tex); + break; + default: + emit_tex(ctx, tex); + break; + } + break; + } + case nir_instr_type_jump: + emit_jump(ctx, nir_instr_as_jump(instr)); + break; + case nir_instr_type_phi: + /* we have converted phi webs to regs in NIR by now */ + ir3_context_error(ctx, "Unexpected NIR instruction type: %d\n", instr->type); + break; + case nir_instr_type_call: + case nir_instr_type_parallel_copy: + ir3_context_error(ctx, "Unhandled NIR instruction type: %d\n", instr->type); + break; + } +} + +static struct ir3_block * +get_block(struct ir3_context *ctx, const nir_block *nblock) +{ + struct ir3_block *block; + struct hash_entry *hentry; + unsigned i; + + hentry = _mesa_hash_table_search(ctx->block_ht, nblock); + if (hentry) + return hentry->data; + + block = ir3_block_create(ctx->ir); + block->nblock = nblock; + _mesa_hash_table_insert(ctx->block_ht, nblock, block); + + block->predecessors_count = nblock->predecessors->entries; + block->predecessors = ralloc_array_size(block, + sizeof(block->predecessors[0]), block->predecessors_count); + i = 0; + set_foreach(nblock->predecessors, sentry) { + block->predecessors[i++] = get_block(ctx, sentry->key); + } + + return block; +} + +static void +emit_block(struct ir3_context *ctx, nir_block *nblock) +{ + struct ir3_block *block = get_block(ctx, nblock); + + for (int i = 0; i < ARRAY_SIZE(block->successors); i++) { + if (nblock->successors[i]) { + block->successors[i] = + get_block(ctx, nblock->successors[i]); + } + } + + ctx->block = block; + list_addtail(&block->node, &ctx->ir->block_list); + + /* re-emit addr register in each block if needed: */ + for (int i = 0; i < ARRAY_SIZE(ctx->addr_ht); i++) { + _mesa_hash_table_destroy(ctx->addr_ht[i], NULL); + ctx->addr_ht[i] = NULL; + } + + nir_foreach_instr(instr, nblock) { + ctx->cur_instr = instr; 
+ emit_instr(ctx, instr); + ctx->cur_instr = NULL; + if (ctx->error) + return; + } +} + +static void emit_cf_list(struct ir3_context *ctx, struct exec_list *list); + +static void +emit_if(struct ir3_context *ctx, nir_if *nif) +{ + struct ir3_instruction *condition = ir3_get_src(ctx, &nif->condition)[0]; + + ctx->block->condition = + ir3_get_predicate(ctx, ir3_b2n(condition->block, condition)); + + emit_cf_list(ctx, &nif->then_list); + emit_cf_list(ctx, &nif->else_list); +} + +static void +emit_loop(struct ir3_context *ctx, nir_loop *nloop) +{ + emit_cf_list(ctx, &nloop->body); +} + +static void +stack_push(struct ir3_context *ctx) +{ + ctx->stack++; + ctx->max_stack = MAX2(ctx->max_stack, ctx->stack); +} + +static void +stack_pop(struct ir3_context *ctx) +{ + compile_assert(ctx, ctx->stack > 0); + ctx->stack--; +} + +static void +emit_cf_list(struct ir3_context *ctx, struct exec_list *list) +{ + foreach_list_typed(nir_cf_node, node, node, list) { + switch (node->type) { + case nir_cf_node_block: + emit_block(ctx, nir_cf_node_as_block(node)); + break; + case nir_cf_node_if: + stack_push(ctx); + emit_if(ctx, nir_cf_node_as_if(node)); + stack_pop(ctx); + break; + case nir_cf_node_loop: + stack_push(ctx); + emit_loop(ctx, nir_cf_node_as_loop(node)); + stack_pop(ctx); + break; + case nir_cf_node_function: + ir3_context_error(ctx, "TODO\n"); + break; + } + } +} + +/* emit stream-out code. At this point, the current block is the original + * (nir) end block, and nir ensures that all flow control paths terminate + * into the end block. We re-purpose the original end block to generate + * the 'if (vtxcnt < maxvtxcnt)' condition, then append the conditional + * block holding stream-out write instructions, followed by the new end + * block: + * + * blockOrigEnd { + * p0.x = (vtxcnt < maxvtxcnt) + * // succs: blockStreamOut, blockNewEnd + * } + * blockStreamOut { + * ... stream-out instructions ... 
+ * // succs: blockNewEnd + * } + * blockNewEnd { + * } + */ +static void +emit_stream_out(struct ir3_context *ctx) +{ + struct ir3_shader_variant *v = ctx->so; + struct ir3 *ir = ctx->ir; + struct ir3_stream_output_info *strmout = + &ctx->so->shader->stream_output; + struct ir3_block *orig_end_block, *stream_out_block, *new_end_block; + struct ir3_instruction *vtxcnt, *maxvtxcnt, *cond; + struct ir3_instruction *bases[IR3_MAX_SO_BUFFERS]; + + /* create vtxcnt input in input block at top of shader, + * so that it is seen as live over the entire duration + * of the shader: + */ + vtxcnt = create_input(ctx, 0); + add_sysval_input(ctx, SYSTEM_VALUE_VERTEX_CNT, vtxcnt); + + maxvtxcnt = create_driver_param(ctx, IR3_DP_VTXCNT_MAX); + + /* at this point, we are at the original 'end' block, + * re-purpose this block to stream-out condition, then + * append stream-out block and new-end block + */ + orig_end_block = ctx->block; + +// TODO these blocks need to update predecessors.. +// maybe w/ store_global intrinsic, we could do this +// stuff in nir->nir pass + + stream_out_block = ir3_block_create(ir); + list_addtail(&stream_out_block->node, &ir->block_list); + + new_end_block = ir3_block_create(ir); + list_addtail(&new_end_block->node, &ir->block_list); + + orig_end_block->successors[0] = stream_out_block; + orig_end_block->successors[1] = new_end_block; + stream_out_block->successors[0] = new_end_block; + + /* setup 'if (vtxcnt < maxvtxcnt)' condition: */ + cond = ir3_CMPS_S(ctx->block, vtxcnt, 0, maxvtxcnt, 0); + cond->regs[0]->num = regid(REG_P0, 0); + cond->cat2.condition = IR3_COND_LT; + + /* condition goes on previous block to the conditional, + * since it is used to pick which of the two successor + * paths to take: + */ + orig_end_block->condition = cond; + + /* switch to stream_out_block to generate the stream-out + * instructions: + */ + ctx->block = stream_out_block; + + /* Calculate base addresses based on vtxcnt. 
Instructions + * generated for bases not used in following loop will be + * stripped out in the backend. + */ + for (unsigned i = 0; i < IR3_MAX_SO_BUFFERS; i++) { + unsigned stride = strmout->stride[i]; + struct ir3_instruction *base, *off; + + base = create_uniform(ctx->block, regid(v->constbase.tfbo, i)); + + /* 24-bit should be enough: */ + off = ir3_MUL_U(ctx->block, vtxcnt, 0, + create_immed(ctx->block, stride * 4), 0); + + bases[i] = ir3_ADD_S(ctx->block, off, 0, base, 0); + } + + /* Generate the per-output store instructions: */ + for (unsigned i = 0; i < strmout->num_outputs; i++) { + for (unsigned j = 0; j < strmout->output[i].num_components; j++) { + unsigned c = j + strmout->output[i].start_component; + struct ir3_instruction *base, *out, *stg; + + base = bases[strmout->output[i].output_buffer]; + out = ctx->ir->outputs[regid(strmout->output[i].register_index, c)]; + + stg = ir3_STG(ctx->block, base, 0, out, 0, + create_immed(ctx->block, 1), 0); + stg->cat6.type = TYPE_U32; + stg->cat6.dst_offset = (strmout->output[i].dst_offset + j) * 4; + + array_insert(ctx->block, ctx->block->keeps, stg); + } + } + + /* and finally switch to the new_end_block: */ + ctx->block = new_end_block; +} + +static void +emit_function(struct ir3_context *ctx, nir_function_impl *impl) +{ + nir_metadata_require(impl, nir_metadata_block_index); + + compile_assert(ctx, ctx->stack == 0); + + emit_cf_list(ctx, &impl->body); + emit_block(ctx, impl->end_block); + + compile_assert(ctx, ctx->stack == 0); + + /* at this point, we should have a single empty block, + * into which we emit the 'end' instruction. + */ + compile_assert(ctx, list_empty(&ctx->block->instr_list)); + + /* If stream-out (aka transform-feedback) enabled, emit the + * stream-out instructions, followed by a new empty block (into + * which the 'end' instruction lands). 
+ * + * NOTE: it is done in this order, rather than inserting before + * we emit end_block, because NIR guarantees that all blocks + * flow into end_block, and that end_block has no successors. + * So by re-purposing end_block as the first block of stream- + * out, we guarantee that all exit paths flow into the stream- + * out instructions. + */ + if ((ctx->compiler->gpu_id < 500) && + (ctx->so->shader->stream_output.num_outputs > 0) && + !ctx->so->binning_pass) { + debug_assert(ctx->so->type == MESA_SHADER_VERTEX); + emit_stream_out(ctx); + } + + ir3_END(ctx->block); +} + +static struct ir3_instruction * +create_frag_coord(struct ir3_context *ctx, unsigned comp) +{ + struct ir3_block *block = ctx->block; + struct ir3_instruction *instr; + + if (!ctx->frag_coord) { + ctx->frag_coord = create_input_compmask(ctx, 0, 0xf); + /* defer add_sysval_input() until after all inputs created */ + } + + ir3_split_dest(block, &instr, ctx->frag_coord, comp, 1); + + switch (comp) { + case 0: /* .x */ + case 1: /* .y */ + /* for frag_coord, we get unsigned values.. 
we need + * to subtract (integer) 8 and divide by 16 (right- + * shift by 4) then convert to float: + * + * sub.s tmp, src, 8 + * shr.b tmp, tmp, 4 + * mov.u32f32 dst, tmp + * + */ + instr = ir3_SUB_S(block, instr, 0, + create_immed(block, 8), 0); + instr = ir3_SHR_B(block, instr, 0, + create_immed(block, 4), 0); + instr = ir3_COV(block, instr, TYPE_U32, TYPE_F32); + + return instr; + case 2: /* .z */ + case 3: /* .w */ + default: + /* seems that we can use these as-is: */ + return instr; + } +} + +static void +setup_input(struct ir3_context *ctx, nir_variable *in) +{ + struct ir3_shader_variant *so = ctx->so; + unsigned ncomp = glsl_get_components(in->type); + unsigned n = in->data.driver_location; + unsigned frac = in->data.location_frac; + unsigned slot = in->data.location; + + /* skip unread inputs, we could end up with (for example), unsplit + * matrix/etc inputs in the case they are not read, so just silently + * skip these. + */ + if (ncomp > 4) + return; + + so->inputs[n].slot = slot; + so->inputs[n].compmask = (1 << (ncomp + frac)) - 1; + so->inputs_count = MAX2(so->inputs_count, n + 1); + so->inputs[n].interpolate = in->data.interpolation; + + if (ctx->so->type == MESA_SHADER_FRAGMENT) { + for (int i = 0; i < ncomp; i++) { + struct ir3_instruction *instr = NULL; + unsigned idx = (n * 4) + i + frac; + + if (slot == VARYING_SLOT_POS) { + so->inputs[n].bary = false; + so->frag_coord = true; + instr = create_frag_coord(ctx, i); + } else if (slot == VARYING_SLOT_PNTC) { + /* see for example st_nir_fixup_varying_slots().. this is + * maybe a bit mesa/st specific. But we need things to line + * up for this in fdN_program: + * unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0); + * if (emit->sprite_coord_enable & texmask) { + * ... 
+ * } + */ + so->inputs[n].slot = VARYING_SLOT_VAR8; + so->inputs[n].bary = true; + instr = create_frag_input(ctx, false); + } else { + bool use_ldlv = false; + + /* detect the special case for front/back colors where + * we need to do flat vs smooth shading depending on + * rast state: + */ + if (in->data.interpolation == INTERP_MODE_NONE) { + switch (slot) { + case VARYING_SLOT_COL0: + case VARYING_SLOT_COL1: + case VARYING_SLOT_BFC0: + case VARYING_SLOT_BFC1: + so->inputs[n].rasterflat = true; + break; + default: + break; + } + } + + if (ctx->compiler->flat_bypass) { + if ((so->inputs[n].interpolate == INTERP_MODE_FLAT) || + (so->inputs[n].rasterflat && ctx->so->key.rasterflat)) + use_ldlv = true; + } + + so->inputs[n].bary = true; + + instr = create_frag_input(ctx, use_ldlv); + } + + compile_assert(ctx, idx < ctx->ir->ninputs); + + ctx->ir->inputs[idx] = instr; + } + } else if (ctx->so->type == MESA_SHADER_VERTEX) { + for (int i = 0; i < ncomp; i++) { + unsigned idx = (n * 4) + i + frac; + compile_assert(ctx, idx < ctx->ir->ninputs); + ctx->ir->inputs[idx] = create_input(ctx, idx); + } + } else { + ir3_context_error(ctx, "unknown shader type: %d\n", ctx->so->type); + } + + if (so->inputs[n].bary || (ctx->so->type == MESA_SHADER_VERTEX)) { + so->total_in += ncomp; + } +} + +static void +setup_output(struct ir3_context *ctx, nir_variable *out) +{ + struct ir3_shader_variant *so = ctx->so; + unsigned ncomp = glsl_get_components(out->type); + unsigned n = out->data.driver_location; + unsigned frac = out->data.location_frac; + unsigned slot = out->data.location; + unsigned comp = 0; + + if (ctx->so->type == MESA_SHADER_FRAGMENT) { + switch (slot) { + case FRAG_RESULT_DEPTH: + comp = 2; /* tgsi will write to .z component */ + so->writes_pos = true; + break; + case FRAG_RESULT_COLOR: + so->color0_mrt = 1; + break; + default: + if (slot >= FRAG_RESULT_DATA0) + break; + ir3_context_error(ctx, "unknown FS output name: %s\n", + gl_frag_result_name(slot)); + } + } else if 
(ctx->so->type == MESA_SHADER_VERTEX) { + switch (slot) { + case VARYING_SLOT_POS: + so->writes_pos = true; + break; + case VARYING_SLOT_PSIZ: + so->writes_psize = true; + break; + case VARYING_SLOT_COL0: + case VARYING_SLOT_COL1: + case VARYING_SLOT_BFC0: + case VARYING_SLOT_BFC1: + case VARYING_SLOT_FOGC: + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_CLIP_DIST1: + case VARYING_SLOT_CLIP_VERTEX: + break; + default: + if (slot >= VARYING_SLOT_VAR0) + break; + if ((VARYING_SLOT_TEX0 <= slot) && (slot <= VARYING_SLOT_TEX7)) + break; + ir3_context_error(ctx, "unknown VS output name: %s\n", + gl_varying_slot_name(slot)); + } + } else { + ir3_context_error(ctx, "unknown shader type: %d\n", ctx->so->type); + } + + compile_assert(ctx, n < ARRAY_SIZE(so->outputs)); + + so->outputs[n].slot = slot; + so->outputs[n].regid = regid(n, comp); + so->outputs_count = MAX2(so->outputs_count, n + 1); + + for (int i = 0; i < ncomp; i++) { + unsigned idx = (n * 4) + i + frac; + compile_assert(ctx, idx < ctx->ir->noutputs); + ctx->ir->outputs[idx] = create_immed(ctx->block, fui(0.0)); + } + + /* if varying packing doesn't happen, we could end up in a situation + * with "holes" in the output, and since the per-generation code that + * sets up varying linkage registers doesn't expect to have more than + * one varying per vec4 slot, pad the holes. + * + * Note that this should probably generate a performance warning of + * some sort. + */ + for (int i = 0; i < frac; i++) { + unsigned idx = (n * 4) + i; + if (!ctx->ir->outputs[idx]) { + ctx->ir->outputs[idx] = create_immed(ctx->block, fui(0.0)); + } + } +} + +static int +max_drvloc(struct exec_list *vars) +{ + int drvloc = -1; + nir_foreach_variable(var, vars) { + drvloc = MAX2(drvloc, (int)var->data.driver_location); + } + return drvloc; +} + +static const unsigned max_sysvals[] = { + [MESA_SHADER_FRAGMENT] = 24, // TODO + [MESA_SHADER_VERTEX] = 16, + [MESA_SHADER_COMPUTE] = 16, // TODO how many do we actually need? 
+ [MESA_SHADER_KERNEL] = 16, // TODO how many do we actually need? +}; + +static void +emit_instructions(struct ir3_context *ctx) +{ + unsigned ninputs, noutputs; + nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->s); + + ninputs = (max_drvloc(&ctx->s->inputs) + 1) * 4; + noutputs = (max_drvloc(&ctx->s->outputs) + 1) * 4; + + /* we need to leave room for sysvals: + */ + ninputs += max_sysvals[ctx->so->type]; + + ctx->ir = ir3_create(ctx->compiler, ninputs, noutputs); + + /* Create inputs in first block: */ + ctx->block = get_block(ctx, nir_start_block(fxn)); + ctx->in_block = ctx->block; + list_addtail(&ctx->block->node, &ctx->ir->block_list); + + ninputs -= max_sysvals[ctx->so->type]; + + /* for fragment shader, the vcoord input register is used as the + * base for bary.f varying fetch instrs: + */ + struct ir3_instruction *vcoord = NULL; + if (ctx->so->type == MESA_SHADER_FRAGMENT) { + struct ir3_instruction *xy[2]; + + vcoord = create_input_compmask(ctx, 0, 0x3); + ir3_split_dest(ctx->block, xy, vcoord, 0, 2); + + ctx->frag_vcoord = ir3_create_collect(ctx, xy, 2); + } + + /* Setup inputs: */ + nir_foreach_variable(var, &ctx->s->inputs) { + setup_input(ctx, var); + } + + /* Defer add_sysval_input() stuff until after setup_inputs(), + * because sysvals need to be appended after varyings: + */ + if (vcoord) { + add_sysval_input_compmask(ctx, SYSTEM_VALUE_VARYING_COORD, + 0x3, vcoord); + } + + if (ctx->frag_coord) { + add_sysval_input_compmask(ctx, SYSTEM_VALUE_FRAG_COORD, + 0xf, ctx->frag_coord); + } + + /* Setup outputs: */ + nir_foreach_variable(var, &ctx->s->outputs) { + setup_output(ctx, var); + } + + /* Setup registers (which should only be arrays): */ + nir_foreach_register(reg, &ctx->s->registers) { + ir3_declare_array(ctx, reg); + } + + /* NOTE: need to do something more clever when we support >1 fxn */ + nir_foreach_register(reg, &fxn->registers) { + ir3_declare_array(ctx, reg); + } + /* And emit the body: */ + ctx->impl = fxn; + emit_function(ctx, 
fxn); +} + +/* from NIR perspective, we actually have varying inputs. But the varying + * inputs, from an IR standpoint, are just bary.f/ldlv instructions. The + * only actual inputs are the sysvals. + */ +static void +fixup_frag_inputs(struct ir3_context *ctx) +{ + struct ir3_shader_variant *so = ctx->so; + struct ir3 *ir = ctx->ir; + unsigned i = 0; + + /* sysvals should appear at the end of the inputs, drop everything else: */ + while ((i < so->inputs_count) && !so->inputs[i].sysval) + i++; + + /* at IR level, inputs are always blocks of 4 scalars: */ + i *= 4; + + ir->inputs = &ir->inputs[i]; + ir->ninputs -= i; +} + +/* Fixup tex sampler state for astc/srgb workaround instructions. We + * need to assign the tex state indexes for these after we know the + * max tex index. + */ +static void +fixup_astc_srgb(struct ir3_context *ctx) +{ + struct ir3_shader_variant *so = ctx->so; + /* indexed by original tex idx, value is newly assigned alpha sampler + * state tex idx. Zero is invalid since there is at least one sampler + * if we get here. 
+ */ + unsigned alt_tex_state[16] = {0}; + unsigned tex_idx = ctx->max_texture_index + 1; + unsigned idx = 0; + + so->astc_srgb.base = tex_idx; + + for (unsigned i = 0; i < ctx->ir->astc_srgb_count; i++) { + struct ir3_instruction *sam = ctx->ir->astc_srgb[i]; + + compile_assert(ctx, sam->cat5.tex < ARRAY_SIZE(alt_tex_state)); + + if (alt_tex_state[sam->cat5.tex] == 0) { + /* assign new alternate/alpha tex state slot: */ + alt_tex_state[sam->cat5.tex] = tex_idx++; + so->astc_srgb.orig_idx[idx++] = sam->cat5.tex; + so->astc_srgb.count++; + } + + sam->cat5.tex = alt_tex_state[sam->cat5.tex]; + } +} + +static void +fixup_binning_pass(struct ir3_context *ctx) +{ + struct ir3_shader_variant *so = ctx->so; + struct ir3 *ir = ctx->ir; + unsigned i, j; + + for (i = 0, j = 0; i < so->outputs_count; i++) { + unsigned slot = so->outputs[i].slot; + + /* throw away everything but first position/psize */ + if ((slot == VARYING_SLOT_POS) || (slot == VARYING_SLOT_PSIZ)) { + if (i != j) { + so->outputs[j] = so->outputs[i]; + ir->outputs[(j*4)+0] = ir->outputs[(i*4)+0]; + ir->outputs[(j*4)+1] = ir->outputs[(i*4)+1]; + ir->outputs[(j*4)+2] = ir->outputs[(i*4)+2]; + ir->outputs[(j*4)+3] = ir->outputs[(i*4)+3]; + } + j++; + } + } + so->outputs_count = j; + ir->noutputs = j * 4; +} + +int +ir3_compile_shader_nir(struct ir3_compiler *compiler, + struct ir3_shader_variant *so) +{ + struct ir3_context *ctx; + struct ir3 *ir; + struct ir3_instruction **inputs; + unsigned i, actual_in, inloc; + int ret = 0, max_bary; + + assert(!so->ir); + + ctx = ir3_context_init(compiler, so); + if (!ctx) { + DBG("INIT failed!"); + ret = -1; + goto out; + } + + emit_instructions(ctx); + + if (ctx->error) { + DBG("EMIT failed!"); + ret = -1; + goto out; + } + + ir = so->ir = ctx->ir; + + /* keep track of the inputs from TGSI perspective.. 
*/ + inputs = ir->inputs; + + /* but fixup actual inputs for frag shader: */ + if (so->type == MESA_SHADER_FRAGMENT) + fixup_frag_inputs(ctx); + + /* at this point, for binning pass, throw away unneeded outputs: */ + if (so->binning_pass && (ctx->compiler->gpu_id < 600)) + fixup_binning_pass(ctx); + + /* if we want half-precision outputs, mark the output registers + * as half: + */ + if (so->key.half_precision) { + for (i = 0; i < ir->noutputs; i++) { + struct ir3_instruction *out = ir->outputs[i]; + + if (!out) + continue; + + /* if frag shader writes z, that needs to be full precision: */ + if (so->outputs[i/4].slot == FRAG_RESULT_DEPTH) + continue; + + out->regs[0]->flags |= IR3_REG_HALF; + /* output could be a fanout (ie. texture fetch output) + * in which case we need to propagate the half-reg flag + * up to the definer so that RA sees it: + */ + if (out->opc == OPC_META_FO) { + out = out->regs[1]->instr; + out->regs[0]->flags |= IR3_REG_HALF; + } + + if (out->opc == OPC_MOV) { + out->cat1.dst_type = half_type(out->cat1.dst_type); + } + } + } + + if (ir3_shader_debug & IR3_DBG_OPTMSGS) { + printf("BEFORE CP:\n"); + ir3_print(ir); + } + + ir3_cp(ir, so); + + /* at this point, for binning pass, throw away unneeded outputs: + * Note that for a6xx and later, we do this after ir3_cp to ensure + * that the uniform/constant layout for BS and VS matches, so that + * we can re-use same VS_CONST state group. + */ + if (so->binning_pass && (ctx->compiler->gpu_id >= 600)) + fixup_binning_pass(ctx); + + /* Insert mov if there's same instruction for each output. + * eg. 
dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_expression.vertex.sampler2dshadow + */ + for (int i = ir->noutputs - 1; i >= 0; i--) { + if (!ir->outputs[i]) + continue; + for (unsigned j = 0; j < i; j++) { + if (ir->outputs[i] == ir->outputs[j]) { + ir->outputs[i] = + ir3_MOV(ir->outputs[i]->block, ir->outputs[i], TYPE_F32); + } + } + } + + if (ir3_shader_debug & IR3_DBG_OPTMSGS) { + printf("BEFORE GROUPING:\n"); + ir3_print(ir); + } + + ir3_sched_add_deps(ir); + + /* Group left/right neighbors, inserting mov's where needed to + * solve conflicts: + */ + ir3_group(ir); + + if (ir3_shader_debug & IR3_DBG_OPTMSGS) { + printf("AFTER GROUPING:\n"); + ir3_print(ir); + } + + ir3_depth(ir); + + if (ir3_shader_debug & IR3_DBG_OPTMSGS) { + printf("AFTER DEPTH:\n"); + ir3_print(ir); + } + + ret = ir3_sched(ir); + if (ret) { + DBG("SCHED failed!"); + goto out; + } + + if (ir3_shader_debug & IR3_DBG_OPTMSGS) { + printf("AFTER SCHED:\n"); + ir3_print(ir); + } + + ret = ir3_ra(ir, so->type, so->frag_coord, so->frag_face); + if (ret) { + DBG("RA failed!"); + goto out; + } + + if (ir3_shader_debug & IR3_DBG_OPTMSGS) { + printf("AFTER RA:\n"); + ir3_print(ir); + } + + /* fixup input/outputs: */ + for (i = 0; i < so->outputs_count; i++) { + /* sometimes we get outputs that don't write the .x coord, like: + * + * decl_var shader_out INTERP_MODE_NONE float Color (VARYING_SLOT_VAR9.z, 1, 0) + * + * Presumably the result of varying packing and then eliminating + * some unneeded varyings? Just skip head to the first valid + * component of the output. 
+ */ + for (unsigned j = 0; j < 4; j++) { + struct ir3_instruction *instr = ir->outputs[(i*4) + j]; + if (instr) { + so->outputs[i].regid = instr->regs[0]->num; + break; + } + } + } + + /* Note that some or all channels of an input may be unused: */ + actual_in = 0; + inloc = 0; + for (i = 0; i < so->inputs_count; i++) { + unsigned j, reg = regid(63,0), compmask = 0, maxcomp = 0; + so->inputs[i].ncomp = 0; + so->inputs[i].inloc = inloc; + for (j = 0; j < 4; j++) { + struct ir3_instruction *in = inputs[(i*4) + j]; + if (in && !(in->flags & IR3_INSTR_UNUSED)) { + compmask |= (1 << j); + reg = in->regs[0]->num - j; + actual_in++; + so->inputs[i].ncomp++; + if ((so->type == MESA_SHADER_FRAGMENT) && so->inputs[i].bary) { + /* assign inloc: */ + assert(in->regs[1]->flags & IR3_REG_IMMED); + in->regs[1]->iim_val = inloc + j; + maxcomp = j + 1; + } + } + } + if ((so->type == MESA_SHADER_FRAGMENT) && compmask && so->inputs[i].bary) { + so->varying_in++; + so->inputs[i].compmask = (1 << maxcomp) - 1; + inloc += maxcomp; + } else if (!so->inputs[i].sysval) { + so->inputs[i].compmask = compmask; + } + so->inputs[i].regid = reg; + } + + if (ctx->astc_srgb) + fixup_astc_srgb(ctx); + + /* We need to do legalize after (for frag shader's) the "bary.f" + * offsets (inloc) have been assigned. 
+ */ + ir3_legalize(ir, &so->num_samp, &so->has_ssbo, &max_bary); + + if (ir3_shader_debug & IR3_DBG_OPTMSGS) { + printf("AFTER LEGALIZE:\n"); + ir3_print(ir); + } + + so->branchstack = ctx->max_stack; + + /* Note that actual_in counts inputs that are not bary.f'd for FS: */ + if (so->type == MESA_SHADER_VERTEX) + so->total_in = actual_in; + else + so->total_in = max_bary + 1; + +out: + if (ret) { + if (so->ir) + ir3_destroy(so->ir); + so->ir = NULL; + } + ir3_context_free(ctx); + + return ret; +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_context.c mesa-19.0.1/src/freedreno/ir3/ir3_context.c --- mesa-18.3.3/src/freedreno/ir3/ir3_context.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,594 @@ +/* + * Copyright (C) 2015-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include "util/u_math.h" + +#include "ir3_compiler.h" +#include "ir3_context.h" +#include "ir3_shader.h" +#include "ir3_nir.h" + +struct ir3_context * +ir3_context_init(struct ir3_compiler *compiler, + struct ir3_shader_variant *so) +{ + struct ir3_context *ctx = rzalloc(NULL, struct ir3_context); + + if (compiler->gpu_id >= 400) { + if (so->type == MESA_SHADER_VERTEX) { + ctx->astc_srgb = so->key.vastc_srgb; + } else if (so->type == MESA_SHADER_FRAGMENT) { + ctx->astc_srgb = so->key.fastc_srgb; + } + + } else { + if (so->type == MESA_SHADER_VERTEX) { + ctx->samples = so->key.vsamples; + } else if (so->type == MESA_SHADER_FRAGMENT) { + ctx->samples = so->key.fsamples; + } + } + + ctx->compiler = compiler; + ctx->so = so; + ctx->def_ht = _mesa_hash_table_create(ctx, + _mesa_hash_pointer, _mesa_key_pointer_equal); + ctx->block_ht = _mesa_hash_table_create(ctx, + _mesa_hash_pointer, _mesa_key_pointer_equal); + + /* TODO: maybe generate some sort of bitmask of what key + * lowers vs what shader has (ie. no need to lower + * texture clamp lowering if no texture sample instrs).. + * although should be done further up the stack to avoid + * creating duplicate variants.. 
+ */ + + if (ir3_key_lowers_nir(&so->key)) { + nir_shader *s = nir_shader_clone(ctx, so->shader->nir); + ctx->s = ir3_optimize_nir(so->shader, s, &so->key); + } else { + /* fast-path for shader key that lowers nothing in NIR: */ + ctx->s = nir_shader_clone(ctx, so->shader->nir); + } + + /* this needs to be the last pass run, so do this here instead of + * in ir3_optimize_nir(): + */ + NIR_PASS_V(ctx->s, nir_lower_bool_to_int32); + NIR_PASS_V(ctx->s, nir_lower_locals_to_regs); + NIR_PASS_V(ctx->s, nir_convert_from_ssa, true); + + if (ir3_shader_debug & IR3_DBG_DISASM) { + DBG("dump nir%dv%d: type=%d, k={cts=%u,hp=%u}", + so->shader->id, so->id, so->type, + so->key.color_two_side, so->key.half_precision); + nir_print_shader(ctx->s, stdout); + } + + if (shader_debug_enabled(so->type)) { + fprintf(stderr, "NIR (final form) for %s shader:\n", + _mesa_shader_stage_to_string(so->type)); + nir_print_shader(ctx->s, stderr); + } + + ir3_nir_scan_driver_consts(ctx->s, &so->const_layout); + + so->num_uniforms = ctx->s->num_uniforms; + so->num_ubos = ctx->s->info.num_ubos; + + /* Layout of constant registers, each section aligned to vec4. Note + * that pointer size (ubo, etc) changes depending on generation. + * + * user consts + * UBO addresses + * SSBO sizes + * if (vertex shader) { + * driver params (IR3_DP_*) + * if (stream_output.num_outputs > 0) + * stream-out addresses + * } + * immediates + * + * Immediates go last mostly because they are inserted in the CP pass + * after the nir -> ir3 frontend. 
+ */ + unsigned constoff = align(ctx->s->num_uniforms, 4); + unsigned ptrsz = ir3_pointer_size(ctx); + + memset(&so->constbase, ~0, sizeof(so->constbase)); + + if (so->num_ubos > 0) { + so->constbase.ubo = constoff; + constoff += align(ctx->s->info.num_ubos * ptrsz, 4) / 4; + } + + if (so->const_layout.ssbo_size.count > 0) { + unsigned cnt = so->const_layout.ssbo_size.count; + so->constbase.ssbo_sizes = constoff; + constoff += align(cnt, 4) / 4; + } + + if (so->const_layout.image_dims.count > 0) { + unsigned cnt = so->const_layout.image_dims.count; + so->constbase.image_dims = constoff; + constoff += align(cnt, 4) / 4; + } + + unsigned num_driver_params = 0; + if (so->type == MESA_SHADER_VERTEX) { + num_driver_params = IR3_DP_VS_COUNT; + } else if (so->type == MESA_SHADER_COMPUTE) { + num_driver_params = IR3_DP_CS_COUNT; + } + + so->constbase.driver_param = constoff; + constoff += align(num_driver_params, 4) / 4; + + if ((so->type == MESA_SHADER_VERTEX) && + (compiler->gpu_id < 500) && + so->shader->stream_output.num_outputs > 0) { + so->constbase.tfbo = constoff; + constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4; + } + + so->constbase.immediate = constoff; + + return ctx; +} + +void +ir3_context_free(struct ir3_context *ctx) +{ + ralloc_free(ctx); +} + +/* + * Misc helpers + */ + +/* allocate a n element value array (to be populated by caller) and + * insert in def_ht + */ +struct ir3_instruction ** +ir3_get_dst_ssa(struct ir3_context *ctx, nir_ssa_def *dst, unsigned n) +{ + struct ir3_instruction **value = + ralloc_array(ctx->def_ht, struct ir3_instruction *, n); + _mesa_hash_table_insert(ctx->def_ht, dst, value); + return value; +} + +struct ir3_instruction ** +ir3_get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n) +{ + struct ir3_instruction **value; + + if (dst->is_ssa) { + value = ir3_get_dst_ssa(ctx, &dst->ssa, n); + } else { + value = ralloc_array(ctx, struct ir3_instruction *, n); + } + + /* NOTE: in non-ssa case, we don't really need to 
store last_dst + * but this helps us catch cases where put_dst() call is forgotten + */ + compile_assert(ctx, !ctx->last_dst); + ctx->last_dst = value; + ctx->last_dst_n = n; + + return value; +} + +struct ir3_instruction * const * +ir3_get_src(struct ir3_context *ctx, nir_src *src) +{ + if (src->is_ssa) { + struct hash_entry *entry; + entry = _mesa_hash_table_search(ctx->def_ht, src->ssa); + compile_assert(ctx, entry); + return entry->data; + } else { + nir_register *reg = src->reg.reg; + struct ir3_array *arr = ir3_get_array(ctx, reg); + unsigned num_components = arr->r->num_components; + struct ir3_instruction *addr = NULL; + struct ir3_instruction **value = + ralloc_array(ctx, struct ir3_instruction *, num_components); + + if (src->reg.indirect) + addr = ir3_get_addr(ctx, ir3_get_src(ctx, src->reg.indirect)[0], + reg->num_components); + + for (unsigned i = 0; i < num_components; i++) { + unsigned n = src->reg.base_offset * reg->num_components + i; + compile_assert(ctx, n < arr->length); + value[i] = ir3_create_array_load(ctx, arr, n, addr); + } + + return value; + } +} + +void +put_dst(struct ir3_context *ctx, nir_dest *dst) +{ + unsigned bit_size = nir_dest_bit_size(*dst); + + if (bit_size < 32) { + for (unsigned i = 0; i < ctx->last_dst_n; i++) { + struct ir3_instruction *dst = ctx->last_dst[i]; + dst->regs[0]->flags |= IR3_REG_HALF; + if (ctx->last_dst[i]->opc == OPC_META_FO) + dst->regs[1]->instr->regs[0]->flags |= IR3_REG_HALF; + } + } + + if (!dst->is_ssa) { + nir_register *reg = dst->reg.reg; + struct ir3_array *arr = ir3_get_array(ctx, reg); + unsigned num_components = ctx->last_dst_n; + struct ir3_instruction *addr = NULL; + + if (dst->reg.indirect) + addr = ir3_get_addr(ctx, ir3_get_src(ctx, dst->reg.indirect)[0], + reg->num_components); + + for (unsigned i = 0; i < num_components; i++) { + unsigned n = dst->reg.base_offset * reg->num_components + i; + compile_assert(ctx, n < arr->length); + if (!ctx->last_dst[i]) + continue; + 
ir3_create_array_store(ctx, arr, n, ctx->last_dst[i], addr); + } + + ralloc_free(ctx->last_dst); + } + ctx->last_dst = NULL; + ctx->last_dst_n = 0; +} + +struct ir3_instruction * +ir3_create_collect(struct ir3_context *ctx, struct ir3_instruction *const *arr, + unsigned arrsz) +{ + struct ir3_block *block = ctx->block; + struct ir3_instruction *collect; + + if (arrsz == 0) + return NULL; + + unsigned flags = arr[0]->regs[0]->flags & IR3_REG_HALF; + + collect = ir3_instr_create2(block, OPC_META_FI, 1 + arrsz); + ir3_reg_create(collect, 0, flags); /* dst */ + for (unsigned i = 0; i < arrsz; i++) { + struct ir3_instruction *elem = arr[i]; + + /* Since arrays are pre-colored in RA, we can't assume that + * things will end up in the right place. (Ie. if a collect + * joins elements from two different arrays.) So insert an + * extra mov. + * + * We could possibly skip this if all the collected elements + * are contiguous elements in a single array.. not sure how + * likely that is to happen. + * + * Fixes a problem with glamor shaders, that in effect do + * something like: + * + * if (foo) + * texcoord = .. + * else + * texcoord = .. + * color = texture2D(tex, texcoord); + * + * In this case, texcoord will end up as nir registers (which + * translate to ir3 array's of length 1. And we can't assume + * the two (or more) arrays will get allocated in consecutive + * scalar registers. + * + */ + if (elem->regs[0]->flags & IR3_REG_ARRAY) { + type_t type = (flags & IR3_REG_HALF) ? 
TYPE_U16 : TYPE_U32; + elem = ir3_MOV(block, elem, type); + } + + compile_assert(ctx, (elem->regs[0]->flags & IR3_REG_HALF) == flags); + ir3_reg_create(collect, 0, IR3_REG_SSA | flags)->instr = elem; + } + + return collect; +} + +/* helper for instructions that produce multiple consecutive scalar + * outputs which need to have a split/fanout meta instruction inserted + */ +void +ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst, + struct ir3_instruction *src, unsigned base, unsigned n) +{ + struct ir3_instruction *prev = NULL; + + if ((n == 1) && (src->regs[0]->wrmask == 0x1)) { + dst[0] = src; + return; + } + + for (int i = 0, j = 0; i < n; i++) { + struct ir3_instruction *split = ir3_instr_create(block, OPC_META_FO); + ir3_reg_create(split, 0, IR3_REG_SSA); + ir3_reg_create(split, 0, IR3_REG_SSA)->instr = src; + split->fo.off = i + base; + + if (prev) { + split->cp.left = prev; + split->cp.left_cnt++; + prev->cp.right = split; + prev->cp.right_cnt++; + } + prev = split; + + if (src->regs[0]->wrmask & (1 << (i + base))) + dst[j++] = split; + } +} + +void +ir3_context_error(struct ir3_context *ctx, const char *format, ...) +{ + struct hash_table *errors = NULL; + va_list ap; + va_start(ap, format); + if (ctx->cur_instr) { + errors = _mesa_hash_table_create(NULL, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + char *msg = ralloc_vasprintf(errors, format, ap); + _mesa_hash_table_insert(errors, ctx->cur_instr, msg); + } else { + _debug_vprintf(format, ap); + } + va_end(ap); + nir_print_shader_annotated(ctx->s, stdout, errors); + ralloc_free(errors); + ctx->error = true; + debug_assert(0); +} + +static struct ir3_instruction * +create_addr(struct ir3_block *block, struct ir3_instruction *src, int align) +{ + struct ir3_instruction *instr, *immed; + + /* TODO in at least some cases, the backend could probably be + * made clever enough to propagate IR3_REG_HALF.. 
+ */ + instr = ir3_COV(block, src, TYPE_U32, TYPE_S16); + instr->regs[0]->flags |= IR3_REG_HALF; + + switch(align){ + case 1: + /* src *= 1: */ + break; + case 2: + /* src *= 2 => src <<= 1: */ + immed = create_immed(block, 1); + immed->regs[0]->flags |= IR3_REG_HALF; + + instr = ir3_SHL_B(block, instr, 0, immed, 0); + instr->regs[0]->flags |= IR3_REG_HALF; + instr->regs[1]->flags |= IR3_REG_HALF; + break; + case 3: + /* src *= 3: */ + immed = create_immed(block, 3); + immed->regs[0]->flags |= IR3_REG_HALF; + + instr = ir3_MULL_U(block, instr, 0, immed, 0); + instr->regs[0]->flags |= IR3_REG_HALF; + instr->regs[1]->flags |= IR3_REG_HALF; + break; + case 4: + /* src *= 4 => src <<= 2: */ + immed = create_immed(block, 2); + immed->regs[0]->flags |= IR3_REG_HALF; + + instr = ir3_SHL_B(block, instr, 0, immed, 0); + instr->regs[0]->flags |= IR3_REG_HALF; + instr->regs[1]->flags |= IR3_REG_HALF; + break; + default: + unreachable("bad align"); + return NULL; + } + + instr = ir3_MOV(block, instr, TYPE_S16); + instr->regs[0]->num = regid(REG_A0, 0); + instr->regs[0]->flags |= IR3_REG_HALF; + instr->regs[1]->flags |= IR3_REG_HALF; + + return instr; +} + +/* caches addr values to avoid generating multiple cov/shl/mova + * sequences for each use of a given NIR level src as address + */ +struct ir3_instruction * +ir3_get_addr(struct ir3_context *ctx, struct ir3_instruction *src, int align) +{ + struct ir3_instruction *addr; + unsigned idx = align - 1; + + compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr_ht)); + + if (!ctx->addr_ht[idx]) { + ctx->addr_ht[idx] = _mesa_hash_table_create(ctx, + _mesa_hash_pointer, _mesa_key_pointer_equal); + } else { + struct hash_entry *entry; + entry = _mesa_hash_table_search(ctx->addr_ht[idx], src); + if (entry) + return entry->data; + } + + addr = create_addr(ctx->block, src, align); + _mesa_hash_table_insert(ctx->addr_ht[idx], src, addr); + + return addr; +} + +struct ir3_instruction * +ir3_get_predicate(struct ir3_context *ctx, struct 
ir3_instruction *src) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *cond; + + /* NOTE: only cmps.*.* can write p0.x: */ + cond = ir3_CMPS_S(b, src, 0, create_immed(b, 0), 0); + cond->cat2.condition = IR3_COND_NE; + + /* condition always goes in predicate register: */ + cond->regs[0]->num = regid(REG_P0, 0); + + return cond; +} + +/* + * Array helpers + */ + +void +ir3_declare_array(struct ir3_context *ctx, nir_register *reg) +{ + struct ir3_array *arr = rzalloc(ctx, struct ir3_array); + arr->id = ++ctx->num_arrays; + /* NOTE: sometimes we get non array regs, for example for arrays of + * length 1. See fs-const-array-of-struct-of-array.shader_test. So + * treat a non-array as if it was an array of length 1. + * + * It would be nice if there was a nir pass to convert arrays of + * length 1 to ssa. + */ + arr->length = reg->num_components * MAX2(1, reg->num_array_elems); + compile_assert(ctx, arr->length > 0); + arr->r = reg; + list_addtail(&arr->node, &ctx->ir->array_list); +} + +struct ir3_array * +ir3_get_array(struct ir3_context *ctx, nir_register *reg) +{ + list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) { + if (arr->r == reg) + return arr; + } + ir3_context_error(ctx, "bogus reg: %s\n", reg->name); + return NULL; +} + +/* relative (indirect) if address!=NULL */ +struct ir3_instruction * +ir3_create_array_load(struct ir3_context *ctx, struct ir3_array *arr, int n, + struct ir3_instruction *address) +{ + struct ir3_block *block = ctx->block; + struct ir3_instruction *mov; + struct ir3_register *src; + + mov = ir3_instr_create(block, OPC_MOV); + mov->cat1.src_type = TYPE_U32; + mov->cat1.dst_type = TYPE_U32; + mov->barrier_class = IR3_BARRIER_ARRAY_R; + mov->barrier_conflict = IR3_BARRIER_ARRAY_W; + ir3_reg_create(mov, 0, 0); + src = ir3_reg_create(mov, 0, IR3_REG_ARRAY | + COND(address, IR3_REG_RELATIV)); + src->instr = arr->last_write; + src->size = arr->length; + src->array.id = arr->id; + src->array.offset = n; + + if 
(address) + ir3_instr_set_address(mov, address); + + return mov; +} + +/* relative (indirect) if address!=NULL */ +void +ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n, + struct ir3_instruction *src, struct ir3_instruction *address) +{ + struct ir3_block *block = ctx->block; + struct ir3_instruction *mov; + struct ir3_register *dst; + + /* if not relative store, don't create an extra mov, since that + * ends up being difficult for cp to remove. + */ + if (!address) { + dst = src->regs[0]; + + src->barrier_class |= IR3_BARRIER_ARRAY_W; + src->barrier_conflict |= IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W; + + dst->flags |= IR3_REG_ARRAY; + dst->instr = arr->last_write; + dst->size = arr->length; + dst->array.id = arr->id; + dst->array.offset = n; + + arr->last_write = src; + + array_insert(block, block->keeps, src); + + return; + } + + mov = ir3_instr_create(block, OPC_MOV); + mov->cat1.src_type = TYPE_U32; + mov->cat1.dst_type = TYPE_U32; + mov->barrier_class = IR3_BARRIER_ARRAY_W; + mov->barrier_conflict = IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W; + dst = ir3_reg_create(mov, 0, IR3_REG_ARRAY | + COND(address, IR3_REG_RELATIV)); + dst->instr = arr->last_write; + dst->size = arr->length; + dst->array.id = arr->id; + dst->array.offset = n; + ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = src; + + if (address) + ir3_instr_set_address(mov, address); + + arr->last_write = mov; + + /* the array store may only matter to something in an earlier + * block (ie. loops), but since arrays are not in SSA, depth + * pass won't know this.. 
so keep all array stores: + */ + array_insert(block, block->keeps, mov); +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_context.h mesa-19.0.1/src/freedreno/ir3/ir3_context.h --- mesa-18.3.3/src/freedreno/ir3/ir3_context.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2015-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef IR3_CONTEXT_H_ +#define IR3_CONTEXT_H_ + +#include "ir3_nir.h" +#include "ir3.h" + +/* for conditionally setting boolean flag(s): */ +#define COND(bool, val) ((bool) ? (val) : 0) + +#define DBG(fmt, ...) \ + do { debug_printf("%s:%d: "fmt "\n", \ + __FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0) + +/** + * The context for compilation of a single shader. 
+ */ +struct ir3_context { + struct ir3_compiler *compiler; + + struct nir_shader *s; + + struct nir_instr *cur_instr; /* current instruction, just for debug */ + + struct ir3 *ir; + struct ir3_shader_variant *so; + + struct ir3_block *block; /* the current block */ + struct ir3_block *in_block; /* block created for shader inputs */ + + nir_function_impl *impl; + + /* For fragment shaders, varyings are not actual shader inputs, + * instead the hw passes a varying-coord which is used with + * bary.f. + * + * But NIR doesn't know that, it still declares varyings as + * inputs. So we do all the input tracking normally and fix + * things up after compile_instructions() + * + * NOTE that frag_vcoord is the hardware position (possibly it + * is actually an index or tag or some such.. it is *not* + * values that can be directly used for gl_FragCoord..) + */ + struct ir3_instruction *frag_vcoord; + + /* for fragment shaders, for gl_FrontFacing and gl_FragCoord: */ + struct ir3_instruction *frag_face, *frag_coord; + + /* For vertex shaders, keep track of the system values sources */ + struct ir3_instruction *vertex_id, *basevertex, *instance_id; + + /* For fragment shaders: */ + struct ir3_instruction *samp_id, *samp_mask_in; + + /* Compute shader inputs: */ + struct ir3_instruction *local_invocation_id, *work_group_id; + + /* mapping from nir_register to defining instruction: */ + struct hash_table *def_ht; + + unsigned num_arrays; + + /* Tracking for max level of flowcontrol (branchstack) needed + * by a5xx+: + */ + unsigned stack, max_stack; + + /* a common pattern for indirect addressing is to request the + * same address register multiple times. 
To avoid generating + * duplicate instruction sequences (which our backend does not + * try to clean up, since that should be done as the NIR stage) + * we cache the address value generated for a given src value: + * + * Note that we have to cache these per alignment, since same + * src used for an array of vec1 cannot be also used for an + * array of vec4. + */ + struct hash_table *addr_ht[4]; + + /* last dst array, for indirect we need to insert a var-store. + */ + struct ir3_instruction **last_dst; + unsigned last_dst_n; + + /* maps nir_block to ir3_block, mostly for the purposes of + * figuring out the blocks successors + */ + struct hash_table *block_ht; + + /* on a4xx, bitmask of samplers which need astc+srgb workaround: */ + unsigned astc_srgb; + + unsigned samples; /* bitmask of x,y sample shifts */ + + unsigned max_texture_index; + + /* set if we encounter something we can't handle yet, so we + * can bail cleanly and fallback to TGSI compiler f/e + */ + bool error; +}; + +struct ir3_context * ir3_context_init(struct ir3_compiler *compiler, + struct ir3_shader_variant *so); +void ir3_context_free(struct ir3_context *ctx); + +/* gpu pointer size in units of 32bit registers/slots */ +static inline +unsigned ir3_pointer_size(struct ir3_context *ctx) +{ + return (ctx->compiler->gpu_id >= 500) ? 
2 : 1; +} + +struct ir3_instruction ** ir3_get_dst_ssa(struct ir3_context *ctx, nir_ssa_def *dst, unsigned n); +struct ir3_instruction ** ir3_get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n); +struct ir3_instruction * const * ir3_get_src(struct ir3_context *ctx, nir_src *src); +void put_dst(struct ir3_context *ctx, nir_dest *dst); +struct ir3_instruction * ir3_create_collect(struct ir3_context *ctx, + struct ir3_instruction *const *arr, unsigned arrsz); +void ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst, + struct ir3_instruction *src, unsigned base, unsigned n); + +void ir3_context_error(struct ir3_context *ctx, const char *format, ...); + +#define compile_assert(ctx, cond) do { \ + if (!(cond)) ir3_context_error((ctx), "failed assert: "#cond"\n"); \ + } while (0) + +struct ir3_instruction * ir3_get_addr(struct ir3_context *ctx, + struct ir3_instruction *src, int align); +struct ir3_instruction * ir3_get_predicate(struct ir3_context *ctx, + struct ir3_instruction *src); + +void ir3_declare_array(struct ir3_context *ctx, nir_register *reg); +struct ir3_array * ir3_get_array(struct ir3_context *ctx, nir_register *reg); +struct ir3_instruction *ir3_create_array_load(struct ir3_context *ctx, + struct ir3_array *arr, int n, struct ir3_instruction *address); +void ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n, + struct ir3_instruction *src, struct ir3_instruction *address); + +static inline type_t utype_for_size(unsigned bit_size) +{ + switch (bit_size) { + case 32: return TYPE_U32; + case 16: return TYPE_U16; + case 8: return TYPE_U8; + default: unreachable("bad bitsize"); return ~0; + } +} + +static inline type_t utype_src(nir_src src) +{ return utype_for_size(nir_src_bit_size(src)); } + +static inline type_t utype_dst(nir_dest dst) +{ return utype_for_size(nir_dest_bit_size(dst)); } + +#endif /* IR3_CONTEXT_H_ */ diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_cp.c mesa-19.0.1/src/freedreno/ir3/ir3_cp.c --- 
mesa-18.3.3/src/freedreno/ir3/ir3_cp.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_cp.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,653 @@ +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include + +#include "ir3.h" +#include "ir3_shader.h" + +/* + * Copy Propagate: + */ + +struct ir3_cp_ctx { + struct ir3 *shader; + struct ir3_shader_variant *so; + unsigned immediate_idx; +}; + +/* is it a type preserving mov, with ok flags? 
*/ +static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags) +{ + if (is_same_type_mov(instr)) { + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src = instr->regs[1]; + struct ir3_instruction *src_instr = ssa(src); + + /* only if mov src is SSA (not const/immed): */ + if (!src_instr) + return false; + + /* no indirect: */ + if (dst->flags & IR3_REG_RELATIV) + return false; + if (src->flags & IR3_REG_RELATIV) + return false; + + if (src->flags & IR3_REG_ARRAY) + return false; + + if (!allow_flags) + if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG | + IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT)) + return false; + + /* TODO: remove this hack: */ + if (src_instr->opc == OPC_META_FO) + return false; + + return true; + } + return false; +} + +static unsigned cp_flags(unsigned flags) +{ + /* only considering these flags (at least for now): */ + flags &= (IR3_REG_CONST | IR3_REG_IMMED | + IR3_REG_FNEG | IR3_REG_FABS | + IR3_REG_SNEG | IR3_REG_SABS | + IR3_REG_BNOT | IR3_REG_RELATIV); + return flags; +} + +static bool valid_flags(struct ir3_instruction *instr, unsigned n, + unsigned flags) +{ + unsigned valid_flags; + flags = cp_flags(flags); + + /* If destination is indirect, then source cannot be.. at least + * I don't think so.. + */ + if ((instr->regs[0]->flags & IR3_REG_RELATIV) && + (flags & IR3_REG_RELATIV)) + return false; + + /* TODO it seems to *mostly* work to cp RELATIV, except we get some + * intermittent piglit variable-indexing fails. Newer blob driver + * doesn't seem to cp these. Possibly this is hw workaround? 
Not + * sure, but until that is understood better, lets just switch off + * cp for indirect src's: + */ + if (flags & IR3_REG_RELATIV) + return false; + + switch (opc_cat(instr->opc)) { + case 1: + valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV; + if (flags & ~valid_flags) + return false; + break; + case 2: + valid_flags = ir3_cat2_absneg(instr->opc) | + IR3_REG_CONST | IR3_REG_RELATIV; + + if (ir3_cat2_int(instr->opc)) + valid_flags |= IR3_REG_IMMED; + + if (flags & ~valid_flags) + return false; + + if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) { + unsigned m = (n ^ 1) + 1; + /* cannot deal w/ const in both srcs: + * (note that some cat2 actually only have a single src) + */ + if (m < instr->regs_count) { + struct ir3_register *reg = instr->regs[m]; + if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST)) + return false; + if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED)) + return false; + } + /* cannot be const + ABS|NEG: */ + if (flags & (IR3_REG_FABS | IR3_REG_FNEG | + IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT)) + return false; + } + break; + case 3: + valid_flags = ir3_cat3_absneg(instr->opc) | + IR3_REG_CONST | IR3_REG_RELATIV; + + if (flags & ~valid_flags) + return false; + + if (flags & (IR3_REG_CONST | IR3_REG_RELATIV)) { + /* cannot deal w/ const/relativ in 2nd src: */ + if (n == 1) + return false; + } + + if (flags & IR3_REG_CONST) { + /* cannot be const + ABS|NEG: */ + if (flags & (IR3_REG_FABS | IR3_REG_FNEG | + IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT)) + return false; + } + break; + case 4: + /* seems like blob compiler avoids const as src.. 
*/ + /* TODO double check if this is still the case on a4xx */ + if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) + return false; + if (flags & (IR3_REG_SABS | IR3_REG_SNEG)) + return false; + break; + case 5: + /* no flags allowed */ + if (flags) + return false; + break; + case 6: + valid_flags = IR3_REG_IMMED; + if (flags & ~valid_flags) + return false; + + if (flags & IR3_REG_IMMED) { + /* doesn't seem like we can have immediate src for store + * instructions: + * + * TODO this restriction could also apply to load instructions, + * but for load instructions this arg is the address (and not + * really sure any good way to test a hard-coded immed addr src) + */ + if (is_store(instr) && (n == 1)) + return false; + + if ((instr->opc == OPC_LDL) && (n != 1)) + return false; + + if ((instr->opc == OPC_STL) && (n != 2)) + return false; + + /* disallow CP into anything but the SSBO slot argument for + * atomics: + */ + if (is_atomic(instr->opc) && (n != 0)) + return false; + + if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G)) + return false; + } + + break; + } + + return true; +} + +/* propagate register flags from src to dst.. negates need special + * handling to cancel each other out. 
+ */ +static void combine_flags(unsigned *dstflags, struct ir3_instruction *src) +{ + unsigned srcflags = src->regs[1]->flags; + + /* if what we are combining into already has (abs) flags, + * we can drop (neg) from src: + */ + if (*dstflags & IR3_REG_FABS) + srcflags &= ~IR3_REG_FNEG; + if (*dstflags & IR3_REG_SABS) + srcflags &= ~IR3_REG_SNEG; + + if (srcflags & IR3_REG_FABS) + *dstflags |= IR3_REG_FABS; + if (srcflags & IR3_REG_SABS) + *dstflags |= IR3_REG_SABS; + if (srcflags & IR3_REG_FNEG) + *dstflags ^= IR3_REG_FNEG; + if (srcflags & IR3_REG_SNEG) + *dstflags ^= IR3_REG_SNEG; + if (srcflags & IR3_REG_BNOT) + *dstflags ^= IR3_REG_BNOT; + + *dstflags &= ~IR3_REG_SSA; + *dstflags |= srcflags & IR3_REG_SSA; + *dstflags |= srcflags & IR3_REG_CONST; + *dstflags |= srcflags & IR3_REG_IMMED; + *dstflags |= srcflags & IR3_REG_RELATIV; + *dstflags |= srcflags & IR3_REG_ARRAY; + + /* if src of the src is boolean we can drop the (abs) since we know + * the source value is already a postitive integer. This cleans + * up the absnegs that get inserted when converting between nir and + * native boolean (see ir3_b2n/n2b) + */ + struct ir3_instruction *srcsrc = ssa(src->regs[1]); + if (srcsrc && is_bool(srcsrc)) + *dstflags &= ~IR3_REG_SABS; +} + +static struct ir3_register * +lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags) +{ + unsigned swiz, idx, i; + + reg = ir3_reg_clone(ctx->shader, reg); + + /* in some cases, there are restrictions on (abs)/(neg) plus const.. 
+ * so just evaluate those and clear the flags: + */ + if (new_flags & IR3_REG_SABS) { + reg->iim_val = abs(reg->iim_val); + new_flags &= ~IR3_REG_SABS; + } + + if (new_flags & IR3_REG_FABS) { + reg->fim_val = fabs(reg->fim_val); + new_flags &= ~IR3_REG_FABS; + } + + if (new_flags & IR3_REG_SNEG) { + reg->iim_val = -reg->iim_val; + new_flags &= ~IR3_REG_SNEG; + } + + if (new_flags & IR3_REG_FNEG) { + reg->fim_val = -reg->fim_val; + new_flags &= ~IR3_REG_FNEG; + } + + /* Reallocate for 4 more elements whenever it's necessary */ + if (ctx->immediate_idx == ctx->so->immediates_size * 4) { + ctx->so->immediates_size += 4; + ctx->so->immediates = realloc (ctx->so->immediates, + ctx->so->immediates_size * sizeof (ctx->so->immediates[0])); + } + + for (i = 0; i < ctx->immediate_idx; i++) { + swiz = i % 4; + idx = i / 4; + + if (ctx->so->immediates[idx].val[swiz] == reg->uim_val) { + break; + } + } + + if (i == ctx->immediate_idx) { + /* need to generate a new immediate: */ + swiz = i % 4; + idx = i / 4; + ctx->so->immediates[idx].val[swiz] = reg->uim_val; + ctx->so->immediates_count = idx + 1; + ctx->immediate_idx++; + } + + new_flags &= ~IR3_REG_IMMED; + new_flags |= IR3_REG_CONST; + reg->flags = new_flags; + reg->num = i + (4 * ctx->so->constbase.immediate); + + return reg; +} + +static void +unuse(struct ir3_instruction *instr) +{ + debug_assert(instr->use_count > 0); + + if (--instr->use_count == 0) { + struct ir3_block *block = instr->block; + + instr->barrier_class = 0; + instr->barrier_conflict = 0; + + /* we don't want to remove anything in keeps (which could + * be things like array store's) + */ + for (unsigned i = 0; i < block->keeps_count; i++) { + debug_assert(block->keeps[i] != instr); + } + } +} + +/** + * Handle cp for a given src register. 
This additionally handles + * the cases of collapsing immedate/const (which replace the src + * register with a non-ssa src) or collapsing mov's from relative + * src (which needs to also fixup the address src reference by the + * instruction). + */ +static void +reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, + struct ir3_register *reg, unsigned n) +{ + struct ir3_instruction *src = ssa(reg); + + if (is_eligible_mov(src, true)) { + /* simple case, no immed/const/relativ, only mov's w/ ssa src: */ + struct ir3_register *src_reg = src->regs[1]; + unsigned new_flags = reg->flags; + + combine_flags(&new_flags, src); + + if (valid_flags(instr, n, new_flags)) { + if (new_flags & IR3_REG_ARRAY) { + debug_assert(!(reg->flags & IR3_REG_ARRAY)); + reg->array = src_reg->array; + } + reg->flags = new_flags; + reg->instr = ssa(src_reg); + + instr->barrier_class |= src->barrier_class; + instr->barrier_conflict |= src->barrier_conflict; + + unuse(src); + reg->instr->use_count++; + } + + } else if (is_same_type_mov(src) && + /* cannot collapse const/immed/etc into meta instrs: */ + !is_meta(instr)) { + /* immed/const/etc cases, which require some special handling: */ + struct ir3_register *src_reg = src->regs[1]; + unsigned new_flags = reg->flags; + + combine_flags(&new_flags, src); + + if (!valid_flags(instr, n, new_flags)) { + /* See if lowering an immediate to const would help. */ + if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) { + debug_assert(new_flags & IR3_REG_IMMED); + instr->regs[n + 1] = lower_immed(ctx, src_reg, new_flags); + return; + } + + /* special case for "normal" mad instructions, we can + * try swapping the first two args if that fits better. + * + * the "plain" MAD's (ie. 
the ones that don't shift first + * src prior to multiply) can swap their first two srcs if + * src[0] is !CONST and src[1] is CONST: + */ + if ((n == 1) && is_mad(instr->opc) && + !(instr->regs[0 + 1]->flags & (IR3_REG_CONST | IR3_REG_RELATIV)) && + valid_flags(instr, 0, new_flags & ~IR3_REG_IMMED)) { + /* swap src[0] and src[1]: */ + struct ir3_register *tmp; + tmp = instr->regs[0 + 1]; + instr->regs[0 + 1] = instr->regs[1 + 1]; + instr->regs[1 + 1] = tmp; + + n = 0; + } else { + return; + } + } + + /* Here we handle the special case of mov from + * CONST and/or RELATIV. These need to be handled + * specially, because in the case of move from CONST + * there is no src ir3_instruction so we need to + * replace the ir3_register. And in the case of + * RELATIV we need to handle the address register + * dependency. + */ + if (src_reg->flags & IR3_REG_CONST) { + /* an instruction cannot reference two different + * address registers: + */ + if ((src_reg->flags & IR3_REG_RELATIV) && + conflicts(instr->address, reg->instr->address)) + return; + + /* This seems to be a hw bug, or something where the timings + * just somehow don't work out. This restriction may only + * apply if the first src is also CONST. + */ + if ((opc_cat(instr->opc) == 3) && (n == 2) && + (src_reg->flags & IR3_REG_RELATIV) && + (src_reg->array.offset == 0)) + return; + + src_reg = ir3_reg_clone(instr->block->shader, src_reg); + src_reg->flags = new_flags; + instr->regs[n+1] = src_reg; + + if (src_reg->flags & IR3_REG_RELATIV) + ir3_instr_set_address(instr, reg->instr->address); + + return; + } + + if ((src_reg->flags & IR3_REG_RELATIV) && + !conflicts(instr->address, reg->instr->address)) { + src_reg = ir3_reg_clone(instr->block->shader, src_reg); + src_reg->flags = new_flags; + instr->regs[n+1] = src_reg; + ir3_instr_set_address(instr, reg->instr->address); + + return; + } + + /* NOTE: seems we can only do immed integers, so don't + * need to care about float. 
But we do need to handle + * abs/neg *before* checking that the immediate requires + * few enough bits to encode: + * + * TODO: do we need to do something to avoid accidentally + * catching a float immed? + */ + if (src_reg->flags & IR3_REG_IMMED) { + int32_t iim_val = src_reg->iim_val; + + debug_assert((opc_cat(instr->opc) == 1) || + (opc_cat(instr->opc) == 6) || + ir3_cat2_int(instr->opc) || + (is_mad(instr->opc) && (n == 0))); + + if (new_flags & IR3_REG_SABS) + iim_val = abs(iim_val); + + if (new_flags & IR3_REG_SNEG) + iim_val = -iim_val; + + if (new_flags & IR3_REG_BNOT) + iim_val = ~iim_val; + + /* other than category 1 (mov) we can only encode up to 10 bits: */ + if ((instr->opc == OPC_MOV) || + !((iim_val & ~0x3ff) && (-iim_val & ~0x3ff))) { + new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT); + src_reg = ir3_reg_clone(instr->block->shader, src_reg); + src_reg->flags = new_flags; + src_reg->iim_val = iim_val; + instr->regs[n+1] = src_reg; + } else if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) { + /* See if lowering an immediate to const would help. */ + instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags); + } + + return; + } + } +} + +/* Handle special case of eliminating output mov, and similar cases where + * there isn't a normal "consuming" instruction. In this case we cannot + * collapse flags (ie. 
output mov from const, or w/ abs/neg flags, cannot + * be eliminated) + */ +static struct ir3_instruction * +eliminate_output_mov(struct ir3_instruction *instr) +{ + if (is_eligible_mov(instr, false)) { + struct ir3_register *reg = instr->regs[1]; + if (!(reg->flags & IR3_REG_ARRAY)) { + struct ir3_instruction *src_instr = ssa(reg); + debug_assert(src_instr); + return src_instr; + } + } + return instr; +} + +/** + * Find instruction src's which are mov's that can be collapsed, replacing + * the mov dst with the mov src + */ +static void +instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr) +{ + struct ir3_register *reg; + + if (instr->regs_count == 0) + return; + + if (ir3_instr_check_mark(instr)) + return; + + /* walk down the graph from each src: */ + foreach_src_n(reg, n, instr) { + struct ir3_instruction *src = ssa(reg); + + if (!src) + continue; + + instr_cp(ctx, src); + + /* TODO non-indirect access we could figure out which register + * we actually want and allow cp.. + */ + if (reg->flags & IR3_REG_ARRAY) + continue; + + /* Don't CP absneg into meta instructions, that won't end well: */ + if (is_meta(instr) && (src->opc != OPC_MOV)) + continue; + + reg_cp(ctx, instr, reg, n); + } + + if (instr->regs[0]->flags & IR3_REG_ARRAY) { + struct ir3_instruction *src = ssa(instr->regs[0]); + if (src) + instr_cp(ctx, src); + } + + if (instr->address) { + instr_cp(ctx, instr->address); + ir3_instr_set_address(instr, eliminate_output_mov(instr->address)); + } + + /* we can end up with extra cmps.s from frontend, which uses a + * + * cmps.s p0.x, cond, 0 + * + * as a way to mov into the predicate register. But frequently 'cond' + * is itself a cmps.s/cmps.f/cmps.u. So detect this special case and + * just re-write the instruction writing predicate register to get rid + * of the double cmps. 
+ */ + if ((instr->opc == OPC_CMPS_S) && + (instr->regs[0]->num == regid(REG_P0, 0)) && + ssa(instr->regs[1]) && + (instr->regs[2]->flags & IR3_REG_IMMED) && + (instr->regs[2]->iim_val == 0)) { + struct ir3_instruction *cond = ssa(instr->regs[1]); + switch (cond->opc) { + case OPC_CMPS_S: + case OPC_CMPS_F: + case OPC_CMPS_U: + instr->opc = cond->opc; + instr->flags = cond->flags; + instr->cat2 = cond->cat2; + instr->address = cond->address; + instr->regs[1] = cond->regs[1]; + instr->regs[2] = cond->regs[2]; + instr->barrier_class |= cond->barrier_class; + instr->barrier_conflict |= cond->barrier_conflict; + unuse(cond); + break; + default: + break; + } + } +} + +void +ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so) +{ + struct ir3_cp_ctx ctx = { + .shader = ir, + .so = so, + }; + + /* This is a bit annoying, and probably wouldn't be necessary if we + * tracked a reverse link from producing instruction to consumer. + * But we need to know when we've eliminated the last consumer of + * a mov, so we need to do a pass to first count consumers of a + * mov. 
+ */ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + struct ir3_instruction *src; + + /* by the way, we don't account for false-dep's, so the CP + * pass should always happen before false-dep's are inserted + */ + debug_assert(instr->deps_count == 0); + + foreach_ssa_src(src, instr) { + src->use_count++; + } + } + } + + ir3_clear_mark(ir); + + for (unsigned i = 0; i < ir->noutputs; i++) { + if (ir->outputs[i]) { + instr_cp(&ctx, ir->outputs[i]); + ir->outputs[i] = eliminate_output_mov(ir->outputs[i]); + } + } + + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + if (block->condition) { + instr_cp(&ctx, block->condition); + block->condition = eliminate_output_mov(block->condition); + } + + for (unsigned i = 0; i < block->keeps_count; i++) { + instr_cp(&ctx, block->keeps[i]); + block->keeps[i] = eliminate_output_mov(block->keeps[i]); + } + } +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_depth.c mesa-19.0.1/src/freedreno/ir3/ir3_depth.c --- mesa-18.3.3/src/freedreno/ir3/ir3_depth.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_depth.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,272 @@ +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "util/u_math.h" + +#include "ir3.h" + +/* + * Instruction Depth: + * + * Calculates weighted instruction depth, ie. the sum of # of needed + * instructions plus delay slots back to original input (ie INPUT or + * CONST). That is to say, an instructions depth is: + * + * depth(instr) { + * d = 0; + * // for each src register: + * foreach (src in instr->regs[1..n]) + * d = max(d, delayslots(src->instr, n) + depth(src->instr)); + * return d + 1; + * } + * + * After an instruction's depth is calculated, it is inserted into the + * blocks depth sorted list, which is used by the scheduling pass. + */ + +/* generally don't count false dependencies, since this can just be + * something like a barrier, or SSBO store. The exception is array + * dependencies if the assigner is an array write and the consumer + * reads the same array. 
+ */ +static bool +ignore_dep(struct ir3_instruction *assigner, + struct ir3_instruction *consumer, unsigned n) +{ + if (!__is_false_dep(consumer, n)) + return false; + + if (assigner->barrier_class & IR3_BARRIER_ARRAY_W) { + struct ir3_register *dst = assigner->regs[0]; + struct ir3_register *src; + + debug_assert(dst->flags & IR3_REG_ARRAY); + + foreach_src(src, consumer) { + if ((src->flags & IR3_REG_ARRAY) && + (dst->array.id == src->array.id)) { + return false; + } + } + } + + return true; +} + +/* calculate required # of delay slots between the instruction that + * assigns a value and the one that consumes + */ +int ir3_delayslots(struct ir3_instruction *assigner, + struct ir3_instruction *consumer, unsigned n) +{ + if (ignore_dep(assigner, consumer, n)) + return 0; + + /* worst case is cat1-3 (alu) -> cat4/5 needing 6 cycles, normal + * alu -> alu needs 3 cycles, cat4 -> alu and texture fetch + * handled with sync bits + */ + + if (is_meta(assigner)) + return 0; + + if (writes_addr(assigner)) + return 6; + + /* handled via sync flags: */ + if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner)) + return 0; + + /* assigner must be alu: */ + if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) || + is_mem(consumer)) { + return 6; + } else if ((is_mad(consumer->opc) || is_madsh(consumer->opc)) && + (n == 3)) { + /* special case, 3rd src to cat3 not required on first cycle */ + return 1; + } else { + return 3; + } +} + +void +ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list) +{ + /* remove from existing spot in list: */ + list_delinit(&instr->node); + + /* find where to re-insert instruction: */ + list_for_each_entry (struct ir3_instruction, pos, list, node) { + if (pos->depth > instr->depth) { + list_add(&instr->node, &pos->node); + return; + } + } + /* if we get here, we didn't find an insertion spot: */ + list_addtail(&instr->node, list); +} + +static void +ir3_instr_depth(struct ir3_instruction *instr, unsigned boost, 
bool falsedep) +{ + struct ir3_instruction *src; + + /* don't mark falsedep's as used, but otherwise process them normally: */ + if (!falsedep) + instr->flags &= ~IR3_INSTR_UNUSED; + + if (ir3_instr_check_mark(instr)) + return; + + instr->depth = 0; + + foreach_ssa_src_n(src, i, instr) { + unsigned sd; + + /* visit child to compute it's depth: */ + ir3_instr_depth(src, boost, __is_false_dep(instr, i)); + + /* for array writes, no need to delay on previous write: */ + if (i == 0) + continue; + + sd = ir3_delayslots(src, instr, i) + src->depth; + sd += boost; + + instr->depth = MAX2(instr->depth, sd); + } + + if (!is_meta(instr)) + instr->depth++; + + ir3_insert_by_depth(instr, &instr->block->instr_list); +} + +static bool +remove_unused_by_block(struct ir3_block *block) +{ + bool progress = false; + list_for_each_entry_safe (struct ir3_instruction, instr, &block->instr_list, node) { + if (instr->opc == OPC_END) + continue; + if (instr->flags & IR3_INSTR_UNUSED) { + if (instr->opc == OPC_META_FO) { + struct ir3_instruction *src = ssa(instr->regs[1]); + /* leave inputs alone.. we can't optimize out components of + * an input, since the hw is still going to be writing all + * of the components, and we could end up in a situation + * where multiple inputs overlap. + */ + if ((src->opc != OPC_META_INPUT) && + (src->regs[0]->wrmask > 1)) { + src->regs[0]->wrmask &= ~(1 << instr->fo.off); + + /* prune no-longer needed right-neighbors. We could + * probably do the same for left-neighbors (ie. 
tex + * fetch that only need .yw components), but that + * makes RA a bit more confusing than it already is + */ + struct ir3_instruction *n = instr; + while (n && n->cp.right) + n = n->cp.right; + while (n->flags & IR3_INSTR_UNUSED) { + n = n->cp.left; + if (!n) + break; + n->cp.right = NULL; + } + } + } + list_delinit(&instr->node); + progress = true; + } + } + return progress; +} + +static bool +compute_depth_and_remove_unused(struct ir3 *ir) +{ + unsigned i; + bool progress = false; + + ir3_clear_mark(ir); + + /* initially mark everything as unused, we'll clear the flag as we + * visit the instructions: + */ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + instr->flags |= IR3_INSTR_UNUSED; + } + } + + for (i = 0; i < ir->noutputs; i++) + if (ir->outputs[i]) + ir3_instr_depth(ir->outputs[i], 0, false); + + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + for (i = 0; i < block->keeps_count; i++) + ir3_instr_depth(block->keeps[i], 0, false); + + /* We also need to account for if-condition: */ + if (block->condition) + ir3_instr_depth(block->condition, 6, false); + } + + /* mark un-used instructions: */ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + progress |= remove_unused_by_block(block); + } + + /* note that we can end up with unused indirects, but we should + * not end up with unused predicates. 
+ */ + for (i = 0; i < ir->indirects_count; i++) { + struct ir3_instruction *instr = ir->indirects[i]; + if (instr && (instr->flags & IR3_INSTR_UNUSED)) + ir->indirects[i] = NULL; + } + + /* cleanup unused inputs: */ + for (i = 0; i < ir->ninputs; i++) { + struct ir3_instruction *in = ir->inputs[i]; + if (in && (in->flags & IR3_INSTR_UNUSED)) + ir->inputs[i] = NULL; + } + + return progress; +} + +void +ir3_depth(struct ir3 *ir) +{ + bool progress; + do { + progress = compute_depth_and_remove_unused(ir); + } while (progress); +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_group.c mesa-19.0.1/src/freedreno/ir3/ir3_group.c --- mesa-18.3.3/src/freedreno/ir3/ir3_group.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_group.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,274 @@ +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include "ir3.h" + +/* + * Find/group instruction neighbors: + */ + +/* bleh.. we need to do the same group_n() thing for both inputs/outputs + * (where we have a simple instr[] array), and fanin nodes (where we have + * an extra indirection via reg->instr). + */ +struct group_ops { + struct ir3_instruction *(*get)(void *arr, int idx); + void (*insert_mov)(void *arr, int idx, struct ir3_instruction *instr); +}; + +static struct ir3_instruction *arr_get(void *arr, int idx) +{ + return ((struct ir3_instruction **)arr)[idx]; +} +static void arr_insert_mov_out(void *arr, int idx, struct ir3_instruction *instr) +{ + ((struct ir3_instruction **)arr)[idx] = + ir3_MOV(instr->block, instr, TYPE_F32); +} +static void arr_insert_mov_in(void *arr, int idx, struct ir3_instruction *instr) +{ + /* so, we can't insert a mov in front of a meta:in.. and the downstream + * instruction already has a pointer to 'instr'. So we cheat a bit and + * morph the meta:in instruction into a mov and insert a new meta:in + * in front. 
+ */ + struct ir3_instruction *in; + + debug_assert(instr->regs_count == 1); + + in = ir3_instr_create(instr->block, OPC_META_INPUT); + in->inout.block = instr->block; + ir3_reg_create(in, instr->regs[0]->num, 0); + + /* create src reg for meta:in and fixup to now be a mov: */ + ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = in; + instr->opc = OPC_MOV; + instr->cat1.src_type = TYPE_F32; + instr->cat1.dst_type = TYPE_F32; + + ((struct ir3_instruction **)arr)[idx] = in; +} +static struct group_ops arr_ops_out = { arr_get, arr_insert_mov_out }; +static struct group_ops arr_ops_in = { arr_get, arr_insert_mov_in }; + +static struct ir3_instruction *instr_get(void *arr, int idx) +{ + return ssa(((struct ir3_instruction *)arr)->regs[idx+1]); +} +static void +instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr) +{ + ((struct ir3_instruction *)arr)->regs[idx+1]->instr = + ir3_MOV(instr->block, instr, TYPE_F32); +} +static struct group_ops instr_ops = { instr_get, instr_insert_mov }; + +/* verify that cur != instr, but cur is also not in instr's neighbor-list: */ +static bool +in_neighbor_list(struct ir3_instruction *instr, struct ir3_instruction *cur, int pos) +{ + int idx = 0; + + if (!instr) + return false; + + if (instr == cur) + return true; + + for (instr = ir3_neighbor_first(instr); instr; instr = instr->cp.right) + if ((idx++ != pos) && (instr == cur)) + return true; + + return false; +} + +static void +group_n(struct group_ops *ops, void *arr, unsigned n) +{ + unsigned i, j; + + /* first pass, figure out what has conflicts and needs a mov + * inserted. Do this up front, before starting to setup + * left/right neighbor pointers. Trying to do it in a single + * pass could result in a situation where we can't even setup + * the mov's right neighbor ptr if the next instr also needs + * a mov. + */ +restart: + for (i = 0; i < n; i++) { + struct ir3_instruction *instr = ops->get(arr, i); + if (instr) { + struct ir3_instruction *left = (i > 0) ? 
ops->get(arr, i - 1) : NULL; + struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL; + bool conflict; + + /* check for left/right neighbor conflicts: */ + conflict = conflicts(instr->cp.left, left) || + conflicts(instr->cp.right, right); + + /* Mixing array elements and higher register classes + * (ie. groups) doesn't really work out in RA. See: + * + * https://trello.com/c/DqeDkeVf/156-bug-with-stk-70frag + */ + if (instr->regs[0]->flags & IR3_REG_ARRAY) + conflict = true; + + /* we also can't have an instr twice in the group: */ + for (j = i + 1; (j < n) && !conflict; j++) + if (in_neighbor_list(ops->get(arr, j), instr, i)) + conflict = true; + + if (conflict) { + ops->insert_mov(arr, i, instr); + /* inserting the mov may have caused a conflict + * against the previous: + */ + goto restart; + } + } + } + + /* second pass, now that we've inserted mov's, fixup left/right + * neighbors. This is guaranteed to succeed, since by definition + * the newly inserted mov's cannot conflict with anything. + */ + for (i = 0; i < n; i++) { + struct ir3_instruction *instr = ops->get(arr, i); + if (instr) { + struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL; + struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL; + + debug_assert(!conflicts(instr->cp.left, left)); + if (left) { + instr->cp.left_cnt++; + instr->cp.left = left; + } + + debug_assert(!conflicts(instr->cp.right, right)); + if (right) { + instr->cp.right_cnt++; + instr->cp.right = right; + } + } + } +} + +static void +instr_find_neighbors(struct ir3_instruction *instr) +{ + struct ir3_instruction *src; + + if (ir3_instr_check_mark(instr)) + return; + + if (instr->opc == OPC_META_FI) + group_n(&instr_ops, instr, instr->regs_count - 1); + + foreach_ssa_src(src, instr) + instr_find_neighbors(src); +} + +/* a bit of sadness.. we can't have "holes" in inputs from PoV of + * register assignment, they still need to be grouped together. 
So + * we need to insert dummy/padding instruction for grouping, and + * then take it back out again before anyone notices. + */ +static void +pad_and_group_input(struct ir3_instruction **input, unsigned n) +{ + int i, mask = 0; + struct ir3_block *block = NULL; + + for (i = n - 1; i >= 0; i--) { + struct ir3_instruction *instr = input[i]; + if (instr) { + block = instr->block; + } else if (block) { + instr = ir3_NOP(block); + ir3_reg_create(instr, 0, IR3_REG_SSA); /* dummy dst */ + input[i] = instr; + mask |= (1 << i); + } + } + + group_n(&arr_ops_in, input, n); + + for (i = 0; i < n; i++) { + if (mask & (1 << i)) + input[i] = NULL; + } +} + +static void +find_neighbors(struct ir3 *ir) +{ + unsigned i; + + /* shader inputs/outputs themselves must be contiguous as well: + * + * NOTE: group inputs first, since we only insert mov's + * *before* the conflicted instr (and that would go badly + * for inputs). By doing inputs first, we should never + * have a conflict on inputs.. pushing any conflict to + * resolve to the outputs, for stuff like: + * + * MOV OUT[n], IN[m].wzyx + * + * NOTE: we assume here inputs/outputs are grouped in vec4. 
+ * This logic won't quite cut it if we don't align smaller + * on vec4 boundaries + */ + for (i = 0; i < ir->ninputs; i += 4) + pad_and_group_input(&ir->inputs[i], 4); + for (i = 0; i < ir->noutputs; i += 4) + group_n(&arr_ops_out, &ir->outputs[i], 4); + + for (i = 0; i < ir->noutputs; i++) { + if (ir->outputs[i]) { + struct ir3_instruction *instr = ir->outputs[i]; + instr_find_neighbors(instr); + } + } + + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + for (i = 0; i < block->keeps_count; i++) { + struct ir3_instruction *instr = block->keeps[i]; + instr_find_neighbors(instr); + } + + /* We also need to account for if-condition: */ + if (block->condition) + instr_find_neighbors(block->condition); + } +} + +void +ir3_group(struct ir3 *ir) +{ + ir3_clear_mark(ir); + find_neighbors(ir); +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3.h mesa-19.0.1/src/freedreno/ir3/ir3.h --- mesa-18.3.3/src/freedreno/ir3/ir3.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,1447 @@ +/* + * Copyright (c) 2013 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IR3_H_ +#define IR3_H_ + +#include +#include + +#include "compiler/shader_enums.h" + +#include "util/u_debug.h" +#include "util/list.h" + +#include "instr-a3xx.h" + +/* low level intermediate representation of an adreno shader program */ + +struct ir3_compiler; +struct ir3; +struct ir3_instruction; +struct ir3_block; + +struct ir3_info { + uint32_t gpu_id; + uint16_t sizedwords; + uint16_t instrs_count; /* expanded to account for rpt's */ + /* NOTE: max_reg, etc, does not include registers not touched + * by the shader (ie. vertex fetched via VFD_DECODE but not + * touched by shader) + */ + int8_t max_reg; /* highest GPR # used by shader */ + int8_t max_half_reg; + int16_t max_const; + + /* number of sync bits: */ + uint16_t ss, sy; +}; + +struct ir3_register { + enum { + IR3_REG_CONST = 0x001, + IR3_REG_IMMED = 0x002, + IR3_REG_HALF = 0x004, + /* high registers are used for some things in compute shaders, + * for example. Seems to be for things that are global to all + * threads in a wave, so possibly these are global/shared by + * all the threads in the wave? + */ + IR3_REG_HIGH = 0x008, + IR3_REG_RELATIV= 0x010, + IR3_REG_R = 0x020, + /* Most instructions, it seems, can do float abs/neg but not + * integer. The CP pass needs to know what is intended (int or + * float) in order to do the right thing. For this reason the + * abs/neg flags are split out into float and int variants. In + * addition, .b (bitwise) operations, the negate is actually a + * bitwise not, so split that out into a new flag to make it + * more clear. 
+ */ + IR3_REG_FNEG = 0x040, + IR3_REG_FABS = 0x080, + IR3_REG_SNEG = 0x100, + IR3_REG_SABS = 0x200, + IR3_REG_BNOT = 0x400, + IR3_REG_EVEN = 0x800, + IR3_REG_POS_INF= 0x1000, + /* (ei) flag, end-input? Set on last bary, presumably to signal + * that the shader needs no more input: + */ + IR3_REG_EI = 0x2000, + /* meta-flags, for intermediate stages of IR, ie. + * before register assignment is done: + */ + IR3_REG_SSA = 0x4000, /* 'instr' is ptr to assigning instr */ + IR3_REG_ARRAY = 0x8000, + + } flags; + + /* normal registers: + * the component is in the low two bits of the reg #, so + * rN.x becomes: (N << 2) | x + */ + int num; + union { + /* immediate: */ + int32_t iim_val; + uint32_t uim_val; + float fim_val; + /* relative: */ + struct { + uint16_t id; + int16_t offset; + } array; + }; + + /* For IR3_REG_SSA, src registers contain ptr back to assigning + * instruction. + * + * For IR3_REG_ARRAY, the pointer is back to the last dependent + * array access (although the net effect is the same, it points + * back to a previous instruction that we depend on). + */ + struct ir3_instruction *instr; + + union { + /* used for cat5 instructions, but also for internal/IR level + * tracking of what registers are read/written by an instruction. + * wrmask may be a bad name since it is used to represent both + * src and dst that touch multiple adjacent registers. + */ + unsigned wrmask; + /* for relative addressing, 32bits for array size is too small, + * but otoh we don't need to deal with disjoint sets, so instead + * use a simple size field (number of scalar components). 
+ */ + unsigned size; + }; +}; + +/* + * Stupid/simple growable array implementation: + */ +#define DECLARE_ARRAY(type, name) \ + unsigned name ## _count, name ## _sz; \ + type * name; + +#define array_insert(ctx, arr, val) do { \ + if (arr ## _count == arr ## _sz) { \ + arr ## _sz = MAX2(2 * arr ## _sz, 16); \ + arr = reralloc_size(ctx, arr, arr ## _sz * sizeof(arr[0])); \ + } \ + arr[arr ##_count++] = val; \ + } while (0) + +struct ir3_instruction { + struct ir3_block *block; + opc_t opc; + enum { + /* (sy) flag is set on first instruction, and after sample + * instructions (probably just on RAW hazard). + */ + IR3_INSTR_SY = 0x001, + /* (ss) flag is set on first instruction, and first instruction + * to depend on the result of "long" instructions (RAW hazard): + * + * rcp, rsq, log2, exp2, sin, cos, sqrt + * + * It seems to synchronize until all in-flight instructions are + * completed, for example: + * + * rsq hr1.w, hr1.w + * add.f hr2.z, (neg)hr2.z, hc0.y + * mul.f hr2.w, (neg)hr2.y, (neg)hr2.y + * rsq hr2.x, hr2.x + * (rpt1)nop + * mad.f16 hr2.w, hr2.z, hr2.z, hr2.w + * nop + * mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w + * (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w + * (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x + * + * The last mul.f does not have (ss) set, presumably because the + * (ss) on the previous instruction does the job. + * + * The blob driver also seems to set it on WAR hazards, although + * not really clear if this is needed or just blob compiler being + * sloppy. 
So far I haven't found a case where removing the (ss) + * causes problems for WAR hazard, but I could just be getting + * lucky: + * + * rcp r1.y, r3.y + * (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z + * + */ + IR3_INSTR_SS = 0x002, + /* (jp) flag is set on jump targets: + */ + IR3_INSTR_JP = 0x004, + IR3_INSTR_UL = 0x008, + IR3_INSTR_3D = 0x010, + IR3_INSTR_A = 0x020, + IR3_INSTR_O = 0x040, + IR3_INSTR_P = 0x080, + IR3_INSTR_S = 0x100, + IR3_INSTR_S2EN = 0x200, + IR3_INSTR_G = 0x400, + IR3_INSTR_SAT = 0x800, + /* meta-flags, for intermediate stages of IR, ie. + * before register assignment is done: + */ + IR3_INSTR_MARK = 0x1000, + IR3_INSTR_UNUSED= 0x2000, + } flags; + int repeat; +#ifdef DEBUG + unsigned regs_max; +#endif + unsigned regs_count; + struct ir3_register **regs; + union { + struct { + char inv; + char comp; + int immed; + struct ir3_block *target; + } cat0; + struct { + type_t src_type, dst_type; + } cat1; + struct { + enum { + IR3_COND_LT = 0, + IR3_COND_LE = 1, + IR3_COND_GT = 2, + IR3_COND_GE = 3, + IR3_COND_EQ = 4, + IR3_COND_NE = 5, + } condition; + } cat2; + struct { + unsigned samp, tex; + type_t type; + } cat5; + struct { + type_t type; + int src_offset; + int dst_offset; + int iim_val : 3; /* for ldgb/stgb, # of components */ + int d : 3; + bool typed : 1; + } cat6; + struct { + unsigned w : 1; /* write */ + unsigned r : 1; /* read */ + unsigned l : 1; /* local */ + unsigned g : 1; /* global */ + } cat7; + /* for meta-instructions, just used to hold extra data + * before instruction scheduling, etc + */ + struct { + int off; /* component/offset */ + } fo; + struct { + struct ir3_block *block; + } inout; + }; + + /* transient values used during various algorithms: */ + union { + /* The instruction depth is the max dependency distance to output. + * + * You can also think of it as the "cost", if we did any sort of + * optimization for register footprint. Ie. 
a value that is just + * result of moving a const to a reg would have a low cost, so to + * it could make sense to duplicate the instruction at various + * points where the result is needed to reduce register footprint. + */ + unsigned depth; + /* When we get to the RA stage, we no longer need depth, but + * we do need instruction's position/name: + */ + struct { + uint16_t ip; + uint16_t name; + }; + }; + + /* used for per-pass extra instruction data. + */ + void *data; + + /* Used during CP and RA stages. For fanin and shader inputs/ + * outputs where we need a sequence of consecutive registers, + * keep track of each src instructions left (ie 'n-1') and right + * (ie 'n+1') neighbor. The front-end must insert enough mov's + * to ensure that each instruction has at most one left and at + * most one right neighbor. During the copy-propagation pass, + * we only remove mov's when we can preserve this constraint. + * And during the RA stage, we use the neighbor information to + * allocate a block of registers in one shot. + * + * TODO: maybe just add something like: + * struct ir3_instruction_ref { + * struct ir3_instruction *instr; + * unsigned cnt; + * } + * + * Or can we get away without the refcnt stuff? It seems like + * it should be overkill.. the problem is if, potentially after + * already eliminating some mov's, if you have a single mov that + * needs to be grouped with it's neighbors in two different + * places (ex. shader output and a fanin). + */ + struct { + struct ir3_instruction *left, *right; + uint16_t left_cnt, right_cnt; + } cp; + + /* an instruction can reference at most one address register amongst + * it's src/dst registers. Beyond that, you need to insert mov's. + * + * NOTE: do not write this directly, use ir3_instr_set_address() + */ + struct ir3_instruction *address; + + /* Tracking for additional dependent instructions. Used to handle + * barriers, WAR hazards for arrays/SSBOs/etc. 
+ */ + DECLARE_ARRAY(struct ir3_instruction *, deps); + + /* + * From PoV of instruction scheduling, not execution (ie. ignores global/ + * local distinction): + * shared image atomic SSBO everything + * barrier()/ - R/W R/W R/W R/W X + * groupMemoryBarrier() + * memoryBarrier() - R/W R/W + * (but only images declared coherent?) + * memoryBarrierAtomic() - R/W + * memoryBarrierBuffer() - R/W + * memoryBarrierImage() - R/W + * memoryBarrierShared() - R/W + * + * TODO I think for SSBO/image/shared, in cases where we can determine + * which variable is accessed, we don't need to care about accesses to + * different variables (unless declared coherent??) + */ + enum { + IR3_BARRIER_EVERYTHING = 1 << 0, + IR3_BARRIER_SHARED_R = 1 << 1, + IR3_BARRIER_SHARED_W = 1 << 2, + IR3_BARRIER_IMAGE_R = 1 << 3, + IR3_BARRIER_IMAGE_W = 1 << 4, + IR3_BARRIER_BUFFER_R = 1 << 5, + IR3_BARRIER_BUFFER_W = 1 << 6, + IR3_BARRIER_ARRAY_R = 1 << 7, + IR3_BARRIER_ARRAY_W = 1 << 8, + } barrier_class, barrier_conflict; + + /* Entry in ir3_block's instruction list: */ + struct list_head node; + + int use_count; /* currently just updated/used by cp */ + +#ifdef DEBUG + uint32_t serialno; +#endif +}; + +static inline struct ir3_instruction * +ir3_neighbor_first(struct ir3_instruction *instr) +{ + int cnt = 0; + while (instr->cp.left) { + instr = instr->cp.left; + if (++cnt > 0xffff) { + debug_assert(0); + break; + } + } + return instr; +} + +static inline int ir3_neighbor_count(struct ir3_instruction *instr) +{ + int num = 1; + + debug_assert(!instr->cp.left); + + while (instr->cp.right) { + num++; + instr = instr->cp.right; + if (num > 0xffff) { + debug_assert(0); + break; + } + } + + return num; +} + +struct ir3 { + struct ir3_compiler *compiler; + + unsigned ninputs, noutputs; + struct ir3_instruction **inputs; + struct ir3_instruction **outputs; + + /* Track bary.f (and ldlv) instructions.. 
this is needed in + * scheduling to ensure that all varying fetches happen before + * any potential kill instructions. The hw gets grumpy if all + * threads in a group are killed before the last bary.f gets + * a chance to signal end of input (ei). + */ + DECLARE_ARRAY(struct ir3_instruction *, baryfs); + + /* Track all indirect instructions (read and write). To avoid + * deadlock scenario where an address register gets scheduled, + * but other dependent src instructions cannot be scheduled due + * to dependency on a *different* address register value, the + * scheduler needs to ensure that all dependencies other than + * the instruction other than the address register are scheduled + * before the one that writes the address register. Having a + * convenient list of instructions that reference some address + * register simplifies this. + */ + DECLARE_ARRAY(struct ir3_instruction *, indirects); + + /* and same for instructions that consume predicate register: */ + DECLARE_ARRAY(struct ir3_instruction *, predicates); + + /* Track texture sample instructions which need texture state + * patched in (for astc-srgb workaround): + */ + DECLARE_ARRAY(struct ir3_instruction *, astc_srgb); + + /* List of blocks: */ + struct list_head block_list; + + /* List of ir3_array's: */ + struct list_head array_list; + +#ifdef DEBUG + unsigned block_count, instr_count; +#endif +}; + +struct ir3_array { + struct list_head node; + unsigned length; + unsigned id; + + struct nir_register *r; + + /* To avoid array write's from getting DCE'd, keep track of the + * most recent write. Any array access depends on the most + * recent write. This way, nothing depends on writes after the + * last read. 
But all the writes that happen before that have + * something depending on them + */ + struct ir3_instruction *last_write; + + /* extra stuff used in RA pass: */ + unsigned base; /* base vreg name */ + unsigned reg; /* base physical reg */ + uint16_t start_ip, end_ip; +}; + +struct ir3_array * ir3_lookup_array(struct ir3 *ir, unsigned id); + +struct ir3_block { + struct list_head node; + struct ir3 *shader; + + const struct nir_block *nblock; + + struct list_head instr_list; /* list of ir3_instruction */ + + /* each block has either one or two successors.. in case of + * two successors, 'condition' decides which one to follow. + * A block preceding an if/else has two successors. + */ + struct ir3_instruction *condition; + struct ir3_block *successors[2]; + + unsigned predecessors_count; + struct ir3_block **predecessors; + + uint16_t start_ip, end_ip; + + /* Track instructions which do not write a register but other- + * wise must not be discarded (such as kill, stg, etc) + */ + DECLARE_ARRAY(struct ir3_instruction *, keeps); + + /* used for per-pass extra block data. Mainly used right + * now in RA step to track livein/liveout. 
+ */ + void *data; + +#ifdef DEBUG + uint32_t serialno; +#endif +}; + +static inline uint32_t +block_id(struct ir3_block *block) +{ +#ifdef DEBUG + return block->serialno; +#else + return (uint32_t)(unsigned long)block; +#endif +} + +struct ir3 * ir3_create(struct ir3_compiler *compiler, + unsigned nin, unsigned nout); +void ir3_destroy(struct ir3 *shader); +void * ir3_assemble(struct ir3 *shader, + struct ir3_info *info, uint32_t gpu_id); +void * ir3_alloc(struct ir3 *shader, int sz); + +struct ir3_block * ir3_block_create(struct ir3 *shader); + +struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc); +struct ir3_instruction * ir3_instr_create2(struct ir3_block *block, + opc_t opc, int nreg); +struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr); +void ir3_instr_add_dep(struct ir3_instruction *instr, struct ir3_instruction *dep); +const char *ir3_instr_name(struct ir3_instruction *instr); + +struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, + int num, int flags); +struct ir3_register * ir3_reg_clone(struct ir3 *shader, + struct ir3_register *reg); + +void ir3_instr_set_address(struct ir3_instruction *instr, + struct ir3_instruction *addr); + +static inline bool ir3_instr_check_mark(struct ir3_instruction *instr) +{ + if (instr->flags & IR3_INSTR_MARK) + return true; /* already visited */ + instr->flags |= IR3_INSTR_MARK; + return false; +} + +void ir3_block_clear_mark(struct ir3_block *block); +void ir3_clear_mark(struct ir3 *shader); + +unsigned ir3_count_instructions(struct ir3 *ir); + +static inline int ir3_instr_regno(struct ir3_instruction *instr, + struct ir3_register *reg) +{ + unsigned i; + for (i = 0; i < instr->regs_count; i++) + if (reg == instr->regs[i]) + return i; + return -1; +} + + +#define MAX_ARRAYS 16 + +/* comp: + * 0 - x + * 1 - y + * 2 - z + * 3 - w + */ +static inline uint32_t regid(int num, int comp) +{ + return (num << 2) | (comp & 0x3); +} + +static inline uint32_t reg_num(struct 
ir3_register *reg) +{ + return reg->num >> 2; +} + +static inline uint32_t reg_comp(struct ir3_register *reg) +{ + return reg->num & 0x3; +} + +static inline bool is_flow(struct ir3_instruction *instr) +{ + return (opc_cat(instr->opc) == 0); +} + +static inline bool is_kill(struct ir3_instruction *instr) +{ + return instr->opc == OPC_KILL; +} + +static inline bool is_nop(struct ir3_instruction *instr) +{ + return instr->opc == OPC_NOP; +} + +/* Is it a non-transformative (ie. not type changing) mov? This can + * also include absneg.s/absneg.f, which for the most part can be + * treated as a mov (single src argument). + */ +static inline bool is_same_type_mov(struct ir3_instruction *instr) +{ + struct ir3_register *dst; + + switch (instr->opc) { + case OPC_MOV: + if (instr->cat1.src_type != instr->cat1.dst_type) + return false; + break; + case OPC_ABSNEG_F: + case OPC_ABSNEG_S: + if (instr->flags & IR3_INSTR_SAT) + return false; + break; + default: + return false; + } + + dst = instr->regs[0]; + + /* mov's that write to a0.x or p0.x are special: */ + if (dst->num == regid(REG_P0, 0)) + return false; + if (dst->num == regid(REG_A0, 0)) + return false; + + if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY)) + return false; + + return true; +} + +static inline bool is_alu(struct ir3_instruction *instr) +{ + return (1 <= opc_cat(instr->opc)) && (opc_cat(instr->opc) <= 3); +} + +static inline bool is_sfu(struct ir3_instruction *instr) +{ + return (opc_cat(instr->opc) == 4); +} + +static inline bool is_tex(struct ir3_instruction *instr) +{ + return (opc_cat(instr->opc) == 5); +} + +static inline bool is_mem(struct ir3_instruction *instr) +{ + return (opc_cat(instr->opc) == 6); +} + +static inline bool is_barrier(struct ir3_instruction *instr) +{ + return (opc_cat(instr->opc) == 7); +} + +static inline bool +is_store(struct ir3_instruction *instr) +{ + /* these instructions, the "destination" register is + * actually a source, the address to store to. 
+ */ + switch (instr->opc) { + case OPC_STG: + case OPC_STGB: + case OPC_STIB: + case OPC_STP: + case OPC_STL: + case OPC_STLW: + case OPC_L2G: + case OPC_G2L: + return true; + default: + return false; + } +} + +static inline bool is_load(struct ir3_instruction *instr) +{ + switch (instr->opc) { + case OPC_LDG: + case OPC_LDGB: + case OPC_LDL: + case OPC_LDP: + case OPC_L2G: + case OPC_LDLW: + case OPC_LDC: + case OPC_LDLV: + /* probably some others too.. */ + return true; + default: + return false; + } +} + +static inline bool is_input(struct ir3_instruction *instr) +{ + /* in some cases, ldlv is used to fetch varying without + * interpolation.. fortunately inloc is the first src + * register in either case + */ + switch (instr->opc) { + case OPC_LDLV: + case OPC_BARY_F: + return true; + default: + return false; + } +} + +static inline bool is_bool(struct ir3_instruction *instr) +{ + switch (instr->opc) { + case OPC_CMPS_F: + case OPC_CMPS_S: + case OPC_CMPS_U: + return true; + default: + return false; + } +} + +static inline bool is_meta(struct ir3_instruction *instr) +{ + /* TODO how should we count PHI (and maybe fan-in/out) which + * might actually contribute some instructions to the final + * result? 
+ */ + return (opc_cat(instr->opc) == -1); +} + +static inline bool writes_addr(struct ir3_instruction *instr) +{ + if (instr->regs_count > 0) { + struct ir3_register *dst = instr->regs[0]; + return reg_num(dst) == REG_A0; + } + return false; +} + +static inline bool writes_pred(struct ir3_instruction *instr) +{ + if (instr->regs_count > 0) { + struct ir3_register *dst = instr->regs[0]; + return reg_num(dst) == REG_P0; + } + return false; +} + +/* returns defining instruction for reg */ +/* TODO better name */ +static inline struct ir3_instruction *ssa(struct ir3_register *reg) +{ + if (reg->flags & (IR3_REG_SSA | IR3_REG_ARRAY)) { + return reg->instr; + } + return NULL; +} + +static inline bool conflicts(struct ir3_instruction *a, + struct ir3_instruction *b) +{ + return (a && b) && (a != b); +} + +static inline bool reg_gpr(struct ir3_register *r) +{ + if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED)) + return false; + if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0)) + return false; + return true; +} + +static inline type_t half_type(type_t type) +{ + switch (type) { + case TYPE_F32: return TYPE_F16; + case TYPE_U32: return TYPE_U16; + case TYPE_S32: return TYPE_S16; + case TYPE_F16: + case TYPE_U16: + case TYPE_S16: + return type; + default: + assert(0); + return ~0; + } +} + +/* some cat2 instructions (ie. 
those which are not float) can embed an + * immediate: + */ +static inline bool ir3_cat2_int(opc_t opc) +{ + switch (opc) { + case OPC_ADD_U: + case OPC_ADD_S: + case OPC_SUB_U: + case OPC_SUB_S: + case OPC_CMPS_U: + case OPC_CMPS_S: + case OPC_MIN_U: + case OPC_MIN_S: + case OPC_MAX_U: + case OPC_MAX_S: + case OPC_CMPV_U: + case OPC_CMPV_S: + case OPC_MUL_U: + case OPC_MUL_S: + case OPC_MULL_U: + case OPC_CLZ_S: + case OPC_ABSNEG_S: + case OPC_AND_B: + case OPC_OR_B: + case OPC_NOT_B: + case OPC_XOR_B: + case OPC_BFREV_B: + case OPC_CLZ_B: + case OPC_SHL_B: + case OPC_SHR_B: + case OPC_ASHR_B: + case OPC_MGEN_B: + case OPC_GETBIT_B: + case OPC_CBITS_B: + case OPC_BARY_F: + return true; + + default: + return false; + } +} + + +/* map cat2 instruction to valid abs/neg flags: */ +static inline unsigned ir3_cat2_absneg(opc_t opc) +{ + switch (opc) { + case OPC_ADD_F: + case OPC_MIN_F: + case OPC_MAX_F: + case OPC_MUL_F: + case OPC_SIGN_F: + case OPC_CMPS_F: + case OPC_ABSNEG_F: + case OPC_CMPV_F: + case OPC_FLOOR_F: + case OPC_CEIL_F: + case OPC_RNDNE_F: + case OPC_RNDAZ_F: + case OPC_TRUNC_F: + case OPC_BARY_F: + return IR3_REG_FABS | IR3_REG_FNEG; + + case OPC_ADD_U: + case OPC_ADD_S: + case OPC_SUB_U: + case OPC_SUB_S: + case OPC_CMPS_U: + case OPC_CMPS_S: + case OPC_MIN_U: + case OPC_MIN_S: + case OPC_MAX_U: + case OPC_MAX_S: + case OPC_CMPV_U: + case OPC_CMPV_S: + case OPC_MUL_U: + case OPC_MUL_S: + case OPC_MULL_U: + case OPC_CLZ_S: + return 0; + + case OPC_ABSNEG_S: + return IR3_REG_SABS | IR3_REG_SNEG; + + case OPC_AND_B: + case OPC_OR_B: + case OPC_NOT_B: + case OPC_XOR_B: + case OPC_BFREV_B: + case OPC_CLZ_B: + case OPC_SHL_B: + case OPC_SHR_B: + case OPC_ASHR_B: + case OPC_MGEN_B: + case OPC_GETBIT_B: + case OPC_CBITS_B: + return IR3_REG_BNOT; + + default: + return 0; + } +} + +/* map cat3 instructions to valid abs/neg flags: */ +static inline unsigned ir3_cat3_absneg(opc_t opc) +{ + switch (opc) { + case OPC_MAD_F16: + case OPC_MAD_F32: + case OPC_SEL_F16: 
+ case OPC_SEL_F32: + return IR3_REG_FNEG; + + case OPC_MAD_U16: + case OPC_MADSH_U16: + case OPC_MAD_S16: + case OPC_MADSH_M16: + case OPC_MAD_U24: + case OPC_MAD_S24: + case OPC_SEL_S16: + case OPC_SEL_S32: + case OPC_SAD_S16: + case OPC_SAD_S32: + /* neg *may* work on 3rd src.. */ + + case OPC_SEL_B16: + case OPC_SEL_B32: + + default: + return 0; + } +} + +#define MASK(n) ((1 << (n)) - 1) + +/* iterator for an instructions's sources (reg), also returns src #: */ +#define foreach_src_n(__srcreg, __n, __instr) \ + if ((__instr)->regs_count) \ + for (unsigned __cnt = (__instr)->regs_count - 1, __n = 0; __n < __cnt; __n++) \ + if ((__srcreg = (__instr)->regs[__n + 1])) + +/* iterator for an instructions's sources (reg): */ +#define foreach_src(__srcreg, __instr) \ + foreach_src_n(__srcreg, __i, __instr) + +static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr) +{ + unsigned cnt = instr->regs_count + instr->deps_count; + if (instr->address) + cnt++; + return cnt; +} + +static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr, unsigned n) +{ + if (n == (instr->regs_count + instr->deps_count)) + return instr->address; + if (n >= instr->regs_count) + return instr->deps[n - instr->regs_count]; + return ssa(instr->regs[n]); +} + +static inline bool __is_false_dep(struct ir3_instruction *instr, unsigned n) +{ + if (n == (instr->regs_count + instr->deps_count)) + return false; + if (n >= instr->regs_count) + return true; + return false; +} + +#define __src_cnt(__instr) ((__instr)->address ? 
(__instr)->regs_count : (__instr)->regs_count - 1) + +/* iterator for an instruction's SSA sources (instr), also returns src #: */ +#define foreach_ssa_src_n(__srcinst, __n, __instr) \ + for (unsigned __cnt = __ssa_src_cnt(__instr), __n = 0; __n < __cnt; __n++) \ + if ((__srcinst = __ssa_src_n(__instr, __n))) + +/* iterator for an instruction's SSA sources (instr): */ +#define foreach_ssa_src(__srcinst, __instr) \ + foreach_ssa_src_n(__srcinst, __i, __instr) + + +/* dump: */ +void ir3_print(struct ir3 *ir); +void ir3_print_instr(struct ir3_instruction *instr); + +/* depth calculation: */ +int ir3_delayslots(struct ir3_instruction *assigner, + struct ir3_instruction *consumer, unsigned n); +void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list); +void ir3_depth(struct ir3 *ir); + +/* copy-propagate: */ +struct ir3_shader_variant; +void ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so); + +/* group neighbors and insert mov's to resolve conflicts: */ +void ir3_group(struct ir3 *ir); + +/* scheduling: */ +void ir3_sched_add_deps(struct ir3 *ir); +int ir3_sched(struct ir3 *ir); + +/* register assignment: */ +struct ir3_ra_reg_set * ir3_ra_alloc_reg_set(struct ir3_compiler *compiler); +int ir3_ra(struct ir3 *ir3, gl_shader_stage type, + bool frag_coord, bool frag_face); + +/* legalize: */ +void ir3_legalize(struct ir3 *ir, int *num_samp, bool *has_ssbo, int *max_bary); + +/* ************************************************************************* */ +/* instruction helpers */ + +static inline struct ir3_instruction * +create_immed_typed(struct ir3_block *block, uint32_t val, type_t type) +{ + struct ir3_instruction *mov; + unsigned flags = (type_size(type) < 32) ? 
IR3_REG_HALF : 0; + + mov = ir3_instr_create(block, OPC_MOV); + mov->cat1.src_type = type; + mov->cat1.dst_type = type; + ir3_reg_create(mov, 0, flags); + ir3_reg_create(mov, 0, IR3_REG_IMMED)->uim_val = val; + + return mov; +} + +static inline struct ir3_instruction * +create_immed(struct ir3_block *block, uint32_t val) +{ + return create_immed_typed(block, val, TYPE_U32); +} + +static inline struct ir3_instruction * +create_uniform(struct ir3_block *block, unsigned n) +{ + struct ir3_instruction *mov; + + mov = ir3_instr_create(block, OPC_MOV); + /* TODO get types right? */ + mov->cat1.src_type = TYPE_F32; + mov->cat1.dst_type = TYPE_F32; + ir3_reg_create(mov, 0, 0); + ir3_reg_create(mov, n, IR3_REG_CONST); + + return mov; +} + +static inline struct ir3_instruction * +create_uniform_indirect(struct ir3_block *block, int n, + struct ir3_instruction *address) +{ + struct ir3_instruction *mov; + + mov = ir3_instr_create(block, OPC_MOV); + mov->cat1.src_type = TYPE_U32; + mov->cat1.dst_type = TYPE_U32; + ir3_reg_create(mov, 0, 0); + ir3_reg_create(mov, 0, IR3_REG_CONST | IR3_REG_RELATIV)->array.offset = n; + + ir3_instr_set_address(mov, address); + + return mov; +} + +/* creates SSA src of correct type (ie. 
half vs full precision) */ +static inline struct ir3_register * __ssa_src(struct ir3_instruction *instr, + struct ir3_instruction *src, unsigned flags) +{ + struct ir3_register *reg; + if (src->regs[0]->flags & IR3_REG_HALF) + flags |= IR3_REG_HALF; + reg = ir3_reg_create(instr, 0, IR3_REG_SSA | flags); + reg->instr = src; + return reg; +} + +static inline struct ir3_instruction * +ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type) +{ + struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV); + ir3_reg_create(instr, 0, 0); /* dst */ + if (src->regs[0]->flags & IR3_REG_ARRAY) { + struct ir3_register *src_reg = __ssa_src(instr, src, IR3_REG_ARRAY); + src_reg->array = src->regs[0]->array; + } else { + __ssa_src(instr, src, 0); + } + debug_assert(!(src->regs[0]->flags & IR3_REG_RELATIV)); + instr->cat1.src_type = type; + instr->cat1.dst_type = type; + return instr; +} + +static inline struct ir3_instruction * +ir3_COV(struct ir3_block *block, struct ir3_instruction *src, + type_t src_type, type_t dst_type) +{ + struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV); + unsigned dst_flags = (type_size(dst_type) < 32) ? IR3_REG_HALF : 0; + unsigned src_flags = (type_size(src_type) < 32) ? 
IR3_REG_HALF : 0; + + debug_assert((src->regs[0]->flags & IR3_REG_HALF) == src_flags); + + ir3_reg_create(instr, 0, dst_flags); /* dst */ + __ssa_src(instr, src, 0); + instr->cat1.src_type = src_type; + instr->cat1.dst_type = dst_type; + debug_assert(!(src->regs[0]->flags & IR3_REG_ARRAY)); + return instr; +} + +static inline struct ir3_instruction * +ir3_NOP(struct ir3_block *block) +{ + return ir3_instr_create(block, OPC_NOP); +} + +#define INSTR0(name) \ +static inline struct ir3_instruction * \ +ir3_##name(struct ir3_block *block) \ +{ \ + struct ir3_instruction *instr = \ + ir3_instr_create(block, OPC_##name); \ + return instr; \ +} + +#define INSTR1(name) \ +static inline struct ir3_instruction * \ +ir3_##name(struct ir3_block *block, \ + struct ir3_instruction *a, unsigned aflags) \ +{ \ + struct ir3_instruction *instr = \ + ir3_instr_create(block, OPC_##name); \ + ir3_reg_create(instr, 0, 0); /* dst */ \ + __ssa_src(instr, a, aflags); \ + return instr; \ +} + +#define INSTR2(name) \ +static inline struct ir3_instruction * \ +ir3_##name(struct ir3_block *block, \ + struct ir3_instruction *a, unsigned aflags, \ + struct ir3_instruction *b, unsigned bflags) \ +{ \ + struct ir3_instruction *instr = \ + ir3_instr_create(block, OPC_##name); \ + ir3_reg_create(instr, 0, 0); /* dst */ \ + __ssa_src(instr, a, aflags); \ + __ssa_src(instr, b, bflags); \ + return instr; \ +} + +#define INSTR3(name) \ +static inline struct ir3_instruction * \ +ir3_##name(struct ir3_block *block, \ + struct ir3_instruction *a, unsigned aflags, \ + struct ir3_instruction *b, unsigned bflags, \ + struct ir3_instruction *c, unsigned cflags) \ +{ \ + struct ir3_instruction *instr = \ + ir3_instr_create(block, OPC_##name); \ + ir3_reg_create(instr, 0, 0); /* dst */ \ + __ssa_src(instr, a, aflags); \ + __ssa_src(instr, b, bflags); \ + __ssa_src(instr, c, cflags); \ + return instr; \ +} + +#define INSTR4(name) \ +static inline struct ir3_instruction * \ +ir3_##name(struct ir3_block *block, \ + 
struct ir3_instruction *a, unsigned aflags, \ + struct ir3_instruction *b, unsigned bflags, \ + struct ir3_instruction *c, unsigned cflags, \ + struct ir3_instruction *d, unsigned dflags) \ +{ \ + struct ir3_instruction *instr = \ + ir3_instr_create2(block, OPC_##name, 5); \ + ir3_reg_create(instr, 0, 0); /* dst */ \ + __ssa_src(instr, a, aflags); \ + __ssa_src(instr, b, bflags); \ + __ssa_src(instr, c, cflags); \ + __ssa_src(instr, d, dflags); \ + return instr; \ +} + +#define INSTR4F(f, name) \ +static inline struct ir3_instruction * \ +ir3_##name##_##f(struct ir3_block *block, \ + struct ir3_instruction *a, unsigned aflags, \ + struct ir3_instruction *b, unsigned bflags, \ + struct ir3_instruction *c, unsigned cflags, \ + struct ir3_instruction *d, unsigned dflags) \ +{ \ + struct ir3_instruction *instr = \ + ir3_instr_create2(block, OPC_##name, 5); \ + ir3_reg_create(instr, 0, 0); /* dst */ \ + __ssa_src(instr, a, aflags); \ + __ssa_src(instr, b, bflags); \ + __ssa_src(instr, c, cflags); \ + __ssa_src(instr, d, dflags); \ + instr->flags |= IR3_INSTR_##f; \ + return instr; \ +} + +/* cat0 instructions: */ +INSTR0(BR) +INSTR0(JUMP) +INSTR1(KILL) +INSTR0(END) + +/* cat2 instructions, most 2 src but some 1 src: */ +INSTR2(ADD_F) +INSTR2(MIN_F) +INSTR2(MAX_F) +INSTR2(MUL_F) +INSTR1(SIGN_F) +INSTR2(CMPS_F) +INSTR1(ABSNEG_F) +INSTR2(CMPV_F) +INSTR1(FLOOR_F) +INSTR1(CEIL_F) +INSTR1(RNDNE_F) +INSTR1(RNDAZ_F) +INSTR1(TRUNC_F) +INSTR2(ADD_U) +INSTR2(ADD_S) +INSTR2(SUB_U) +INSTR2(SUB_S) +INSTR2(CMPS_U) +INSTR2(CMPS_S) +INSTR2(MIN_U) +INSTR2(MIN_S) +INSTR2(MAX_U) +INSTR2(MAX_S) +INSTR1(ABSNEG_S) +INSTR2(AND_B) +INSTR2(OR_B) +INSTR1(NOT_B) +INSTR2(XOR_B) +INSTR2(CMPV_U) +INSTR2(CMPV_S) +INSTR2(MUL_U) +INSTR2(MUL_S) +INSTR2(MULL_U) +INSTR1(BFREV_B) +INSTR1(CLZ_S) +INSTR1(CLZ_B) +INSTR2(SHL_B) +INSTR2(SHR_B) +INSTR2(ASHR_B) +INSTR2(BARY_F) +INSTR2(MGEN_B) +INSTR2(GETBIT_B) +INSTR1(SETRM) +INSTR1(CBITS_B) +INSTR2(SHB) +INSTR2(MSAD) + +/* cat3 instructions: */ +INSTR3(MAD_U16) 
+INSTR3(MADSH_U16) +INSTR3(MAD_S16) +INSTR3(MADSH_M16) +INSTR3(MAD_U24) +INSTR3(MAD_S24) +INSTR3(MAD_F16) +INSTR3(MAD_F32) +INSTR3(SEL_B16) +INSTR3(SEL_B32) +INSTR3(SEL_S16) +INSTR3(SEL_S32) +INSTR3(SEL_F16) +INSTR3(SEL_F32) +INSTR3(SAD_S16) +INSTR3(SAD_S32) + +/* cat4 instructions: */ +INSTR1(RCP) +INSTR1(RSQ) +INSTR1(LOG2) +INSTR1(EXP2) +INSTR1(SIN) +INSTR1(COS) +INSTR1(SQRT) + +/* cat5 instructions: */ +INSTR1(DSX) +INSTR1(DSY) + +static inline struct ir3_instruction * +ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, + unsigned wrmask, unsigned flags, unsigned samp, unsigned tex, + struct ir3_instruction *src0, struct ir3_instruction *src1) +{ + struct ir3_instruction *sam; + struct ir3_register *reg; + + sam = ir3_instr_create(block, opc); + sam->flags |= flags; + ir3_reg_create(sam, 0, 0)->wrmask = wrmask; + if (src0) { + reg = ir3_reg_create(sam, 0, IR3_REG_SSA); + reg->wrmask = (1 << (src0->regs_count - 1)) - 1; + reg->instr = src0; + } + if (src1) { + reg = ir3_reg_create(sam, 0, IR3_REG_SSA); + reg->instr = src1; + reg->wrmask = (1 << (src1->regs_count - 1)) - 1; + } + sam->cat5.samp = samp; + sam->cat5.tex = tex; + sam->cat5.type = type; + + return sam; +} + +/* cat6 instructions: */ +INSTR2(LDLV) +INSTR2(LDG) +INSTR2(LDL) +INSTR3(STG) +INSTR3(STL) +INSTR3(LDGB) +INSTR4(STGB) +INSTR4(STIB) +INSTR1(RESINFO) +INSTR1(RESFMT) +INSTR2(ATOMIC_ADD) +INSTR2(ATOMIC_SUB) +INSTR2(ATOMIC_XCHG) +INSTR2(ATOMIC_INC) +INSTR2(ATOMIC_DEC) +INSTR2(ATOMIC_CMPXCHG) +INSTR2(ATOMIC_MIN) +INSTR2(ATOMIC_MAX) +INSTR2(ATOMIC_AND) +INSTR2(ATOMIC_OR) +INSTR2(ATOMIC_XOR) +INSTR4F(G, ATOMIC_ADD) +INSTR4F(G, ATOMIC_SUB) +INSTR4F(G, ATOMIC_XCHG) +INSTR4F(G, ATOMIC_INC) +INSTR4F(G, ATOMIC_DEC) +INSTR4F(G, ATOMIC_CMPXCHG) +INSTR4F(G, ATOMIC_MIN) +INSTR4F(G, ATOMIC_MAX) +INSTR4F(G, ATOMIC_AND) +INSTR4F(G, ATOMIC_OR) +INSTR4F(G, ATOMIC_XOR) + +/* cat7 instructions: */ +INSTR0(BAR) +INSTR0(FENCE) + +/* ************************************************************************* */ +/* 
split this out or find some helper to use.. like main/bitset.h.. */ + +#include + +#define MAX_REG 256 + +typedef uint8_t regmask_t[2 * MAX_REG / 8]; + +static inline unsigned regmask_idx(struct ir3_register *reg) +{ + unsigned num = (reg->flags & IR3_REG_RELATIV) ? reg->array.offset : reg->num; + debug_assert(num < MAX_REG); + if (reg->flags & IR3_REG_HALF) + num += MAX_REG; + return num; +} + +static inline void regmask_init(regmask_t *regmask) +{ + memset(regmask, 0, sizeof(*regmask)); +} + +static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg) +{ + unsigned idx = regmask_idx(reg); + if (reg->flags & IR3_REG_RELATIV) { + unsigned i; + for (i = 0; i < reg->size; i++, idx++) + (*regmask)[idx / 8] |= 1 << (idx % 8); + } else { + unsigned mask; + for (mask = reg->wrmask; mask; mask >>= 1, idx++) + if (mask & 1) + (*regmask)[idx / 8] |= 1 << (idx % 8); + } +} + +static inline void regmask_or(regmask_t *dst, regmask_t *a, regmask_t *b) +{ + unsigned i; + for (i = 0; i < ARRAY_SIZE(*dst); i++) + (*dst)[i] = (*a)[i] | (*b)[i]; +} + +/* set bits in a if not set in b, conceptually: + * a |= (reg & ~b) + */ +static inline void regmask_set_if_not(regmask_t *a, + struct ir3_register *reg, regmask_t *b) +{ + unsigned idx = regmask_idx(reg); + if (reg->flags & IR3_REG_RELATIV) { + unsigned i; + for (i = 0; i < reg->size; i++, idx++) + if (!((*b)[idx / 8] & (1 << (idx % 8)))) + (*a)[idx / 8] |= 1 << (idx % 8); + } else { + unsigned mask; + for (mask = reg->wrmask; mask; mask >>= 1, idx++) + if (mask & 1) + if (!((*b)[idx / 8] & (1 << (idx % 8)))) + (*a)[idx / 8] |= 1 << (idx % 8); + } +} + +static inline bool regmask_get(regmask_t *regmask, + struct ir3_register *reg) +{ + unsigned idx = regmask_idx(reg); + if (reg->flags & IR3_REG_RELATIV) { + unsigned i; + for (i = 0; i < reg->size; i++, idx++) + if ((*regmask)[idx / 8] & (1 << (idx % 8))) + return true; + } else { + unsigned mask; + for (mask = reg->wrmask; mask; mask >>= 1, idx++) + if (mask & 1) + if 
((*regmask)[idx / 8] & (1 << (idx % 8))) + return true; + } + return false; +} + +/* ************************************************************************* */ + +#endif /* IR3_H_ */ diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_legalize.c mesa-19.0.1/src/freedreno/ir3/ir3_legalize.c --- mesa-18.3.3/src/freedreno/ir3/ir3_legalize.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_legalize.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,496 @@ +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "util/ralloc.h" +#include "util/u_math.h" + +#include "ir3.h" + +/* + * Legalize: + * + * We currently require that scheduling ensures that we have enough nop's + * in all the right places. 
The legalize step mostly handles fixing up + * instruction flags ((ss)/(sy)/(ei)), and collapses sequences of nop's + * into fewer nop's w/ rpt flag. + */ + +struct ir3_legalize_ctx { + int num_samp; + bool has_ssbo; + int max_bary; +}; + +struct ir3_legalize_state { + regmask_t needs_ss; + regmask_t needs_ss_war; /* write after read */ + regmask_t needs_sy; +}; + +struct ir3_legalize_block_data { + bool valid; + struct ir3_legalize_state state; +}; + +/* We want to evaluate each block from the position of any other + * predecessor block, in order that the flags set are the union of + * all possible program paths. + * + * To do this, we need to know the output state (needs_ss/ss_war/sy) + * of all predecessor blocks. The tricky thing is loops, which mean + * that we can't simply recursively process each predecessor block + * before legalizing the current block. + * + * How we handle that is by looping over all the blocks until the + * results converge. If the output state of a given block changes + * in a given pass, this means that all successor blocks are not + * yet fully legalized. 
+ */ + +static bool +legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) +{ + struct ir3_legalize_block_data *bd = block->data; + + if (bd->valid) + return false; + + struct ir3_instruction *last_input = NULL; + struct ir3_instruction *last_rel = NULL; + struct ir3_instruction *last_n = NULL; + struct list_head instr_list; + struct ir3_legalize_state prev_state = bd->state; + struct ir3_legalize_state *state = &bd->state; + + /* our input state is the OR of all predecessor blocks' state: */ + for (unsigned i = 0; i < block->predecessors_count; i++) { + struct ir3_legalize_block_data *pbd = block->predecessors[i]->data; + struct ir3_legalize_state *pstate = &pbd->state; + + /* Our input (ss)/(sy) state is based on OR'ing the output + * state of all our predecessor blocks + */ + regmask_or(&state->needs_ss, + &state->needs_ss, &pstate->needs_ss); + regmask_or(&state->needs_ss_war, + &state->needs_ss_war, &pstate->needs_ss_war); + regmask_or(&state->needs_sy, + &state->needs_sy, &pstate->needs_sy); + } + + /* remove all the instructions from the list, we'll be adding + * them back in as we go + */ + list_replace(&block->instr_list, &instr_list); + list_inithead(&block->instr_list); + + list_for_each_entry_safe (struct ir3_instruction, n, &instr_list, node) { + struct ir3_register *reg; + unsigned i; + + n->flags &= ~(IR3_INSTR_SS | IR3_INSTR_SY); + + if (is_meta(n)) + continue; + + if (is_input(n)) { + struct ir3_register *inloc = n->regs[1]; + assert(inloc->flags & IR3_REG_IMMED); + ctx->max_bary = MAX2(ctx->max_bary, inloc->iim_val); + } + + if (last_n && is_barrier(last_n)) + n->flags |= IR3_INSTR_SS | IR3_INSTR_SY; + + /* NOTE: consider dst register too.. it could happen that + * texture sample instruction (for example) writes some + * components which are unused. 
A subsequent instruction + * that writes the same register can race w/ the sam instr + * resulting in undefined results: + */ + for (i = 0; i < n->regs_count; i++) { + reg = n->regs[i]; + + if (reg_gpr(reg)) { + + /* TODO: we probably only need (ss) for alu + * instr consuming sfu result.. need to make + * some tests for both this and (sy).. + */ + if (regmask_get(&state->needs_ss, reg)) { + n->flags |= IR3_INSTR_SS; + regmask_init(&state->needs_ss_war); + regmask_init(&state->needs_ss); + } + + if (regmask_get(&state->needs_sy, reg)) { + n->flags |= IR3_INSTR_SY; + regmask_init(&state->needs_sy); + } + } + + /* TODO: is it valid to have address reg loaded from a + * relative src (ie. mova a0, c)? If so, the + * last_rel check below should be moved ahead of this: + */ + if (reg->flags & IR3_REG_RELATIV) + last_rel = n; + } + + if (n->regs_count > 0) { + reg = n->regs[0]; + if (regmask_get(&state->needs_ss_war, reg)) { + n->flags |= IR3_INSTR_SS; + regmask_init(&state->needs_ss_war); + regmask_init(&state->needs_ss); + } + + if (last_rel && (reg->num == regid(REG_A0, 0))) { + last_rel->flags |= IR3_INSTR_UL; + last_rel = NULL; + } + } + + /* cat5+ does not have an (ss) bit, if needed we need to + * insert a nop to carry the sync flag. 
Would be kinda + * clever if we were aware of this during scheduling, but + * this should be a pretty rare case: + */ + if ((n->flags & IR3_INSTR_SS) && (opc_cat(n->opc) >= 5)) { + struct ir3_instruction *nop; + nop = ir3_NOP(block); + nop->flags |= IR3_INSTR_SS; + n->flags &= ~IR3_INSTR_SS; + } + + /* need to be able to set (ss) on first instruction: */ + if (list_empty(&block->instr_list) && (opc_cat(n->opc) >= 5)) + ir3_NOP(block); + + if (is_nop(n) && !list_empty(&block->instr_list)) { + struct ir3_instruction *last = list_last_entry(&block->instr_list, + struct ir3_instruction, node); + if (is_nop(last) && (last->repeat < 5)) { + last->repeat++; + last->flags |= n->flags; + continue; + } + } + + list_addtail(&n->node, &block->instr_list); + + if (is_sfu(n)) + regmask_set(&state->needs_ss, n->regs[0]); + + if (is_tex(n)) { + /* this ends up being the # of samp instructions.. but that + * is ok, everything else only cares whether it is zero or + * not. We do this here, rather than when we encounter a + * SAMP decl, because (especially in binning pass shader) + * the samp instruction(s) could get eliminated if the + * result is not used. + */ + ctx->num_samp = MAX2(ctx->num_samp, n->cat5.samp + 1); + regmask_set(&state->needs_sy, n->regs[0]); + } else if (n->opc == OPC_RESINFO) { + regmask_set(&state->needs_ss, n->regs[0]); + ir3_NOP(block)->flags |= IR3_INSTR_SS; + } else if (is_load(n)) { + /* seems like ldlv needs (ss) bit instead?? which is odd but + * makes a bunch of flat-varying tests start working on a4xx. 
+ */ + if ((n->opc == OPC_LDLV) || (n->opc == OPC_LDL)) + regmask_set(&state->needs_ss, n->regs[0]); + else + regmask_set(&state->needs_sy, n->regs[0]); + } else if (is_atomic(n->opc)) { + if (n->flags & IR3_INSTR_G) + regmask_set(&state->needs_sy, n->regs[0]); + else + regmask_set(&state->needs_ss, n->regs[0]); + } + + if (is_ssbo(n->opc) || (is_atomic(n->opc) && (n->flags & IR3_INSTR_G))) + ctx->has_ssbo = true; + + /* both tex/sfu appear to not always immediately consume + * their src register(s): + */ + if (is_tex(n) || is_sfu(n) || is_mem(n)) { + foreach_src(reg, n) { + if (reg_gpr(reg)) + regmask_set(&state->needs_ss_war, reg); + } + } + + if (is_input(n)) + last_input = n; + + last_n = n; + } + + if (last_input) { + /* special hack.. if using ldlv to bypass interpolation, + * we need to insert a dummy bary.f on which we can set + * the (ei) flag: + */ + if (is_mem(last_input) && (last_input->opc == OPC_LDLV)) { + struct ir3_instruction *baryf; + + /* (ss)bary.f (ei)r63.x, 0, r0.x */ + baryf = ir3_instr_create(block, OPC_BARY_F); + baryf->flags |= IR3_INSTR_SS; + ir3_reg_create(baryf, regid(63, 0), 0); + ir3_reg_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0; + ir3_reg_create(baryf, regid(0, 0), 0); + + /* insert the dummy bary.f after last_input: */ + list_delinit(&baryf->node); + list_add(&baryf->node, &last_input->node); + + last_input = baryf; + } + last_input->regs[0]->flags |= IR3_REG_EI; + } + + if (last_rel) + last_rel->flags |= IR3_INSTR_UL; + + bd->valid = true; + + if (memcmp(&prev_state, state, sizeof(*state))) { + /* our output state changed, this invalidates all of our + * successors: + */ + for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) { + if (!block->successors[i]) + break; + struct ir3_legalize_block_data *pbd = block->successors[i]->data; + pbd->valid = false; + } + } + + return true; +} + +/* NOTE: branch instructions are always the last instruction(s) + * in the block. 
We take advantage of this as we resolve the + * branches, since "if (foo) break;" constructs turn into + * something like: + * + * block3 { + * ... + * 0029:021: mov.s32s32 r62.x, r1.y + * 0082:022: br !p0.x, target=block5 + * 0083:023: br p0.x, target=block4 + * // succs: if _[0029:021: mov.s32s32] block4; else block5; + * } + * block4 { + * 0084:024: jump, target=block6 + * // succs: block6; + * } + * block5 { + * 0085:025: jump, target=block7 + * // succs: block7; + * } + * + * ie. only instruction in block4/block5 is a jump, so when + * resolving branches we can easily detect this by checking + * that the first instruction in the target block is itself + * a jump, and setup the br directly to the jump's target + * (and strip back out the now unreached jump) + * + * TODO sometimes we end up with things like: + * + * br !p0.x, #2 + * br p0.x, #12 + * add.u r0.y, r0.y, 1 + * + * If we swapped the order of the branches, we could drop one. + */ +static struct ir3_block * +resolve_dest_block(struct ir3_block *block) +{ + /* special case for last block: */ + if (!block->successors[0]) + return block; + + /* NOTE that we may or may not have inserted the jump + * in the target block yet, so conditions to resolve + * the dest to the dest block's successor are: + * + * (1) successor[1] == NULL && + * (2) (block-is-empty || only-instr-is-jump) + */ + if (block->successors[1] == NULL) { + if (list_empty(&block->instr_list)) { + return block->successors[0]; + } else if (list_length(&block->instr_list) == 1) { + struct ir3_instruction *instr = list_first_entry( + &block->instr_list, struct ir3_instruction, node); + if (instr->opc == OPC_JUMP) + return block->successors[0]; + } + } + return block; +} + +static bool +resolve_jump(struct ir3_instruction *instr) +{ + struct ir3_block *tblock = + resolve_dest_block(instr->cat0.target); + struct ir3_instruction *target; + + if (tblock != instr->cat0.target) { + list_delinit(&instr->cat0.target->node); + instr->cat0.target = tblock; 
+ return true; + } + + target = list_first_entry(&tblock->instr_list, + struct ir3_instruction, node); + + /* TODO maybe a less fragile way to do this. But we are expecting + * a pattern from sched_block() that looks like: + * + * br !p0.x, #else-block + * br p0.x, #if-block + * + * if the first branch target is +2, or if 2nd branch target is +1 + * then we can just drop the jump. + */ + unsigned next_block; + if (instr->cat0.inv == true) + next_block = 2; + else + next_block = 1; + + if ((!target) || (target->ip == (instr->ip + next_block))) { + list_delinit(&instr->node); + return true; + } else { + instr->cat0.immed = + (int)target->ip - (int)instr->ip; + } + return false; +} + +/* resolve jumps, removing jumps/branches to immediately following + * instruction which we end up with from earlier stages. Since + * removing an instruction can invalidate earlier instruction's + * branch offsets, we need to do this iteratively until no more + * branches are removed. + */ +static bool +resolve_jumps(struct ir3 *ir) +{ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) + if (is_flow(instr) && instr->cat0.target) + if (resolve_jump(instr)) + return true; + + return false; +} + +/* we want to mark points where divergent flow control re-converges + * with (jp) flags. For now, since we don't do any optimization for + * things that start out as a 'do {} while()', re-convergence points + * will always be a branch or jump target. Note that this is overly + * conservative, since unconditional jump targets are not convergence + * points, we are just assuming that the other path to reach the jump + * target was divergent. If we were clever enough to optimize the + * jump at end of a loop back to a conditional branch into a single + * conditional branch, ie. 
like: + * + * add.f r1.w, r0.x, (neg)(r)c2.x <= loop start + * mul.f r1.z, r1.z, r0.x + * mul.f r1.y, r1.y, r0.x + * mul.f r0.z, r1.x, r0.x + * mul.f r0.w, r0.y, r0.x + * cmps.f.ge r0.x, (r)c2.y, (r)r1.w + * add.s r0.x, (r)r0.x, (r)-1 + * sel.f32 r0.x, (r)c3.y, (r)r0.x, c3.x + * cmps.f.eq p0.x, r0.x, c3.y + * mov.f32f32 r0.x, r1.w + * mov.f32f32 r0.y, r0.w + * mov.f32f32 r1.x, r0.z + * (rpt2)nop + * br !p0.x, #-13 + * (jp)mul.f r0.x, c263.y, r1.y + * + * Then we'd have to be more clever, as the convergence point is no + * longer a branch or jump target. + */ +static void +mark_convergence_points(struct ir3 *ir) +{ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + if (is_flow(instr) && instr->cat0.target) { + struct ir3_instruction *target = + list_first_entry(&instr->cat0.target->instr_list, + struct ir3_instruction, node); + target->flags |= IR3_INSTR_JP; + } + } + } +} + +void +ir3_legalize(struct ir3 *ir, int *num_samp, bool *has_ssbo, int *max_bary) +{ + struct ir3_legalize_ctx *ctx = rzalloc(ir, struct ir3_legalize_ctx); + bool progress; + + ctx->max_bary = -1; + + /* allocate per-block data: */ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + block->data = rzalloc(ctx, struct ir3_legalize_block_data); + } + + /* process each block: */ + do { + progress = false; + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + progress |= legalize_block(ctx, block); + } + } while (progress); + + *num_samp = ctx->num_samp; + *has_ssbo = ctx->has_ssbo; + *max_bary = ctx->max_bary; + + do { + ir3_count_instructions(ir); + } while(resolve_jumps(ir)); + + mark_convergence_points(ir); + + ralloc_free(ctx); +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_nir.c mesa-19.0.1/src/freedreno/ir3/ir3_nir.c --- mesa-18.3.3/src/freedreno/ir3/ir3_nir.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_nir.c 
2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,263 @@ +/* + * Copyright (C) 2015 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + + +#include "util/debug.h" + +#include "ir3_nir.h" +#include "ir3_compiler.h" +#include "ir3_shader.h" + +static const nir_shader_compiler_options options = { + .lower_fpow = true, + .lower_scmp = true, + .lower_flrp32 = true, + .lower_flrp64 = true, + .lower_ffract = true, + .lower_fmod32 = true, + .lower_fmod64 = true, + .lower_fdiv = true, + .lower_ldexp = true, + .fuse_ffma = true, + .native_integers = true, + .vertex_id_zero_based = true, + .lower_extract_byte = true, + .lower_extract_word = true, + .lower_all_io_to_temps = true, + .lower_helper_invocation = true, +}; + +const nir_shader_compiler_options * +ir3_get_compiler_options(struct ir3_compiler *compiler) +{ + return &options; +} + +/* for given shader key, are any steps handled in nir? */ +bool +ir3_key_lowers_nir(const struct ir3_shader_key *key) +{ + return key->fsaturate_s | key->fsaturate_t | key->fsaturate_r | + key->vsaturate_s | key->vsaturate_t | key->vsaturate_r | + key->ucp_enables | key->color_two_side | + key->fclamp_color | key->vclamp_color; +} + +#define OPT(nir, pass, ...) ({ \ + bool this_progress = false; \ + NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ + this_progress; \ +}) + +#define OPT_V(nir, pass, ...) 
NIR_PASS_V(nir, pass, ##__VA_ARGS__) + +static void +ir3_optimize_loop(nir_shader *s) +{ + bool progress; + do { + progress = false; + + OPT_V(s, nir_lower_vars_to_ssa); + progress |= OPT(s, nir_opt_copy_prop_vars); + progress |= OPT(s, nir_opt_dead_write_vars); + progress |= OPT(s, nir_lower_alu_to_scalar); + progress |= OPT(s, nir_lower_phis_to_scalar); + + progress |= OPT(s, nir_copy_prop); + progress |= OPT(s, nir_opt_dce); + progress |= OPT(s, nir_opt_cse); + static int gcm = -1; + if (gcm == -1) + gcm = env_var_as_unsigned("GCM", 0); + if (gcm == 1) + progress |= OPT(s, nir_opt_gcm, true); + else if (gcm == 2) + progress |= OPT(s, nir_opt_gcm, false); + progress |= OPT(s, nir_opt_peephole_select, 16, true); + progress |= OPT(s, nir_opt_intrinsics); + progress |= OPT(s, nir_opt_algebraic); + progress |= OPT(s, nir_opt_constant_folding); + progress |= OPT(s, nir_opt_dead_cf); + if (OPT(s, nir_opt_trivial_continues)) { + progress |= true; + /* If nir_opt_trivial_continues makes progress, then we need to clean + * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll + * to make progress. 
+ */ + OPT(s, nir_copy_prop); + OPT(s, nir_opt_dce); + } + progress |= OPT(s, nir_opt_if); + progress |= OPT(s, nir_opt_remove_phis); + progress |= OPT(s, nir_opt_undef); + + } while (progress); +} + +struct nir_shader * +ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s, + const struct ir3_shader_key *key) +{ + struct nir_lower_tex_options tex_options = { + .lower_rect = 0, + }; + + if (key) { + switch (shader->type) { + case MESA_SHADER_FRAGMENT: + tex_options.saturate_s = key->fsaturate_s; + tex_options.saturate_t = key->fsaturate_t; + tex_options.saturate_r = key->fsaturate_r; + break; + case MESA_SHADER_VERTEX: + tex_options.saturate_s = key->vsaturate_s; + tex_options.saturate_t = key->vsaturate_t; + tex_options.saturate_r = key->vsaturate_r; + break; + default: + /* TODO */ + break; + } + } + + if (shader->compiler->gpu_id >= 400) { + /* a4xx seems to have *no* sam.p */ + tex_options.lower_txp = ~0; /* lower all txp */ + } else { + /* a3xx just needs to avoid sam.p for 3d tex */ + tex_options.lower_txp = (1 << GLSL_SAMPLER_DIM_3D); + } + + if (ir3_shader_debug & IR3_DBG_DISASM) { + debug_printf("----------------------\n"); + nir_print_shader(s, stdout); + debug_printf("----------------------\n"); + } + + OPT_V(s, nir_opt_global_to_local); + OPT_V(s, nir_lower_regs_to_ssa); + + if (key) { + if (s->info.stage == MESA_SHADER_VERTEX) { + OPT_V(s, nir_lower_clip_vs, key->ucp_enables, false); + if (key->vclamp_color) + OPT_V(s, nir_lower_clamp_color_outputs); + } else if (s->info.stage == MESA_SHADER_FRAGMENT) { + OPT_V(s, nir_lower_clip_fs, key->ucp_enables); + if (key->fclamp_color) + OPT_V(s, nir_lower_clamp_color_outputs); + } + if (key->color_two_side) { + OPT_V(s, nir_lower_two_sided_color); + } + } else { + /* only want to do this the first time (when key is null) + * and not again on any potential 2nd variant lowering pass: + */ + OPT_V(s, ir3_nir_apply_trig_workarounds); + } + + OPT_V(s, nir_lower_tex, &tex_options); + OPT_V(s, 
nir_lower_load_const_to_scalar); + if (shader->compiler->gpu_id < 500) + OPT_V(s, ir3_nir_lower_tg4_to_tex); + + ir3_optimize_loop(s); + + /* do idiv lowering after first opt loop to give a chance for + * divide by immed power-of-two to be caught first: + */ + if (OPT(s, nir_lower_idiv)) + ir3_optimize_loop(s); + + OPT_V(s, nir_remove_dead_variables, nir_var_function_temp); + + OPT_V(s, nir_move_load_const); + + if (ir3_shader_debug & IR3_DBG_DISASM) { + debug_printf("----------------------\n"); + nir_print_shader(s, stdout); + debug_printf("----------------------\n"); + } + + nir_sweep(s); + + return s; +} + +void +ir3_nir_scan_driver_consts(nir_shader *shader, + struct ir3_driver_const_layout *layout) +{ + nir_foreach_function(function, shader) { + if (!function->impl) + continue; + + nir_foreach_block(block, function->impl) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intr = + nir_instr_as_intrinsic(instr); + unsigned idx; + + switch (intr->intrinsic) { + case nir_intrinsic_get_buffer_size: + idx = nir_src_as_const_value(intr->src[0])->u32[0]; + if (layout->ssbo_size.mask & (1 << idx)) + break; + layout->ssbo_size.mask |= (1 << idx); + layout->ssbo_size.off[idx] = + layout->ssbo_size.count; + layout->ssbo_size.count += 1; /* one const per */ + break; + case nir_intrinsic_image_deref_atomic_add: + case nir_intrinsic_image_deref_atomic_min: + case nir_intrinsic_image_deref_atomic_max: + case nir_intrinsic_image_deref_atomic_and: + case nir_intrinsic_image_deref_atomic_or: + case nir_intrinsic_image_deref_atomic_xor: + case nir_intrinsic_image_deref_atomic_exchange: + case nir_intrinsic_image_deref_atomic_comp_swap: + case nir_intrinsic_image_deref_store: + case nir_intrinsic_image_deref_size: + idx = nir_intrinsic_get_var(intr, 0)->data.driver_location; + if (layout->image_dims.mask & (1 << idx)) + break; + layout->image_dims.mask |= (1 << idx); + layout->image_dims.off[idx] = + 
layout->image_dims.count; + layout->image_dims.count += 3; /* three const per */ + break; + default: + break; + } + } + } + } +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_nir.h mesa-19.0.1/src/freedreno/ir3/ir3_nir.h --- mesa-18.3.3/src/freedreno/ir3/ir3_nir.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_nir.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2015 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#ifndef IR3_NIR_H_ +#define IR3_NIR_H_ + +#include "compiler/nir/nir.h" +#include "compiler/shader_enums.h" + +#include "ir3_shader.h" + +void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_driver_const_layout *layout); + +bool ir3_nir_apply_trig_workarounds(nir_shader *shader); +bool ir3_nir_lower_tg4_to_tex(nir_shader *shader); + +const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler); +bool ir3_key_lowers_nir(const struct ir3_shader_key *key); +struct nir_shader * ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s, + const struct ir3_shader_key *key); + +#endif /* IR3_NIR_H_ */ diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c mesa-19.0.1/src/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c --- mesa-18.3.3/src/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,138 @@ +/* + * Copyright © 2017 Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "ir3_nir.h" +#include "compiler/nir/nir_builder.h" + +/* A4XX has a broken GATHER4 operation. It performs the texture swizzle on the + * gather results, rather than before. As a result, it must be emulated with + * direct texture calls. + */ + +static bool +lower_tg4(nir_block *block, nir_builder *b, void *mem_ctx) +{ + bool progress = false; + + static const int offsets[3][2] = { {0, 1}, {1, 1}, {1, 0} }; + + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_tex) + continue; + + nir_tex_instr *tg4 = (nir_tex_instr *)instr; + + if (tg4->op != nir_texop_tg4) + continue; + + b->cursor = nir_before_instr(&tg4->instr); + + nir_ssa_def *results[4]; + int offset_index = nir_tex_instr_src_index(tg4, nir_tex_src_offset); + for (int i = 0; i < 4; i++) { + int num_srcs = tg4->num_srcs + 1 /* lod */; + if (offset_index < 0 && i < 3) + num_srcs++; + + nir_tex_instr *tex = nir_tex_instr_create(b->shader, num_srcs); + tex->op = nir_texop_txl; + tex->sampler_dim = tg4->sampler_dim; + tex->coord_components = tg4->coord_components; + tex->is_array = tg4->is_array; + tex->is_shadow = tg4->is_shadow; + tex->is_new_style_shadow = tg4->is_new_style_shadow; + tex->texture_index = tg4->texture_index; + tex->sampler_index = tg4->sampler_index; + tex->dest_type = tg4->dest_type; + + for (int j = 0; j < tg4->num_srcs; j++) { + nir_src_copy(&tex->src[j].src, &tg4->src[j].src, tex); + tex->src[j].src_type = tg4->src[j].src_type; + } + if (i != 3) { + nir_ssa_def *offset = + nir_vec2(b, nir_imm_int(b, offsets[i][0]), + nir_imm_int(b, offsets[i][1])); + if (offset_index < 0) { + tex->src[tg4->num_srcs].src = nir_src_for_ssa(offset); + tex->src[tg4->num_srcs].src_type = 
nir_tex_src_offset; + } else { + assert(nir_tex_instr_src_size(tex, offset_index) == 2); + nir_ssa_def *orig = nir_ssa_for_src( + b, tex->src[offset_index].src, 2); + tex->src[offset_index].src = + nir_src_for_ssa(nir_iadd(b, orig, offset)); + } + } + tex->src[num_srcs - 1].src = nir_src_for_ssa(nir_imm_float(b, 0)); + tex->src[num_srcs - 1].src_type = nir_tex_src_lod; + + nir_ssa_dest_init(&tex->instr, &tex->dest, + nir_tex_instr_dest_size(tex), 32, NULL); + nir_builder_instr_insert(b, &tex->instr); + + results[i] = nir_channel(b, &tex->dest.ssa, tg4->component); + } + + nir_ssa_def *result = nir_vec4(b, results[0], results[1], results[2], results[3]); + nir_ssa_def_rewrite_uses(&tg4->dest.ssa, nir_src_for_ssa(result)); + + nir_instr_remove(&tg4->instr); + + progress = true; + } + + return progress; +} + +static bool +lower_tg4_func(nir_function_impl *impl) +{ + void *mem_ctx = ralloc_parent(impl); + nir_builder b; + nir_builder_init(&b, impl); + + bool progress = false; + nir_foreach_block_safe(block, impl) { + progress |= lower_tg4(block, &b, mem_ctx); + } + + if (progress) + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + return progress; +} + +bool +ir3_nir_lower_tg4_to_tex(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(function, shader) { + if (function->impl) + progress |= lower_tg4_func(function->impl); + } + + return progress; +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_nir_trig.py mesa-19.0.1/src/freedreno/ir3/ir3_nir_trig.py --- mesa-18.3.3/src/freedreno/ir3/ir3_nir_trig.py 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_nir_trig.py 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,51 @@ +# +# Copyright (C) 2016 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the 
rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +from __future__ import print_function + +import argparse +import sys + +trig_workarounds = [ + (('fsin', 'x'), ('fsin', ('fsub', ('fmul', 6.283185, ('ffract', ('fadd', ('fmul', 0.159155, 'x'), 0.5))), 3.141593))), + (('fcos', 'x'), ('fcos', ('fsub', ('fmul', 6.283185, ('ffract', ('fadd', ('fmul', 0.159155, 'x'), 0.5))), 3.141593))), +] + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-p', '--import-path', required=True) + args = parser.parse_args() + sys.path.insert(0, args.import_path) + run() + + +def run(): + import nir_algebraic # pylint: disable=import-error + + print('#include "ir3_nir.h"') + print(nir_algebraic.AlgebraicPass("ir3_nir_apply_trig_workarounds", + trig_workarounds).render()) + + +if __name__ == '__main__': + main() diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_print.c mesa-19.0.1/src/freedreno/ir3/ir3_print.c --- mesa-18.3.3/src/freedreno/ir3/ir3_print.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_print.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,264 @@ +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, 
free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include +#include + +#include "ir3.h" + +#define PTRID(x) ((unsigned long)(x)) + +static void print_instr_name(struct ir3_instruction *instr) +{ + if (!instr) + return; +#ifdef DEBUG + printf("%04u:", instr->serialno); +#endif + printf("%04u:", instr->name); + printf("%04u:", instr->ip); + printf("%03u: ", instr->depth); + + if (instr->flags & IR3_INSTR_SY) + printf("(sy)"); + if (instr->flags & IR3_INSTR_SS) + printf("(ss)"); + + if (is_meta(instr)) { + switch (instr->opc) { + case OPC_META_INPUT: printf("_meta:in"); break; + case OPC_META_FO: printf("_meta:fo"); break; + case OPC_META_FI: printf("_meta:fi"); break; + + /* shouldn't hit here.. 
just for debugging: */ + default: printf("_meta:%d", instr->opc); break; + } + } else if (instr->opc == OPC_MOV) { + static const char *type[] = { + [TYPE_F16] = "f16", + [TYPE_F32] = "f32", + [TYPE_U16] = "u16", + [TYPE_U32] = "u32", + [TYPE_S16] = "s16", + [TYPE_S32] = "s32", + [TYPE_U8] = "u8", + [TYPE_S8] = "s8", + }; + if (instr->cat1.src_type == instr->cat1.dst_type) + printf("mov"); + else + printf("cov"); + printf(".%s%s", type[instr->cat1.src_type], type[instr->cat1.dst_type]); + } else { + printf("%s", ir3_instr_name(instr)); + if (instr->flags & IR3_INSTR_3D) + printf(".3d"); + if (instr->flags & IR3_INSTR_A) + printf(".a"); + if (instr->flags & IR3_INSTR_O) + printf(".o"); + if (instr->flags & IR3_INSTR_P) + printf(".p"); + if (instr->flags & IR3_INSTR_S) + printf(".s"); + if (instr->flags & IR3_INSTR_S2EN) + printf(".s2en"); + } +} + +static void print_reg_name(struct ir3_register *reg) +{ + if ((reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) && + (reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT))) + printf("(absneg)"); + else if (reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)) + printf("(neg)"); + else if (reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) + printf("(abs)"); + + if (reg->flags & IR3_REG_IMMED) { + printf("imm[%f,%d,0x%x]", reg->fim_val, reg->iim_val, reg->iim_val); + } else if (reg->flags & IR3_REG_ARRAY) { + printf("arr[id=%u, offset=%d, size=%u", reg->array.id, + reg->array.offset, reg->size); + /* for ARRAY we could have null src, for example first write + * instruction.. 
+ */ + if (reg->instr) { + printf(", _["); + print_instr_name(reg->instr); + printf("]"); + } + printf("]"); + } else if (reg->flags & IR3_REG_SSA) { + printf("_["); + print_instr_name(reg->instr); + printf("]"); + } else if (reg->flags & IR3_REG_RELATIV) { + if (reg->flags & IR3_REG_HALF) + printf("h"); + if (reg->flags & IR3_REG_CONST) + printf("c", reg->array.offset); + else + printf("\x1b[0;31mr\x1b[0m (%u)", reg->array.offset, reg->size); + } else { + if (reg->flags & IR3_REG_HALF) + printf("h"); + if (reg->flags & IR3_REG_CONST) + printf("c%u.%c", reg_num(reg), "xyzw"[reg_comp(reg)]); + else + printf("\x1b[0;31mr%u.%c\x1b[0m", reg_num(reg), "xyzw"[reg_comp(reg)]); + } +} + +static void +tab(int lvl) +{ + for (int i = 0; i < lvl; i++) + printf("\t"); +} + +static void +print_instr(struct ir3_instruction *instr, int lvl) +{ + unsigned i; + + tab(lvl); + + print_instr_name(instr); + for (i = 0; i < instr->regs_count; i++) { + struct ir3_register *reg = instr->regs[i]; + printf(i ? ", " : " "); + print_reg_name(reg); + } + + if (instr->address) { + printf(", address=_"); + printf("["); + print_instr_name(instr->address); + printf("]"); + } + + if (instr->cp.left) { + printf(", left=_"); + printf("["); + print_instr_name(instr->cp.left); + printf("]"); + } + + if (instr->cp.right) { + printf(", right=_"); + printf("["); + print_instr_name(instr->cp.right); + printf("]"); + } + + if (instr->opc == OPC_META_FO) { + printf(", off=%d", instr->fo.off); + } + + if (is_flow(instr) && instr->cat0.target) { + /* the predicate register src is implied: */ + if (instr->opc == OPC_BR) { + printf(" %sp0.x", instr->cat0.inv ? "!" 
: ""); + } + printf(", target=block%u", block_id(instr->cat0.target)); + } + + if (instr->deps_count) { + printf(", false-deps:"); + for (unsigned i = 0; i < instr->deps_count; i++) { + if (i > 0) + printf(", "); + printf("_["); + print_instr_name(instr->deps[i]); + printf("]"); + } + } + + printf("\n"); +} + +void ir3_print_instr(struct ir3_instruction *instr) +{ + print_instr(instr, 0); +} + +static void +print_block(struct ir3_block *block, int lvl) +{ + tab(lvl); printf("block%u {\n", block_id(block)); + + if (block->predecessors_count > 0) { + tab(lvl+1); + printf("pred: "); + for (unsigned i = 0; i < block->predecessors_count; i++) { + if (i) + printf(", "); + printf("block%u", block_id(block->predecessors[i])); + } + printf("\n"); + } + + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + print_instr(instr, lvl+1); + } + + tab(lvl+1); printf("/* keeps:\n"); + for (unsigned i = 0; i < block->keeps_count; i++) { + print_instr(block->keeps[i], lvl+2); + } + tab(lvl+1); printf(" */\n"); + + if (block->successors[1]) { + /* leading into if/else: */ + tab(lvl+1); + printf("/* succs: if _["); + print_instr_name(block->condition); + printf("] block%u; else block%u; */\n", + block_id(block->successors[0]), + block_id(block->successors[1])); + } else if (block->successors[0]) { + tab(lvl+1); + printf("/* succs: block%u; */\n", + block_id(block->successors[0])); + } + tab(lvl); printf("}\n"); +} + +void +ir3_print(struct ir3 *ir) +{ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) + print_block(block, 0); + + for (unsigned i = 0; i < ir->noutputs; i++) { + if (!ir->outputs[i]) + continue; + printf("out%d: ", i); + print_instr(ir->outputs[i], 0); + } +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_ra.c mesa-19.0.1/src/freedreno/ir3/ir3_ra.c --- mesa-18.3.3/src/freedreno/ir3/ir3_ra.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_ra.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,1128 @@ +/* 
+ * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "util/u_math.h" +#include "util/register_allocate.h" +#include "util/ralloc.h" +#include "util/bitset.h" + +#include "ir3.h" +#include "ir3_compiler.h" + +/* + * Register Assignment: + * + * Uses the register_allocate util, which implements graph coloring + * algo with interference classes. To handle the cases where we need + * consecutive registers (for example, texture sample instructions), + * we model these as larger (double/quad/etc) registers which conflict + * with the corresponding registers in other classes. + * + * Additionally we create additional classes for half-regs, which + * do not conflict with the full-reg classes. We do need at least + * sizes 1-4 (to deal w/ texture sample instructions output to half- + * reg). 
At the moment we don't create the higher order half-reg + * classes as half-reg frequently does not have enough precision + * for texture coords at higher resolutions. + * + * There are some additional cases that we need to handle specially, + * as the graph coloring algo doesn't understand "partial writes". + * For example, a sequence like: + * + * add r0.z, ... + * sam (f32)(xy)r0.x, ... + * ... + * sam (f32)(xyzw)r0.w, r0.x, ... ; 3d texture, so r0.xyz are coord + * + * In this scenario, we treat r0.xyz as class size 3, which is written + * (from a use/def perspective) at the 'add' instruction and ignore the + * subsequent partial writes to r0.xy. So the 'add r0.z, ...' is the + * defining instruction, as it is the first to partially write r0.xyz. + * + * Note i965 has a similar scenario, which they solve with a virtual + * LOAD_PAYLOAD instruction which gets turned into multiple MOV's after + * register assignment. But for us that is horrible from a scheduling + * standpoint. Instead what we do is use idea of 'definer' instruction. + * Ie. the first instruction (lowest ip) to write to the variable is the + * one we consider from use/def perspective when building interference + * graph. (Other instructions which write other variable components + * just define the variable some more.) + * + * Arrays of arbitrary size are handled via pre-coloring a consecutive + * sequence of registers. Additional scalar (single component) reg + * names are allocated starting at ctx->class_base[total_class_count] + * (see arr->base), which are pre-colored. In the use/def graph direct + * access is treated as a single element use/def, and indirect access + * is treated as use or def of all array elements. (Only the first + * def is tracked, in case of multiple indirect writes, etc.) + * + * TODO arrays that fit in one of the pre-defined class sizes should + * not need to be pre-colored, but instead could be given a normal + * vreg name. 
(Ignoring this for now since it is a good way to work + * out the kinks with arbitrary sized arrays.) + * + * TODO might be easier for debugging to split this into two passes, + * the first assigning vreg names in a way that we could ir3_print() + * the result. + */ + +static const unsigned class_sizes[] = { + 1, 2, 3, 4, + 4 + 4, /* txd + 1d/2d */ + 4 + 6, /* txd + 3d */ +}; +#define class_count ARRAY_SIZE(class_sizes) + +static const unsigned half_class_sizes[] = { + 1, 2, 3, 4, +}; +#define half_class_count ARRAY_SIZE(half_class_sizes) + +/* seems to just be used for compute shaders? Seems like vec1 and vec3 + * are sufficient (for now?) + */ +static const unsigned high_class_sizes[] = { + 1, 3, +}; +#define high_class_count ARRAY_SIZE(high_class_sizes) + +#define total_class_count (class_count + half_class_count + high_class_count) + +/* Below a0.x are normal regs. RA doesn't need to assign a0.x/p0.x. */ +#define NUM_REGS (4 * 48) /* r0 to r47 */ +#define NUM_HIGH_REGS (4 * 8) /* r48 to r55 */ +#define FIRST_HIGH_REG (4 * 48) +/* Number of virtual regs in a given class: */ +#define CLASS_REGS(i) (NUM_REGS - (class_sizes[i] - 1)) +#define HALF_CLASS_REGS(i) (NUM_REGS - (half_class_sizes[i] - 1)) +#define HIGH_CLASS_REGS(i) (NUM_HIGH_REGS - (high_class_sizes[i] - 1)) + +#define HALF_OFFSET (class_count) +#define HIGH_OFFSET (class_count + half_class_count) + +/* register-set, created one time, used for all shaders: */ +struct ir3_ra_reg_set { + struct ra_regs *regs; + unsigned int classes[class_count]; + unsigned int half_classes[half_class_count]; + unsigned int high_classes[high_class_count]; + /* maps flat virtual register space to base gpr: */ + uint16_t *ra_reg_to_gpr; + /* maps cls,gpr to flat virtual register space: */ + uint16_t **gpr_to_ra_reg; +}; + +static void +build_q_values(unsigned int **q_values, unsigned off, + const unsigned *sizes, unsigned count) +{ + for (unsigned i = 0; i < count; i++) { + q_values[i + off] = rzalloc_array(q_values, 
unsigned, total_class_count); + + /* From register_allocate.c: + * + * q(B,C) (indexed by C, B is this register class) in + * Runeson/Nyström paper. This is "how many registers of B could + * the worst choice register from C conflict with". + * + * If we just let the register allocation algorithm compute these + * values, is extremely expensive. However, since all of our + * registers are laid out, we can very easily compute them + * ourselves. View the register from C as fixed starting at GRF n + * somewhere in the middle, and the register from B as sliding back + * and forth. Then the first register to conflict from B is the + * one starting at n - class_size[B] + 1 and the last register to + * conflict will start at n + class_size[B] - 1. Therefore, the + * number of conflicts from B is class_size[B] + class_size[C] - 1. + * + * +-+-+-+-+-+-+ +-+-+-+-+-+-+ + * B | | | | | |n| --> | | | | | | | + * +-+-+-+-+-+-+ +-+-+-+-+-+-+ + * +-+-+-+-+-+ + * C |n| | | | | + * +-+-+-+-+-+ + * + * (Idea copied from brw_fs_reg_allocate.cpp) + */ + for (unsigned j = 0; j < count; j++) + q_values[i + off][j + off] = sizes[i] + sizes[j] - 1; + } +} + +/* One-time setup of RA register-set, which describes all the possible + * "virtual" registers and their interferences. Ie. double register + * occupies (and conflicts with) two single registers, and so forth. + * Since registers do not need to be aligned to their class size, they + * can conflict with other registers in the same class too. Ie: + * + * Single (base) | Double + * --------------+--------------- + * R0 | D0 + * R1 | D0 D1 + * R2 | D1 D2 + * R3 | D2 + * .. and so on.. + * + * (NOTE the disassembler uses notation like r0.x/y/z/w but those are + * really just four scalar registers. Don't let that confuse you.) 
+ */ +struct ir3_ra_reg_set * +ir3_ra_alloc_reg_set(struct ir3_compiler *compiler) +{ + struct ir3_ra_reg_set *set = rzalloc(compiler, struct ir3_ra_reg_set); + unsigned ra_reg_count, reg, first_half_reg, first_high_reg, base; + unsigned int **q_values; + + /* calculate # of regs across all classes: */ + ra_reg_count = 0; + for (unsigned i = 0; i < class_count; i++) + ra_reg_count += CLASS_REGS(i); + for (unsigned i = 0; i < half_class_count; i++) + ra_reg_count += HALF_CLASS_REGS(i); + for (unsigned i = 0; i < high_class_count; i++) + ra_reg_count += HIGH_CLASS_REGS(i); + + /* allocate and populate q_values: */ + q_values = ralloc_array(set, unsigned *, total_class_count); + + build_q_values(q_values, 0, class_sizes, class_count); + build_q_values(q_values, HALF_OFFSET, half_class_sizes, half_class_count); + build_q_values(q_values, HIGH_OFFSET, high_class_sizes, high_class_count); + + /* allocate the reg-set.. */ + set->regs = ra_alloc_reg_set(set, ra_reg_count, true); + set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count); + set->gpr_to_ra_reg = ralloc_array(set, uint16_t *, total_class_count); + + /* .. 
and classes */ + reg = 0; + for (unsigned i = 0; i < class_count; i++) { + set->classes[i] = ra_alloc_reg_class(set->regs); + + set->gpr_to_ra_reg[i] = ralloc_array(set, uint16_t, CLASS_REGS(i)); + + for (unsigned j = 0; j < CLASS_REGS(i); j++) { + ra_class_add_reg(set->regs, set->classes[i], reg); + + set->ra_reg_to_gpr[reg] = j; + set->gpr_to_ra_reg[i][j] = reg; + + for (unsigned br = j; br < j + class_sizes[i]; br++) + ra_add_transitive_reg_conflict(set->regs, br, reg); + + reg++; + } + } + + first_half_reg = reg; + base = HALF_OFFSET; + + for (unsigned i = 0; i < half_class_count; i++) { + set->half_classes[i] = ra_alloc_reg_class(set->regs); + + set->gpr_to_ra_reg[base + i] = + ralloc_array(set, uint16_t, HALF_CLASS_REGS(i)); + + for (unsigned j = 0; j < HALF_CLASS_REGS(i); j++) { + ra_class_add_reg(set->regs, set->half_classes[i], reg); + + set->ra_reg_to_gpr[reg] = j; + set->gpr_to_ra_reg[base + i][j] = reg; + + for (unsigned br = j; br < j + half_class_sizes[i]; br++) + ra_add_transitive_reg_conflict(set->regs, br + first_half_reg, reg); + + reg++; + } + } + + first_high_reg = reg; + base = HIGH_OFFSET; + + for (unsigned i = 0; i < high_class_count; i++) { + set->high_classes[i] = ra_alloc_reg_class(set->regs); + + set->gpr_to_ra_reg[base + i] = + ralloc_array(set, uint16_t, HIGH_CLASS_REGS(i)); + + for (unsigned j = 0; j < HIGH_CLASS_REGS(i); j++) { + ra_class_add_reg(set->regs, set->high_classes[i], reg); + + set->ra_reg_to_gpr[reg] = j; + set->gpr_to_ra_reg[base + i][j] = reg; + + for (unsigned br = j; br < j + high_class_sizes[i]; br++) + ra_add_transitive_reg_conflict(set->regs, br + first_high_reg, reg); + + reg++; + } + } + + /* starting a6xx, half precision regs conflict w/ full precision regs: */ + if (compiler->gpu_id >= 600) { + /* because of transitivity, we can get away with just setting up + * conflicts between the first class of full and half regs: + */ + for (unsigned j = 0; j < CLASS_REGS(0) / 2; j++) { + unsigned freg = 
set->gpr_to_ra_reg[0][j]; + unsigned hreg0 = set->gpr_to_ra_reg[HALF_OFFSET][(j * 2) + 0]; + unsigned hreg1 = set->gpr_to_ra_reg[HALF_OFFSET][(j * 2) + 1]; + + ra_add_transitive_reg_conflict(set->regs, freg, hreg0); + ra_add_transitive_reg_conflict(set->regs, freg, hreg1); + } + + // TODO also need to update q_values, but for now: + ra_set_finalize(set->regs, NULL); + } else { + ra_set_finalize(set->regs, q_values); + } + + ralloc_free(q_values); + + return set; +} + +/* additional block-data (per-block) */ +struct ir3_ra_block_data { + BITSET_WORD *def; /* variables defined before used in block */ + BITSET_WORD *use; /* variables used before defined in block */ + BITSET_WORD *livein; /* which defs reach entry point of block */ + BITSET_WORD *liveout; /* which defs reach exit point of block */ +}; + +/* additional instruction-data (per-instruction) */ +struct ir3_ra_instr_data { + /* cached instruction 'definer' info: */ + struct ir3_instruction *defn; + int off, sz, cls; +}; + +/* register-assign context, per-shader */ +struct ir3_ra_ctx { + struct ir3 *ir; + gl_shader_stage type; + bool frag_face; + + struct ir3_ra_reg_set *set; + struct ra_graph *g; + unsigned alloc_count; + /* one per class, plus one slot for arrays: */ + unsigned class_alloc_count[total_class_count + 1]; + unsigned class_base[total_class_count + 1]; + unsigned instr_cnt; + unsigned *def, *use; /* def/use table */ + struct ir3_ra_instr_data *instrd; +}; + +/* does it conflict? 
*/ +static inline bool +intersects(unsigned a_start, unsigned a_end, unsigned b_start, unsigned b_end) +{ + return !((a_start >= b_end) || (b_start >= a_end)); +} + +static bool +is_half(struct ir3_instruction *instr) +{ + return !!(instr->regs[0]->flags & IR3_REG_HALF); +} + +static bool +is_high(struct ir3_instruction *instr) +{ + return !!(instr->regs[0]->flags & IR3_REG_HIGH); +} + +static int +size_to_class(unsigned sz, bool half, bool high) +{ + if (high) { + for (unsigned i = 0; i < high_class_count; i++) + if (high_class_sizes[i] >= sz) + return i + HIGH_OFFSET; + } else if (half) { + for (unsigned i = 0; i < half_class_count; i++) + if (half_class_sizes[i] >= sz) + return i + HALF_OFFSET; + } else { + for (unsigned i = 0; i < class_count; i++) + if (class_sizes[i] >= sz) + return i; + } + debug_assert(0); + return -1; +} + +static bool +writes_gpr(struct ir3_instruction *instr) +{ + if (is_store(instr)) + return false; + /* is dest a normal temp register: */ + struct ir3_register *reg = instr->regs[0]; + if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED)) + return false; + if ((reg->num == regid(REG_A0, 0)) || + (reg->num == regid(REG_P0, 0))) + return false; + return true; +} + +static bool +instr_before(struct ir3_instruction *a, struct ir3_instruction *b) +{ + if (a->flags & IR3_INSTR_UNUSED) + return false; + return (a->ip < b->ip); +} + +static struct ir3_instruction * +get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, + int *sz, int *off) +{ + struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; + struct ir3_instruction *d = NULL; + + if (id->defn) { + *sz = id->sz; + *off = id->off; + return id->defn; + } + + if (instr->opc == OPC_META_FI) { + /* What about the case where collect is subset of array, we + * need to find the distance between where actual array starts + * and fanin.. that probably doesn't happen currently. 
+ */ + struct ir3_register *src; + int dsz, doff; + + /* note: don't use foreach_ssa_src as this gets called once + * while assigning regs (which clears SSA flag) + */ + foreach_src_n(src, n, instr) { + struct ir3_instruction *dd; + if (!src->instr) + continue; + + dd = get_definer(ctx, src->instr, &dsz, &doff); + + if ((!d) || instr_before(dd, d)) { + d = dd; + *sz = dsz; + *off = doff - n; + } + } + + } else if (instr->cp.right || instr->cp.left) { + /* covers also the meta:fo case, which ends up w/ single + * scalar instructions for each component: + */ + struct ir3_instruction *f = ir3_neighbor_first(instr); + + /* by definition, the entire sequence forms one linked list + * of single scalar register nodes (even if some of them may + * be fanouts from a texture sample (for example) instr. We + * just need to walk the list finding the first element of + * the group defined (lowest ip) + */ + int cnt = 0; + + /* need to skip over unused in the group: */ + while (f && (f->flags & IR3_INSTR_UNUSED)) { + f = f->cp.right; + cnt++; + } + + while (f) { + if ((!d) || instr_before(f, d)) + d = f; + if (f == instr) + *off = cnt; + f = f->cp.right; + cnt++; + } + + *sz = cnt; + + } else { + /* second case is looking directly at the instruction which + * produces multiple values (eg, texture sample), rather + * than the fanout nodes that point back to that instruction. + * This isn't quite right, because it may be part of a larger + * group, such as: + * + * sam (f32)(xyzw)r0.x, ... + * add r1.x, ... + * add r1.y, ... + * sam (f32)(xyzw)r2.x, r0.w <-- (r0.w, r1.x, r1.y) + * + * need to come up with a better way to handle that case. 
+ */ + if (instr->address) { + *sz = instr->regs[0]->size; + } else { + *sz = util_last_bit(instr->regs[0]->wrmask); + } + *off = 0; + d = instr; + } + + if (d->opc == OPC_META_FO) { + struct ir3_instruction *dd; + int dsz, doff; + + dd = get_definer(ctx, d->regs[1]->instr, &dsz, &doff); + + /* by definition, should come before: */ + debug_assert(instr_before(dd, d)); + + *sz = MAX2(*sz, dsz); + + debug_assert(instr->opc == OPC_META_FO); + *off = MAX2(*off, instr->fo.off); + + d = dd; + } + + debug_assert(d->opc != OPC_META_FO); + + id->defn = d; + id->sz = *sz; + id->off = *off; + + return d; +} + +static void +ra_block_find_definers(struct ir3_ra_ctx *ctx, struct ir3_block *block) +{ + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; + if (instr->regs_count == 0) + continue; + /* couple special cases: */ + if (writes_addr(instr) || writes_pred(instr)) { + id->cls = -1; + } else if (instr->regs[0]->flags & IR3_REG_ARRAY) { + id->cls = total_class_count; + } else { + id->defn = get_definer(ctx, instr, &id->sz, &id->off); + id->cls = size_to_class(id->sz, is_half(id->defn), is_high(id->defn)); + } + } +} + +/* give each instruction a name (and ip), and count up the # of names + * of each class + */ +static void +ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block) +{ + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; + +#ifdef DEBUG + instr->name = ~0; +#endif + + ctx->instr_cnt++; + + if (instr->regs_count == 0) + continue; + + if (!writes_gpr(instr)) + continue; + + if (id->defn != instr) + continue; + + /* arrays which don't fit in one of the pre-defined class + * sizes are pre-colored: + */ + if ((id->cls >= 0) && (id->cls < total_class_count)) { + instr->name = ctx->class_alloc_count[id->cls]++; + ctx->alloc_count++; + } + } +} + +static void +ra_init(struct 
ir3_ra_ctx *ctx) +{ + unsigned n, base; + + ir3_clear_mark(ctx->ir); + n = ir3_count_instructions(ctx->ir); + + ctx->instrd = rzalloc_array(NULL, struct ir3_ra_instr_data, n); + + list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { + ra_block_find_definers(ctx, block); + } + + list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { + ra_block_name_instructions(ctx, block); + } + + /* figure out the base register name for each class. The + * actual ra name is class_base[cls] + instr->name; + */ + ctx->class_base[0] = 0; + for (unsigned i = 1; i <= total_class_count; i++) { + ctx->class_base[i] = ctx->class_base[i-1] + + ctx->class_alloc_count[i-1]; + } + + /* and vreg names for array elements: */ + base = ctx->class_base[total_class_count]; + list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) { + arr->base = base; + ctx->class_alloc_count[total_class_count] += arr->length; + base += arr->length; + } + ctx->alloc_count += ctx->class_alloc_count[total_class_count]; + + ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count); + ralloc_steal(ctx->g, ctx->instrd); + ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count); + ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count); +} + +static unsigned +__ra_name(struct ir3_ra_ctx *ctx, int cls, struct ir3_instruction *defn) +{ + unsigned name; + debug_assert(cls >= 0); + debug_assert(cls < total_class_count); /* we shouldn't get arrays here.. 
*/ + name = ctx->class_base[cls] + defn->name; + debug_assert(name < ctx->alloc_count); + return name; +} + +static int +ra_name(struct ir3_ra_ctx *ctx, struct ir3_ra_instr_data *id) +{ + /* TODO handle name mapping for arrays */ + return __ra_name(ctx, id->cls, id->defn); +} + +static void +ra_destroy(struct ir3_ra_ctx *ctx) +{ + ralloc_free(ctx->g); +} + +static void +ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) +{ + struct ir3_ra_block_data *bd; + unsigned bitset_words = BITSET_WORDS(ctx->alloc_count); + +#define def(name, instr) \ + do { \ + /* defined on first write: */ \ + if (!ctx->def[name]) \ + ctx->def[name] = instr->ip; \ + ctx->use[name] = instr->ip; \ + BITSET_SET(bd->def, name); \ + } while(0); + +#define use(name, instr) \ + do { \ + ctx->use[name] = MAX2(ctx->use[name], instr->ip); \ + if (!BITSET_TEST(bd->def, name)) \ + BITSET_SET(bd->use, name); \ + } while(0); + + bd = rzalloc(ctx->g, struct ir3_ra_block_data); + + bd->def = rzalloc_array(bd, BITSET_WORD, bitset_words); + bd->use = rzalloc_array(bd, BITSET_WORD, bitset_words); + bd->livein = rzalloc_array(bd, BITSET_WORD, bitset_words); + bd->liveout = rzalloc_array(bd, BITSET_WORD, bitset_words); + + block->data = bd; + + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + struct ir3_instruction *src; + struct ir3_register *reg; + + if (instr->regs_count == 0) + continue; + + /* There are a couple special cases to deal with here: + * + * fanout: used to split values from a higher class to a lower + * class, for example split the results of a texture fetch + * into individual scalar values; We skip over these from + * a 'def' perspective, and for a 'use' we walk the chain + * up to the defining instruction. + * + * fanin: used to collect values from lower class and assemble + * them together into a higher class, for example arguments + * to texture sample instructions; We consider these to be + * defined at the earliest fanin source. 
+ * + * Most of this is handled in the get_definer() helper. + * + * In either case, we trace the instruction back to the original + * definer and consider that as the def/use ip. + */ + + if (writes_gpr(instr)) { + struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; + struct ir3_register *dst = instr->regs[0]; + + if (dst->flags & IR3_REG_ARRAY) { + struct ir3_array *arr = + ir3_lookup_array(ctx->ir, dst->array.id); + unsigned i; + + arr->start_ip = MIN2(arr->start_ip, instr->ip); + arr->end_ip = MAX2(arr->end_ip, instr->ip); + + /* set the node class now.. in case we don't encounter + * this array dst again. From register_alloc algo's + * perspective, these are all single/scalar regs: + */ + for (i = 0; i < arr->length; i++) { + unsigned name = arr->base + i; + ra_set_node_class(ctx->g, name, ctx->set->classes[0]); + } + + /* indirect write is treated like a write to all array + * elements, since we don't know which one is actually + * written: + */ + if (dst->flags & IR3_REG_RELATIV) { + for (i = 0; i < arr->length; i++) { + unsigned name = arr->base + i; + def(name, instr); + } + } else { + unsigned name = arr->base + dst->array.offset; + def(name, instr); + } + + } else if (id->defn == instr) { + unsigned name = ra_name(ctx, id); + + /* since we are in SSA at this point: */ + debug_assert(!BITSET_TEST(bd->use, name)); + + def(name, id->defn); + + if (is_high(id->defn)) { + ra_set_node_class(ctx->g, name, + ctx->set->high_classes[id->cls - HIGH_OFFSET]); + } else if (is_half(id->defn)) { + ra_set_node_class(ctx->g, name, + ctx->set->half_classes[id->cls - HALF_OFFSET]); + } else { + ra_set_node_class(ctx->g, name, + ctx->set->classes[id->cls]); + } + } + } + + foreach_src(reg, instr) { + if (reg->flags & IR3_REG_ARRAY) { + struct ir3_array *arr = + ir3_lookup_array(ctx->ir, reg->array.id); + arr->start_ip = MIN2(arr->start_ip, instr->ip); + arr->end_ip = MAX2(arr->end_ip, instr->ip); + + /* indirect read is treated like a read fromall array + * elements, 
since we don't know which one is actually + * read: + */ + if (reg->flags & IR3_REG_RELATIV) { + unsigned i; + for (i = 0; i < arr->length; i++) { + unsigned name = arr->base + i; + use(name, instr); + } + } else { + unsigned name = arr->base + reg->array.offset; + use(name, instr); + /* NOTE: arrays are not SSA so unconditionally + * set use bit: + */ + BITSET_SET(bd->use, name); + debug_assert(reg->array.offset < arr->length); + } + } else if ((src = ssa(reg)) && writes_gpr(src)) { + unsigned name = ra_name(ctx, &ctx->instrd[src->ip]); + use(name, instr); + } + } + } +} + +static bool +ra_compute_livein_liveout(struct ir3_ra_ctx *ctx) +{ + unsigned bitset_words = BITSET_WORDS(ctx->alloc_count); + bool progress = false; + + list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { + struct ir3_ra_block_data *bd = block->data; + + /* update livein: */ + for (unsigned i = 0; i < bitset_words; i++) { + BITSET_WORD new_livein = + (bd->use[i] | (bd->liveout[i] & ~bd->def[i])); + + if (new_livein & ~bd->livein[i]) { + bd->livein[i] |= new_livein; + progress = true; + } + } + + /* update liveout: */ + for (unsigned j = 0; j < ARRAY_SIZE(block->successors); j++) { + struct ir3_block *succ = block->successors[j]; + struct ir3_ra_block_data *succ_bd; + + if (!succ) + continue; + + succ_bd = succ->data; + + for (unsigned i = 0; i < bitset_words; i++) { + BITSET_WORD new_liveout = + (succ_bd->livein[i] & ~bd->liveout[i]); + + if (new_liveout) { + bd->liveout[i] |= new_liveout; + progress = true; + } + } + } + } + + return progress; +} + +static void +print_bitset(const char *name, BITSET_WORD *bs, unsigned cnt) +{ + bool first = true; + debug_printf(" %s:", name); + for (unsigned i = 0; i < cnt; i++) { + if (BITSET_TEST(bs, i)) { + if (!first) + debug_printf(","); + debug_printf(" %04u", i); + first = false; + } + } + debug_printf("\n"); +} + +static void +ra_add_interference(struct ir3_ra_ctx *ctx) +{ + struct ir3 *ir = ctx->ir; + + /* initialize array live 
ranges: */ + list_for_each_entry (struct ir3_array, arr, &ir->array_list, node) { + arr->start_ip = ~0; + arr->end_ip = 0; + } + + /* compute live ranges (use/def) on a block level, also updating + * block's def/use bitmasks (used below to calculate per-block + * livein/liveout): + */ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + ra_block_compute_live_ranges(ctx, block); + } + + /* update per-block livein/liveout: */ + while (ra_compute_livein_liveout(ctx)) {} + + if (ir3_shader_debug & IR3_DBG_OPTMSGS) { + debug_printf("AFTER LIVEIN/OUT:\n"); + ir3_print(ir); + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + struct ir3_ra_block_data *bd = block->data; + debug_printf("block%u:\n", block_id(block)); + print_bitset(" def", bd->def, ctx->alloc_count); + print_bitset(" use", bd->use, ctx->alloc_count); + print_bitset(" l/i", bd->livein, ctx->alloc_count); + print_bitset(" l/o", bd->liveout, ctx->alloc_count); + } + list_for_each_entry (struct ir3_array, arr, &ir->array_list, node) { + debug_printf("array%u:\n", arr->id); + debug_printf(" length: %u\n", arr->length); + debug_printf(" start_ip: %u\n", arr->start_ip); + debug_printf(" end_ip: %u\n", arr->end_ip); + } + } + + /* extend start/end ranges based on livein/liveout info from cfg: */ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + struct ir3_ra_block_data *bd = block->data; + + for (unsigned i = 0; i < ctx->alloc_count; i++) { + if (BITSET_TEST(bd->livein, i)) { + ctx->def[i] = MIN2(ctx->def[i], block->start_ip); + ctx->use[i] = MAX2(ctx->use[i], block->start_ip); + } + + if (BITSET_TEST(bd->liveout, i)) { + ctx->def[i] = MIN2(ctx->def[i], block->end_ip); + ctx->use[i] = MAX2(ctx->use[i], block->end_ip); + } + } + + list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) { + for (unsigned i = 0; i < arr->length; i++) { + if (BITSET_TEST(bd->livein, i + arr->base)) { + arr->start_ip = MIN2(arr->start_ip, 
block->start_ip); + } + if (BITSET_TEST(bd->livein, i + arr->base)) { + arr->end_ip = MAX2(arr->end_ip, block->end_ip); + } + } + } + } + + /* need to fix things up to keep outputs live: */ + for (unsigned i = 0; i < ir->noutputs; i++) { + struct ir3_instruction *instr = ir->outputs[i]; + if (!instr) + continue; + unsigned name = ra_name(ctx, &ctx->instrd[instr->ip]); + ctx->use[name] = ctx->instr_cnt; + } + + for (unsigned i = 0; i < ctx->alloc_count; i++) { + for (unsigned j = 0; j < ctx->alloc_count; j++) { + if (intersects(ctx->def[i], ctx->use[i], + ctx->def[j], ctx->use[j])) { + ra_add_node_interference(ctx->g, i, j); + } + } + } +} + +/* some instructions need fix-up if dst register is half precision: */ +static void fixup_half_instr_dst(struct ir3_instruction *instr) +{ + switch (opc_cat(instr->opc)) { + case 1: /* move instructions */ + instr->cat1.dst_type = half_type(instr->cat1.dst_type); + break; + case 3: + switch (instr->opc) { + case OPC_MAD_F32: + instr->opc = OPC_MAD_F16; + break; + case OPC_SEL_B32: + instr->opc = OPC_SEL_B16; + break; + case OPC_SEL_S32: + instr->opc = OPC_SEL_S16; + break; + case OPC_SEL_F32: + instr->opc = OPC_SEL_F16; + break; + case OPC_SAD_S32: + instr->opc = OPC_SAD_S16; + break; + /* instructions may already be fixed up: */ + case OPC_MAD_F16: + case OPC_SEL_B16: + case OPC_SEL_S16: + case OPC_SEL_F16: + case OPC_SAD_S16: + break; + default: + assert(0); + break; + } + break; + case 5: + instr->cat5.type = half_type(instr->cat5.type); + break; + } +} +/* some instructions need fix-up if src register is half precision: */ +static void fixup_half_instr_src(struct ir3_instruction *instr) +{ + switch (instr->opc) { + case OPC_MOV: + instr->cat1.src_type = half_type(instr->cat1.src_type); + break; + default: + break; + } +} + +/* NOTE: instr could be NULL for IR3_REG_ARRAY case, for the first + * array access(es) which do not have any previous access to depend + * on from scheduling point of view + */ +static void 
+reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg, + struct ir3_instruction *instr) +{ + struct ir3_ra_instr_data *id; + + if (reg->flags & IR3_REG_ARRAY) { + struct ir3_array *arr = + ir3_lookup_array(ctx->ir, reg->array.id); + unsigned name = arr->base + reg->array.offset; + unsigned r = ra_get_node_reg(ctx->g, name); + unsigned num = ctx->set->ra_reg_to_gpr[r]; + + if (reg->flags & IR3_REG_RELATIV) { + reg->array.offset = num; + } else { + reg->num = num; + reg->flags &= ~IR3_REG_SSA; + } + + reg->flags &= ~IR3_REG_ARRAY; + } else if ((id = &ctx->instrd[instr->ip]) && id->defn) { + unsigned name = ra_name(ctx, id); + unsigned r = ra_get_node_reg(ctx->g, name); + unsigned num = ctx->set->ra_reg_to_gpr[r] + id->off; + + debug_assert(!(reg->flags & IR3_REG_RELATIV)); + + if (is_high(id->defn)) + num += FIRST_HIGH_REG; + + reg->num = num; + reg->flags &= ~IR3_REG_SSA; + + if (is_half(id->defn)) + reg->flags |= IR3_REG_HALF; + } +} + +static void +ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block) +{ + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + struct ir3_register *reg; + + if (instr->regs_count == 0) + continue; + + if (writes_gpr(instr)) { + reg_assign(ctx, instr->regs[0], instr); + if (instr->regs[0]->flags & IR3_REG_HALF) + fixup_half_instr_dst(instr); + } + + foreach_src_n(reg, n, instr) { + struct ir3_instruction *src = reg->instr; + /* Note: reg->instr could be null for IR3_REG_ARRAY */ + if (!(src || (reg->flags & IR3_REG_ARRAY))) + continue; + reg_assign(ctx, instr->regs[n+1], src); + if (instr->regs[n+1]->flags & IR3_REG_HALF) + fixup_half_instr_src(instr); + } + } +} + +static int +ra_alloc(struct ir3_ra_ctx *ctx) +{ + /* pre-assign array elements: + */ + list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) { + unsigned base = 0; + + if (arr->end_ip == 0) + continue; + + /* figure out what else we conflict with which has already + * been assigned: + */ +retry: + 
list_for_each_entry (struct ir3_array, arr2, &ctx->ir->array_list, node) { + if (arr2 == arr) + break; + if (arr2->end_ip == 0) + continue; + /* if it intersects with liverange AND register range.. */ + if (intersects(arr->start_ip, arr->end_ip, + arr2->start_ip, arr2->end_ip) && + intersects(base, base + arr->length, + arr2->reg, arr2->reg + arr2->length)) { + base = MAX2(base, arr2->reg + arr2->length); + goto retry; + } + } + + arr->reg = base; + + for (unsigned i = 0; i < arr->length; i++) { + unsigned name, reg; + + name = arr->base + i; + reg = ctx->set->gpr_to_ra_reg[0][base++]; + + ra_set_node_reg(ctx->g, name, reg); + } + } + + if (!ra_allocate(ctx->g)) + return -1; + + list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { + ra_block_alloc(ctx, block); + } + + return 0; +} + +int ir3_ra(struct ir3 *ir, gl_shader_stage type, + bool frag_coord, bool frag_face) +{ + struct ir3_ra_ctx ctx = { + .ir = ir, + .type = type, + .frag_face = frag_face, + .set = ir->compiler->set, + }; + int ret; + + ra_init(&ctx); + ra_add_interference(&ctx); + ret = ra_alloc(&ctx); + ra_destroy(&ctx); + + return ret; +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_sched.c mesa-19.0.1/src/freedreno/ir3/ir3_sched.c --- mesa-18.3.3/src/freedreno/ir3/ir3_sched.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_sched.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,818 @@ +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + 
* paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + + +#include "util/u_math.h" + +#include "ir3.h" + +/* + * Instruction Scheduling: + * + * A recursive depth based scheduling algo. Recursively find an eligible + * instruction to schedule from the deepest instruction (recursing through + * it's unscheduled src instructions). Normally this would result in a + * lot of re-traversal of the same instructions, so we cache results in + * instr->data (and clear cached results that would be no longer valid + * after scheduling an instruction). + * + * There are a few special cases that need to be handled, since sched + * is currently independent of register allocation. Usages of address + * register (a0.x) or predicate register (p0.x) must be serialized. Ie. + * if you have two pairs of instructions that write the same special + * register and then read it, then those pairs cannot be interleaved. + * To solve this, when we are in such a scheduling "critical section", + * and we encounter a conflicting write to a special register, we try + * to schedule any remaining instructions that use that value first. 
+ */ + +struct ir3_sched_ctx { + struct ir3_block *block; /* the current block */ + struct list_head depth_list; /* depth sorted unscheduled instrs */ + struct ir3_instruction *scheduled; /* last scheduled instr XXX remove*/ + struct ir3_instruction *addr; /* current a0.x user, if any */ + struct ir3_instruction *pred; /* current p0.x user, if any */ + bool error; +}; + +static bool is_sfu_or_mem(struct ir3_instruction *instr) +{ + return is_sfu(instr) || is_mem(instr); +} + +#define NULL_INSTR ((void *)~0) + +static void +clear_cache(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) +{ + list_for_each_entry (struct ir3_instruction, instr2, &ctx->depth_list, node) { + if ((instr2->data == instr) || (instr2->data == NULL_INSTR) || !instr) + instr2->data = NULL; + } +} + +static void +schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) +{ + debug_assert(ctx->block == instr->block); + + /* maybe there is a better way to handle this than just stuffing + * a nop.. ideally we'd know about this constraint in the + * scheduling and depth calculation.. + */ + if (ctx->scheduled && is_sfu_or_mem(ctx->scheduled) && is_sfu_or_mem(instr)) + ir3_NOP(ctx->block); + + /* remove from depth list: + */ + list_delinit(&instr->node); + + if (writes_addr(instr)) { + debug_assert(ctx->addr == NULL); + ctx->addr = instr; + } + + if (writes_pred(instr)) { + debug_assert(ctx->pred == NULL); + ctx->pred = instr; + } + + instr->flags |= IR3_INSTR_MARK; + + list_addtail(&instr->node, &instr->block->instr_list); + ctx->scheduled = instr; + + if (writes_addr(instr) || writes_pred(instr) || is_input(instr)) { + clear_cache(ctx, NULL); + } else { + /* invalidate only the necessary entries.. 
*/ + clear_cache(ctx, instr); + } +} + +static struct ir3_instruction * +deepest(struct ir3_instruction **srcs, unsigned nsrcs) +{ + struct ir3_instruction *d = NULL; + unsigned i = 0, id = 0; + + while ((i < nsrcs) && !(d = srcs[id = i])) + i++; + + if (!d) + return NULL; + + for (; i < nsrcs; i++) + if (srcs[i] && (srcs[i]->depth > d->depth)) + d = srcs[id = i]; + + srcs[id] = NULL; + + return d; +} + +/** + * @block: the block to search in, starting from end; in first pass, + * this will be the block the instruction would be inserted into + * (but has not yet, ie. it only contains already scheduled + * instructions). For intra-block scheduling (second pass), this + * would be one of the predecessor blocks. + * @instr: the instruction to search for + * @maxd: max distance, bail after searching this # of instruction + * slots, since it means the instruction we are looking for is + * far enough away + * @pred: if true, recursively search into predecessor blocks to + * find the worst case (shortest) distance (only possible after + * individual blocks are all scheduled + */ +static unsigned +distance(struct ir3_block *block, struct ir3_instruction *instr, + unsigned maxd, bool pred) +{ + unsigned d = 0; + + list_for_each_entry_rev (struct ir3_instruction, n, &block->instr_list, node) { + if ((n == instr) || (d >= maxd)) + return d; + /* NOTE: don't count branch/jump since we don't know yet if they will + * be eliminated later in resolve_jumps().. really should do that + * earlier so we don't have this constraint. + */ + if (is_alu(n) || (is_flow(n) && (n->opc != OPC_JUMP) && (n->opc != OPC_BR))) + d++; + } + + /* if coming from a predecessor block, assume it is assigned far + * enough away.. we'll fix up later. 
+ */ + if (!pred) + return maxd; + + if (pred && (block->data != block)) { + /* Search into predecessor blocks, finding the one with the + * shortest distance, since that will be the worst case + */ + unsigned min = maxd - d; + + /* (ab)use block->data to prevent recursion: */ + block->data = block; + + for (unsigned i = 0; i < block->predecessors_count; i++) { + unsigned n; + + n = distance(block->predecessors[i], instr, min, pred); + + min = MIN2(min, n); + } + + block->data = NULL; + d += min; + } + + return d; +} + +/* calculate delay for specified src: */ +static unsigned +delay_calc_srcn(struct ir3_block *block, + struct ir3_instruction *assigner, + struct ir3_instruction *consumer, + unsigned srcn, bool soft, bool pred) +{ + unsigned delay = 0; + + if (is_meta(assigner)) { + struct ir3_instruction *src; + foreach_ssa_src(src, assigner) { + unsigned d; + d = delay_calc_srcn(block, src, consumer, srcn, soft, pred); + delay = MAX2(delay, d); + } + } else { + if (soft) { + if (is_sfu(assigner)) { + delay = 4; + } else { + delay = ir3_delayslots(assigner, consumer, srcn); + } + } else { + delay = ir3_delayslots(assigner, consumer, srcn); + } + delay -= distance(block, assigner, delay, pred); + } + + return delay; +} + +/* calculate delay for instruction (maximum of delay for all srcs): */ +static unsigned +delay_calc(struct ir3_block *block, struct ir3_instruction *instr, + bool soft, bool pred) +{ + unsigned delay = 0; + struct ir3_instruction *src; + + foreach_ssa_src_n(src, i, instr) { + unsigned d; + d = delay_calc_srcn(block, src, instr, i, soft, pred); + delay = MAX2(delay, d); + } + + return delay; +} + +struct ir3_sched_notes { + /* there is at least one kill which could be scheduled, except + * for unscheduled bary.f's: + */ + bool blocked_kill; + /* there is at least one instruction that could be scheduled, + * except for conflicting address/predicate register usage: + */ + bool addr_conflict, pred_conflict; +}; + +static bool is_scheduled(struct 
ir3_instruction *instr) +{ + return !!(instr->flags & IR3_INSTR_MARK); +} + +/* could an instruction be scheduled if specified ssa src was scheduled? */ +static bool +could_sched(struct ir3_instruction *instr, struct ir3_instruction *src) +{ + struct ir3_instruction *other_src; + foreach_ssa_src(other_src, instr) { + /* if dependency not scheduled, we aren't ready yet: */ + if ((src != other_src) && !is_scheduled(other_src)) { + return false; + } + } + return true; +} + +/* Check if instruction is ok to schedule. Make sure it is not blocked + * by use of addr/predicate register, etc. + */ +static bool +check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, + struct ir3_instruction *instr) +{ + /* For instructions that write address register we need to + * make sure there is at least one instruction that uses the + * addr value which is otherwise ready. + * + * TODO if any instructions use pred register and have other + * src args, we would need to do the same for writes_pred().. + */ + if (writes_addr(instr)) { + struct ir3 *ir = instr->block->shader; + bool ready = false; + for (unsigned i = 0; (i < ir->indirects_count) && !ready; i++) { + struct ir3_instruction *indirect = ir->indirects[i]; + if (!indirect) + continue; + if (indirect->address != instr) + continue; + ready = could_sched(indirect, instr); + } + + /* nothing could be scheduled, so keep looking: */ + if (!ready) + return false; + } + + /* if this is a write to address/predicate register, and that + * register is currently in use, we need to defer until it is + * free: + */ + if (writes_addr(instr) && ctx->addr) { + debug_assert(ctx->addr != instr); + notes->addr_conflict = true; + return false; + } + + if (writes_pred(instr) && ctx->pred) { + debug_assert(ctx->pred != instr); + notes->pred_conflict = true; + return false; + } + + /* if the instruction is a kill, we need to ensure *every* + * bary.f is scheduled. 
The hw seems unhappy if the thread + * gets killed before the end-input (ei) flag is hit. + * + * We could do this by adding each bary.f instruction as + * virtual ssa src for the kill instruction. But we have + * fixed length instr->regs[]. + * + * TODO this wouldn't be quite right if we had multiple + * basic blocks, if any block was conditional. We'd need + * to schedule the bary.f's outside of any block which + * was conditional that contained a kill.. I think.. + */ + if (is_kill(instr)) { + struct ir3 *ir = instr->block->shader; + + for (unsigned i = 0; i < ir->baryfs_count; i++) { + struct ir3_instruction *baryf = ir->baryfs[i]; + if (baryf->flags & IR3_INSTR_UNUSED) + continue; + if (!is_scheduled(baryf)) { + notes->blocked_kill = true; + return false; + } + } + } + + return true; +} + +/* Find the best instruction to schedule from specified instruction or + * recursively it's ssa sources. + */ +static struct ir3_instruction * +find_instr_recursive(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, + struct ir3_instruction *instr) +{ + struct ir3_instruction *srcs[__ssa_src_cnt(instr)]; + struct ir3_instruction *src; + unsigned nsrcs = 0; + + if (is_scheduled(instr)) + return NULL; + + /* use instr->data to cache the results of recursing up the + * instr src's. Otherwise the recursive algo can scale quite + * badly w/ shader size. But this takes some care to clear + * the cache appropriately when instructions are scheduled. 
+ */ + if (instr->data) { + if (instr->data == NULL_INSTR) + return NULL; + return instr->data; + } + + /* find unscheduled srcs: */ + foreach_ssa_src(src, instr) { + if (!is_scheduled(src)) { + debug_assert(nsrcs < ARRAY_SIZE(srcs)); + srcs[nsrcs++] = src; + } + } + + /* if all our src's are already scheduled: */ + if (nsrcs == 0) { + if (check_instr(ctx, notes, instr)) { + instr->data = instr; + return instr; + } + return NULL; + } + + while ((src = deepest(srcs, nsrcs))) { + struct ir3_instruction *candidate; + + candidate = find_instr_recursive(ctx, notes, src); + if (!candidate) + continue; + + if (check_instr(ctx, notes, candidate)) { + instr->data = candidate; + return candidate; + } + } + + instr->data = NULL_INSTR; + return NULL; +} + +/* find instruction to schedule: */ +static struct ir3_instruction * +find_eligible_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, + bool soft) +{ + struct ir3_instruction *best_instr = NULL; + unsigned min_delay = ~0; + + /* TODO we'd really rather use the list/array of block outputs. But we + * don't have such a thing. Recursing *every* instruction in the list + * will result in a lot of repeated traversal, since instructions will + * get traversed both when they appear as ssa src to a later instruction + * as well as where they appear in the depth_list. + */ + list_for_each_entry_rev (struct ir3_instruction, instr, &ctx->depth_list, node) { + struct ir3_instruction *candidate; + unsigned delay; + + candidate = find_instr_recursive(ctx, notes, instr); + if (!candidate) + continue; + + delay = delay_calc(ctx->block, candidate, soft, false); + if (delay < min_delay) { + best_instr = candidate; + min_delay = delay; + } + + if (min_delay == 0) + break; + } + + return best_instr; +} + +/* "spill" the address register by remapping any unscheduled + * instructions which depend on the current address register + * to a clone of the instruction which wrote the address reg. 
+ */ +static struct ir3_instruction * +split_addr(struct ir3_sched_ctx *ctx) +{ + struct ir3 *ir; + struct ir3_instruction *new_addr = NULL; + unsigned i; + + debug_assert(ctx->addr); + + ir = ctx->addr->block->shader; + + for (i = 0; i < ir->indirects_count; i++) { + struct ir3_instruction *indirect = ir->indirects[i]; + + if (!indirect) + continue; + + /* skip instructions already scheduled: */ + if (is_scheduled(indirect)) + continue; + + /* remap remaining instructions using current addr + * to new addr: + */ + if (indirect->address == ctx->addr) { + if (!new_addr) { + new_addr = ir3_instr_clone(ctx->addr); + /* original addr is scheduled, but new one isn't: */ + new_addr->flags &= ~IR3_INSTR_MARK; + } + ir3_instr_set_address(indirect, new_addr); + } + } + + /* all remaining indirects remapped to new addr: */ + ctx->addr = NULL; + + return new_addr; +} + +/* "spill" the predicate register by remapping any unscheduled + * instructions which depend on the current predicate register + * to a clone of the instruction which wrote the address reg. + */ +static struct ir3_instruction * +split_pred(struct ir3_sched_ctx *ctx) +{ + struct ir3 *ir; + struct ir3_instruction *new_pred = NULL; + unsigned i; + + debug_assert(ctx->pred); + + ir = ctx->pred->block->shader; + + for (i = 0; i < ir->predicates_count; i++) { + struct ir3_instruction *predicated = ir->predicates[i]; + + /* skip instructions already scheduled: */ + if (is_scheduled(predicated)) + continue; + + /* remap remaining instructions using current pred + * to new pred: + * + * TODO is there ever a case when pred isn't first + * (and only) src? 
+ */ + if (ssa(predicated->regs[1]) == ctx->pred) { + if (!new_pred) { + new_pred = ir3_instr_clone(ctx->pred); + /* original pred is scheduled, but new one isn't: */ + new_pred->flags &= ~IR3_INSTR_MARK; + } + predicated->regs[1]->instr = new_pred; + } + } + + /* all remaining predicated remapped to new pred: */ + ctx->pred = NULL; + + return new_pred; +} + +static void +sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) +{ + struct list_head unscheduled_list; + + ctx->block = block; + + /* addr/pred writes are per-block: */ + ctx->addr = NULL; + ctx->pred = NULL; + + /* move all instructions to the unscheduled list, and + * empty the block's instruction list (to which we will + * be inserting). + */ + list_replace(&block->instr_list, &unscheduled_list); + list_inithead(&block->instr_list); + list_inithead(&ctx->depth_list); + + /* first a pre-pass to schedule all meta:input instructions + * (which need to appear first so that RA knows the register is + * occupied), and move remaining to depth sorted list: + */ + list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) { + if (instr->opc == OPC_META_INPUT) { + schedule(ctx, instr); + } else { + ir3_insert_by_depth(instr, &ctx->depth_list); + } + } + + while (!list_empty(&ctx->depth_list)) { + struct ir3_sched_notes notes = {0}; + struct ir3_instruction *instr; + + instr = find_eligible_instr(ctx, ¬es, true); + if (!instr) + instr = find_eligible_instr(ctx, ¬es, false); + + if (instr) { + unsigned delay = delay_calc(ctx->block, instr, false, false); + + /* and if we run out of instructions that can be scheduled, + * then it is time for nop's: + */ + debug_assert(delay <= 6); + while (delay > 0) { + ir3_NOP(block); + delay--; + } + + schedule(ctx, instr); + } else { + struct ir3_instruction *new_instr = NULL; + + /* nothing available to schedule.. 
if we are blocked on + * address/predicate register conflict, then break the + * deadlock by cloning the instruction that wrote that + * reg: + */ + if (notes.addr_conflict) { + new_instr = split_addr(ctx); + } else if (notes.pred_conflict) { + new_instr = split_pred(ctx); + } else { + debug_assert(0); + ctx->error = true; + return; + } + + if (new_instr) { + /* clearing current addr/pred can change what is + * available to schedule, so clear cache.. + */ + clear_cache(ctx, NULL); + + ir3_insert_by_depth(new_instr, &ctx->depth_list); + /* the original instr that wrote addr/pred may have + * originated from a different block: + */ + new_instr->block = block; + } + } + } + + /* And lastly, insert branch/jump instructions to take us to + * the next block. Later we'll strip back out the branches + * that simply jump to next instruction. + */ + if (block->successors[1]) { + /* if/else, conditional branches to "then" or "else": */ + struct ir3_instruction *br; + unsigned delay = 6; + + debug_assert(ctx->pred); + debug_assert(block->condition); + + delay -= distance(ctx->block, ctx->pred, delay, false); + + while (delay > 0) { + ir3_NOP(block); + delay--; + } + + /* create "else" branch first (since "then" block should + * frequently/always end up being a fall-thru): + */ + br = ir3_BR(block); + br->cat0.inv = true; + br->cat0.target = block->successors[1]; + + /* NOTE: we have to hard code delay of 6 above, since + * we want to insert the nop's before constructing the + * branch. Throw in an assert so we notice if this + * ever breaks on future generation: + */ + debug_assert(ir3_delayslots(ctx->pred, br, 0) == 6); + + br = ir3_BR(block); + br->cat0.target = block->successors[0]; + + } else if (block->successors[0]) { + /* otherwise unconditional jump to next block: */ + struct ir3_instruction *jmp; + + jmp = ir3_JUMP(block); + jmp->cat0.target = block->successors[0]; + } + + /* NOTE: if we kept track of the predecessors, we could do a better + * job w/ (jp) flags.. 
every node w/ > predecessor is a join point. + * Note that as we eliminate blocks which contain only an unconditional + * jump we probably need to propagate (jp) flag.. + */ +} + +/* After scheduling individual blocks, we still could have cases where + * one (or more) paths into a block, a value produced by a previous + * has too few delay slots to be legal. We can't deal with this in the + * first pass, because loops (ie. we can't ensure all predecessor blocks + * are already scheduled in the first pass). All we can really do at + * this point is stuff in extra nop's until things are legal. + */ +static void +sched_intra_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) +{ + unsigned n = 0; + + ctx->block = block; + + list_for_each_entry_safe (struct ir3_instruction, instr, &block->instr_list, node) { + unsigned delay = 0; + + for (unsigned i = 0; i < block->predecessors_count; i++) { + unsigned d = delay_calc(block->predecessors[i], instr, false, true); + delay = MAX2(d, delay); + } + + while (delay > n) { + struct ir3_instruction *nop = ir3_NOP(block); + + /* move to before instr: */ + list_delinit(&nop->node); + list_addtail(&nop->node, &instr->node); + + n++; + } + + /* we can bail once we hit worst case delay: */ + if (++n > 6) + break; + } +} + +int ir3_sched(struct ir3 *ir) +{ + struct ir3_sched_ctx ctx = {0}; + + ir3_clear_mark(ir); + + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + sched_block(&ctx, block); + } + + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + sched_intra_block(&ctx, block); + } + + if (ctx.error) + return -1; + return 0; +} + +/* does instruction 'prior' need to be scheduled before 'instr'? 
*/ +static bool +depends_on(struct ir3_instruction *instr, struct ir3_instruction *prior) +{ + /* TODO for dependencies that are related to a specific object, ie + * a specific SSBO/image/array, we could relax this constraint to + * make accesses to unrelated objects not depend on each other (at + * least as long as not declared coherent) + */ + if (((instr->barrier_class & IR3_BARRIER_EVERYTHING) && prior->barrier_class) || + ((prior->barrier_class & IR3_BARRIER_EVERYTHING) && instr->barrier_class)) + return true; + return !!(instr->barrier_class & prior->barrier_conflict); +} + +static void +add_barrier_deps(struct ir3_block *block, struct ir3_instruction *instr) +{ + struct list_head *prev = instr->node.prev; + struct list_head *next = instr->node.next; + + /* add dependencies on previous instructions that must be scheduled + * prior to the current instruction + */ + while (prev != &block->instr_list) { + struct ir3_instruction *pi = + LIST_ENTRY(struct ir3_instruction, prev, node); + + prev = prev->prev; + + if (is_meta(pi)) + continue; + + if (instr->barrier_class == pi->barrier_class) { + ir3_instr_add_dep(instr, pi); + break; + } + + if (depends_on(instr, pi)) + ir3_instr_add_dep(instr, pi); + } + + /* add dependencies on this instruction to following instructions + * that must be scheduled after the current instruction: + */ + while (next != &block->instr_list) { + struct ir3_instruction *ni = + LIST_ENTRY(struct ir3_instruction, next, node); + + next = next->next; + + if (is_meta(ni)) + continue; + + if (instr->barrier_class == ni->barrier_class) { + ir3_instr_add_dep(ni, instr); + break; + } + + if (depends_on(ni, instr)) + ir3_instr_add_dep(ni, instr); + } +} + +/* before scheduling a block, we need to add any necessary false-dependencies + * to ensure that: + * + * (1) barriers are scheduled in the right order wrt instructions related + * to the barrier + * + * (2) reads that come before a write actually get scheduled before the + * write + */ +static 
void +calculate_deps(struct ir3_block *block) +{ + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + if (instr->barrier_class) { + add_barrier_deps(block, instr); + } + } +} + +void +ir3_sched_add_deps(struct ir3 *ir) +{ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + calculate_deps(block); + } +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_shader.c mesa-19.0.1/src/freedreno/ir3/ir3_shader.c --- mesa-18.3.3/src/freedreno/ir3/ir3_shader.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_shader.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,438 @@ +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_format.h" + +#include "drm/freedreno_drmif.h" + +#include "ir3_shader.h" +#include "ir3_compiler.h" +#include "ir3_nir.h" + +int +ir3_glsl_type_size(const struct glsl_type *type) +{ + return glsl_count_attribute_slots(type, false); +} + +static void +delete_variant(struct ir3_shader_variant *v) +{ + if (v->ir) + ir3_destroy(v->ir); + if (v->bo) + fd_bo_del(v->bo); + if (v->immediates) + free(v->immediates); + free(v); +} + +/* for vertex shader, the inputs are loaded into registers before the shader + * is executed, so max_regs from the shader instructions might not properly + * reflect the # of registers actually used, especially in case passthrough + * varyings. + * + * Likewise, for fragment shader, we can have some regs which are passed + * input values but never touched by the resulting shader (ie. as result + * of dead code elimination or simply because we don't know how to turn + * the reg off. + */ +static void +fixup_regfootprint(struct ir3_shader_variant *v) +{ + unsigned i; + + for (i = 0; i < v->inputs_count; i++) { + /* skip frag inputs fetch via bary.f since their reg's are + * not written by gpu before shader starts (and in fact the + * regid's might not even be valid) + */ + if (v->inputs[i].bary) + continue; + + /* ignore high regs that are global to all threads in a warp + * (they exist by default) (a5xx+) + */ + if (v->inputs[i].regid >= regid(48,0)) + continue; + + if (v->inputs[i].compmask) { + unsigned n = util_last_bit(v->inputs[i].compmask) - 1; + int32_t regid = (v->inputs[i].regid + n) >> 2; + v->info.max_reg = MAX2(v->info.max_reg, regid); + } + } + + for (i = 0; i < v->outputs_count; i++) { + int32_t regid = (v->outputs[i].regid + 3) >> 2; + v->info.max_reg = MAX2(v->info.max_reg, regid); + } +} + +/* wrapper for ir3_assemble() which does some info fixup based on + * shader state. Non-static since used by ir3_cmdline too. 
+ */ +void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id) +{ + void *bin; + + bin = ir3_assemble(v->ir, &v->info, gpu_id); + if (!bin) + return NULL; + + if (gpu_id >= 400) { + v->instrlen = v->info.sizedwords / (2 * 16); + } else { + v->instrlen = v->info.sizedwords / (2 * 4); + } + + /* NOTE: if relative addressing is used, we set constlen in + * the compiler (to worst-case value) since we don't know in + * the assembler what the max addr reg value can be: + */ + v->constlen = MIN2(255, MAX2(v->constlen, v->info.max_const + 1)); + + fixup_regfootprint(v); + + return bin; +} + +static void +assemble_variant(struct ir3_shader_variant *v) +{ + struct ir3_compiler *compiler = v->shader->compiler; + struct shader_info *info = &v->shader->nir->info; + uint32_t gpu_id = compiler->gpu_id; + uint32_t sz, *bin; + + bin = ir3_shader_assemble(v, gpu_id); + sz = v->info.sizedwords * 4; + + v->bo = fd_bo_new(compiler->dev, sz, + DRM_FREEDRENO_GEM_CACHE_WCOMBINE | + DRM_FREEDRENO_GEM_TYPE_KMEM, + "%s:%s", ir3_shader_stage(v->shader), info->name); + + memcpy(fd_bo_map(v->bo), bin, sz); + + if (ir3_shader_debug & IR3_DBG_DISASM) { + struct ir3_shader_key key = v->key; + printf("disassemble: type=%d, k={bp=%u,cts=%u,hp=%u}", v->type, + v->binning_pass, key.color_two_side, key.half_precision); + ir3_shader_disasm(v, bin, stdout); + } + + if (shader_debug_enabled(v->shader->type)) { + fprintf(stderr, "Native code for unnamed %s shader %s:\n", + _mesa_shader_stage_to_string(v->shader->type), + v->shader->nir->info.name); + if (v->shader->type == MESA_SHADER_FRAGMENT) + fprintf(stderr, "SIMD0\n"); + ir3_shader_disasm(v, bin, stderr); + } + + free(bin); + + /* no need to keep the ir around beyond this point: */ + ir3_destroy(v->ir); + v->ir = NULL; +} + +static struct ir3_shader_variant * +create_variant(struct ir3_shader *shader, struct ir3_shader_key *key, + bool binning_pass) +{ + struct ir3_shader_variant *v = CALLOC_STRUCT(ir3_shader_variant); + int ret; + + 
if (!v) + return NULL; + + v->id = ++shader->variant_count; + v->shader = shader; + v->binning_pass = binning_pass; + v->key = *key; + v->type = shader->type; + + ret = ir3_compile_shader_nir(shader->compiler, v); + if (ret) { + debug_error("compile failed!"); + goto fail; + } + + assemble_variant(v); + if (!v->bo) { + debug_error("assemble failed!"); + goto fail; + } + + return v; + +fail: + delete_variant(v); + return NULL; +} + +static inline struct ir3_shader_variant * +shader_variant(struct ir3_shader *shader, struct ir3_shader_key *key, + bool *created) +{ + struct ir3_shader_variant *v; + + *created = false; + + for (v = shader->variants; v; v = v->next) + if (ir3_shader_key_equal(key, &v->key)) + return v; + + /* compile new variant if it doesn't exist already: */ + v = create_variant(shader, key, false); + if (v) { + v->next = shader->variants; + shader->variants = v; + *created = true; + } + + return v; +} + +struct ir3_shader_variant * +ir3_shader_get_variant(struct ir3_shader *shader, struct ir3_shader_key *key, + bool binning_pass, bool *created) +{ + struct ir3_shader_variant *v = + shader_variant(shader, key, created); + + if (binning_pass) { + if (!v->binning) + v->binning = create_variant(shader, key, true); + return v->binning; + } + + return v; +} + +void +ir3_shader_destroy(struct ir3_shader *shader) +{ + struct ir3_shader_variant *v, *t; + for (v = shader->variants; v; ) { + t = v; + v = v->next; + delete_variant(t); + } + ralloc_free(shader->nir); + free(shader); +} + +struct ir3_shader * +ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir) +{ + struct ir3_shader *shader = CALLOC_STRUCT(ir3_shader); + + shader->compiler = compiler; + shader->id = ++shader->compiler->shader_count; + shader->type = nir->info.stage; + + NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size, + (nir_lower_io_options)0); + + /* do first pass optimization, ignoring the key: */ + shader->nir = ir3_optimize_nir(shader, nir, NULL); + if 
(ir3_shader_debug & IR3_DBG_DISASM) { + printf("dump nir%d: type=%d", shader->id, shader->type); + nir_print_shader(shader->nir, stdout); + } + + return shader; +} + +static void dump_reg(FILE *out, const char *name, uint32_t r) +{ + if (r != regid(63,0)) + fprintf(out, "; %s: r%d.%c\n", name, r >> 2, "xyzw"[r & 0x3]); +} + +static void dump_output(FILE *out, struct ir3_shader_variant *so, + unsigned slot, const char *name) +{ + uint32_t regid; + regid = ir3_find_output_regid(so, slot); + dump_reg(out, name, regid); +} + +void +ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out) +{ + struct ir3 *ir = so->ir; + struct ir3_register *reg; + const char *type = ir3_shader_stage(so->shader); + uint8_t regid; + unsigned i; + + for (i = 0; i < ir->ninputs; i++) { + if (!ir->inputs[i]) { + fprintf(out, "; in%d unused\n", i); + continue; + } + reg = ir->inputs[i]->regs[0]; + regid = reg->num; + fprintf(out, "@in(%sr%d.%c)\tin%d\n", + (reg->flags & IR3_REG_HALF) ? "h" : "", + (regid >> 2), "xyzw"[regid & 0x3], i); + } + + for (i = 0; i < ir->noutputs; i++) { + if (!ir->outputs[i]) { + fprintf(out, "; out%d unused\n", i); + continue; + } + /* kill shows up as a virtual output.. skip it! */ + if (is_kill(ir->outputs[i])) + continue; + reg = ir->outputs[i]->regs[0]; + regid = reg->num; + fprintf(out, "@out(%sr%d.%c)\tout%d\n", + (reg->flags & IR3_REG_HALF) ? 
"h" : "", + (regid >> 2), "xyzw"[regid & 0x3], i); + } + + for (i = 0; i < so->immediates_count; i++) { + fprintf(out, "@const(c%d.x)\t", so->constbase.immediate + i); + fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n", + so->immediates[i].val[0], + so->immediates[i].val[1], + so->immediates[i].val[2], + so->immediates[i].val[3]); + } + + disasm_a3xx(bin, so->info.sizedwords, 0, out, ir->compiler->gpu_id); + + switch (so->type) { + case MESA_SHADER_VERTEX: + fprintf(out, "; %s: outputs:", type); + for (i = 0; i < so->outputs_count; i++) { + uint8_t regid = so->outputs[i].regid; + fprintf(out, " r%d.%c (%s)", + (regid >> 2), "xyzw"[regid & 0x3], + gl_varying_slot_name(so->outputs[i].slot)); + } + fprintf(out, "\n"); + fprintf(out, "; %s: inputs:", type); + for (i = 0; i < so->inputs_count; i++) { + uint8_t regid = so->inputs[i].regid; + fprintf(out, " r%d.%c (cm=%x,il=%u,b=%u)", + (regid >> 2), "xyzw"[regid & 0x3], + so->inputs[i].compmask, + so->inputs[i].inloc, + so->inputs[i].bary); + } + fprintf(out, "\n"); + break; + case MESA_SHADER_FRAGMENT: + fprintf(out, "; %s: outputs:", type); + for (i = 0; i < so->outputs_count; i++) { + uint8_t regid = so->outputs[i].regid; + fprintf(out, " r%d.%c (%s)", + (regid >> 2), "xyzw"[regid & 0x3], + gl_frag_result_name(so->outputs[i].slot)); + } + fprintf(out, "\n"); + fprintf(out, "; %s: inputs:", type); + for (i = 0; i < so->inputs_count; i++) { + uint8_t regid = so->inputs[i].regid; + fprintf(out, " r%d.%c (%s,cm=%x,il=%u,b=%u)", + (regid >> 2), "xyzw"[regid & 0x3], + gl_varying_slot_name(so->inputs[i].slot), + so->inputs[i].compmask, + so->inputs[i].inloc, + so->inputs[i].bary); + } + fprintf(out, "\n"); + break; + default: + /* TODO */ + break; + } + + /* print generic shader info: */ + fprintf(out, "; %s prog %d/%d: %u instructions, %d half, %d full\n", + type, so->shader->id, so->id, + so->info.instrs_count, + so->info.max_half_reg + 1, + so->info.max_reg + 1); + + fprintf(out, "; %d const, %u constlen\n", + 
so->info.max_const + 1, + so->constlen); + + fprintf(out, "; %u (ss), %u (sy)\n", so->info.ss, so->info.sy); + + /* print shader type specific info: */ + switch (so->type) { + case MESA_SHADER_VERTEX: + dump_output(out, so, VARYING_SLOT_POS, "pos"); + dump_output(out, so, VARYING_SLOT_PSIZ, "psize"); + break; + case MESA_SHADER_FRAGMENT: + dump_reg(out, "pos (bary)", + ir3_find_sysval_regid(so, SYSTEM_VALUE_VARYING_COORD)); + dump_output(out, so, FRAG_RESULT_DEPTH, "posz"); + if (so->color0_mrt) { + dump_output(out, so, FRAG_RESULT_COLOR, "color"); + } else { + dump_output(out, so, FRAG_RESULT_DATA0, "data0"); + dump_output(out, so, FRAG_RESULT_DATA1, "data1"); + dump_output(out, so, FRAG_RESULT_DATA2, "data2"); + dump_output(out, so, FRAG_RESULT_DATA3, "data3"); + dump_output(out, so, FRAG_RESULT_DATA4, "data4"); + dump_output(out, so, FRAG_RESULT_DATA5, "data5"); + dump_output(out, so, FRAG_RESULT_DATA6, "data6"); + dump_output(out, so, FRAG_RESULT_DATA7, "data7"); + } + /* these two are hard-coded since we don't know how to + * program them to anything but all 0's... 
+ */ + if (so->frag_coord) + fprintf(out, "; fragcoord: r0.x\n"); + if (so->frag_face) + fprintf(out, "; fragface: hr0.x\n"); + break; + default: + /* TODO */ + break; + } + + fprintf(out, "\n"); +} + +uint64_t +ir3_shader_outputs(const struct ir3_shader *so) +{ + return so->nir->info.outputs_written; +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_shader.h mesa-19.0.1/src/freedreno/ir3/ir3_shader.h --- mesa-18.3.3/src/freedreno/ir3/ir3_shader.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_shader.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,591 @@ +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#ifndef IR3_SHADER_H_ +#define IR3_SHADER_H_ + +#include + +#include "compiler/shader_enums.h" +#include "compiler/nir/nir.h" +#include "util/bitscan.h" + +#include "ir3.h" + +struct glsl_type; + +/* driver param indices: */ +enum ir3_driver_param { + /* compute shader driver params: */ + IR3_DP_NUM_WORK_GROUPS_X = 0, + IR3_DP_NUM_WORK_GROUPS_Y = 1, + IR3_DP_NUM_WORK_GROUPS_Z = 2, + IR3_DP_LOCAL_GROUP_SIZE_X = 4, + IR3_DP_LOCAL_GROUP_SIZE_Y = 5, + IR3_DP_LOCAL_GROUP_SIZE_Z = 6, + /* NOTE: gl_NumWorkGroups should be vec4 aligned because + * glDispatchComputeIndirect() needs to load these from + * the info->indirect buffer. Keep that in mind when/if + * adding any addition CS driver params. + */ + IR3_DP_CS_COUNT = 8, /* must be aligned to vec4 */ + + /* vertex shader driver params: */ + IR3_DP_VTXID_BASE = 0, + IR3_DP_VTXCNT_MAX = 1, + /* user-clip-plane components, up to 8x vec4's: */ + IR3_DP_UCP0_X = 4, + /* .... */ + IR3_DP_UCP7_W = 35, + IR3_DP_VS_COUNT = 36 /* must be aligned to vec4 */ +}; + +#define IR3_MAX_SHADER_BUFFERS 32 +#define IR3_MAX_SHADER_IMAGES 32 +#define IR3_MAX_SO_BUFFERS 4 +#define IR3_MAX_SO_OUTPUTS 64 + +/** + * For consts needed to pass internal values to shader which may or may not + * be required, rather than allocating worst-case const space, we scan the + * shader and allocate consts as-needed: + * + * + SSBO sizes: only needed if shader has a get_buffer_size intrinsic + * for a given SSBO + * + * + Image dimensions: needed to calculate pixel offset, but only for + * images that have a image_store intrinsic + */ +struct ir3_driver_const_layout { + struct { + uint32_t mask; /* bitmask of SSBOs that have get_buffer_size */ + uint32_t count; /* number of consts allocated */ + /* one const allocated per SSBO which has get_buffer_size, + * ssbo_sizes.off[ssbo_id] is offset from start of ssbo_sizes + * consts: + */ + uint32_t off[IR3_MAX_SHADER_BUFFERS]; + } ssbo_size; + + struct { + uint32_t mask; /* 
bitmask of images that have image_store */ + uint32_t count; /* number of consts allocated */ + /* three const allocated per image which has image_store: + * + cpp (bytes per pixel) + * + pitch (y pitch) + * + array_pitch (z pitch) + */ + uint32_t off[IR3_MAX_SHADER_IMAGES]; + } image_dims; +}; + +/** + * A single output for vertex transform feedback. + */ +struct ir3_stream_output { + unsigned register_index:6; /**< 0 to 63 (OUT index) */ + unsigned start_component:2; /** 0 to 3 */ + unsigned num_components:3; /** 1 to 4 */ + unsigned output_buffer:3; /**< 0 to PIPE_MAX_SO_BUFFERS */ + unsigned dst_offset:16; /**< offset into the buffer in dwords */ + unsigned stream:2; /**< 0 to 3 */ +}; + +/** + * Stream output for vertex transform feedback. + */ +struct ir3_stream_output_info { + unsigned num_outputs; + /** stride for an entire vertex for each buffer in dwords */ + uint16_t stride[IR3_MAX_SO_BUFFERS]; + + /** + * Array of stream outputs, in the order they are to be written in. + * Selected components are tightly packed into the output buffer. + */ + struct ir3_stream_output output[IR3_MAX_SO_OUTPUTS]; +}; + +/* Configuration key used to identify a shader variant.. different + * shader variants can be used to implement features not supported + * in hw (two sided color), binning-pass vertex shader, etc. + */ +struct ir3_shader_key { + union { + struct { + /* + * Combined Vertex/Fragment shader parameters: + */ + unsigned ucp_enables : 8; + + /* do we need to check {v,f}saturate_{s,t,r}? 
*/ + unsigned has_per_samp : 1; + + /* + * Vertex shader variant parameters: + */ + unsigned vclamp_color : 1; + + /* + * Fragment shader variant parameters: + */ + unsigned color_two_side : 1; + unsigned half_precision : 1; + /* used when shader needs to handle flat varyings (a4xx) + * for front/back color inputs to frag shader: + */ + unsigned rasterflat : 1; + unsigned fclamp_color : 1; + }; + uint32_t global; + }; + + /* bitmask of sampler which needs coords clamped for vertex + * shader: + */ + uint16_t vsaturate_s, vsaturate_t, vsaturate_r; + + /* bitmask of sampler which needs coords clamped for frag + * shader: + */ + uint16_t fsaturate_s, fsaturate_t, fsaturate_r; + + /* bitmask of ms shifts */ + uint32_t vsamples, fsamples; + + /* bitmask of samplers which need astc srgb workaround: */ + uint16_t vastc_srgb, fastc_srgb; +}; + +static inline bool +ir3_shader_key_equal(struct ir3_shader_key *a, struct ir3_shader_key *b) +{ + /* slow-path if we need to check {v,f}saturate_{s,t,r} */ + if (a->has_per_samp || b->has_per_samp) + return memcmp(a, b, sizeof(struct ir3_shader_key)) == 0; + return a->global == b->global; +} + +/* will the two keys produce different lowering for a fragment shader? 
*/ +static inline bool +ir3_shader_key_changes_fs(struct ir3_shader_key *key, struct ir3_shader_key *last_key) +{ + if (last_key->has_per_samp || key->has_per_samp) { + if ((last_key->fsaturate_s != key->fsaturate_s) || + (last_key->fsaturate_t != key->fsaturate_t) || + (last_key->fsaturate_r != key->fsaturate_r) || + (last_key->fsamples != key->fsamples) || + (last_key->fastc_srgb != key->fastc_srgb)) + return true; + } + + if (last_key->fclamp_color != key->fclamp_color) + return true; + + if (last_key->color_two_side != key->color_two_side) + return true; + + if (last_key->half_precision != key->half_precision) + return true; + + if (last_key->rasterflat != key->rasterflat) + return true; + + if (last_key->ucp_enables != key->ucp_enables) + return true; + + return false; +} + +/* will the two keys produce different lowering for a vertex shader? */ +static inline bool +ir3_shader_key_changes_vs(struct ir3_shader_key *key, struct ir3_shader_key *last_key) +{ + if (last_key->has_per_samp || key->has_per_samp) { + if ((last_key->vsaturate_s != key->vsaturate_s) || + (last_key->vsaturate_t != key->vsaturate_t) || + (last_key->vsaturate_r != key->vsaturate_r) || + (last_key->vsamples != key->vsamples) || + (last_key->vastc_srgb != key->vastc_srgb)) + return true; + } + + if (last_key->vclamp_color != key->vclamp_color) + return true; + + if (last_key->ucp_enables != key->ucp_enables) + return true; + + return false; +} + +/* clears shader-key flags which don't apply to the given shader + * stage + */ +static inline void +ir3_normalize_key(struct ir3_shader_key *key, gl_shader_stage type) +{ + switch (type) { + case MESA_SHADER_FRAGMENT: + if (key->has_per_samp) { + key->vsaturate_s = 0; + key->vsaturate_t = 0; + key->vsaturate_r = 0; + key->vastc_srgb = 0; + key->vsamples = 0; + } + break; + case MESA_SHADER_VERTEX: + key->color_two_side = false; + key->half_precision = false; + key->rasterflat = false; + if (key->has_per_samp) { + key->fsaturate_s = 0; + 
key->fsaturate_t = 0; + key->fsaturate_r = 0; + key->fastc_srgb = 0; + key->fsamples = 0; + } + break; + default: + /* TODO */ + break; + } + +} + +struct ir3_shader_variant { + struct fd_bo *bo; + + /* variant id (for debug) */ + uint32_t id; + + struct ir3_shader_key key; + + /* vertex shaders can have an extra version for hwbinning pass, + * which is pointed to by so->binning: + */ + bool binning_pass; + struct ir3_shader_variant *binning; + + struct ir3_driver_const_layout const_layout; + struct ir3_info info; + struct ir3 *ir; + + /* Levels of nesting of flow control: + */ + unsigned branchstack; + + /* the instructions length is in units of instruction groups + * (4 instructions for a3xx, 16 instructions for a4xx.. each + * instruction is 2 dwords): + */ + unsigned instrlen; + + /* the constants length is in units of vec4's, and is the sum of + * the uniforms and the built-in compiler constants + */ + unsigned constlen; + + /* number of uniforms (in vec4), not including built-in compiler + * constants, etc. + */ + unsigned num_uniforms; + + unsigned num_ubos; + + /* About Linkage: + * + Let the frag shader determine the position/compmask for the + * varyings, since it is the place where we know if the varying + * is actually used, and if so, which components are used. So + * what the hw calls "outloc" is taken from the "inloc" of the + * frag shader. + * + From the vert shader, we only need the output regid + */ + + bool frag_coord, frag_face, color0_mrt; + + /* NOTE: for input/outputs, slot is: + * gl_vert_attrib - for VS inputs + * gl_varying_slot - for VS output / FS input + * gl_frag_result - for FS output + */ + + /* varyings/outputs: */ + unsigned outputs_count; + struct { + uint8_t slot; + uint8_t regid; + } outputs[16 + 2]; /* +POSITION +PSIZE */ + bool writes_pos, writes_psize; + + /* attributes (VS) / varyings (FS): + * Note that sysval's should come *after* normal inputs. 
+ */ + unsigned inputs_count; + struct { + uint8_t slot; + uint8_t regid; + uint8_t compmask; + uint8_t ncomp; + /* location of input (ie. offset passed to bary.f, etc). This + * matches the SP_VS_VPC_DST_REG.OUTLOCn value (a3xx and a4xx + * have the OUTLOCn value offset by 8, presumably to account + * for gl_Position/gl_PointSize) + */ + uint8_t inloc; + /* vertex shader specific: */ + bool sysval : 1; /* slot is a gl_system_value */ + /* fragment shader specific: */ + bool bary : 1; /* fetched varying (vs one loaded into reg) */ + bool rasterflat : 1; /* special handling for emit->rasterflat */ + enum glsl_interp_mode interpolate; + } inputs[16 + 2]; /* +POSITION +FACE */ + + /* sum of input components (scalar). For frag shaders, it only counts + * the varying inputs: + */ + unsigned total_in; + + /* For frag shaders, the total number of inputs (not scalar, + * ie. SP_VS_PARAM_REG.TOTALVSOUTVAR) + */ + unsigned varying_in; + + /* number of samplers/textures (which are currently 1:1): */ + int num_samp; + + /* do we have one or more SSBO instructions: */ + bool has_ssbo; + + /* do we have kill instructions: */ + bool has_kill; + + /* Layout of constant registers, each section (in vec4). Pointer size + * is 32b (a3xx, a4xx), or 64b (a5xx+), which effects the size of the + * UBO and stream-out consts. 
+ */ + struct { + /* user const start at zero */ + unsigned ubo; + /* NOTE that a3xx might need a section for SSBO addresses too */ + unsigned ssbo_sizes; + unsigned image_dims; + unsigned driver_param; + unsigned tfbo; + unsigned immediate; + } constbase; + + unsigned immediates_count; + unsigned immediates_size; + struct { + uint32_t val[4]; + } *immediates; + + /* for astc srgb workaround, the number/base of additional + * alpha tex states we need, and index of original tex states + */ + struct { + unsigned base, count; + unsigned orig_idx[16]; + } astc_srgb; + + /* shader variants form a linked list: */ + struct ir3_shader_variant *next; + + /* replicated here to avoid passing extra ptrs everywhere: */ + gl_shader_stage type; + struct ir3_shader *shader; +}; + +struct ir3_shader { + gl_shader_stage type; + + /* shader id (for debug): */ + uint32_t id; + uint32_t variant_count; + + /* so we know when we can disable TGSI related hacks: */ + bool from_tgsi; + + struct ir3_compiler *compiler; + + struct nir_shader *nir; + struct ir3_stream_output_info stream_output; + + struct ir3_shader_variant *variants; +}; + +void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id); +struct ir3_shader_variant * ir3_shader_get_variant(struct ir3_shader *shader, + struct ir3_shader_key *key, bool binning_pass, bool *created); +struct ir3_shader * ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir); +void ir3_shader_destroy(struct ir3_shader *shader); +void ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out); +uint64_t ir3_shader_outputs(const struct ir3_shader *so); + +int +ir3_glsl_type_size(const struct glsl_type *type); + +static inline const char * +ir3_shader_stage(struct ir3_shader *shader) +{ + switch (shader->type) { + case MESA_SHADER_VERTEX: return "VERT"; + case MESA_SHADER_FRAGMENT: return "FRAG"; + case MESA_SHADER_COMPUTE: return "CL"; + default: + unreachable("invalid type"); + return NULL; + } +} + +/* + * 
Helper/util: + */ + +static inline int +ir3_find_output(const struct ir3_shader_variant *so, gl_varying_slot slot) +{ + int j; + + for (j = 0; j < so->outputs_count; j++) + if (so->outputs[j].slot == slot) + return j; + + /* it seems optional to have a OUT.BCOLOR[n] for each OUT.COLOR[n] + * in the vertex shader.. but the fragment shader doesn't know this + * so it will always have both IN.COLOR[n] and IN.BCOLOR[n]. So + * at link time if there is no matching OUT.BCOLOR[n], we must map + * OUT.COLOR[n] to IN.BCOLOR[n]. And visa versa if there is only + * a OUT.BCOLOR[n] but no matching OUT.COLOR[n] + */ + if (slot == VARYING_SLOT_BFC0) { + slot = VARYING_SLOT_COL0; + } else if (slot == VARYING_SLOT_BFC1) { + slot = VARYING_SLOT_COL1; + } else if (slot == VARYING_SLOT_COL0) { + slot = VARYING_SLOT_BFC0; + } else if (slot == VARYING_SLOT_COL1) { + slot = VARYING_SLOT_BFC1; + } else { + return 0; + } + + for (j = 0; j < so->outputs_count; j++) + if (so->outputs[j].slot == slot) + return j; + + debug_assert(0); + + return 0; +} + +static inline int +ir3_next_varying(const struct ir3_shader_variant *so, int i) +{ + while (++i < so->inputs_count) + if (so->inputs[i].compmask && so->inputs[i].bary) + break; + return i; +} + +struct ir3_shader_linkage { + uint8_t max_loc; + uint8_t cnt; + struct { + uint8_t regid; + uint8_t compmask; + uint8_t loc; + } var[32]; +}; + +static inline void +ir3_link_add(struct ir3_shader_linkage *l, uint8_t regid, uint8_t compmask, uint8_t loc) +{ + int i = l->cnt++; + + debug_assert(i < ARRAY_SIZE(l->var)); + + l->var[i].regid = regid; + l->var[i].compmask = compmask; + l->var[i].loc = loc; + l->max_loc = MAX2(l->max_loc, loc + util_last_bit(compmask)); +} + +static inline void +ir3_link_shaders(struct ir3_shader_linkage *l, + const struct ir3_shader_variant *vs, + const struct ir3_shader_variant *fs) +{ + int j = -1, k; + + while (l->cnt < ARRAY_SIZE(l->var)) { + j = ir3_next_varying(fs, j); + + if (j >= fs->inputs_count) + break; + + if 
(fs->inputs[j].inloc >= fs->total_in) + continue; + + k = ir3_find_output(vs, fs->inputs[j].slot); + + ir3_link_add(l, vs->outputs[k].regid, + fs->inputs[j].compmask, fs->inputs[j].inloc); + } +} + +static inline uint32_t +ir3_find_output_regid(const struct ir3_shader_variant *so, unsigned slot) +{ + int j; + for (j = 0; j < so->outputs_count; j++) + if (so->outputs[j].slot == slot) + return so->outputs[j].regid; + return regid(63, 0); +} + +static inline uint32_t +ir3_find_sysval_regid(const struct ir3_shader_variant *so, unsigned slot) +{ + int j; + for (j = 0; j < so->inputs_count; j++) + if (so->inputs[j].sysval && (so->inputs[j].slot == slot)) + return so->inputs[j].regid; + return regid(63, 0); +} + +/* calculate register footprint in terms of half-regs (ie. one full + * reg counts as two half-regs). + */ +static inline uint32_t +ir3_shader_halfregs(const struct ir3_shader_variant *v) +{ + return (2 * (v->info.max_reg + 1)) + (v->info.max_half_reg + 1); +} + +#endif /* IR3_SHADER_H_ */ diff -Nru mesa-18.3.3/src/freedreno/ir3/meson.build mesa-19.0.1/src/freedreno/ir3/meson.build --- mesa-18.3.3/src/freedreno/ir3/meson.build 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,66 @@ +# Copyright © 2018 Rob Clark + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+ +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +ir3_nir_trig_c = custom_target( + 'ir3_nir_trig.c', + input : 'ir3_nir_trig.py', + output : 'ir3_nir_trig.c', + command : [ + prog_python, '@INPUT@', + '-p', join_paths(meson.source_root(), 'src/compiler/nir/'), + ], + capture : true, + depend_files : nir_algebraic_py, +) + +libfreedreno_ir3_files = files( + 'disasm-a3xx.c', + 'instr-a3xx.h', + 'ir3.c', + 'ir3_compiler_nir.c', + 'ir3_compiler.c', + 'ir3_compiler.h', + 'ir3_context.c', + 'ir3_context.h', + 'ir3_cp.c', + 'ir3_depth.c', + 'ir3_group.c', + 'ir3.h', + 'ir3_legalize.c', + 'ir3_nir.c', + 'ir3_nir.h', + 'ir3_nir_lower_tg4_to_tex.c', + 'ir3_print.c', + 'ir3_ra.c', + 'ir3_sched.c', + 'ir3_shader.c', + 'ir3_shader.h', +) + +libfreedreno_ir3 = static_library( + 'freedreno_ir3', + [libfreedreno_ir3_files, ir3_nir_trig_c], + include_directories : [inc_freedreno, inc_common], + c_args : [c_vis_args, no_override_init_args], + cpp_args : [cpp_vis_args], + dependencies : idep_nir_headers, + build_by_default : false, +) + diff -Nru mesa-18.3.3/src/freedreno/Makefile.am mesa-19.0.1/src/freedreno/Makefile.am --- mesa-18.3.3/src/freedreno/Makefile.am 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,74 @@ +# Copyright © 2016 Broadcom +# Copyright © 2016 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without 
restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +AM_CPPFLAGS = \ + -I$(top_srcdir)/include \ + -I$(top_builddir)/src \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/freedreno/ \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/include \ + $(DEFINES) + +AM_CFLAGS = \ + $(EXPAT_CFLAGS) + +include Makefile.sources + +lib_LTLIBRARIES = +check_LTLIBRARIES = +noinst_DATA = +noinst_HEADERS = $(registers_FILES) +noinst_LTLIBRARIES = +noinst_PROGRAMS = +check_PROGRAMS = +TESTS = +BUILT_SOURCES = +CLEANFILES = +EXTRA_DIST = \ + meson.build \ + drm/meson.build \ + ir3/ir3_nir_trig.py \ + ir3/meson.build + +MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D) +PYTHON_GEN = $(AM_V_GEN)$(PYTHON) $(PYTHON_FLAGS) + +CLEANFILES += $(BUILT_SOURCES) + +noinst_LTLIBRARIES += libfreedreno_drm.la + +libfreedreno_drm_la_SOURCES = $(drm_SOURCES) +libfreedreno_drm_la_CFLAGS = $(VALGRIND_CFLAGS) $(LIBDRM_CFLAGS) + +noinst_LTLIBRARIES += libfreedreno_ir3.la + +libfreedreno_ir3_la_SOURCES = $(ir3_SOURCES) $(ir3_GENERATED_FILES) +libfreedreno_ir3_la_CFLAGS = \ + -I$(top_srcdir)/src/freedreno/ir3 \ + 
-I$(top_builddir)/src/compiler/nir \ + -I$(top_srcdir)/src/compiler/nir + +ir3/ir3_nir_trig.c: ir3/ir3_nir_trig.py $(top_srcdir)/src/compiler/nir/nir_algebraic.py + $(MKDIR_GEN) + $(AM_V_GEN) $(PYTHON) $(PYTHON_FLAGS) $(srcdir)/ir3/ir3_nir_trig.py -p $(top_srcdir)/src/compiler/nir > $@ || ($(RM) $@; false) + diff -Nru mesa-18.3.3/src/freedreno/Makefile.sources mesa-19.0.1/src/freedreno/Makefile.sources --- mesa-18.3.3/src/freedreno/Makefile.sources 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/Makefile.sources 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,51 @@ +drm_SOURCES := \ + drm/freedreno_bo.c \ + drm/freedreno_drmif.h \ + drm/freedreno_ringbuffer.c \ + drm/msm_bo.c \ + drm/msm_pipe.c \ + drm/msm_ringbuffer_sp.c \ + drm/freedreno_bo_cache.c \ + drm/freedreno_pipe.c \ + drm/freedreno_ringbuffer.h \ + drm/msm_device.c \ + drm/msm_priv.h \ + drm/freedreno_device.c \ + drm/freedreno_priv.h \ + drm/msm_drm.h \ + drm/msm_ringbuffer.c + +ir3_SOURCES := \ + ir3/disasm-a3xx.c \ + ir3/instr-a3xx.h \ + ir3/ir3.c \ + ir3/ir3_compiler.c \ + ir3/ir3_compiler.h \ + ir3/ir3_compiler_nir.c \ + ir3/ir3_context.c \ + ir3/ir3_context.h \ + ir3/ir3_cp.c \ + ir3/ir3_depth.c \ + ir3/ir3_group.c \ + ir3/ir3.h \ + ir3/ir3_legalize.c \ + ir3/ir3_nir.c \ + ir3/ir3_nir.h \ + ir3/ir3_nir_lower_tg4_to_tex.c \ + ir3/ir3_print.c \ + ir3/ir3_ra.c \ + ir3/ir3_sched.c \ + ir3/ir3_shader.c \ + ir3/ir3_shader.h + +ir3_GENERATED_FILES := \ + ir3/ir3_nir_trig.c + +registers_FILES := \ + registers/a2xx.xml.h \ + registers/a3xx.xml.h \ + registers/a4xx.xml.h \ + registers/a5xx.xml.h \ + registers/a6xx.xml.h \ + registers/adreno_common.xml.h \ + registers/adreno_pm4.xml.h diff -Nru mesa-18.3.3/src/freedreno/meson.build mesa-19.0.1/src/freedreno/meson.build --- mesa-18.3.3/src/freedreno/meson.build 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,24 @@ +# Copyright © 2018 Rob Clark + +# Permission is 
hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +inc_freedreno = include_directories(['.', './registers']) + +subdir('drm') +subdir('ir3') diff -Nru mesa-18.3.3/src/freedreno/registers/a2xx.xml.h mesa-19.0.1/src/freedreno/registers/a2xx.xml.h --- mesa-18.3.3/src/freedreno/registers/a2xx.xml.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/registers/a2xx.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,3010 @@ +#ifndef A2XX_XML +#define A2XX_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/freedreno/envytools/ +git clone https://github.com/freedreno/envytools.git + +The rules-ng-ng source files this header was generated from are: +- ./rnndb/adreno/a2xx.xml ( 79608 bytes, from 2018-12-21 03:07:09) +- ./rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-09-02 13:35:19) +- ./rnndb/adreno/adreno_common.xml ( 14201 bytes, from 2018-09-07 18:12:21) +- ./rnndb/adreno/adreno_pm4.xml ( 42626 bytes, from 2018-09-17 18:20:14) + +Copyright (C) 2013-2018 by the following authors: +- Rob Clark (robclark) +- Ilia Mirkin (imirkin) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +enum a2xx_rb_dither_type { + DITHER_PIXEL = 0, + DITHER_SUBPIXEL = 1, +}; + +enum a2xx_colorformatx { + COLORX_4_4_4_4 = 0, + COLORX_1_5_5_5 = 1, + COLORX_5_6_5 = 2, + COLORX_8 = 3, + COLORX_8_8 = 4, + COLORX_8_8_8_8 = 5, + COLORX_S8_8_8_8 = 6, + COLORX_16_FLOAT = 7, + COLORX_16_16_FLOAT = 8, + COLORX_16_16_16_16_FLOAT = 9, + COLORX_32_FLOAT = 10, + COLORX_32_32_FLOAT = 11, + COLORX_32_32_32_32_FLOAT = 12, + COLORX_2_3_3 = 13, + COLORX_8_8_8 = 14, +}; + +enum a2xx_sq_surfaceformat { + FMT_1_REVERSE = 0, + FMT_1 = 1, + FMT_8 = 2, + FMT_1_5_5_5 = 3, + FMT_5_6_5 = 4, + FMT_6_5_5 = 5, + FMT_8_8_8_8 = 6, + FMT_2_10_10_10 = 7, + FMT_8_A = 8, + FMT_8_B = 9, + FMT_8_8 = 10, + FMT_Cr_Y1_Cb_Y0 = 11, + FMT_Y1_Cr_Y0_Cb = 12, + FMT_5_5_5_1 = 13, + FMT_8_8_8_8_A = 14, + FMT_4_4_4_4 = 15, + FMT_8_8_8 = 16, + FMT_DXT1 = 18, + FMT_DXT2_3 = 19, + FMT_DXT4_5 = 20, + FMT_10_10_10_2 = 21, + FMT_24_8 = 22, + FMT_16 = 24, + FMT_16_16 = 25, + FMT_16_16_16_16 = 26, + FMT_16_EXPAND = 27, + FMT_16_16_EXPAND = 28, + FMT_16_16_16_16_EXPAND = 29, + FMT_16_FLOAT = 30, + FMT_16_16_FLOAT = 31, + FMT_16_16_16_16_FLOAT = 32, + FMT_32 = 33, + FMT_32_32 = 34, + FMT_32_32_32_32 = 35, + FMT_32_FLOAT = 36, + FMT_32_32_FLOAT = 37, + FMT_32_32_32_32_FLOAT = 38, + FMT_ATI_TC_RGB = 39, + FMT_ATI_TC_RGBA = 40, + FMT_ATI_TC_555_565_RGB = 41, + FMT_ATI_TC_555_565_RGBA = 42, + FMT_ATI_TC_RGBA_INTERP = 43, + FMT_ATI_TC_555_565_RGBA_INTERP = 44, + FMT_ETC1_RGBA_INTERP = 46, + FMT_ETC1_RGB = 47, + FMT_ETC1_RGBA = 48, + FMT_DXN = 49, + FMT_2_3_3 = 51, + FMT_2_10_10_10_AS_16_16_16_16 = 54, + FMT_10_10_10_2_AS_16_16_16_16 = 55, + FMT_32_32_32_FLOAT = 57, + FMT_DXT3A = 58, + FMT_DXT5A = 59, + FMT_CTX1 = 60, +}; + +enum a2xx_sq_ps_vtx_mode { + POSITION_1_VECTOR = 0, + POSITION_2_VECTORS_UNUSED = 1, + POSITION_2_VECTORS_SPRITE = 2, + POSITION_2_VECTORS_EDGE = 3, + POSITION_2_VECTORS_KILL = 4, + POSITION_2_VECTORS_SPRITE_KILL = 5, + POSITION_2_VECTORS_EDGE_KILL = 6, + MULTIPASS = 7, +}; + +enum 
a2xx_sq_sample_cntl { + CENTROIDS_ONLY = 0, + CENTERS_ONLY = 1, + CENTROIDS_AND_CENTERS = 2, +}; + +enum a2xx_dx_clip_space { + DXCLIP_OPENGL = 0, + DXCLIP_DIRECTX = 1, +}; + +enum a2xx_pa_su_sc_polymode { + POLY_DISABLED = 0, + POLY_DUALMODE = 1, +}; + +enum a2xx_rb_edram_mode { + EDRAM_NOP = 0, + COLOR_DEPTH = 4, + DEPTH_ONLY = 5, + EDRAM_COPY = 6, +}; + +enum a2xx_pa_sc_pattern_bit_order { + LITTLE = 0, + BIG = 1, +}; + +enum a2xx_pa_sc_auto_reset_cntl { + NEVER = 0, + EACH_PRIMITIVE = 1, + EACH_PACKET = 2, +}; + +enum a2xx_pa_pixcenter { + PIXCENTER_D3D = 0, + PIXCENTER_OGL = 1, +}; + +enum a2xx_pa_roundmode { + TRUNCATE = 0, + ROUND = 1, + ROUNDTOEVEN = 2, + ROUNDTOODD = 3, +}; + +enum a2xx_pa_quantmode { + ONE_SIXTEENTH = 0, + ONE_EIGTH = 1, + ONE_QUARTER = 2, + ONE_HALF = 3, + ONE = 4, +}; + +enum a2xx_rb_copy_sample_select { + SAMPLE_0 = 0, + SAMPLE_1 = 1, + SAMPLE_2 = 2, + SAMPLE_3 = 3, + SAMPLE_01 = 4, + SAMPLE_23 = 5, + SAMPLE_0123 = 6, +}; + +enum a2xx_rb_blend_opcode { + BLEND2_DST_PLUS_SRC = 0, + BLEND2_SRC_MINUS_DST = 1, + BLEND2_MIN_DST_SRC = 2, + BLEND2_MAX_DST_SRC = 3, + BLEND2_DST_MINUS_SRC = 4, + BLEND2_DST_PLUS_SRC_BIAS = 5, +}; + +enum a2xx_su_perfcnt_select { + PERF_PAPC_PASX_REQ = 0, + PERF_PAPC_PASX_FIRST_VECTOR = 2, + PERF_PAPC_PASX_SECOND_VECTOR = 3, + PERF_PAPC_PASX_FIRST_DEAD = 4, + PERF_PAPC_PASX_SECOND_DEAD = 5, + PERF_PAPC_PASX_VTX_KILL_DISCARD = 6, + PERF_PAPC_PASX_VTX_NAN_DISCARD = 7, + PERF_PAPC_PA_INPUT_PRIM = 8, + PERF_PAPC_PA_INPUT_NULL_PRIM = 9, + PERF_PAPC_PA_INPUT_EVENT_FLAG = 10, + PERF_PAPC_PA_INPUT_FIRST_PRIM_SLOT = 11, + PERF_PAPC_PA_INPUT_END_OF_PACKET = 12, + PERF_PAPC_CLPR_CULL_PRIM = 13, + PERF_PAPC_CLPR_VV_CULL_PRIM = 15, + PERF_PAPC_CLPR_VTX_KILL_CULL_PRIM = 17, + PERF_PAPC_CLPR_VTX_NAN_CULL_PRIM = 18, + PERF_PAPC_CLPR_CULL_TO_NULL_PRIM = 19, + PERF_PAPC_CLPR_VV_CLIP_PRIM = 21, + PERF_PAPC_CLPR_POINT_CLIP_CANDIDATE = 23, + PERF_PAPC_CLPR_CLIP_PLANE_CNT_1 = 24, + PERF_PAPC_CLPR_CLIP_PLANE_CNT_2 = 25, + 
PERF_PAPC_CLPR_CLIP_PLANE_CNT_3 = 26, + PERF_PAPC_CLPR_CLIP_PLANE_CNT_4 = 27, + PERF_PAPC_CLPR_CLIP_PLANE_CNT_5 = 28, + PERF_PAPC_CLPR_CLIP_PLANE_CNT_6 = 29, + PERF_PAPC_CLPR_CLIP_PLANE_NEAR = 30, + PERF_PAPC_CLPR_CLIP_PLANE_FAR = 31, + PERF_PAPC_CLPR_CLIP_PLANE_LEFT = 32, + PERF_PAPC_CLPR_CLIP_PLANE_RIGHT = 33, + PERF_PAPC_CLPR_CLIP_PLANE_TOP = 34, + PERF_PAPC_CLPR_CLIP_PLANE_BOTTOM = 35, + PERF_PAPC_CLSM_NULL_PRIM = 36, + PERF_PAPC_CLSM_TOTALLY_VISIBLE_PRIM = 37, + PERF_PAPC_CLSM_CLIP_PRIM = 38, + PERF_PAPC_CLSM_CULL_TO_NULL_PRIM = 39, + PERF_PAPC_CLSM_OUT_PRIM_CNT_1 = 40, + PERF_PAPC_CLSM_OUT_PRIM_CNT_2 = 41, + PERF_PAPC_CLSM_OUT_PRIM_CNT_3 = 42, + PERF_PAPC_CLSM_OUT_PRIM_CNT_4 = 43, + PERF_PAPC_CLSM_OUT_PRIM_CNT_5 = 44, + PERF_PAPC_CLSM_OUT_PRIM_CNT_6_7 = 45, + PERF_PAPC_CLSM_NON_TRIVIAL_CULL = 46, + PERF_PAPC_SU_INPUT_PRIM = 47, + PERF_PAPC_SU_INPUT_CLIP_PRIM = 48, + PERF_PAPC_SU_INPUT_NULL_PRIM = 49, + PERF_PAPC_SU_ZERO_AREA_CULL_PRIM = 50, + PERF_PAPC_SU_BACK_FACE_CULL_PRIM = 51, + PERF_PAPC_SU_FRONT_FACE_CULL_PRIM = 52, + PERF_PAPC_SU_POLYMODE_FACE_CULL = 53, + PERF_PAPC_SU_POLYMODE_BACK_CULL = 54, + PERF_PAPC_SU_POLYMODE_FRONT_CULL = 55, + PERF_PAPC_SU_POLYMODE_INVALID_FILL = 56, + PERF_PAPC_SU_OUTPUT_PRIM = 57, + PERF_PAPC_SU_OUTPUT_CLIP_PRIM = 58, + PERF_PAPC_SU_OUTPUT_NULL_PRIM = 59, + PERF_PAPC_SU_OUTPUT_EVENT_FLAG = 60, + PERF_PAPC_SU_OUTPUT_FIRST_PRIM_SLOT = 61, + PERF_PAPC_SU_OUTPUT_END_OF_PACKET = 62, + PERF_PAPC_SU_OUTPUT_POLYMODE_FACE = 63, + PERF_PAPC_SU_OUTPUT_POLYMODE_BACK = 64, + PERF_PAPC_SU_OUTPUT_POLYMODE_FRONT = 65, + PERF_PAPC_SU_OUT_CLIP_POLYMODE_FACE = 66, + PERF_PAPC_SU_OUT_CLIP_POLYMODE_BACK = 67, + PERF_PAPC_SU_OUT_CLIP_POLYMODE_FRONT = 68, + PERF_PAPC_PASX_REQ_IDLE = 69, + PERF_PAPC_PASX_REQ_BUSY = 70, + PERF_PAPC_PASX_REQ_STALLED = 71, + PERF_PAPC_PASX_REC_IDLE = 72, + PERF_PAPC_PASX_REC_BUSY = 73, + PERF_PAPC_PASX_REC_STARVED_SX = 74, + PERF_PAPC_PASX_REC_STALLED = 75, + PERF_PAPC_PASX_REC_STALLED_POS_MEM = 76, + 
PERF_PAPC_PASX_REC_STALLED_CCGSM_IN = 77, + PERF_PAPC_CCGSM_IDLE = 78, + PERF_PAPC_CCGSM_BUSY = 79, + PERF_PAPC_CCGSM_STALLED = 80, + PERF_PAPC_CLPRIM_IDLE = 81, + PERF_PAPC_CLPRIM_BUSY = 82, + PERF_PAPC_CLPRIM_STALLED = 83, + PERF_PAPC_CLPRIM_STARVED_CCGSM = 84, + PERF_PAPC_CLIPSM_IDLE = 85, + PERF_PAPC_CLIPSM_BUSY = 86, + PERF_PAPC_CLIPSM_WAIT_CLIP_VERT_ENGH = 87, + PERF_PAPC_CLIPSM_WAIT_HIGH_PRI_SEQ = 88, + PERF_PAPC_CLIPSM_WAIT_CLIPGA = 89, + PERF_PAPC_CLIPSM_WAIT_AVAIL_VTE_CLIP = 90, + PERF_PAPC_CLIPSM_WAIT_CLIP_OUTSM = 91, + PERF_PAPC_CLIPGA_IDLE = 92, + PERF_PAPC_CLIPGA_BUSY = 93, + PERF_PAPC_CLIPGA_STARVED_VTE_CLIP = 94, + PERF_PAPC_CLIPGA_STALLED = 95, + PERF_PAPC_CLIP_IDLE = 96, + PERF_PAPC_CLIP_BUSY = 97, + PERF_PAPC_SU_IDLE = 98, + PERF_PAPC_SU_BUSY = 99, + PERF_PAPC_SU_STARVED_CLIP = 100, + PERF_PAPC_SU_STALLED_SC = 101, + PERF_PAPC_SU_FACENESS_CULL = 102, +}; + +enum a2xx_sc_perfcnt_select { + SC_SR_WINDOW_VALID = 0, + SC_CW_WINDOW_VALID = 1, + SC_QM_WINDOW_VALID = 2, + SC_FW_WINDOW_VALID = 3, + SC_EZ_WINDOW_VALID = 4, + SC_IT_WINDOW_VALID = 5, + SC_STARVED_BY_PA = 6, + SC_STALLED_BY_RB_TILE = 7, + SC_STALLED_BY_RB_SAMP = 8, + SC_STARVED_BY_RB_EZ = 9, + SC_STALLED_BY_SAMPLE_FF = 10, + SC_STALLED_BY_SQ = 11, + SC_STALLED_BY_SP = 12, + SC_TOTAL_NO_PRIMS = 13, + SC_NON_EMPTY_PRIMS = 14, + SC_NO_TILES_PASSING_QM = 15, + SC_NO_PIXELS_PRE_EZ = 16, + SC_NO_PIXELS_POST_EZ = 17, +}; + +enum a2xx_vgt_perfcount_select { + VGT_SQ_EVENT_WINDOW_ACTIVE = 0, + VGT_SQ_SEND = 1, + VGT_SQ_STALLED = 2, + VGT_SQ_STARVED_BUSY = 3, + VGT_SQ_STARVED_IDLE = 4, + VGT_SQ_STATIC = 5, + VGT_PA_EVENT_WINDOW_ACTIVE = 6, + VGT_PA_CLIP_V_SEND = 7, + VGT_PA_CLIP_V_STALLED = 8, + VGT_PA_CLIP_V_STARVED_BUSY = 9, + VGT_PA_CLIP_V_STARVED_IDLE = 10, + VGT_PA_CLIP_V_STATIC = 11, + VGT_PA_CLIP_P_SEND = 12, + VGT_PA_CLIP_P_STALLED = 13, + VGT_PA_CLIP_P_STARVED_BUSY = 14, + VGT_PA_CLIP_P_STARVED_IDLE = 15, + VGT_PA_CLIP_P_STATIC = 16, + VGT_PA_CLIP_S_SEND = 17, + VGT_PA_CLIP_S_STALLED = 18, + 
VGT_PA_CLIP_S_STARVED_BUSY = 19, + VGT_PA_CLIP_S_STARVED_IDLE = 20, + VGT_PA_CLIP_S_STATIC = 21, + RBIU_FIFOS_EVENT_WINDOW_ACTIVE = 22, + RBIU_IMMED_DATA_FIFO_STARVED = 23, + RBIU_IMMED_DATA_FIFO_STALLED = 24, + RBIU_DMA_REQUEST_FIFO_STARVED = 25, + RBIU_DMA_REQUEST_FIFO_STALLED = 26, + RBIU_DRAW_INITIATOR_FIFO_STARVED = 27, + RBIU_DRAW_INITIATOR_FIFO_STALLED = 28, + BIN_PRIM_NEAR_CULL = 29, + BIN_PRIM_ZERO_CULL = 30, + BIN_PRIM_FAR_CULL = 31, + BIN_PRIM_BIN_CULL = 32, + BIN_PRIM_FACE_CULL = 33, + SPARE34 = 34, + SPARE35 = 35, + SPARE36 = 36, + SPARE37 = 37, + SPARE38 = 38, + SPARE39 = 39, + TE_SU_IN_VALID = 40, + TE_SU_IN_READ = 41, + TE_SU_IN_PRIM = 42, + TE_SU_IN_EOP = 43, + TE_SU_IN_NULL_PRIM = 44, + TE_WK_IN_VALID = 45, + TE_WK_IN_READ = 46, + TE_OUT_PRIM_VALID = 47, + TE_OUT_PRIM_READ = 48, +}; + +enum a2xx_tcr_perfcount_select { + DGMMPD_IPMUX0_STALL = 0, + DGMMPD_IPMUX_ALL_STALL = 4, + OPMUX0_L2_WRITES = 5, +}; + +enum a2xx_tp_perfcount_select { + POINT_QUADS = 0, + BILIN_QUADS = 1, + ANISO_QUADS = 2, + MIP_QUADS = 3, + VOL_QUADS = 4, + MIP_VOL_QUADS = 5, + MIP_ANISO_QUADS = 6, + VOL_ANISO_QUADS = 7, + ANISO_2_1_QUADS = 8, + ANISO_4_1_QUADS = 9, + ANISO_6_1_QUADS = 10, + ANISO_8_1_QUADS = 11, + ANISO_10_1_QUADS = 12, + ANISO_12_1_QUADS = 13, + ANISO_14_1_QUADS = 14, + ANISO_16_1_QUADS = 15, + MIP_VOL_ANISO_QUADS = 16, + ALIGN_2_QUADS = 17, + ALIGN_4_QUADS = 18, + PIX_0_QUAD = 19, + PIX_1_QUAD = 20, + PIX_2_QUAD = 21, + PIX_3_QUAD = 22, + PIX_4_QUAD = 23, + TP_MIPMAP_LOD0 = 24, + TP_MIPMAP_LOD1 = 25, + TP_MIPMAP_LOD2 = 26, + TP_MIPMAP_LOD3 = 27, + TP_MIPMAP_LOD4 = 28, + TP_MIPMAP_LOD5 = 29, + TP_MIPMAP_LOD6 = 30, + TP_MIPMAP_LOD7 = 31, + TP_MIPMAP_LOD8 = 32, + TP_MIPMAP_LOD9 = 33, + TP_MIPMAP_LOD10 = 34, + TP_MIPMAP_LOD11 = 35, + TP_MIPMAP_LOD12 = 36, + TP_MIPMAP_LOD13 = 37, + TP_MIPMAP_LOD14 = 38, +}; + +enum a2xx_tcm_perfcount_select { + QUAD0_RD_LAT_FIFO_EMPTY = 0, + QUAD0_RD_LAT_FIFO_4TH_FULL = 3, + QUAD0_RD_LAT_FIFO_HALF_FULL = 4, + 
QUAD0_RD_LAT_FIFO_FULL = 5, + QUAD0_RD_LAT_FIFO_LT_4TH_FULL = 6, + READ_STARVED_QUAD0 = 28, + READ_STARVED = 32, + READ_STALLED_QUAD0 = 33, + READ_STALLED = 37, + VALID_READ_QUAD0 = 38, + TC_TP_STARVED_QUAD0 = 42, + TC_TP_STARVED = 46, +}; + +enum a2xx_tcf_perfcount_select { + VALID_CYCLES = 0, + SINGLE_PHASES = 1, + ANISO_PHASES = 2, + MIP_PHASES = 3, + VOL_PHASES = 4, + MIP_VOL_PHASES = 5, + MIP_ANISO_PHASES = 6, + VOL_ANISO_PHASES = 7, + ANISO_2_1_PHASES = 8, + ANISO_4_1_PHASES = 9, + ANISO_6_1_PHASES = 10, + ANISO_8_1_PHASES = 11, + ANISO_10_1_PHASES = 12, + ANISO_12_1_PHASES = 13, + ANISO_14_1_PHASES = 14, + ANISO_16_1_PHASES = 15, + MIP_VOL_ANISO_PHASES = 16, + ALIGN_2_PHASES = 17, + ALIGN_4_PHASES = 18, + TPC_BUSY = 19, + TPC_STALLED = 20, + TPC_STARVED = 21, + TPC_WORKING = 22, + TPC_WALKER_BUSY = 23, + TPC_WALKER_STALLED = 24, + TPC_WALKER_WORKING = 25, + TPC_ALIGNER_BUSY = 26, + TPC_ALIGNER_STALLED = 27, + TPC_ALIGNER_STALLED_BY_BLEND = 28, + TPC_ALIGNER_STALLED_BY_CACHE = 29, + TPC_ALIGNER_WORKING = 30, + TPC_BLEND_BUSY = 31, + TPC_BLEND_SYNC = 32, + TPC_BLEND_STARVED = 33, + TPC_BLEND_WORKING = 34, + OPCODE_0x00 = 35, + OPCODE_0x01 = 36, + OPCODE_0x04 = 37, + OPCODE_0x10 = 38, + OPCODE_0x11 = 39, + OPCODE_0x12 = 40, + OPCODE_0x13 = 41, + OPCODE_0x18 = 42, + OPCODE_0x19 = 43, + OPCODE_0x1A = 44, + OPCODE_OTHER = 45, + IN_FIFO_0_EMPTY = 56, + IN_FIFO_0_LT_HALF_FULL = 57, + IN_FIFO_0_HALF_FULL = 58, + IN_FIFO_0_FULL = 59, + IN_FIFO_TPC_EMPTY = 72, + IN_FIFO_TPC_LT_HALF_FULL = 73, + IN_FIFO_TPC_HALF_FULL = 74, + IN_FIFO_TPC_FULL = 75, + TPC_TC_XFC = 76, + TPC_TC_STATE = 77, + TC_STALL = 78, + QUAD0_TAPS = 79, + QUADS = 83, + TCA_SYNC_STALL = 84, + TAG_STALL = 85, + TCB_SYNC_STALL = 88, + TCA_VALID = 89, + PROBES_VALID = 90, + MISS_STALL = 91, + FETCH_FIFO_STALL = 92, + TCO_STALL = 93, + ANY_STALL = 94, + TAG_MISSES = 95, + TAG_HITS = 96, + SUB_TAG_MISSES = 97, + SET0_INVALIDATES = 98, + SET1_INVALIDATES = 99, + SET2_INVALIDATES = 100, + SET3_INVALIDATES = 
101, + SET0_TAG_MISSES = 102, + SET1_TAG_MISSES = 103, + SET2_TAG_MISSES = 104, + SET3_TAG_MISSES = 105, + SET0_TAG_HITS = 106, + SET1_TAG_HITS = 107, + SET2_TAG_HITS = 108, + SET3_TAG_HITS = 109, + SET0_SUB_TAG_MISSES = 110, + SET1_SUB_TAG_MISSES = 111, + SET2_SUB_TAG_MISSES = 112, + SET3_SUB_TAG_MISSES = 113, + SET0_EVICT1 = 114, + SET0_EVICT2 = 115, + SET0_EVICT3 = 116, + SET0_EVICT4 = 117, + SET0_EVICT5 = 118, + SET0_EVICT6 = 119, + SET0_EVICT7 = 120, + SET0_EVICT8 = 121, + SET1_EVICT1 = 130, + SET1_EVICT2 = 131, + SET1_EVICT3 = 132, + SET1_EVICT4 = 133, + SET1_EVICT5 = 134, + SET1_EVICT6 = 135, + SET1_EVICT7 = 136, + SET1_EVICT8 = 137, + SET2_EVICT1 = 146, + SET2_EVICT2 = 147, + SET2_EVICT3 = 148, + SET2_EVICT4 = 149, + SET2_EVICT5 = 150, + SET2_EVICT6 = 151, + SET2_EVICT7 = 152, + SET2_EVICT8 = 153, + SET3_EVICT1 = 162, + SET3_EVICT2 = 163, + SET3_EVICT3 = 164, + SET3_EVICT4 = 165, + SET3_EVICT5 = 166, + SET3_EVICT6 = 167, + SET3_EVICT7 = 168, + SET3_EVICT8 = 169, + FF_EMPTY = 178, + FF_LT_HALF_FULL = 179, + FF_HALF_FULL = 180, + FF_FULL = 181, + FF_XFC = 182, + FF_STALLED = 183, + FG_MASKS = 184, + FG_LEFT_MASKS = 185, + FG_LEFT_MASK_STALLED = 186, + FG_LEFT_NOT_DONE_STALL = 187, + FG_LEFT_FG_STALL = 188, + FG_LEFT_SECTORS = 189, + FG0_REQUESTS = 195, + FG0_STALLED = 196, + MEM_REQ512 = 199, + MEM_REQ_SENT = 200, + MEM_LOCAL_READ_REQ = 202, + TC0_MH_STALLED = 203, +}; + +enum a2xx_sq_perfcnt_select { + SQ_PIXEL_VECTORS_SUB = 0, + SQ_VERTEX_VECTORS_SUB = 1, + SQ_ALU0_ACTIVE_VTX_SIMD0 = 2, + SQ_ALU1_ACTIVE_VTX_SIMD0 = 3, + SQ_ALU0_ACTIVE_PIX_SIMD0 = 4, + SQ_ALU1_ACTIVE_PIX_SIMD0 = 5, + SQ_ALU0_ACTIVE_VTX_SIMD1 = 6, + SQ_ALU1_ACTIVE_VTX_SIMD1 = 7, + SQ_ALU0_ACTIVE_PIX_SIMD1 = 8, + SQ_ALU1_ACTIVE_PIX_SIMD1 = 9, + SQ_EXPORT_CYCLES = 10, + SQ_ALU_CST_WRITTEN = 11, + SQ_TEX_CST_WRITTEN = 12, + SQ_ALU_CST_STALL = 13, + SQ_ALU_TEX_STALL = 14, + SQ_INST_WRITTEN = 15, + SQ_BOOLEAN_WRITTEN = 16, + SQ_LOOPS_WRITTEN = 17, + SQ_PIXEL_SWAP_IN = 18, + SQ_PIXEL_SWAP_OUT = 19, 
+ SQ_VERTEX_SWAP_IN = 20, + SQ_VERTEX_SWAP_OUT = 21, + SQ_ALU_VTX_INST_ISSUED = 22, + SQ_TEX_VTX_INST_ISSUED = 23, + SQ_VC_VTX_INST_ISSUED = 24, + SQ_CF_VTX_INST_ISSUED = 25, + SQ_ALU_PIX_INST_ISSUED = 26, + SQ_TEX_PIX_INST_ISSUED = 27, + SQ_VC_PIX_INST_ISSUED = 28, + SQ_CF_PIX_INST_ISSUED = 29, + SQ_ALU0_FIFO_EMPTY_SIMD0 = 30, + SQ_ALU1_FIFO_EMPTY_SIMD0 = 31, + SQ_ALU0_FIFO_EMPTY_SIMD1 = 32, + SQ_ALU1_FIFO_EMPTY_SIMD1 = 33, + SQ_ALU_NOPS = 34, + SQ_PRED_SKIP = 35, + SQ_SYNC_ALU_STALL_SIMD0_VTX = 36, + SQ_SYNC_ALU_STALL_SIMD1_VTX = 37, + SQ_SYNC_TEX_STALL_VTX = 38, + SQ_SYNC_VC_STALL_VTX = 39, + SQ_CONSTANTS_USED_SIMD0 = 40, + SQ_CONSTANTS_SENT_SP_SIMD0 = 41, + SQ_GPR_STALL_VTX = 42, + SQ_GPR_STALL_PIX = 43, + SQ_VTX_RS_STALL = 44, + SQ_PIX_RS_STALL = 45, + SQ_SX_PC_FULL = 46, + SQ_SX_EXP_BUFF_FULL = 47, + SQ_SX_POS_BUFF_FULL = 48, + SQ_INTERP_QUADS = 49, + SQ_INTERP_ACTIVE = 50, + SQ_IN_PIXEL_STALL = 51, + SQ_IN_VTX_STALL = 52, + SQ_VTX_CNT = 53, + SQ_VTX_VECTOR2 = 54, + SQ_VTX_VECTOR3 = 55, + SQ_VTX_VECTOR4 = 56, + SQ_PIXEL_VECTOR1 = 57, + SQ_PIXEL_VECTOR23 = 58, + SQ_PIXEL_VECTOR4 = 59, + SQ_CONSTANTS_USED_SIMD1 = 60, + SQ_CONSTANTS_SENT_SP_SIMD1 = 61, + SQ_SX_MEM_EXP_FULL = 62, + SQ_ALU0_ACTIVE_VTX_SIMD2 = 63, + SQ_ALU1_ACTIVE_VTX_SIMD2 = 64, + SQ_ALU0_ACTIVE_PIX_SIMD2 = 65, + SQ_ALU1_ACTIVE_PIX_SIMD2 = 66, + SQ_ALU0_ACTIVE_VTX_SIMD3 = 67, + SQ_PERFCOUNT_VTX_QUAL_TP_DONE = 68, + SQ_ALU0_ACTIVE_PIX_SIMD3 = 69, + SQ_PERFCOUNT_PIX_QUAL_TP_DONE = 70, + SQ_ALU0_FIFO_EMPTY_SIMD2 = 71, + SQ_ALU1_FIFO_EMPTY_SIMD2 = 72, + SQ_ALU0_FIFO_EMPTY_SIMD3 = 73, + SQ_ALU1_FIFO_EMPTY_SIMD3 = 74, + SQ_SYNC_ALU_STALL_SIMD2_VTX = 75, + SQ_PERFCOUNT_VTX_POP_THREAD = 76, + SQ_SYNC_ALU_STALL_SIMD0_PIX = 77, + SQ_SYNC_ALU_STALL_SIMD1_PIX = 78, + SQ_SYNC_ALU_STALL_SIMD2_PIX = 79, + SQ_PERFCOUNT_PIX_POP_THREAD = 80, + SQ_SYNC_TEX_STALL_PIX = 81, + SQ_SYNC_VC_STALL_PIX = 82, + SQ_CONSTANTS_USED_SIMD2 = 83, + SQ_CONSTANTS_SENT_SP_SIMD2 = 84, + SQ_PERFCOUNT_VTX_DEALLOC_ACK = 85, + 
SQ_PERFCOUNT_PIX_DEALLOC_ACK = 86, + SQ_ALU0_FIFO_FULL_SIMD0 = 87, + SQ_ALU1_FIFO_FULL_SIMD0 = 88, + SQ_ALU0_FIFO_FULL_SIMD1 = 89, + SQ_ALU1_FIFO_FULL_SIMD1 = 90, + SQ_ALU0_FIFO_FULL_SIMD2 = 91, + SQ_ALU1_FIFO_FULL_SIMD2 = 92, + SQ_ALU0_FIFO_FULL_SIMD3 = 93, + SQ_ALU1_FIFO_FULL_SIMD3 = 94, + VC_PERF_STATIC = 95, + VC_PERF_STALLED = 96, + VC_PERF_STARVED = 97, + VC_PERF_SEND = 98, + VC_PERF_ACTUAL_STARVED = 99, + PIXEL_THREAD_0_ACTIVE = 100, + VERTEX_THREAD_0_ACTIVE = 101, + PIXEL_THREAD_0_NUMBER = 102, + VERTEX_THREAD_0_NUMBER = 103, + VERTEX_EVENT_NUMBER = 104, + PIXEL_EVENT_NUMBER = 105, + PTRBUFF_EF_PUSH = 106, + PTRBUFF_EF_POP_EVENT = 107, + PTRBUFF_EF_POP_NEW_VTX = 108, + PTRBUFF_EF_POP_DEALLOC = 109, + PTRBUFF_EF_POP_PVECTOR = 110, + PTRBUFF_EF_POP_PVECTOR_X = 111, + PTRBUFF_EF_POP_PVECTOR_VNZ = 112, + PTRBUFF_PB_DEALLOC = 113, + PTRBUFF_PI_STATE_PPB_POP = 114, + PTRBUFF_PI_RTR = 115, + PTRBUFF_PI_READ_EN = 116, + PTRBUFF_PI_BUFF_SWAP = 117, + PTRBUFF_SQ_FREE_BUFF = 118, + PTRBUFF_SQ_DEC = 119, + PTRBUFF_SC_VALID_CNTL_EVENT = 120, + PTRBUFF_SC_VALID_IJ_XFER = 121, + PTRBUFF_SC_NEW_VECTOR_1_Q = 122, + PTRBUFF_QUAL_NEW_VECTOR = 123, + PTRBUFF_QUAL_EVENT = 124, + PTRBUFF_END_BUFFER = 125, + PTRBUFF_FILL_QUAD = 126, + VERTS_WRITTEN_SPI = 127, + TP_FETCH_INSTR_EXEC = 128, + TP_FETCH_INSTR_REQ = 129, + TP_DATA_RETURN = 130, + SPI_WRITE_CYCLES_SP = 131, + SPI_WRITES_SP = 132, + SP_ALU_INSTR_EXEC = 133, + SP_CONST_ADDR_TO_SQ = 134, + SP_PRED_KILLS_TO_SQ = 135, + SP_EXPORT_CYCLES_TO_SX = 136, + SP_EXPORTS_TO_SX = 137, + SQ_CYCLES_ELAPSED = 138, + SQ_TCFS_OPT_ALLOC_EXEC = 139, + SQ_TCFS_NO_OPT_ALLOC = 140, + SQ_ALU0_NO_OPT_ALLOC = 141, + SQ_ALU1_NO_OPT_ALLOC = 142, + SQ_TCFS_ARB_XFC_CNT = 143, + SQ_ALU0_ARB_XFC_CNT = 144, + SQ_ALU1_ARB_XFC_CNT = 145, + SQ_TCFS_CFS_UPDATE_CNT = 146, + SQ_ALU0_CFS_UPDATE_CNT = 147, + SQ_ALU1_CFS_UPDATE_CNT = 148, + SQ_VTX_PUSH_THREAD_CNT = 149, + SQ_VTX_POP_THREAD_CNT = 150, + SQ_PIX_PUSH_THREAD_CNT = 151, + SQ_PIX_POP_THREAD_CNT = 152, 
+ SQ_PIX_TOTAL = 153, + SQ_PIX_KILLED = 154, +}; + +enum a2xx_sx_perfcnt_select { + SX_EXPORT_VECTORS = 0, + SX_DUMMY_QUADS = 1, + SX_ALPHA_FAIL = 2, + SX_RB_QUAD_BUSY = 3, + SX_RB_COLOR_BUSY = 4, + SX_RB_QUAD_STALL = 5, + SX_RB_COLOR_STALL = 6, +}; + +enum a2xx_rbbm_perfcount1_sel { + RBBM1_COUNT = 0, + RBBM1_NRT_BUSY = 1, + RBBM1_RB_BUSY = 2, + RBBM1_SQ_CNTX0_BUSY = 3, + RBBM1_SQ_CNTX17_BUSY = 4, + RBBM1_VGT_BUSY = 5, + RBBM1_VGT_NODMA_BUSY = 6, + RBBM1_PA_BUSY = 7, + RBBM1_SC_CNTX_BUSY = 8, + RBBM1_TPC_BUSY = 9, + RBBM1_TC_BUSY = 10, + RBBM1_SX_BUSY = 11, + RBBM1_CP_COHER_BUSY = 12, + RBBM1_CP_NRT_BUSY = 13, + RBBM1_GFX_IDLE_STALL = 14, + RBBM1_INTERRUPT = 15, +}; + +enum a2xx_cp_perfcount_sel { + ALWAYS_COUNT = 0, + TRANS_FIFO_FULL = 1, + TRANS_FIFO_AF = 2, + RCIU_PFPTRANS_WAIT = 3, + RCIU_NRTTRANS_WAIT = 6, + CSF_NRT_READ_WAIT = 8, + CSF_I1_FIFO_FULL = 9, + CSF_I2_FIFO_FULL = 10, + CSF_ST_FIFO_FULL = 11, + CSF_RING_ROQ_FULL = 13, + CSF_I1_ROQ_FULL = 14, + CSF_I2_ROQ_FULL = 15, + CSF_ST_ROQ_FULL = 16, + MIU_TAG_MEM_FULL = 18, + MIU_WRITECLEAN = 19, + MIU_NRT_WRITE_STALLED = 22, + MIU_NRT_READ_STALLED = 23, + ME_WRITE_CONFIRM_FIFO_FULL = 24, + ME_VS_DEALLOC_FIFO_FULL = 25, + ME_PS_DEALLOC_FIFO_FULL = 26, + ME_REGS_VS_EVENT_FIFO_FULL = 27, + ME_REGS_PS_EVENT_FIFO_FULL = 28, + ME_REGS_CF_EVENT_FIFO_FULL = 29, + ME_MICRO_RB_STARVED = 30, + ME_MICRO_I1_STARVED = 31, + ME_MICRO_I2_STARVED = 32, + ME_MICRO_ST_STARVED = 33, + RCIU_RBBM_DWORD_SENT = 40, + ME_BUSY_CLOCKS = 41, + ME_WAIT_CONTEXT_AVAIL = 42, + PFP_TYPE0_PACKET = 43, + PFP_TYPE3_PACKET = 44, + CSF_RB_WPTR_NEQ_RPTR = 45, + CSF_I1_SIZE_NEQ_ZERO = 46, + CSF_I2_SIZE_NEQ_ZERO = 47, + CSF_RBI1I2_FETCHING = 48, +}; + +enum a2xx_rb_perfcnt_select { + RBPERF_CNTX_BUSY = 0, + RBPERF_CNTX_BUSY_MAX = 1, + RBPERF_SX_QUAD_STARVED = 2, + RBPERF_SX_QUAD_STARVED_MAX = 3, + RBPERF_GA_GC_CH0_SYS_REQ = 4, + RBPERF_GA_GC_CH0_SYS_REQ_MAX = 5, + RBPERF_GA_GC_CH1_SYS_REQ = 6, + RBPERF_GA_GC_CH1_SYS_REQ_MAX = 7, + RBPERF_MH_STARVED 
= 8, + RBPERF_MH_STARVED_MAX = 9, + RBPERF_AZ_BC_COLOR_BUSY = 10, + RBPERF_AZ_BC_COLOR_BUSY_MAX = 11, + RBPERF_AZ_BC_Z_BUSY = 12, + RBPERF_AZ_BC_Z_BUSY_MAX = 13, + RBPERF_RB_SC_TILE_RTR_N = 14, + RBPERF_RB_SC_TILE_RTR_N_MAX = 15, + RBPERF_RB_SC_SAMP_RTR_N = 16, + RBPERF_RB_SC_SAMP_RTR_N_MAX = 17, + RBPERF_RB_SX_QUAD_RTR_N = 18, + RBPERF_RB_SX_QUAD_RTR_N_MAX = 19, + RBPERF_RB_SX_COLOR_RTR_N = 20, + RBPERF_RB_SX_COLOR_RTR_N_MAX = 21, + RBPERF_RB_SC_SAMP_LZ_BUSY = 22, + RBPERF_RB_SC_SAMP_LZ_BUSY_MAX = 23, + RBPERF_ZXP_STALL = 24, + RBPERF_ZXP_STALL_MAX = 25, + RBPERF_EVENT_PENDING = 26, + RBPERF_EVENT_PENDING_MAX = 27, + RBPERF_RB_MH_VALID = 28, + RBPERF_RB_MH_VALID_MAX = 29, + RBPERF_SX_RB_QUAD_SEND = 30, + RBPERF_SX_RB_COLOR_SEND = 31, + RBPERF_SC_RB_TILE_SEND = 32, + RBPERF_SC_RB_SAMPLE_SEND = 33, + RBPERF_SX_RB_MEM_EXPORT = 34, + RBPERF_SX_RB_QUAD_EVENT = 35, + RBPERF_SC_RB_TILE_EVENT_FILTERED = 36, + RBPERF_SC_RB_TILE_EVENT_ALL = 37, + RBPERF_RB_SC_EZ_SEND = 38, + RBPERF_RB_SX_INDEX_SEND = 39, + RBPERF_GMEM_INTFO_RD = 40, + RBPERF_GMEM_INTF1_RD = 41, + RBPERF_GMEM_INTFO_WR = 42, + RBPERF_GMEM_INTF1_WR = 43, + RBPERF_RB_CP_CONTEXT_DONE = 44, + RBPERF_RB_CP_CACHE_FLUSH = 45, + RBPERF_ZPASS_DONE = 46, + RBPERF_ZCMD_VALID = 47, + RBPERF_CCMD_VALID = 48, + RBPERF_ACCUM_GRANT = 49, + RBPERF_ACCUM_C0_GRANT = 50, + RBPERF_ACCUM_C1_GRANT = 51, + RBPERF_ACCUM_FULL_BE_WR = 52, + RBPERF_ACCUM_REQUEST_NO_GRANT = 53, + RBPERF_ACCUM_TIMEOUT_PULSE = 54, + RBPERF_ACCUM_LIN_TIMEOUT_PULSE = 55, + RBPERF_ACCUM_CAM_HIT_FLUSHING = 56, +}; + +enum adreno_mmu_clnt_beh { + BEH_NEVR = 0, + BEH_TRAN_RNG = 1, + BEH_TRAN_FLT = 2, +}; + +enum sq_tex_clamp { + SQ_TEX_WRAP = 0, + SQ_TEX_MIRROR = 1, + SQ_TEX_CLAMP_LAST_TEXEL = 2, + SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 3, + SQ_TEX_CLAMP_HALF_BORDER = 4, + SQ_TEX_MIRROR_ONCE_HALF_BORDER = 5, + SQ_TEX_CLAMP_BORDER = 6, + SQ_TEX_MIRROR_ONCE_BORDER = 7, +}; + +enum sq_tex_swiz { + SQ_TEX_X = 0, + SQ_TEX_Y = 1, + SQ_TEX_Z = 2, + SQ_TEX_W = 3, + SQ_TEX_ZERO 
= 4, + SQ_TEX_ONE = 5, +}; + +enum sq_tex_filter { + SQ_TEX_FILTER_POINT = 0, + SQ_TEX_FILTER_BILINEAR = 1, + SQ_TEX_FILTER_BASEMAP = 2, + SQ_TEX_FILTER_USE_FETCH_CONST = 3, +}; + +enum sq_tex_aniso_filter { + SQ_TEX_ANISO_FILTER_DISABLED = 0, + SQ_TEX_ANISO_FILTER_MAX_1_1 = 1, + SQ_TEX_ANISO_FILTER_MAX_2_1 = 2, + SQ_TEX_ANISO_FILTER_MAX_4_1 = 3, + SQ_TEX_ANISO_FILTER_MAX_8_1 = 4, + SQ_TEX_ANISO_FILTER_MAX_16_1 = 5, + SQ_TEX_ANISO_FILTER_USE_FETCH_CONST = 7, +}; + +enum sq_tex_dimension { + SQ_TEX_DIMENSION_1D = 0, + SQ_TEX_DIMENSION_2D = 1, + SQ_TEX_DIMENSION_3D = 2, + SQ_TEX_DIMENSION_CUBE = 3, +}; + +enum sq_tex_border_color { + SQ_TEX_BORDER_COLOR_BLACK = 0, + SQ_TEX_BORDER_COLOR_WHITE = 1, + SQ_TEX_BORDER_COLOR_ACBYCR_BLACK = 2, + SQ_TEX_BORDER_COLOR_ACBCRY_BLACK = 3, +}; + +enum sq_tex_sign { + SQ_TEX_SIGN_UNISIGNED = 0, + SQ_TEX_SIGN_SIGNED = 1, + SQ_TEX_SIGN_UNISIGNED_BIASED = 2, + SQ_TEX_SIGN_GAMMA = 3, +}; + +enum sq_tex_endian { + SQ_TEX_ENDIAN_NONE = 0, + SQ_TEX_ENDIAN_8IN16 = 1, + SQ_TEX_ENDIAN_8IN32 = 2, + SQ_TEX_ENDIAN_16IN32 = 3, +}; + +enum sq_tex_clamp_policy { + SQ_TEX_CLAMP_POLICY_D3D = 0, + SQ_TEX_CLAMP_POLICY_OGL = 1, +}; + +enum sq_tex_num_format { + SQ_TEX_NUM_FORMAT_FRAC = 0, + SQ_TEX_NUM_FORMAT_INT = 1, +}; + +enum sq_tex_type { + SQ_TEX_TYPE_0 = 0, + SQ_TEX_TYPE_1 = 1, + SQ_TEX_TYPE_2 = 2, + SQ_TEX_TYPE_3 = 3, +}; + +#define REG_A2XX_RBBM_PATCH_RELEASE 0x00000001 + +#define REG_A2XX_RBBM_CNTL 0x0000003b + +#define REG_A2XX_RBBM_SOFT_RESET 0x0000003c + +#define REG_A2XX_CP_PFP_UCODE_ADDR 0x000000c0 + +#define REG_A2XX_CP_PFP_UCODE_DATA 0x000000c1 + +#define REG_A2XX_MH_MMU_CONFIG 0x00000040 +#define A2XX_MH_MMU_CONFIG_MMU_ENABLE 0x00000001 +#define A2XX_MH_MMU_CONFIG_SPLIT_MODE_ENABLE 0x00000002 +#define A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__MASK 0x00000030 +#define A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__SHIFT 4 +static inline uint32_t A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << 
A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__MASK; +} +#define A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__MASK 0x000000c0 +#define A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__SHIFT 6 +static inline uint32_t A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__MASK; +} +#define A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__MASK 0x00000300 +#define A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__SHIFT 8 +static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__MASK; +} +#define A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__MASK 0x00000c00 +#define A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__SHIFT 10 +static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__MASK; +} +#define A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__MASK 0x00003000 +#define A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__SHIFT 12 +static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__MASK; +} +#define A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__MASK 0x0000c000 +#define A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__SHIFT 14 +static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__MASK; +} +#define A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__MASK 0x00030000 +#define A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__SHIFT 16 +static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return 
((val) << A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__MASK; +} +#define A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__MASK 0x000c0000 +#define A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__SHIFT 18 +static inline uint32_t A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__MASK; +} +#define A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__MASK 0x00300000 +#define A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__SHIFT 20 +static inline uint32_t A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__MASK; +} +#define A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__MASK 0x00c00000 +#define A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__SHIFT 22 +static inline uint32_t A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__MASK; +} +#define A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__MASK 0x03000000 +#define A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__SHIFT 24 +static inline uint32_t A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__MASK; +} + +#define REG_A2XX_MH_MMU_VA_RANGE 0x00000041 +#define A2XX_MH_MMU_VA_RANGE_NUM_64KB_REGIONS__MASK 0x00000fff +#define A2XX_MH_MMU_VA_RANGE_NUM_64KB_REGIONS__SHIFT 0 +static inline uint32_t A2XX_MH_MMU_VA_RANGE_NUM_64KB_REGIONS(uint32_t val) +{ + return ((val) << A2XX_MH_MMU_VA_RANGE_NUM_64KB_REGIONS__SHIFT) & A2XX_MH_MMU_VA_RANGE_NUM_64KB_REGIONS__MASK; +} +#define A2XX_MH_MMU_VA_RANGE_VA_BASE__MASK 0xfffff000 +#define A2XX_MH_MMU_VA_RANGE_VA_BASE__SHIFT 12 +static inline uint32_t A2XX_MH_MMU_VA_RANGE_VA_BASE(uint32_t val) +{ + return 
((val) << A2XX_MH_MMU_VA_RANGE_VA_BASE__SHIFT) & A2XX_MH_MMU_VA_RANGE_VA_BASE__MASK; +} + +#define REG_A2XX_MH_MMU_PT_BASE 0x00000042 + +#define REG_A2XX_MH_MMU_PAGE_FAULT 0x00000043 + +#define REG_A2XX_MH_MMU_TRAN_ERROR 0x00000044 + +#define REG_A2XX_MH_MMU_INVALIDATE 0x00000045 +#define A2XX_MH_MMU_INVALIDATE_INVALIDATE_ALL 0x00000001 +#define A2XX_MH_MMU_INVALIDATE_INVALIDATE_TC 0x00000002 + +#define REG_A2XX_MH_MMU_MPU_BASE 0x00000046 + +#define REG_A2XX_MH_MMU_MPU_END 0x00000047 + +#define REG_A2XX_NQWAIT_UNTIL 0x00000394 + +#define REG_A2XX_RBBM_PERFCOUNTER1_SELECT 0x00000395 + +#define REG_A2XX_RBBM_PERFCOUNTER1_LO 0x00000397 + +#define REG_A2XX_RBBM_PERFCOUNTER1_HI 0x00000398 + +#define REG_A2XX_RBBM_DEBUG 0x0000039b + +#define REG_A2XX_RBBM_PM_OVERRIDE1 0x0000039c +#define A2XX_RBBM_PM_OVERRIDE1_RBBM_AHBCLK_PM_OVERRIDE 0x00000001 +#define A2XX_RBBM_PM_OVERRIDE1_SC_REG_SCLK_PM_OVERRIDE 0x00000002 +#define A2XX_RBBM_PM_OVERRIDE1_SC_SCLK_PM_OVERRIDE 0x00000004 +#define A2XX_RBBM_PM_OVERRIDE1_SP_TOP_SCLK_PM_OVERRIDE 0x00000008 +#define A2XX_RBBM_PM_OVERRIDE1_SP_V0_SCLK_PM_OVERRIDE 0x00000010 +#define A2XX_RBBM_PM_OVERRIDE1_SQ_REG_SCLK_PM_OVERRIDE 0x00000020 +#define A2XX_RBBM_PM_OVERRIDE1_SQ_REG_FIFOS_SCLK_PM_OVERRIDE 0x00000040 +#define A2XX_RBBM_PM_OVERRIDE1_SQ_CONST_MEM_SCLK_PM_OVERRIDE 0x00000080 +#define A2XX_RBBM_PM_OVERRIDE1_SQ_SQ_SCLK_PM_OVERRIDE 0x00000100 +#define A2XX_RBBM_PM_OVERRIDE1_SX_SCLK_PM_OVERRIDE 0x00000200 +#define A2XX_RBBM_PM_OVERRIDE1_SX_REG_SCLK_PM_OVERRIDE 0x00000400 +#define A2XX_RBBM_PM_OVERRIDE1_TCM_TCO_SCLK_PM_OVERRIDE 0x00000800 +#define A2XX_RBBM_PM_OVERRIDE1_TCM_TCM_SCLK_PM_OVERRIDE 0x00001000 +#define A2XX_RBBM_PM_OVERRIDE1_TCM_TCD_SCLK_PM_OVERRIDE 0x00002000 +#define A2XX_RBBM_PM_OVERRIDE1_TCM_REG_SCLK_PM_OVERRIDE 0x00004000 +#define A2XX_RBBM_PM_OVERRIDE1_TPC_TPC_SCLK_PM_OVERRIDE 0x00008000 +#define A2XX_RBBM_PM_OVERRIDE1_TPC_REG_SCLK_PM_OVERRIDE 0x00010000 +#define A2XX_RBBM_PM_OVERRIDE1_TCF_TCA_SCLK_PM_OVERRIDE 0x00020000 
+#define A2XX_RBBM_PM_OVERRIDE1_TCF_TCB_SCLK_PM_OVERRIDE 0x00040000 +#define A2XX_RBBM_PM_OVERRIDE1_TCF_TCB_READ_SCLK_PM_OVERRIDE 0x00080000 +#define A2XX_RBBM_PM_OVERRIDE1_TP_TP_SCLK_PM_OVERRIDE 0x00100000 +#define A2XX_RBBM_PM_OVERRIDE1_TP_REG_SCLK_PM_OVERRIDE 0x00200000 +#define A2XX_RBBM_PM_OVERRIDE1_CP_G_SCLK_PM_OVERRIDE 0x00400000 +#define A2XX_RBBM_PM_OVERRIDE1_CP_REG_SCLK_PM_OVERRIDE 0x00800000 +#define A2XX_RBBM_PM_OVERRIDE1_CP_G_REG_SCLK_PM_OVERRIDE 0x01000000 +#define A2XX_RBBM_PM_OVERRIDE1_SPI_SCLK_PM_OVERRIDE 0x02000000 +#define A2XX_RBBM_PM_OVERRIDE1_RB_REG_SCLK_PM_OVERRIDE 0x04000000 +#define A2XX_RBBM_PM_OVERRIDE1_RB_SCLK_PM_OVERRIDE 0x08000000 +#define A2XX_RBBM_PM_OVERRIDE1_MH_MH_SCLK_PM_OVERRIDE 0x10000000 +#define A2XX_RBBM_PM_OVERRIDE1_MH_REG_SCLK_PM_OVERRIDE 0x20000000 +#define A2XX_RBBM_PM_OVERRIDE1_MH_MMU_SCLK_PM_OVERRIDE 0x40000000 +#define A2XX_RBBM_PM_OVERRIDE1_MH_TCROQ_SCLK_PM_OVERRIDE 0x80000000 + +#define REG_A2XX_RBBM_PM_OVERRIDE2 0x0000039d + +#define REG_A2XX_RBBM_DEBUG_OUT 0x000003a0 + +#define REG_A2XX_RBBM_DEBUG_CNTL 0x000003a1 + +#define REG_A2XX_RBBM_READ_ERROR 0x000003b3 + +#define REG_A2XX_RBBM_INT_CNTL 0x000003b4 +#define A2XX_RBBM_INT_CNTL_RDERR_INT_MASK 0x00000001 +#define A2XX_RBBM_INT_CNTL_DISPLAY_UPDATE_INT_MASK 0x00000002 +#define A2XX_RBBM_INT_CNTL_GUI_IDLE_INT_MASK 0x00080000 + +#define REG_A2XX_RBBM_INT_STATUS 0x000003b5 + +#define REG_A2XX_RBBM_INT_ACK 0x000003b6 + +#define REG_A2XX_MASTER_INT_SIGNAL 0x000003b7 +#define A2XX_MASTER_INT_SIGNAL_MH_INT_STAT 0x00000020 +#define A2XX_MASTER_INT_SIGNAL_SQ_INT_STAT 0x04000000 +#define A2XX_MASTER_INT_SIGNAL_CP_INT_STAT 0x40000000 +#define A2XX_MASTER_INT_SIGNAL_RBBM_INT_STAT 0x80000000 + +#define REG_A2XX_RBBM_PERIPHID1 0x000003f9 + +#define REG_A2XX_RBBM_PERIPHID2 0x000003fa + +#define REG_A2XX_CP_PERFMON_CNTL 0x00000444 + +#define REG_A2XX_CP_PERFCOUNTER_SELECT 0x00000445 + +#define REG_A2XX_CP_PERFCOUNTER_LO 0x00000446 + +#define REG_A2XX_CP_PERFCOUNTER_HI 0x00000447 + 
+#define REG_A2XX_RBBM_STATUS 0x000005d0 +#define A2XX_RBBM_STATUS_CMDFIFO_AVAIL__MASK 0x0000001f +#define A2XX_RBBM_STATUS_CMDFIFO_AVAIL__SHIFT 0 +static inline uint32_t A2XX_RBBM_STATUS_CMDFIFO_AVAIL(uint32_t val) +{ + return ((val) << A2XX_RBBM_STATUS_CMDFIFO_AVAIL__SHIFT) & A2XX_RBBM_STATUS_CMDFIFO_AVAIL__MASK; +} +#define A2XX_RBBM_STATUS_TC_BUSY 0x00000020 +#define A2XX_RBBM_STATUS_HIRQ_PENDING 0x00000100 +#define A2XX_RBBM_STATUS_CPRQ_PENDING 0x00000200 +#define A2XX_RBBM_STATUS_CFRQ_PENDING 0x00000400 +#define A2XX_RBBM_STATUS_PFRQ_PENDING 0x00000800 +#define A2XX_RBBM_STATUS_VGT_BUSY_NO_DMA 0x00001000 +#define A2XX_RBBM_STATUS_RBBM_WU_BUSY 0x00004000 +#define A2XX_RBBM_STATUS_CP_NRT_BUSY 0x00010000 +#define A2XX_RBBM_STATUS_MH_BUSY 0x00040000 +#define A2XX_RBBM_STATUS_MH_COHERENCY_BUSY 0x00080000 +#define A2XX_RBBM_STATUS_SX_BUSY 0x00200000 +#define A2XX_RBBM_STATUS_TPC_BUSY 0x00400000 +#define A2XX_RBBM_STATUS_SC_CNTX_BUSY 0x01000000 +#define A2XX_RBBM_STATUS_PA_BUSY 0x02000000 +#define A2XX_RBBM_STATUS_VGT_BUSY 0x04000000 +#define A2XX_RBBM_STATUS_SQ_CNTX17_BUSY 0x08000000 +#define A2XX_RBBM_STATUS_SQ_CNTX0_BUSY 0x10000000 +#define A2XX_RBBM_STATUS_RB_CNTX_BUSY 0x40000000 +#define A2XX_RBBM_STATUS_GUI_ACTIVE 0x80000000 + +#define REG_A2XX_MH_ARBITER_CONFIG 0x00000a40 +#define A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT__MASK 0x0000003f +#define A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT__SHIFT 0 +static inline uint32_t A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT(uint32_t val) +{ + return ((val) << A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT__SHIFT) & A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT__MASK; +} +#define A2XX_MH_ARBITER_CONFIG_SAME_PAGE_GRANULARITY 0x00000040 +#define A2XX_MH_ARBITER_CONFIG_L1_ARB_ENABLE 0x00000080 +#define A2XX_MH_ARBITER_CONFIG_L1_ARB_HOLD_ENABLE 0x00000100 +#define A2XX_MH_ARBITER_CONFIG_L2_ARB_CONTROL 0x00000200 +#define A2XX_MH_ARBITER_CONFIG_PAGE_SIZE__MASK 0x00001c00 +#define A2XX_MH_ARBITER_CONFIG_PAGE_SIZE__SHIFT 10 +static inline uint32_t 
A2XX_MH_ARBITER_CONFIG_PAGE_SIZE(uint32_t val) +{ + return ((val) << A2XX_MH_ARBITER_CONFIG_PAGE_SIZE__SHIFT) & A2XX_MH_ARBITER_CONFIG_PAGE_SIZE__MASK; +} +#define A2XX_MH_ARBITER_CONFIG_TC_REORDER_ENABLE 0x00002000 +#define A2XX_MH_ARBITER_CONFIG_TC_ARB_HOLD_ENABLE 0x00004000 +#define A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT_ENABLE 0x00008000 +#define A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT__MASK 0x003f0000 +#define A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT__SHIFT 16 +static inline uint32_t A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT(uint32_t val) +{ + return ((val) << A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT__SHIFT) & A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT__MASK; +} +#define A2XX_MH_ARBITER_CONFIG_CP_CLNT_ENABLE 0x00400000 +#define A2XX_MH_ARBITER_CONFIG_VGT_CLNT_ENABLE 0x00800000 +#define A2XX_MH_ARBITER_CONFIG_TC_CLNT_ENABLE 0x01000000 +#define A2XX_MH_ARBITER_CONFIG_RB_CLNT_ENABLE 0x02000000 +#define A2XX_MH_ARBITER_CONFIG_PA_CLNT_ENABLE 0x04000000 + +#define REG_A2XX_MH_INTERRUPT_MASK 0x00000a42 +#define A2XX_MH_INTERRUPT_MASK_AXI_READ_ERROR 0x00000001 +#define A2XX_MH_INTERRUPT_MASK_AXI_WRITE_ERROR 0x00000002 +#define A2XX_MH_INTERRUPT_MASK_MMU_PAGE_FAULT 0x00000004 + +#define REG_A2XX_MH_INTERRUPT_STATUS 0x00000a43 + +#define REG_A2XX_MH_INTERRUPT_CLEAR 0x00000a44 + +#define REG_A2XX_MH_CLNT_INTF_CTRL_CONFIG1 0x00000a54 + +#define REG_A2XX_MH_CLNT_INTF_CTRL_CONFIG2 0x00000a55 + +#define REG_A2XX_A220_VSC_BIN_SIZE 0x00000c01 +#define A2XX_A220_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f +#define A2XX_A220_VSC_BIN_SIZE_WIDTH__SHIFT 0 +static inline uint32_t A2XX_A220_VSC_BIN_SIZE_WIDTH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A2XX_A220_VSC_BIN_SIZE_WIDTH__SHIFT) & A2XX_A220_VSC_BIN_SIZE_WIDTH__MASK; +} +#define A2XX_A220_VSC_BIN_SIZE_HEIGHT__MASK 0x000003e0 +#define A2XX_A220_VSC_BIN_SIZE_HEIGHT__SHIFT 5 +static inline uint32_t A2XX_A220_VSC_BIN_SIZE_HEIGHT(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << 
A2XX_A220_VSC_BIN_SIZE_HEIGHT__SHIFT) & A2XX_A220_VSC_BIN_SIZE_HEIGHT__MASK; +} + +static inline uint32_t REG_A2XX_VSC_PIPE(uint32_t i0) { return 0x00000c06 + 0x3*i0; } + +static inline uint32_t REG_A2XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c06 + 0x3*i0; } + +static inline uint32_t REG_A2XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c07 + 0x3*i0; } + +static inline uint32_t REG_A2XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c08 + 0x3*i0; } + +#define REG_A2XX_PC_DEBUG_CNTL 0x00000c38 + +#define REG_A2XX_PC_DEBUG_DATA 0x00000c39 + +#define REG_A2XX_PA_SC_VIZ_QUERY_STATUS 0x00000c44 + +#define REG_A2XX_GRAS_DEBUG_CNTL 0x00000c80 + +#define REG_A2XX_PA_SU_DEBUG_CNTL 0x00000c80 + +#define REG_A2XX_GRAS_DEBUG_DATA 0x00000c81 + +#define REG_A2XX_PA_SU_DEBUG_DATA 0x00000c81 + +#define REG_A2XX_PA_SU_FACE_DATA 0x00000c86 +#define A2XX_PA_SU_FACE_DATA_BASE_ADDR__MASK 0xffffffe0 +#define A2XX_PA_SU_FACE_DATA_BASE_ADDR__SHIFT 5 +static inline uint32_t A2XX_PA_SU_FACE_DATA_BASE_ADDR(uint32_t val) +{ + return ((val) << A2XX_PA_SU_FACE_DATA_BASE_ADDR__SHIFT) & A2XX_PA_SU_FACE_DATA_BASE_ADDR__MASK; +} + +#define REG_A2XX_SQ_GPR_MANAGEMENT 0x00000d00 +#define A2XX_SQ_GPR_MANAGEMENT_REG_DYNAMIC 0x00000001 +#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__MASK 0x00000ff0 +#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__SHIFT 4 +static inline uint32_t A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX(uint32_t val) +{ + return ((val) << A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__SHIFT) & A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__MASK; +} +#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__MASK 0x000ff000 +#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__SHIFT 12 +static inline uint32_t A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX(uint32_t val) +{ + return ((val) << A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__SHIFT) & A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__MASK; +} + +#define REG_A2XX_SQ_FLOW_CONTROL 0x00000d01 + +#define REG_A2XX_SQ_INST_STORE_MANAGMENT 0x00000d02 +#define 
A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__MASK 0x00000fff +#define A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__SHIFT 0 +static inline uint32_t A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX(uint32_t val) +{ + return ((val) << A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__SHIFT) & A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__MASK; +} +#define A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__MASK 0x0fff0000 +#define A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__SHIFT 16 +static inline uint32_t A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX(uint32_t val) +{ + return ((val) << A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__SHIFT) & A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__MASK; +} + +#define REG_A2XX_SQ_DEBUG_MISC 0x00000d05 + +#define REG_A2XX_SQ_INT_CNTL 0x00000d34 + +#define REG_A2XX_SQ_INT_STATUS 0x00000d35 + +#define REG_A2XX_SQ_INT_ACK 0x00000d36 + +#define REG_A2XX_SQ_DEBUG_INPUT_FSM 0x00000dae + +#define REG_A2XX_SQ_DEBUG_CONST_MGR_FSM 0x00000daf + +#define REG_A2XX_SQ_DEBUG_TP_FSM 0x00000db0 + +#define REG_A2XX_SQ_DEBUG_FSM_ALU_0 0x00000db1 + +#define REG_A2XX_SQ_DEBUG_FSM_ALU_1 0x00000db2 + +#define REG_A2XX_SQ_DEBUG_EXP_ALLOC 0x00000db3 + +#define REG_A2XX_SQ_DEBUG_PTR_BUFF 0x00000db4 + +#define REG_A2XX_SQ_DEBUG_GPR_VTX 0x00000db5 + +#define REG_A2XX_SQ_DEBUG_GPR_PIX 0x00000db6 + +#define REG_A2XX_SQ_DEBUG_TB_STATUS_SEL 0x00000db7 + +#define REG_A2XX_SQ_DEBUG_VTX_TB_0 0x00000db8 + +#define REG_A2XX_SQ_DEBUG_VTX_TB_1 0x00000db9 + +#define REG_A2XX_SQ_DEBUG_VTX_TB_STATUS_REG 0x00000dba + +#define REG_A2XX_SQ_DEBUG_VTX_TB_STATE_MEM 0x00000dbb + +#define REG_A2XX_SQ_DEBUG_PIX_TB_0 0x00000dbc + +#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_0 0x00000dbd + +#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_1 0x00000dbe + +#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_2 0x00000dbf + +#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_3 0x00000dc0 + +#define REG_A2XX_SQ_DEBUG_PIX_TB_STATE_MEM 0x00000dc1 + +#define REG_A2XX_TC_CNTL_STATUS 0x00000e00 +#define A2XX_TC_CNTL_STATUS_L2_INVALIDATE 
0x00000001 + +#define REG_A2XX_TP0_CHICKEN 0x00000e1e + +#define REG_A2XX_RB_BC_CONTROL 0x00000f01 +#define A2XX_RB_BC_CONTROL_ACCUM_LINEAR_MODE_ENABLE 0x00000001 +#define A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__MASK 0x00000006 +#define A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__SHIFT 1 +static inline uint32_t A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT(uint32_t val) +{ + return ((val) << A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__SHIFT) & A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__MASK; +} +#define A2XX_RB_BC_CONTROL_DISABLE_EDRAM_CAM 0x00000008 +#define A2XX_RB_BC_CONTROL_DISABLE_EZ_FAST_CONTEXT_SWITCH 0x00000010 +#define A2XX_RB_BC_CONTROL_DISABLE_EZ_NULL_ZCMD_DROP 0x00000020 +#define A2XX_RB_BC_CONTROL_DISABLE_LZ_NULL_ZCMD_DROP 0x00000040 +#define A2XX_RB_BC_CONTROL_ENABLE_AZ_THROTTLE 0x00000080 +#define A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__MASK 0x00001f00 +#define A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__SHIFT 8 +static inline uint32_t A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT(uint32_t val) +{ + return ((val) << A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__SHIFT) & A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__MASK; +} +#define A2XX_RB_BC_CONTROL_ENABLE_CRC_UPDATE 0x00004000 +#define A2XX_RB_BC_CONTROL_CRC_MODE 0x00008000 +#define A2XX_RB_BC_CONTROL_DISABLE_SAMPLE_COUNTERS 0x00010000 +#define A2XX_RB_BC_CONTROL_DISABLE_ACCUM 0x00020000 +#define A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__MASK 0x003c0000 +#define A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__SHIFT 18 +static inline uint32_t A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK(uint32_t val) +{ + return ((val) << A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__SHIFT) & A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__MASK; +} +#define A2XX_RB_BC_CONTROL_LINEAR_PERFORMANCE_ENABLE 0x00400000 +#define A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__MASK 0x07800000 +#define A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__SHIFT 23 +static inline uint32_t A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT(uint32_t val) +{ + return ((val) << A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__SHIFT) & 
A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__MASK; +} +#define A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__MASK 0x18000000 +#define A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__SHIFT 27 +static inline uint32_t A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT(uint32_t val) +{ + return ((val) << A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__SHIFT) & A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__MASK; +} +#define A2XX_RB_BC_CONTROL_MEM_EXPORT_LINEAR_MODE_ENABLE 0x20000000 +#define A2XX_RB_BC_CONTROL_CRC_SYSTEM 0x40000000 +#define A2XX_RB_BC_CONTROL_RESERVED6 0x80000000 + +#define REG_A2XX_RB_EDRAM_INFO 0x00000f02 + +#define REG_A2XX_RB_DEBUG_CNTL 0x00000f26 + +#define REG_A2XX_RB_DEBUG_DATA 0x00000f27 + +#define REG_A2XX_RB_SURFACE_INFO 0x00002000 +#define A2XX_RB_SURFACE_INFO_SURFACE_PITCH__MASK 0x00003fff +#define A2XX_RB_SURFACE_INFO_SURFACE_PITCH__SHIFT 0 +static inline uint32_t A2XX_RB_SURFACE_INFO_SURFACE_PITCH(uint32_t val) +{ + return ((val) << A2XX_RB_SURFACE_INFO_SURFACE_PITCH__SHIFT) & A2XX_RB_SURFACE_INFO_SURFACE_PITCH__MASK; +} +#define A2XX_RB_SURFACE_INFO_MSAA_SAMPLES__MASK 0x0000c000 +#define A2XX_RB_SURFACE_INFO_MSAA_SAMPLES__SHIFT 14 +static inline uint32_t A2XX_RB_SURFACE_INFO_MSAA_SAMPLES(uint32_t val) +{ + return ((val) << A2XX_RB_SURFACE_INFO_MSAA_SAMPLES__SHIFT) & A2XX_RB_SURFACE_INFO_MSAA_SAMPLES__MASK; +} + +#define REG_A2XX_RB_COLOR_INFO 0x00002001 +#define A2XX_RB_COLOR_INFO_FORMAT__MASK 0x0000000f +#define A2XX_RB_COLOR_INFO_FORMAT__SHIFT 0 +static inline uint32_t A2XX_RB_COLOR_INFO_FORMAT(enum a2xx_colorformatx val) +{ + return ((val) << A2XX_RB_COLOR_INFO_FORMAT__SHIFT) & A2XX_RB_COLOR_INFO_FORMAT__MASK; +} +#define A2XX_RB_COLOR_INFO_ROUND_MODE__MASK 0x00000030 +#define A2XX_RB_COLOR_INFO_ROUND_MODE__SHIFT 4 +static inline uint32_t A2XX_RB_COLOR_INFO_ROUND_MODE(uint32_t val) +{ + return ((val) << A2XX_RB_COLOR_INFO_ROUND_MODE__SHIFT) & A2XX_RB_COLOR_INFO_ROUND_MODE__MASK; +} +#define A2XX_RB_COLOR_INFO_LINEAR 0x00000040 +#define 
A2XX_RB_COLOR_INFO_ENDIAN__MASK 0x00000180 +#define A2XX_RB_COLOR_INFO_ENDIAN__SHIFT 7 +static inline uint32_t A2XX_RB_COLOR_INFO_ENDIAN(uint32_t val) +{ + return ((val) << A2XX_RB_COLOR_INFO_ENDIAN__SHIFT) & A2XX_RB_COLOR_INFO_ENDIAN__MASK; +} +#define A2XX_RB_COLOR_INFO_SWAP__MASK 0x00000600 +#define A2XX_RB_COLOR_INFO_SWAP__SHIFT 9 +static inline uint32_t A2XX_RB_COLOR_INFO_SWAP(uint32_t val) +{ + return ((val) << A2XX_RB_COLOR_INFO_SWAP__SHIFT) & A2XX_RB_COLOR_INFO_SWAP__MASK; +} +#define A2XX_RB_COLOR_INFO_BASE__MASK 0xfffff000 +#define A2XX_RB_COLOR_INFO_BASE__SHIFT 12 +static inline uint32_t A2XX_RB_COLOR_INFO_BASE(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A2XX_RB_COLOR_INFO_BASE__SHIFT) & A2XX_RB_COLOR_INFO_BASE__MASK; +} + +#define REG_A2XX_RB_DEPTH_INFO 0x00002002 +#define A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK 0x00000001 +#define A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT 0 +static inline uint32_t A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum adreno_rb_depth_format val) +{ + return ((val) << A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT) & A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK; +} +#define A2XX_RB_DEPTH_INFO_DEPTH_BASE__MASK 0xfffff000 +#define A2XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT 12 +static inline uint32_t A2XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A2XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A2XX_RB_DEPTH_INFO_DEPTH_BASE__MASK; +} + +#define REG_A2XX_A225_RB_COLOR_INFO3 0x00002005 + +#define REG_A2XX_COHER_DEST_BASE_0 0x00002006 + +#define REG_A2XX_PA_SC_SCREEN_SCISSOR_TL 0x0000200e +#define A2XX_PA_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A2XX_PA_SC_SCREEN_SCISSOR_TL_X__MASK 0x00007fff +#define A2XX_PA_SC_SCREEN_SCISSOR_TL_X__SHIFT 0 +static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_TL_X(uint32_t val) +{ + return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_TL_X__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_TL_X__MASK; +} +#define A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__MASK 0x7fff0000 +#define 
A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__SHIFT 16 +static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_TL_Y(uint32_t val) +{ + return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__MASK; +} + +#define REG_A2XX_PA_SC_SCREEN_SCISSOR_BR 0x0000200f +#define A2XX_PA_SC_SCREEN_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A2XX_PA_SC_SCREEN_SCISSOR_BR_X__MASK 0x00007fff +#define A2XX_PA_SC_SCREEN_SCISSOR_BR_X__SHIFT 0 +static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_BR_X(uint32_t val) +{ + return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_BR_X__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_BR_X__MASK; +} +#define A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__MASK 0x7fff0000 +#define A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__SHIFT 16 +static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(uint32_t val) +{ + return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__MASK; +} + +#define REG_A2XX_PA_SC_WINDOW_OFFSET 0x00002080 +#define A2XX_PA_SC_WINDOW_OFFSET_X__MASK 0x00007fff +#define A2XX_PA_SC_WINDOW_OFFSET_X__SHIFT 0 +static inline uint32_t A2XX_PA_SC_WINDOW_OFFSET_X(int32_t val) +{ + return ((val) << A2XX_PA_SC_WINDOW_OFFSET_X__SHIFT) & A2XX_PA_SC_WINDOW_OFFSET_X__MASK; +} +#define A2XX_PA_SC_WINDOW_OFFSET_Y__MASK 0x7fff0000 +#define A2XX_PA_SC_WINDOW_OFFSET_Y__SHIFT 16 +static inline uint32_t A2XX_PA_SC_WINDOW_OFFSET_Y(int32_t val) +{ + return ((val) << A2XX_PA_SC_WINDOW_OFFSET_Y__SHIFT) & A2XX_PA_SC_WINDOW_OFFSET_Y__MASK; +} +#define A2XX_PA_SC_WINDOW_OFFSET_DISABLE 0x80000000 + +#define REG_A2XX_PA_SC_WINDOW_SCISSOR_TL 0x00002081 +#define A2XX_PA_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A2XX_PA_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff +#define A2XX_PA_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 +static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_TL_X(uint32_t val) +{ + return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_TL_X__MASK; +} +#define A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 +#define 
A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 +static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) +{ + return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__MASK; +} + +#define REG_A2XX_PA_SC_WINDOW_SCISSOR_BR 0x00002082 +#define A2XX_PA_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A2XX_PA_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff +#define A2XX_PA_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 +static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_BR_X(uint32_t val) +{ + return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_BR_X__MASK; +} +#define A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 +#define A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 +static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) +{ + return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__MASK; +} + +#define REG_A2XX_UNKNOWN_2010 0x00002010 + +#define REG_A2XX_VGT_MAX_VTX_INDX 0x00002100 + +#define REG_A2XX_VGT_MIN_VTX_INDX 0x00002101 + +#define REG_A2XX_VGT_INDX_OFFSET 0x00002102 + +#define REG_A2XX_A225_PC_MULTI_PRIM_IB_RESET_INDX 0x00002103 + +#define REG_A2XX_RB_COLOR_MASK 0x00002104 +#define A2XX_RB_COLOR_MASK_WRITE_RED 0x00000001 +#define A2XX_RB_COLOR_MASK_WRITE_GREEN 0x00000002 +#define A2XX_RB_COLOR_MASK_WRITE_BLUE 0x00000004 +#define A2XX_RB_COLOR_MASK_WRITE_ALPHA 0x00000008 + +#define REG_A2XX_RB_BLEND_RED 0x00002105 + +#define REG_A2XX_RB_BLEND_GREEN 0x00002106 + +#define REG_A2XX_RB_BLEND_BLUE 0x00002107 + +#define REG_A2XX_RB_BLEND_ALPHA 0x00002108 + +#define REG_A2XX_RB_FOG_COLOR 0x00002109 +#define A2XX_RB_FOG_COLOR_FOG_RED__MASK 0x000000ff +#define A2XX_RB_FOG_COLOR_FOG_RED__SHIFT 0 +static inline uint32_t A2XX_RB_FOG_COLOR_FOG_RED(uint32_t val) +{ + return ((val) << A2XX_RB_FOG_COLOR_FOG_RED__SHIFT) & A2XX_RB_FOG_COLOR_FOG_RED__MASK; +} +#define A2XX_RB_FOG_COLOR_FOG_GREEN__MASK 0x0000ff00 +#define A2XX_RB_FOG_COLOR_FOG_GREEN__SHIFT 8 +static inline uint32_t 
A2XX_RB_FOG_COLOR_FOG_GREEN(uint32_t val) +{ + return ((val) << A2XX_RB_FOG_COLOR_FOG_GREEN__SHIFT) & A2XX_RB_FOG_COLOR_FOG_GREEN__MASK; +} +#define A2XX_RB_FOG_COLOR_FOG_BLUE__MASK 0x00ff0000 +#define A2XX_RB_FOG_COLOR_FOG_BLUE__SHIFT 16 +static inline uint32_t A2XX_RB_FOG_COLOR_FOG_BLUE(uint32_t val) +{ + return ((val) << A2XX_RB_FOG_COLOR_FOG_BLUE__SHIFT) & A2XX_RB_FOG_COLOR_FOG_BLUE__MASK; +} + +#define REG_A2XX_RB_STENCILREFMASK_BF 0x0000210c +#define A2XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff +#define A2XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0 +static inline uint32_t A2XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val) +{ + return ((val) << A2XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A2XX_RB_STENCILREFMASK_BF_STENCILREF__MASK; +} +#define A2XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00 +#define A2XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8 +static inline uint32_t A2XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val) +{ + return ((val) << A2XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A2XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK; +} +#define A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000 +#define A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16 +static inline uint32_t A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val) +{ + return ((val) << A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK; +} + +#define REG_A2XX_RB_STENCILREFMASK 0x0000210d +#define A2XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff +#define A2XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0 +static inline uint32_t A2XX_RB_STENCILREFMASK_STENCILREF(uint32_t val) +{ + return ((val) << A2XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A2XX_RB_STENCILREFMASK_STENCILREF__MASK; +} +#define A2XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00 +#define A2XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8 +static inline uint32_t A2XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val) +{ + return ((val) << 
A2XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A2XX_RB_STENCILREFMASK_STENCILMASK__MASK; +} +#define A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000 +#define A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16 +static inline uint32_t A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val) +{ + return ((val) << A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK; +} + +#define REG_A2XX_RB_ALPHA_REF 0x0000210e + +#define REG_A2XX_PA_CL_VPORT_XSCALE 0x0000210f +#define A2XX_PA_CL_VPORT_XSCALE__MASK 0xffffffff +#define A2XX_PA_CL_VPORT_XSCALE__SHIFT 0 +static inline uint32_t A2XX_PA_CL_VPORT_XSCALE(float val) +{ + return ((fui(val)) << A2XX_PA_CL_VPORT_XSCALE__SHIFT) & A2XX_PA_CL_VPORT_XSCALE__MASK; +} + +#define REG_A2XX_PA_CL_VPORT_XOFFSET 0x00002110 +#define A2XX_PA_CL_VPORT_XOFFSET__MASK 0xffffffff +#define A2XX_PA_CL_VPORT_XOFFSET__SHIFT 0 +static inline uint32_t A2XX_PA_CL_VPORT_XOFFSET(float val) +{ + return ((fui(val)) << A2XX_PA_CL_VPORT_XOFFSET__SHIFT) & A2XX_PA_CL_VPORT_XOFFSET__MASK; +} + +#define REG_A2XX_PA_CL_VPORT_YSCALE 0x00002111 +#define A2XX_PA_CL_VPORT_YSCALE__MASK 0xffffffff +#define A2XX_PA_CL_VPORT_YSCALE__SHIFT 0 +static inline uint32_t A2XX_PA_CL_VPORT_YSCALE(float val) +{ + return ((fui(val)) << A2XX_PA_CL_VPORT_YSCALE__SHIFT) & A2XX_PA_CL_VPORT_YSCALE__MASK; +} + +#define REG_A2XX_PA_CL_VPORT_YOFFSET 0x00002112 +#define A2XX_PA_CL_VPORT_YOFFSET__MASK 0xffffffff +#define A2XX_PA_CL_VPORT_YOFFSET__SHIFT 0 +static inline uint32_t A2XX_PA_CL_VPORT_YOFFSET(float val) +{ + return ((fui(val)) << A2XX_PA_CL_VPORT_YOFFSET__SHIFT) & A2XX_PA_CL_VPORT_YOFFSET__MASK; +} + +#define REG_A2XX_PA_CL_VPORT_ZSCALE 0x00002113 +#define A2XX_PA_CL_VPORT_ZSCALE__MASK 0xffffffff +#define A2XX_PA_CL_VPORT_ZSCALE__SHIFT 0 +static inline uint32_t A2XX_PA_CL_VPORT_ZSCALE(float val) +{ + return ((fui(val)) << A2XX_PA_CL_VPORT_ZSCALE__SHIFT) & A2XX_PA_CL_VPORT_ZSCALE__MASK; +} + +#define REG_A2XX_PA_CL_VPORT_ZOFFSET 
0x00002114 +#define A2XX_PA_CL_VPORT_ZOFFSET__MASK 0xffffffff +#define A2XX_PA_CL_VPORT_ZOFFSET__SHIFT 0 +static inline uint32_t A2XX_PA_CL_VPORT_ZOFFSET(float val) +{ + return ((fui(val)) << A2XX_PA_CL_VPORT_ZOFFSET__SHIFT) & A2XX_PA_CL_VPORT_ZOFFSET__MASK; +} + +#define REG_A2XX_SQ_PROGRAM_CNTL 0x00002180 +#define A2XX_SQ_PROGRAM_CNTL_VS_REGS__MASK 0x000000ff +#define A2XX_SQ_PROGRAM_CNTL_VS_REGS__SHIFT 0 +static inline uint32_t A2XX_SQ_PROGRAM_CNTL_VS_REGS(uint32_t val) +{ + return ((val) << A2XX_SQ_PROGRAM_CNTL_VS_REGS__SHIFT) & A2XX_SQ_PROGRAM_CNTL_VS_REGS__MASK; +} +#define A2XX_SQ_PROGRAM_CNTL_PS_REGS__MASK 0x0000ff00 +#define A2XX_SQ_PROGRAM_CNTL_PS_REGS__SHIFT 8 +static inline uint32_t A2XX_SQ_PROGRAM_CNTL_PS_REGS(uint32_t val) +{ + return ((val) << A2XX_SQ_PROGRAM_CNTL_PS_REGS__SHIFT) & A2XX_SQ_PROGRAM_CNTL_PS_REGS__MASK; +} +#define A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE 0x00010000 +#define A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE 0x00020000 +#define A2XX_SQ_PROGRAM_CNTL_PARAM_GEN 0x00040000 +#define A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_PIX 0x00080000 +#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__MASK 0x00f00000 +#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__SHIFT 20 +static inline uint32_t A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(uint32_t val) +{ + return ((val) << A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__SHIFT) & A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__MASK; +} +#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__MASK 0x07000000 +#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__SHIFT 24 +static inline uint32_t A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE(enum a2xx_sq_ps_vtx_mode val) +{ + return ((val) << A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__SHIFT) & A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__MASK; +} +#define A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__MASK 0x78000000 +#define A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__SHIFT 27 +static inline uint32_t A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(uint32_t val) +{ + return ((val) << A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__SHIFT) & A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__MASK; +} 
+#define A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_VTX 0x80000000 + +#define REG_A2XX_SQ_CONTEXT_MISC 0x00002181 +#define A2XX_SQ_CONTEXT_MISC_INST_PRED_OPTIMIZE 0x00000001 +#define A2XX_SQ_CONTEXT_MISC_SC_OUTPUT_SCREEN_XY 0x00000002 +#define A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__MASK 0x0000000c +#define A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__SHIFT 2 +static inline uint32_t A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(enum a2xx_sq_sample_cntl val) +{ + return ((val) << A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__SHIFT) & A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__MASK; +} +#define A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__MASK 0x0000ff00 +#define A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__SHIFT 8 +static inline uint32_t A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS(uint32_t val) +{ + return ((val) << A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__SHIFT) & A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__MASK; +} +#define A2XX_SQ_CONTEXT_MISC_PERFCOUNTER_REF 0x00010000 +#define A2XX_SQ_CONTEXT_MISC_YEILD_OPTIMIZE 0x00020000 +#define A2XX_SQ_CONTEXT_MISC_TX_CACHE_SEL 0x00040000 + +#define REG_A2XX_SQ_INTERPOLATOR_CNTL 0x00002182 +#define A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__MASK 0x0000ffff +#define A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__SHIFT 0 +static inline uint32_t A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE(uint32_t val) +{ + return ((val) << A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__SHIFT) & A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__MASK; +} +#define A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__MASK 0xffff0000 +#define A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__SHIFT 16 +static inline uint32_t A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN(uint32_t val) +{ + return ((val) << A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__SHIFT) & A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__MASK; +} + +#define REG_A2XX_SQ_WRAPPING_0 0x00002183 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__MASK 0x0000000f +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__SHIFT 0 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_0(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__SHIFT) 
& A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__MASK 0x000000f0 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__SHIFT 4 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_1(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__MASK 0x00000f00 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__SHIFT 8 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_2(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__MASK 0x0000f000 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__SHIFT 12 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_3(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__MASK 0x000f0000 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__SHIFT 16 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_4(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__MASK 0x00f00000 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__SHIFT 20 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_5(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__MASK 0x0f000000 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__SHIFT 24 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_6(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__MASK 0xf0000000 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__SHIFT 28 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_7(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__SHIFT) & 
A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__MASK; +} + +#define REG_A2XX_SQ_WRAPPING_1 0x00002184 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__MASK 0x0000000f +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__SHIFT 0 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_8(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__MASK 0x000000f0 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__SHIFT 4 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_9(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__MASK 0x00000f00 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__SHIFT 8 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_10(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__MASK 0x0000f000 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__SHIFT 12 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_11(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__MASK 0x000f0000 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__SHIFT 16 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_12(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__MASK 0x00f00000 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__SHIFT 20 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_13(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__MASK 0x0f000000 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__SHIFT 24 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_14(uint32_t val) +{ 
+ return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__MASK 0xf0000000 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__SHIFT 28 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_15(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__MASK; +} + +#define REG_A2XX_SQ_PS_PROGRAM 0x000021f6 +#define A2XX_SQ_PS_PROGRAM_BASE__MASK 0x00000fff +#define A2XX_SQ_PS_PROGRAM_BASE__SHIFT 0 +static inline uint32_t A2XX_SQ_PS_PROGRAM_BASE(uint32_t val) +{ + return ((val) << A2XX_SQ_PS_PROGRAM_BASE__SHIFT) & A2XX_SQ_PS_PROGRAM_BASE__MASK; +} +#define A2XX_SQ_PS_PROGRAM_SIZE__MASK 0x00fff000 +#define A2XX_SQ_PS_PROGRAM_SIZE__SHIFT 12 +static inline uint32_t A2XX_SQ_PS_PROGRAM_SIZE(uint32_t val) +{ + return ((val) << A2XX_SQ_PS_PROGRAM_SIZE__SHIFT) & A2XX_SQ_PS_PROGRAM_SIZE__MASK; +} + +#define REG_A2XX_SQ_VS_PROGRAM 0x000021f7 +#define A2XX_SQ_VS_PROGRAM_BASE__MASK 0x00000fff +#define A2XX_SQ_VS_PROGRAM_BASE__SHIFT 0 +static inline uint32_t A2XX_SQ_VS_PROGRAM_BASE(uint32_t val) +{ + return ((val) << A2XX_SQ_VS_PROGRAM_BASE__SHIFT) & A2XX_SQ_VS_PROGRAM_BASE__MASK; +} +#define A2XX_SQ_VS_PROGRAM_SIZE__MASK 0x00fff000 +#define A2XX_SQ_VS_PROGRAM_SIZE__SHIFT 12 +static inline uint32_t A2XX_SQ_VS_PROGRAM_SIZE(uint32_t val) +{ + return ((val) << A2XX_SQ_VS_PROGRAM_SIZE__SHIFT) & A2XX_SQ_VS_PROGRAM_SIZE__MASK; +} + +#define REG_A2XX_VGT_EVENT_INITIATOR 0x000021f9 + +#define REG_A2XX_VGT_DRAW_INITIATOR 0x000021fc +#define A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE__MASK 0x0000003f +#define A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE__SHIFT 0 +static inline uint32_t A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE(enum pc_di_primtype val) +{ + return ((val) << A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE__SHIFT) & A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE__MASK; +} +#define A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__MASK 0x000000c0 +#define A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__SHIFT 6 +static 
inline uint32_t A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT(enum pc_di_src_sel val) +{ + return ((val) << A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__SHIFT) & A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__MASK; +} +#define A2XX_VGT_DRAW_INITIATOR_VIS_CULL__MASK 0x00000600 +#define A2XX_VGT_DRAW_INITIATOR_VIS_CULL__SHIFT 9 +static inline uint32_t A2XX_VGT_DRAW_INITIATOR_VIS_CULL(enum pc_di_vis_cull_mode val) +{ + return ((val) << A2XX_VGT_DRAW_INITIATOR_VIS_CULL__SHIFT) & A2XX_VGT_DRAW_INITIATOR_VIS_CULL__MASK; +} +#define A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE__MASK 0x00000800 +#define A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE__SHIFT 11 +static inline uint32_t A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE(enum pc_di_index_size val) +{ + return ((val) << A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE__SHIFT) & A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE__MASK; +} +#define A2XX_VGT_DRAW_INITIATOR_NOT_EOP 0x00001000 +#define A2XX_VGT_DRAW_INITIATOR_SMALL_INDEX 0x00002000 +#define A2XX_VGT_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE 0x00004000 +#define A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__MASK 0xff000000 +#define A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__SHIFT 24 +static inline uint32_t A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES(uint32_t val) +{ + return ((val) << A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__SHIFT) & A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__MASK; +} + +#define REG_A2XX_VGT_IMMED_DATA 0x000021fd + +#define REG_A2XX_RB_DEPTHCONTROL 0x00002200 +#define A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE 0x00000001 +#define A2XX_RB_DEPTHCONTROL_Z_ENABLE 0x00000002 +#define A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE 0x00000004 +#define A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE 0x00000008 +#define A2XX_RB_DEPTHCONTROL_ZFUNC__MASK 0x00000070 +#define A2XX_RB_DEPTHCONTROL_ZFUNC__SHIFT 4 +static inline uint32_t A2XX_RB_DEPTHCONTROL_ZFUNC(enum adreno_compare_func val) +{ + return ((val) << A2XX_RB_DEPTHCONTROL_ZFUNC__SHIFT) & A2XX_RB_DEPTHCONTROL_ZFUNC__MASK; +} +#define A2XX_RB_DEPTHCONTROL_BACKFACE_ENABLE 0x00000080 +#define A2XX_RB_DEPTHCONTROL_STENCILFUNC__MASK 
0x00000700 +#define A2XX_RB_DEPTHCONTROL_STENCILFUNC__SHIFT 8 +static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFUNC(enum adreno_compare_func val) +{ + return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFUNC__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFUNC__MASK; +} +#define A2XX_RB_DEPTHCONTROL_STENCILFAIL__MASK 0x00003800 +#define A2XX_RB_DEPTHCONTROL_STENCILFAIL__SHIFT 11 +static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFAIL(enum adreno_stencil_op val) +{ + return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFAIL__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFAIL__MASK; +} +#define A2XX_RB_DEPTHCONTROL_STENCILZPASS__MASK 0x0001c000 +#define A2XX_RB_DEPTHCONTROL_STENCILZPASS__SHIFT 14 +static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILZPASS(enum adreno_stencil_op val) +{ + return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZPASS__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZPASS__MASK; +} +#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL__MASK 0x000e0000 +#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL__SHIFT 17 +static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILZFAIL(enum adreno_stencil_op val) +{ + return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZFAIL__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZFAIL__MASK; +} +#define A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__MASK 0x00700000 +#define A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__SHIFT 20 +static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF(enum adreno_compare_func val) +{ + return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__MASK; +} +#define A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__MASK 0x03800000 +#define A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__SHIFT 23 +static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__MASK; +} +#define A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__MASK 0x1c000000 +#define A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__SHIFT 26 +static inline uint32_t 
A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF(enum adreno_stencil_op val) +{ + return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__MASK; +} +#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__MASK 0xe0000000 +#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__SHIFT 29 +static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__MASK; +} + +#define REG_A2XX_RB_BLEND_CONTROL 0x00002201 +#define A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__MASK 0x0000001f +#define A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__SHIFT 0 +static inline uint32_t A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(enum adreno_rb_blend_factor val) +{ + return ((val) << A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__MASK; +} +#define A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__MASK 0x000000e0 +#define A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__SHIFT 5 +static inline uint32_t A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(enum a2xx_rb_blend_opcode val) +{ + return ((val) << A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__SHIFT) & A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__MASK; +} +#define A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__MASK 0x00001f00 +#define A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__SHIFT 8 +static inline uint32_t A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(enum adreno_rb_blend_factor val) +{ + return ((val) << A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__MASK; +} +#define A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__MASK 0x001f0000 +#define A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__SHIFT 16 +static inline uint32_t A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(enum adreno_rb_blend_factor val) +{ + return ((val) << A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__MASK; +} +#define A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__MASK 0x00e00000 +#define A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__SHIFT 21 +static 
inline uint32_t A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(enum a2xx_rb_blend_opcode val) +{ + return ((val) << A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__SHIFT) & A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__MASK; +} +#define A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__MASK 0x1f000000 +#define A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__SHIFT 24 +static inline uint32_t A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(enum adreno_rb_blend_factor val) +{ + return ((val) << A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__MASK; +} +#define A2XX_RB_BLEND_CONTROL_BLEND_FORCE_ENABLE 0x20000000 +#define A2XX_RB_BLEND_CONTROL_BLEND_FORCE 0x40000000 + +#define REG_A2XX_RB_COLORCONTROL 0x00002202 +#define A2XX_RB_COLORCONTROL_ALPHA_FUNC__MASK 0x00000007 +#define A2XX_RB_COLORCONTROL_ALPHA_FUNC__SHIFT 0 +static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_FUNC(enum adreno_compare_func val) +{ + return ((val) << A2XX_RB_COLORCONTROL_ALPHA_FUNC__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_FUNC__MASK; +} +#define A2XX_RB_COLORCONTROL_ALPHA_TEST_ENABLE 0x00000008 +#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_ENABLE 0x00000010 +#define A2XX_RB_COLORCONTROL_BLEND_DISABLE 0x00000020 +#define A2XX_RB_COLORCONTROL_VOB_ENABLE 0x00000040 +#define A2XX_RB_COLORCONTROL_VS_EXPORTS_FOG 0x00000080 +#define A2XX_RB_COLORCONTROL_ROP_CODE__MASK 0x00000f00 +#define A2XX_RB_COLORCONTROL_ROP_CODE__SHIFT 8 +static inline uint32_t A2XX_RB_COLORCONTROL_ROP_CODE(uint32_t val) +{ + return ((val) << A2XX_RB_COLORCONTROL_ROP_CODE__SHIFT) & A2XX_RB_COLORCONTROL_ROP_CODE__MASK; +} +#define A2XX_RB_COLORCONTROL_DITHER_MODE__MASK 0x00003000 +#define A2XX_RB_COLORCONTROL_DITHER_MODE__SHIFT 12 +static inline uint32_t A2XX_RB_COLORCONTROL_DITHER_MODE(enum adreno_rb_dither_mode val) +{ + return ((val) << A2XX_RB_COLORCONTROL_DITHER_MODE__SHIFT) & A2XX_RB_COLORCONTROL_DITHER_MODE__MASK; +} +#define A2XX_RB_COLORCONTROL_DITHER_TYPE__MASK 0x0000c000 +#define A2XX_RB_COLORCONTROL_DITHER_TYPE__SHIFT 14 +static inline uint32_t 
A2XX_RB_COLORCONTROL_DITHER_TYPE(enum a2xx_rb_dither_type val) +{ + return ((val) << A2XX_RB_COLORCONTROL_DITHER_TYPE__SHIFT) & A2XX_RB_COLORCONTROL_DITHER_TYPE__MASK; +} +#define A2XX_RB_COLORCONTROL_PIXEL_FOG 0x00010000 +#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__MASK 0x03000000 +#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__SHIFT 24 +static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0(uint32_t val) +{ + return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__MASK; +} +#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__MASK 0x0c000000 +#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__SHIFT 26 +static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1(uint32_t val) +{ + return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__MASK; +} +#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__MASK 0x30000000 +#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__SHIFT 28 +static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2(uint32_t val) +{ + return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__MASK; +} +#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__MASK 0xc0000000 +#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__SHIFT 30 +static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3(uint32_t val) +{ + return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__MASK; +} + +#define REG_A2XX_VGT_CURRENT_BIN_ID_MAX 0x00002203 +#define A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__MASK 0x00000007 +#define A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__SHIFT 0 +static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN(uint32_t val) +{ + return ((val) << A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__MASK; +} +#define A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__MASK 0x00000038 
+#define A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__SHIFT 3 +static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MAX_ROW(uint32_t val) +{ + return ((val) << A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__MASK; +} +#define A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__MASK 0x000001c0 +#define A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__SHIFT 6 +static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK(uint32_t val) +{ + return ((val) << A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__MASK; +} + +#define REG_A2XX_PA_CL_CLIP_CNTL 0x00002204 +#define A2XX_PA_CL_CLIP_CNTL_CLIP_DISABLE 0x00010000 +#define A2XX_PA_CL_CLIP_CNTL_BOUNDARY_EDGE_FLAG_ENA 0x00040000 +#define A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__MASK 0x00080000 +#define A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__SHIFT 19 +static inline uint32_t A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF(enum a2xx_dx_clip_space val) +{ + return ((val) << A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__SHIFT) & A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__MASK; +} +#define A2XX_PA_CL_CLIP_CNTL_DIS_CLIP_ERR_DETECT 0x00100000 +#define A2XX_PA_CL_CLIP_CNTL_VTX_KILL_OR 0x00200000 +#define A2XX_PA_CL_CLIP_CNTL_XY_NAN_RETAIN 0x00400000 +#define A2XX_PA_CL_CLIP_CNTL_Z_NAN_RETAIN 0x00800000 +#define A2XX_PA_CL_CLIP_CNTL_W_NAN_RETAIN 0x01000000 + +#define REG_A2XX_PA_SU_SC_MODE_CNTL 0x00002205 +#define A2XX_PA_SU_SC_MODE_CNTL_CULL_FRONT 0x00000001 +#define A2XX_PA_SU_SC_MODE_CNTL_CULL_BACK 0x00000002 +#define A2XX_PA_SU_SC_MODE_CNTL_FACE 0x00000004 +#define A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__MASK 0x00000018 +#define A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__SHIFT 3 +static inline uint32_t A2XX_PA_SU_SC_MODE_CNTL_POLYMODE(enum a2xx_pa_su_sc_polymode val) +{ + return ((val) << A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__SHIFT) & A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__MASK; +} +#define A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__MASK 0x000000e0 +#define A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__SHIFT 5 +static inline 
uint32_t A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__SHIFT) & A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__MASK; +} +#define A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__MASK 0x00000700 +#define A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__SHIFT 8 +static inline uint32_t A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__SHIFT) & A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__MASK; +} +#define A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_FRONT_ENABLE 0x00000800 +#define A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_BACK_ENABLE 0x00001000 +#define A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_PARA_ENABLE 0x00002000 +#define A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE 0x00008000 +#define A2XX_PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE 0x00010000 +#define A2XX_PA_SU_SC_MODE_CNTL_LINE_STIPPLE_ENABLE 0x00040000 +#define A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST 0x00080000 +#define A2XX_PA_SU_SC_MODE_CNTL_PERSP_CORR_DIS 0x00100000 +#define A2XX_PA_SU_SC_MODE_CNTL_MULTI_PRIM_IB_ENA 0x00200000 +#define A2XX_PA_SU_SC_MODE_CNTL_QUAD_ORDER_ENABLE 0x00800000 +#define A2XX_PA_SU_SC_MODE_CNTL_WAIT_RB_IDLE_ALL_TRI 0x02000000 +#define A2XX_PA_SU_SC_MODE_CNTL_WAIT_RB_IDLE_FIRST_TRI_NEW_STATE 0x04000000 +#define A2XX_PA_SU_SC_MODE_CNTL_CLAMPED_FACENESS 0x10000000 +#define A2XX_PA_SU_SC_MODE_CNTL_ZERO_AREA_FACENESS 0x20000000 +#define A2XX_PA_SU_SC_MODE_CNTL_FACE_KILL_ENABLE 0x40000000 +#define A2XX_PA_SU_SC_MODE_CNTL_FACE_WRITE_ENABLE 0x80000000 + +#define REG_A2XX_PA_CL_VTE_CNTL 0x00002206 +#define A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA 0x00000001 +#define A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA 0x00000002 +#define A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA 0x00000004 +#define A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA 0x00000008 +#define A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA 0x00000010 +#define A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA 0x00000020 +#define A2XX_PA_CL_VTE_CNTL_VTX_XY_FMT 0x00000100 +#define 
A2XX_PA_CL_VTE_CNTL_VTX_Z_FMT 0x00000200 +#define A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT 0x00000400 +#define A2XX_PA_CL_VTE_CNTL_PERFCOUNTER_REF 0x00000800 + +#define REG_A2XX_VGT_CURRENT_BIN_ID_MIN 0x00002207 +#define A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__MASK 0x00000007 +#define A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__SHIFT 0 +static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN(uint32_t val) +{ + return ((val) << A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__MASK; +} +#define A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__MASK 0x00000038 +#define A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__SHIFT 3 +static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MIN_ROW(uint32_t val) +{ + return ((val) << A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__MASK; +} +#define A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__MASK 0x000001c0 +#define A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__SHIFT 6 +static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK(uint32_t val) +{ + return ((val) << A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__MASK; +} + +#define REG_A2XX_RB_MODECONTROL 0x00002208 +#define A2XX_RB_MODECONTROL_EDRAM_MODE__MASK 0x00000007 +#define A2XX_RB_MODECONTROL_EDRAM_MODE__SHIFT 0 +static inline uint32_t A2XX_RB_MODECONTROL_EDRAM_MODE(enum a2xx_rb_edram_mode val) +{ + return ((val) << A2XX_RB_MODECONTROL_EDRAM_MODE__SHIFT) & A2XX_RB_MODECONTROL_EDRAM_MODE__MASK; +} + +#define REG_A2XX_A220_RB_LRZ_VSC_CONTROL 0x00002209 + +#define REG_A2XX_RB_SAMPLE_POS 0x0000220a + +#define REG_A2XX_CLEAR_COLOR 0x0000220b +#define A2XX_CLEAR_COLOR_RED__MASK 0x000000ff +#define A2XX_CLEAR_COLOR_RED__SHIFT 0 +static inline uint32_t A2XX_CLEAR_COLOR_RED(uint32_t val) +{ + return ((val) << A2XX_CLEAR_COLOR_RED__SHIFT) & A2XX_CLEAR_COLOR_RED__MASK; +} +#define A2XX_CLEAR_COLOR_GREEN__MASK 0x0000ff00 +#define A2XX_CLEAR_COLOR_GREEN__SHIFT 8 +static inline uint32_t A2XX_CLEAR_COLOR_GREEN(uint32_t val) +{ + 
return ((val) << A2XX_CLEAR_COLOR_GREEN__SHIFT) & A2XX_CLEAR_COLOR_GREEN__MASK; +} +#define A2XX_CLEAR_COLOR_BLUE__MASK 0x00ff0000 +#define A2XX_CLEAR_COLOR_BLUE__SHIFT 16 +static inline uint32_t A2XX_CLEAR_COLOR_BLUE(uint32_t val) +{ + return ((val) << A2XX_CLEAR_COLOR_BLUE__SHIFT) & A2XX_CLEAR_COLOR_BLUE__MASK; +} +#define A2XX_CLEAR_COLOR_ALPHA__MASK 0xff000000 +#define A2XX_CLEAR_COLOR_ALPHA__SHIFT 24 +static inline uint32_t A2XX_CLEAR_COLOR_ALPHA(uint32_t val) +{ + return ((val) << A2XX_CLEAR_COLOR_ALPHA__SHIFT) & A2XX_CLEAR_COLOR_ALPHA__MASK; +} + +#define REG_A2XX_A220_GRAS_CONTROL 0x00002210 + +#define REG_A2XX_PA_SU_POINT_SIZE 0x00002280 +#define A2XX_PA_SU_POINT_SIZE_HEIGHT__MASK 0x0000ffff +#define A2XX_PA_SU_POINT_SIZE_HEIGHT__SHIFT 0 +static inline uint32_t A2XX_PA_SU_POINT_SIZE_HEIGHT(float val) +{ + return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_SIZE_HEIGHT__SHIFT) & A2XX_PA_SU_POINT_SIZE_HEIGHT__MASK; +} +#define A2XX_PA_SU_POINT_SIZE_WIDTH__MASK 0xffff0000 +#define A2XX_PA_SU_POINT_SIZE_WIDTH__SHIFT 16 +static inline uint32_t A2XX_PA_SU_POINT_SIZE_WIDTH(float val) +{ + return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_SIZE_WIDTH__SHIFT) & A2XX_PA_SU_POINT_SIZE_WIDTH__MASK; +} + +#define REG_A2XX_PA_SU_POINT_MINMAX 0x00002281 +#define A2XX_PA_SU_POINT_MINMAX_MIN__MASK 0x0000ffff +#define A2XX_PA_SU_POINT_MINMAX_MIN__SHIFT 0 +static inline uint32_t A2XX_PA_SU_POINT_MINMAX_MIN(float val) +{ + return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_MINMAX_MIN__SHIFT) & A2XX_PA_SU_POINT_MINMAX_MIN__MASK; +} +#define A2XX_PA_SU_POINT_MINMAX_MAX__MASK 0xffff0000 +#define A2XX_PA_SU_POINT_MINMAX_MAX__SHIFT 16 +static inline uint32_t A2XX_PA_SU_POINT_MINMAX_MAX(float val) +{ + return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_MINMAX_MAX__SHIFT) & A2XX_PA_SU_POINT_MINMAX_MAX__MASK; +} + +#define REG_A2XX_PA_SU_LINE_CNTL 0x00002282 +#define A2XX_PA_SU_LINE_CNTL_WIDTH__MASK 0x0000ffff +#define A2XX_PA_SU_LINE_CNTL_WIDTH__SHIFT 0 +static inline 
uint32_t A2XX_PA_SU_LINE_CNTL_WIDTH(float val) +{ + return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_LINE_CNTL_WIDTH__SHIFT) & A2XX_PA_SU_LINE_CNTL_WIDTH__MASK; +} + +#define REG_A2XX_PA_SC_LINE_STIPPLE 0x00002283 +#define A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__MASK 0x0000ffff +#define A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__SHIFT 0 +static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN(uint32_t val) +{ + return ((val) << A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__MASK; +} +#define A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__MASK 0x00ff0000 +#define A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__SHIFT 16 +static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT(uint32_t val) +{ + return ((val) << A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__MASK; +} +#define A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__MASK 0x10000000 +#define A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__SHIFT 28 +static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER(enum a2xx_pa_sc_pattern_bit_order val) +{ + return ((val) << A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__MASK; +} +#define A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__MASK 0x60000000 +#define A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__SHIFT 29 +static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL(enum a2xx_pa_sc_auto_reset_cntl val) +{ + return ((val) << A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__MASK; +} + +#define REG_A2XX_PA_SC_VIZ_QUERY 0x00002293 +#define A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ENA 0x00000001 +#define A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__MASK 0x0000007e +#define A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__SHIFT 1 +static inline uint32_t A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID(uint32_t val) +{ + return ((val) << A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__SHIFT) & A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__MASK; +} +#define A2XX_PA_SC_VIZ_QUERY_KILL_PIX_POST_EARLY_Z 0x00000100 
+ +#define REG_A2XX_VGT_ENHANCE 0x00002294 + +#define REG_A2XX_PA_SC_LINE_CNTL 0x00002300 +#define A2XX_PA_SC_LINE_CNTL_BRES_CNTL__MASK 0x0000ffff +#define A2XX_PA_SC_LINE_CNTL_BRES_CNTL__SHIFT 0 +static inline uint32_t A2XX_PA_SC_LINE_CNTL_BRES_CNTL(uint32_t val) +{ + return ((val) << A2XX_PA_SC_LINE_CNTL_BRES_CNTL__SHIFT) & A2XX_PA_SC_LINE_CNTL_BRES_CNTL__MASK; +} +#define A2XX_PA_SC_LINE_CNTL_USE_BRES_CNTL 0x00000100 +#define A2XX_PA_SC_LINE_CNTL_EXPAND_LINE_WIDTH 0x00000200 +#define A2XX_PA_SC_LINE_CNTL_LAST_PIXEL 0x00000400 + +#define REG_A2XX_PA_SC_AA_CONFIG 0x00002301 +#define A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__MASK 0x00000007 +#define A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__SHIFT 0 +static inline uint32_t A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES(uint32_t val) +{ + return ((val) << A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__SHIFT) & A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__MASK; +} +#define A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__MASK 0x0001e000 +#define A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__SHIFT 13 +static inline uint32_t A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST(uint32_t val) +{ + return ((val) << A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__SHIFT) & A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__MASK; +} + +#define REG_A2XX_PA_SU_VTX_CNTL 0x00002302 +#define A2XX_PA_SU_VTX_CNTL_PIX_CENTER__MASK 0x00000001 +#define A2XX_PA_SU_VTX_CNTL_PIX_CENTER__SHIFT 0 +static inline uint32_t A2XX_PA_SU_VTX_CNTL_PIX_CENTER(enum a2xx_pa_pixcenter val) +{ + return ((val) << A2XX_PA_SU_VTX_CNTL_PIX_CENTER__SHIFT) & A2XX_PA_SU_VTX_CNTL_PIX_CENTER__MASK; +} +#define A2XX_PA_SU_VTX_CNTL_ROUND_MODE__MASK 0x00000006 +#define A2XX_PA_SU_VTX_CNTL_ROUND_MODE__SHIFT 1 +static inline uint32_t A2XX_PA_SU_VTX_CNTL_ROUND_MODE(enum a2xx_pa_roundmode val) +{ + return ((val) << A2XX_PA_SU_VTX_CNTL_ROUND_MODE__SHIFT) & A2XX_PA_SU_VTX_CNTL_ROUND_MODE__MASK; +} +#define A2XX_PA_SU_VTX_CNTL_QUANT_MODE__MASK 0x00000380 +#define A2XX_PA_SU_VTX_CNTL_QUANT_MODE__SHIFT 7 +static inline uint32_t 
A2XX_PA_SU_VTX_CNTL_QUANT_MODE(enum a2xx_pa_quantmode val) +{ + return ((val) << A2XX_PA_SU_VTX_CNTL_QUANT_MODE__SHIFT) & A2XX_PA_SU_VTX_CNTL_QUANT_MODE__MASK; +} + +#define REG_A2XX_PA_CL_GB_VERT_CLIP_ADJ 0x00002303 +#define A2XX_PA_CL_GB_VERT_CLIP_ADJ__MASK 0xffffffff +#define A2XX_PA_CL_GB_VERT_CLIP_ADJ__SHIFT 0 +static inline uint32_t A2XX_PA_CL_GB_VERT_CLIP_ADJ(float val) +{ + return ((fui(val)) << A2XX_PA_CL_GB_VERT_CLIP_ADJ__SHIFT) & A2XX_PA_CL_GB_VERT_CLIP_ADJ__MASK; +} + +#define REG_A2XX_PA_CL_GB_VERT_DISC_ADJ 0x00002304 +#define A2XX_PA_CL_GB_VERT_DISC_ADJ__MASK 0xffffffff +#define A2XX_PA_CL_GB_VERT_DISC_ADJ__SHIFT 0 +static inline uint32_t A2XX_PA_CL_GB_VERT_DISC_ADJ(float val) +{ + return ((fui(val)) << A2XX_PA_CL_GB_VERT_DISC_ADJ__SHIFT) & A2XX_PA_CL_GB_VERT_DISC_ADJ__MASK; +} + +#define REG_A2XX_PA_CL_GB_HORZ_CLIP_ADJ 0x00002305 +#define A2XX_PA_CL_GB_HORZ_CLIP_ADJ__MASK 0xffffffff +#define A2XX_PA_CL_GB_HORZ_CLIP_ADJ__SHIFT 0 +static inline uint32_t A2XX_PA_CL_GB_HORZ_CLIP_ADJ(float val) +{ + return ((fui(val)) << A2XX_PA_CL_GB_HORZ_CLIP_ADJ__SHIFT) & A2XX_PA_CL_GB_HORZ_CLIP_ADJ__MASK; +} + +#define REG_A2XX_PA_CL_GB_HORZ_DISC_ADJ 0x00002306 +#define A2XX_PA_CL_GB_HORZ_DISC_ADJ__MASK 0xffffffff +#define A2XX_PA_CL_GB_HORZ_DISC_ADJ__SHIFT 0 +static inline uint32_t A2XX_PA_CL_GB_HORZ_DISC_ADJ(float val) +{ + return ((fui(val)) << A2XX_PA_CL_GB_HORZ_DISC_ADJ__SHIFT) & A2XX_PA_CL_GB_HORZ_DISC_ADJ__MASK; +} + +#define REG_A2XX_SQ_VS_CONST 0x00002307 +#define A2XX_SQ_VS_CONST_BASE__MASK 0x000001ff +#define A2XX_SQ_VS_CONST_BASE__SHIFT 0 +static inline uint32_t A2XX_SQ_VS_CONST_BASE(uint32_t val) +{ + return ((val) << A2XX_SQ_VS_CONST_BASE__SHIFT) & A2XX_SQ_VS_CONST_BASE__MASK; +} +#define A2XX_SQ_VS_CONST_SIZE__MASK 0x001ff000 +#define A2XX_SQ_VS_CONST_SIZE__SHIFT 12 +static inline uint32_t A2XX_SQ_VS_CONST_SIZE(uint32_t val) +{ + return ((val) << A2XX_SQ_VS_CONST_SIZE__SHIFT) & A2XX_SQ_VS_CONST_SIZE__MASK; +} + +#define REG_A2XX_SQ_PS_CONST 0x00002308 
+#define A2XX_SQ_PS_CONST_BASE__MASK 0x000001ff +#define A2XX_SQ_PS_CONST_BASE__SHIFT 0 +static inline uint32_t A2XX_SQ_PS_CONST_BASE(uint32_t val) +{ + return ((val) << A2XX_SQ_PS_CONST_BASE__SHIFT) & A2XX_SQ_PS_CONST_BASE__MASK; +} +#define A2XX_SQ_PS_CONST_SIZE__MASK 0x001ff000 +#define A2XX_SQ_PS_CONST_SIZE__SHIFT 12 +static inline uint32_t A2XX_SQ_PS_CONST_SIZE(uint32_t val) +{ + return ((val) << A2XX_SQ_PS_CONST_SIZE__SHIFT) & A2XX_SQ_PS_CONST_SIZE__MASK; +} + +#define REG_A2XX_SQ_DEBUG_MISC_0 0x00002309 + +#define REG_A2XX_SQ_DEBUG_MISC_1 0x0000230a + +#define REG_A2XX_PA_SC_AA_MASK 0x00002312 + +#define REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL 0x00002316 +#define A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__MASK 0x00000007 +#define A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__SHIFT 0 +static inline uint32_t A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH(uint32_t val) +{ + return ((val) << A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__SHIFT) & A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__MASK; +} + +#define REG_A2XX_VGT_OUT_DEALLOC_CNTL 0x00002317 +#define A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__MASK 0x00000003 +#define A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__SHIFT 0 +static inline uint32_t A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST(uint32_t val) +{ + return ((val) << A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__SHIFT) & A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__MASK; +} + +#define REG_A2XX_RB_COPY_CONTROL 0x00002318 +#define A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__MASK 0x00000007 +#define A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__SHIFT 0 +static inline uint32_t A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT(enum a2xx_rb_copy_sample_select val) +{ + return ((val) << A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__SHIFT) & A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__MASK; +} +#define A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE 0x00000008 +#define A2XX_RB_COPY_CONTROL_CLEAR_MASK__MASK 0x000000f0 +#define A2XX_RB_COPY_CONTROL_CLEAR_MASK__SHIFT 4 +static inline uint32_t 
A2XX_RB_COPY_CONTROL_CLEAR_MASK(uint32_t val) +{ + return ((val) << A2XX_RB_COPY_CONTROL_CLEAR_MASK__SHIFT) & A2XX_RB_COPY_CONTROL_CLEAR_MASK__MASK; +} + +#define REG_A2XX_RB_COPY_DEST_BASE 0x00002319 + +#define REG_A2XX_RB_COPY_DEST_PITCH 0x0000231a +#define A2XX_RB_COPY_DEST_PITCH__MASK 0xffffffff +#define A2XX_RB_COPY_DEST_PITCH__SHIFT 0 +static inline uint32_t A2XX_RB_COPY_DEST_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A2XX_RB_COPY_DEST_PITCH__SHIFT) & A2XX_RB_COPY_DEST_PITCH__MASK; +} + +#define REG_A2XX_RB_COPY_DEST_INFO 0x0000231b +#define A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__MASK 0x00000007 +#define A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__SHIFT 0 +static inline uint32_t A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN(enum adreno_rb_surface_endian val) +{ + return ((val) << A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__SHIFT) & A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__MASK; +} +#define A2XX_RB_COPY_DEST_INFO_LINEAR 0x00000008 +#define A2XX_RB_COPY_DEST_INFO_FORMAT__MASK 0x000000f0 +#define A2XX_RB_COPY_DEST_INFO_FORMAT__SHIFT 4 +static inline uint32_t A2XX_RB_COPY_DEST_INFO_FORMAT(enum a2xx_colorformatx val) +{ + return ((val) << A2XX_RB_COPY_DEST_INFO_FORMAT__SHIFT) & A2XX_RB_COPY_DEST_INFO_FORMAT__MASK; +} +#define A2XX_RB_COPY_DEST_INFO_SWAP__MASK 0x00000300 +#define A2XX_RB_COPY_DEST_INFO_SWAP__SHIFT 8 +static inline uint32_t A2XX_RB_COPY_DEST_INFO_SWAP(uint32_t val) +{ + return ((val) << A2XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & A2XX_RB_COPY_DEST_INFO_SWAP__MASK; +} +#define A2XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK 0x00000c00 +#define A2XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT 10 +static inline uint32_t A2XX_RB_COPY_DEST_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) +{ + return ((val) << A2XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT) & A2XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK; +} +#define A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__MASK 0x00003000 +#define A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__SHIFT 12 +static inline uint32_t A2XX_RB_COPY_DEST_INFO_DITHER_TYPE(enum 
a2xx_rb_dither_type val) +{ + return ((val) << A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__SHIFT) & A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__MASK; +} +#define A2XX_RB_COPY_DEST_INFO_WRITE_RED 0x00004000 +#define A2XX_RB_COPY_DEST_INFO_WRITE_GREEN 0x00008000 +#define A2XX_RB_COPY_DEST_INFO_WRITE_BLUE 0x00010000 +#define A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA 0x00020000 + +#define REG_A2XX_RB_COPY_DEST_OFFSET 0x0000231c +#define A2XX_RB_COPY_DEST_OFFSET_X__MASK 0x00001fff +#define A2XX_RB_COPY_DEST_OFFSET_X__SHIFT 0 +static inline uint32_t A2XX_RB_COPY_DEST_OFFSET_X(uint32_t val) +{ + return ((val) << A2XX_RB_COPY_DEST_OFFSET_X__SHIFT) & A2XX_RB_COPY_DEST_OFFSET_X__MASK; +} +#define A2XX_RB_COPY_DEST_OFFSET_Y__MASK 0x03ffe000 +#define A2XX_RB_COPY_DEST_OFFSET_Y__SHIFT 13 +static inline uint32_t A2XX_RB_COPY_DEST_OFFSET_Y(uint32_t val) +{ + return ((val) << A2XX_RB_COPY_DEST_OFFSET_Y__SHIFT) & A2XX_RB_COPY_DEST_OFFSET_Y__MASK; +} + +#define REG_A2XX_RB_DEPTH_CLEAR 0x0000231d + +#define REG_A2XX_RB_SAMPLE_COUNT_CTL 0x00002324 + +#define REG_A2XX_RB_COLOR_DEST_MASK 0x00002326 + +#define REG_A2XX_A225_GRAS_UCP0X 0x00002340 + +#define REG_A2XX_A225_GRAS_UCP5W 0x00002357 + +#define REG_A2XX_A225_GRAS_UCP_ENABLED 0x00002360 + +#define REG_A2XX_PA_SU_POLY_OFFSET_FRONT_SCALE 0x00002380 + +#define REG_A2XX_PA_SU_POLY_OFFSET_BACK_OFFSET 0x00002383 + +#define REG_A2XX_SQ_CONSTANT_0 0x00004000 + +#define REG_A2XX_SQ_FETCH_0 0x00004800 + +#define REG_A2XX_SQ_CF_BOOLEANS 0x00004900 + +#define REG_A2XX_SQ_CF_LOOP 0x00004908 + +#define REG_A2XX_COHER_SIZE_PM4 0x00000a29 + +#define REG_A2XX_COHER_BASE_PM4 0x00000a2a + +#define REG_A2XX_COHER_STATUS_PM4 0x00000a2b + +#define REG_A2XX_PA_SU_PERFCOUNTER0_SELECT 0x00000c88 + +#define REG_A2XX_PA_SU_PERFCOUNTER1_SELECT 0x00000c89 + +#define REG_A2XX_PA_SU_PERFCOUNTER2_SELECT 0x00000c8a + +#define REG_A2XX_PA_SU_PERFCOUNTER3_SELECT 0x00000c8b + +#define REG_A2XX_PA_SU_PERFCOUNTER0_LOW 0x00000c8c + +#define REG_A2XX_PA_SU_PERFCOUNTER0_HI 0x00000c8d + 
+#define REG_A2XX_PA_SU_PERFCOUNTER1_LOW 0x00000c8e + +#define REG_A2XX_PA_SU_PERFCOUNTER1_HI 0x00000c8f + +#define REG_A2XX_PA_SU_PERFCOUNTER2_LOW 0x00000c90 + +#define REG_A2XX_PA_SU_PERFCOUNTER2_HI 0x00000c91 + +#define REG_A2XX_PA_SU_PERFCOUNTER3_LOW 0x00000c92 + +#define REG_A2XX_PA_SU_PERFCOUNTER3_HI 0x00000c93 + +#define REG_A2XX_PA_SC_PERFCOUNTER0_SELECT 0x00000c98 + +#define REG_A2XX_PA_SC_PERFCOUNTER0_LOW 0x00000c99 + +#define REG_A2XX_PA_SC_PERFCOUNTER0_HI 0x00000c9a + +#define REG_A2XX_VGT_PERFCOUNTER0_SELECT 0x00000c48 + +#define REG_A2XX_VGT_PERFCOUNTER1_SELECT 0x00000c49 + +#define REG_A2XX_VGT_PERFCOUNTER2_SELECT 0x00000c4a + +#define REG_A2XX_VGT_PERFCOUNTER3_SELECT 0x00000c4b + +#define REG_A2XX_VGT_PERFCOUNTER0_LOW 0x00000c4c + +#define REG_A2XX_VGT_PERFCOUNTER1_LOW 0x00000c4e + +#define REG_A2XX_VGT_PERFCOUNTER2_LOW 0x00000c50 + +#define REG_A2XX_VGT_PERFCOUNTER3_LOW 0x00000c52 + +#define REG_A2XX_VGT_PERFCOUNTER0_HI 0x00000c4d + +#define REG_A2XX_VGT_PERFCOUNTER1_HI 0x00000c4f + +#define REG_A2XX_VGT_PERFCOUNTER2_HI 0x00000c51 + +#define REG_A2XX_VGT_PERFCOUNTER3_HI 0x00000c53 + +#define REG_A2XX_TCR_PERFCOUNTER0_SELECT 0x00000e05 + +#define REG_A2XX_TCR_PERFCOUNTER1_SELECT 0x00000e08 + +#define REG_A2XX_TCR_PERFCOUNTER0_HI 0x00000e06 + +#define REG_A2XX_TCR_PERFCOUNTER1_HI 0x00000e09 + +#define REG_A2XX_TCR_PERFCOUNTER0_LOW 0x00000e07 + +#define REG_A2XX_TCR_PERFCOUNTER1_LOW 0x00000e0a + +#define REG_A2XX_TP0_PERFCOUNTER0_SELECT 0x00000e1f + +#define REG_A2XX_TP0_PERFCOUNTER0_HI 0x00000e20 + +#define REG_A2XX_TP0_PERFCOUNTER0_LOW 0x00000e21 + +#define REG_A2XX_TP0_PERFCOUNTER1_SELECT 0x00000e22 + +#define REG_A2XX_TP0_PERFCOUNTER1_HI 0x00000e23 + +#define REG_A2XX_TP0_PERFCOUNTER1_LOW 0x00000e24 + +#define REG_A2XX_TCM_PERFCOUNTER0_SELECT 0x00000e54 + +#define REG_A2XX_TCM_PERFCOUNTER1_SELECT 0x00000e57 + +#define REG_A2XX_TCM_PERFCOUNTER0_HI 0x00000e55 + +#define REG_A2XX_TCM_PERFCOUNTER1_HI 0x00000e58 + +#define REG_A2XX_TCM_PERFCOUNTER0_LOW 
0x00000e56 + +#define REG_A2XX_TCM_PERFCOUNTER1_LOW 0x00000e59 + +#define REG_A2XX_TCF_PERFCOUNTER0_SELECT 0x00000e5a + +#define REG_A2XX_TCF_PERFCOUNTER1_SELECT 0x00000e5d + +#define REG_A2XX_TCF_PERFCOUNTER2_SELECT 0x00000e60 + +#define REG_A2XX_TCF_PERFCOUNTER3_SELECT 0x00000e63 + +#define REG_A2XX_TCF_PERFCOUNTER4_SELECT 0x00000e66 + +#define REG_A2XX_TCF_PERFCOUNTER5_SELECT 0x00000e69 + +#define REG_A2XX_TCF_PERFCOUNTER6_SELECT 0x00000e6c + +#define REG_A2XX_TCF_PERFCOUNTER7_SELECT 0x00000e6f + +#define REG_A2XX_TCF_PERFCOUNTER8_SELECT 0x00000e72 + +#define REG_A2XX_TCF_PERFCOUNTER9_SELECT 0x00000e75 + +#define REG_A2XX_TCF_PERFCOUNTER10_SELECT 0x00000e78 + +#define REG_A2XX_TCF_PERFCOUNTER11_SELECT 0x00000e7b + +#define REG_A2XX_TCF_PERFCOUNTER0_HI 0x00000e5b + +#define REG_A2XX_TCF_PERFCOUNTER1_HI 0x00000e5e + +#define REG_A2XX_TCF_PERFCOUNTER2_HI 0x00000e61 + +#define REG_A2XX_TCF_PERFCOUNTER3_HI 0x00000e64 + +#define REG_A2XX_TCF_PERFCOUNTER4_HI 0x00000e67 + +#define REG_A2XX_TCF_PERFCOUNTER5_HI 0x00000e6a + +#define REG_A2XX_TCF_PERFCOUNTER6_HI 0x00000e6d + +#define REG_A2XX_TCF_PERFCOUNTER7_HI 0x00000e70 + +#define REG_A2XX_TCF_PERFCOUNTER8_HI 0x00000e73 + +#define REG_A2XX_TCF_PERFCOUNTER9_HI 0x00000e76 + +#define REG_A2XX_TCF_PERFCOUNTER10_HI 0x00000e79 + +#define REG_A2XX_TCF_PERFCOUNTER11_HI 0x00000e7c + +#define REG_A2XX_TCF_PERFCOUNTER0_LOW 0x00000e5c + +#define REG_A2XX_TCF_PERFCOUNTER1_LOW 0x00000e5f + +#define REG_A2XX_TCF_PERFCOUNTER2_LOW 0x00000e62 + +#define REG_A2XX_TCF_PERFCOUNTER3_LOW 0x00000e65 + +#define REG_A2XX_TCF_PERFCOUNTER4_LOW 0x00000e68 + +#define REG_A2XX_TCF_PERFCOUNTER5_LOW 0x00000e6b + +#define REG_A2XX_TCF_PERFCOUNTER6_LOW 0x00000e6e + +#define REG_A2XX_TCF_PERFCOUNTER7_LOW 0x00000e71 + +#define REG_A2XX_TCF_PERFCOUNTER8_LOW 0x00000e74 + +#define REG_A2XX_TCF_PERFCOUNTER9_LOW 0x00000e77 + +#define REG_A2XX_TCF_PERFCOUNTER10_LOW 0x00000e7a + +#define REG_A2XX_TCF_PERFCOUNTER11_LOW 0x00000e7d + +#define 
REG_A2XX_SQ_PERFCOUNTER0_SELECT 0x00000dc8 + +#define REG_A2XX_SQ_PERFCOUNTER1_SELECT 0x00000dc9 + +#define REG_A2XX_SQ_PERFCOUNTER2_SELECT 0x00000dca + +#define REG_A2XX_SQ_PERFCOUNTER3_SELECT 0x00000dcb + +#define REG_A2XX_SQ_PERFCOUNTER0_LOW 0x00000dcc + +#define REG_A2XX_SQ_PERFCOUNTER0_HI 0x00000dcd + +#define REG_A2XX_SQ_PERFCOUNTER1_LOW 0x00000dce + +#define REG_A2XX_SQ_PERFCOUNTER1_HI 0x00000dcf + +#define REG_A2XX_SQ_PERFCOUNTER2_LOW 0x00000dd0 + +#define REG_A2XX_SQ_PERFCOUNTER2_HI 0x00000dd1 + +#define REG_A2XX_SQ_PERFCOUNTER3_LOW 0x00000dd2 + +#define REG_A2XX_SQ_PERFCOUNTER3_HI 0x00000dd3 + +#define REG_A2XX_SX_PERFCOUNTER0_SELECT 0x00000dd4 + +#define REG_A2XX_SX_PERFCOUNTER0_LOW 0x00000dd8 + +#define REG_A2XX_SX_PERFCOUNTER0_HI 0x00000dd9 + +#define REG_A2XX_MH_PERFCOUNTER0_SELECT 0x00000a46 + +#define REG_A2XX_MH_PERFCOUNTER1_SELECT 0x00000a4a + +#define REG_A2XX_MH_PERFCOUNTER0_CONFIG 0x00000a47 + +#define REG_A2XX_MH_PERFCOUNTER1_CONFIG 0x00000a4b + +#define REG_A2XX_MH_PERFCOUNTER0_LOW 0x00000a48 + +#define REG_A2XX_MH_PERFCOUNTER1_LOW 0x00000a4c + +#define REG_A2XX_MH_PERFCOUNTER0_HI 0x00000a49 + +#define REG_A2XX_MH_PERFCOUNTER1_HI 0x00000a4d + +#define REG_A2XX_RBBM_PERFCOUNTER1_SELECT 0x00000395 + +#define REG_A2XX_RBBM_PERFCOUNTER1_LO 0x00000397 + +#define REG_A2XX_RBBM_PERFCOUNTER1_HI 0x00000398 + +#define REG_A2XX_CP_PERFCOUNTER_SELECT 0x00000445 + +#define REG_A2XX_CP_PERFCOUNTER_LO 0x00000446 + +#define REG_A2XX_CP_PERFCOUNTER_HI 0x00000447 + +#define REG_A2XX_RB_PERFCOUNTER0_SELECT 0x00000f04 + +#define REG_A2XX_RB_PERFCOUNTER0_LOW 0x00000f08 + +#define REG_A2XX_RB_PERFCOUNTER0_HI 0x00000f09 + +#define REG_A2XX_SQ_TEX_0 0x00000000 +#define A2XX_SQ_TEX_0_TYPE__MASK 0x00000003 +#define A2XX_SQ_TEX_0_TYPE__SHIFT 0 +static inline uint32_t A2XX_SQ_TEX_0_TYPE(enum sq_tex_type val) +{ + return ((val) << A2XX_SQ_TEX_0_TYPE__SHIFT) & A2XX_SQ_TEX_0_TYPE__MASK; +} +#define A2XX_SQ_TEX_0_SIGN_X__MASK 0x0000000c +#define A2XX_SQ_TEX_0_SIGN_X__SHIFT 
2 +static inline uint32_t A2XX_SQ_TEX_0_SIGN_X(enum sq_tex_sign val) +{ + return ((val) << A2XX_SQ_TEX_0_SIGN_X__SHIFT) & A2XX_SQ_TEX_0_SIGN_X__MASK; +} +#define A2XX_SQ_TEX_0_SIGN_Y__MASK 0x00000030 +#define A2XX_SQ_TEX_0_SIGN_Y__SHIFT 4 +static inline uint32_t A2XX_SQ_TEX_0_SIGN_Y(enum sq_tex_sign val) +{ + return ((val) << A2XX_SQ_TEX_0_SIGN_Y__SHIFT) & A2XX_SQ_TEX_0_SIGN_Y__MASK; +} +#define A2XX_SQ_TEX_0_SIGN_Z__MASK 0x000000c0 +#define A2XX_SQ_TEX_0_SIGN_Z__SHIFT 6 +static inline uint32_t A2XX_SQ_TEX_0_SIGN_Z(enum sq_tex_sign val) +{ + return ((val) << A2XX_SQ_TEX_0_SIGN_Z__SHIFT) & A2XX_SQ_TEX_0_SIGN_Z__MASK; +} +#define A2XX_SQ_TEX_0_SIGN_W__MASK 0x00000300 +#define A2XX_SQ_TEX_0_SIGN_W__SHIFT 8 +static inline uint32_t A2XX_SQ_TEX_0_SIGN_W(enum sq_tex_sign val) +{ + return ((val) << A2XX_SQ_TEX_0_SIGN_W__SHIFT) & A2XX_SQ_TEX_0_SIGN_W__MASK; +} +#define A2XX_SQ_TEX_0_CLAMP_X__MASK 0x00001c00 +#define A2XX_SQ_TEX_0_CLAMP_X__SHIFT 10 +static inline uint32_t A2XX_SQ_TEX_0_CLAMP_X(enum sq_tex_clamp val) +{ + return ((val) << A2XX_SQ_TEX_0_CLAMP_X__SHIFT) & A2XX_SQ_TEX_0_CLAMP_X__MASK; +} +#define A2XX_SQ_TEX_0_CLAMP_Y__MASK 0x0000e000 +#define A2XX_SQ_TEX_0_CLAMP_Y__SHIFT 13 +static inline uint32_t A2XX_SQ_TEX_0_CLAMP_Y(enum sq_tex_clamp val) +{ + return ((val) << A2XX_SQ_TEX_0_CLAMP_Y__SHIFT) & A2XX_SQ_TEX_0_CLAMP_Y__MASK; +} +#define A2XX_SQ_TEX_0_CLAMP_Z__MASK 0x00070000 +#define A2XX_SQ_TEX_0_CLAMP_Z__SHIFT 16 +static inline uint32_t A2XX_SQ_TEX_0_CLAMP_Z(enum sq_tex_clamp val) +{ + return ((val) << A2XX_SQ_TEX_0_CLAMP_Z__SHIFT) & A2XX_SQ_TEX_0_CLAMP_Z__MASK; +} +#define A2XX_SQ_TEX_0_PITCH__MASK 0x7fc00000 +#define A2XX_SQ_TEX_0_PITCH__SHIFT 22 +static inline uint32_t A2XX_SQ_TEX_0_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A2XX_SQ_TEX_0_PITCH__SHIFT) & A2XX_SQ_TEX_0_PITCH__MASK; +} +#define A2XX_SQ_TEX_0_TILED 0x00000002 + +#define REG_A2XX_SQ_TEX_1 0x00000001 +#define A2XX_SQ_TEX_1_FORMAT__MASK 0x0000003f +#define 
A2XX_SQ_TEX_1_FORMAT__SHIFT 0 +static inline uint32_t A2XX_SQ_TEX_1_FORMAT(enum a2xx_sq_surfaceformat val) +{ + return ((val) << A2XX_SQ_TEX_1_FORMAT__SHIFT) & A2XX_SQ_TEX_1_FORMAT__MASK; +} +#define A2XX_SQ_TEX_1_ENDIANNESS__MASK 0x000000c0 +#define A2XX_SQ_TEX_1_ENDIANNESS__SHIFT 6 +static inline uint32_t A2XX_SQ_TEX_1_ENDIANNESS(enum sq_tex_endian val) +{ + return ((val) << A2XX_SQ_TEX_1_ENDIANNESS__SHIFT) & A2XX_SQ_TEX_1_ENDIANNESS__MASK; +} +#define A2XX_SQ_TEX_1_REQUEST_SIZE__MASK 0x00000300 +#define A2XX_SQ_TEX_1_REQUEST_SIZE__SHIFT 8 +static inline uint32_t A2XX_SQ_TEX_1_REQUEST_SIZE(uint32_t val) +{ + return ((val) << A2XX_SQ_TEX_1_REQUEST_SIZE__SHIFT) & A2XX_SQ_TEX_1_REQUEST_SIZE__MASK; +} +#define A2XX_SQ_TEX_1_STACKED 0x00000400 +#define A2XX_SQ_TEX_1_CLAMP_POLICY__MASK 0x00000800 +#define A2XX_SQ_TEX_1_CLAMP_POLICY__SHIFT 11 +static inline uint32_t A2XX_SQ_TEX_1_CLAMP_POLICY(enum sq_tex_clamp_policy val) +{ + return ((val) << A2XX_SQ_TEX_1_CLAMP_POLICY__SHIFT) & A2XX_SQ_TEX_1_CLAMP_POLICY__MASK; +} +#define A2XX_SQ_TEX_1_BASE_ADDRESS__MASK 0xfffff000 +#define A2XX_SQ_TEX_1_BASE_ADDRESS__SHIFT 12 +static inline uint32_t A2XX_SQ_TEX_1_BASE_ADDRESS(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A2XX_SQ_TEX_1_BASE_ADDRESS__SHIFT) & A2XX_SQ_TEX_1_BASE_ADDRESS__MASK; +} + +#define REG_A2XX_SQ_TEX_2 0x00000002 +#define A2XX_SQ_TEX_2_WIDTH__MASK 0x00001fff +#define A2XX_SQ_TEX_2_WIDTH__SHIFT 0 +static inline uint32_t A2XX_SQ_TEX_2_WIDTH(uint32_t val) +{ + return ((val) << A2XX_SQ_TEX_2_WIDTH__SHIFT) & A2XX_SQ_TEX_2_WIDTH__MASK; +} +#define A2XX_SQ_TEX_2_HEIGHT__MASK 0x03ffe000 +#define A2XX_SQ_TEX_2_HEIGHT__SHIFT 13 +static inline uint32_t A2XX_SQ_TEX_2_HEIGHT(uint32_t val) +{ + return ((val) << A2XX_SQ_TEX_2_HEIGHT__SHIFT) & A2XX_SQ_TEX_2_HEIGHT__MASK; +} +#define A2XX_SQ_TEX_2_DEPTH__MASK 0xfc000000 +#define A2XX_SQ_TEX_2_DEPTH__SHIFT 26 +static inline uint32_t A2XX_SQ_TEX_2_DEPTH(uint32_t val) +{ + return ((val) << 
A2XX_SQ_TEX_2_DEPTH__SHIFT) & A2XX_SQ_TEX_2_DEPTH__MASK; +} + +#define REG_A2XX_SQ_TEX_3 0x00000003 +#define A2XX_SQ_TEX_3_NUM_FORMAT__MASK 0x00000001 +#define A2XX_SQ_TEX_3_NUM_FORMAT__SHIFT 0 +static inline uint32_t A2XX_SQ_TEX_3_NUM_FORMAT(enum sq_tex_num_format val) +{ + return ((val) << A2XX_SQ_TEX_3_NUM_FORMAT__SHIFT) & A2XX_SQ_TEX_3_NUM_FORMAT__MASK; +} +#define A2XX_SQ_TEX_3_SWIZ_X__MASK 0x0000000e +#define A2XX_SQ_TEX_3_SWIZ_X__SHIFT 1 +static inline uint32_t A2XX_SQ_TEX_3_SWIZ_X(enum sq_tex_swiz val) +{ + return ((val) << A2XX_SQ_TEX_3_SWIZ_X__SHIFT) & A2XX_SQ_TEX_3_SWIZ_X__MASK; +} +#define A2XX_SQ_TEX_3_SWIZ_Y__MASK 0x00000070 +#define A2XX_SQ_TEX_3_SWIZ_Y__SHIFT 4 +static inline uint32_t A2XX_SQ_TEX_3_SWIZ_Y(enum sq_tex_swiz val) +{ + return ((val) << A2XX_SQ_TEX_3_SWIZ_Y__SHIFT) & A2XX_SQ_TEX_3_SWIZ_Y__MASK; +} +#define A2XX_SQ_TEX_3_SWIZ_Z__MASK 0x00000380 +#define A2XX_SQ_TEX_3_SWIZ_Z__SHIFT 7 +static inline uint32_t A2XX_SQ_TEX_3_SWIZ_Z(enum sq_tex_swiz val) +{ + return ((val) << A2XX_SQ_TEX_3_SWIZ_Z__SHIFT) & A2XX_SQ_TEX_3_SWIZ_Z__MASK; +} +#define A2XX_SQ_TEX_3_SWIZ_W__MASK 0x00001c00 +#define A2XX_SQ_TEX_3_SWIZ_W__SHIFT 10 +static inline uint32_t A2XX_SQ_TEX_3_SWIZ_W(enum sq_tex_swiz val) +{ + return ((val) << A2XX_SQ_TEX_3_SWIZ_W__SHIFT) & A2XX_SQ_TEX_3_SWIZ_W__MASK; +} +#define A2XX_SQ_TEX_3_EXP_ADJUST__MASK 0x0007e000 +#define A2XX_SQ_TEX_3_EXP_ADJUST__SHIFT 13 +static inline uint32_t A2XX_SQ_TEX_3_EXP_ADJUST(uint32_t val) +{ + return ((val) << A2XX_SQ_TEX_3_EXP_ADJUST__SHIFT) & A2XX_SQ_TEX_3_EXP_ADJUST__MASK; +} +#define A2XX_SQ_TEX_3_XY_MAG_FILTER__MASK 0x00180000 +#define A2XX_SQ_TEX_3_XY_MAG_FILTER__SHIFT 19 +static inline uint32_t A2XX_SQ_TEX_3_XY_MAG_FILTER(enum sq_tex_filter val) +{ + return ((val) << A2XX_SQ_TEX_3_XY_MAG_FILTER__SHIFT) & A2XX_SQ_TEX_3_XY_MAG_FILTER__MASK; +} +#define A2XX_SQ_TEX_3_XY_MIN_FILTER__MASK 0x00600000 +#define A2XX_SQ_TEX_3_XY_MIN_FILTER__SHIFT 21 +static inline uint32_t A2XX_SQ_TEX_3_XY_MIN_FILTER(enum 
sq_tex_filter val) +{ + return ((val) << A2XX_SQ_TEX_3_XY_MIN_FILTER__SHIFT) & A2XX_SQ_TEX_3_XY_MIN_FILTER__MASK; +} +#define A2XX_SQ_TEX_3_MIP_FILTER__MASK 0x01800000 +#define A2XX_SQ_TEX_3_MIP_FILTER__SHIFT 23 +static inline uint32_t A2XX_SQ_TEX_3_MIP_FILTER(enum sq_tex_filter val) +{ + return ((val) << A2XX_SQ_TEX_3_MIP_FILTER__SHIFT) & A2XX_SQ_TEX_3_MIP_FILTER__MASK; +} +#define A2XX_SQ_TEX_3_ANISO_FILTER__MASK 0x0e000000 +#define A2XX_SQ_TEX_3_ANISO_FILTER__SHIFT 25 +static inline uint32_t A2XX_SQ_TEX_3_ANISO_FILTER(enum sq_tex_aniso_filter val) +{ + return ((val) << A2XX_SQ_TEX_3_ANISO_FILTER__SHIFT) & A2XX_SQ_TEX_3_ANISO_FILTER__MASK; +} +#define A2XX_SQ_TEX_3_BORDER_SIZE__MASK 0x80000000 +#define A2XX_SQ_TEX_3_BORDER_SIZE__SHIFT 31 +static inline uint32_t A2XX_SQ_TEX_3_BORDER_SIZE(uint32_t val) +{ + return ((val) << A2XX_SQ_TEX_3_BORDER_SIZE__SHIFT) & A2XX_SQ_TEX_3_BORDER_SIZE__MASK; +} + +#define REG_A2XX_SQ_TEX_4 0x00000004 +#define A2XX_SQ_TEX_4_VOL_MAG_FILTER__MASK 0x00000001 +#define A2XX_SQ_TEX_4_VOL_MAG_FILTER__SHIFT 0 +static inline uint32_t A2XX_SQ_TEX_4_VOL_MAG_FILTER(enum sq_tex_filter val) +{ + return ((val) << A2XX_SQ_TEX_4_VOL_MAG_FILTER__SHIFT) & A2XX_SQ_TEX_4_VOL_MAG_FILTER__MASK; +} +#define A2XX_SQ_TEX_4_VOL_MIN_FILTER__MASK 0x00000002 +#define A2XX_SQ_TEX_4_VOL_MIN_FILTER__SHIFT 1 +static inline uint32_t A2XX_SQ_TEX_4_VOL_MIN_FILTER(enum sq_tex_filter val) +{ + return ((val) << A2XX_SQ_TEX_4_VOL_MIN_FILTER__SHIFT) & A2XX_SQ_TEX_4_VOL_MIN_FILTER__MASK; +} +#define A2XX_SQ_TEX_4_MIP_MIN_LEVEL__MASK 0x0000003c +#define A2XX_SQ_TEX_4_MIP_MIN_LEVEL__SHIFT 2 +static inline uint32_t A2XX_SQ_TEX_4_MIP_MIN_LEVEL(uint32_t val) +{ + return ((val) << A2XX_SQ_TEX_4_MIP_MIN_LEVEL__SHIFT) & A2XX_SQ_TEX_4_MIP_MIN_LEVEL__MASK; +} +#define A2XX_SQ_TEX_4_MIP_MAX_LEVEL__MASK 0x000003c0 +#define A2XX_SQ_TEX_4_MIP_MAX_LEVEL__SHIFT 6 +static inline uint32_t A2XX_SQ_TEX_4_MIP_MAX_LEVEL(uint32_t val) +{ + return ((val) << A2XX_SQ_TEX_4_MIP_MAX_LEVEL__SHIFT) & 
A2XX_SQ_TEX_4_MIP_MAX_LEVEL__MASK; +} +#define A2XX_SQ_TEX_4_MAX_ANISO_WALK 0x00000400 +#define A2XX_SQ_TEX_4_MIN_ANISO_WALK 0x00000800 +#define A2XX_SQ_TEX_4_LOD_BIAS__MASK 0x003ff000 +#define A2XX_SQ_TEX_4_LOD_BIAS__SHIFT 12 +static inline uint32_t A2XX_SQ_TEX_4_LOD_BIAS(float val) +{ + return ((((int32_t)(val * 32.0))) << A2XX_SQ_TEX_4_LOD_BIAS__SHIFT) & A2XX_SQ_TEX_4_LOD_BIAS__MASK; +} +#define A2XX_SQ_TEX_4_GRAD_EXP_ADJUST_H__MASK 0x07c00000 +#define A2XX_SQ_TEX_4_GRAD_EXP_ADJUST_H__SHIFT 22 +static inline uint32_t A2XX_SQ_TEX_4_GRAD_EXP_ADJUST_H(uint32_t val) +{ + return ((val) << A2XX_SQ_TEX_4_GRAD_EXP_ADJUST_H__SHIFT) & A2XX_SQ_TEX_4_GRAD_EXP_ADJUST_H__MASK; +} +#define A2XX_SQ_TEX_4_GRAD_EXP_ADJUST_V__MASK 0xf8000000 +#define A2XX_SQ_TEX_4_GRAD_EXP_ADJUST_V__SHIFT 27 +static inline uint32_t A2XX_SQ_TEX_4_GRAD_EXP_ADJUST_V(uint32_t val) +{ + return ((val) << A2XX_SQ_TEX_4_GRAD_EXP_ADJUST_V__SHIFT) & A2XX_SQ_TEX_4_GRAD_EXP_ADJUST_V__MASK; +} + +#define REG_A2XX_SQ_TEX_5 0x00000005 +#define A2XX_SQ_TEX_5_BORDER_COLOR__MASK 0x00000003 +#define A2XX_SQ_TEX_5_BORDER_COLOR__SHIFT 0 +static inline uint32_t A2XX_SQ_TEX_5_BORDER_COLOR(enum sq_tex_border_color val) +{ + return ((val) << A2XX_SQ_TEX_5_BORDER_COLOR__SHIFT) & A2XX_SQ_TEX_5_BORDER_COLOR__MASK; +} +#define A2XX_SQ_TEX_5_FORCE_BCW_MAX 0x00000004 +#define A2XX_SQ_TEX_5_TRI_CLAMP__MASK 0x00000018 +#define A2XX_SQ_TEX_5_TRI_CLAMP__SHIFT 3 +static inline uint32_t A2XX_SQ_TEX_5_TRI_CLAMP(uint32_t val) +{ + return ((val) << A2XX_SQ_TEX_5_TRI_CLAMP__SHIFT) & A2XX_SQ_TEX_5_TRI_CLAMP__MASK; +} +#define A2XX_SQ_TEX_5_ANISO_BIAS__MASK 0x000001e0 +#define A2XX_SQ_TEX_5_ANISO_BIAS__SHIFT 5 +static inline uint32_t A2XX_SQ_TEX_5_ANISO_BIAS(float val) +{ + return ((((int32_t)(val * 1.0))) << A2XX_SQ_TEX_5_ANISO_BIAS__SHIFT) & A2XX_SQ_TEX_5_ANISO_BIAS__MASK; +} +#define A2XX_SQ_TEX_5_DIMENSION__MASK 0x00000600 +#define A2XX_SQ_TEX_5_DIMENSION__SHIFT 9 +static inline uint32_t A2XX_SQ_TEX_5_DIMENSION(enum sq_tex_dimension 
val) +{ + return ((val) << A2XX_SQ_TEX_5_DIMENSION__SHIFT) & A2XX_SQ_TEX_5_DIMENSION__MASK; +} +#define A2XX_SQ_TEX_5_PACKED_MIPS 0x00000800 +#define A2XX_SQ_TEX_5_MIP_ADDRESS__MASK 0xfffff000 +#define A2XX_SQ_TEX_5_MIP_ADDRESS__SHIFT 12 +static inline uint32_t A2XX_SQ_TEX_5_MIP_ADDRESS(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A2XX_SQ_TEX_5_MIP_ADDRESS__SHIFT) & A2XX_SQ_TEX_5_MIP_ADDRESS__MASK; +} + + +#endif /* A2XX_XML */ diff -Nru mesa-18.3.3/src/freedreno/registers/a3xx.xml.h mesa-19.0.1/src/freedreno/registers/a3xx.xml.h --- mesa-18.3.3/src/freedreno/registers/a3xx.xml.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/registers/a3xx.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,3239 @@ +#ifndef A3XX_XML +#define A3XX_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/freedreno/envytools/ +git clone https://github.com/freedreno/envytools.git + +The rules-ng-ng source files this header was generated from are: +- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 42463 bytes, from 2018-11-19 13:44:03) +- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14239 bytes, from 2018-12-05 15:25:53) +- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 43052 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 141895 bytes, from 2018-12-21 18:21:34) +- 
/home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) +- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: +- Rob Clark (robclark) +- Ilia Mirkin (imirkin) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +enum a3xx_tile_mode { + LINEAR = 0, + TILE_32X32 = 2, +}; + +enum a3xx_state_block_id { + HLSQ_BLOCK_ID_TP_TEX = 2, + HLSQ_BLOCK_ID_TP_MIPMAP = 3, + HLSQ_BLOCK_ID_SP_VS = 4, + HLSQ_BLOCK_ID_SP_FS = 6, +}; + +enum a3xx_cache_opcode { + INVALIDATE = 1, +}; + +enum a3xx_vtx_fmt { + VFMT_32_FLOAT = 0, + VFMT_32_32_FLOAT = 1, + VFMT_32_32_32_FLOAT = 2, + VFMT_32_32_32_32_FLOAT = 3, + VFMT_16_FLOAT = 4, + VFMT_16_16_FLOAT = 5, + VFMT_16_16_16_FLOAT = 6, + VFMT_16_16_16_16_FLOAT = 7, + VFMT_32_FIXED = 8, + VFMT_32_32_FIXED = 9, + VFMT_32_32_32_FIXED = 10, + VFMT_32_32_32_32_FIXED = 11, + VFMT_16_SINT = 16, + VFMT_16_16_SINT = 17, + VFMT_16_16_16_SINT = 18, + VFMT_16_16_16_16_SINT = 19, + VFMT_16_UINT = 20, + VFMT_16_16_UINT = 21, + VFMT_16_16_16_UINT = 22, + VFMT_16_16_16_16_UINT = 23, + VFMT_16_SNORM = 24, + VFMT_16_16_SNORM = 25, + VFMT_16_16_16_SNORM = 26, + VFMT_16_16_16_16_SNORM = 27, + VFMT_16_UNORM = 28, + VFMT_16_16_UNORM = 29, + VFMT_16_16_16_UNORM = 30, + VFMT_16_16_16_16_UNORM = 31, + VFMT_32_UINT = 32, + VFMT_32_32_UINT = 33, + VFMT_32_32_32_UINT = 34, + VFMT_32_32_32_32_UINT = 35, + VFMT_32_SINT = 36, + VFMT_32_32_SINT = 37, + VFMT_32_32_32_SINT = 38, + VFMT_32_32_32_32_SINT = 39, + VFMT_8_UINT = 40, + VFMT_8_8_UINT = 41, + VFMT_8_8_8_UINT = 42, + VFMT_8_8_8_8_UINT = 43, + VFMT_8_UNORM = 44, + VFMT_8_8_UNORM = 45, + VFMT_8_8_8_UNORM = 46, + VFMT_8_8_8_8_UNORM = 47, + VFMT_8_SINT = 48, + VFMT_8_8_SINT = 49, + VFMT_8_8_8_SINT = 50, + VFMT_8_8_8_8_SINT = 51, + VFMT_8_SNORM = 52, + VFMT_8_8_SNORM = 53, + VFMT_8_8_8_SNORM = 54, + VFMT_8_8_8_8_SNORM = 55, + VFMT_10_10_10_2_UINT = 56, + VFMT_10_10_10_2_UNORM = 57, + VFMT_10_10_10_2_SINT = 58, + VFMT_10_10_10_2_SNORM = 59, + VFMT_2_10_10_10_UINT = 60, + VFMT_2_10_10_10_UNORM = 61, + VFMT_2_10_10_10_SINT = 62, + VFMT_2_10_10_10_SNORM = 63, +}; + +enum a3xx_tex_fmt { + TFMT_5_6_5_UNORM = 4, + TFMT_5_5_5_1_UNORM = 5, + TFMT_4_4_4_4_UNORM = 7, + TFMT_Z16_UNORM = 9, + TFMT_X8Z24_UNORM = 10, + TFMT_Z32_FLOAT = 11, 
+ TFMT_UV_64X32 = 16, + TFMT_VU_64X32 = 17, + TFMT_Y_64X32 = 18, + TFMT_NV12_64X32 = 19, + TFMT_UV_LINEAR = 20, + TFMT_VU_LINEAR = 21, + TFMT_Y_LINEAR = 22, + TFMT_NV12_LINEAR = 23, + TFMT_I420_Y = 24, + TFMT_I420_U = 26, + TFMT_I420_V = 27, + TFMT_ATC_RGB = 32, + TFMT_ATC_RGBA_EXPLICIT = 33, + TFMT_ETC1 = 34, + TFMT_ATC_RGBA_INTERPOLATED = 35, + TFMT_DXT1 = 36, + TFMT_DXT3 = 37, + TFMT_DXT5 = 38, + TFMT_2_10_10_10_UNORM = 40, + TFMT_10_10_10_2_UNORM = 41, + TFMT_9_9_9_E5_FLOAT = 42, + TFMT_11_11_10_FLOAT = 43, + TFMT_A8_UNORM = 44, + TFMT_L8_UNORM = 45, + TFMT_L8_A8_UNORM = 47, + TFMT_8_UNORM = 48, + TFMT_8_8_UNORM = 49, + TFMT_8_8_8_UNORM = 50, + TFMT_8_8_8_8_UNORM = 51, + TFMT_8_SNORM = 52, + TFMT_8_8_SNORM = 53, + TFMT_8_8_8_SNORM = 54, + TFMT_8_8_8_8_SNORM = 55, + TFMT_8_UINT = 56, + TFMT_8_8_UINT = 57, + TFMT_8_8_8_UINT = 58, + TFMT_8_8_8_8_UINT = 59, + TFMT_8_SINT = 60, + TFMT_8_8_SINT = 61, + TFMT_8_8_8_SINT = 62, + TFMT_8_8_8_8_SINT = 63, + TFMT_16_FLOAT = 64, + TFMT_16_16_FLOAT = 65, + TFMT_16_16_16_16_FLOAT = 67, + TFMT_16_UINT = 68, + TFMT_16_16_UINT = 69, + TFMT_16_16_16_16_UINT = 71, + TFMT_16_SINT = 72, + TFMT_16_16_SINT = 73, + TFMT_16_16_16_16_SINT = 75, + TFMT_16_UNORM = 76, + TFMT_16_16_UNORM = 77, + TFMT_16_16_16_16_UNORM = 79, + TFMT_16_SNORM = 80, + TFMT_16_16_SNORM = 81, + TFMT_16_16_16_16_SNORM = 83, + TFMT_32_FLOAT = 84, + TFMT_32_32_FLOAT = 85, + TFMT_32_32_32_32_FLOAT = 87, + TFMT_32_UINT = 88, + TFMT_32_32_UINT = 89, + TFMT_32_32_32_32_UINT = 91, + TFMT_32_SINT = 92, + TFMT_32_32_SINT = 93, + TFMT_32_32_32_32_SINT = 95, + TFMT_2_10_10_10_UINT = 96, + TFMT_10_10_10_2_UINT = 97, + TFMT_ETC2_RG11_SNORM = 112, + TFMT_ETC2_RG11_UNORM = 113, + TFMT_ETC2_R11_SNORM = 114, + TFMT_ETC2_R11_UNORM = 115, + TFMT_ETC2_RGBA8 = 116, + TFMT_ETC2_RGB8A1 = 117, + TFMT_ETC2_RGB8 = 118, +}; + +enum a3xx_tex_fetchsize { + TFETCH_DISABLE = 0, + TFETCH_1_BYTE = 1, + TFETCH_2_BYTE = 2, + TFETCH_4_BYTE = 3, + TFETCH_8_BYTE = 4, + TFETCH_16_BYTE = 5, +}; + +enum 
a3xx_color_fmt { + RB_R5G6B5_UNORM = 0, + RB_R5G5B5A1_UNORM = 1, + RB_R4G4B4A4_UNORM = 3, + RB_R8G8B8_UNORM = 4, + RB_R8G8B8A8_UNORM = 8, + RB_R8G8B8A8_SNORM = 9, + RB_R8G8B8A8_UINT = 10, + RB_R8G8B8A8_SINT = 11, + RB_R8G8_UNORM = 12, + RB_R8G8_SNORM = 13, + RB_R8_UINT = 14, + RB_R8_SINT = 15, + RB_R10G10B10A2_UNORM = 16, + RB_A2R10G10B10_UNORM = 17, + RB_R10G10B10A2_UINT = 18, + RB_A2R10G10B10_UINT = 19, + RB_A8_UNORM = 20, + RB_R8_UNORM = 21, + RB_R16_FLOAT = 24, + RB_R16G16_FLOAT = 25, + RB_R16G16B16A16_FLOAT = 27, + RB_R11G11B10_FLOAT = 28, + RB_R16_SNORM = 32, + RB_R16G16_SNORM = 33, + RB_R16G16B16A16_SNORM = 35, + RB_R16_UNORM = 36, + RB_R16G16_UNORM = 37, + RB_R16G16B16A16_UNORM = 39, + RB_R16_SINT = 40, + RB_R16G16_SINT = 41, + RB_R16G16B16A16_SINT = 43, + RB_R16_UINT = 44, + RB_R16G16_UINT = 45, + RB_R16G16B16A16_UINT = 47, + RB_R32_FLOAT = 48, + RB_R32G32_FLOAT = 49, + RB_R32G32B32A32_FLOAT = 51, + RB_R32_SINT = 52, + RB_R32G32_SINT = 53, + RB_R32G32B32A32_SINT = 55, + RB_R32_UINT = 56, + RB_R32G32_UINT = 57, + RB_R32G32B32A32_UINT = 59, +}; + +enum a3xx_cp_perfcounter_select { + CP_ALWAYS_COUNT = 0, + CP_AHB_PFPTRANS_WAIT = 3, + CP_AHB_NRTTRANS_WAIT = 6, + CP_CSF_NRT_READ_WAIT = 8, + CP_CSF_I1_FIFO_FULL = 9, + CP_CSF_I2_FIFO_FULL = 10, + CP_CSF_ST_FIFO_FULL = 11, + CP_RESERVED_12 = 12, + CP_CSF_RING_ROQ_FULL = 13, + CP_CSF_I1_ROQ_FULL = 14, + CP_CSF_I2_ROQ_FULL = 15, + CP_CSF_ST_ROQ_FULL = 16, + CP_RESERVED_17 = 17, + CP_MIU_TAG_MEM_FULL = 18, + CP_MIU_NRT_WRITE_STALLED = 22, + CP_MIU_NRT_READ_STALLED = 23, + CP_ME_REGS_RB_DONE_FIFO_FULL = 26, + CP_ME_REGS_VS_EVENT_FIFO_FULL = 27, + CP_ME_REGS_PS_EVENT_FIFO_FULL = 28, + CP_ME_REGS_CF_EVENT_FIFO_FULL = 29, + CP_ME_MICRO_RB_STARVED = 30, + CP_AHB_RBBM_DWORD_SENT = 40, + CP_ME_BUSY_CLOCKS = 41, + CP_ME_WAIT_CONTEXT_AVAIL = 42, + CP_PFP_TYPE0_PACKET = 43, + CP_PFP_TYPE3_PACKET = 44, + CP_CSF_RB_WPTR_NEQ_RPTR = 45, + CP_CSF_I1_SIZE_NEQ_ZERO = 46, + CP_CSF_I2_SIZE_NEQ_ZERO = 47, + CP_CSF_RBI1I2_FETCHING = 48, 
+}; + +enum a3xx_gras_tse_perfcounter_select { + GRAS_TSEPERF_INPUT_PRIM = 0, + GRAS_TSEPERF_INPUT_NULL_PRIM = 1, + GRAS_TSEPERF_TRIVAL_REJ_PRIM = 2, + GRAS_TSEPERF_CLIPPED_PRIM = 3, + GRAS_TSEPERF_NEW_PRIM = 4, + GRAS_TSEPERF_ZERO_AREA_PRIM = 5, + GRAS_TSEPERF_FACENESS_CULLED_PRIM = 6, + GRAS_TSEPERF_ZERO_PIXEL_PRIM = 7, + GRAS_TSEPERF_OUTPUT_NULL_PRIM = 8, + GRAS_TSEPERF_OUTPUT_VISIBLE_PRIM = 9, + GRAS_TSEPERF_PRE_CLIP_PRIM = 10, + GRAS_TSEPERF_POST_CLIP_PRIM = 11, + GRAS_TSEPERF_WORKING_CYCLES = 12, + GRAS_TSEPERF_PC_STARVE = 13, + GRAS_TSERASPERF_STALL = 14, +}; + +enum a3xx_gras_ras_perfcounter_select { + GRAS_RASPERF_16X16_TILES = 0, + GRAS_RASPERF_8X8_TILES = 1, + GRAS_RASPERF_4X4_TILES = 2, + GRAS_RASPERF_WORKING_CYCLES = 3, + GRAS_RASPERF_STALL_CYCLES_BY_RB = 4, + GRAS_RASPERF_STALL_CYCLES_BY_VSC = 5, + GRAS_RASPERF_STARVE_CYCLES_BY_TSE = 6, +}; + +enum a3xx_hlsq_perfcounter_select { + HLSQ_PERF_SP_VS_CONSTANT = 0, + HLSQ_PERF_SP_VS_INSTRUCTIONS = 1, + HLSQ_PERF_SP_FS_CONSTANT = 2, + HLSQ_PERF_SP_FS_INSTRUCTIONS = 3, + HLSQ_PERF_TP_STATE = 4, + HLSQ_PERF_QUADS = 5, + HLSQ_PERF_PIXELS = 6, + HLSQ_PERF_VERTICES = 7, + HLSQ_PERF_FS8_THREADS = 8, + HLSQ_PERF_FS16_THREADS = 9, + HLSQ_PERF_FS32_THREADS = 10, + HLSQ_PERF_VS8_THREADS = 11, + HLSQ_PERF_VS16_THREADS = 12, + HLSQ_PERF_SP_VS_DATA_BYTES = 13, + HLSQ_PERF_SP_FS_DATA_BYTES = 14, + HLSQ_PERF_ACTIVE_CYCLES = 15, + HLSQ_PERF_STALL_CYCLES_SP_STATE = 16, + HLSQ_PERF_STALL_CYCLES_SP_VS = 17, + HLSQ_PERF_STALL_CYCLES_SP_FS = 18, + HLSQ_PERF_STALL_CYCLES_UCHE = 19, + HLSQ_PERF_RBBM_LOAD_CYCLES = 20, + HLSQ_PERF_DI_TO_VS_START_SP0 = 21, + HLSQ_PERF_DI_TO_FS_START_SP0 = 22, + HLSQ_PERF_VS_START_TO_DONE_SP0 = 23, + HLSQ_PERF_FS_START_TO_DONE_SP0 = 24, + HLSQ_PERF_SP_STATE_COPY_CYCLES_VS = 25, + HLSQ_PERF_SP_STATE_COPY_CYCLES_FS = 26, + HLSQ_PERF_UCHE_LATENCY_CYCLES = 27, + HLSQ_PERF_UCHE_LATENCY_COUNT = 28, +}; + +enum a3xx_pc_perfcounter_select { + PC_PCPERF_VISIBILITY_STREAMS = 0, + PC_PCPERF_TOTAL_INSTANCES = 1, 
+ PC_PCPERF_PRIMITIVES_PC_VPC = 2, + PC_PCPERF_PRIMITIVES_KILLED_BY_VS = 3, + PC_PCPERF_PRIMITIVES_VISIBLE_BY_VS = 4, + PC_PCPERF_DRAWCALLS_KILLED_BY_VS = 5, + PC_PCPERF_DRAWCALLS_VISIBLE_BY_VS = 6, + PC_PCPERF_VERTICES_TO_VFD = 7, + PC_PCPERF_REUSED_VERTICES = 8, + PC_PCPERF_CYCLES_STALLED_BY_VFD = 9, + PC_PCPERF_CYCLES_STALLED_BY_TSE = 10, + PC_PCPERF_CYCLES_STALLED_BY_VBIF = 11, + PC_PCPERF_CYCLES_IS_WORKING = 12, +}; + +enum a3xx_rb_perfcounter_select { + RB_RBPERF_ACTIVE_CYCLES_ANY = 0, + RB_RBPERF_ACTIVE_CYCLES_ALL = 1, + RB_RBPERF_STARVE_CYCLES_BY_SP = 2, + RB_RBPERF_STARVE_CYCLES_BY_RAS = 3, + RB_RBPERF_STARVE_CYCLES_BY_MARB = 4, + RB_RBPERF_STALL_CYCLES_BY_MARB = 5, + RB_RBPERF_STALL_CYCLES_BY_HLSQ = 6, + RB_RBPERF_RB_MARB_DATA = 7, + RB_RBPERF_SP_RB_QUAD = 8, + RB_RBPERF_RAS_EARLY_Z_QUADS = 9, + RB_RBPERF_GMEM_CH0_READ = 10, + RB_RBPERF_GMEM_CH1_READ = 11, + RB_RBPERF_GMEM_CH0_WRITE = 12, + RB_RBPERF_GMEM_CH1_WRITE = 13, + RB_RBPERF_CP_CONTEXT_DONE = 14, + RB_RBPERF_CP_CACHE_FLUSH = 15, + RB_RBPERF_CP_ZPASS_DONE = 16, +}; + +enum a3xx_rbbm_perfcounter_select { + RBBM_ALAWYS_ON = 0, + RBBM_VBIF_BUSY = 1, + RBBM_TSE_BUSY = 2, + RBBM_RAS_BUSY = 3, + RBBM_PC_DCALL_BUSY = 4, + RBBM_PC_VSD_BUSY = 5, + RBBM_VFD_BUSY = 6, + RBBM_VPC_BUSY = 7, + RBBM_UCHE_BUSY = 8, + RBBM_VSC_BUSY = 9, + RBBM_HLSQ_BUSY = 10, + RBBM_ANY_RB_BUSY = 11, + RBBM_ANY_TEX_BUSY = 12, + RBBM_ANY_USP_BUSY = 13, + RBBM_ANY_MARB_BUSY = 14, + RBBM_ANY_ARB_BUSY = 15, + RBBM_AHB_STATUS_BUSY = 16, + RBBM_AHB_STATUS_STALLED = 17, + RBBM_AHB_STATUS_TXFR = 18, + RBBM_AHB_STATUS_TXFR_SPLIT = 19, + RBBM_AHB_STATUS_TXFR_ERROR = 20, + RBBM_AHB_STATUS_LONG_STALL = 21, + RBBM_RBBM_STATUS_MASKED = 22, +}; + +enum a3xx_sp_perfcounter_select { + SP_LM_LOAD_INSTRUCTIONS = 0, + SP_LM_STORE_INSTRUCTIONS = 1, + SP_LM_ATOMICS = 2, + SP_UCHE_LOAD_INSTRUCTIONS = 3, + SP_UCHE_STORE_INSTRUCTIONS = 4, + SP_UCHE_ATOMICS = 5, + SP_VS_TEX_INSTRUCTIONS = 6, + SP_VS_CFLOW_INSTRUCTIONS = 7, + SP_VS_EFU_INSTRUCTIONS = 8, + 
SP_VS_FULL_ALU_INSTRUCTIONS = 9, + SP_VS_HALF_ALU_INSTRUCTIONS = 10, + SP_FS_TEX_INSTRUCTIONS = 11, + SP_FS_CFLOW_INSTRUCTIONS = 12, + SP_FS_EFU_INSTRUCTIONS = 13, + SP_FS_FULL_ALU_INSTRUCTIONS = 14, + SP_FS_HALF_ALU_INSTRUCTIONS = 15, + SP_FS_BARY_INSTRUCTIONS = 16, + SP_VS_INSTRUCTIONS = 17, + SP_FS_INSTRUCTIONS = 18, + SP_ADDR_LOCK_COUNT = 19, + SP_UCHE_READ_TRANS = 20, + SP_UCHE_WRITE_TRANS = 21, + SP_EXPORT_VPC_TRANS = 22, + SP_EXPORT_RB_TRANS = 23, + SP_PIXELS_KILLED = 24, + SP_ICL1_REQUESTS = 25, + SP_ICL1_MISSES = 26, + SP_ICL0_REQUESTS = 27, + SP_ICL0_MISSES = 28, + SP_ALU_ACTIVE_CYCLES = 29, + SP_EFU_ACTIVE_CYCLES = 30, + SP_STALL_CYCLES_BY_VPC = 31, + SP_STALL_CYCLES_BY_TP = 32, + SP_STALL_CYCLES_BY_UCHE = 33, + SP_STALL_CYCLES_BY_RB = 34, + SP_ACTIVE_CYCLES_ANY = 35, + SP_ACTIVE_CYCLES_ALL = 36, +}; + +enum a3xx_tp_perfcounter_select { + TPL1_TPPERF_L1_REQUESTS = 0, + TPL1_TPPERF_TP0_L1_REQUESTS = 1, + TPL1_TPPERF_TP0_L1_MISSES = 2, + TPL1_TPPERF_TP1_L1_REQUESTS = 3, + TPL1_TPPERF_TP1_L1_MISSES = 4, + TPL1_TPPERF_TP2_L1_REQUESTS = 5, + TPL1_TPPERF_TP2_L1_MISSES = 6, + TPL1_TPPERF_TP3_L1_REQUESTS = 7, + TPL1_TPPERF_TP3_L1_MISSES = 8, + TPL1_TPPERF_OUTPUT_TEXELS_POINT = 9, + TPL1_TPPERF_OUTPUT_TEXELS_BILINEAR = 10, + TPL1_TPPERF_OUTPUT_TEXELS_MIP = 11, + TPL1_TPPERF_OUTPUT_TEXELS_ANISO = 12, + TPL1_TPPERF_BILINEAR_OPS = 13, + TPL1_TPPERF_QUADSQUADS_OFFSET = 14, + TPL1_TPPERF_QUADQUADS_SHADOW = 15, + TPL1_TPPERF_QUADS_ARRAY = 16, + TPL1_TPPERF_QUADS_PROJECTION = 17, + TPL1_TPPERF_QUADS_GRADIENT = 18, + TPL1_TPPERF_QUADS_1D2D = 19, + TPL1_TPPERF_QUADS_3DCUBE = 20, + TPL1_TPPERF_ZERO_LOD = 21, + TPL1_TPPERF_OUTPUT_TEXELS = 22, + TPL1_TPPERF_ACTIVE_CYCLES_ANY = 23, + TPL1_TPPERF_ACTIVE_CYCLES_ALL = 24, + TPL1_TPPERF_STALL_CYCLES_BY_ARB = 25, + TPL1_TPPERF_LATENCY = 26, + TPL1_TPPERF_LATENCY_TRANS = 27, +}; + +enum a3xx_vfd_perfcounter_select { + VFD_PERF_UCHE_BYTE_FETCHED = 0, + VFD_PERF_UCHE_TRANS = 1, + VFD_PERF_VPC_BYPASS_COMPONENTS = 2, + 
VFD_PERF_FETCH_INSTRUCTIONS = 3, + VFD_PERF_DECODE_INSTRUCTIONS = 4, + VFD_PERF_ACTIVE_CYCLES = 5, + VFD_PERF_STALL_CYCLES_UCHE = 6, + VFD_PERF_STALL_CYCLES_HLSQ = 7, + VFD_PERF_STALL_CYCLES_VPC_BYPASS = 8, + VFD_PERF_STALL_CYCLES_VPC_ALLOC = 9, +}; + +enum a3xx_vpc_perfcounter_select { + VPC_PERF_SP_LM_PRIMITIVES = 0, + VPC_PERF_COMPONENTS_FROM_SP = 1, + VPC_PERF_SP_LM_COMPONENTS = 2, + VPC_PERF_ACTIVE_CYCLES = 3, + VPC_PERF_STALL_CYCLES_LM = 4, + VPC_PERF_STALL_CYCLES_RAS = 5, +}; + +enum a3xx_uche_perfcounter_select { + UCHE_UCHEPERF_VBIF_READ_BEATS_TP = 0, + UCHE_UCHEPERF_VBIF_READ_BEATS_VFD = 1, + UCHE_UCHEPERF_VBIF_READ_BEATS_HLSQ = 2, + UCHE_UCHEPERF_VBIF_READ_BEATS_MARB = 3, + UCHE_UCHEPERF_VBIF_READ_BEATS_SP = 4, + UCHE_UCHEPERF_READ_REQUESTS_TP = 8, + UCHE_UCHEPERF_READ_REQUESTS_VFD = 9, + UCHE_UCHEPERF_READ_REQUESTS_HLSQ = 10, + UCHE_UCHEPERF_READ_REQUESTS_MARB = 11, + UCHE_UCHEPERF_READ_REQUESTS_SP = 12, + UCHE_UCHEPERF_WRITE_REQUESTS_MARB = 13, + UCHE_UCHEPERF_WRITE_REQUESTS_SP = 14, + UCHE_UCHEPERF_TAG_CHECK_FAILS = 15, + UCHE_UCHEPERF_EVICTS = 16, + UCHE_UCHEPERF_FLUSHES = 17, + UCHE_UCHEPERF_VBIF_LATENCY_CYCLES = 18, + UCHE_UCHEPERF_VBIF_LATENCY_SAMPLES = 19, + UCHE_UCHEPERF_ACTIVE_CYCLES = 20, +}; + +enum a3xx_intp_mode { + SMOOTH = 0, + FLAT = 1, + ZERO = 2, + ONE = 3, +}; + +enum a3xx_repl_mode { + S = 1, + T = 2, + ONE_T = 3, +}; + +enum a3xx_tex_filter { + A3XX_TEX_NEAREST = 0, + A3XX_TEX_LINEAR = 1, + A3XX_TEX_ANISO = 2, +}; + +enum a3xx_tex_clamp { + A3XX_TEX_REPEAT = 0, + A3XX_TEX_CLAMP_TO_EDGE = 1, + A3XX_TEX_MIRROR_REPEAT = 2, + A3XX_TEX_CLAMP_TO_BORDER = 3, + A3XX_TEX_MIRROR_CLAMP = 4, +}; + +enum a3xx_tex_aniso { + A3XX_TEX_ANISO_1 = 0, + A3XX_TEX_ANISO_2 = 1, + A3XX_TEX_ANISO_4 = 2, + A3XX_TEX_ANISO_8 = 3, + A3XX_TEX_ANISO_16 = 4, +}; + +enum a3xx_tex_swiz { + A3XX_TEX_X = 0, + A3XX_TEX_Y = 1, + A3XX_TEX_Z = 2, + A3XX_TEX_W = 3, + A3XX_TEX_ZERO = 4, + A3XX_TEX_ONE = 5, +}; + +enum a3xx_tex_type { + A3XX_TEX_1D = 0, + A3XX_TEX_2D = 1, + 
A3XX_TEX_CUBE = 2, + A3XX_TEX_3D = 3, +}; + +enum a3xx_tex_msaa { + A3XX_TPL1_MSAA1X = 0, + A3XX_TPL1_MSAA2X = 1, + A3XX_TPL1_MSAA4X = 2, + A3XX_TPL1_MSAA8X = 3, +}; + +#define A3XX_INT0_RBBM_GPU_IDLE 0x00000001 +#define A3XX_INT0_RBBM_AHB_ERROR 0x00000002 +#define A3XX_INT0_RBBM_REG_TIMEOUT 0x00000004 +#define A3XX_INT0_RBBM_ME_MS_TIMEOUT 0x00000008 +#define A3XX_INT0_RBBM_PFP_MS_TIMEOUT 0x00000010 +#define A3XX_INT0_RBBM_ATB_BUS_OVERFLOW 0x00000020 +#define A3XX_INT0_VFD_ERROR 0x00000040 +#define A3XX_INT0_CP_SW_INT 0x00000080 +#define A3XX_INT0_CP_T0_PACKET_IN_IB 0x00000100 +#define A3XX_INT0_CP_OPCODE_ERROR 0x00000200 +#define A3XX_INT0_CP_RESERVED_BIT_ERROR 0x00000400 +#define A3XX_INT0_CP_HW_FAULT 0x00000800 +#define A3XX_INT0_CP_DMA 0x00001000 +#define A3XX_INT0_CP_IB2_INT 0x00002000 +#define A3XX_INT0_CP_IB1_INT 0x00004000 +#define A3XX_INT0_CP_RB_INT 0x00008000 +#define A3XX_INT0_CP_REG_PROTECT_FAULT 0x00010000 +#define A3XX_INT0_CP_RB_DONE_TS 0x00020000 +#define A3XX_INT0_CP_VS_DONE_TS 0x00040000 +#define A3XX_INT0_CP_PS_DONE_TS 0x00080000 +#define A3XX_INT0_CACHE_FLUSH_TS 0x00100000 +#define A3XX_INT0_CP_AHB_ERROR_HALT 0x00200000 +#define A3XX_INT0_MISC_HANG_DETECT 0x01000000 +#define A3XX_INT0_UCHE_OOB_ACCESS 0x02000000 +#define REG_A3XX_RBBM_HW_VERSION 0x00000000 + +#define REG_A3XX_RBBM_HW_RELEASE 0x00000001 + +#define REG_A3XX_RBBM_HW_CONFIGURATION 0x00000002 + +#define REG_A3XX_RBBM_CLOCK_CTL 0x00000010 + +#define REG_A3XX_RBBM_SP_HYST_CNT 0x00000012 + +#define REG_A3XX_RBBM_SW_RESET_CMD 0x00000018 + +#define REG_A3XX_RBBM_AHB_CTL0 0x00000020 + +#define REG_A3XX_RBBM_AHB_CTL1 0x00000021 + +#define REG_A3XX_RBBM_AHB_CMD 0x00000022 + +#define REG_A3XX_RBBM_AHB_ERROR_STATUS 0x00000027 + +#define REG_A3XX_RBBM_GPR0_CTL 0x0000002e + +#define REG_A3XX_RBBM_STATUS 0x00000030 +#define A3XX_RBBM_STATUS_HI_BUSY 0x00000001 +#define A3XX_RBBM_STATUS_CP_ME_BUSY 0x00000002 +#define A3XX_RBBM_STATUS_CP_PFP_BUSY 0x00000004 +#define A3XX_RBBM_STATUS_CP_NRT_BUSY 
0x00004000 +#define A3XX_RBBM_STATUS_VBIF_BUSY 0x00008000 +#define A3XX_RBBM_STATUS_TSE_BUSY 0x00010000 +#define A3XX_RBBM_STATUS_RAS_BUSY 0x00020000 +#define A3XX_RBBM_STATUS_RB_BUSY 0x00040000 +#define A3XX_RBBM_STATUS_PC_DCALL_BUSY 0x00080000 +#define A3XX_RBBM_STATUS_PC_VSD_BUSY 0x00100000 +#define A3XX_RBBM_STATUS_VFD_BUSY 0x00200000 +#define A3XX_RBBM_STATUS_VPC_BUSY 0x00400000 +#define A3XX_RBBM_STATUS_UCHE_BUSY 0x00800000 +#define A3XX_RBBM_STATUS_SP_BUSY 0x01000000 +#define A3XX_RBBM_STATUS_TPL1_BUSY 0x02000000 +#define A3XX_RBBM_STATUS_MARB_BUSY 0x04000000 +#define A3XX_RBBM_STATUS_VSC_BUSY 0x08000000 +#define A3XX_RBBM_STATUS_ARB_BUSY 0x10000000 +#define A3XX_RBBM_STATUS_HLSQ_BUSY 0x20000000 +#define A3XX_RBBM_STATUS_GPU_BUSY_NOHC 0x40000000 +#define A3XX_RBBM_STATUS_GPU_BUSY 0x80000000 + +#define REG_A3XX_RBBM_NQWAIT_UNTIL 0x00000040 + +#define REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x00000033 + +#define REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL 0x00000050 + +#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL0 0x00000051 + +#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL1 0x00000054 + +#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL2 0x00000057 + +#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL3 0x0000005a + +#define REG_A3XX_RBBM_INT_SET_CMD 0x00000060 + +#define REG_A3XX_RBBM_INT_CLEAR_CMD 0x00000061 + +#define REG_A3XX_RBBM_INT_0_MASK 0x00000063 + +#define REG_A3XX_RBBM_INT_0_STATUS 0x00000064 + +#define REG_A3XX_RBBM_PERFCTR_CTL 0x00000080 +#define A3XX_RBBM_PERFCTR_CTL_ENABLE 0x00000001 + +#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD0 0x00000081 + +#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD1 0x00000082 + +#define REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000084 + +#define REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000085 + +#define REG_A3XX_RBBM_PERFCOUNTER0_SELECT 0x00000086 + +#define REG_A3XX_RBBM_PERFCOUNTER1_SELECT 0x00000087 + +#define REG_A3XX_RBBM_GPU_BUSY_MASKED 0x00000088 + +#define REG_A3XX_RBBM_PERFCTR_CP_0_LO 0x00000090 + +#define REG_A3XX_RBBM_PERFCTR_CP_0_HI 
0x00000091 + +#define REG_A3XX_RBBM_PERFCTR_RBBM_0_LO 0x00000092 + +#define REG_A3XX_RBBM_PERFCTR_RBBM_0_HI 0x00000093 + +#define REG_A3XX_RBBM_PERFCTR_RBBM_1_LO 0x00000094 + +#define REG_A3XX_RBBM_PERFCTR_RBBM_1_HI 0x00000095 + +#define REG_A3XX_RBBM_PERFCTR_PC_0_LO 0x00000096 + +#define REG_A3XX_RBBM_PERFCTR_PC_0_HI 0x00000097 + +#define REG_A3XX_RBBM_PERFCTR_PC_1_LO 0x00000098 + +#define REG_A3XX_RBBM_PERFCTR_PC_1_HI 0x00000099 + +#define REG_A3XX_RBBM_PERFCTR_PC_2_LO 0x0000009a + +#define REG_A3XX_RBBM_PERFCTR_PC_2_HI 0x0000009b + +#define REG_A3XX_RBBM_PERFCTR_PC_3_LO 0x0000009c + +#define REG_A3XX_RBBM_PERFCTR_PC_3_HI 0x0000009d + +#define REG_A3XX_RBBM_PERFCTR_VFD_0_LO 0x0000009e + +#define REG_A3XX_RBBM_PERFCTR_VFD_0_HI 0x0000009f + +#define REG_A3XX_RBBM_PERFCTR_VFD_1_LO 0x000000a0 + +#define REG_A3XX_RBBM_PERFCTR_VFD_1_HI 0x000000a1 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_0_LO 0x000000a2 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_0_HI 0x000000a3 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_1_LO 0x000000a4 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_1_HI 0x000000a5 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_2_LO 0x000000a6 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_2_HI 0x000000a7 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_3_LO 0x000000a8 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_3_HI 0x000000a9 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_4_LO 0x000000aa + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_4_HI 0x000000ab + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_5_LO 0x000000ac + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_5_HI 0x000000ad + +#define REG_A3XX_RBBM_PERFCTR_VPC_0_LO 0x000000ae + +#define REG_A3XX_RBBM_PERFCTR_VPC_0_HI 0x000000af + +#define REG_A3XX_RBBM_PERFCTR_VPC_1_LO 0x000000b0 + +#define REG_A3XX_RBBM_PERFCTR_VPC_1_HI 0x000000b1 + +#define REG_A3XX_RBBM_PERFCTR_TSE_0_LO 0x000000b2 + +#define REG_A3XX_RBBM_PERFCTR_TSE_0_HI 0x000000b3 + +#define REG_A3XX_RBBM_PERFCTR_TSE_1_LO 0x000000b4 + +#define REG_A3XX_RBBM_PERFCTR_TSE_1_HI 0x000000b5 + +#define REG_A3XX_RBBM_PERFCTR_RAS_0_LO 0x000000b6 + +#define 
REG_A3XX_RBBM_PERFCTR_RAS_0_HI 0x000000b7 + +#define REG_A3XX_RBBM_PERFCTR_RAS_1_LO 0x000000b8 + +#define REG_A3XX_RBBM_PERFCTR_RAS_1_HI 0x000000b9 + +#define REG_A3XX_RBBM_PERFCTR_UCHE_0_LO 0x000000ba + +#define REG_A3XX_RBBM_PERFCTR_UCHE_0_HI 0x000000bb + +#define REG_A3XX_RBBM_PERFCTR_UCHE_1_LO 0x000000bc + +#define REG_A3XX_RBBM_PERFCTR_UCHE_1_HI 0x000000bd + +#define REG_A3XX_RBBM_PERFCTR_UCHE_2_LO 0x000000be + +#define REG_A3XX_RBBM_PERFCTR_UCHE_2_HI 0x000000bf + +#define REG_A3XX_RBBM_PERFCTR_UCHE_3_LO 0x000000c0 + +#define REG_A3XX_RBBM_PERFCTR_UCHE_3_HI 0x000000c1 + +#define REG_A3XX_RBBM_PERFCTR_UCHE_4_LO 0x000000c2 + +#define REG_A3XX_RBBM_PERFCTR_UCHE_4_HI 0x000000c3 + +#define REG_A3XX_RBBM_PERFCTR_UCHE_5_LO 0x000000c4 + +#define REG_A3XX_RBBM_PERFCTR_UCHE_5_HI 0x000000c5 + +#define REG_A3XX_RBBM_PERFCTR_TP_0_LO 0x000000c6 + +#define REG_A3XX_RBBM_PERFCTR_TP_0_HI 0x000000c7 + +#define REG_A3XX_RBBM_PERFCTR_TP_1_LO 0x000000c8 + +#define REG_A3XX_RBBM_PERFCTR_TP_1_HI 0x000000c9 + +#define REG_A3XX_RBBM_PERFCTR_TP_2_LO 0x000000ca + +#define REG_A3XX_RBBM_PERFCTR_TP_2_HI 0x000000cb + +#define REG_A3XX_RBBM_PERFCTR_TP_3_LO 0x000000cc + +#define REG_A3XX_RBBM_PERFCTR_TP_3_HI 0x000000cd + +#define REG_A3XX_RBBM_PERFCTR_TP_4_LO 0x000000ce + +#define REG_A3XX_RBBM_PERFCTR_TP_4_HI 0x000000cf + +#define REG_A3XX_RBBM_PERFCTR_TP_5_LO 0x000000d0 + +#define REG_A3XX_RBBM_PERFCTR_TP_5_HI 0x000000d1 + +#define REG_A3XX_RBBM_PERFCTR_SP_0_LO 0x000000d2 + +#define REG_A3XX_RBBM_PERFCTR_SP_0_HI 0x000000d3 + +#define REG_A3XX_RBBM_PERFCTR_SP_1_LO 0x000000d4 + +#define REG_A3XX_RBBM_PERFCTR_SP_1_HI 0x000000d5 + +#define REG_A3XX_RBBM_PERFCTR_SP_2_LO 0x000000d6 + +#define REG_A3XX_RBBM_PERFCTR_SP_2_HI 0x000000d7 + +#define REG_A3XX_RBBM_PERFCTR_SP_3_LO 0x000000d8 + +#define REG_A3XX_RBBM_PERFCTR_SP_3_HI 0x000000d9 + +#define REG_A3XX_RBBM_PERFCTR_SP_4_LO 0x000000da + +#define REG_A3XX_RBBM_PERFCTR_SP_4_HI 0x000000db + +#define REG_A3XX_RBBM_PERFCTR_SP_5_LO 0x000000dc + 
+#define REG_A3XX_RBBM_PERFCTR_SP_5_HI 0x000000dd + +#define REG_A3XX_RBBM_PERFCTR_SP_6_LO 0x000000de + +#define REG_A3XX_RBBM_PERFCTR_SP_6_HI 0x000000df + +#define REG_A3XX_RBBM_PERFCTR_SP_7_LO 0x000000e0 + +#define REG_A3XX_RBBM_PERFCTR_SP_7_HI 0x000000e1 + +#define REG_A3XX_RBBM_PERFCTR_RB_0_LO 0x000000e2 + +#define REG_A3XX_RBBM_PERFCTR_RB_0_HI 0x000000e3 + +#define REG_A3XX_RBBM_PERFCTR_RB_1_LO 0x000000e4 + +#define REG_A3XX_RBBM_PERFCTR_RB_1_HI 0x000000e5 + +#define REG_A3XX_RBBM_PERFCTR_PWR_0_LO 0x000000ea + +#define REG_A3XX_RBBM_PERFCTR_PWR_0_HI 0x000000eb + +#define REG_A3XX_RBBM_PERFCTR_PWR_1_LO 0x000000ec + +#define REG_A3XX_RBBM_PERFCTR_PWR_1_HI 0x000000ed + +#define REG_A3XX_RBBM_RBBM_CTL 0x00000100 + +#define REG_A3XX_RBBM_DEBUG_BUS_CTL 0x00000111 + +#define REG_A3XX_RBBM_DEBUG_BUS_DATA_STATUS 0x00000112 + +#define REG_A3XX_CP_PFP_UCODE_ADDR 0x000001c9 + +#define REG_A3XX_CP_PFP_UCODE_DATA 0x000001ca + +#define REG_A3XX_CP_ROQ_ADDR 0x000001cc + +#define REG_A3XX_CP_ROQ_DATA 0x000001cd + +#define REG_A3XX_CP_MERCIU_ADDR 0x000001d1 + +#define REG_A3XX_CP_MERCIU_DATA 0x000001d2 + +#define REG_A3XX_CP_MERCIU_DATA2 0x000001d3 + +#define REG_A3XX_CP_MEQ_ADDR 0x000001da + +#define REG_A3XX_CP_MEQ_DATA 0x000001db + +#define REG_A3XX_CP_WFI_PEND_CTR 0x000001f5 + +#define REG_A3XX_RBBM_PM_OVERRIDE2 0x0000039d + +#define REG_A3XX_CP_PERFCOUNTER_SELECT 0x00000445 + +#define REG_A3XX_CP_HW_FAULT 0x0000045c + +#define REG_A3XX_CP_PROTECT_CTRL 0x0000045e + +#define REG_A3XX_CP_PROTECT_STATUS 0x0000045f + +static inline uint32_t REG_A3XX_CP_PROTECT(uint32_t i0) { return 0x00000460 + 0x1*i0; } + +static inline uint32_t REG_A3XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000460 + 0x1*i0; } + +#define REG_A3XX_CP_AHB_FAULT 0x0000054d + +#define REG_A3XX_SQ_GPR_MANAGEMENT 0x00000d00 + +#define REG_A3XX_SQ_INST_STORE_MANAGMENT 0x00000d02 + +#define REG_A3XX_TP0_CHICKEN 0x00000e1e + +#define REG_A3XX_SP_GLOBAL_MEM_SIZE 0x00000e22 + +#define REG_A3XX_SP_GLOBAL_MEM_ADDR 
0x00000e23 + +#define REG_A3XX_GRAS_CL_CLIP_CNTL 0x00002040 +#define A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER 0x00001000 +#define A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00010000 +#define A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE 0x00020000 +#define A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE 0x00080000 +#define A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE 0x00100000 +#define A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE 0x00200000 +#define A3XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000 +#define A3XX_GRAS_CL_CLIP_CNTL_ZCOORD 0x00800000 +#define A3XX_GRAS_CL_CLIP_CNTL_WCOORD 0x01000000 +#define A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE 0x02000000 +#define A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__MASK 0x1c000000 +#define A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__SHIFT 26 +static inline uint32_t A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(uint32_t val) +{ + return ((val) << A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__SHIFT) & A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__MASK; +} + +#define REG_A3XX_GRAS_CL_GB_CLIP_ADJ 0x00002044 +#define A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK 0x000003ff +#define A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT 0 +static inline uint32_t A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(uint32_t val) +{ + return ((val) << A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT) & A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK; +} +#define A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK 0x000ffc00 +#define A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT 10 +static inline uint32_t A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(uint32_t val) +{ + return ((val) << A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT) & A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK; +} + +#define REG_A3XX_GRAS_CL_VPORT_XOFFSET 0x00002048 +#define A3XX_GRAS_CL_VPORT_XOFFSET__MASK 0xffffffff +#define A3XX_GRAS_CL_VPORT_XOFFSET__SHIFT 0 +static inline uint32_t A3XX_GRAS_CL_VPORT_XOFFSET(float val) +{ + return ((fui(val)) << A3XX_GRAS_CL_VPORT_XOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_XOFFSET__MASK; +} + +#define REG_A3XX_GRAS_CL_VPORT_XSCALE 0x00002049 +#define A3XX_GRAS_CL_VPORT_XSCALE__MASK 
0xffffffff +#define A3XX_GRAS_CL_VPORT_XSCALE__SHIFT 0 +static inline uint32_t A3XX_GRAS_CL_VPORT_XSCALE(float val) +{ + return ((fui(val)) << A3XX_GRAS_CL_VPORT_XSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_XSCALE__MASK; +} + +#define REG_A3XX_GRAS_CL_VPORT_YOFFSET 0x0000204a +#define A3XX_GRAS_CL_VPORT_YOFFSET__MASK 0xffffffff +#define A3XX_GRAS_CL_VPORT_YOFFSET__SHIFT 0 +static inline uint32_t A3XX_GRAS_CL_VPORT_YOFFSET(float val) +{ + return ((fui(val)) << A3XX_GRAS_CL_VPORT_YOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_YOFFSET__MASK; +} + +#define REG_A3XX_GRAS_CL_VPORT_YSCALE 0x0000204b +#define A3XX_GRAS_CL_VPORT_YSCALE__MASK 0xffffffff +#define A3XX_GRAS_CL_VPORT_YSCALE__SHIFT 0 +static inline uint32_t A3XX_GRAS_CL_VPORT_YSCALE(float val) +{ + return ((fui(val)) << A3XX_GRAS_CL_VPORT_YSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_YSCALE__MASK; +} + +#define REG_A3XX_GRAS_CL_VPORT_ZOFFSET 0x0000204c +#define A3XX_GRAS_CL_VPORT_ZOFFSET__MASK 0xffffffff +#define A3XX_GRAS_CL_VPORT_ZOFFSET__SHIFT 0 +static inline uint32_t A3XX_GRAS_CL_VPORT_ZOFFSET(float val) +{ + return ((fui(val)) << A3XX_GRAS_CL_VPORT_ZOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_ZOFFSET__MASK; +} + +#define REG_A3XX_GRAS_CL_VPORT_ZSCALE 0x0000204d +#define A3XX_GRAS_CL_VPORT_ZSCALE__MASK 0xffffffff +#define A3XX_GRAS_CL_VPORT_ZSCALE__SHIFT 0 +static inline uint32_t A3XX_GRAS_CL_VPORT_ZSCALE(float val) +{ + return ((fui(val)) << A3XX_GRAS_CL_VPORT_ZSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_ZSCALE__MASK; +} + +#define REG_A3XX_GRAS_SU_POINT_MINMAX 0x00002068 +#define A3XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff +#define A3XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0 +static inline uint32_t A3XX_GRAS_SU_POINT_MINMAX_MIN(float val) +{ + return ((((uint32_t)(val * 16.0))) << A3XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A3XX_GRAS_SU_POINT_MINMAX_MIN__MASK; +} +#define A3XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000 +#define A3XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16 +static inline uint32_t A3XX_GRAS_SU_POINT_MINMAX_MAX(float val) +{ + return 
((((uint32_t)(val * 16.0))) << A3XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A3XX_GRAS_SU_POINT_MINMAX_MAX__MASK; +} + +#define REG_A3XX_GRAS_SU_POINT_SIZE 0x00002069 +#define A3XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff +#define A3XX_GRAS_SU_POINT_SIZE__SHIFT 0 +static inline uint32_t A3XX_GRAS_SU_POINT_SIZE(float val) +{ + return ((((int32_t)(val * 16.0))) << A3XX_GRAS_SU_POINT_SIZE__SHIFT) & A3XX_GRAS_SU_POINT_SIZE__MASK; +} + +#define REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x0000206c +#define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK 0x00ffffff +#define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT 0 +static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(float val) +{ + return ((((int32_t)(val * 1048576.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK; +} + +#define REG_A3XX_GRAS_SU_POLY_OFFSET_OFFSET 0x0000206d +#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff +#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0 +static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) +{ + return ((((int32_t)(val * 64.0))) << A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; +} + +#define REG_A3XX_GRAS_SU_MODE_CONTROL 0x00002070 +#define A3XX_GRAS_SU_MODE_CONTROL_CULL_FRONT 0x00000001 +#define A3XX_GRAS_SU_MODE_CONTROL_CULL_BACK 0x00000002 +#define A3XX_GRAS_SU_MODE_CONTROL_FRONT_CW 0x00000004 +#define A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK 0x000007f8 +#define A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT 3 +static inline uint32_t A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(float val) +{ + return ((((int32_t)(val * 4.0))) << A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT) & A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK; +} +#define A3XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET 0x00000800 + +#define REG_A3XX_GRAS_SC_CONTROL 0x00002072 +#define A3XX_GRAS_SC_CONTROL_RENDER_MODE__MASK 0x000000f0 +#define A3XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT 4 +static inline uint32_t A3XX_GRAS_SC_CONTROL_RENDER_MODE(enum 
a3xx_render_mode val) +{ + return ((val) << A3XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT) & A3XX_GRAS_SC_CONTROL_RENDER_MODE__MASK; +} +#define A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK 0x00000f00 +#define A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT 8 +static inline uint32_t A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT) & A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK; +} +#define A3XX_GRAS_SC_CONTROL_RASTER_MODE__MASK 0x0000f000 +#define A3XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT 12 +static inline uint32_t A3XX_GRAS_SC_CONTROL_RASTER_MODE(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT) & A3XX_GRAS_SC_CONTROL_RASTER_MODE__MASK; +} + +#define REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL 0x00002074 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK 0x00007fff +#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT 0 +static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK; +} +#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK 0x7fff0000 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT 16 +static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK; +} + +#define REG_A3XX_GRAS_SC_SCREEN_SCISSOR_BR 0x00002075 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK 0x00007fff +#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT 0 +static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK; +} +#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK 0x7fff0000 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT 16 +static inline uint32_t 
A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK; +} + +#define REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL 0x00002079 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff +#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 +static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK; +} +#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 +static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK; +} + +#define REG_A3XX_GRAS_SC_WINDOW_SCISSOR_BR 0x0000207a +#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff +#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 +static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK; +} +#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 +static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK; +} + +#define REG_A3XX_RB_MODE_CONTROL 0x000020c0 +#define A3XX_RB_MODE_CONTROL_GMEM_BYPASS 0x00000080 +#define A3XX_RB_MODE_CONTROL_RENDER_MODE__MASK 0x00000700 +#define A3XX_RB_MODE_CONTROL_RENDER_MODE__SHIFT 8 +static inline uint32_t A3XX_RB_MODE_CONTROL_RENDER_MODE(enum a3xx_render_mode val) +{ + return ((val) << A3XX_RB_MODE_CONTROL_RENDER_MODE__SHIFT) & A3XX_RB_MODE_CONTROL_RENDER_MODE__MASK; +} +#define 
A3XX_RB_MODE_CONTROL_MRT__MASK 0x00003000 +#define A3XX_RB_MODE_CONTROL_MRT__SHIFT 12 +static inline uint32_t A3XX_RB_MODE_CONTROL_MRT(uint32_t val) +{ + return ((val) << A3XX_RB_MODE_CONTROL_MRT__SHIFT) & A3XX_RB_MODE_CONTROL_MRT__MASK; +} +#define A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE 0x00008000 +#define A3XX_RB_MODE_CONTROL_PACKER_TIMER_ENABLE 0x00010000 + +#define REG_A3XX_RB_RENDER_CONTROL 0x000020c1 +#define A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE 0x00000001 +#define A3XX_RB_RENDER_CONTROL_YUV_IN_ENABLE 0x00000002 +#define A3XX_RB_RENDER_CONTROL_COV_VALUE_INPUT_ENABLE 0x00000004 +#define A3XX_RB_RENDER_CONTROL_FACENESS 0x00000008 +#define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK 0x00000ff0 +#define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT 4 +static inline uint32_t A3XX_RB_RENDER_CONTROL_BIN_WIDTH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT) & A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK; +} +#define A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE 0x00001000 +#define A3XX_RB_RENDER_CONTROL_ENABLE_GMEM 0x00002000 +#define A3XX_RB_RENDER_CONTROL_XCOORD 0x00004000 +#define A3XX_RB_RENDER_CONTROL_YCOORD 0x00008000 +#define A3XX_RB_RENDER_CONTROL_ZCOORD 0x00010000 +#define A3XX_RB_RENDER_CONTROL_WCOORD 0x00020000 +#define A3XX_RB_RENDER_CONTROL_I_CLAMP_ENABLE 0x00080000 +#define A3XX_RB_RENDER_CONTROL_COV_VALUE_OUTPUT_ENABLE 0x00100000 +#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST 0x00400000 +#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK 0x07000000 +#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT 24 +static inline uint32_t A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val) +{ + return ((val) << A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK; +} +#define A3XX_RB_RENDER_CONTROL_ALPHA_TO_COVERAGE 0x40000000 +#define A3XX_RB_RENDER_CONTROL_ALPHA_TO_ONE 0x80000000 + +#define REG_A3XX_RB_MSAA_CONTROL 0x000020c2 +#define 
A3XX_RB_MSAA_CONTROL_DISABLE 0x00000400 +#define A3XX_RB_MSAA_CONTROL_SAMPLES__MASK 0x0000f000 +#define A3XX_RB_MSAA_CONTROL_SAMPLES__SHIFT 12 +static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A3XX_RB_MSAA_CONTROL_SAMPLES__SHIFT) & A3XX_RB_MSAA_CONTROL_SAMPLES__MASK; +} +#define A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__MASK 0xffff0000 +#define A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__SHIFT 16 +static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(uint32_t val) +{ + return ((val) << A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__SHIFT) & A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__MASK; +} + +#define REG_A3XX_RB_ALPHA_REF 0x000020c3 +#define A3XX_RB_ALPHA_REF_UINT__MASK 0x0000ff00 +#define A3XX_RB_ALPHA_REF_UINT__SHIFT 8 +static inline uint32_t A3XX_RB_ALPHA_REF_UINT(uint32_t val) +{ + return ((val) << A3XX_RB_ALPHA_REF_UINT__SHIFT) & A3XX_RB_ALPHA_REF_UINT__MASK; +} +#define A3XX_RB_ALPHA_REF_FLOAT__MASK 0xffff0000 +#define A3XX_RB_ALPHA_REF_FLOAT__SHIFT 16 +static inline uint32_t A3XX_RB_ALPHA_REF_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A3XX_RB_ALPHA_REF_FLOAT__SHIFT) & A3XX_RB_ALPHA_REF_FLOAT__MASK; +} + +static inline uint32_t REG_A3XX_RB_MRT(uint32_t i0) { return 0x000020c4 + 0x4*i0; } + +static inline uint32_t REG_A3XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020c4 + 0x4*i0; } +#define A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008 +#define A3XX_RB_MRT_CONTROL_BLEND 0x00000010 +#define A3XX_RB_MRT_CONTROL_BLEND2 0x00000020 +#define A3XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000f00 +#define A3XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 8 +static inline uint32_t A3XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) +{ + return ((val) << A3XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A3XX_RB_MRT_CONTROL_ROP_CODE__MASK; +} +#define A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK 0x00003000 +#define A3XX_RB_MRT_CONTROL_DITHER_MODE__SHIFT 12 +static inline uint32_t A3XX_RB_MRT_CONTROL_DITHER_MODE(enum adreno_rb_dither_mode val) +{ + return ((val) << 
A3XX_RB_MRT_CONTROL_DITHER_MODE__SHIFT) & A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK; +} +#define A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x0f000000 +#define A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 24 +static inline uint32_t A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) +{ + return ((val) << A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; +} + +static inline uint32_t REG_A3XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x000020c5 + 0x4*i0; } +#define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x0000003f +#define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a3xx_color_fmt val) +{ + return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK; +} +#define A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x000000c0 +#define A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 6 +static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a3xx_tile_mode val) +{ + return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK; +} +#define A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00000c00 +#define A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 10 +static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK; +} +#define A3XX_RB_MRT_BUF_INFO_COLOR_SRGB 0x00004000 +#define A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK 0xfffe0000 +#define A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT 17 +static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK; +} + +static inline uint32_t REG_A3XX_RB_MRT_BUF_BASE(uint32_t i0) { return 0x000020c6 + 0x4*i0; } +#define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK 0xfffffff0 +#define 
A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT 4 +static inline uint32_t A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT) & A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK; +} + +static inline uint32_t REG_A3XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x000020c7 + 0x4*i0; } +#define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f +#define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 +static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK; +} +#define A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0 +#define A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5 +static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) +{ + return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK; +} +#define A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00 +#define A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8 +static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK; +} +#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000 +#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16 +static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK; +} +#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000 +#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21 +static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum 
a3xx_rb_blend_opcode val) +{ + return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK; +} +#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000 +#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24 +static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK; +} +#define A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE 0x20000000 + +#define REG_A3XX_RB_BLEND_RED 0x000020e4 +#define A3XX_RB_BLEND_RED_UINT__MASK 0x000000ff +#define A3XX_RB_BLEND_RED_UINT__SHIFT 0 +static inline uint32_t A3XX_RB_BLEND_RED_UINT(uint32_t val) +{ + return ((val) << A3XX_RB_BLEND_RED_UINT__SHIFT) & A3XX_RB_BLEND_RED_UINT__MASK; +} +#define A3XX_RB_BLEND_RED_FLOAT__MASK 0xffff0000 +#define A3XX_RB_BLEND_RED_FLOAT__SHIFT 16 +static inline uint32_t A3XX_RB_BLEND_RED_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A3XX_RB_BLEND_RED_FLOAT__SHIFT) & A3XX_RB_BLEND_RED_FLOAT__MASK; +} + +#define REG_A3XX_RB_BLEND_GREEN 0x000020e5 +#define A3XX_RB_BLEND_GREEN_UINT__MASK 0x000000ff +#define A3XX_RB_BLEND_GREEN_UINT__SHIFT 0 +static inline uint32_t A3XX_RB_BLEND_GREEN_UINT(uint32_t val) +{ + return ((val) << A3XX_RB_BLEND_GREEN_UINT__SHIFT) & A3XX_RB_BLEND_GREEN_UINT__MASK; +} +#define A3XX_RB_BLEND_GREEN_FLOAT__MASK 0xffff0000 +#define A3XX_RB_BLEND_GREEN_FLOAT__SHIFT 16 +static inline uint32_t A3XX_RB_BLEND_GREEN_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A3XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A3XX_RB_BLEND_GREEN_FLOAT__MASK; +} + +#define REG_A3XX_RB_BLEND_BLUE 0x000020e6 +#define A3XX_RB_BLEND_BLUE_UINT__MASK 0x000000ff +#define A3XX_RB_BLEND_BLUE_UINT__SHIFT 0 +static inline uint32_t A3XX_RB_BLEND_BLUE_UINT(uint32_t val) +{ + return ((val) << A3XX_RB_BLEND_BLUE_UINT__SHIFT) & A3XX_RB_BLEND_BLUE_UINT__MASK; +} +#define 
A3XX_RB_BLEND_BLUE_FLOAT__MASK 0xffff0000 +#define A3XX_RB_BLEND_BLUE_FLOAT__SHIFT 16 +static inline uint32_t A3XX_RB_BLEND_BLUE_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A3XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A3XX_RB_BLEND_BLUE_FLOAT__MASK; +} + +#define REG_A3XX_RB_BLEND_ALPHA 0x000020e7 +#define A3XX_RB_BLEND_ALPHA_UINT__MASK 0x000000ff +#define A3XX_RB_BLEND_ALPHA_UINT__SHIFT 0 +static inline uint32_t A3XX_RB_BLEND_ALPHA_UINT(uint32_t val) +{ + return ((val) << A3XX_RB_BLEND_ALPHA_UINT__SHIFT) & A3XX_RB_BLEND_ALPHA_UINT__MASK; +} +#define A3XX_RB_BLEND_ALPHA_FLOAT__MASK 0xffff0000 +#define A3XX_RB_BLEND_ALPHA_FLOAT__SHIFT 16 +static inline uint32_t A3XX_RB_BLEND_ALPHA_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A3XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A3XX_RB_BLEND_ALPHA_FLOAT__MASK; +} + +#define REG_A3XX_RB_CLEAR_COLOR_DW0 0x000020e8 + +#define REG_A3XX_RB_CLEAR_COLOR_DW1 0x000020e9 + +#define REG_A3XX_RB_CLEAR_COLOR_DW2 0x000020ea + +#define REG_A3XX_RB_CLEAR_COLOR_DW3 0x000020eb + +#define REG_A3XX_RB_COPY_CONTROL 0x000020ec +#define A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK 0x00000003 +#define A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT 0 +static inline uint32_t A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(enum a3xx_msaa_samples val) +{ + return ((val) << A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT) & A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK; +} +#define A3XX_RB_COPY_CONTROL_DEPTHCLEAR 0x00000008 +#define A3XX_RB_COPY_CONTROL_MODE__MASK 0x00000070 +#define A3XX_RB_COPY_CONTROL_MODE__SHIFT 4 +static inline uint32_t A3XX_RB_COPY_CONTROL_MODE(enum adreno_rb_copy_control_mode val) +{ + return ((val) << A3XX_RB_COPY_CONTROL_MODE__SHIFT) & A3XX_RB_COPY_CONTROL_MODE__MASK; +} +#define A3XX_RB_COPY_CONTROL_MSAA_SRGB_DOWNSAMPLE 0x00000080 +#define A3XX_RB_COPY_CONTROL_FASTCLEAR__MASK 0x00000f00 +#define A3XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT 8 +static inline uint32_t A3XX_RB_COPY_CONTROL_FASTCLEAR(uint32_t val) +{ + return ((val) << 
A3XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT) & A3XX_RB_COPY_CONTROL_FASTCLEAR__MASK; +} +#define A3XX_RB_COPY_CONTROL_DEPTH32_RESOLVE 0x00001000 +#define A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK 0xffffc000 +#define A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT 14 +static inline uint32_t A3XX_RB_COPY_CONTROL_GMEM_BASE(uint32_t val) +{ + assert(!(val & 0x3fff)); + return ((val >> 14) << A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT) & A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK; +} + +#define REG_A3XX_RB_COPY_DEST_BASE 0x000020ed +#define A3XX_RB_COPY_DEST_BASE_BASE__MASK 0xfffffff0 +#define A3XX_RB_COPY_DEST_BASE_BASE__SHIFT 4 +static inline uint32_t A3XX_RB_COPY_DEST_BASE_BASE(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A3XX_RB_COPY_DEST_BASE_BASE__SHIFT) & A3XX_RB_COPY_DEST_BASE_BASE__MASK; +} + +#define REG_A3XX_RB_COPY_DEST_PITCH 0x000020ee +#define A3XX_RB_COPY_DEST_PITCH_PITCH__MASK 0xffffffff +#define A3XX_RB_COPY_DEST_PITCH_PITCH__SHIFT 0 +static inline uint32_t A3XX_RB_COPY_DEST_PITCH_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A3XX_RB_COPY_DEST_PITCH_PITCH__SHIFT) & A3XX_RB_COPY_DEST_PITCH_PITCH__MASK; +} + +#define REG_A3XX_RB_COPY_DEST_INFO 0x000020ef +#define A3XX_RB_COPY_DEST_INFO_TILE__MASK 0x00000003 +#define A3XX_RB_COPY_DEST_INFO_TILE__SHIFT 0 +static inline uint32_t A3XX_RB_COPY_DEST_INFO_TILE(enum a3xx_tile_mode val) +{ + return ((val) << A3XX_RB_COPY_DEST_INFO_TILE__SHIFT) & A3XX_RB_COPY_DEST_INFO_TILE__MASK; +} +#define A3XX_RB_COPY_DEST_INFO_FORMAT__MASK 0x000000fc +#define A3XX_RB_COPY_DEST_INFO_FORMAT__SHIFT 2 +static inline uint32_t A3XX_RB_COPY_DEST_INFO_FORMAT(enum a3xx_color_fmt val) +{ + return ((val) << A3XX_RB_COPY_DEST_INFO_FORMAT__SHIFT) & A3XX_RB_COPY_DEST_INFO_FORMAT__MASK; +} +#define A3XX_RB_COPY_DEST_INFO_SWAP__MASK 0x00000300 +#define A3XX_RB_COPY_DEST_INFO_SWAP__SHIFT 8 +static inline uint32_t A3XX_RB_COPY_DEST_INFO_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A3XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & 
A3XX_RB_COPY_DEST_INFO_SWAP__MASK; +} +#define A3XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK 0x00000c00 +#define A3XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT 10 +static inline uint32_t A3XX_RB_COPY_DEST_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) +{ + return ((val) << A3XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT) & A3XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK; +} +#define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK 0x0003c000 +#define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT 14 +static inline uint32_t A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(uint32_t val) +{ + return ((val) << A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT) & A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK; +} +#define A3XX_RB_COPY_DEST_INFO_ENDIAN__MASK 0x001c0000 +#define A3XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT 18 +static inline uint32_t A3XX_RB_COPY_DEST_INFO_ENDIAN(enum adreno_rb_surface_endian val) +{ + return ((val) << A3XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT) & A3XX_RB_COPY_DEST_INFO_ENDIAN__MASK; +} + +#define REG_A3XX_RB_DEPTH_CONTROL 0x00002100 +#define A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z 0x00000001 +#define A3XX_RB_DEPTH_CONTROL_Z_ENABLE 0x00000002 +#define A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE 0x00000004 +#define A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE 0x00000008 +#define A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK 0x00000070 +#define A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT 4 +static inline uint32_t A3XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val) +{ + return ((val) << A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK; +} +#define A3XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE 0x00000080 +#define A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE 0x80000000 + +#define REG_A3XX_RB_DEPTH_CLEAR 0x00002101 + +#define REG_A3XX_RB_DEPTH_INFO 0x00002102 +#define A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK 0x00000003 +#define A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT 0 +static inline uint32_t A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum adreno_rb_depth_format val) +{ + return ((val) << A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT) & 
A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK; +} +#define A3XX_RB_DEPTH_INFO_DEPTH_BASE__MASK 0xfffff800 +#define A3XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT 11 +static inline uint32_t A3XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A3XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A3XX_RB_DEPTH_INFO_DEPTH_BASE__MASK; +} + +#define REG_A3XX_RB_DEPTH_PITCH 0x00002103 +#define A3XX_RB_DEPTH_PITCH__MASK 0xffffffff +#define A3XX_RB_DEPTH_PITCH__SHIFT 0 +static inline uint32_t A3XX_RB_DEPTH_PITCH(uint32_t val) +{ + assert(!(val & 0x7)); + return ((val >> 3) << A3XX_RB_DEPTH_PITCH__SHIFT) & A3XX_RB_DEPTH_PITCH__MASK; +} + +#define REG_A3XX_RB_STENCIL_CONTROL 0x00002104 +#define A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001 +#define A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000002 +#define A3XX_RB_STENCIL_CONTROL_STENCIL_READ 0x00000004 +#define A3XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700 +#define A3XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A3XX_RB_STENCIL_CONTROL_FUNC__MASK; +} +#define A3XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800 +#define A3XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A3XX_RB_STENCIL_CONTROL_FAIL__MASK; +} +#define A3XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000 +#define A3XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZPASS__MASK; +} +#define A3XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000 +#define A3XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & 
A3XX_RB_STENCIL_CONTROL_ZFAIL__MASK; +} +#define A3XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000 +#define A3XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_FUNC_BF__MASK; +} +#define A3XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000 +#define A3XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_FAIL_BF__MASK; +} +#define A3XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000 +#define A3XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK; +} +#define A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000 +#define A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK; +} + +#define REG_A3XX_RB_STENCIL_CLEAR 0x00002105 + +#define REG_A3XX_RB_STENCIL_INFO 0x00002106 +#define A3XX_RB_STENCIL_INFO_STENCIL_BASE__MASK 0xfffff800 +#define A3XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT 11 +static inline uint32_t A3XX_RB_STENCIL_INFO_STENCIL_BASE(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A3XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT) & A3XX_RB_STENCIL_INFO_STENCIL_BASE__MASK; +} + +#define REG_A3XX_RB_STENCIL_PITCH 0x00002107 +#define A3XX_RB_STENCIL_PITCH__MASK 0xffffffff +#define A3XX_RB_STENCIL_PITCH__SHIFT 0 +static inline uint32_t A3XX_RB_STENCIL_PITCH(uint32_t val) +{ + assert(!(val & 0x7)); + return ((val >> 3) << A3XX_RB_STENCIL_PITCH__SHIFT) & A3XX_RB_STENCIL_PITCH__MASK; +} + +#define 
REG_A3XX_RB_STENCILREFMASK 0x00002108 +#define A3XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff +#define A3XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0 +static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILREF(uint32_t val) +{ + return ((val) << A3XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILREF__MASK; +} +#define A3XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00 +#define A3XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8 +static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val) +{ + return ((val) << A3XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILMASK__MASK; +} +#define A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000 +#define A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16 +static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val) +{ + return ((val) << A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK; +} + +#define REG_A3XX_RB_STENCILREFMASK_BF 0x00002109 +#define A3XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff +#define A3XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0 +static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val) +{ + return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILREF__MASK; +} +#define A3XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00 +#define A3XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8 +static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val) +{ + return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK; +} +#define A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000 +#define A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16 +static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val) +{ + return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK; +} + +#define 
REG_A3XX_RB_LRZ_VSC_CONTROL 0x0000210c +#define A3XX_RB_LRZ_VSC_CONTROL_BINNING_ENABLE 0x00000002 + +#define REG_A3XX_RB_WINDOW_OFFSET 0x0000210e +#define A3XX_RB_WINDOW_OFFSET_X__MASK 0x0000ffff +#define A3XX_RB_WINDOW_OFFSET_X__SHIFT 0 +static inline uint32_t A3XX_RB_WINDOW_OFFSET_X(uint32_t val) +{ + return ((val) << A3XX_RB_WINDOW_OFFSET_X__SHIFT) & A3XX_RB_WINDOW_OFFSET_X__MASK; +} +#define A3XX_RB_WINDOW_OFFSET_Y__MASK 0xffff0000 +#define A3XX_RB_WINDOW_OFFSET_Y__SHIFT 16 +static inline uint32_t A3XX_RB_WINDOW_OFFSET_Y(uint32_t val) +{ + return ((val) << A3XX_RB_WINDOW_OFFSET_Y__SHIFT) & A3XX_RB_WINDOW_OFFSET_Y__MASK; +} + +#define REG_A3XX_RB_SAMPLE_COUNT_CONTROL 0x00002110 +#define A3XX_RB_SAMPLE_COUNT_CONTROL_RESET 0x00000001 +#define A3XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002 + +#define REG_A3XX_RB_SAMPLE_COUNT_ADDR 0x00002111 + +#define REG_A3XX_RB_Z_CLAMP_MIN 0x00002114 + +#define REG_A3XX_RB_Z_CLAMP_MAX 0x00002115 + +#define REG_A3XX_VGT_BIN_BASE 0x000021e1 + +#define REG_A3XX_VGT_BIN_SIZE 0x000021e2 + +#define REG_A3XX_PC_VSTREAM_CONTROL 0x000021e4 +#define A3XX_PC_VSTREAM_CONTROL_SIZE__MASK 0x003f0000 +#define A3XX_PC_VSTREAM_CONTROL_SIZE__SHIFT 16 +static inline uint32_t A3XX_PC_VSTREAM_CONTROL_SIZE(uint32_t val) +{ + return ((val) << A3XX_PC_VSTREAM_CONTROL_SIZE__SHIFT) & A3XX_PC_VSTREAM_CONTROL_SIZE__MASK; +} +#define A3XX_PC_VSTREAM_CONTROL_N__MASK 0x07c00000 +#define A3XX_PC_VSTREAM_CONTROL_N__SHIFT 22 +static inline uint32_t A3XX_PC_VSTREAM_CONTROL_N(uint32_t val) +{ + return ((val) << A3XX_PC_VSTREAM_CONTROL_N__SHIFT) & A3XX_PC_VSTREAM_CONTROL_N__MASK; +} + +#define REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL 0x000021ea + +#define REG_A3XX_PC_PRIM_VTX_CNTL 0x000021ec +#define A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__MASK 0x0000001f +#define A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__SHIFT 0 +static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(uint32_t val) +{ + return ((val) << A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__SHIFT) & 
A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__MASK; +} +#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__MASK 0x000000e0 +#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__SHIFT 5 +static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__MASK; +} +#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__MASK 0x00000700 +#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__SHIFT 8 +static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__MASK; +} +#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_ENABLE 0x00001000 +#define A3XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART 0x00100000 +#define A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST 0x02000000 +#define A3XX_PC_PRIM_VTX_CNTL_PSIZE 0x04000000 + +#define REG_A3XX_PC_RESTART_INDEX 0x000021ed + +#define REG_A3XX_HLSQ_CONTROL_0_REG 0x00002200 +#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000030 +#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 4 +static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK; +} +#define A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE 0x00000040 +#define A3XX_HLSQ_CONTROL_0_REG_COMPUTEMODE 0x00000100 +#define A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART 0x00000200 +#define A3XX_HLSQ_CONTROL_0_REG_RESERVED2 0x00000400 +#define A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__MASK 0x00fff000 +#define A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__SHIFT 12 +static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__MASK; +} +#define 
A3XX_HLSQ_CONTROL_0_REG_FSONLYTEX 0x02000000 +#define A3XX_HLSQ_CONTROL_0_REG_CHUNKDISABLE 0x04000000 +#define A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK 0x08000000 +#define A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT 27 +static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK; +} +#define A3XX_HLSQ_CONTROL_0_REG_LAZYUPDATEDISABLE 0x10000000 +#define A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE 0x20000000 +#define A3XX_HLSQ_CONTROL_0_REG_TPFULLUPDATE 0x40000000 +#define A3XX_HLSQ_CONTROL_0_REG_SINGLECONTEXT 0x80000000 + +#define REG_A3XX_HLSQ_CONTROL_1_REG 0x00002201 +#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK 0x000000c0 +#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT 6 +static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK; +} +#define A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE 0x00000100 +#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__MASK 0x00ff0000 +#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__SHIFT 16 +static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__MASK; +} +#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__MASK 0xff000000 +#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__SHIFT 24 +static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__MASK; +} + +#define REG_A3XX_HLSQ_CONTROL_2_REG 0x00002202 +#define A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__MASK 0x000003fc +#define A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__SHIFT 2 +static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID(uint32_t val) +{ + return ((val) << 
A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__MASK; +} +#define A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__MASK 0x03fc0000 +#define A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__SHIFT 18 +static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__MASK; +} +#define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK 0xfc000000 +#define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT 26 +static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK; +} + +#define REG_A3XX_HLSQ_CONTROL_3_REG 0x00002203 +#define A3XX_HLSQ_CONTROL_3_REG_REGID__MASK 0x000000ff +#define A3XX_HLSQ_CONTROL_3_REG_REGID__SHIFT 0 +static inline uint32_t A3XX_HLSQ_CONTROL_3_REG_REGID(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONTROL_3_REG_REGID__SHIFT) & A3XX_HLSQ_CONTROL_3_REG_REGID__MASK; +} + +#define REG_A3XX_HLSQ_VS_CONTROL_REG 0x00002204 +#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK 0x000003ff +#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT 0 +static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(uint32_t val) +{ + return ((val) << A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK; +} +#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x001ff000 +#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT 12 +static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val) +{ + return ((val) << A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK; +} +#define A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 +#define A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT 24 +static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(uint32_t val) +{ + return ((val) << 
A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK; +} + +#define REG_A3XX_HLSQ_FS_CONTROL_REG 0x00002205 +#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK 0x000003ff +#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT 0 +static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(uint32_t val) +{ + return ((val) << A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK; +} +#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x001ff000 +#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT 12 +static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val) +{ + return ((val) << A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK; +} +#define A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 +#define A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT 24 +static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(uint32_t val) +{ + return ((val) << A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK; +} + +#define REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG 0x00002206 +#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK 0x000001ff +#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT 0 +static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK; +} +#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK 0x01ff0000 +#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__SHIFT 16 +static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__SHIFT) & A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK; +} + +#define REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x00002207 +#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK 0x000001ff +#define 
A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT 0 +static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK; +} +#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK 0x01ff0000 +#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__SHIFT 16 +static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__SHIFT) & A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK; +} + +#define REG_A3XX_HLSQ_CL_NDRANGE_0_REG 0x0000220a +#define A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__MASK 0x00000003 +#define A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__SHIFT 0 +static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__MASK; +} +#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__MASK 0x00000ffc +#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__SHIFT 2 +static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__MASK; +} +#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__MASK 0x003ff000 +#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__SHIFT 12 +static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__MASK; +} +#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__MASK 0xffc00000 +#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__SHIFT 22 +static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__MASK; +} + +static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK(uint32_t i0) { return 0x0000220b + 
0x2*i0; } + +static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK_SIZE(uint32_t i0) { return 0x0000220b + 0x2*i0; } + +static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK_OFFSET(uint32_t i0) { return 0x0000220c + 0x2*i0; } + +#define REG_A3XX_HLSQ_CL_CONTROL_0_REG 0x00002211 + +#define REG_A3XX_HLSQ_CL_CONTROL_1_REG 0x00002212 + +#define REG_A3XX_HLSQ_CL_KERNEL_CONST_REG 0x00002214 + +static inline uint32_t REG_A3XX_HLSQ_CL_KERNEL_GROUP(uint32_t i0) { return 0x00002215 + 0x1*i0; } + +static inline uint32_t REG_A3XX_HLSQ_CL_KERNEL_GROUP_RATIO(uint32_t i0) { return 0x00002215 + 0x1*i0; } + +#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG 0x00002216 + +#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG 0x00002217 + +#define REG_A3XX_HLSQ_CL_WG_OFFSET_REG 0x0000221a + +#define REG_A3XX_VFD_CONTROL_0 0x00002240 +#define A3XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK 0x0003ffff +#define A3XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT 0 +static inline uint32_t A3XX_VFD_CONTROL_0_TOTALATTRTOVS(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT) & A3XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK; +} +#define A3XX_VFD_CONTROL_0_PACKETSIZE__MASK 0x003c0000 +#define A3XX_VFD_CONTROL_0_PACKETSIZE__SHIFT 18 +static inline uint32_t A3XX_VFD_CONTROL_0_PACKETSIZE(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_0_PACKETSIZE__SHIFT) & A3XX_VFD_CONTROL_0_PACKETSIZE__MASK; +} +#define A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK 0x07c00000 +#define A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT 22 +static inline uint32_t A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT) & A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK; +} +#define A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK 0xf8000000 +#define A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT 27 +static inline uint32_t A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT) & A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK; +} + +#define 
REG_A3XX_VFD_CONTROL_1 0x00002241 +#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000000f +#define A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT 0 +static inline uint32_t A3XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT) & A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK; +} +#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK 0x000000f0 +#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT 4 +static inline uint32_t A3XX_VFD_CONTROL_1_MAXTHRESHOLD(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK; +} +#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK 0x00000f00 +#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT 8 +static inline uint32_t A3XX_VFD_CONTROL_1_MINTHRESHOLD(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK; +} +#define A3XX_VFD_CONTROL_1_REGID4VTX__MASK 0x00ff0000 +#define A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT 16 +static inline uint32_t A3XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A3XX_VFD_CONTROL_1_REGID4VTX__MASK; +} +#define A3XX_VFD_CONTROL_1_REGID4INST__MASK 0xff000000 +#define A3XX_VFD_CONTROL_1_REGID4INST__SHIFT 24 +static inline uint32_t A3XX_VFD_CONTROL_1_REGID4INST(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A3XX_VFD_CONTROL_1_REGID4INST__MASK; +} + +#define REG_A3XX_VFD_INDEX_MIN 0x00002242 + +#define REG_A3XX_VFD_INDEX_MAX 0x00002243 + +#define REG_A3XX_VFD_INSTANCEID_OFFSET 0x00002244 + +#define REG_A3XX_VFD_INDEX_OFFSET 0x00002245 + +#define REG_A3XX_VFD_INDEX_OFFSET 0x00002245 + +static inline uint32_t REG_A3XX_VFD_FETCH(uint32_t i0) { return 0x00002246 + 0x2*i0; } + +static inline uint32_t REG_A3XX_VFD_FETCH_INSTR_0(uint32_t i0) { return 0x00002246 + 0x2*i0; } +#define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK 0x0000007f +#define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT 0 +static inline uint32_t 
A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(uint32_t val) +{ + return ((val) << A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK; +} +#define A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK 0x0000ff80 +#define A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT 7 +static inline uint32_t A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(uint32_t val) +{ + return ((val) << A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK; +} +#define A3XX_VFD_FETCH_INSTR_0_INSTANCED 0x00010000 +#define A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT 0x00020000 +#define A3XX_VFD_FETCH_INSTR_0_INDEXCODE__MASK 0x00fc0000 +#define A3XX_VFD_FETCH_INSTR_0_INDEXCODE__SHIFT 18 +static inline uint32_t A3XX_VFD_FETCH_INSTR_0_INDEXCODE(uint32_t val) +{ + return ((val) << A3XX_VFD_FETCH_INSTR_0_INDEXCODE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_INDEXCODE__MASK; +} +#define A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK 0xff000000 +#define A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT 24 +static inline uint32_t A3XX_VFD_FETCH_INSTR_0_STEPRATE(uint32_t val) +{ + return ((val) << A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK; +} + +static inline uint32_t REG_A3XX_VFD_FETCH_INSTR_1(uint32_t i0) { return 0x00002247 + 0x2*i0; } + +static inline uint32_t REG_A3XX_VFD_DECODE(uint32_t i0) { return 0x00002266 + 0x1*i0; } + +static inline uint32_t REG_A3XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x00002266 + 0x1*i0; } +#define A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK 0x0000000f +#define A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT 0 +static inline uint32_t A3XX_VFD_DECODE_INSTR_WRITEMASK(uint32_t val) +{ + return ((val) << A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT) & A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK; +} +#define A3XX_VFD_DECODE_INSTR_CONSTFILL 0x00000010 +#define A3XX_VFD_DECODE_INSTR_FORMAT__MASK 0x00000fc0 +#define A3XX_VFD_DECODE_INSTR_FORMAT__SHIFT 6 +static inline uint32_t A3XX_VFD_DECODE_INSTR_FORMAT(enum a3xx_vtx_fmt val) +{ + return ((val) << A3XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & 
A3XX_VFD_DECODE_INSTR_FORMAT__MASK; +} +#define A3XX_VFD_DECODE_INSTR_REGID__MASK 0x000ff000 +#define A3XX_VFD_DECODE_INSTR_REGID__SHIFT 12 +static inline uint32_t A3XX_VFD_DECODE_INSTR_REGID(uint32_t val) +{ + return ((val) << A3XX_VFD_DECODE_INSTR_REGID__SHIFT) & A3XX_VFD_DECODE_INSTR_REGID__MASK; +} +#define A3XX_VFD_DECODE_INSTR_INT 0x00100000 +#define A3XX_VFD_DECODE_INSTR_SWAP__MASK 0x00c00000 +#define A3XX_VFD_DECODE_INSTR_SWAP__SHIFT 22 +static inline uint32_t A3XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A3XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A3XX_VFD_DECODE_INSTR_SWAP__MASK; +} +#define A3XX_VFD_DECODE_INSTR_SHIFTCNT__MASK 0x1f000000 +#define A3XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT 24 +static inline uint32_t A3XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val) +{ + return ((val) << A3XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT) & A3XX_VFD_DECODE_INSTR_SHIFTCNT__MASK; +} +#define A3XX_VFD_DECODE_INSTR_LASTCOMPVALID 0x20000000 +#define A3XX_VFD_DECODE_INSTR_SWITCHNEXT 0x40000000 + +#define REG_A3XX_VFD_VS_THREADING_THRESHOLD 0x0000227e +#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__MASK 0x0000000f +#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__SHIFT 0 +static inline uint32_t A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(uint32_t val) +{ + return ((val) << A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__SHIFT) & A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__MASK; +} +#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__MASK 0x0000ff00 +#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__SHIFT 8 +static inline uint32_t A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(uint32_t val) +{ + return ((val) << A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__SHIFT) & A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__MASK; +} + +#define REG_A3XX_VPC_ATTR 0x00002280 +#define A3XX_VPC_ATTR_TOTALATTR__MASK 0x000001ff +#define A3XX_VPC_ATTR_TOTALATTR__SHIFT 0 +static inline uint32_t A3XX_VPC_ATTR_TOTALATTR(uint32_t val) +{ + return ((val) << 
A3XX_VPC_ATTR_TOTALATTR__SHIFT) & A3XX_VPC_ATTR_TOTALATTR__MASK; +} +#define A3XX_VPC_ATTR_PSIZE 0x00000200 +#define A3XX_VPC_ATTR_THRDASSIGN__MASK 0x0ffff000 +#define A3XX_VPC_ATTR_THRDASSIGN__SHIFT 12 +static inline uint32_t A3XX_VPC_ATTR_THRDASSIGN(uint32_t val) +{ + return ((val) << A3XX_VPC_ATTR_THRDASSIGN__SHIFT) & A3XX_VPC_ATTR_THRDASSIGN__MASK; +} +#define A3XX_VPC_ATTR_LMSIZE__MASK 0xf0000000 +#define A3XX_VPC_ATTR_LMSIZE__SHIFT 28 +static inline uint32_t A3XX_VPC_ATTR_LMSIZE(uint32_t val) +{ + return ((val) << A3XX_VPC_ATTR_LMSIZE__SHIFT) & A3XX_VPC_ATTR_LMSIZE__MASK; +} + +#define REG_A3XX_VPC_PACK 0x00002281 +#define A3XX_VPC_PACK_NUMFPNONPOSVAR__MASK 0x0000ff00 +#define A3XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT 8 +static inline uint32_t A3XX_VPC_PACK_NUMFPNONPOSVAR(uint32_t val) +{ + return ((val) << A3XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT) & A3XX_VPC_PACK_NUMFPNONPOSVAR__MASK; +} +#define A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK 0x00ff0000 +#define A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT 16 +static inline uint32_t A3XX_VPC_PACK_NUMNONPOSVSVAR(uint32_t val) +{ + return ((val) << A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT) & A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK; +} + +static inline uint32_t REG_A3XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x00002282 + 0x1*i0; } + +static inline uint32_t REG_A3XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x00002282 + 0x1*i0; } +#define A3XX_VPC_VARYING_INTERP_MODE_C0__MASK 0x00000003 +#define A3XX_VPC_VARYING_INTERP_MODE_C0__SHIFT 0 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C0(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C0__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C0__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_C1__MASK 0x0000000c +#define A3XX_VPC_VARYING_INTERP_MODE_C1__SHIFT 2 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C1(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C1__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C1__MASK; +} +#define 
A3XX_VPC_VARYING_INTERP_MODE_C2__MASK 0x00000030 +#define A3XX_VPC_VARYING_INTERP_MODE_C2__SHIFT 4 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C2(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C2__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C2__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_C3__MASK 0x000000c0 +#define A3XX_VPC_VARYING_INTERP_MODE_C3__SHIFT 6 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C3(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C3__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C3__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_C4__MASK 0x00000300 +#define A3XX_VPC_VARYING_INTERP_MODE_C4__SHIFT 8 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C4(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C4__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C4__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_C5__MASK 0x00000c00 +#define A3XX_VPC_VARYING_INTERP_MODE_C5__SHIFT 10 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C5(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C5__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C5__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_C6__MASK 0x00003000 +#define A3XX_VPC_VARYING_INTERP_MODE_C6__SHIFT 12 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C6(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C6__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C6__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_C7__MASK 0x0000c000 +#define A3XX_VPC_VARYING_INTERP_MODE_C7__SHIFT 14 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C7(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C7__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C7__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_C8__MASK 0x00030000 +#define A3XX_VPC_VARYING_INTERP_MODE_C8__SHIFT 16 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C8(enum a3xx_intp_mode val) +{ + return ((val) << 
A3XX_VPC_VARYING_INTERP_MODE_C8__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C8__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_C9__MASK 0x000c0000 +#define A3XX_VPC_VARYING_INTERP_MODE_C9__SHIFT 18 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C9(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C9__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C9__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_CA__MASK 0x00300000 +#define A3XX_VPC_VARYING_INTERP_MODE_CA__SHIFT 20 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CA(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CA__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CA__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_CB__MASK 0x00c00000 +#define A3XX_VPC_VARYING_INTERP_MODE_CB__SHIFT 22 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CB(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CB__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CB__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_CC__MASK 0x03000000 +#define A3XX_VPC_VARYING_INTERP_MODE_CC__SHIFT 24 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CC(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CC__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CC__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_CD__MASK 0x0c000000 +#define A3XX_VPC_VARYING_INTERP_MODE_CD__SHIFT 26 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CD(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CD__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CD__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_CE__MASK 0x30000000 +#define A3XX_VPC_VARYING_INTERP_MODE_CE__SHIFT 28 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CE(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CE__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CE__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_CF__MASK 0xc0000000 +#define A3XX_VPC_VARYING_INTERP_MODE_CF__SHIFT 30 +static inline uint32_t 
A3XX_VPC_VARYING_INTERP_MODE_CF(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CF__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CF__MASK; +} + +static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x00002286 + 0x1*i0; } + +static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x00002286 + 0x1*i0; } +#define A3XX_VPC_VARYING_PS_REPL_MODE_C0__MASK 0x00000003 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C0__SHIFT 0 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C0(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C0__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C0__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C1__MASK 0x0000000c +#define A3XX_VPC_VARYING_PS_REPL_MODE_C1__SHIFT 2 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C1(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C1__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C1__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C2__MASK 0x00000030 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C2__SHIFT 4 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C2(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C2__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C2__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C3__MASK 0x000000c0 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C3__SHIFT 6 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C3(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C3__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C3__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C4__MASK 0x00000300 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C4__SHIFT 8 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C4(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C4__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C4__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C5__MASK 0x00000c00 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C5__SHIFT 10 +static inline 
uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C5(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C5__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C5__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C6__MASK 0x00003000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C6__SHIFT 12 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C6(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C6__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C6__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C7__MASK 0x0000c000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C7__SHIFT 14 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C7(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C7__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C7__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C8__MASK 0x00030000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C8__SHIFT 16 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C8(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C8__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C8__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C9__MASK 0x000c0000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C9__SHIFT 18 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C9(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C9__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C9__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_CA__MASK 0x00300000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_CA__SHIFT 20 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CA(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CA__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CA__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_CB__MASK 0x00c00000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_CB__SHIFT 22 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CB(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CB__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CB__MASK; +} +#define 
A3XX_VPC_VARYING_PS_REPL_MODE_CC__MASK 0x03000000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_CC__SHIFT 24 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CC(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CC__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CC__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_CD__MASK 0x0c000000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_CD__SHIFT 26 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CD(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CD__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CD__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_CE__MASK 0x30000000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_CE__SHIFT 28 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CE(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CE__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CE__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_CF__MASK 0xc0000000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_CF__SHIFT 30 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CF(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CF__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CF__MASK; +} + +#define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0 0x0000228a + +#define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_1 0x0000228b + +#define REG_A3XX_SP_SP_CTRL_REG 0x000022c0 +#define A3XX_SP_SP_CTRL_REG_RESOLVE 0x00010000 +#define A3XX_SP_SP_CTRL_REG_CONSTMODE__MASK 0x00040000 +#define A3XX_SP_SP_CTRL_REG_CONSTMODE__SHIFT 18 +static inline uint32_t A3XX_SP_SP_CTRL_REG_CONSTMODE(uint32_t val) +{ + return ((val) << A3XX_SP_SP_CTRL_REG_CONSTMODE__SHIFT) & A3XX_SP_SP_CTRL_REG_CONSTMODE__MASK; +} +#define A3XX_SP_SP_CTRL_REG_BINNING 0x00080000 +#define A3XX_SP_SP_CTRL_REG_SLEEPMODE__MASK 0x00300000 +#define A3XX_SP_SP_CTRL_REG_SLEEPMODE__SHIFT 20 +static inline uint32_t A3XX_SP_SP_CTRL_REG_SLEEPMODE(uint32_t val) +{ + return ((val) << A3XX_SP_SP_CTRL_REG_SLEEPMODE__SHIFT) & A3XX_SP_SP_CTRL_REG_SLEEPMODE__MASK; 
+} +#define A3XX_SP_SP_CTRL_REG_L0MODE__MASK 0x00c00000 +#define A3XX_SP_SP_CTRL_REG_L0MODE__SHIFT 22 +static inline uint32_t A3XX_SP_SP_CTRL_REG_L0MODE(uint32_t val) +{ + return ((val) << A3XX_SP_SP_CTRL_REG_L0MODE__SHIFT) & A3XX_SP_SP_CTRL_REG_L0MODE__MASK; +} + +#define REG_A3XX_SP_VS_CTRL_REG0 0x000022c4 +#define A3XX_SP_VS_CTRL_REG0_THREADMODE__MASK 0x00000001 +#define A3XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT 0 +static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT) & A3XX_SP_VS_CTRL_REG0_THREADMODE__MASK; +} +#define A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__MASK 0x00000002 +#define A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__SHIFT 1 +static inline uint32_t A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffermode val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__SHIFT) & A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__MASK; +} +#define A3XX_SP_VS_CTRL_REG0_CACHEINVALID 0x00000004 +#define A3XX_SP_VS_CTRL_REG0_ALUSCHMODE 0x00000008 +#define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK; +} +#define 
A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE 0x00200000 +#define A3XX_SP_VS_CTRL_REG0_LENGTH__MASK 0xff000000 +#define A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT 24 +static inline uint32_t A3XX_SP_VS_CTRL_REG0_LENGTH(uint32_t val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT) & A3XX_SP_VS_CTRL_REG0_LENGTH__MASK; +} + +#define REG_A3XX_SP_VS_CTRL_REG1 0x000022c5 +#define A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK 0x000003ff +#define A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT 0 +static inline uint32_t A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(uint32_t val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT) & A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK; +} +#define A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__MASK 0x000ffc00 +#define A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__SHIFT 10 +static inline uint32_t A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(uint32_t val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__MASK; +} +#define A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK 0x7f000000 +#define A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT 24 +static inline uint32_t A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK; +} + +#define REG_A3XX_SP_VS_PARAM_REG 0x000022c6 +#define A3XX_SP_VS_PARAM_REG_POSREGID__MASK 0x000000ff +#define A3XX_SP_VS_PARAM_REG_POSREGID__SHIFT 0 +static inline uint32_t A3XX_SP_VS_PARAM_REG_POSREGID(uint32_t val) +{ + return ((val) << A3XX_SP_VS_PARAM_REG_POSREGID__SHIFT) & A3XX_SP_VS_PARAM_REG_POSREGID__MASK; +} +#define A3XX_SP_VS_PARAM_REG_PSIZEREGID__MASK 0x0000ff00 +#define A3XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT 8 +static inline uint32_t A3XX_SP_VS_PARAM_REG_PSIZEREGID(uint32_t val) +{ + return ((val) << A3XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT) & A3XX_SP_VS_PARAM_REG_PSIZEREGID__MASK; +} +#define A3XX_SP_VS_PARAM_REG_POS2DMODE 0x00010000 +#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK 0x01f00000 
+#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT 20 +static inline uint32_t A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val) +{ + return ((val) << A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT) & A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK; +} + +static inline uint32_t REG_A3XX_SP_VS_OUT(uint32_t i0) { return 0x000022c7 + 0x1*i0; } + +static inline uint32_t REG_A3XX_SP_VS_OUT_REG(uint32_t i0) { return 0x000022c7 + 0x1*i0; } +#define A3XX_SP_VS_OUT_REG_A_REGID__MASK 0x000000ff +#define A3XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 +static inline uint32_t A3XX_SP_VS_OUT_REG_A_REGID(uint32_t val) +{ + return ((val) << A3XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A3XX_SP_VS_OUT_REG_A_REGID__MASK; +} +#define A3XX_SP_VS_OUT_REG_A_HALF 0x00000100 +#define A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00001e00 +#define A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 9 +static inline uint32_t A3XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val) +{ + return ((val) << A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK; +} +#define A3XX_SP_VS_OUT_REG_B_REGID__MASK 0x00ff0000 +#define A3XX_SP_VS_OUT_REG_B_REGID__SHIFT 16 +static inline uint32_t A3XX_SP_VS_OUT_REG_B_REGID(uint32_t val) +{ + return ((val) << A3XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A3XX_SP_VS_OUT_REG_B_REGID__MASK; +} +#define A3XX_SP_VS_OUT_REG_B_HALF 0x01000000 +#define A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x1e000000 +#define A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 25 +static inline uint32_t A3XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) +{ + return ((val) << A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK; +} + +static inline uint32_t REG_A3XX_SP_VS_VPC_DST(uint32_t i0) { return 0x000022d0 + 0x1*i0; } + +static inline uint32_t REG_A3XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x000022d0 + 0x1*i0; } +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x0000007f +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 +static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) +{ + return ((val) << 
A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK; +} +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x00007f00 +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8 +static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val) +{ + return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK; +} +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x007f0000 +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16 +static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val) +{ + return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK; +} +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0x7f000000 +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24 +static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) +{ + return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK; +} + +#define REG_A3XX_SP_VS_OBJ_OFFSET_REG 0x000022d4 +#define A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK 0x0000ffff +#define A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT 0 +static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT) & A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK; +} +#define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 +#define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 +static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; +} +#define A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 +#define A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 +static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & 
A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; +} + +#define REG_A3XX_SP_VS_OBJ_START_REG 0x000022d5 + +#define REG_A3XX_SP_VS_PVT_MEM_PARAM_REG 0x000022d6 +#define A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK 0x000000ff +#define A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT 0 +static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM(uint32_t val) +{ + return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK; +} +#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK 0x00ffff00 +#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT 8 +static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK; +} +#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK 0xff000000 +#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT 24 +static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD(uint32_t val) +{ + return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK; +} + +#define REG_A3XX_SP_VS_PVT_MEM_ADDR_REG 0x000022d7 +#define A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__MASK 0x0000001f +#define A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT 0 +static inline uint32_t A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN(uint32_t val) +{ + return ((val) << A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT) & A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__MASK; +} +#define A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK 0xffffffe0 +#define A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT 5 +static inline uint32_t A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT) & A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK; +} + +#define 
REG_A3XX_SP_VS_PVT_MEM_SIZE_REG 0x000022d8 + +#define REG_A3XX_SP_VS_LENGTH_REG 0x000022df +#define A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__MASK 0xffffffff +#define A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__SHIFT 0 +static inline uint32_t A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(uint32_t val) +{ + return ((val) << A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__SHIFT) & A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__MASK; +} + +#define REG_A3XX_SP_FS_CTRL_REG0 0x000022e0 +#define A3XX_SP_FS_CTRL_REG0_THREADMODE__MASK 0x00000001 +#define A3XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT 0 +static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT) & A3XX_SP_FS_CTRL_REG0_THREADMODE__MASK; +} +#define A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__MASK 0x00000002 +#define A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__SHIFT 1 +static inline uint32_t A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffermode val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__SHIFT) & A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__MASK; +} +#define A3XX_SP_FS_CTRL_REG0_CACHEINVALID 0x00000004 +#define A3XX_SP_FS_CTRL_REG0_ALUSCHMODE 0x00000008 +#define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A3XX_SP_FS_CTRL_REG0_FSBYPASSENABLE 0x00020000 +#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP 0x00040000 +#define A3XX_SP_FS_CTRL_REG0_OUTORDERED 0x00080000 +#define 
A3XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A3XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A3XX_SP_FS_CTRL_REG0_THREADSIZE__MASK; +} +#define A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE 0x00200000 +#define A3XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x00400000 +#define A3XX_SP_FS_CTRL_REG0_COMPUTEMODE 0x00800000 +#define A3XX_SP_FS_CTRL_REG0_LENGTH__MASK 0xff000000 +#define A3XX_SP_FS_CTRL_REG0_LENGTH__SHIFT 24 +static inline uint32_t A3XX_SP_FS_CTRL_REG0_LENGTH(uint32_t val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG0_LENGTH__SHIFT) & A3XX_SP_FS_CTRL_REG0_LENGTH__MASK; +} + +#define REG_A3XX_SP_FS_CTRL_REG1 0x000022e1 +#define A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK 0x000003ff +#define A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT 0 +static inline uint32_t A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(uint32_t val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT) & A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK; +} +#define A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__MASK 0x000ffc00 +#define A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__SHIFT 10 +static inline uint32_t A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(uint32_t val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__MASK; +} +#define A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__MASK 0x00f00000 +#define A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__SHIFT 20 +static inline uint32_t A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__MASK; +} +#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK 0x7f000000 +#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__SHIFT 24 +static inline uint32_t A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__SHIFT) & A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK; 
+} + +#define REG_A3XX_SP_FS_OBJ_OFFSET_REG 0x000022e2 +#define A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK 0x0000ffff +#define A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT 0 +static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT) & A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK; +} +#define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 +#define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 +static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; +} +#define A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 +#define A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 +static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; +} + +#define REG_A3XX_SP_FS_OBJ_START_REG 0x000022e3 + +#define REG_A3XX_SP_FS_PVT_MEM_PARAM_REG 0x000022e4 +#define A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK 0x000000ff +#define A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT 0 +static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM(uint32_t val) +{ + return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK; +} +#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK 0x00ffff00 +#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT 8 +static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK; +} +#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK 0xff000000 +#define 
A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT 24 +static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD(uint32_t val) +{ + return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK; +} + +#define REG_A3XX_SP_FS_PVT_MEM_ADDR_REG 0x000022e5 +#define A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__MASK 0x0000001f +#define A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT 0 +static inline uint32_t A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN(uint32_t val) +{ + return ((val) << A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT) & A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__MASK; +} +#define A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK 0xffffffe0 +#define A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT 5 +static inline uint32_t A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT) & A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK; +} + +#define REG_A3XX_SP_FS_PVT_MEM_SIZE_REG 0x000022e6 + +#define REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x000022e8 + +#define REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_1 0x000022e9 + +#define REG_A3XX_SP_FS_OUTPUT_REG 0x000022ec +#define A3XX_SP_FS_OUTPUT_REG_MRT__MASK 0x00000003 +#define A3XX_SP_FS_OUTPUT_REG_MRT__SHIFT 0 +static inline uint32_t A3XX_SP_FS_OUTPUT_REG_MRT(uint32_t val) +{ + return ((val) << A3XX_SP_FS_OUTPUT_REG_MRT__SHIFT) & A3XX_SP_FS_OUTPUT_REG_MRT__MASK; +} +#define A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE 0x00000080 +#define A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK 0x0000ff00 +#define A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT 8 +static inline uint32_t A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(uint32_t val) +{ + return ((val) << A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT) & A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK; +} + +static inline uint32_t REG_A3XX_SP_FS_MRT(uint32_t i0) { return 0x000022f0 + 0x1*i0; } + +static inline uint32_t 
REG_A3XX_SP_FS_MRT_REG(uint32_t i0) { return 0x000022f0 + 0x1*i0; } +#define A3XX_SP_FS_MRT_REG_REGID__MASK 0x000000ff +#define A3XX_SP_FS_MRT_REG_REGID__SHIFT 0 +static inline uint32_t A3XX_SP_FS_MRT_REG_REGID(uint32_t val) +{ + return ((val) << A3XX_SP_FS_MRT_REG_REGID__SHIFT) & A3XX_SP_FS_MRT_REG_REGID__MASK; +} +#define A3XX_SP_FS_MRT_REG_HALF_PRECISION 0x00000100 +#define A3XX_SP_FS_MRT_REG_SINT 0x00000400 +#define A3XX_SP_FS_MRT_REG_UINT 0x00000800 + +static inline uint32_t REG_A3XX_SP_FS_IMAGE_OUTPUT(uint32_t i0) { return 0x000022f4 + 0x1*i0; } + +static inline uint32_t REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(uint32_t i0) { return 0x000022f4 + 0x1*i0; } +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK 0x0000003f +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT 0 +static inline uint32_t A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(enum a3xx_color_fmt val) +{ + return ((val) << A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT) & A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK; +} + +#define REG_A3XX_SP_FS_LENGTH_REG 0x000022ff +#define A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__MASK 0xffffffff +#define A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__SHIFT 0 +static inline uint32_t A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(uint32_t val) +{ + return ((val) << A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__SHIFT) & A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__MASK; +} + +#define REG_A3XX_PA_SC_AA_CONFIG 0x00002301 + +#define REG_A3XX_TPL1_TP_VS_TEX_OFFSET 0x00002340 +#define A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__MASK 0x000000ff +#define A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__SHIFT 0 +static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET(uint32_t val) +{ + return ((val) << A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__SHIFT) & A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__MASK; +} +#define A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__MASK 0x0000ff00 +#define A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__SHIFT 8 +static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET(uint32_t val) +{ + return ((val) << 
A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__SHIFT) & A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__MASK; +} +#define A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__MASK 0xffff0000 +#define A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__SHIFT 16 +static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR(uint32_t val) +{ + return ((val) << A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__SHIFT) & A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__MASK; +} + +#define REG_A3XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR 0x00002341 + +#define REG_A3XX_TPL1_TP_FS_TEX_OFFSET 0x00002342 +#define A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__MASK 0x000000ff +#define A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__SHIFT 0 +static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET(uint32_t val) +{ + return ((val) << A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__SHIFT) & A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__MASK; +} +#define A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__MASK 0x0000ff00 +#define A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__SHIFT 8 +static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET(uint32_t val) +{ + return ((val) << A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__SHIFT) & A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__MASK; +} +#define A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__MASK 0xffff0000 +#define A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__SHIFT 16 +static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR(uint32_t val) +{ + return ((val) << A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__SHIFT) & A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__MASK; +} + +#define REG_A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR 0x00002343 + +#define REG_A3XX_VBIF_CLKON 0x00003001 + +#define REG_A3XX_VBIF_FIXED_SORT_EN 0x0000300c + +#define REG_A3XX_VBIF_FIXED_SORT_SEL0 0x0000300d + +#define REG_A3XX_VBIF_FIXED_SORT_SEL1 0x0000300e + +#define REG_A3XX_VBIF_ABIT_SORT 0x0000301c + +#define REG_A3XX_VBIF_ABIT_SORT_CONF 0x0000301d + +#define REG_A3XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a + +#define REG_A3XX_VBIF_IN_RD_LIM_CONF0 0x0000302c + +#define 
REG_A3XX_VBIF_IN_RD_LIM_CONF1 0x0000302d + +#define REG_A3XX_VBIF_IN_WR_LIM_CONF0 0x00003030 + +#define REG_A3XX_VBIF_IN_WR_LIM_CONF1 0x00003031 + +#define REG_A3XX_VBIF_OUT_RD_LIM_CONF0 0x00003034 + +#define REG_A3XX_VBIF_OUT_WR_LIM_CONF0 0x00003035 + +#define REG_A3XX_VBIF_DDR_OUT_MAX_BURST 0x00003036 + +#define REG_A3XX_VBIF_ARB_CTL 0x0000303c + +#define REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB 0x00003049 + +#define REG_A3XX_VBIF_OUT_AXI_AMEMTYPE_CONF0 0x00003058 + +#define REG_A3XX_VBIF_OUT_AXI_AOOO_EN 0x0000305e + +#define REG_A3XX_VBIF_OUT_AXI_AOOO 0x0000305f + +#define REG_A3XX_VBIF_PERF_CNT_EN 0x00003070 +#define A3XX_VBIF_PERF_CNT_EN_CNT0 0x00000001 +#define A3XX_VBIF_PERF_CNT_EN_CNT1 0x00000002 +#define A3XX_VBIF_PERF_CNT_EN_PWRCNT0 0x00000004 +#define A3XX_VBIF_PERF_CNT_EN_PWRCNT1 0x00000008 +#define A3XX_VBIF_PERF_CNT_EN_PWRCNT2 0x00000010 + +#define REG_A3XX_VBIF_PERF_CNT_CLR 0x00003071 +#define A3XX_VBIF_PERF_CNT_CLR_CNT0 0x00000001 +#define A3XX_VBIF_PERF_CNT_CLR_CNT1 0x00000002 +#define A3XX_VBIF_PERF_CNT_CLR_PWRCNT0 0x00000004 +#define A3XX_VBIF_PERF_CNT_CLR_PWRCNT1 0x00000008 +#define A3XX_VBIF_PERF_CNT_CLR_PWRCNT2 0x00000010 + +#define REG_A3XX_VBIF_PERF_CNT_SEL 0x00003072 + +#define REG_A3XX_VBIF_PERF_CNT0_LO 0x00003073 + +#define REG_A3XX_VBIF_PERF_CNT0_HI 0x00003074 + +#define REG_A3XX_VBIF_PERF_CNT1_LO 0x00003075 + +#define REG_A3XX_VBIF_PERF_CNT1_HI 0x00003076 + +#define REG_A3XX_VBIF_PERF_PWR_CNT0_LO 0x00003077 + +#define REG_A3XX_VBIF_PERF_PWR_CNT0_HI 0x00003078 + +#define REG_A3XX_VBIF_PERF_PWR_CNT1_LO 0x00003079 + +#define REG_A3XX_VBIF_PERF_PWR_CNT1_HI 0x0000307a + +#define REG_A3XX_VBIF_PERF_PWR_CNT2_LO 0x0000307b + +#define REG_A3XX_VBIF_PERF_PWR_CNT2_HI 0x0000307c + +#define REG_A3XX_VSC_BIN_SIZE 0x00000c01 +#define A3XX_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f +#define A3XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 +static inline uint32_t A3XX_VSC_BIN_SIZE_WIDTH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A3XX_VSC_BIN_SIZE_WIDTH__SHIFT) 
& A3XX_VSC_BIN_SIZE_WIDTH__MASK; +} +#define A3XX_VSC_BIN_SIZE_HEIGHT__MASK 0x000003e0 +#define A3XX_VSC_BIN_SIZE_HEIGHT__SHIFT 5 +static inline uint32_t A3XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A3XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A3XX_VSC_BIN_SIZE_HEIGHT__MASK; +} + +#define REG_A3XX_VSC_SIZE_ADDRESS 0x00000c02 + +static inline uint32_t REG_A3XX_VSC_PIPE(uint32_t i0) { return 0x00000c06 + 0x3*i0; } + +static inline uint32_t REG_A3XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c06 + 0x3*i0; } +#define A3XX_VSC_PIPE_CONFIG_X__MASK 0x000003ff +#define A3XX_VSC_PIPE_CONFIG_X__SHIFT 0 +static inline uint32_t A3XX_VSC_PIPE_CONFIG_X(uint32_t val) +{ + return ((val) << A3XX_VSC_PIPE_CONFIG_X__SHIFT) & A3XX_VSC_PIPE_CONFIG_X__MASK; +} +#define A3XX_VSC_PIPE_CONFIG_Y__MASK 0x000ffc00 +#define A3XX_VSC_PIPE_CONFIG_Y__SHIFT 10 +static inline uint32_t A3XX_VSC_PIPE_CONFIG_Y(uint32_t val) +{ + return ((val) << A3XX_VSC_PIPE_CONFIG_Y__SHIFT) & A3XX_VSC_PIPE_CONFIG_Y__MASK; +} +#define A3XX_VSC_PIPE_CONFIG_W__MASK 0x00f00000 +#define A3XX_VSC_PIPE_CONFIG_W__SHIFT 20 +static inline uint32_t A3XX_VSC_PIPE_CONFIG_W(uint32_t val) +{ + return ((val) << A3XX_VSC_PIPE_CONFIG_W__SHIFT) & A3XX_VSC_PIPE_CONFIG_W__MASK; +} +#define A3XX_VSC_PIPE_CONFIG_H__MASK 0x0f000000 +#define A3XX_VSC_PIPE_CONFIG_H__SHIFT 24 +static inline uint32_t A3XX_VSC_PIPE_CONFIG_H(uint32_t val) +{ + return ((val) << A3XX_VSC_PIPE_CONFIG_H__SHIFT) & A3XX_VSC_PIPE_CONFIG_H__MASK; +} + +static inline uint32_t REG_A3XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c07 + 0x3*i0; } + +static inline uint32_t REG_A3XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c08 + 0x3*i0; } + +#define REG_A3XX_VSC_BIN_CONTROL 0x00000c3c +#define A3XX_VSC_BIN_CONTROL_BINNING_ENABLE 0x00000001 + +#define REG_A3XX_UNKNOWN_0C3D 0x00000c3d + +#define REG_A3XX_PC_PERFCOUNTER0_SELECT 0x00000c48 + +#define REG_A3XX_PC_PERFCOUNTER1_SELECT 0x00000c49 + +#define 
REG_A3XX_PC_PERFCOUNTER2_SELECT 0x00000c4a + +#define REG_A3XX_PC_PERFCOUNTER3_SELECT 0x00000c4b + +#define REG_A3XX_GRAS_TSE_DEBUG_ECO 0x00000c81 + +#define REG_A3XX_GRAS_PERFCOUNTER0_SELECT 0x00000c88 + +#define REG_A3XX_GRAS_PERFCOUNTER1_SELECT 0x00000c89 + +#define REG_A3XX_GRAS_PERFCOUNTER2_SELECT 0x00000c8a + +#define REG_A3XX_GRAS_PERFCOUNTER3_SELECT 0x00000c8b + +static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE(uint32_t i0) { return 0x00000ca0 + 0x4*i0; } + +static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_X(uint32_t i0) { return 0x00000ca0 + 0x4*i0; } + +static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_Y(uint32_t i0) { return 0x00000ca1 + 0x4*i0; } + +static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_Z(uint32_t i0) { return 0x00000ca2 + 0x4*i0; } + +static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_W(uint32_t i0) { return 0x00000ca3 + 0x4*i0; } + +#define REG_A3XX_RB_GMEM_BASE_ADDR 0x00000cc0 + +#define REG_A3XX_RB_DEBUG_ECO_CONTROLS_ADDR 0x00000cc1 + +#define REG_A3XX_RB_PERFCOUNTER0_SELECT 0x00000cc6 + +#define REG_A3XX_RB_PERFCOUNTER1_SELECT 0x00000cc7 + +#define REG_A3XX_RB_FRAME_BUFFER_DIMENSION 0x00000ce0 +#define A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__MASK 0x00003fff +#define A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__SHIFT 0 +static inline uint32_t A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(uint32_t val) +{ + return ((val) << A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__SHIFT) & A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__MASK; +} +#define A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__MASK 0x0fffc000 +#define A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__SHIFT 14 +static inline uint32_t A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(uint32_t val) +{ + return ((val) << A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__SHIFT) & A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__MASK; +} + +#define REG_A3XX_HLSQ_PERFCOUNTER0_SELECT 0x00000e00 + +#define REG_A3XX_HLSQ_PERFCOUNTER1_SELECT 0x00000e01 + +#define REG_A3XX_HLSQ_PERFCOUNTER2_SELECT 0x00000e02 + +#define REG_A3XX_HLSQ_PERFCOUNTER3_SELECT 0x00000e03 + 
+#define REG_A3XX_HLSQ_PERFCOUNTER4_SELECT 0x00000e04 + +#define REG_A3XX_HLSQ_PERFCOUNTER5_SELECT 0x00000e05 + +#define REG_A3XX_UNKNOWN_0E43 0x00000e43 + +#define REG_A3XX_VFD_PERFCOUNTER0_SELECT 0x00000e44 + +#define REG_A3XX_VFD_PERFCOUNTER1_SELECT 0x00000e45 + +#define REG_A3XX_VPC_VPC_DEBUG_RAM_SEL 0x00000e61 + +#define REG_A3XX_VPC_VPC_DEBUG_RAM_READ 0x00000e62 + +#define REG_A3XX_VPC_PERFCOUNTER0_SELECT 0x00000e64 + +#define REG_A3XX_VPC_PERFCOUNTER1_SELECT 0x00000e65 + +#define REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG 0x00000e82 + +#define REG_A3XX_UCHE_PERFCOUNTER0_SELECT 0x00000e84 + +#define REG_A3XX_UCHE_PERFCOUNTER1_SELECT 0x00000e85 + +#define REG_A3XX_UCHE_PERFCOUNTER2_SELECT 0x00000e86 + +#define REG_A3XX_UCHE_PERFCOUNTER3_SELECT 0x00000e87 + +#define REG_A3XX_UCHE_PERFCOUNTER4_SELECT 0x00000e88 + +#define REG_A3XX_UCHE_PERFCOUNTER5_SELECT 0x00000e89 + +#define REG_A3XX_UCHE_CACHE_INVALIDATE0_REG 0x00000ea0 +#define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK 0x0fffffff +#define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT 0 +static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(uint32_t val) +{ + return ((val) << A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT) & A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK; +} + +#define REG_A3XX_UCHE_CACHE_INVALIDATE1_REG 0x00000ea1 +#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__MASK 0x0fffffff +#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__SHIFT 0 +static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(uint32_t val) +{ + return ((val) << A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__SHIFT) & A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__MASK; +} +#define A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__MASK 0x30000000 +#define A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__SHIFT 28 +static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(enum a3xx_cache_opcode val) +{ + return ((val) << A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__SHIFT) & A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__MASK; +} +#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE 
0x80000000 + +#define REG_A3XX_UNKNOWN_0EA6 0x00000ea6 + +#define REG_A3XX_SP_PERFCOUNTER0_SELECT 0x00000ec4 + +#define REG_A3XX_SP_PERFCOUNTER1_SELECT 0x00000ec5 + +#define REG_A3XX_SP_PERFCOUNTER2_SELECT 0x00000ec6 + +#define REG_A3XX_SP_PERFCOUNTER3_SELECT 0x00000ec7 + +#define REG_A3XX_SP_PERFCOUNTER4_SELECT 0x00000ec8 + +#define REG_A3XX_SP_PERFCOUNTER5_SELECT 0x00000ec9 + +#define REG_A3XX_SP_PERFCOUNTER6_SELECT 0x00000eca + +#define REG_A3XX_SP_PERFCOUNTER7_SELECT 0x00000ecb + +#define REG_A3XX_UNKNOWN_0EE0 0x00000ee0 + +#define REG_A3XX_UNKNOWN_0F03 0x00000f03 + +#define REG_A3XX_TP_PERFCOUNTER0_SELECT 0x00000f04 + +#define REG_A3XX_TP_PERFCOUNTER1_SELECT 0x00000f05 + +#define REG_A3XX_TP_PERFCOUNTER2_SELECT 0x00000f06 + +#define REG_A3XX_TP_PERFCOUNTER3_SELECT 0x00000f07 + +#define REG_A3XX_TP_PERFCOUNTER4_SELECT 0x00000f08 + +#define REG_A3XX_TP_PERFCOUNTER5_SELECT 0x00000f09 + +#define REG_A3XX_VGT_CL_INITIATOR 0x000021f0 + +#define REG_A3XX_VGT_EVENT_INITIATOR 0x000021f9 + +#define REG_A3XX_VGT_DRAW_INITIATOR 0x000021fc +#define A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE__MASK 0x0000003f +#define A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE__SHIFT 0 +static inline uint32_t A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE(enum pc_di_primtype val) +{ + return ((val) << A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE__SHIFT) & A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE__MASK; +} +#define A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__MASK 0x000000c0 +#define A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__SHIFT 6 +static inline uint32_t A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT(enum pc_di_src_sel val) +{ + return ((val) << A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__SHIFT) & A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__MASK; +} +#define A3XX_VGT_DRAW_INITIATOR_VIS_CULL__MASK 0x00000600 +#define A3XX_VGT_DRAW_INITIATOR_VIS_CULL__SHIFT 9 +static inline uint32_t A3XX_VGT_DRAW_INITIATOR_VIS_CULL(enum pc_di_vis_cull_mode val) +{ + return ((val) << A3XX_VGT_DRAW_INITIATOR_VIS_CULL__SHIFT) & A3XX_VGT_DRAW_INITIATOR_VIS_CULL__MASK; +} +#define 
A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE__MASK 0x00000800 +#define A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE__SHIFT 11 +static inline uint32_t A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE(enum pc_di_index_size val) +{ + return ((val) << A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE__SHIFT) & A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE__MASK; +} +#define A3XX_VGT_DRAW_INITIATOR_NOT_EOP 0x00001000 +#define A3XX_VGT_DRAW_INITIATOR_SMALL_INDEX 0x00002000 +#define A3XX_VGT_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE 0x00004000 +#define A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__MASK 0xff000000 +#define A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__SHIFT 24 +static inline uint32_t A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES(uint32_t val) +{ + return ((val) << A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__SHIFT) & A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__MASK; +} + +#define REG_A3XX_VGT_IMMED_DATA 0x000021fd + +#define REG_A3XX_TEX_SAMP_0 0x00000000 +#define A3XX_TEX_SAMP_0_CLAMPENABLE 0x00000001 +#define A3XX_TEX_SAMP_0_MIPFILTER_LINEAR 0x00000002 +#define A3XX_TEX_SAMP_0_XY_MAG__MASK 0x0000000c +#define A3XX_TEX_SAMP_0_XY_MAG__SHIFT 2 +static inline uint32_t A3XX_TEX_SAMP_0_XY_MAG(enum a3xx_tex_filter val) +{ + return ((val) << A3XX_TEX_SAMP_0_XY_MAG__SHIFT) & A3XX_TEX_SAMP_0_XY_MAG__MASK; +} +#define A3XX_TEX_SAMP_0_XY_MIN__MASK 0x00000030 +#define A3XX_TEX_SAMP_0_XY_MIN__SHIFT 4 +static inline uint32_t A3XX_TEX_SAMP_0_XY_MIN(enum a3xx_tex_filter val) +{ + return ((val) << A3XX_TEX_SAMP_0_XY_MIN__SHIFT) & A3XX_TEX_SAMP_0_XY_MIN__MASK; +} +#define A3XX_TEX_SAMP_0_WRAP_S__MASK 0x000001c0 +#define A3XX_TEX_SAMP_0_WRAP_S__SHIFT 6 +static inline uint32_t A3XX_TEX_SAMP_0_WRAP_S(enum a3xx_tex_clamp val) +{ + return ((val) << A3XX_TEX_SAMP_0_WRAP_S__SHIFT) & A3XX_TEX_SAMP_0_WRAP_S__MASK; +} +#define A3XX_TEX_SAMP_0_WRAP_T__MASK 0x00000e00 +#define A3XX_TEX_SAMP_0_WRAP_T__SHIFT 9 +static inline uint32_t A3XX_TEX_SAMP_0_WRAP_T(enum a3xx_tex_clamp val) +{ + return ((val) << A3XX_TEX_SAMP_0_WRAP_T__SHIFT) & A3XX_TEX_SAMP_0_WRAP_T__MASK; +} +#define 
A3XX_TEX_SAMP_0_WRAP_R__MASK 0x00007000 +#define A3XX_TEX_SAMP_0_WRAP_R__SHIFT 12 +static inline uint32_t A3XX_TEX_SAMP_0_WRAP_R(enum a3xx_tex_clamp val) +{ + return ((val) << A3XX_TEX_SAMP_0_WRAP_R__SHIFT) & A3XX_TEX_SAMP_0_WRAP_R__MASK; +} +#define A3XX_TEX_SAMP_0_ANISO__MASK 0x00038000 +#define A3XX_TEX_SAMP_0_ANISO__SHIFT 15 +static inline uint32_t A3XX_TEX_SAMP_0_ANISO(enum a3xx_tex_aniso val) +{ + return ((val) << A3XX_TEX_SAMP_0_ANISO__SHIFT) & A3XX_TEX_SAMP_0_ANISO__MASK; +} +#define A3XX_TEX_SAMP_0_COMPARE_FUNC__MASK 0x00700000 +#define A3XX_TEX_SAMP_0_COMPARE_FUNC__SHIFT 20 +static inline uint32_t A3XX_TEX_SAMP_0_COMPARE_FUNC(enum adreno_compare_func val) +{ + return ((val) << A3XX_TEX_SAMP_0_COMPARE_FUNC__SHIFT) & A3XX_TEX_SAMP_0_COMPARE_FUNC__MASK; +} +#define A3XX_TEX_SAMP_0_CUBEMAPSEAMLESSFILTOFF 0x01000000 +#define A3XX_TEX_SAMP_0_UNNORM_COORDS 0x80000000 + +#define REG_A3XX_TEX_SAMP_1 0x00000001 +#define A3XX_TEX_SAMP_1_LOD_BIAS__MASK 0x000007ff +#define A3XX_TEX_SAMP_1_LOD_BIAS__SHIFT 0 +static inline uint32_t A3XX_TEX_SAMP_1_LOD_BIAS(float val) +{ + return ((((int32_t)(val * 64.0))) << A3XX_TEX_SAMP_1_LOD_BIAS__SHIFT) & A3XX_TEX_SAMP_1_LOD_BIAS__MASK; +} +#define A3XX_TEX_SAMP_1_MAX_LOD__MASK 0x003ff000 +#define A3XX_TEX_SAMP_1_MAX_LOD__SHIFT 12 +static inline uint32_t A3XX_TEX_SAMP_1_MAX_LOD(float val) +{ + return ((((uint32_t)(val * 64.0))) << A3XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A3XX_TEX_SAMP_1_MAX_LOD__MASK; +} +#define A3XX_TEX_SAMP_1_MIN_LOD__MASK 0xffc00000 +#define A3XX_TEX_SAMP_1_MIN_LOD__SHIFT 22 +static inline uint32_t A3XX_TEX_SAMP_1_MIN_LOD(float val) +{ + return ((((uint32_t)(val * 64.0))) << A3XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A3XX_TEX_SAMP_1_MIN_LOD__MASK; +} + +#define REG_A3XX_TEX_CONST_0 0x00000000 +#define A3XX_TEX_CONST_0_TILED 0x00000001 +#define A3XX_TEX_CONST_0_SRGB 0x00000004 +#define A3XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070 +#define A3XX_TEX_CONST_0_SWIZ_X__SHIFT 4 +static inline uint32_t A3XX_TEX_CONST_0_SWIZ_X(enum 
a3xx_tex_swiz val) +{ + return ((val) << A3XX_TEX_CONST_0_SWIZ_X__SHIFT) & A3XX_TEX_CONST_0_SWIZ_X__MASK; +} +#define A3XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380 +#define A3XX_TEX_CONST_0_SWIZ_Y__SHIFT 7 +static inline uint32_t A3XX_TEX_CONST_0_SWIZ_Y(enum a3xx_tex_swiz val) +{ + return ((val) << A3XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A3XX_TEX_CONST_0_SWIZ_Y__MASK; +} +#define A3XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00 +#define A3XX_TEX_CONST_0_SWIZ_Z__SHIFT 10 +static inline uint32_t A3XX_TEX_CONST_0_SWIZ_Z(enum a3xx_tex_swiz val) +{ + return ((val) << A3XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A3XX_TEX_CONST_0_SWIZ_Z__MASK; +} +#define A3XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000 +#define A3XX_TEX_CONST_0_SWIZ_W__SHIFT 13 +static inline uint32_t A3XX_TEX_CONST_0_SWIZ_W(enum a3xx_tex_swiz val) +{ + return ((val) << A3XX_TEX_CONST_0_SWIZ_W__SHIFT) & A3XX_TEX_CONST_0_SWIZ_W__MASK; +} +#define A3XX_TEX_CONST_0_MIPLVLS__MASK 0x000f0000 +#define A3XX_TEX_CONST_0_MIPLVLS__SHIFT 16 +static inline uint32_t A3XX_TEX_CONST_0_MIPLVLS(uint32_t val) +{ + return ((val) << A3XX_TEX_CONST_0_MIPLVLS__SHIFT) & A3XX_TEX_CONST_0_MIPLVLS__MASK; +} +#define A3XX_TEX_CONST_0_MSAATEX__MASK 0x00300000 +#define A3XX_TEX_CONST_0_MSAATEX__SHIFT 20 +static inline uint32_t A3XX_TEX_CONST_0_MSAATEX(enum a3xx_tex_msaa val) +{ + return ((val) << A3XX_TEX_CONST_0_MSAATEX__SHIFT) & A3XX_TEX_CONST_0_MSAATEX__MASK; +} +#define A3XX_TEX_CONST_0_FMT__MASK 0x1fc00000 +#define A3XX_TEX_CONST_0_FMT__SHIFT 22 +static inline uint32_t A3XX_TEX_CONST_0_FMT(enum a3xx_tex_fmt val) +{ + return ((val) << A3XX_TEX_CONST_0_FMT__SHIFT) & A3XX_TEX_CONST_0_FMT__MASK; +} +#define A3XX_TEX_CONST_0_NOCONVERT 0x20000000 +#define A3XX_TEX_CONST_0_TYPE__MASK 0xc0000000 +#define A3XX_TEX_CONST_0_TYPE__SHIFT 30 +static inline uint32_t A3XX_TEX_CONST_0_TYPE(enum a3xx_tex_type val) +{ + return ((val) << A3XX_TEX_CONST_0_TYPE__SHIFT) & A3XX_TEX_CONST_0_TYPE__MASK; +} + +#define REG_A3XX_TEX_CONST_1 0x00000001 +#define A3XX_TEX_CONST_1_HEIGHT__MASK 
0x00003fff +#define A3XX_TEX_CONST_1_HEIGHT__SHIFT 0 +static inline uint32_t A3XX_TEX_CONST_1_HEIGHT(uint32_t val) +{ + return ((val) << A3XX_TEX_CONST_1_HEIGHT__SHIFT) & A3XX_TEX_CONST_1_HEIGHT__MASK; +} +#define A3XX_TEX_CONST_1_WIDTH__MASK 0x0fffc000 +#define A3XX_TEX_CONST_1_WIDTH__SHIFT 14 +static inline uint32_t A3XX_TEX_CONST_1_WIDTH(uint32_t val) +{ + return ((val) << A3XX_TEX_CONST_1_WIDTH__SHIFT) & A3XX_TEX_CONST_1_WIDTH__MASK; +} +#define A3XX_TEX_CONST_1_FETCHSIZE__MASK 0xf0000000 +#define A3XX_TEX_CONST_1_FETCHSIZE__SHIFT 28 +static inline uint32_t A3XX_TEX_CONST_1_FETCHSIZE(enum a3xx_tex_fetchsize val) +{ + return ((val) << A3XX_TEX_CONST_1_FETCHSIZE__SHIFT) & A3XX_TEX_CONST_1_FETCHSIZE__MASK; +} + +#define REG_A3XX_TEX_CONST_2 0x00000002 +#define A3XX_TEX_CONST_2_INDX__MASK 0x000001ff +#define A3XX_TEX_CONST_2_INDX__SHIFT 0 +static inline uint32_t A3XX_TEX_CONST_2_INDX(uint32_t val) +{ + return ((val) << A3XX_TEX_CONST_2_INDX__SHIFT) & A3XX_TEX_CONST_2_INDX__MASK; +} +#define A3XX_TEX_CONST_2_PITCH__MASK 0x3ffff000 +#define A3XX_TEX_CONST_2_PITCH__SHIFT 12 +static inline uint32_t A3XX_TEX_CONST_2_PITCH(uint32_t val) +{ + return ((val) << A3XX_TEX_CONST_2_PITCH__SHIFT) & A3XX_TEX_CONST_2_PITCH__MASK; +} +#define A3XX_TEX_CONST_2_SWAP__MASK 0xc0000000 +#define A3XX_TEX_CONST_2_SWAP__SHIFT 30 +static inline uint32_t A3XX_TEX_CONST_2_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A3XX_TEX_CONST_2_SWAP__SHIFT) & A3XX_TEX_CONST_2_SWAP__MASK; +} + +#define REG_A3XX_TEX_CONST_3 0x00000003 +#define A3XX_TEX_CONST_3_LAYERSZ1__MASK 0x0001ffff +#define A3XX_TEX_CONST_3_LAYERSZ1__SHIFT 0 +static inline uint32_t A3XX_TEX_CONST_3_LAYERSZ1(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A3XX_TEX_CONST_3_LAYERSZ1__SHIFT) & A3XX_TEX_CONST_3_LAYERSZ1__MASK; +} +#define A3XX_TEX_CONST_3_DEPTH__MASK 0x0ffe0000 +#define A3XX_TEX_CONST_3_DEPTH__SHIFT 17 +static inline uint32_t A3XX_TEX_CONST_3_DEPTH(uint32_t val) +{ + return ((val) << 
A3XX_TEX_CONST_3_DEPTH__SHIFT) & A3XX_TEX_CONST_3_DEPTH__MASK; +} +#define A3XX_TEX_CONST_3_LAYERSZ2__MASK 0xf0000000 +#define A3XX_TEX_CONST_3_LAYERSZ2__SHIFT 28 +static inline uint32_t A3XX_TEX_CONST_3_LAYERSZ2(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A3XX_TEX_CONST_3_LAYERSZ2__SHIFT) & A3XX_TEX_CONST_3_LAYERSZ2__MASK; +} + + +#endif /* A3XX_XML */ diff -Nru mesa-18.3.3/src/freedreno/registers/a4xx.xml.h mesa-19.0.1/src/freedreno/registers/a4xx.xml.h --- mesa-18.3.3/src/freedreno/registers/a4xx.xml.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/registers/a4xx.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,4257 @@ +#ifndef A4XX_XML +#define A4XX_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/freedreno/envytools/ +git clone https://github.com/freedreno/envytools.git + +The rules-ng-ng source files this header was generated from are: +- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 42463 bytes, from 2018-11-19 13:44:03) +- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14239 bytes, from 2018-12-05 15:25:53) +- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 43052 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 141895 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) +- 
/home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: +- Rob Clark (robclark) +- Ilia Mirkin (imirkin) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +enum a4xx_color_fmt { + RB4_A8_UNORM = 1, + RB4_R8_UNORM = 2, + RB4_R8_SNORM = 3, + RB4_R8_UINT = 4, + RB4_R8_SINT = 5, + RB4_R4G4B4A4_UNORM = 8, + RB4_R5G5B5A1_UNORM = 10, + RB4_R5G6B5_UNORM = 14, + RB4_R8G8_UNORM = 15, + RB4_R8G8_SNORM = 16, + RB4_R8G8_UINT = 17, + RB4_R8G8_SINT = 18, + RB4_R16_UNORM = 19, + RB4_R16_SNORM = 20, + RB4_R16_FLOAT = 21, + RB4_R16_UINT = 22, + RB4_R16_SINT = 23, + RB4_R8G8B8_UNORM = 25, + RB4_R8G8B8A8_UNORM = 26, + RB4_R8G8B8A8_SNORM = 28, + RB4_R8G8B8A8_UINT = 29, + RB4_R8G8B8A8_SINT = 30, + RB4_R10G10B10A2_UNORM = 31, + RB4_R10G10B10A2_UINT = 34, + RB4_R11G11B10_FLOAT = 39, + RB4_R16G16_UNORM = 40, + RB4_R16G16_SNORM = 41, + RB4_R16G16_FLOAT = 42, + RB4_R16G16_UINT = 43, + RB4_R16G16_SINT = 44, + RB4_R32_FLOAT = 45, + RB4_R32_UINT = 46, + RB4_R32_SINT = 47, + RB4_R16G16B16A16_UNORM = 52, + RB4_R16G16B16A16_SNORM = 53, + RB4_R16G16B16A16_FLOAT = 54, + RB4_R16G16B16A16_UINT = 55, + RB4_R16G16B16A16_SINT = 56, + RB4_R32G32_FLOAT = 57, + RB4_R32G32_UINT = 58, + RB4_R32G32_SINT = 59, + RB4_R32G32B32A32_FLOAT = 60, + RB4_R32G32B32A32_UINT = 61, + RB4_R32G32B32A32_SINT = 62, +}; + +enum a4xx_tile_mode { + TILE4_LINEAR = 0, + TILE4_2 = 2, + TILE4_3 = 3, +}; + +enum a4xx_vtx_fmt { + VFMT4_32_FLOAT = 1, + VFMT4_32_32_FLOAT = 2, + VFMT4_32_32_32_FLOAT = 3, + VFMT4_32_32_32_32_FLOAT = 4, + VFMT4_16_FLOAT = 5, + VFMT4_16_16_FLOAT = 6, + VFMT4_16_16_16_FLOAT = 7, + VFMT4_16_16_16_16_FLOAT = 8, + VFMT4_32_FIXED = 9, + VFMT4_32_32_FIXED = 10, + VFMT4_32_32_32_FIXED = 11, + VFMT4_32_32_32_32_FIXED = 12, + VFMT4_11_11_10_FLOAT = 13, + VFMT4_16_SINT = 16, + VFMT4_16_16_SINT = 17, + VFMT4_16_16_16_SINT = 18, + VFMT4_16_16_16_16_SINT = 19, + VFMT4_16_UINT = 20, + VFMT4_16_16_UINT = 21, + VFMT4_16_16_16_UINT = 22, + VFMT4_16_16_16_16_UINT = 23, + VFMT4_16_SNORM = 24, + VFMT4_16_16_SNORM = 25, + VFMT4_16_16_16_SNORM = 26, + VFMT4_16_16_16_16_SNORM = 27, + VFMT4_16_UNORM = 28, + VFMT4_16_16_UNORM = 29, + VFMT4_16_16_16_UNORM = 30, + 
VFMT4_16_16_16_16_UNORM = 31, + VFMT4_32_UINT = 32, + VFMT4_32_32_UINT = 33, + VFMT4_32_32_32_UINT = 34, + VFMT4_32_32_32_32_UINT = 35, + VFMT4_32_SINT = 36, + VFMT4_32_32_SINT = 37, + VFMT4_32_32_32_SINT = 38, + VFMT4_32_32_32_32_SINT = 39, + VFMT4_8_UINT = 40, + VFMT4_8_8_UINT = 41, + VFMT4_8_8_8_UINT = 42, + VFMT4_8_8_8_8_UINT = 43, + VFMT4_8_UNORM = 44, + VFMT4_8_8_UNORM = 45, + VFMT4_8_8_8_UNORM = 46, + VFMT4_8_8_8_8_UNORM = 47, + VFMT4_8_SINT = 48, + VFMT4_8_8_SINT = 49, + VFMT4_8_8_8_SINT = 50, + VFMT4_8_8_8_8_SINT = 51, + VFMT4_8_SNORM = 52, + VFMT4_8_8_SNORM = 53, + VFMT4_8_8_8_SNORM = 54, + VFMT4_8_8_8_8_SNORM = 55, + VFMT4_10_10_10_2_UINT = 56, + VFMT4_10_10_10_2_UNORM = 57, + VFMT4_10_10_10_2_SINT = 58, + VFMT4_10_10_10_2_SNORM = 59, + VFMT4_2_10_10_10_UINT = 60, + VFMT4_2_10_10_10_UNORM = 61, + VFMT4_2_10_10_10_SINT = 62, + VFMT4_2_10_10_10_SNORM = 63, +}; + +enum a4xx_tex_fmt { + TFMT4_A8_UNORM = 3, + TFMT4_8_UNORM = 4, + TFMT4_8_SNORM = 5, + TFMT4_8_UINT = 6, + TFMT4_8_SINT = 7, + TFMT4_4_4_4_4_UNORM = 8, + TFMT4_5_5_5_1_UNORM = 9, + TFMT4_5_6_5_UNORM = 11, + TFMT4_L8_A8_UNORM = 13, + TFMT4_8_8_UNORM = 14, + TFMT4_8_8_SNORM = 15, + TFMT4_8_8_UINT = 16, + TFMT4_8_8_SINT = 17, + TFMT4_16_UNORM = 18, + TFMT4_16_SNORM = 19, + TFMT4_16_FLOAT = 20, + TFMT4_16_UINT = 21, + TFMT4_16_SINT = 22, + TFMT4_8_8_8_8_UNORM = 28, + TFMT4_8_8_8_8_SNORM = 29, + TFMT4_8_8_8_8_UINT = 30, + TFMT4_8_8_8_8_SINT = 31, + TFMT4_9_9_9_E5_FLOAT = 32, + TFMT4_10_10_10_2_UNORM = 33, + TFMT4_10_10_10_2_UINT = 34, + TFMT4_11_11_10_FLOAT = 37, + TFMT4_16_16_UNORM = 38, + TFMT4_16_16_SNORM = 39, + TFMT4_16_16_FLOAT = 40, + TFMT4_16_16_UINT = 41, + TFMT4_16_16_SINT = 42, + TFMT4_32_FLOAT = 43, + TFMT4_32_UINT = 44, + TFMT4_32_SINT = 45, + TFMT4_16_16_16_16_UNORM = 51, + TFMT4_16_16_16_16_SNORM = 52, + TFMT4_16_16_16_16_FLOAT = 53, + TFMT4_16_16_16_16_UINT = 54, + TFMT4_16_16_16_16_SINT = 55, + TFMT4_32_32_FLOAT = 56, + TFMT4_32_32_UINT = 57, + TFMT4_32_32_SINT = 58, + 
TFMT4_32_32_32_FLOAT = 59, + TFMT4_32_32_32_UINT = 60, + TFMT4_32_32_32_SINT = 61, + TFMT4_32_32_32_32_FLOAT = 63, + TFMT4_32_32_32_32_UINT = 64, + TFMT4_32_32_32_32_SINT = 65, + TFMT4_X8Z24_UNORM = 71, + TFMT4_DXT1 = 86, + TFMT4_DXT3 = 87, + TFMT4_DXT5 = 88, + TFMT4_RGTC1_UNORM = 90, + TFMT4_RGTC1_SNORM = 91, + TFMT4_RGTC2_UNORM = 94, + TFMT4_RGTC2_SNORM = 95, + TFMT4_BPTC_UFLOAT = 97, + TFMT4_BPTC_FLOAT = 98, + TFMT4_BPTC = 99, + TFMT4_ATC_RGB = 100, + TFMT4_ATC_RGBA_EXPLICIT = 101, + TFMT4_ATC_RGBA_INTERPOLATED = 102, + TFMT4_ETC2_RG11_UNORM = 103, + TFMT4_ETC2_RG11_SNORM = 104, + TFMT4_ETC2_R11_UNORM = 105, + TFMT4_ETC2_R11_SNORM = 106, + TFMT4_ETC1 = 107, + TFMT4_ETC2_RGB8 = 108, + TFMT4_ETC2_RGBA8 = 109, + TFMT4_ETC2_RGB8A1 = 110, + TFMT4_ASTC_4x4 = 111, + TFMT4_ASTC_5x4 = 112, + TFMT4_ASTC_5x5 = 113, + TFMT4_ASTC_6x5 = 114, + TFMT4_ASTC_6x6 = 115, + TFMT4_ASTC_8x5 = 116, + TFMT4_ASTC_8x6 = 117, + TFMT4_ASTC_8x8 = 118, + TFMT4_ASTC_10x5 = 119, + TFMT4_ASTC_10x6 = 120, + TFMT4_ASTC_10x8 = 121, + TFMT4_ASTC_10x10 = 122, + TFMT4_ASTC_12x10 = 123, + TFMT4_ASTC_12x12 = 124, +}; + +enum a4xx_tex_fetchsize { + TFETCH4_1_BYTE = 0, + TFETCH4_2_BYTE = 1, + TFETCH4_4_BYTE = 2, + TFETCH4_8_BYTE = 3, + TFETCH4_16_BYTE = 4, +}; + +enum a4xx_depth_format { + DEPTH4_NONE = 0, + DEPTH4_16 = 1, + DEPTH4_24_8 = 2, + DEPTH4_32 = 3, +}; + +enum a4xx_ccu_perfcounter_select { + CCU_BUSY_CYCLES = 0, + CCU_RB_DEPTH_RETURN_STALL = 2, + CCU_RB_COLOR_RETURN_STALL = 3, + CCU_DEPTH_BLOCKS = 6, + CCU_COLOR_BLOCKS = 7, + CCU_DEPTH_BLOCK_HIT = 8, + CCU_COLOR_BLOCK_HIT = 9, + CCU_DEPTH_FLAG1_COUNT = 10, + CCU_DEPTH_FLAG2_COUNT = 11, + CCU_DEPTH_FLAG3_COUNT = 12, + CCU_DEPTH_FLAG4_COUNT = 13, + CCU_COLOR_FLAG1_COUNT = 14, + CCU_COLOR_FLAG2_COUNT = 15, + CCU_COLOR_FLAG3_COUNT = 16, + CCU_COLOR_FLAG4_COUNT = 17, + CCU_PARTIAL_BLOCK_READ = 18, +}; + +enum a4xx_cp_perfcounter_select { + CP_ALWAYS_COUNT = 0, + CP_BUSY = 1, + CP_PFP_IDLE = 2, + CP_PFP_BUSY_WORKING = 3, + CP_PFP_STALL_CYCLES_ANY = 4, 
+ CP_PFP_STARVE_CYCLES_ANY = 5, + CP_PFP_STARVED_PER_LOAD_ADDR = 6, + CP_PFP_STALLED_PER_STORE_ADDR = 7, + CP_PFP_PC_PROFILE = 8, + CP_PFP_MATCH_PM4_PKT_PROFILE = 9, + CP_PFP_COND_INDIRECT_DISCARDED = 10, + CP_LONG_RESUMPTIONS = 11, + CP_RESUME_CYCLES = 12, + CP_RESUME_TO_BOUNDARY_CYCLES = 13, + CP_LONG_PREEMPTIONS = 14, + CP_PREEMPT_CYCLES = 15, + CP_PREEMPT_TO_BOUNDARY_CYCLES = 16, + CP_ME_FIFO_EMPTY_PFP_IDLE = 17, + CP_ME_FIFO_EMPTY_PFP_BUSY = 18, + CP_ME_FIFO_NOT_EMPTY_NOT_FULL = 19, + CP_ME_FIFO_FULL_ME_BUSY = 20, + CP_ME_FIFO_FULL_ME_NON_WORKING = 21, + CP_ME_WAITING_FOR_PACKETS = 22, + CP_ME_BUSY_WORKING = 23, + CP_ME_STARVE_CYCLES_ANY = 24, + CP_ME_STARVE_CYCLES_PER_PROFILE = 25, + CP_ME_STALL_CYCLES_PER_PROFILE = 26, + CP_ME_PC_PROFILE = 27, + CP_RCIU_FIFO_EMPTY = 28, + CP_RCIU_FIFO_NOT_EMPTY_NOT_FULL = 29, + CP_RCIU_FIFO_FULL = 30, + CP_RCIU_FIFO_FULL_NO_CONTEXT = 31, + CP_RCIU_FIFO_FULL_AHB_MASTER = 32, + CP_RCIU_FIFO_FULL_OTHER = 33, + CP_AHB_IDLE = 34, + CP_AHB_STALL_ON_GRANT_NO_SPLIT = 35, + CP_AHB_STALL_ON_GRANT_SPLIT = 36, + CP_AHB_STALL_ON_GRANT_SPLIT_PROFILE = 37, + CP_AHB_BUSY_WORKING = 38, + CP_AHB_BUSY_STALL_ON_HRDY = 39, + CP_AHB_BUSY_STALL_ON_HRDY_PROFILE = 40, +}; + +enum a4xx_gras_ras_perfcounter_select { + RAS_SUPER_TILES = 0, + RAS_8X8_TILES = 1, + RAS_4X4_TILES = 2, + RAS_BUSY_CYCLES = 3, + RAS_STALL_CYCLES_BY_RB = 4, + RAS_STALL_CYCLES_BY_VSC = 5, + RAS_STARVE_CYCLES_BY_TSE = 6, + RAS_SUPERTILE_CYCLES = 7, + RAS_TILE_CYCLES = 8, + RAS_FULLY_COVERED_SUPER_TILES = 9, + RAS_FULLY_COVERED_8X8_TILES = 10, + RAS_4X4_PRIM = 11, + RAS_8X4_4X8_PRIM = 12, + RAS_8X8_PRIM = 13, +}; + +enum a4xx_gras_tse_perfcounter_select { + TSE_INPUT_PRIM = 0, + TSE_INPUT_NULL_PRIM = 1, + TSE_TRIVAL_REJ_PRIM = 2, + TSE_CLIPPED_PRIM = 3, + TSE_NEW_PRIM = 4, + TSE_ZERO_AREA_PRIM = 5, + TSE_FACENESS_CULLED_PRIM = 6, + TSE_ZERO_PIXEL_PRIM = 7, + TSE_OUTPUT_NULL_PRIM = 8, + TSE_OUTPUT_VISIBLE_PRIM = 9, + TSE_PRE_CLIP_PRIM = 10, + TSE_POST_CLIP_PRIM = 11, + 
TSE_BUSY_CYCLES = 12, + TSE_PC_STARVE = 13, + TSE_RAS_STALL = 14, + TSE_STALL_BARYPLANE_FIFO_FULL = 15, + TSE_STALL_ZPLANE_FIFO_FULL = 16, +}; + +enum a4xx_hlsq_perfcounter_select { + HLSQ_SP_VS_STAGE_CONSTANT = 0, + HLSQ_SP_VS_STAGE_INSTRUCTIONS = 1, + HLSQ_SP_FS_STAGE_CONSTANT = 2, + HLSQ_SP_FS_STAGE_INSTRUCTIONS = 3, + HLSQ_TP_STATE = 4, + HLSQ_QUADS = 5, + HLSQ_PIXELS = 6, + HLSQ_VERTICES = 7, + HLSQ_SP_VS_STAGE_DATA_BYTES = 13, + HLSQ_SP_FS_STAGE_DATA_BYTES = 14, + HLSQ_BUSY_CYCLES = 15, + HLSQ_STALL_CYCLES_SP_STATE = 16, + HLSQ_STALL_CYCLES_SP_VS_STAGE = 17, + HLSQ_STALL_CYCLES_SP_FS_STAGE = 18, + HLSQ_STALL_CYCLES_UCHE = 19, + HLSQ_RBBM_LOAD_CYCLES = 20, + HLSQ_DI_TO_VS_START_SP = 21, + HLSQ_DI_TO_FS_START_SP = 22, + HLSQ_VS_STAGE_START_TO_DONE_SP = 23, + HLSQ_FS_STAGE_START_TO_DONE_SP = 24, + HLSQ_SP_STATE_COPY_CYCLES_VS_STAGE = 25, + HLSQ_SP_STATE_COPY_CYCLES_FS_STAGE = 26, + HLSQ_UCHE_LATENCY_CYCLES = 27, + HLSQ_UCHE_LATENCY_COUNT = 28, + HLSQ_STARVE_CYCLES_VFD = 29, +}; + +enum a4xx_pc_perfcounter_select { + PC_VIS_STREAMS_LOADED = 0, + PC_VPC_PRIMITIVES = 2, + PC_DEAD_PRIM = 3, + PC_LIVE_PRIM = 4, + PC_DEAD_DRAWCALLS = 5, + PC_LIVE_DRAWCALLS = 6, + PC_VERTEX_MISSES = 7, + PC_STALL_CYCLES_VFD = 9, + PC_STALL_CYCLES_TSE = 10, + PC_STALL_CYCLES_UCHE = 11, + PC_WORKING_CYCLES = 12, + PC_IA_VERTICES = 13, + PC_GS_PRIMITIVES = 14, + PC_HS_INVOCATIONS = 15, + PC_DS_INVOCATIONS = 16, + PC_DS_PRIMITIVES = 17, + PC_STARVE_CYCLES_FOR_INDEX = 20, + PC_STARVE_CYCLES_FOR_TESS_FACTOR = 21, + PC_STARVE_CYCLES_FOR_VIZ_STREAM = 22, + PC_STALL_CYCLES_TESS = 23, + PC_STARVE_CYCLES_FOR_POSITION = 24, + PC_MODE0_DRAWCALL = 25, + PC_MODE1_DRAWCALL = 26, + PC_MODE2_DRAWCALL = 27, + PC_MODE3_DRAWCALL = 28, + PC_MODE4_DRAWCALL = 29, + PC_PREDICATED_DEAD_DRAWCALL = 30, + PC_STALL_CYCLES_BY_TSE_ONLY = 31, + PC_STALL_CYCLES_BY_VPC_ONLY = 32, + PC_VPC_POS_DATA_TRANSACTION = 33, + PC_BUSY_CYCLES = 34, + PC_STARVE_CYCLES_DI = 35, + PC_STALL_CYCLES_VPC = 36, + TESS_WORKING_CYCLES = 37, 
+ TESS_NUM_CYCLES_SETUP_WORKING = 38, + TESS_NUM_CYCLES_PTGEN_WORKING = 39, + TESS_NUM_CYCLES_CONNGEN_WORKING = 40, + TESS_BUSY_CYCLES = 41, + TESS_STARVE_CYCLES_PC = 42, + TESS_STALL_CYCLES_PC = 43, +}; + +enum a4xx_pwr_perfcounter_select { + PWR_CORE_CLOCK_CYCLES = 0, + PWR_BUSY_CLOCK_CYCLES = 1, +}; + +enum a4xx_rb_perfcounter_select { + RB_BUSY_CYCLES = 0, + RB_BUSY_CYCLES_BINNING = 1, + RB_BUSY_CYCLES_RENDERING = 2, + RB_BUSY_CYCLES_RESOLVE = 3, + RB_STARVE_CYCLES_BY_SP = 4, + RB_STARVE_CYCLES_BY_RAS = 5, + RB_STARVE_CYCLES_BY_MARB = 6, + RB_STALL_CYCLES_BY_MARB = 7, + RB_STALL_CYCLES_BY_HLSQ = 8, + RB_RB_RB_MARB_DATA = 9, + RB_SP_RB_QUAD = 10, + RB_RAS_RB_Z_QUADS = 11, + RB_GMEM_CH0_READ = 12, + RB_GMEM_CH1_READ = 13, + RB_GMEM_CH0_WRITE = 14, + RB_GMEM_CH1_WRITE = 15, + RB_CP_CONTEXT_DONE = 16, + RB_CP_CACHE_FLUSH = 17, + RB_CP_ZPASS_DONE = 18, + RB_STALL_FIFO0_FULL = 19, + RB_STALL_FIFO1_FULL = 20, + RB_STALL_FIFO2_FULL = 21, + RB_STALL_FIFO3_FULL = 22, + RB_RB_HLSQ_TRANSACTIONS = 23, + RB_Z_READ = 24, + RB_Z_WRITE = 25, + RB_C_READ = 26, + RB_C_WRITE = 27, + RB_C_READ_LATENCY = 28, + RB_Z_READ_LATENCY = 29, + RB_STALL_BY_UCHE = 30, + RB_MARB_UCHE_TRANSACTIONS = 31, + RB_CACHE_STALL_MISS = 32, + RB_CACHE_STALL_FIFO_FULL = 33, + RB_8BIT_BLENDER_UNITS_ACTIVE = 34, + RB_16BIT_BLENDER_UNITS_ACTIVE = 35, + RB_SAMPLER_UNITS_ACTIVE = 36, + RB_TOTAL_PASS = 38, + RB_Z_PASS = 39, + RB_Z_FAIL = 40, + RB_S_FAIL = 41, + RB_POWER0 = 42, + RB_POWER1 = 43, + RB_POWER2 = 44, + RB_POWER3 = 45, + RB_POWER4 = 46, + RB_POWER5 = 47, + RB_POWER6 = 48, + RB_POWER7 = 49, +}; + +enum a4xx_rbbm_perfcounter_select { + RBBM_ALWAYS_ON = 0, + RBBM_VBIF_BUSY = 1, + RBBM_TSE_BUSY = 2, + RBBM_RAS_BUSY = 3, + RBBM_PC_DCALL_BUSY = 4, + RBBM_PC_VSD_BUSY = 5, + RBBM_VFD_BUSY = 6, + RBBM_VPC_BUSY = 7, + RBBM_UCHE_BUSY = 8, + RBBM_VSC_BUSY = 9, + RBBM_HLSQ_BUSY = 10, + RBBM_ANY_RB_BUSY = 11, + RBBM_ANY_TPL1_BUSY = 12, + RBBM_ANY_SP_BUSY = 13, + RBBM_ANY_MARB_BUSY = 14, + RBBM_ANY_ARB_BUSY = 15, + 
RBBM_AHB_STATUS_BUSY = 16, + RBBM_AHB_STATUS_STALLED = 17, + RBBM_AHB_STATUS_TXFR = 18, + RBBM_AHB_STATUS_TXFR_SPLIT = 19, + RBBM_AHB_STATUS_TXFR_ERROR = 20, + RBBM_AHB_STATUS_LONG_STALL = 21, + RBBM_STATUS_MASKED = 22, + RBBM_CP_BUSY_GFX_CORE_IDLE = 23, + RBBM_TESS_BUSY = 24, + RBBM_COM_BUSY = 25, + RBBM_DCOM_BUSY = 32, + RBBM_ANY_CCU_BUSY = 33, + RBBM_DPM_BUSY = 34, +}; + +enum a4xx_sp_perfcounter_select { + SP_LM_LOAD_INSTRUCTIONS = 0, + SP_LM_STORE_INSTRUCTIONS = 1, + SP_LM_ATOMICS = 2, + SP_GM_LOAD_INSTRUCTIONS = 3, + SP_GM_STORE_INSTRUCTIONS = 4, + SP_GM_ATOMICS = 5, + SP_VS_STAGE_TEX_INSTRUCTIONS = 6, + SP_VS_STAGE_CFLOW_INSTRUCTIONS = 7, + SP_VS_STAGE_EFU_INSTRUCTIONS = 8, + SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = 9, + SP_VS_STAGE_HALF_ALU_INSTRUCTIONS = 10, + SP_FS_STAGE_TEX_INSTRUCTIONS = 11, + SP_FS_STAGE_CFLOW_INSTRUCTIONS = 12, + SP_FS_STAGE_EFU_INSTRUCTIONS = 13, + SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = 14, + SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = 15, + SP_VS_INSTRUCTIONS = 17, + SP_FS_INSTRUCTIONS = 18, + SP_ADDR_LOCK_COUNT = 19, + SP_UCHE_READ_TRANS = 20, + SP_UCHE_WRITE_TRANS = 21, + SP_EXPORT_VPC_TRANS = 22, + SP_EXPORT_RB_TRANS = 23, + SP_PIXELS_KILLED = 24, + SP_ICL1_REQUESTS = 25, + SP_ICL1_MISSES = 26, + SP_ICL0_REQUESTS = 27, + SP_ICL0_MISSES = 28, + SP_ALU_WORKING_CYCLES = 29, + SP_EFU_WORKING_CYCLES = 30, + SP_STALL_CYCLES_BY_VPC = 31, + SP_STALL_CYCLES_BY_TP = 32, + SP_STALL_CYCLES_BY_UCHE = 33, + SP_STALL_CYCLES_BY_RB = 34, + SP_BUSY_CYCLES = 35, + SP_HS_INSTRUCTIONS = 36, + SP_DS_INSTRUCTIONS = 37, + SP_GS_INSTRUCTIONS = 38, + SP_CS_INSTRUCTIONS = 39, + SP_SCHEDULER_NON_WORKING = 40, + SP_WAVE_CONTEXTS = 41, + SP_WAVE_CONTEXT_CYCLES = 42, + SP_POWER0 = 43, + SP_POWER1 = 44, + SP_POWER2 = 45, + SP_POWER3 = 46, + SP_POWER4 = 47, + SP_POWER5 = 48, + SP_POWER6 = 49, + SP_POWER7 = 50, + SP_POWER8 = 51, + SP_POWER9 = 52, + SP_POWER10 = 53, + SP_POWER11 = 54, + SP_POWER12 = 55, + SP_POWER13 = 56, + SP_POWER14 = 57, + SP_POWER15 = 58, +}; + +enum 
a4xx_tp_perfcounter_select { + TP_L1_REQUESTS = 0, + TP_L1_MISSES = 1, + TP_QUADS_OFFSET = 8, + TP_QUAD_SHADOW = 9, + TP_QUADS_ARRAY = 10, + TP_QUADS_GRADIENT = 11, + TP_QUADS_1D2D = 12, + TP_QUADS_3DCUBE = 13, + TP_BUSY_CYCLES = 16, + TP_STALL_CYCLES_BY_ARB = 17, + TP_STATE_CACHE_REQUESTS = 20, + TP_STATE_CACHE_MISSES = 21, + TP_POWER0 = 22, + TP_POWER1 = 23, + TP_POWER2 = 24, + TP_POWER3 = 25, + TP_POWER4 = 26, + TP_POWER5 = 27, + TP_POWER6 = 28, + TP_POWER7 = 29, +}; + +enum a4xx_uche_perfcounter_select { + UCHE_VBIF_READ_BEATS_TP = 0, + UCHE_VBIF_READ_BEATS_VFD = 1, + UCHE_VBIF_READ_BEATS_HLSQ = 2, + UCHE_VBIF_READ_BEATS_MARB = 3, + UCHE_VBIF_READ_BEATS_SP = 4, + UCHE_READ_REQUESTS_TP = 5, + UCHE_READ_REQUESTS_VFD = 6, + UCHE_READ_REQUESTS_HLSQ = 7, + UCHE_READ_REQUESTS_MARB = 8, + UCHE_READ_REQUESTS_SP = 9, + UCHE_WRITE_REQUESTS_MARB = 10, + UCHE_WRITE_REQUESTS_SP = 11, + UCHE_TAG_CHECK_FAILS = 12, + UCHE_EVICTS = 13, + UCHE_FLUSHES = 14, + UCHE_VBIF_LATENCY_CYCLES = 15, + UCHE_VBIF_LATENCY_SAMPLES = 16, + UCHE_BUSY_CYCLES = 17, + UCHE_VBIF_READ_BEATS_PC = 18, + UCHE_READ_REQUESTS_PC = 19, + UCHE_WRITE_REQUESTS_VPC = 20, + UCHE_STALL_BY_VBIF = 21, + UCHE_WRITE_REQUESTS_VSC = 22, + UCHE_POWER0 = 23, + UCHE_POWER1 = 24, + UCHE_POWER2 = 25, + UCHE_POWER3 = 26, + UCHE_POWER4 = 27, + UCHE_POWER5 = 28, + UCHE_POWER6 = 29, + UCHE_POWER7 = 30, +}; + +enum a4xx_vbif_perfcounter_select { + AXI_READ_REQUESTS_ID_0 = 0, + AXI_READ_REQUESTS_ID_1 = 1, + AXI_READ_REQUESTS_ID_2 = 2, + AXI_READ_REQUESTS_ID_3 = 3, + AXI_READ_REQUESTS_ID_4 = 4, + AXI_READ_REQUESTS_ID_5 = 5, + AXI_READ_REQUESTS_ID_6 = 6, + AXI_READ_REQUESTS_ID_7 = 7, + AXI_READ_REQUESTS_ID_8 = 8, + AXI_READ_REQUESTS_ID_9 = 9, + AXI_READ_REQUESTS_ID_10 = 10, + AXI_READ_REQUESTS_ID_11 = 11, + AXI_READ_REQUESTS_ID_12 = 12, + AXI_READ_REQUESTS_ID_13 = 13, + AXI_READ_REQUESTS_ID_14 = 14, + AXI_READ_REQUESTS_ID_15 = 15, + AXI0_READ_REQUESTS_TOTAL = 16, + AXI1_READ_REQUESTS_TOTAL = 17, + AXI2_READ_REQUESTS_TOTAL = 18, + 
AXI3_READ_REQUESTS_TOTAL = 19, + AXI_READ_REQUESTS_TOTAL = 20, + AXI_WRITE_REQUESTS_ID_0 = 21, + AXI_WRITE_REQUESTS_ID_1 = 22, + AXI_WRITE_REQUESTS_ID_2 = 23, + AXI_WRITE_REQUESTS_ID_3 = 24, + AXI_WRITE_REQUESTS_ID_4 = 25, + AXI_WRITE_REQUESTS_ID_5 = 26, + AXI_WRITE_REQUESTS_ID_6 = 27, + AXI_WRITE_REQUESTS_ID_7 = 28, + AXI_WRITE_REQUESTS_ID_8 = 29, + AXI_WRITE_REQUESTS_ID_9 = 30, + AXI_WRITE_REQUESTS_ID_10 = 31, + AXI_WRITE_REQUESTS_ID_11 = 32, + AXI_WRITE_REQUESTS_ID_12 = 33, + AXI_WRITE_REQUESTS_ID_13 = 34, + AXI_WRITE_REQUESTS_ID_14 = 35, + AXI_WRITE_REQUESTS_ID_15 = 36, + AXI0_WRITE_REQUESTS_TOTAL = 37, + AXI1_WRITE_REQUESTS_TOTAL = 38, + AXI2_WRITE_REQUESTS_TOTAL = 39, + AXI3_WRITE_REQUESTS_TOTAL = 40, + AXI_WRITE_REQUESTS_TOTAL = 41, + AXI_TOTAL_REQUESTS = 42, + AXI_READ_DATA_BEATS_ID_0 = 43, + AXI_READ_DATA_BEATS_ID_1 = 44, + AXI_READ_DATA_BEATS_ID_2 = 45, + AXI_READ_DATA_BEATS_ID_3 = 46, + AXI_READ_DATA_BEATS_ID_4 = 47, + AXI_READ_DATA_BEATS_ID_5 = 48, + AXI_READ_DATA_BEATS_ID_6 = 49, + AXI_READ_DATA_BEATS_ID_7 = 50, + AXI_READ_DATA_BEATS_ID_8 = 51, + AXI_READ_DATA_BEATS_ID_9 = 52, + AXI_READ_DATA_BEATS_ID_10 = 53, + AXI_READ_DATA_BEATS_ID_11 = 54, + AXI_READ_DATA_BEATS_ID_12 = 55, + AXI_READ_DATA_BEATS_ID_13 = 56, + AXI_READ_DATA_BEATS_ID_14 = 57, + AXI_READ_DATA_BEATS_ID_15 = 58, + AXI0_READ_DATA_BEATS_TOTAL = 59, + AXI1_READ_DATA_BEATS_TOTAL = 60, + AXI2_READ_DATA_BEATS_TOTAL = 61, + AXI3_READ_DATA_BEATS_TOTAL = 62, + AXI_READ_DATA_BEATS_TOTAL = 63, + AXI_WRITE_DATA_BEATS_ID_0 = 64, + AXI_WRITE_DATA_BEATS_ID_1 = 65, + AXI_WRITE_DATA_BEATS_ID_2 = 66, + AXI_WRITE_DATA_BEATS_ID_3 = 67, + AXI_WRITE_DATA_BEATS_ID_4 = 68, + AXI_WRITE_DATA_BEATS_ID_5 = 69, + AXI_WRITE_DATA_BEATS_ID_6 = 70, + AXI_WRITE_DATA_BEATS_ID_7 = 71, + AXI_WRITE_DATA_BEATS_ID_8 = 72, + AXI_WRITE_DATA_BEATS_ID_9 = 73, + AXI_WRITE_DATA_BEATS_ID_10 = 74, + AXI_WRITE_DATA_BEATS_ID_11 = 75, + AXI_WRITE_DATA_BEATS_ID_12 = 76, + AXI_WRITE_DATA_BEATS_ID_13 = 77, + AXI_WRITE_DATA_BEATS_ID_14 = 78, 
+ AXI_WRITE_DATA_BEATS_ID_15 = 79, + AXI0_WRITE_DATA_BEATS_TOTAL = 80, + AXI1_WRITE_DATA_BEATS_TOTAL = 81, + AXI2_WRITE_DATA_BEATS_TOTAL = 82, + AXI3_WRITE_DATA_BEATS_TOTAL = 83, + AXI_WRITE_DATA_BEATS_TOTAL = 84, + AXI_DATA_BEATS_TOTAL = 85, + CYCLES_HELD_OFF_ID_0 = 86, + CYCLES_HELD_OFF_ID_1 = 87, + CYCLES_HELD_OFF_ID_2 = 88, + CYCLES_HELD_OFF_ID_3 = 89, + CYCLES_HELD_OFF_ID_4 = 90, + CYCLES_HELD_OFF_ID_5 = 91, + CYCLES_HELD_OFF_ID_6 = 92, + CYCLES_HELD_OFF_ID_7 = 93, + CYCLES_HELD_OFF_ID_8 = 94, + CYCLES_HELD_OFF_ID_9 = 95, + CYCLES_HELD_OFF_ID_10 = 96, + CYCLES_HELD_OFF_ID_11 = 97, + CYCLES_HELD_OFF_ID_12 = 98, + CYCLES_HELD_OFF_ID_13 = 99, + CYCLES_HELD_OFF_ID_14 = 100, + CYCLES_HELD_OFF_ID_15 = 101, + AXI_READ_REQUEST_HELD_OFF = 102, + AXI_WRITE_REQUEST_HELD_OFF = 103, + AXI_REQUEST_HELD_OFF = 104, + AXI_WRITE_DATA_HELD_OFF = 105, + OCMEM_AXI_READ_REQUEST_HELD_OFF = 106, + OCMEM_AXI_WRITE_REQUEST_HELD_OFF = 107, + OCMEM_AXI_REQUEST_HELD_OFF = 108, + OCMEM_AXI_WRITE_DATA_HELD_OFF = 109, + ELAPSED_CYCLES_DDR = 110, + ELAPSED_CYCLES_OCMEM = 111, +}; + +enum a4xx_vfd_perfcounter_select { + VFD_UCHE_BYTE_FETCHED = 0, + VFD_UCHE_TRANS = 1, + VFD_FETCH_INSTRUCTIONS = 3, + VFD_BUSY_CYCLES = 5, + VFD_STALL_CYCLES_UCHE = 6, + VFD_STALL_CYCLES_HLSQ = 7, + VFD_STALL_CYCLES_VPC_BYPASS = 8, + VFD_STALL_CYCLES_VPC_ALLOC = 9, + VFD_MODE_0_FIBERS = 13, + VFD_MODE_1_FIBERS = 14, + VFD_MODE_2_FIBERS = 15, + VFD_MODE_3_FIBERS = 16, + VFD_MODE_4_FIBERS = 17, + VFD_BFIFO_STALL = 18, + VFD_NUM_VERTICES_TOTAL = 19, + VFD_PACKER_FULL = 20, + VFD_UCHE_REQUEST_FIFO_FULL = 21, + VFD_STARVE_CYCLES_PC = 22, + VFD_STARVE_CYCLES_UCHE = 23, +}; + +enum a4xx_vpc_perfcounter_select { + VPC_SP_LM_COMPONENTS = 2, + VPC_SP0_LM_BYTES = 3, + VPC_SP1_LM_BYTES = 4, + VPC_SP2_LM_BYTES = 5, + VPC_SP3_LM_BYTES = 6, + VPC_WORKING_CYCLES = 7, + VPC_STALL_CYCLES_LM = 8, + VPC_STARVE_CYCLES_RAS = 9, + VPC_STREAMOUT_CYCLES = 10, + VPC_UCHE_TRANSACTIONS = 12, + VPC_STALL_CYCLES_UCHE = 13, + VPC_BUSY_CYCLES = 
14, + VPC_STARVE_CYCLES_SP = 15, +}; + +enum a4xx_vsc_perfcounter_select { + VSC_BUSY_CYCLES = 0, + VSC_WORKING_CYCLES = 1, + VSC_STALL_CYCLES_UCHE = 2, + VSC_STARVE_CYCLES_RAS = 3, + VSC_EOT_NUM = 4, +}; + +enum a4xx_tex_filter { + A4XX_TEX_NEAREST = 0, + A4XX_TEX_LINEAR = 1, + A4XX_TEX_ANISO = 2, +}; + +enum a4xx_tex_clamp { + A4XX_TEX_REPEAT = 0, + A4XX_TEX_CLAMP_TO_EDGE = 1, + A4XX_TEX_MIRROR_REPEAT = 2, + A4XX_TEX_CLAMP_TO_BORDER = 3, + A4XX_TEX_MIRROR_CLAMP = 4, +}; + +enum a4xx_tex_aniso { + A4XX_TEX_ANISO_1 = 0, + A4XX_TEX_ANISO_2 = 1, + A4XX_TEX_ANISO_4 = 2, + A4XX_TEX_ANISO_8 = 3, + A4XX_TEX_ANISO_16 = 4, +}; + +enum a4xx_tex_swiz { + A4XX_TEX_X = 0, + A4XX_TEX_Y = 1, + A4XX_TEX_Z = 2, + A4XX_TEX_W = 3, + A4XX_TEX_ZERO = 4, + A4XX_TEX_ONE = 5, +}; + +enum a4xx_tex_type { + A4XX_TEX_1D = 0, + A4XX_TEX_2D = 1, + A4XX_TEX_CUBE = 2, + A4XX_TEX_3D = 3, +}; + +#define A4XX_CGC_HLSQ_EARLY_CYC__MASK 0x00700000 +#define A4XX_CGC_HLSQ_EARLY_CYC__SHIFT 20 +static inline uint32_t A4XX_CGC_HLSQ_EARLY_CYC(uint32_t val) +{ + return ((val) << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT) & A4XX_CGC_HLSQ_EARLY_CYC__MASK; +} +#define A4XX_INT0_RBBM_GPU_IDLE 0x00000001 +#define A4XX_INT0_RBBM_AHB_ERROR 0x00000002 +#define A4XX_INT0_RBBM_REG_TIMEOUT 0x00000004 +#define A4XX_INT0_RBBM_ME_MS_TIMEOUT 0x00000008 +#define A4XX_INT0_RBBM_PFP_MS_TIMEOUT 0x00000010 +#define A4XX_INT0_RBBM_ATB_BUS_OVERFLOW 0x00000020 +#define A4XX_INT0_VFD_ERROR 0x00000040 +#define A4XX_INT0_CP_SW_INT 0x00000080 +#define A4XX_INT0_CP_T0_PACKET_IN_IB 0x00000100 +#define A4XX_INT0_CP_OPCODE_ERROR 0x00000200 +#define A4XX_INT0_CP_RESERVED_BIT_ERROR 0x00000400 +#define A4XX_INT0_CP_HW_FAULT 0x00000800 +#define A4XX_INT0_CP_DMA 0x00001000 +#define A4XX_INT0_CP_IB2_INT 0x00002000 +#define A4XX_INT0_CP_IB1_INT 0x00004000 +#define A4XX_INT0_CP_RB_INT 0x00008000 +#define A4XX_INT0_CP_REG_PROTECT_FAULT 0x00010000 +#define A4XX_INT0_CP_RB_DONE_TS 0x00020000 +#define A4XX_INT0_CP_VS_DONE_TS 0x00040000 +#define 
A4XX_INT0_CP_PS_DONE_TS 0x00080000 +#define A4XX_INT0_CACHE_FLUSH_TS 0x00100000 +#define A4XX_INT0_CP_AHB_ERROR_HALT 0x00200000 +#define A4XX_INT0_MISC_HANG_DETECT 0x01000000 +#define A4XX_INT0_UCHE_OOB_ACCESS 0x02000000 +#define REG_A4XX_RB_GMEM_BASE_ADDR 0x00000cc0 + +#define REG_A4XX_RB_PERFCTR_RB_SEL_0 0x00000cc7 + +#define REG_A4XX_RB_PERFCTR_RB_SEL_1 0x00000cc8 + +#define REG_A4XX_RB_PERFCTR_RB_SEL_2 0x00000cc9 + +#define REG_A4XX_RB_PERFCTR_RB_SEL_3 0x00000cca + +#define REG_A4XX_RB_PERFCTR_RB_SEL_4 0x00000ccb + +#define REG_A4XX_RB_PERFCTR_RB_SEL_5 0x00000ccc + +#define REG_A4XX_RB_PERFCTR_RB_SEL_6 0x00000ccd + +#define REG_A4XX_RB_PERFCTR_RB_SEL_7 0x00000cce + +#define REG_A4XX_RB_PERFCTR_CCU_SEL_0 0x00000ccf + +#define REG_A4XX_RB_PERFCTR_CCU_SEL_1 0x00000cd0 + +#define REG_A4XX_RB_PERFCTR_CCU_SEL_2 0x00000cd1 + +#define REG_A4XX_RB_PERFCTR_CCU_SEL_3 0x00000cd2 + +#define REG_A4XX_RB_FRAME_BUFFER_DIMENSION 0x00000ce0 +#define A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__MASK 0x00003fff +#define A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__SHIFT 0 +static inline uint32_t A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(uint32_t val) +{ + return ((val) << A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__SHIFT) & A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__MASK; +} +#define A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__MASK 0x3fff0000 +#define A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__SHIFT 16 +static inline uint32_t A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(uint32_t val) +{ + return ((val) << A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__SHIFT) & A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__MASK; +} + +#define REG_A4XX_RB_CLEAR_COLOR_DW0 0x000020cc + +#define REG_A4XX_RB_CLEAR_COLOR_DW1 0x000020cd + +#define REG_A4XX_RB_CLEAR_COLOR_DW2 0x000020ce + +#define REG_A4XX_RB_CLEAR_COLOR_DW3 0x000020cf + +#define REG_A4XX_RB_MODE_CONTROL 0x000020a0 +#define A4XX_RB_MODE_CONTROL_WIDTH__MASK 0x0000003f +#define A4XX_RB_MODE_CONTROL_WIDTH__SHIFT 0 +static inline uint32_t A4XX_RB_MODE_CONTROL_WIDTH(uint32_t val) +{ + assert(!(val & 
0x1f)); + return ((val >> 5) << A4XX_RB_MODE_CONTROL_WIDTH__SHIFT) & A4XX_RB_MODE_CONTROL_WIDTH__MASK; +} +#define A4XX_RB_MODE_CONTROL_HEIGHT__MASK 0x00003f00 +#define A4XX_RB_MODE_CONTROL_HEIGHT__SHIFT 8 +static inline uint32_t A4XX_RB_MODE_CONTROL_HEIGHT(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A4XX_RB_MODE_CONTROL_HEIGHT__SHIFT) & A4XX_RB_MODE_CONTROL_HEIGHT__MASK; +} +#define A4XX_RB_MODE_CONTROL_ENABLE_GMEM 0x00010000 + +#define REG_A4XX_RB_RENDER_CONTROL 0x000020a1 +#define A4XX_RB_RENDER_CONTROL_BINNING_PASS 0x00000001 +#define A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE 0x00000020 + +#define REG_A4XX_RB_MSAA_CONTROL 0x000020a2 +#define A4XX_RB_MSAA_CONTROL_DISABLE 0x00001000 +#define A4XX_RB_MSAA_CONTROL_SAMPLES__MASK 0x0000e000 +#define A4XX_RB_MSAA_CONTROL_SAMPLES__SHIFT 13 +static inline uint32_t A4XX_RB_MSAA_CONTROL_SAMPLES(uint32_t val) +{ + return ((val) << A4XX_RB_MSAA_CONTROL_SAMPLES__SHIFT) & A4XX_RB_MSAA_CONTROL_SAMPLES__MASK; +} + +#define REG_A4XX_RB_RENDER_CONTROL2 0x000020a3 +#define A4XX_RB_RENDER_CONTROL2_XCOORD 0x00000001 +#define A4XX_RB_RENDER_CONTROL2_YCOORD 0x00000002 +#define A4XX_RB_RENDER_CONTROL2_ZCOORD 0x00000004 +#define A4XX_RB_RENDER_CONTROL2_WCOORD 0x00000008 +#define A4XX_RB_RENDER_CONTROL2_SAMPLEMASK 0x00000010 +#define A4XX_RB_RENDER_CONTROL2_FACENESS 0x00000020 +#define A4XX_RB_RENDER_CONTROL2_SAMPLEID 0x00000040 +#define A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__MASK 0x00000380 +#define A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__SHIFT 7 +static inline uint32_t A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__SHIFT) & A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__MASK; +} +#define A4XX_RB_RENDER_CONTROL2_SAMPLEID_HR 0x00000800 +#define A4XX_RB_RENDER_CONTROL2_VARYING 0x00001000 + +static inline uint32_t REG_A4XX_RB_MRT(uint32_t i0) { return 0x000020a4 + 0x5*i0; } + +static inline uint32_t REG_A4XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020a4 + 0x5*i0; } 
+#define A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008 +#define A4XX_RB_MRT_CONTROL_BLEND 0x00000010 +#define A4XX_RB_MRT_CONTROL_BLEND2 0x00000020 +#define A4XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000040 +#define A4XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000f00 +#define A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 8 +static inline uint32_t A4XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) +{ + return ((val) << A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A4XX_RB_MRT_CONTROL_ROP_CODE__MASK; +} +#define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x0f000000 +#define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 24 +static inline uint32_t A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) +{ + return ((val) << A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; +} + +static inline uint32_t REG_A4XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x000020a5 + 0x5*i0; } +#define A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x0000003f +#define A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a4xx_color_fmt val) +{ + return ((val) << A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK; +} +#define A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x000000c0 +#define A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 6 +static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a4xx_tile_mode val) +{ + return ((val) << A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK; +} +#define A4XX_RB_MRT_BUF_INFO_DITHER_MODE__MASK 0x00000600 +#define A4XX_RB_MRT_BUF_INFO_DITHER_MODE__SHIFT 9 +static inline uint32_t A4XX_RB_MRT_BUF_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) +{ + return ((val) << A4XX_RB_MRT_BUF_INFO_DITHER_MODE__SHIFT) & A4XX_RB_MRT_BUF_INFO_DITHER_MODE__MASK; +} +#define A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00001800 +#define A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 11 +static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum 
a3xx_color_swap val) +{ + return ((val) << A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK; +} +#define A4XX_RB_MRT_BUF_INFO_COLOR_SRGB 0x00002000 +#define A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK 0xffffc000 +#define A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT 14 +static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val) +{ + assert(!(val & 0xf)); + return ((val >> 4) << A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK; +} + +static inline uint32_t REG_A4XX_RB_MRT_BASE(uint32_t i0) { return 0x000020a6 + 0x5*i0; } + +static inline uint32_t REG_A4XX_RB_MRT_CONTROL3(uint32_t i0) { return 0x000020a7 + 0x5*i0; } +#define A4XX_RB_MRT_CONTROL3_STRIDE__MASK 0x03fffff8 +#define A4XX_RB_MRT_CONTROL3_STRIDE__SHIFT 3 +static inline uint32_t A4XX_RB_MRT_CONTROL3_STRIDE(uint32_t val) +{ + return ((val) << A4XX_RB_MRT_CONTROL3_STRIDE__SHIFT) & A4XX_RB_MRT_CONTROL3_STRIDE__MASK; +} + +static inline uint32_t REG_A4XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x000020a8 + 0x5*i0; } +#define A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f +#define A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 +static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK; +} +#define A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0 +#define A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5 +static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) +{ + return ((val) << A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK; +} +#define A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00 +#define A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8 +static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor 
val) +{ + return ((val) << A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK; +} +#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000 +#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16 +static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK; +} +#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000 +#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21 +static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) +{ + return ((val) << A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK; +} +#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000 +#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24 +static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK; +} + +#define REG_A4XX_RB_BLEND_RED 0x000020f0 +#define A4XX_RB_BLEND_RED_UINT__MASK 0x000000ff +#define A4XX_RB_BLEND_RED_UINT__SHIFT 0 +static inline uint32_t A4XX_RB_BLEND_RED_UINT(uint32_t val) +{ + return ((val) << A4XX_RB_BLEND_RED_UINT__SHIFT) & A4XX_RB_BLEND_RED_UINT__MASK; +} +#define A4XX_RB_BLEND_RED_SINT__MASK 0x0000ff00 +#define A4XX_RB_BLEND_RED_SINT__SHIFT 8 +static inline uint32_t A4XX_RB_BLEND_RED_SINT(uint32_t val) +{ + return ((val) << A4XX_RB_BLEND_RED_SINT__SHIFT) & A4XX_RB_BLEND_RED_SINT__MASK; +} +#define A4XX_RB_BLEND_RED_FLOAT__MASK 0xffff0000 +#define A4XX_RB_BLEND_RED_FLOAT__SHIFT 16 +static inline uint32_t A4XX_RB_BLEND_RED_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A4XX_RB_BLEND_RED_FLOAT__SHIFT) & 
A4XX_RB_BLEND_RED_FLOAT__MASK; +} + +#define REG_A4XX_RB_BLEND_RED_F32 0x000020f1 +#define A4XX_RB_BLEND_RED_F32__MASK 0xffffffff +#define A4XX_RB_BLEND_RED_F32__SHIFT 0 +static inline uint32_t A4XX_RB_BLEND_RED_F32(float val) +{ + return ((fui(val)) << A4XX_RB_BLEND_RED_F32__SHIFT) & A4XX_RB_BLEND_RED_F32__MASK; +} + +#define REG_A4XX_RB_BLEND_GREEN 0x000020f2 +#define A4XX_RB_BLEND_GREEN_UINT__MASK 0x000000ff +#define A4XX_RB_BLEND_GREEN_UINT__SHIFT 0 +static inline uint32_t A4XX_RB_BLEND_GREEN_UINT(uint32_t val) +{ + return ((val) << A4XX_RB_BLEND_GREEN_UINT__SHIFT) & A4XX_RB_BLEND_GREEN_UINT__MASK; +} +#define A4XX_RB_BLEND_GREEN_SINT__MASK 0x0000ff00 +#define A4XX_RB_BLEND_GREEN_SINT__SHIFT 8 +static inline uint32_t A4XX_RB_BLEND_GREEN_SINT(uint32_t val) +{ + return ((val) << A4XX_RB_BLEND_GREEN_SINT__SHIFT) & A4XX_RB_BLEND_GREEN_SINT__MASK; +} +#define A4XX_RB_BLEND_GREEN_FLOAT__MASK 0xffff0000 +#define A4XX_RB_BLEND_GREEN_FLOAT__SHIFT 16 +static inline uint32_t A4XX_RB_BLEND_GREEN_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A4XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A4XX_RB_BLEND_GREEN_FLOAT__MASK; +} + +#define REG_A4XX_RB_BLEND_GREEN_F32 0x000020f3 +#define A4XX_RB_BLEND_GREEN_F32__MASK 0xffffffff +#define A4XX_RB_BLEND_GREEN_F32__SHIFT 0 +static inline uint32_t A4XX_RB_BLEND_GREEN_F32(float val) +{ + return ((fui(val)) << A4XX_RB_BLEND_GREEN_F32__SHIFT) & A4XX_RB_BLEND_GREEN_F32__MASK; +} + +#define REG_A4XX_RB_BLEND_BLUE 0x000020f4 +#define A4XX_RB_BLEND_BLUE_UINT__MASK 0x000000ff +#define A4XX_RB_BLEND_BLUE_UINT__SHIFT 0 +static inline uint32_t A4XX_RB_BLEND_BLUE_UINT(uint32_t val) +{ + return ((val) << A4XX_RB_BLEND_BLUE_UINT__SHIFT) & A4XX_RB_BLEND_BLUE_UINT__MASK; +} +#define A4XX_RB_BLEND_BLUE_SINT__MASK 0x0000ff00 +#define A4XX_RB_BLEND_BLUE_SINT__SHIFT 8 +static inline uint32_t A4XX_RB_BLEND_BLUE_SINT(uint32_t val) +{ + return ((val) << A4XX_RB_BLEND_BLUE_SINT__SHIFT) & A4XX_RB_BLEND_BLUE_SINT__MASK; +} +#define 
A4XX_RB_BLEND_BLUE_FLOAT__MASK 0xffff0000 +#define A4XX_RB_BLEND_BLUE_FLOAT__SHIFT 16 +static inline uint32_t A4XX_RB_BLEND_BLUE_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A4XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A4XX_RB_BLEND_BLUE_FLOAT__MASK; +} + +#define REG_A4XX_RB_BLEND_BLUE_F32 0x000020f5 +#define A4XX_RB_BLEND_BLUE_F32__MASK 0xffffffff +#define A4XX_RB_BLEND_BLUE_F32__SHIFT 0 +static inline uint32_t A4XX_RB_BLEND_BLUE_F32(float val) +{ + return ((fui(val)) << A4XX_RB_BLEND_BLUE_F32__SHIFT) & A4XX_RB_BLEND_BLUE_F32__MASK; +} + +#define REG_A4XX_RB_BLEND_ALPHA 0x000020f6 +#define A4XX_RB_BLEND_ALPHA_UINT__MASK 0x000000ff +#define A4XX_RB_BLEND_ALPHA_UINT__SHIFT 0 +static inline uint32_t A4XX_RB_BLEND_ALPHA_UINT(uint32_t val) +{ + return ((val) << A4XX_RB_BLEND_ALPHA_UINT__SHIFT) & A4XX_RB_BLEND_ALPHA_UINT__MASK; +} +#define A4XX_RB_BLEND_ALPHA_SINT__MASK 0x0000ff00 +#define A4XX_RB_BLEND_ALPHA_SINT__SHIFT 8 +static inline uint32_t A4XX_RB_BLEND_ALPHA_SINT(uint32_t val) +{ + return ((val) << A4XX_RB_BLEND_ALPHA_SINT__SHIFT) & A4XX_RB_BLEND_ALPHA_SINT__MASK; +} +#define A4XX_RB_BLEND_ALPHA_FLOAT__MASK 0xffff0000 +#define A4XX_RB_BLEND_ALPHA_FLOAT__SHIFT 16 +static inline uint32_t A4XX_RB_BLEND_ALPHA_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A4XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A4XX_RB_BLEND_ALPHA_FLOAT__MASK; +} + +#define REG_A4XX_RB_BLEND_ALPHA_F32 0x000020f7 +#define A4XX_RB_BLEND_ALPHA_F32__MASK 0xffffffff +#define A4XX_RB_BLEND_ALPHA_F32__SHIFT 0 +static inline uint32_t A4XX_RB_BLEND_ALPHA_F32(float val) +{ + return ((fui(val)) << A4XX_RB_BLEND_ALPHA_F32__SHIFT) & A4XX_RB_BLEND_ALPHA_F32__MASK; +} + +#define REG_A4XX_RB_ALPHA_CONTROL 0x000020f8 +#define A4XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK 0x000000ff +#define A4XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT 0 +static inline uint32_t A4XX_RB_ALPHA_CONTROL_ALPHA_REF(uint32_t val) +{ + return ((val) << A4XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT) & A4XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK; +} 
+#define A4XX_RB_ALPHA_CONTROL_ALPHA_TEST 0x00000100 +#define A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK 0x00000e00 +#define A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT 9 +static inline uint32_t A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val) +{ + return ((val) << A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK; +} + +#define REG_A4XX_RB_FS_OUTPUT 0x000020f9 +#define A4XX_RB_FS_OUTPUT_ENABLE_BLEND__MASK 0x000000ff +#define A4XX_RB_FS_OUTPUT_ENABLE_BLEND__SHIFT 0 +static inline uint32_t A4XX_RB_FS_OUTPUT_ENABLE_BLEND(uint32_t val) +{ + return ((val) << A4XX_RB_FS_OUTPUT_ENABLE_BLEND__SHIFT) & A4XX_RB_FS_OUTPUT_ENABLE_BLEND__MASK; +} +#define A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND 0x00000100 +#define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK 0xffff0000 +#define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT 16 +static inline uint32_t A4XX_RB_FS_OUTPUT_SAMPLE_MASK(uint32_t val) +{ + return ((val) << A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT) & A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK; +} + +#define REG_A4XX_RB_SAMPLE_COUNT_CONTROL 0x000020fa +#define A4XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002 +#define A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__MASK 0xfffffffc +#define A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__SHIFT 2 +static inline uint32_t A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__SHIFT) & A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__MASK; +} + +#define REG_A4XX_RB_RENDER_COMPONENTS 0x000020fb +#define A4XX_RB_RENDER_COMPONENTS_RT0__MASK 0x0000000f +#define A4XX_RB_RENDER_COMPONENTS_RT0__SHIFT 0 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT0(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_COMPONENTS_RT0__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT0__MASK; +} +#define A4XX_RB_RENDER_COMPONENTS_RT1__MASK 0x000000f0 +#define A4XX_RB_RENDER_COMPONENTS_RT1__SHIFT 4 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT1(uint32_t val) +{ + return ((val) << 
A4XX_RB_RENDER_COMPONENTS_RT1__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT1__MASK; +} +#define A4XX_RB_RENDER_COMPONENTS_RT2__MASK 0x00000f00 +#define A4XX_RB_RENDER_COMPONENTS_RT2__SHIFT 8 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT2(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_COMPONENTS_RT2__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT2__MASK; +} +#define A4XX_RB_RENDER_COMPONENTS_RT3__MASK 0x0000f000 +#define A4XX_RB_RENDER_COMPONENTS_RT3__SHIFT 12 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT3(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_COMPONENTS_RT3__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT3__MASK; +} +#define A4XX_RB_RENDER_COMPONENTS_RT4__MASK 0x000f0000 +#define A4XX_RB_RENDER_COMPONENTS_RT4__SHIFT 16 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT4(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_COMPONENTS_RT4__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT4__MASK; +} +#define A4XX_RB_RENDER_COMPONENTS_RT5__MASK 0x00f00000 +#define A4XX_RB_RENDER_COMPONENTS_RT5__SHIFT 20 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT5(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_COMPONENTS_RT5__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT5__MASK; +} +#define A4XX_RB_RENDER_COMPONENTS_RT6__MASK 0x0f000000 +#define A4XX_RB_RENDER_COMPONENTS_RT6__SHIFT 24 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT6(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_COMPONENTS_RT6__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT6__MASK; +} +#define A4XX_RB_RENDER_COMPONENTS_RT7__MASK 0xf0000000 +#define A4XX_RB_RENDER_COMPONENTS_RT7__SHIFT 28 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT7(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_COMPONENTS_RT7__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT7__MASK; +} + +#define REG_A4XX_RB_COPY_CONTROL 0x000020fc +#define A4XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK 0x00000003 +#define A4XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT 0 +static inline uint32_t A4XX_RB_COPY_CONTROL_MSAA_RESOLVE(enum a3xx_msaa_samples val) +{ + return ((val) << 
A4XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT) & A4XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK; +} +#define A4XX_RB_COPY_CONTROL_MODE__MASK 0x00000070 +#define A4XX_RB_COPY_CONTROL_MODE__SHIFT 4 +static inline uint32_t A4XX_RB_COPY_CONTROL_MODE(enum adreno_rb_copy_control_mode val) +{ + return ((val) << A4XX_RB_COPY_CONTROL_MODE__SHIFT) & A4XX_RB_COPY_CONTROL_MODE__MASK; +} +#define A4XX_RB_COPY_CONTROL_FASTCLEAR__MASK 0x00000f00 +#define A4XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT 8 +static inline uint32_t A4XX_RB_COPY_CONTROL_FASTCLEAR(uint32_t val) +{ + return ((val) << A4XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT) & A4XX_RB_COPY_CONTROL_FASTCLEAR__MASK; +} +#define A4XX_RB_COPY_CONTROL_GMEM_BASE__MASK 0xffffc000 +#define A4XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT 14 +static inline uint32_t A4XX_RB_COPY_CONTROL_GMEM_BASE(uint32_t val) +{ + assert(!(val & 0x3fff)); + return ((val >> 14) << A4XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT) & A4XX_RB_COPY_CONTROL_GMEM_BASE__MASK; +} + +#define REG_A4XX_RB_COPY_DEST_BASE 0x000020fd +#define A4XX_RB_COPY_DEST_BASE_BASE__MASK 0xffffffe0 +#define A4XX_RB_COPY_DEST_BASE_BASE__SHIFT 5 +static inline uint32_t A4XX_RB_COPY_DEST_BASE_BASE(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A4XX_RB_COPY_DEST_BASE_BASE__SHIFT) & A4XX_RB_COPY_DEST_BASE_BASE__MASK; +} + +#define REG_A4XX_RB_COPY_DEST_PITCH 0x000020fe +#define A4XX_RB_COPY_DEST_PITCH_PITCH__MASK 0xffffffff +#define A4XX_RB_COPY_DEST_PITCH_PITCH__SHIFT 0 +static inline uint32_t A4XX_RB_COPY_DEST_PITCH_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A4XX_RB_COPY_DEST_PITCH_PITCH__SHIFT) & A4XX_RB_COPY_DEST_PITCH_PITCH__MASK; +} + +#define REG_A4XX_RB_COPY_DEST_INFO 0x000020ff +#define A4XX_RB_COPY_DEST_INFO_FORMAT__MASK 0x000000fc +#define A4XX_RB_COPY_DEST_INFO_FORMAT__SHIFT 2 +static inline uint32_t A4XX_RB_COPY_DEST_INFO_FORMAT(enum a4xx_color_fmt val) +{ + return ((val) << A4XX_RB_COPY_DEST_INFO_FORMAT__SHIFT) & A4XX_RB_COPY_DEST_INFO_FORMAT__MASK; +} +#define 
A4XX_RB_COPY_DEST_INFO_SWAP__MASK 0x00000300 +#define A4XX_RB_COPY_DEST_INFO_SWAP__SHIFT 8 +static inline uint32_t A4XX_RB_COPY_DEST_INFO_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A4XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & A4XX_RB_COPY_DEST_INFO_SWAP__MASK; +} +#define A4XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK 0x00000c00 +#define A4XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT 10 +static inline uint32_t A4XX_RB_COPY_DEST_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) +{ + return ((val) << A4XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT) & A4XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK; +} +#define A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK 0x0003c000 +#define A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT 14 +static inline uint32_t A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(uint32_t val) +{ + return ((val) << A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT) & A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK; +} +#define A4XX_RB_COPY_DEST_INFO_ENDIAN__MASK 0x001c0000 +#define A4XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT 18 +static inline uint32_t A4XX_RB_COPY_DEST_INFO_ENDIAN(enum adreno_rb_surface_endian val) +{ + return ((val) << A4XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT) & A4XX_RB_COPY_DEST_INFO_ENDIAN__MASK; +} +#define A4XX_RB_COPY_DEST_INFO_TILE__MASK 0x03000000 +#define A4XX_RB_COPY_DEST_INFO_TILE__SHIFT 24 +static inline uint32_t A4XX_RB_COPY_DEST_INFO_TILE(enum a4xx_tile_mode val) +{ + return ((val) << A4XX_RB_COPY_DEST_INFO_TILE__SHIFT) & A4XX_RB_COPY_DEST_INFO_TILE__MASK; +} + +#define REG_A4XX_RB_FS_OUTPUT_REG 0x00002100 +#define A4XX_RB_FS_OUTPUT_REG_MRT__MASK 0x0000000f +#define A4XX_RB_FS_OUTPUT_REG_MRT__SHIFT 0 +static inline uint32_t A4XX_RB_FS_OUTPUT_REG_MRT(uint32_t val) +{ + return ((val) << A4XX_RB_FS_OUTPUT_REG_MRT__SHIFT) & A4XX_RB_FS_OUTPUT_REG_MRT__MASK; +} +#define A4XX_RB_FS_OUTPUT_REG_FRAG_WRITES_Z 0x00000020 + +#define REG_A4XX_RB_DEPTH_CONTROL 0x00002101 +#define A4XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z 0x00000001 +#define A4XX_RB_DEPTH_CONTROL_Z_ENABLE 0x00000002 
+#define A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE 0x00000004 +#define A4XX_RB_DEPTH_CONTROL_ZFUNC__MASK 0x00000070 +#define A4XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT 4 +static inline uint32_t A4XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val) +{ + return ((val) << A4XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A4XX_RB_DEPTH_CONTROL_ZFUNC__MASK; +} +#define A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE 0x00000080 +#define A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE 0x00010000 +#define A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS 0x00020000 +#define A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE 0x80000000 + +#define REG_A4XX_RB_DEPTH_CLEAR 0x00002102 + +#define REG_A4XX_RB_DEPTH_INFO 0x00002103 +#define A4XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK 0x00000003 +#define A4XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT 0 +static inline uint32_t A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum a4xx_depth_format val) +{ + return ((val) << A4XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT) & A4XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK; +} +#define A4XX_RB_DEPTH_INFO_DEPTH_BASE__MASK 0xfffff000 +#define A4XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT 12 +static inline uint32_t A4XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A4XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A4XX_RB_DEPTH_INFO_DEPTH_BASE__MASK; +} + +#define REG_A4XX_RB_DEPTH_PITCH 0x00002104 +#define A4XX_RB_DEPTH_PITCH__MASK 0xffffffff +#define A4XX_RB_DEPTH_PITCH__SHIFT 0 +static inline uint32_t A4XX_RB_DEPTH_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A4XX_RB_DEPTH_PITCH__SHIFT) & A4XX_RB_DEPTH_PITCH__MASK; +} + +#define REG_A4XX_RB_DEPTH_PITCH2 0x00002105 +#define A4XX_RB_DEPTH_PITCH2__MASK 0xffffffff +#define A4XX_RB_DEPTH_PITCH2__SHIFT 0 +static inline uint32_t A4XX_RB_DEPTH_PITCH2(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A4XX_RB_DEPTH_PITCH2__SHIFT) & A4XX_RB_DEPTH_PITCH2__MASK; +} + +#define REG_A4XX_RB_STENCIL_CONTROL 0x00002106 +#define A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001 +#define 
A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000002 +#define A4XX_RB_STENCIL_CONTROL_STENCIL_READ 0x00000004 +#define A4XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700 +#define A4XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8 +static inline uint32_t A4XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val) +{ + return ((val) << A4XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A4XX_RB_STENCIL_CONTROL_FUNC__MASK; +} +#define A4XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800 +#define A4XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11 +static inline uint32_t A4XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val) +{ + return ((val) << A4XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A4XX_RB_STENCIL_CONTROL_FAIL__MASK; +} +#define A4XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000 +#define A4XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14 +static inline uint32_t A4XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val) +{ + return ((val) << A4XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A4XX_RB_STENCIL_CONTROL_ZPASS__MASK; +} +#define A4XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000 +#define A4XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17 +static inline uint32_t A4XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val) +{ + return ((val) << A4XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & A4XX_RB_STENCIL_CONTROL_ZFAIL__MASK; +} +#define A4XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000 +#define A4XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20 +static inline uint32_t A4XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val) +{ + return ((val) << A4XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A4XX_RB_STENCIL_CONTROL_FUNC_BF__MASK; +} +#define A4XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000 +#define A4XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23 +static inline uint32_t A4XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A4XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A4XX_RB_STENCIL_CONTROL_FAIL_BF__MASK; +} +#define A4XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000 +#define A4XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26 +static inline uint32_t 
A4XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val) +{ + return ((val) << A4XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A4XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK; +} +#define A4XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000 +#define A4XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29 +static inline uint32_t A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A4XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A4XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK; +} + +#define REG_A4XX_RB_STENCIL_CONTROL2 0x00002107 +#define A4XX_RB_STENCIL_CONTROL2_STENCIL_BUFFER 0x00000001 + +#define REG_A4XX_RB_STENCIL_INFO 0x00002108 +#define A4XX_RB_STENCIL_INFO_SEPARATE_STENCIL 0x00000001 +#define A4XX_RB_STENCIL_INFO_STENCIL_BASE__MASK 0xfffff000 +#define A4XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT 12 +static inline uint32_t A4XX_RB_STENCIL_INFO_STENCIL_BASE(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A4XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT) & A4XX_RB_STENCIL_INFO_STENCIL_BASE__MASK; +} + +#define REG_A4XX_RB_STENCIL_PITCH 0x00002109 +#define A4XX_RB_STENCIL_PITCH__MASK 0xffffffff +#define A4XX_RB_STENCIL_PITCH__SHIFT 0 +static inline uint32_t A4XX_RB_STENCIL_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A4XX_RB_STENCIL_PITCH__SHIFT) & A4XX_RB_STENCIL_PITCH__MASK; +} + +#define REG_A4XX_RB_STENCILREFMASK 0x0000210b +#define A4XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff +#define A4XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0 +static inline uint32_t A4XX_RB_STENCILREFMASK_STENCILREF(uint32_t val) +{ + return ((val) << A4XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A4XX_RB_STENCILREFMASK_STENCILREF__MASK; +} +#define A4XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00 +#define A4XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8 +static inline uint32_t A4XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val) +{ + return ((val) << A4XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A4XX_RB_STENCILREFMASK_STENCILMASK__MASK; +} +#define 
A4XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000 +#define A4XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16 +static inline uint32_t A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val) +{ + return ((val) << A4XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A4XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK; +} + +#define REG_A4XX_RB_STENCILREFMASK_BF 0x0000210c +#define A4XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff +#define A4XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0 +static inline uint32_t A4XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val) +{ + return ((val) << A4XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A4XX_RB_STENCILREFMASK_BF_STENCILREF__MASK; +} +#define A4XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00 +#define A4XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8 +static inline uint32_t A4XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val) +{ + return ((val) << A4XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A4XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK; +} +#define A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000 +#define A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16 +static inline uint32_t A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val) +{ + return ((val) << A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK; +} + +#define REG_A4XX_RB_BIN_OFFSET 0x0000210d +#define A4XX_RB_BIN_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 +#define A4XX_RB_BIN_OFFSET_X__MASK 0x00007fff +#define A4XX_RB_BIN_OFFSET_X__SHIFT 0 +static inline uint32_t A4XX_RB_BIN_OFFSET_X(uint32_t val) +{ + return ((val) << A4XX_RB_BIN_OFFSET_X__SHIFT) & A4XX_RB_BIN_OFFSET_X__MASK; +} +#define A4XX_RB_BIN_OFFSET_Y__MASK 0x7fff0000 +#define A4XX_RB_BIN_OFFSET_Y__SHIFT 16 +static inline uint32_t A4XX_RB_BIN_OFFSET_Y(uint32_t val) +{ + return ((val) << A4XX_RB_BIN_OFFSET_Y__SHIFT) & A4XX_RB_BIN_OFFSET_Y__MASK; +} + +static inline uint32_t REG_A4XX_RB_VPORT_Z_CLAMP(uint32_t i0) { return 0x00002120 + 0x2*i0; } + 
+static inline uint32_t REG_A4XX_RB_VPORT_Z_CLAMP_MIN(uint32_t i0) { return 0x00002120 + 0x2*i0; } + +static inline uint32_t REG_A4XX_RB_VPORT_Z_CLAMP_MAX(uint32_t i0) { return 0x00002121 + 0x2*i0; } + +#define REG_A4XX_RBBM_HW_VERSION 0x00000000 + +#define REG_A4XX_RBBM_HW_CONFIGURATION 0x00000002 + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_TP(uint32_t i0) { return 0x00000004 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_TP_REG(uint32_t i0) { return 0x00000004 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_TP(uint32_t i0) { return 0x00000008 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_TP_REG(uint32_t i0) { return 0x00000008 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_TP(uint32_t i0) { return 0x0000000c + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_TP_REG(uint32_t i0) { return 0x0000000c + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_TP(uint32_t i0) { return 0x00000010 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_TP_REG(uint32_t i0) { return 0x00000010 + 0x1*i0; } + +#define REG_A4XX_RBBM_CLOCK_CTL_UCHE 0x00000014 + +#define REG_A4XX_RBBM_CLOCK_CTL2_UCHE 0x00000015 + +#define REG_A4XX_RBBM_CLOCK_CTL3_UCHE 0x00000016 + +#define REG_A4XX_RBBM_CLOCK_CTL4_UCHE 0x00000017 + +#define REG_A4XX_RBBM_CLOCK_HYST_UCHE 0x00000018 + +#define REG_A4XX_RBBM_CLOCK_DELAY_UCHE 0x00000019 + +#define REG_A4XX_RBBM_CLOCK_MODE_GPC 0x0000001a + +#define REG_A4XX_RBBM_CLOCK_DELAY_GPC 0x0000001b + +#define REG_A4XX_RBBM_CLOCK_HYST_GPC 0x0000001c + +#define REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM 0x0000001d + +#define REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x0000001e + +#define REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x0000001f + +#define REG_A4XX_RBBM_CLOCK_CTL 0x00000020 + +#define REG_A4XX_RBBM_SP_HYST_CNT 0x00000021 + +#define REG_A4XX_RBBM_SW_RESET_CMD 0x00000022 + +#define REG_A4XX_RBBM_AHB_CTL0 0x00000023 + +#define REG_A4XX_RBBM_AHB_CTL1 0x00000024 + +#define 
REG_A4XX_RBBM_AHB_CMD 0x00000025 + +#define REG_A4XX_RBBM_RB_SUB_BLOCK_SEL_CTL 0x00000026 + +#define REG_A4XX_RBBM_RAM_ACC_63_32 0x00000028 + +#define REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x0000002b + +#define REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL 0x0000002f + +#define REG_A4XX_RBBM_INTERFACE_HANG_MASK_CTL4 0x00000034 + +#define REG_A4XX_RBBM_INT_CLEAR_CMD 0x00000036 + +#define REG_A4XX_RBBM_INT_0_MASK 0x00000037 + +#define REG_A4XX_RBBM_RBBM_CTL 0x0000003e + +#define REG_A4XX_RBBM_AHB_DEBUG_CTL 0x0000003f + +#define REG_A4XX_RBBM_VBIF_DEBUG_CTL 0x00000041 + +#define REG_A4XX_RBBM_CLOCK_CTL2 0x00000042 + +#define REG_A4XX_RBBM_BLOCK_SW_RESET_CMD 0x00000045 + +#define REG_A4XX_RBBM_RESET_CYCLES 0x00000047 + +#define REG_A4XX_RBBM_EXT_TRACE_BUS_CTL 0x00000049 + +#define REG_A4XX_RBBM_CFG_DEBBUS_SEL_A 0x0000004a + +#define REG_A4XX_RBBM_CFG_DEBBUS_SEL_B 0x0000004b + +#define REG_A4XX_RBBM_CFG_DEBBUS_SEL_C 0x0000004c + +#define REG_A4XX_RBBM_CFG_DEBBUS_SEL_D 0x0000004d + +#define REG_A4XX_RBBM_POWER_CNTL_IP 0x00000098 +#define A4XX_RBBM_POWER_CNTL_IP_SW_COLLAPSE 0x00000001 +#define A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON 0x00100000 + +#define REG_A4XX_RBBM_PERFCTR_CP_0_LO 0x0000009c + +#define REG_A4XX_RBBM_PERFCTR_CP_0_HI 0x0000009d + +#define REG_A4XX_RBBM_PERFCTR_CP_1_LO 0x0000009e + +#define REG_A4XX_RBBM_PERFCTR_CP_1_HI 0x0000009f + +#define REG_A4XX_RBBM_PERFCTR_CP_2_LO 0x000000a0 + +#define REG_A4XX_RBBM_PERFCTR_CP_2_HI 0x000000a1 + +#define REG_A4XX_RBBM_PERFCTR_CP_3_LO 0x000000a2 + +#define REG_A4XX_RBBM_PERFCTR_CP_3_HI 0x000000a3 + +#define REG_A4XX_RBBM_PERFCTR_CP_4_LO 0x000000a4 + +#define REG_A4XX_RBBM_PERFCTR_CP_4_HI 0x000000a5 + +#define REG_A4XX_RBBM_PERFCTR_CP_5_LO 0x000000a6 + +#define REG_A4XX_RBBM_PERFCTR_CP_5_HI 0x000000a7 + +#define REG_A4XX_RBBM_PERFCTR_CP_6_LO 0x000000a8 + +#define REG_A4XX_RBBM_PERFCTR_CP_6_HI 0x000000a9 + +#define REG_A4XX_RBBM_PERFCTR_CP_7_LO 0x000000aa + +#define REG_A4XX_RBBM_PERFCTR_CP_7_HI 0x000000ab + +#define 
REG_A4XX_RBBM_PERFCTR_RBBM_0_LO 0x000000ac + +#define REG_A4XX_RBBM_PERFCTR_RBBM_0_HI 0x000000ad + +#define REG_A4XX_RBBM_PERFCTR_RBBM_1_LO 0x000000ae + +#define REG_A4XX_RBBM_PERFCTR_RBBM_1_HI 0x000000af + +#define REG_A4XX_RBBM_PERFCTR_RBBM_2_LO 0x000000b0 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_2_HI 0x000000b1 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_3_LO 0x000000b2 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_3_HI 0x000000b3 + +#define REG_A4XX_RBBM_PERFCTR_PC_0_LO 0x000000b4 + +#define REG_A4XX_RBBM_PERFCTR_PC_0_HI 0x000000b5 + +#define REG_A4XX_RBBM_PERFCTR_PC_1_LO 0x000000b6 + +#define REG_A4XX_RBBM_PERFCTR_PC_1_HI 0x000000b7 + +#define REG_A4XX_RBBM_PERFCTR_PC_2_LO 0x000000b8 + +#define REG_A4XX_RBBM_PERFCTR_PC_2_HI 0x000000b9 + +#define REG_A4XX_RBBM_PERFCTR_PC_3_LO 0x000000ba + +#define REG_A4XX_RBBM_PERFCTR_PC_3_HI 0x000000bb + +#define REG_A4XX_RBBM_PERFCTR_PC_4_LO 0x000000bc + +#define REG_A4XX_RBBM_PERFCTR_PC_4_HI 0x000000bd + +#define REG_A4XX_RBBM_PERFCTR_PC_5_LO 0x000000be + +#define REG_A4XX_RBBM_PERFCTR_PC_5_HI 0x000000bf + +#define REG_A4XX_RBBM_PERFCTR_PC_6_LO 0x000000c0 + +#define REG_A4XX_RBBM_PERFCTR_PC_6_HI 0x000000c1 + +#define REG_A4XX_RBBM_PERFCTR_PC_7_LO 0x000000c2 + +#define REG_A4XX_RBBM_PERFCTR_PC_7_HI 0x000000c3 + +#define REG_A4XX_RBBM_PERFCTR_VFD_0_LO 0x000000c4 + +#define REG_A4XX_RBBM_PERFCTR_VFD_0_HI 0x000000c5 + +#define REG_A4XX_RBBM_PERFCTR_VFD_1_LO 0x000000c6 + +#define REG_A4XX_RBBM_PERFCTR_VFD_1_HI 0x000000c7 + +#define REG_A4XX_RBBM_PERFCTR_VFD_2_LO 0x000000c8 + +#define REG_A4XX_RBBM_PERFCTR_VFD_2_HI 0x000000c9 + +#define REG_A4XX_RBBM_PERFCTR_VFD_3_LO 0x000000ca + +#define REG_A4XX_RBBM_PERFCTR_VFD_3_HI 0x000000cb + +#define REG_A4XX_RBBM_PERFCTR_VFD_4_LO 0x000000cc + +#define REG_A4XX_RBBM_PERFCTR_VFD_4_HI 0x000000cd + +#define REG_A4XX_RBBM_PERFCTR_VFD_5_LO 0x000000ce + +#define REG_A4XX_RBBM_PERFCTR_VFD_5_HI 0x000000cf + +#define REG_A4XX_RBBM_PERFCTR_VFD_6_LO 0x000000d0 + +#define REG_A4XX_RBBM_PERFCTR_VFD_6_HI 0x000000d1 + 
+#define REG_A4XX_RBBM_PERFCTR_VFD_7_LO 0x000000d2 + +#define REG_A4XX_RBBM_PERFCTR_VFD_7_HI 0x000000d3 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_0_LO 0x000000d4 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_0_HI 0x000000d5 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_1_LO 0x000000d6 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_1_HI 0x000000d7 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_2_LO 0x000000d8 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_2_HI 0x000000d9 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_3_LO 0x000000da + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_3_HI 0x000000db + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_4_LO 0x000000dc + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_4_HI 0x000000dd + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_5_LO 0x000000de + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_5_HI 0x000000df + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_6_LO 0x000000e0 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_6_HI 0x000000e1 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_7_LO 0x000000e2 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_7_HI 0x000000e3 + +#define REG_A4XX_RBBM_PERFCTR_VPC_0_LO 0x000000e4 + +#define REG_A4XX_RBBM_PERFCTR_VPC_0_HI 0x000000e5 + +#define REG_A4XX_RBBM_PERFCTR_VPC_1_LO 0x000000e6 + +#define REG_A4XX_RBBM_PERFCTR_VPC_1_HI 0x000000e7 + +#define REG_A4XX_RBBM_PERFCTR_VPC_2_LO 0x000000e8 + +#define REG_A4XX_RBBM_PERFCTR_VPC_2_HI 0x000000e9 + +#define REG_A4XX_RBBM_PERFCTR_VPC_3_LO 0x000000ea + +#define REG_A4XX_RBBM_PERFCTR_VPC_3_HI 0x000000eb + +#define REG_A4XX_RBBM_PERFCTR_CCU_0_LO 0x000000ec + +#define REG_A4XX_RBBM_PERFCTR_CCU_0_HI 0x000000ed + +#define REG_A4XX_RBBM_PERFCTR_CCU_1_LO 0x000000ee + +#define REG_A4XX_RBBM_PERFCTR_CCU_1_HI 0x000000ef + +#define REG_A4XX_RBBM_PERFCTR_CCU_2_LO 0x000000f0 + +#define REG_A4XX_RBBM_PERFCTR_CCU_2_HI 0x000000f1 + +#define REG_A4XX_RBBM_PERFCTR_CCU_3_LO 0x000000f2 + +#define REG_A4XX_RBBM_PERFCTR_CCU_3_HI 0x000000f3 + +#define REG_A4XX_RBBM_PERFCTR_TSE_0_LO 0x000000f4 + +#define REG_A4XX_RBBM_PERFCTR_TSE_0_HI 0x000000f5 + +#define REG_A4XX_RBBM_PERFCTR_TSE_1_LO 0x000000f6 + +#define 
REG_A4XX_RBBM_PERFCTR_TSE_1_HI 0x000000f7 + +#define REG_A4XX_RBBM_PERFCTR_TSE_2_LO 0x000000f8 + +#define REG_A4XX_RBBM_PERFCTR_TSE_2_HI 0x000000f9 + +#define REG_A4XX_RBBM_PERFCTR_TSE_3_LO 0x000000fa + +#define REG_A4XX_RBBM_PERFCTR_TSE_3_HI 0x000000fb + +#define REG_A4XX_RBBM_PERFCTR_RAS_0_LO 0x000000fc + +#define REG_A4XX_RBBM_PERFCTR_RAS_0_HI 0x000000fd + +#define REG_A4XX_RBBM_PERFCTR_RAS_1_LO 0x000000fe + +#define REG_A4XX_RBBM_PERFCTR_RAS_1_HI 0x000000ff + +#define REG_A4XX_RBBM_PERFCTR_RAS_2_LO 0x00000100 + +#define REG_A4XX_RBBM_PERFCTR_RAS_2_HI 0x00000101 + +#define REG_A4XX_RBBM_PERFCTR_RAS_3_LO 0x00000102 + +#define REG_A4XX_RBBM_PERFCTR_RAS_3_HI 0x00000103 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_0_LO 0x00000104 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_0_HI 0x00000105 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_1_LO 0x00000106 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_1_HI 0x00000107 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_2_LO 0x00000108 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_2_HI 0x00000109 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_3_LO 0x0000010a + +#define REG_A4XX_RBBM_PERFCTR_UCHE_3_HI 0x0000010b + +#define REG_A4XX_RBBM_PERFCTR_UCHE_4_LO 0x0000010c + +#define REG_A4XX_RBBM_PERFCTR_UCHE_4_HI 0x0000010d + +#define REG_A4XX_RBBM_PERFCTR_UCHE_5_LO 0x0000010e + +#define REG_A4XX_RBBM_PERFCTR_UCHE_5_HI 0x0000010f + +#define REG_A4XX_RBBM_PERFCTR_UCHE_6_LO 0x00000110 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_6_HI 0x00000111 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_7_LO 0x00000112 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_7_HI 0x00000113 + +#define REG_A4XX_RBBM_PERFCTR_TP_0_LO 0x00000114 + +#define REG_A4XX_RBBM_PERFCTR_TP_0_HI 0x00000115 + +#define REG_A4XX_RBBM_PERFCTR_TP_0_LO 0x00000114 + +#define REG_A4XX_RBBM_PERFCTR_TP_0_HI 0x00000115 + +#define REG_A4XX_RBBM_PERFCTR_TP_1_LO 0x00000116 + +#define REG_A4XX_RBBM_PERFCTR_TP_1_HI 0x00000117 + +#define REG_A4XX_RBBM_PERFCTR_TP_2_LO 0x00000118 + +#define REG_A4XX_RBBM_PERFCTR_TP_2_HI 0x00000119 + +#define REG_A4XX_RBBM_PERFCTR_TP_3_LO 
0x0000011a + +#define REG_A4XX_RBBM_PERFCTR_TP_3_HI 0x0000011b + +#define REG_A4XX_RBBM_PERFCTR_TP_4_LO 0x0000011c + +#define REG_A4XX_RBBM_PERFCTR_TP_4_HI 0x0000011d + +#define REG_A4XX_RBBM_PERFCTR_TP_5_LO 0x0000011e + +#define REG_A4XX_RBBM_PERFCTR_TP_5_HI 0x0000011f + +#define REG_A4XX_RBBM_PERFCTR_TP_6_LO 0x00000120 + +#define REG_A4XX_RBBM_PERFCTR_TP_6_HI 0x00000121 + +#define REG_A4XX_RBBM_PERFCTR_TP_7_LO 0x00000122 + +#define REG_A4XX_RBBM_PERFCTR_TP_7_HI 0x00000123 + +#define REG_A4XX_RBBM_PERFCTR_SP_0_LO 0x00000124 + +#define REG_A4XX_RBBM_PERFCTR_SP_0_HI 0x00000125 + +#define REG_A4XX_RBBM_PERFCTR_SP_1_LO 0x00000126 + +#define REG_A4XX_RBBM_PERFCTR_SP_1_HI 0x00000127 + +#define REG_A4XX_RBBM_PERFCTR_SP_2_LO 0x00000128 + +#define REG_A4XX_RBBM_PERFCTR_SP_2_HI 0x00000129 + +#define REG_A4XX_RBBM_PERFCTR_SP_3_LO 0x0000012a + +#define REG_A4XX_RBBM_PERFCTR_SP_3_HI 0x0000012b + +#define REG_A4XX_RBBM_PERFCTR_SP_4_LO 0x0000012c + +#define REG_A4XX_RBBM_PERFCTR_SP_4_HI 0x0000012d + +#define REG_A4XX_RBBM_PERFCTR_SP_5_LO 0x0000012e + +#define REG_A4XX_RBBM_PERFCTR_SP_5_HI 0x0000012f + +#define REG_A4XX_RBBM_PERFCTR_SP_6_LO 0x00000130 + +#define REG_A4XX_RBBM_PERFCTR_SP_6_HI 0x00000131 + +#define REG_A4XX_RBBM_PERFCTR_SP_7_LO 0x00000132 + +#define REG_A4XX_RBBM_PERFCTR_SP_7_HI 0x00000133 + +#define REG_A4XX_RBBM_PERFCTR_SP_8_LO 0x00000134 + +#define REG_A4XX_RBBM_PERFCTR_SP_8_HI 0x00000135 + +#define REG_A4XX_RBBM_PERFCTR_SP_9_LO 0x00000136 + +#define REG_A4XX_RBBM_PERFCTR_SP_9_HI 0x00000137 + +#define REG_A4XX_RBBM_PERFCTR_SP_10_LO 0x00000138 + +#define REG_A4XX_RBBM_PERFCTR_SP_10_HI 0x00000139 + +#define REG_A4XX_RBBM_PERFCTR_SP_11_LO 0x0000013a + +#define REG_A4XX_RBBM_PERFCTR_SP_11_HI 0x0000013b + +#define REG_A4XX_RBBM_PERFCTR_RB_0_LO 0x0000013c + +#define REG_A4XX_RBBM_PERFCTR_RB_0_HI 0x0000013d + +#define REG_A4XX_RBBM_PERFCTR_RB_1_LO 0x0000013e + +#define REG_A4XX_RBBM_PERFCTR_RB_1_HI 0x0000013f + +#define REG_A4XX_RBBM_PERFCTR_RB_2_LO 0x00000140 + 
+#define REG_A4XX_RBBM_PERFCTR_RB_2_HI 0x00000141 + +#define REG_A4XX_RBBM_PERFCTR_RB_3_LO 0x00000142 + +#define REG_A4XX_RBBM_PERFCTR_RB_3_HI 0x00000143 + +#define REG_A4XX_RBBM_PERFCTR_RB_4_LO 0x00000144 + +#define REG_A4XX_RBBM_PERFCTR_RB_4_HI 0x00000145 + +#define REG_A4XX_RBBM_PERFCTR_RB_5_LO 0x00000146 + +#define REG_A4XX_RBBM_PERFCTR_RB_5_HI 0x00000147 + +#define REG_A4XX_RBBM_PERFCTR_RB_6_LO 0x00000148 + +#define REG_A4XX_RBBM_PERFCTR_RB_6_HI 0x00000149 + +#define REG_A4XX_RBBM_PERFCTR_RB_7_LO 0x0000014a + +#define REG_A4XX_RBBM_PERFCTR_RB_7_HI 0x0000014b + +#define REG_A4XX_RBBM_PERFCTR_VSC_0_LO 0x0000014c + +#define REG_A4XX_RBBM_PERFCTR_VSC_0_HI 0x0000014d + +#define REG_A4XX_RBBM_PERFCTR_VSC_1_LO 0x0000014e + +#define REG_A4XX_RBBM_PERFCTR_VSC_1_HI 0x0000014f + +#define REG_A4XX_RBBM_PERFCTR_PWR_0_LO 0x00000166 + +#define REG_A4XX_RBBM_PERFCTR_PWR_0_HI 0x00000167 + +#define REG_A4XX_RBBM_PERFCTR_PWR_1_LO 0x00000168 + +#define REG_A4XX_RBBM_PERFCTR_PWR_1_HI 0x00000169 + +#define REG_A4XX_RBBM_ALWAYSON_COUNTER_LO 0x0000016e + +#define REG_A4XX_RBBM_ALWAYSON_COUNTER_HI 0x0000016f + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP(uint32_t i0) { return 0x00000068 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP_REG(uint32_t i0) { return 0x00000068 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_SP(uint32_t i0) { return 0x0000006c + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_SP_REG(uint32_t i0) { return 0x0000006c + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_SP(uint32_t i0) { return 0x00000070 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_SP_REG(uint32_t i0) { return 0x00000070 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_SP(uint32_t i0) { return 0x00000074 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_SP_REG(uint32_t i0) { return 0x00000074 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_RB(uint32_t i0) { return 0x00000078 
+ 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_RB_REG(uint32_t i0) { return 0x00000078 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_RB(uint32_t i0) { return 0x0000007c + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_RB_REG(uint32_t i0) { return 0x0000007c + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(uint32_t i0) { return 0x00000082 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU_REG(uint32_t i0) { return 0x00000082 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(uint32_t i0) { return 0x00000086 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU_REG(uint32_t i0) { return 0x00000086 + 0x1*i0; } + +#define REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM 0x00000080 + +#define REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM 0x00000081 + +#define REG_A4XX_RBBM_CLOCK_CTL_HLSQ 0x0000008a + +#define REG_A4XX_RBBM_CLOCK_HYST_HLSQ 0x0000008b + +#define REG_A4XX_RBBM_CLOCK_DELAY_HLSQ 0x0000008c + +#define REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM 0x0000008d + +static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(uint32_t i0) { return 0x0000008e + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0) { return 0x0000008e + 0x1*i0; } + +#define REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0 0x00000099 + +#define REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1 0x0000009a + +#define REG_A4XX_RBBM_PERFCTR_PWR_1_LO 0x00000168 + +#define REG_A4XX_RBBM_PERFCTR_CTL 0x00000170 + +#define REG_A4XX_RBBM_PERFCTR_LOAD_CMD0 0x00000171 + +#define REG_A4XX_RBBM_PERFCTR_LOAD_CMD1 0x00000172 + +#define REG_A4XX_RBBM_PERFCTR_LOAD_CMD2 0x00000173 + +#define REG_A4XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000174 + +#define REG_A4XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000175 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_0 0x00000176 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_1 0x00000177 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_2 0x00000178 + +#define 
REG_A4XX_RBBM_PERFCTR_RBBM_SEL_3 0x00000179 + +#define REG_A4XX_RBBM_GPU_BUSY_MASKED 0x0000017a + +#define REG_A4XX_RBBM_INT_0_STATUS 0x0000017d + +#define REG_A4XX_RBBM_CLOCK_STATUS 0x00000182 + +#define REG_A4XX_RBBM_AHB_STATUS 0x00000189 + +#define REG_A4XX_RBBM_AHB_ME_SPLIT_STATUS 0x0000018c + +#define REG_A4XX_RBBM_AHB_PFP_SPLIT_STATUS 0x0000018d + +#define REG_A4XX_RBBM_AHB_ERROR_STATUS 0x0000018f + +#define REG_A4XX_RBBM_STATUS 0x00000191 +#define A4XX_RBBM_STATUS_HI_BUSY 0x00000001 +#define A4XX_RBBM_STATUS_CP_ME_BUSY 0x00000002 +#define A4XX_RBBM_STATUS_CP_PFP_BUSY 0x00000004 +#define A4XX_RBBM_STATUS_CP_NRT_BUSY 0x00004000 +#define A4XX_RBBM_STATUS_VBIF_BUSY 0x00008000 +#define A4XX_RBBM_STATUS_TSE_BUSY 0x00010000 +#define A4XX_RBBM_STATUS_RAS_BUSY 0x00020000 +#define A4XX_RBBM_STATUS_RB_BUSY 0x00040000 +#define A4XX_RBBM_STATUS_PC_DCALL_BUSY 0x00080000 +#define A4XX_RBBM_STATUS_PC_VSD_BUSY 0x00100000 +#define A4XX_RBBM_STATUS_VFD_BUSY 0x00200000 +#define A4XX_RBBM_STATUS_VPC_BUSY 0x00400000 +#define A4XX_RBBM_STATUS_UCHE_BUSY 0x00800000 +#define A4XX_RBBM_STATUS_SP_BUSY 0x01000000 +#define A4XX_RBBM_STATUS_TPL1_BUSY 0x02000000 +#define A4XX_RBBM_STATUS_MARB_BUSY 0x04000000 +#define A4XX_RBBM_STATUS_VSC_BUSY 0x08000000 +#define A4XX_RBBM_STATUS_ARB_BUSY 0x10000000 +#define A4XX_RBBM_STATUS_HLSQ_BUSY 0x20000000 +#define A4XX_RBBM_STATUS_GPU_BUSY_NOHC 0x40000000 +#define A4XX_RBBM_STATUS_GPU_BUSY 0x80000000 + +#define REG_A4XX_RBBM_INTERFACE_RRDY_STATUS5 0x0000019f + +#define REG_A4XX_RBBM_POWER_STATUS 0x000001b0 +#define A4XX_RBBM_POWER_STATUS_SP_TP_PWR_ON 0x00100000 + +#define REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2 0x000001b8 + +#define REG_A4XX_CP_SCRATCH_UMASK 0x00000228 + +#define REG_A4XX_CP_SCRATCH_ADDR 0x00000229 + +#define REG_A4XX_CP_RB_BASE 0x00000200 + +#define REG_A4XX_CP_RB_CNTL 0x00000201 + +#define REG_A4XX_CP_RB_WPTR 0x00000205 + +#define REG_A4XX_CP_RB_RPTR_ADDR 0x00000203 + +#define REG_A4XX_CP_RB_RPTR 0x00000204 + +#define 
REG_A4XX_CP_IB1_BASE 0x00000206 + +#define REG_A4XX_CP_IB1_BUFSZ 0x00000207 + +#define REG_A4XX_CP_IB2_BASE 0x00000208 + +#define REG_A4XX_CP_IB2_BUFSZ 0x00000209 + +#define REG_A4XX_CP_ME_NRT_ADDR 0x0000020c + +#define REG_A4XX_CP_ME_NRT_DATA 0x0000020d + +#define REG_A4XX_CP_ME_RB_DONE_DATA 0x00000217 + +#define REG_A4XX_CP_QUEUE_THRESH2 0x00000219 + +#define REG_A4XX_CP_MERCIU_SIZE 0x0000021b + +#define REG_A4XX_CP_ROQ_ADDR 0x0000021c + +#define REG_A4XX_CP_ROQ_DATA 0x0000021d + +#define REG_A4XX_CP_MEQ_ADDR 0x0000021e + +#define REG_A4XX_CP_MEQ_DATA 0x0000021f + +#define REG_A4XX_CP_MERCIU_ADDR 0x00000220 + +#define REG_A4XX_CP_MERCIU_DATA 0x00000221 + +#define REG_A4XX_CP_MERCIU_DATA2 0x00000222 + +#define REG_A4XX_CP_PFP_UCODE_ADDR 0x00000223 + +#define REG_A4XX_CP_PFP_UCODE_DATA 0x00000224 + +#define REG_A4XX_CP_ME_RAM_WADDR 0x00000225 + +#define REG_A4XX_CP_ME_RAM_RADDR 0x00000226 + +#define REG_A4XX_CP_ME_RAM_DATA 0x00000227 + +#define REG_A4XX_CP_PREEMPT 0x0000022a + +#define REG_A4XX_CP_CNTL 0x0000022c + +#define REG_A4XX_CP_ME_CNTL 0x0000022d + +#define REG_A4XX_CP_DEBUG 0x0000022e + +#define REG_A4XX_CP_DEBUG_ECO_CONTROL 0x00000231 + +#define REG_A4XX_CP_DRAW_STATE_ADDR 0x00000232 + +static inline uint32_t REG_A4XX_CP_PROTECT(uint32_t i0) { return 0x00000240 + 0x1*i0; } + +static inline uint32_t REG_A4XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000240 + 0x1*i0; } +#define A4XX_CP_PROTECT_REG_BASE_ADDR__MASK 0x0001ffff +#define A4XX_CP_PROTECT_REG_BASE_ADDR__SHIFT 0 +static inline uint32_t A4XX_CP_PROTECT_REG_BASE_ADDR(uint32_t val) +{ + return ((val) << A4XX_CP_PROTECT_REG_BASE_ADDR__SHIFT) & A4XX_CP_PROTECT_REG_BASE_ADDR__MASK; +} +#define A4XX_CP_PROTECT_REG_MASK_LEN__MASK 0x1f000000 +#define A4XX_CP_PROTECT_REG_MASK_LEN__SHIFT 24 +static inline uint32_t A4XX_CP_PROTECT_REG_MASK_LEN(uint32_t val) +{ + return ((val) << A4XX_CP_PROTECT_REG_MASK_LEN__SHIFT) & A4XX_CP_PROTECT_REG_MASK_LEN__MASK; +} +#define A4XX_CP_PROTECT_REG_TRAP_WRITE 0x20000000 
+#define A4XX_CP_PROTECT_REG_TRAP_READ 0x40000000 + +#define REG_A4XX_CP_PROTECT_CTRL 0x00000250 + +#define REG_A4XX_CP_ST_BASE 0x000004c0 + +#define REG_A4XX_CP_STQ_AVAIL 0x000004ce + +#define REG_A4XX_CP_MERCIU_STAT 0x000004d0 + +#define REG_A4XX_CP_WFI_PEND_CTR 0x000004d2 + +#define REG_A4XX_CP_HW_FAULT 0x000004d8 + +#define REG_A4XX_CP_PROTECT_STATUS 0x000004da + +#define REG_A4XX_CP_EVENTS_IN_FLIGHT 0x000004dd + +#define REG_A4XX_CP_PERFCTR_CP_SEL_0 0x00000500 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_1 0x00000501 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_2 0x00000502 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_3 0x00000503 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_4 0x00000504 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_5 0x00000505 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_6 0x00000506 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_7 0x00000507 + +#define REG_A4XX_CP_PERFCOMBINER_SELECT 0x0000050b + +static inline uint32_t REG_A4XX_CP_SCRATCH(uint32_t i0) { return 0x00000578 + 0x1*i0; } + +static inline uint32_t REG_A4XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000578 + 0x1*i0; } + +#define REG_A4XX_SP_VS_STATUS 0x00000ec0 + +#define REG_A4XX_SP_MODE_CONTROL 0x00000ec3 + +#define REG_A4XX_SP_PERFCTR_SP_SEL_0 0x00000ec4 + +#define REG_A4XX_SP_PERFCTR_SP_SEL_1 0x00000ec5 + +#define REG_A4XX_SP_PERFCTR_SP_SEL_2 0x00000ec6 + +#define REG_A4XX_SP_PERFCTR_SP_SEL_3 0x00000ec7 + +#define REG_A4XX_SP_PERFCTR_SP_SEL_4 0x00000ec8 + +#define REG_A4XX_SP_PERFCTR_SP_SEL_5 0x00000ec9 + +#define REG_A4XX_SP_PERFCTR_SP_SEL_6 0x00000eca + +#define REG_A4XX_SP_PERFCTR_SP_SEL_7 0x00000ecb + +#define REG_A4XX_SP_PERFCTR_SP_SEL_8 0x00000ecc + +#define REG_A4XX_SP_PERFCTR_SP_SEL_9 0x00000ecd + +#define REG_A4XX_SP_PERFCTR_SP_SEL_10 0x00000ece + +#define REG_A4XX_SP_PERFCTR_SP_SEL_11 0x00000ecf + +#define REG_A4XX_SP_SP_CTRL_REG 0x000022c0 +#define A4XX_SP_SP_CTRL_REG_BINNING_PASS 0x00080000 + +#define REG_A4XX_SP_INSTR_CACHE_CTRL 0x000022c1 +#define A4XX_SP_INSTR_CACHE_CTRL_VS_BUFFER 0x00000080 +#define 
A4XX_SP_INSTR_CACHE_CTRL_FS_BUFFER 0x00000100 +#define A4XX_SP_INSTR_CACHE_CTRL_INSTR_BUFFER 0x00000400 + +#define REG_A4XX_SP_VS_CTRL_REG0 0x000022c4 +#define A4XX_SP_VS_CTRL_REG0_THREADMODE__MASK 0x00000001 +#define A4XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT 0 +static inline uint32_t A4XX_SP_VS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val) +{ + return ((val) << A4XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT) & A4XX_SP_VS_CTRL_REG0_THREADMODE__MASK; +} +#define A4XX_SP_VS_CTRL_REG0_VARYING 0x00000002 +#define A4XX_SP_VS_CTRL_REG0_CACHEINVALID 0x00000004 +#define A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK 0x000c0000 +#define A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT 18 +static inline uint32_t A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val) +{ + return ((val) << A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK; +} +#define A4XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A4XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A4XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A4XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A4XX_SP_VS_CTRL_REG0_THREADSIZE__MASK; +} +#define A4XX_SP_VS_CTRL_REG0_SUPERTHREADMODE 0x00200000 +#define A4XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x00400000 + +#define REG_A4XX_SP_VS_CTRL_REG1 0x000022c5 +#define A4XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK 
0x000000ff +#define A4XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT 0 +static inline uint32_t A4XX_SP_VS_CTRL_REG1_CONSTLENGTH(uint32_t val) +{ + return ((val) << A4XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT) & A4XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK; +} +#define A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK 0x7f000000 +#define A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT 24 +static inline uint32_t A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val) +{ + return ((val) << A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK; +} + +#define REG_A4XX_SP_VS_PARAM_REG 0x000022c6 +#define A4XX_SP_VS_PARAM_REG_POSREGID__MASK 0x000000ff +#define A4XX_SP_VS_PARAM_REG_POSREGID__SHIFT 0 +static inline uint32_t A4XX_SP_VS_PARAM_REG_POSREGID(uint32_t val) +{ + return ((val) << A4XX_SP_VS_PARAM_REG_POSREGID__SHIFT) & A4XX_SP_VS_PARAM_REG_POSREGID__MASK; +} +#define A4XX_SP_VS_PARAM_REG_PSIZEREGID__MASK 0x0000ff00 +#define A4XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT 8 +static inline uint32_t A4XX_SP_VS_PARAM_REG_PSIZEREGID(uint32_t val) +{ + return ((val) << A4XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT) & A4XX_SP_VS_PARAM_REG_PSIZEREGID__MASK; +} +#define A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK 0xfff00000 +#define A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT 20 +static inline uint32_t A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val) +{ + return ((val) << A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT) & A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK; +} + +static inline uint32_t REG_A4XX_SP_VS_OUT(uint32_t i0) { return 0x000022c7 + 0x1*i0; } + +static inline uint32_t REG_A4XX_SP_VS_OUT_REG(uint32_t i0) { return 0x000022c7 + 0x1*i0; } +#define A4XX_SP_VS_OUT_REG_A_REGID__MASK 0x000001ff +#define A4XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 +static inline uint32_t A4XX_SP_VS_OUT_REG_A_REGID(uint32_t val) +{ + return ((val) << A4XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A4XX_SP_VS_OUT_REG_A_REGID__MASK; +} +#define A4XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00001e00 +#define 
A4XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 9 +static inline uint32_t A4XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val) +{ + return ((val) << A4XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A4XX_SP_VS_OUT_REG_A_COMPMASK__MASK; +} +#define A4XX_SP_VS_OUT_REG_B_REGID__MASK 0x01ff0000 +#define A4XX_SP_VS_OUT_REG_B_REGID__SHIFT 16 +static inline uint32_t A4XX_SP_VS_OUT_REG_B_REGID(uint32_t val) +{ + return ((val) << A4XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A4XX_SP_VS_OUT_REG_B_REGID__MASK; +} +#define A4XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x1e000000 +#define A4XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 25 +static inline uint32_t A4XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) +{ + return ((val) << A4XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A4XX_SP_VS_OUT_REG_B_COMPMASK__MASK; +} + +static inline uint32_t REG_A4XX_SP_VS_VPC_DST(uint32_t i0) { return 0x000022d8 + 0x1*i0; } + +static inline uint32_t REG_A4XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x000022d8 + 0x1*i0; } +#define A4XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff +#define A4XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 +static inline uint32_t A4XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) +{ + return ((val) << A4XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A4XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK; +} +#define A4XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 +#define A4XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8 +static inline uint32_t A4XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val) +{ + return ((val) << A4XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A4XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK; +} +#define A4XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 +#define A4XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16 +static inline uint32_t A4XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val) +{ + return ((val) << A4XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A4XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK; +} +#define A4XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 +#define A4XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24 +static inline uint32_t A4XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) +{ + return ((val) << 
A4XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A4XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK; +} + +#define REG_A4XX_SP_VS_OBJ_OFFSET_REG 0x000022e0 +#define A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 +#define A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 +static inline uint32_t A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 +#define A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 +static inline uint32_t A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; +} + +#define REG_A4XX_SP_VS_OBJ_START 0x000022e1 + +#define REG_A4XX_SP_VS_PVT_MEM_PARAM 0x000022e2 + +#define REG_A4XX_SP_VS_PVT_MEM_ADDR 0x000022e3 + +#define REG_A4XX_SP_VS_LENGTH_REG 0x000022e5 + +#define REG_A4XX_SP_FS_CTRL_REG0 0x000022e8 +#define A4XX_SP_FS_CTRL_REG0_THREADMODE__MASK 0x00000001 +#define A4XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT 0 +static inline uint32_t A4XX_SP_FS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val) +{ + return ((val) << A4XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT) & A4XX_SP_FS_CTRL_REG0_THREADMODE__MASK; +} +#define A4XX_SP_FS_CTRL_REG0_VARYING 0x00000002 +#define A4XX_SP_FS_CTRL_REG0_CACHEINVALID 0x00000004 +#define A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << 
A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK 0x000c0000 +#define A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT 18 +static inline uint32_t A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val) +{ + return ((val) << A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK; +} +#define A4XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A4XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A4XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A4XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A4XX_SP_FS_CTRL_REG0_THREADSIZE__MASK; +} +#define A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE 0x00200000 +#define A4XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x00400000 + +#define REG_A4XX_SP_FS_CTRL_REG1 0x000022e9 +#define A4XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK 0x000000ff +#define A4XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT 0 +static inline uint32_t A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(uint32_t val) +{ + return ((val) << A4XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT) & A4XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK; +} +#define A4XX_SP_FS_CTRL_REG1_FACENESS 0x00080000 +#define A4XX_SP_FS_CTRL_REG1_VARYING 0x00100000 +#define A4XX_SP_FS_CTRL_REG1_FRAGCOORD 0x00200000 + +#define REG_A4XX_SP_FS_OBJ_OFFSET_REG 0x000022ea +#define A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 +#define A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 +static inline uint32_t A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 +#define A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 +static inline uint32_t A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & 
A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; +} + +#define REG_A4XX_SP_FS_OBJ_START 0x000022eb + +#define REG_A4XX_SP_FS_PVT_MEM_PARAM 0x000022ec + +#define REG_A4XX_SP_FS_PVT_MEM_ADDR 0x000022ed + +#define REG_A4XX_SP_FS_LENGTH_REG 0x000022ef + +#define REG_A4XX_SP_FS_OUTPUT_REG 0x000022f0 +#define A4XX_SP_FS_OUTPUT_REG_MRT__MASK 0x0000000f +#define A4XX_SP_FS_OUTPUT_REG_MRT__SHIFT 0 +static inline uint32_t A4XX_SP_FS_OUTPUT_REG_MRT(uint32_t val) +{ + return ((val) << A4XX_SP_FS_OUTPUT_REG_MRT__SHIFT) & A4XX_SP_FS_OUTPUT_REG_MRT__MASK; +} +#define A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE 0x00000080 +#define A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK 0x0000ff00 +#define A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT 8 +static inline uint32_t A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(uint32_t val) +{ + return ((val) << A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT) & A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK; +} +#define A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__MASK 0xff000000 +#define A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__SHIFT 24 +static inline uint32_t A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID(uint32_t val) +{ + return ((val) << A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__SHIFT) & A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__MASK; +} + +static inline uint32_t REG_A4XX_SP_FS_MRT(uint32_t i0) { return 0x000022f1 + 0x1*i0; } + +static inline uint32_t REG_A4XX_SP_FS_MRT_REG(uint32_t i0) { return 0x000022f1 + 0x1*i0; } +#define A4XX_SP_FS_MRT_REG_REGID__MASK 0x000000ff +#define A4XX_SP_FS_MRT_REG_REGID__SHIFT 0 +static inline uint32_t A4XX_SP_FS_MRT_REG_REGID(uint32_t val) +{ + return ((val) << A4XX_SP_FS_MRT_REG_REGID__SHIFT) & A4XX_SP_FS_MRT_REG_REGID__MASK; +} +#define A4XX_SP_FS_MRT_REG_HALF_PRECISION 0x00000100 +#define A4XX_SP_FS_MRT_REG_MRTFORMAT__MASK 0x0003f000 +#define A4XX_SP_FS_MRT_REG_MRTFORMAT__SHIFT 12 +static inline uint32_t A4XX_SP_FS_MRT_REG_MRTFORMAT(enum a4xx_color_fmt val) +{ + return ((val) << A4XX_SP_FS_MRT_REG_MRTFORMAT__SHIFT) & A4XX_SP_FS_MRT_REG_MRTFORMAT__MASK; +} +#define 
A4XX_SP_FS_MRT_REG_COLOR_SRGB 0x00040000 + +#define REG_A4XX_SP_CS_CTRL_REG0 0x00002300 + +#define REG_A4XX_SP_CS_OBJ_OFFSET_REG 0x00002301 + +#define REG_A4XX_SP_CS_OBJ_START 0x00002302 + +#define REG_A4XX_SP_CS_PVT_MEM_PARAM 0x00002303 + +#define REG_A4XX_SP_CS_PVT_MEM_ADDR 0x00002304 + +#define REG_A4XX_SP_CS_PVT_MEM_SIZE 0x00002305 + +#define REG_A4XX_SP_CS_LENGTH_REG 0x00002306 + +#define REG_A4XX_SP_HS_OBJ_OFFSET_REG 0x0000230d +#define A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 +#define A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 +static inline uint32_t A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 +#define A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 +static inline uint32_t A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; +} + +#define REG_A4XX_SP_HS_OBJ_START 0x0000230e + +#define REG_A4XX_SP_HS_PVT_MEM_PARAM 0x0000230f + +#define REG_A4XX_SP_HS_PVT_MEM_ADDR 0x00002310 + +#define REG_A4XX_SP_HS_LENGTH_REG 0x00002312 + +#define REG_A4XX_SP_DS_PARAM_REG 0x0000231a +#define A4XX_SP_DS_PARAM_REG_POSREGID__MASK 0x000000ff +#define A4XX_SP_DS_PARAM_REG_POSREGID__SHIFT 0 +static inline uint32_t A4XX_SP_DS_PARAM_REG_POSREGID(uint32_t val) +{ + return ((val) << A4XX_SP_DS_PARAM_REG_POSREGID__SHIFT) & A4XX_SP_DS_PARAM_REG_POSREGID__MASK; +} +#define A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__MASK 0xfff00000 +#define A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__SHIFT 20 +static inline uint32_t A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR(uint32_t val) +{ + return ((val) << A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__SHIFT) & A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__MASK; +} + +static inline uint32_t REG_A4XX_SP_DS_OUT(uint32_t i0) { 
return 0x0000231b + 0x1*i0; } + +static inline uint32_t REG_A4XX_SP_DS_OUT_REG(uint32_t i0) { return 0x0000231b + 0x1*i0; } +#define A4XX_SP_DS_OUT_REG_A_REGID__MASK 0x000001ff +#define A4XX_SP_DS_OUT_REG_A_REGID__SHIFT 0 +static inline uint32_t A4XX_SP_DS_OUT_REG_A_REGID(uint32_t val) +{ + return ((val) << A4XX_SP_DS_OUT_REG_A_REGID__SHIFT) & A4XX_SP_DS_OUT_REG_A_REGID__MASK; +} +#define A4XX_SP_DS_OUT_REG_A_COMPMASK__MASK 0x00001e00 +#define A4XX_SP_DS_OUT_REG_A_COMPMASK__SHIFT 9 +static inline uint32_t A4XX_SP_DS_OUT_REG_A_COMPMASK(uint32_t val) +{ + return ((val) << A4XX_SP_DS_OUT_REG_A_COMPMASK__SHIFT) & A4XX_SP_DS_OUT_REG_A_COMPMASK__MASK; +} +#define A4XX_SP_DS_OUT_REG_B_REGID__MASK 0x01ff0000 +#define A4XX_SP_DS_OUT_REG_B_REGID__SHIFT 16 +static inline uint32_t A4XX_SP_DS_OUT_REG_B_REGID(uint32_t val) +{ + return ((val) << A4XX_SP_DS_OUT_REG_B_REGID__SHIFT) & A4XX_SP_DS_OUT_REG_B_REGID__MASK; +} +#define A4XX_SP_DS_OUT_REG_B_COMPMASK__MASK 0x1e000000 +#define A4XX_SP_DS_OUT_REG_B_COMPMASK__SHIFT 25 +static inline uint32_t A4XX_SP_DS_OUT_REG_B_COMPMASK(uint32_t val) +{ + return ((val) << A4XX_SP_DS_OUT_REG_B_COMPMASK__SHIFT) & A4XX_SP_DS_OUT_REG_B_COMPMASK__MASK; +} + +static inline uint32_t REG_A4XX_SP_DS_VPC_DST(uint32_t i0) { return 0x0000232c + 0x1*i0; } + +static inline uint32_t REG_A4XX_SP_DS_VPC_DST_REG(uint32_t i0) { return 0x0000232c + 0x1*i0; } +#define A4XX_SP_DS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff +#define A4XX_SP_DS_VPC_DST_REG_OUTLOC0__SHIFT 0 +static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC0(uint32_t val) +{ + return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC0__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC0__MASK; +} +#define A4XX_SP_DS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 +#define A4XX_SP_DS_VPC_DST_REG_OUTLOC1__SHIFT 8 +static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC1(uint32_t val) +{ + return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC1__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC1__MASK; +} +#define A4XX_SP_DS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC2__SHIFT 16 +static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC2(uint32_t val) +{ + return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC2__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC2__MASK; +} +#define A4XX_SP_DS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 +#define A4XX_SP_DS_VPC_DST_REG_OUTLOC3__SHIFT 24 +static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC3(uint32_t val) +{ + return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC3__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC3__MASK; +} + +#define REG_A4XX_SP_DS_OBJ_OFFSET_REG 0x00002334 +#define A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 +#define A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 +static inline uint32_t A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 +#define A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 +static inline uint32_t A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; +} + +#define REG_A4XX_SP_DS_OBJ_START 0x00002335 + +#define REG_A4XX_SP_DS_PVT_MEM_PARAM 0x00002336 + +#define REG_A4XX_SP_DS_PVT_MEM_ADDR 0x00002337 + +#define REG_A4XX_SP_DS_LENGTH_REG 0x00002339 + +#define REG_A4XX_SP_GS_PARAM_REG 0x00002341 +#define A4XX_SP_GS_PARAM_REG_POSREGID__MASK 0x000000ff +#define A4XX_SP_GS_PARAM_REG_POSREGID__SHIFT 0 +static inline uint32_t A4XX_SP_GS_PARAM_REG_POSREGID(uint32_t val) +{ + return ((val) << A4XX_SP_GS_PARAM_REG_POSREGID__SHIFT) & A4XX_SP_GS_PARAM_REG_POSREGID__MASK; +} +#define A4XX_SP_GS_PARAM_REG_PRIMREGID__MASK 0x0000ff00 +#define A4XX_SP_GS_PARAM_REG_PRIMREGID__SHIFT 8 +static inline uint32_t A4XX_SP_GS_PARAM_REG_PRIMREGID(uint32_t val) +{ + return ((val) << A4XX_SP_GS_PARAM_REG_PRIMREGID__SHIFT) & 
A4XX_SP_GS_PARAM_REG_PRIMREGID__MASK; +} +#define A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__MASK 0xfff00000 +#define A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__SHIFT 20 +static inline uint32_t A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR(uint32_t val) +{ + return ((val) << A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__SHIFT) & A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__MASK; +} + +static inline uint32_t REG_A4XX_SP_GS_OUT(uint32_t i0) { return 0x00002342 + 0x1*i0; } + +static inline uint32_t REG_A4XX_SP_GS_OUT_REG(uint32_t i0) { return 0x00002342 + 0x1*i0; } +#define A4XX_SP_GS_OUT_REG_A_REGID__MASK 0x000001ff +#define A4XX_SP_GS_OUT_REG_A_REGID__SHIFT 0 +static inline uint32_t A4XX_SP_GS_OUT_REG_A_REGID(uint32_t val) +{ + return ((val) << A4XX_SP_GS_OUT_REG_A_REGID__SHIFT) & A4XX_SP_GS_OUT_REG_A_REGID__MASK; +} +#define A4XX_SP_GS_OUT_REG_A_COMPMASK__MASK 0x00001e00 +#define A4XX_SP_GS_OUT_REG_A_COMPMASK__SHIFT 9 +static inline uint32_t A4XX_SP_GS_OUT_REG_A_COMPMASK(uint32_t val) +{ + return ((val) << A4XX_SP_GS_OUT_REG_A_COMPMASK__SHIFT) & A4XX_SP_GS_OUT_REG_A_COMPMASK__MASK; +} +#define A4XX_SP_GS_OUT_REG_B_REGID__MASK 0x01ff0000 +#define A4XX_SP_GS_OUT_REG_B_REGID__SHIFT 16 +static inline uint32_t A4XX_SP_GS_OUT_REG_B_REGID(uint32_t val) +{ + return ((val) << A4XX_SP_GS_OUT_REG_B_REGID__SHIFT) & A4XX_SP_GS_OUT_REG_B_REGID__MASK; +} +#define A4XX_SP_GS_OUT_REG_B_COMPMASK__MASK 0x1e000000 +#define A4XX_SP_GS_OUT_REG_B_COMPMASK__SHIFT 25 +static inline uint32_t A4XX_SP_GS_OUT_REG_B_COMPMASK(uint32_t val) +{ + return ((val) << A4XX_SP_GS_OUT_REG_B_COMPMASK__SHIFT) & A4XX_SP_GS_OUT_REG_B_COMPMASK__MASK; +} + +static inline uint32_t REG_A4XX_SP_GS_VPC_DST(uint32_t i0) { return 0x00002353 + 0x1*i0; } + +static inline uint32_t REG_A4XX_SP_GS_VPC_DST_REG(uint32_t i0) { return 0x00002353 + 0x1*i0; } +#define A4XX_SP_GS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff +#define A4XX_SP_GS_VPC_DST_REG_OUTLOC0__SHIFT 0 +static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC0(uint32_t val) +{ + return ((val) << 
A4XX_SP_GS_VPC_DST_REG_OUTLOC0__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC0__MASK; +} +#define A4XX_SP_GS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 +#define A4XX_SP_GS_VPC_DST_REG_OUTLOC1__SHIFT 8 +static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC1(uint32_t val) +{ + return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC1__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC1__MASK; +} +#define A4XX_SP_GS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 +#define A4XX_SP_GS_VPC_DST_REG_OUTLOC2__SHIFT 16 +static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC2(uint32_t val) +{ + return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC2__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC2__MASK; +} +#define A4XX_SP_GS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 +#define A4XX_SP_GS_VPC_DST_REG_OUTLOC3__SHIFT 24 +static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC3(uint32_t val) +{ + return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC3__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC3__MASK; +} + +#define REG_A4XX_SP_GS_OBJ_OFFSET_REG 0x0000235b +#define A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 +#define A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 +static inline uint32_t A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 +#define A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 +static inline uint32_t A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; +} + +#define REG_A4XX_SP_GS_OBJ_START 0x0000235c + +#define REG_A4XX_SP_GS_PVT_MEM_PARAM 0x0000235d + +#define REG_A4XX_SP_GS_PVT_MEM_ADDR 0x0000235e + +#define REG_A4XX_SP_GS_LENGTH_REG 0x00002360 + +#define REG_A4XX_VPC_DEBUG_RAM_SEL 0x00000e60 + +#define REG_A4XX_VPC_DEBUG_RAM_READ 0x00000e61 + +#define REG_A4XX_VPC_DEBUG_ECO_CONTROL 
0x00000e64 + +#define REG_A4XX_VPC_PERFCTR_VPC_SEL_0 0x00000e65 + +#define REG_A4XX_VPC_PERFCTR_VPC_SEL_1 0x00000e66 + +#define REG_A4XX_VPC_PERFCTR_VPC_SEL_2 0x00000e67 + +#define REG_A4XX_VPC_PERFCTR_VPC_SEL_3 0x00000e68 + +#define REG_A4XX_VPC_ATTR 0x00002140 +#define A4XX_VPC_ATTR_TOTALATTR__MASK 0x000001ff +#define A4XX_VPC_ATTR_TOTALATTR__SHIFT 0 +static inline uint32_t A4XX_VPC_ATTR_TOTALATTR(uint32_t val) +{ + return ((val) << A4XX_VPC_ATTR_TOTALATTR__SHIFT) & A4XX_VPC_ATTR_TOTALATTR__MASK; +} +#define A4XX_VPC_ATTR_PSIZE 0x00000200 +#define A4XX_VPC_ATTR_THRDASSIGN__MASK 0x00003000 +#define A4XX_VPC_ATTR_THRDASSIGN__SHIFT 12 +static inline uint32_t A4XX_VPC_ATTR_THRDASSIGN(uint32_t val) +{ + return ((val) << A4XX_VPC_ATTR_THRDASSIGN__SHIFT) & A4XX_VPC_ATTR_THRDASSIGN__MASK; +} +#define A4XX_VPC_ATTR_ENABLE 0x02000000 + +#define REG_A4XX_VPC_PACK 0x00002141 +#define A4XX_VPC_PACK_NUMBYPASSVAR__MASK 0x000000ff +#define A4XX_VPC_PACK_NUMBYPASSVAR__SHIFT 0 +static inline uint32_t A4XX_VPC_PACK_NUMBYPASSVAR(uint32_t val) +{ + return ((val) << A4XX_VPC_PACK_NUMBYPASSVAR__SHIFT) & A4XX_VPC_PACK_NUMBYPASSVAR__MASK; +} +#define A4XX_VPC_PACK_NUMFPNONPOSVAR__MASK 0x0000ff00 +#define A4XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT 8 +static inline uint32_t A4XX_VPC_PACK_NUMFPNONPOSVAR(uint32_t val) +{ + return ((val) << A4XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT) & A4XX_VPC_PACK_NUMFPNONPOSVAR__MASK; +} +#define A4XX_VPC_PACK_NUMNONPOSVSVAR__MASK 0x00ff0000 +#define A4XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT 16 +static inline uint32_t A4XX_VPC_PACK_NUMNONPOSVSVAR(uint32_t val) +{ + return ((val) << A4XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT) & A4XX_VPC_PACK_NUMNONPOSVSVAR__MASK; +} + +static inline uint32_t REG_A4XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x00002142 + 0x1*i0; } + +static inline uint32_t REG_A4XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x00002142 + 0x1*i0; } + +static inline uint32_t REG_A4XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x0000214a + 0x1*i0; } + +static inline 
uint32_t REG_A4XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x0000214a + 0x1*i0; } + +#define REG_A4XX_VPC_SO_FLUSH_WADDR_3 0x0000216e + +#define REG_A4XX_VSC_BIN_SIZE 0x00000c00 +#define A4XX_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f +#define A4XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 +static inline uint32_t A4XX_VSC_BIN_SIZE_WIDTH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A4XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A4XX_VSC_BIN_SIZE_WIDTH__MASK; +} +#define A4XX_VSC_BIN_SIZE_HEIGHT__MASK 0x000003e0 +#define A4XX_VSC_BIN_SIZE_HEIGHT__SHIFT 5 +static inline uint32_t A4XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A4XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A4XX_VSC_BIN_SIZE_HEIGHT__MASK; +} + +#define REG_A4XX_VSC_SIZE_ADDRESS 0x00000c01 + +#define REG_A4XX_VSC_SIZE_ADDRESS2 0x00000c02 + +#define REG_A4XX_VSC_DEBUG_ECO_CONTROL 0x00000c03 + +static inline uint32_t REG_A4XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c08 + 0x1*i0; } + +static inline uint32_t REG_A4XX_VSC_PIPE_CONFIG_REG(uint32_t i0) { return 0x00000c08 + 0x1*i0; } +#define A4XX_VSC_PIPE_CONFIG_REG_X__MASK 0x000003ff +#define A4XX_VSC_PIPE_CONFIG_REG_X__SHIFT 0 +static inline uint32_t A4XX_VSC_PIPE_CONFIG_REG_X(uint32_t val) +{ + return ((val) << A4XX_VSC_PIPE_CONFIG_REG_X__SHIFT) & A4XX_VSC_PIPE_CONFIG_REG_X__MASK; +} +#define A4XX_VSC_PIPE_CONFIG_REG_Y__MASK 0x000ffc00 +#define A4XX_VSC_PIPE_CONFIG_REG_Y__SHIFT 10 +static inline uint32_t A4XX_VSC_PIPE_CONFIG_REG_Y(uint32_t val) +{ + return ((val) << A4XX_VSC_PIPE_CONFIG_REG_Y__SHIFT) & A4XX_VSC_PIPE_CONFIG_REG_Y__MASK; +} +#define A4XX_VSC_PIPE_CONFIG_REG_W__MASK 0x00f00000 +#define A4XX_VSC_PIPE_CONFIG_REG_W__SHIFT 20 +static inline uint32_t A4XX_VSC_PIPE_CONFIG_REG_W(uint32_t val) +{ + return ((val) << A4XX_VSC_PIPE_CONFIG_REG_W__SHIFT) & A4XX_VSC_PIPE_CONFIG_REG_W__MASK; +} +#define A4XX_VSC_PIPE_CONFIG_REG_H__MASK 0x0f000000 +#define A4XX_VSC_PIPE_CONFIG_REG_H__SHIFT 24 +static inline uint32_t 
A4XX_VSC_PIPE_CONFIG_REG_H(uint32_t val) +{ + return ((val) << A4XX_VSC_PIPE_CONFIG_REG_H__SHIFT) & A4XX_VSC_PIPE_CONFIG_REG_H__MASK; +} + +static inline uint32_t REG_A4XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c10 + 0x1*i0; } + +static inline uint32_t REG_A4XX_VSC_PIPE_DATA_ADDRESS_REG(uint32_t i0) { return 0x00000c10 + 0x1*i0; } + +static inline uint32_t REG_A4XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c18 + 0x1*i0; } + +static inline uint32_t REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(uint32_t i0) { return 0x00000c18 + 0x1*i0; } + +#define REG_A4XX_VSC_PIPE_PARTIAL_POSN_1 0x00000c41 + +#define REG_A4XX_VSC_PERFCTR_VSC_SEL_0 0x00000c50 + +#define REG_A4XX_VSC_PERFCTR_VSC_SEL_1 0x00000c51 + +#define REG_A4XX_VFD_DEBUG_CONTROL 0x00000e40 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_0 0x00000e43 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_1 0x00000e44 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_2 0x00000e45 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_3 0x00000e46 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_4 0x00000e47 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_5 0x00000e48 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_6 0x00000e49 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_7 0x00000e4a + +#define REG_A4XX_VGT_CL_INITIATOR 0x000021d0 + +#define REG_A4XX_VGT_EVENT_INITIATOR 0x000021d9 + +#define REG_A4XX_VFD_CONTROL_0 0x00002200 +#define A4XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK 0x000000ff +#define A4XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT 0 +static inline uint32_t A4XX_VFD_CONTROL_0_TOTALATTRTOVS(uint32_t val) +{ + return ((val) << A4XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT) & A4XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK; +} +#define A4XX_VFD_CONTROL_0_BYPASSATTROVS__MASK 0x0001fe00 +#define A4XX_VFD_CONTROL_0_BYPASSATTROVS__SHIFT 9 +static inline uint32_t A4XX_VFD_CONTROL_0_BYPASSATTROVS(uint32_t val) +{ + return ((val) << A4XX_VFD_CONTROL_0_BYPASSATTROVS__SHIFT) & A4XX_VFD_CONTROL_0_BYPASSATTROVS__MASK; +} +#define A4XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK 0x03f00000 +#define 
A4XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT 20 +static inline uint32_t A4XX_VFD_CONTROL_0_STRMDECINSTRCNT(uint32_t val) +{ + return ((val) << A4XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT) & A4XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK; +} +#define A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK 0xfc000000 +#define A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT 26 +static inline uint32_t A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val) +{ + return ((val) << A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT) & A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK; +} + +#define REG_A4XX_VFD_CONTROL_1 0x00002201 +#define A4XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000ffff +#define A4XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT 0 +static inline uint32_t A4XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val) +{ + return ((val) << A4XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT) & A4XX_VFD_CONTROL_1_MAXSTORAGE__MASK; +} +#define A4XX_VFD_CONTROL_1_REGID4VTX__MASK 0x00ff0000 +#define A4XX_VFD_CONTROL_1_REGID4VTX__SHIFT 16 +static inline uint32_t A4XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) +{ + return ((val) << A4XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A4XX_VFD_CONTROL_1_REGID4VTX__MASK; +} +#define A4XX_VFD_CONTROL_1_REGID4INST__MASK 0xff000000 +#define A4XX_VFD_CONTROL_1_REGID4INST__SHIFT 24 +static inline uint32_t A4XX_VFD_CONTROL_1_REGID4INST(uint32_t val) +{ + return ((val) << A4XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A4XX_VFD_CONTROL_1_REGID4INST__MASK; +} + +#define REG_A4XX_VFD_CONTROL_2 0x00002202 + +#define REG_A4XX_VFD_CONTROL_3 0x00002203 +#define A4XX_VFD_CONTROL_3_REGID_VTXCNT__MASK 0x0000ff00 +#define A4XX_VFD_CONTROL_3_REGID_VTXCNT__SHIFT 8 +static inline uint32_t A4XX_VFD_CONTROL_3_REGID_VTXCNT(uint32_t val) +{ + return ((val) << A4XX_VFD_CONTROL_3_REGID_VTXCNT__SHIFT) & A4XX_VFD_CONTROL_3_REGID_VTXCNT__MASK; +} +#define A4XX_VFD_CONTROL_3_REGID_TESSX__MASK 0x00ff0000 +#define A4XX_VFD_CONTROL_3_REGID_TESSX__SHIFT 16 +static inline uint32_t A4XX_VFD_CONTROL_3_REGID_TESSX(uint32_t val) +{ + return ((val) << 
A4XX_VFD_CONTROL_3_REGID_TESSX__SHIFT) & A4XX_VFD_CONTROL_3_REGID_TESSX__MASK; +} +#define A4XX_VFD_CONTROL_3_REGID_TESSY__MASK 0xff000000 +#define A4XX_VFD_CONTROL_3_REGID_TESSY__SHIFT 24 +static inline uint32_t A4XX_VFD_CONTROL_3_REGID_TESSY(uint32_t val) +{ + return ((val) << A4XX_VFD_CONTROL_3_REGID_TESSY__SHIFT) & A4XX_VFD_CONTROL_3_REGID_TESSY__MASK; +} + +#define REG_A4XX_VFD_CONTROL_4 0x00002204 + +#define REG_A4XX_VFD_INDEX_OFFSET 0x00002208 + +static inline uint32_t REG_A4XX_VFD_FETCH(uint32_t i0) { return 0x0000220a + 0x4*i0; } + +static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_0(uint32_t i0) { return 0x0000220a + 0x4*i0; } +#define A4XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK 0x0000007f +#define A4XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT 0 +static inline uint32_t A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(uint32_t val) +{ + return ((val) << A4XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT) & A4XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK; +} +#define A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK 0x0001ff80 +#define A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT 7 +static inline uint32_t A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(uint32_t val) +{ + return ((val) << A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT) & A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK; +} +#define A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT 0x00080000 +#define A4XX_VFD_FETCH_INSTR_0_INSTANCED 0x00100000 + +static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_1(uint32_t i0) { return 0x0000220b + 0x4*i0; } + +static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_2(uint32_t i0) { return 0x0000220c + 0x4*i0; } +#define A4XX_VFD_FETCH_INSTR_2_SIZE__MASK 0xffffffff +#define A4XX_VFD_FETCH_INSTR_2_SIZE__SHIFT 0 +static inline uint32_t A4XX_VFD_FETCH_INSTR_2_SIZE(uint32_t val) +{ + return ((val) << A4XX_VFD_FETCH_INSTR_2_SIZE__SHIFT) & A4XX_VFD_FETCH_INSTR_2_SIZE__MASK; +} + +static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_3(uint32_t i0) { return 0x0000220d + 0x4*i0; } +#define A4XX_VFD_FETCH_INSTR_3_STEPRATE__MASK 0x000001ff +#define A4XX_VFD_FETCH_INSTR_3_STEPRATE__SHIFT 0 
+static inline uint32_t A4XX_VFD_FETCH_INSTR_3_STEPRATE(uint32_t val) +{ + return ((val) << A4XX_VFD_FETCH_INSTR_3_STEPRATE__SHIFT) & A4XX_VFD_FETCH_INSTR_3_STEPRATE__MASK; +} + +static inline uint32_t REG_A4XX_VFD_DECODE(uint32_t i0) { return 0x0000228a + 0x1*i0; } + +static inline uint32_t REG_A4XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x0000228a + 0x1*i0; } +#define A4XX_VFD_DECODE_INSTR_WRITEMASK__MASK 0x0000000f +#define A4XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT 0 +static inline uint32_t A4XX_VFD_DECODE_INSTR_WRITEMASK(uint32_t val) +{ + return ((val) << A4XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT) & A4XX_VFD_DECODE_INSTR_WRITEMASK__MASK; +} +#define A4XX_VFD_DECODE_INSTR_CONSTFILL 0x00000010 +#define A4XX_VFD_DECODE_INSTR_FORMAT__MASK 0x00000fc0 +#define A4XX_VFD_DECODE_INSTR_FORMAT__SHIFT 6 +static inline uint32_t A4XX_VFD_DECODE_INSTR_FORMAT(enum a4xx_vtx_fmt val) +{ + return ((val) << A4XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A4XX_VFD_DECODE_INSTR_FORMAT__MASK; +} +#define A4XX_VFD_DECODE_INSTR_REGID__MASK 0x000ff000 +#define A4XX_VFD_DECODE_INSTR_REGID__SHIFT 12 +static inline uint32_t A4XX_VFD_DECODE_INSTR_REGID(uint32_t val) +{ + return ((val) << A4XX_VFD_DECODE_INSTR_REGID__SHIFT) & A4XX_VFD_DECODE_INSTR_REGID__MASK; +} +#define A4XX_VFD_DECODE_INSTR_INT 0x00100000 +#define A4XX_VFD_DECODE_INSTR_SWAP__MASK 0x00c00000 +#define A4XX_VFD_DECODE_INSTR_SWAP__SHIFT 22 +static inline uint32_t A4XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A4XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A4XX_VFD_DECODE_INSTR_SWAP__MASK; +} +#define A4XX_VFD_DECODE_INSTR_SHIFTCNT__MASK 0x1f000000 +#define A4XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT 24 +static inline uint32_t A4XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val) +{ + return ((val) << A4XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT) & A4XX_VFD_DECODE_INSTR_SHIFTCNT__MASK; +} +#define A4XX_VFD_DECODE_INSTR_LASTCOMPVALID 0x20000000 +#define A4XX_VFD_DECODE_INSTR_SWITCHNEXT 0x40000000 + +#define REG_A4XX_TPL1_DEBUG_ECO_CONTROL 
0x00000f00 + +#define REG_A4XX_TPL1_TP_MODE_CONTROL 0x00000f03 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_0 0x00000f04 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_1 0x00000f05 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_2 0x00000f06 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_3 0x00000f07 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_4 0x00000f08 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_5 0x00000f09 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_6 0x00000f0a + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_7 0x00000f0b + +#define REG_A4XX_TPL1_TP_TEX_OFFSET 0x00002380 + +#define REG_A4XX_TPL1_TP_TEX_COUNT 0x00002381 +#define A4XX_TPL1_TP_TEX_COUNT_VS__MASK 0x000000ff +#define A4XX_TPL1_TP_TEX_COUNT_VS__SHIFT 0 +static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_VS(uint32_t val) +{ + return ((val) << A4XX_TPL1_TP_TEX_COUNT_VS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_VS__MASK; +} +#define A4XX_TPL1_TP_TEX_COUNT_HS__MASK 0x0000ff00 +#define A4XX_TPL1_TP_TEX_COUNT_HS__SHIFT 8 +static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_HS(uint32_t val) +{ + return ((val) << A4XX_TPL1_TP_TEX_COUNT_HS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_HS__MASK; +} +#define A4XX_TPL1_TP_TEX_COUNT_DS__MASK 0x00ff0000 +#define A4XX_TPL1_TP_TEX_COUNT_DS__SHIFT 16 +static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_DS(uint32_t val) +{ + return ((val) << A4XX_TPL1_TP_TEX_COUNT_DS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_DS__MASK; +} +#define A4XX_TPL1_TP_TEX_COUNT_GS__MASK 0xff000000 +#define A4XX_TPL1_TP_TEX_COUNT_GS__SHIFT 24 +static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val) +{ + return ((val) << A4XX_TPL1_TP_TEX_COUNT_GS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_GS__MASK; +} + +#define REG_A4XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR 0x00002384 + +#define REG_A4XX_TPL1_TP_HS_BORDER_COLOR_BASE_ADDR 0x00002387 + +#define REG_A4XX_TPL1_TP_DS_BORDER_COLOR_BASE_ADDR 0x0000238a + +#define REG_A4XX_TPL1_TP_GS_BORDER_COLOR_BASE_ADDR 0x0000238d + +#define REG_A4XX_TPL1_TP_FS_TEX_COUNT 0x000023a0 + +#define REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR 0x000023a1 + +#define 
REG_A4XX_TPL1_TP_CS_BORDER_COLOR_BASE_ADDR 0x000023a4 + +#define REG_A4XX_TPL1_TP_CS_SAMPLER_BASE_ADDR 0x000023a5 + +#define REG_A4XX_TPL1_TP_CS_TEXMEMOBJ_BASE_ADDR 0x000023a6 + +#define REG_A4XX_GRAS_TSE_STATUS 0x00000c80 + +#define REG_A4XX_GRAS_DEBUG_ECO_CONTROL 0x00000c81 + +#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_0 0x00000c88 + +#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_1 0x00000c89 + +#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_2 0x00000c8a + +#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c8b + +#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_0 0x00000c8c + +#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_1 0x00000c8d + +#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_2 0x00000c8e + +#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_3 0x00000c8f + +#define REG_A4XX_GRAS_CL_CLIP_CNTL 0x00002000 +#define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00008000 +#define A4XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE 0x00010000 +#define A4XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE 0x00020000 +#define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000 + +#define REG_A4XX_GRAS_CLEAR_CNTL 0x00002003 +#define A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR 0x00000001 + +#define REG_A4XX_GRAS_CL_GB_CLIP_ADJ 0x00002004 +#define A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK 0x000003ff +#define A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT 0 +static inline uint32_t A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ(uint32_t val) +{ + return ((val) << A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT) & A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK; +} +#define A4XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK 0x000ffc00 +#define A4XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT 10 +static inline uint32_t A4XX_GRAS_CL_GB_CLIP_ADJ_VERT(uint32_t val) +{ + return ((val) << A4XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT) & A4XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK; +} + +#define REG_A4XX_GRAS_CL_VPORT_XOFFSET_0 0x00002008 +#define A4XX_GRAS_CL_VPORT_XOFFSET_0__MASK 0xffffffff +#define A4XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT 0 +static inline uint32_t A4XX_GRAS_CL_VPORT_XOFFSET_0(float val) +{ + return ((fui(val)) << A4XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT) & 
A4XX_GRAS_CL_VPORT_XOFFSET_0__MASK; +} + +#define REG_A4XX_GRAS_CL_VPORT_XSCALE_0 0x00002009 +#define A4XX_GRAS_CL_VPORT_XSCALE_0__MASK 0xffffffff +#define A4XX_GRAS_CL_VPORT_XSCALE_0__SHIFT 0 +static inline uint32_t A4XX_GRAS_CL_VPORT_XSCALE_0(float val) +{ + return ((fui(val)) << A4XX_GRAS_CL_VPORT_XSCALE_0__SHIFT) & A4XX_GRAS_CL_VPORT_XSCALE_0__MASK; +} + +#define REG_A4XX_GRAS_CL_VPORT_YOFFSET_0 0x0000200a +#define A4XX_GRAS_CL_VPORT_YOFFSET_0__MASK 0xffffffff +#define A4XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT 0 +static inline uint32_t A4XX_GRAS_CL_VPORT_YOFFSET_0(float val) +{ + return ((fui(val)) << A4XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT) & A4XX_GRAS_CL_VPORT_YOFFSET_0__MASK; +} + +#define REG_A4XX_GRAS_CL_VPORT_YSCALE_0 0x0000200b +#define A4XX_GRAS_CL_VPORT_YSCALE_0__MASK 0xffffffff +#define A4XX_GRAS_CL_VPORT_YSCALE_0__SHIFT 0 +static inline uint32_t A4XX_GRAS_CL_VPORT_YSCALE_0(float val) +{ + return ((fui(val)) << A4XX_GRAS_CL_VPORT_YSCALE_0__SHIFT) & A4XX_GRAS_CL_VPORT_YSCALE_0__MASK; +} + +#define REG_A4XX_GRAS_CL_VPORT_ZOFFSET_0 0x0000200c +#define A4XX_GRAS_CL_VPORT_ZOFFSET_0__MASK 0xffffffff +#define A4XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT 0 +static inline uint32_t A4XX_GRAS_CL_VPORT_ZOFFSET_0(float val) +{ + return ((fui(val)) << A4XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT) & A4XX_GRAS_CL_VPORT_ZOFFSET_0__MASK; +} + +#define REG_A4XX_GRAS_CL_VPORT_ZSCALE_0 0x0000200d +#define A4XX_GRAS_CL_VPORT_ZSCALE_0__MASK 0xffffffff +#define A4XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT 0 +static inline uint32_t A4XX_GRAS_CL_VPORT_ZSCALE_0(float val) +{ + return ((fui(val)) << A4XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT) & A4XX_GRAS_CL_VPORT_ZSCALE_0__MASK; +} + +#define REG_A4XX_GRAS_SU_POINT_MINMAX 0x00002070 +#define A4XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff +#define A4XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0 +static inline uint32_t A4XX_GRAS_SU_POINT_MINMAX_MIN(float val) +{ + return ((((uint32_t)(val * 16.0))) << A4XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A4XX_GRAS_SU_POINT_MINMAX_MIN__MASK; +} 
+#define A4XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000 +#define A4XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16 +static inline uint32_t A4XX_GRAS_SU_POINT_MINMAX_MAX(float val) +{ + return ((((uint32_t)(val * 16.0))) << A4XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A4XX_GRAS_SU_POINT_MINMAX_MAX__MASK; +} + +#define REG_A4XX_GRAS_SU_POINT_SIZE 0x00002071 +#define A4XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff +#define A4XX_GRAS_SU_POINT_SIZE__SHIFT 0 +static inline uint32_t A4XX_GRAS_SU_POINT_SIZE(float val) +{ + return ((((int32_t)(val * 16.0))) << A4XX_GRAS_SU_POINT_SIZE__SHIFT) & A4XX_GRAS_SU_POINT_SIZE__MASK; +} + +#define REG_A4XX_GRAS_ALPHA_CONTROL 0x00002073 +#define A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE 0x00000004 +#define A4XX_GRAS_ALPHA_CONTROL_FORCE_FRAGZ_TO_FS 0x00000008 + +#define REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE 0x00002074 +#define A4XX_GRAS_SU_POLY_OFFSET_SCALE__MASK 0xffffffff +#define A4XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT 0 +static inline uint32_t A4XX_GRAS_SU_POLY_OFFSET_SCALE(float val) +{ + return ((fui(val)) << A4XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT) & A4XX_GRAS_SU_POLY_OFFSET_SCALE__MASK; +} + +#define REG_A4XX_GRAS_SU_POLY_OFFSET_OFFSET 0x00002075 +#define A4XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff +#define A4XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0 +static inline uint32_t A4XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) +{ + return ((fui(val)) << A4XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A4XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; +} + +#define REG_A4XX_GRAS_SU_POLY_OFFSET_CLAMP 0x00002076 +#define A4XX_GRAS_SU_POLY_OFFSET_CLAMP__MASK 0xffffffff +#define A4XX_GRAS_SU_POLY_OFFSET_CLAMP__SHIFT 0 +static inline uint32_t A4XX_GRAS_SU_POLY_OFFSET_CLAMP(float val) +{ + return ((fui(val)) << A4XX_GRAS_SU_POLY_OFFSET_CLAMP__SHIFT) & A4XX_GRAS_SU_POLY_OFFSET_CLAMP__MASK; +} + +#define REG_A4XX_GRAS_DEPTH_CONTROL 0x00002077 +#define A4XX_GRAS_DEPTH_CONTROL_FORMAT__MASK 0x00000003 +#define A4XX_GRAS_DEPTH_CONTROL_FORMAT__SHIFT 0 +static inline uint32_t 
A4XX_GRAS_DEPTH_CONTROL_FORMAT(enum a4xx_depth_format val) +{ + return ((val) << A4XX_GRAS_DEPTH_CONTROL_FORMAT__SHIFT) & A4XX_GRAS_DEPTH_CONTROL_FORMAT__MASK; +} + +#define REG_A4XX_GRAS_SU_MODE_CONTROL 0x00002078 +#define A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT 0x00000001 +#define A4XX_GRAS_SU_MODE_CONTROL_CULL_BACK 0x00000002 +#define A4XX_GRAS_SU_MODE_CONTROL_FRONT_CW 0x00000004 +#define A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK 0x000007f8 +#define A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT 3 +static inline uint32_t A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(float val) +{ + return ((((int32_t)(val * 4.0))) << A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT) & A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK; +} +#define A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET 0x00000800 +#define A4XX_GRAS_SU_MODE_CONTROL_MSAA_ENABLE 0x00002000 +#define A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS 0x00100000 + +#define REG_A4XX_GRAS_SC_CONTROL 0x0000207b +#define A4XX_GRAS_SC_CONTROL_RENDER_MODE__MASK 0x0000000c +#define A4XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT 2 +static inline uint32_t A4XX_GRAS_SC_CONTROL_RENDER_MODE(enum a3xx_render_mode val) +{ + return ((val) << A4XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT) & A4XX_GRAS_SC_CONTROL_RENDER_MODE__MASK; +} +#define A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK 0x00000380 +#define A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT 7 +static inline uint32_t A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT) & A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK; +} +#define A4XX_GRAS_SC_CONTROL_MSAA_DISABLE 0x00000800 +#define A4XX_GRAS_SC_CONTROL_RASTER_MODE__MASK 0x0000f000 +#define A4XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT 12 +static inline uint32_t A4XX_GRAS_SC_CONTROL_RASTER_MODE(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT) & A4XX_GRAS_SC_CONTROL_RASTER_MODE__MASK; +} + +#define REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL 0x0000207c +#define 
A4XX_GRAS_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK 0x00007fff +#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT 0 +static inline uint32_t A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT) & A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK; +} +#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK 0x7fff0000 +#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT 16 +static inline uint32_t A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT) & A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK; +} + +#define REG_A4XX_GRAS_SC_SCREEN_SCISSOR_BR 0x0000207d +#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK 0x00007fff +#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT 0 +static inline uint32_t A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT) & A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK; +} +#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK 0x7fff0000 +#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT 16 +static inline uint32_t A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT) & A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK; +} + +#define REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR 0x0000209c +#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff +#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 +static inline uint32_t A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK; +} +#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 +#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 +static inline uint32_t A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) +{ + return ((val) << 
A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK; +} + +#define REG_A4XX_GRAS_SC_WINDOW_SCISSOR_TL 0x0000209d +#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff +#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 +static inline uint32_t A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK; +} +#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 +#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 +static inline uint32_t A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK; +} + +#define REG_A4XX_GRAS_SC_EXTENT_WINDOW_BR 0x0000209e +#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_X__MASK 0x00007fff +#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_X__SHIFT 0 +static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_BR_X(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_EXTENT_WINDOW_BR_X__SHIFT) & A4XX_GRAS_SC_EXTENT_WINDOW_BR_X__MASK; +} +#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y__MASK 0x7fff0000 +#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y__SHIFT 16 +static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y__SHIFT) & A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y__MASK; +} + +#define REG_A4XX_GRAS_SC_EXTENT_WINDOW_TL 0x0000209f +#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_X__MASK 0x00007fff +#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_X__SHIFT 0 +static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_X(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_EXTENT_WINDOW_TL_X__SHIFT) & A4XX_GRAS_SC_EXTENT_WINDOW_TL_X__MASK; +} +#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y__MASK 0x7fff0000 +#define 
A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y__SHIFT 16 +static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y__SHIFT) & A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y__MASK; +} + +#define REG_A4XX_UCHE_CACHE_MODE_CONTROL 0x00000e80 + +#define REG_A4XX_UCHE_TRAP_BASE_LO 0x00000e83 + +#define REG_A4XX_UCHE_TRAP_BASE_HI 0x00000e84 + +#define REG_A4XX_UCHE_CACHE_STATUS 0x00000e88 + +#define REG_A4XX_UCHE_INVALIDATE0 0x00000e8a + +#define REG_A4XX_UCHE_INVALIDATE1 0x00000e8b + +#define REG_A4XX_UCHE_CACHE_WAYS_VFD 0x00000e8c + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_0 0x00000e8e + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_1 0x00000e8f + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_2 0x00000e90 + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_3 0x00000e91 + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_4 0x00000e92 + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_5 0x00000e93 + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_6 0x00000e94 + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_7 0x00000e95 + +#define REG_A4XX_HLSQ_TIMEOUT_THRESHOLD 0x00000e00 + +#define REG_A4XX_HLSQ_DEBUG_ECO_CONTROL 0x00000e04 + +#define REG_A4XX_HLSQ_MODE_CONTROL 0x00000e05 + +#define REG_A4XX_HLSQ_PERF_PIPE_MASK 0x00000e0e + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_0 0x00000e06 + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_1 0x00000e07 + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_2 0x00000e08 + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_3 0x00000e09 + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_4 0x00000e0a + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_5 0x00000e0b + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_6 0x00000e0c + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_7 0x00000e0d + +#define REG_A4XX_HLSQ_CONTROL_0_REG 0x000023c0 +#define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000010 +#define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 4 +static inline uint32_t A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & 
A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK; +} +#define A4XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE 0x00000040 +#define A4XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART 0x00000200 +#define A4XX_HLSQ_CONTROL_0_REG_RESERVED2 0x00000400 +#define A4XX_HLSQ_CONTROL_0_REG_CHUNKDISABLE 0x04000000 +#define A4XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK 0x08000000 +#define A4XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT 27 +static inline uint32_t A4XX_HLSQ_CONTROL_0_REG_CONSTMODE(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT) & A4XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK; +} +#define A4XX_HLSQ_CONTROL_0_REG_LAZYUPDATEDISABLE 0x10000000 +#define A4XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE 0x20000000 +#define A4XX_HLSQ_CONTROL_0_REG_TPFULLUPDATE 0x40000000 +#define A4XX_HLSQ_CONTROL_0_REG_SINGLECONTEXT 0x80000000 + +#define REG_A4XX_HLSQ_CONTROL_1_REG 0x000023c1 +#define A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK 0x00000040 +#define A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT 6 +static inline uint32_t A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT) & A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK; +} +#define A4XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE 0x00000100 +#define A4XX_HLSQ_CONTROL_1_REG_RESERVED1 0x00000200 +#define A4XX_HLSQ_CONTROL_1_REG_COORDREGID__MASK 0x00ff0000 +#define A4XX_HLSQ_CONTROL_1_REG_COORDREGID__SHIFT 16 +static inline uint32_t A4XX_HLSQ_CONTROL_1_REG_COORDREGID(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CONTROL_1_REG_COORDREGID__SHIFT) & A4XX_HLSQ_CONTROL_1_REG_COORDREGID__MASK; +} +#define A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__MASK 0xff000000 +#define A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__SHIFT 24 +static inline uint32_t A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__SHIFT) & A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__MASK; +} + +#define REG_A4XX_HLSQ_CONTROL_2_REG 0x000023c2 +#define 
A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK 0xfc000000 +#define A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT 26 +static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT) & A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK; +} +#define A4XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK 0x000003fc +#define A4XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT 2 +static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_FACEREGID(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT) & A4XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK; +} +#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__MASK 0x0003fc00 +#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__SHIFT 10 +static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__SHIFT) & A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__MASK; +} +#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__MASK 0x03fc0000 +#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__SHIFT 18 +static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__SHIFT) & A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__MASK; +} + +#define REG_A4XX_HLSQ_CONTROL_3_REG 0x000023c3 +#define A4XX_HLSQ_CONTROL_3_REG_REGID__MASK 0x000000ff +#define A4XX_HLSQ_CONTROL_3_REG_REGID__SHIFT 0 +static inline uint32_t A4XX_HLSQ_CONTROL_3_REG_REGID(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CONTROL_3_REG_REGID__SHIFT) & A4XX_HLSQ_CONTROL_3_REG_REGID__MASK; +} + +#define REG_A4XX_HLSQ_CONTROL_4_REG 0x000023c4 + +#define REG_A4XX_HLSQ_VS_CONTROL_REG 0x000023c5 +#define A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff +#define A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT 0 +static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT) & 
A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK; +} +#define A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 +#define A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 +static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_HLSQ_VS_CONTROL_REG_SSBO_ENABLE 0x00008000 +#define A4XX_HLSQ_VS_CONTROL_REG_ENABLED 0x00010000 +#define A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 +#define A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 +static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__MASK; +} +#define A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 +#define A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT 24 +static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK; +} + +#define REG_A4XX_HLSQ_FS_CONTROL_REG 0x000023c6 +#define A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff +#define A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT 0 +static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK; +} +#define A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 +#define A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 +static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_HLSQ_FS_CONTROL_REG_SSBO_ENABLE 0x00008000 +#define A4XX_HLSQ_FS_CONTROL_REG_ENABLED 0x00010000 +#define 
A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 +#define A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 +static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__MASK; +} +#define A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 +#define A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT 24 +static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK; +} + +#define REG_A4XX_HLSQ_HS_CONTROL_REG 0x000023c7 +#define A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff +#define A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH__SHIFT 0 +static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH__MASK; +} +#define A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 +#define A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 +static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_HLSQ_HS_CONTROL_REG_SSBO_ENABLE 0x00008000 +#define A4XX_HLSQ_HS_CONTROL_REG_ENABLED 0x00010000 +#define A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 +#define A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 +static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__MASK; +} +#define A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 +#define A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH__SHIFT 24 +static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH(uint32_t val) +{ + return ((val) << 
A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH__MASK; +} + +#define REG_A4XX_HLSQ_DS_CONTROL_REG 0x000023c8 +#define A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff +#define A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH__SHIFT 0 +static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH__MASK; +} +#define A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 +#define A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 +static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_HLSQ_DS_CONTROL_REG_SSBO_ENABLE 0x00008000 +#define A4XX_HLSQ_DS_CONTROL_REG_ENABLED 0x00010000 +#define A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 +#define A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 +static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__MASK; +} +#define A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 +#define A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH__SHIFT 24 +static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH__MASK; +} + +#define REG_A4XX_HLSQ_GS_CONTROL_REG 0x000023c9 +#define A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff +#define A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH__SHIFT 0 +static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH__MASK; +} +#define A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 +#define 
A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 +static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_HLSQ_GS_CONTROL_REG_SSBO_ENABLE 0x00008000 +#define A4XX_HLSQ_GS_CONTROL_REG_ENABLED 0x00010000 +#define A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 +#define A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 +static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__MASK; +} +#define A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 +#define A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__SHIFT 24 +static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__MASK; +} + +#define REG_A4XX_HLSQ_CS_CONTROL_REG 0x000023ca +#define A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff +#define A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH__SHIFT 0 +static inline uint32_t A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH__MASK; +} +#define A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 +#define A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 +static inline uint32_t A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_HLSQ_CS_CONTROL_REG_SSBO_ENABLE 0x00008000 +#define A4XX_HLSQ_CS_CONTROL_REG_ENABLED 0x00010000 +#define A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 +#define A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 +static inline uint32_t 
A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET__MASK; +} +#define A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 +#define A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH__SHIFT 24 +static inline uint32_t A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH__MASK; +} + +#define REG_A4XX_HLSQ_CL_NDRANGE_0 0x000023cd +#define A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM__MASK 0x00000003 +#define A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM__SHIFT 0 +static inline uint32_t A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM__SHIFT) & A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM__MASK; +} +#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX__MASK 0x00000ffc +#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX__SHIFT 2 +static inline uint32_t A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX__SHIFT) & A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX__MASK; +} +#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY__MASK 0x003ff000 +#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY__SHIFT 12 +static inline uint32_t A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY__SHIFT) & A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY__MASK; +} +#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ__MASK 0xffc00000 +#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ__SHIFT 22 +static inline uint32_t A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ__SHIFT) & A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ__MASK; +} + +#define REG_A4XX_HLSQ_CL_NDRANGE_1 0x000023ce +#define A4XX_HLSQ_CL_NDRANGE_1_SIZE_X__MASK 0xffffffff +#define A4XX_HLSQ_CL_NDRANGE_1_SIZE_X__SHIFT 0 +static inline uint32_t A4XX_HLSQ_CL_NDRANGE_1_SIZE_X(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_NDRANGE_1_SIZE_X__SHIFT) & 
A4XX_HLSQ_CL_NDRANGE_1_SIZE_X__MASK; +} + +#define REG_A4XX_HLSQ_CL_NDRANGE_2 0x000023cf + +#define REG_A4XX_HLSQ_CL_NDRANGE_3 0x000023d0 +#define A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y__MASK 0xffffffff +#define A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y__SHIFT 0 +static inline uint32_t A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y__SHIFT) & A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y__MASK; +} + +#define REG_A4XX_HLSQ_CL_NDRANGE_4 0x000023d1 + +#define REG_A4XX_HLSQ_CL_NDRANGE_5 0x000023d2 +#define A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z__MASK 0xffffffff +#define A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z__SHIFT 0 +static inline uint32_t A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z__SHIFT) & A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z__MASK; +} + +#define REG_A4XX_HLSQ_CL_NDRANGE_6 0x000023d3 + +#define REG_A4XX_HLSQ_CL_CONTROL_0 0x000023d4 +#define A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID__MASK 0x000000ff +#define A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID__SHIFT 0 +static inline uint32_t A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID__SHIFT) & A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID__MASK; +} +#define A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID__MASK 0xff000000 +#define A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID__SHIFT 24 +static inline uint32_t A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID__SHIFT) & A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID__MASK; +} + +#define REG_A4XX_HLSQ_CL_CONTROL_1 0x000023d5 + +#define REG_A4XX_HLSQ_CL_KERNEL_CONST 0x000023d6 + +#define REG_A4XX_HLSQ_CL_KERNEL_GROUP_X 0x000023d7 + +#define REG_A4XX_HLSQ_CL_KERNEL_GROUP_Y 0x000023d8 + +#define REG_A4XX_HLSQ_CL_KERNEL_GROUP_Z 0x000023d9 + +#define REG_A4XX_HLSQ_CL_WG_OFFSET 0x000023da + +#define REG_A4XX_HLSQ_UPDATE_CONTROL 0x000023db + +#define REG_A4XX_PC_BINNING_COMMAND 0x00000d00 +#define A4XX_PC_BINNING_COMMAND_BINNING_ENABLE 0x00000001 + +#define REG_A4XX_PC_TESSFACTOR_ADDR 
0x00000d08 + +#define REG_A4XX_PC_DRAWCALL_SETUP_OVERRIDE 0x00000d0c + +#define REG_A4XX_PC_PERFCTR_PC_SEL_0 0x00000d10 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_1 0x00000d11 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_2 0x00000d12 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_3 0x00000d13 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_4 0x00000d14 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_5 0x00000d15 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_6 0x00000d16 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_7 0x00000d17 + +#define REG_A4XX_PC_BIN_BASE 0x000021c0 + +#define REG_A4XX_PC_VSTREAM_CONTROL 0x000021c2 +#define A4XX_PC_VSTREAM_CONTROL_SIZE__MASK 0x003f0000 +#define A4XX_PC_VSTREAM_CONTROL_SIZE__SHIFT 16 +static inline uint32_t A4XX_PC_VSTREAM_CONTROL_SIZE(uint32_t val) +{ + return ((val) << A4XX_PC_VSTREAM_CONTROL_SIZE__SHIFT) & A4XX_PC_VSTREAM_CONTROL_SIZE__MASK; +} +#define A4XX_PC_VSTREAM_CONTROL_N__MASK 0x07c00000 +#define A4XX_PC_VSTREAM_CONTROL_N__SHIFT 22 +static inline uint32_t A4XX_PC_VSTREAM_CONTROL_N(uint32_t val) +{ + return ((val) << A4XX_PC_VSTREAM_CONTROL_N__SHIFT) & A4XX_PC_VSTREAM_CONTROL_N__MASK; +} + +#define REG_A4XX_PC_PRIM_VTX_CNTL 0x000021c4 +#define A4XX_PC_PRIM_VTX_CNTL_VAROUT__MASK 0x0000000f +#define A4XX_PC_PRIM_VTX_CNTL_VAROUT__SHIFT 0 +static inline uint32_t A4XX_PC_PRIM_VTX_CNTL_VAROUT(uint32_t val) +{ + return ((val) << A4XX_PC_PRIM_VTX_CNTL_VAROUT__SHIFT) & A4XX_PC_PRIM_VTX_CNTL_VAROUT__MASK; +} +#define A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART 0x00100000 +#define A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST 0x02000000 +#define A4XX_PC_PRIM_VTX_CNTL_PSIZE 0x04000000 + +#define REG_A4XX_PC_PRIM_VTX_CNTL2 0x000021c5 +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK 0x00000007 +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT 0 +static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK; +} +#define 
A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK 0x00000038 +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT 3 +static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK; +} +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE 0x00000040 + +#define REG_A4XX_PC_RESTART_INDEX 0x000021c6 + +#define REG_A4XX_PC_GS_PARAM 0x000021e5 +#define A4XX_PC_GS_PARAM_MAX_VERTICES__MASK 0x000003ff +#define A4XX_PC_GS_PARAM_MAX_VERTICES__SHIFT 0 +static inline uint32_t A4XX_PC_GS_PARAM_MAX_VERTICES(uint32_t val) +{ + return ((val) << A4XX_PC_GS_PARAM_MAX_VERTICES__SHIFT) & A4XX_PC_GS_PARAM_MAX_VERTICES__MASK; +} +#define A4XX_PC_GS_PARAM_INVOCATIONS__MASK 0x0000f800 +#define A4XX_PC_GS_PARAM_INVOCATIONS__SHIFT 11 +static inline uint32_t A4XX_PC_GS_PARAM_INVOCATIONS(uint32_t val) +{ + return ((val) << A4XX_PC_GS_PARAM_INVOCATIONS__SHIFT) & A4XX_PC_GS_PARAM_INVOCATIONS__MASK; +} +#define A4XX_PC_GS_PARAM_PRIMTYPE__MASK 0x01800000 +#define A4XX_PC_GS_PARAM_PRIMTYPE__SHIFT 23 +static inline uint32_t A4XX_PC_GS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A4XX_PC_GS_PARAM_PRIMTYPE__SHIFT) & A4XX_PC_GS_PARAM_PRIMTYPE__MASK; +} +#define A4XX_PC_GS_PARAM_LAYER 0x80000000 + +#define REG_A4XX_PC_HS_PARAM 0x000021e7 +#define A4XX_PC_HS_PARAM_VERTICES_OUT__MASK 0x0000003f +#define A4XX_PC_HS_PARAM_VERTICES_OUT__SHIFT 0 +static inline uint32_t A4XX_PC_HS_PARAM_VERTICES_OUT(uint32_t val) +{ + return ((val) << A4XX_PC_HS_PARAM_VERTICES_OUT__SHIFT) & A4XX_PC_HS_PARAM_VERTICES_OUT__MASK; +} +#define A4XX_PC_HS_PARAM_SPACING__MASK 0x00600000 +#define A4XX_PC_HS_PARAM_SPACING__SHIFT 21 +static inline uint32_t A4XX_PC_HS_PARAM_SPACING(enum a4xx_tess_spacing val) +{ + return ((val) << A4XX_PC_HS_PARAM_SPACING__SHIFT) & A4XX_PC_HS_PARAM_SPACING__MASK; +} +#define A4XX_PC_HS_PARAM_CW 0x00800000 +#define 
A4XX_PC_HS_PARAM_CONNECTED 0x01000000 + +#define REG_A4XX_VBIF_VERSION 0x00003000 + +#define REG_A4XX_VBIF_CLKON 0x00003001 +#define A4XX_VBIF_CLKON_FORCE_ON_TESTBUS 0x00000001 + +#define REG_A4XX_VBIF_ABIT_SORT 0x0000301c + +#define REG_A4XX_VBIF_ABIT_SORT_CONF 0x0000301d + +#define REG_A4XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a + +#define REG_A4XX_VBIF_IN_RD_LIM_CONF0 0x0000302c + +#define REG_A4XX_VBIF_IN_RD_LIM_CONF1 0x0000302d + +#define REG_A4XX_VBIF_IN_WR_LIM_CONF0 0x00003030 + +#define REG_A4XX_VBIF_IN_WR_LIM_CONF1 0x00003031 + +#define REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB 0x00003049 + +#define REG_A4XX_VBIF_PERF_CNT_EN0 0x000030c0 + +#define REG_A4XX_VBIF_PERF_CNT_EN1 0x000030c1 + +#define REG_A4XX_VBIF_PERF_CNT_EN2 0x000030c2 + +#define REG_A4XX_VBIF_PERF_CNT_EN3 0x000030c3 + +#define REG_A4XX_VBIF_PERF_CNT_SEL0 0x000030d0 + +#define REG_A4XX_VBIF_PERF_CNT_SEL1 0x000030d1 + +#define REG_A4XX_VBIF_PERF_CNT_SEL2 0x000030d2 + +#define REG_A4XX_VBIF_PERF_CNT_SEL3 0x000030d3 + +#define REG_A4XX_VBIF_PERF_CNT_LOW0 0x000030d8 + +#define REG_A4XX_VBIF_PERF_CNT_LOW1 0x000030d9 + +#define REG_A4XX_VBIF_PERF_CNT_LOW2 0x000030da + +#define REG_A4XX_VBIF_PERF_CNT_LOW3 0x000030db + +#define REG_A4XX_VBIF_PERF_CNT_HIGH0 0x000030e0 + +#define REG_A4XX_VBIF_PERF_CNT_HIGH1 0x000030e1 + +#define REG_A4XX_VBIF_PERF_CNT_HIGH2 0x000030e2 + +#define REG_A4XX_VBIF_PERF_CNT_HIGH3 0x000030e3 + +#define REG_A4XX_VBIF_PERF_PWR_CNT_EN0 0x00003100 + +#define REG_A4XX_VBIF_PERF_PWR_CNT_EN1 0x00003101 + +#define REG_A4XX_VBIF_PERF_PWR_CNT_EN2 0x00003102 + +#define REG_A4XX_UNKNOWN_0CC5 0x00000cc5 + +#define REG_A4XX_UNKNOWN_0CC6 0x00000cc6 + +#define REG_A4XX_UNKNOWN_0D01 0x00000d01 + +#define REG_A4XX_UNKNOWN_0E42 0x00000e42 + +#define REG_A4XX_UNKNOWN_0EC2 0x00000ec2 + +#define REG_A4XX_UNKNOWN_2001 0x00002001 + +#define REG_A4XX_UNKNOWN_209B 0x0000209b + +#define REG_A4XX_UNKNOWN_20EF 0x000020ef + +#define REG_A4XX_UNKNOWN_2152 0x00002152 + +#define REG_A4XX_UNKNOWN_2153 0x00002153 + 
+#define REG_A4XX_UNKNOWN_2154 0x00002154 + +#define REG_A4XX_UNKNOWN_2155 0x00002155 + +#define REG_A4XX_UNKNOWN_2156 0x00002156 + +#define REG_A4XX_UNKNOWN_2157 0x00002157 + +#define REG_A4XX_UNKNOWN_21C3 0x000021c3 + +#define REG_A4XX_UNKNOWN_21E6 0x000021e6 + +#define REG_A4XX_UNKNOWN_2209 0x00002209 + +#define REG_A4XX_UNKNOWN_22D7 0x000022d7 + +#define REG_A4XX_UNKNOWN_2352 0x00002352 + +#define REG_A4XX_TEX_SAMP_0 0x00000000 +#define A4XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR 0x00000001 +#define A4XX_TEX_SAMP_0_XY_MAG__MASK 0x00000006 +#define A4XX_TEX_SAMP_0_XY_MAG__SHIFT 1 +static inline uint32_t A4XX_TEX_SAMP_0_XY_MAG(enum a4xx_tex_filter val) +{ + return ((val) << A4XX_TEX_SAMP_0_XY_MAG__SHIFT) & A4XX_TEX_SAMP_0_XY_MAG__MASK; +} +#define A4XX_TEX_SAMP_0_XY_MIN__MASK 0x00000018 +#define A4XX_TEX_SAMP_0_XY_MIN__SHIFT 3 +static inline uint32_t A4XX_TEX_SAMP_0_XY_MIN(enum a4xx_tex_filter val) +{ + return ((val) << A4XX_TEX_SAMP_0_XY_MIN__SHIFT) & A4XX_TEX_SAMP_0_XY_MIN__MASK; +} +#define A4XX_TEX_SAMP_0_WRAP_S__MASK 0x000000e0 +#define A4XX_TEX_SAMP_0_WRAP_S__SHIFT 5 +static inline uint32_t A4XX_TEX_SAMP_0_WRAP_S(enum a4xx_tex_clamp val) +{ + return ((val) << A4XX_TEX_SAMP_0_WRAP_S__SHIFT) & A4XX_TEX_SAMP_0_WRAP_S__MASK; +} +#define A4XX_TEX_SAMP_0_WRAP_T__MASK 0x00000700 +#define A4XX_TEX_SAMP_0_WRAP_T__SHIFT 8 +static inline uint32_t A4XX_TEX_SAMP_0_WRAP_T(enum a4xx_tex_clamp val) +{ + return ((val) << A4XX_TEX_SAMP_0_WRAP_T__SHIFT) & A4XX_TEX_SAMP_0_WRAP_T__MASK; +} +#define A4XX_TEX_SAMP_0_WRAP_R__MASK 0x00003800 +#define A4XX_TEX_SAMP_0_WRAP_R__SHIFT 11 +static inline uint32_t A4XX_TEX_SAMP_0_WRAP_R(enum a4xx_tex_clamp val) +{ + return ((val) << A4XX_TEX_SAMP_0_WRAP_R__SHIFT) & A4XX_TEX_SAMP_0_WRAP_R__MASK; +} +#define A4XX_TEX_SAMP_0_ANISO__MASK 0x0001c000 +#define A4XX_TEX_SAMP_0_ANISO__SHIFT 14 +static inline uint32_t A4XX_TEX_SAMP_0_ANISO(enum a4xx_tex_aniso val) +{ + return ((val) << A4XX_TEX_SAMP_0_ANISO__SHIFT) & A4XX_TEX_SAMP_0_ANISO__MASK; +} 
+#define A4XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000 +#define A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19 +static inline uint32_t A4XX_TEX_SAMP_0_LOD_BIAS(float val) +{ + return ((((int32_t)(val * 256.0))) << A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A4XX_TEX_SAMP_0_LOD_BIAS__MASK; +} + +#define REG_A4XX_TEX_SAMP_1 0x00000001 +#define A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e +#define A4XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT 1 +static inline uint32_t A4XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val) +{ + return ((val) << A4XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK; +} +#define A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF 0x00000010 +#define A4XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020 +#define A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040 +#define A4XX_TEX_SAMP_1_MAX_LOD__MASK 0x000fff00 +#define A4XX_TEX_SAMP_1_MAX_LOD__SHIFT 8 +static inline uint32_t A4XX_TEX_SAMP_1_MAX_LOD(float val) +{ + return ((((uint32_t)(val * 256.0))) << A4XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A4XX_TEX_SAMP_1_MAX_LOD__MASK; +} +#define A4XX_TEX_SAMP_1_MIN_LOD__MASK 0xfff00000 +#define A4XX_TEX_SAMP_1_MIN_LOD__SHIFT 20 +static inline uint32_t A4XX_TEX_SAMP_1_MIN_LOD(float val) +{ + return ((((uint32_t)(val * 256.0))) << A4XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A4XX_TEX_SAMP_1_MIN_LOD__MASK; +} + +#define REG_A4XX_TEX_CONST_0 0x00000000 +#define A4XX_TEX_CONST_0_TILED 0x00000001 +#define A4XX_TEX_CONST_0_SRGB 0x00000004 +#define A4XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070 +#define A4XX_TEX_CONST_0_SWIZ_X__SHIFT 4 +static inline uint32_t A4XX_TEX_CONST_0_SWIZ_X(enum a4xx_tex_swiz val) +{ + return ((val) << A4XX_TEX_CONST_0_SWIZ_X__SHIFT) & A4XX_TEX_CONST_0_SWIZ_X__MASK; +} +#define A4XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380 +#define A4XX_TEX_CONST_0_SWIZ_Y__SHIFT 7 +static inline uint32_t A4XX_TEX_CONST_0_SWIZ_Y(enum a4xx_tex_swiz val) +{ + return ((val) << A4XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A4XX_TEX_CONST_0_SWIZ_Y__MASK; +} +#define A4XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00 +#define 
A4XX_TEX_CONST_0_SWIZ_Z__SHIFT 10 +static inline uint32_t A4XX_TEX_CONST_0_SWIZ_Z(enum a4xx_tex_swiz val) +{ + return ((val) << A4XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A4XX_TEX_CONST_0_SWIZ_Z__MASK; +} +#define A4XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000 +#define A4XX_TEX_CONST_0_SWIZ_W__SHIFT 13 +static inline uint32_t A4XX_TEX_CONST_0_SWIZ_W(enum a4xx_tex_swiz val) +{ + return ((val) << A4XX_TEX_CONST_0_SWIZ_W__SHIFT) & A4XX_TEX_CONST_0_SWIZ_W__MASK; +} +#define A4XX_TEX_CONST_0_MIPLVLS__MASK 0x000f0000 +#define A4XX_TEX_CONST_0_MIPLVLS__SHIFT 16 +static inline uint32_t A4XX_TEX_CONST_0_MIPLVLS(uint32_t val) +{ + return ((val) << A4XX_TEX_CONST_0_MIPLVLS__SHIFT) & A4XX_TEX_CONST_0_MIPLVLS__MASK; +} +#define A4XX_TEX_CONST_0_FMT__MASK 0x1fc00000 +#define A4XX_TEX_CONST_0_FMT__SHIFT 22 +static inline uint32_t A4XX_TEX_CONST_0_FMT(enum a4xx_tex_fmt val) +{ + return ((val) << A4XX_TEX_CONST_0_FMT__SHIFT) & A4XX_TEX_CONST_0_FMT__MASK; +} +#define A4XX_TEX_CONST_0_TYPE__MASK 0x60000000 +#define A4XX_TEX_CONST_0_TYPE__SHIFT 29 +static inline uint32_t A4XX_TEX_CONST_0_TYPE(enum a4xx_tex_type val) +{ + return ((val) << A4XX_TEX_CONST_0_TYPE__SHIFT) & A4XX_TEX_CONST_0_TYPE__MASK; +} + +#define REG_A4XX_TEX_CONST_1 0x00000001 +#define A4XX_TEX_CONST_1_HEIGHT__MASK 0x00007fff +#define A4XX_TEX_CONST_1_HEIGHT__SHIFT 0 +static inline uint32_t A4XX_TEX_CONST_1_HEIGHT(uint32_t val) +{ + return ((val) << A4XX_TEX_CONST_1_HEIGHT__SHIFT) & A4XX_TEX_CONST_1_HEIGHT__MASK; +} +#define A4XX_TEX_CONST_1_WIDTH__MASK 0x3fff8000 +#define A4XX_TEX_CONST_1_WIDTH__SHIFT 15 +static inline uint32_t A4XX_TEX_CONST_1_WIDTH(uint32_t val) +{ + return ((val) << A4XX_TEX_CONST_1_WIDTH__SHIFT) & A4XX_TEX_CONST_1_WIDTH__MASK; +} + +#define REG_A4XX_TEX_CONST_2 0x00000002 +#define A4XX_TEX_CONST_2_FETCHSIZE__MASK 0x0000000f +#define A4XX_TEX_CONST_2_FETCHSIZE__SHIFT 0 +static inline uint32_t A4XX_TEX_CONST_2_FETCHSIZE(enum a4xx_tex_fetchsize val) +{ + return ((val) << A4XX_TEX_CONST_2_FETCHSIZE__SHIFT) & 
A4XX_TEX_CONST_2_FETCHSIZE__MASK; +} +#define A4XX_TEX_CONST_2_PITCH__MASK 0x3ffffe00 +#define A4XX_TEX_CONST_2_PITCH__SHIFT 9 +static inline uint32_t A4XX_TEX_CONST_2_PITCH(uint32_t val) +{ + return ((val) << A4XX_TEX_CONST_2_PITCH__SHIFT) & A4XX_TEX_CONST_2_PITCH__MASK; +} +#define A4XX_TEX_CONST_2_SWAP__MASK 0xc0000000 +#define A4XX_TEX_CONST_2_SWAP__SHIFT 30 +static inline uint32_t A4XX_TEX_CONST_2_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A4XX_TEX_CONST_2_SWAP__SHIFT) & A4XX_TEX_CONST_2_SWAP__MASK; +} + +#define REG_A4XX_TEX_CONST_3 0x00000003 +#define A4XX_TEX_CONST_3_LAYERSZ__MASK 0x00003fff +#define A4XX_TEX_CONST_3_LAYERSZ__SHIFT 0 +static inline uint32_t A4XX_TEX_CONST_3_LAYERSZ(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A4XX_TEX_CONST_3_LAYERSZ__SHIFT) & A4XX_TEX_CONST_3_LAYERSZ__MASK; +} +#define A4XX_TEX_CONST_3_DEPTH__MASK 0x7ffc0000 +#define A4XX_TEX_CONST_3_DEPTH__SHIFT 18 +static inline uint32_t A4XX_TEX_CONST_3_DEPTH(uint32_t val) +{ + return ((val) << A4XX_TEX_CONST_3_DEPTH__SHIFT) & A4XX_TEX_CONST_3_DEPTH__MASK; +} + +#define REG_A4XX_TEX_CONST_4 0x00000004 +#define A4XX_TEX_CONST_4_LAYERSZ__MASK 0x0000000f +#define A4XX_TEX_CONST_4_LAYERSZ__SHIFT 0 +static inline uint32_t A4XX_TEX_CONST_4_LAYERSZ(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A4XX_TEX_CONST_4_LAYERSZ__SHIFT) & A4XX_TEX_CONST_4_LAYERSZ__MASK; +} +#define A4XX_TEX_CONST_4_BASE__MASK 0xffffffe0 +#define A4XX_TEX_CONST_4_BASE__SHIFT 5 +static inline uint32_t A4XX_TEX_CONST_4_BASE(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A4XX_TEX_CONST_4_BASE__SHIFT) & A4XX_TEX_CONST_4_BASE__MASK; +} + +#define REG_A4XX_TEX_CONST_5 0x00000005 + +#define REG_A4XX_TEX_CONST_6 0x00000006 + +#define REG_A4XX_TEX_CONST_7 0x00000007 + +#define REG_A4XX_SSBO_0_0 0x00000000 +#define A4XX_SSBO_0_0_BASE__MASK 0xffffffe0 +#define A4XX_SSBO_0_0_BASE__SHIFT 5 +static inline uint32_t A4XX_SSBO_0_0_BASE(uint32_t val) +{ + 
assert(!(val & 0x1f)); + return ((val >> 5) << A4XX_SSBO_0_0_BASE__SHIFT) & A4XX_SSBO_0_0_BASE__MASK; +} + +#define REG_A4XX_SSBO_0_1 0x00000001 +#define A4XX_SSBO_0_1_PITCH__MASK 0x003fffff +#define A4XX_SSBO_0_1_PITCH__SHIFT 0 +static inline uint32_t A4XX_SSBO_0_1_PITCH(uint32_t val) +{ + return ((val) << A4XX_SSBO_0_1_PITCH__SHIFT) & A4XX_SSBO_0_1_PITCH__MASK; +} + +#define REG_A4XX_SSBO_0_2 0x00000002 +#define A4XX_SSBO_0_2_ARRAY_PITCH__MASK 0x03fff000 +#define A4XX_SSBO_0_2_ARRAY_PITCH__SHIFT 12 +static inline uint32_t A4XX_SSBO_0_2_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A4XX_SSBO_0_2_ARRAY_PITCH__SHIFT) & A4XX_SSBO_0_2_ARRAY_PITCH__MASK; +} + +#define REG_A4XX_SSBO_0_3 0x00000003 +#define A4XX_SSBO_0_3_CPP__MASK 0x0000003f +#define A4XX_SSBO_0_3_CPP__SHIFT 0 +static inline uint32_t A4XX_SSBO_0_3_CPP(uint32_t val) +{ + return ((val) << A4XX_SSBO_0_3_CPP__SHIFT) & A4XX_SSBO_0_3_CPP__MASK; +} + +#define REG_A4XX_SSBO_1_0 0x00000000 +#define A4XX_SSBO_1_0_CPP__MASK 0x0000001f +#define A4XX_SSBO_1_0_CPP__SHIFT 0 +static inline uint32_t A4XX_SSBO_1_0_CPP(uint32_t val) +{ + return ((val) << A4XX_SSBO_1_0_CPP__SHIFT) & A4XX_SSBO_1_0_CPP__MASK; +} +#define A4XX_SSBO_1_0_FMT__MASK 0x0000ff00 +#define A4XX_SSBO_1_0_FMT__SHIFT 8 +static inline uint32_t A4XX_SSBO_1_0_FMT(enum a4xx_color_fmt val) +{ + return ((val) << A4XX_SSBO_1_0_FMT__SHIFT) & A4XX_SSBO_1_0_FMT__MASK; +} +#define A4XX_SSBO_1_0_WIDTH__MASK 0xffff0000 +#define A4XX_SSBO_1_0_WIDTH__SHIFT 16 +static inline uint32_t A4XX_SSBO_1_0_WIDTH(uint32_t val) +{ + return ((val) << A4XX_SSBO_1_0_WIDTH__SHIFT) & A4XX_SSBO_1_0_WIDTH__MASK; +} + +#define REG_A4XX_SSBO_1_1 0x00000001 +#define A4XX_SSBO_1_1_HEIGHT__MASK 0x0000ffff +#define A4XX_SSBO_1_1_HEIGHT__SHIFT 0 +static inline uint32_t A4XX_SSBO_1_1_HEIGHT(uint32_t val) +{ + return ((val) << A4XX_SSBO_1_1_HEIGHT__SHIFT) & A4XX_SSBO_1_1_HEIGHT__MASK; +} +#define A4XX_SSBO_1_1_DEPTH__MASK 0xffff0000 +#define 
A4XX_SSBO_1_1_DEPTH__SHIFT 16 +static inline uint32_t A4XX_SSBO_1_1_DEPTH(uint32_t val) +{ + return ((val) << A4XX_SSBO_1_1_DEPTH__SHIFT) & A4XX_SSBO_1_1_DEPTH__MASK; +} + + +#endif /* A4XX_XML */ diff -Nru mesa-18.3.3/src/freedreno/registers/a5xx.xml.h mesa-19.0.1/src/freedreno/registers/a5xx.xml.h --- mesa-18.3.3/src/freedreno/registers/a5xx.xml.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/registers/a5xx.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,5226 @@ +#ifndef A5XX_XML +#define A5XX_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/freedreno/envytools/ +git clone https://github.com/freedreno/envytools.git + +The rules-ng-ng source files this header was generated from are: +- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 42463 bytes, from 2018-11-19 13:44:03) +- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14239 bytes, from 2018-12-05 15:25:53) +- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 43052 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 141895 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) +- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: +- Rob Clark (robclark) +- Ilia Mirkin (imirkin) + 
+Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +enum a5xx_color_fmt { + RB5_A8_UNORM = 2, + RB5_R8_UNORM = 3, + RB5_R8_SNORM = 4, + RB5_R8_UINT = 5, + RB5_R8_SINT = 6, + RB5_R4G4B4A4_UNORM = 8, + RB5_R5G5B5A1_UNORM = 10, + RB5_R5G6B5_UNORM = 14, + RB5_R8G8_UNORM = 15, + RB5_R8G8_SNORM = 16, + RB5_R8G8_UINT = 17, + RB5_R8G8_SINT = 18, + RB5_R16_UNORM = 21, + RB5_R16_SNORM = 22, + RB5_R16_FLOAT = 23, + RB5_R16_UINT = 24, + RB5_R16_SINT = 25, + RB5_R8G8B8A8_UNORM = 48, + RB5_R8G8B8_UNORM = 49, + RB5_R8G8B8A8_SNORM = 50, + RB5_R8G8B8A8_UINT = 51, + RB5_R8G8B8A8_SINT = 52, + RB5_R10G10B10A2_UNORM = 55, + RB5_R10G10B10A2_UINT = 58, + RB5_R11G11B10_FLOAT = 66, + RB5_R16G16_UNORM = 67, + RB5_R16G16_SNORM = 68, + RB5_R16G16_FLOAT = 69, + RB5_R16G16_UINT = 70, + RB5_R16G16_SINT = 71, + RB5_R32_FLOAT = 74, + RB5_R32_UINT = 75, + RB5_R32_SINT = 76, + RB5_R16G16B16A16_UNORM = 96, + RB5_R16G16B16A16_SNORM = 97, + RB5_R16G16B16A16_FLOAT = 98, + RB5_R16G16B16A16_UINT = 99, + RB5_R16G16B16A16_SINT = 100, + RB5_R32G32_FLOAT = 103, + RB5_R32G32_UINT = 104, + RB5_R32G32_SINT = 105, + RB5_R32G32B32A32_FLOAT = 130, + RB5_R32G32B32A32_UINT = 131, + RB5_R32G32B32A32_SINT = 132, +}; + +enum a5xx_tile_mode { + TILE5_LINEAR = 0, + TILE5_2 = 2, + TILE5_3 = 3, +}; + +enum a5xx_vtx_fmt { + VFMT5_8_UNORM = 3, + VFMT5_8_SNORM = 4, + VFMT5_8_UINT = 5, + VFMT5_8_SINT = 6, + VFMT5_8_8_UNORM = 15, + VFMT5_8_8_SNORM = 16, + VFMT5_8_8_UINT = 17, + VFMT5_8_8_SINT = 18, + VFMT5_16_UNORM = 21, + VFMT5_16_SNORM = 22, + VFMT5_16_FLOAT = 23, + VFMT5_16_UINT = 24, + VFMT5_16_SINT = 25, + VFMT5_8_8_8_UNORM = 33, + VFMT5_8_8_8_SNORM = 34, + VFMT5_8_8_8_UINT = 35, + VFMT5_8_8_8_SINT = 36, + VFMT5_8_8_8_8_UNORM = 48, + VFMT5_8_8_8_8_SNORM = 50, + VFMT5_8_8_8_8_UINT = 51, + VFMT5_8_8_8_8_SINT = 52, + VFMT5_10_10_10_2_UNORM = 54, + VFMT5_10_10_10_2_SNORM = 57, + VFMT5_10_10_10_2_UINT = 58, + VFMT5_10_10_10_2_SINT = 59, + VFMT5_11_11_10_FLOAT = 66, + VFMT5_16_16_UNORM = 67, + VFMT5_16_16_SNORM = 68, + VFMT5_16_16_FLOAT = 69, + VFMT5_16_16_UINT = 70, + 
VFMT5_16_16_SINT = 71, + VFMT5_32_UNORM = 72, + VFMT5_32_SNORM = 73, + VFMT5_32_FLOAT = 74, + VFMT5_32_UINT = 75, + VFMT5_32_SINT = 76, + VFMT5_32_FIXED = 77, + VFMT5_16_16_16_UNORM = 88, + VFMT5_16_16_16_SNORM = 89, + VFMT5_16_16_16_FLOAT = 90, + VFMT5_16_16_16_UINT = 91, + VFMT5_16_16_16_SINT = 92, + VFMT5_16_16_16_16_UNORM = 96, + VFMT5_16_16_16_16_SNORM = 97, + VFMT5_16_16_16_16_FLOAT = 98, + VFMT5_16_16_16_16_UINT = 99, + VFMT5_16_16_16_16_SINT = 100, + VFMT5_32_32_UNORM = 101, + VFMT5_32_32_SNORM = 102, + VFMT5_32_32_FLOAT = 103, + VFMT5_32_32_UINT = 104, + VFMT5_32_32_SINT = 105, + VFMT5_32_32_FIXED = 106, + VFMT5_32_32_32_UNORM = 112, + VFMT5_32_32_32_SNORM = 113, + VFMT5_32_32_32_UINT = 114, + VFMT5_32_32_32_SINT = 115, + VFMT5_32_32_32_FLOAT = 116, + VFMT5_32_32_32_FIXED = 117, + VFMT5_32_32_32_32_UNORM = 128, + VFMT5_32_32_32_32_SNORM = 129, + VFMT5_32_32_32_32_FLOAT = 130, + VFMT5_32_32_32_32_UINT = 131, + VFMT5_32_32_32_32_SINT = 132, + VFMT5_32_32_32_32_FIXED = 133, +}; + +enum a5xx_tex_fmt { + TFMT5_A8_UNORM = 2, + TFMT5_8_UNORM = 3, + TFMT5_8_SNORM = 4, + TFMT5_8_UINT = 5, + TFMT5_8_SINT = 6, + TFMT5_4_4_4_4_UNORM = 8, + TFMT5_5_5_5_1_UNORM = 10, + TFMT5_5_6_5_UNORM = 14, + TFMT5_8_8_UNORM = 15, + TFMT5_8_8_SNORM = 16, + TFMT5_8_8_UINT = 17, + TFMT5_8_8_SINT = 18, + TFMT5_L8_A8_UNORM = 19, + TFMT5_16_UNORM = 21, + TFMT5_16_SNORM = 22, + TFMT5_16_FLOAT = 23, + TFMT5_16_UINT = 24, + TFMT5_16_SINT = 25, + TFMT5_8_8_8_8_UNORM = 48, + TFMT5_8_8_8_UNORM = 49, + TFMT5_8_8_8_8_SNORM = 50, + TFMT5_8_8_8_8_UINT = 51, + TFMT5_8_8_8_8_SINT = 52, + TFMT5_9_9_9_E5_FLOAT = 53, + TFMT5_10_10_10_2_UNORM = 54, + TFMT5_10_10_10_2_UINT = 58, + TFMT5_11_11_10_FLOAT = 66, + TFMT5_16_16_UNORM = 67, + TFMT5_16_16_SNORM = 68, + TFMT5_16_16_FLOAT = 69, + TFMT5_16_16_UINT = 70, + TFMT5_16_16_SINT = 71, + TFMT5_32_FLOAT = 74, + TFMT5_32_UINT = 75, + TFMT5_32_SINT = 76, + TFMT5_16_16_16_16_UNORM = 96, + TFMT5_16_16_16_16_SNORM = 97, + TFMT5_16_16_16_16_FLOAT = 98, + 
TFMT5_16_16_16_16_UINT = 99, + TFMT5_16_16_16_16_SINT = 100, + TFMT5_32_32_FLOAT = 103, + TFMT5_32_32_UINT = 104, + TFMT5_32_32_SINT = 105, + TFMT5_32_32_32_UINT = 114, + TFMT5_32_32_32_SINT = 115, + TFMT5_32_32_32_FLOAT = 116, + TFMT5_32_32_32_32_FLOAT = 130, + TFMT5_32_32_32_32_UINT = 131, + TFMT5_32_32_32_32_SINT = 132, + TFMT5_X8Z24_UNORM = 160, + TFMT5_ETC2_RG11_UNORM = 171, + TFMT5_ETC2_RG11_SNORM = 172, + TFMT5_ETC2_R11_UNORM = 173, + TFMT5_ETC2_R11_SNORM = 174, + TFMT5_ETC1 = 175, + TFMT5_ETC2_RGB8 = 176, + TFMT5_ETC2_RGBA8 = 177, + TFMT5_ETC2_RGB8A1 = 178, + TFMT5_DXT1 = 179, + TFMT5_DXT3 = 180, + TFMT5_DXT5 = 181, + TFMT5_RGTC1_UNORM = 183, + TFMT5_RGTC1_SNORM = 184, + TFMT5_RGTC2_UNORM = 187, + TFMT5_RGTC2_SNORM = 188, + TFMT5_BPTC_UFLOAT = 190, + TFMT5_BPTC_FLOAT = 191, + TFMT5_BPTC = 192, + TFMT5_ASTC_4x4 = 193, + TFMT5_ASTC_5x4 = 194, + TFMT5_ASTC_5x5 = 195, + TFMT5_ASTC_6x5 = 196, + TFMT5_ASTC_6x6 = 197, + TFMT5_ASTC_8x5 = 198, + TFMT5_ASTC_8x6 = 199, + TFMT5_ASTC_8x8 = 200, + TFMT5_ASTC_10x5 = 201, + TFMT5_ASTC_10x6 = 202, + TFMT5_ASTC_10x8 = 203, + TFMT5_ASTC_10x10 = 204, + TFMT5_ASTC_12x10 = 205, + TFMT5_ASTC_12x12 = 206, +}; + +enum a5xx_tex_fetchsize { + TFETCH5_1_BYTE = 0, + TFETCH5_2_BYTE = 1, + TFETCH5_4_BYTE = 2, + TFETCH5_8_BYTE = 3, + TFETCH5_16_BYTE = 4, +}; + +enum a5xx_depth_format { + DEPTH5_NONE = 0, + DEPTH5_16 = 1, + DEPTH5_24_8 = 2, + DEPTH5_32 = 4, +}; + +enum a5xx_blit_buf { + BLIT_MRT0 = 0, + BLIT_MRT1 = 1, + BLIT_MRT2 = 2, + BLIT_MRT3 = 3, + BLIT_MRT4 = 4, + BLIT_MRT5 = 5, + BLIT_MRT6 = 6, + BLIT_MRT7 = 7, + BLIT_ZS = 8, + BLIT_S = 9, +}; + +enum a5xx_cp_perfcounter_select { + PERF_CP_ALWAYS_COUNT = 0, + PERF_CP_BUSY_GFX_CORE_IDLE = 1, + PERF_CP_BUSY_CYCLES = 2, + PERF_CP_PFP_IDLE = 3, + PERF_CP_PFP_BUSY_WORKING = 4, + PERF_CP_PFP_STALL_CYCLES_ANY = 5, + PERF_CP_PFP_STARVE_CYCLES_ANY = 6, + PERF_CP_PFP_ICACHE_MISS = 7, + PERF_CP_PFP_ICACHE_HIT = 8, + PERF_CP_PFP_MATCH_PM4_PKT_PROFILE = 9, + PERF_CP_ME_BUSY_WORKING = 10, + 
PERF_CP_ME_IDLE = 11, + PERF_CP_ME_STARVE_CYCLES_ANY = 12, + PERF_CP_ME_FIFO_EMPTY_PFP_IDLE = 13, + PERF_CP_ME_FIFO_EMPTY_PFP_BUSY = 14, + PERF_CP_ME_FIFO_FULL_ME_BUSY = 15, + PERF_CP_ME_FIFO_FULL_ME_NON_WORKING = 16, + PERF_CP_ME_STALL_CYCLES_ANY = 17, + PERF_CP_ME_ICACHE_MISS = 18, + PERF_CP_ME_ICACHE_HIT = 19, + PERF_CP_NUM_PREEMPTIONS = 20, + PERF_CP_PREEMPTION_REACTION_DELAY = 21, + PERF_CP_PREEMPTION_SWITCH_OUT_TIME = 22, + PERF_CP_PREEMPTION_SWITCH_IN_TIME = 23, + PERF_CP_DEAD_DRAWS_IN_BIN_RENDER = 24, + PERF_CP_PREDICATED_DRAWS_KILLED = 25, + PERF_CP_MODE_SWITCH = 26, + PERF_CP_ZPASS_DONE = 27, + PERF_CP_CONTEXT_DONE = 28, + PERF_CP_CACHE_FLUSH = 29, + PERF_CP_LONG_PREEMPTIONS = 30, +}; + +enum a5xx_rbbm_perfcounter_select { + PERF_RBBM_ALWAYS_COUNT = 0, + PERF_RBBM_ALWAYS_ON = 1, + PERF_RBBM_TSE_BUSY = 2, + PERF_RBBM_RAS_BUSY = 3, + PERF_RBBM_PC_DCALL_BUSY = 4, + PERF_RBBM_PC_VSD_BUSY = 5, + PERF_RBBM_STATUS_MASKED = 6, + PERF_RBBM_COM_BUSY = 7, + PERF_RBBM_DCOM_BUSY = 8, + PERF_RBBM_VBIF_BUSY = 9, + PERF_RBBM_VSC_BUSY = 10, + PERF_RBBM_TESS_BUSY = 11, + PERF_RBBM_UCHE_BUSY = 12, + PERF_RBBM_HLSQ_BUSY = 13, +}; + +enum a5xx_pc_perfcounter_select { + PERF_PC_BUSY_CYCLES = 0, + PERF_PC_WORKING_CYCLES = 1, + PERF_PC_STALL_CYCLES_VFD = 2, + PERF_PC_STALL_CYCLES_TSE = 3, + PERF_PC_STALL_CYCLES_VPC = 4, + PERF_PC_STALL_CYCLES_UCHE = 5, + PERF_PC_STALL_CYCLES_TESS = 6, + PERF_PC_STALL_CYCLES_TSE_ONLY = 7, + PERF_PC_STALL_CYCLES_VPC_ONLY = 8, + PERF_PC_PASS1_TF_STALL_CYCLES = 9, + PERF_PC_STARVE_CYCLES_FOR_INDEX = 10, + PERF_PC_STARVE_CYCLES_FOR_TESS_FACTOR = 11, + PERF_PC_STARVE_CYCLES_FOR_VIZ_STREAM = 12, + PERF_PC_STARVE_CYCLES_FOR_POSITION = 13, + PERF_PC_STARVE_CYCLES_DI = 14, + PERF_PC_VIS_STREAMS_LOADED = 15, + PERF_PC_INSTANCES = 16, + PERF_PC_VPC_PRIMITIVES = 17, + PERF_PC_DEAD_PRIM = 18, + PERF_PC_LIVE_PRIM = 19, + PERF_PC_VERTEX_HITS = 20, + PERF_PC_IA_VERTICES = 21, + PERF_PC_IA_PRIMITIVES = 22, + PERF_PC_GS_PRIMITIVES = 23, + PERF_PC_HS_INVOCATIONS = 
24, + PERF_PC_DS_INVOCATIONS = 25, + PERF_PC_VS_INVOCATIONS = 26, + PERF_PC_GS_INVOCATIONS = 27, + PERF_PC_DS_PRIMITIVES = 28, + PERF_PC_VPC_POS_DATA_TRANSACTION = 29, + PERF_PC_3D_DRAWCALLS = 30, + PERF_PC_2D_DRAWCALLS = 31, + PERF_PC_NON_DRAWCALL_GLOBAL_EVENTS = 32, + PERF_TESS_BUSY_CYCLES = 33, + PERF_TESS_WORKING_CYCLES = 34, + PERF_TESS_STALL_CYCLES_PC = 35, + PERF_TESS_STARVE_CYCLES_PC = 36, +}; + +enum a5xx_vfd_perfcounter_select { + PERF_VFD_BUSY_CYCLES = 0, + PERF_VFD_STALL_CYCLES_UCHE = 1, + PERF_VFD_STALL_CYCLES_VPC_ALLOC = 2, + PERF_VFD_STALL_CYCLES_MISS_VB = 3, + PERF_VFD_STALL_CYCLES_MISS_Q = 4, + PERF_VFD_STALL_CYCLES_SP_INFO = 5, + PERF_VFD_STALL_CYCLES_SP_ATTR = 6, + PERF_VFD_STALL_CYCLES_VFDP_VB = 7, + PERF_VFD_STALL_CYCLES_VFDP_Q = 8, + PERF_VFD_DECODER_PACKER_STALL = 9, + PERF_VFD_STARVE_CYCLES_UCHE = 10, + PERF_VFD_RBUFFER_FULL = 11, + PERF_VFD_ATTR_INFO_FIFO_FULL = 12, + PERF_VFD_DECODED_ATTRIBUTE_BYTES = 13, + PERF_VFD_NUM_ATTRIBUTES = 14, + PERF_VFD_INSTRUCTIONS = 15, + PERF_VFD_UPPER_SHADER_FIBERS = 16, + PERF_VFD_LOWER_SHADER_FIBERS = 17, + PERF_VFD_MODE_0_FIBERS = 18, + PERF_VFD_MODE_1_FIBERS = 19, + PERF_VFD_MODE_2_FIBERS = 20, + PERF_VFD_MODE_3_FIBERS = 21, + PERF_VFD_MODE_4_FIBERS = 22, + PERF_VFD_TOTAL_VERTICES = 23, + PERF_VFD_NUM_ATTR_MISS = 24, + PERF_VFD_1_BURST_REQ = 25, + PERF_VFDP_STALL_CYCLES_VFD = 26, + PERF_VFDP_STALL_CYCLES_VFD_INDEX = 27, + PERF_VFDP_STALL_CYCLES_VFD_PROG = 28, + PERF_VFDP_STARVE_CYCLES_PC = 29, + PERF_VFDP_VS_STAGE_32_WAVES = 30, +}; + +enum a5xx_hlsq_perfcounter_select { + PERF_HLSQ_BUSY_CYCLES = 0, + PERF_HLSQ_STALL_CYCLES_UCHE = 1, + PERF_HLSQ_STALL_CYCLES_SP_STATE = 2, + PERF_HLSQ_STALL_CYCLES_SP_FS_STAGE = 3, + PERF_HLSQ_UCHE_LATENCY_CYCLES = 4, + PERF_HLSQ_UCHE_LATENCY_COUNT = 5, + PERF_HLSQ_FS_STAGE_32_WAVES = 6, + PERF_HLSQ_FS_STAGE_64_WAVES = 7, + PERF_HLSQ_QUADS = 8, + PERF_HLSQ_SP_STATE_COPY_TRANS_FS_STAGE = 9, + PERF_HLSQ_SP_STATE_COPY_TRANS_VS_STAGE = 10, + 
PERF_HLSQ_TP_STATE_COPY_TRANS_FS_STAGE = 11, + PERF_HLSQ_TP_STATE_COPY_TRANS_VS_STAGE = 12, + PERF_HLSQ_CS_INVOCATIONS = 13, + PERF_HLSQ_COMPUTE_DRAWCALLS = 14, +}; + +enum a5xx_vpc_perfcounter_select { + PERF_VPC_BUSY_CYCLES = 0, + PERF_VPC_WORKING_CYCLES = 1, + PERF_VPC_STALL_CYCLES_UCHE = 2, + PERF_VPC_STALL_CYCLES_VFD_WACK = 3, + PERF_VPC_STALL_CYCLES_HLSQ_PRIM_ALLOC = 4, + PERF_VPC_STALL_CYCLES_PC = 5, + PERF_VPC_STALL_CYCLES_SP_LM = 6, + PERF_VPC_POS_EXPORT_STALL_CYCLES = 7, + PERF_VPC_STARVE_CYCLES_SP = 8, + PERF_VPC_STARVE_CYCLES_LRZ = 9, + PERF_VPC_PC_PRIMITIVES = 10, + PERF_VPC_SP_COMPONENTS = 11, + PERF_VPC_SP_LM_PRIMITIVES = 12, + PERF_VPC_SP_LM_COMPONENTS = 13, + PERF_VPC_SP_LM_DWORDS = 14, + PERF_VPC_STREAMOUT_COMPONENTS = 15, + PERF_VPC_GRANT_PHASES = 16, +}; + +enum a5xx_tse_perfcounter_select { + PERF_TSE_BUSY_CYCLES = 0, + PERF_TSE_CLIPPING_CYCLES = 1, + PERF_TSE_STALL_CYCLES_RAS = 2, + PERF_TSE_STALL_CYCLES_LRZ_BARYPLANE = 3, + PERF_TSE_STALL_CYCLES_LRZ_ZPLANE = 4, + PERF_TSE_STARVE_CYCLES_PC = 5, + PERF_TSE_INPUT_PRIM = 6, + PERF_TSE_INPUT_NULL_PRIM = 7, + PERF_TSE_TRIVAL_REJ_PRIM = 8, + PERF_TSE_CLIPPED_PRIM = 9, + PERF_TSE_ZERO_AREA_PRIM = 10, + PERF_TSE_FACENESS_CULLED_PRIM = 11, + PERF_TSE_ZERO_PIXEL_PRIM = 12, + PERF_TSE_OUTPUT_NULL_PRIM = 13, + PERF_TSE_OUTPUT_VISIBLE_PRIM = 14, + PERF_TSE_CINVOCATION = 15, + PERF_TSE_CPRIMITIVES = 16, + PERF_TSE_2D_INPUT_PRIM = 17, + PERF_TSE_2D_ALIVE_CLCLES = 18, +}; + +enum a5xx_ras_perfcounter_select { + PERF_RAS_BUSY_CYCLES = 0, + PERF_RAS_SUPERTILE_ACTIVE_CYCLES = 1, + PERF_RAS_STALL_CYCLES_LRZ = 2, + PERF_RAS_STARVE_CYCLES_TSE = 3, + PERF_RAS_SUPER_TILES = 4, + PERF_RAS_8X4_TILES = 5, + PERF_RAS_MASKGEN_ACTIVE = 6, + PERF_RAS_FULLY_COVERED_SUPER_TILES = 7, + PERF_RAS_FULLY_COVERED_8X4_TILES = 8, + PERF_RAS_PRIM_KILLED_INVISILBE = 9, +}; + +enum a5xx_lrz_perfcounter_select { + PERF_LRZ_BUSY_CYCLES = 0, + PERF_LRZ_STARVE_CYCLES_RAS = 1, + PERF_LRZ_STALL_CYCLES_RB = 2, + PERF_LRZ_STALL_CYCLES_VSC = 3, 
+ PERF_LRZ_STALL_CYCLES_VPC = 4, + PERF_LRZ_STALL_CYCLES_FLAG_PREFETCH = 5, + PERF_LRZ_STALL_CYCLES_UCHE = 6, + PERF_LRZ_LRZ_READ = 7, + PERF_LRZ_LRZ_WRITE = 8, + PERF_LRZ_READ_LATENCY = 9, + PERF_LRZ_MERGE_CACHE_UPDATING = 10, + PERF_LRZ_PRIM_KILLED_BY_MASKGEN = 11, + PERF_LRZ_PRIM_KILLED_BY_LRZ = 12, + PERF_LRZ_VISIBLE_PRIM_AFTER_LRZ = 13, + PERF_LRZ_FULL_8X8_TILES = 14, + PERF_LRZ_PARTIAL_8X8_TILES = 15, + PERF_LRZ_TILE_KILLED = 16, + PERF_LRZ_TOTAL_PIXEL = 17, + PERF_LRZ_VISIBLE_PIXEL_AFTER_LRZ = 18, +}; + +enum a5xx_uche_perfcounter_select { + PERF_UCHE_BUSY_CYCLES = 0, + PERF_UCHE_STALL_CYCLES_VBIF = 1, + PERF_UCHE_VBIF_LATENCY_CYCLES = 2, + PERF_UCHE_VBIF_LATENCY_SAMPLES = 3, + PERF_UCHE_VBIF_READ_BEATS_TP = 4, + PERF_UCHE_VBIF_READ_BEATS_VFD = 5, + PERF_UCHE_VBIF_READ_BEATS_HLSQ = 6, + PERF_UCHE_VBIF_READ_BEATS_LRZ = 7, + PERF_UCHE_VBIF_READ_BEATS_SP = 8, + PERF_UCHE_READ_REQUESTS_TP = 9, + PERF_UCHE_READ_REQUESTS_VFD = 10, + PERF_UCHE_READ_REQUESTS_HLSQ = 11, + PERF_UCHE_READ_REQUESTS_LRZ = 12, + PERF_UCHE_READ_REQUESTS_SP = 13, + PERF_UCHE_WRITE_REQUESTS_LRZ = 14, + PERF_UCHE_WRITE_REQUESTS_SP = 15, + PERF_UCHE_WRITE_REQUESTS_VPC = 16, + PERF_UCHE_WRITE_REQUESTS_VSC = 17, + PERF_UCHE_EVICTS = 18, + PERF_UCHE_BANK_REQ0 = 19, + PERF_UCHE_BANK_REQ1 = 20, + PERF_UCHE_BANK_REQ2 = 21, + PERF_UCHE_BANK_REQ3 = 22, + PERF_UCHE_BANK_REQ4 = 23, + PERF_UCHE_BANK_REQ5 = 24, + PERF_UCHE_BANK_REQ6 = 25, + PERF_UCHE_BANK_REQ7 = 26, + PERF_UCHE_VBIF_READ_BEATS_CH0 = 27, + PERF_UCHE_VBIF_READ_BEATS_CH1 = 28, + PERF_UCHE_GMEM_READ_BEATS = 29, + PERF_UCHE_FLAG_COUNT = 30, +}; + +enum a5xx_tp_perfcounter_select { + PERF_TP_BUSY_CYCLES = 0, + PERF_TP_STALL_CYCLES_UCHE = 1, + PERF_TP_LATENCY_CYCLES = 2, + PERF_TP_LATENCY_TRANS = 3, + PERF_TP_FLAG_CACHE_REQUEST_SAMPLES = 4, + PERF_TP_FLAG_CACHE_REQUEST_LATENCY = 5, + PERF_TP_L1_CACHELINE_REQUESTS = 6, + PERF_TP_L1_CACHELINE_MISSES = 7, + PERF_TP_SP_TP_TRANS = 8, + PERF_TP_TP_SP_TRANS = 9, + PERF_TP_OUTPUT_PIXELS = 10, + 
PERF_TP_FILTER_WORKLOAD_16BIT = 11, + PERF_TP_FILTER_WORKLOAD_32BIT = 12, + PERF_TP_QUADS_RECEIVED = 13, + PERF_TP_QUADS_OFFSET = 14, + PERF_TP_QUADS_SHADOW = 15, + PERF_TP_QUADS_ARRAY = 16, + PERF_TP_QUADS_GRADIENT = 17, + PERF_TP_QUADS_1D = 18, + PERF_TP_QUADS_2D = 19, + PERF_TP_QUADS_BUFFER = 20, + PERF_TP_QUADS_3D = 21, + PERF_TP_QUADS_CUBE = 22, + PERF_TP_STATE_CACHE_REQUESTS = 23, + PERF_TP_STATE_CACHE_MISSES = 24, + PERF_TP_DIVERGENT_QUADS_RECEIVED = 25, + PERF_TP_BINDLESS_STATE_CACHE_REQUESTS = 26, + PERF_TP_BINDLESS_STATE_CACHE_MISSES = 27, + PERF_TP_PRT_NON_RESIDENT_EVENTS = 28, + PERF_TP_OUTPUT_PIXELS_POINT = 29, + PERF_TP_OUTPUT_PIXELS_BILINEAR = 30, + PERF_TP_OUTPUT_PIXELS_MIP = 31, + PERF_TP_OUTPUT_PIXELS_ANISO = 32, + PERF_TP_OUTPUT_PIXELS_ZERO_LOD = 33, + PERF_TP_FLAG_CACHE_REQUESTS = 34, + PERF_TP_FLAG_CACHE_MISSES = 35, + PERF_TP_L1_5_L2_REQUESTS = 36, + PERF_TP_2D_OUTPUT_PIXELS = 37, + PERF_TP_2D_OUTPUT_PIXELS_POINT = 38, + PERF_TP_2D_OUTPUT_PIXELS_BILINEAR = 39, + PERF_TP_2D_FILTER_WORKLOAD_16BIT = 40, + PERF_TP_2D_FILTER_WORKLOAD_32BIT = 41, +}; + +enum a5xx_sp_perfcounter_select { + PERF_SP_BUSY_CYCLES = 0, + PERF_SP_ALU_WORKING_CYCLES = 1, + PERF_SP_EFU_WORKING_CYCLES = 2, + PERF_SP_STALL_CYCLES_VPC = 3, + PERF_SP_STALL_CYCLES_TP = 4, + PERF_SP_STALL_CYCLES_UCHE = 5, + PERF_SP_STALL_CYCLES_RB = 6, + PERF_SP_SCHEDULER_NON_WORKING = 7, + PERF_SP_WAVE_CONTEXTS = 8, + PERF_SP_WAVE_CONTEXT_CYCLES = 9, + PERF_SP_FS_STAGE_WAVE_CYCLES = 10, + PERF_SP_FS_STAGE_WAVE_SAMPLES = 11, + PERF_SP_VS_STAGE_WAVE_CYCLES = 12, + PERF_SP_VS_STAGE_WAVE_SAMPLES = 13, + PERF_SP_FS_STAGE_DURATION_CYCLES = 14, + PERF_SP_VS_STAGE_DURATION_CYCLES = 15, + PERF_SP_WAVE_CTRL_CYCLES = 16, + PERF_SP_WAVE_LOAD_CYCLES = 17, + PERF_SP_WAVE_EMIT_CYCLES = 18, + PERF_SP_WAVE_NOP_CYCLES = 19, + PERF_SP_WAVE_WAIT_CYCLES = 20, + PERF_SP_WAVE_FETCH_CYCLES = 21, + PERF_SP_WAVE_IDLE_CYCLES = 22, + PERF_SP_WAVE_END_CYCLES = 23, + PERF_SP_WAVE_LONG_SYNC_CYCLES = 24, + 
PERF_SP_WAVE_SHORT_SYNC_CYCLES = 25, + PERF_SP_WAVE_JOIN_CYCLES = 26, + PERF_SP_LM_LOAD_INSTRUCTIONS = 27, + PERF_SP_LM_STORE_INSTRUCTIONS = 28, + PERF_SP_LM_ATOMICS = 29, + PERF_SP_GM_LOAD_INSTRUCTIONS = 30, + PERF_SP_GM_STORE_INSTRUCTIONS = 31, + PERF_SP_GM_ATOMICS = 32, + PERF_SP_VS_STAGE_TEX_INSTRUCTIONS = 33, + PERF_SP_VS_STAGE_CFLOW_INSTRUCTIONS = 34, + PERF_SP_VS_STAGE_EFU_INSTRUCTIONS = 35, + PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = 36, + PERF_SP_VS_STAGE_HALF_ALU_INSTRUCTIONS = 37, + PERF_SP_FS_STAGE_TEX_INSTRUCTIONS = 38, + PERF_SP_FS_STAGE_CFLOW_INSTRUCTIONS = 39, + PERF_SP_FS_STAGE_EFU_INSTRUCTIONS = 40, + PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = 41, + PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = 42, + PERF_SP_FS_STAGE_BARY_INSTRUCTIONS = 43, + PERF_SP_VS_INSTRUCTIONS = 44, + PERF_SP_FS_INSTRUCTIONS = 45, + PERF_SP_ADDR_LOCK_COUNT = 46, + PERF_SP_UCHE_READ_TRANS = 47, + PERF_SP_UCHE_WRITE_TRANS = 48, + PERF_SP_EXPORT_VPC_TRANS = 49, + PERF_SP_EXPORT_RB_TRANS = 50, + PERF_SP_PIXELS_KILLED = 51, + PERF_SP_ICL1_REQUESTS = 52, + PERF_SP_ICL1_MISSES = 53, + PERF_SP_ICL0_REQUESTS = 54, + PERF_SP_ICL0_MISSES = 55, + PERF_SP_HS_INSTRUCTIONS = 56, + PERF_SP_DS_INSTRUCTIONS = 57, + PERF_SP_GS_INSTRUCTIONS = 58, + PERF_SP_CS_INSTRUCTIONS = 59, + PERF_SP_GPR_READ = 60, + PERF_SP_GPR_WRITE = 61, + PERF_SP_LM_CH0_REQUESTS = 62, + PERF_SP_LM_CH1_REQUESTS = 63, + PERF_SP_LM_BANK_CONFLICTS = 64, +}; + +enum a5xx_rb_perfcounter_select { + PERF_RB_BUSY_CYCLES = 0, + PERF_RB_STALL_CYCLES_CCU = 1, + PERF_RB_STALL_CYCLES_HLSQ = 2, + PERF_RB_STALL_CYCLES_FIFO0_FULL = 3, + PERF_RB_STALL_CYCLES_FIFO1_FULL = 4, + PERF_RB_STALL_CYCLES_FIFO2_FULL = 5, + PERF_RB_STARVE_CYCLES_SP = 6, + PERF_RB_STARVE_CYCLES_LRZ_TILE = 7, + PERF_RB_STARVE_CYCLES_CCU = 8, + PERF_RB_STARVE_CYCLES_Z_PLANE = 9, + PERF_RB_STARVE_CYCLES_BARY_PLANE = 10, + PERF_RB_Z_WORKLOAD = 11, + PERF_RB_HLSQ_ACTIVE = 12, + PERF_RB_Z_READ = 13, + PERF_RB_Z_WRITE = 14, + PERF_RB_C_READ = 15, + PERF_RB_C_WRITE = 16, + 
PERF_RB_TOTAL_PASS = 17, + PERF_RB_Z_PASS = 18, + PERF_RB_Z_FAIL = 19, + PERF_RB_S_FAIL = 20, + PERF_RB_BLENDED_FXP_COMPONENTS = 21, + PERF_RB_BLENDED_FP16_COMPONENTS = 22, + RB_RESERVED = 23, + PERF_RB_2D_ALIVE_CYCLES = 24, + PERF_RB_2D_STALL_CYCLES_A2D = 25, + PERF_RB_2D_STARVE_CYCLES_SRC = 26, + PERF_RB_2D_STARVE_CYCLES_SP = 27, + PERF_RB_2D_STARVE_CYCLES_DST = 28, + PERF_RB_2D_VALID_PIXELS = 29, +}; + +enum a5xx_rb_samples_perfcounter_select { + TOTAL_SAMPLES = 0, + ZPASS_SAMPLES = 1, + ZFAIL_SAMPLES = 2, + SFAIL_SAMPLES = 3, +}; + +enum a5xx_vsc_perfcounter_select { + PERF_VSC_BUSY_CYCLES = 0, + PERF_VSC_WORKING_CYCLES = 1, + PERF_VSC_STALL_CYCLES_UCHE = 2, + PERF_VSC_EOT_NUM = 3, +}; + +enum a5xx_ccu_perfcounter_select { + PERF_CCU_BUSY_CYCLES = 0, + PERF_CCU_STALL_CYCLES_RB_DEPTH_RETURN = 1, + PERF_CCU_STALL_CYCLES_RB_COLOR_RETURN = 2, + PERF_CCU_STARVE_CYCLES_FLAG_RETURN = 3, + PERF_CCU_DEPTH_BLOCKS = 4, + PERF_CCU_COLOR_BLOCKS = 5, + PERF_CCU_DEPTH_BLOCK_HIT = 6, + PERF_CCU_COLOR_BLOCK_HIT = 7, + PERF_CCU_PARTIAL_BLOCK_READ = 8, + PERF_CCU_GMEM_READ = 9, + PERF_CCU_GMEM_WRITE = 10, + PERF_CCU_DEPTH_READ_FLAG0_COUNT = 11, + PERF_CCU_DEPTH_READ_FLAG1_COUNT = 12, + PERF_CCU_DEPTH_READ_FLAG2_COUNT = 13, + PERF_CCU_DEPTH_READ_FLAG3_COUNT = 14, + PERF_CCU_DEPTH_READ_FLAG4_COUNT = 15, + PERF_CCU_COLOR_READ_FLAG0_COUNT = 16, + PERF_CCU_COLOR_READ_FLAG1_COUNT = 17, + PERF_CCU_COLOR_READ_FLAG2_COUNT = 18, + PERF_CCU_COLOR_READ_FLAG3_COUNT = 19, + PERF_CCU_COLOR_READ_FLAG4_COUNT = 20, + PERF_CCU_2D_BUSY_CYCLES = 21, + PERF_CCU_2D_RD_REQ = 22, + PERF_CCU_2D_WR_REQ = 23, + PERF_CCU_2D_REORDER_STARVE_CYCLES = 24, + PERF_CCU_2D_PIXELS = 25, +}; + +enum a5xx_cmp_perfcounter_select { + PERF_CMPDECMP_STALL_CYCLES_VBIF = 0, + PERF_CMPDECMP_VBIF_LATENCY_CYCLES = 1, + PERF_CMPDECMP_VBIF_LATENCY_SAMPLES = 2, + PERF_CMPDECMP_VBIF_READ_DATA_CCU = 3, + PERF_CMPDECMP_VBIF_WRITE_DATA_CCU = 4, + PERF_CMPDECMP_VBIF_READ_REQUEST = 5, + PERF_CMPDECMP_VBIF_WRITE_REQUEST = 6, + 
PERF_CMPDECMP_VBIF_READ_DATA = 7, + PERF_CMPDECMP_VBIF_WRITE_DATA = 8, + PERF_CMPDECMP_FLAG_FETCH_CYCLES = 9, + PERF_CMPDECMP_FLAG_FETCH_SAMPLES = 10, + PERF_CMPDECMP_DEPTH_WRITE_FLAG1_COUNT = 11, + PERF_CMPDECMP_DEPTH_WRITE_FLAG2_COUNT = 12, + PERF_CMPDECMP_DEPTH_WRITE_FLAG3_COUNT = 13, + PERF_CMPDECMP_DEPTH_WRITE_FLAG4_COUNT = 14, + PERF_CMPDECMP_COLOR_WRITE_FLAG1_COUNT = 15, + PERF_CMPDECMP_COLOR_WRITE_FLAG2_COUNT = 16, + PERF_CMPDECMP_COLOR_WRITE_FLAG3_COUNT = 17, + PERF_CMPDECMP_COLOR_WRITE_FLAG4_COUNT = 18, + PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_REQ = 19, + PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_WR = 20, + PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_RETURN = 21, + PERF_CMPDECMP_2D_RD_DATA = 22, + PERF_CMPDECMP_2D_WR_DATA = 23, +}; + +enum a5xx_vbif_perfcounter_select { + AXI_READ_REQUESTS_ID_0 = 0, + AXI_READ_REQUESTS_ID_1 = 1, + AXI_READ_REQUESTS_ID_2 = 2, + AXI_READ_REQUESTS_ID_3 = 3, + AXI_READ_REQUESTS_ID_4 = 4, + AXI_READ_REQUESTS_ID_5 = 5, + AXI_READ_REQUESTS_ID_6 = 6, + AXI_READ_REQUESTS_ID_7 = 7, + AXI_READ_REQUESTS_ID_8 = 8, + AXI_READ_REQUESTS_ID_9 = 9, + AXI_READ_REQUESTS_ID_10 = 10, + AXI_READ_REQUESTS_ID_11 = 11, + AXI_READ_REQUESTS_ID_12 = 12, + AXI_READ_REQUESTS_ID_13 = 13, + AXI_READ_REQUESTS_ID_14 = 14, + AXI_READ_REQUESTS_ID_15 = 15, + AXI0_READ_REQUESTS_TOTAL = 16, + AXI1_READ_REQUESTS_TOTAL = 17, + AXI2_READ_REQUESTS_TOTAL = 18, + AXI3_READ_REQUESTS_TOTAL = 19, + AXI_READ_REQUESTS_TOTAL = 20, + AXI_WRITE_REQUESTS_ID_0 = 21, + AXI_WRITE_REQUESTS_ID_1 = 22, + AXI_WRITE_REQUESTS_ID_2 = 23, + AXI_WRITE_REQUESTS_ID_3 = 24, + AXI_WRITE_REQUESTS_ID_4 = 25, + AXI_WRITE_REQUESTS_ID_5 = 26, + AXI_WRITE_REQUESTS_ID_6 = 27, + AXI_WRITE_REQUESTS_ID_7 = 28, + AXI_WRITE_REQUESTS_ID_8 = 29, + AXI_WRITE_REQUESTS_ID_9 = 30, + AXI_WRITE_REQUESTS_ID_10 = 31, + AXI_WRITE_REQUESTS_ID_11 = 32, + AXI_WRITE_REQUESTS_ID_12 = 33, + AXI_WRITE_REQUESTS_ID_13 = 34, + AXI_WRITE_REQUESTS_ID_14 = 35, + AXI_WRITE_REQUESTS_ID_15 = 36, + AXI0_WRITE_REQUESTS_TOTAL = 37, + 
AXI1_WRITE_REQUESTS_TOTAL = 38, + AXI2_WRITE_REQUESTS_TOTAL = 39, + AXI3_WRITE_REQUESTS_TOTAL = 40, + AXI_WRITE_REQUESTS_TOTAL = 41, + AXI_TOTAL_REQUESTS = 42, + AXI_READ_DATA_BEATS_ID_0 = 43, + AXI_READ_DATA_BEATS_ID_1 = 44, + AXI_READ_DATA_BEATS_ID_2 = 45, + AXI_READ_DATA_BEATS_ID_3 = 46, + AXI_READ_DATA_BEATS_ID_4 = 47, + AXI_READ_DATA_BEATS_ID_5 = 48, + AXI_READ_DATA_BEATS_ID_6 = 49, + AXI_READ_DATA_BEATS_ID_7 = 50, + AXI_READ_DATA_BEATS_ID_8 = 51, + AXI_READ_DATA_BEATS_ID_9 = 52, + AXI_READ_DATA_BEATS_ID_10 = 53, + AXI_READ_DATA_BEATS_ID_11 = 54, + AXI_READ_DATA_BEATS_ID_12 = 55, + AXI_READ_DATA_BEATS_ID_13 = 56, + AXI_READ_DATA_BEATS_ID_14 = 57, + AXI_READ_DATA_BEATS_ID_15 = 58, + AXI0_READ_DATA_BEATS_TOTAL = 59, + AXI1_READ_DATA_BEATS_TOTAL = 60, + AXI2_READ_DATA_BEATS_TOTAL = 61, + AXI3_READ_DATA_BEATS_TOTAL = 62, + AXI_READ_DATA_BEATS_TOTAL = 63, + AXI_WRITE_DATA_BEATS_ID_0 = 64, + AXI_WRITE_DATA_BEATS_ID_1 = 65, + AXI_WRITE_DATA_BEATS_ID_2 = 66, + AXI_WRITE_DATA_BEATS_ID_3 = 67, + AXI_WRITE_DATA_BEATS_ID_4 = 68, + AXI_WRITE_DATA_BEATS_ID_5 = 69, + AXI_WRITE_DATA_BEATS_ID_6 = 70, + AXI_WRITE_DATA_BEATS_ID_7 = 71, + AXI_WRITE_DATA_BEATS_ID_8 = 72, + AXI_WRITE_DATA_BEATS_ID_9 = 73, + AXI_WRITE_DATA_BEATS_ID_10 = 74, + AXI_WRITE_DATA_BEATS_ID_11 = 75, + AXI_WRITE_DATA_BEATS_ID_12 = 76, + AXI_WRITE_DATA_BEATS_ID_13 = 77, + AXI_WRITE_DATA_BEATS_ID_14 = 78, + AXI_WRITE_DATA_BEATS_ID_15 = 79, + AXI0_WRITE_DATA_BEATS_TOTAL = 80, + AXI1_WRITE_DATA_BEATS_TOTAL = 81, + AXI2_WRITE_DATA_BEATS_TOTAL = 82, + AXI3_WRITE_DATA_BEATS_TOTAL = 83, + AXI_WRITE_DATA_BEATS_TOTAL = 84, + AXI_DATA_BEATS_TOTAL = 85, +}; + +enum a5xx_tex_filter { + A5XX_TEX_NEAREST = 0, + A5XX_TEX_LINEAR = 1, + A5XX_TEX_ANISO = 2, +}; + +enum a5xx_tex_clamp { + A5XX_TEX_REPEAT = 0, + A5XX_TEX_CLAMP_TO_EDGE = 1, + A5XX_TEX_MIRROR_REPEAT = 2, + A5XX_TEX_CLAMP_TO_BORDER = 3, + A5XX_TEX_MIRROR_CLAMP = 4, +}; + +enum a5xx_tex_aniso { + A5XX_TEX_ANISO_1 = 0, + A5XX_TEX_ANISO_2 = 1, + A5XX_TEX_ANISO_4 = 2, 
+ A5XX_TEX_ANISO_8 = 3, + A5XX_TEX_ANISO_16 = 4, +}; + +enum a5xx_tex_swiz { + A5XX_TEX_X = 0, + A5XX_TEX_Y = 1, + A5XX_TEX_Z = 2, + A5XX_TEX_W = 3, + A5XX_TEX_ZERO = 4, + A5XX_TEX_ONE = 5, +}; + +enum a5xx_tex_type { + A5XX_TEX_1D = 0, + A5XX_TEX_2D = 1, + A5XX_TEX_CUBE = 2, + A5XX_TEX_3D = 3, +}; + +#define A5XX_INT0_RBBM_GPU_IDLE 0x00000001 +#define A5XX_INT0_RBBM_AHB_ERROR 0x00000002 +#define A5XX_INT0_RBBM_TRANSFER_TIMEOUT 0x00000004 +#define A5XX_INT0_RBBM_ME_MS_TIMEOUT 0x00000008 +#define A5XX_INT0_RBBM_PFP_MS_TIMEOUT 0x00000010 +#define A5XX_INT0_RBBM_ETS_MS_TIMEOUT 0x00000020 +#define A5XX_INT0_RBBM_ATB_ASYNC_OVERFLOW 0x00000040 +#define A5XX_INT0_RBBM_GPC_ERROR 0x00000080 +#define A5XX_INT0_CP_SW 0x00000100 +#define A5XX_INT0_CP_HW_ERROR 0x00000200 +#define A5XX_INT0_CP_CCU_FLUSH_DEPTH_TS 0x00000400 +#define A5XX_INT0_CP_CCU_FLUSH_COLOR_TS 0x00000800 +#define A5XX_INT0_CP_CCU_RESOLVE_TS 0x00001000 +#define A5XX_INT0_CP_IB2 0x00002000 +#define A5XX_INT0_CP_IB1 0x00004000 +#define A5XX_INT0_CP_RB 0x00008000 +#define A5XX_INT0_CP_UNUSED_1 0x00010000 +#define A5XX_INT0_CP_RB_DONE_TS 0x00020000 +#define A5XX_INT0_CP_WT_DONE_TS 0x00040000 +#define A5XX_INT0_UNKNOWN_1 0x00080000 +#define A5XX_INT0_CP_CACHE_FLUSH_TS 0x00100000 +#define A5XX_INT0_UNUSED_2 0x00200000 +#define A5XX_INT0_RBBM_ATB_BUS_OVERFLOW 0x00400000 +#define A5XX_INT0_MISC_HANG_DETECT 0x00800000 +#define A5XX_INT0_UCHE_OOB_ACCESS 0x01000000 +#define A5XX_INT0_UCHE_TRAP_INTR 0x02000000 +#define A5XX_INT0_DEBBUS_INTR_0 0x04000000 +#define A5XX_INT0_DEBBUS_INTR_1 0x08000000 +#define A5XX_INT0_GPMU_VOLTAGE_DROOP 0x10000000 +#define A5XX_INT0_GPMU_FIRMWARE 0x20000000 +#define A5XX_INT0_ISDB_CPU_IRQ 0x40000000 +#define A5XX_INT0_ISDB_UNDER_DEBUG 0x80000000 +#define A5XX_CP_INT_CP_OPCODE_ERROR 0x00000001 +#define A5XX_CP_INT_CP_RESERVED_BIT_ERROR 0x00000002 +#define A5XX_CP_INT_CP_HW_FAULT_ERROR 0x00000004 +#define A5XX_CP_INT_CP_DMA_ERROR 0x00000008 +#define A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR 
0x00000010 +#define A5XX_CP_INT_CP_AHB_ERROR 0x00000020 +#define REG_A5XX_CP_RB_BASE 0x00000800 + +#define REG_A5XX_CP_RB_BASE_HI 0x00000801 + +#define REG_A5XX_CP_RB_CNTL 0x00000802 + +#define REG_A5XX_CP_RB_RPTR_ADDR 0x00000804 + +#define REG_A5XX_CP_RB_RPTR_ADDR_HI 0x00000805 + +#define REG_A5XX_CP_RB_RPTR 0x00000806 + +#define REG_A5XX_CP_RB_WPTR 0x00000807 + +#define REG_A5XX_CP_PFP_STAT_ADDR 0x00000808 + +#define REG_A5XX_CP_PFP_STAT_DATA 0x00000809 + +#define REG_A5XX_CP_DRAW_STATE_ADDR 0x0000080b + +#define REG_A5XX_CP_DRAW_STATE_DATA 0x0000080c + +#define REG_A5XX_CP_ME_NRT_ADDR_LO 0x0000080d + +#define REG_A5XX_CP_ME_NRT_ADDR_HI 0x0000080e + +#define REG_A5XX_CP_ME_NRT_DATA 0x00000810 + +#define REG_A5XX_CP_CRASH_SCRIPT_BASE_LO 0x00000817 + +#define REG_A5XX_CP_CRASH_SCRIPT_BASE_HI 0x00000818 + +#define REG_A5XX_CP_CRASH_DUMP_CNTL 0x00000819 + +#define REG_A5XX_CP_ME_STAT_ADDR 0x0000081a + +#define REG_A5XX_CP_ROQ_THRESHOLDS_1 0x0000081f + +#define REG_A5XX_CP_ROQ_THRESHOLDS_2 0x00000820 + +#define REG_A5XX_CP_ROQ_DBG_ADDR 0x00000821 + +#define REG_A5XX_CP_ROQ_DBG_DATA 0x00000822 + +#define REG_A5XX_CP_MEQ_DBG_ADDR 0x00000823 + +#define REG_A5XX_CP_MEQ_DBG_DATA 0x00000824 + +#define REG_A5XX_CP_MEQ_THRESHOLDS 0x00000825 + +#define REG_A5XX_CP_MERCIU_SIZE 0x00000826 + +#define REG_A5XX_CP_MERCIU_DBG_ADDR 0x00000827 + +#define REG_A5XX_CP_MERCIU_DBG_DATA_1 0x00000828 + +#define REG_A5XX_CP_MERCIU_DBG_DATA_2 0x00000829 + +#define REG_A5XX_CP_PFP_UCODE_DBG_ADDR 0x0000082a + +#define REG_A5XX_CP_PFP_UCODE_DBG_DATA 0x0000082b + +#define REG_A5XX_CP_ME_UCODE_DBG_ADDR 0x0000082f + +#define REG_A5XX_CP_ME_UCODE_DBG_DATA 0x00000830 + +#define REG_A5XX_CP_CNTL 0x00000831 + +#define REG_A5XX_CP_PFP_ME_CNTL 0x00000832 + +#define REG_A5XX_CP_CHICKEN_DBG 0x00000833 + +#define REG_A5XX_CP_PFP_INSTR_BASE_LO 0x00000835 + +#define REG_A5XX_CP_PFP_INSTR_BASE_HI 0x00000836 + +#define REG_A5XX_CP_ME_INSTR_BASE_LO 0x00000838 + +#define REG_A5XX_CP_ME_INSTR_BASE_HI 0x00000839 + 
+#define REG_A5XX_CP_CONTEXT_SWITCH_CNTL 0x0000083b + +#define REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO 0x0000083c + +#define REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI 0x0000083d + +#define REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO 0x0000083e + +#define REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI 0x0000083f + +#define REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO 0x00000840 + +#define REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI 0x00000841 + +#define REG_A5XX_CP_ADDR_MODE_CNTL 0x00000860 + +#define REG_A5XX_CP_ME_STAT_DATA 0x00000b14 + +#define REG_A5XX_CP_WFI_PEND_CTR 0x00000b15 + +#define REG_A5XX_CP_INTERRUPT_STATUS 0x00000b18 + +#define REG_A5XX_CP_HW_FAULT 0x00000b1a + +#define REG_A5XX_CP_PROTECT_STATUS 0x00000b1c + +#define REG_A5XX_CP_IB1_BASE 0x00000b1f + +#define REG_A5XX_CP_IB1_BASE_HI 0x00000b20 + +#define REG_A5XX_CP_IB1_BUFSZ 0x00000b21 + +#define REG_A5XX_CP_IB2_BASE 0x00000b22 + +#define REG_A5XX_CP_IB2_BASE_HI 0x00000b23 + +#define REG_A5XX_CP_IB2_BUFSZ 0x00000b24 + +static inline uint32_t REG_A5XX_CP_SCRATCH(uint32_t i0) { return 0x00000b78 + 0x1*i0; } + +static inline uint32_t REG_A5XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000b78 + 0x1*i0; } + +static inline uint32_t REG_A5XX_CP_PROTECT(uint32_t i0) { return 0x00000880 + 0x1*i0; } + +static inline uint32_t REG_A5XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000880 + 0x1*i0; } +#define A5XX_CP_PROTECT_REG_BASE_ADDR__MASK 0x0001ffff +#define A5XX_CP_PROTECT_REG_BASE_ADDR__SHIFT 0 +static inline uint32_t A5XX_CP_PROTECT_REG_BASE_ADDR(uint32_t val) +{ + return ((val) << A5XX_CP_PROTECT_REG_BASE_ADDR__SHIFT) & A5XX_CP_PROTECT_REG_BASE_ADDR__MASK; +} +#define A5XX_CP_PROTECT_REG_MASK_LEN__MASK 0x1f000000 +#define A5XX_CP_PROTECT_REG_MASK_LEN__SHIFT 24 +static inline uint32_t A5XX_CP_PROTECT_REG_MASK_LEN(uint32_t val) +{ + return ((val) << A5XX_CP_PROTECT_REG_MASK_LEN__SHIFT) & A5XX_CP_PROTECT_REG_MASK_LEN__MASK; +} +#define A5XX_CP_PROTECT_REG_TRAP_WRITE 0x20000000 +#define A5XX_CP_PROTECT_REG_TRAP_READ 0x40000000 
+ +#define REG_A5XX_CP_PROTECT_CNTL 0x000008a0 + +#define REG_A5XX_CP_AHB_FAULT 0x00000b1b + +#define REG_A5XX_CP_PERFCTR_CP_SEL_0 0x00000bb0 + +#define REG_A5XX_CP_PERFCTR_CP_SEL_1 0x00000bb1 + +#define REG_A5XX_CP_PERFCTR_CP_SEL_2 0x00000bb2 + +#define REG_A5XX_CP_PERFCTR_CP_SEL_3 0x00000bb3 + +#define REG_A5XX_CP_PERFCTR_CP_SEL_4 0x00000bb4 + +#define REG_A5XX_CP_PERFCTR_CP_SEL_5 0x00000bb5 + +#define REG_A5XX_CP_PERFCTR_CP_SEL_6 0x00000bb6 + +#define REG_A5XX_CP_PERFCTR_CP_SEL_7 0x00000bb7 + +#define REG_A5XX_VSC_ADDR_MODE_CNTL 0x00000bc1 + +#define REG_A5XX_CP_POWERCTR_CP_SEL_0 0x00000bba + +#define REG_A5XX_CP_POWERCTR_CP_SEL_1 0x00000bbb + +#define REG_A5XX_CP_POWERCTR_CP_SEL_2 0x00000bbc + +#define REG_A5XX_CP_POWERCTR_CP_SEL_3 0x00000bbd + +#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_A 0x00000004 + +#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_B 0x00000005 + +#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_C 0x00000006 + +#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_D 0x00000007 + +#define REG_A5XX_RBBM_CFG_DBGBUS_CNTLT 0x00000008 + +#define REG_A5XX_RBBM_CFG_DBGBUS_CNTLM 0x00000009 + +#define REG_A5XX_RBBM_CFG_DEBBUS_CTLTM_ENABLE_SHIFT 0x00000018 + +#define REG_A5XX_RBBM_CFG_DBGBUS_OPL 0x0000000a + +#define REG_A5XX_RBBM_CFG_DBGBUS_OPE 0x0000000b + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTL_0 0x0000000c + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTL_1 0x0000000d + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTL_2 0x0000000e + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTL_3 0x0000000f + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_0 0x00000010 + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_1 0x00000011 + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_2 0x00000012 + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_3 0x00000013 + +#define REG_A5XX_RBBM_CFG_DBGBUS_BYTEL_0 0x00000014 + +#define REG_A5XX_RBBM_CFG_DBGBUS_BYTEL_1 0x00000015 + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_0 0x00000016 + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_1 0x00000017 + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_2 0x00000018 + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_3 
0x00000019 + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_0 0x0000001a + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_1 0x0000001b + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_2 0x0000001c + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_3 0x0000001d + +#define REG_A5XX_RBBM_CFG_DBGBUS_NIBBLEE 0x0000001e + +#define REG_A5XX_RBBM_CFG_DBGBUS_PTRC0 0x0000001f + +#define REG_A5XX_RBBM_CFG_DBGBUS_PTRC1 0x00000020 + +#define REG_A5XX_RBBM_CFG_DBGBUS_LOADREG 0x00000021 + +#define REG_A5XX_RBBM_CFG_DBGBUS_IDX 0x00000022 + +#define REG_A5XX_RBBM_CFG_DBGBUS_CLRC 0x00000023 + +#define REG_A5XX_RBBM_CFG_DBGBUS_LOADIVT 0x00000024 + +#define REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL 0x0000002f + +#define REG_A5XX_RBBM_INT_CLEAR_CMD 0x00000037 + +#define REG_A5XX_RBBM_INT_0_MASK 0x00000038 +#define A5XX_RBBM_INT_0_MASK_RBBM_GPU_IDLE 0x00000001 +#define A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR 0x00000002 +#define A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT 0x00000004 +#define A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT 0x00000008 +#define A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT 0x00000010 +#define A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT 0x00000020 +#define A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW 0x00000040 +#define A5XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR 0x00000080 +#define A5XX_RBBM_INT_0_MASK_CP_SW 0x00000100 +#define A5XX_RBBM_INT_0_MASK_CP_HW_ERROR 0x00000200 +#define A5XX_RBBM_INT_0_MASK_CP_CCU_FLUSH_DEPTH_TS 0x00000400 +#define A5XX_RBBM_INT_0_MASK_CP_CCU_FLUSH_COLOR_TS 0x00000800 +#define A5XX_RBBM_INT_0_MASK_CP_CCU_RESOLVE_TS 0x00001000 +#define A5XX_RBBM_INT_0_MASK_CP_IB2 0x00002000 +#define A5XX_RBBM_INT_0_MASK_CP_IB1 0x00004000 +#define A5XX_RBBM_INT_0_MASK_CP_RB 0x00008000 +#define A5XX_RBBM_INT_0_MASK_CP_RB_DONE_TS 0x00020000 +#define A5XX_RBBM_INT_0_MASK_CP_WT_DONE_TS 0x00040000 +#define A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS 0x00100000 +#define A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW 0x00400000 +#define A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT 0x00800000 +#define 
A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS 0x01000000 +#define A5XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR 0x02000000 +#define A5XX_RBBM_INT_0_MASK_DEBBUS_INTR_0 0x04000000 +#define A5XX_RBBM_INT_0_MASK_DEBBUS_INTR_1 0x08000000 +#define A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP 0x10000000 +#define A5XX_RBBM_INT_0_MASK_GPMU_FIRMWARE 0x20000000 +#define A5XX_RBBM_INT_0_MASK_ISDB_CPU_IRQ 0x40000000 +#define A5XX_RBBM_INT_0_MASK_ISDB_UNDER_DEBUG 0x80000000 + +#define REG_A5XX_RBBM_AHB_DBG_CNTL 0x0000003f + +#define REG_A5XX_RBBM_EXT_VBIF_DBG_CNTL 0x00000041 + +#define REG_A5XX_RBBM_SW_RESET_CMD 0x00000043 + +#define REG_A5XX_RBBM_BLOCK_SW_RESET_CMD 0x00000045 + +#define REG_A5XX_RBBM_BLOCK_SW_RESET_CMD2 0x00000046 + +#define REG_A5XX_RBBM_DBG_LO_HI_GPIO 0x00000048 + +#define REG_A5XX_RBBM_EXT_TRACE_BUS_CNTL 0x00000049 + +#define REG_A5XX_RBBM_CLOCK_CNTL_TP0 0x0000004a + +#define REG_A5XX_RBBM_CLOCK_CNTL_TP1 0x0000004b + +#define REG_A5XX_RBBM_CLOCK_CNTL_TP2 0x0000004c + +#define REG_A5XX_RBBM_CLOCK_CNTL_TP3 0x0000004d + +#define REG_A5XX_RBBM_CLOCK_CNTL2_TP0 0x0000004e + +#define REG_A5XX_RBBM_CLOCK_CNTL2_TP1 0x0000004f + +#define REG_A5XX_RBBM_CLOCK_CNTL2_TP2 0x00000050 + +#define REG_A5XX_RBBM_CLOCK_CNTL2_TP3 0x00000051 + +#define REG_A5XX_RBBM_CLOCK_CNTL3_TP0 0x00000052 + +#define REG_A5XX_RBBM_CLOCK_CNTL3_TP1 0x00000053 + +#define REG_A5XX_RBBM_CLOCK_CNTL3_TP2 0x00000054 + +#define REG_A5XX_RBBM_CLOCK_CNTL3_TP3 0x00000055 + +#define REG_A5XX_RBBM_READ_AHB_THROUGH_DBG 0x00000059 + +#define REG_A5XX_RBBM_CLOCK_CNTL_UCHE 0x0000005a + +#define REG_A5XX_RBBM_CLOCK_CNTL2_UCHE 0x0000005b + +#define REG_A5XX_RBBM_CLOCK_CNTL3_UCHE 0x0000005c + +#define REG_A5XX_RBBM_CLOCK_CNTL4_UCHE 0x0000005d + +#define REG_A5XX_RBBM_CLOCK_HYST_UCHE 0x0000005e + +#define REG_A5XX_RBBM_CLOCK_DELAY_UCHE 0x0000005f + +#define REG_A5XX_RBBM_CLOCK_MODE_GPC 0x00000060 + +#define REG_A5XX_RBBM_CLOCK_DELAY_GPC 0x00000061 + +#define REG_A5XX_RBBM_CLOCK_HYST_GPC 0x00000062 + +#define 
REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM 0x00000063 + +#define REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x00000064 + +#define REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x00000065 + +#define REG_A5XX_RBBM_CLOCK_DELAY_HLSQ 0x00000066 + +#define REG_A5XX_RBBM_CLOCK_CNTL 0x00000067 + +#define REG_A5XX_RBBM_CLOCK_CNTL_SP0 0x00000068 + +#define REG_A5XX_RBBM_CLOCK_CNTL_SP1 0x00000069 + +#define REG_A5XX_RBBM_CLOCK_CNTL_SP2 0x0000006a + +#define REG_A5XX_RBBM_CLOCK_CNTL_SP3 0x0000006b + +#define REG_A5XX_RBBM_CLOCK_CNTL2_SP0 0x0000006c + +#define REG_A5XX_RBBM_CLOCK_CNTL2_SP1 0x0000006d + +#define REG_A5XX_RBBM_CLOCK_CNTL2_SP2 0x0000006e + +#define REG_A5XX_RBBM_CLOCK_CNTL2_SP3 0x0000006f + +#define REG_A5XX_RBBM_CLOCK_HYST_SP0 0x00000070 + +#define REG_A5XX_RBBM_CLOCK_HYST_SP1 0x00000071 + +#define REG_A5XX_RBBM_CLOCK_HYST_SP2 0x00000072 + +#define REG_A5XX_RBBM_CLOCK_HYST_SP3 0x00000073 + +#define REG_A5XX_RBBM_CLOCK_DELAY_SP0 0x00000074 + +#define REG_A5XX_RBBM_CLOCK_DELAY_SP1 0x00000075 + +#define REG_A5XX_RBBM_CLOCK_DELAY_SP2 0x00000076 + +#define REG_A5XX_RBBM_CLOCK_DELAY_SP3 0x00000077 + +#define REG_A5XX_RBBM_CLOCK_CNTL_RB0 0x00000078 + +#define REG_A5XX_RBBM_CLOCK_CNTL_RB1 0x00000079 + +#define REG_A5XX_RBBM_CLOCK_CNTL_RB2 0x0000007a + +#define REG_A5XX_RBBM_CLOCK_CNTL_RB3 0x0000007b + +#define REG_A5XX_RBBM_CLOCK_CNTL2_RB0 0x0000007c + +#define REG_A5XX_RBBM_CLOCK_CNTL2_RB1 0x0000007d + +#define REG_A5XX_RBBM_CLOCK_CNTL2_RB2 0x0000007e + +#define REG_A5XX_RBBM_CLOCK_CNTL2_RB3 0x0000007f + +#define REG_A5XX_RBBM_CLOCK_HYST_RAC 0x00000080 + +#define REG_A5XX_RBBM_CLOCK_DELAY_RAC 0x00000081 + +#define REG_A5XX_RBBM_CLOCK_CNTL_CCU0 0x00000082 + +#define REG_A5XX_RBBM_CLOCK_CNTL_CCU1 0x00000083 + +#define REG_A5XX_RBBM_CLOCK_CNTL_CCU2 0x00000084 + +#define REG_A5XX_RBBM_CLOCK_CNTL_CCU3 0x00000085 + +#define REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0 0x00000086 + +#define REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1 0x00000087 + +#define REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2 0x00000088 + +#define 
REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3 0x00000089 + +#define REG_A5XX_RBBM_CLOCK_CNTL_RAC 0x0000008a + +#define REG_A5XX_RBBM_CLOCK_CNTL2_RAC 0x0000008b + +#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0 0x0000008c + +#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1 0x0000008d + +#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2 0x0000008e + +#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3 0x0000008f + +#define REG_A5XX_RBBM_CLOCK_HYST_VFD 0x00000090 + +#define REG_A5XX_RBBM_CLOCK_MODE_VFD 0x00000091 + +#define REG_A5XX_RBBM_CLOCK_DELAY_VFD 0x00000092 + +#define REG_A5XX_RBBM_AHB_CNTL0 0x00000093 + +#define REG_A5XX_RBBM_AHB_CNTL1 0x00000094 + +#define REG_A5XX_RBBM_AHB_CNTL2 0x00000095 + +#define REG_A5XX_RBBM_AHB_CMD 0x00000096 + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11 0x0000009c + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12 0x0000009d + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13 0x0000009e + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14 0x0000009f + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15 0x000000a0 + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16 0x000000a1 + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17 0x000000a2 + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18 0x000000a3 + +#define REG_A5XX_RBBM_CLOCK_DELAY_TP0 0x000000a4 + +#define REG_A5XX_RBBM_CLOCK_DELAY_TP1 0x000000a5 + +#define REG_A5XX_RBBM_CLOCK_DELAY_TP2 0x000000a6 + +#define REG_A5XX_RBBM_CLOCK_DELAY_TP3 0x000000a7 + +#define REG_A5XX_RBBM_CLOCK_DELAY2_TP0 0x000000a8 + +#define REG_A5XX_RBBM_CLOCK_DELAY2_TP1 0x000000a9 + +#define REG_A5XX_RBBM_CLOCK_DELAY2_TP2 0x000000aa + +#define REG_A5XX_RBBM_CLOCK_DELAY2_TP3 0x000000ab + +#define REG_A5XX_RBBM_CLOCK_DELAY3_TP0 0x000000ac + +#define REG_A5XX_RBBM_CLOCK_DELAY3_TP1 0x000000ad + +#define REG_A5XX_RBBM_CLOCK_DELAY3_TP2 0x000000ae + +#define REG_A5XX_RBBM_CLOCK_DELAY3_TP3 0x000000af + +#define REG_A5XX_RBBM_CLOCK_HYST_TP0 0x000000b0 + +#define REG_A5XX_RBBM_CLOCK_HYST_TP1 0x000000b1 + +#define 
REG_A5XX_RBBM_CLOCK_HYST_TP2 0x000000b2 + +#define REG_A5XX_RBBM_CLOCK_HYST_TP3 0x000000b3 + +#define REG_A5XX_RBBM_CLOCK_HYST2_TP0 0x000000b4 + +#define REG_A5XX_RBBM_CLOCK_HYST2_TP1 0x000000b5 + +#define REG_A5XX_RBBM_CLOCK_HYST2_TP2 0x000000b6 + +#define REG_A5XX_RBBM_CLOCK_HYST2_TP3 0x000000b7 + +#define REG_A5XX_RBBM_CLOCK_HYST3_TP0 0x000000b8 + +#define REG_A5XX_RBBM_CLOCK_HYST3_TP1 0x000000b9 + +#define REG_A5XX_RBBM_CLOCK_HYST3_TP2 0x000000ba + +#define REG_A5XX_RBBM_CLOCK_HYST3_TP3 0x000000bb + +#define REG_A5XX_RBBM_CLOCK_CNTL_GPMU 0x000000c8 + +#define REG_A5XX_RBBM_CLOCK_DELAY_GPMU 0x000000c9 + +#define REG_A5XX_RBBM_CLOCK_HYST_GPMU 0x000000ca + +#define REG_A5XX_RBBM_PERFCTR_CP_0_LO 0x000003a0 + +#define REG_A5XX_RBBM_PERFCTR_CP_0_HI 0x000003a1 + +#define REG_A5XX_RBBM_PERFCTR_CP_1_LO 0x000003a2 + +#define REG_A5XX_RBBM_PERFCTR_CP_1_HI 0x000003a3 + +#define REG_A5XX_RBBM_PERFCTR_CP_2_LO 0x000003a4 + +#define REG_A5XX_RBBM_PERFCTR_CP_2_HI 0x000003a5 + +#define REG_A5XX_RBBM_PERFCTR_CP_3_LO 0x000003a6 + +#define REG_A5XX_RBBM_PERFCTR_CP_3_HI 0x000003a7 + +#define REG_A5XX_RBBM_PERFCTR_CP_4_LO 0x000003a8 + +#define REG_A5XX_RBBM_PERFCTR_CP_4_HI 0x000003a9 + +#define REG_A5XX_RBBM_PERFCTR_CP_5_LO 0x000003aa + +#define REG_A5XX_RBBM_PERFCTR_CP_5_HI 0x000003ab + +#define REG_A5XX_RBBM_PERFCTR_CP_6_LO 0x000003ac + +#define REG_A5XX_RBBM_PERFCTR_CP_6_HI 0x000003ad + +#define REG_A5XX_RBBM_PERFCTR_CP_7_LO 0x000003ae + +#define REG_A5XX_RBBM_PERFCTR_CP_7_HI 0x000003af + +#define REG_A5XX_RBBM_PERFCTR_RBBM_0_LO 0x000003b0 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_0_HI 0x000003b1 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_1_LO 0x000003b2 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_1_HI 0x000003b3 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_2_LO 0x000003b4 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_2_HI 0x000003b5 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_3_LO 0x000003b6 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_3_HI 0x000003b7 + +#define REG_A5XX_RBBM_PERFCTR_PC_0_LO 0x000003b8 + +#define 
REG_A5XX_RBBM_PERFCTR_PC_0_HI 0x000003b9 + +#define REG_A5XX_RBBM_PERFCTR_PC_1_LO 0x000003ba + +#define REG_A5XX_RBBM_PERFCTR_PC_1_HI 0x000003bb + +#define REG_A5XX_RBBM_PERFCTR_PC_2_LO 0x000003bc + +#define REG_A5XX_RBBM_PERFCTR_PC_2_HI 0x000003bd + +#define REG_A5XX_RBBM_PERFCTR_PC_3_LO 0x000003be + +#define REG_A5XX_RBBM_PERFCTR_PC_3_HI 0x000003bf + +#define REG_A5XX_RBBM_PERFCTR_PC_4_LO 0x000003c0 + +#define REG_A5XX_RBBM_PERFCTR_PC_4_HI 0x000003c1 + +#define REG_A5XX_RBBM_PERFCTR_PC_5_LO 0x000003c2 + +#define REG_A5XX_RBBM_PERFCTR_PC_5_HI 0x000003c3 + +#define REG_A5XX_RBBM_PERFCTR_PC_6_LO 0x000003c4 + +#define REG_A5XX_RBBM_PERFCTR_PC_6_HI 0x000003c5 + +#define REG_A5XX_RBBM_PERFCTR_PC_7_LO 0x000003c6 + +#define REG_A5XX_RBBM_PERFCTR_PC_7_HI 0x000003c7 + +#define REG_A5XX_RBBM_PERFCTR_VFD_0_LO 0x000003c8 + +#define REG_A5XX_RBBM_PERFCTR_VFD_0_HI 0x000003c9 + +#define REG_A5XX_RBBM_PERFCTR_VFD_1_LO 0x000003ca + +#define REG_A5XX_RBBM_PERFCTR_VFD_1_HI 0x000003cb + +#define REG_A5XX_RBBM_PERFCTR_VFD_2_LO 0x000003cc + +#define REG_A5XX_RBBM_PERFCTR_VFD_2_HI 0x000003cd + +#define REG_A5XX_RBBM_PERFCTR_VFD_3_LO 0x000003ce + +#define REG_A5XX_RBBM_PERFCTR_VFD_3_HI 0x000003cf + +#define REG_A5XX_RBBM_PERFCTR_VFD_4_LO 0x000003d0 + +#define REG_A5XX_RBBM_PERFCTR_VFD_4_HI 0x000003d1 + +#define REG_A5XX_RBBM_PERFCTR_VFD_5_LO 0x000003d2 + +#define REG_A5XX_RBBM_PERFCTR_VFD_5_HI 0x000003d3 + +#define REG_A5XX_RBBM_PERFCTR_VFD_6_LO 0x000003d4 + +#define REG_A5XX_RBBM_PERFCTR_VFD_6_HI 0x000003d5 + +#define REG_A5XX_RBBM_PERFCTR_VFD_7_LO 0x000003d6 + +#define REG_A5XX_RBBM_PERFCTR_VFD_7_HI 0x000003d7 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_0_LO 0x000003d8 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_0_HI 0x000003d9 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_1_LO 0x000003da + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_1_HI 0x000003db + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_2_LO 0x000003dc + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_2_HI 0x000003dd + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_3_LO 0x000003de + 
+#define REG_A5XX_RBBM_PERFCTR_HLSQ_3_HI 0x000003df + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_4_LO 0x000003e0 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_4_HI 0x000003e1 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_5_LO 0x000003e2 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_5_HI 0x000003e3 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_6_LO 0x000003e4 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_6_HI 0x000003e5 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_7_LO 0x000003e6 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_7_HI 0x000003e7 + +#define REG_A5XX_RBBM_PERFCTR_VPC_0_LO 0x000003e8 + +#define REG_A5XX_RBBM_PERFCTR_VPC_0_HI 0x000003e9 + +#define REG_A5XX_RBBM_PERFCTR_VPC_1_LO 0x000003ea + +#define REG_A5XX_RBBM_PERFCTR_VPC_1_HI 0x000003eb + +#define REG_A5XX_RBBM_PERFCTR_VPC_2_LO 0x000003ec + +#define REG_A5XX_RBBM_PERFCTR_VPC_2_HI 0x000003ed + +#define REG_A5XX_RBBM_PERFCTR_VPC_3_LO 0x000003ee + +#define REG_A5XX_RBBM_PERFCTR_VPC_3_HI 0x000003ef + +#define REG_A5XX_RBBM_PERFCTR_CCU_0_LO 0x000003f0 + +#define REG_A5XX_RBBM_PERFCTR_CCU_0_HI 0x000003f1 + +#define REG_A5XX_RBBM_PERFCTR_CCU_1_LO 0x000003f2 + +#define REG_A5XX_RBBM_PERFCTR_CCU_1_HI 0x000003f3 + +#define REG_A5XX_RBBM_PERFCTR_CCU_2_LO 0x000003f4 + +#define REG_A5XX_RBBM_PERFCTR_CCU_2_HI 0x000003f5 + +#define REG_A5XX_RBBM_PERFCTR_CCU_3_LO 0x000003f6 + +#define REG_A5XX_RBBM_PERFCTR_CCU_3_HI 0x000003f7 + +#define REG_A5XX_RBBM_PERFCTR_TSE_0_LO 0x000003f8 + +#define REG_A5XX_RBBM_PERFCTR_TSE_0_HI 0x000003f9 + +#define REG_A5XX_RBBM_PERFCTR_TSE_1_LO 0x000003fa + +#define REG_A5XX_RBBM_PERFCTR_TSE_1_HI 0x000003fb + +#define REG_A5XX_RBBM_PERFCTR_TSE_2_LO 0x000003fc + +#define REG_A5XX_RBBM_PERFCTR_TSE_2_HI 0x000003fd + +#define REG_A5XX_RBBM_PERFCTR_TSE_3_LO 0x000003fe + +#define REG_A5XX_RBBM_PERFCTR_TSE_3_HI 0x000003ff + +#define REG_A5XX_RBBM_PERFCTR_RAS_0_LO 0x00000400 + +#define REG_A5XX_RBBM_PERFCTR_RAS_0_HI 0x00000401 + +#define REG_A5XX_RBBM_PERFCTR_RAS_1_LO 0x00000402 + +#define REG_A5XX_RBBM_PERFCTR_RAS_1_HI 0x00000403 + +#define 
REG_A5XX_RBBM_PERFCTR_RAS_2_LO 0x00000404 + +#define REG_A5XX_RBBM_PERFCTR_RAS_2_HI 0x00000405 + +#define REG_A5XX_RBBM_PERFCTR_RAS_3_LO 0x00000406 + +#define REG_A5XX_RBBM_PERFCTR_RAS_3_HI 0x00000407 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_0_LO 0x00000408 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_0_HI 0x00000409 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_1_LO 0x0000040a + +#define REG_A5XX_RBBM_PERFCTR_UCHE_1_HI 0x0000040b + +#define REG_A5XX_RBBM_PERFCTR_UCHE_2_LO 0x0000040c + +#define REG_A5XX_RBBM_PERFCTR_UCHE_2_HI 0x0000040d + +#define REG_A5XX_RBBM_PERFCTR_UCHE_3_LO 0x0000040e + +#define REG_A5XX_RBBM_PERFCTR_UCHE_3_HI 0x0000040f + +#define REG_A5XX_RBBM_PERFCTR_UCHE_4_LO 0x00000410 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_4_HI 0x00000411 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_5_LO 0x00000412 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_5_HI 0x00000413 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_6_LO 0x00000414 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_6_HI 0x00000415 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_7_LO 0x00000416 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_7_HI 0x00000417 + +#define REG_A5XX_RBBM_PERFCTR_TP_0_LO 0x00000418 + +#define REG_A5XX_RBBM_PERFCTR_TP_0_HI 0x00000419 + +#define REG_A5XX_RBBM_PERFCTR_TP_1_LO 0x0000041a + +#define REG_A5XX_RBBM_PERFCTR_TP_1_HI 0x0000041b + +#define REG_A5XX_RBBM_PERFCTR_TP_2_LO 0x0000041c + +#define REG_A5XX_RBBM_PERFCTR_TP_2_HI 0x0000041d + +#define REG_A5XX_RBBM_PERFCTR_TP_3_LO 0x0000041e + +#define REG_A5XX_RBBM_PERFCTR_TP_3_HI 0x0000041f + +#define REG_A5XX_RBBM_PERFCTR_TP_4_LO 0x00000420 + +#define REG_A5XX_RBBM_PERFCTR_TP_4_HI 0x00000421 + +#define REG_A5XX_RBBM_PERFCTR_TP_5_LO 0x00000422 + +#define REG_A5XX_RBBM_PERFCTR_TP_5_HI 0x00000423 + +#define REG_A5XX_RBBM_PERFCTR_TP_6_LO 0x00000424 + +#define REG_A5XX_RBBM_PERFCTR_TP_6_HI 0x00000425 + +#define REG_A5XX_RBBM_PERFCTR_TP_7_LO 0x00000426 + +#define REG_A5XX_RBBM_PERFCTR_TP_7_HI 0x00000427 + +#define REG_A5XX_RBBM_PERFCTR_SP_0_LO 0x00000428 + +#define REG_A5XX_RBBM_PERFCTR_SP_0_HI 
0x00000429 + +#define REG_A5XX_RBBM_PERFCTR_SP_1_LO 0x0000042a + +#define REG_A5XX_RBBM_PERFCTR_SP_1_HI 0x0000042b + +#define REG_A5XX_RBBM_PERFCTR_SP_2_LO 0x0000042c + +#define REG_A5XX_RBBM_PERFCTR_SP_2_HI 0x0000042d + +#define REG_A5XX_RBBM_PERFCTR_SP_3_LO 0x0000042e + +#define REG_A5XX_RBBM_PERFCTR_SP_3_HI 0x0000042f + +#define REG_A5XX_RBBM_PERFCTR_SP_4_LO 0x00000430 + +#define REG_A5XX_RBBM_PERFCTR_SP_4_HI 0x00000431 + +#define REG_A5XX_RBBM_PERFCTR_SP_5_LO 0x00000432 + +#define REG_A5XX_RBBM_PERFCTR_SP_5_HI 0x00000433 + +#define REG_A5XX_RBBM_PERFCTR_SP_6_LO 0x00000434 + +#define REG_A5XX_RBBM_PERFCTR_SP_6_HI 0x00000435 + +#define REG_A5XX_RBBM_PERFCTR_SP_7_LO 0x00000436 + +#define REG_A5XX_RBBM_PERFCTR_SP_7_HI 0x00000437 + +#define REG_A5XX_RBBM_PERFCTR_SP_8_LO 0x00000438 + +#define REG_A5XX_RBBM_PERFCTR_SP_8_HI 0x00000439 + +#define REG_A5XX_RBBM_PERFCTR_SP_9_LO 0x0000043a + +#define REG_A5XX_RBBM_PERFCTR_SP_9_HI 0x0000043b + +#define REG_A5XX_RBBM_PERFCTR_SP_10_LO 0x0000043c + +#define REG_A5XX_RBBM_PERFCTR_SP_10_HI 0x0000043d + +#define REG_A5XX_RBBM_PERFCTR_SP_11_LO 0x0000043e + +#define REG_A5XX_RBBM_PERFCTR_SP_11_HI 0x0000043f + +#define REG_A5XX_RBBM_PERFCTR_RB_0_LO 0x00000440 + +#define REG_A5XX_RBBM_PERFCTR_RB_0_HI 0x00000441 + +#define REG_A5XX_RBBM_PERFCTR_RB_1_LO 0x00000442 + +#define REG_A5XX_RBBM_PERFCTR_RB_1_HI 0x00000443 + +#define REG_A5XX_RBBM_PERFCTR_RB_2_LO 0x00000444 + +#define REG_A5XX_RBBM_PERFCTR_RB_2_HI 0x00000445 + +#define REG_A5XX_RBBM_PERFCTR_RB_3_LO 0x00000446 + +#define REG_A5XX_RBBM_PERFCTR_RB_3_HI 0x00000447 + +#define REG_A5XX_RBBM_PERFCTR_RB_4_LO 0x00000448 + +#define REG_A5XX_RBBM_PERFCTR_RB_4_HI 0x00000449 + +#define REG_A5XX_RBBM_PERFCTR_RB_5_LO 0x0000044a + +#define REG_A5XX_RBBM_PERFCTR_RB_5_HI 0x0000044b + +#define REG_A5XX_RBBM_PERFCTR_RB_6_LO 0x0000044c + +#define REG_A5XX_RBBM_PERFCTR_RB_6_HI 0x0000044d + +#define REG_A5XX_RBBM_PERFCTR_RB_7_LO 0x0000044e + +#define REG_A5XX_RBBM_PERFCTR_RB_7_HI 0x0000044f + 
+#define REG_A5XX_RBBM_PERFCTR_VSC_0_LO 0x00000450 + +#define REG_A5XX_RBBM_PERFCTR_VSC_0_HI 0x00000451 + +#define REG_A5XX_RBBM_PERFCTR_VSC_1_LO 0x00000452 + +#define REG_A5XX_RBBM_PERFCTR_VSC_1_HI 0x00000453 + +#define REG_A5XX_RBBM_PERFCTR_LRZ_0_LO 0x00000454 + +#define REG_A5XX_RBBM_PERFCTR_LRZ_0_HI 0x00000455 + +#define REG_A5XX_RBBM_PERFCTR_LRZ_1_LO 0x00000456 + +#define REG_A5XX_RBBM_PERFCTR_LRZ_1_HI 0x00000457 + +#define REG_A5XX_RBBM_PERFCTR_LRZ_2_LO 0x00000458 + +#define REG_A5XX_RBBM_PERFCTR_LRZ_2_HI 0x00000459 + +#define REG_A5XX_RBBM_PERFCTR_LRZ_3_LO 0x0000045a + +#define REG_A5XX_RBBM_PERFCTR_LRZ_3_HI 0x0000045b + +#define REG_A5XX_RBBM_PERFCTR_CMP_0_LO 0x0000045c + +#define REG_A5XX_RBBM_PERFCTR_CMP_0_HI 0x0000045d + +#define REG_A5XX_RBBM_PERFCTR_CMP_1_LO 0x0000045e + +#define REG_A5XX_RBBM_PERFCTR_CMP_1_HI 0x0000045f + +#define REG_A5XX_RBBM_PERFCTR_CMP_2_LO 0x00000460 + +#define REG_A5XX_RBBM_PERFCTR_CMP_2_HI 0x00000461 + +#define REG_A5XX_RBBM_PERFCTR_CMP_3_LO 0x00000462 + +#define REG_A5XX_RBBM_PERFCTR_CMP_3_HI 0x00000463 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0 0x0000046b + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_1 0x0000046c + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_2 0x0000046d + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_3 0x0000046e + +#define REG_A5XX_RBBM_ALWAYSON_COUNTER_LO 0x000004d2 + +#define REG_A5XX_RBBM_ALWAYSON_COUNTER_HI 0x000004d3 + +#define REG_A5XX_RBBM_STATUS 0x000004f5 +#define A5XX_RBBM_STATUS_GPU_BUSY_IGN_AHB 0x80000000 +#define A5XX_RBBM_STATUS_GPU_BUSY_IGN_AHB_CP 0x40000000 +#define A5XX_RBBM_STATUS_HLSQ_BUSY 0x20000000 +#define A5XX_RBBM_STATUS_VSC_BUSY 0x10000000 +#define A5XX_RBBM_STATUS_TPL1_BUSY 0x08000000 +#define A5XX_RBBM_STATUS_SP_BUSY 0x04000000 +#define A5XX_RBBM_STATUS_UCHE_BUSY 0x02000000 +#define A5XX_RBBM_STATUS_VPC_BUSY 0x01000000 +#define A5XX_RBBM_STATUS_VFDP_BUSY 0x00800000 +#define A5XX_RBBM_STATUS_VFD_BUSY 0x00400000 +#define A5XX_RBBM_STATUS_TESS_BUSY 0x00200000 +#define A5XX_RBBM_STATUS_PC_VSD_BUSY 
0x00100000 +#define A5XX_RBBM_STATUS_PC_DCALL_BUSY 0x00080000 +#define A5XX_RBBM_STATUS_GPMU_SLAVE_BUSY 0x00040000 +#define A5XX_RBBM_STATUS_DCOM_BUSY 0x00020000 +#define A5XX_RBBM_STATUS_COM_BUSY 0x00010000 +#define A5XX_RBBM_STATUS_LRZ_BUZY 0x00008000 +#define A5XX_RBBM_STATUS_A2D_DSP_BUSY 0x00004000 +#define A5XX_RBBM_STATUS_CCUFCHE_BUSY 0x00002000 +#define A5XX_RBBM_STATUS_RB_BUSY 0x00001000 +#define A5XX_RBBM_STATUS_RAS_BUSY 0x00000800 +#define A5XX_RBBM_STATUS_TSE_BUSY 0x00000400 +#define A5XX_RBBM_STATUS_VBIF_BUSY 0x00000200 +#define A5XX_RBBM_STATUS_GPU_BUSY_IGN_AHB_HYST 0x00000100 +#define A5XX_RBBM_STATUS_CP_BUSY_IGN_HYST 0x00000080 +#define A5XX_RBBM_STATUS_CP_BUSY 0x00000040 +#define A5XX_RBBM_STATUS_GPMU_MASTER_BUSY 0x00000020 +#define A5XX_RBBM_STATUS_CP_CRASH_BUSY 0x00000010 +#define A5XX_RBBM_STATUS_CP_ETS_BUSY 0x00000008 +#define A5XX_RBBM_STATUS_CP_PFP_BUSY 0x00000004 +#define A5XX_RBBM_STATUS_CP_ME_BUSY 0x00000002 +#define A5XX_RBBM_STATUS_HI_BUSY 0x00000001 + +#define REG_A5XX_RBBM_STATUS3 0x00000530 + +#define REG_A5XX_RBBM_INT_0_STATUS 0x000004e1 + +#define REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS 0x000004f0 + +#define REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS 0x000004f1 + +#define REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS 0x000004f3 + +#define REG_A5XX_RBBM_AHB_ERROR_STATUS 0x000004f4 + +#define REG_A5XX_RBBM_PERFCTR_CNTL 0x00000464 + +#define REG_A5XX_RBBM_PERFCTR_LOAD_CMD0 0x00000465 + +#define REG_A5XX_RBBM_PERFCTR_LOAD_CMD1 0x00000466 + +#define REG_A5XX_RBBM_PERFCTR_LOAD_CMD2 0x00000467 + +#define REG_A5XX_RBBM_PERFCTR_LOAD_CMD3 0x00000468 + +#define REG_A5XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000469 + +#define REG_A5XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x0000046a + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0 0x0000046b + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_1 0x0000046c + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_2 0x0000046d + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_3 0x0000046e + +#define REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED 0x0000046f + +#define 
REG_A5XX_RBBM_AHB_ERROR 0x000004ed + +#define REG_A5XX_RBBM_CFG_DBGBUS_EVENT_LOGIC 0x00000504 + +#define REG_A5XX_RBBM_CFG_DBGBUS_OVER 0x00000505 + +#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT0 0x00000506 + +#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT1 0x00000507 + +#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT2 0x00000508 + +#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT3 0x00000509 + +#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT4 0x0000050a + +#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT5 0x0000050b + +#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_ADDR 0x0000050c + +#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF0 0x0000050d + +#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1 0x0000050e + +#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2 0x0000050f + +#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF3 0x00000510 + +#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF4 0x00000511 + +#define REG_A5XX_RBBM_CFG_DBGBUS_MISR0 0x00000512 + +#define REG_A5XX_RBBM_CFG_DBGBUS_MISR1 0x00000513 + +#define REG_A5XX_RBBM_ISDB_CNT 0x00000533 + +#define REG_A5XX_RBBM_SECVID_TRUST_CONFIG 0x0000f000 + +#define REG_A5XX_RBBM_SECVID_TRUST_CNTL 0x0000f400 + +#define REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO 0x0000f800 + +#define REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0x0000f801 + +#define REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE 0x0000f802 + +#define REG_A5XX_RBBM_SECVID_TSB_CNTL 0x0000f803 + +#define REG_A5XX_RBBM_SECVID_TSB_COMP_STATUS_LO 0x0000f804 + +#define REG_A5XX_RBBM_SECVID_TSB_COMP_STATUS_HI 0x0000f805 + +#define REG_A5XX_RBBM_SECVID_TSB_UCHE_STATUS_LO 0x0000f806 + +#define REG_A5XX_RBBM_SECVID_TSB_UCHE_STATUS_HI 0x0000f807 + +#define REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL 0x0000f810 + +#define REG_A5XX_VSC_BIN_SIZE 0x00000bc2 +#define A5XX_VSC_BIN_SIZE_WIDTH__MASK 0x000000ff +#define A5XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 +static inline uint32_t A5XX_VSC_BIN_SIZE_WIDTH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A5XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A5XX_VSC_BIN_SIZE_WIDTH__MASK; +} +#define A5XX_VSC_BIN_SIZE_HEIGHT__MASK 0x0001fe00 
+#define A5XX_VSC_BIN_SIZE_HEIGHT__SHIFT 9 +static inline uint32_t A5XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A5XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A5XX_VSC_BIN_SIZE_HEIGHT__MASK; +} + +#define REG_A5XX_VSC_SIZE_ADDRESS_LO 0x00000bc3 + +#define REG_A5XX_VSC_SIZE_ADDRESS_HI 0x00000bc4 + +#define REG_A5XX_UNKNOWN_0BC5 0x00000bc5 + +#define REG_A5XX_UNKNOWN_0BC6 0x00000bc6 + +static inline uint32_t REG_A5XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000bd0 + 0x1*i0; } + +static inline uint32_t REG_A5XX_VSC_PIPE_CONFIG_REG(uint32_t i0) { return 0x00000bd0 + 0x1*i0; } +#define A5XX_VSC_PIPE_CONFIG_REG_X__MASK 0x000003ff +#define A5XX_VSC_PIPE_CONFIG_REG_X__SHIFT 0 +static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_X(uint32_t val) +{ + return ((val) << A5XX_VSC_PIPE_CONFIG_REG_X__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_X__MASK; +} +#define A5XX_VSC_PIPE_CONFIG_REG_Y__MASK 0x000ffc00 +#define A5XX_VSC_PIPE_CONFIG_REG_Y__SHIFT 10 +static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_Y(uint32_t val) +{ + return ((val) << A5XX_VSC_PIPE_CONFIG_REG_Y__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_Y__MASK; +} +#define A5XX_VSC_PIPE_CONFIG_REG_W__MASK 0x00f00000 +#define A5XX_VSC_PIPE_CONFIG_REG_W__SHIFT 20 +static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_W(uint32_t val) +{ + return ((val) << A5XX_VSC_PIPE_CONFIG_REG_W__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_W__MASK; +} +#define A5XX_VSC_PIPE_CONFIG_REG_H__MASK 0x0f000000 +#define A5XX_VSC_PIPE_CONFIG_REG_H__SHIFT 24 +static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_H(uint32_t val) +{ + return ((val) << A5XX_VSC_PIPE_CONFIG_REG_H__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_H__MASK; +} + +static inline uint32_t REG_A5XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000be0 + 0x2*i0; } + +static inline uint32_t REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(uint32_t i0) { return 0x00000be0 + 0x2*i0; } + +static inline uint32_t REG_A5XX_VSC_PIPE_DATA_ADDRESS_HI(uint32_t i0) { return 0x00000be1 + 0x2*i0; } + +static inline uint32_t 
REG_A5XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c00 + 0x1*i0; } + +static inline uint32_t REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(uint32_t i0) { return 0x00000c00 + 0x1*i0; } + +#define REG_A5XX_VSC_PERFCTR_VSC_SEL_0 0x00000c60 + +#define REG_A5XX_VSC_PERFCTR_VSC_SEL_1 0x00000c61 + +#define REG_A5XX_VSC_RESOLVE_CNTL 0x00000cdd +#define A5XX_VSC_RESOLVE_CNTL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_VSC_RESOLVE_CNTL_X__MASK 0x00007fff +#define A5XX_VSC_RESOLVE_CNTL_X__SHIFT 0 +static inline uint32_t A5XX_VSC_RESOLVE_CNTL_X(uint32_t val) +{ + return ((val) << A5XX_VSC_RESOLVE_CNTL_X__SHIFT) & A5XX_VSC_RESOLVE_CNTL_X__MASK; +} +#define A5XX_VSC_RESOLVE_CNTL_Y__MASK 0x7fff0000 +#define A5XX_VSC_RESOLVE_CNTL_Y__SHIFT 16 +static inline uint32_t A5XX_VSC_RESOLVE_CNTL_Y(uint32_t val) +{ + return ((val) << A5XX_VSC_RESOLVE_CNTL_Y__SHIFT) & A5XX_VSC_RESOLVE_CNTL_Y__MASK; +} + +#define REG_A5XX_GRAS_ADDR_MODE_CNTL 0x00000c81 + +#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_0 0x00000c90 + +#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_1 0x00000c91 + +#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_2 0x00000c92 + +#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c93 + +#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_0 0x00000c94 + +#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_1 0x00000c95 + +#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_2 0x00000c96 + +#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_3 0x00000c97 + +#define REG_A5XX_GRAS_PERFCTR_LRZ_SEL_0 0x00000c98 + +#define REG_A5XX_GRAS_PERFCTR_LRZ_SEL_1 0x00000c99 + +#define REG_A5XX_GRAS_PERFCTR_LRZ_SEL_2 0x00000c9a + +#define REG_A5XX_GRAS_PERFCTR_LRZ_SEL_3 0x00000c9b + +#define REG_A5XX_RB_DBG_ECO_CNTL 0x00000cc4 + +#define REG_A5XX_RB_ADDR_MODE_CNTL 0x00000cc5 + +#define REG_A5XX_RB_MODE_CNTL 0x00000cc6 + +#define REG_A5XX_RB_CCU_CNTL 0x00000cc7 + +#define REG_A5XX_RB_PERFCTR_RB_SEL_0 0x00000cd0 + +#define REG_A5XX_RB_PERFCTR_RB_SEL_1 0x00000cd1 + +#define REG_A5XX_RB_PERFCTR_RB_SEL_2 0x00000cd2 + +#define REG_A5XX_RB_PERFCTR_RB_SEL_3 0x00000cd3 + +#define 
REG_A5XX_RB_PERFCTR_RB_SEL_4 0x00000cd4 + +#define REG_A5XX_RB_PERFCTR_RB_SEL_5 0x00000cd5 + +#define REG_A5XX_RB_PERFCTR_RB_SEL_6 0x00000cd6 + +#define REG_A5XX_RB_PERFCTR_RB_SEL_7 0x00000cd7 + +#define REG_A5XX_RB_PERFCTR_CCU_SEL_0 0x00000cd8 + +#define REG_A5XX_RB_PERFCTR_CCU_SEL_1 0x00000cd9 + +#define REG_A5XX_RB_PERFCTR_CCU_SEL_2 0x00000cda + +#define REG_A5XX_RB_PERFCTR_CCU_SEL_3 0x00000cdb + +#define REG_A5XX_RB_POWERCTR_RB_SEL_0 0x00000ce0 + +#define REG_A5XX_RB_POWERCTR_RB_SEL_1 0x00000ce1 + +#define REG_A5XX_RB_POWERCTR_RB_SEL_2 0x00000ce2 + +#define REG_A5XX_RB_POWERCTR_RB_SEL_3 0x00000ce3 + +#define REG_A5XX_RB_POWERCTR_CCU_SEL_0 0x00000ce4 + +#define REG_A5XX_RB_POWERCTR_CCU_SEL_1 0x00000ce5 + +#define REG_A5XX_RB_PERFCTR_CMP_SEL_0 0x00000cec + +#define REG_A5XX_RB_PERFCTR_CMP_SEL_1 0x00000ced + +#define REG_A5XX_RB_PERFCTR_CMP_SEL_2 0x00000cee + +#define REG_A5XX_RB_PERFCTR_CMP_SEL_3 0x00000cef + +#define REG_A5XX_PC_DBG_ECO_CNTL 0x00000d00 +#define A5XX_PC_DBG_ECO_CNTL_TWOPASSUSEWFI 0x00000100 + +#define REG_A5XX_PC_ADDR_MODE_CNTL 0x00000d01 + +#define REG_A5XX_PC_MODE_CNTL 0x00000d02 + +#define REG_A5XX_PC_INDEX_BUF_LO 0x00000d04 + +#define REG_A5XX_PC_INDEX_BUF_HI 0x00000d05 + +#define REG_A5XX_PC_START_INDEX 0x00000d06 + +#define REG_A5XX_PC_MAX_INDEX 0x00000d07 + +#define REG_A5XX_PC_TESSFACTOR_ADDR_LO 0x00000d08 + +#define REG_A5XX_PC_TESSFACTOR_ADDR_HI 0x00000d09 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_0 0x00000d10 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_1 0x00000d11 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_2 0x00000d12 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_3 0x00000d13 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_4 0x00000d14 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_5 0x00000d15 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_6 0x00000d16 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_7 0x00000d17 + +#define REG_A5XX_HLSQ_TIMEOUT_THRESHOLD_0 0x00000e00 + +#define REG_A5XX_HLSQ_TIMEOUT_THRESHOLD_1 0x00000e01 + +#define REG_A5XX_HLSQ_ADDR_MODE_CNTL 0x00000e05 + +#define 
REG_A5XX_HLSQ_MODE_CNTL 0x00000e06 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_0 0x00000e10 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_1 0x00000e11 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_2 0x00000e12 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_3 0x00000e13 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_4 0x00000e14 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_5 0x00000e15 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_6 0x00000e16 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_7 0x00000e17 + +#define REG_A5XX_HLSQ_SPTP_RDSEL 0x00000f08 + +#define REG_A5XX_HLSQ_DBG_READ_SEL 0x0000bc00 + +#define REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE 0x0000a000 + +#define REG_A5XX_VFD_ADDR_MODE_CNTL 0x00000e41 + +#define REG_A5XX_VFD_MODE_CNTL 0x00000e42 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_0 0x00000e50 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_1 0x00000e51 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_2 0x00000e52 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_3 0x00000e53 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_4 0x00000e54 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_5 0x00000e55 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_6 0x00000e56 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_7 0x00000e57 + +#define REG_A5XX_VPC_DBG_ECO_CNTL 0x00000e60 + +#define REG_A5XX_VPC_ADDR_MODE_CNTL 0x00000e61 + +#define REG_A5XX_VPC_MODE_CNTL 0x00000e62 +#define A5XX_VPC_MODE_CNTL_BINNING_PASS 0x00000001 + +#define REG_A5XX_VPC_PERFCTR_VPC_SEL_0 0x00000e64 + +#define REG_A5XX_VPC_PERFCTR_VPC_SEL_1 0x00000e65 + +#define REG_A5XX_VPC_PERFCTR_VPC_SEL_2 0x00000e66 + +#define REG_A5XX_VPC_PERFCTR_VPC_SEL_3 0x00000e67 + +#define REG_A5XX_UCHE_ADDR_MODE_CNTL 0x00000e80 + +#define REG_A5XX_UCHE_SVM_CNTL 0x00000e82 + +#define REG_A5XX_UCHE_WRITE_THRU_BASE_LO 0x00000e87 + +#define REG_A5XX_UCHE_WRITE_THRU_BASE_HI 0x00000e88 + +#define REG_A5XX_UCHE_TRAP_BASE_LO 0x00000e89 + +#define REG_A5XX_UCHE_TRAP_BASE_HI 0x00000e8a + +#define REG_A5XX_UCHE_GMEM_RANGE_MIN_LO 0x00000e8b + +#define REG_A5XX_UCHE_GMEM_RANGE_MIN_HI 0x00000e8c + +#define 
REG_A5XX_UCHE_GMEM_RANGE_MAX_LO 0x00000e8d + +#define REG_A5XX_UCHE_GMEM_RANGE_MAX_HI 0x00000e8e + +#define REG_A5XX_UCHE_DBG_ECO_CNTL_2 0x00000e8f + +#define REG_A5XX_UCHE_DBG_ECO_CNTL 0x00000e90 + +#define REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_LO 0x00000e91 + +#define REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_HI 0x00000e92 + +#define REG_A5XX_UCHE_CACHE_INVALIDATE_MAX_LO 0x00000e93 + +#define REG_A5XX_UCHE_CACHE_INVALIDATE_MAX_HI 0x00000e94 + +#define REG_A5XX_UCHE_CACHE_INVALIDATE 0x00000e95 + +#define REG_A5XX_UCHE_CACHE_WAYS 0x00000e96 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_0 0x00000ea0 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_1 0x00000ea1 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_2 0x00000ea2 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_3 0x00000ea3 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_4 0x00000ea4 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_5 0x00000ea5 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_6 0x00000ea6 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_7 0x00000ea7 + +#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_0 0x00000ea8 + +#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_1 0x00000ea9 + +#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_2 0x00000eaa + +#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_3 0x00000eab + +#define REG_A5XX_UCHE_TRAP_LOG_LO 0x00000eb1 + +#define REG_A5XX_UCHE_TRAP_LOG_HI 0x00000eb2 + +#define REG_A5XX_SP_DBG_ECO_CNTL 0x00000ec0 + +#define REG_A5XX_SP_ADDR_MODE_CNTL 0x00000ec1 + +#define REG_A5XX_SP_MODE_CNTL 0x00000ec2 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_0 0x00000ed0 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_1 0x00000ed1 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_2 0x00000ed2 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_3 0x00000ed3 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_4 0x00000ed4 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_5 0x00000ed5 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_6 0x00000ed6 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_7 0x00000ed7 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_8 0x00000ed8 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_9 0x00000ed9 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_10 0x00000eda 
+ +#define REG_A5XX_SP_PERFCTR_SP_SEL_11 0x00000edb + +#define REG_A5XX_SP_POWERCTR_SP_SEL_0 0x00000edc + +#define REG_A5XX_SP_POWERCTR_SP_SEL_1 0x00000edd + +#define REG_A5XX_SP_POWERCTR_SP_SEL_2 0x00000ede + +#define REG_A5XX_SP_POWERCTR_SP_SEL_3 0x00000edf + +#define REG_A5XX_TPL1_ADDR_MODE_CNTL 0x00000f01 + +#define REG_A5XX_TPL1_MODE_CNTL 0x00000f02 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_0 0x00000f10 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_1 0x00000f11 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_2 0x00000f12 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_3 0x00000f13 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_4 0x00000f14 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_5 0x00000f15 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_6 0x00000f16 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_7 0x00000f17 + +#define REG_A5XX_TPL1_POWERCTR_TP_SEL_0 0x00000f18 + +#define REG_A5XX_TPL1_POWERCTR_TP_SEL_1 0x00000f19 + +#define REG_A5XX_TPL1_POWERCTR_TP_SEL_2 0x00000f1a + +#define REG_A5XX_TPL1_POWERCTR_TP_SEL_3 0x00000f1b + +#define REG_A5XX_VBIF_VERSION 0x00003000 + +#define REG_A5XX_VBIF_CLKON 0x00003001 + +#define REG_A5XX_VBIF_ABIT_SORT 0x00003028 + +#define REG_A5XX_VBIF_ABIT_SORT_CONF 0x00003029 + +#define REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB 0x00003049 + +#define REG_A5XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a + +#define REG_A5XX_VBIF_IN_RD_LIM_CONF0 0x0000302c + +#define REG_A5XX_VBIF_IN_RD_LIM_CONF1 0x0000302d + +#define REG_A5XX_VBIF_XIN_HALT_CTRL0 0x00003080 + +#define REG_A5XX_VBIF_XIN_HALT_CTRL1 0x00003081 + +#define REG_A5XX_VBIF_TEST_BUS_OUT_CTRL 0x00003084 + +#define REG_A5XX_VBIF_TEST_BUS1_CTRL0 0x00003085 + +#define REG_A5XX_VBIF_TEST_BUS1_CTRL1 0x00003086 + +#define REG_A5XX_VBIF_TEST_BUS2_CTRL0 0x00003087 + +#define REG_A5XX_VBIF_TEST_BUS2_CTRL1 0x00003088 + +#define REG_A5XX_VBIF_TEST_BUS_OUT 0x0000308c + +#define REG_A5XX_VBIF_PERF_CNT_EN0 0x000030c0 + +#define REG_A5XX_VBIF_PERF_CNT_EN1 0x000030c1 + +#define REG_A5XX_VBIF_PERF_CNT_EN2 0x000030c2 + +#define REG_A5XX_VBIF_PERF_CNT_EN3 
0x000030c3 + +#define REG_A5XX_VBIF_PERF_CNT_CLR0 0x000030c8 + +#define REG_A5XX_VBIF_PERF_CNT_CLR1 0x000030c9 + +#define REG_A5XX_VBIF_PERF_CNT_CLR2 0x000030ca + +#define REG_A5XX_VBIF_PERF_CNT_CLR3 0x000030cb + +#define REG_A5XX_VBIF_PERF_CNT_SEL0 0x000030d0 + +#define REG_A5XX_VBIF_PERF_CNT_SEL1 0x000030d1 + +#define REG_A5XX_VBIF_PERF_CNT_SEL2 0x000030d2 + +#define REG_A5XX_VBIF_PERF_CNT_SEL3 0x000030d3 + +#define REG_A5XX_VBIF_PERF_CNT_LOW0 0x000030d8 + +#define REG_A5XX_VBIF_PERF_CNT_LOW1 0x000030d9 + +#define REG_A5XX_VBIF_PERF_CNT_LOW2 0x000030da + +#define REG_A5XX_VBIF_PERF_CNT_LOW3 0x000030db + +#define REG_A5XX_VBIF_PERF_CNT_HIGH0 0x000030e0 + +#define REG_A5XX_VBIF_PERF_CNT_HIGH1 0x000030e1 + +#define REG_A5XX_VBIF_PERF_CNT_HIGH2 0x000030e2 + +#define REG_A5XX_VBIF_PERF_CNT_HIGH3 0x000030e3 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_EN0 0x00003100 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_EN1 0x00003101 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_EN2 0x00003102 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_LOW0 0x00003110 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_LOW1 0x00003111 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_LOW2 0x00003112 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_HIGH0 0x00003118 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_HIGH1 0x00003119 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_HIGH2 0x0000311a + +#define REG_A5XX_GPMU_INST_RAM_BASE 0x00008800 + +#define REG_A5XX_GPMU_DATA_RAM_BASE 0x00009800 + +#define REG_A5XX_GPMU_SP_POWER_CNTL 0x0000a881 + +#define REG_A5XX_GPMU_RBCCU_CLOCK_CNTL 0x0000a886 + +#define REG_A5XX_GPMU_RBCCU_POWER_CNTL 0x0000a887 + +#define REG_A5XX_GPMU_SP_PWR_CLK_STATUS 0x0000a88b +#define A5XX_GPMU_SP_PWR_CLK_STATUS_PWR_ON 0x00100000 + +#define REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS 0x0000a88d +#define A5XX_GPMU_RBCCU_PWR_CLK_STATUS_PWR_ON 0x00100000 + +#define REG_A5XX_GPMU_PWR_COL_STAGGER_DELAY 0x0000a891 + +#define REG_A5XX_GPMU_PWR_COL_INTER_FRAME_CTRL 0x0000a892 + +#define REG_A5XX_GPMU_PWR_COL_INTER_FRAME_HYST 0x0000a893 + +#define 
REG_A5XX_GPMU_PWR_COL_BINNING_CTRL 0x0000a894 + +#define REG_A5XX_GPMU_CLOCK_THROTTLE_CTRL 0x0000a8a3 + +#define REG_A5XX_GPMU_WFI_CONFIG 0x0000a8c1 + +#define REG_A5XX_GPMU_RBBM_INTR_INFO 0x0000a8d6 + +#define REG_A5XX_GPMU_CM3_SYSRESET 0x0000a8d8 + +#define REG_A5XX_GPMU_GENERAL_0 0x0000a8e0 + +#define REG_A5XX_GPMU_GENERAL_1 0x0000a8e1 + +#define REG_A5XX_SP_POWER_COUNTER_0_LO 0x0000a840 + +#define REG_A5XX_SP_POWER_COUNTER_0_HI 0x0000a841 + +#define REG_A5XX_SP_POWER_COUNTER_1_LO 0x0000a842 + +#define REG_A5XX_SP_POWER_COUNTER_1_HI 0x0000a843 + +#define REG_A5XX_SP_POWER_COUNTER_2_LO 0x0000a844 + +#define REG_A5XX_SP_POWER_COUNTER_2_HI 0x0000a845 + +#define REG_A5XX_SP_POWER_COUNTER_3_LO 0x0000a846 + +#define REG_A5XX_SP_POWER_COUNTER_3_HI 0x0000a847 + +#define REG_A5XX_TP_POWER_COUNTER_0_LO 0x0000a848 + +#define REG_A5XX_TP_POWER_COUNTER_0_HI 0x0000a849 + +#define REG_A5XX_TP_POWER_COUNTER_1_LO 0x0000a84a + +#define REG_A5XX_TP_POWER_COUNTER_1_HI 0x0000a84b + +#define REG_A5XX_TP_POWER_COUNTER_2_LO 0x0000a84c + +#define REG_A5XX_TP_POWER_COUNTER_2_HI 0x0000a84d + +#define REG_A5XX_TP_POWER_COUNTER_3_LO 0x0000a84e + +#define REG_A5XX_TP_POWER_COUNTER_3_HI 0x0000a84f + +#define REG_A5XX_RB_POWER_COUNTER_0_LO 0x0000a850 + +#define REG_A5XX_RB_POWER_COUNTER_0_HI 0x0000a851 + +#define REG_A5XX_RB_POWER_COUNTER_1_LO 0x0000a852 + +#define REG_A5XX_RB_POWER_COUNTER_1_HI 0x0000a853 + +#define REG_A5XX_RB_POWER_COUNTER_2_LO 0x0000a854 + +#define REG_A5XX_RB_POWER_COUNTER_2_HI 0x0000a855 + +#define REG_A5XX_RB_POWER_COUNTER_3_LO 0x0000a856 + +#define REG_A5XX_RB_POWER_COUNTER_3_HI 0x0000a857 + +#define REG_A5XX_CCU_POWER_COUNTER_0_LO 0x0000a858 + +#define REG_A5XX_CCU_POWER_COUNTER_0_HI 0x0000a859 + +#define REG_A5XX_CCU_POWER_COUNTER_1_LO 0x0000a85a + +#define REG_A5XX_CCU_POWER_COUNTER_1_HI 0x0000a85b + +#define REG_A5XX_UCHE_POWER_COUNTER_0_LO 0x0000a85c + +#define REG_A5XX_UCHE_POWER_COUNTER_0_HI 0x0000a85d + +#define REG_A5XX_UCHE_POWER_COUNTER_1_LO 0x0000a85e + 
+#define REG_A5XX_UCHE_POWER_COUNTER_1_HI 0x0000a85f + +#define REG_A5XX_UCHE_POWER_COUNTER_2_LO 0x0000a860 + +#define REG_A5XX_UCHE_POWER_COUNTER_2_HI 0x0000a861 + +#define REG_A5XX_UCHE_POWER_COUNTER_3_LO 0x0000a862 + +#define REG_A5XX_UCHE_POWER_COUNTER_3_HI 0x0000a863 + +#define REG_A5XX_CP_POWER_COUNTER_0_LO 0x0000a864 + +#define REG_A5XX_CP_POWER_COUNTER_0_HI 0x0000a865 + +#define REG_A5XX_CP_POWER_COUNTER_1_LO 0x0000a866 + +#define REG_A5XX_CP_POWER_COUNTER_1_HI 0x0000a867 + +#define REG_A5XX_CP_POWER_COUNTER_2_LO 0x0000a868 + +#define REG_A5XX_CP_POWER_COUNTER_2_HI 0x0000a869 + +#define REG_A5XX_CP_POWER_COUNTER_3_LO 0x0000a86a + +#define REG_A5XX_CP_POWER_COUNTER_3_HI 0x0000a86b + +#define REG_A5XX_GPMU_POWER_COUNTER_0_LO 0x0000a86c + +#define REG_A5XX_GPMU_POWER_COUNTER_0_HI 0x0000a86d + +#define REG_A5XX_GPMU_POWER_COUNTER_1_LO 0x0000a86e + +#define REG_A5XX_GPMU_POWER_COUNTER_1_HI 0x0000a86f + +#define REG_A5XX_GPMU_POWER_COUNTER_2_LO 0x0000a870 + +#define REG_A5XX_GPMU_POWER_COUNTER_2_HI 0x0000a871 + +#define REG_A5XX_GPMU_POWER_COUNTER_3_LO 0x0000a872 + +#define REG_A5XX_GPMU_POWER_COUNTER_3_HI 0x0000a873 + +#define REG_A5XX_GPMU_POWER_COUNTER_4_LO 0x0000a874 + +#define REG_A5XX_GPMU_POWER_COUNTER_4_HI 0x0000a875 + +#define REG_A5XX_GPMU_POWER_COUNTER_5_LO 0x0000a876 + +#define REG_A5XX_GPMU_POWER_COUNTER_5_HI 0x0000a877 + +#define REG_A5XX_GPMU_POWER_COUNTER_ENABLE 0x0000a878 + +#define REG_A5XX_GPMU_ALWAYS_ON_COUNTER_LO 0x0000a879 + +#define REG_A5XX_GPMU_ALWAYS_ON_COUNTER_HI 0x0000a87a + +#define REG_A5XX_GPMU_ALWAYS_ON_COUNTER_RESET 0x0000a87b + +#define REG_A5XX_GPMU_POWER_COUNTER_SELECT_0 0x0000a87c + +#define REG_A5XX_GPMU_POWER_COUNTER_SELECT_1 0x0000a87d + +#define REG_A5XX_GPMU_CLOCK_THROTTLE_CTRL 0x0000a8a3 + +#define REG_A5XX_GPMU_THROTTLE_UNMASK_FORCE_CTRL 0x0000a8a8 + +#define REG_A5XX_GPMU_TEMP_SENSOR_ID 0x0000ac00 + +#define REG_A5XX_GPMU_TEMP_SENSOR_CONFIG 0x0000ac01 + +#define REG_A5XX_GPMU_TEMP_VAL 0x0000ac02 + +#define 
REG_A5XX_GPMU_DELTA_TEMP_THRESHOLD 0x0000ac03 + +#define REG_A5XX_GPMU_TEMP_THRESHOLD_INTR_STATUS 0x0000ac05 + +#define REG_A5XX_GPMU_TEMP_THRESHOLD_INTR_EN_MASK 0x0000ac06 + +#define REG_A5XX_GPMU_LEAKAGE_TEMP_COEFF_0_1 0x0000ac40 + +#define REG_A5XX_GPMU_LEAKAGE_TEMP_COEFF_2_3 0x0000ac41 + +#define REG_A5XX_GPMU_LEAKAGE_VTG_COEFF_0_1 0x0000ac42 + +#define REG_A5XX_GPMU_LEAKAGE_VTG_COEFF_2_3 0x0000ac43 + +#define REG_A5XX_GPMU_BASE_LEAKAGE 0x0000ac46 + +#define REG_A5XX_GPMU_GPMU_VOLTAGE 0x0000ac60 + +#define REG_A5XX_GPMU_GPMU_VOLTAGE_INTR_STATUS 0x0000ac61 + +#define REG_A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK 0x0000ac62 + +#define REG_A5XX_GPMU_GPMU_PWR_THRESHOLD 0x0000ac80 + +#define REG_A5XX_GPMU_GPMU_LLM_GLM_SLEEP_CTRL 0x0000acc4 + +#define REG_A5XX_GPMU_GPMU_LLM_GLM_SLEEP_STATUS 0x0000acc5 + +#define REG_A5XX_GDPM_CONFIG1 0x0000b80c + +#define REG_A5XX_GDPM_CONFIG2 0x0000b80d + +#define REG_A5XX_GDPM_INT_EN 0x0000b80f + +#define REG_A5XX_GDPM_INT_MASK 0x0000b811 + +#define REG_A5XX_GPMU_BEC_ENABLE 0x0000b9a0 + +#define REG_A5XX_GPU_CS_SENSOR_GENERAL_STATUS 0x0000c41a + +#define REG_A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_0 0x0000c41d + +#define REG_A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_2 0x0000c41f + +#define REG_A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_4 0x0000c421 + +#define REG_A5XX_GPU_CS_ENABLE_REG 0x0000c520 + +#define REG_A5XX_GPU_CS_AMP_CALIBRATION_CONTROL1 0x0000c557 + +#define REG_A5XX_GRAS_CL_CNTL 0x0000e000 +#define A5XX_GRAS_CL_CNTL_ZERO_GB_SCALE_Z 0x00000040 + +#define REG_A5XX_UNKNOWN_E001 0x0000e001 + +#define REG_A5XX_UNKNOWN_E004 0x0000e004 + +#define REG_A5XX_GRAS_CNTL 0x0000e005 +#define A5XX_GRAS_CNTL_VARYING 0x00000001 +#define A5XX_GRAS_CNTL_UNK3 0x00000008 +#define A5XX_GRAS_CNTL_XCOORD 0x00000040 +#define A5XX_GRAS_CNTL_YCOORD 0x00000080 +#define A5XX_GRAS_CNTL_ZCOORD 0x00000100 +#define A5XX_GRAS_CNTL_WCOORD 0x00000200 + +#define REG_A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ 0x0000e006 +#define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK 0x000003ff +#define 
A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__SHIFT 0 +static inline uint32_t A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(uint32_t val) +{ + return ((val) << A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__SHIFT) & A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK; +} +#define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__MASK 0x000ffc00 +#define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__SHIFT 10 +static inline uint32_t A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(uint32_t val) +{ + return ((val) << A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__SHIFT) & A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__MASK; +} + +#define REG_A5XX_GRAS_CL_VPORT_XOFFSET_0 0x0000e010 +#define A5XX_GRAS_CL_VPORT_XOFFSET_0__MASK 0xffffffff +#define A5XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT 0 +static inline uint32_t A5XX_GRAS_CL_VPORT_XOFFSET_0(float val) +{ + return ((fui(val)) << A5XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT) & A5XX_GRAS_CL_VPORT_XOFFSET_0__MASK; +} + +#define REG_A5XX_GRAS_CL_VPORT_XSCALE_0 0x0000e011 +#define A5XX_GRAS_CL_VPORT_XSCALE_0__MASK 0xffffffff +#define A5XX_GRAS_CL_VPORT_XSCALE_0__SHIFT 0 +static inline uint32_t A5XX_GRAS_CL_VPORT_XSCALE_0(float val) +{ + return ((fui(val)) << A5XX_GRAS_CL_VPORT_XSCALE_0__SHIFT) & A5XX_GRAS_CL_VPORT_XSCALE_0__MASK; +} + +#define REG_A5XX_GRAS_CL_VPORT_YOFFSET_0 0x0000e012 +#define A5XX_GRAS_CL_VPORT_YOFFSET_0__MASK 0xffffffff +#define A5XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT 0 +static inline uint32_t A5XX_GRAS_CL_VPORT_YOFFSET_0(float val) +{ + return ((fui(val)) << A5XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT) & A5XX_GRAS_CL_VPORT_YOFFSET_0__MASK; +} + +#define REG_A5XX_GRAS_CL_VPORT_YSCALE_0 0x0000e013 +#define A5XX_GRAS_CL_VPORT_YSCALE_0__MASK 0xffffffff +#define A5XX_GRAS_CL_VPORT_YSCALE_0__SHIFT 0 +static inline uint32_t A5XX_GRAS_CL_VPORT_YSCALE_0(float val) +{ + return ((fui(val)) << A5XX_GRAS_CL_VPORT_YSCALE_0__SHIFT) & A5XX_GRAS_CL_VPORT_YSCALE_0__MASK; +} + +#define REG_A5XX_GRAS_CL_VPORT_ZOFFSET_0 0x0000e014 +#define A5XX_GRAS_CL_VPORT_ZOFFSET_0__MASK 0xffffffff +#define A5XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT 0 
+static inline uint32_t A5XX_GRAS_CL_VPORT_ZOFFSET_0(float val) +{ + return ((fui(val)) << A5XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT) & A5XX_GRAS_CL_VPORT_ZOFFSET_0__MASK; +} + +#define REG_A5XX_GRAS_CL_VPORT_ZSCALE_0 0x0000e015 +#define A5XX_GRAS_CL_VPORT_ZSCALE_0__MASK 0xffffffff +#define A5XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT 0 +static inline uint32_t A5XX_GRAS_CL_VPORT_ZSCALE_0(float val) +{ + return ((fui(val)) << A5XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT) & A5XX_GRAS_CL_VPORT_ZSCALE_0__MASK; +} + +#define REG_A5XX_GRAS_SU_CNTL 0x0000e090 +#define A5XX_GRAS_SU_CNTL_CULL_FRONT 0x00000001 +#define A5XX_GRAS_SU_CNTL_CULL_BACK 0x00000002 +#define A5XX_GRAS_SU_CNTL_FRONT_CW 0x00000004 +#define A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK 0x000007f8 +#define A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT 3 +static inline uint32_t A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(float val) +{ + return ((((int32_t)(val * 4.0))) << A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT) & A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK; +} +#define A5XX_GRAS_SU_CNTL_POLY_OFFSET 0x00000800 +#define A5XX_GRAS_SU_CNTL_MSAA_ENABLE 0x00002000 + +#define REG_A5XX_GRAS_SU_POINT_MINMAX 0x0000e091 +#define A5XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff +#define A5XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0 +static inline uint32_t A5XX_GRAS_SU_POINT_MINMAX_MIN(float val) +{ + return ((((uint32_t)(val * 16.0))) << A5XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A5XX_GRAS_SU_POINT_MINMAX_MIN__MASK; +} +#define A5XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000 +#define A5XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16 +static inline uint32_t A5XX_GRAS_SU_POINT_MINMAX_MAX(float val) +{ + return ((((uint32_t)(val * 16.0))) << A5XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A5XX_GRAS_SU_POINT_MINMAX_MAX__MASK; +} + +#define REG_A5XX_GRAS_SU_POINT_SIZE 0x0000e092 +#define A5XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff +#define A5XX_GRAS_SU_POINT_SIZE__SHIFT 0 +static inline uint32_t A5XX_GRAS_SU_POINT_SIZE(float val) +{ + return ((((int32_t)(val * 16.0))) << A5XX_GRAS_SU_POINT_SIZE__SHIFT) & 
A5XX_GRAS_SU_POINT_SIZE__MASK; +} + +#define REG_A5XX_GRAS_SU_LAYERED 0x0000e093 + +#define REG_A5XX_GRAS_SU_DEPTH_PLANE_CNTL 0x0000e094 +#define A5XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 +#define A5XX_GRAS_SU_DEPTH_PLANE_CNTL_UNK1 0x00000002 + +#define REG_A5XX_GRAS_SU_POLY_OFFSET_SCALE 0x0000e095 +#define A5XX_GRAS_SU_POLY_OFFSET_SCALE__MASK 0xffffffff +#define A5XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT 0 +static inline uint32_t A5XX_GRAS_SU_POLY_OFFSET_SCALE(float val) +{ + return ((fui(val)) << A5XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT) & A5XX_GRAS_SU_POLY_OFFSET_SCALE__MASK; +} + +#define REG_A5XX_GRAS_SU_POLY_OFFSET_OFFSET 0x0000e096 +#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff +#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0 +static inline uint32_t A5XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) +{ + return ((fui(val)) << A5XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A5XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; +} + +#define REG_A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP 0x0000e097 +#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__MASK 0xffffffff +#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__SHIFT 0 +static inline uint32_t A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(float val) +{ + return ((fui(val)) << A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__SHIFT) & A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__MASK; +} + +#define REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO 0x0000e098 +#define A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK 0x00000007 +#define A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT 0 +static inline uint32_t A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a5xx_depth_format val) +{ + return ((val) << A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT) & A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK; +} + +#define REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL 0x0000e099 + +#define REG_A5XX_GRAS_SC_CNTL 0x0000e0a0 +#define A5XX_GRAS_SC_CNTL_BINNING_PASS 0x00000001 +#define A5XX_GRAS_SC_CNTL_SAMPLES_PASSED 0x00008000 + +#define REG_A5XX_GRAS_SC_BIN_CNTL 0x0000e0a1 + 
+#define REG_A5XX_GRAS_SC_RAS_MSAA_CNTL 0x0000e0a2 +#define A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__MASK; +} + +#define REG_A5XX_GRAS_SC_DEST_MSAA_CNTL 0x0000e0a3 +#define A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__MASK; +} +#define A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 + +#define REG_A5XX_GRAS_SC_SCREEN_SCISSOR_CNTL 0x0000e0a4 + +#define REG_A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0 0x0000e0aa +#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__MASK 0x00007fff +#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__SHIFT 0 +static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__SHIFT) & A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__MASK; +} +#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__MASK 0x7fff0000 +#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__SHIFT 16 +static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__SHIFT) & A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__MASK; +} + +#define REG_A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0 0x0000e0ab +#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__MASK 0x00007fff +#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__SHIFT 0 +static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__SHIFT) & 
A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__MASK; +} +#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__MASK 0x7fff0000 +#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__SHIFT 16 +static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__SHIFT) & A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__MASK; +} + +#define REG_A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0 0x0000e0ca +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__MASK 0x00007fff +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__SHIFT 0 +static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__SHIFT) & A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__MASK; +} +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__MASK 0x7fff0000 +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__SHIFT 16 +static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__SHIFT) & A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__MASK; +} + +#define REG_A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0 0x0000e0cb +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__MASK 0x00007fff +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__SHIFT 0 +static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__SHIFT) & A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__MASK; +} +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__MASK 0x7fff0000 +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__SHIFT 16 +static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__SHIFT) & A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__MASK; +} + +#define REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL 0x0000e0ea +#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define 
A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff +#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 +static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK; +} +#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 +#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 +static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK; +} + +#define REG_A5XX_GRAS_SC_WINDOW_SCISSOR_BR 0x0000e0eb +#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff +#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 +static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK; +} +#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 +#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 +static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK; +} + +#define REG_A5XX_GRAS_LRZ_CNTL 0x0000e100 +#define A5XX_GRAS_LRZ_CNTL_ENABLE 0x00000001 +#define A5XX_GRAS_LRZ_CNTL_LRZ_WRITE 0x00000002 +#define A5XX_GRAS_LRZ_CNTL_GREATER 0x00000004 + +#define REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO 0x0000e101 + +#define REG_A5XX_GRAS_LRZ_BUFFER_BASE_HI 0x0000e102 + +#define REG_A5XX_GRAS_LRZ_BUFFER_PITCH 0x0000e103 +#define A5XX_GRAS_LRZ_BUFFER_PITCH__MASK 0xffffffff +#define A5XX_GRAS_LRZ_BUFFER_PITCH__SHIFT 0 +static inline uint32_t A5XX_GRAS_LRZ_BUFFER_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A5XX_GRAS_LRZ_BUFFER_PITCH__SHIFT) & A5XX_GRAS_LRZ_BUFFER_PITCH__MASK; +} + +#define REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO 0x0000e104 + +#define 
REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI 0x0000e105 + +#define REG_A5XX_RB_CNTL 0x0000e140 +#define A5XX_RB_CNTL_WIDTH__MASK 0x000000ff +#define A5XX_RB_CNTL_WIDTH__SHIFT 0 +static inline uint32_t A5XX_RB_CNTL_WIDTH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A5XX_RB_CNTL_WIDTH__SHIFT) & A5XX_RB_CNTL_WIDTH__MASK; +} +#define A5XX_RB_CNTL_HEIGHT__MASK 0x0001fe00 +#define A5XX_RB_CNTL_HEIGHT__SHIFT 9 +static inline uint32_t A5XX_RB_CNTL_HEIGHT(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A5XX_RB_CNTL_HEIGHT__SHIFT) & A5XX_RB_CNTL_HEIGHT__MASK; +} +#define A5XX_RB_CNTL_BYPASS 0x00020000 + +#define REG_A5XX_RB_RENDER_CNTL 0x0000e141 +#define A5XX_RB_RENDER_CNTL_BINNING_PASS 0x00000001 +#define A5XX_RB_RENDER_CNTL_SAMPLES_PASSED 0x00000040 +#define A5XX_RB_RENDER_CNTL_DISABLE_COLOR_PIPE 0x00000080 +#define A5XX_RB_RENDER_CNTL_FLAG_DEPTH 0x00004000 +#define A5XX_RB_RENDER_CNTL_FLAG_DEPTH2 0x00008000 +#define A5XX_RB_RENDER_CNTL_FLAG_MRTS__MASK 0x00ff0000 +#define A5XX_RB_RENDER_CNTL_FLAG_MRTS__SHIFT 16 +static inline uint32_t A5XX_RB_RENDER_CNTL_FLAG_MRTS(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_CNTL_FLAG_MRTS__SHIFT) & A5XX_RB_RENDER_CNTL_FLAG_MRTS__MASK; +} +#define A5XX_RB_RENDER_CNTL_FLAG_MRTS2__MASK 0xff000000 +#define A5XX_RB_RENDER_CNTL_FLAG_MRTS2__SHIFT 24 +static inline uint32_t A5XX_RB_RENDER_CNTL_FLAG_MRTS2(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_CNTL_FLAG_MRTS2__SHIFT) & A5XX_RB_RENDER_CNTL_FLAG_MRTS2__MASK; +} + +#define REG_A5XX_RB_RAS_MSAA_CNTL 0x0000e142 +#define A5XX_RB_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A5XX_RB_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A5XX_RB_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A5XX_RB_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_RB_RAS_MSAA_CNTL_SAMPLES__MASK; +} + +#define REG_A5XX_RB_DEST_MSAA_CNTL 0x0000e143 +#define A5XX_RB_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A5XX_RB_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 
+static inline uint32_t A5XX_RB_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A5XX_RB_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_RB_DEST_MSAA_CNTL_SAMPLES__MASK; +} +#define A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 + +#define REG_A5XX_RB_RENDER_CONTROL0 0x0000e144 +#define A5XX_RB_RENDER_CONTROL0_VARYING 0x00000001 +#define A5XX_RB_RENDER_CONTROL0_UNK3 0x00000008 +#define A5XX_RB_RENDER_CONTROL0_XCOORD 0x00000040 +#define A5XX_RB_RENDER_CONTROL0_YCOORD 0x00000080 +#define A5XX_RB_RENDER_CONTROL0_ZCOORD 0x00000100 +#define A5XX_RB_RENDER_CONTROL0_WCOORD 0x00000200 + +#define REG_A5XX_RB_RENDER_CONTROL1 0x0000e145 +#define A5XX_RB_RENDER_CONTROL1_SAMPLEMASK 0x00000001 +#define A5XX_RB_RENDER_CONTROL1_FACENESS 0x00000002 +#define A5XX_RB_RENDER_CONTROL1_SAMPLEID 0x00000004 + +#define REG_A5XX_RB_FS_OUTPUT_CNTL 0x0000e146 +#define A5XX_RB_FS_OUTPUT_CNTL_MRT__MASK 0x0000000f +#define A5XX_RB_FS_OUTPUT_CNTL_MRT__SHIFT 0 +static inline uint32_t A5XX_RB_FS_OUTPUT_CNTL_MRT(uint32_t val) +{ + return ((val) << A5XX_RB_FS_OUTPUT_CNTL_MRT__SHIFT) & A5XX_RB_FS_OUTPUT_CNTL_MRT__MASK; +} +#define A5XX_RB_FS_OUTPUT_CNTL_FRAG_WRITES_Z 0x00000020 + +#define REG_A5XX_RB_RENDER_COMPONENTS 0x0000e147 +#define A5XX_RB_RENDER_COMPONENTS_RT0__MASK 0x0000000f +#define A5XX_RB_RENDER_COMPONENTS_RT0__SHIFT 0 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT0(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_COMPONENTS_RT0__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT0__MASK; +} +#define A5XX_RB_RENDER_COMPONENTS_RT1__MASK 0x000000f0 +#define A5XX_RB_RENDER_COMPONENTS_RT1__SHIFT 4 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT1(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_COMPONENTS_RT1__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT1__MASK; +} +#define A5XX_RB_RENDER_COMPONENTS_RT2__MASK 0x00000f00 +#define A5XX_RB_RENDER_COMPONENTS_RT2__SHIFT 8 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT2(uint32_t val) +{ + return ((val) << 
A5XX_RB_RENDER_COMPONENTS_RT2__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT2__MASK; +} +#define A5XX_RB_RENDER_COMPONENTS_RT3__MASK 0x0000f000 +#define A5XX_RB_RENDER_COMPONENTS_RT3__SHIFT 12 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT3(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_COMPONENTS_RT3__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT3__MASK; +} +#define A5XX_RB_RENDER_COMPONENTS_RT4__MASK 0x000f0000 +#define A5XX_RB_RENDER_COMPONENTS_RT4__SHIFT 16 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT4(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_COMPONENTS_RT4__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT4__MASK; +} +#define A5XX_RB_RENDER_COMPONENTS_RT5__MASK 0x00f00000 +#define A5XX_RB_RENDER_COMPONENTS_RT5__SHIFT 20 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT5(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_COMPONENTS_RT5__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT5__MASK; +} +#define A5XX_RB_RENDER_COMPONENTS_RT6__MASK 0x0f000000 +#define A5XX_RB_RENDER_COMPONENTS_RT6__SHIFT 24 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT6(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_COMPONENTS_RT6__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT6__MASK; +} +#define A5XX_RB_RENDER_COMPONENTS_RT7__MASK 0xf0000000 +#define A5XX_RB_RENDER_COMPONENTS_RT7__SHIFT 28 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT7(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_COMPONENTS_RT7__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT7__MASK; +} + +static inline uint32_t REG_A5XX_RB_MRT(uint32_t i0) { return 0x0000e150 + 0x7*i0; } + +static inline uint32_t REG_A5XX_RB_MRT_CONTROL(uint32_t i0) { return 0x0000e150 + 0x7*i0; } +#define A5XX_RB_MRT_CONTROL_BLEND 0x00000001 +#define A5XX_RB_MRT_CONTROL_BLEND2 0x00000002 +#define A5XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000004 +#define A5XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000078 +#define A5XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 3 +static inline uint32_t A5XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) +{ + return ((val) << 
A5XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A5XX_RB_MRT_CONTROL_ROP_CODE__MASK; +} +#define A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x00000780 +#define A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 7 +static inline uint32_t A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) +{ + return ((val) << A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; +} + +static inline uint32_t REG_A5XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x0000e151 + 0x7*i0; } +#define A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f +#define A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 +static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK; +} +#define A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0 +#define A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5 +static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) +{ + return ((val) << A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK; +} +#define A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00 +#define A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8 +static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK; +} +#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000 +#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16 +static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK; +} +#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000 +#define 
A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21 +static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) +{ + return ((val) << A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK; +} +#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000 +#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24 +static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK; +} + +static inline uint32_t REG_A5XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x0000e152 + 0x7*i0; } +#define A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) +{ + return ((val) << A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK; +} +#define A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x00000300 +#define A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 8 +static inline uint32_t A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a5xx_tile_mode val) +{ + return ((val) << A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK; +} +#define A5XX_RB_MRT_BUF_INFO_DITHER_MODE__MASK 0x00001800 +#define A5XX_RB_MRT_BUF_INFO_DITHER_MODE__SHIFT 11 +static inline uint32_t A5XX_RB_MRT_BUF_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) +{ + return ((val) << A5XX_RB_MRT_BUF_INFO_DITHER_MODE__SHIFT) & A5XX_RB_MRT_BUF_INFO_DITHER_MODE__MASK; +} +#define A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00006000 +#define A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 13 +static inline uint32_t A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK; +} +#define 
A5XX_RB_MRT_BUF_INFO_COLOR_SRGB 0x00008000 + +static inline uint32_t REG_A5XX_RB_MRT_PITCH(uint32_t i0) { return 0x0000e153 + 0x7*i0; } +#define A5XX_RB_MRT_PITCH__MASK 0xffffffff +#define A5XX_RB_MRT_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_MRT_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_MRT_PITCH__SHIFT) & A5XX_RB_MRT_PITCH__MASK; +} + +static inline uint32_t REG_A5XX_RB_MRT_ARRAY_PITCH(uint32_t i0) { return 0x0000e154 + 0x7*i0; } +#define A5XX_RB_MRT_ARRAY_PITCH__MASK 0xffffffff +#define A5XX_RB_MRT_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_MRT_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_MRT_ARRAY_PITCH__SHIFT) & A5XX_RB_MRT_ARRAY_PITCH__MASK; +} + +static inline uint32_t REG_A5XX_RB_MRT_BASE_LO(uint32_t i0) { return 0x0000e155 + 0x7*i0; } + +static inline uint32_t REG_A5XX_RB_MRT_BASE_HI(uint32_t i0) { return 0x0000e156 + 0x7*i0; } + +#define REG_A5XX_RB_BLEND_RED 0x0000e1a0 +#define A5XX_RB_BLEND_RED_UINT__MASK 0x000000ff +#define A5XX_RB_BLEND_RED_UINT__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_RED_UINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_RED_UINT__SHIFT) & A5XX_RB_BLEND_RED_UINT__MASK; +} +#define A5XX_RB_BLEND_RED_SINT__MASK 0x0000ff00 +#define A5XX_RB_BLEND_RED_SINT__SHIFT 8 +static inline uint32_t A5XX_RB_BLEND_RED_SINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_RED_SINT__SHIFT) & A5XX_RB_BLEND_RED_SINT__MASK; +} +#define A5XX_RB_BLEND_RED_FLOAT__MASK 0xffff0000 +#define A5XX_RB_BLEND_RED_FLOAT__SHIFT 16 +static inline uint32_t A5XX_RB_BLEND_RED_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A5XX_RB_BLEND_RED_FLOAT__SHIFT) & A5XX_RB_BLEND_RED_FLOAT__MASK; +} + +#define REG_A5XX_RB_BLEND_RED_F32 0x0000e1a1 +#define A5XX_RB_BLEND_RED_F32__MASK 0xffffffff +#define A5XX_RB_BLEND_RED_F32__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_RED_F32(float val) +{ + return ((fui(val)) << A5XX_RB_BLEND_RED_F32__SHIFT) & 
A5XX_RB_BLEND_RED_F32__MASK; +} + +#define REG_A5XX_RB_BLEND_GREEN 0x0000e1a2 +#define A5XX_RB_BLEND_GREEN_UINT__MASK 0x000000ff +#define A5XX_RB_BLEND_GREEN_UINT__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_GREEN_UINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_GREEN_UINT__SHIFT) & A5XX_RB_BLEND_GREEN_UINT__MASK; +} +#define A5XX_RB_BLEND_GREEN_SINT__MASK 0x0000ff00 +#define A5XX_RB_BLEND_GREEN_SINT__SHIFT 8 +static inline uint32_t A5XX_RB_BLEND_GREEN_SINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_GREEN_SINT__SHIFT) & A5XX_RB_BLEND_GREEN_SINT__MASK; +} +#define A5XX_RB_BLEND_GREEN_FLOAT__MASK 0xffff0000 +#define A5XX_RB_BLEND_GREEN_FLOAT__SHIFT 16 +static inline uint32_t A5XX_RB_BLEND_GREEN_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A5XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A5XX_RB_BLEND_GREEN_FLOAT__MASK; +} + +#define REG_A5XX_RB_BLEND_GREEN_F32 0x0000e1a3 +#define A5XX_RB_BLEND_GREEN_F32__MASK 0xffffffff +#define A5XX_RB_BLEND_GREEN_F32__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_GREEN_F32(float val) +{ + return ((fui(val)) << A5XX_RB_BLEND_GREEN_F32__SHIFT) & A5XX_RB_BLEND_GREEN_F32__MASK; +} + +#define REG_A5XX_RB_BLEND_BLUE 0x0000e1a4 +#define A5XX_RB_BLEND_BLUE_UINT__MASK 0x000000ff +#define A5XX_RB_BLEND_BLUE_UINT__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_BLUE_UINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_BLUE_UINT__SHIFT) & A5XX_RB_BLEND_BLUE_UINT__MASK; +} +#define A5XX_RB_BLEND_BLUE_SINT__MASK 0x0000ff00 +#define A5XX_RB_BLEND_BLUE_SINT__SHIFT 8 +static inline uint32_t A5XX_RB_BLEND_BLUE_SINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_BLUE_SINT__SHIFT) & A5XX_RB_BLEND_BLUE_SINT__MASK; +} +#define A5XX_RB_BLEND_BLUE_FLOAT__MASK 0xffff0000 +#define A5XX_RB_BLEND_BLUE_FLOAT__SHIFT 16 +static inline uint32_t A5XX_RB_BLEND_BLUE_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A5XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A5XX_RB_BLEND_BLUE_FLOAT__MASK; +} + +#define REG_A5XX_RB_BLEND_BLUE_F32 0x0000e1a5 
+#define A5XX_RB_BLEND_BLUE_F32__MASK 0xffffffff +#define A5XX_RB_BLEND_BLUE_F32__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_BLUE_F32(float val) +{ + return ((fui(val)) << A5XX_RB_BLEND_BLUE_F32__SHIFT) & A5XX_RB_BLEND_BLUE_F32__MASK; +} + +#define REG_A5XX_RB_BLEND_ALPHA 0x0000e1a6 +#define A5XX_RB_BLEND_ALPHA_UINT__MASK 0x000000ff +#define A5XX_RB_BLEND_ALPHA_UINT__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_ALPHA_UINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_ALPHA_UINT__SHIFT) & A5XX_RB_BLEND_ALPHA_UINT__MASK; +} +#define A5XX_RB_BLEND_ALPHA_SINT__MASK 0x0000ff00 +#define A5XX_RB_BLEND_ALPHA_SINT__SHIFT 8 +static inline uint32_t A5XX_RB_BLEND_ALPHA_SINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_ALPHA_SINT__SHIFT) & A5XX_RB_BLEND_ALPHA_SINT__MASK; +} +#define A5XX_RB_BLEND_ALPHA_FLOAT__MASK 0xffff0000 +#define A5XX_RB_BLEND_ALPHA_FLOAT__SHIFT 16 +static inline uint32_t A5XX_RB_BLEND_ALPHA_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A5XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A5XX_RB_BLEND_ALPHA_FLOAT__MASK; +} + +#define REG_A5XX_RB_BLEND_ALPHA_F32 0x0000e1a7 +#define A5XX_RB_BLEND_ALPHA_F32__MASK 0xffffffff +#define A5XX_RB_BLEND_ALPHA_F32__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_ALPHA_F32(float val) +{ + return ((fui(val)) << A5XX_RB_BLEND_ALPHA_F32__SHIFT) & A5XX_RB_BLEND_ALPHA_F32__MASK; +} + +#define REG_A5XX_RB_ALPHA_CONTROL 0x0000e1a8 +#define A5XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK 0x000000ff +#define A5XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT 0 +static inline uint32_t A5XX_RB_ALPHA_CONTROL_ALPHA_REF(uint32_t val) +{ + return ((val) << A5XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT) & A5XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK; +} +#define A5XX_RB_ALPHA_CONTROL_ALPHA_TEST 0x00000100 +#define A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK 0x00000e00 +#define A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT 9 +static inline uint32_t A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val) +{ + return ((val) << 
A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK; +} + +#define REG_A5XX_RB_BLEND_CNTL 0x0000e1a9 +#define A5XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK 0x000000ff +#define A5XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_CNTL_ENABLE_BLEND(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT) & A5XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK; +} +#define A5XX_RB_BLEND_CNTL_INDEPENDENT_BLEND 0x00000100 +#define A5XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE 0x00000400 +#define A5XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK 0xffff0000 +#define A5XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT 16 +static inline uint32_t A5XX_RB_BLEND_CNTL_SAMPLE_MASK(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT) & A5XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK; +} + +#define REG_A5XX_RB_DEPTH_PLANE_CNTL 0x0000e1b0 +#define A5XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 +#define A5XX_RB_DEPTH_PLANE_CNTL_UNK1 0x00000002 + +#define REG_A5XX_RB_DEPTH_CNTL 0x0000e1b1 +#define A5XX_RB_DEPTH_CNTL_Z_ENABLE 0x00000001 +#define A5XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE 0x00000002 +#define A5XX_RB_DEPTH_CNTL_ZFUNC__MASK 0x0000001c +#define A5XX_RB_DEPTH_CNTL_ZFUNC__SHIFT 2 +static inline uint32_t A5XX_RB_DEPTH_CNTL_ZFUNC(enum adreno_compare_func val) +{ + return ((val) << A5XX_RB_DEPTH_CNTL_ZFUNC__SHIFT) & A5XX_RB_DEPTH_CNTL_ZFUNC__MASK; +} +#define A5XX_RB_DEPTH_CNTL_Z_TEST_ENABLE 0x00000040 + +#define REG_A5XX_RB_DEPTH_BUFFER_INFO 0x0000e1b2 +#define A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK 0x00000007 +#define A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT 0 +static inline uint32_t A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a5xx_depth_format val) +{ + return ((val) << A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT) & A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK; +} + +#define REG_A5XX_RB_DEPTH_BUFFER_BASE_LO 0x0000e1b3 + +#define REG_A5XX_RB_DEPTH_BUFFER_BASE_HI 0x0000e1b4 + +#define REG_A5XX_RB_DEPTH_BUFFER_PITCH 0x0000e1b5 
+#define A5XX_RB_DEPTH_BUFFER_PITCH__MASK 0xffffffff +#define A5XX_RB_DEPTH_BUFFER_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_DEPTH_BUFFER_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_DEPTH_BUFFER_PITCH__SHIFT) & A5XX_RB_DEPTH_BUFFER_PITCH__MASK; +} + +#define REG_A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH 0x0000e1b6 +#define A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK 0xffffffff +#define A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT) & A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_RB_STENCIL_CONTROL 0x0000e1c0 +#define A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001 +#define A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000002 +#define A5XX_RB_STENCIL_CONTROL_STENCIL_READ 0x00000004 +#define A5XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700 +#define A5XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A5XX_RB_STENCIL_CONTROL_FUNC__MASK; +} +#define A5XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800 +#define A5XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A5XX_RB_STENCIL_CONTROL_FAIL__MASK; +} +#define A5XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000 +#define A5XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A5XX_RB_STENCIL_CONTROL_ZPASS__MASK; +} +#define A5XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000 +#define A5XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) 
& A5XX_RB_STENCIL_CONTROL_ZFAIL__MASK; +} +#define A5XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000 +#define A5XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_FUNC_BF__MASK; +} +#define A5XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000 +#define A5XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_FAIL_BF__MASK; +} +#define A5XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000 +#define A5XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK; +} +#define A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000 +#define A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK; +} + +#define REG_A5XX_RB_STENCIL_INFO 0x0000e1c1 +#define A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL 0x00000001 + +#define REG_A5XX_RB_STENCIL_BASE_LO 0x0000e1c2 + +#define REG_A5XX_RB_STENCIL_BASE_HI 0x0000e1c3 + +#define REG_A5XX_RB_STENCIL_PITCH 0x0000e1c4 +#define A5XX_RB_STENCIL_PITCH__MASK 0xffffffff +#define A5XX_RB_STENCIL_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_STENCIL_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_STENCIL_PITCH__SHIFT) & A5XX_RB_STENCIL_PITCH__MASK; +} + +#define REG_A5XX_RB_STENCIL_ARRAY_PITCH 0x0000e1c5 +#define A5XX_RB_STENCIL_ARRAY_PITCH__MASK 0xffffffff +#define A5XX_RB_STENCIL_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_STENCIL_ARRAY_PITCH(uint32_t val) +{ + 
assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_STENCIL_ARRAY_PITCH__SHIFT) & A5XX_RB_STENCIL_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_RB_STENCILREFMASK 0x0000e1c6 +#define A5XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff +#define A5XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0 +static inline uint32_t A5XX_RB_STENCILREFMASK_STENCILREF(uint32_t val) +{ + return ((val) << A5XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A5XX_RB_STENCILREFMASK_STENCILREF__MASK; +} +#define A5XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00 +#define A5XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8 +static inline uint32_t A5XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val) +{ + return ((val) << A5XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A5XX_RB_STENCILREFMASK_STENCILMASK__MASK; +} +#define A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000 +#define A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16 +static inline uint32_t A5XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val) +{ + return ((val) << A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK; +} + +#define REG_A5XX_RB_STENCILREFMASK_BF 0x0000e1c7 +#define A5XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff +#define A5XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0 +static inline uint32_t A5XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val) +{ + return ((val) << A5XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A5XX_RB_STENCILREFMASK_BF_STENCILREF__MASK; +} +#define A5XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00 +#define A5XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8 +static inline uint32_t A5XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val) +{ + return ((val) << A5XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A5XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK; +} +#define A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000 +#define A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16 +static inline uint32_t A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val) +{ + return ((val) << 
A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK; +} + +#define REG_A5XX_RB_WINDOW_OFFSET 0x0000e1d0 +#define A5XX_RB_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_RB_WINDOW_OFFSET_X__MASK 0x00007fff +#define A5XX_RB_WINDOW_OFFSET_X__SHIFT 0 +static inline uint32_t A5XX_RB_WINDOW_OFFSET_X(uint32_t val) +{ + return ((val) << A5XX_RB_WINDOW_OFFSET_X__SHIFT) & A5XX_RB_WINDOW_OFFSET_X__MASK; +} +#define A5XX_RB_WINDOW_OFFSET_Y__MASK 0x7fff0000 +#define A5XX_RB_WINDOW_OFFSET_Y__SHIFT 16 +static inline uint32_t A5XX_RB_WINDOW_OFFSET_Y(uint32_t val) +{ + return ((val) << A5XX_RB_WINDOW_OFFSET_Y__SHIFT) & A5XX_RB_WINDOW_OFFSET_Y__MASK; +} + +#define REG_A5XX_RB_SAMPLE_COUNT_CONTROL 0x0000e1d1 +#define A5XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002 + +#define REG_A5XX_RB_BLIT_CNTL 0x0000e210 +#define A5XX_RB_BLIT_CNTL_BUF__MASK 0x0000000f +#define A5XX_RB_BLIT_CNTL_BUF__SHIFT 0 +static inline uint32_t A5XX_RB_BLIT_CNTL_BUF(enum a5xx_blit_buf val) +{ + return ((val) << A5XX_RB_BLIT_CNTL_BUF__SHIFT) & A5XX_RB_BLIT_CNTL_BUF__MASK; +} + +#define REG_A5XX_RB_RESOLVE_CNTL_1 0x0000e211 +#define A5XX_RB_RESOLVE_CNTL_1_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_RB_RESOLVE_CNTL_1_X__MASK 0x00007fff +#define A5XX_RB_RESOLVE_CNTL_1_X__SHIFT 0 +static inline uint32_t A5XX_RB_RESOLVE_CNTL_1_X(uint32_t val) +{ + return ((val) << A5XX_RB_RESOLVE_CNTL_1_X__SHIFT) & A5XX_RB_RESOLVE_CNTL_1_X__MASK; +} +#define A5XX_RB_RESOLVE_CNTL_1_Y__MASK 0x7fff0000 +#define A5XX_RB_RESOLVE_CNTL_1_Y__SHIFT 16 +static inline uint32_t A5XX_RB_RESOLVE_CNTL_1_Y(uint32_t val) +{ + return ((val) << A5XX_RB_RESOLVE_CNTL_1_Y__SHIFT) & A5XX_RB_RESOLVE_CNTL_1_Y__MASK; +} + +#define REG_A5XX_RB_RESOLVE_CNTL_2 0x0000e212 +#define A5XX_RB_RESOLVE_CNTL_2_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_RB_RESOLVE_CNTL_2_X__MASK 0x00007fff +#define A5XX_RB_RESOLVE_CNTL_2_X__SHIFT 0 +static inline uint32_t A5XX_RB_RESOLVE_CNTL_2_X(uint32_t val) +{ + return 
((val) << A5XX_RB_RESOLVE_CNTL_2_X__SHIFT) & A5XX_RB_RESOLVE_CNTL_2_X__MASK; +} +#define A5XX_RB_RESOLVE_CNTL_2_Y__MASK 0x7fff0000 +#define A5XX_RB_RESOLVE_CNTL_2_Y__SHIFT 16 +static inline uint32_t A5XX_RB_RESOLVE_CNTL_2_Y(uint32_t val) +{ + return ((val) << A5XX_RB_RESOLVE_CNTL_2_Y__SHIFT) & A5XX_RB_RESOLVE_CNTL_2_Y__MASK; +} + +#define REG_A5XX_RB_RESOLVE_CNTL_3 0x0000e213 +#define A5XX_RB_RESOLVE_CNTL_3_TILED 0x00000001 + +#define REG_A5XX_RB_BLIT_DST_LO 0x0000e214 + +#define REG_A5XX_RB_BLIT_DST_HI 0x0000e215 + +#define REG_A5XX_RB_BLIT_DST_PITCH 0x0000e216 +#define A5XX_RB_BLIT_DST_PITCH__MASK 0xffffffff +#define A5XX_RB_BLIT_DST_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_BLIT_DST_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_BLIT_DST_PITCH__SHIFT) & A5XX_RB_BLIT_DST_PITCH__MASK; +} + +#define REG_A5XX_RB_BLIT_DST_ARRAY_PITCH 0x0000e217 +#define A5XX_RB_BLIT_DST_ARRAY_PITCH__MASK 0xffffffff +#define A5XX_RB_BLIT_DST_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_BLIT_DST_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_BLIT_DST_ARRAY_PITCH__SHIFT) & A5XX_RB_BLIT_DST_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_RB_CLEAR_COLOR_DW0 0x0000e218 + +#define REG_A5XX_RB_CLEAR_COLOR_DW1 0x0000e219 + +#define REG_A5XX_RB_CLEAR_COLOR_DW2 0x0000e21a + +#define REG_A5XX_RB_CLEAR_COLOR_DW3 0x0000e21b + +#define REG_A5XX_RB_CLEAR_CNTL 0x0000e21c +#define A5XX_RB_CLEAR_CNTL_FAST_CLEAR 0x00000002 +#define A5XX_RB_CLEAR_CNTL_MSAA_RESOLVE 0x00000004 +#define A5XX_RB_CLEAR_CNTL_MASK__MASK 0x000000f0 +#define A5XX_RB_CLEAR_CNTL_MASK__SHIFT 4 +static inline uint32_t A5XX_RB_CLEAR_CNTL_MASK(uint32_t val) +{ + return ((val) << A5XX_RB_CLEAR_CNTL_MASK__SHIFT) & A5XX_RB_CLEAR_CNTL_MASK__MASK; +} + +#define REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO 0x0000e240 + +#define REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_HI 0x0000e241 + +#define REG_A5XX_RB_DEPTH_FLAG_BUFFER_PITCH 0x0000e242 + +static inline uint32_t 
REG_A5XX_RB_MRT_FLAG_BUFFER(uint32_t i0) { return 0x0000e243 + 0x4*i0; } + +static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_ADDR_LO(uint32_t i0) { return 0x0000e243 + 0x4*i0; } + +static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_ADDR_HI(uint32_t i0) { return 0x0000e244 + 0x4*i0; } + +static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_PITCH(uint32_t i0) { return 0x0000e245 + 0x4*i0; } +#define A5XX_RB_MRT_FLAG_BUFFER_PITCH__MASK 0xffffffff +#define A5XX_RB_MRT_FLAG_BUFFER_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_MRT_FLAG_BUFFER_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_MRT_FLAG_BUFFER_PITCH__SHIFT) & A5XX_RB_MRT_FLAG_BUFFER_PITCH__MASK; +} + +static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(uint32_t i0) { return 0x0000e246 + 0x4*i0; } +#define A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__MASK 0xffffffff +#define A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__SHIFT) & A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_RB_BLIT_FLAG_DST_LO 0x0000e263 + +#define REG_A5XX_RB_BLIT_FLAG_DST_HI 0x0000e264 + +#define REG_A5XX_RB_BLIT_FLAG_DST_PITCH 0x0000e265 +#define A5XX_RB_BLIT_FLAG_DST_PITCH__MASK 0xffffffff +#define A5XX_RB_BLIT_FLAG_DST_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_BLIT_FLAG_DST_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_BLIT_FLAG_DST_PITCH__SHIFT) & A5XX_RB_BLIT_FLAG_DST_PITCH__MASK; +} + +#define REG_A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH 0x0000e266 +#define A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__MASK 0xffffffff +#define A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__SHIFT) & A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__MASK; +} + +#define 
REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO 0x0000e267 + +#define REG_A5XX_RB_SAMPLE_COUNT_ADDR_HI 0x0000e268 + +#define REG_A5XX_VPC_CNTL_0 0x0000e280 +#define A5XX_VPC_CNTL_0_STRIDE_IN_VPC__MASK 0x0000007f +#define A5XX_VPC_CNTL_0_STRIDE_IN_VPC__SHIFT 0 +static inline uint32_t A5XX_VPC_CNTL_0_STRIDE_IN_VPC(uint32_t val) +{ + return ((val) << A5XX_VPC_CNTL_0_STRIDE_IN_VPC__SHIFT) & A5XX_VPC_CNTL_0_STRIDE_IN_VPC__MASK; +} +#define A5XX_VPC_CNTL_0_VARYING 0x00000800 + +static inline uint32_t REG_A5XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x0000e282 + 0x1*i0; } + +static inline uint32_t REG_A5XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x0000e282 + 0x1*i0; } + +static inline uint32_t REG_A5XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x0000e28a + 0x1*i0; } + +static inline uint32_t REG_A5XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x0000e28a + 0x1*i0; } + +#define REG_A5XX_UNKNOWN_E292 0x0000e292 + +#define REG_A5XX_UNKNOWN_E293 0x0000e293 + +static inline uint32_t REG_A5XX_VPC_VAR(uint32_t i0) { return 0x0000e294 + 0x1*i0; } + +static inline uint32_t REG_A5XX_VPC_VAR_DISABLE(uint32_t i0) { return 0x0000e294 + 0x1*i0; } + +#define REG_A5XX_VPC_GS_SIV_CNTL 0x0000e298 + +#define REG_A5XX_UNKNOWN_E29A 0x0000e29a + +#define REG_A5XX_VPC_PACK 0x0000e29d +#define A5XX_VPC_PACK_NUMNONPOSVAR__MASK 0x000000ff +#define A5XX_VPC_PACK_NUMNONPOSVAR__SHIFT 0 +static inline uint32_t A5XX_VPC_PACK_NUMNONPOSVAR(uint32_t val) +{ + return ((val) << A5XX_VPC_PACK_NUMNONPOSVAR__SHIFT) & A5XX_VPC_PACK_NUMNONPOSVAR__MASK; +} +#define A5XX_VPC_PACK_PSIZELOC__MASK 0x0000ff00 +#define A5XX_VPC_PACK_PSIZELOC__SHIFT 8 +static inline uint32_t A5XX_VPC_PACK_PSIZELOC(uint32_t val) +{ + return ((val) << A5XX_VPC_PACK_PSIZELOC__SHIFT) & A5XX_VPC_PACK_PSIZELOC__MASK; +} + +#define REG_A5XX_VPC_FS_PRIMITIVEID_CNTL 0x0000e2a0 + +#define REG_A5XX_VPC_SO_BUF_CNTL 0x0000e2a1 +#define A5XX_VPC_SO_BUF_CNTL_BUF0 0x00000001 +#define A5XX_VPC_SO_BUF_CNTL_BUF1 0x00000008 +#define A5XX_VPC_SO_BUF_CNTL_BUF2 
0x00000040 +#define A5XX_VPC_SO_BUF_CNTL_BUF3 0x00000200 +#define A5XX_VPC_SO_BUF_CNTL_ENABLE 0x00008000 + +#define REG_A5XX_VPC_SO_OVERRIDE 0x0000e2a2 +#define A5XX_VPC_SO_OVERRIDE_SO_DISABLE 0x00000001 + +#define REG_A5XX_VPC_SO_CNTL 0x0000e2a3 +#define A5XX_VPC_SO_CNTL_ENABLE 0x00010000 + +#define REG_A5XX_VPC_SO_PROG 0x0000e2a4 +#define A5XX_VPC_SO_PROG_A_BUF__MASK 0x00000003 +#define A5XX_VPC_SO_PROG_A_BUF__SHIFT 0 +static inline uint32_t A5XX_VPC_SO_PROG_A_BUF(uint32_t val) +{ + return ((val) << A5XX_VPC_SO_PROG_A_BUF__SHIFT) & A5XX_VPC_SO_PROG_A_BUF__MASK; +} +#define A5XX_VPC_SO_PROG_A_OFF__MASK 0x000007fc +#define A5XX_VPC_SO_PROG_A_OFF__SHIFT 2 +static inline uint32_t A5XX_VPC_SO_PROG_A_OFF(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << A5XX_VPC_SO_PROG_A_OFF__SHIFT) & A5XX_VPC_SO_PROG_A_OFF__MASK; +} +#define A5XX_VPC_SO_PROG_A_EN 0x00000800 +#define A5XX_VPC_SO_PROG_B_BUF__MASK 0x00003000 +#define A5XX_VPC_SO_PROG_B_BUF__SHIFT 12 +static inline uint32_t A5XX_VPC_SO_PROG_B_BUF(uint32_t val) +{ + return ((val) << A5XX_VPC_SO_PROG_B_BUF__SHIFT) & A5XX_VPC_SO_PROG_B_BUF__MASK; +} +#define A5XX_VPC_SO_PROG_B_OFF__MASK 0x007fc000 +#define A5XX_VPC_SO_PROG_B_OFF__SHIFT 14 +static inline uint32_t A5XX_VPC_SO_PROG_B_OFF(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << A5XX_VPC_SO_PROG_B_OFF__SHIFT) & A5XX_VPC_SO_PROG_B_OFF__MASK; +} +#define A5XX_VPC_SO_PROG_B_EN 0x00800000 + +static inline uint32_t REG_A5XX_VPC_SO(uint32_t i0) { return 0x0000e2a7 + 0x7*i0; } + +static inline uint32_t REG_A5XX_VPC_SO_BUFFER_BASE_LO(uint32_t i0) { return 0x0000e2a7 + 0x7*i0; } + +static inline uint32_t REG_A5XX_VPC_SO_BUFFER_BASE_HI(uint32_t i0) { return 0x0000e2a8 + 0x7*i0; } + +static inline uint32_t REG_A5XX_VPC_SO_BUFFER_SIZE(uint32_t i0) { return 0x0000e2a9 + 0x7*i0; } + +static inline uint32_t REG_A5XX_VPC_SO_NCOMP(uint32_t i0) { return 0x0000e2aa + 0x7*i0; } + +static inline uint32_t REG_A5XX_VPC_SO_BUFFER_OFFSET(uint32_t i0) { return 
0x0000e2ab + 0x7*i0; } + +static inline uint32_t REG_A5XX_VPC_SO_FLUSH_BASE_LO(uint32_t i0) { return 0x0000e2ac + 0x7*i0; } + +static inline uint32_t REG_A5XX_VPC_SO_FLUSH_BASE_HI(uint32_t i0) { return 0x0000e2ad + 0x7*i0; } + +#define REG_A5XX_PC_PRIMITIVE_CNTL 0x0000e384 +#define A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__MASK 0x0000007f +#define A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__SHIFT 0 +static inline uint32_t A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC(uint32_t val) +{ + return ((val) << A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__SHIFT) & A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__MASK; +} +#define A5XX_PC_PRIMITIVE_CNTL_PRIMITIVE_RESTART 0x00000100 +#define A5XX_PC_PRIMITIVE_CNTL_COUNT_PRIMITIVES 0x00000200 +#define A5XX_PC_PRIMITIVE_CNTL_PROVOKING_VTX_LAST 0x00000400 + +#define REG_A5XX_PC_PRIM_VTX_CNTL 0x0000e385 +#define A5XX_PC_PRIM_VTX_CNTL_PSIZE 0x00000800 + +#define REG_A5XX_PC_RASTER_CNTL 0x0000e388 +#define A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__MASK 0x00000007 +#define A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__SHIFT 0 +static inline uint32_t A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__SHIFT) & A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__MASK; +} +#define A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__MASK 0x00000038 +#define A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__SHIFT 3 +static inline uint32_t A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__SHIFT) & A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__MASK; +} +#define A5XX_PC_RASTER_CNTL_POLYMODE_ENABLE 0x00000040 + +#define REG_A5XX_UNKNOWN_E389 0x0000e389 + +#define REG_A5XX_PC_RESTART_INDEX 0x0000e38c + +#define REG_A5XX_PC_GS_LAYERED 0x0000e38d + +#define REG_A5XX_PC_GS_PARAM 0x0000e38e +#define A5XX_PC_GS_PARAM_MAX_VERTICES__MASK 0x000003ff +#define A5XX_PC_GS_PARAM_MAX_VERTICES__SHIFT 0 +static inline uint32_t A5XX_PC_GS_PARAM_MAX_VERTICES(uint32_t 
val) +{ + return ((val) << A5XX_PC_GS_PARAM_MAX_VERTICES__SHIFT) & A5XX_PC_GS_PARAM_MAX_VERTICES__MASK; +} +#define A5XX_PC_GS_PARAM_INVOCATIONS__MASK 0x0000f800 +#define A5XX_PC_GS_PARAM_INVOCATIONS__SHIFT 11 +static inline uint32_t A5XX_PC_GS_PARAM_INVOCATIONS(uint32_t val) +{ + return ((val) << A5XX_PC_GS_PARAM_INVOCATIONS__SHIFT) & A5XX_PC_GS_PARAM_INVOCATIONS__MASK; +} +#define A5XX_PC_GS_PARAM_PRIMTYPE__MASK 0x01800000 +#define A5XX_PC_GS_PARAM_PRIMTYPE__SHIFT 23 +static inline uint32_t A5XX_PC_GS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A5XX_PC_GS_PARAM_PRIMTYPE__SHIFT) & A5XX_PC_GS_PARAM_PRIMTYPE__MASK; +} + +#define REG_A5XX_PC_HS_PARAM 0x0000e38f +#define A5XX_PC_HS_PARAM_VERTICES_OUT__MASK 0x0000003f +#define A5XX_PC_HS_PARAM_VERTICES_OUT__SHIFT 0 +static inline uint32_t A5XX_PC_HS_PARAM_VERTICES_OUT(uint32_t val) +{ + return ((val) << A5XX_PC_HS_PARAM_VERTICES_OUT__SHIFT) & A5XX_PC_HS_PARAM_VERTICES_OUT__MASK; +} +#define A5XX_PC_HS_PARAM_SPACING__MASK 0x00600000 +#define A5XX_PC_HS_PARAM_SPACING__SHIFT 21 +static inline uint32_t A5XX_PC_HS_PARAM_SPACING(enum a4xx_tess_spacing val) +{ + return ((val) << A5XX_PC_HS_PARAM_SPACING__SHIFT) & A5XX_PC_HS_PARAM_SPACING__MASK; +} +#define A5XX_PC_HS_PARAM_CW 0x00800000 +#define A5XX_PC_HS_PARAM_CONNECTED 0x01000000 + +#define REG_A5XX_PC_POWER_CNTL 0x0000e3b0 + +#define REG_A5XX_VFD_CONTROL_0 0x0000e400 +#define A5XX_VFD_CONTROL_0_VTXCNT__MASK 0x0000003f +#define A5XX_VFD_CONTROL_0_VTXCNT__SHIFT 0 +static inline uint32_t A5XX_VFD_CONTROL_0_VTXCNT(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_0_VTXCNT__SHIFT) & A5XX_VFD_CONTROL_0_VTXCNT__MASK; +} + +#define REG_A5XX_VFD_CONTROL_1 0x0000e401 +#define A5XX_VFD_CONTROL_1_REGID4VTX__MASK 0x000000ff +#define A5XX_VFD_CONTROL_1_REGID4VTX__SHIFT 0 +static inline uint32_t A5XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A5XX_VFD_CONTROL_1_REGID4VTX__MASK; +} +#define 
A5XX_VFD_CONTROL_1_REGID4INST__MASK 0x0000ff00 +#define A5XX_VFD_CONTROL_1_REGID4INST__SHIFT 8 +static inline uint32_t A5XX_VFD_CONTROL_1_REGID4INST(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A5XX_VFD_CONTROL_1_REGID4INST__MASK; +} +#define A5XX_VFD_CONTROL_1_REGID4PRIMID__MASK 0x00ff0000 +#define A5XX_VFD_CONTROL_1_REGID4PRIMID__SHIFT 16 +static inline uint32_t A5XX_VFD_CONTROL_1_REGID4PRIMID(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_1_REGID4PRIMID__SHIFT) & A5XX_VFD_CONTROL_1_REGID4PRIMID__MASK; +} + +#define REG_A5XX_VFD_CONTROL_2 0x0000e402 +#define A5XX_VFD_CONTROL_2_REGID_PATCHID__MASK 0x000000ff +#define A5XX_VFD_CONTROL_2_REGID_PATCHID__SHIFT 0 +static inline uint32_t A5XX_VFD_CONTROL_2_REGID_PATCHID(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_2_REGID_PATCHID__SHIFT) & A5XX_VFD_CONTROL_2_REGID_PATCHID__MASK; +} + +#define REG_A5XX_VFD_CONTROL_3 0x0000e403 +#define A5XX_VFD_CONTROL_3_REGID_PATCHID__MASK 0x0000ff00 +#define A5XX_VFD_CONTROL_3_REGID_PATCHID__SHIFT 8 +static inline uint32_t A5XX_VFD_CONTROL_3_REGID_PATCHID(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_3_REGID_PATCHID__SHIFT) & A5XX_VFD_CONTROL_3_REGID_PATCHID__MASK; +} +#define A5XX_VFD_CONTROL_3_REGID_TESSX__MASK 0x00ff0000 +#define A5XX_VFD_CONTROL_3_REGID_TESSX__SHIFT 16 +static inline uint32_t A5XX_VFD_CONTROL_3_REGID_TESSX(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_3_REGID_TESSX__SHIFT) & A5XX_VFD_CONTROL_3_REGID_TESSX__MASK; +} +#define A5XX_VFD_CONTROL_3_REGID_TESSY__MASK 0xff000000 +#define A5XX_VFD_CONTROL_3_REGID_TESSY__SHIFT 24 +static inline uint32_t A5XX_VFD_CONTROL_3_REGID_TESSY(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_3_REGID_TESSY__SHIFT) & A5XX_VFD_CONTROL_3_REGID_TESSY__MASK; +} + +#define REG_A5XX_VFD_CONTROL_4 0x0000e404 + +#define REG_A5XX_VFD_CONTROL_5 0x0000e405 + +#define REG_A5XX_VFD_INDEX_OFFSET 0x0000e408 + +#define REG_A5XX_VFD_INSTANCE_START_OFFSET 0x0000e409 + +static inline uint32_t 
REG_A5XX_VFD_FETCH(uint32_t i0) { return 0x0000e40a + 0x4*i0; } + +static inline uint32_t REG_A5XX_VFD_FETCH_BASE_LO(uint32_t i0) { return 0x0000e40a + 0x4*i0; } + +static inline uint32_t REG_A5XX_VFD_FETCH_BASE_HI(uint32_t i0) { return 0x0000e40b + 0x4*i0; } + +static inline uint32_t REG_A5XX_VFD_FETCH_SIZE(uint32_t i0) { return 0x0000e40c + 0x4*i0; } + +static inline uint32_t REG_A5XX_VFD_FETCH_STRIDE(uint32_t i0) { return 0x0000e40d + 0x4*i0; } + +static inline uint32_t REG_A5XX_VFD_DECODE(uint32_t i0) { return 0x0000e48a + 0x2*i0; } + +static inline uint32_t REG_A5XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x0000e48a + 0x2*i0; } +#define A5XX_VFD_DECODE_INSTR_IDX__MASK 0x0000001f +#define A5XX_VFD_DECODE_INSTR_IDX__SHIFT 0 +static inline uint32_t A5XX_VFD_DECODE_INSTR_IDX(uint32_t val) +{ + return ((val) << A5XX_VFD_DECODE_INSTR_IDX__SHIFT) & A5XX_VFD_DECODE_INSTR_IDX__MASK; +} +#define A5XX_VFD_DECODE_INSTR_INSTANCED 0x00020000 +#define A5XX_VFD_DECODE_INSTR_FORMAT__MASK 0x0ff00000 +#define A5XX_VFD_DECODE_INSTR_FORMAT__SHIFT 20 +static inline uint32_t A5XX_VFD_DECODE_INSTR_FORMAT(enum a5xx_vtx_fmt val) +{ + return ((val) << A5XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A5XX_VFD_DECODE_INSTR_FORMAT__MASK; +} +#define A5XX_VFD_DECODE_INSTR_SWAP__MASK 0x30000000 +#define A5XX_VFD_DECODE_INSTR_SWAP__SHIFT 28 +static inline uint32_t A5XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A5XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A5XX_VFD_DECODE_INSTR_SWAP__MASK; +} +#define A5XX_VFD_DECODE_INSTR_UNK30 0x40000000 +#define A5XX_VFD_DECODE_INSTR_FLOAT 0x80000000 + +static inline uint32_t REG_A5XX_VFD_DECODE_STEP_RATE(uint32_t i0) { return 0x0000e48b + 0x2*i0; } + +static inline uint32_t REG_A5XX_VFD_DEST_CNTL(uint32_t i0) { return 0x0000e4ca + 0x1*i0; } + +static inline uint32_t REG_A5XX_VFD_DEST_CNTL_INSTR(uint32_t i0) { return 0x0000e4ca + 0x1*i0; } +#define A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__MASK 0x0000000f +#define A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__SHIFT 
0 +static inline uint32_t A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK(uint32_t val) +{ + return ((val) << A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__SHIFT) & A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__MASK; +} +#define A5XX_VFD_DEST_CNTL_INSTR_REGID__MASK 0x00000ff0 +#define A5XX_VFD_DEST_CNTL_INSTR_REGID__SHIFT 4 +static inline uint32_t A5XX_VFD_DEST_CNTL_INSTR_REGID(uint32_t val) +{ + return ((val) << A5XX_VFD_DEST_CNTL_INSTR_REGID__SHIFT) & A5XX_VFD_DEST_CNTL_INSTR_REGID__MASK; +} + +#define REG_A5XX_VFD_POWER_CNTL 0x0000e4f0 + +#define REG_A5XX_SP_SP_CNTL 0x0000e580 + +#define REG_A5XX_SP_VS_CONFIG 0x0000e584 +#define A5XX_SP_VS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_VS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_SP_FS_CONFIG 0x0000e585 +#define A5XX_SP_FS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_FS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_SP_HS_CONFIG 0x0000e586 +#define 
A5XX_SP_HS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_HS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_SP_DS_CONFIG 0x0000e587 +#define A5XX_SP_DS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_DS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_SP_GS_CONFIG 0x0000e588 +#define A5XX_SP_GS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_GS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__SHIFT) & 
A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_SP_CS_CONFIG 0x0000e589 +#define A5XX_SP_CS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_CS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_SP_VS_CONFIG_MAX_CONST 0x0000e58a + +#define REG_A5XX_SP_FS_CONFIG_MAX_CONST 0x0000e58b + +#define REG_A5XX_SP_VS_CTRL_REG0 0x0000e590 +#define A5XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00000008 +#define A5XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 3 +static inline uint32_t A5XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_VS_CTRL_REG0_THREADSIZE__MASK; +} +#define A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A5XX_SP_VS_CTRL_REG0_VARYING 0x00010000 +#define A5XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x00100000 +#define A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 +#define 
A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT 25 +static inline uint32_t A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK; +} + +#define REG_A5XX_SP_PRIMITIVE_CNTL 0x0000e592 +#define A5XX_SP_PRIMITIVE_CNTL_VSOUT__MASK 0x0000001f +#define A5XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT 0 +static inline uint32_t A5XX_SP_PRIMITIVE_CNTL_VSOUT(uint32_t val) +{ + return ((val) << A5XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT) & A5XX_SP_PRIMITIVE_CNTL_VSOUT__MASK; +} + +static inline uint32_t REG_A5XX_SP_VS_OUT(uint32_t i0) { return 0x0000e593 + 0x1*i0; } + +static inline uint32_t REG_A5XX_SP_VS_OUT_REG(uint32_t i0) { return 0x0000e593 + 0x1*i0; } +#define A5XX_SP_VS_OUT_REG_A_REGID__MASK 0x000000ff +#define A5XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 +static inline uint32_t A5XX_SP_VS_OUT_REG_A_REGID(uint32_t val) +{ + return ((val) << A5XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A5XX_SP_VS_OUT_REG_A_REGID__MASK; +} +#define A5XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00000f00 +#define A5XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 8 +static inline uint32_t A5XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val) +{ + return ((val) << A5XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A5XX_SP_VS_OUT_REG_A_COMPMASK__MASK; +} +#define A5XX_SP_VS_OUT_REG_B_REGID__MASK 0x00ff0000 +#define A5XX_SP_VS_OUT_REG_B_REGID__SHIFT 16 +static inline uint32_t A5XX_SP_VS_OUT_REG_B_REGID(uint32_t val) +{ + return ((val) << A5XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A5XX_SP_VS_OUT_REG_B_REGID__MASK; +} +#define A5XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x0f000000 +#define A5XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 24 +static inline uint32_t A5XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) +{ + return ((val) << A5XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A5XX_SP_VS_OUT_REG_B_COMPMASK__MASK; +} + +static inline uint32_t REG_A5XX_SP_VS_VPC_DST(uint32_t i0) { return 0x0000e5a3 + 0x1*i0; } + +static inline uint32_t REG_A5XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x0000e5a3 + 0x1*i0; } +#define 
A5XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff +#define A5XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 +static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) +{ + return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK; +} +#define A5XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 +#define A5XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8 +static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val) +{ + return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK; +} +#define A5XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 +#define A5XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16 +static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val) +{ + return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK; +} +#define A5XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 +#define A5XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24 +static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) +{ + return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK; +} + +#define REG_A5XX_UNKNOWN_E5AB 0x0000e5ab + +#define REG_A5XX_SP_VS_OBJ_START_LO 0x0000e5ac + +#define REG_A5XX_SP_VS_OBJ_START_HI 0x0000e5ad + +#define REG_A5XX_SP_FS_CTRL_REG0 0x0000e5c0 +#define A5XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00000008 +#define A5XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 3 +static inline uint32_t A5XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_FS_CTRL_REG0_THREADSIZE__MASK; +} +#define A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define 
A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A5XX_SP_FS_CTRL_REG0_VARYING 0x00010000 +#define A5XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x00100000 +#define A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 +#define A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT 25 +static inline uint32_t A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK; +} + +#define REG_A5XX_UNKNOWN_E5C2 0x0000e5c2 + +#define REG_A5XX_SP_FS_OBJ_START_LO 0x0000e5c3 + +#define REG_A5XX_SP_FS_OBJ_START_HI 0x0000e5c4 + +#define REG_A5XX_SP_BLEND_CNTL 0x0000e5c9 +#define A5XX_SP_BLEND_CNTL_ENABLED 0x00000001 +#define A5XX_SP_BLEND_CNTL_UNK8 0x00000100 +#define A5XX_SP_BLEND_CNTL_ALPHA_TO_COVERAGE 0x00000400 + +#define REG_A5XX_SP_FS_OUTPUT_CNTL 0x0000e5ca +#define A5XX_SP_FS_OUTPUT_CNTL_MRT__MASK 0x0000000f +#define A5XX_SP_FS_OUTPUT_CNTL_MRT__SHIFT 0 +static inline uint32_t A5XX_SP_FS_OUTPUT_CNTL_MRT(uint32_t val) +{ + return ((val) << A5XX_SP_FS_OUTPUT_CNTL_MRT__SHIFT) & A5XX_SP_FS_OUTPUT_CNTL_MRT__MASK; +} +#define A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__MASK 0x00001fe0 +#define A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__SHIFT 5 +static inline uint32_t A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID(uint32_t val) +{ + return ((val) << A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__SHIFT) & A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__MASK; +} +#define A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__MASK 0x001fe000 +#define A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__SHIFT 13 +static inline uint32_t A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID(uint32_t val) +{ + return ((val) << A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__SHIFT) & A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__MASK; +} + +static inline uint32_t REG_A5XX_SP_FS_OUTPUT(uint32_t i0) { return 0x0000e5cb + 0x1*i0; } + 
+static inline uint32_t REG_A5XX_SP_FS_OUTPUT_REG(uint32_t i0) { return 0x0000e5cb + 0x1*i0; } +#define A5XX_SP_FS_OUTPUT_REG_REGID__MASK 0x000000ff +#define A5XX_SP_FS_OUTPUT_REG_REGID__SHIFT 0 +static inline uint32_t A5XX_SP_FS_OUTPUT_REG_REGID(uint32_t val) +{ + return ((val) << A5XX_SP_FS_OUTPUT_REG_REGID__SHIFT) & A5XX_SP_FS_OUTPUT_REG_REGID__MASK; +} +#define A5XX_SP_FS_OUTPUT_REG_HALF_PRECISION 0x00000100 + +static inline uint32_t REG_A5XX_SP_FS_MRT(uint32_t i0) { return 0x0000e5d3 + 0x1*i0; } + +static inline uint32_t REG_A5XX_SP_FS_MRT_REG(uint32_t i0) { return 0x0000e5d3 + 0x1*i0; } +#define A5XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK 0x000000ff +#define A5XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A5XX_SP_FS_MRT_REG_COLOR_FORMAT(enum a5xx_color_fmt val) +{ + return ((val) << A5XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT) & A5XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK; +} +#define A5XX_SP_FS_MRT_REG_COLOR_SINT 0x00000100 +#define A5XX_SP_FS_MRT_REG_COLOR_UINT 0x00000200 +#define A5XX_SP_FS_MRT_REG_COLOR_SRGB 0x00000400 + +#define REG_A5XX_UNKNOWN_E5DB 0x0000e5db + +#define REG_A5XX_SP_CS_CTRL_REG0 0x0000e5f0 +#define A5XX_SP_CS_CTRL_REG0_THREADSIZE__MASK 0x00000008 +#define A5XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT 3 +static inline uint32_t A5XX_SP_CS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_CS_CTRL_REG0_THREADSIZE__MASK; +} +#define A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << 
A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A5XX_SP_CS_CTRL_REG0_VARYING 0x00010000 +#define A5XX_SP_CS_CTRL_REG0_PIXLODENABLE 0x00100000 +#define A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 +#define A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT 25 +static inline uint32_t A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK; +} + +#define REG_A5XX_UNKNOWN_E5F2 0x0000e5f2 + +#define REG_A5XX_SP_CS_OBJ_START_LO 0x0000e5f3 + +#define REG_A5XX_SP_CS_OBJ_START_HI 0x0000e5f4 + +#define REG_A5XX_SP_HS_CTRL_REG0 0x0000e600 +#define A5XX_SP_HS_CTRL_REG0_THREADSIZE__MASK 0x00000008 +#define A5XX_SP_HS_CTRL_REG0_THREADSIZE__SHIFT 3 +static inline uint32_t A5XX_SP_HS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_SP_HS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_HS_CTRL_REG0_THREADSIZE__MASK; +} +#define A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A5XX_SP_HS_CTRL_REG0_VARYING 0x00010000 +#define A5XX_SP_HS_CTRL_REG0_PIXLODENABLE 0x00100000 +#define A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 +#define A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__SHIFT 25 +static inline uint32_t A5XX_SP_HS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__MASK; +} + 
+#define REG_A5XX_UNKNOWN_E602 0x0000e602 + +#define REG_A5XX_SP_HS_OBJ_START_LO 0x0000e603 + +#define REG_A5XX_SP_HS_OBJ_START_HI 0x0000e604 + +#define REG_A5XX_SP_DS_CTRL_REG0 0x0000e610 +#define A5XX_SP_DS_CTRL_REG0_THREADSIZE__MASK 0x00000008 +#define A5XX_SP_DS_CTRL_REG0_THREADSIZE__SHIFT 3 +static inline uint32_t A5XX_SP_DS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_SP_DS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_DS_CTRL_REG0_THREADSIZE__MASK; +} +#define A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A5XX_SP_DS_CTRL_REG0_VARYING 0x00010000 +#define A5XX_SP_DS_CTRL_REG0_PIXLODENABLE 0x00100000 +#define A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 +#define A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__SHIFT 25 +static inline uint32_t A5XX_SP_DS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__MASK; +} + +#define REG_A5XX_UNKNOWN_E62B 0x0000e62b + +#define REG_A5XX_SP_DS_OBJ_START_LO 0x0000e62c + +#define REG_A5XX_SP_DS_OBJ_START_HI 0x0000e62d + +#define REG_A5XX_SP_GS_CTRL_REG0 0x0000e640 +#define A5XX_SP_GS_CTRL_REG0_THREADSIZE__MASK 0x00000008 +#define A5XX_SP_GS_CTRL_REG0_THREADSIZE__SHIFT 3 +static inline uint32_t A5XX_SP_GS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_SP_GS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_GS_CTRL_REG0_THREADSIZE__MASK; +} 
+#define A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A5XX_SP_GS_CTRL_REG0_VARYING 0x00010000 +#define A5XX_SP_GS_CTRL_REG0_PIXLODENABLE 0x00100000 +#define A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 +#define A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__SHIFT 25 +static inline uint32_t A5XX_SP_GS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__MASK; +} + +#define REG_A5XX_UNKNOWN_E65B 0x0000e65b + +#define REG_A5XX_SP_GS_OBJ_START_LO 0x0000e65c + +#define REG_A5XX_SP_GS_OBJ_START_HI 0x0000e65d + +#define REG_A5XX_TPL1_TP_RAS_MSAA_CNTL 0x0000e704 +#define A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__MASK; +} + +#define REG_A5XX_TPL1_TP_DEST_MSAA_CNTL 0x0000e705 +#define A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__MASK; +} +#define A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 + +#define 
REG_A5XX_TPL1_TP_BORDER_COLOR_BASE_ADDR_LO 0x0000e706 + +#define REG_A5XX_TPL1_TP_BORDER_COLOR_BASE_ADDR_HI 0x0000e707 + +#define REG_A5XX_TPL1_VS_TEX_COUNT 0x0000e700 + +#define REG_A5XX_TPL1_HS_TEX_COUNT 0x0000e701 + +#define REG_A5XX_TPL1_DS_TEX_COUNT 0x0000e702 + +#define REG_A5XX_TPL1_GS_TEX_COUNT 0x0000e703 + +#define REG_A5XX_TPL1_VS_TEX_SAMP_LO 0x0000e722 + +#define REG_A5XX_TPL1_VS_TEX_SAMP_HI 0x0000e723 + +#define REG_A5XX_TPL1_HS_TEX_SAMP_LO 0x0000e724 + +#define REG_A5XX_TPL1_HS_TEX_SAMP_HI 0x0000e725 + +#define REG_A5XX_TPL1_DS_TEX_SAMP_LO 0x0000e726 + +#define REG_A5XX_TPL1_DS_TEX_SAMP_HI 0x0000e727 + +#define REG_A5XX_TPL1_GS_TEX_SAMP_LO 0x0000e728 + +#define REG_A5XX_TPL1_GS_TEX_SAMP_HI 0x0000e729 + +#define REG_A5XX_TPL1_VS_TEX_CONST_LO 0x0000e72a + +#define REG_A5XX_TPL1_VS_TEX_CONST_HI 0x0000e72b + +#define REG_A5XX_TPL1_HS_TEX_CONST_LO 0x0000e72c + +#define REG_A5XX_TPL1_HS_TEX_CONST_HI 0x0000e72d + +#define REG_A5XX_TPL1_DS_TEX_CONST_LO 0x0000e72e + +#define REG_A5XX_TPL1_DS_TEX_CONST_HI 0x0000e72f + +#define REG_A5XX_TPL1_GS_TEX_CONST_LO 0x0000e730 + +#define REG_A5XX_TPL1_GS_TEX_CONST_HI 0x0000e731 + +#define REG_A5XX_TPL1_FS_TEX_COUNT 0x0000e750 + +#define REG_A5XX_TPL1_CS_TEX_COUNT 0x0000e751 + +#define REG_A5XX_TPL1_FS_TEX_SAMP_LO 0x0000e75a + +#define REG_A5XX_TPL1_FS_TEX_SAMP_HI 0x0000e75b + +#define REG_A5XX_TPL1_CS_TEX_SAMP_LO 0x0000e75c + +#define REG_A5XX_TPL1_CS_TEX_SAMP_HI 0x0000e75d + +#define REG_A5XX_TPL1_FS_TEX_CONST_LO 0x0000e75e + +#define REG_A5XX_TPL1_FS_TEX_CONST_HI 0x0000e75f + +#define REG_A5XX_TPL1_CS_TEX_CONST_LO 0x0000e760 + +#define REG_A5XX_TPL1_CS_TEX_CONST_HI 0x0000e761 + +#define REG_A5XX_TPL1_TP_FS_ROTATION_CNTL 0x0000e764 + +#define REG_A5XX_HLSQ_CONTROL_0_REG 0x0000e784 +#define A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000001 +#define A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << 
A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK; +} +#define A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__MASK 0x00000004 +#define A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__SHIFT 2 +static inline uint32_t A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__SHIFT) & A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__MASK; +} + +#define REG_A5XX_HLSQ_CONTROL_1_REG 0x0000e785 +#define A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__MASK 0x0000003f +#define A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__SHIFT) & A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__MASK; +} + +#define REG_A5XX_HLSQ_CONTROL_2_REG 0x0000e786 +#define A5XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK 0x000000ff +#define A5XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CONTROL_2_REG_FACEREGID(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT) & A5XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK; +} +#define A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__MASK 0x0000ff00 +#define A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__SHIFT 8 +static inline uint32_t A5XX_HLSQ_CONTROL_2_REG_SAMPLEID(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__SHIFT) & A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__MASK; +} +#define A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__MASK 0x00ff0000 +#define A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__SHIFT 16 +static inline uint32_t A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__SHIFT) & A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__MASK; +} + +#define REG_A5XX_HLSQ_CONTROL_3_REG 0x0000e787 +#define A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK 0x000000ff +#define A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID(uint32_t val) +{ + 
return ((val) << A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__SHIFT) & A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK; +} + +#define REG_A5XX_HLSQ_CONTROL_4_REG 0x0000e788 +#define A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__MASK 0x00ff0000 +#define A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__SHIFT 16 +static inline uint32_t A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__SHIFT) & A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__MASK; +} +#define A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__MASK 0xff000000 +#define A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__SHIFT 24 +static inline uint32_t A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__SHIFT) & A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__MASK; +} + +#define REG_A5XX_HLSQ_UPDATE_CNTL 0x0000e78a + +#define REG_A5XX_HLSQ_VS_CONFIG 0x0000e78b +#define A5XX_HLSQ_VS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_HLSQ_FS_CONFIG 0x0000e78c +#define A5XX_HLSQ_FS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define 
A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_HLSQ_HS_CONFIG 0x0000e78d +#define A5XX_HLSQ_HS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_HLSQ_DS_CONFIG 0x0000e78e +#define A5XX_HLSQ_DS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_HLSQ_GS_CONFIG 0x0000e78f +#define A5XX_HLSQ_GS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t 
A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_HLSQ_CS_CONFIG 0x0000e790 +#define A5XX_HLSQ_CS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_HLSQ_VS_CNTL 0x0000e791 +#define A5XX_HLSQ_VS_CNTL_SSBO_ENABLE 0x00000001 +#define A5XX_HLSQ_VS_CNTL_INSTRLEN__MASK 0xfffffffe +#define A5XX_HLSQ_VS_CNTL_INSTRLEN__SHIFT 1 +static inline uint32_t A5XX_HLSQ_VS_CNTL_INSTRLEN(uint32_t val) +{ + return ((val) << A5XX_HLSQ_VS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_VS_CNTL_INSTRLEN__MASK; +} + +#define REG_A5XX_HLSQ_FS_CNTL 0x0000e792 +#define A5XX_HLSQ_FS_CNTL_SSBO_ENABLE 0x00000001 +#define A5XX_HLSQ_FS_CNTL_INSTRLEN__MASK 0xfffffffe +#define A5XX_HLSQ_FS_CNTL_INSTRLEN__SHIFT 1 +static inline uint32_t A5XX_HLSQ_FS_CNTL_INSTRLEN(uint32_t val) +{ + return ((val) << A5XX_HLSQ_FS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_FS_CNTL_INSTRLEN__MASK; +} + +#define REG_A5XX_HLSQ_HS_CNTL 0x0000e793 +#define A5XX_HLSQ_HS_CNTL_SSBO_ENABLE 0x00000001 +#define 
A5XX_HLSQ_HS_CNTL_INSTRLEN__MASK 0xfffffffe +#define A5XX_HLSQ_HS_CNTL_INSTRLEN__SHIFT 1 +static inline uint32_t A5XX_HLSQ_HS_CNTL_INSTRLEN(uint32_t val) +{ + return ((val) << A5XX_HLSQ_HS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_HS_CNTL_INSTRLEN__MASK; +} + +#define REG_A5XX_HLSQ_DS_CNTL 0x0000e794 +#define A5XX_HLSQ_DS_CNTL_SSBO_ENABLE 0x00000001 +#define A5XX_HLSQ_DS_CNTL_INSTRLEN__MASK 0xfffffffe +#define A5XX_HLSQ_DS_CNTL_INSTRLEN__SHIFT 1 +static inline uint32_t A5XX_HLSQ_DS_CNTL_INSTRLEN(uint32_t val) +{ + return ((val) << A5XX_HLSQ_DS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_DS_CNTL_INSTRLEN__MASK; +} + +#define REG_A5XX_HLSQ_GS_CNTL 0x0000e795 +#define A5XX_HLSQ_GS_CNTL_SSBO_ENABLE 0x00000001 +#define A5XX_HLSQ_GS_CNTL_INSTRLEN__MASK 0xfffffffe +#define A5XX_HLSQ_GS_CNTL_INSTRLEN__SHIFT 1 +static inline uint32_t A5XX_HLSQ_GS_CNTL_INSTRLEN(uint32_t val) +{ + return ((val) << A5XX_HLSQ_GS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_GS_CNTL_INSTRLEN__MASK; +} + +#define REG_A5XX_HLSQ_CS_CNTL 0x0000e796 +#define A5XX_HLSQ_CS_CNTL_SSBO_ENABLE 0x00000001 +#define A5XX_HLSQ_CS_CNTL_INSTRLEN__MASK 0xfffffffe +#define A5XX_HLSQ_CS_CNTL_INSTRLEN__SHIFT 1 +static inline uint32_t A5XX_HLSQ_CS_CNTL_INSTRLEN(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_CS_CNTL_INSTRLEN__MASK; +} + +#define REG_A5XX_HLSQ_CS_KERNEL_GROUP_X 0x0000e7b9 + +#define REG_A5XX_HLSQ_CS_KERNEL_GROUP_Y 0x0000e7ba + +#define REG_A5XX_HLSQ_CS_KERNEL_GROUP_Z 0x0000e7bb + +#define REG_A5XX_HLSQ_CS_NDRANGE_0 0x0000e7b0 +#define A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK 0x00000003 +#define A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK; +} +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK 0x00000ffc +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT 2 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(uint32_t val) +{ + 
return ((val) << A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK; +} +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK 0x003ff000 +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT 12 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK; +} +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK 0xffc00000 +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT 22 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK; +} + +#define REG_A5XX_HLSQ_CS_NDRANGE_1 0x0000e7b1 +#define A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__SHIFT) & A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__MASK; +} + +#define REG_A5XX_HLSQ_CS_NDRANGE_2 0x0000e7b2 +#define A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__SHIFT) & A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__MASK; +} + +#define REG_A5XX_HLSQ_CS_NDRANGE_3 0x0000e7b3 +#define A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__SHIFT) & A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__MASK; +} + +#define REG_A5XX_HLSQ_CS_NDRANGE_4 0x0000e7b4 +#define A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y(uint32_t val) +{ + return 
((val) << A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__SHIFT) & A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__MASK; +} + +#define REG_A5XX_HLSQ_CS_NDRANGE_5 0x0000e7b5 +#define A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__SHIFT) & A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__MASK; +} + +#define REG_A5XX_HLSQ_CS_NDRANGE_6 0x0000e7b6 +#define A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__SHIFT) & A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__MASK; +} + +#define REG_A5XX_HLSQ_CS_CNTL_0 0x0000e7b7 +#define A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK 0x000000ff +#define A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT) & A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK; +} +#define A5XX_HLSQ_CS_CNTL_0_UNK0__MASK 0x0000ff00 +#define A5XX_HLSQ_CS_CNTL_0_UNK0__SHIFT 8 +static inline uint32_t A5XX_HLSQ_CS_CNTL_0_UNK0(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CNTL_0_UNK0__SHIFT) & A5XX_HLSQ_CS_CNTL_0_UNK0__MASK; +} +#define A5XX_HLSQ_CS_CNTL_0_UNK1__MASK 0x00ff0000 +#define A5XX_HLSQ_CS_CNTL_0_UNK1__SHIFT 16 +static inline uint32_t A5XX_HLSQ_CS_CNTL_0_UNK1(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CNTL_0_UNK1__SHIFT) & A5XX_HLSQ_CS_CNTL_0_UNK1__MASK; +} +#define A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK 0xff000000 +#define A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT 24 +static inline uint32_t A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT) & A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK; +} + +#define REG_A5XX_HLSQ_CS_CNTL_1 0x0000e7b8 + +#define REG_A5XX_UNKNOWN_E7C0 
0x0000e7c0 + +#define REG_A5XX_HLSQ_VS_CONSTLEN 0x0000e7c3 + +#define REG_A5XX_HLSQ_VS_INSTRLEN 0x0000e7c4 + +#define REG_A5XX_UNKNOWN_E7C5 0x0000e7c5 + +#define REG_A5XX_HLSQ_HS_CONSTLEN 0x0000e7c8 + +#define REG_A5XX_HLSQ_HS_INSTRLEN 0x0000e7c9 + +#define REG_A5XX_UNKNOWN_E7CA 0x0000e7ca + +#define REG_A5XX_HLSQ_DS_CONSTLEN 0x0000e7cd + +#define REG_A5XX_HLSQ_DS_INSTRLEN 0x0000e7ce + +#define REG_A5XX_UNKNOWN_E7CF 0x0000e7cf + +#define REG_A5XX_HLSQ_GS_CONSTLEN 0x0000e7d2 + +#define REG_A5XX_HLSQ_GS_INSTRLEN 0x0000e7d3 + +#define REG_A5XX_UNKNOWN_E7D4 0x0000e7d4 + +#define REG_A5XX_HLSQ_FS_CONSTLEN 0x0000e7d7 + +#define REG_A5XX_HLSQ_FS_INSTRLEN 0x0000e7d8 + +#define REG_A5XX_UNKNOWN_E7D9 0x0000e7d9 + +#define REG_A5XX_HLSQ_CS_CONSTLEN 0x0000e7dc + +#define REG_A5XX_HLSQ_CS_INSTRLEN 0x0000e7dd + +#define REG_A5XX_RB_2D_BLIT_CNTL 0x00002100 + +#define REG_A5XX_RB_2D_SRC_SOLID_DW0 0x00002101 + +#define REG_A5XX_RB_2D_SRC_SOLID_DW1 0x00002102 + +#define REG_A5XX_RB_2D_SRC_SOLID_DW2 0x00002103 + +#define REG_A5XX_RB_2D_SRC_SOLID_DW3 0x00002104 + +#define REG_A5XX_RB_2D_SRC_INFO 0x00002107 +#define A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) +{ + return ((val) << A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__SHIFT) & A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__MASK; +} +#define A5XX_RB_2D_SRC_INFO_TILE_MODE__MASK 0x00000300 +#define A5XX_RB_2D_SRC_INFO_TILE_MODE__SHIFT 8 +static inline uint32_t A5XX_RB_2D_SRC_INFO_TILE_MODE(enum a5xx_tile_mode val) +{ + return ((val) << A5XX_RB_2D_SRC_INFO_TILE_MODE__SHIFT) & A5XX_RB_2D_SRC_INFO_TILE_MODE__MASK; +} +#define A5XX_RB_2D_SRC_INFO_COLOR_SWAP__MASK 0x00000c00 +#define A5XX_RB_2D_SRC_INFO_COLOR_SWAP__SHIFT 10 +static inline uint32_t A5XX_RB_2D_SRC_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A5XX_RB_2D_SRC_INFO_COLOR_SWAP__SHIFT) & A5XX_RB_2D_SRC_INFO_COLOR_SWAP__MASK; +} +#define 
A5XX_RB_2D_SRC_INFO_FLAGS 0x00001000 + +#define REG_A5XX_RB_2D_SRC_LO 0x00002108 + +#define REG_A5XX_RB_2D_SRC_HI 0x00002109 + +#define REG_A5XX_RB_2D_SRC_SIZE 0x0000210a +#define A5XX_RB_2D_SRC_SIZE_PITCH__MASK 0x0000ffff +#define A5XX_RB_2D_SRC_SIZE_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_2D_SRC_SIZE_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_2D_SRC_SIZE_PITCH__SHIFT) & A5XX_RB_2D_SRC_SIZE_PITCH__MASK; +} +#define A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__MASK 0xffff0000 +#define A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__SHIFT 16 +static inline uint32_t A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__SHIFT) & A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_RB_2D_DST_INFO 0x00002110 +#define A5XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A5XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A5XX_RB_2D_DST_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) +{ + return ((val) << A5XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT) & A5XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK; +} +#define A5XX_RB_2D_DST_INFO_TILE_MODE__MASK 0x00000300 +#define A5XX_RB_2D_DST_INFO_TILE_MODE__SHIFT 8 +static inline uint32_t A5XX_RB_2D_DST_INFO_TILE_MODE(enum a5xx_tile_mode val) +{ + return ((val) << A5XX_RB_2D_DST_INFO_TILE_MODE__SHIFT) & A5XX_RB_2D_DST_INFO_TILE_MODE__MASK; +} +#define A5XX_RB_2D_DST_INFO_COLOR_SWAP__MASK 0x00000c00 +#define A5XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT 10 +static inline uint32_t A5XX_RB_2D_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A5XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT) & A5XX_RB_2D_DST_INFO_COLOR_SWAP__MASK; +} +#define A5XX_RB_2D_DST_INFO_FLAGS 0x00001000 + +#define REG_A5XX_RB_2D_DST_LO 0x00002111 + +#define REG_A5XX_RB_2D_DST_HI 0x00002112 + +#define REG_A5XX_RB_2D_DST_SIZE 0x00002113 +#define A5XX_RB_2D_DST_SIZE_PITCH__MASK 0x0000ffff +#define A5XX_RB_2D_DST_SIZE_PITCH__SHIFT 0 +static inline uint32_t 
A5XX_RB_2D_DST_SIZE_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_2D_DST_SIZE_PITCH__SHIFT) & A5XX_RB_2D_DST_SIZE_PITCH__MASK; +} +#define A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__MASK 0xffff0000 +#define A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__SHIFT 16 +static inline uint32_t A5XX_RB_2D_DST_SIZE_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__SHIFT) & A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_RB_2D_SRC_FLAGS_LO 0x00002140 + +#define REG_A5XX_RB_2D_SRC_FLAGS_HI 0x00002141 + +#define REG_A5XX_RB_2D_DST_FLAGS_LO 0x00002143 + +#define REG_A5XX_RB_2D_DST_FLAGS_HI 0x00002144 + +#define REG_A5XX_GRAS_2D_BLIT_CNTL 0x00002180 + +#define REG_A5XX_GRAS_2D_SRC_INFO 0x00002181 +#define A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) +{ + return ((val) << A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__SHIFT) & A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__MASK; +} +#define A5XX_GRAS_2D_SRC_INFO_TILE_MODE__MASK 0x00000300 +#define A5XX_GRAS_2D_SRC_INFO_TILE_MODE__SHIFT 8 +static inline uint32_t A5XX_GRAS_2D_SRC_INFO_TILE_MODE(enum a5xx_tile_mode val) +{ + return ((val) << A5XX_GRAS_2D_SRC_INFO_TILE_MODE__SHIFT) & A5XX_GRAS_2D_SRC_INFO_TILE_MODE__MASK; +} +#define A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__MASK 0x00000c00 +#define A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__SHIFT 10 +static inline uint32_t A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__SHIFT) & A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__MASK; +} +#define A5XX_GRAS_2D_SRC_INFO_FLAGS 0x00001000 + +#define REG_A5XX_GRAS_2D_DST_INFO 0x00002182 +#define A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) +{ + return ((val) << 
A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__SHIFT) & A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__MASK; +} +#define A5XX_GRAS_2D_DST_INFO_TILE_MODE__MASK 0x00000300 +#define A5XX_GRAS_2D_DST_INFO_TILE_MODE__SHIFT 8 +static inline uint32_t A5XX_GRAS_2D_DST_INFO_TILE_MODE(enum a5xx_tile_mode val) +{ + return ((val) << A5XX_GRAS_2D_DST_INFO_TILE_MODE__SHIFT) & A5XX_GRAS_2D_DST_INFO_TILE_MODE__MASK; +} +#define A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__MASK 0x00000c00 +#define A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__SHIFT 10 +static inline uint32_t A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__SHIFT) & A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__MASK; +} +#define A5XX_GRAS_2D_DST_INFO_FLAGS 0x00001000 + +#define REG_A5XX_UNKNOWN_2100 0x00002100 + +#define REG_A5XX_UNKNOWN_2180 0x00002180 + +#define REG_A5XX_UNKNOWN_2184 0x00002184 + +#define REG_A5XX_TEX_SAMP_0 0x00000000 +#define A5XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR 0x00000001 +#define A5XX_TEX_SAMP_0_XY_MAG__MASK 0x00000006 +#define A5XX_TEX_SAMP_0_XY_MAG__SHIFT 1 +static inline uint32_t A5XX_TEX_SAMP_0_XY_MAG(enum a5xx_tex_filter val) +{ + return ((val) << A5XX_TEX_SAMP_0_XY_MAG__SHIFT) & A5XX_TEX_SAMP_0_XY_MAG__MASK; +} +#define A5XX_TEX_SAMP_0_XY_MIN__MASK 0x00000018 +#define A5XX_TEX_SAMP_0_XY_MIN__SHIFT 3 +static inline uint32_t A5XX_TEX_SAMP_0_XY_MIN(enum a5xx_tex_filter val) +{ + return ((val) << A5XX_TEX_SAMP_0_XY_MIN__SHIFT) & A5XX_TEX_SAMP_0_XY_MIN__MASK; +} +#define A5XX_TEX_SAMP_0_WRAP_S__MASK 0x000000e0 +#define A5XX_TEX_SAMP_0_WRAP_S__SHIFT 5 +static inline uint32_t A5XX_TEX_SAMP_0_WRAP_S(enum a5xx_tex_clamp val) +{ + return ((val) << A5XX_TEX_SAMP_0_WRAP_S__SHIFT) & A5XX_TEX_SAMP_0_WRAP_S__MASK; +} +#define A5XX_TEX_SAMP_0_WRAP_T__MASK 0x00000700 +#define A5XX_TEX_SAMP_0_WRAP_T__SHIFT 8 +static inline uint32_t A5XX_TEX_SAMP_0_WRAP_T(enum a5xx_tex_clamp val) +{ + return ((val) << A5XX_TEX_SAMP_0_WRAP_T__SHIFT) & A5XX_TEX_SAMP_0_WRAP_T__MASK; +} +#define 
A5XX_TEX_SAMP_0_WRAP_R__MASK 0x00003800 +#define A5XX_TEX_SAMP_0_WRAP_R__SHIFT 11 +static inline uint32_t A5XX_TEX_SAMP_0_WRAP_R(enum a5xx_tex_clamp val) +{ + return ((val) << A5XX_TEX_SAMP_0_WRAP_R__SHIFT) & A5XX_TEX_SAMP_0_WRAP_R__MASK; +} +#define A5XX_TEX_SAMP_0_ANISO__MASK 0x0001c000 +#define A5XX_TEX_SAMP_0_ANISO__SHIFT 14 +static inline uint32_t A5XX_TEX_SAMP_0_ANISO(enum a5xx_tex_aniso val) +{ + return ((val) << A5XX_TEX_SAMP_0_ANISO__SHIFT) & A5XX_TEX_SAMP_0_ANISO__MASK; +} +#define A5XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000 +#define A5XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19 +static inline uint32_t A5XX_TEX_SAMP_0_LOD_BIAS(float val) +{ + return ((((int32_t)(val * 256.0))) << A5XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A5XX_TEX_SAMP_0_LOD_BIAS__MASK; +} + +#define REG_A5XX_TEX_SAMP_1 0x00000001 +#define A5XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e +#define A5XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT 1 +static inline uint32_t A5XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val) +{ + return ((val) << A5XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A5XX_TEX_SAMP_1_COMPARE_FUNC__MASK; +} +#define A5XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF 0x00000010 +#define A5XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020 +#define A5XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040 +#define A5XX_TEX_SAMP_1_MAX_LOD__MASK 0x000fff00 +#define A5XX_TEX_SAMP_1_MAX_LOD__SHIFT 8 +static inline uint32_t A5XX_TEX_SAMP_1_MAX_LOD(float val) +{ + return ((((uint32_t)(val * 256.0))) << A5XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A5XX_TEX_SAMP_1_MAX_LOD__MASK; +} +#define A5XX_TEX_SAMP_1_MIN_LOD__MASK 0xfff00000 +#define A5XX_TEX_SAMP_1_MIN_LOD__SHIFT 20 +static inline uint32_t A5XX_TEX_SAMP_1_MIN_LOD(float val) +{ + return ((((uint32_t)(val * 256.0))) << A5XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A5XX_TEX_SAMP_1_MIN_LOD__MASK; +} + +#define REG_A5XX_TEX_SAMP_2 0x00000002 +#define A5XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK 0xfffffff0 +#define A5XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT 4 +static inline uint32_t A5XX_TEX_SAMP_2_BCOLOR_OFFSET(uint32_t val) +{ + 
return ((val) << A5XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT) & A5XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK; +} + +#define REG_A5XX_TEX_SAMP_3 0x00000003 + +#define REG_A5XX_TEX_CONST_0 0x00000000 +#define A5XX_TEX_CONST_0_TILE_MODE__MASK 0x00000003 +#define A5XX_TEX_CONST_0_TILE_MODE__SHIFT 0 +static inline uint32_t A5XX_TEX_CONST_0_TILE_MODE(enum a5xx_tile_mode val) +{ + return ((val) << A5XX_TEX_CONST_0_TILE_MODE__SHIFT) & A5XX_TEX_CONST_0_TILE_MODE__MASK; +} +#define A5XX_TEX_CONST_0_SRGB 0x00000004 +#define A5XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070 +#define A5XX_TEX_CONST_0_SWIZ_X__SHIFT 4 +static inline uint32_t A5XX_TEX_CONST_0_SWIZ_X(enum a5xx_tex_swiz val) +{ + return ((val) << A5XX_TEX_CONST_0_SWIZ_X__SHIFT) & A5XX_TEX_CONST_0_SWIZ_X__MASK; +} +#define A5XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380 +#define A5XX_TEX_CONST_0_SWIZ_Y__SHIFT 7 +static inline uint32_t A5XX_TEX_CONST_0_SWIZ_Y(enum a5xx_tex_swiz val) +{ + return ((val) << A5XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A5XX_TEX_CONST_0_SWIZ_Y__MASK; +} +#define A5XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00 +#define A5XX_TEX_CONST_0_SWIZ_Z__SHIFT 10 +static inline uint32_t A5XX_TEX_CONST_0_SWIZ_Z(enum a5xx_tex_swiz val) +{ + return ((val) << A5XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A5XX_TEX_CONST_0_SWIZ_Z__MASK; +} +#define A5XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000 +#define A5XX_TEX_CONST_0_SWIZ_W__SHIFT 13 +static inline uint32_t A5XX_TEX_CONST_0_SWIZ_W(enum a5xx_tex_swiz val) +{ + return ((val) << A5XX_TEX_CONST_0_SWIZ_W__SHIFT) & A5XX_TEX_CONST_0_SWIZ_W__MASK; +} +#define A5XX_TEX_CONST_0_MIPLVLS__MASK 0x000f0000 +#define A5XX_TEX_CONST_0_MIPLVLS__SHIFT 16 +static inline uint32_t A5XX_TEX_CONST_0_MIPLVLS(uint32_t val) +{ + return ((val) << A5XX_TEX_CONST_0_MIPLVLS__SHIFT) & A5XX_TEX_CONST_0_MIPLVLS__MASK; +} +#define A5XX_TEX_CONST_0_SAMPLES__MASK 0x00300000 +#define A5XX_TEX_CONST_0_SAMPLES__SHIFT 20 +static inline uint32_t A5XX_TEX_CONST_0_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A5XX_TEX_CONST_0_SAMPLES__SHIFT) & 
A5XX_TEX_CONST_0_SAMPLES__MASK; +} +#define A5XX_TEX_CONST_0_FMT__MASK 0x3fc00000 +#define A5XX_TEX_CONST_0_FMT__SHIFT 22 +static inline uint32_t A5XX_TEX_CONST_0_FMT(enum a5xx_tex_fmt val) +{ + return ((val) << A5XX_TEX_CONST_0_FMT__SHIFT) & A5XX_TEX_CONST_0_FMT__MASK; +} +#define A5XX_TEX_CONST_0_SWAP__MASK 0xc0000000 +#define A5XX_TEX_CONST_0_SWAP__SHIFT 30 +static inline uint32_t A5XX_TEX_CONST_0_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A5XX_TEX_CONST_0_SWAP__SHIFT) & A5XX_TEX_CONST_0_SWAP__MASK; +} + +#define REG_A5XX_TEX_CONST_1 0x00000001 +#define A5XX_TEX_CONST_1_WIDTH__MASK 0x00007fff +#define A5XX_TEX_CONST_1_WIDTH__SHIFT 0 +static inline uint32_t A5XX_TEX_CONST_1_WIDTH(uint32_t val) +{ + return ((val) << A5XX_TEX_CONST_1_WIDTH__SHIFT) & A5XX_TEX_CONST_1_WIDTH__MASK; +} +#define A5XX_TEX_CONST_1_HEIGHT__MASK 0x3fff8000 +#define A5XX_TEX_CONST_1_HEIGHT__SHIFT 15 +static inline uint32_t A5XX_TEX_CONST_1_HEIGHT(uint32_t val) +{ + return ((val) << A5XX_TEX_CONST_1_HEIGHT__SHIFT) & A5XX_TEX_CONST_1_HEIGHT__MASK; +} + +#define REG_A5XX_TEX_CONST_2 0x00000002 +#define A5XX_TEX_CONST_2_FETCHSIZE__MASK 0x0000000f +#define A5XX_TEX_CONST_2_FETCHSIZE__SHIFT 0 +static inline uint32_t A5XX_TEX_CONST_2_FETCHSIZE(enum a5xx_tex_fetchsize val) +{ + return ((val) << A5XX_TEX_CONST_2_FETCHSIZE__SHIFT) & A5XX_TEX_CONST_2_FETCHSIZE__MASK; +} +#define A5XX_TEX_CONST_2_PITCH__MASK 0x1fffff80 +#define A5XX_TEX_CONST_2_PITCH__SHIFT 7 +static inline uint32_t A5XX_TEX_CONST_2_PITCH(uint32_t val) +{ + return ((val) << A5XX_TEX_CONST_2_PITCH__SHIFT) & A5XX_TEX_CONST_2_PITCH__MASK; +} +#define A5XX_TEX_CONST_2_TYPE__MASK 0x60000000 +#define A5XX_TEX_CONST_2_TYPE__SHIFT 29 +static inline uint32_t A5XX_TEX_CONST_2_TYPE(enum a5xx_tex_type val) +{ + return ((val) << A5XX_TEX_CONST_2_TYPE__SHIFT) & A5XX_TEX_CONST_2_TYPE__MASK; +} + +#define REG_A5XX_TEX_CONST_3 0x00000003 +#define A5XX_TEX_CONST_3_ARRAY_PITCH__MASK 0x00003fff +#define A5XX_TEX_CONST_3_ARRAY_PITCH__SHIFT 0 
+static inline uint32_t A5XX_TEX_CONST_3_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A5XX_TEX_CONST_3_ARRAY_PITCH__SHIFT) & A5XX_TEX_CONST_3_ARRAY_PITCH__MASK; +} +#define A5XX_TEX_CONST_3_FLAG 0x10000000 + +#define REG_A5XX_TEX_CONST_4 0x00000004 +#define A5XX_TEX_CONST_4_BASE_LO__MASK 0xffffffe0 +#define A5XX_TEX_CONST_4_BASE_LO__SHIFT 5 +static inline uint32_t A5XX_TEX_CONST_4_BASE_LO(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A5XX_TEX_CONST_4_BASE_LO__SHIFT) & A5XX_TEX_CONST_4_BASE_LO__MASK; +} + +#define REG_A5XX_TEX_CONST_5 0x00000005 +#define A5XX_TEX_CONST_5_BASE_HI__MASK 0x0001ffff +#define A5XX_TEX_CONST_5_BASE_HI__SHIFT 0 +static inline uint32_t A5XX_TEX_CONST_5_BASE_HI(uint32_t val) +{ + return ((val) << A5XX_TEX_CONST_5_BASE_HI__SHIFT) & A5XX_TEX_CONST_5_BASE_HI__MASK; +} +#define A5XX_TEX_CONST_5_DEPTH__MASK 0x3ffe0000 +#define A5XX_TEX_CONST_5_DEPTH__SHIFT 17 +static inline uint32_t A5XX_TEX_CONST_5_DEPTH(uint32_t val) +{ + return ((val) << A5XX_TEX_CONST_5_DEPTH__SHIFT) & A5XX_TEX_CONST_5_DEPTH__MASK; +} + +#define REG_A5XX_TEX_CONST_6 0x00000006 + +#define REG_A5XX_TEX_CONST_7 0x00000007 + +#define REG_A5XX_TEX_CONST_8 0x00000008 + +#define REG_A5XX_TEX_CONST_9 0x00000009 + +#define REG_A5XX_TEX_CONST_10 0x0000000a + +#define REG_A5XX_TEX_CONST_11 0x0000000b + +#define REG_A5XX_SSBO_0_0 0x00000000 +#define A5XX_SSBO_0_0_BASE_LO__MASK 0xffffffe0 +#define A5XX_SSBO_0_0_BASE_LO__SHIFT 5 +static inline uint32_t A5XX_SSBO_0_0_BASE_LO(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A5XX_SSBO_0_0_BASE_LO__SHIFT) & A5XX_SSBO_0_0_BASE_LO__MASK; +} + +#define REG_A5XX_SSBO_0_1 0x00000001 +#define A5XX_SSBO_0_1_PITCH__MASK 0x003fffff +#define A5XX_SSBO_0_1_PITCH__SHIFT 0 +static inline uint32_t A5XX_SSBO_0_1_PITCH(uint32_t val) +{ + return ((val) << A5XX_SSBO_0_1_PITCH__SHIFT) & A5XX_SSBO_0_1_PITCH__MASK; +} + +#define REG_A5XX_SSBO_0_2 0x00000002 +#define 
A5XX_SSBO_0_2_ARRAY_PITCH__MASK 0x03fff000 +#define A5XX_SSBO_0_2_ARRAY_PITCH__SHIFT 12 +static inline uint32_t A5XX_SSBO_0_2_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A5XX_SSBO_0_2_ARRAY_PITCH__SHIFT) & A5XX_SSBO_0_2_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_SSBO_0_3 0x00000003 +#define A5XX_SSBO_0_3_CPP__MASK 0x0000003f +#define A5XX_SSBO_0_3_CPP__SHIFT 0 +static inline uint32_t A5XX_SSBO_0_3_CPP(uint32_t val) +{ + return ((val) << A5XX_SSBO_0_3_CPP__SHIFT) & A5XX_SSBO_0_3_CPP__MASK; +} + +#define REG_A5XX_SSBO_1_0 0x00000000 +#define A5XX_SSBO_1_0_FMT__MASK 0x0000ff00 +#define A5XX_SSBO_1_0_FMT__SHIFT 8 +static inline uint32_t A5XX_SSBO_1_0_FMT(enum a5xx_tex_fmt val) +{ + return ((val) << A5XX_SSBO_1_0_FMT__SHIFT) & A5XX_SSBO_1_0_FMT__MASK; +} +#define A5XX_SSBO_1_0_WIDTH__MASK 0xffff0000 +#define A5XX_SSBO_1_0_WIDTH__SHIFT 16 +static inline uint32_t A5XX_SSBO_1_0_WIDTH(uint32_t val) +{ + return ((val) << A5XX_SSBO_1_0_WIDTH__SHIFT) & A5XX_SSBO_1_0_WIDTH__MASK; +} + +#define REG_A5XX_SSBO_1_1 0x00000001 +#define A5XX_SSBO_1_1_HEIGHT__MASK 0x0000ffff +#define A5XX_SSBO_1_1_HEIGHT__SHIFT 0 +static inline uint32_t A5XX_SSBO_1_1_HEIGHT(uint32_t val) +{ + return ((val) << A5XX_SSBO_1_1_HEIGHT__SHIFT) & A5XX_SSBO_1_1_HEIGHT__MASK; +} +#define A5XX_SSBO_1_1_DEPTH__MASK 0xffff0000 +#define A5XX_SSBO_1_1_DEPTH__SHIFT 16 +static inline uint32_t A5XX_SSBO_1_1_DEPTH(uint32_t val) +{ + return ((val) << A5XX_SSBO_1_1_DEPTH__SHIFT) & A5XX_SSBO_1_1_DEPTH__MASK; +} + +#define REG_A5XX_SSBO_2_0 0x00000000 +#define A5XX_SSBO_2_0_BASE_LO__MASK 0xffffffff +#define A5XX_SSBO_2_0_BASE_LO__SHIFT 0 +static inline uint32_t A5XX_SSBO_2_0_BASE_LO(uint32_t val) +{ + return ((val) << A5XX_SSBO_2_0_BASE_LO__SHIFT) & A5XX_SSBO_2_0_BASE_LO__MASK; +} + +#define REG_A5XX_SSBO_2_1 0x00000001 +#define A5XX_SSBO_2_1_BASE_HI__MASK 0xffffffff +#define A5XX_SSBO_2_1_BASE_HI__SHIFT 0 +static inline uint32_t A5XX_SSBO_2_1_BASE_HI(uint32_t val) +{ + return ((val) << 
A5XX_SSBO_2_1_BASE_HI__SHIFT) & A5XX_SSBO_2_1_BASE_HI__MASK; +} + + +#endif /* A5XX_XML */ diff -Nru mesa-18.3.3/src/freedreno/registers/a6xx.xml.h mesa-19.0.1/src/freedreno/registers/a6xx.xml.h --- mesa-18.3.3/src/freedreno/registers/a6xx.xml.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/registers/a6xx.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,5506 @@ +#ifndef A6XX_XML +#define A6XX_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/freedreno/envytools/ +git clone https://github.com/freedreno/envytools.git + +The rules-ng-ng source files this header was generated from are: +- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 42463 bytes, from 2018-11-19 13:44:03) +- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14239 bytes, from 2018-12-05 15:25:53) +- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 43052 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 141895 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) +- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: +- Rob Clark (robclark) +- Ilia Mirkin (imirkin) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and 
associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +enum a6xx_color_fmt { + RB6_A8_UNORM = 2, + RB6_R8_UNORM = 3, + RB6_R8_SNORM = 4, + RB6_R8_UINT = 5, + RB6_R8_SINT = 6, + RB6_R4G4B4A4_UNORM = 8, + RB6_R5G5B5A1_UNORM = 10, + RB6_R5G6B5_UNORM = 14, + RB6_R8G8_UNORM = 15, + RB6_R8G8_SNORM = 16, + RB6_R8G8_UINT = 17, + RB6_R8G8_SINT = 18, + RB6_R16_UNORM = 21, + RB6_R16_SNORM = 22, + RB6_R16_FLOAT = 23, + RB6_R16_UINT = 24, + RB6_R16_SINT = 25, + RB6_R8G8B8A8_UNORM = 48, + RB6_R8G8B8_UNORM = 49, + RB6_R8G8B8A8_SNORM = 50, + RB6_R8G8B8A8_UINT = 51, + RB6_R8G8B8A8_SINT = 52, + RB6_R10G10B10A2_UNORM = 55, + RB6_R10G10B10A2_UINT = 58, + RB6_R11G11B10_FLOAT = 66, + RB6_R16G16_UNORM = 67, + RB6_R16G16_SNORM = 68, + RB6_R16G16_FLOAT = 69, + RB6_R16G16_UINT = 70, + RB6_R16G16_SINT = 71, + RB6_R32_FLOAT = 74, + RB6_R32_UINT = 75, + RB6_R32_SINT = 76, + RB6_R16G16B16A16_UNORM = 96, + RB6_R16G16B16A16_SNORM = 97, + RB6_R16G16B16A16_FLOAT = 98, + RB6_R16G16B16A16_UINT = 99, + RB6_R16G16B16A16_SINT = 100, + RB6_R32G32_FLOAT = 103, + RB6_R32G32_UINT = 104, 
+ RB6_R32G32_SINT = 105, + RB6_R32G32B32A32_FLOAT = 130, + RB6_R32G32B32A32_UINT = 131, + RB6_R32G32B32A32_SINT = 132, + RB6_X8Z24_UNORM = 160, +}; + +enum a6xx_tile_mode { + TILE6_LINEAR = 0, + TILE6_2 = 2, + TILE6_3 = 3, +}; + +enum a6xx_vtx_fmt { + VFMT6_8_UNORM = 3, + VFMT6_8_SNORM = 4, + VFMT6_8_UINT = 5, + VFMT6_8_SINT = 6, + VFMT6_8_8_UNORM = 15, + VFMT6_8_8_SNORM = 16, + VFMT6_8_8_UINT = 17, + VFMT6_8_8_SINT = 18, + VFMT6_16_UNORM = 21, + VFMT6_16_SNORM = 22, + VFMT6_16_FLOAT = 23, + VFMT6_16_UINT = 24, + VFMT6_16_SINT = 25, + VFMT6_8_8_8_UNORM = 33, + VFMT6_8_8_8_SNORM = 34, + VFMT6_8_8_8_UINT = 35, + VFMT6_8_8_8_SINT = 36, + VFMT6_8_8_8_8_UNORM = 48, + VFMT6_8_8_8_8_SNORM = 50, + VFMT6_8_8_8_8_UINT = 51, + VFMT6_8_8_8_8_SINT = 52, + VFMT6_10_10_10_2_UNORM = 54, + VFMT6_10_10_10_2_SNORM = 57, + VFMT6_10_10_10_2_UINT = 58, + VFMT6_10_10_10_2_SINT = 59, + VFMT6_11_11_10_FLOAT = 66, + VFMT6_16_16_UNORM = 67, + VFMT6_16_16_SNORM = 68, + VFMT6_16_16_FLOAT = 69, + VFMT6_16_16_UINT = 70, + VFMT6_16_16_SINT = 71, + VFMT6_32_UNORM = 72, + VFMT6_32_SNORM = 73, + VFMT6_32_FLOAT = 74, + VFMT6_32_UINT = 75, + VFMT6_32_SINT = 76, + VFMT6_32_FIXED = 77, + VFMT6_16_16_16_UNORM = 88, + VFMT6_16_16_16_SNORM = 89, + VFMT6_16_16_16_FLOAT = 90, + VFMT6_16_16_16_UINT = 91, + VFMT6_16_16_16_SINT = 92, + VFMT6_16_16_16_16_UNORM = 96, + VFMT6_16_16_16_16_SNORM = 97, + VFMT6_16_16_16_16_FLOAT = 98, + VFMT6_16_16_16_16_UINT = 99, + VFMT6_16_16_16_16_SINT = 100, + VFMT6_32_32_UNORM = 101, + VFMT6_32_32_SNORM = 102, + VFMT6_32_32_FLOAT = 103, + VFMT6_32_32_UINT = 104, + VFMT6_32_32_SINT = 105, + VFMT6_32_32_FIXED = 106, + VFMT6_32_32_32_UNORM = 112, + VFMT6_32_32_32_SNORM = 113, + VFMT6_32_32_32_UINT = 114, + VFMT6_32_32_32_SINT = 115, + VFMT6_32_32_32_FLOAT = 116, + VFMT6_32_32_32_FIXED = 117, + VFMT6_32_32_32_32_UNORM = 128, + VFMT6_32_32_32_32_SNORM = 129, + VFMT6_32_32_32_32_FLOAT = 130, + VFMT6_32_32_32_32_UINT = 131, + VFMT6_32_32_32_32_SINT = 132, + VFMT6_32_32_32_32_FIXED = 
133, +}; + +enum a6xx_tex_fmt { + TFMT6_A8_UNORM = 2, + TFMT6_8_UNORM = 3, + TFMT6_8_SNORM = 4, + TFMT6_8_UINT = 5, + TFMT6_8_SINT = 6, + TFMT6_4_4_4_4_UNORM = 8, + TFMT6_5_5_5_1_UNORM = 10, + TFMT6_5_6_5_UNORM = 14, + TFMT6_8_8_UNORM = 15, + TFMT6_8_8_SNORM = 16, + TFMT6_8_8_UINT = 17, + TFMT6_8_8_SINT = 18, + TFMT6_L8_A8_UNORM = 19, + TFMT6_16_UNORM = 21, + TFMT6_16_SNORM = 22, + TFMT6_16_FLOAT = 23, + TFMT6_16_UINT = 24, + TFMT6_16_SINT = 25, + TFMT6_8_8_8_8_UNORM = 48, + TFMT6_8_8_8_UNORM = 49, + TFMT6_8_8_8_8_SNORM = 50, + TFMT6_8_8_8_8_UINT = 51, + TFMT6_8_8_8_8_SINT = 52, + TFMT6_9_9_9_E5_FLOAT = 53, + TFMT6_10_10_10_2_UNORM = 54, + TFMT6_10_10_10_2_UINT = 58, + TFMT6_11_11_10_FLOAT = 66, + TFMT6_16_16_UNORM = 67, + TFMT6_16_16_SNORM = 68, + TFMT6_16_16_FLOAT = 69, + TFMT6_16_16_UINT = 70, + TFMT6_16_16_SINT = 71, + TFMT6_32_FLOAT = 74, + TFMT6_32_UINT = 75, + TFMT6_32_SINT = 76, + TFMT6_16_16_16_16_UNORM = 96, + TFMT6_16_16_16_16_SNORM = 97, + TFMT6_16_16_16_16_FLOAT = 98, + TFMT6_16_16_16_16_UINT = 99, + TFMT6_16_16_16_16_SINT = 100, + TFMT6_32_32_FLOAT = 103, + TFMT6_32_32_UINT = 104, + TFMT6_32_32_SINT = 105, + TFMT6_32_32_32_UINT = 114, + TFMT6_32_32_32_SINT = 115, + TFMT6_32_32_32_FLOAT = 116, + TFMT6_32_32_32_32_FLOAT = 130, + TFMT6_32_32_32_32_UINT = 131, + TFMT6_32_32_32_32_SINT = 132, + TFMT6_X8Z24_UNORM = 160, + TFMT6_ETC2_RG11_UNORM = 171, + TFMT6_ETC2_RG11_SNORM = 172, + TFMT6_ETC2_R11_UNORM = 173, + TFMT6_ETC2_R11_SNORM = 174, + TFMT6_ETC1 = 175, + TFMT6_ETC2_RGB8 = 176, + TFMT6_ETC2_RGBA8 = 177, + TFMT6_ETC2_RGB8A1 = 178, + TFMT6_DXT1 = 179, + TFMT6_DXT3 = 180, + TFMT6_DXT5 = 181, + TFMT6_RGTC1_UNORM = 183, + TFMT6_RGTC1_SNORM = 184, + TFMT6_RGTC2_UNORM = 187, + TFMT6_RGTC2_SNORM = 188, + TFMT6_BPTC_UFLOAT = 190, + TFMT6_BPTC_FLOAT = 191, + TFMT6_BPTC = 192, + TFMT6_ASTC_4x4 = 193, + TFMT6_ASTC_5x4 = 194, + TFMT6_ASTC_5x5 = 195, + TFMT6_ASTC_6x5 = 196, + TFMT6_ASTC_6x6 = 197, + TFMT6_ASTC_8x5 = 198, + TFMT6_ASTC_8x6 = 199, + TFMT6_ASTC_8x8 = 
200, + TFMT6_ASTC_10x5 = 201, + TFMT6_ASTC_10x6 = 202, + TFMT6_ASTC_10x8 = 203, + TFMT6_ASTC_10x10 = 204, + TFMT6_ASTC_12x10 = 205, + TFMT6_ASTC_12x12 = 206, +}; + +enum a6xx_tex_fetchsize { + TFETCH6_1_BYTE = 0, + TFETCH6_2_BYTE = 1, + TFETCH6_4_BYTE = 2, + TFETCH6_8_BYTE = 3, + TFETCH6_16_BYTE = 4, +}; + +enum a6xx_depth_format { + DEPTH6_NONE = 0, + DEPTH6_16 = 1, + DEPTH6_24_8 = 2, + DEPTH6_32 = 4, +}; + +enum a6xx_shader_id { + A6XX_TP0_TMO_DATA = 9, + A6XX_TP0_SMO_DATA = 10, + A6XX_TP0_MIPMAP_BASE_DATA = 11, + A6XX_TP1_TMO_DATA = 25, + A6XX_TP1_SMO_DATA = 26, + A6XX_TP1_MIPMAP_BASE_DATA = 27, + A6XX_SP_INST_DATA = 41, + A6XX_SP_LB_0_DATA = 42, + A6XX_SP_LB_1_DATA = 43, + A6XX_SP_LB_2_DATA = 44, + A6XX_SP_LB_3_DATA = 45, + A6XX_SP_LB_4_DATA = 46, + A6XX_SP_LB_5_DATA = 47, + A6XX_SP_CB_BINDLESS_DATA = 48, + A6XX_SP_CB_LEGACY_DATA = 49, + A6XX_SP_UAV_DATA = 50, + A6XX_SP_INST_TAG = 51, + A6XX_SP_CB_BINDLESS_TAG = 52, + A6XX_SP_TMO_UMO_TAG = 53, + A6XX_SP_SMO_TAG = 54, + A6XX_SP_STATE_DATA = 55, + A6XX_HLSQ_CHUNK_CVS_RAM = 73, + A6XX_HLSQ_CHUNK_CPS_RAM = 74, + A6XX_HLSQ_CHUNK_CVS_RAM_TAG = 75, + A6XX_HLSQ_CHUNK_CPS_RAM_TAG = 76, + A6XX_HLSQ_ICB_CVS_CB_BASE_TAG = 77, + A6XX_HLSQ_ICB_CPS_CB_BASE_TAG = 78, + A6XX_HLSQ_CVS_MISC_RAM = 80, + A6XX_HLSQ_CPS_MISC_RAM = 81, + A6XX_HLSQ_INST_RAM = 82, + A6XX_HLSQ_GFX_CVS_CONST_RAM = 83, + A6XX_HLSQ_GFX_CPS_CONST_RAM = 84, + A6XX_HLSQ_CVS_MISC_RAM_TAG = 85, + A6XX_HLSQ_CPS_MISC_RAM_TAG = 86, + A6XX_HLSQ_INST_RAM_TAG = 87, + A6XX_HLSQ_GFX_CVS_CONST_RAM_TAG = 88, + A6XX_HLSQ_GFX_CPS_CONST_RAM_TAG = 89, + A6XX_HLSQ_PWR_REST_RAM = 90, + A6XX_HLSQ_PWR_REST_TAG = 91, + A6XX_HLSQ_DATAPATH_META = 96, + A6XX_HLSQ_FRONTEND_META = 97, + A6XX_HLSQ_INDIRECT_META = 98, + A6XX_HLSQ_BACKEND_META = 99, +}; + +enum a6xx_debugbus_id { + A6XX_DBGBUS_CP = 1, + A6XX_DBGBUS_RBBM = 2, + A6XX_DBGBUS_VBIF = 3, + A6XX_DBGBUS_HLSQ = 4, + A6XX_DBGBUS_UCHE = 5, + A6XX_DBGBUS_DPM = 6, + A6XX_DBGBUS_TESS = 7, + A6XX_DBGBUS_PC = 8, + A6XX_DBGBUS_VFDP = 9, + 
A6XX_DBGBUS_VPC = 10, + A6XX_DBGBUS_TSE = 11, + A6XX_DBGBUS_RAS = 12, + A6XX_DBGBUS_VSC = 13, + A6XX_DBGBUS_COM = 14, + A6XX_DBGBUS_LRZ = 16, + A6XX_DBGBUS_A2D = 17, + A6XX_DBGBUS_CCUFCHE = 18, + A6XX_DBGBUS_GMU_CX = 19, + A6XX_DBGBUS_RBP = 20, + A6XX_DBGBUS_DCS = 21, + A6XX_DBGBUS_DBGC = 22, + A6XX_DBGBUS_CX = 23, + A6XX_DBGBUS_GMU_GX = 24, + A6XX_DBGBUS_TPFCHE = 25, + A6XX_DBGBUS_GBIF_GX = 26, + A6XX_DBGBUS_GPC = 29, + A6XX_DBGBUS_LARC = 30, + A6XX_DBGBUS_HLSQ_SPTP = 31, + A6XX_DBGBUS_RB_0 = 32, + A6XX_DBGBUS_RB_1 = 33, + A6XX_DBGBUS_UCHE_WRAPPER = 36, + A6XX_DBGBUS_CCU_0 = 40, + A6XX_DBGBUS_CCU_1 = 41, + A6XX_DBGBUS_VFD_0 = 56, + A6XX_DBGBUS_VFD_1 = 57, + A6XX_DBGBUS_VFD_2 = 58, + A6XX_DBGBUS_VFD_3 = 59, + A6XX_DBGBUS_SP_0 = 64, + A6XX_DBGBUS_SP_1 = 65, + A6XX_DBGBUS_TPL1_0 = 72, + A6XX_DBGBUS_TPL1_1 = 73, + A6XX_DBGBUS_TPL1_2 = 74, + A6XX_DBGBUS_TPL1_3 = 75, +}; + +enum a6xx_cp_perfcounter_select { + PERF_CP_ALWAYS_COUNT = 0, + PERF_CP_BUSY_GFX_CORE_IDLE = 1, + PERF_CP_BUSY_CYCLES = 2, + PERF_CP_NUM_PREEMPTIONS = 3, + PERF_CP_PREEMPTION_REACTION_DELAY = 4, + PERF_CP_PREEMPTION_SWITCH_OUT_TIME = 5, + PERF_CP_PREEMPTION_SWITCH_IN_TIME = 6, + PERF_CP_DEAD_DRAWS_IN_BIN_RENDER = 7, + PERF_CP_PREDICATED_DRAWS_KILLED = 8, + PERF_CP_MODE_SWITCH = 9, + PERF_CP_ZPASS_DONE = 10, + PERF_CP_CONTEXT_DONE = 11, + PERF_CP_CACHE_FLUSH = 12, + PERF_CP_LONG_PREEMPTIONS = 13, + PERF_CP_SQE_I_CACHE_STARVE = 14, + PERF_CP_SQE_IDLE = 15, + PERF_CP_SQE_PM4_STARVE_RB_IB = 16, + PERF_CP_SQE_PM4_STARVE_SDS = 17, + PERF_CP_SQE_MRB_STARVE = 18, + PERF_CP_SQE_RRB_STARVE = 19, + PERF_CP_SQE_VSD_STARVE = 20, + PERF_CP_VSD_DECODE_STARVE = 21, + PERF_CP_SQE_PIPE_OUT_STALL = 22, + PERF_CP_SQE_SYNC_STALL = 23, + PERF_CP_SQE_PM4_WFI_STALL = 24, + PERF_CP_SQE_SYS_WFI_STALL = 25, + PERF_CP_SQE_T4_EXEC = 26, + PERF_CP_SQE_LOAD_STATE_EXEC = 27, + PERF_CP_SQE_SAVE_SDS_STATE = 28, + PERF_CP_SQE_DRAW_EXEC = 29, + PERF_CP_SQE_CTXT_REG_BUNCH_EXEC = 30, + PERF_CP_SQE_EXEC_PROFILED = 31, + 
PERF_CP_MEMORY_POOL_EMPTY = 32, + PERF_CP_MEMORY_POOL_SYNC_STALL = 33, + PERF_CP_MEMORY_POOL_ABOVE_THRESH = 34, + PERF_CP_AHB_WR_STALL_PRE_DRAWS = 35, + PERF_CP_AHB_STALL_SQE_GMU = 36, + PERF_CP_AHB_STALL_SQE_WR_OTHER = 37, + PERF_CP_AHB_STALL_SQE_RD_OTHER = 38, + PERF_CP_CLUSTER0_EMPTY = 39, + PERF_CP_CLUSTER1_EMPTY = 40, + PERF_CP_CLUSTER2_EMPTY = 41, + PERF_CP_CLUSTER3_EMPTY = 42, + PERF_CP_CLUSTER4_EMPTY = 43, + PERF_CP_CLUSTER5_EMPTY = 44, + PERF_CP_PM4_DATA = 45, + PERF_CP_PM4_HEADERS = 46, + PERF_CP_VBIF_READ_BEATS = 47, + PERF_CP_VBIF_WRITE_BEATS = 48, + PERF_CP_SQE_INSTR_COUNTER = 49, +}; + +enum a6xx_rbbm_perfcounter_select { + PERF_RBBM_ALWAYS_COUNT = 0, + PERF_RBBM_ALWAYS_ON = 1, + PERF_RBBM_TSE_BUSY = 2, + PERF_RBBM_RAS_BUSY = 3, + PERF_RBBM_PC_DCALL_BUSY = 4, + PERF_RBBM_PC_VSD_BUSY = 5, + PERF_RBBM_STATUS_MASKED = 6, + PERF_RBBM_COM_BUSY = 7, + PERF_RBBM_DCOM_BUSY = 8, + PERF_RBBM_VBIF_BUSY = 9, + PERF_RBBM_VSC_BUSY = 10, + PERF_RBBM_TESS_BUSY = 11, + PERF_RBBM_UCHE_BUSY = 12, + PERF_RBBM_HLSQ_BUSY = 13, +}; + +enum a6xx_pc_perfcounter_select { + PERF_PC_BUSY_CYCLES = 0, + PERF_PC_WORKING_CYCLES = 1, + PERF_PC_STALL_CYCLES_VFD = 2, + PERF_PC_STALL_CYCLES_TSE = 3, + PERF_PC_STALL_CYCLES_VPC = 4, + PERF_PC_STALL_CYCLES_UCHE = 5, + PERF_PC_STALL_CYCLES_TESS = 6, + PERF_PC_STALL_CYCLES_TSE_ONLY = 7, + PERF_PC_STALL_CYCLES_VPC_ONLY = 8, + PERF_PC_PASS1_TF_STALL_CYCLES = 9, + PERF_PC_STARVE_CYCLES_FOR_INDEX = 10, + PERF_PC_STARVE_CYCLES_FOR_TESS_FACTOR = 11, + PERF_PC_STARVE_CYCLES_FOR_VIZ_STREAM = 12, + PERF_PC_STARVE_CYCLES_FOR_POSITION = 13, + PERF_PC_STARVE_CYCLES_DI = 14, + PERF_PC_VIS_STREAMS_LOADED = 15, + PERF_PC_INSTANCES = 16, + PERF_PC_VPC_PRIMITIVES = 17, + PERF_PC_DEAD_PRIM = 18, + PERF_PC_LIVE_PRIM = 19, + PERF_PC_VERTEX_HITS = 20, + PERF_PC_IA_VERTICES = 21, + PERF_PC_IA_PRIMITIVES = 22, + PERF_PC_GS_PRIMITIVES = 23, + PERF_PC_HS_INVOCATIONS = 24, + PERF_PC_DS_INVOCATIONS = 25, + PERF_PC_VS_INVOCATIONS = 26, + PERF_PC_GS_INVOCATIONS = 27, + 
PERF_PC_DS_PRIMITIVES = 28, + PERF_PC_VPC_POS_DATA_TRANSACTION = 29, + PERF_PC_3D_DRAWCALLS = 30, + PERF_PC_2D_DRAWCALLS = 31, + PERF_PC_NON_DRAWCALL_GLOBAL_EVENTS = 32, + PERF_TESS_BUSY_CYCLES = 33, + PERF_TESS_WORKING_CYCLES = 34, + PERF_TESS_STALL_CYCLES_PC = 35, + PERF_TESS_STARVE_CYCLES_PC = 36, + PERF_PC_TSE_TRANSACTION = 37, + PERF_PC_TSE_VERTEX = 38, + PERF_PC_TESS_PC_UV_TRANS = 39, + PERF_PC_TESS_PC_UV_PATCHES = 40, + PERF_PC_TESS_FACTOR_TRANS = 41, +}; + +enum a6xx_vfd_perfcounter_select { + PERF_VFD_BUSY_CYCLES = 0, + PERF_VFD_STALL_CYCLES_UCHE = 1, + PERF_VFD_STALL_CYCLES_VPC_ALLOC = 2, + PERF_VFD_STALL_CYCLES_SP_INFO = 3, + PERF_VFD_STALL_CYCLES_SP_ATTR = 4, + PERF_VFD_STARVE_CYCLES_UCHE = 5, + PERF_VFD_RBUFFER_FULL = 6, + PERF_VFD_ATTR_INFO_FIFO_FULL = 7, + PERF_VFD_DECODED_ATTRIBUTE_BYTES = 8, + PERF_VFD_NUM_ATTRIBUTES = 9, + PERF_VFD_UPPER_SHADER_FIBERS = 10, + PERF_VFD_LOWER_SHADER_FIBERS = 11, + PERF_VFD_MODE_0_FIBERS = 12, + PERF_VFD_MODE_1_FIBERS = 13, + PERF_VFD_MODE_2_FIBERS = 14, + PERF_VFD_MODE_3_FIBERS = 15, + PERF_VFD_MODE_4_FIBERS = 16, + PERF_VFD_TOTAL_VERTICES = 17, + PERF_VFDP_STALL_CYCLES_VFD = 18, + PERF_VFDP_STALL_CYCLES_VFD_INDEX = 19, + PERF_VFDP_STALL_CYCLES_VFD_PROG = 20, + PERF_VFDP_STARVE_CYCLES_PC = 21, + PERF_VFDP_VS_STAGE_WAVES = 22, +}; + +enum a6xx_hlsq_perfcounter_select { + PERF_HLSQ_BUSY_CYCLES = 0, + PERF_HLSQ_STALL_CYCLES_UCHE = 1, + PERF_HLSQ_STALL_CYCLES_SP_STATE = 2, + PERF_HLSQ_STALL_CYCLES_SP_FS_STAGE = 3, + PERF_HLSQ_UCHE_LATENCY_CYCLES = 4, + PERF_HLSQ_UCHE_LATENCY_COUNT = 5, + PERF_HLSQ_FS_STAGE_1X_WAVES = 6, + PERF_HLSQ_FS_STAGE_2X_WAVES = 7, + PERF_HLSQ_QUADS = 8, + PERF_HLSQ_CS_INVOCATIONS = 9, + PERF_HLSQ_COMPUTE_DRAWCALLS = 10, + PERF_HLSQ_FS_DATA_WAIT_PROGRAMMING = 11, + PERF_HLSQ_DUAL_FS_PROG_ACTIVE = 12, + PERF_HLSQ_DUAL_VS_PROG_ACTIVE = 13, + PERF_HLSQ_FS_BATCH_COUNT_ZERO = 14, + PERF_HLSQ_VS_BATCH_COUNT_ZERO = 15, + PERF_HLSQ_WAVE_PENDING_NO_QUAD = 16, + PERF_HLSQ_WAVE_PENDING_NO_PRIM_BASE = 17, + 
PERF_HLSQ_STALL_CYCLES_VPC = 18, + PERF_HLSQ_PIXELS = 19, + PERF_HLSQ_DRAW_MODE_SWITCH_VSFS_SYNC = 20, +}; + +enum a6xx_vpc_perfcounter_select { + PERF_VPC_BUSY_CYCLES = 0, + PERF_VPC_WORKING_CYCLES = 1, + PERF_VPC_STALL_CYCLES_UCHE = 2, + PERF_VPC_STALL_CYCLES_VFD_WACK = 3, + PERF_VPC_STALL_CYCLES_HLSQ_PRIM_ALLOC = 4, + PERF_VPC_STALL_CYCLES_PC = 5, + PERF_VPC_STALL_CYCLES_SP_LM = 6, + PERF_VPC_STARVE_CYCLES_SP = 7, + PERF_VPC_STARVE_CYCLES_LRZ = 8, + PERF_VPC_PC_PRIMITIVES = 9, + PERF_VPC_SP_COMPONENTS = 10, + PERF_VPC_STALL_CYCLES_VPCRAM_POS = 11, + PERF_VPC_LRZ_ASSIGN_PRIMITIVES = 12, + PERF_VPC_RB_VISIBLE_PRIMITIVES = 13, + PERF_VPC_LM_TRANSACTION = 14, + PERF_VPC_STREAMOUT_TRANSACTION = 15, + PERF_VPC_VS_BUSY_CYCLES = 16, + PERF_VPC_PS_BUSY_CYCLES = 17, + PERF_VPC_VS_WORKING_CYCLES = 18, + PERF_VPC_PS_WORKING_CYCLES = 19, + PERF_VPC_STARVE_CYCLES_RB = 20, + PERF_VPC_NUM_VPCRAM_READ_POS = 21, + PERF_VPC_WIT_FULL_CYCLES = 22, + PERF_VPC_VPCRAM_FULL_CYCLES = 23, + PERF_VPC_LM_FULL_WAIT_FOR_INTP_END = 24, + PERF_VPC_NUM_VPCRAM_WRITE = 25, + PERF_VPC_NUM_VPCRAM_READ_SO = 26, + PERF_VPC_NUM_ATTR_REQ_LM = 27, +}; + +enum a6xx_tse_perfcounter_select { + PERF_TSE_BUSY_CYCLES = 0, + PERF_TSE_CLIPPING_CYCLES = 1, + PERF_TSE_STALL_CYCLES_RAS = 2, + PERF_TSE_STALL_CYCLES_LRZ_BARYPLANE = 3, + PERF_TSE_STALL_CYCLES_LRZ_ZPLANE = 4, + PERF_TSE_STARVE_CYCLES_PC = 5, + PERF_TSE_INPUT_PRIM = 6, + PERF_TSE_INPUT_NULL_PRIM = 7, + PERF_TSE_TRIVAL_REJ_PRIM = 8, + PERF_TSE_CLIPPED_PRIM = 9, + PERF_TSE_ZERO_AREA_PRIM = 10, + PERF_TSE_FACENESS_CULLED_PRIM = 11, + PERF_TSE_ZERO_PIXEL_PRIM = 12, + PERF_TSE_OUTPUT_NULL_PRIM = 13, + PERF_TSE_OUTPUT_VISIBLE_PRIM = 14, + PERF_TSE_CINVOCATION = 15, + PERF_TSE_CPRIMITIVES = 16, + PERF_TSE_2D_INPUT_PRIM = 17, + PERF_TSE_2D_ALIVE_CYCLES = 18, + PERF_TSE_CLIP_PLANES = 19, +}; + +enum a6xx_ras_perfcounter_select { + PERF_RAS_BUSY_CYCLES = 0, + PERF_RAS_SUPERTILE_ACTIVE_CYCLES = 1, + PERF_RAS_STALL_CYCLES_LRZ = 2, + PERF_RAS_STARVE_CYCLES_TSE = 3, 
+ PERF_RAS_SUPER_TILES = 4, + PERF_RAS_8X4_TILES = 5, + PERF_RAS_MASKGEN_ACTIVE = 6, + PERF_RAS_FULLY_COVERED_SUPER_TILES = 7, + PERF_RAS_FULLY_COVERED_8X4_TILES = 8, + PERF_RAS_PRIM_KILLED_INVISILBE = 9, + PERF_RAS_SUPERTILE_GEN_ACTIVE_CYCLES = 10, + PERF_RAS_LRZ_INTF_WORKING_CYCLES = 11, + PERF_RAS_BLOCKS = 12, +}; + +enum a6xx_uche_perfcounter_select { + PERF_UCHE_BUSY_CYCLES = 0, + PERF_UCHE_STALL_CYCLES_ARBITER = 1, + PERF_UCHE_VBIF_LATENCY_CYCLES = 2, + PERF_UCHE_VBIF_LATENCY_SAMPLES = 3, + PERF_UCHE_VBIF_READ_BEATS_TP = 4, + PERF_UCHE_VBIF_READ_BEATS_VFD = 5, + PERF_UCHE_VBIF_READ_BEATS_HLSQ = 6, + PERF_UCHE_VBIF_READ_BEATS_LRZ = 7, + PERF_UCHE_VBIF_READ_BEATS_SP = 8, + PERF_UCHE_READ_REQUESTS_TP = 9, + PERF_UCHE_READ_REQUESTS_VFD = 10, + PERF_UCHE_READ_REQUESTS_HLSQ = 11, + PERF_UCHE_READ_REQUESTS_LRZ = 12, + PERF_UCHE_READ_REQUESTS_SP = 13, + PERF_UCHE_WRITE_REQUESTS_LRZ = 14, + PERF_UCHE_WRITE_REQUESTS_SP = 15, + PERF_UCHE_WRITE_REQUESTS_VPC = 16, + PERF_UCHE_WRITE_REQUESTS_VSC = 17, + PERF_UCHE_EVICTS = 18, + PERF_UCHE_BANK_REQ0 = 19, + PERF_UCHE_BANK_REQ1 = 20, + PERF_UCHE_BANK_REQ2 = 21, + PERF_UCHE_BANK_REQ3 = 22, + PERF_UCHE_BANK_REQ4 = 23, + PERF_UCHE_BANK_REQ5 = 24, + PERF_UCHE_BANK_REQ6 = 25, + PERF_UCHE_BANK_REQ7 = 26, + PERF_UCHE_VBIF_READ_BEATS_CH0 = 27, + PERF_UCHE_VBIF_READ_BEATS_CH1 = 28, + PERF_UCHE_GMEM_READ_BEATS = 29, + PERF_UCHE_TPH_REF_FULL = 30, + PERF_UCHE_TPH_VICTIM_FULL = 31, + PERF_UCHE_TPH_EXT_FULL = 32, + PERF_UCHE_VBIF_STALL_WRITE_DATA = 33, + PERF_UCHE_DCMP_LATENCY_SAMPLES = 34, + PERF_UCHE_DCMP_LATENCY_CYCLES = 35, + PERF_UCHE_VBIF_READ_BEATS_PC = 36, + PERF_UCHE_READ_REQUESTS_PC = 37, + PERF_UCHE_RAM_READ_REQ = 38, + PERF_UCHE_RAM_WRITE_REQ = 39, +}; + +enum a6xx_tp_perfcounter_select { + PERF_TP_BUSY_CYCLES = 0, + PERF_TP_STALL_CYCLES_UCHE = 1, + PERF_TP_LATENCY_CYCLES = 2, + PERF_TP_LATENCY_TRANS = 3, + PERF_TP_FLAG_CACHE_REQUEST_SAMPLES = 4, + PERF_TP_FLAG_CACHE_REQUEST_LATENCY = 5, + PERF_TP_L1_CACHELINE_REQUESTS = 6, + 
PERF_TP_L1_CACHELINE_MISSES = 7, + PERF_TP_SP_TP_TRANS = 8, + PERF_TP_TP_SP_TRANS = 9, + PERF_TP_OUTPUT_PIXELS = 10, + PERF_TP_FILTER_WORKLOAD_16BIT = 11, + PERF_TP_FILTER_WORKLOAD_32BIT = 12, + PERF_TP_QUADS_RECEIVED = 13, + PERF_TP_QUADS_OFFSET = 14, + PERF_TP_QUADS_SHADOW = 15, + PERF_TP_QUADS_ARRAY = 16, + PERF_TP_QUADS_GRADIENT = 17, + PERF_TP_QUADS_1D = 18, + PERF_TP_QUADS_2D = 19, + PERF_TP_QUADS_BUFFER = 20, + PERF_TP_QUADS_3D = 21, + PERF_TP_QUADS_CUBE = 22, + PERF_TP_DIVERGENT_QUADS_RECEIVED = 23, + PERF_TP_PRT_NON_RESIDENT_EVENTS = 24, + PERF_TP_OUTPUT_PIXELS_POINT = 25, + PERF_TP_OUTPUT_PIXELS_BILINEAR = 26, + PERF_TP_OUTPUT_PIXELS_MIP = 27, + PERF_TP_OUTPUT_PIXELS_ANISO = 28, + PERF_TP_OUTPUT_PIXELS_ZERO_LOD = 29, + PERF_TP_FLAG_CACHE_REQUESTS = 30, + PERF_TP_FLAG_CACHE_MISSES = 31, + PERF_TP_L1_5_L2_REQUESTS = 32, + PERF_TP_2D_OUTPUT_PIXELS = 33, + PERF_TP_2D_OUTPUT_PIXELS_POINT = 34, + PERF_TP_2D_OUTPUT_PIXELS_BILINEAR = 35, + PERF_TP_2D_FILTER_WORKLOAD_16BIT = 36, + PERF_TP_2D_FILTER_WORKLOAD_32BIT = 37, + PERF_TP_TPA2TPC_TRANS = 38, + PERF_TP_L1_MISSES_ASTC_1TILE = 39, + PERF_TP_L1_MISSES_ASTC_2TILE = 40, + PERF_TP_L1_MISSES_ASTC_4TILE = 41, + PERF_TP_L1_5_L2_COMPRESS_REQS = 42, + PERF_TP_L1_5_L2_COMPRESS_MISS = 43, + PERF_TP_L1_BANK_CONFLICT = 44, + PERF_TP_L1_5_MISS_LATENCY_CYCLES = 45, + PERF_TP_L1_5_MISS_LATENCY_TRANS = 46, + PERF_TP_QUADS_CONSTANT_MULTIPLIED = 47, + PERF_TP_FRONTEND_WORKING_CYCLES = 48, + PERF_TP_L1_TAG_WORKING_CYCLES = 49, + PERF_TP_L1_DATA_WRITE_WORKING_CYCLES = 50, + PERF_TP_PRE_L1_DECOM_WORKING_CYCLES = 51, + PERF_TP_BACKEND_WORKING_CYCLES = 52, + PERF_TP_FLAG_CACHE_WORKING_CYCLES = 53, + PERF_TP_L1_5_CACHE_WORKING_CYCLES = 54, + PERF_TP_STARVE_CYCLES_SP = 55, + PERF_TP_STARVE_CYCLES_UCHE = 56, +}; + +enum a6xx_sp_perfcounter_select { + PERF_SP_BUSY_CYCLES = 0, + PERF_SP_ALU_WORKING_CYCLES = 1, + PERF_SP_EFU_WORKING_CYCLES = 2, + PERF_SP_STALL_CYCLES_VPC = 3, + PERF_SP_STALL_CYCLES_TP = 4, + PERF_SP_STALL_CYCLES_UCHE = 5, 
+ PERF_SP_STALL_CYCLES_RB = 6, + PERF_SP_NON_EXECUTION_CYCLES = 7, + PERF_SP_WAVE_CONTEXTS = 8, + PERF_SP_WAVE_CONTEXT_CYCLES = 9, + PERF_SP_FS_STAGE_WAVE_CYCLES = 10, + PERF_SP_FS_STAGE_WAVE_SAMPLES = 11, + PERF_SP_VS_STAGE_WAVE_CYCLES = 12, + PERF_SP_VS_STAGE_WAVE_SAMPLES = 13, + PERF_SP_FS_STAGE_DURATION_CYCLES = 14, + PERF_SP_VS_STAGE_DURATION_CYCLES = 15, + PERF_SP_WAVE_CTRL_CYCLES = 16, + PERF_SP_WAVE_LOAD_CYCLES = 17, + PERF_SP_WAVE_EMIT_CYCLES = 18, + PERF_SP_WAVE_NOP_CYCLES = 19, + PERF_SP_WAVE_WAIT_CYCLES = 20, + PERF_SP_WAVE_FETCH_CYCLES = 21, + PERF_SP_WAVE_IDLE_CYCLES = 22, + PERF_SP_WAVE_END_CYCLES = 23, + PERF_SP_WAVE_LONG_SYNC_CYCLES = 24, + PERF_SP_WAVE_SHORT_SYNC_CYCLES = 25, + PERF_SP_WAVE_JOIN_CYCLES = 26, + PERF_SP_LM_LOAD_INSTRUCTIONS = 27, + PERF_SP_LM_STORE_INSTRUCTIONS = 28, + PERF_SP_LM_ATOMICS = 29, + PERF_SP_GM_LOAD_INSTRUCTIONS = 30, + PERF_SP_GM_STORE_INSTRUCTIONS = 31, + PERF_SP_GM_ATOMICS = 32, + PERF_SP_VS_STAGE_TEX_INSTRUCTIONS = 33, + PERF_SP_VS_STAGE_EFU_INSTRUCTIONS = 34, + PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = 35, + PERF_SP_VS_STAGE_HALF_ALU_INSTRUCTIONS = 36, + PERF_SP_FS_STAGE_TEX_INSTRUCTIONS = 37, + PERF_SP_FS_STAGE_CFLOW_INSTRUCTIONS = 38, + PERF_SP_FS_STAGE_EFU_INSTRUCTIONS = 39, + PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = 40, + PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = 41, + PERF_SP_FS_STAGE_BARY_INSTRUCTIONS = 42, + PERF_SP_VS_INSTRUCTIONS = 43, + PERF_SP_FS_INSTRUCTIONS = 44, + PERF_SP_ADDR_LOCK_COUNT = 45, + PERF_SP_UCHE_READ_TRANS = 46, + PERF_SP_UCHE_WRITE_TRANS = 47, + PERF_SP_EXPORT_VPC_TRANS = 48, + PERF_SP_EXPORT_RB_TRANS = 49, + PERF_SP_PIXELS_KILLED = 50, + PERF_SP_ICL1_REQUESTS = 51, + PERF_SP_ICL1_MISSES = 52, + PERF_SP_HS_INSTRUCTIONS = 53, + PERF_SP_DS_INSTRUCTIONS = 54, + PERF_SP_GS_INSTRUCTIONS = 55, + PERF_SP_CS_INSTRUCTIONS = 56, + PERF_SP_GPR_READ = 57, + PERF_SP_GPR_WRITE = 58, + PERF_SP_FS_STAGE_HALF_EFU_INSTRUCTIONS = 59, + PERF_SP_VS_STAGE_HALF_EFU_INSTRUCTIONS = 60, + PERF_SP_LM_BANK_CONFLICTS = 
61, + PERF_SP_TEX_CONTROL_WORKING_CYCLES = 62, + PERF_SP_LOAD_CONTROL_WORKING_CYCLES = 63, + PERF_SP_FLOW_CONTROL_WORKING_CYCLES = 64, + PERF_SP_LM_WORKING_CYCLES = 65, + PERF_SP_DISPATCHER_WORKING_CYCLES = 66, + PERF_SP_SEQUENCER_WORKING_CYCLES = 67, + PERF_SP_LOW_EFFICIENCY_STARVED_BY_TP = 68, + PERF_SP_STARVE_CYCLES_HLSQ = 69, + PERF_SP_NON_EXECUTION_LS_CYCLES = 70, + PERF_SP_WORKING_EU = 71, + PERF_SP_ANY_EU_WORKING = 72, + PERF_SP_WORKING_EU_FS_STAGE = 73, + PERF_SP_ANY_EU_WORKING_FS_STAGE = 74, + PERF_SP_WORKING_EU_VS_STAGE = 75, + PERF_SP_ANY_EU_WORKING_VS_STAGE = 76, + PERF_SP_WORKING_EU_CS_STAGE = 77, + PERF_SP_ANY_EU_WORKING_CS_STAGE = 78, + PERF_SP_GPR_READ_PREFETCH = 79, + PERF_SP_GPR_READ_CONFLICT = 80, + PERF_SP_GPR_WRITE_CONFLICT = 81, + PERF_SP_GM_LOAD_LATENCY_CYCLES = 82, + PERF_SP_GM_LOAD_LATENCY_SAMPLES = 83, + PERF_SP_EXECUTABLE_WAVES = 84, +}; + +enum a6xx_rb_perfcounter_select { + PERF_RB_BUSY_CYCLES = 0, + PERF_RB_STALL_CYCLES_HLSQ = 1, + PERF_RB_STALL_CYCLES_FIFO0_FULL = 2, + PERF_RB_STALL_CYCLES_FIFO1_FULL = 3, + PERF_RB_STALL_CYCLES_FIFO2_FULL = 4, + PERF_RB_STARVE_CYCLES_SP = 5, + PERF_RB_STARVE_CYCLES_LRZ_TILE = 6, + PERF_RB_STARVE_CYCLES_CCU = 7, + PERF_RB_STARVE_CYCLES_Z_PLANE = 8, + PERF_RB_STARVE_CYCLES_BARY_PLANE = 9, + PERF_RB_Z_WORKLOAD = 10, + PERF_RB_HLSQ_ACTIVE = 11, + PERF_RB_Z_READ = 12, + PERF_RB_Z_WRITE = 13, + PERF_RB_C_READ = 14, + PERF_RB_C_WRITE = 15, + PERF_RB_TOTAL_PASS = 16, + PERF_RB_Z_PASS = 17, + PERF_RB_Z_FAIL = 18, + PERF_RB_S_FAIL = 19, + PERF_RB_BLENDED_FXP_COMPONENTS = 20, + PERF_RB_BLENDED_FP16_COMPONENTS = 21, + PERF_RB_PS_INVOCATIONS = 22, + PERF_RB_2D_ALIVE_CYCLES = 23, + PERF_RB_2D_STALL_CYCLES_A2D = 24, + PERF_RB_2D_STARVE_CYCLES_SRC = 25, + PERF_RB_2D_STARVE_CYCLES_SP = 26, + PERF_RB_2D_STARVE_CYCLES_DST = 27, + PERF_RB_2D_VALID_PIXELS = 28, + PERF_RB_3D_PIXELS = 29, + PERF_RB_BLENDER_WORKING_CYCLES = 30, + PERF_RB_ZPROC_WORKING_CYCLES = 31, + PERF_RB_CPROC_WORKING_CYCLES = 32, + 
PERF_RB_SAMPLER_WORKING_CYCLES = 33, + PERF_RB_STALL_CYCLES_CCU_COLOR_READ = 34, + PERF_RB_STALL_CYCLES_CCU_COLOR_WRITE = 35, + PERF_RB_STALL_CYCLES_CCU_DEPTH_READ = 36, + PERF_RB_STALL_CYCLES_CCU_DEPTH_WRITE = 37, + PERF_RB_STALL_CYCLES_VPC = 38, + PERF_RB_2D_INPUT_TRANS = 39, + PERF_RB_2D_OUTPUT_RB_DST_TRANS = 40, + PERF_RB_2D_OUTPUT_RB_SRC_TRANS = 41, + PERF_RB_BLENDED_FP32_COMPONENTS = 42, + PERF_RB_COLOR_PIX_TILES = 43, + PERF_RB_STALL_CYCLES_CCU = 44, + PERF_RB_EARLY_Z_ARB3_GRANT = 45, + PERF_RB_LATE_Z_ARB3_GRANT = 46, + PERF_RB_EARLY_Z_SKIP_GRANT = 47, +}; + +enum a6xx_vsc_perfcounter_select { + PERF_VSC_BUSY_CYCLES = 0, + PERF_VSC_WORKING_CYCLES = 1, + PERF_VSC_STALL_CYCLES_UCHE = 2, + PERF_VSC_EOT_NUM = 3, + PERF_VSC_INPUT_TILES = 4, +}; + +enum a6xx_ccu_perfcounter_select { + PERF_CCU_BUSY_CYCLES = 0, + PERF_CCU_STALL_CYCLES_RB_DEPTH_RETURN = 1, + PERF_CCU_STALL_CYCLES_RB_COLOR_RETURN = 2, + PERF_CCU_STARVE_CYCLES_FLAG_RETURN = 3, + PERF_CCU_DEPTH_BLOCKS = 4, + PERF_CCU_COLOR_BLOCKS = 5, + PERF_CCU_DEPTH_BLOCK_HIT = 6, + PERF_CCU_COLOR_BLOCK_HIT = 7, + PERF_CCU_PARTIAL_BLOCK_READ = 8, + PERF_CCU_GMEM_READ = 9, + PERF_CCU_GMEM_WRITE = 10, + PERF_CCU_DEPTH_READ_FLAG0_COUNT = 11, + PERF_CCU_DEPTH_READ_FLAG1_COUNT = 12, + PERF_CCU_DEPTH_READ_FLAG2_COUNT = 13, + PERF_CCU_DEPTH_READ_FLAG3_COUNT = 14, + PERF_CCU_DEPTH_READ_FLAG4_COUNT = 15, + PERF_CCU_DEPTH_READ_FLAG5_COUNT = 16, + PERF_CCU_DEPTH_READ_FLAG6_COUNT = 17, + PERF_CCU_DEPTH_READ_FLAG8_COUNT = 18, + PERF_CCU_COLOR_READ_FLAG0_COUNT = 19, + PERF_CCU_COLOR_READ_FLAG1_COUNT = 20, + PERF_CCU_COLOR_READ_FLAG2_COUNT = 21, + PERF_CCU_COLOR_READ_FLAG3_COUNT = 22, + PERF_CCU_COLOR_READ_FLAG4_COUNT = 23, + PERF_CCU_COLOR_READ_FLAG5_COUNT = 24, + PERF_CCU_COLOR_READ_FLAG6_COUNT = 25, + PERF_CCU_COLOR_READ_FLAG8_COUNT = 26, + PERF_CCU_2D_RD_REQ = 27, + PERF_CCU_2D_WR_REQ = 28, +}; + +enum a6xx_lrz_perfcounter_select { + PERF_LRZ_BUSY_CYCLES = 0, + PERF_LRZ_STARVE_CYCLES_RAS = 1, + PERF_LRZ_STALL_CYCLES_RB = 2, + 
PERF_LRZ_STALL_CYCLES_VSC = 3, + PERF_LRZ_STALL_CYCLES_VPC = 4, + PERF_LRZ_STALL_CYCLES_FLAG_PREFETCH = 5, + PERF_LRZ_STALL_CYCLES_UCHE = 6, + PERF_LRZ_LRZ_READ = 7, + PERF_LRZ_LRZ_WRITE = 8, + PERF_LRZ_READ_LATENCY = 9, + PERF_LRZ_MERGE_CACHE_UPDATING = 10, + PERF_LRZ_PRIM_KILLED_BY_MASKGEN = 11, + PERF_LRZ_PRIM_KILLED_BY_LRZ = 12, + PERF_LRZ_VISIBLE_PRIM_AFTER_LRZ = 13, + PERF_LRZ_FULL_8X8_TILES = 14, + PERF_LRZ_PARTIAL_8X8_TILES = 15, + PERF_LRZ_TILE_KILLED = 16, + PERF_LRZ_TOTAL_PIXEL = 17, + PERF_LRZ_VISIBLE_PIXEL_AFTER_LRZ = 18, + PERF_LRZ_FULLY_COVERED_TILES = 19, + PERF_LRZ_PARTIAL_COVERED_TILES = 20, + PERF_LRZ_FEEDBACK_ACCEPT = 21, + PERF_LRZ_FEEDBACK_DISCARD = 22, + PERF_LRZ_FEEDBACK_STALL = 23, + PERF_LRZ_STALL_CYCLES_RB_ZPLANE = 24, + PERF_LRZ_STALL_CYCLES_RB_BPLANE = 25, + PERF_LRZ_STALL_CYCLES_VC = 26, + PERF_LRZ_RAS_MASK_TRANS = 27, +}; + +enum a6xx_cmp_perfcounter_select { + PERF_CMPDECMP_STALL_CYCLES_ARB = 0, + PERF_CMPDECMP_VBIF_LATENCY_CYCLES = 1, + PERF_CMPDECMP_VBIF_LATENCY_SAMPLES = 2, + PERF_CMPDECMP_VBIF_READ_DATA_CCU = 3, + PERF_CMPDECMP_VBIF_WRITE_DATA_CCU = 4, + PERF_CMPDECMP_VBIF_READ_REQUEST = 5, + PERF_CMPDECMP_VBIF_WRITE_REQUEST = 6, + PERF_CMPDECMP_VBIF_READ_DATA = 7, + PERF_CMPDECMP_VBIF_WRITE_DATA = 8, + PERF_CMPDECMP_FLAG_FETCH_CYCLES = 9, + PERF_CMPDECMP_FLAG_FETCH_SAMPLES = 10, + PERF_CMPDECMP_DEPTH_WRITE_FLAG1_COUNT = 11, + PERF_CMPDECMP_DEPTH_WRITE_FLAG2_COUNT = 12, + PERF_CMPDECMP_DEPTH_WRITE_FLAG3_COUNT = 13, + PERF_CMPDECMP_DEPTH_WRITE_FLAG4_COUNT = 14, + PERF_CMPDECMP_DEPTH_WRITE_FLAG5_COUNT = 15, + PERF_CMPDECMP_DEPTH_WRITE_FLAG6_COUNT = 16, + PERF_CMPDECMP_DEPTH_WRITE_FLAG8_COUNT = 17, + PERF_CMPDECMP_COLOR_WRITE_FLAG1_COUNT = 18, + PERF_CMPDECMP_COLOR_WRITE_FLAG2_COUNT = 19, + PERF_CMPDECMP_COLOR_WRITE_FLAG3_COUNT = 20, + PERF_CMPDECMP_COLOR_WRITE_FLAG4_COUNT = 21, + PERF_CMPDECMP_COLOR_WRITE_FLAG5_COUNT = 22, + PERF_CMPDECMP_COLOR_WRITE_FLAG6_COUNT = 23, + PERF_CMPDECMP_COLOR_WRITE_FLAG8_COUNT = 24, + 
PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_REQ = 25, + PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_WR = 26, + PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_RETURN = 27, + PERF_CMPDECMP_2D_RD_DATA = 28, + PERF_CMPDECMP_2D_WR_DATA = 29, + PERF_CMPDECMP_VBIF_READ_DATA_UCHE_CH0 = 30, + PERF_CMPDECMP_VBIF_READ_DATA_UCHE_CH1 = 31, + PERF_CMPDECMP_2D_OUTPUT_TRANS = 32, + PERF_CMPDECMP_VBIF_WRITE_DATA_UCHE = 33, + PERF_CMPDECMP_DEPTH_WRITE_FLAG0_COUNT = 34, + PERF_CMPDECMP_COLOR_WRITE_FLAG0_COUNT = 35, + PERF_CMPDECMP_COLOR_WRITE_FLAGALPHA_COUNT = 36, + PERF_CMPDECMP_2D_BUSY_CYCLES = 37, + PERF_CMPDECMP_2D_REORDER_STARVE_CYCLES = 38, + PERF_CMPDECMP_2D_PIXELS = 39, +}; + +enum a6xx_2d_ifmt { + R2D_UNORM8 = 16, + R2D_INT32 = 7, + R2D_INT16 = 6, + R2D_INT8 = 5, + R2D_FLOAT32 = 4, + R2D_FLOAT16 = 3, +}; + +enum a6xx_tex_filter { + A6XX_TEX_NEAREST = 0, + A6XX_TEX_LINEAR = 1, + A6XX_TEX_ANISO = 2, +}; + +enum a6xx_tex_clamp { + A6XX_TEX_REPEAT = 0, + A6XX_TEX_CLAMP_TO_EDGE = 1, + A6XX_TEX_MIRROR_REPEAT = 2, + A6XX_TEX_CLAMP_TO_BORDER = 3, + A6XX_TEX_MIRROR_CLAMP = 4, +}; + +enum a6xx_tex_aniso { + A6XX_TEX_ANISO_1 = 0, + A6XX_TEX_ANISO_2 = 1, + A6XX_TEX_ANISO_4 = 2, + A6XX_TEX_ANISO_8 = 3, + A6XX_TEX_ANISO_16 = 4, +}; + +enum a6xx_tex_swiz { + A6XX_TEX_X = 0, + A6XX_TEX_Y = 1, + A6XX_TEX_Z = 2, + A6XX_TEX_W = 3, + A6XX_TEX_ZERO = 4, + A6XX_TEX_ONE = 5, +}; + +enum a6xx_tex_type { + A6XX_TEX_1D = 0, + A6XX_TEX_2D = 1, + A6XX_TEX_CUBE = 2, + A6XX_TEX_3D = 3, +}; + +#define A6XX_RBBM_INT_0_MASK_RBBM_GPU_IDLE 0x00000001 +#define A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR 0x00000002 +#define A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW 0x00000040 +#define A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR 0x00000080 +#define A6XX_RBBM_INT_0_MASK_CP_SW 0x00000100 +#define A6XX_RBBM_INT_0_MASK_CP_HW_ERROR 0x00000200 +#define A6XX_RBBM_INT_0_MASK_CP_CCU_FLUSH_DEPTH_TS 0x00000400 +#define A6XX_RBBM_INT_0_MASK_CP_CCU_FLUSH_COLOR_TS 0x00000800 +#define A6XX_RBBM_INT_0_MASK_CP_CCU_RESOLVE_TS 0x00001000 +#define 
A6XX_RBBM_INT_0_MASK_CP_IB2 0x00002000 +#define A6XX_RBBM_INT_0_MASK_CP_IB1 0x00004000 +#define A6XX_RBBM_INT_0_MASK_CP_RB 0x00008000 +#define A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS 0x00020000 +#define A6XX_RBBM_INT_0_MASK_CP_WT_DONE_TS 0x00040000 +#define A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS 0x00100000 +#define A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW 0x00400000 +#define A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT 0x00800000 +#define A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS 0x01000000 +#define A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR 0x02000000 +#define A6XX_RBBM_INT_0_MASK_DEBBUS_INTR_0 0x04000000 +#define A6XX_RBBM_INT_0_MASK_DEBBUS_INTR_1 0x08000000 +#define A6XX_RBBM_INT_0_MASK_ISDB_CPU_IRQ 0x40000000 +#define A6XX_RBBM_INT_0_MASK_ISDB_UNDER_DEBUG 0x80000000 +#define A6XX_CP_INT_CP_OPCODE_ERROR 0x00000001 +#define A6XX_CP_INT_CP_UCODE_ERROR 0x00000002 +#define A6XX_CP_INT_CP_HW_FAULT_ERROR 0x00000004 +#define A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR 0x00000010 +#define A6XX_CP_INT_CP_AHB_ERROR 0x00000020 +#define A6XX_CP_INT_CP_VSD_PARITY_ERROR 0x00000040 +#define A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR 0x00000080 +#define REG_A6XX_CP_RB_BASE 0x00000800 + +#define REG_A6XX_CP_RB_BASE_HI 0x00000801 + +#define REG_A6XX_CP_RB_CNTL 0x00000802 + +#define REG_A6XX_CP_RB_RPTR_ADDR_LO 0x00000804 + +#define REG_A6XX_CP_RB_RPTR_ADDR_HI 0x00000805 + +#define REG_A6XX_CP_RB_RPTR 0x00000806 + +#define REG_A6XX_CP_RB_WPTR 0x00000807 + +#define REG_A6XX_CP_SQE_CNTL 0x00000808 + +#define REG_A6XX_CP_HW_FAULT 0x00000821 + +#define REG_A6XX_CP_INTERRUPT_STATUS 0x00000823 + +#define REG_A6XX_CP_PROTECT_STATUS 0x00000824 + +#define REG_A6XX_CP_SQE_INSTR_BASE_LO 0x00000830 + +#define REG_A6XX_CP_SQE_INSTR_BASE_HI 0x00000831 + +#define REG_A6XX_CP_MISC_CNTL 0x00000840 + +#define REG_A6XX_CP_ROQ_THRESHOLDS_1 0x000008c1 + +#define REG_A6XX_CP_ROQ_THRESHOLDS_2 0x000008c2 + +#define REG_A6XX_CP_MEM_POOL_SIZE 0x000008c3 + +#define REG_A6XX_CP_CHICKEN_DBG 0x00000841 + +#define REG_A6XX_CP_ADDR_MODE_CNTL 
0x00000842 + +#define REG_A6XX_CP_DBG_ECO_CNTL 0x00000843 + +#define REG_A6XX_CP_PROTECT_CNTL 0x0000084f + +static inline uint32_t REG_A6XX_CP_SCRATCH(uint32_t i0) { return 0x00000883 + 0x1*i0; } + +static inline uint32_t REG_A6XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000883 + 0x1*i0; } + +static inline uint32_t REG_A6XX_CP_PROTECT(uint32_t i0) { return 0x00000850 + 0x1*i0; } + +static inline uint32_t REG_A6XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000850 + 0x1*i0; } +#define A6XX_CP_PROTECT_REG_BASE_ADDR__MASK 0x0003ffff +#define A6XX_CP_PROTECT_REG_BASE_ADDR__SHIFT 0 +static inline uint32_t A6XX_CP_PROTECT_REG_BASE_ADDR(uint32_t val) +{ + return ((val) << A6XX_CP_PROTECT_REG_BASE_ADDR__SHIFT) & A6XX_CP_PROTECT_REG_BASE_ADDR__MASK; +} +#define A6XX_CP_PROTECT_REG_MASK_LEN__MASK 0x7ffc0000 +#define A6XX_CP_PROTECT_REG_MASK_LEN__SHIFT 18 +static inline uint32_t A6XX_CP_PROTECT_REG_MASK_LEN(uint32_t val) +{ + return ((val) << A6XX_CP_PROTECT_REG_MASK_LEN__SHIFT) & A6XX_CP_PROTECT_REG_MASK_LEN__MASK; +} +#define A6XX_CP_PROTECT_REG_READ 0x80000000 + +#define REG_A6XX_CP_CONTEXT_SWITCH_CNTL 0x000008a0 + +#define REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO 0x000008a1 + +#define REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI 0x000008a2 + +#define REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO 0x000008a3 + +#define REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI 0x000008a4 + +#define REG_A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO 0x000008a5 + +#define REG_A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI 0x000008a6 + +#define REG_A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO 0x000008a7 + +#define REG_A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI 0x000008a8 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_0 0x000008d0 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_1 0x000008d1 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_2 0x000008d2 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_3 0x000008d3 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_4 0x000008d4 + +#define 
REG_A6XX_CP_PERFCTR_CP_SEL_5 0x000008d5 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_6 0x000008d6 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_7 0x000008d7 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_8 0x000008d8 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_9 0x000008d9 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_10 0x000008da + +#define REG_A6XX_CP_PERFCTR_CP_SEL_11 0x000008db + +#define REG_A6XX_CP_PERFCTR_CP_SEL_12 0x000008dc + +#define REG_A6XX_CP_PERFCTR_CP_SEL_13 0x000008dd + +#define REG_A6XX_CP_CRASH_SCRIPT_BASE_LO 0x00000900 + +#define REG_A6XX_CP_CRASH_SCRIPT_BASE_HI 0x00000901 + +#define REG_A6XX_CP_CRASH_DUMP_CNTL 0x00000902 + +#define REG_A6XX_CP_CRASH_DUMP_STATUS 0x00000903 + +#define REG_A6XX_CP_SQE_STAT_ADDR 0x00000908 + +#define REG_A6XX_CP_SQE_STAT_DATA 0x00000909 + +#define REG_A6XX_CP_DRAW_STATE_ADDR 0x0000090a + +#define REG_A6XX_CP_DRAW_STATE_DATA 0x0000090b + +#define REG_A6XX_CP_ROQ_DBG_ADDR 0x0000090c + +#define REG_A6XX_CP_ROQ_DBG_DATA 0x0000090d + +#define REG_A6XX_CP_MEM_POOL_DBG_ADDR 0x0000090e + +#define REG_A6XX_CP_MEM_POOL_DBG_DATA 0x0000090f + +#define REG_A6XX_CP_SQE_UCODE_DBG_ADDR 0x00000910 + +#define REG_A6XX_CP_SQE_UCODE_DBG_DATA 0x00000911 + +#define REG_A6XX_CP_IB1_BASE 0x00000928 + +#define REG_A6XX_CP_IB1_BASE_HI 0x00000929 + +#define REG_A6XX_CP_IB1_REM_SIZE 0x0000092a + +#define REG_A6XX_CP_IB2_BASE 0x0000092b + +#define REG_A6XX_CP_IB2_BASE_HI 0x0000092c + +#define REG_A6XX_CP_IB2_REM_SIZE 0x0000092d + +#define REG_A6XX_CP_ALWAYS_ON_COUNTER_LO 0x00000980 + +#define REG_A6XX_CP_ALWAYS_ON_COUNTER_HI 0x00000981 + +#define REG_A6XX_CP_AHB_CNTL 0x0000098d + +#define REG_A6XX_CP_APERTURE_CNTL_HOST 0x00000a00 + +#define REG_A6XX_CP_APERTURE_CNTL_CD 0x00000a03 + +#define REG_A6XX_VSC_ADDR_MODE_CNTL 0x00000c01 + +#define REG_A6XX_RBBM_INT_0_STATUS 0x00000201 + +#define REG_A6XX_RBBM_STATUS 0x00000210 +#define A6XX_RBBM_STATUS_GPU_BUSY_IGN_AHB 0x00800000 +#define A6XX_RBBM_STATUS_GPU_BUSY_IGN_AHB_CP 0x00400000 +#define A6XX_RBBM_STATUS_HLSQ_BUSY 0x00200000 
+#define A6XX_RBBM_STATUS_VSC_BUSY 0x00100000 +#define A6XX_RBBM_STATUS_TPL1_BUSY 0x00080000 +#define A6XX_RBBM_STATUS_SP_BUSY 0x00040000 +#define A6XX_RBBM_STATUS_UCHE_BUSY 0x00020000 +#define A6XX_RBBM_STATUS_VPC_BUSY 0x00010000 +#define A6XX_RBBM_STATUS_VFD_BUSY 0x00008000 +#define A6XX_RBBM_STATUS_TESS_BUSY 0x00004000 +#define A6XX_RBBM_STATUS_PC_VSD_BUSY 0x00002000 +#define A6XX_RBBM_STATUS_PC_DCALL_BUSY 0x00001000 +#define A6XX_RBBM_STATUS_COM_DCOM_BUSY 0x00000800 +#define A6XX_RBBM_STATUS_LRZ_BUSY 0x00000400 +#define A6XX_RBBM_STATUS_A2D_BUSY 0x00000200 +#define A6XX_RBBM_STATUS_CCU_BUSY 0x00000100 +#define A6XX_RBBM_STATUS_RB_BUSY 0x00000080 +#define A6XX_RBBM_STATUS_RAS_BUSY 0x00000040 +#define A6XX_RBBM_STATUS_TSE_BUSY 0x00000020 +#define A6XX_RBBM_STATUS_VBIF_BUSY 0x00000010 +#define A6XX_RBBM_STATUS_GFX_DBGC_BUSY 0x00000008 +#define A6XX_RBBM_STATUS_CP_BUSY 0x00000004 +#define A6XX_RBBM_STATUS_CP_AHB_BUSY_CP_MASTER 0x00000002 +#define A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER 0x00000001 + +#define REG_A6XX_RBBM_STATUS3 0x00000213 + +#define REG_A6XX_RBBM_VBIF_GX_RESET_STATUS 0x00000215 + +#define REG_A6XX_RBBM_PERFCTR_CP_0_LO 0x00000400 + +#define REG_A6XX_RBBM_PERFCTR_CP_0_HI 0x00000401 + +#define REG_A6XX_RBBM_PERFCTR_CP_1_LO 0x00000402 + +#define REG_A6XX_RBBM_PERFCTR_CP_1_HI 0x00000403 + +#define REG_A6XX_RBBM_PERFCTR_CP_2_LO 0x00000404 + +#define REG_A6XX_RBBM_PERFCTR_CP_2_HI 0x00000405 + +#define REG_A6XX_RBBM_PERFCTR_CP_3_LO 0x00000406 + +#define REG_A6XX_RBBM_PERFCTR_CP_3_HI 0x00000407 + +#define REG_A6XX_RBBM_PERFCTR_CP_4_LO 0x00000408 + +#define REG_A6XX_RBBM_PERFCTR_CP_4_HI 0x00000409 + +#define REG_A6XX_RBBM_PERFCTR_CP_5_LO 0x0000040a + +#define REG_A6XX_RBBM_PERFCTR_CP_5_HI 0x0000040b + +#define REG_A6XX_RBBM_PERFCTR_CP_6_LO 0x0000040c + +#define REG_A6XX_RBBM_PERFCTR_CP_6_HI 0x0000040d + +#define REG_A6XX_RBBM_PERFCTR_CP_7_LO 0x0000040e + +#define REG_A6XX_RBBM_PERFCTR_CP_7_HI 0x0000040f + +#define REG_A6XX_RBBM_PERFCTR_CP_8_LO 0x00000410 + 
+#define REG_A6XX_RBBM_PERFCTR_CP_8_HI 0x00000411 + +#define REG_A6XX_RBBM_PERFCTR_CP_9_LO 0x00000412 + +#define REG_A6XX_RBBM_PERFCTR_CP_9_HI 0x00000413 + +#define REG_A6XX_RBBM_PERFCTR_CP_10_LO 0x00000414 + +#define REG_A6XX_RBBM_PERFCTR_CP_10_HI 0x00000415 + +#define REG_A6XX_RBBM_PERFCTR_CP_11_LO 0x00000416 + +#define REG_A6XX_RBBM_PERFCTR_CP_11_HI 0x00000417 + +#define REG_A6XX_RBBM_PERFCTR_CP_12_LO 0x00000418 + +#define REG_A6XX_RBBM_PERFCTR_CP_12_HI 0x00000419 + +#define REG_A6XX_RBBM_PERFCTR_CP_13_LO 0x0000041a + +#define REG_A6XX_RBBM_PERFCTR_CP_13_HI 0x0000041b + +#define REG_A6XX_RBBM_PERFCTR_RBBM_0_LO 0x0000041c + +#define REG_A6XX_RBBM_PERFCTR_RBBM_0_HI 0x0000041d + +#define REG_A6XX_RBBM_PERFCTR_RBBM_1_LO 0x0000041e + +#define REG_A6XX_RBBM_PERFCTR_RBBM_1_HI 0x0000041f + +#define REG_A6XX_RBBM_PERFCTR_RBBM_2_LO 0x00000420 + +#define REG_A6XX_RBBM_PERFCTR_RBBM_2_HI 0x00000421 + +#define REG_A6XX_RBBM_PERFCTR_RBBM_3_LO 0x00000422 + +#define REG_A6XX_RBBM_PERFCTR_RBBM_3_HI 0x00000423 + +#define REG_A6XX_RBBM_PERFCTR_PC_0_LO 0x00000424 + +#define REG_A6XX_RBBM_PERFCTR_PC_0_HI 0x00000425 + +#define REG_A6XX_RBBM_PERFCTR_PC_1_LO 0x00000426 + +#define REG_A6XX_RBBM_PERFCTR_PC_1_HI 0x00000427 + +#define REG_A6XX_RBBM_PERFCTR_PC_2_LO 0x00000428 + +#define REG_A6XX_RBBM_PERFCTR_PC_2_HI 0x00000429 + +#define REG_A6XX_RBBM_PERFCTR_PC_3_LO 0x0000042a + +#define REG_A6XX_RBBM_PERFCTR_PC_3_HI 0x0000042b + +#define REG_A6XX_RBBM_PERFCTR_PC_4_LO 0x0000042c + +#define REG_A6XX_RBBM_PERFCTR_PC_4_HI 0x0000042d + +#define REG_A6XX_RBBM_PERFCTR_PC_5_LO 0x0000042e + +#define REG_A6XX_RBBM_PERFCTR_PC_5_HI 0x0000042f + +#define REG_A6XX_RBBM_PERFCTR_PC_6_LO 0x00000430 + +#define REG_A6XX_RBBM_PERFCTR_PC_6_HI 0x00000431 + +#define REG_A6XX_RBBM_PERFCTR_PC_7_LO 0x00000432 + +#define REG_A6XX_RBBM_PERFCTR_PC_7_HI 0x00000433 + +#define REG_A6XX_RBBM_PERFCTR_VFD_0_LO 0x00000434 + +#define REG_A6XX_RBBM_PERFCTR_VFD_0_HI 0x00000435 + +#define REG_A6XX_RBBM_PERFCTR_VFD_1_LO 
0x00000436 + +#define REG_A6XX_RBBM_PERFCTR_VFD_1_HI 0x00000437 + +#define REG_A6XX_RBBM_PERFCTR_VFD_2_LO 0x00000438 + +#define REG_A6XX_RBBM_PERFCTR_VFD_2_HI 0x00000439 + +#define REG_A6XX_RBBM_PERFCTR_VFD_3_LO 0x0000043a + +#define REG_A6XX_RBBM_PERFCTR_VFD_3_HI 0x0000043b + +#define REG_A6XX_RBBM_PERFCTR_VFD_4_LO 0x0000043c + +#define REG_A6XX_RBBM_PERFCTR_VFD_4_HI 0x0000043d + +#define REG_A6XX_RBBM_PERFCTR_VFD_5_LO 0x0000043e + +#define REG_A6XX_RBBM_PERFCTR_VFD_5_HI 0x0000043f + +#define REG_A6XX_RBBM_PERFCTR_VFD_6_LO 0x00000440 + +#define REG_A6XX_RBBM_PERFCTR_VFD_6_HI 0x00000441 + +#define REG_A6XX_RBBM_PERFCTR_VFD_7_LO 0x00000442 + +#define REG_A6XX_RBBM_PERFCTR_VFD_7_HI 0x00000443 + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_0_LO 0x00000444 + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_0_HI 0x00000445 + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_1_LO 0x00000446 + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_1_HI 0x00000447 + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_2_LO 0x00000448 + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_2_HI 0x00000449 + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_3_LO 0x0000044a + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_3_HI 0x0000044b + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_4_LO 0x0000044c + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_4_HI 0x0000044d + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_5_LO 0x0000044e + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_5_HI 0x0000044f + +#define REG_A6XX_RBBM_PERFCTR_VPC_0_LO 0x00000450 + +#define REG_A6XX_RBBM_PERFCTR_VPC_0_HI 0x00000451 + +#define REG_A6XX_RBBM_PERFCTR_VPC_1_LO 0x00000452 + +#define REG_A6XX_RBBM_PERFCTR_VPC_1_HI 0x00000453 + +#define REG_A6XX_RBBM_PERFCTR_VPC_2_LO 0x00000454 + +#define REG_A6XX_RBBM_PERFCTR_VPC_2_HI 0x00000455 + +#define REG_A6XX_RBBM_PERFCTR_VPC_3_LO 0x00000456 + +#define REG_A6XX_RBBM_PERFCTR_VPC_3_HI 0x00000457 + +#define REG_A6XX_RBBM_PERFCTR_VPC_4_LO 0x00000458 + +#define REG_A6XX_RBBM_PERFCTR_VPC_4_HI 0x00000459 + +#define REG_A6XX_RBBM_PERFCTR_VPC_5_LO 0x0000045a + +#define REG_A6XX_RBBM_PERFCTR_VPC_5_HI 0x0000045b + +#define 
REG_A6XX_RBBM_PERFCTR_CCU_0_LO 0x0000045c + +#define REG_A6XX_RBBM_PERFCTR_CCU_0_HI 0x0000045d + +#define REG_A6XX_RBBM_PERFCTR_CCU_1_LO 0x0000045e + +#define REG_A6XX_RBBM_PERFCTR_CCU_1_HI 0x0000045f + +#define REG_A6XX_RBBM_PERFCTR_CCU_2_LO 0x00000460 + +#define REG_A6XX_RBBM_PERFCTR_CCU_2_HI 0x00000461 + +#define REG_A6XX_RBBM_PERFCTR_CCU_3_LO 0x00000462 + +#define REG_A6XX_RBBM_PERFCTR_CCU_3_HI 0x00000463 + +#define REG_A6XX_RBBM_PERFCTR_CCU_4_LO 0x00000464 + +#define REG_A6XX_RBBM_PERFCTR_CCU_4_HI 0x00000465 + +#define REG_A6XX_RBBM_PERFCTR_TSE_0_LO 0x00000466 + +#define REG_A6XX_RBBM_PERFCTR_TSE_0_HI 0x00000467 + +#define REG_A6XX_RBBM_PERFCTR_TSE_1_LO 0x00000468 + +#define REG_A6XX_RBBM_PERFCTR_TSE_1_HI 0x00000469 + +#define REG_A6XX_RBBM_PERFCTR_TSE_2_LO 0x0000046a + +#define REG_A6XX_RBBM_PERFCTR_TSE_2_HI 0x0000046b + +#define REG_A6XX_RBBM_PERFCTR_TSE_3_LO 0x0000046c + +#define REG_A6XX_RBBM_PERFCTR_TSE_3_HI 0x0000046d + +#define REG_A6XX_RBBM_PERFCTR_RAS_0_LO 0x0000046e + +#define REG_A6XX_RBBM_PERFCTR_RAS_0_HI 0x0000046f + +#define REG_A6XX_RBBM_PERFCTR_RAS_1_LO 0x00000470 + +#define REG_A6XX_RBBM_PERFCTR_RAS_1_HI 0x00000471 + +#define REG_A6XX_RBBM_PERFCTR_RAS_2_LO 0x00000472 + +#define REG_A6XX_RBBM_PERFCTR_RAS_2_HI 0x00000473 + +#define REG_A6XX_RBBM_PERFCTR_RAS_3_LO 0x00000474 + +#define REG_A6XX_RBBM_PERFCTR_RAS_3_HI 0x00000475 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_0_LO 0x00000476 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_0_HI 0x00000477 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_1_LO 0x00000478 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_1_HI 0x00000479 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_2_LO 0x0000047a + +#define REG_A6XX_RBBM_PERFCTR_UCHE_2_HI 
0x0000047b + +#define REG_A6XX_RBBM_PERFCTR_UCHE_3_LO 0x0000047c + +#define REG_A6XX_RBBM_PERFCTR_UCHE_3_HI 0x0000047d + +#define REG_A6XX_RBBM_PERFCTR_UCHE_4_LO 0x0000047e + +#define REG_A6XX_RBBM_PERFCTR_UCHE_4_HI 0x0000047f + +#define REG_A6XX_RBBM_PERFCTR_UCHE_5_LO 0x00000480 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_5_HI 0x00000481 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_6_LO 0x00000482 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_6_HI 0x00000483 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_7_LO 0x00000484 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_7_HI 0x00000485 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_8_LO 0x00000486 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_8_HI 0x00000487 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_9_LO 0x00000488 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_9_HI 0x00000489 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_10_LO 0x0000048a + +#define REG_A6XX_RBBM_PERFCTR_UCHE_10_HI 0x0000048b + +#define REG_A6XX_RBBM_PERFCTR_UCHE_11_LO 0x0000048c + +#define REG_A6XX_RBBM_PERFCTR_UCHE_11_HI 0x0000048d + +#define REG_A6XX_RBBM_PERFCTR_TP_0_LO 0x0000048e + +#define REG_A6XX_RBBM_PERFCTR_TP_0_HI 0x0000048f + +#define REG_A6XX_RBBM_PERFCTR_TP_1_LO 0x00000490 + +#define REG_A6XX_RBBM_PERFCTR_TP_1_HI 0x00000491 + +#define REG_A6XX_RBBM_PERFCTR_TP_2_LO 0x00000492 + +#define REG_A6XX_RBBM_PERFCTR_TP_2_HI 0x00000493 + +#define REG_A6XX_RBBM_PERFCTR_TP_3_LO 0x00000494 + +#define REG_A6XX_RBBM_PERFCTR_TP_3_HI 0x00000495 + +#define REG_A6XX_RBBM_PERFCTR_TP_4_LO 0x00000496 + +#define REG_A6XX_RBBM_PERFCTR_TP_4_HI 0x00000497 + +#define REG_A6XX_RBBM_PERFCTR_TP_5_LO 0x00000498 + +#define REG_A6XX_RBBM_PERFCTR_TP_5_HI 0x00000499 + +#define REG_A6XX_RBBM_PERFCTR_TP_6_LO 0x0000049a + +#define REG_A6XX_RBBM_PERFCTR_TP_6_HI 0x0000049b + +#define REG_A6XX_RBBM_PERFCTR_TP_7_LO 0x0000049c + +#define REG_A6XX_RBBM_PERFCTR_TP_7_HI 0x0000049d + +#define REG_A6XX_RBBM_PERFCTR_TP_8_LO 0x0000049e + +#define REG_A6XX_RBBM_PERFCTR_TP_8_HI 0x0000049f + +#define REG_A6XX_RBBM_PERFCTR_TP_9_LO 0x000004a0 + +#define 
REG_A6XX_RBBM_PERFCTR_TP_9_HI 0x000004a1 + +#define REG_A6XX_RBBM_PERFCTR_TP_10_LO 0x000004a2 + +#define REG_A6XX_RBBM_PERFCTR_TP_10_HI 0x000004a3 + +#define REG_A6XX_RBBM_PERFCTR_TP_11_LO 0x000004a4 + +#define REG_A6XX_RBBM_PERFCTR_TP_11_HI 0x000004a5 + +#define REG_A6XX_RBBM_PERFCTR_SP_0_LO 0x000004a6 + +#define REG_A6XX_RBBM_PERFCTR_SP_0_HI 0x000004a7 + +#define REG_A6XX_RBBM_PERFCTR_SP_1_LO 0x000004a8 + +#define REG_A6XX_RBBM_PERFCTR_SP_1_HI 0x000004a9 + +#define REG_A6XX_RBBM_PERFCTR_SP_2_LO 0x000004aa + +#define REG_A6XX_RBBM_PERFCTR_SP_2_HI 0x000004ab + +#define REG_A6XX_RBBM_PERFCTR_SP_3_LO 0x000004ac + +#define REG_A6XX_RBBM_PERFCTR_SP_3_HI 0x000004ad + +#define REG_A6XX_RBBM_PERFCTR_SP_4_LO 0x000004ae + +#define REG_A6XX_RBBM_PERFCTR_SP_4_HI 0x000004af + +#define REG_A6XX_RBBM_PERFCTR_SP_5_LO 0x000004b0 + +#define REG_A6XX_RBBM_PERFCTR_SP_5_HI 0x000004b1 + +#define REG_A6XX_RBBM_PERFCTR_SP_6_LO 0x000004b2 + +#define REG_A6XX_RBBM_PERFCTR_SP_6_HI 0x000004b3 + +#define REG_A6XX_RBBM_PERFCTR_SP_7_LO 0x000004b4 + +#define REG_A6XX_RBBM_PERFCTR_SP_7_HI 0x000004b5 + +#define REG_A6XX_RBBM_PERFCTR_SP_8_LO 0x000004b6 + +#define REG_A6XX_RBBM_PERFCTR_SP_8_HI 0x000004b7 + +#define REG_A6XX_RBBM_PERFCTR_SP_9_LO 0x000004b8 + +#define REG_A6XX_RBBM_PERFCTR_SP_9_HI 0x000004b9 + +#define REG_A6XX_RBBM_PERFCTR_SP_10_LO 0x000004ba + +#define REG_A6XX_RBBM_PERFCTR_SP_10_HI 0x000004bb + +#define REG_A6XX_RBBM_PERFCTR_SP_11_LO 0x000004bc + +#define REG_A6XX_RBBM_PERFCTR_SP_11_HI 0x000004bd + +#define REG_A6XX_RBBM_PERFCTR_SP_12_LO 0x000004be + +#define REG_A6XX_RBBM_PERFCTR_SP_12_HI 0x000004bf + +#define REG_A6XX_RBBM_PERFCTR_SP_13_LO 0x000004c0 + +#define REG_A6XX_RBBM_PERFCTR_SP_13_HI 0x000004c1 + +#define REG_A6XX_RBBM_PERFCTR_SP_14_LO 0x000004c2 + +#define REG_A6XX_RBBM_PERFCTR_SP_14_HI 0x000004c3 + +#define REG_A6XX_RBBM_PERFCTR_SP_15_LO 0x000004c4 + +#define REG_A6XX_RBBM_PERFCTR_SP_15_HI 0x000004c5 + +#define REG_A6XX_RBBM_PERFCTR_SP_16_LO 0x000004c6 + +#define 
REG_A6XX_RBBM_PERFCTR_SP_16_HI 0x000004c7 + +#define REG_A6XX_RBBM_PERFCTR_SP_17_LO 0x000004c8 + +#define REG_A6XX_RBBM_PERFCTR_SP_17_HI 0x000004c9 + +#define REG_A6XX_RBBM_PERFCTR_SP_18_LO 0x000004ca + +#define REG_A6XX_RBBM_PERFCTR_SP_18_HI 0x000004cb + +#define REG_A6XX_RBBM_PERFCTR_SP_19_LO 0x000004cc + +#define REG_A6XX_RBBM_PERFCTR_SP_19_HI 0x000004cd + +#define REG_A6XX_RBBM_PERFCTR_SP_20_LO 0x000004ce + +#define REG_A6XX_RBBM_PERFCTR_SP_20_HI 0x000004cf + +#define REG_A6XX_RBBM_PERFCTR_SP_21_LO 0x000004d0 + +#define REG_A6XX_RBBM_PERFCTR_SP_21_HI 0x000004d1 + +#define REG_A6XX_RBBM_PERFCTR_SP_22_LO 0x000004d2 + +#define REG_A6XX_RBBM_PERFCTR_SP_22_HI 0x000004d3 + +#define REG_A6XX_RBBM_PERFCTR_SP_23_LO 0x000004d4 + +#define REG_A6XX_RBBM_PERFCTR_SP_23_HI 0x000004d5 + +#define REG_A6XX_RBBM_PERFCTR_RB_0_LO 0x000004d6 + +#define REG_A6XX_RBBM_PERFCTR_RB_0_HI 0x000004d7 + +#define REG_A6XX_RBBM_PERFCTR_RB_1_LO 0x000004d8 + +#define REG_A6XX_RBBM_PERFCTR_RB_1_HI 0x000004d9 + +#define REG_A6XX_RBBM_PERFCTR_RB_2_LO 0x000004da + +#define REG_A6XX_RBBM_PERFCTR_RB_2_HI 0x000004db + +#define REG_A6XX_RBBM_PERFCTR_RB_3_LO 0x000004dc + +#define REG_A6XX_RBBM_PERFCTR_RB_3_HI 0x000004dd + +#define REG_A6XX_RBBM_PERFCTR_RB_4_LO 0x000004de + +#define REG_A6XX_RBBM_PERFCTR_RB_4_HI 0x000004df + +#define REG_A6XX_RBBM_PERFCTR_RB_5_LO 0x000004e0 + +#define REG_A6XX_RBBM_PERFCTR_RB_5_HI 0x000004e1 + +#define REG_A6XX_RBBM_PERFCTR_RB_6_LO 0x000004e2 + +#define REG_A6XX_RBBM_PERFCTR_RB_6_HI 0x000004e3 + +#define REG_A6XX_RBBM_PERFCTR_RB_7_LO 0x000004e4 + +#define REG_A6XX_RBBM_PERFCTR_RB_7_HI 0x000004e5 + +#define REG_A6XX_RBBM_PERFCTR_VSC_0_LO 0x000004e6 + +#define REG_A6XX_RBBM_PERFCTR_VSC_0_HI 0x000004e7 + +#define REG_A6XX_RBBM_PERFCTR_VSC_1_LO 0x000004e8 + +#define REG_A6XX_RBBM_PERFCTR_VSC_1_HI 0x000004e9 + +#define REG_A6XX_RBBM_PERFCTR_LRZ_0_LO 0x000004ea + +#define REG_A6XX_RBBM_PERFCTR_LRZ_0_HI 0x000004eb + +#define REG_A6XX_RBBM_PERFCTR_LRZ_1_LO 0x000004ec + +#define 
REG_A6XX_RBBM_PERFCTR_LRZ_1_HI 0x000004ed + +#define REG_A6XX_RBBM_PERFCTR_LRZ_2_LO 0x000004ee + +#define REG_A6XX_RBBM_PERFCTR_LRZ_2_HI 0x000004ef + +#define REG_A6XX_RBBM_PERFCTR_LRZ_3_LO 0x000004f0 + +#define REG_A6XX_RBBM_PERFCTR_LRZ_3_HI 0x000004f1 + +#define REG_A6XX_RBBM_PERFCTR_CMP_0_LO 0x000004f2 + +#define REG_A6XX_RBBM_PERFCTR_CMP_0_HI 0x000004f3 + +#define REG_A6XX_RBBM_PERFCTR_CMP_1_LO 0x000004f4 + +#define REG_A6XX_RBBM_PERFCTR_CMP_1_HI 0x000004f5 + +#define REG_A6XX_RBBM_PERFCTR_CMP_2_LO 0x000004f6 + +#define REG_A6XX_RBBM_PERFCTR_CMP_2_HI 0x000004f7 + +#define REG_A6XX_RBBM_PERFCTR_CMP_3_LO 0x000004f8 + +#define REG_A6XX_RBBM_PERFCTR_CMP_3_HI 0x000004f9 + +#define REG_A6XX_RBBM_PERFCTR_CNTL 0x00000500 + +#define REG_A6XX_RBBM_PERFCTR_LOAD_CMD0 0x00000501 + +#define REG_A6XX_RBBM_PERFCTR_LOAD_CMD1 0x00000502 + +#define REG_A6XX_RBBM_PERFCTR_LOAD_CMD2 0x00000503 + +#define REG_A6XX_RBBM_PERFCTR_LOAD_CMD3 0x00000504 + +#define REG_A6XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000505 + +#define REG_A6XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000506 + +#define REG_A6XX_RBBM_PERFCTR_RBBM_SEL_0 0x00000507 + +#define REG_A6XX_RBBM_PERFCTR_RBBM_SEL_1 0x00000508 + +#define REG_A6XX_RBBM_PERFCTR_RBBM_SEL_2 0x00000509 + +#define REG_A6XX_RBBM_PERFCTR_RBBM_SEL_3 0x0000050a + +#define REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED 0x0000050b + +#define REG_A6XX_RBBM_ISDB_CNT 0x00000533 + +#define REG_A6XX_RBBM_SECVID_TRUST_CNTL 0x0000f400 + +#define REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO 0x0000f800 + +#define REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0x0000f801 + +#define REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE 0x0000f802 + +#define REG_A6XX_RBBM_SECVID_TSB_CNTL 0x0000f803 + +#define REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL 0x0000f810 + +#define REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL 0x00000010 + +#define REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL 0x0000001f + +#define REG_A6XX_RBBM_INT_CLEAR_CMD 0x00000037 + +#define REG_A6XX_RBBM_INT_0_MASK 0x00000038 + +#define REG_A6XX_RBBM_SP_HYST_CNT 0x00000042 
+ +#define REG_A6XX_RBBM_SW_RESET_CMD 0x00000043 + +#define REG_A6XX_RBBM_RAC_THRESHOLD_CNT 0x00000044 + +#define REG_A6XX_RBBM_BLOCK_SW_RESET_CMD 0x00000045 + +#define REG_A6XX_RBBM_BLOCK_SW_RESET_CMD2 0x00000046 + +#define REG_A6XX_RBBM_CLOCK_CNTL 0x000000ae + +#define REG_A6XX_RBBM_CLOCK_CNTL_SP0 0x000000b0 + +#define REG_A6XX_RBBM_CLOCK_CNTL_SP1 0x000000b1 + +#define REG_A6XX_RBBM_CLOCK_CNTL_SP2 0x000000b2 + +#define REG_A6XX_RBBM_CLOCK_CNTL_SP3 0x000000b3 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_SP0 0x000000b4 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_SP1 0x000000b5 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_SP2 0x000000b6 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_SP3 0x000000b7 + +#define REG_A6XX_RBBM_CLOCK_DELAY_SP0 0x000000b8 + +#define REG_A6XX_RBBM_CLOCK_DELAY_SP1 0x000000b9 + +#define REG_A6XX_RBBM_CLOCK_DELAY_SP2 0x000000ba + +#define REG_A6XX_RBBM_CLOCK_DELAY_SP3 0x000000bb + +#define REG_A6XX_RBBM_CLOCK_HYST_SP0 0x000000bc + +#define REG_A6XX_RBBM_CLOCK_HYST_SP1 0x000000bd + +#define REG_A6XX_RBBM_CLOCK_HYST_SP2 0x000000be + +#define REG_A6XX_RBBM_CLOCK_HYST_SP3 0x000000bf + +#define REG_A6XX_RBBM_CLOCK_CNTL_TP0 0x000000c0 + +#define REG_A6XX_RBBM_CLOCK_CNTL_TP1 0x000000c1 + +#define REG_A6XX_RBBM_CLOCK_CNTL_TP2 0x000000c2 + +#define REG_A6XX_RBBM_CLOCK_CNTL_TP3 0x000000c3 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_TP0 0x000000c4 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_TP1 0x000000c5 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_TP2 0x000000c6 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_TP3 0x000000c7 + +#define REG_A6XX_RBBM_CLOCK_CNTL3_TP0 0x000000c8 + +#define REG_A6XX_RBBM_CLOCK_CNTL3_TP1 0x000000c9 + +#define REG_A6XX_RBBM_CLOCK_CNTL3_TP2 0x000000ca + +#define REG_A6XX_RBBM_CLOCK_CNTL3_TP3 0x000000cb + +#define REG_A6XX_RBBM_CLOCK_CNTL4_TP0 0x000000cc + +#define REG_A6XX_RBBM_CLOCK_CNTL4_TP1 0x000000cd + +#define REG_A6XX_RBBM_CLOCK_CNTL4_TP2 0x000000ce + +#define REG_A6XX_RBBM_CLOCK_CNTL4_TP3 0x000000cf + +#define REG_A6XX_RBBM_CLOCK_DELAY_TP0 0x000000d0 + +#define 
REG_A6XX_RBBM_CLOCK_DELAY_TP1 0x000000d1 + +#define REG_A6XX_RBBM_CLOCK_DELAY_TP2 0x000000d2 + +#define REG_A6XX_RBBM_CLOCK_DELAY_TP3 0x000000d3 + +#define REG_A6XX_RBBM_CLOCK_DELAY2_TP0 0x000000d4 + +#define REG_A6XX_RBBM_CLOCK_DELAY2_TP1 0x000000d5 + +#define REG_A6XX_RBBM_CLOCK_DELAY2_TP2 0x000000d6 + +#define REG_A6XX_RBBM_CLOCK_DELAY2_TP3 0x000000d7 + +#define REG_A6XX_RBBM_CLOCK_DELAY3_TP0 0x000000d8 + +#define REG_A6XX_RBBM_CLOCK_DELAY3_TP1 0x000000d9 + +#define REG_A6XX_RBBM_CLOCK_DELAY3_TP2 0x000000da + +#define REG_A6XX_RBBM_CLOCK_DELAY3_TP3 0x000000db + +#define REG_A6XX_RBBM_CLOCK_DELAY4_TP0 0x000000dc + +#define REG_A6XX_RBBM_CLOCK_DELAY4_TP1 0x000000dd + +#define REG_A6XX_RBBM_CLOCK_DELAY4_TP2 0x000000de + +#define REG_A6XX_RBBM_CLOCK_DELAY4_TP3 0x000000df + +#define REG_A6XX_RBBM_CLOCK_HYST_TP0 0x000000e0 + +#define REG_A6XX_RBBM_CLOCK_HYST_TP1 0x000000e1 + +#define REG_A6XX_RBBM_CLOCK_HYST_TP2 0x000000e2 + +#define REG_A6XX_RBBM_CLOCK_HYST_TP3 0x000000e3 + +#define REG_A6XX_RBBM_CLOCK_HYST2_TP0 0x000000e4 + +#define REG_A6XX_RBBM_CLOCK_HYST2_TP1 0x000000e5 + +#define REG_A6XX_RBBM_CLOCK_HYST2_TP2 0x000000e6 + +#define REG_A6XX_RBBM_CLOCK_HYST2_TP3 0x000000e7 + +#define REG_A6XX_RBBM_CLOCK_HYST3_TP0 0x000000e8 + +#define REG_A6XX_RBBM_CLOCK_HYST3_TP1 0x000000e9 + +#define REG_A6XX_RBBM_CLOCK_HYST3_TP2 0x000000ea + +#define REG_A6XX_RBBM_CLOCK_HYST3_TP3 0x000000eb + +#define REG_A6XX_RBBM_CLOCK_HYST4_TP0 0x000000ec + +#define REG_A6XX_RBBM_CLOCK_HYST4_TP1 0x000000ed + +#define REG_A6XX_RBBM_CLOCK_HYST4_TP2 0x000000ee + +#define REG_A6XX_RBBM_CLOCK_HYST4_TP3 0x000000ef + +#define REG_A6XX_RBBM_CLOCK_CNTL_RB0 0x000000f0 + +#define REG_A6XX_RBBM_CLOCK_CNTL_RB1 0x000000f1 + +#define REG_A6XX_RBBM_CLOCK_CNTL_RB2 0x000000f2 + +#define REG_A6XX_RBBM_CLOCK_CNTL_RB3 0x000000f3 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_RB0 0x000000f4 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_RB1 0x000000f5 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_RB2 0x000000f6 + +#define 
REG_A6XX_RBBM_CLOCK_CNTL2_RB3 0x000000f7 + +#define REG_A6XX_RBBM_CLOCK_CNTL_CCU0 0x000000f8 + +#define REG_A6XX_RBBM_CLOCK_CNTL_CCU1 0x000000f9 + +#define REG_A6XX_RBBM_CLOCK_CNTL_CCU2 0x000000fa + +#define REG_A6XX_RBBM_CLOCK_CNTL_CCU3 0x000000fb + +#define REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0 0x00000100 + +#define REG_A6XX_RBBM_CLOCK_HYST_RB_CCU1 0x00000101 + +#define REG_A6XX_RBBM_CLOCK_HYST_RB_CCU2 0x00000102 + +#define REG_A6XX_RBBM_CLOCK_HYST_RB_CCU3 0x00000103 + +#define REG_A6XX_RBBM_CLOCK_CNTL_RAC 0x00000104 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_RAC 0x00000105 + +#define REG_A6XX_RBBM_CLOCK_DELAY_RAC 0x00000106 + +#define REG_A6XX_RBBM_CLOCK_HYST_RAC 0x00000107 + +#define REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM 0x00000108 + +#define REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x00000109 + +#define REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x0000010a + +#define REG_A6XX_RBBM_CLOCK_CNTL_UCHE 0x0000010b + +#define REG_A6XX_RBBM_CLOCK_CNTL2_UCHE 0x0000010c + +#define REG_A6XX_RBBM_CLOCK_CNTL3_UCHE 0x0000010d + +#define REG_A6XX_RBBM_CLOCK_CNTL4_UCHE 0x0000010e + +#define REG_A6XX_RBBM_CLOCK_DELAY_UCHE 0x0000010f + +#define REG_A6XX_RBBM_CLOCK_HYST_UCHE 0x00000110 + +#define REG_A6XX_RBBM_CLOCK_MODE_VFD 0x00000111 + +#define REG_A6XX_RBBM_CLOCK_DELAY_VFD 0x00000112 + +#define REG_A6XX_RBBM_CLOCK_HYST_VFD 0x00000113 + +#define REG_A6XX_RBBM_CLOCK_MODE_GPC 0x00000114 + +#define REG_A6XX_RBBM_CLOCK_DELAY_GPC 0x00000115 + +#define REG_A6XX_RBBM_CLOCK_HYST_GPC 0x00000116 + +#define REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2 0x00000117 + +#define REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX 0x00000118 + +#define REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX 0x00000119 + +#define REG_A6XX_RBBM_CLOCK_HYST_GMU_GX 0x0000011a + +#define REG_A6XX_RBBM_CLOCK_MODE_HLSQ 0x0000011b + +#define REG_A6XX_RBBM_CLOCK_DELAY_HLSQ 0x0000011c + +#define REG_A6XX_DBGC_CFG_DBGBUS_SEL_A 0x00000600 + +#define REG_A6XX_DBGC_CFG_DBGBUS_SEL_B 0x00000601 + +#define REG_A6XX_DBGC_CFG_DBGBUS_SEL_C 0x00000602 + +#define 
REG_A6XX_DBGC_CFG_DBGBUS_SEL_D 0x00000603 +#define A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__MASK 0x000000ff +#define A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__SHIFT 0 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__SHIFT) & A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__MASK 0x0000ff00 +#define A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__SHIFT 8 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__SHIFT) & A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__MASK; +} + +#define REG_A6XX_DBGC_CFG_DBGBUS_CNTLT 0x00000604 +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__MASK 0x0000003f +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__SHIFT 0 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__SHIFT) & A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU__MASK 0x00007000 +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU__SHIFT 12 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU__SHIFT) & A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT__MASK 0xf0000000 +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT__SHIFT 28 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT__SHIFT) & A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT__MASK; +} + +#define REG_A6XX_DBGC_CFG_DBGBUS_CNTLM 0x00000605 +#define A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__MASK 0x0f000000 +#define A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__SHIFT 24 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__SHIFT) & A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__MASK; +} + +#define REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0 
0x00000608 + +#define REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1 0x00000609 + +#define REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2 0x0000060a + +#define REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3 0x0000060b + +#define REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0 0x0000060c + +#define REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1 0x0000060d + +#define REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2 0x0000060e + +#define REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3 0x0000060f + +#define REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0 0x00000610 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__MASK 0x0000000f +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__SHIFT 0 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__MASK 0x000000f0 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__SHIFT 4 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__MASK 0x00000f00 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__SHIFT 8 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__MASK 0x0000f000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__SHIFT 12 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__MASK 0x000f0000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__SHIFT 16 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__MASK; +} +#define 
A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__MASK 0x00f00000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__SHIFT 20 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__MASK 0x0f000000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__SHIFT 24 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__MASK 0xf0000000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__SHIFT 28 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__MASK; +} + +#define REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1 0x00000611 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__MASK 0x0000000f +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__SHIFT 0 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__MASK 0x000000f0 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__SHIFT 4 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__MASK 0x00000f00 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__SHIFT 8 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__MASK 0x0000f000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__SHIFT 12 
+static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__MASK 0x000f0000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__SHIFT 16 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__MASK 0x00f00000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__SHIFT 20 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__MASK 0x0f000000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__SHIFT 24 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__MASK 0xf0000000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__SHIFT 28 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__MASK; +} + +#define REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1 0x0000062f + +#define REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2 0x00000630 + +#define REG_A6XX_VSC_PERFCTR_VSC_SEL_0 0x00000cd8 + +#define REG_A6XX_VSC_PERFCTR_VSC_SEL_1 0x00000cd9 + +#define REG_A6XX_GRAS_ADDR_MODE_CNTL 0x00008601 + +#define REG_A6XX_GRAS_PERFCTR_TSE_SEL_0 0x00008610 + +#define REG_A6XX_GRAS_PERFCTR_TSE_SEL_1 0x00008611 + +#define REG_A6XX_GRAS_PERFCTR_TSE_SEL_2 0x00008612 + +#define REG_A6XX_GRAS_PERFCTR_TSE_SEL_3 0x00008613 + +#define REG_A6XX_GRAS_PERFCTR_RAS_SEL_0 0x00008614 + +#define 
REG_A6XX_GRAS_PERFCTR_RAS_SEL_1 0x00008615 + +#define REG_A6XX_GRAS_PERFCTR_RAS_SEL_2 0x00008616 + +#define REG_A6XX_GRAS_PERFCTR_RAS_SEL_3 0x00008617 + +#define REG_A6XX_GRAS_PERFCTR_LRZ_SEL_0 0x00008618 + +#define REG_A6XX_GRAS_PERFCTR_LRZ_SEL_1 0x00008619 + +#define REG_A6XX_GRAS_PERFCTR_LRZ_SEL_2 0x0000861a + +#define REG_A6XX_GRAS_PERFCTR_LRZ_SEL_3 0x0000861b + +#define REG_A6XX_RB_ADDR_MODE_CNTL 0x00008e05 + +#define REG_A6XX_RB_NC_MODE_CNTL 0x00008e08 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_0 0x00008e10 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_1 0x00008e11 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_2 0x00008e12 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_3 0x00008e13 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_4 0x00008e14 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_5 0x00008e15 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_6 0x00008e16 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_7 0x00008e17 + +#define REG_A6XX_RB_PERFCTR_CCU_SEL_0 0x00008e18 + +#define REG_A6XX_RB_PERFCTR_CCU_SEL_1 0x00008e19 + +#define REG_A6XX_RB_PERFCTR_CCU_SEL_2 0x00008e1a + +#define REG_A6XX_RB_PERFCTR_CCU_SEL_3 0x00008e1b + +#define REG_A6XX_RB_PERFCTR_CCU_SEL_4 0x00008e1c + +#define REG_A6XX_RB_PERFCTR_CMP_SEL_0 0x00008e2c + +#define REG_A6XX_RB_PERFCTR_CMP_SEL_1 0x00008e2d + +#define REG_A6XX_RB_PERFCTR_CMP_SEL_2 0x00008e2e + +#define REG_A6XX_RB_PERFCTR_CMP_SEL_3 0x00008e2f + +#define REG_A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD 0x00008e3d + +#define REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE 0x00008e50 + +#define REG_A6XX_PC_DBG_ECO_CNTL 0x00009e00 + +#define REG_A6XX_PC_ADDR_MODE_CNTL 0x00009e01 + +#define REG_A6XX_PC_PERFCTR_PC_SEL_0 0x00009e34 + +#define REG_A6XX_PC_PERFCTR_PC_SEL_1 0x00009e35 + +#define REG_A6XX_PC_PERFCTR_PC_SEL_2 0x00009e36 + +#define REG_A6XX_PC_PERFCTR_PC_SEL_3 0x00009e37 + +#define REG_A6XX_PC_PERFCTR_PC_SEL_4 0x00009e38 + +#define REG_A6XX_PC_PERFCTR_PC_SEL_5 0x00009e39 + +#define REG_A6XX_PC_PERFCTR_PC_SEL_6 0x00009e3a + +#define REG_A6XX_PC_PERFCTR_PC_SEL_7 0x00009e3b + +#define 
REG_A6XX_HLSQ_ADDR_MODE_CNTL 0x0000be05 + +#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_0 0x0000be10 + +#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_1 0x0000be11 + +#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_2 0x0000be12 + +#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_3 0x0000be13 + +#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_4 0x0000be14 + +#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_5 0x0000be15 + +#define REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE 0x0000c800 + +#define REG_A6XX_HLSQ_DBG_READ_SEL 0x0000d000 + +#define REG_A6XX_VFD_ADDR_MODE_CNTL 0x0000a601 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_0 0x0000a610 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_1 0x0000a611 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_2 0x0000a612 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_3 0x0000a613 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_4 0x0000a614 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_5 0x0000a615 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_6 0x0000a616 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_7 0x0000a617 + +#define REG_A6XX_VPC_ADDR_MODE_CNTL 0x00009601 + +#define REG_A6XX_VPC_PERFCTR_VPC_SEL_0 0x00009604 + +#define REG_A6XX_VPC_PERFCTR_VPC_SEL_1 0x00009605 + +#define REG_A6XX_VPC_PERFCTR_VPC_SEL_2 0x00009606 + +#define REG_A6XX_VPC_PERFCTR_VPC_SEL_3 0x00009607 + +#define REG_A6XX_VPC_PERFCTR_VPC_SEL_4 0x00009608 + +#define REG_A6XX_VPC_PERFCTR_VPC_SEL_5 0x00009609 + +#define REG_A6XX_UCHE_ADDR_MODE_CNTL 0x00000e00 + +#define REG_A6XX_UCHE_MODE_CNTL 0x00000e01 + +#define REG_A6XX_UCHE_WRITE_RANGE_MAX_LO 0x00000e05 + +#define REG_A6XX_UCHE_WRITE_RANGE_MAX_HI 0x00000e06 + +#define REG_A6XX_UCHE_WRITE_THRU_BASE_LO 0x00000e07 + +#define REG_A6XX_UCHE_WRITE_THRU_BASE_HI 0x00000e08 + +#define REG_A6XX_UCHE_TRAP_BASE_LO 0x00000e09 + +#define REG_A6XX_UCHE_TRAP_BASE_HI 0x00000e0a + +#define REG_A6XX_UCHE_GMEM_RANGE_MIN_LO 0x00000e0b + +#define REG_A6XX_UCHE_GMEM_RANGE_MIN_HI 0x00000e0c + +#define REG_A6XX_UCHE_GMEM_RANGE_MAX_LO 0x00000e0d + +#define REG_A6XX_UCHE_GMEM_RANGE_MAX_HI 0x00000e0e + +#define REG_A6XX_UCHE_CACHE_WAYS 0x00000e17 + 
+#define REG_A6XX_UCHE_FILTER_CNTL 0x00000e18 + +#define REG_A6XX_UCHE_CLIENT_PF 0x00000e19 +#define A6XX_UCHE_CLIENT_PF_PERFSEL__MASK 0x000000ff +#define A6XX_UCHE_CLIENT_PF_PERFSEL__SHIFT 0 +static inline uint32_t A6XX_UCHE_CLIENT_PF_PERFSEL(uint32_t val) +{ + return ((val) << A6XX_UCHE_CLIENT_PF_PERFSEL__SHIFT) & A6XX_UCHE_CLIENT_PF_PERFSEL__MASK; +} + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_0 0x00000e1c + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_1 0x00000e1d + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_2 0x00000e1e + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_3 0x00000e1f + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_4 0x00000e20 + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_5 0x00000e21 + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_6 0x00000e22 + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_7 0x00000e23 + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_8 0x00000e24 + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_9 0x00000e25 + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_10 0x00000e26 + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_11 0x00000e27 + +#define REG_A6XX_SP_ADDR_MODE_CNTL 0x0000ae01 + +#define REG_A6XX_SP_NC_MODE_CNTL 0x0000ae02 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_0 0x0000ae10 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_1 0x0000ae11 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_2 0x0000ae12 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_3 0x0000ae13 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_4 0x0000ae14 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_5 0x0000ae15 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_6 0x0000ae16 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_7 0x0000ae17 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_8 0x0000ae18 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_9 0x0000ae19 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_10 0x0000ae1a + +#define REG_A6XX_SP_PERFCTR_SP_SEL_11 0x0000ae1b + +#define REG_A6XX_SP_PERFCTR_SP_SEL_12 0x0000ae1c + +#define REG_A6XX_SP_PERFCTR_SP_SEL_13 0x0000ae1d + +#define REG_A6XX_SP_PERFCTR_SP_SEL_14 0x0000ae1e + +#define REG_A6XX_SP_PERFCTR_SP_SEL_15 0x0000ae1f + +#define REG_A6XX_SP_PERFCTR_SP_SEL_16 0x0000ae20 + +#define 
REG_A6XX_SP_PERFCTR_SP_SEL_17 0x0000ae21 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_18 0x0000ae22 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_19 0x0000ae23 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_20 0x0000ae24 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_21 0x0000ae25 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_22 0x0000ae26 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_23 0x0000ae27 + +#define REG_A6XX_TPL1_ADDR_MODE_CNTL 0x0000b601 + +#define REG_A6XX_TPL1_NC_MODE_CNTL 0x0000b604 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_0 0x0000b610 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_1 0x0000b611 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_2 0x0000b612 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_3 0x0000b613 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_4 0x0000b614 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_5 0x0000b615 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_6 0x0000b616 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_7 0x0000b617 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_8 0x0000b618 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_9 0x0000b619 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_10 0x0000b61a + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_11 0x0000b61b + +#define REG_A6XX_VBIF_VERSION 0x00003000 + +#define REG_A6XX_VBIF_CLKON 0x00003001 +#define A6XX_VBIF_CLKON_FORCE_ON_TESTBUS 0x00000002 + +#define REG_A6XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a + +#define REG_A6XX_VBIF_XIN_HALT_CTRL0 0x00003080 + +#define REG_A6XX_VBIF_XIN_HALT_CTRL1 0x00003081 + +#define REG_A6XX_VBIF_TEST_BUS_OUT_CTRL 0x00003084 + +#define REG_A6XX_VBIF_TEST_BUS1_CTRL0 0x00003085 + +#define REG_A6XX_VBIF_TEST_BUS1_CTRL1 0x00003086 +#define A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL__MASK 0x0000000f +#define A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL__SHIFT 0 +static inline uint32_t A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL(uint32_t val) +{ + return ((val) << A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL__SHIFT) & A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL__MASK; +} + +#define REG_A6XX_VBIF_TEST_BUS2_CTRL0 0x00003087 + +#define REG_A6XX_VBIF_TEST_BUS2_CTRL1 0x00003088 +#define A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL__MASK 
0x000001ff +#define A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL__SHIFT 0 +static inline uint32_t A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL(uint32_t val) +{ + return ((val) << A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL__SHIFT) & A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL__MASK; +} + +#define REG_A6XX_VBIF_TEST_BUS_OUT 0x0000308c + +#define REG_A6XX_VBIF_PERF_CNT_SEL0 0x000030d0 + +#define REG_A6XX_VBIF_PERF_CNT_SEL1 0x000030d1 + +#define REG_A6XX_VBIF_PERF_CNT_SEL2 0x000030d2 + +#define REG_A6XX_VBIF_PERF_CNT_SEL3 0x000030d3 + +#define REG_A6XX_VBIF_PERF_CNT_LOW0 0x000030d8 + +#define REG_A6XX_VBIF_PERF_CNT_LOW1 0x000030d9 + +#define REG_A6XX_VBIF_PERF_CNT_LOW2 0x000030da + +#define REG_A6XX_VBIF_PERF_CNT_LOW3 0x000030db + +#define REG_A6XX_VBIF_PERF_CNT_HIGH0 0x000030e0 + +#define REG_A6XX_VBIF_PERF_CNT_HIGH1 0x000030e1 + +#define REG_A6XX_VBIF_PERF_CNT_HIGH2 0x000030e2 + +#define REG_A6XX_VBIF_PERF_CNT_HIGH3 0x000030e3 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_EN0 0x00003100 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_EN1 0x00003101 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_EN2 0x00003102 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_LOW0 0x00003110 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_LOW1 0x00003111 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_LOW2 0x00003112 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_HIGH0 0x00003118 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_HIGH1 0x00003119 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_HIGH2 0x0000311a + +#define REG_A6XX_RB_WINDOW_OFFSET2 0x000088d4 +#define A6XX_RB_WINDOW_OFFSET2_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_RB_WINDOW_OFFSET2_X__MASK 0x00007fff +#define A6XX_RB_WINDOW_OFFSET2_X__SHIFT 0 +static inline uint32_t A6XX_RB_WINDOW_OFFSET2_X(uint32_t val) +{ + return ((val) << A6XX_RB_WINDOW_OFFSET2_X__SHIFT) & A6XX_RB_WINDOW_OFFSET2_X__MASK; +} +#define A6XX_RB_WINDOW_OFFSET2_Y__MASK 0x7fff0000 +#define A6XX_RB_WINDOW_OFFSET2_Y__SHIFT 16 +static inline uint32_t A6XX_RB_WINDOW_OFFSET2_Y(uint32_t val) +{ + return ((val) << A6XX_RB_WINDOW_OFFSET2_Y__SHIFT) & A6XX_RB_WINDOW_OFFSET2_Y__MASK; 
+} + +#define REG_A6XX_SP_WINDOW_OFFSET 0x0000b4d1 +#define A6XX_SP_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_SP_WINDOW_OFFSET_X__MASK 0x00007fff +#define A6XX_SP_WINDOW_OFFSET_X__SHIFT 0 +static inline uint32_t A6XX_SP_WINDOW_OFFSET_X(uint32_t val) +{ + return ((val) << A6XX_SP_WINDOW_OFFSET_X__SHIFT) & A6XX_SP_WINDOW_OFFSET_X__MASK; +} +#define A6XX_SP_WINDOW_OFFSET_Y__MASK 0x7fff0000 +#define A6XX_SP_WINDOW_OFFSET_Y__SHIFT 16 +static inline uint32_t A6XX_SP_WINDOW_OFFSET_Y(uint32_t val) +{ + return ((val) << A6XX_SP_WINDOW_OFFSET_Y__SHIFT) & A6XX_SP_WINDOW_OFFSET_Y__MASK; +} + +#define REG_A6XX_SP_TP_WINDOW_OFFSET 0x0000b307 +#define A6XX_SP_TP_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_SP_TP_WINDOW_OFFSET_X__MASK 0x00007fff +#define A6XX_SP_TP_WINDOW_OFFSET_X__SHIFT 0 +static inline uint32_t A6XX_SP_TP_WINDOW_OFFSET_X(uint32_t val) +{ + return ((val) << A6XX_SP_TP_WINDOW_OFFSET_X__SHIFT) & A6XX_SP_TP_WINDOW_OFFSET_X__MASK; +} +#define A6XX_SP_TP_WINDOW_OFFSET_Y__MASK 0x7fff0000 +#define A6XX_SP_TP_WINDOW_OFFSET_Y__SHIFT 16 +static inline uint32_t A6XX_SP_TP_WINDOW_OFFSET_Y(uint32_t val) +{ + return ((val) << A6XX_SP_TP_WINDOW_OFFSET_Y__SHIFT) & A6XX_SP_TP_WINDOW_OFFSET_Y__MASK; +} + +#define REG_A6XX_GRAS_BIN_CONTROL 0x000080a1 +#define A6XX_GRAS_BIN_CONTROL_BINW__MASK 0x000000ff +#define A6XX_GRAS_BIN_CONTROL_BINW__SHIFT 0 +static inline uint32_t A6XX_GRAS_BIN_CONTROL_BINW(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A6XX_GRAS_BIN_CONTROL_BINW__SHIFT) & A6XX_GRAS_BIN_CONTROL_BINW__MASK; +} +#define A6XX_GRAS_BIN_CONTROL_BINH__MASK 0x0001ff00 +#define A6XX_GRAS_BIN_CONTROL_BINH__SHIFT 8 +static inline uint32_t A6XX_GRAS_BIN_CONTROL_BINH(uint32_t val) +{ + assert(!(val & 0xf)); + return ((val >> 4) << A6XX_GRAS_BIN_CONTROL_BINH__SHIFT) & A6XX_GRAS_BIN_CONTROL_BINH__MASK; +} +#define A6XX_GRAS_BIN_CONTROL_BINNING_PASS 0x00040000 +#define A6XX_GRAS_BIN_CONTROL_USE_VIZ 0x00200000 + +#define 
REG_A6XX_RB_BIN_CONTROL2 0x000088d3 +#define A6XX_RB_BIN_CONTROL2_BINW__MASK 0x000000ff +#define A6XX_RB_BIN_CONTROL2_BINW__SHIFT 0 +static inline uint32_t A6XX_RB_BIN_CONTROL2_BINW(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A6XX_RB_BIN_CONTROL2_BINW__SHIFT) & A6XX_RB_BIN_CONTROL2_BINW__MASK; +} +#define A6XX_RB_BIN_CONTROL2_BINH__MASK 0x0001ff00 +#define A6XX_RB_BIN_CONTROL2_BINH__SHIFT 8 +static inline uint32_t A6XX_RB_BIN_CONTROL2_BINH(uint32_t val) +{ + assert(!(val & 0xf)); + return ((val >> 4) << A6XX_RB_BIN_CONTROL2_BINH__SHIFT) & A6XX_RB_BIN_CONTROL2_BINH__MASK; +} + +#define REG_A6XX_VSC_BIN_SIZE 0x00000c02 +#define A6XX_VSC_BIN_SIZE_WIDTH__MASK 0x000000ff +#define A6XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 +static inline uint32_t A6XX_VSC_BIN_SIZE_WIDTH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A6XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A6XX_VSC_BIN_SIZE_WIDTH__MASK; +} +#define A6XX_VSC_BIN_SIZE_HEIGHT__MASK 0x0001ff00 +#define A6XX_VSC_BIN_SIZE_HEIGHT__SHIFT 8 +static inline uint32_t A6XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) +{ + assert(!(val & 0xf)); + return ((val >> 4) << A6XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A6XX_VSC_BIN_SIZE_HEIGHT__MASK; +} + +#define REG_A6XX_VSC_SIZE_ADDRESS_LO 0x00000c03 + +#define REG_A6XX_VSC_SIZE_ADDRESS_HI 0x00000c04 + +#define REG_A6XX_VSC_BIN_COUNT 0x00000c06 +#define A6XX_VSC_BIN_COUNT_NX__MASK 0x000007fe +#define A6XX_VSC_BIN_COUNT_NX__SHIFT 1 +static inline uint32_t A6XX_VSC_BIN_COUNT_NX(uint32_t val) +{ + return ((val) << A6XX_VSC_BIN_COUNT_NX__SHIFT) & A6XX_VSC_BIN_COUNT_NX__MASK; +} +#define A6XX_VSC_BIN_COUNT_NY__MASK 0x001ff800 +#define A6XX_VSC_BIN_COUNT_NY__SHIFT 11 +static inline uint32_t A6XX_VSC_BIN_COUNT_NY(uint32_t val) +{ + return ((val) << A6XX_VSC_BIN_COUNT_NY__SHIFT) & A6XX_VSC_BIN_COUNT_NY__MASK; +} + +static inline uint32_t REG_A6XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c10 + 0x1*i0; } + +static inline uint32_t REG_A6XX_VSC_PIPE_CONFIG_REG(uint32_t i0) { return 0x00000c10 
+ 0x1*i0; } +#define A6XX_VSC_PIPE_CONFIG_REG_X__MASK 0x000003ff +#define A6XX_VSC_PIPE_CONFIG_REG_X__SHIFT 0 +static inline uint32_t A6XX_VSC_PIPE_CONFIG_REG_X(uint32_t val) +{ + return ((val) << A6XX_VSC_PIPE_CONFIG_REG_X__SHIFT) & A6XX_VSC_PIPE_CONFIG_REG_X__MASK; +} +#define A6XX_VSC_PIPE_CONFIG_REG_Y__MASK 0x000ffc00 +#define A6XX_VSC_PIPE_CONFIG_REG_Y__SHIFT 10 +static inline uint32_t A6XX_VSC_PIPE_CONFIG_REG_Y(uint32_t val) +{ + return ((val) << A6XX_VSC_PIPE_CONFIG_REG_Y__SHIFT) & A6XX_VSC_PIPE_CONFIG_REG_Y__MASK; +} +#define A6XX_VSC_PIPE_CONFIG_REG_W__MASK 0x03f00000 +#define A6XX_VSC_PIPE_CONFIG_REG_W__SHIFT 20 +static inline uint32_t A6XX_VSC_PIPE_CONFIG_REG_W(uint32_t val) +{ + return ((val) << A6XX_VSC_PIPE_CONFIG_REG_W__SHIFT) & A6XX_VSC_PIPE_CONFIG_REG_W__MASK; +} +#define A6XX_VSC_PIPE_CONFIG_REG_H__MASK 0xfc000000 +#define A6XX_VSC_PIPE_CONFIG_REG_H__SHIFT 26 +static inline uint32_t A6XX_VSC_PIPE_CONFIG_REG_H(uint32_t val) +{ + return ((val) << A6XX_VSC_PIPE_CONFIG_REG_H__SHIFT) & A6XX_VSC_PIPE_CONFIG_REG_H__MASK; +} + +#define REG_A6XX_VSC_PIPE_DATA2_ADDRESS_LO 0x00000c30 + +#define REG_A6XX_VSC_PIPE_DATA2_ADDRESS_HI 0x00000c31 + +#define REG_A6XX_VSC_PIPE_DATA2_PITCH 0x00000c32 + +#define REG_A6XX_VSC_PIPE_DATA2_ARRAY_PITCH 0x00000c33 +#define A6XX_VSC_PIPE_DATA2_ARRAY_PITCH__MASK 0xffffffff +#define A6XX_VSC_PIPE_DATA2_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A6XX_VSC_PIPE_DATA2_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0xf)); + return ((val >> 4) << A6XX_VSC_PIPE_DATA2_ARRAY_PITCH__SHIFT) & A6XX_VSC_PIPE_DATA2_ARRAY_PITCH__MASK; +} + +#define REG_A6XX_VSC_PIPE_DATA_ADDRESS_LO 0x00000c34 + +#define REG_A6XX_VSC_PIPE_DATA_ADDRESS_HI 0x00000c35 + +#define REG_A6XX_VSC_PIPE_DATA_PITCH 0x00000c36 + +#define REG_A6XX_VSC_PIPE_DATA_ARRAY_PITCH 0x00000c37 +#define A6XX_VSC_PIPE_DATA_ARRAY_PITCH__MASK 0xffffffff +#define A6XX_VSC_PIPE_DATA_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A6XX_VSC_PIPE_DATA_ARRAY_PITCH(uint32_t val) +{ + 
assert(!(val & 0xf)); + return ((val >> 4) << A6XX_VSC_PIPE_DATA_ARRAY_PITCH__SHIFT) & A6XX_VSC_PIPE_DATA_ARRAY_PITCH__MASK; +} + +static inline uint32_t REG_A6XX_VSC_SIZE(uint32_t i0) { return 0x00000c78 + 0x1*i0; } + +static inline uint32_t REG_A6XX_VSC_SIZE_REG(uint32_t i0) { return 0x00000c78 + 0x1*i0; } + +#define REG_A6XX_UCHE_UNKNOWN_0E12 0x00000e12 + +#define REG_A6XX_GRAS_UNKNOWN_8000 0x00008000 + +#define REG_A6XX_GRAS_UNKNOWN_8001 0x00008001 + +#define REG_A6XX_GRAS_UNKNOWN_8004 0x00008004 + +#define REG_A6XX_GRAS_CNTL 0x00008005 +#define A6XX_GRAS_CNTL_VARYING 0x00000001 +#define A6XX_GRAS_CNTL_UNK3 0x00000008 +#define A6XX_GRAS_CNTL_XCOORD 0x00000040 +#define A6XX_GRAS_CNTL_YCOORD 0x00000080 +#define A6XX_GRAS_CNTL_ZCOORD 0x00000100 +#define A6XX_GRAS_CNTL_WCOORD 0x00000200 + +#define REG_A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ 0x00008006 +#define A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK 0x000003ff +#define A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__SHIFT 0 +static inline uint32_t A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(uint32_t val) +{ + return ((val) << A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__SHIFT) & A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK; +} +#define A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__MASK 0x000ffc00 +#define A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__SHIFT 10 +static inline uint32_t A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(uint32_t val) +{ + return ((val) << A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__SHIFT) & A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__MASK; +} + +#define REG_A6XX_GRAS_CL_VPORT_XOFFSET_0 0x00008010 +#define A6XX_GRAS_CL_VPORT_XOFFSET_0__MASK 0xffffffff +#define A6XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT 0 +static inline uint32_t A6XX_GRAS_CL_VPORT_XOFFSET_0(float val) +{ + return ((fui(val)) << A6XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT) & A6XX_GRAS_CL_VPORT_XOFFSET_0__MASK; +} + +#define REG_A6XX_GRAS_CL_VPORT_XSCALE_0 0x00008011 +#define A6XX_GRAS_CL_VPORT_XSCALE_0__MASK 0xffffffff +#define A6XX_GRAS_CL_VPORT_XSCALE_0__SHIFT 0 +static inline uint32_t 
A6XX_GRAS_CL_VPORT_XSCALE_0(float val) +{ + return ((fui(val)) << A6XX_GRAS_CL_VPORT_XSCALE_0__SHIFT) & A6XX_GRAS_CL_VPORT_XSCALE_0__MASK; +} + +#define REG_A6XX_GRAS_CL_VPORT_YOFFSET_0 0x00008012 +#define A6XX_GRAS_CL_VPORT_YOFFSET_0__MASK 0xffffffff +#define A6XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT 0 +static inline uint32_t A6XX_GRAS_CL_VPORT_YOFFSET_0(float val) +{ + return ((fui(val)) << A6XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT) & A6XX_GRAS_CL_VPORT_YOFFSET_0__MASK; +} + +#define REG_A6XX_GRAS_CL_VPORT_YSCALE_0 0x00008013 +#define A6XX_GRAS_CL_VPORT_YSCALE_0__MASK 0xffffffff +#define A6XX_GRAS_CL_VPORT_YSCALE_0__SHIFT 0 +static inline uint32_t A6XX_GRAS_CL_VPORT_YSCALE_0(float val) +{ + return ((fui(val)) << A6XX_GRAS_CL_VPORT_YSCALE_0__SHIFT) & A6XX_GRAS_CL_VPORT_YSCALE_0__MASK; +} + +#define REG_A6XX_GRAS_CL_VPORT_ZOFFSET_0 0x00008014 +#define A6XX_GRAS_CL_VPORT_ZOFFSET_0__MASK 0xffffffff +#define A6XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT 0 +static inline uint32_t A6XX_GRAS_CL_VPORT_ZOFFSET_0(float val) +{ + return ((fui(val)) << A6XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT) & A6XX_GRAS_CL_VPORT_ZOFFSET_0__MASK; +} + +#define REG_A6XX_GRAS_CL_VPORT_ZSCALE_0 0x00008015 +#define A6XX_GRAS_CL_VPORT_ZSCALE_0__MASK 0xffffffff +#define A6XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT 0 +static inline uint32_t A6XX_GRAS_CL_VPORT_ZSCALE_0(float val) +{ + return ((fui(val)) << A6XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT) & A6XX_GRAS_CL_VPORT_ZSCALE_0__MASK; +} + +#define REG_A6XX_GRAS_SU_CNTL 0x00008090 +#define A6XX_GRAS_SU_CNTL_CULL_FRONT 0x00000001 +#define A6XX_GRAS_SU_CNTL_CULL_BACK 0x00000002 +#define A6XX_GRAS_SU_CNTL_FRONT_CW 0x00000004 +#define A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK 0x000007f8 +#define A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT 3 +static inline uint32_t A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(float val) +{ + return ((((int32_t)(val * 4.0))) << A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT) & A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK; +} +#define A6XX_GRAS_SU_CNTL_POLY_OFFSET 0x00000800 +#define 
A6XX_GRAS_SU_CNTL_MSAA_ENABLE 0x00002000 + +#define REG_A6XX_GRAS_SU_POINT_MINMAX 0x00008091 +#define A6XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff +#define A6XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0 +static inline uint32_t A6XX_GRAS_SU_POINT_MINMAX_MIN(float val) +{ + return ((((uint32_t)(val * 16.0))) << A6XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A6XX_GRAS_SU_POINT_MINMAX_MIN__MASK; +} +#define A6XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000 +#define A6XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16 +static inline uint32_t A6XX_GRAS_SU_POINT_MINMAX_MAX(float val) +{ + return ((((uint32_t)(val * 16.0))) << A6XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A6XX_GRAS_SU_POINT_MINMAX_MAX__MASK; +} + +#define REG_A6XX_GRAS_SU_POINT_SIZE 0x00008092 +#define A6XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff +#define A6XX_GRAS_SU_POINT_SIZE__SHIFT 0 +static inline uint32_t A6XX_GRAS_SU_POINT_SIZE(float val) +{ + return ((((int32_t)(val * 16.0))) << A6XX_GRAS_SU_POINT_SIZE__SHIFT) & A6XX_GRAS_SU_POINT_SIZE__MASK; +} + +#define REG_A6XX_GRAS_SU_DEPTH_PLANE_CNTL 0x00008094 +#define A6XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 + +#define REG_A6XX_GRAS_SU_POLY_OFFSET_SCALE 0x00008095 +#define A6XX_GRAS_SU_POLY_OFFSET_SCALE__MASK 0xffffffff +#define A6XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT 0 +static inline uint32_t A6XX_GRAS_SU_POLY_OFFSET_SCALE(float val) +{ + return ((fui(val)) << A6XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT) & A6XX_GRAS_SU_POLY_OFFSET_SCALE__MASK; +} + +#define REG_A6XX_GRAS_SU_POLY_OFFSET_OFFSET 0x00008096 +#define A6XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff +#define A6XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0 +static inline uint32_t A6XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) +{ + return ((fui(val)) << A6XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A6XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; +} + +#define REG_A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP 0x00008097 +#define A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__MASK 0xffffffff +#define A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__SHIFT 0 +static inline uint32_t 
A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(float val) +{ + return ((fui(val)) << A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__SHIFT) & A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__MASK; +} + +#define REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO 0x00008098 +#define A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK 0x00000007 +#define A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT 0 +static inline uint32_t A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a6xx_depth_format val) +{ + return ((val) << A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT) & A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK; +} + +#define REG_A6XX_GRAS_UNKNOWN_8099 0x00008099 + +#define REG_A6XX_GRAS_UNKNOWN_809B 0x0000809b + +#define REG_A6XX_GRAS_UNKNOWN_80A0 0x000080a0 + +#define REG_A6XX_GRAS_RAS_MSAA_CNTL 0x000080a2 +#define A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES__MASK; +} + +#define REG_A6XX_GRAS_DEST_MSAA_CNTL 0x000080a3 +#define A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES__MASK; +} +#define A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 + +#define REG_A6XX_GRAS_UNKNOWN_80A4 0x000080a4 + +#define REG_A6XX_GRAS_UNKNOWN_80A5 0x000080a5 + +#define REG_A6XX_GRAS_UNKNOWN_80A6 0x000080a6 + +#define REG_A6XX_GRAS_UNKNOWN_80AF 0x000080af + +#define REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0 0x000080b0 +#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__MASK 0x00007fff +#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__SHIFT 0 +static inline uint32_t 
A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__SHIFT) & A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__MASK; +} +#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__MASK 0x7fff0000 +#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__SHIFT) & A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__MASK; +} + +#define REG_A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0 0x000080b1 +#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__MASK 0x00007fff +#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__SHIFT) & A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__MASK; +} +#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__MASK 0x7fff0000 +#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__SHIFT) & A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__MASK; +} + +#define REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0 0x000080d0 +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__MASK 0x00007fff +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__SHIFT) & A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__MASK; +} +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__MASK 0x7fff0000 +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__SHIFT) & A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__MASK; +} + +#define 
REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0 0x000080d1 +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__MASK 0x00007fff +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__SHIFT) & A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__MASK; +} +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__MASK 0x7fff0000 +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__SHIFT) & A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__MASK; +} + +#define REG_A6XX_GRAS_SC_WINDOW_SCISSOR_TL 0x000080f0 +#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff +#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK; +} +#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 +#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK; +} + +#define REG_A6XX_GRAS_SC_WINDOW_SCISSOR_BR 0x000080f1 +#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff +#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK; +} +#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 +#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 +static inline 
uint32_t A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK; +} + +#define REG_A6XX_GRAS_LRZ_CNTL 0x00008100 +#define A6XX_GRAS_LRZ_CNTL_ENABLE 0x00000001 +#define A6XX_GRAS_LRZ_CNTL_LRZ_WRITE 0x00000002 +#define A6XX_GRAS_LRZ_CNTL_GREATER 0x00000004 +#define A6XX_GRAS_LRZ_CNTL_UNK3 0x00000008 +#define A6XX_GRAS_LRZ_CNTL_UNK4 0x00000010 + +#define REG_A6XX_GRAS_UNKNOWN_8101 0x00008101 + +#define REG_A6XX_GRAS_2D_BLIT_INFO 0x00008102 +#define A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT__SHIFT) & A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT__MASK; +} + +#define REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO 0x00008103 + +#define REG_A6XX_GRAS_LRZ_BUFFER_BASE_HI 0x00008104 + +#define REG_A6XX_GRAS_LRZ_BUFFER_PITCH 0x00008105 +#define A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH__MASK 0x000007ff +#define A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH__SHIFT 0 +static inline uint32_t A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH__SHIFT) & A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH__MASK; +} +#define A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH__MASK 0x003ff800 +#define A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH__SHIFT 11 +static inline uint32_t A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH__SHIFT) & A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH__MASK; +} + +#define REG_A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO 0x00008106 + +#define REG_A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI 0x00008107 + +#define REG_A6XX_GRAS_UNKNOWN_8109 0x00008109 + +#define REG_A6XX_GRAS_UNKNOWN_8110 0x00008110 + +#define REG_A6XX_GRAS_2D_BLIT_CNTL 0x00008400 +#define 
A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT__MASK 0x0000ff00 +#define A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT__SHIFT 8 +static inline uint32_t A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT__SHIFT) & A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT__MASK; +} +#define A6XX_GRAS_2D_BLIT_CNTL_SCISSOR 0x00010000 +#define A6XX_GRAS_2D_BLIT_CNTL_IFMT__MASK 0x1f000000 +#define A6XX_GRAS_2D_BLIT_CNTL_IFMT__SHIFT 24 +static inline uint32_t A6XX_GRAS_2D_BLIT_CNTL_IFMT(enum a6xx_2d_ifmt val) +{ + return ((val) << A6XX_GRAS_2D_BLIT_CNTL_IFMT__SHIFT) & A6XX_GRAS_2D_BLIT_CNTL_IFMT__MASK; +} + +#define REG_A6XX_GRAS_2D_SRC_TL_X 0x00008401 +#define A6XX_GRAS_2D_SRC_TL_X_X__MASK 0x00ffff00 +#define A6XX_GRAS_2D_SRC_TL_X_X__SHIFT 8 +static inline uint32_t A6XX_GRAS_2D_SRC_TL_X_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_SRC_TL_X_X__SHIFT) & A6XX_GRAS_2D_SRC_TL_X_X__MASK; +} + +#define REG_A6XX_GRAS_2D_SRC_BR_X 0x00008402 +#define A6XX_GRAS_2D_SRC_BR_X_X__MASK 0x00ffff00 +#define A6XX_GRAS_2D_SRC_BR_X_X__SHIFT 8 +static inline uint32_t A6XX_GRAS_2D_SRC_BR_X_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_SRC_BR_X_X__SHIFT) & A6XX_GRAS_2D_SRC_BR_X_X__MASK; +} + +#define REG_A6XX_GRAS_2D_SRC_TL_Y 0x00008403 +#define A6XX_GRAS_2D_SRC_TL_Y_Y__MASK 0x00ffff00 +#define A6XX_GRAS_2D_SRC_TL_Y_Y__SHIFT 8 +static inline uint32_t A6XX_GRAS_2D_SRC_TL_Y_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_SRC_TL_Y_Y__SHIFT) & A6XX_GRAS_2D_SRC_TL_Y_Y__MASK; +} + +#define REG_A6XX_GRAS_2D_SRC_BR_Y 0x00008404 +#define A6XX_GRAS_2D_SRC_BR_Y_Y__MASK 0x00ffff00 +#define A6XX_GRAS_2D_SRC_BR_Y_Y__SHIFT 8 +static inline uint32_t A6XX_GRAS_2D_SRC_BR_Y_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_SRC_BR_Y_Y__SHIFT) & A6XX_GRAS_2D_SRC_BR_Y_Y__MASK; +} + +#define REG_A6XX_GRAS_2D_DST_TL 0x00008405 +#define A6XX_GRAS_2D_DST_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_2D_DST_TL_X__MASK 0x00007fff +#define A6XX_GRAS_2D_DST_TL_X__SHIFT 0 
+static inline uint32_t A6XX_GRAS_2D_DST_TL_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_DST_TL_X__SHIFT) & A6XX_GRAS_2D_DST_TL_X__MASK; +} +#define A6XX_GRAS_2D_DST_TL_Y__MASK 0x7fff0000 +#define A6XX_GRAS_2D_DST_TL_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_2D_DST_TL_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_DST_TL_Y__SHIFT) & A6XX_GRAS_2D_DST_TL_Y__MASK; +} + +#define REG_A6XX_GRAS_2D_DST_BR 0x00008406 +#define A6XX_GRAS_2D_DST_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_2D_DST_BR_X__MASK 0x00007fff +#define A6XX_GRAS_2D_DST_BR_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_2D_DST_BR_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_DST_BR_X__SHIFT) & A6XX_GRAS_2D_DST_BR_X__MASK; +} +#define A6XX_GRAS_2D_DST_BR_Y__MASK 0x7fff0000 +#define A6XX_GRAS_2D_DST_BR_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_2D_DST_BR_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_DST_BR_Y__SHIFT) & A6XX_GRAS_2D_DST_BR_Y__MASK; +} + +#define REG_A6XX_GRAS_RESOLVE_CNTL_1 0x0000840a +#define A6XX_GRAS_RESOLVE_CNTL_1_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_RESOLVE_CNTL_1_X__MASK 0x00007fff +#define A6XX_GRAS_RESOLVE_CNTL_1_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_RESOLVE_CNTL_1_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_RESOLVE_CNTL_1_X__SHIFT) & A6XX_GRAS_RESOLVE_CNTL_1_X__MASK; +} +#define A6XX_GRAS_RESOLVE_CNTL_1_Y__MASK 0x7fff0000 +#define A6XX_GRAS_RESOLVE_CNTL_1_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_RESOLVE_CNTL_1_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_RESOLVE_CNTL_1_Y__SHIFT) & A6XX_GRAS_RESOLVE_CNTL_1_Y__MASK; +} + +#define REG_A6XX_GRAS_RESOLVE_CNTL_2 0x0000840b +#define A6XX_GRAS_RESOLVE_CNTL_2_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_RESOLVE_CNTL_2_X__MASK 0x00007fff +#define A6XX_GRAS_RESOLVE_CNTL_2_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_RESOLVE_CNTL_2_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_RESOLVE_CNTL_2_X__SHIFT) & A6XX_GRAS_RESOLVE_CNTL_2_X__MASK; +} +#define 
A6XX_GRAS_RESOLVE_CNTL_2_Y__MASK 0x7fff0000 +#define A6XX_GRAS_RESOLVE_CNTL_2_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_RESOLVE_CNTL_2_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_RESOLVE_CNTL_2_Y__SHIFT) & A6XX_GRAS_RESOLVE_CNTL_2_Y__MASK; +} + +#define REG_A6XX_GRAS_UNKNOWN_8600 0x00008600 + +#define REG_A6XX_RB_BIN_CONTROL 0x00008800 +#define A6XX_RB_BIN_CONTROL_BINW__MASK 0x000000ff +#define A6XX_RB_BIN_CONTROL_BINW__SHIFT 0 +static inline uint32_t A6XX_RB_BIN_CONTROL_BINW(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A6XX_RB_BIN_CONTROL_BINW__SHIFT) & A6XX_RB_BIN_CONTROL_BINW__MASK; +} +#define A6XX_RB_BIN_CONTROL_BINH__MASK 0x0001ff00 +#define A6XX_RB_BIN_CONTROL_BINH__SHIFT 8 +static inline uint32_t A6XX_RB_BIN_CONTROL_BINH(uint32_t val) +{ + assert(!(val & 0xf)); + return ((val >> 4) << A6XX_RB_BIN_CONTROL_BINH__SHIFT) & A6XX_RB_BIN_CONTROL_BINH__MASK; +} +#define A6XX_RB_BIN_CONTROL_BINNING_PASS 0x00040000 +#define A6XX_RB_BIN_CONTROL_USE_VIZ 0x00200000 + +#define REG_A6XX_RB_RENDER_CNTL 0x00008801 +#define A6XX_RB_RENDER_CNTL_UNK4 0x00000010 +#define A6XX_RB_RENDER_CNTL_BINNING 0x00000080 +#define A6XX_RB_RENDER_CNTL_FLAG_DEPTH 0x00004000 +#define A6XX_RB_RENDER_CNTL_FLAG_MRTS__MASK 0x00ff0000 +#define A6XX_RB_RENDER_CNTL_FLAG_MRTS__SHIFT 16 +static inline uint32_t A6XX_RB_RENDER_CNTL_FLAG_MRTS(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_CNTL_FLAG_MRTS__SHIFT) & A6XX_RB_RENDER_CNTL_FLAG_MRTS__MASK; +} + +#define REG_A6XX_RB_RAS_MSAA_CNTL 0x00008802 +#define A6XX_RB_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A6XX_RB_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A6XX_RB_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_RB_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_RB_RAS_MSAA_CNTL_SAMPLES__MASK; +} + +#define REG_A6XX_RB_DEST_MSAA_CNTL 0x00008803 +#define A6XX_RB_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A6XX_RB_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t 
A6XX_RB_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_RB_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_RB_DEST_MSAA_CNTL_SAMPLES__MASK; +} +#define A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 + +#define REG_A6XX_RB_UNKNOWN_8804 0x00008804 + +#define REG_A6XX_RB_UNKNOWN_8805 0x00008805 + +#define REG_A6XX_RB_UNKNOWN_8806 0x00008806 + +#define REG_A6XX_RB_RENDER_CONTROL0 0x00008809 +#define A6XX_RB_RENDER_CONTROL0_VARYING 0x00000001 +#define A6XX_RB_RENDER_CONTROL0_UNK3 0x00000008 +#define A6XX_RB_RENDER_CONTROL0_XCOORD 0x00000040 +#define A6XX_RB_RENDER_CONTROL0_YCOORD 0x00000080 +#define A6XX_RB_RENDER_CONTROL0_ZCOORD 0x00000100 +#define A6XX_RB_RENDER_CONTROL0_WCOORD 0x00000200 +#define A6XX_RB_RENDER_CONTROL0_UNK10 0x00000400 + +#define REG_A6XX_RB_RENDER_CONTROL1 0x0000880a +#define A6XX_RB_RENDER_CONTROL1_SAMPLEMASK 0x00000001 +#define A6XX_RB_RENDER_CONTROL1_FACENESS 0x00000002 +#define A6XX_RB_RENDER_CONTROL1_SAMPLEID 0x00000008 + +#define REG_A6XX_RB_FS_OUTPUT_CNTL0 0x0000880b +#define A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z 0x00000002 + +#define REG_A6XX_RB_FS_OUTPUT_CNTL1 0x0000880c +#define A6XX_RB_FS_OUTPUT_CNTL1_MRT__MASK 0x0000000f +#define A6XX_RB_FS_OUTPUT_CNTL1_MRT__SHIFT 0 +static inline uint32_t A6XX_RB_FS_OUTPUT_CNTL1_MRT(uint32_t val) +{ + return ((val) << A6XX_RB_FS_OUTPUT_CNTL1_MRT__SHIFT) & A6XX_RB_FS_OUTPUT_CNTL1_MRT__MASK; +} + +#define REG_A6XX_RB_RENDER_COMPONENTS 0x0000880d +#define A6XX_RB_RENDER_COMPONENTS_RT0__MASK 0x0000000f +#define A6XX_RB_RENDER_COMPONENTS_RT0__SHIFT 0 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT0(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT0__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT0__MASK; +} +#define A6XX_RB_RENDER_COMPONENTS_RT1__MASK 0x000000f0 +#define A6XX_RB_RENDER_COMPONENTS_RT1__SHIFT 4 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT1(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT1__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT1__MASK; +} 
+#define A6XX_RB_RENDER_COMPONENTS_RT2__MASK 0x00000f00 +#define A6XX_RB_RENDER_COMPONENTS_RT2__SHIFT 8 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT2(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT2__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT2__MASK; +} +#define A6XX_RB_RENDER_COMPONENTS_RT3__MASK 0x0000f000 +#define A6XX_RB_RENDER_COMPONENTS_RT3__SHIFT 12 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT3(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT3__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT3__MASK; +} +#define A6XX_RB_RENDER_COMPONENTS_RT4__MASK 0x000f0000 +#define A6XX_RB_RENDER_COMPONENTS_RT4__SHIFT 16 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT4(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT4__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT4__MASK; +} +#define A6XX_RB_RENDER_COMPONENTS_RT5__MASK 0x00f00000 +#define A6XX_RB_RENDER_COMPONENTS_RT5__SHIFT 20 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT5(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT5__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT5__MASK; +} +#define A6XX_RB_RENDER_COMPONENTS_RT6__MASK 0x0f000000 +#define A6XX_RB_RENDER_COMPONENTS_RT6__SHIFT 24 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT6(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT6__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT6__MASK; +} +#define A6XX_RB_RENDER_COMPONENTS_RT7__MASK 0xf0000000 +#define A6XX_RB_RENDER_COMPONENTS_RT7__SHIFT 28 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT7(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT7__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT7__MASK; +} + +#define REG_A6XX_RB_DITHER_CNTL 0x0000880e +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0__MASK 0x00000003 +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0__SHIFT 0 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0__SHIFT) & 
A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0__MASK; +} +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1__MASK 0x0000000c +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1__SHIFT 2 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1__MASK; +} +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2__MASK 0x00000030 +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2__SHIFT 4 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2__MASK; +} +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3__MASK 0x000000c0 +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3__SHIFT 6 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3__MASK; +} +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4__MASK 0x00000300 +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4__SHIFT 8 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4__MASK; +} +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5__MASK 0x00000c00 +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5__SHIFT 10 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5__MASK; +} +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6__MASK 0x00001000 +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6__SHIFT 12 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6__SHIFT) & 
A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6__MASK; +} +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7__MASK 0x0000c000 +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7__SHIFT 14 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7__MASK; +} + +#define REG_A6XX_RB_SRGB_CNTL 0x0000880f +#define A6XX_RB_SRGB_CNTL_SRGB_MRT0 0x00000001 +#define A6XX_RB_SRGB_CNTL_SRGB_MRT1 0x00000002 +#define A6XX_RB_SRGB_CNTL_SRGB_MRT2 0x00000004 +#define A6XX_RB_SRGB_CNTL_SRGB_MRT3 0x00000008 +#define A6XX_RB_SRGB_CNTL_SRGB_MRT4 0x00000010 +#define A6XX_RB_SRGB_CNTL_SRGB_MRT5 0x00000020 +#define A6XX_RB_SRGB_CNTL_SRGB_MRT6 0x00000040 +#define A6XX_RB_SRGB_CNTL_SRGB_MRT7 0x00000080 + +#define REG_A6XX_RB_UNKNOWN_8810 0x00008810 + +#define REG_A6XX_RB_UNKNOWN_8811 0x00008811 + +#define REG_A6XX_RB_UNKNOWN_8818 0x00008818 + +#define REG_A6XX_RB_UNKNOWN_8819 0x00008819 + +#define REG_A6XX_RB_UNKNOWN_881A 0x0000881a + +#define REG_A6XX_RB_UNKNOWN_881B 0x0000881b + +#define REG_A6XX_RB_UNKNOWN_881C 0x0000881c + +#define REG_A6XX_RB_UNKNOWN_881D 0x0000881d + +#define REG_A6XX_RB_UNKNOWN_881E 0x0000881e + +static inline uint32_t REG_A6XX_RB_MRT(uint32_t i0) { return 0x00008820 + 0x8*i0; } + +static inline uint32_t REG_A6XX_RB_MRT_CONTROL(uint32_t i0) { return 0x00008820 + 0x8*i0; } +#define A6XX_RB_MRT_CONTROL_BLEND 0x00000001 +#define A6XX_RB_MRT_CONTROL_BLEND2 0x00000002 +#define A6XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000004 +#define A6XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000078 +#define A6XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 3 +static inline uint32_t A6XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) +{ + return ((val) << A6XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A6XX_RB_MRT_CONTROL_ROP_CODE__MASK; +} +#define A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x00000780 +#define A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 7 +static inline uint32_t 
A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) +{ + return ((val) << A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; +} + +static inline uint32_t REG_A6XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x00008821 + 0x8*i0; } +#define A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f +#define A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 +static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK; +} +#define A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0 +#define A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5 +static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) +{ + return ((val) << A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK; +} +#define A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00 +#define A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8 +static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK; +} +#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000 +#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16 +static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK; +} +#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000 +#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21 +static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) +{ + return ((val) << A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & 
A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK; +} +#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000 +#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24 +static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK; +} + +static inline uint32_t REG_A6XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x00008822 + 0x8*i0; } +#define A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK; +} +#define A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x00000300 +#define A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 8 +static inline uint32_t A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a6xx_tile_mode val) +{ + return ((val) << A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK; +} +#define A6XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00006000 +#define A6XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 13 +static inline uint32_t A6XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A6XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A6XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK; +} + +static inline uint32_t REG_A6XX_RB_MRT_PITCH(uint32_t i0) { return 0x00008823 + 0x8*i0; } +#define A6XX_RB_MRT_PITCH__MASK 0xffffffff +#define A6XX_RB_MRT_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_MRT_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A6XX_RB_MRT_PITCH__SHIFT) & A6XX_RB_MRT_PITCH__MASK; +} + +static inline uint32_t REG_A6XX_RB_MRT_ARRAY_PITCH(uint32_t i0) { return 0x00008824 + 0x8*i0; } +#define A6XX_RB_MRT_ARRAY_PITCH__MASK 0xffffffff +#define A6XX_RB_MRT_ARRAY_PITCH__SHIFT 0 +static inline uint32_t 
A6XX_RB_MRT_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A6XX_RB_MRT_ARRAY_PITCH__SHIFT) & A6XX_RB_MRT_ARRAY_PITCH__MASK; +} + +static inline uint32_t REG_A6XX_RB_MRT_BASE_LO(uint32_t i0) { return 0x00008825 + 0x8*i0; } + +static inline uint32_t REG_A6XX_RB_MRT_BASE_HI(uint32_t i0) { return 0x00008826 + 0x8*i0; } + +static inline uint32_t REG_A6XX_RB_MRT_BASE_GMEM(uint32_t i0) { return 0x00008827 + 0x8*i0; } + +#define REG_A6XX_RB_BLEND_RED_F32 0x00008860 +#define A6XX_RB_BLEND_RED_F32__MASK 0xffffffff +#define A6XX_RB_BLEND_RED_F32__SHIFT 0 +static inline uint32_t A6XX_RB_BLEND_RED_F32(float val) +{ + return ((fui(val)) << A6XX_RB_BLEND_RED_F32__SHIFT) & A6XX_RB_BLEND_RED_F32__MASK; +} + +#define REG_A6XX_RB_BLEND_GREEN_F32 0x00008861 +#define A6XX_RB_BLEND_GREEN_F32__MASK 0xffffffff +#define A6XX_RB_BLEND_GREEN_F32__SHIFT 0 +static inline uint32_t A6XX_RB_BLEND_GREEN_F32(float val) +{ + return ((fui(val)) << A6XX_RB_BLEND_GREEN_F32__SHIFT) & A6XX_RB_BLEND_GREEN_F32__MASK; +} + +#define REG_A6XX_RB_BLEND_BLUE_F32 0x00008862 +#define A6XX_RB_BLEND_BLUE_F32__MASK 0xffffffff +#define A6XX_RB_BLEND_BLUE_F32__SHIFT 0 +static inline uint32_t A6XX_RB_BLEND_BLUE_F32(float val) +{ + return ((fui(val)) << A6XX_RB_BLEND_BLUE_F32__SHIFT) & A6XX_RB_BLEND_BLUE_F32__MASK; +} + +#define REG_A6XX_RB_BLEND_ALPHA_F32 0x00008863 +#define A6XX_RB_BLEND_ALPHA_F32__MASK 0xffffffff +#define A6XX_RB_BLEND_ALPHA_F32__SHIFT 0 +static inline uint32_t A6XX_RB_BLEND_ALPHA_F32(float val) +{ + return ((fui(val)) << A6XX_RB_BLEND_ALPHA_F32__SHIFT) & A6XX_RB_BLEND_ALPHA_F32__MASK; +} + +#define REG_A6XX_RB_ALPHA_CONTROL 0x00008864 +#define A6XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK 0x000000ff +#define A6XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT 0 +static inline uint32_t A6XX_RB_ALPHA_CONTROL_ALPHA_REF(uint32_t val) +{ + return ((val) << A6XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT) & A6XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK; +} +#define A6XX_RB_ALPHA_CONTROL_ALPHA_TEST 0x00000100 
+#define A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK 0x00000e00 +#define A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT 9 +static inline uint32_t A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val) +{ + return ((val) << A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK; +} + +#define REG_A6XX_RB_BLEND_CNTL 0x00008865 +#define A6XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK 0x000000ff +#define A6XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT 0 +static inline uint32_t A6XX_RB_BLEND_CNTL_ENABLE_BLEND(uint32_t val) +{ + return ((val) << A6XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT) & A6XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK; +} +#define A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND 0x00000100 +#define A6XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE 0x00000400 +#define A6XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK 0xffff0000 +#define A6XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT 16 +static inline uint32_t A6XX_RB_BLEND_CNTL_SAMPLE_MASK(uint32_t val) +{ + return ((val) << A6XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT) & A6XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK; +} + +#define REG_A6XX_RB_DEPTH_PLANE_CNTL 0x00008870 +#define A6XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 + +#define REG_A6XX_RB_DEPTH_CNTL 0x00008871 +#define A6XX_RB_DEPTH_CNTL_Z_ENABLE 0x00000001 +#define A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE 0x00000002 +#define A6XX_RB_DEPTH_CNTL_ZFUNC__MASK 0x0000001c +#define A6XX_RB_DEPTH_CNTL_ZFUNC__SHIFT 2 +static inline uint32_t A6XX_RB_DEPTH_CNTL_ZFUNC(enum adreno_compare_func val) +{ + return ((val) << A6XX_RB_DEPTH_CNTL_ZFUNC__SHIFT) & A6XX_RB_DEPTH_CNTL_ZFUNC__MASK; +} +#define A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE 0x00000040 + +#define REG_A6XX_RB_DEPTH_BUFFER_INFO 0x00008872 +#define A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK 0x00000007 +#define A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT 0 +static inline uint32_t A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a6xx_depth_format val) +{ + return ((val) << A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT) & 
A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK; +} + +#define REG_A6XX_RB_DEPTH_BUFFER_PITCH 0x00008873 +#define A6XX_RB_DEPTH_BUFFER_PITCH__MASK 0xffffffff +#define A6XX_RB_DEPTH_BUFFER_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_DEPTH_BUFFER_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A6XX_RB_DEPTH_BUFFER_PITCH__SHIFT) & A6XX_RB_DEPTH_BUFFER_PITCH__MASK; +} + +#define REG_A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH 0x00008874 +#define A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK 0xffffffff +#define A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT) & A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK; +} + +#define REG_A6XX_RB_DEPTH_BUFFER_BASE_LO 0x00008875 + +#define REG_A6XX_RB_DEPTH_BUFFER_BASE_HI 0x00008876 + +#define REG_A6XX_RB_DEPTH_BUFFER_BASE_GMEM 0x00008877 + +#define REG_A6XX_RB_UNKNOWN_8878 0x00008878 + +#define REG_A6XX_RB_UNKNOWN_8879 0x00008879 + +#define REG_A6XX_RB_STENCIL_CONTROL 0x00008880 +#define A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001 +#define A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000002 +#define A6XX_RB_STENCIL_CONTROL_STENCIL_READ 0x00000004 +#define A6XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700 +#define A6XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A6XX_RB_STENCIL_CONTROL_FUNC__MASK; +} +#define A6XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800 +#define A6XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A6XX_RB_STENCIL_CONTROL_FAIL__MASK; +} +#define A6XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000 +#define A6XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_ZPASS(enum 
adreno_stencil_op val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A6XX_RB_STENCIL_CONTROL_ZPASS__MASK; +} +#define A6XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000 +#define A6XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & A6XX_RB_STENCIL_CONTROL_ZFAIL__MASK; +} +#define A6XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000 +#define A6XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A6XX_RB_STENCIL_CONTROL_FUNC_BF__MASK; +} +#define A6XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000 +#define A6XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A6XX_RB_STENCIL_CONTROL_FAIL_BF__MASK; +} +#define A6XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000 +#define A6XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A6XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK; +} +#define A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000 +#define A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK; +} + +#define REG_A6XX_RB_STENCIL_INFO 0x00008881 +#define A6XX_RB_STENCIL_INFO_SEPARATE_STENCIL 0x00000001 + +#define REG_A6XX_RB_STENCIL_BUFFER_PITCH 0x00008882 +#define A6XX_RB_STENCIL_BUFFER_PITCH__MASK 0xffffffff +#define A6XX_RB_STENCIL_BUFFER_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_STENCIL_BUFFER_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) 
<< A6XX_RB_STENCIL_BUFFER_PITCH__SHIFT) & A6XX_RB_STENCIL_BUFFER_PITCH__MASK; +} + +#define REG_A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH 0x00008883 +#define A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH__MASK 0xffffffff +#define A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH__SHIFT) & A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH__MASK; +} + +#define REG_A6XX_RB_STENCIL_BUFFER_BASE_LO 0x00008884 + +#define REG_A6XX_RB_STENCIL_BUFFER_BASE_HI 0x00008885 + +#define REG_A6XX_RB_STENCIL_BUFFER_BASE_GMEM 0x00008886 + +#define REG_A6XX_RB_STENCILREF 0x00008887 +#define A6XX_RB_STENCILREF_REF__MASK 0x000000ff +#define A6XX_RB_STENCILREF_REF__SHIFT 0 +static inline uint32_t A6XX_RB_STENCILREF_REF(uint32_t val) +{ + return ((val) << A6XX_RB_STENCILREF_REF__SHIFT) & A6XX_RB_STENCILREF_REF__MASK; +} +#define A6XX_RB_STENCILREF_BFREF__MASK 0x0000ff00 +#define A6XX_RB_STENCILREF_BFREF__SHIFT 8 +static inline uint32_t A6XX_RB_STENCILREF_BFREF(uint32_t val) +{ + return ((val) << A6XX_RB_STENCILREF_BFREF__SHIFT) & A6XX_RB_STENCILREF_BFREF__MASK; +} + +#define REG_A6XX_RB_STENCILMASK 0x00008888 +#define A6XX_RB_STENCILMASK_MASK__MASK 0x000000ff +#define A6XX_RB_STENCILMASK_MASK__SHIFT 0 +static inline uint32_t A6XX_RB_STENCILMASK_MASK(uint32_t val) +{ + return ((val) << A6XX_RB_STENCILMASK_MASK__SHIFT) & A6XX_RB_STENCILMASK_MASK__MASK; +} +#define A6XX_RB_STENCILMASK_BFMASK__MASK 0x0000ff00 +#define A6XX_RB_STENCILMASK_BFMASK__SHIFT 8 +static inline uint32_t A6XX_RB_STENCILMASK_BFMASK(uint32_t val) +{ + return ((val) << A6XX_RB_STENCILMASK_BFMASK__SHIFT) & A6XX_RB_STENCILMASK_BFMASK__MASK; +} + +#define REG_A6XX_RB_STENCILWRMASK 0x00008889 +#define A6XX_RB_STENCILWRMASK_WRMASK__MASK 0x000000ff +#define A6XX_RB_STENCILWRMASK_WRMASK__SHIFT 0 +static inline uint32_t A6XX_RB_STENCILWRMASK_WRMASK(uint32_t val) +{ + return ((val) << 
A6XX_RB_STENCILWRMASK_WRMASK__SHIFT) & A6XX_RB_STENCILWRMASK_WRMASK__MASK; +} +#define A6XX_RB_STENCILWRMASK_BFWRMASK__MASK 0x0000ff00 +#define A6XX_RB_STENCILWRMASK_BFWRMASK__SHIFT 8 +static inline uint32_t A6XX_RB_STENCILWRMASK_BFWRMASK(uint32_t val) +{ + return ((val) << A6XX_RB_STENCILWRMASK_BFWRMASK__SHIFT) & A6XX_RB_STENCILWRMASK_BFWRMASK__MASK; +} + +#define REG_A6XX_RB_WINDOW_OFFSET 0x00008890 +#define A6XX_RB_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_RB_WINDOW_OFFSET_X__MASK 0x00007fff +#define A6XX_RB_WINDOW_OFFSET_X__SHIFT 0 +static inline uint32_t A6XX_RB_WINDOW_OFFSET_X(uint32_t val) +{ + return ((val) << A6XX_RB_WINDOW_OFFSET_X__SHIFT) & A6XX_RB_WINDOW_OFFSET_X__MASK; +} +#define A6XX_RB_WINDOW_OFFSET_Y__MASK 0x7fff0000 +#define A6XX_RB_WINDOW_OFFSET_Y__SHIFT 16 +static inline uint32_t A6XX_RB_WINDOW_OFFSET_Y(uint32_t val) +{ + return ((val) << A6XX_RB_WINDOW_OFFSET_Y__SHIFT) & A6XX_RB_WINDOW_OFFSET_Y__MASK; +} + +#define REG_A6XX_RB_SAMPLE_COUNT_CONTROL 0x00008891 +#define A6XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002 + +#define REG_A6XX_RB_LRZ_CNTL 0x00008898 +#define A6XX_RB_LRZ_CNTL_ENABLE 0x00000001 + +#define REG_A6XX_RB_UNKNOWN_88D0 0x000088d0 + +#define REG_A6XX_RB_BLIT_SCISSOR_TL 0x000088d1 +#define A6XX_RB_BLIT_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_RB_BLIT_SCISSOR_TL_X__MASK 0x00007fff +#define A6XX_RB_BLIT_SCISSOR_TL_X__SHIFT 0 +static inline uint32_t A6XX_RB_BLIT_SCISSOR_TL_X(uint32_t val) +{ + return ((val) << A6XX_RB_BLIT_SCISSOR_TL_X__SHIFT) & A6XX_RB_BLIT_SCISSOR_TL_X__MASK; +} +#define A6XX_RB_BLIT_SCISSOR_TL_Y__MASK 0x7fff0000 +#define A6XX_RB_BLIT_SCISSOR_TL_Y__SHIFT 16 +static inline uint32_t A6XX_RB_BLIT_SCISSOR_TL_Y(uint32_t val) +{ + return ((val) << A6XX_RB_BLIT_SCISSOR_TL_Y__SHIFT) & A6XX_RB_BLIT_SCISSOR_TL_Y__MASK; +} + +#define REG_A6XX_RB_BLIT_SCISSOR_BR 0x000088d2 +#define A6XX_RB_BLIT_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_RB_BLIT_SCISSOR_BR_X__MASK 0x00007fff 
+#define A6XX_RB_BLIT_SCISSOR_BR_X__SHIFT 0 +static inline uint32_t A6XX_RB_BLIT_SCISSOR_BR_X(uint32_t val) +{ + return ((val) << A6XX_RB_BLIT_SCISSOR_BR_X__SHIFT) & A6XX_RB_BLIT_SCISSOR_BR_X__MASK; +} +#define A6XX_RB_BLIT_SCISSOR_BR_Y__MASK 0x7fff0000 +#define A6XX_RB_BLIT_SCISSOR_BR_Y__SHIFT 16 +static inline uint32_t A6XX_RB_BLIT_SCISSOR_BR_Y(uint32_t val) +{ + return ((val) << A6XX_RB_BLIT_SCISSOR_BR_Y__SHIFT) & A6XX_RB_BLIT_SCISSOR_BR_Y__MASK; +} + +#define REG_A6XX_RB_MSAA_CNTL 0x000088d5 +#define A6XX_RB_MSAA_CNTL_SAMPLES__MASK 0x00000018 +#define A6XX_RB_MSAA_CNTL_SAMPLES__SHIFT 3 +static inline uint32_t A6XX_RB_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_RB_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_RB_MSAA_CNTL_SAMPLES__MASK; +} + +#define REG_A6XX_RB_BLIT_BASE_GMEM 0x000088d6 + +#define REG_A6XX_RB_BLIT_DST_INFO 0x000088d7 +#define A6XX_RB_BLIT_DST_INFO_TILE_MODE__MASK 0x00000003 +#define A6XX_RB_BLIT_DST_INFO_TILE_MODE__SHIFT 0 +static inline uint32_t A6XX_RB_BLIT_DST_INFO_TILE_MODE(enum a6xx_tile_mode val) +{ + return ((val) << A6XX_RB_BLIT_DST_INFO_TILE_MODE__SHIFT) & A6XX_RB_BLIT_DST_INFO_TILE_MODE__MASK; +} +#define A6XX_RB_BLIT_DST_INFO_FLAGS 0x00000004 +#define A6XX_RB_BLIT_DST_INFO_SAMPLES__MASK 0x00000018 +#define A6XX_RB_BLIT_DST_INFO_SAMPLES__SHIFT 3 +static inline uint32_t A6XX_RB_BLIT_DST_INFO_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_RB_BLIT_DST_INFO_SAMPLES__SHIFT) & A6XX_RB_BLIT_DST_INFO_SAMPLES__MASK; +} +#define A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT__MASK 0x00007f80 +#define A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT__SHIFT 7 +static inline uint32_t A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT__SHIFT) & A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT__MASK; +} +#define A6XX_RB_BLIT_DST_INFO_COLOR_SWAP__MASK 0x00000060 +#define A6XX_RB_BLIT_DST_INFO_COLOR_SWAP__SHIFT 5 +static inline uint32_t A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) 
+{ + return ((val) << A6XX_RB_BLIT_DST_INFO_COLOR_SWAP__SHIFT) & A6XX_RB_BLIT_DST_INFO_COLOR_SWAP__MASK; +} + +#define REG_A6XX_RB_BLIT_DST_LO 0x000088d8 + +#define REG_A6XX_RB_BLIT_DST_HI 0x000088d9 + +#define REG_A6XX_RB_BLIT_DST_PITCH 0x000088da +#define A6XX_RB_BLIT_DST_PITCH__MASK 0xffffffff +#define A6XX_RB_BLIT_DST_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_BLIT_DST_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A6XX_RB_BLIT_DST_PITCH__SHIFT) & A6XX_RB_BLIT_DST_PITCH__MASK; +} + +#define REG_A6XX_RB_BLIT_DST_ARRAY_PITCH 0x000088db +#define A6XX_RB_BLIT_DST_ARRAY_PITCH__MASK 0xffffffff +#define A6XX_RB_BLIT_DST_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_BLIT_DST_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A6XX_RB_BLIT_DST_ARRAY_PITCH__SHIFT) & A6XX_RB_BLIT_DST_ARRAY_PITCH__MASK; +} + +#define REG_A6XX_RB_BLIT_FLAG_DST_LO 0x000088dc + +#define REG_A6XX_RB_BLIT_FLAG_DST_HI 0x000088dd + +#define REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0 0x000088df + +#define REG_A6XX_RB_BLIT_CLEAR_COLOR_DW1 0x000088e0 + +#define REG_A6XX_RB_BLIT_CLEAR_COLOR_DW2 0x000088e1 + +#define REG_A6XX_RB_BLIT_CLEAR_COLOR_DW3 0x000088e2 + +#define REG_A6XX_RB_BLIT_INFO 0x000088e3 +#define A6XX_RB_BLIT_INFO_UNK0 0x00000001 +#define A6XX_RB_BLIT_INFO_GMEM 0x00000002 +#define A6XX_RB_BLIT_INFO_INTEGER 0x00000004 +#define A6XX_RB_BLIT_INFO_DEPTH 0x00000008 +#define A6XX_RB_BLIT_INFO_CLEAR_MASK__MASK 0x000000f0 +#define A6XX_RB_BLIT_INFO_CLEAR_MASK__SHIFT 4 +static inline uint32_t A6XX_RB_BLIT_INFO_CLEAR_MASK(uint32_t val) +{ + return ((val) << A6XX_RB_BLIT_INFO_CLEAR_MASK__SHIFT) & A6XX_RB_BLIT_INFO_CLEAR_MASK__MASK; +} + +#define REG_A6XX_RB_UNKNOWN_88F0 0x000088f0 + +#define REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE_LO 0x00008900 + +#define REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE_HI 0x00008901 + +#define REG_A6XX_RB_DEPTH_FLAG_BUFFER_PITCH 0x00008902 + +static inline uint32_t REG_A6XX_RB_MRT_FLAG_BUFFER(uint32_t i0) { return 0x00008903 + 
0x3*i0; } + +static inline uint32_t REG_A6XX_RB_MRT_FLAG_BUFFER_ADDR_LO(uint32_t i0) { return 0x00008903 + 0x3*i0; } + +static inline uint32_t REG_A6XX_RB_MRT_FLAG_BUFFER_ADDR_HI(uint32_t i0) { return 0x00008904 + 0x3*i0; } + +static inline uint32_t REG_A6XX_RB_MRT_FLAG_BUFFER_PITCH(uint32_t i0) { return 0x00008905 + 0x3*i0; } +#define A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH__MASK 0x000007ff +#define A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH__SHIFT) & A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH__MASK; +} +#define A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH__MASK 0x003ff800 +#define A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH__SHIFT 11 +static inline uint32_t A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH__SHIFT) & A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH__MASK; +} + +#define REG_A6XX_RB_SAMPLE_COUNT_ADDR_LO 0x00008927 + +#define REG_A6XX_RB_SAMPLE_COUNT_ADDR_HI 0x00008928 + +#define REG_A6XX_RB_2D_BLIT_CNTL 0x00008c00 +#define A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT__MASK 0x0000ff00 +#define A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT__SHIFT 8 +static inline uint32_t A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT__SHIFT) & A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT__MASK; +} +#define A6XX_RB_2D_BLIT_CNTL_SCISSOR 0x00010000 +#define A6XX_RB_2D_BLIT_CNTL_IFMT__MASK 0x1f000000 +#define A6XX_RB_2D_BLIT_CNTL_IFMT__SHIFT 24 +static inline uint32_t A6XX_RB_2D_BLIT_CNTL_IFMT(enum a6xx_2d_ifmt val) +{ + return ((val) << A6XX_RB_2D_BLIT_CNTL_IFMT__SHIFT) & A6XX_RB_2D_BLIT_CNTL_IFMT__MASK; +} + +#define REG_A6XX_RB_UNKNOWN_8C01 0x00008c01 + +#define REG_A6XX_RB_2D_DST_INFO 0x00008c17 +#define A6XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK 0x000000ff +#define 
A6XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A6XX_RB_2D_DST_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT) & A6XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK; +} +#define A6XX_RB_2D_DST_INFO_TILE_MODE__MASK 0x00000300 +#define A6XX_RB_2D_DST_INFO_TILE_MODE__SHIFT 8 +static inline uint32_t A6XX_RB_2D_DST_INFO_TILE_MODE(enum a6xx_tile_mode val) +{ + return ((val) << A6XX_RB_2D_DST_INFO_TILE_MODE__SHIFT) & A6XX_RB_2D_DST_INFO_TILE_MODE__MASK; +} +#define A6XX_RB_2D_DST_INFO_COLOR_SWAP__MASK 0x00000c00 +#define A6XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT 10 +static inline uint32_t A6XX_RB_2D_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A6XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT) & A6XX_RB_2D_DST_INFO_COLOR_SWAP__MASK; +} +#define A6XX_RB_2D_DST_INFO_FLAGS 0x00001000 + +#define REG_A6XX_RB_2D_DST_LO 0x00008c18 + +#define REG_A6XX_RB_2D_DST_HI 0x00008c19 + +#define REG_A6XX_RB_2D_DST_SIZE 0x00008c1a +#define A6XX_RB_2D_DST_SIZE_PITCH__MASK 0x0000ffff +#define A6XX_RB_2D_DST_SIZE_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_2D_DST_SIZE_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A6XX_RB_2D_DST_SIZE_PITCH__SHIFT) & A6XX_RB_2D_DST_SIZE_PITCH__MASK; +} + +#define REG_A6XX_RB_2D_DST_FLAGS_LO 0x00008c20 + +#define REG_A6XX_RB_2D_DST_FLAGS_HI 0x00008c21 + +#define REG_A6XX_RB_2D_SRC_SOLID_C0 0x00008c2c + +#define REG_A6XX_RB_2D_SRC_SOLID_C1 0x00008c2d + +#define REG_A6XX_RB_2D_SRC_SOLID_C2 0x00008c2e + +#define REG_A6XX_RB_2D_SRC_SOLID_C3 0x00008c2f + +#define REG_A6XX_RB_UNKNOWN_8E01 0x00008e01 + +#define REG_A6XX_RB_UNKNOWN_8E04 0x00008e04 + +#define REG_A6XX_RB_CCU_CNTL 0x00008e07 + +#define REG_A6XX_VPC_UNKNOWN_9101 0x00009101 + +#define REG_A6XX_VPC_GS_SIV_CNTL 0x00009104 + +#define REG_A6XX_VPC_UNKNOWN_9107 0x00009107 + +#define REG_A6XX_VPC_UNKNOWN_9108 0x00009108 + +static inline uint32_t REG_A6XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x00009200 + 0x1*i0; } 
+ +static inline uint32_t REG_A6XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x00009200 + 0x1*i0; } + +static inline uint32_t REG_A6XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x00009208 + 0x1*i0; } + +static inline uint32_t REG_A6XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x00009208 + 0x1*i0; } + +#define REG_A6XX_VPC_UNKNOWN_9210 0x00009210 + +#define REG_A6XX_VPC_UNKNOWN_9211 0x00009211 + +static inline uint32_t REG_A6XX_VPC_VAR(uint32_t i0) { return 0x00009212 + 0x1*i0; } + +static inline uint32_t REG_A6XX_VPC_VAR_DISABLE(uint32_t i0) { return 0x00009212 + 0x1*i0; } + +#define REG_A6XX_VPC_SO_CNTL 0x00009216 +#define A6XX_VPC_SO_CNTL_ENABLE 0x00010000 + +#define REG_A6XX_VPC_SO_PROG 0x00009217 +#define A6XX_VPC_SO_PROG_A_BUF__MASK 0x00000003 +#define A6XX_VPC_SO_PROG_A_BUF__SHIFT 0 +static inline uint32_t A6XX_VPC_SO_PROG_A_BUF(uint32_t val) +{ + return ((val) << A6XX_VPC_SO_PROG_A_BUF__SHIFT) & A6XX_VPC_SO_PROG_A_BUF__MASK; +} +#define A6XX_VPC_SO_PROG_A_OFF__MASK 0x000007fc +#define A6XX_VPC_SO_PROG_A_OFF__SHIFT 2 +static inline uint32_t A6XX_VPC_SO_PROG_A_OFF(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << A6XX_VPC_SO_PROG_A_OFF__SHIFT) & A6XX_VPC_SO_PROG_A_OFF__MASK; +} +#define A6XX_VPC_SO_PROG_A_EN 0x00000800 +#define A6XX_VPC_SO_PROG_B_BUF__MASK 0x00003000 +#define A6XX_VPC_SO_PROG_B_BUF__SHIFT 12 +static inline uint32_t A6XX_VPC_SO_PROG_B_BUF(uint32_t val) +{ + return ((val) << A6XX_VPC_SO_PROG_B_BUF__SHIFT) & A6XX_VPC_SO_PROG_B_BUF__MASK; +} +#define A6XX_VPC_SO_PROG_B_OFF__MASK 0x007fc000 +#define A6XX_VPC_SO_PROG_B_OFF__SHIFT 14 +static inline uint32_t A6XX_VPC_SO_PROG_B_OFF(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << A6XX_VPC_SO_PROG_B_OFF__SHIFT) & A6XX_VPC_SO_PROG_B_OFF__MASK; +} +#define A6XX_VPC_SO_PROG_B_EN 0x00800000 + +static inline uint32_t REG_A6XX_VPC_SO(uint32_t i0) { return 0x0000921a + 0x7*i0; } + +static inline uint32_t REG_A6XX_VPC_SO_BUFFER_BASE_LO(uint32_t i0) { return 0x0000921a + 
0x7*i0; } + +static inline uint32_t REG_A6XX_VPC_SO_BUFFER_BASE_HI(uint32_t i0) { return 0x0000921b + 0x7*i0; } + +static inline uint32_t REG_A6XX_VPC_SO_BUFFER_SIZE(uint32_t i0) { return 0x0000921c + 0x7*i0; } + +static inline uint32_t REG_A6XX_VPC_SO_NCOMP(uint32_t i0) { return 0x0000921d + 0x7*i0; } + +static inline uint32_t REG_A6XX_VPC_SO_BUFFER_OFFSET(uint32_t i0) { return 0x0000921e + 0x7*i0; } + +static inline uint32_t REG_A6XX_VPC_SO_FLUSH_BASE_LO(uint32_t i0) { return 0x0000921f + 0x7*i0; } + +static inline uint32_t REG_A6XX_VPC_SO_FLUSH_BASE_HI(uint32_t i0) { return 0x00009220 + 0x7*i0; } + +#define REG_A6XX_VPC_UNKNOWN_9236 0x00009236 + +#define REG_A6XX_VPC_UNKNOWN_9300 0x00009300 + +#define REG_A6XX_VPC_PACK 0x00009301 +#define A6XX_VPC_PACK_STRIDE_IN_VPC__MASK 0x000000ff +#define A6XX_VPC_PACK_STRIDE_IN_VPC__SHIFT 0 +static inline uint32_t A6XX_VPC_PACK_STRIDE_IN_VPC(uint32_t val) +{ + return ((val) << A6XX_VPC_PACK_STRIDE_IN_VPC__SHIFT) & A6XX_VPC_PACK_STRIDE_IN_VPC__MASK; +} +#define A6XX_VPC_PACK_NUMNONPOSVAR__MASK 0x0000ff00 +#define A6XX_VPC_PACK_NUMNONPOSVAR__SHIFT 8 +static inline uint32_t A6XX_VPC_PACK_NUMNONPOSVAR(uint32_t val) +{ + return ((val) << A6XX_VPC_PACK_NUMNONPOSVAR__SHIFT) & A6XX_VPC_PACK_NUMNONPOSVAR__MASK; +} +#define A6XX_VPC_PACK_PSIZELOC__MASK 0x00ff0000 +#define A6XX_VPC_PACK_PSIZELOC__SHIFT 16 +static inline uint32_t A6XX_VPC_PACK_PSIZELOC(uint32_t val) +{ + return ((val) << A6XX_VPC_PACK_PSIZELOC__SHIFT) & A6XX_VPC_PACK_PSIZELOC__MASK; +} + +#define REG_A6XX_VPC_CNTL_0 0x00009304 +#define A6XX_VPC_CNTL_0_NUMNONPOSVAR__MASK 0x000000ff +#define A6XX_VPC_CNTL_0_NUMNONPOSVAR__SHIFT 0 +static inline uint32_t A6XX_VPC_CNTL_0_NUMNONPOSVAR(uint32_t val) +{ + return ((val) << A6XX_VPC_CNTL_0_NUMNONPOSVAR__SHIFT) & A6XX_VPC_CNTL_0_NUMNONPOSVAR__MASK; +} +#define A6XX_VPC_CNTL_0_VARYING 0x00010000 + +#define REG_A6XX_VPC_SO_BUF_CNTL 0x00009305 +#define A6XX_VPC_SO_BUF_CNTL_BUF0 0x00000001 +#define A6XX_VPC_SO_BUF_CNTL_BUF1 0x00000008 
+#define A6XX_VPC_SO_BUF_CNTL_BUF2 0x00000040 +#define A6XX_VPC_SO_BUF_CNTL_BUF3 0x00000200 +#define A6XX_VPC_SO_BUF_CNTL_ENABLE 0x00008000 + +#define REG_A6XX_VPC_SO_OVERRIDE 0x00009306 +#define A6XX_VPC_SO_OVERRIDE_SO_DISABLE 0x00000001 + +#define REG_A6XX_VPC_UNKNOWN_9600 0x00009600 + +#define REG_A6XX_VPC_UNKNOWN_9602 0x00009602 + +#define REG_A6XX_PC_UNKNOWN_9801 0x00009801 + +#define REG_A6XX_PC_RESTART_INDEX 0x00009803 + +#define REG_A6XX_PC_MODE_CNTL 0x00009804 + +#define REG_A6XX_PC_UNKNOWN_9805 0x00009805 + +#define REG_A6XX_PC_UNKNOWN_9806 0x00009806 + +#define REG_A6XX_PC_UNKNOWN_9980 0x00009980 + +#define REG_A6XX_PC_UNKNOWN_9981 0x00009981 + +#define REG_A6XX_PC_UNKNOWN_9990 0x00009990 + +#define REG_A6XX_PC_PRIMITIVE_CNTL_0 0x00009b00 +#define A6XX_PC_PRIMITIVE_CNTL_0_PRIMITIVE_RESTART 0x00000001 +#define A6XX_PC_PRIMITIVE_CNTL_0_PROVOKING_VTX_LAST 0x00000002 + +#define REG_A6XX_PC_PRIMITIVE_CNTL_1 0x00009b01 +#define A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC__MASK 0x0000007f +#define A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC__SHIFT 0 +static inline uint32_t A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC(uint32_t val) +{ + return ((val) << A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC__SHIFT) & A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC__MASK; +} +#define A6XX_PC_PRIMITIVE_CNTL_1_PSIZE 0x00000100 + +#define REG_A6XX_PC_UNKNOWN_9B06 0x00009b06 + +#define REG_A6XX_PC_UNKNOWN_9B07 0x00009b07 + +#define REG_A6XX_PC_TESSFACTOR_ADDR_LO 0x00009e08 + +#define REG_A6XX_PC_TESSFACTOR_ADDR_HI 0x00009e09 + +#define REG_A6XX_PC_UNKNOWN_9E72 0x00009e72 + +#define REG_A6XX_VFD_CONTROL_0 0x0000a000 +#define A6XX_VFD_CONTROL_0_VTXCNT__MASK 0x0000003f +#define A6XX_VFD_CONTROL_0_VTXCNT__SHIFT 0 +static inline uint32_t A6XX_VFD_CONTROL_0_VTXCNT(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_0_VTXCNT__SHIFT) & A6XX_VFD_CONTROL_0_VTXCNT__MASK; +} + +#define REG_A6XX_VFD_CONTROL_1 0x0000a001 +#define A6XX_VFD_CONTROL_1_REGID4VTX__MASK 0x000000ff +#define A6XX_VFD_CONTROL_1_REGID4VTX__SHIFT 
0 +static inline uint32_t A6XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A6XX_VFD_CONTROL_1_REGID4VTX__MASK; +} +#define A6XX_VFD_CONTROL_1_REGID4INST__MASK 0x0000ff00 +#define A6XX_VFD_CONTROL_1_REGID4INST__SHIFT 8 +static inline uint32_t A6XX_VFD_CONTROL_1_REGID4INST(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A6XX_VFD_CONTROL_1_REGID4INST__MASK; +} +#define A6XX_VFD_CONTROL_1_REGID4PRIMID__MASK 0x00ff0000 +#define A6XX_VFD_CONTROL_1_REGID4PRIMID__SHIFT 16 +static inline uint32_t A6XX_VFD_CONTROL_1_REGID4PRIMID(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_1_REGID4PRIMID__SHIFT) & A6XX_VFD_CONTROL_1_REGID4PRIMID__MASK; +} + +#define REG_A6XX_VFD_CONTROL_2 0x0000a002 +#define A6XX_VFD_CONTROL_2_REGID_PATCHID__MASK 0x000000ff +#define A6XX_VFD_CONTROL_2_REGID_PATCHID__SHIFT 0 +static inline uint32_t A6XX_VFD_CONTROL_2_REGID_PATCHID(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_2_REGID_PATCHID__SHIFT) & A6XX_VFD_CONTROL_2_REGID_PATCHID__MASK; +} + +#define REG_A6XX_VFD_CONTROL_3 0x0000a003 +#define A6XX_VFD_CONTROL_3_REGID_PATCHID__MASK 0x0000ff00 +#define A6XX_VFD_CONTROL_3_REGID_PATCHID__SHIFT 8 +static inline uint32_t A6XX_VFD_CONTROL_3_REGID_PATCHID(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_3_REGID_PATCHID__SHIFT) & A6XX_VFD_CONTROL_3_REGID_PATCHID__MASK; +} +#define A6XX_VFD_CONTROL_3_REGID_TESSX__MASK 0x00ff0000 +#define A6XX_VFD_CONTROL_3_REGID_TESSX__SHIFT 16 +static inline uint32_t A6XX_VFD_CONTROL_3_REGID_TESSX(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_3_REGID_TESSX__SHIFT) & A6XX_VFD_CONTROL_3_REGID_TESSX__MASK; +} +#define A6XX_VFD_CONTROL_3_REGID_TESSY__MASK 0xff000000 +#define A6XX_VFD_CONTROL_3_REGID_TESSY__SHIFT 24 +static inline uint32_t A6XX_VFD_CONTROL_3_REGID_TESSY(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_3_REGID_TESSY__SHIFT) & A6XX_VFD_CONTROL_3_REGID_TESSY__MASK; +} + +#define REG_A6XX_VFD_CONTROL_4 
0x0000a004 + +#define REG_A6XX_VFD_CONTROL_5 0x0000a005 + +#define REG_A6XX_VFD_CONTROL_6 0x0000a006 + +#define REG_A6XX_VFD_MODE_CNTL 0x0000a007 +#define A6XX_VFD_MODE_CNTL_BINNING_PASS 0x00000001 + +#define REG_A6XX_VFD_UNKNOWN_A008 0x0000a008 + +#define REG_A6XX_VFD_UNKNOWN_A009 0x0000a009 + +#define REG_A6XX_VFD_INDEX_OFFSET 0x0000a00e + +#define REG_A6XX_VFD_INSTANCE_START_OFFSET 0x0000a00f + +static inline uint32_t REG_A6XX_VFD_FETCH(uint32_t i0) { return 0x0000a010 + 0x4*i0; } + +static inline uint32_t REG_A6XX_VFD_FETCH_BASE_LO(uint32_t i0) { return 0x0000a010 + 0x4*i0; } + +static inline uint32_t REG_A6XX_VFD_FETCH_BASE_HI(uint32_t i0) { return 0x0000a011 + 0x4*i0; } + +static inline uint32_t REG_A6XX_VFD_FETCH_SIZE(uint32_t i0) { return 0x0000a012 + 0x4*i0; } + +static inline uint32_t REG_A6XX_VFD_FETCH_STRIDE(uint32_t i0) { return 0x0000a013 + 0x4*i0; } + +static inline uint32_t REG_A6XX_VFD_DECODE(uint32_t i0) { return 0x0000a090 + 0x2*i0; } + +static inline uint32_t REG_A6XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x0000a090 + 0x2*i0; } +#define A6XX_VFD_DECODE_INSTR_IDX__MASK 0x0000001f +#define A6XX_VFD_DECODE_INSTR_IDX__SHIFT 0 +static inline uint32_t A6XX_VFD_DECODE_INSTR_IDX(uint32_t val) +{ + return ((val) << A6XX_VFD_DECODE_INSTR_IDX__SHIFT) & A6XX_VFD_DECODE_INSTR_IDX__MASK; +} +#define A6XX_VFD_DECODE_INSTR_INSTANCED 0x00020000 +#define A6XX_VFD_DECODE_INSTR_FORMAT__MASK 0x0ff00000 +#define A6XX_VFD_DECODE_INSTR_FORMAT__SHIFT 20 +static inline uint32_t A6XX_VFD_DECODE_INSTR_FORMAT(enum a6xx_vtx_fmt val) +{ + return ((val) << A6XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A6XX_VFD_DECODE_INSTR_FORMAT__MASK; +} +#define A6XX_VFD_DECODE_INSTR_SWAP__MASK 0x30000000 +#define A6XX_VFD_DECODE_INSTR_SWAP__SHIFT 28 +static inline uint32_t A6XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A6XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A6XX_VFD_DECODE_INSTR_SWAP__MASK; +} +#define A6XX_VFD_DECODE_INSTR_UNK30 0x40000000 +#define 
A6XX_VFD_DECODE_INSTR_FLOAT 0x80000000 + +static inline uint32_t REG_A6XX_VFD_DECODE_STEP_RATE(uint32_t i0) { return 0x0000a091 + 0x2*i0; } + +static inline uint32_t REG_A6XX_VFD_DEST_CNTL(uint32_t i0) { return 0x0000a0d0 + 0x1*i0; } + +static inline uint32_t REG_A6XX_VFD_DEST_CNTL_INSTR(uint32_t i0) { return 0x0000a0d0 + 0x1*i0; } +#define A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK__MASK 0x0000000f +#define A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK__SHIFT 0 +static inline uint32_t A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK(uint32_t val) +{ + return ((val) << A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK__SHIFT) & A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK__MASK; +} +#define A6XX_VFD_DEST_CNTL_INSTR_REGID__MASK 0x00000ff0 +#define A6XX_VFD_DEST_CNTL_INSTR_REGID__SHIFT 4 +static inline uint32_t A6XX_VFD_DEST_CNTL_INSTR_REGID(uint32_t val) +{ + return ((val) << A6XX_VFD_DEST_CNTL_INSTR_REGID__SHIFT) & A6XX_VFD_DEST_CNTL_INSTR_REGID__MASK; +} + +#define REG_A6XX_SP_UNKNOWN_A0F8 0x0000a0f8 + +#define REG_A6XX_SP_PRIMITIVE_CNTL 0x0000a802 +#define A6XX_SP_PRIMITIVE_CNTL_VSOUT__MASK 0x0000001f +#define A6XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT 0 +static inline uint32_t A6XX_SP_PRIMITIVE_CNTL_VSOUT(uint32_t val) +{ + return ((val) << A6XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT) & A6XX_SP_PRIMITIVE_CNTL_VSOUT__MASK; +} + +static inline uint32_t REG_A6XX_SP_VS_OUT(uint32_t i0) { return 0x0000a803 + 0x1*i0; } + +static inline uint32_t REG_A6XX_SP_VS_OUT_REG(uint32_t i0) { return 0x0000a803 + 0x1*i0; } +#define A6XX_SP_VS_OUT_REG_A_REGID__MASK 0x000000ff +#define A6XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 +static inline uint32_t A6XX_SP_VS_OUT_REG_A_REGID(uint32_t val) +{ + return ((val) << A6XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A6XX_SP_VS_OUT_REG_A_REGID__MASK; +} +#define A6XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00000f00 +#define A6XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 8 +static inline uint32_t A6XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val) +{ + return ((val) << A6XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A6XX_SP_VS_OUT_REG_A_COMPMASK__MASK; +} 
+#define A6XX_SP_VS_OUT_REG_B_REGID__MASK 0x00ff0000 +#define A6XX_SP_VS_OUT_REG_B_REGID__SHIFT 16 +static inline uint32_t A6XX_SP_VS_OUT_REG_B_REGID(uint32_t val) +{ + return ((val) << A6XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A6XX_SP_VS_OUT_REG_B_REGID__MASK; +} +#define A6XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x0f000000 +#define A6XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 24 +static inline uint32_t A6XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) +{ + return ((val) << A6XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A6XX_SP_VS_OUT_REG_B_COMPMASK__MASK; +} + +static inline uint32_t REG_A6XX_SP_VS_VPC_DST(uint32_t i0) { return 0x0000a813 + 0x1*i0; } + +static inline uint32_t REG_A6XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x0000a813 + 0x1*i0; } +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 +static inline uint32_t A6XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) +{ + return ((val) << A6XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A6XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK; +} +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8 +static inline uint32_t A6XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val) +{ + return ((val) << A6XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A6XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK; +} +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16 +static inline uint32_t A6XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val) +{ + return ((val) << A6XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A6XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK; +} +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24 +static inline uint32_t A6XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) +{ + return ((val) << A6XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A6XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK; +} + +#define REG_A6XX_SP_VS_CTRL_REG0 0x0000a800 +#define A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e +#define 
A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 +static inline uint32_t A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 +#define A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 +static inline uint32_t A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A6XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 +#define A6XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT 14 +static inline uint32_t A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A6XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK; +} +#define A6XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A6XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A6XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A6XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_VS_CTRL_REG0_THREADSIZE__MASK; +} +#define A6XX_SP_VS_CTRL_REG0_VARYING 0x00400000 +#define A6XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x04000000 +#define A6XX_SP_VS_CTRL_REG0_MERGEDREGS 0x80000000 + +#define REG_A6XX_SP_UNKNOWN_A81B 0x0000a81b + +#define REG_A6XX_SP_VS_OBJ_START_LO 0x0000a81c + +#define REG_A6XX_SP_VS_OBJ_START_HI 0x0000a81d + +#define REG_A6XX_SP_VS_TEX_COUNT 0x0000a822 + +#define REG_A6XX_SP_VS_CONFIG 0x0000a823 +#define A6XX_SP_VS_CONFIG_ENABLED 0x00000100 +#define A6XX_SP_VS_CONFIG_NTEX__MASK 0x0001fe00 +#define A6XX_SP_VS_CONFIG_NTEX__SHIFT 9 +static inline uint32_t A6XX_SP_VS_CONFIG_NTEX(uint32_t val) +{ + return ((val) << A6XX_SP_VS_CONFIG_NTEX__SHIFT) & A6XX_SP_VS_CONFIG_NTEX__MASK; +} +#define A6XX_SP_VS_CONFIG_NSAMP__MASK 0x01fe0000 +#define A6XX_SP_VS_CONFIG_NSAMP__SHIFT 17 +static inline uint32_t A6XX_SP_VS_CONFIG_NSAMP(uint32_t val) +{ + return ((val) << 
A6XX_SP_VS_CONFIG_NSAMP__SHIFT) & A6XX_SP_VS_CONFIG_NSAMP__MASK; +} + +#define REG_A6XX_SP_VS_INSTRLEN 0x0000a824 + +#define REG_A6XX_SP_HS_CTRL_REG0 0x0000a830 +#define A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e +#define A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 +static inline uint32_t A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 +#define A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 +static inline uint32_t A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A6XX_SP_HS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 +#define A6XX_SP_HS_CTRL_REG0_BRANCHSTACK__SHIFT 14 +static inline uint32_t A6XX_SP_HS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A6XX_SP_HS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_HS_CTRL_REG0_BRANCHSTACK__MASK; +} +#define A6XX_SP_HS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A6XX_SP_HS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A6XX_SP_HS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A6XX_SP_HS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_HS_CTRL_REG0_THREADSIZE__MASK; +} +#define A6XX_SP_HS_CTRL_REG0_VARYING 0x00400000 +#define A6XX_SP_HS_CTRL_REG0_PIXLODENABLE 0x04000000 +#define A6XX_SP_HS_CTRL_REG0_MERGEDREGS 0x80000000 + +#define REG_A6XX_SP_HS_UNKNOWN_A831 0x0000a831 + +#define REG_A6XX_SP_HS_OBJ_START_LO 0x0000a834 + +#define REG_A6XX_SP_HS_OBJ_START_HI 0x0000a835 + +#define REG_A6XX_SP_HS_TEX_COUNT 0x0000a83a + +#define REG_A6XX_SP_HS_CONFIG 0x0000a83b +#define A6XX_SP_HS_CONFIG_ENABLED 0x00000100 +#define A6XX_SP_HS_CONFIG_NTEX__MASK 0x0001fe00 +#define A6XX_SP_HS_CONFIG_NTEX__SHIFT 9 +static inline uint32_t A6XX_SP_HS_CONFIG_NTEX(uint32_t val) +{ + return ((val) << 
A6XX_SP_HS_CONFIG_NTEX__SHIFT) & A6XX_SP_HS_CONFIG_NTEX__MASK; +} +#define A6XX_SP_HS_CONFIG_NSAMP__MASK 0x01fe0000 +#define A6XX_SP_HS_CONFIG_NSAMP__SHIFT 17 +static inline uint32_t A6XX_SP_HS_CONFIG_NSAMP(uint32_t val) +{ + return ((val) << A6XX_SP_HS_CONFIG_NSAMP__SHIFT) & A6XX_SP_HS_CONFIG_NSAMP__MASK; +} + +#define REG_A6XX_SP_HS_INSTRLEN 0x0000a83c + +#define REG_A6XX_SP_DS_CTRL_REG0 0x0000a840 +#define A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e +#define A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 +static inline uint32_t A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 +#define A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 +static inline uint32_t A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A6XX_SP_DS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 +#define A6XX_SP_DS_CTRL_REG0_BRANCHSTACK__SHIFT 14 +static inline uint32_t A6XX_SP_DS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A6XX_SP_DS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_DS_CTRL_REG0_BRANCHSTACK__MASK; +} +#define A6XX_SP_DS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A6XX_SP_DS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A6XX_SP_DS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A6XX_SP_DS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_DS_CTRL_REG0_THREADSIZE__MASK; +} +#define A6XX_SP_DS_CTRL_REG0_VARYING 0x00400000 +#define A6XX_SP_DS_CTRL_REG0_PIXLODENABLE 0x04000000 +#define A6XX_SP_DS_CTRL_REG0_MERGEDREGS 0x80000000 + +#define REG_A6XX_SP_DS_OBJ_START_LO 0x0000a85c + +#define REG_A6XX_SP_DS_OBJ_START_HI 0x0000a85d + +#define REG_A6XX_SP_DS_TEX_COUNT 0x0000a862 + +#define REG_A6XX_SP_DS_CONFIG 0x0000a863 +#define A6XX_SP_DS_CONFIG_ENABLED 
0x00000100 +#define A6XX_SP_DS_CONFIG_NTEX__MASK 0x0001fe00 +#define A6XX_SP_DS_CONFIG_NTEX__SHIFT 9 +static inline uint32_t A6XX_SP_DS_CONFIG_NTEX(uint32_t val) +{ + return ((val) << A6XX_SP_DS_CONFIG_NTEX__SHIFT) & A6XX_SP_DS_CONFIG_NTEX__MASK; +} +#define A6XX_SP_DS_CONFIG_NSAMP__MASK 0x01fe0000 +#define A6XX_SP_DS_CONFIG_NSAMP__SHIFT 17 +static inline uint32_t A6XX_SP_DS_CONFIG_NSAMP(uint32_t val) +{ + return ((val) << A6XX_SP_DS_CONFIG_NSAMP__SHIFT) & A6XX_SP_DS_CONFIG_NSAMP__MASK; +} + +#define REG_A6XX_SP_DS_INSTRLEN 0x0000a864 + +#define REG_A6XX_SP_GS_CTRL_REG0 0x0000a870 +#define A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e +#define A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 +static inline uint32_t A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 +#define A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 +static inline uint32_t A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A6XX_SP_GS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 +#define A6XX_SP_GS_CTRL_REG0_BRANCHSTACK__SHIFT 14 +static inline uint32_t A6XX_SP_GS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A6XX_SP_GS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_GS_CTRL_REG0_BRANCHSTACK__MASK; +} +#define A6XX_SP_GS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A6XX_SP_GS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A6XX_SP_GS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A6XX_SP_GS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_GS_CTRL_REG0_THREADSIZE__MASK; +} +#define A6XX_SP_GS_CTRL_REG0_VARYING 0x00400000 +#define A6XX_SP_GS_CTRL_REG0_PIXLODENABLE 0x04000000 +#define A6XX_SP_GS_CTRL_REG0_MERGEDREGS 0x80000000 + +#define REG_A6XX_SP_GS_UNKNOWN_A871 
0x0000a871 + +#define REG_A6XX_SP_GS_OBJ_START_LO 0x0000a88d + +#define REG_A6XX_SP_GS_OBJ_START_HI 0x0000a88e + +#define REG_A6XX_SP_GS_TEX_COUNT 0x0000a893 + +#define REG_A6XX_SP_GS_CONFIG 0x0000a894 +#define A6XX_SP_GS_CONFIG_ENABLED 0x00000100 +#define A6XX_SP_GS_CONFIG_NTEX__MASK 0x0001fe00 +#define A6XX_SP_GS_CONFIG_NTEX__SHIFT 9 +static inline uint32_t A6XX_SP_GS_CONFIG_NTEX(uint32_t val) +{ + return ((val) << A6XX_SP_GS_CONFIG_NTEX__SHIFT) & A6XX_SP_GS_CONFIG_NTEX__MASK; +} +#define A6XX_SP_GS_CONFIG_NSAMP__MASK 0x01fe0000 +#define A6XX_SP_GS_CONFIG_NSAMP__SHIFT 17 +static inline uint32_t A6XX_SP_GS_CONFIG_NSAMP(uint32_t val) +{ + return ((val) << A6XX_SP_GS_CONFIG_NSAMP__SHIFT) & A6XX_SP_GS_CONFIG_NSAMP__MASK; +} + +#define REG_A6XX_SP_GS_INSTRLEN 0x0000a895 + +#define REG_A6XX_SP_VS_TEX_SAMP_LO 0x0000a8a0 + +#define REG_A6XX_SP_VS_TEX_SAMP_HI 0x0000a8a1 + +#define REG_A6XX_SP_HS_TEX_SAMP_LO 0x0000a8a2 + +#define REG_A6XX_SP_HS_TEX_SAMP_HI 0x0000a8a3 + +#define REG_A6XX_SP_DS_TEX_SAMP_LO 0x0000a8a4 + +#define REG_A6XX_SP_DS_TEX_SAMP_HI 0x0000a8a5 + +#define REG_A6XX_SP_GS_TEX_SAMP_LO 0x0000a8a6 + +#define REG_A6XX_SP_GS_TEX_SAMP_HI 0x0000a8a7 + +#define REG_A6XX_SP_VS_TEX_CONST_LO 0x0000a8a8 + +#define REG_A6XX_SP_VS_TEX_CONST_HI 0x0000a8a9 + +#define REG_A6XX_SP_HS_TEX_CONST_LO 0x0000a8aa + +#define REG_A6XX_SP_HS_TEX_CONST_HI 0x0000a8ab + +#define REG_A6XX_SP_DS_TEX_CONST_LO 0x0000a8ac + +#define REG_A6XX_SP_DS_TEX_CONST_HI 0x0000a8ad + +#define REG_A6XX_SP_GS_TEX_CONST_LO 0x0000a8ae + +#define REG_A6XX_SP_GS_TEX_CONST_HI 0x0000a8af + +#define REG_A6XX_SP_FS_CTRL_REG0 0x0000a980 +#define A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e +#define A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 +static inline uint32_t A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 
+#define A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 +static inline uint32_t A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A6XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 +#define A6XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT 14 +static inline uint32_t A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A6XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK; +} +#define A6XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A6XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A6XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A6XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_FS_CTRL_REG0_THREADSIZE__MASK; +} +#define A6XX_SP_FS_CTRL_REG0_VARYING 0x00400000 +#define A6XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x04000000 +#define A6XX_SP_FS_CTRL_REG0_MERGEDREGS 0x80000000 + +#define REG_A6XX_SP_UNKNOWN_A982 0x0000a982 + +#define REG_A6XX_SP_FS_OBJ_START_LO 0x0000a983 + +#define REG_A6XX_SP_FS_OBJ_START_HI 0x0000a984 + +#define REG_A6XX_SP_BLEND_CNTL 0x0000a989 +#define A6XX_SP_BLEND_CNTL_ENABLED 0x00000001 +#define A6XX_SP_BLEND_CNTL_UNK8 0x00000100 +#define A6XX_SP_BLEND_CNTL_ALPHA_TO_COVERAGE 0x00000400 + +#define REG_A6XX_SP_SRGB_CNTL 0x0000a98a +#define A6XX_SP_SRGB_CNTL_SRGB_MRT0 0x00000001 +#define A6XX_SP_SRGB_CNTL_SRGB_MRT1 0x00000002 +#define A6XX_SP_SRGB_CNTL_SRGB_MRT2 0x00000004 +#define A6XX_SP_SRGB_CNTL_SRGB_MRT3 0x00000008 +#define A6XX_SP_SRGB_CNTL_SRGB_MRT4 0x00000010 +#define A6XX_SP_SRGB_CNTL_SRGB_MRT5 0x00000020 +#define A6XX_SP_SRGB_CNTL_SRGB_MRT6 0x00000040 +#define A6XX_SP_SRGB_CNTL_SRGB_MRT7 0x00000080 + +#define REG_A6XX_SP_FS_RENDER_COMPONENTS 0x0000a98b +#define A6XX_SP_FS_RENDER_COMPONENTS_RT0__MASK 0x0000000f +#define A6XX_SP_FS_RENDER_COMPONENTS_RT0__SHIFT 0 +static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT0(uint32_t val) +{ 
+ return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT0__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT0__MASK; +} +#define A6XX_SP_FS_RENDER_COMPONENTS_RT1__MASK 0x000000f0 +#define A6XX_SP_FS_RENDER_COMPONENTS_RT1__SHIFT 4 +static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT1(uint32_t val) +{ + return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT1__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT1__MASK; +} +#define A6XX_SP_FS_RENDER_COMPONENTS_RT2__MASK 0x00000f00 +#define A6XX_SP_FS_RENDER_COMPONENTS_RT2__SHIFT 8 +static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT2(uint32_t val) +{ + return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT2__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT2__MASK; +} +#define A6XX_SP_FS_RENDER_COMPONENTS_RT3__MASK 0x0000f000 +#define A6XX_SP_FS_RENDER_COMPONENTS_RT3__SHIFT 12 +static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT3(uint32_t val) +{ + return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT3__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT3__MASK; +} +#define A6XX_SP_FS_RENDER_COMPONENTS_RT4__MASK 0x000f0000 +#define A6XX_SP_FS_RENDER_COMPONENTS_RT4__SHIFT 16 +static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT4(uint32_t val) +{ + return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT4__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT4__MASK; +} +#define A6XX_SP_FS_RENDER_COMPONENTS_RT5__MASK 0x00f00000 +#define A6XX_SP_FS_RENDER_COMPONENTS_RT5__SHIFT 20 +static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT5(uint32_t val) +{ + return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT5__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT5__MASK; +} +#define A6XX_SP_FS_RENDER_COMPONENTS_RT6__MASK 0x0f000000 +#define A6XX_SP_FS_RENDER_COMPONENTS_RT6__SHIFT 24 +static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT6(uint32_t val) +{ + return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT6__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT6__MASK; +} +#define A6XX_SP_FS_RENDER_COMPONENTS_RT7__MASK 0xf0000000 +#define A6XX_SP_FS_RENDER_COMPONENTS_RT7__SHIFT 28 +static inline uint32_t 
A6XX_SP_FS_RENDER_COMPONENTS_RT7(uint32_t val) +{ + return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT7__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT7__MASK; +} + +#define REG_A6XX_SP_FS_OUTPUT_CNTL0 0x0000a98c +#define A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID__MASK 0x0000ff00 +#define A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID__SHIFT 8 +static inline uint32_t A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(uint32_t val) +{ + return ((val) << A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID__SHIFT) & A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID__MASK; +} + +#define REG_A6XX_SP_FS_OUTPUT_CNTL1 0x0000a98d +#define A6XX_SP_FS_OUTPUT_CNTL1_MRT__MASK 0x0000000f +#define A6XX_SP_FS_OUTPUT_CNTL1_MRT__SHIFT 0 +static inline uint32_t A6XX_SP_FS_OUTPUT_CNTL1_MRT(uint32_t val) +{ + return ((val) << A6XX_SP_FS_OUTPUT_CNTL1_MRT__SHIFT) & A6XX_SP_FS_OUTPUT_CNTL1_MRT__MASK; +} + +static inline uint32_t REG_A6XX_SP_FS_MRT(uint32_t i0) { return 0x0000a996 + 0x1*i0; } + +static inline uint32_t REG_A6XX_SP_FS_MRT_REG(uint32_t i0) { return 0x0000a996 + 0x1*i0; } +#define A6XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK 0x000000ff +#define A6XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A6XX_SP_FS_MRT_REG_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT) & A6XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK; +} +#define A6XX_SP_FS_MRT_REG_COLOR_SINT 0x00000100 +#define A6XX_SP_FS_MRT_REG_COLOR_UINT 0x00000200 + +#define REG_A6XX_SP_UNKNOWN_A99E 0x0000a99e + +#define REG_A6XX_SP_FS_TEX_COUNT 0x0000a9a7 + +#define REG_A6XX_SP_UNKNOWN_A9A8 0x0000a9a8 + +#define REG_A6XX_SP_FS_TEX_SAMP_LO 0x0000a9e0 + +#define REG_A6XX_SP_FS_TEX_SAMP_HI 0x0000a9e1 + +#define REG_A6XX_SP_CS_TEX_SAMP_LO 0x0000a9e2 + +#define REG_A6XX_SP_CS_TEX_SAMP_HI 0x0000a9e3 + +#define REG_A6XX_SP_FS_TEX_CONST_LO 0x0000a9e4 + +#define REG_A6XX_SP_FS_TEX_CONST_HI 0x0000a9e5 + +#define REG_A6XX_SP_CS_TEX_CONST_LO 0x0000a9e6 + +#define REG_A6XX_SP_CS_TEX_CONST_HI 0x0000a9e7 + +static inline uint32_t REG_A6XX_SP_FS_OUTPUT(uint32_t i0) 
{ return 0x0000a98e + 0x1*i0; } + +static inline uint32_t REG_A6XX_SP_FS_OUTPUT_REG(uint32_t i0) { return 0x0000a98e + 0x1*i0; } +#define A6XX_SP_FS_OUTPUT_REG_REGID__MASK 0x000000ff +#define A6XX_SP_FS_OUTPUT_REG_REGID__SHIFT 0 +static inline uint32_t A6XX_SP_FS_OUTPUT_REG_REGID(uint32_t val) +{ + return ((val) << A6XX_SP_FS_OUTPUT_REG_REGID__SHIFT) & A6XX_SP_FS_OUTPUT_REG_REGID__MASK; +} +#define A6XX_SP_FS_OUTPUT_REG_HALF_PRECISION 0x00000100 + +#define REG_A6XX_SP_CS_CTRL_REG0 0x0000a9b0 +#define A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e +#define A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 +static inline uint32_t A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 +#define A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 +static inline uint32_t A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A6XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 +#define A6XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT 14 +static inline uint32_t A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A6XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK; +} +#define A6XX_SP_CS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A6XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A6XX_SP_CS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A6XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_CS_CTRL_REG0_THREADSIZE__MASK; +} +#define A6XX_SP_CS_CTRL_REG0_VARYING 0x00400000 +#define A6XX_SP_CS_CTRL_REG0_PIXLODENABLE 0x04000000 +#define A6XX_SP_CS_CTRL_REG0_MERGEDREGS 0x80000000 + +#define REG_A6XX_SP_CS_OBJ_START_LO 0x0000a9b4 + +#define REG_A6XX_SP_CS_OBJ_START_HI 0x0000a9b5 + +#define 
REG_A6XX_SP_CS_INSTRLEN 0x0000a9bc + +#define REG_A6XX_SP_UNKNOWN_AB00 0x0000ab00 + +#define REG_A6XX_SP_FS_CONFIG 0x0000ab04 +#define A6XX_SP_FS_CONFIG_ENABLED 0x00000100 +#define A6XX_SP_FS_CONFIG_NTEX__MASK 0x0001fe00 +#define A6XX_SP_FS_CONFIG_NTEX__SHIFT 9 +static inline uint32_t A6XX_SP_FS_CONFIG_NTEX(uint32_t val) +{ + return ((val) << A6XX_SP_FS_CONFIG_NTEX__SHIFT) & A6XX_SP_FS_CONFIG_NTEX__MASK; +} +#define A6XX_SP_FS_CONFIG_NSAMP__MASK 0x01fe0000 +#define A6XX_SP_FS_CONFIG_NSAMP__SHIFT 17 +static inline uint32_t A6XX_SP_FS_CONFIG_NSAMP(uint32_t val) +{ + return ((val) << A6XX_SP_FS_CONFIG_NSAMP__SHIFT) & A6XX_SP_FS_CONFIG_NSAMP__MASK; +} + +#define REG_A6XX_SP_FS_INSTRLEN 0x0000ab05 + +#define REG_A6XX_SP_UNKNOWN_AB20 0x0000ab20 + +#define REG_A6XX_SP_2D_SRC_FORMAT 0x0000acc0 +#define A6XX_SP_2D_SRC_FORMAT_NORM 0x00000001 +#define A6XX_SP_2D_SRC_FORMAT_SINT 0x00000002 +#define A6XX_SP_2D_SRC_FORMAT_UINT 0x00000004 +#define A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT__MASK 0x000007f8 +#define A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT__SHIFT 3 +static inline uint32_t A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT__SHIFT) & A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT__MASK; +} + +#define REG_A6XX_SP_UNKNOWN_AE00 0x0000ae00 + +#define REG_A6XX_SP_UNKNOWN_AE03 0x0000ae03 + +#define REG_A6XX_SP_UNKNOWN_AE04 0x0000ae04 + +#define REG_A6XX_SP_UNKNOWN_AE0F 0x0000ae0f + +#define REG_A6XX_SP_UNKNOWN_B182 0x0000b182 + +#define REG_A6XX_SP_UNKNOWN_B183 0x0000b183 + +#define REG_A6XX_SP_TP_RAS_MSAA_CNTL 0x0000b300 +#define A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES__MASK; +} + +#define REG_A6XX_SP_TP_DEST_MSAA_CNTL 0x0000b301 +#define A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 
+#define A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES__MASK; +} +#define A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 + +#define REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR_LO 0x0000b302 + +#define REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR_HI 0x0000b303 + +#define REG_A6XX_SP_TP_UNKNOWN_B304 0x0000b304 + +#define REG_A6XX_SP_TP_UNKNOWN_B309 0x0000b309 + +#define REG_A6XX_SP_PS_2D_SRC_INFO 0x0000b4c0 +#define A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__SHIFT) & A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__MASK; +} +#define A6XX_SP_PS_2D_SRC_INFO_TILE_MODE__MASK 0x00000300 +#define A6XX_SP_PS_2D_SRC_INFO_TILE_MODE__SHIFT 8 +static inline uint32_t A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(enum a6xx_tile_mode val) +{ + return ((val) << A6XX_SP_PS_2D_SRC_INFO_TILE_MODE__SHIFT) & A6XX_SP_PS_2D_SRC_INFO_TILE_MODE__MASK; +} +#define A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP__MASK 0x00000c00 +#define A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP__SHIFT 10 +static inline uint32_t A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP__SHIFT) & A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP__MASK; +} +#define A6XX_SP_PS_2D_SRC_INFO_FLAGS 0x00001000 +#define A6XX_SP_PS_2D_SRC_INFO_FILTER 0x00010000 + +#define REG_A6XX_SP_PS_2D_SRC_SIZE 0x0000b4c1 +#define A6XX_SP_PS_2D_SRC_SIZE_WIDTH__MASK 0x00007fff +#define A6XX_SP_PS_2D_SRC_SIZE_WIDTH__SHIFT 0 +static inline uint32_t A6XX_SP_PS_2D_SRC_SIZE_WIDTH(uint32_t val) +{ + return ((val) << A6XX_SP_PS_2D_SRC_SIZE_WIDTH__SHIFT) & A6XX_SP_PS_2D_SRC_SIZE_WIDTH__MASK; +} +#define A6XX_SP_PS_2D_SRC_SIZE_HEIGHT__MASK 0x3fff8000 +#define 
A6XX_SP_PS_2D_SRC_SIZE_HEIGHT__SHIFT 15 +static inline uint32_t A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(uint32_t val) +{ + return ((val) << A6XX_SP_PS_2D_SRC_SIZE_HEIGHT__SHIFT) & A6XX_SP_PS_2D_SRC_SIZE_HEIGHT__MASK; +} + +#define REG_A6XX_SP_PS_2D_SRC_LO 0x0000b4c2 + +#define REG_A6XX_SP_PS_2D_SRC_HI 0x0000b4c3 + +#define REG_A6XX_SP_PS_2D_SRC_PITCH 0x0000b4c4 +#define A6XX_SP_PS_2D_SRC_PITCH_PITCH__MASK 0x01fffe00 +#define A6XX_SP_PS_2D_SRC_PITCH_PITCH__SHIFT 9 +static inline uint32_t A6XX_SP_PS_2D_SRC_PITCH_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A6XX_SP_PS_2D_SRC_PITCH_PITCH__SHIFT) & A6XX_SP_PS_2D_SRC_PITCH_PITCH__MASK; +} + +#define REG_A6XX_SP_PS_2D_SRC_FLAGS_LO 0x0000b4ca + +#define REG_A6XX_SP_PS_2D_SRC_FLAGS_HI 0x0000b4cb + +#define REG_A6XX_SP_UNKNOWN_B600 0x0000b600 + +#define REG_A6XX_SP_UNKNOWN_B605 0x0000b605 + +#define REG_A6XX_HLSQ_VS_CNTL 0x0000b800 +#define A6XX_HLSQ_VS_CNTL_CONSTLEN__MASK 0x000000ff +#define A6XX_HLSQ_VS_CNTL_CONSTLEN__SHIFT 0 +static inline uint32_t A6XX_HLSQ_VS_CNTL_CONSTLEN(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << A6XX_HLSQ_VS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_VS_CNTL_CONSTLEN__MASK; +} + +#define REG_A6XX_HLSQ_HS_CNTL 0x0000b801 +#define A6XX_HLSQ_HS_CNTL_CONSTLEN__MASK 0x000000ff +#define A6XX_HLSQ_HS_CNTL_CONSTLEN__SHIFT 0 +static inline uint32_t A6XX_HLSQ_HS_CNTL_CONSTLEN(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << A6XX_HLSQ_HS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_HS_CNTL_CONSTLEN__MASK; +} + +#define REG_A6XX_HLSQ_DS_CNTL 0x0000b802 +#define A6XX_HLSQ_DS_CNTL_CONSTLEN__MASK 0x000000ff +#define A6XX_HLSQ_DS_CNTL_CONSTLEN__SHIFT 0 +static inline uint32_t A6XX_HLSQ_DS_CNTL_CONSTLEN(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << A6XX_HLSQ_DS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_DS_CNTL_CONSTLEN__MASK; +} + +#define REG_A6XX_HLSQ_GS_CNTL 0x0000b803 +#define A6XX_HLSQ_GS_CNTL_CONSTLEN__MASK 0x000000ff +#define A6XX_HLSQ_GS_CNTL_CONSTLEN__SHIFT 0 
+static inline uint32_t A6XX_HLSQ_GS_CNTL_CONSTLEN(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << A6XX_HLSQ_GS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_GS_CNTL_CONSTLEN__MASK; +} + +#define REG_A6XX_HLSQ_UNKNOWN_B980 0x0000b980 + +#define REG_A6XX_HLSQ_CONTROL_1_REG 0x0000b982 + +#define REG_A6XX_HLSQ_CONTROL_2_REG 0x0000b983 +#define A6XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK 0x000000ff +#define A6XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CONTROL_2_REG_FACEREGID(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT) & A6XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK; +} +#define A6XX_HLSQ_CONTROL_2_REG_SAMPLEID__MASK 0x0000ff00 +#define A6XX_HLSQ_CONTROL_2_REG_SAMPLEID__SHIFT 8 +static inline uint32_t A6XX_HLSQ_CONTROL_2_REG_SAMPLEID(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CONTROL_2_REG_SAMPLEID__SHIFT) & A6XX_HLSQ_CONTROL_2_REG_SAMPLEID__MASK; +} +#define A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__MASK 0x00ff0000 +#define A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__SHIFT 16 +static inline uint32_t A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__SHIFT) & A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__MASK; +} + +#define REG_A6XX_HLSQ_CONTROL_3_REG 0x0000b984 +#define A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK 0x000000ff +#define A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__SHIFT) & A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK; +} + +#define REG_A6XX_HLSQ_CONTROL_4_REG 0x0000b985 +#define A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__MASK 0x00ff0000 +#define A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__SHIFT 16 +static inline uint32_t A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__SHIFT) & A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__MASK; +} +#define 
A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__MASK 0xff000000 +#define A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__SHIFT 24 +static inline uint32_t A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__SHIFT) & A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__MASK; +} + +#define REG_A6XX_HLSQ_CONTROL_5_REG 0x0000b986 + +#define REG_A6XX_HLSQ_CS_NDRANGE_0 0x0000b990 +#define A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK 0x00000003 +#define A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT) & A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK; +} +#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK 0x00000ffc +#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT 2 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT) & A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK; +} +#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK 0x003ff000 +#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT 12 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT) & A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK; +} +#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK 0xffc00000 +#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT 22 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT) & A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK; +} + +#define REG_A6XX_HLSQ_CS_NDRANGE_1 0x0000b991 +#define A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__MASK 0xffffffff +#define A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__SHIFT) & A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__MASK; +} + +#define REG_A6XX_HLSQ_CS_NDRANGE_2 0x0000b992 +#define 
A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__MASK 0xffffffff +#define A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__SHIFT) & A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__MASK; +} + +#define REG_A6XX_HLSQ_CS_NDRANGE_3 0x0000b993 +#define A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__MASK 0xffffffff +#define A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__SHIFT) & A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__MASK; +} + +#define REG_A6XX_HLSQ_CS_NDRANGE_4 0x0000b994 +#define A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__MASK 0xffffffff +#define A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__SHIFT) & A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__MASK; +} + +#define REG_A6XX_HLSQ_CS_NDRANGE_5 0x0000b995 +#define A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__MASK 0xffffffff +#define A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__SHIFT) & A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__MASK; +} + +#define REG_A6XX_HLSQ_CS_NDRANGE_6 0x0000b996 +#define A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__MASK 0xffffffff +#define A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__SHIFT) & A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__MASK; +} + +#define REG_A6XX_HLSQ_CS_CNTL_0 0x0000b997 +#define A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK 0x000000ff +#define A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT) & 
A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK; +} +#define A6XX_HLSQ_CS_CNTL_0_UNK0__MASK 0x0000ff00 +#define A6XX_HLSQ_CS_CNTL_0_UNK0__SHIFT 8 +static inline uint32_t A6XX_HLSQ_CS_CNTL_0_UNK0(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_CNTL_0_UNK0__SHIFT) & A6XX_HLSQ_CS_CNTL_0_UNK0__MASK; +} +#define A6XX_HLSQ_CS_CNTL_0_UNK1__MASK 0x00ff0000 +#define A6XX_HLSQ_CS_CNTL_0_UNK1__SHIFT 16 +static inline uint32_t A6XX_HLSQ_CS_CNTL_0_UNK1(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_CNTL_0_UNK1__SHIFT) & A6XX_HLSQ_CS_CNTL_0_UNK1__MASK; +} +#define A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK 0xff000000 +#define A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT 24 +static inline uint32_t A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT) & A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK; +} + +#define REG_A6XX_HLSQ_CS_KERNEL_GROUP_X 0x0000b999 + +#define REG_A6XX_HLSQ_CS_KERNEL_GROUP_Y 0x0000b99a + +#define REG_A6XX_HLSQ_CS_KERNEL_GROUP_Z 0x0000b99b + +#define REG_A6XX_HLSQ_UPDATE_CNTL 0x0000bb08 + +#define REG_A6XX_HLSQ_FS_CNTL 0x0000bb10 +#define A6XX_HLSQ_FS_CNTL_CONSTLEN__MASK 0x000000ff +#define A6XX_HLSQ_FS_CNTL_CONSTLEN__SHIFT 0 +static inline uint32_t A6XX_HLSQ_FS_CNTL_CONSTLEN(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << A6XX_HLSQ_FS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_FS_CNTL_CONSTLEN__MASK; +} + +#define REG_A6XX_HLSQ_UNKNOWN_BB11 0x0000bb11 + +#define REG_A6XX_HLSQ_UNKNOWN_BE00 0x0000be00 + +#define REG_A6XX_HLSQ_UNKNOWN_BE01 0x0000be01 + +#define REG_A6XX_HLSQ_UNKNOWN_BE04 0x0000be04 + +#define REG_A6XX_TEX_SAMP_0 0x00000000 +#define A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR 0x00000001 +#define A6XX_TEX_SAMP_0_XY_MAG__MASK 0x00000006 +#define A6XX_TEX_SAMP_0_XY_MAG__SHIFT 1 +static inline uint32_t A6XX_TEX_SAMP_0_XY_MAG(enum a6xx_tex_filter val) +{ + return ((val) << A6XX_TEX_SAMP_0_XY_MAG__SHIFT) & A6XX_TEX_SAMP_0_XY_MAG__MASK; +} +#define A6XX_TEX_SAMP_0_XY_MIN__MASK 0x00000018 +#define 
A6XX_TEX_SAMP_0_XY_MIN__SHIFT 3 +static inline uint32_t A6XX_TEX_SAMP_0_XY_MIN(enum a6xx_tex_filter val) +{ + return ((val) << A6XX_TEX_SAMP_0_XY_MIN__SHIFT) & A6XX_TEX_SAMP_0_XY_MIN__MASK; +} +#define A6XX_TEX_SAMP_0_WRAP_S__MASK 0x000000e0 +#define A6XX_TEX_SAMP_0_WRAP_S__SHIFT 5 +static inline uint32_t A6XX_TEX_SAMP_0_WRAP_S(enum a6xx_tex_clamp val) +{ + return ((val) << A6XX_TEX_SAMP_0_WRAP_S__SHIFT) & A6XX_TEX_SAMP_0_WRAP_S__MASK; +} +#define A6XX_TEX_SAMP_0_WRAP_T__MASK 0x00000700 +#define A6XX_TEX_SAMP_0_WRAP_T__SHIFT 8 +static inline uint32_t A6XX_TEX_SAMP_0_WRAP_T(enum a6xx_tex_clamp val) +{ + return ((val) << A6XX_TEX_SAMP_0_WRAP_T__SHIFT) & A6XX_TEX_SAMP_0_WRAP_T__MASK; +} +#define A6XX_TEX_SAMP_0_WRAP_R__MASK 0x00003800 +#define A6XX_TEX_SAMP_0_WRAP_R__SHIFT 11 +static inline uint32_t A6XX_TEX_SAMP_0_WRAP_R(enum a6xx_tex_clamp val) +{ + return ((val) << A6XX_TEX_SAMP_0_WRAP_R__SHIFT) & A6XX_TEX_SAMP_0_WRAP_R__MASK; +} +#define A6XX_TEX_SAMP_0_ANISO__MASK 0x0001c000 +#define A6XX_TEX_SAMP_0_ANISO__SHIFT 14 +static inline uint32_t A6XX_TEX_SAMP_0_ANISO(enum a6xx_tex_aniso val) +{ + return ((val) << A6XX_TEX_SAMP_0_ANISO__SHIFT) & A6XX_TEX_SAMP_0_ANISO__MASK; +} +#define A6XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000 +#define A6XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19 +static inline uint32_t A6XX_TEX_SAMP_0_LOD_BIAS(float val) +{ + return ((((int32_t)(val * 256.0))) << A6XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A6XX_TEX_SAMP_0_LOD_BIAS__MASK; +} + +#define REG_A6XX_TEX_SAMP_1 0x00000001 +#define A6XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e +#define A6XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT 1 +static inline uint32_t A6XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val) +{ + return ((val) << A6XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A6XX_TEX_SAMP_1_COMPARE_FUNC__MASK; +} +#define A6XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF 0x00000010 +#define A6XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020 +#define A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040 +#define A6XX_TEX_SAMP_1_MAX_LOD__MASK 
0x000fff00 +#define A6XX_TEX_SAMP_1_MAX_LOD__SHIFT 8 +static inline uint32_t A6XX_TEX_SAMP_1_MAX_LOD(float val) +{ + return ((((uint32_t)(val * 256.0))) << A6XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A6XX_TEX_SAMP_1_MAX_LOD__MASK; +} +#define A6XX_TEX_SAMP_1_MIN_LOD__MASK 0xfff00000 +#define A6XX_TEX_SAMP_1_MIN_LOD__SHIFT 20 +static inline uint32_t A6XX_TEX_SAMP_1_MIN_LOD(float val) +{ + return ((((uint32_t)(val * 256.0))) << A6XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A6XX_TEX_SAMP_1_MIN_LOD__MASK; +} + +#define REG_A6XX_TEX_SAMP_2 0x00000002 +#define A6XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK 0xfffffff0 +#define A6XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT 4 +static inline uint32_t A6XX_TEX_SAMP_2_BCOLOR_OFFSET(uint32_t val) +{ + return ((val) << A6XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT) & A6XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK; +} + +#define REG_A6XX_TEX_SAMP_3 0x00000003 + +#define REG_A6XX_TEX_CONST_0 0x00000000 +#define A6XX_TEX_CONST_0_TILE_MODE__MASK 0x00000003 +#define A6XX_TEX_CONST_0_TILE_MODE__SHIFT 0 +static inline uint32_t A6XX_TEX_CONST_0_TILE_MODE(enum a6xx_tile_mode val) +{ + return ((val) << A6XX_TEX_CONST_0_TILE_MODE__SHIFT) & A6XX_TEX_CONST_0_TILE_MODE__MASK; +} +#define A6XX_TEX_CONST_0_SRGB 0x00000004 +#define A6XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070 +#define A6XX_TEX_CONST_0_SWIZ_X__SHIFT 4 +static inline uint32_t A6XX_TEX_CONST_0_SWIZ_X(enum a6xx_tex_swiz val) +{ + return ((val) << A6XX_TEX_CONST_0_SWIZ_X__SHIFT) & A6XX_TEX_CONST_0_SWIZ_X__MASK; +} +#define A6XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380 +#define A6XX_TEX_CONST_0_SWIZ_Y__SHIFT 7 +static inline uint32_t A6XX_TEX_CONST_0_SWIZ_Y(enum a6xx_tex_swiz val) +{ + return ((val) << A6XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A6XX_TEX_CONST_0_SWIZ_Y__MASK; +} +#define A6XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00 +#define A6XX_TEX_CONST_0_SWIZ_Z__SHIFT 10 +static inline uint32_t A6XX_TEX_CONST_0_SWIZ_Z(enum a6xx_tex_swiz val) +{ + return ((val) << A6XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A6XX_TEX_CONST_0_SWIZ_Z__MASK; +} +#define 
A6XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000 +#define A6XX_TEX_CONST_0_SWIZ_W__SHIFT 13 +static inline uint32_t A6XX_TEX_CONST_0_SWIZ_W(enum a6xx_tex_swiz val) +{ + return ((val) << A6XX_TEX_CONST_0_SWIZ_W__SHIFT) & A6XX_TEX_CONST_0_SWIZ_W__MASK; +} +#define A6XX_TEX_CONST_0_MIPLVLS__MASK 0x000f0000 +#define A6XX_TEX_CONST_0_MIPLVLS__SHIFT 16 +static inline uint32_t A6XX_TEX_CONST_0_MIPLVLS(uint32_t val) +{ + return ((val) << A6XX_TEX_CONST_0_MIPLVLS__SHIFT) & A6XX_TEX_CONST_0_MIPLVLS__MASK; +} +#define A6XX_TEX_CONST_0_SAMPLES__MASK 0x00300000 +#define A6XX_TEX_CONST_0_SAMPLES__SHIFT 20 +static inline uint32_t A6XX_TEX_CONST_0_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_TEX_CONST_0_SAMPLES__SHIFT) & A6XX_TEX_CONST_0_SAMPLES__MASK; +} +#define A6XX_TEX_CONST_0_FMT__MASK 0x3fc00000 +#define A6XX_TEX_CONST_0_FMT__SHIFT 22 +static inline uint32_t A6XX_TEX_CONST_0_FMT(enum a6xx_tex_fmt val) +{ + return ((val) << A6XX_TEX_CONST_0_FMT__SHIFT) & A6XX_TEX_CONST_0_FMT__MASK; +} +#define A6XX_TEX_CONST_0_SWAP__MASK 0xc0000000 +#define A6XX_TEX_CONST_0_SWAP__SHIFT 30 +static inline uint32_t A6XX_TEX_CONST_0_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A6XX_TEX_CONST_0_SWAP__SHIFT) & A6XX_TEX_CONST_0_SWAP__MASK; +} + +#define REG_A6XX_TEX_CONST_1 0x00000001 +#define A6XX_TEX_CONST_1_WIDTH__MASK 0x00007fff +#define A6XX_TEX_CONST_1_WIDTH__SHIFT 0 +static inline uint32_t A6XX_TEX_CONST_1_WIDTH(uint32_t val) +{ + return ((val) << A6XX_TEX_CONST_1_WIDTH__SHIFT) & A6XX_TEX_CONST_1_WIDTH__MASK; +} +#define A6XX_TEX_CONST_1_HEIGHT__MASK 0x3fff8000 +#define A6XX_TEX_CONST_1_HEIGHT__SHIFT 15 +static inline uint32_t A6XX_TEX_CONST_1_HEIGHT(uint32_t val) +{ + return ((val) << A6XX_TEX_CONST_1_HEIGHT__SHIFT) & A6XX_TEX_CONST_1_HEIGHT__MASK; +} + +#define REG_A6XX_TEX_CONST_2 0x00000002 +#define A6XX_TEX_CONST_2_FETCHSIZE__MASK 0x0000000f +#define A6XX_TEX_CONST_2_FETCHSIZE__SHIFT 0 +static inline uint32_t A6XX_TEX_CONST_2_FETCHSIZE(enum a6xx_tex_fetchsize val) +{ + 
return ((val) << A6XX_TEX_CONST_2_FETCHSIZE__SHIFT) & A6XX_TEX_CONST_2_FETCHSIZE__MASK; +} +#define A6XX_TEX_CONST_2_PITCH__MASK 0x1fffff80 +#define A6XX_TEX_CONST_2_PITCH__SHIFT 7 +static inline uint32_t A6XX_TEX_CONST_2_PITCH(uint32_t val) +{ + return ((val) << A6XX_TEX_CONST_2_PITCH__SHIFT) & A6XX_TEX_CONST_2_PITCH__MASK; +} +#define A6XX_TEX_CONST_2_TYPE__MASK 0x60000000 +#define A6XX_TEX_CONST_2_TYPE__SHIFT 29 +static inline uint32_t A6XX_TEX_CONST_2_TYPE(enum a6xx_tex_type val) +{ + return ((val) << A6XX_TEX_CONST_2_TYPE__SHIFT) & A6XX_TEX_CONST_2_TYPE__MASK; +} + +#define REG_A6XX_TEX_CONST_3 0x00000003 +#define A6XX_TEX_CONST_3_ARRAY_PITCH__MASK 0x00003fff +#define A6XX_TEX_CONST_3_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A6XX_TEX_CONST_3_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A6XX_TEX_CONST_3_ARRAY_PITCH__SHIFT) & A6XX_TEX_CONST_3_ARRAY_PITCH__MASK; +} +#define A6XX_TEX_CONST_3_MIN_LAYERSZ__MASK 0x07800000 +#define A6XX_TEX_CONST_3_MIN_LAYERSZ__SHIFT 23 +static inline uint32_t A6XX_TEX_CONST_3_MIN_LAYERSZ(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A6XX_TEX_CONST_3_MIN_LAYERSZ__SHIFT) & A6XX_TEX_CONST_3_MIN_LAYERSZ__MASK; +} +#define A6XX_TEX_CONST_3_FLAG 0x10000000 + +#define REG_A6XX_TEX_CONST_4 0x00000004 +#define A6XX_TEX_CONST_4_BASE_LO__MASK 0xffffffe0 +#define A6XX_TEX_CONST_4_BASE_LO__SHIFT 5 +static inline uint32_t A6XX_TEX_CONST_4_BASE_LO(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A6XX_TEX_CONST_4_BASE_LO__SHIFT) & A6XX_TEX_CONST_4_BASE_LO__MASK; +} + +#define REG_A6XX_TEX_CONST_5 0x00000005 +#define A6XX_TEX_CONST_5_BASE_HI__MASK 0x0001ffff +#define A6XX_TEX_CONST_5_BASE_HI__SHIFT 0 +static inline uint32_t A6XX_TEX_CONST_5_BASE_HI(uint32_t val) +{ + return ((val) << A6XX_TEX_CONST_5_BASE_HI__SHIFT) & A6XX_TEX_CONST_5_BASE_HI__MASK; +} +#define A6XX_TEX_CONST_5_DEPTH__MASK 0x3ffe0000 +#define A6XX_TEX_CONST_5_DEPTH__SHIFT 17 +static inline uint32_t 
A6XX_TEX_CONST_5_DEPTH(uint32_t val) +{ + return ((val) << A6XX_TEX_CONST_5_DEPTH__SHIFT) & A6XX_TEX_CONST_5_DEPTH__MASK; +} + +#define REG_A6XX_TEX_CONST_6 0x00000006 + +#define REG_A6XX_TEX_CONST_7 0x00000007 +#define A6XX_TEX_CONST_7_FLAG_LO__MASK 0xffffffe0 +#define A6XX_TEX_CONST_7_FLAG_LO__SHIFT 5 +static inline uint32_t A6XX_TEX_CONST_7_FLAG_LO(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A6XX_TEX_CONST_7_FLAG_LO__SHIFT) & A6XX_TEX_CONST_7_FLAG_LO__MASK; +} + +#define REG_A6XX_TEX_CONST_8 0x00000008 +#define A6XX_TEX_CONST_8_FLAG_HI__MASK 0x0001ffff +#define A6XX_TEX_CONST_8_FLAG_HI__SHIFT 0 +static inline uint32_t A6XX_TEX_CONST_8_FLAG_HI(uint32_t val) +{ + return ((val) << A6XX_TEX_CONST_8_FLAG_HI__SHIFT) & A6XX_TEX_CONST_8_FLAG_HI__MASK; +} + +#define REG_A6XX_TEX_CONST_9 0x00000009 + +#define REG_A6XX_TEX_CONST_10 0x0000000a + +#define REG_A6XX_TEX_CONST_11 0x0000000b + +#define REG_A6XX_TEX_CONST_12 0x0000000c + +#define REG_A6XX_TEX_CONST_13 0x0000000d + +#define REG_A6XX_TEX_CONST_14 0x0000000e + +#define REG_A6XX_TEX_CONST_15 0x0000000f + +#define REG_A6XX_PDC_GPU_ENABLE_PDC 0x00001140 + +#define REG_A6XX_PDC_GPU_SEQ_START_ADDR 0x00001148 + +#define REG_A6XX_PDC_GPU_TCS0_CONTROL 0x00001540 + +#define REG_A6XX_PDC_GPU_TCS0_CMD_ENABLE_BANK 0x00001541 + +#define REG_A6XX_PDC_GPU_TCS0_CMD_WAIT_FOR_CMPL_BANK 0x00001542 + +#define REG_A6XX_PDC_GPU_TCS0_CMD0_MSGID 0x00001543 + +#define REG_A6XX_PDC_GPU_TCS0_CMD0_ADDR 0x00001544 + +#define REG_A6XX_PDC_GPU_TCS0_CMD0_DATA 0x00001545 + +#define REG_A6XX_PDC_GPU_TCS1_CONTROL 0x00001572 + +#define REG_A6XX_PDC_GPU_TCS1_CMD_ENABLE_BANK 0x00001573 + +#define REG_A6XX_PDC_GPU_TCS1_CMD_WAIT_FOR_CMPL_BANK 0x00001574 + +#define REG_A6XX_PDC_GPU_TCS1_CMD0_MSGID 0x00001575 + +#define REG_A6XX_PDC_GPU_TCS1_CMD0_ADDR 0x00001576 + +#define REG_A6XX_PDC_GPU_TCS1_CMD0_DATA 0x00001577 + +#define REG_A6XX_PDC_GPU_TCS2_CONTROL 0x000015a4 + +#define REG_A6XX_PDC_GPU_TCS2_CMD_ENABLE_BANK 0x000015a5 + +#define 
REG_A6XX_PDC_GPU_TCS2_CMD_WAIT_FOR_CMPL_BANK 0x000015a6 + +#define REG_A6XX_PDC_GPU_TCS2_CMD0_MSGID 0x000015a7 + +#define REG_A6XX_PDC_GPU_TCS2_CMD0_ADDR 0x000015a8 + +#define REG_A6XX_PDC_GPU_TCS2_CMD0_DATA 0x000015a9 + +#define REG_A6XX_PDC_GPU_TCS3_CONTROL 0x000015d6 + +#define REG_A6XX_PDC_GPU_TCS3_CMD_ENABLE_BANK 0x000015d7 + +#define REG_A6XX_PDC_GPU_TCS3_CMD_WAIT_FOR_CMPL_BANK 0x000015d8 + +#define REG_A6XX_PDC_GPU_TCS3_CMD0_MSGID 0x000015d9 + +#define REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR 0x000015da + +#define REG_A6XX_PDC_GPU_TCS3_CMD0_DATA 0x000015db + +#define REG_A6XX_PDC_GPU_SEQ_MEM_0 0x00000000 + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A 0x00000000 +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX__MASK 0x000000ff +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX__SHIFT 0 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL__MASK 0x0000ff00 +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL__SHIFT 8 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL__MASK; +} + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B 0x00000001 + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C 0x00000002 + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D 0x00000003 + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT 0x00000004 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__MASK 0x0000003f +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__SHIFT 0 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU__MASK 0x00007000 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU__SHIFT 12 +static 
inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT__MASK 0xf0000000 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT__SHIFT 28 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT__MASK; +} + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM 0x00000005 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__MASK 0x0f000000 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__SHIFT 24 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__MASK; +} + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0 0x00000008 + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1 0x00000009 + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2 0x0000000a + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3 0x0000000b + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0 0x0000000c + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1 0x0000000d + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2 0x0000000e + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3 0x0000000f + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0 0x00000010 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__MASK 0x0000000f +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__SHIFT 0 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__MASK 0x000000f0 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__SHIFT 4 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__MASK; 
+} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__MASK 0x00000f00 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__SHIFT 8 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__MASK 0x0000f000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__SHIFT 12 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__MASK 0x000f0000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__SHIFT 16 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__MASK 0x00f00000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__SHIFT 20 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__MASK 0x0f000000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__SHIFT 24 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__MASK 0xf0000000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__SHIFT 28 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__MASK; +} + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1 0x00000011 +#define 
A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__MASK 0x0000000f +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__SHIFT 0 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__MASK 0x000000f0 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__SHIFT 4 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__MASK 0x00000f00 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__SHIFT 8 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__MASK 0x0000f000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__SHIFT 12 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__MASK 0x000f0000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__SHIFT 16 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__MASK 0x00f00000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__SHIFT 20 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__MASK 0x0f000000 
+#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__SHIFT 24 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__MASK 0xf0000000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__SHIFT 28 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__MASK; +} + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1 0x0000002f + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2 0x00000030 + +#define REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0 0x00000001 + +#define REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1 0x00000002 + + +#endif /* A6XX_XML */ diff -Nru mesa-18.3.3/src/freedreno/registers/adreno_common.xml.h mesa-19.0.1/src/freedreno/registers/adreno_common.xml.h --- mesa-18.3.3/src/freedreno/registers/adreno_common.xml.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/registers/adreno_common.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,536 @@ +#ifndef ADRENO_COMMON_XML +#define ADRENO_COMMON_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/freedreno/envytools/ +git clone https://github.com/freedreno/envytools.git + +The rules-ng-ng source files this header was generated from are: +- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 42463 bytes, from 2018-11-19 13:44:03) +- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14239 bytes, from 2018-12-05 15:25:53) +- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 43052 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 141895 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) +- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: +- Rob Clark (robclark) +- Ilia Mirkin (imirkin) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial 
+portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +enum chip { + A2XX = 0, + A3XX = 0, + A4XX = 0, + A5XX = 0, + A6XX = 0, +}; + +enum adreno_pa_su_sc_draw { + PC_DRAW_POINTS = 0, + PC_DRAW_LINES = 1, + PC_DRAW_TRIANGLES = 2, +}; + +enum adreno_compare_func { + FUNC_NEVER = 0, + FUNC_LESS = 1, + FUNC_EQUAL = 2, + FUNC_LEQUAL = 3, + FUNC_GREATER = 4, + FUNC_NOTEQUAL = 5, + FUNC_GEQUAL = 6, + FUNC_ALWAYS = 7, +}; + +enum adreno_stencil_op { + STENCIL_KEEP = 0, + STENCIL_ZERO = 1, + STENCIL_REPLACE = 2, + STENCIL_INCR_CLAMP = 3, + STENCIL_DECR_CLAMP = 4, + STENCIL_INVERT = 5, + STENCIL_INCR_WRAP = 6, + STENCIL_DECR_WRAP = 7, +}; + +enum adreno_rb_blend_factor { + FACTOR_ZERO = 0, + FACTOR_ONE = 1, + FACTOR_SRC_COLOR = 4, + FACTOR_ONE_MINUS_SRC_COLOR = 5, + FACTOR_SRC_ALPHA = 6, + FACTOR_ONE_MINUS_SRC_ALPHA = 7, + FACTOR_DST_COLOR = 8, + FACTOR_ONE_MINUS_DST_COLOR = 9, + FACTOR_DST_ALPHA = 10, + FACTOR_ONE_MINUS_DST_ALPHA = 11, + FACTOR_CONSTANT_COLOR = 12, + FACTOR_ONE_MINUS_CONSTANT_COLOR = 13, + FACTOR_CONSTANT_ALPHA = 14, + FACTOR_ONE_MINUS_CONSTANT_ALPHA = 15, + FACTOR_SRC_ALPHA_SATURATE = 16, + FACTOR_SRC1_COLOR = 20, + FACTOR_ONE_MINUS_SRC1_COLOR = 21, + FACTOR_SRC1_ALPHA = 22, + FACTOR_ONE_MINUS_SRC1_ALPHA = 23, +}; + +enum adreno_rb_surface_endian { + ENDIAN_NONE = 0, + ENDIAN_8IN16 = 1, + ENDIAN_8IN32 = 2, + ENDIAN_16IN32 = 3, + ENDIAN_8IN64 = 4, + ENDIAN_8IN128 = 5, +}; + +enum adreno_rb_dither_mode { + DITHER_DISABLE = 0, + DITHER_ALWAYS = 1, + DITHER_IF_ALPHA_OFF = 2, +}; + +enum 
adreno_rb_depth_format { + DEPTHX_16 = 0, + DEPTHX_24_8 = 1, + DEPTHX_32 = 2, +}; + +enum adreno_rb_copy_control_mode { + RB_COPY_RESOLVE = 1, + RB_COPY_CLEAR = 2, + RB_COPY_DEPTH_STENCIL = 5, +}; + +enum a3xx_rop_code { + ROP_CLEAR = 0, + ROP_NOR = 1, + ROP_AND_INVERTED = 2, + ROP_COPY_INVERTED = 3, + ROP_AND_REVERSE = 4, + ROP_INVERT = 5, + ROP_XOR = 6, + ROP_NAND = 7, + ROP_AND = 8, + ROP_EQUIV = 9, + ROP_NOOP = 10, + ROP_OR_INVERTED = 11, + ROP_COPY = 12, + ROP_OR_REVERSE = 13, + ROP_OR = 14, + ROP_SET = 15, +}; + +enum a3xx_render_mode { + RB_RENDERING_PASS = 0, + RB_TILING_PASS = 1, + RB_RESOLVE_PASS = 2, + RB_COMPUTE_PASS = 3, +}; + +enum a3xx_msaa_samples { + MSAA_ONE = 0, + MSAA_TWO = 1, + MSAA_FOUR = 2, + MSAA_EIGHT = 3, +}; + +enum a3xx_threadmode { + MULTI = 0, + SINGLE = 1, +}; + +enum a3xx_instrbuffermode { + CACHE = 0, + BUFFER = 1, +}; + +enum a3xx_threadsize { + TWO_QUADS = 0, + FOUR_QUADS = 1, +}; + +enum a3xx_color_swap { + WZYX = 0, + WXYZ = 1, + ZYXW = 2, + XYZW = 3, +}; + +enum a3xx_rb_blend_opcode { + BLEND_DST_PLUS_SRC = 0, + BLEND_SRC_MINUS_DST = 1, + BLEND_DST_MINUS_SRC = 2, + BLEND_MIN_DST_SRC = 3, + BLEND_MAX_DST_SRC = 4, +}; + +enum a4xx_tess_spacing { + EQUAL_SPACING = 0, + ODD_SPACING = 2, + EVEN_SPACING = 3, +}; + +#define REG_AXXX_CP_RB_BASE 0x000001c0 + +#define REG_AXXX_CP_RB_CNTL 0x000001c1 +#define AXXX_CP_RB_CNTL_BUFSZ__MASK 0x0000003f +#define AXXX_CP_RB_CNTL_BUFSZ__SHIFT 0 +static inline uint32_t AXXX_CP_RB_CNTL_BUFSZ(uint32_t val) +{ + return ((val) << AXXX_CP_RB_CNTL_BUFSZ__SHIFT) & AXXX_CP_RB_CNTL_BUFSZ__MASK; +} +#define AXXX_CP_RB_CNTL_BLKSZ__MASK 0x00003f00 +#define AXXX_CP_RB_CNTL_BLKSZ__SHIFT 8 +static inline uint32_t AXXX_CP_RB_CNTL_BLKSZ(uint32_t val) +{ + return ((val) << AXXX_CP_RB_CNTL_BLKSZ__SHIFT) & AXXX_CP_RB_CNTL_BLKSZ__MASK; +} +#define AXXX_CP_RB_CNTL_BUF_SWAP__MASK 0x00030000 +#define AXXX_CP_RB_CNTL_BUF_SWAP__SHIFT 16 +static inline uint32_t AXXX_CP_RB_CNTL_BUF_SWAP(uint32_t val) +{ + return ((val) << 
AXXX_CP_RB_CNTL_BUF_SWAP__SHIFT) & AXXX_CP_RB_CNTL_BUF_SWAP__MASK; +} +#define AXXX_CP_RB_CNTL_POLL_EN 0x00100000 +#define AXXX_CP_RB_CNTL_NO_UPDATE 0x08000000 +#define AXXX_CP_RB_CNTL_RPTR_WR_EN 0x80000000 + +#define REG_AXXX_CP_RB_RPTR_ADDR 0x000001c3 +#define AXXX_CP_RB_RPTR_ADDR_SWAP__MASK 0x00000003 +#define AXXX_CP_RB_RPTR_ADDR_SWAP__SHIFT 0 +static inline uint32_t AXXX_CP_RB_RPTR_ADDR_SWAP(uint32_t val) +{ + return ((val) << AXXX_CP_RB_RPTR_ADDR_SWAP__SHIFT) & AXXX_CP_RB_RPTR_ADDR_SWAP__MASK; +} +#define AXXX_CP_RB_RPTR_ADDR_ADDR__MASK 0xfffffffc +#define AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT 2 +static inline uint32_t AXXX_CP_RB_RPTR_ADDR_ADDR(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT) & AXXX_CP_RB_RPTR_ADDR_ADDR__MASK; +} + +#define REG_AXXX_CP_RB_RPTR 0x000001c4 + +#define REG_AXXX_CP_RB_WPTR 0x000001c5 + +#define REG_AXXX_CP_RB_WPTR_DELAY 0x000001c6 + +#define REG_AXXX_CP_RB_RPTR_WR 0x000001c7 + +#define REG_AXXX_CP_RB_WPTR_BASE 0x000001c8 + +#define REG_AXXX_CP_QUEUE_THRESHOLDS 0x000001d5 +#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__MASK 0x0000000f +#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__SHIFT 0 +static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(uint32_t val) +{ + return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__MASK; +} +#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__MASK 0x00000f00 +#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__SHIFT 8 +static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(uint32_t val) +{ + return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__MASK; +} +#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__MASK 0x000f0000 +#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__SHIFT 16 +static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(uint32_t val) +{ + return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__SHIFT) & 
AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__MASK; +} + +#define REG_AXXX_CP_MEQ_THRESHOLDS 0x000001d6 +#define AXXX_CP_MEQ_THRESHOLDS_MEQ_END__MASK 0x001f0000 +#define AXXX_CP_MEQ_THRESHOLDS_MEQ_END__SHIFT 16 +static inline uint32_t AXXX_CP_MEQ_THRESHOLDS_MEQ_END(uint32_t val) +{ + return ((val) << AXXX_CP_MEQ_THRESHOLDS_MEQ_END__SHIFT) & AXXX_CP_MEQ_THRESHOLDS_MEQ_END__MASK; +} +#define AXXX_CP_MEQ_THRESHOLDS_ROQ_END__MASK 0x1f000000 +#define AXXX_CP_MEQ_THRESHOLDS_ROQ_END__SHIFT 24 +static inline uint32_t AXXX_CP_MEQ_THRESHOLDS_ROQ_END(uint32_t val) +{ + return ((val) << AXXX_CP_MEQ_THRESHOLDS_ROQ_END__SHIFT) & AXXX_CP_MEQ_THRESHOLDS_ROQ_END__MASK; +} + +#define REG_AXXX_CP_CSQ_AVAIL 0x000001d7 +#define AXXX_CP_CSQ_AVAIL_RING__MASK 0x0000007f +#define AXXX_CP_CSQ_AVAIL_RING__SHIFT 0 +static inline uint32_t AXXX_CP_CSQ_AVAIL_RING(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_AVAIL_RING__SHIFT) & AXXX_CP_CSQ_AVAIL_RING__MASK; +} +#define AXXX_CP_CSQ_AVAIL_IB1__MASK 0x00007f00 +#define AXXX_CP_CSQ_AVAIL_IB1__SHIFT 8 +static inline uint32_t AXXX_CP_CSQ_AVAIL_IB1(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_AVAIL_IB1__SHIFT) & AXXX_CP_CSQ_AVAIL_IB1__MASK; +} +#define AXXX_CP_CSQ_AVAIL_IB2__MASK 0x007f0000 +#define AXXX_CP_CSQ_AVAIL_IB2__SHIFT 16 +static inline uint32_t AXXX_CP_CSQ_AVAIL_IB2(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_AVAIL_IB2__SHIFT) & AXXX_CP_CSQ_AVAIL_IB2__MASK; +} + +#define REG_AXXX_CP_STQ_AVAIL 0x000001d8 +#define AXXX_CP_STQ_AVAIL_ST__MASK 0x0000007f +#define AXXX_CP_STQ_AVAIL_ST__SHIFT 0 +static inline uint32_t AXXX_CP_STQ_AVAIL_ST(uint32_t val) +{ + return ((val) << AXXX_CP_STQ_AVAIL_ST__SHIFT) & AXXX_CP_STQ_AVAIL_ST__MASK; +} + +#define REG_AXXX_CP_MEQ_AVAIL 0x000001d9 +#define AXXX_CP_MEQ_AVAIL_MEQ__MASK 0x0000001f +#define AXXX_CP_MEQ_AVAIL_MEQ__SHIFT 0 +static inline uint32_t AXXX_CP_MEQ_AVAIL_MEQ(uint32_t val) +{ + return ((val) << AXXX_CP_MEQ_AVAIL_MEQ__SHIFT) & AXXX_CP_MEQ_AVAIL_MEQ__MASK; +} + +#define REG_AXXX_SCRATCH_UMSK 
0x000001dc +#define AXXX_SCRATCH_UMSK_UMSK__MASK 0x000000ff +#define AXXX_SCRATCH_UMSK_UMSK__SHIFT 0 +static inline uint32_t AXXX_SCRATCH_UMSK_UMSK(uint32_t val) +{ + return ((val) << AXXX_SCRATCH_UMSK_UMSK__SHIFT) & AXXX_SCRATCH_UMSK_UMSK__MASK; +} +#define AXXX_SCRATCH_UMSK_SWAP__MASK 0x00030000 +#define AXXX_SCRATCH_UMSK_SWAP__SHIFT 16 +static inline uint32_t AXXX_SCRATCH_UMSK_SWAP(uint32_t val) +{ + return ((val) << AXXX_SCRATCH_UMSK_SWAP__SHIFT) & AXXX_SCRATCH_UMSK_SWAP__MASK; +} + +#define REG_AXXX_SCRATCH_ADDR 0x000001dd + +#define REG_AXXX_CP_ME_RDADDR 0x000001ea + +#define REG_AXXX_CP_STATE_DEBUG_INDEX 0x000001ec + +#define REG_AXXX_CP_STATE_DEBUG_DATA 0x000001ed + +#define REG_AXXX_CP_INT_CNTL 0x000001f2 +#define AXXX_CP_INT_CNTL_SW_INT_MASK 0x00080000 +#define AXXX_CP_INT_CNTL_T0_PACKET_IN_IB_MASK 0x00800000 +#define AXXX_CP_INT_CNTL_OPCODE_ERROR_MASK 0x01000000 +#define AXXX_CP_INT_CNTL_PROTECTED_MODE_ERROR_MASK 0x02000000 +#define AXXX_CP_INT_CNTL_RESERVED_BIT_ERROR_MASK 0x04000000 +#define AXXX_CP_INT_CNTL_IB_ERROR_MASK 0x08000000 +#define AXXX_CP_INT_CNTL_IB2_INT_MASK 0x20000000 +#define AXXX_CP_INT_CNTL_IB1_INT_MASK 0x40000000 +#define AXXX_CP_INT_CNTL_RB_INT_MASK 0x80000000 + +#define REG_AXXX_CP_INT_STATUS 0x000001f3 + +#define REG_AXXX_CP_INT_ACK 0x000001f4 + +#define REG_AXXX_CP_ME_CNTL 0x000001f6 +#define AXXX_CP_ME_CNTL_BUSY 0x20000000 +#define AXXX_CP_ME_CNTL_HALT 0x10000000 + +#define REG_AXXX_CP_ME_STATUS 0x000001f7 + +#define REG_AXXX_CP_ME_RAM_WADDR 0x000001f8 + +#define REG_AXXX_CP_ME_RAM_RADDR 0x000001f9 + +#define REG_AXXX_CP_ME_RAM_DATA 0x000001fa + +#define REG_AXXX_CP_DEBUG 0x000001fc +#define AXXX_CP_DEBUG_PREDICATE_DISABLE 0x00800000 +#define AXXX_CP_DEBUG_PROG_END_PTR_ENABLE 0x01000000 +#define AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE 0x02000000 +#define AXXX_CP_DEBUG_PREFETCH_PASS_NOPS 0x04000000 +#define AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE 0x08000000 +#define AXXX_CP_DEBUG_PREFETCH_MATCH_DISABLE 0x10000000 +#define 
AXXX_CP_DEBUG_SIMPLE_ME_FLOW_CONTROL 0x40000000 +#define AXXX_CP_DEBUG_MIU_WRITE_PACK_DISABLE 0x80000000 + +#define REG_AXXX_CP_CSQ_RB_STAT 0x000001fd +#define AXXX_CP_CSQ_RB_STAT_RPTR__MASK 0x0000007f +#define AXXX_CP_CSQ_RB_STAT_RPTR__SHIFT 0 +static inline uint32_t AXXX_CP_CSQ_RB_STAT_RPTR(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_RB_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_RB_STAT_RPTR__MASK; +} +#define AXXX_CP_CSQ_RB_STAT_WPTR__MASK 0x007f0000 +#define AXXX_CP_CSQ_RB_STAT_WPTR__SHIFT 16 +static inline uint32_t AXXX_CP_CSQ_RB_STAT_WPTR(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_RB_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_RB_STAT_WPTR__MASK; +} + +#define REG_AXXX_CP_CSQ_IB1_STAT 0x000001fe +#define AXXX_CP_CSQ_IB1_STAT_RPTR__MASK 0x0000007f +#define AXXX_CP_CSQ_IB1_STAT_RPTR__SHIFT 0 +static inline uint32_t AXXX_CP_CSQ_IB1_STAT_RPTR(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_IB1_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_IB1_STAT_RPTR__MASK; +} +#define AXXX_CP_CSQ_IB1_STAT_WPTR__MASK 0x007f0000 +#define AXXX_CP_CSQ_IB1_STAT_WPTR__SHIFT 16 +static inline uint32_t AXXX_CP_CSQ_IB1_STAT_WPTR(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_IB1_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_IB1_STAT_WPTR__MASK; +} + +#define REG_AXXX_CP_CSQ_IB2_STAT 0x000001ff +#define AXXX_CP_CSQ_IB2_STAT_RPTR__MASK 0x0000007f +#define AXXX_CP_CSQ_IB2_STAT_RPTR__SHIFT 0 +static inline uint32_t AXXX_CP_CSQ_IB2_STAT_RPTR(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_IB2_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_IB2_STAT_RPTR__MASK; +} +#define AXXX_CP_CSQ_IB2_STAT_WPTR__MASK 0x007f0000 +#define AXXX_CP_CSQ_IB2_STAT_WPTR__SHIFT 16 +static inline uint32_t AXXX_CP_CSQ_IB2_STAT_WPTR(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_IB2_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_IB2_STAT_WPTR__MASK; +} + +#define REG_AXXX_CP_NON_PREFETCH_CNTRS 0x00000440 + +#define REG_AXXX_CP_STQ_ST_STAT 0x00000443 + +#define REG_AXXX_CP_ST_BASE 0x0000044d + +#define REG_AXXX_CP_ST_BUFSZ 0x0000044e + +#define REG_AXXX_CP_MEQ_STAT 0x0000044f + +#define 
REG_AXXX_CP_MIU_TAG_STAT 0x00000452 + +#define REG_AXXX_CP_BIN_MASK_LO 0x00000454 + +#define REG_AXXX_CP_BIN_MASK_HI 0x00000455 + +#define REG_AXXX_CP_BIN_SELECT_LO 0x00000456 + +#define REG_AXXX_CP_BIN_SELECT_HI 0x00000457 + +#define REG_AXXX_CP_IB1_BASE 0x00000458 + +#define REG_AXXX_CP_IB1_BUFSZ 0x00000459 + +#define REG_AXXX_CP_IB2_BASE 0x0000045a + +#define REG_AXXX_CP_IB2_BUFSZ 0x0000045b + +#define REG_AXXX_CP_STAT 0x0000047f +#define AXXX_CP_STAT_CP_BUSY 0x80000000 +#define AXXX_CP_STAT_VS_EVENT_FIFO_BUSY 0x40000000 +#define AXXX_CP_STAT_PS_EVENT_FIFO_BUSY 0x20000000 +#define AXXX_CP_STAT_CF_EVENT_FIFO_BUSY 0x10000000 +#define AXXX_CP_STAT_RB_EVENT_FIFO_BUSY 0x08000000 +#define AXXX_CP_STAT_ME_BUSY 0x04000000 +#define AXXX_CP_STAT_MIU_WR_C_BUSY 0x02000000 +#define AXXX_CP_STAT_CP_3D_BUSY 0x00800000 +#define AXXX_CP_STAT_CP_NRT_BUSY 0x00400000 +#define AXXX_CP_STAT_RBIU_SCRATCH_BUSY 0x00200000 +#define AXXX_CP_STAT_RCIU_ME_BUSY 0x00100000 +#define AXXX_CP_STAT_RCIU_PFP_BUSY 0x00080000 +#define AXXX_CP_STAT_MEQ_RING_BUSY 0x00040000 +#define AXXX_CP_STAT_PFP_BUSY 0x00020000 +#define AXXX_CP_STAT_ST_QUEUE_BUSY 0x00010000 +#define AXXX_CP_STAT_INDIRECT2_QUEUE_BUSY 0x00002000 +#define AXXX_CP_STAT_INDIRECTS_QUEUE_BUSY 0x00001000 +#define AXXX_CP_STAT_RING_QUEUE_BUSY 0x00000800 +#define AXXX_CP_STAT_CSF_BUSY 0x00000400 +#define AXXX_CP_STAT_CSF_ST_BUSY 0x00000200 +#define AXXX_CP_STAT_EVENT_BUSY 0x00000100 +#define AXXX_CP_STAT_CSF_INDIRECT2_BUSY 0x00000080 +#define AXXX_CP_STAT_CSF_INDIRECTS_BUSY 0x00000040 +#define AXXX_CP_STAT_CSF_RING_BUSY 0x00000020 +#define AXXX_CP_STAT_RCIU_BUSY 0x00000010 +#define AXXX_CP_STAT_RBIU_BUSY 0x00000008 +#define AXXX_CP_STAT_MIU_RD_RETURN_BUSY 0x00000004 +#define AXXX_CP_STAT_MIU_RD_REQ_BUSY 0x00000002 +#define AXXX_CP_STAT_MIU_WR_BUSY 0x00000001 + +#define REG_AXXX_CP_SCRATCH_REG0 0x00000578 + +#define REG_AXXX_CP_SCRATCH_REG1 0x00000579 + +#define REG_AXXX_CP_SCRATCH_REG2 0x0000057a + +#define REG_AXXX_CP_SCRATCH_REG3 
0x0000057b + +#define REG_AXXX_CP_SCRATCH_REG4 0x0000057c + +#define REG_AXXX_CP_SCRATCH_REG5 0x0000057d + +#define REG_AXXX_CP_SCRATCH_REG6 0x0000057e + +#define REG_AXXX_CP_SCRATCH_REG7 0x0000057f + +#define REG_AXXX_CP_ME_VS_EVENT_SRC 0x00000600 + +#define REG_AXXX_CP_ME_VS_EVENT_ADDR 0x00000601 + +#define REG_AXXX_CP_ME_VS_EVENT_DATA 0x00000602 + +#define REG_AXXX_CP_ME_VS_EVENT_ADDR_SWM 0x00000603 + +#define REG_AXXX_CP_ME_VS_EVENT_DATA_SWM 0x00000604 + +#define REG_AXXX_CP_ME_PS_EVENT_SRC 0x00000605 + +#define REG_AXXX_CP_ME_PS_EVENT_ADDR 0x00000606 + +#define REG_AXXX_CP_ME_PS_EVENT_DATA 0x00000607 + +#define REG_AXXX_CP_ME_PS_EVENT_ADDR_SWM 0x00000608 + +#define REG_AXXX_CP_ME_PS_EVENT_DATA_SWM 0x00000609 + +#define REG_AXXX_CP_ME_CF_EVENT_SRC 0x0000060a + +#define REG_AXXX_CP_ME_CF_EVENT_ADDR 0x0000060b + +#define REG_AXXX_CP_ME_CF_EVENT_DATA 0x0000060c + +#define REG_AXXX_CP_ME_NRT_ADDR 0x0000060d + +#define REG_AXXX_CP_ME_NRT_DATA 0x0000060e + +#define REG_AXXX_CP_ME_VS_FETCH_DONE_SRC 0x00000612 + +#define REG_AXXX_CP_ME_VS_FETCH_DONE_ADDR 0x00000613 + +#define REG_AXXX_CP_ME_VS_FETCH_DONE_DATA 0x00000614 + + +#endif /* ADRENO_COMMON_XML */ diff -Nru mesa-18.3.3/src/freedreno/registers/adreno_pm4.xml.h mesa-19.0.1/src/freedreno/registers/adreno_pm4.xml.h --- mesa-18.3.3/src/freedreno/registers/adreno_pm4.xml.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/registers/adreno_pm4.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,1569 @@ +#ifndef ADRENO_PM4_XML +#define ADRENO_PM4_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/freedreno/envytools/ +git clone https://github.com/freedreno/envytools.git + +The rules-ng-ng source files this header was generated from are: +- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 42463 bytes, from 2018-11-19 13:44:03) +- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14239 bytes, from 2018-12-05 15:25:53) +- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 43052 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 141895 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) +- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: +- Rob Clark (robclark) +- Ilia Mirkin (imirkin) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial 
+portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +enum vgt_event_type { + VS_DEALLOC = 0, + PS_DEALLOC = 1, + VS_DONE_TS = 2, + PS_DONE_TS = 3, + CACHE_FLUSH_TS = 4, + CONTEXT_DONE = 5, + CACHE_FLUSH = 6, + HLSQ_FLUSH = 7, + VIZQUERY_START = 7, + VIZQUERY_END = 8, + SC_WAIT_WC = 9, + RST_PIX_CNT = 13, + RST_VTX_CNT = 14, + TILE_FLUSH = 15, + STAT_EVENT = 16, + CACHE_FLUSH_AND_INV_TS_EVENT = 20, + ZPASS_DONE = 21, + CACHE_FLUSH_AND_INV_EVENT = 22, + PERFCOUNTER_START = 23, + PERFCOUNTER_STOP = 24, + VS_FETCH_DONE = 27, + FACENESS_FLUSH = 28, + FLUSH_SO_0 = 17, + FLUSH_SO_1 = 18, + FLUSH_SO_2 = 19, + FLUSH_SO_3 = 20, + PC_CCU_INVALIDATE_DEPTH = 24, + PC_CCU_INVALIDATE_COLOR = 25, + UNK_1C = 28, + UNK_1D = 29, + BLIT = 30, + UNK_25 = 37, + LRZ_FLUSH = 38, + UNK_2C = 44, + UNK_2D = 45, +}; + +enum pc_di_primtype { + DI_PT_NONE = 0, + DI_PT_POINTLIST_PSIZE = 1, + DI_PT_LINELIST = 2, + DI_PT_LINESTRIP = 3, + DI_PT_TRILIST = 4, + DI_PT_TRIFAN = 5, + DI_PT_TRISTRIP = 6, + DI_PT_LINELOOP = 7, + DI_PT_RECTLIST = 8, + DI_PT_POINTLIST = 9, + DI_PT_LINE_ADJ = 10, + DI_PT_LINESTRIP_ADJ = 11, + DI_PT_TRI_ADJ = 12, + DI_PT_TRISTRIP_ADJ = 13, +}; + +enum pc_di_src_sel { + DI_SRC_SEL_DMA = 0, + DI_SRC_SEL_IMMEDIATE = 1, + DI_SRC_SEL_AUTO_INDEX = 2, + DI_SRC_SEL_RESERVED = 3, +}; + +enum pc_di_face_cull_sel { + DI_FACE_CULL_NONE = 0, + DI_FACE_CULL_FETCH = 1, + DI_FACE_BACKFACE_CULL = 2, + DI_FACE_FRONTFACE_CULL = 3, +}; + +enum pc_di_index_size { + INDEX_SIZE_IGN = 0, + INDEX_SIZE_16_BIT = 0, + INDEX_SIZE_32_BIT = 1, + 
INDEX_SIZE_8_BIT = 2, + INDEX_SIZE_INVALID = 0, +}; + +enum pc_di_vis_cull_mode { + IGNORE_VISIBILITY = 0, + USE_VISIBILITY = 1, +}; + +enum adreno_pm4_packet_type { + CP_TYPE0_PKT = 0, + CP_TYPE1_PKT = 0x40000000, + CP_TYPE2_PKT = 0x80000000, + CP_TYPE3_PKT = 0xc0000000, + CP_TYPE4_PKT = 0x40000000, + CP_TYPE7_PKT = 0x70000000, +}; + +enum adreno_pm4_type3_packets { + CP_ME_INIT = 72, + CP_NOP = 16, + CP_PREEMPT_ENABLE = 28, + CP_PREEMPT_TOKEN = 30, + CP_INDIRECT_BUFFER = 63, + CP_INDIRECT_BUFFER_PFD = 55, + CP_WAIT_FOR_IDLE = 38, + CP_WAIT_REG_MEM = 60, + CP_WAIT_REG_EQ = 82, + CP_WAIT_REG_GTE = 83, + CP_WAIT_UNTIL_READ = 92, + CP_WAIT_IB_PFD_COMPLETE = 93, + CP_REG_RMW = 33, + CP_SET_BIN_DATA = 47, + CP_SET_BIN_DATA5 = 47, + CP_REG_TO_MEM = 62, + CP_MEM_WRITE = 61, + CP_MEM_WRITE_CNTR = 79, + CP_COND_EXEC = 68, + CP_COND_WRITE = 69, + CP_COND_WRITE5 = 69, + CP_EVENT_WRITE = 70, + CP_EVENT_WRITE_SHD = 88, + CP_EVENT_WRITE_CFL = 89, + CP_EVENT_WRITE_ZPD = 91, + CP_RUN_OPENCL = 49, + CP_DRAW_INDX = 34, + CP_DRAW_INDX_2 = 54, + CP_DRAW_INDX_BIN = 52, + CP_DRAW_INDX_2_BIN = 53, + CP_VIZ_QUERY = 35, + CP_SET_STATE = 37, + CP_SET_CONSTANT = 45, + CP_IM_LOAD = 39, + CP_IM_LOAD_IMMEDIATE = 43, + CP_LOAD_CONSTANT_CONTEXT = 46, + CP_INVALIDATE_STATE = 59, + CP_SET_SHADER_BASES = 74, + CP_SET_BIN_MASK = 80, + CP_SET_BIN_SELECT = 81, + CP_CONTEXT_UPDATE = 94, + CP_INTERRUPT = 64, + CP_IM_STORE = 44, + CP_SET_DRAW_INIT_FLAGS = 75, + CP_SET_PROTECTED_MODE = 95, + CP_BOOTSTRAP_UCODE = 111, + CP_LOAD_STATE = 48, + CP_LOAD_STATE4 = 48, + CP_COND_INDIRECT_BUFFER_PFE = 58, + CP_COND_INDIRECT_BUFFER_PFD = 50, + CP_INDIRECT_BUFFER_PFE = 63, + CP_SET_BIN = 76, + CP_TEST_TWO_MEMS = 113, + CP_REG_WR_NO_CTXT = 120, + CP_RECORD_PFP_TIMESTAMP = 17, + CP_SET_SECURE_MODE = 102, + CP_WAIT_FOR_ME = 19, + CP_SET_DRAW_STATE = 67, + CP_DRAW_INDX_OFFSET = 56, + CP_DRAW_INDIRECT = 40, + CP_DRAW_INDX_INDIRECT = 41, + CP_DRAW_AUTO = 36, + CP_UNKNOWN_19 = 25, + CP_UNKNOWN_1A = 26, + CP_UNKNOWN_4E = 
78, + CP_WIDE_REG_WRITE = 116, + CP_SCRATCH_TO_REG = 77, + CP_REG_TO_SCRATCH = 74, + CP_WAIT_MEM_WRITES = 18, + CP_COND_REG_EXEC = 71, + CP_MEM_TO_REG = 66, + CP_EXEC_CS_INDIRECT = 65, + CP_EXEC_CS = 51, + CP_PERFCOUNTER_ACTION = 80, + CP_SMMU_TABLE_UPDATE = 83, + CP_SET_MARKER = 101, + CP_SET_PSEUDO_REG = 86, + CP_CONTEXT_REG_BUNCH = 92, + CP_YIELD_ENABLE = 28, + CP_SKIP_IB2_ENABLE_GLOBAL = 29, + CP_SKIP_IB2_ENABLE_LOCAL = 35, + CP_SET_SUBDRAW_SIZE = 53, + CP_SET_VISIBILITY_OVERRIDE = 100, + CP_PREEMPT_ENABLE_GLOBAL = 105, + CP_PREEMPT_ENABLE_LOCAL = 106, + CP_CONTEXT_SWITCH_YIELD = 107, + CP_SET_RENDER_MODE = 108, + CP_COMPUTE_CHECKPOINT = 110, + CP_MEM_TO_MEM = 115, + CP_BLIT = 44, + CP_REG_TEST = 57, + CP_SET_MODE = 99, + CP_LOAD_STATE6_GEOM = 50, + CP_LOAD_STATE6_FRAG = 52, + IN_IB_PREFETCH_END = 23, + IN_SUBBLK_PREFETCH = 31, + IN_INSTR_PREFETCH = 32, + IN_INSTR_MATCH = 71, + IN_CONST_PREFETCH = 73, + IN_INCR_UPDT_STATE = 85, + IN_INCR_UPDT_CONST = 86, + IN_INCR_UPDT_INSTR = 87, + PKT4 = 4, + CP_UNK_A6XX_14 = 20, + CP_UNK_A6XX_36 = 54, + CP_UNK_A6XX_55 = 85, + CP_REG_WRITE = 109, +}; + +enum adreno_state_block { + SB_VERT_TEX = 0, + SB_VERT_MIPADDR = 1, + SB_FRAG_TEX = 2, + SB_FRAG_MIPADDR = 3, + SB_VERT_SHADER = 4, + SB_GEOM_SHADER = 5, + SB_FRAG_SHADER = 6, + SB_COMPUTE_SHADER = 7, +}; + +enum adreno_state_type { + ST_SHADER = 0, + ST_CONSTANTS = 1, +}; + +enum adreno_state_src { + SS_DIRECT = 0, + SS_INVALID_ALL_IC = 2, + SS_INVALID_PART_IC = 3, + SS_INDIRECT = 4, + SS_INDIRECT_TCM = 5, + SS_INDIRECT_STM = 6, +}; + +enum a4xx_state_block { + SB4_VS_TEX = 0, + SB4_HS_TEX = 1, + SB4_DS_TEX = 2, + SB4_GS_TEX = 3, + SB4_FS_TEX = 4, + SB4_CS_TEX = 5, + SB4_VS_SHADER = 8, + SB4_HS_SHADER = 9, + SB4_DS_SHADER = 10, + SB4_GS_SHADER = 11, + SB4_FS_SHADER = 12, + SB4_CS_SHADER = 13, + SB4_SSBO = 14, + SB4_CS_SSBO = 15, +}; + +enum a4xx_state_type { + ST4_SHADER = 0, + ST4_CONSTANTS = 1, +}; + +enum a4xx_state_src { + SS4_DIRECT = 0, + SS4_INDIRECT = 2, +}; + +enum 
a6xx_state_block { + SB6_VS_TEX = 0, + SB6_HS_TEX = 1, + SB6_DS_TEX = 2, + SB6_GS_TEX = 3, + SB6_FS_TEX = 4, + SB6_CS_TEX = 5, + SB6_VS_SHADER = 8, + SB6_HS_SHADER = 9, + SB6_DS_SHADER = 10, + SB6_GS_SHADER = 11, + SB6_FS_SHADER = 12, + SB6_CS_SHADER = 13, + SB6_SSBO = 14, + SB6_CS_SSBO = 15, +}; + +enum a6xx_state_type { + ST6_SHADER = 0, + ST6_CONSTANTS = 1, +}; + +enum a6xx_state_src { + SS6_DIRECT = 0, + SS6_INDIRECT = 2, +}; + +enum a4xx_index_size { + INDEX4_SIZE_8_BIT = 0, + INDEX4_SIZE_16_BIT = 1, + INDEX4_SIZE_32_BIT = 2, +}; + +enum cp_cond_function { + WRITE_ALWAYS = 0, + WRITE_LT = 1, + WRITE_LE = 2, + WRITE_EQ = 3, + WRITE_NE = 4, + WRITE_GE = 5, + WRITE_GT = 6, +}; + +enum render_mode_cmd { + BYPASS = 1, + BINNING = 2, + GMEM = 3, + BLIT2D = 5, + BLIT2DSCALE = 7, + END2D = 8, +}; + +enum cp_blit_cmd { + BLIT_OP_FILL = 0, + BLIT_OP_COPY = 1, + BLIT_OP_SCALE = 3, +}; + +enum a6xx_render_mode { + RM6_BYPASS = 1, + RM6_BINNING = 2, + RM6_GMEM = 4, + RM6_BLIT2D = 5, + RM6_RESOLVE = 6, + RM6_BLIT2DSCALE = 12, +}; + +enum pseudo_reg { + SMMU_INFO = 0, + NON_SECURE_SAVE_ADDR = 1, + SECURE_SAVE_ADDR = 2, + NON_PRIV_SAVE_ADDR = 3, + COUNTER = 4, +}; + +#define REG_CP_LOAD_STATE_0 0x00000000 +#define CP_LOAD_STATE_0_DST_OFF__MASK 0x0000ffff +#define CP_LOAD_STATE_0_DST_OFF__SHIFT 0 +static inline uint32_t CP_LOAD_STATE_0_DST_OFF(uint32_t val) +{ + return ((val) << CP_LOAD_STATE_0_DST_OFF__SHIFT) & CP_LOAD_STATE_0_DST_OFF__MASK; +} +#define CP_LOAD_STATE_0_STATE_SRC__MASK 0x00070000 +#define CP_LOAD_STATE_0_STATE_SRC__SHIFT 16 +static inline uint32_t CP_LOAD_STATE_0_STATE_SRC(enum adreno_state_src val) +{ + return ((val) << CP_LOAD_STATE_0_STATE_SRC__SHIFT) & CP_LOAD_STATE_0_STATE_SRC__MASK; +} +#define CP_LOAD_STATE_0_STATE_BLOCK__MASK 0x00380000 +#define CP_LOAD_STATE_0_STATE_BLOCK__SHIFT 19 +static inline uint32_t CP_LOAD_STATE_0_STATE_BLOCK(enum adreno_state_block val) +{ + return ((val) << CP_LOAD_STATE_0_STATE_BLOCK__SHIFT) & 
CP_LOAD_STATE_0_STATE_BLOCK__MASK; +} +#define CP_LOAD_STATE_0_NUM_UNIT__MASK 0xffc00000 +#define CP_LOAD_STATE_0_NUM_UNIT__SHIFT 22 +static inline uint32_t CP_LOAD_STATE_0_NUM_UNIT(uint32_t val) +{ + return ((val) << CP_LOAD_STATE_0_NUM_UNIT__SHIFT) & CP_LOAD_STATE_0_NUM_UNIT__MASK; +} + +#define REG_CP_LOAD_STATE_1 0x00000001 +#define CP_LOAD_STATE_1_STATE_TYPE__MASK 0x00000003 +#define CP_LOAD_STATE_1_STATE_TYPE__SHIFT 0 +static inline uint32_t CP_LOAD_STATE_1_STATE_TYPE(enum adreno_state_type val) +{ + return ((val) << CP_LOAD_STATE_1_STATE_TYPE__SHIFT) & CP_LOAD_STATE_1_STATE_TYPE__MASK; +} +#define CP_LOAD_STATE_1_EXT_SRC_ADDR__MASK 0xfffffffc +#define CP_LOAD_STATE_1_EXT_SRC_ADDR__SHIFT 2 +static inline uint32_t CP_LOAD_STATE_1_EXT_SRC_ADDR(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << CP_LOAD_STATE_1_EXT_SRC_ADDR__SHIFT) & CP_LOAD_STATE_1_EXT_SRC_ADDR__MASK; +} + +#define REG_CP_LOAD_STATE4_0 0x00000000 +#define CP_LOAD_STATE4_0_DST_OFF__MASK 0x00003fff +#define CP_LOAD_STATE4_0_DST_OFF__SHIFT 0 +static inline uint32_t CP_LOAD_STATE4_0_DST_OFF(uint32_t val) +{ + return ((val) << CP_LOAD_STATE4_0_DST_OFF__SHIFT) & CP_LOAD_STATE4_0_DST_OFF__MASK; +} +#define CP_LOAD_STATE4_0_STATE_SRC__MASK 0x00030000 +#define CP_LOAD_STATE4_0_STATE_SRC__SHIFT 16 +static inline uint32_t CP_LOAD_STATE4_0_STATE_SRC(enum a4xx_state_src val) +{ + return ((val) << CP_LOAD_STATE4_0_STATE_SRC__SHIFT) & CP_LOAD_STATE4_0_STATE_SRC__MASK; +} +#define CP_LOAD_STATE4_0_STATE_BLOCK__MASK 0x003c0000 +#define CP_LOAD_STATE4_0_STATE_BLOCK__SHIFT 18 +static inline uint32_t CP_LOAD_STATE4_0_STATE_BLOCK(enum a4xx_state_block val) +{ + return ((val) << CP_LOAD_STATE4_0_STATE_BLOCK__SHIFT) & CP_LOAD_STATE4_0_STATE_BLOCK__MASK; +} +#define CP_LOAD_STATE4_0_NUM_UNIT__MASK 0xffc00000 +#define CP_LOAD_STATE4_0_NUM_UNIT__SHIFT 22 +static inline uint32_t CP_LOAD_STATE4_0_NUM_UNIT(uint32_t val) +{ + return ((val) << CP_LOAD_STATE4_0_NUM_UNIT__SHIFT) & CP_LOAD_STATE4_0_NUM_UNIT__MASK; 
+} + +#define REG_CP_LOAD_STATE4_1 0x00000001 +#define CP_LOAD_STATE4_1_STATE_TYPE__MASK 0x00000003 +#define CP_LOAD_STATE4_1_STATE_TYPE__SHIFT 0 +static inline uint32_t CP_LOAD_STATE4_1_STATE_TYPE(enum a4xx_state_type val) +{ + return ((val) << CP_LOAD_STATE4_1_STATE_TYPE__SHIFT) & CP_LOAD_STATE4_1_STATE_TYPE__MASK; +} +#define CP_LOAD_STATE4_1_EXT_SRC_ADDR__MASK 0xfffffffc +#define CP_LOAD_STATE4_1_EXT_SRC_ADDR__SHIFT 2 +static inline uint32_t CP_LOAD_STATE4_1_EXT_SRC_ADDR(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << CP_LOAD_STATE4_1_EXT_SRC_ADDR__SHIFT) & CP_LOAD_STATE4_1_EXT_SRC_ADDR__MASK; +} + +#define REG_CP_LOAD_STATE4_2 0x00000002 +#define CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__MASK 0xffffffff +#define CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__SHIFT 0 +static inline uint32_t CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(uint32_t val) +{ + return ((val) << CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__SHIFT) & CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__MASK; +} + +#define REG_CP_LOAD_STATE6_0 0x00000000 +#define CP_LOAD_STATE6_0_DST_OFF__MASK 0x00003fff +#define CP_LOAD_STATE6_0_DST_OFF__SHIFT 0 +static inline uint32_t CP_LOAD_STATE6_0_DST_OFF(uint32_t val) +{ + return ((val) << CP_LOAD_STATE6_0_DST_OFF__SHIFT) & CP_LOAD_STATE6_0_DST_OFF__MASK; +} +#define CP_LOAD_STATE6_0_STATE_TYPE__MASK 0x00004000 +#define CP_LOAD_STATE6_0_STATE_TYPE__SHIFT 14 +static inline uint32_t CP_LOAD_STATE6_0_STATE_TYPE(enum a6xx_state_type val) +{ + return ((val) << CP_LOAD_STATE6_0_STATE_TYPE__SHIFT) & CP_LOAD_STATE6_0_STATE_TYPE__MASK; +} +#define CP_LOAD_STATE6_0_STATE_SRC__MASK 0x00030000 +#define CP_LOAD_STATE6_0_STATE_SRC__SHIFT 16 +static inline uint32_t CP_LOAD_STATE6_0_STATE_SRC(enum a6xx_state_src val) +{ + return ((val) << CP_LOAD_STATE6_0_STATE_SRC__SHIFT) & CP_LOAD_STATE6_0_STATE_SRC__MASK; +} +#define CP_LOAD_STATE6_0_STATE_BLOCK__MASK 0x003c0000 +#define CP_LOAD_STATE6_0_STATE_BLOCK__SHIFT 18 +static inline uint32_t CP_LOAD_STATE6_0_STATE_BLOCK(enum a6xx_state_block val) +{ + return ((val) 
<< CP_LOAD_STATE6_0_STATE_BLOCK__SHIFT) & CP_LOAD_STATE6_0_STATE_BLOCK__MASK; +} +#define CP_LOAD_STATE6_0_NUM_UNIT__MASK 0xffc00000 +#define CP_LOAD_STATE6_0_NUM_UNIT__SHIFT 22 +static inline uint32_t CP_LOAD_STATE6_0_NUM_UNIT(uint32_t val) +{ + return ((val) << CP_LOAD_STATE6_0_NUM_UNIT__SHIFT) & CP_LOAD_STATE6_0_NUM_UNIT__MASK; +} + +#define REG_CP_LOAD_STATE6_1 0x00000001 +#define CP_LOAD_STATE6_1_EXT_SRC_ADDR__MASK 0xfffffffc +#define CP_LOAD_STATE6_1_EXT_SRC_ADDR__SHIFT 2 +static inline uint32_t CP_LOAD_STATE6_1_EXT_SRC_ADDR(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << CP_LOAD_STATE6_1_EXT_SRC_ADDR__SHIFT) & CP_LOAD_STATE6_1_EXT_SRC_ADDR__MASK; +} + +#define REG_CP_LOAD_STATE6_2 0x00000002 +#define CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI__MASK 0xffffffff +#define CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI__SHIFT 0 +static inline uint32_t CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(uint32_t val) +{ + return ((val) << CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI__SHIFT) & CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI__MASK; +} + +#define REG_CP_DRAW_INDX_0 0x00000000 +#define CP_DRAW_INDX_0_VIZ_QUERY__MASK 0xffffffff +#define CP_DRAW_INDX_0_VIZ_QUERY__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_0_VIZ_QUERY(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_0_VIZ_QUERY__SHIFT) & CP_DRAW_INDX_0_VIZ_QUERY__MASK; +} + +#define REG_CP_DRAW_INDX_1 0x00000001 +#define CP_DRAW_INDX_1_PRIM_TYPE__MASK 0x0000003f +#define CP_DRAW_INDX_1_PRIM_TYPE__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_1_PRIM_TYPE(enum pc_di_primtype val) +{ + return ((val) << CP_DRAW_INDX_1_PRIM_TYPE__SHIFT) & CP_DRAW_INDX_1_PRIM_TYPE__MASK; +} +#define CP_DRAW_INDX_1_SOURCE_SELECT__MASK 0x000000c0 +#define CP_DRAW_INDX_1_SOURCE_SELECT__SHIFT 6 +static inline uint32_t CP_DRAW_INDX_1_SOURCE_SELECT(enum pc_di_src_sel val) +{ + return ((val) << CP_DRAW_INDX_1_SOURCE_SELECT__SHIFT) & CP_DRAW_INDX_1_SOURCE_SELECT__MASK; +} +#define CP_DRAW_INDX_1_VIS_CULL__MASK 0x00000600 +#define CP_DRAW_INDX_1_VIS_CULL__SHIFT 9 +static inline uint32_t 
CP_DRAW_INDX_1_VIS_CULL(enum pc_di_vis_cull_mode val) +{ + return ((val) << CP_DRAW_INDX_1_VIS_CULL__SHIFT) & CP_DRAW_INDX_1_VIS_CULL__MASK; +} +#define CP_DRAW_INDX_1_INDEX_SIZE__MASK 0x00000800 +#define CP_DRAW_INDX_1_INDEX_SIZE__SHIFT 11 +static inline uint32_t CP_DRAW_INDX_1_INDEX_SIZE(enum pc_di_index_size val) +{ + return ((val) << CP_DRAW_INDX_1_INDEX_SIZE__SHIFT) & CP_DRAW_INDX_1_INDEX_SIZE__MASK; +} +#define CP_DRAW_INDX_1_NOT_EOP 0x00001000 +#define CP_DRAW_INDX_1_SMALL_INDEX 0x00002000 +#define CP_DRAW_INDX_1_PRE_DRAW_INITIATOR_ENABLE 0x00004000 +#define CP_DRAW_INDX_1_NUM_INSTANCES__MASK 0xff000000 +#define CP_DRAW_INDX_1_NUM_INSTANCES__SHIFT 24 +static inline uint32_t CP_DRAW_INDX_1_NUM_INSTANCES(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_1_NUM_INSTANCES__SHIFT) & CP_DRAW_INDX_1_NUM_INSTANCES__MASK; +} + +#define REG_CP_DRAW_INDX_2 0x00000002 +#define CP_DRAW_INDX_2_NUM_INDICES__MASK 0xffffffff +#define CP_DRAW_INDX_2_NUM_INDICES__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_2_NUM_INDICES(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_2_NUM_INDICES__MASK; +} + +#define REG_CP_DRAW_INDX_3 0x00000003 +#define CP_DRAW_INDX_3_INDX_BASE__MASK 0xffffffff +#define CP_DRAW_INDX_3_INDX_BASE__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_3_INDX_BASE(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_3_INDX_BASE__SHIFT) & CP_DRAW_INDX_3_INDX_BASE__MASK; +} + +#define REG_CP_DRAW_INDX_4 0x00000004 +#define CP_DRAW_INDX_4_INDX_SIZE__MASK 0xffffffff +#define CP_DRAW_INDX_4_INDX_SIZE__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_4_INDX_SIZE(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_4_INDX_SIZE__SHIFT) & CP_DRAW_INDX_4_INDX_SIZE__MASK; +} + +#define REG_CP_DRAW_INDX_2_0 0x00000000 +#define CP_DRAW_INDX_2_0_VIZ_QUERY__MASK 0xffffffff +#define CP_DRAW_INDX_2_0_VIZ_QUERY__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_2_0_VIZ_QUERY(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_2_0_VIZ_QUERY__SHIFT) & 
CP_DRAW_INDX_2_0_VIZ_QUERY__MASK; +} + +#define REG_CP_DRAW_INDX_2_1 0x00000001 +#define CP_DRAW_INDX_2_1_PRIM_TYPE__MASK 0x0000003f +#define CP_DRAW_INDX_2_1_PRIM_TYPE__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_2_1_PRIM_TYPE(enum pc_di_primtype val) +{ + return ((val) << CP_DRAW_INDX_2_1_PRIM_TYPE__SHIFT) & CP_DRAW_INDX_2_1_PRIM_TYPE__MASK; +} +#define CP_DRAW_INDX_2_1_SOURCE_SELECT__MASK 0x000000c0 +#define CP_DRAW_INDX_2_1_SOURCE_SELECT__SHIFT 6 +static inline uint32_t CP_DRAW_INDX_2_1_SOURCE_SELECT(enum pc_di_src_sel val) +{ + return ((val) << CP_DRAW_INDX_2_1_SOURCE_SELECT__SHIFT) & CP_DRAW_INDX_2_1_SOURCE_SELECT__MASK; +} +#define CP_DRAW_INDX_2_1_VIS_CULL__MASK 0x00000600 +#define CP_DRAW_INDX_2_1_VIS_CULL__SHIFT 9 +static inline uint32_t CP_DRAW_INDX_2_1_VIS_CULL(enum pc_di_vis_cull_mode val) +{ + return ((val) << CP_DRAW_INDX_2_1_VIS_CULL__SHIFT) & CP_DRAW_INDX_2_1_VIS_CULL__MASK; +} +#define CP_DRAW_INDX_2_1_INDEX_SIZE__MASK 0x00000800 +#define CP_DRAW_INDX_2_1_INDEX_SIZE__SHIFT 11 +static inline uint32_t CP_DRAW_INDX_2_1_INDEX_SIZE(enum pc_di_index_size val) +{ + return ((val) << CP_DRAW_INDX_2_1_INDEX_SIZE__SHIFT) & CP_DRAW_INDX_2_1_INDEX_SIZE__MASK; +} +#define CP_DRAW_INDX_2_1_NOT_EOP 0x00001000 +#define CP_DRAW_INDX_2_1_SMALL_INDEX 0x00002000 +#define CP_DRAW_INDX_2_1_PRE_DRAW_INITIATOR_ENABLE 0x00004000 +#define CP_DRAW_INDX_2_1_NUM_INSTANCES__MASK 0xff000000 +#define CP_DRAW_INDX_2_1_NUM_INSTANCES__SHIFT 24 +static inline uint32_t CP_DRAW_INDX_2_1_NUM_INSTANCES(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_2_1_NUM_INSTANCES__SHIFT) & CP_DRAW_INDX_2_1_NUM_INSTANCES__MASK; +} + +#define REG_CP_DRAW_INDX_2_2 0x00000002 +#define CP_DRAW_INDX_2_2_NUM_INDICES__MASK 0xffffffff +#define CP_DRAW_INDX_2_2_NUM_INDICES__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_2_2_NUM_INDICES(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_2_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_2_2_NUM_INDICES__MASK; +} + +#define REG_CP_DRAW_INDX_OFFSET_0 0x00000000 +#define 
CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__MASK 0x0000003f +#define CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(enum pc_di_primtype val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__SHIFT) & CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__MASK; +} +#define CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__MASK 0x000000c0 +#define CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__SHIFT 6 +static inline uint32_t CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(enum pc_di_src_sel val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__SHIFT) & CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__MASK; +} +#define CP_DRAW_INDX_OFFSET_0_VIS_CULL__MASK 0x00000300 +#define CP_DRAW_INDX_OFFSET_0_VIS_CULL__SHIFT 8 +static inline uint32_t CP_DRAW_INDX_OFFSET_0_VIS_CULL(enum pc_di_vis_cull_mode val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_0_VIS_CULL__SHIFT) & CP_DRAW_INDX_OFFSET_0_VIS_CULL__MASK; +} +#define CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__MASK 0x00000c00 +#define CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__SHIFT 10 +static inline uint32_t CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(enum a4xx_index_size val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__SHIFT) & CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__MASK; +} +#define CP_DRAW_INDX_OFFSET_0_TESS_MODE__MASK 0x01f00000 +#define CP_DRAW_INDX_OFFSET_0_TESS_MODE__SHIFT 20 +static inline uint32_t CP_DRAW_INDX_OFFSET_0_TESS_MODE(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_0_TESS_MODE__SHIFT) & CP_DRAW_INDX_OFFSET_0_TESS_MODE__MASK; +} + +#define REG_CP_DRAW_INDX_OFFSET_1 0x00000001 +#define CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__MASK 0xffffffff +#define CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__SHIFT) & CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__MASK; +} + +#define REG_CP_DRAW_INDX_OFFSET_2 0x00000002 +#define CP_DRAW_INDX_OFFSET_2_NUM_INDICES__MASK 0xffffffff +#define CP_DRAW_INDX_OFFSET_2_NUM_INDICES__SHIFT 0 
+static inline uint32_t CP_DRAW_INDX_OFFSET_2_NUM_INDICES(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_OFFSET_2_NUM_INDICES__MASK; +} + +#define REG_CP_DRAW_INDX_OFFSET_3 0x00000003 + +#define REG_CP_DRAW_INDX_OFFSET_4 0x00000004 +#define CP_DRAW_INDX_OFFSET_4_INDX_BASE__MASK 0xffffffff +#define CP_DRAW_INDX_OFFSET_4_INDX_BASE__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_OFFSET_4_INDX_BASE(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_4_INDX_BASE__SHIFT) & CP_DRAW_INDX_OFFSET_4_INDX_BASE__MASK; +} + +#define REG_CP_DRAW_INDX_OFFSET_5 0x00000005 +#define CP_DRAW_INDX_OFFSET_5_INDX_SIZE__MASK 0xffffffff +#define CP_DRAW_INDX_OFFSET_5_INDX_SIZE__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_OFFSET_5_INDX_SIZE(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_5_INDX_SIZE__SHIFT) & CP_DRAW_INDX_OFFSET_5_INDX_SIZE__MASK; +} + +#define REG_A4XX_CP_DRAW_INDIRECT_0 0x00000000 +#define A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__MASK 0x0000003f +#define A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__SHIFT 0 +static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE(enum pc_di_primtype val) +{ + return ((val) << A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__MASK; +} +#define A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__MASK 0x000000c0 +#define A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__SHIFT 6 +static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT(enum pc_di_src_sel val) +{ + return ((val) << A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__MASK; +} +#define A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__MASK 0x00000300 +#define A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__SHIFT 8 +static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_VIS_CULL(enum pc_di_vis_cull_mode val) +{ + return ((val) << A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__MASK; +} +#define A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__MASK 0x00000c00 +#define A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__SHIFT 10 
+static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE(enum a4xx_index_size val) +{ + return ((val) << A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__MASK; +} +#define A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__MASK 0x01f00000 +#define A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__SHIFT 20 +static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_TESS_MODE(uint32_t val) +{ + return ((val) << A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__MASK; +} + +#define REG_A4XX_CP_DRAW_INDIRECT_1 0x00000001 +#define A4XX_CP_DRAW_INDIRECT_1_INDIRECT__MASK 0xffffffff +#define A4XX_CP_DRAW_INDIRECT_1_INDIRECT__SHIFT 0 +static inline uint32_t A4XX_CP_DRAW_INDIRECT_1_INDIRECT(uint32_t val) +{ + return ((val) << A4XX_CP_DRAW_INDIRECT_1_INDIRECT__SHIFT) & A4XX_CP_DRAW_INDIRECT_1_INDIRECT__MASK; +} + + +#define REG_A5XX_CP_DRAW_INDIRECT_2 0x00000002 +#define A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__MASK 0xffffffff +#define A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__SHIFT 0 +static inline uint32_t A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI(uint32_t val) +{ + return ((val) << A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__SHIFT) & A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__MASK; +} + +#define REG_A4XX_CP_DRAW_INDX_INDIRECT_0 0x00000000 +#define A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__MASK 0x0000003f +#define A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__SHIFT 0 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE(enum pc_di_primtype val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__MASK; +} +#define A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__MASK 0x000000c0 +#define A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__SHIFT 6 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT(enum pc_di_src_sel val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__MASK; +} +#define A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__MASK 0x00000300 +#define 
A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__SHIFT 8 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL(enum pc_di_vis_cull_mode val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__MASK; +} +#define A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__MASK 0x00000c00 +#define A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__SHIFT 10 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE(enum a4xx_index_size val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__MASK; +} +#define A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__MASK 0x01f00000 +#define A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__SHIFT 20 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE(uint32_t val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__MASK; +} + + +#define REG_A4XX_CP_DRAW_INDX_INDIRECT_1 0x00000001 +#define A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__MASK 0xffffffff +#define A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__SHIFT 0 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE(uint32_t val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__MASK; +} + +#define REG_A4XX_CP_DRAW_INDX_INDIRECT_2 0x00000002 +#define A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__MASK 0xffffffff +#define A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__SHIFT 0 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE(uint32_t val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__MASK; +} + +#define REG_A4XX_CP_DRAW_INDX_INDIRECT_3 0x00000003 +#define A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__MASK 0xffffffff +#define A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__SHIFT 0 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT(uint32_t val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__SHIFT) & 
A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__MASK; +} + + +#define REG_A5XX_CP_DRAW_INDX_INDIRECT_1 0x00000001 +#define A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__MASK 0xffffffff +#define A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__SHIFT 0 +static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO(uint32_t val) +{ + return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__MASK; +} + +#define REG_A5XX_CP_DRAW_INDX_INDIRECT_2 0x00000002 +#define A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__MASK 0xffffffff +#define A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__SHIFT 0 +static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI(uint32_t val) +{ + return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__MASK; +} + +#define REG_A5XX_CP_DRAW_INDX_INDIRECT_3 0x00000003 +#define A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__MASK 0xffffffff +#define A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__SHIFT 0 +static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES(uint32_t val) +{ + return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__MASK; +} + +#define REG_A5XX_CP_DRAW_INDX_INDIRECT_4 0x00000004 +#define A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__MASK 0xffffffff +#define A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__SHIFT 0 +static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO(uint32_t val) +{ + return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__MASK; +} + +#define REG_A5XX_CP_DRAW_INDX_INDIRECT_5 0x00000005 +#define A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__MASK 0xffffffff +#define A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__SHIFT 0 +static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI(uint32_t val) +{ + return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__MASK; +} + +static inline 
uint32_t REG_CP_SET_DRAW_STATE_(uint32_t i0) { return 0x00000000 + 0x3*i0; } + +static inline uint32_t REG_CP_SET_DRAW_STATE__0(uint32_t i0) { return 0x00000000 + 0x3*i0; } +#define CP_SET_DRAW_STATE__0_COUNT__MASK 0x0000ffff +#define CP_SET_DRAW_STATE__0_COUNT__SHIFT 0 +static inline uint32_t CP_SET_DRAW_STATE__0_COUNT(uint32_t val) +{ + return ((val) << CP_SET_DRAW_STATE__0_COUNT__SHIFT) & CP_SET_DRAW_STATE__0_COUNT__MASK; +} +#define CP_SET_DRAW_STATE__0_DIRTY 0x00010000 +#define CP_SET_DRAW_STATE__0_DISABLE 0x00020000 +#define CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS 0x00040000 +#define CP_SET_DRAW_STATE__0_LOAD_IMMED 0x00080000 +#define CP_SET_DRAW_STATE__0_ENABLE_MASK__MASK 0x00f00000 +#define CP_SET_DRAW_STATE__0_ENABLE_MASK__SHIFT 20 +static inline uint32_t CP_SET_DRAW_STATE__0_ENABLE_MASK(uint32_t val) +{ + return ((val) << CP_SET_DRAW_STATE__0_ENABLE_MASK__SHIFT) & CP_SET_DRAW_STATE__0_ENABLE_MASK__MASK; +} +#define CP_SET_DRAW_STATE__0_GROUP_ID__MASK 0x1f000000 +#define CP_SET_DRAW_STATE__0_GROUP_ID__SHIFT 24 +static inline uint32_t CP_SET_DRAW_STATE__0_GROUP_ID(uint32_t val) +{ + return ((val) << CP_SET_DRAW_STATE__0_GROUP_ID__SHIFT) & CP_SET_DRAW_STATE__0_GROUP_ID__MASK; +} + +static inline uint32_t REG_CP_SET_DRAW_STATE__1(uint32_t i0) { return 0x00000001 + 0x3*i0; } +#define CP_SET_DRAW_STATE__1_ADDR_LO__MASK 0xffffffff +#define CP_SET_DRAW_STATE__1_ADDR_LO__SHIFT 0 +static inline uint32_t CP_SET_DRAW_STATE__1_ADDR_LO(uint32_t val) +{ + return ((val) << CP_SET_DRAW_STATE__1_ADDR_LO__SHIFT) & CP_SET_DRAW_STATE__1_ADDR_LO__MASK; +} + +static inline uint32_t REG_CP_SET_DRAW_STATE__2(uint32_t i0) { return 0x00000002 + 0x3*i0; } +#define CP_SET_DRAW_STATE__2_ADDR_HI__MASK 0xffffffff +#define CP_SET_DRAW_STATE__2_ADDR_HI__SHIFT 0 +static inline uint32_t CP_SET_DRAW_STATE__2_ADDR_HI(uint32_t val) +{ + return ((val) << CP_SET_DRAW_STATE__2_ADDR_HI__SHIFT) & CP_SET_DRAW_STATE__2_ADDR_HI__MASK; +} + +#define REG_CP_SET_BIN_0 0x00000000 + +#define 
REG_CP_SET_BIN_1 0x00000001 +#define CP_SET_BIN_1_X1__MASK 0x0000ffff +#define CP_SET_BIN_1_X1__SHIFT 0 +static inline uint32_t CP_SET_BIN_1_X1(uint32_t val) +{ + return ((val) << CP_SET_BIN_1_X1__SHIFT) & CP_SET_BIN_1_X1__MASK; +} +#define CP_SET_BIN_1_Y1__MASK 0xffff0000 +#define CP_SET_BIN_1_Y1__SHIFT 16 +static inline uint32_t CP_SET_BIN_1_Y1(uint32_t val) +{ + return ((val) << CP_SET_BIN_1_Y1__SHIFT) & CP_SET_BIN_1_Y1__MASK; +} + +#define REG_CP_SET_BIN_2 0x00000002 +#define CP_SET_BIN_2_X2__MASK 0x0000ffff +#define CP_SET_BIN_2_X2__SHIFT 0 +static inline uint32_t CP_SET_BIN_2_X2(uint32_t val) +{ + return ((val) << CP_SET_BIN_2_X2__SHIFT) & CP_SET_BIN_2_X2__MASK; +} +#define CP_SET_BIN_2_Y2__MASK 0xffff0000 +#define CP_SET_BIN_2_Y2__SHIFT 16 +static inline uint32_t CP_SET_BIN_2_Y2(uint32_t val) +{ + return ((val) << CP_SET_BIN_2_Y2__SHIFT) & CP_SET_BIN_2_Y2__MASK; +} + +#define REG_CP_SET_BIN_DATA_0 0x00000000 +#define CP_SET_BIN_DATA_0_BIN_DATA_ADDR__MASK 0xffffffff +#define CP_SET_BIN_DATA_0_BIN_DATA_ADDR__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA_0_BIN_DATA_ADDR(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA_0_BIN_DATA_ADDR__SHIFT) & CP_SET_BIN_DATA_0_BIN_DATA_ADDR__MASK; +} + +#define REG_CP_SET_BIN_DATA_1 0x00000001 +#define CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__MASK 0xffffffff +#define CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__SHIFT) & CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__MASK; +} + +#define REG_CP_SET_BIN_DATA5_0 0x00000000 +#define CP_SET_BIN_DATA5_0_VSC_SIZE__MASK 0x003f0000 +#define CP_SET_BIN_DATA5_0_VSC_SIZE__SHIFT 16 +static inline uint32_t CP_SET_BIN_DATA5_0_VSC_SIZE(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_0_VSC_SIZE__SHIFT) & CP_SET_BIN_DATA5_0_VSC_SIZE__MASK; +} +#define CP_SET_BIN_DATA5_0_VSC_N__MASK 0x07c00000 +#define CP_SET_BIN_DATA5_0_VSC_N__SHIFT 22 +static inline uint32_t 
CP_SET_BIN_DATA5_0_VSC_N(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_0_VSC_N__SHIFT) & CP_SET_BIN_DATA5_0_VSC_N__MASK; +} + +#define REG_CP_SET_BIN_DATA5_1 0x00000001 +#define CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__MASK 0xffffffff +#define CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__SHIFT) & CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__MASK; +} + +#define REG_CP_SET_BIN_DATA5_2 0x00000002 +#define CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__MASK 0xffffffff +#define CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__SHIFT) & CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__MASK; +} + +#define REG_CP_SET_BIN_DATA5_3 0x00000003 +#define CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__MASK 0xffffffff +#define CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__SHIFT) & CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__MASK; +} + +#define REG_CP_SET_BIN_DATA5_4 0x00000004 +#define CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__MASK 0xffffffff +#define CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__SHIFT) & CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__MASK; +} + +#define REG_CP_SET_BIN_DATA5_5 0x00000005 +#define CP_SET_BIN_DATA5_5_BIN_DATA_ADDR2_LO__MASK 0xffffffff +#define CP_SET_BIN_DATA5_5_BIN_DATA_ADDR2_LO__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA5_5_BIN_DATA_ADDR2_LO(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_5_BIN_DATA_ADDR2_LO__SHIFT) & CP_SET_BIN_DATA5_5_BIN_DATA_ADDR2_LO__MASK; +} + +#define REG_CP_SET_BIN_DATA5_6 0x00000006 +#define 
CP_SET_BIN_DATA5_6_BIN_DATA_ADDR2_LO__MASK 0xffffffff +#define CP_SET_BIN_DATA5_6_BIN_DATA_ADDR2_LO__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA5_6_BIN_DATA_ADDR2_LO(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_6_BIN_DATA_ADDR2_LO__SHIFT) & CP_SET_BIN_DATA5_6_BIN_DATA_ADDR2_LO__MASK; +} + +#define REG_CP_REG_TO_MEM_0 0x00000000 +#define CP_REG_TO_MEM_0_REG__MASK 0x0000ffff +#define CP_REG_TO_MEM_0_REG__SHIFT 0 +static inline uint32_t CP_REG_TO_MEM_0_REG(uint32_t val) +{ + return ((val) << CP_REG_TO_MEM_0_REG__SHIFT) & CP_REG_TO_MEM_0_REG__MASK; +} +#define CP_REG_TO_MEM_0_CNT__MASK 0x3ff80000 +#define CP_REG_TO_MEM_0_CNT__SHIFT 19 +static inline uint32_t CP_REG_TO_MEM_0_CNT(uint32_t val) +{ + return ((val) << CP_REG_TO_MEM_0_CNT__SHIFT) & CP_REG_TO_MEM_0_CNT__MASK; +} +#define CP_REG_TO_MEM_0_64B 0x40000000 +#define CP_REG_TO_MEM_0_ACCUMULATE 0x80000000 + +#define REG_CP_REG_TO_MEM_1 0x00000001 +#define CP_REG_TO_MEM_1_DEST__MASK 0xffffffff +#define CP_REG_TO_MEM_1_DEST__SHIFT 0 +static inline uint32_t CP_REG_TO_MEM_1_DEST(uint32_t val) +{ + return ((val) << CP_REG_TO_MEM_1_DEST__SHIFT) & CP_REG_TO_MEM_1_DEST__MASK; +} + +#define REG_CP_REG_TO_MEM_2 0x00000002 +#define CP_REG_TO_MEM_2_DEST_HI__MASK 0xffffffff +#define CP_REG_TO_MEM_2_DEST_HI__SHIFT 0 +static inline uint32_t CP_REG_TO_MEM_2_DEST_HI(uint32_t val) +{ + return ((val) << CP_REG_TO_MEM_2_DEST_HI__SHIFT) & CP_REG_TO_MEM_2_DEST_HI__MASK; +} + +#define REG_CP_MEM_TO_REG_0 0x00000000 +#define CP_MEM_TO_REG_0_REG__MASK 0x0000ffff +#define CP_MEM_TO_REG_0_REG__SHIFT 0 +static inline uint32_t CP_MEM_TO_REG_0_REG(uint32_t val) +{ + return ((val) << CP_MEM_TO_REG_0_REG__SHIFT) & CP_MEM_TO_REG_0_REG__MASK; +} +#define CP_MEM_TO_REG_0_CNT__MASK 0x3ff80000 +#define CP_MEM_TO_REG_0_CNT__SHIFT 19 +static inline uint32_t CP_MEM_TO_REG_0_CNT(uint32_t val) +{ + return ((val) << CP_MEM_TO_REG_0_CNT__SHIFT) & CP_MEM_TO_REG_0_CNT__MASK; +} +#define CP_MEM_TO_REG_0_64B 0x40000000 +#define 
CP_MEM_TO_REG_0_ACCUMULATE 0x80000000 + +#define REG_CP_MEM_TO_REG_1 0x00000001 +#define CP_MEM_TO_REG_1_SRC__MASK 0xffffffff +#define CP_MEM_TO_REG_1_SRC__SHIFT 0 +static inline uint32_t CP_MEM_TO_REG_1_SRC(uint32_t val) +{ + return ((val) << CP_MEM_TO_REG_1_SRC__SHIFT) & CP_MEM_TO_REG_1_SRC__MASK; +} + +#define REG_CP_MEM_TO_REG_2 0x00000002 +#define CP_MEM_TO_REG_2_SRC_HI__MASK 0xffffffff +#define CP_MEM_TO_REG_2_SRC_HI__SHIFT 0 +static inline uint32_t CP_MEM_TO_REG_2_SRC_HI(uint32_t val) +{ + return ((val) << CP_MEM_TO_REG_2_SRC_HI__SHIFT) & CP_MEM_TO_REG_2_SRC_HI__MASK; +} + +#define REG_CP_MEM_TO_MEM_0 0x00000000 +#define CP_MEM_TO_MEM_0_NEG_A 0x00000001 +#define CP_MEM_TO_MEM_0_NEG_B 0x00000002 +#define CP_MEM_TO_MEM_0_NEG_C 0x00000004 +#define CP_MEM_TO_MEM_0_DOUBLE 0x20000000 + +#define REG_CP_COND_WRITE_0 0x00000000 +#define CP_COND_WRITE_0_FUNCTION__MASK 0x00000007 +#define CP_COND_WRITE_0_FUNCTION__SHIFT 0 +static inline uint32_t CP_COND_WRITE_0_FUNCTION(enum cp_cond_function val) +{ + return ((val) << CP_COND_WRITE_0_FUNCTION__SHIFT) & CP_COND_WRITE_0_FUNCTION__MASK; +} +#define CP_COND_WRITE_0_POLL_MEMORY 0x00000010 +#define CP_COND_WRITE_0_WRITE_MEMORY 0x00000100 + +#define REG_CP_COND_WRITE_1 0x00000001 +#define CP_COND_WRITE_1_POLL_ADDR__MASK 0xffffffff +#define CP_COND_WRITE_1_POLL_ADDR__SHIFT 0 +static inline uint32_t CP_COND_WRITE_1_POLL_ADDR(uint32_t val) +{ + return ((val) << CP_COND_WRITE_1_POLL_ADDR__SHIFT) & CP_COND_WRITE_1_POLL_ADDR__MASK; +} + +#define REG_CP_COND_WRITE_2 0x00000002 +#define CP_COND_WRITE_2_REF__MASK 0xffffffff +#define CP_COND_WRITE_2_REF__SHIFT 0 +static inline uint32_t CP_COND_WRITE_2_REF(uint32_t val) +{ + return ((val) << CP_COND_WRITE_2_REF__SHIFT) & CP_COND_WRITE_2_REF__MASK; +} + +#define REG_CP_COND_WRITE_3 0x00000003 +#define CP_COND_WRITE_3_MASK__MASK 0xffffffff +#define CP_COND_WRITE_3_MASK__SHIFT 0 +static inline uint32_t CP_COND_WRITE_3_MASK(uint32_t val) +{ + return ((val) << CP_COND_WRITE_3_MASK__SHIFT) & 
CP_COND_WRITE_3_MASK__MASK; +} + +#define REG_CP_COND_WRITE_4 0x00000004 +#define CP_COND_WRITE_4_WRITE_ADDR__MASK 0xffffffff +#define CP_COND_WRITE_4_WRITE_ADDR__SHIFT 0 +static inline uint32_t CP_COND_WRITE_4_WRITE_ADDR(uint32_t val) +{ + return ((val) << CP_COND_WRITE_4_WRITE_ADDR__SHIFT) & CP_COND_WRITE_4_WRITE_ADDR__MASK; +} + +#define REG_CP_COND_WRITE_5 0x00000005 +#define CP_COND_WRITE_5_WRITE_DATA__MASK 0xffffffff +#define CP_COND_WRITE_5_WRITE_DATA__SHIFT 0 +static inline uint32_t CP_COND_WRITE_5_WRITE_DATA(uint32_t val) +{ + return ((val) << CP_COND_WRITE_5_WRITE_DATA__SHIFT) & CP_COND_WRITE_5_WRITE_DATA__MASK; +} + +#define REG_CP_COND_WRITE5_0 0x00000000 +#define CP_COND_WRITE5_0_FUNCTION__MASK 0x00000007 +#define CP_COND_WRITE5_0_FUNCTION__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_0_FUNCTION(enum cp_cond_function val) +{ + return ((val) << CP_COND_WRITE5_0_FUNCTION__SHIFT) & CP_COND_WRITE5_0_FUNCTION__MASK; +} +#define CP_COND_WRITE5_0_POLL_MEMORY 0x00000010 +#define CP_COND_WRITE5_0_WRITE_MEMORY 0x00000100 + +#define REG_CP_COND_WRITE5_1 0x00000001 +#define CP_COND_WRITE5_1_POLL_ADDR_LO__MASK 0xffffffff +#define CP_COND_WRITE5_1_POLL_ADDR_LO__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_1_POLL_ADDR_LO(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_1_POLL_ADDR_LO__SHIFT) & CP_COND_WRITE5_1_POLL_ADDR_LO__MASK; +} + +#define REG_CP_COND_WRITE5_2 0x00000002 +#define CP_COND_WRITE5_2_POLL_ADDR_HI__MASK 0xffffffff +#define CP_COND_WRITE5_2_POLL_ADDR_HI__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_2_POLL_ADDR_HI(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_2_POLL_ADDR_HI__SHIFT) & CP_COND_WRITE5_2_POLL_ADDR_HI__MASK; +} + +#define REG_CP_COND_WRITE5_3 0x00000003 +#define CP_COND_WRITE5_3_REF__MASK 0xffffffff +#define CP_COND_WRITE5_3_REF__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_3_REF(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_3_REF__SHIFT) & CP_COND_WRITE5_3_REF__MASK; +} + +#define REG_CP_COND_WRITE5_4 0x00000004 +#define 
CP_COND_WRITE5_4_MASK__MASK 0xffffffff +#define CP_COND_WRITE5_4_MASK__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_4_MASK(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_4_MASK__SHIFT) & CP_COND_WRITE5_4_MASK__MASK; +} + +#define REG_CP_COND_WRITE5_5 0x00000005 +#define CP_COND_WRITE5_5_WRITE_ADDR_LO__MASK 0xffffffff +#define CP_COND_WRITE5_5_WRITE_ADDR_LO__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_5_WRITE_ADDR_LO(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_5_WRITE_ADDR_LO__SHIFT) & CP_COND_WRITE5_5_WRITE_ADDR_LO__MASK; +} + +#define REG_CP_COND_WRITE5_6 0x00000006 +#define CP_COND_WRITE5_6_WRITE_ADDR_HI__MASK 0xffffffff +#define CP_COND_WRITE5_6_WRITE_ADDR_HI__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_6_WRITE_ADDR_HI(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_6_WRITE_ADDR_HI__SHIFT) & CP_COND_WRITE5_6_WRITE_ADDR_HI__MASK; +} + +#define REG_CP_COND_WRITE5_7 0x00000007 +#define CP_COND_WRITE5_7_WRITE_DATA__MASK 0xffffffff +#define CP_COND_WRITE5_7_WRITE_DATA__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_7_WRITE_DATA(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_7_WRITE_DATA__SHIFT) & CP_COND_WRITE5_7_WRITE_DATA__MASK; +} + +#define REG_CP_DISPATCH_COMPUTE_0 0x00000000 + +#define REG_CP_DISPATCH_COMPUTE_1 0x00000001 +#define CP_DISPATCH_COMPUTE_1_X__MASK 0xffffffff +#define CP_DISPATCH_COMPUTE_1_X__SHIFT 0 +static inline uint32_t CP_DISPATCH_COMPUTE_1_X(uint32_t val) +{ + return ((val) << CP_DISPATCH_COMPUTE_1_X__SHIFT) & CP_DISPATCH_COMPUTE_1_X__MASK; +} + +#define REG_CP_DISPATCH_COMPUTE_2 0x00000002 +#define CP_DISPATCH_COMPUTE_2_Y__MASK 0xffffffff +#define CP_DISPATCH_COMPUTE_2_Y__SHIFT 0 +static inline uint32_t CP_DISPATCH_COMPUTE_2_Y(uint32_t val) +{ + return ((val) << CP_DISPATCH_COMPUTE_2_Y__SHIFT) & CP_DISPATCH_COMPUTE_2_Y__MASK; +} + +#define REG_CP_DISPATCH_COMPUTE_3 0x00000003 +#define CP_DISPATCH_COMPUTE_3_Z__MASK 0xffffffff +#define CP_DISPATCH_COMPUTE_3_Z__SHIFT 0 +static inline uint32_t 
CP_DISPATCH_COMPUTE_3_Z(uint32_t val) +{ + return ((val) << CP_DISPATCH_COMPUTE_3_Z__SHIFT) & CP_DISPATCH_COMPUTE_3_Z__MASK; +} + +#define REG_CP_SET_RENDER_MODE_0 0x00000000 +#define CP_SET_RENDER_MODE_0_MODE__MASK 0x000001ff +#define CP_SET_RENDER_MODE_0_MODE__SHIFT 0 +static inline uint32_t CP_SET_RENDER_MODE_0_MODE(enum render_mode_cmd val) +{ + return ((val) << CP_SET_RENDER_MODE_0_MODE__SHIFT) & CP_SET_RENDER_MODE_0_MODE__MASK; +} + +#define REG_CP_SET_RENDER_MODE_1 0x00000001 +#define CP_SET_RENDER_MODE_1_ADDR_0_LO__MASK 0xffffffff +#define CP_SET_RENDER_MODE_1_ADDR_0_LO__SHIFT 0 +static inline uint32_t CP_SET_RENDER_MODE_1_ADDR_0_LO(uint32_t val) +{ + return ((val) << CP_SET_RENDER_MODE_1_ADDR_0_LO__SHIFT) & CP_SET_RENDER_MODE_1_ADDR_0_LO__MASK; +} + +#define REG_CP_SET_RENDER_MODE_2 0x00000002 +#define CP_SET_RENDER_MODE_2_ADDR_0_HI__MASK 0xffffffff +#define CP_SET_RENDER_MODE_2_ADDR_0_HI__SHIFT 0 +static inline uint32_t CP_SET_RENDER_MODE_2_ADDR_0_HI(uint32_t val) +{ + return ((val) << CP_SET_RENDER_MODE_2_ADDR_0_HI__SHIFT) & CP_SET_RENDER_MODE_2_ADDR_0_HI__MASK; +} + +#define REG_CP_SET_RENDER_MODE_3 0x00000003 +#define CP_SET_RENDER_MODE_3_VSC_ENABLE 0x00000008 +#define CP_SET_RENDER_MODE_3_GMEM_ENABLE 0x00000010 + +#define REG_CP_SET_RENDER_MODE_4 0x00000004 + +#define REG_CP_SET_RENDER_MODE_5 0x00000005 +#define CP_SET_RENDER_MODE_5_ADDR_1_LEN__MASK 0xffffffff +#define CP_SET_RENDER_MODE_5_ADDR_1_LEN__SHIFT 0 +static inline uint32_t CP_SET_RENDER_MODE_5_ADDR_1_LEN(uint32_t val) +{ + return ((val) << CP_SET_RENDER_MODE_5_ADDR_1_LEN__SHIFT) & CP_SET_RENDER_MODE_5_ADDR_1_LEN__MASK; +} + +#define REG_CP_SET_RENDER_MODE_6 0x00000006 +#define CP_SET_RENDER_MODE_6_ADDR_1_LO__MASK 0xffffffff +#define CP_SET_RENDER_MODE_6_ADDR_1_LO__SHIFT 0 +static inline uint32_t CP_SET_RENDER_MODE_6_ADDR_1_LO(uint32_t val) +{ + return ((val) << CP_SET_RENDER_MODE_6_ADDR_1_LO__SHIFT) & CP_SET_RENDER_MODE_6_ADDR_1_LO__MASK; +} + +#define REG_CP_SET_RENDER_MODE_7 0x00000007 
+#define CP_SET_RENDER_MODE_7_ADDR_1_HI__MASK 0xffffffff +#define CP_SET_RENDER_MODE_7_ADDR_1_HI__SHIFT 0 +static inline uint32_t CP_SET_RENDER_MODE_7_ADDR_1_HI(uint32_t val) +{ + return ((val) << CP_SET_RENDER_MODE_7_ADDR_1_HI__SHIFT) & CP_SET_RENDER_MODE_7_ADDR_1_HI__MASK; +} + +#define REG_CP_COMPUTE_CHECKPOINT_0 0x00000000 +#define CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__MASK 0xffffffff +#define CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__SHIFT 0 +static inline uint32_t CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO(uint32_t val) +{ + return ((val) << CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__SHIFT) & CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__MASK; +} + +#define REG_CP_COMPUTE_CHECKPOINT_1 0x00000001 +#define CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__MASK 0xffffffff +#define CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__SHIFT 0 +static inline uint32_t CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI(uint32_t val) +{ + return ((val) << CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__SHIFT) & CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__MASK; +} + +#define REG_CP_COMPUTE_CHECKPOINT_2 0x00000002 + +#define REG_CP_COMPUTE_CHECKPOINT_3 0x00000003 +#define CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__MASK 0xffffffff +#define CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__SHIFT 0 +static inline uint32_t CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN(uint32_t val) +{ + return ((val) << CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__SHIFT) & CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__MASK; +} + +#define REG_CP_COMPUTE_CHECKPOINT_4 0x00000004 + +#define REG_CP_COMPUTE_CHECKPOINT_5 0x00000005 +#define CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__MASK 0xffffffff +#define CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__SHIFT 0 +static inline uint32_t CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO(uint32_t val) +{ + return ((val) << CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__SHIFT) & CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__MASK; +} + +#define REG_CP_COMPUTE_CHECKPOINT_6 0x00000006 +#define CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__MASK 0xffffffff +#define CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__SHIFT 0 +static inline uint32_t CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI(uint32_t val) +{ + 
return ((val) << CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__SHIFT) & CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__MASK; +} + +#define REG_CP_COMPUTE_CHECKPOINT_7 0x00000007 + +#define REG_CP_PERFCOUNTER_ACTION_0 0x00000000 + +#define REG_CP_PERFCOUNTER_ACTION_1 0x00000001 +#define CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__MASK 0xffffffff +#define CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__SHIFT 0 +static inline uint32_t CP_PERFCOUNTER_ACTION_1_ADDR_0_LO(uint32_t val) +{ + return ((val) << CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__SHIFT) & CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__MASK; +} + +#define REG_CP_PERFCOUNTER_ACTION_2 0x00000002 +#define CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__MASK 0xffffffff +#define CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__SHIFT 0 +static inline uint32_t CP_PERFCOUNTER_ACTION_2_ADDR_0_HI(uint32_t val) +{ + return ((val) << CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__SHIFT) & CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__MASK; +} + +#define REG_CP_EVENT_WRITE_0 0x00000000 +#define CP_EVENT_WRITE_0_EVENT__MASK 0x000000ff +#define CP_EVENT_WRITE_0_EVENT__SHIFT 0 +static inline uint32_t CP_EVENT_WRITE_0_EVENT(enum vgt_event_type val) +{ + return ((val) << CP_EVENT_WRITE_0_EVENT__SHIFT) & CP_EVENT_WRITE_0_EVENT__MASK; +} +#define CP_EVENT_WRITE_0_TIMESTAMP 0x40000000 + +#define REG_CP_EVENT_WRITE_1 0x00000001 +#define CP_EVENT_WRITE_1_ADDR_0_LO__MASK 0xffffffff +#define CP_EVENT_WRITE_1_ADDR_0_LO__SHIFT 0 +static inline uint32_t CP_EVENT_WRITE_1_ADDR_0_LO(uint32_t val) +{ + return ((val) << CP_EVENT_WRITE_1_ADDR_0_LO__SHIFT) & CP_EVENT_WRITE_1_ADDR_0_LO__MASK; +} + +#define REG_CP_EVENT_WRITE_2 0x00000002 +#define CP_EVENT_WRITE_2_ADDR_0_HI__MASK 0xffffffff +#define CP_EVENT_WRITE_2_ADDR_0_HI__SHIFT 0 +static inline uint32_t CP_EVENT_WRITE_2_ADDR_0_HI(uint32_t val) +{ + return ((val) << CP_EVENT_WRITE_2_ADDR_0_HI__SHIFT) & CP_EVENT_WRITE_2_ADDR_0_HI__MASK; +} + +#define REG_CP_EVENT_WRITE_3 0x00000003 + +#define REG_CP_BLIT_0 0x00000000 +#define CP_BLIT_0_OP__MASK 0x0000000f +#define CP_BLIT_0_OP__SHIFT 0 +static 
inline uint32_t CP_BLIT_0_OP(enum cp_blit_cmd val) +{ + return ((val) << CP_BLIT_0_OP__SHIFT) & CP_BLIT_0_OP__MASK; +} + +#define REG_CP_BLIT_1 0x00000001 +#define CP_BLIT_1_SRC_X1__MASK 0x00003fff +#define CP_BLIT_1_SRC_X1__SHIFT 0 +static inline uint32_t CP_BLIT_1_SRC_X1(uint32_t val) +{ + return ((val) << CP_BLIT_1_SRC_X1__SHIFT) & CP_BLIT_1_SRC_X1__MASK; +} +#define CP_BLIT_1_SRC_Y1__MASK 0x3fff0000 +#define CP_BLIT_1_SRC_Y1__SHIFT 16 +static inline uint32_t CP_BLIT_1_SRC_Y1(uint32_t val) +{ + return ((val) << CP_BLIT_1_SRC_Y1__SHIFT) & CP_BLIT_1_SRC_Y1__MASK; +} + +#define REG_CP_BLIT_2 0x00000002 +#define CP_BLIT_2_SRC_X2__MASK 0x00003fff +#define CP_BLIT_2_SRC_X2__SHIFT 0 +static inline uint32_t CP_BLIT_2_SRC_X2(uint32_t val) +{ + return ((val) << CP_BLIT_2_SRC_X2__SHIFT) & CP_BLIT_2_SRC_X2__MASK; +} +#define CP_BLIT_2_SRC_Y2__MASK 0x3fff0000 +#define CP_BLIT_2_SRC_Y2__SHIFT 16 +static inline uint32_t CP_BLIT_2_SRC_Y2(uint32_t val) +{ + return ((val) << CP_BLIT_2_SRC_Y2__SHIFT) & CP_BLIT_2_SRC_Y2__MASK; +} + +#define REG_CP_BLIT_3 0x00000003 +#define CP_BLIT_3_DST_X1__MASK 0x00003fff +#define CP_BLIT_3_DST_X1__SHIFT 0 +static inline uint32_t CP_BLIT_3_DST_X1(uint32_t val) +{ + return ((val) << CP_BLIT_3_DST_X1__SHIFT) & CP_BLIT_3_DST_X1__MASK; +} +#define CP_BLIT_3_DST_Y1__MASK 0x3fff0000 +#define CP_BLIT_3_DST_Y1__SHIFT 16 +static inline uint32_t CP_BLIT_3_DST_Y1(uint32_t val) +{ + return ((val) << CP_BLIT_3_DST_Y1__SHIFT) & CP_BLIT_3_DST_Y1__MASK; +} + +#define REG_CP_BLIT_4 0x00000004 +#define CP_BLIT_4_DST_X2__MASK 0x00003fff +#define CP_BLIT_4_DST_X2__SHIFT 0 +static inline uint32_t CP_BLIT_4_DST_X2(uint32_t val) +{ + return ((val) << CP_BLIT_4_DST_X2__SHIFT) & CP_BLIT_4_DST_X2__MASK; +} +#define CP_BLIT_4_DST_Y2__MASK 0x3fff0000 +#define CP_BLIT_4_DST_Y2__SHIFT 16 +static inline uint32_t CP_BLIT_4_DST_Y2(uint32_t val) +{ + return ((val) << CP_BLIT_4_DST_Y2__SHIFT) & CP_BLIT_4_DST_Y2__MASK; +} + +#define REG_CP_EXEC_CS_0 0x00000000 + +#define 
REG_CP_EXEC_CS_1 0x00000001 +#define CP_EXEC_CS_1_NGROUPS_X__MASK 0xffffffff +#define CP_EXEC_CS_1_NGROUPS_X__SHIFT 0 +static inline uint32_t CP_EXEC_CS_1_NGROUPS_X(uint32_t val) +{ + return ((val) << CP_EXEC_CS_1_NGROUPS_X__SHIFT) & CP_EXEC_CS_1_NGROUPS_X__MASK; +} + +#define REG_CP_EXEC_CS_2 0x00000002 +#define CP_EXEC_CS_2_NGROUPS_Y__MASK 0xffffffff +#define CP_EXEC_CS_2_NGROUPS_Y__SHIFT 0 +static inline uint32_t CP_EXEC_CS_2_NGROUPS_Y(uint32_t val) +{ + return ((val) << CP_EXEC_CS_2_NGROUPS_Y__SHIFT) & CP_EXEC_CS_2_NGROUPS_Y__MASK; +} + +#define REG_CP_EXEC_CS_3 0x00000003 +#define CP_EXEC_CS_3_NGROUPS_Z__MASK 0xffffffff +#define CP_EXEC_CS_3_NGROUPS_Z__SHIFT 0 +static inline uint32_t CP_EXEC_CS_3_NGROUPS_Z(uint32_t val) +{ + return ((val) << CP_EXEC_CS_3_NGROUPS_Z__SHIFT) & CP_EXEC_CS_3_NGROUPS_Z__MASK; +} + +#define REG_A4XX_CP_EXEC_CS_INDIRECT_0 0x00000000 + + +#define REG_A4XX_CP_EXEC_CS_INDIRECT_1 0x00000001 +#define A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__MASK 0xffffffff +#define A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__SHIFT 0 +static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_1_ADDR(uint32_t val) +{ + return ((val) << A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__MASK; +} + +#define REG_A4XX_CP_EXEC_CS_INDIRECT_2 0x00000002 +#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__MASK 0x00000ffc +#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__SHIFT 2 +static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX(uint32_t val) +{ + return ((val) << A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__MASK; +} +#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__MASK 0x003ff000 +#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__SHIFT 12 +static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY(uint32_t val) +{ + return ((val) << A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__MASK; +} +#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__MASK 0xffc00000 +#define 
A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__SHIFT 22 +static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ(uint32_t val) +{ + return ((val) << A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__MASK; +} + + +#define REG_A5XX_CP_EXEC_CS_INDIRECT_1 0x00000001 +#define A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__MASK 0xffffffff +#define A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__SHIFT 0 +static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO(uint32_t val) +{ + return ((val) << A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__MASK; +} + +#define REG_A5XX_CP_EXEC_CS_INDIRECT_2 0x00000002 +#define A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__MASK 0xffffffff +#define A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__SHIFT 0 +static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI(uint32_t val) +{ + return ((val) << A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__MASK; +} + +#define REG_A5XX_CP_EXEC_CS_INDIRECT_3 0x00000003 +#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__MASK 0x00000ffc +#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__SHIFT 2 +static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX(uint32_t val) +{ + return ((val) << A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__MASK; +} +#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__MASK 0x003ff000 +#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__SHIFT 12 +static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY(uint32_t val) +{ + return ((val) << A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__MASK; +} +#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__MASK 0xffc00000 +#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__SHIFT 22 +static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ(uint32_t val) +{ + return ((val) << A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__MASK; +} + +#define REG_A2XX_CP_SET_MARKER_0 0x00000000 
+#define A2XX_CP_SET_MARKER_0_MARKER__MASK 0x0000000f +#define A2XX_CP_SET_MARKER_0_MARKER__SHIFT 0 +static inline uint32_t A2XX_CP_SET_MARKER_0_MARKER(uint32_t val) +{ + return ((val) << A2XX_CP_SET_MARKER_0_MARKER__SHIFT) & A2XX_CP_SET_MARKER_0_MARKER__MASK; +} +#define A2XX_CP_SET_MARKER_0_MODE__MASK 0x0000000f +#define A2XX_CP_SET_MARKER_0_MODE__SHIFT 0 +static inline uint32_t A2XX_CP_SET_MARKER_0_MODE(enum a6xx_render_mode val) +{ + return ((val) << A2XX_CP_SET_MARKER_0_MODE__SHIFT) & A2XX_CP_SET_MARKER_0_MODE__MASK; +} +#define A2XX_CP_SET_MARKER_0_IFPC 0x00000100 + +static inline uint32_t REG_A2XX_CP_SET_PSEUDO_REG_(uint32_t i0) { return 0x00000000 + 0x3*i0; } + +static inline uint32_t REG_A2XX_CP_SET_PSEUDO_REG__0(uint32_t i0) { return 0x00000000 + 0x3*i0; } +#define A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG__MASK 0x00000007 +#define A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG__SHIFT 0 +static inline uint32_t A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG(enum pseudo_reg val) +{ + return ((val) << A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG__SHIFT) & A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG__MASK; +} + +static inline uint32_t REG_A2XX_CP_SET_PSEUDO_REG__1(uint32_t i0) { return 0x00000001 + 0x3*i0; } +#define A2XX_CP_SET_PSEUDO_REG__1_LO__MASK 0xffffffff +#define A2XX_CP_SET_PSEUDO_REG__1_LO__SHIFT 0 +static inline uint32_t A2XX_CP_SET_PSEUDO_REG__1_LO(uint32_t val) +{ + return ((val) << A2XX_CP_SET_PSEUDO_REG__1_LO__SHIFT) & A2XX_CP_SET_PSEUDO_REG__1_LO__MASK; +} + +static inline uint32_t REG_A2XX_CP_SET_PSEUDO_REG__2(uint32_t i0) { return 0x00000002 + 0x3*i0; } +#define A2XX_CP_SET_PSEUDO_REG__2_HI__MASK 0xffffffff +#define A2XX_CP_SET_PSEUDO_REG__2_HI__SHIFT 0 +static inline uint32_t A2XX_CP_SET_PSEUDO_REG__2_HI(uint32_t val) +{ + return ((val) << A2XX_CP_SET_PSEUDO_REG__2_HI__SHIFT) & A2XX_CP_SET_PSEUDO_REG__2_HI__MASK; +} + +#define REG_A2XX_CP_REG_TEST_0 0x00000000 +#define A2XX_CP_REG_TEST_0_REG__MASK 0x00000fff +#define A2XX_CP_REG_TEST_0_REG__SHIFT 0 +static inline uint32_t 
A2XX_CP_REG_TEST_0_REG(uint32_t val) +{ + return ((val) << A2XX_CP_REG_TEST_0_REG__SHIFT) & A2XX_CP_REG_TEST_0_REG__MASK; +} +#define A2XX_CP_REG_TEST_0_BIT__MASK 0x01f00000 +#define A2XX_CP_REG_TEST_0_BIT__SHIFT 20 +static inline uint32_t A2XX_CP_REG_TEST_0_BIT(uint32_t val) +{ + return ((val) << A2XX_CP_REG_TEST_0_BIT__SHIFT) & A2XX_CP_REG_TEST_0_BIT__MASK; +} +#define A2XX_CP_REG_TEST_0_UNK25 0x02000000 + + +#endif /* ADRENO_PM4_XML */ diff -Nru mesa-18.3.3/src/gallium/Android.mk mesa-19.0.1/src/gallium/Android.mk --- mesa-18.3.3/src/gallium/Android.mk 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/Android.mk 2019-03-31 23:16:37.000000000 +0000 @@ -38,7 +38,7 @@ SUBDIRS += winsys/freedreno/drm drivers/freedreno SUBDIRS += winsys/i915/drm drivers/i915 SUBDIRS += winsys/nouveau/drm drivers/nouveau -SUBDIRS += winsys/pl111/drm drivers/pl111 +SUBDIRS += winsys/kmsro/drm drivers/kmsro SUBDIRS += winsys/radeon/drm drivers/r300 SUBDIRS += winsys/radeon/drm drivers/r600 SUBDIRS += winsys/radeon/drm winsys/amdgpu/drm drivers/radeonsi @@ -46,7 +46,6 @@ SUBDIRS += winsys/virgl/drm winsys/virgl/vtest drivers/virgl SUBDIRS += winsys/svga/drm drivers/svga SUBDIRS += winsys/etnaviv/drm drivers/etnaviv drivers/renderonly -SUBDIRS += winsys/imx/drm SUBDIRS += state_trackers/dri # sort to eliminate any duplicates diff -Nru mesa-18.3.3/src/gallium/auxiliary/cso_cache/cso_context.c mesa-19.0.1/src/gallium/auxiliary/cso_cache/cso_context.c --- mesa-18.3.3/src/gallium/auxiliary/cso_cache/cso_context.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/cso_cache/cso_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -329,7 +329,8 @@ int supported_irs = pipe->screen->get_shader_param(pipe->screen, PIPE_SHADER_COMPUTE, PIPE_SHADER_CAP_SUPPORTED_IRS); - if (supported_irs & (1 << PIPE_SHADER_IR_TGSI)) { + if (supported_irs & ((1 << PIPE_SHADER_IR_TGSI) | + (1 << PIPE_SHADER_IR_NIR))) { ctx->has_compute_shader = TRUE; } } diff -Nru 
mesa-18.3.3/src/gallium/auxiliary/draw/draw_pipe_stipple.c mesa-19.0.1/src/gallium/auxiliary/draw/draw_pipe_stipple.c --- mesa-18.3.3/src/gallium/auxiliary/draw/draw_pipe_stipple.c 2018-03-13 20:41:43.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/draw/draw_pipe_stipple.c 2019-03-31 23:16:37.000000000 +0000 @@ -48,8 +48,8 @@ struct stipple_stage { struct draw_stage stage; float counter; - uint pattern; - uint factor; + ushort pattern; + ushort factor; bool smooth; }; @@ -110,7 +110,7 @@ static inline bool -stipple_test(int counter, ushort pattern, int factor) +stipple_test(int counter, ushort pattern, ushort factor) { int b = (counter / factor) & 0xf; return !!((1 << b) & pattern); @@ -136,6 +136,10 @@ float length; int i; + int intlength; + + if (header->flags & DRAW_PIPE_RESET_STIPPLE) + stipple->counter = 0; if (stipple->smooth) { float dx = x1 - x0; @@ -147,21 +151,21 @@ length = MAX2(dx, dy); } - if (header->flags & DRAW_PIPE_RESET_STIPPLE) - stipple->counter = 0; + if (util_is_inf_or_nan(length)) + intlength = 0; + else + intlength = ceilf(length); /* XXX ToDo: instead of iterating pixel-by-pixel, use a look-up table. 
*/ - for (i = 0; i < length; i++) { + for (i = 0; i < intlength; i++) { bool result = stipple_test((int)stipple->counter + i, - (ushort)stipple->pattern, stipple->factor); + stipple->pattern, stipple->factor); if (result != state) { /* changing from "off" to "on" or vice versa */ if (state) { - if (start != i) { - /* finishing an "on" segment */ - emit_segment(stage, header, start / length, i / length); - } + /* finishing an "on" segment */ + emit_segment(stage, header, start / length, i / length); } else { /* starting an "on" segment */ diff -Nru mesa-18.3.3/src/gallium/auxiliary/driver_ddebug/dd_context.c mesa-19.0.1/src/gallium/auxiliary/driver_ddebug/dd_context.c --- mesa-18.3.3/src/gallium/auxiliary/driver_ddebug/dd_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/driver_ddebug/dd_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -596,7 +596,6 @@ cnd_destroy(&dctx->cond); assert(list_empty(&dctx->records)); - assert(!dctx->record_pending); if (pipe->set_log_context) { pipe->set_log_context(pipe, NULL); diff -Nru mesa-18.3.3/src/gallium/auxiliary/driver_ddebug/dd_draw.c mesa-19.0.1/src/gallium/auxiliary/driver_ddebug/dd_draw.c --- mesa-18.3.3/src/gallium/auxiliary/driver_ddebug/dd_draw.c 2018-04-16 21:31:06.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/driver_ddebug/dd_draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -988,10 +988,8 @@ encountered_hang = true; } - if (num_later || dctx->record_pending) { - fprintf(stderr, "... and %u%s additional draws.\n", num_later, - dctx->record_pending ? "+1 (pending)" : ""); - } + if (num_later) + fprintf(stderr, "... 
and %u additional draws.\n", num_later); fprintf(stderr, "\nDone.\n"); dd_kill_process(); @@ -1008,9 +1006,6 @@ for (;;) { struct list_head records; - struct pipe_fence_handle *fence; - struct pipe_fence_handle *fence2 = NULL; - list_replace(&dctx->records, &records); list_inithead(&dctx->records); dctx->num_records = 0; @@ -1018,36 +1013,36 @@ if (dctx->api_stalled) cnd_signal(&dctx->cond); - if (!list_empty(&records)) { - /* Wait for the youngest draw. This means hangs can take a bit longer - * to detect, but it's more efficient this way. */ - struct dd_draw_record *youngest = - LIST_ENTRY(struct dd_draw_record, records.prev, list); - fence = youngest->bottom_of_pipe; - } else if (dctx->record_pending) { - /* Wait for pending fences, in case the driver ends up hanging internally. */ - fence = dctx->record_pending->prev_bottom_of_pipe; - fence2 = dctx->record_pending->top_of_pipe; - } else if (dctx->kill_thread) { - break; - } else { + if (list_empty(&records)) { + if (dctx->kill_thread) + break; + cnd_wait(&dctx->cond, &dctx->mutex); continue; } + mtx_unlock(&dctx->mutex); - /* Fences can be NULL legitimately when timeout detection is disabled. */ - if ((fence && - !screen->fence_finish(screen, NULL, fence, - (uint64_t)dscreen->timeout_ms * 1000*1000)) || - (fence2 && - !screen->fence_finish(screen, NULL, fence2, - (uint64_t)dscreen->timeout_ms * 1000*1000))) { - mtx_lock(&dctx->mutex); - list_splice(&records, &dctx->records); - dd_report_hang(dctx); - /* we won't actually get here */ - mtx_unlock(&dctx->mutex); + /* Wait for the youngest draw. This means hangs can take a bit longer + * to detect, but it's more efficient this way. 
*/ + struct dd_draw_record *youngest = + list_last_entry(&records, struct dd_draw_record, list); + + if (dscreen->timeout_ms > 0) { + uint64_t abs_timeout = os_time_get_absolute_timeout( + (uint64_t)dscreen->timeout_ms * 1000*1000); + + if (!util_queue_fence_wait_timeout(&youngest->driver_finished, abs_timeout) || + !screen->fence_finish(screen, NULL, youngest->bottom_of_pipe, + (uint64_t)dscreen->timeout_ms * 1000*1000)) { + mtx_lock(&dctx->mutex); + list_splice(&records, &dctx->records); + dd_report_hang(dctx); + /* we won't actually get here */ + mtx_unlock(&dctx->mutex); + } + } else { + util_queue_fence_wait(&youngest->driver_finished); } list_for_each_entry_safe(struct dd_draw_record, record, &records, list) { @@ -1079,6 +1074,7 @@ record->bottom_of_pipe = NULL; record->log_page = NULL; util_queue_fence_init(&record->driver_finished); + util_queue_fence_reset(&record->driver_finished); dd_init_copy_of_draw_state(&record->draw_state); dd_copy_draw_state(&record->draw_state.base, &dctx->draw_state); @@ -1115,13 +1111,25 @@ pipe->flush(pipe, &record->top_of_pipe, PIPE_FLUSH_DEFERRED | PIPE_FLUSH_TOP_OF_PIPE); } + } else if (dscreen->flush_always && dctx->num_draw_calls >= dscreen->skip_count) { + pipe->flush(pipe, NULL, 0); + } - mtx_lock(&dctx->mutex); - dctx->record_pending = record; - if (list_empty(&dctx->records)) - cnd_signal(&dctx->cond); - mtx_unlock(&dctx->mutex); + mtx_lock(&dctx->mutex); + if (unlikely(dctx->num_records > 10000)) { + dctx->api_stalled = true; + /* Since this is only a heuristic to prevent the API thread from getting + * too far ahead, we don't need a loop here. 
*/ + cnd_wait(&dctx->cond, &dctx->mutex); + dctx->api_stalled = false; } + + if (list_empty(&dctx->records)) + cnd_signal(&dctx->cond); + + list_addtail(&record->list, &dctx->records); + dctx->num_records++; + mtx_unlock(&dctx->mutex); } static void @@ -1134,8 +1142,7 @@ record->log_page = u_log_new_page(&dctx->log); record->time_after = os_time_get_nano(); - if (!util_queue_fence_is_signalled(&record->driver_finished)) - util_queue_fence_signal(&record->driver_finished); + util_queue_fence_signal(&record->driver_finished); if (dscreen->dump_mode == DD_DUMP_APITRACE_CALL && dscreen->apitrace_dump_call > dctx->draw_state.apitrace_call_number) { @@ -1158,34 +1165,14 @@ else flush_flags = PIPE_FLUSH_DEFERRED | PIPE_FLUSH_BOTTOM_OF_PIPE; pipe->flush(pipe, &record->bottom_of_pipe, flush_flags); - - assert(record == dctx->record_pending); } if (pipe->callback) { - util_queue_fence_reset(&record->driver_finished); pipe->callback(pipe, dd_after_draw_async, record, true); } else { dd_after_draw_async(record); } - mtx_lock(&dctx->mutex); - if (unlikely(dctx->num_records > 10000)) { - dctx->api_stalled = true; - /* Since this is only a heuristic to prevent the API thread from getting - * too far ahead, we don't need a loop here. 
*/ - cnd_wait(&dctx->cond, &dctx->mutex); - dctx->api_stalled = false; - } - - if (list_empty(&dctx->records)) - cnd_signal(&dctx->cond); - - list_addtail(&record->list, &dctx->records); - dctx->record_pending = NULL; - dctx->num_records++; - mtx_unlock(&dctx->mutex); - ++dctx->num_draw_calls; if (dscreen->skip_count && dctx->num_draw_calls % 10000 == 0) fprintf(stderr, "Gallium debugger reached %u draw calls.\n", diff -Nru mesa-18.3.3/src/gallium/auxiliary/driver_ddebug/dd_pipe.h mesa-19.0.1/src/gallium/auxiliary/driver_ddebug/dd_pipe.h --- mesa-18.3.3/src/gallium/auxiliary/driver_ddebug/dd_pipe.h 2018-04-16 21:31:06.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/driver_ddebug/dd_pipe.h 2019-03-31 23:16:37.000000000 +0000 @@ -274,6 +274,7 @@ int64_t time_after; unsigned draw_call; + /* The fence pointers are guaranteed to be valid once driver_finished is signalled */ struct pipe_fence_handle *prev_bottom_of_pipe; struct pipe_fence_handle *top_of_pipe; struct pipe_fence_handle *bottom_of_pipe; @@ -297,24 +298,18 @@ /* Pipelined hang detection. * - * This is without unnecessary flushes and waits. There is a memory-based - * fence that is incremented by clear_buffer every draw call. Driver fences - * are not used. + * Before each draw call, a new dd_draw_record is created that contains + * a copy of all states. After each draw call, the driver's log is added + * to this record. Additionally, deferred fences are associated to each + * record both before and after the draw. * - * After each draw call, a new dd_draw_record is created that contains - * a copy of all states, the output of pipe_context::dump_debug_state, - * and it has a fence number assigned. That's done without knowing whether - * that draw call is problematic or not. The record is added into the list - * of all records. - * - * An independent, separate thread loops over the list of records and checks - * their fences. Records with signalled fences are freed. 
On fence timeout, - * the thread dumps the records of in-flight draws. + * The records are handed off to a separate thread which waits on the + * records' fences. Records with signalled fences are freed. When a timeout + * is detected, the thread dumps the records of in-flight draws. */ thrd_t thread; mtx_t mutex; cnd_t cond; - struct dd_draw_record *record_pending; /* currently inside the driver */ struct list_head records; /* oldest record first */ unsigned num_records; bool kill_thread; diff -Nru mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_arit.c mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_arit.c --- mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_arit.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_arit.c 2019-03-31 23:16:37.000000000 +0000 @@ -1992,6 +1992,8 @@ else if ((util_cpu_caps.has_altivec && (type.width == 32 && type.length == 4))) return TRUE; + else if (util_cpu_caps.has_neon) + return TRUE; return FALSE; } @@ -2099,7 +2101,7 @@ LLVMValueRef a, enum lp_build_round_mode mode) { - if (util_cpu_caps.has_sse4_1) { + if (util_cpu_caps.has_sse4_1 || util_cpu_caps.has_neon) { LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; const char *intrinsic_root; @@ -2477,7 +2479,7 @@ else { LLVMValueRef half; - half = lp_build_const_vec(bld->gallivm, type, 0.5); + half = lp_build_const_vec(bld->gallivm, type, nextafterf(0.5, 0.0)); if (type.sign) { LLVMTypeRef vec_type = bld->vec_type; diff -Nru mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c --- mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c 2019-03-31 23:16:37.000000000 +0000 @@ -464,6 +464,7 @@ * \param ptr address of the pixel block (or the texel if uncompressed) * \param i, j the sub-block pixel coordinates. 
For non-compressed formats * these will always be (0, 0). + * \param cache optional value pointing to a lp_build_format_cache structure * \return a 4 element vector with the pixel's RGBA values. */ LLVMValueRef @@ -728,7 +729,7 @@ * s3tc rgb formats */ - if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC && cache) { + if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { struct lp_type tmp_type; LLVMValueRef tmp; @@ -737,7 +738,7 @@ tmp_type.length = num_pixels * 4; tmp_type.norm = TRUE; - tmp = lp_build_fetch_cached_texels(gallivm, + tmp = lp_build_fetch_s3tc_rgba_aos(gallivm, format_desc, num_pixels, base_ptr, diff -Nru mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_format_cached.c mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_format_cached.c --- mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_format_cached.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_format_cached.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,374 +0,0 @@ -/************************************************************************** - * - * Copyright 2015 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "lp_bld_format.h" -#include "lp_bld_type.h" -#include "lp_bld_struct.h" -#include "lp_bld_const.h" -#include "lp_bld_flow.h" -#include "lp_bld_swizzle.h" - -#include "util/u_math.h" - - -/** - * @file - * Complex block-compression based formats are handled here by using a cache, - * so re-decoding of every pixel is not required. - * Especially for bilinear filtering, texel reuse is very high hence even - * a small cache helps. - * The elements in the cache are the decoded blocks - currently things - * are restricted to formats which are 4x4 block based, and the decoded - * texels must fit into 4x8 bits. - * The cache is direct mapped so hitrates aren't all that great and cache - * thrashing could happen. 
- * - * @author Roland Scheidegger - */ - - -#if LP_BUILD_FORMAT_CACHE_DEBUG -static void -update_cache_access(struct gallivm_state *gallivm, - LLVMValueRef ptr, - unsigned count, - unsigned index) -{ - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef member_ptr, cache_access; - - assert(index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL || - index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); - - member_ptr = lp_build_struct_get_ptr(gallivm, ptr, index, ""); - cache_access = LLVMBuildLoad(builder, member_ptr, "cache_access"); - cache_access = LLVMBuildAdd(builder, cache_access, - LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), - count, 0), ""); - LLVMBuildStore(builder, cache_access, member_ptr); -} -#endif - - -static void -store_cached_block(struct gallivm_state *gallivm, - LLVMValueRef *col, - LLVMValueRef tag_value, - LLVMValueRef hash_index, - LLVMValueRef cache) -{ - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef ptr, indices[3]; - LLVMTypeRef type_ptr4x32; - unsigned count; - - type_ptr4x32 = LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), 0); - indices[0] = lp_build_const_int32(gallivm, 0); - indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS); - indices[2] = hash_index; - ptr = LLVMBuildGEP(builder, cache, indices, ARRAY_SIZE(indices), ""); - LLVMBuildStore(builder, tag_value, ptr); - - indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA); - hash_index = LLVMBuildMul(builder, hash_index, - lp_build_const_int32(gallivm, 16), ""); - for (count = 0; count < 4; count++) { - indices[2] = hash_index; - ptr = LLVMBuildGEP(builder, cache, indices, ARRAY_SIZE(indices), ""); - ptr = LLVMBuildBitCast(builder, ptr, type_ptr4x32, ""); - LLVMBuildStore(builder, col[count], ptr); - hash_index = LLVMBuildAdd(builder, hash_index, - lp_build_const_int32(gallivm, 4), ""); - } -} - - -static LLVMValueRef -lookup_cached_pixel(struct gallivm_state *gallivm, - LLVMValueRef 
ptr, - LLVMValueRef index) -{ - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef member_ptr, indices[3]; - - indices[0] = lp_build_const_int32(gallivm, 0); - indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA); - indices[2] = index; - member_ptr = LLVMBuildGEP(builder, ptr, indices, ARRAY_SIZE(indices), ""); - return LLVMBuildLoad(builder, member_ptr, "cache_data"); -} - - -static LLVMValueRef -lookup_tag_data(struct gallivm_state *gallivm, - LLVMValueRef ptr, - LLVMValueRef index) -{ - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef member_ptr, indices[3]; - - indices[0] = lp_build_const_int32(gallivm, 0); - indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS); - indices[2] = index; - member_ptr = LLVMBuildGEP(builder, ptr, indices, ARRAY_SIZE(indices), ""); - return LLVMBuildLoad(builder, member_ptr, "tag_data"); -} - - -static void -update_cached_block(struct gallivm_state *gallivm, - const struct util_format_description *format_desc, - LLVMValueRef ptr_addr, - LLVMValueRef hash_index, - LLVMValueRef cache) - -{ - LLVMBuilderRef builder = gallivm->builder; - LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); - LLVMTypeRef pi8t = LLVMPointerType(i8t, 0); - LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); - LLVMTypeRef i32x4 = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4); - LLVMValueRef function; - LLVMValueRef tag_value, tmp_ptr; - LLVMValueRef col[4]; - unsigned i, j; - - /* - * Use format_desc->fetch_rgba_8unorm() for each pixel in the block. - * This doesn't actually make any sense whatsoever, someone would need - * to write a function doing this for all pixels in a block (either as - * an external c function or with generated code). Don't ask. 
- */ - - { - /* - * Function to call looks like: - * fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) - */ - LLVMTypeRef ret_type; - LLVMTypeRef arg_types[4]; - LLVMTypeRef function_type; - - assert(format_desc->fetch_rgba_8unorm); - - ret_type = LLVMVoidTypeInContext(gallivm->context); - arg_types[0] = pi8t; - arg_types[1] = pi8t; - arg_types[2] = i32t; - arg_types[3] = i32t; - function_type = LLVMFunctionType(ret_type, arg_types, - ARRAY_SIZE(arg_types), 0); - - /* make const pointer for the C fetch_rgba_8unorm function */ - function = lp_build_const_int_pointer(gallivm, - func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm)); - - /* cast the callee pointer to the function's type */ - function = LLVMBuildBitCast(builder, function, - LLVMPointerType(function_type, 0), - "cast callee"); - } - - tmp_ptr = lp_build_array_alloca(gallivm, i32x4, - lp_build_const_int32(gallivm, 16), - "tmp_decode_store"); - tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, pi8t, ""); - - /* - * Invoke format_desc->fetch_rgba_8unorm() for each pixel. - * This is going to be really really slow. - * Note: the block store format is actually - * x0y0x0y1x0y2x0y3 x1y0x1y1x1y2x1y3 ... - */ - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) { - LLVMValueRef args[4]; - LLVMValueRef dst_offset = lp_build_const_int32(gallivm, (i * 4 + j) * 4); - - /* - * Note we actually supply a pointer to the start of the block, - * not the start of the texture. - */ - args[0] = LLVMBuildGEP(gallivm->builder, tmp_ptr, &dst_offset, 1, ""); - args[1] = ptr_addr; - args[2] = LLVMConstInt(i32t, i, 0); - args[3] = LLVMConstInt(i32t, j, 0); - LLVMBuildCall(builder, function, args, ARRAY_SIZE(args), ""); - } - } - - /* Finally store the block - pointless mem copy + update tag. 
*/ - tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, LLVMPointerType(i32x4, 0), ""); - for (i = 0; i < 4; ++i) { - LLVMValueRef tmp_offset = lp_build_const_int32(gallivm, i); - LLVMValueRef ptr = LLVMBuildGEP(gallivm->builder, tmp_ptr, &tmp_offset, 1, ""); - col[i] = LLVMBuildLoad(builder, ptr, ""); - } - - tag_value = LLVMBuildPtrToInt(gallivm->builder, ptr_addr, - LLVMInt64TypeInContext(gallivm->context), ""); - store_cached_block(gallivm, col, tag_value, hash_index, cache); -} - - -/* - * Do a cached lookup. - * - * Returns (vectors of) 4x8 rgba aos value - */ -LLVMValueRef -lp_build_fetch_cached_texels(struct gallivm_state *gallivm, - const struct util_format_description *format_desc, - unsigned n, - LLVMValueRef base_ptr, - LLVMValueRef offset, - LLVMValueRef i, - LLVMValueRef j, - LLVMValueRef cache) - -{ - LLVMBuilderRef builder = gallivm->builder; - unsigned count, low_bit, log2size; - LLVMValueRef color, offset_stored, addr, ptr_addrtrunc, tmp; - LLVMValueRef ij_index, hash_index, hash_mask, block_index; - LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); - LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); - LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context); - struct lp_type type; - struct lp_build_context bld32; - memset(&type, 0, sizeof type); - type.width = 32; - type.length = n; - - assert(format_desc->block.width == 4); - assert(format_desc->block.height == 4); - - lp_build_context_init(&bld32, gallivm, type); - - /* - * compute hash - we use direct mapped cache, the hash function could - * be better but it needs to be simple - * per-element: - * compare offset with offset stored at tag (hash) - * if not equal decode/store block, update tag - * extract color from cache - * assemble result vector - */ - - /* TODO: not ideal with 32bit pointers... 
*/ - - low_bit = util_logbase2(format_desc->block.bits / 8); - log2size = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE); - addr = LLVMBuildPtrToInt(builder, base_ptr, i64t, ""); - ptr_addrtrunc = LLVMBuildPtrToInt(builder, base_ptr, i32t, ""); - ptr_addrtrunc = lp_build_broadcast_scalar(&bld32, ptr_addrtrunc); - /* For the hash function, first mask off the unused lowest bits. Then just - do some xor with address bits - only use lower 32bits */ - ptr_addrtrunc = LLVMBuildAdd(builder, offset, ptr_addrtrunc, ""); - ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc, - lp_build_const_int_vec(gallivm, type, low_bit), ""); - /* This only really makes sense for size 64,128,256 */ - hash_index = ptr_addrtrunc; - ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc, - lp_build_const_int_vec(gallivm, type, 2*log2size), ""); - hash_index = LLVMBuildXor(builder, ptr_addrtrunc, hash_index, ""); - tmp = LLVMBuildLShr(builder, hash_index, - lp_build_const_int_vec(gallivm, type, log2size), ""); - hash_index = LLVMBuildXor(builder, hash_index, tmp, ""); - - hash_mask = lp_build_const_int_vec(gallivm, type, LP_BUILD_FORMAT_CACHE_SIZE - 1); - hash_index = LLVMBuildAnd(builder, hash_index, hash_mask, ""); - ij_index = LLVMBuildShl(builder, i, lp_build_const_int_vec(gallivm, type, 2), ""); - ij_index = LLVMBuildAdd(builder, ij_index, j, ""); - block_index = LLVMBuildShl(builder, hash_index, - lp_build_const_int_vec(gallivm, type, 4), ""); - block_index = LLVMBuildAdd(builder, ij_index, block_index, ""); - - if (n > 1) { - color = LLVMGetUndef(LLVMVectorType(i32t, n)); - for (count = 0; count < n; count++) { - LLVMValueRef index, cond, colorx; - LLVMValueRef block_indexx, hash_indexx, addrx, offsetx, ptr_addrx; - struct lp_build_if_state if_ctx; - - index = lp_build_const_int32(gallivm, count); - offsetx = LLVMBuildExtractElement(builder, offset, index, ""); - addrx = LLVMBuildZExt(builder, offsetx, i64t, ""); - addrx = LLVMBuildAdd(builder, addrx, addr, ""); - block_indexx = 
LLVMBuildExtractElement(builder, block_index, index, ""); - hash_indexx = LLVMBuildLShr(builder, block_indexx, - lp_build_const_int32(gallivm, 4), ""); - offset_stored = lookup_tag_data(gallivm, cache, hash_indexx); - cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addrx, ""); - - lp_build_if(&if_ctx, gallivm, cond); - { - ptr_addrx = LLVMBuildIntToPtr(builder, addrx, - LLVMPointerType(i8t, 0), ""); - update_cached_block(gallivm, format_desc, ptr_addrx, hash_indexx, cache); -#if LP_BUILD_FORMAT_CACHE_DEBUG - update_cache_access(gallivm, cache, 1, - LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); -#endif - } - lp_build_endif(&if_ctx); - - colorx = lookup_cached_pixel(gallivm, cache, block_indexx); - - color = LLVMBuildInsertElement(builder, color, colorx, - lp_build_const_int32(gallivm, count), ""); - } - } - else { - LLVMValueRef cond; - struct lp_build_if_state if_ctx; - - tmp = LLVMBuildZExt(builder, offset, i64t, ""); - addr = LLVMBuildAdd(builder, tmp, addr, ""); - offset_stored = lookup_tag_data(gallivm, cache, hash_index); - cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addr, ""); - - lp_build_if(&if_ctx, gallivm, cond); - { - tmp = LLVMBuildIntToPtr(builder, addr, LLVMPointerType(i8t, 0), ""); - update_cached_block(gallivm, format_desc, tmp, hash_index, cache); -#if LP_BUILD_FORMAT_CACHE_DEBUG - update_cache_access(gallivm, cache, 1, - LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); -#endif - } - lp_build_endif(&if_ctx); - - color = lookup_cached_pixel(gallivm, cache, block_index); - } -#if LP_BUILD_FORMAT_CACHE_DEBUG - update_cache_access(gallivm, cache, n, - LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL); -#endif - return LLVMBuildBitCast(builder, color, LLVMVectorType(i8t, n * 4), ""); -} - diff -Nru mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_format.h mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_format.h --- mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_format.h 2017-11-05 00:14:08.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_format.h 2019-03-31 23:16:37.000000000 +0000 @@ -165,8 +165,12 @@ LLVMValueRef j); +/* + * S3TC + */ + LLVMValueRef -lp_build_fetch_cached_texels(struct gallivm_state *gallivm, +lp_build_fetch_s3tc_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *format_desc, unsigned n, LLVMValueRef base_ptr, diff -Nru mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c --- mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,2266 @@ +/************************************************************************** + * + * Copyright 2010-2018 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +/** + * @file + * s3tc pixel format manipulation. + * + * @author Roland Scheidegger + */ + + +#include "util/u_format.h" +#include "util/u_math.h" +#include "util/u_string.h" +#include "util/u_cpu_detect.h" +#include "util/u_debug.h" + +#include "lp_bld_arit.h" +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_conv.h" +#include "lp_bld_gather.h" +#include "lp_bld_format.h" +#include "lp_bld_logic.h" +#include "lp_bld_pack.h" +#include "lp_bld_flow.h" +#include "lp_bld_printf.h" +#include "lp_bld_struct.h" +#include "lp_bld_swizzle.h" +#include "lp_bld_init.h" +#include "lp_bld_debug.h" +#include "lp_bld_intr.h" + + +/** + * Reverse an interleave2_half + * (ie. pick every second element, independent lower/upper halfs) + * sse2 can only do that with 32bit (shufps) or larger elements + * natively. (Otherwise, and/pack (even) or shift/pack (odd) + * could be used, ideally llvm would do that for us.) + * XXX: Unfortunately, this does NOT translate to a shufps if those + * are int vectors (and casting will not help, llvm needs to recognize it + * as "real" float). Instead, llvm will use a pshufd/pshufd/punpcklqdq + * sequence which I'm pretty sure is a lot worse despite domain transition + * penalties with shufps (except maybe on Nehalem). 
+ */ +static LLVMValueRef +lp_build_uninterleave2_half(struct gallivm_state *gallivm, + struct lp_type type, + LLVMValueRef a, + LLVMValueRef b, + unsigned lo_hi) +{ + LLVMValueRef shuffle, elems[LP_MAX_VECTOR_LENGTH]; + unsigned i, j; + + assert(type.length <= LP_MAX_VECTOR_LENGTH); + assert(lo_hi < 2); + + if (type.length * type.width == 256) { + assert(type.length >= 4); + for (i = 0, j = 0; i < type.length; ++i) { + if (i == type.length / 4) { + j = type.length; + } else if (i == type.length / 2) { + j = type.length / 2; + } else if (i == 3 * type.length / 4) { + j = 3 * type.length / 4; + } else { + j += 2; + } + elems[i] = lp_build_const_int32(gallivm, j + lo_hi); + } + } else { + for (i = 0; i < type.length; ++i) { + elems[i] = lp_build_const_int32(gallivm, 2*i + lo_hi); + } + } + + shuffle = LLVMConstVector(elems, type.length); + + return LLVMBuildShuffleVector(gallivm->builder, a, b, shuffle, ""); + +} + + +/** + * Build shuffle for extending vectors. + */ +static LLVMValueRef +lp_build_const_extend_shuffle(struct gallivm_state *gallivm, + unsigned n, unsigned length) +{ + LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + assert(n <= length); + assert(length <= LP_MAX_VECTOR_LENGTH); + + /* TODO: cache results in a static table */ + + for(i = 0; i < n; i++) { + elems[i] = lp_build_const_int32(gallivm, i); + } + for (i = n; i < length; i++) { + elems[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); + } + + return LLVMConstVector(elems, length); +} + +static LLVMValueRef +lp_build_const_unpackx2_shuffle(struct gallivm_state *gallivm, unsigned n) +{ + LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; + unsigned i, j; + + assert(n <= LP_MAX_VECTOR_LENGTH); + + /* TODO: cache results in a static table */ + + for(i = 0, j = 0; i < n; i += 2, ++j) { + elems[i + 0] = lp_build_const_int32(gallivm, 0 + j); + elems[i + 1] = lp_build_const_int32(gallivm, n + j); + elems[n + i + 0] = lp_build_const_int32(gallivm, 0 + n/2 + j); + elems[n + i + 1] = 
lp_build_const_int32(gallivm, n + n/2 + j); + } + + return LLVMConstVector(elems, n * 2); +} + +/* + * broadcast 1 element to all elements + */ +static LLVMValueRef +lp_build_const_shuffle1(struct gallivm_state *gallivm, + unsigned index, unsigned n) +{ + LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + assert(n <= LP_MAX_VECTOR_LENGTH); + + /* TODO: cache results in a static table */ + + for (i = 0; i < n; i++) { + elems[i] = lp_build_const_int32(gallivm, index); + } + + return LLVMConstVector(elems, n); +} + +/* + * move 1 element to pos 0, rest undef + */ +static LLVMValueRef +lp_build_shuffle1undef(struct gallivm_state *gallivm, + LLVMValueRef a, unsigned index, unsigned n) +{ + LLVMValueRef elems[LP_MAX_VECTOR_LENGTH], shuf; + unsigned i; + + assert(n <= LP_MAX_VECTOR_LENGTH); + + elems[0] = lp_build_const_int32(gallivm, index); + + for (i = 1; i < n; i++) { + elems[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); + } + shuf = LLVMConstVector(elems, n); + + return LLVMBuildShuffleVector(gallivm->builder, a, a, shuf, ""); +} + +static boolean +format_dxt1_variant(enum pipe_format format) +{ + return format == PIPE_FORMAT_DXT1_RGB || + format == PIPE_FORMAT_DXT1_RGBA || + format == PIPE_FORMAT_DXT1_SRGB || + format == PIPE_FORMAT_DXT1_SRGBA; + +} + +/** + * Gather elements from scatter positions in memory into vectors. + * This is customised for fetching texels from s3tc textures. + * For SSE, typical value is length=4. + * + * @param length length of the offsets + * @param colors the stored colors of the blocks will be extracted into this. + * @param codewords the codewords of the blocks will be extracted into this. + * @param alpha_lo used for storing lower 32bit of alpha components for dxt3/5 + * @param alpha_hi used for storing higher 32bit of alpha components for dxt3/5 + * @param base_ptr base pointer, should be a i8 pointer type. 
+ * @param offsets vector with offsets + */ +static void +lp_build_gather_s3tc(struct gallivm_state *gallivm, + unsigned length, + const struct util_format_description *format_desc, + LLVMValueRef *colors, + LLVMValueRef *codewords, + LLVMValueRef *alpha_lo, + LLVMValueRef *alpha_hi, + LLVMValueRef base_ptr, + LLVMValueRef offsets) +{ + LLVMBuilderRef builder = gallivm->builder; + unsigned block_bits = format_desc->block.bits; + unsigned i; + LLVMValueRef elems[8]; + LLVMTypeRef type32 = LLVMInt32TypeInContext(gallivm->context); + LLVMTypeRef type64 = LLVMInt64TypeInContext(gallivm->context); + LLVMTypeRef type32dxt; + struct lp_type lp_type32dxt; + + memset(&lp_type32dxt, 0, sizeof lp_type32dxt); + lp_type32dxt.width = 32; + lp_type32dxt.length = block_bits / 32; + type32dxt = lp_build_vec_type(gallivm, lp_type32dxt); + + assert(block_bits == 64 || block_bits == 128); + assert(length == 1 || length == 4 || length == 8); + + for (i = 0; i < length; ++i) { + elems[i] = lp_build_gather_elem(gallivm, length, + block_bits, block_bits, TRUE, + base_ptr, offsets, i, FALSE); + elems[i] = LLVMBuildBitCast(builder, elems[i], type32dxt, ""); + } + if (length == 1) { + LLVMValueRef elem = elems[0]; + if (block_bits == 128) { + *alpha_lo = LLVMBuildExtractElement(builder, elem, + lp_build_const_int32(gallivm, 0), ""); + *alpha_hi = LLVMBuildExtractElement(builder, elem, + lp_build_const_int32(gallivm, 1), ""); + *colors = LLVMBuildExtractElement(builder, elem, + lp_build_const_int32(gallivm, 2), ""); + *codewords = LLVMBuildExtractElement(builder, elem, + lp_build_const_int32(gallivm, 3), ""); + } + else { + *alpha_lo = LLVMGetUndef(type32); + *alpha_hi = LLVMGetUndef(type32); + *colors = LLVMBuildExtractElement(builder, elem, + lp_build_const_int32(gallivm, 0), ""); + *codewords = LLVMBuildExtractElement(builder, elem, + lp_build_const_int32(gallivm, 1), ""); + } + } + else { + LLVMValueRef tmp[4], cc01, cc23; + struct lp_type lp_type32, lp_type64, lp_type32dxt; + 
memset(&lp_type32, 0, sizeof lp_type32); + lp_type32.width = 32; + lp_type32.length = length; + memset(&lp_type64, 0, sizeof lp_type64); + lp_type64.width = 64; + lp_type64.length = length/2; + + if (block_bits == 128) { + if (length == 8) { + for (i = 0; i < 4; ++i) { + tmp[0] = elems[i]; + tmp[1] = elems[i+4]; + elems[i] = lp_build_concat(gallivm, tmp, lp_type32dxt, 2); + } + } + lp_build_transpose_aos(gallivm, lp_type32, elems, tmp); + *colors = tmp[2]; + *codewords = tmp[3]; + *alpha_lo = tmp[0]; + *alpha_hi = tmp[1]; + } else { + LLVMTypeRef type64_vec = LLVMVectorType(type64, length/2); + LLVMTypeRef type32_vec = LLVMVectorType(type32, length); + + for (i = 0; i < length; ++i) { + /* no-op shuffle */ + elems[i] = LLVMBuildShuffleVector(builder, elems[i], + LLVMGetUndef(type32dxt), + lp_build_const_extend_shuffle(gallivm, 2, 4), ""); + } + if (length == 8) { + for (i = 0; i < 4; ++i) { + tmp[0] = elems[i]; + tmp[1] = elems[i+4]; + elems[i] = lp_build_concat(gallivm, tmp, lp_type32, 2); + } + } + cc01 = lp_build_interleave2_half(gallivm, lp_type32, elems[0], elems[1], 0); + cc23 = lp_build_interleave2_half(gallivm, lp_type32, elems[2], elems[3], 0); + cc01 = LLVMBuildBitCast(builder, cc01, type64_vec, ""); + cc23 = LLVMBuildBitCast(builder, cc23, type64_vec, ""); + *colors = lp_build_interleave2_half(gallivm, lp_type64, cc01, cc23, 0); + *codewords = lp_build_interleave2_half(gallivm, lp_type64, cc01, cc23, 1); + *colors = LLVMBuildBitCast(builder, *colors, type32_vec, ""); + *codewords = LLVMBuildBitCast(builder, *codewords, type32_vec, ""); + } + } +} + +/** Convert from containing 2 x n rgb565 colors + * to 2 rgba8888 colors + * This is the most optimized version I can think of + * should be nearly as fast as decoding only one color + * NOTE: alpha channel will be set to 0 + * @param colors is a vector containing the rgb565 colors + */ +static void +color_expand2_565_to_8888(struct gallivm_state *gallivm, + unsigned n, + LLVMValueRef colors, + LLVMValueRef 
*color0, + LLVMValueRef *color1) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef r, g, b, rblo, glo; + LLVMValueRef rgblomask, rb, rgb0, rgb1; + struct lp_type type, type16, type8; + + assert(n > 1); + + memset(&type, 0, sizeof type); + type.width = 32; + type.length = n; + + memset(&type16, 0, sizeof type16); + type16.width = 16; + type16.length = 2 * n; + + memset(&type8, 0, sizeof type8); + type8.width = 8; + type8.length = 4 * n; + + rgblomask = lp_build_const_int_vec(gallivm, type16, 0x0707); + colors = LLVMBuildBitCast(builder, colors, + lp_build_vec_type(gallivm, type16), ""); + /* move r into low 8 bits, b into high 8 bits, g into another reg (low bits) + * make sure low bits of r are zero - could use AND but requires constant */ + r = LLVMBuildLShr(builder, colors, lp_build_const_int_vec(gallivm, type16, 11), ""); + r = LLVMBuildShl(builder, r, lp_build_const_int_vec(gallivm, type16, 3), ""); + b = LLVMBuildShl(builder, colors, lp_build_const_int_vec(gallivm, type16, 11), ""); + rb = LLVMBuildOr(builder, r, b, ""); + rblo = LLVMBuildLShr(builder, rb, lp_build_const_int_vec(gallivm, type16, 5), ""); + /* don't have byte shift hence need mask */ + rblo = LLVMBuildAnd(builder, rblo, rgblomask, ""); + rb = LLVMBuildOr(builder, rb, rblo, ""); + + /* make sure low bits of g are zero */ + g = LLVMBuildAnd(builder, colors, lp_build_const_int_vec(gallivm, type16, 0x07e0), ""); + g = LLVMBuildLShr(builder, g, lp_build_const_int_vec(gallivm, type16, 3), ""); + glo = LLVMBuildLShr(builder, g, lp_build_const_int_vec(gallivm, type16, 6), ""); + g = LLVMBuildOr(builder, g, glo, ""); + + rb = LLVMBuildBitCast(builder, rb, lp_build_vec_type(gallivm, type8), ""); + g = LLVMBuildBitCast(builder, g, lp_build_vec_type(gallivm, type8), ""); + rgb0 = lp_build_interleave2_half(gallivm, type8, rb, g, 0); + rgb1 = lp_build_interleave2_half(gallivm, type8, rb, g, 1); + + rgb0 = LLVMBuildBitCast(builder, rgb0, lp_build_vec_type(gallivm, type), ""); + rgb1 = 
LLVMBuildBitCast(builder, rgb1, lp_build_vec_type(gallivm, type), ""); + + /* rgb0 is rgb00, rgb01, rgb10, rgb11 + * instead of rgb00, rgb10, rgb20, rgb30 hence need reshuffle + * on x86 this _should_ just generate one shufps... + */ + *color0 = lp_build_uninterleave2_half(gallivm, type, rgb0, rgb1, 0); + *color1 = lp_build_uninterleave2_half(gallivm, type, rgb0, rgb1, 1); +} + + +/** Convert from containing rgb565 colors + * (in first 16 bits) to rgba8888 colors + * bits 16-31 MBZ + * NOTE: alpha channel will be set to 0 + * @param colors is a vector containing the rgb565 colors + */ +static LLVMValueRef +color_expand_565_to_8888(struct gallivm_state *gallivm, + unsigned n, + LLVMValueRef colors) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef rgba, r, g, b, rgblo, glo; + LLVMValueRef rbhimask, g6mask, rgblomask; + struct lp_type type; + memset(&type, 0, sizeof type); + type.width = 32; + type.length = n; + + /* color expansion: + * first extract and shift colors into their final locations + * (high bits - low bits zero at this point) + * then replicate highest bits to the lowest bits + * note rb replication can be done in parallel but not g + * (different shift) + * r5mask = 0xf800, g6mask = 0x07e0, b5mask = 0x001f + * rhigh = 8, ghigh = 5, bhigh = 19 + * rblow = 5, glow = 6 + * rgblowmask = 0x00070307 + * r = colors >> rhigh + * b = colors << bhigh + * g = (colors & g6mask) << ghigh + * rb = (r | b) rbhimask + * rbtmp = rb >> rblow + * gtmp = rb >> glow + * rbtmp = rbtmp | gtmp + * rbtmp = rbtmp & rgblowmask + * rgb = rb | g | rbtmp + */ + g6mask = lp_build_const_int_vec(gallivm, type, 0x07e0); + rbhimask = lp_build_const_int_vec(gallivm, type, 0x00f800f8); + rgblomask = lp_build_const_int_vec(gallivm, type, 0x00070307); + + r = LLVMBuildLShr(builder, colors, lp_build_const_int_vec(gallivm, type, 8), ""); + b = LLVMBuildShl(builder, colors, lp_build_const_int_vec(gallivm, type, 19), ""); + g = LLVMBuildAnd(builder, colors, g6mask, ""); + g = 
LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 5), ""); + rgba = LLVMBuildOr(builder, r, b, ""); + rgba = LLVMBuildAnd(builder, rgba, rbhimask, ""); + rgblo = LLVMBuildLShr(builder, rgba, lp_build_const_int_vec(gallivm, type, 5), ""); + glo = LLVMBuildLShr(builder, g, lp_build_const_int_vec(gallivm, type, 6), ""); + rgblo = LLVMBuildOr(builder, rgblo, glo, ""); + rgblo = LLVMBuildAnd(builder, rgblo, rgblomask, ""); + rgba = LLVMBuildOr(builder, rgba, g, ""); + rgba = LLVMBuildOr(builder, rgba, rgblo, ""); + + return rgba; +} + + +/* + * Average two byte vectors. (Will always round up.) + */ +static LLVMValueRef +lp_build_pavgb(struct lp_build_context *bld8, + LLVMValueRef v0, + LLVMValueRef v1) +{ + struct gallivm_state *gallivm = bld8->gallivm; + LLVMBuilderRef builder = gallivm->builder; + assert(bld8->type.width == 8); + assert(bld8->type.length == 16 || bld8->type.length == 32); + if (HAVE_LLVM < 0x0600) { + LLVMValueRef intrargs[2]; + char *intr_name = bld8->type.length == 32 ? "llvm.x86.avx2.pavg.b" : + "llvm.x86.sse2.pavg.b"; + intrargs[0] = v0; + intrargs[1] = v1; + return lp_build_intrinsic(builder, intr_name, + bld8->vec_type, intrargs, 2, 0); + } else { + /* + * Must match llvm's autoupgrade of pavg.b intrinsic to be useful. + * You better hope the backend code manages to detect the pattern, and + * the pattern doesn't change there... 
+ */ + struct lp_type type_ext = bld8->type; + LLVMTypeRef vec_type_ext; + LLVMValueRef res; + LLVMValueRef ext_one; + type_ext.width = 16; + vec_type_ext = lp_build_vec_type(gallivm, type_ext); + ext_one = lp_build_const_vec(gallivm, type_ext, 1); + + v0 = LLVMBuildZExt(builder, v0, vec_type_ext, ""); + v1 = LLVMBuildZExt(builder, v1, vec_type_ext, ""); + res = LLVMBuildAdd(builder, v0, v1, ""); + res = LLVMBuildAdd(builder, res, ext_one, ""); + res = LLVMBuildLShr(builder, res, ext_one, ""); + res = LLVMBuildTrunc(builder, res, bld8->vec_type, ""); + return res; + } +} + +/** + * Calculate 1/3(v1-v0) + v0 + * and 2*1/3(v1-v0) + v0 + */ +static void +lp_build_lerp23(struct lp_build_context *bld, + LLVMValueRef v0, + LLVMValueRef v1, + LLVMValueRef *res0, + LLVMValueRef *res1) +{ + struct gallivm_state *gallivm = bld->gallivm; + LLVMValueRef x, x_lo, x_hi, delta_lo, delta_hi; + LLVMValueRef mul_lo, mul_hi, v0_lo, v0_hi, v1_lo, v1_hi, tmp; + const struct lp_type type = bld->type; + LLVMBuilderRef builder = bld->gallivm->builder; + struct lp_type i16_type = lp_wider_type(type); + struct lp_build_context bld2; + + assert(lp_check_value(type, v0)); + assert(lp_check_value(type, v1)); + assert(!type.floating && !type.fixed && !type.norm && type.width == 8); + + lp_build_context_init(&bld2, gallivm, i16_type); + bld2.type.sign = TRUE; + x = lp_build_const_int_vec(gallivm, bld->type, 255*1/3); + + /* FIXME: use native avx256 unpack/pack */ + lp_build_unpack2(gallivm, type, i16_type, x, &x_lo, &x_hi); + lp_build_unpack2(gallivm, type, i16_type, v0, &v0_lo, &v0_hi); + lp_build_unpack2(gallivm, type, i16_type, v1, &v1_lo, &v1_hi); + delta_lo = lp_build_sub(&bld2, v1_lo, v0_lo); + delta_hi = lp_build_sub(&bld2, v1_hi, v0_hi); + + mul_lo = LLVMBuildMul(builder, x_lo, delta_lo, ""); + mul_hi = LLVMBuildMul(builder, x_hi, delta_hi, ""); + + x_lo = LLVMBuildLShr(builder, mul_lo, lp_build_const_int_vec(gallivm, i16_type, 8), ""); + x_hi = LLVMBuildLShr(builder, mul_hi, 
lp_build_const_int_vec(gallivm, i16_type, 8), ""); + /* lerp optimization: pack now, do add afterwards */ + tmp = lp_build_pack2(gallivm, i16_type, type, x_lo, x_hi); + *res0 = lp_build_add(bld, tmp, v0); + + x_lo = LLVMBuildLShr(builder, mul_lo, lp_build_const_int_vec(gallivm, i16_type, 7), ""); + x_hi = LLVMBuildLShr(builder, mul_hi, lp_build_const_int_vec(gallivm, i16_type, 7), ""); + /* unlike above still need mask (but add still afterwards). */ + x_lo = LLVMBuildAnd(builder, x_lo, lp_build_const_int_vec(gallivm, i16_type, 0xff), ""); + x_hi = LLVMBuildAnd(builder, x_hi, lp_build_const_int_vec(gallivm, i16_type, 0xff), ""); + tmp = lp_build_pack2(gallivm, i16_type, type, x_lo, x_hi); + *res1 = lp_build_add(bld, tmp, v0); +} + +/** + * Convert from s3tc dxt1 to <4n x i8> RGBA AoS + * @param colors is a vector with n x 2x16bit colors + * @param codewords is a vector containing the codewords + * @param i is a vector with the x pixel coordinate (0 to 3) + * @param j is a vector with the y pixel coordinate (0 to 3) + */ +static LLVMValueRef +s3tc_dxt1_full_to_rgba_aos(struct gallivm_state *gallivm, + unsigned n, + enum pipe_format format, + LLVMValueRef colors, + LLVMValueRef codewords, + LLVMValueRef i, + LLVMValueRef j) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef color0, color1, color2, color3, color2_2, color3_2; + LLVMValueRef rgba, a, colors0, colors1, col0, col1, const2; + LLVMValueRef bit_pos, sel_mask, sel_lo, sel_hi, indices; + struct lp_type type, type8; + struct lp_build_context bld8, bld32; + boolean is_dxt1_variant = format_dxt1_variant(format); + + memset(&type, 0, sizeof type); + type.width = 32; + type.length = n; + + memset(&type8, 0, sizeof type8); + type8.width = 8; + type8.length = 4*n; + + assert(lp_check_value(type, i)); + assert(lp_check_value(type, j)); + + a = lp_build_const_int_vec(gallivm, type, 0xff000000); + + lp_build_context_init(&bld32, gallivm, type); + lp_build_context_init(&bld8, gallivm, type8); + + /* + * works 
as follows: + * - expand color0/color1 to rgba8888 + * - calculate color2/3 (interpolation) according to color0 < color1 rules + * - calculate color2/3 according to color0 >= color1 rules + * - do selection of color2/3 according to comparison of color0/1 + * - extract indices (vector shift). + * - use compare/select to select the correct color. Since we have 2bit + * indices (and 4 colors), needs at least three compare/selects. + */ + /* + * expand the two colors + */ + col0 = LLVMBuildAnd(builder, colors, lp_build_const_int_vec(gallivm, type, 0x0000ffff), ""); + col1 = LLVMBuildLShr(builder, colors, lp_build_const_int_vec(gallivm, type, 16), ""); + if (n > 1) { + color_expand2_565_to_8888(gallivm, n, colors, &color0, &color1); + } + else { + color0 = color_expand_565_to_8888(gallivm, n, col0); + color1 = color_expand_565_to_8888(gallivm, n, col1); + } + + /* + * interpolate colors + * color2_1 is 2/3 color0 + 1/3 color1 + * color3_1 is 1/3 color0 + 2/3 color1 + * color2_2 is 1/2 color0 + 1/2 color1 + * color3_2 is 0 + */ + + colors0 = LLVMBuildBitCast(builder, color0, bld8.vec_type, ""); + colors1 = LLVMBuildBitCast(builder, color1, bld8.vec_type, ""); + /* can combine 2 lerps into one mostly - still looks expensive enough. */ + lp_build_lerp23(&bld8, colors0, colors1, &color2, &color3); + color2 = LLVMBuildBitCast(builder, color2, bld32.vec_type, ""); + color3 = LLVMBuildBitCast(builder, color3, bld32.vec_type, ""); + + /* dxt3/5 always use 4-color encoding */ + if (is_dxt1_variant) { + /* fix up alpha */ + if (format == PIPE_FORMAT_DXT1_RGBA || + format == PIPE_FORMAT_DXT1_SRGBA) { + color0 = LLVMBuildOr(builder, color0, a, ""); + color1 = LLVMBuildOr(builder, color1, a, ""); + color3 = LLVMBuildOr(builder, color3, a, ""); + } + /* + * XXX with sse2 and 16x8 vectors, should use pavgb even when n == 1. + * Much cheaper (but we don't care that much if n == 1). 
+ */ + if ((util_cpu_caps.has_sse2 && n == 4) || + (util_cpu_caps.has_avx2 && n == 8)) { + color2_2 = lp_build_pavgb(&bld8, colors0, colors1); + color2_2 = LLVMBuildBitCast(builder, color2_2, bld32.vec_type, ""); + } + else { + struct lp_type i16_type = lp_wider_type(type8); + struct lp_build_context bld2; + LLVMValueRef v0_lo, v0_hi, v1_lo, v1_hi, addlo, addhi; + + lp_build_context_init(&bld2, gallivm, i16_type); + bld2.type.sign = TRUE; + + /* + * This isn't as expensive as it looks (the unpack is the same as + * for lerp23), with correct rounding. + * (Note that while rounding is correct, this will always round down, + * whereas pavgb will always round up.) + */ + /* FIXME: use native avx256 unpack/pack */ + lp_build_unpack2(gallivm, type8, i16_type, colors0, &v0_lo, &v0_hi); + lp_build_unpack2(gallivm, type8, i16_type, colors1, &v1_lo, &v1_hi); + + addlo = lp_build_add(&bld2, v0_lo, v1_lo); + addhi = lp_build_add(&bld2, v0_hi, v1_hi); + addlo = LLVMBuildLShr(builder, addlo, + lp_build_const_int_vec(gallivm, i16_type, 1), ""); + addhi = LLVMBuildLShr(builder, addhi, + lp_build_const_int_vec(gallivm, i16_type, 1), ""); + color2_2 = lp_build_pack2(gallivm, i16_type, type8, addlo, addhi); + color2_2 = LLVMBuildBitCast(builder, color2_2, bld32.vec_type, ""); + } + color3_2 = lp_build_const_int_vec(gallivm, type, 0); + + /* select between colors2/3 */ + /* signed compare is faster saves some xors */ + type.sign = TRUE; + sel_mask = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, col0, col1); + color2 = lp_build_select(&bld32, sel_mask, color2, color2_2); + color3 = lp_build_select(&bld32, sel_mask, color3, color3_2); + type.sign = FALSE; + + if (format == PIPE_FORMAT_DXT1_RGBA || + format == PIPE_FORMAT_DXT1_SRGBA) { + color2 = LLVMBuildOr(builder, color2, a, ""); + } + } + + const2 = lp_build_const_int_vec(gallivm, type, 2); + /* extract 2-bit index values */ + bit_pos = LLVMBuildShl(builder, j, const2, ""); + bit_pos = LLVMBuildAdd(builder, bit_pos, i, ""); + 
bit_pos = LLVMBuildAdd(builder, bit_pos, bit_pos, ""); + /* + * NOTE: This innocent looking shift is very expensive with x86/ssex. + * Shifts with per-elemnent shift count get roughly translated to + * extract (count), extract (value), shift, move (back to xmm), unpack + * per element! + * So about 20 instructions here for 4xi32. + * Newer llvm versions (3.7+) will not do extract/insert but use a + * a couple constant count vector shifts plus shuffles. About same + * amount of instructions unfortunately... + * Would get much worse with 8xi16 even... + * We could actually do better here: + * - subtract bit_pos from 128+30, shl 23, convert float to int... + * - now do mul with codewords followed by shr 30... + * But requires 32bit->32bit mul, sse41 only (well that's emulatable + * with 2 32bit->64bit muls...) and not exactly cheap + * AVX2, of course, fixes this nonsense. + */ + indices = LLVMBuildLShr(builder, codewords, bit_pos, ""); + + /* finally select the colors */ + sel_lo = LLVMBuildAnd(builder, indices, bld32.one, ""); + sel_lo = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, sel_lo, bld32.one); + color0 = lp_build_select(&bld32, sel_lo, color1, color0); + color2 = lp_build_select(&bld32, sel_lo, color3, color2); + sel_hi = LLVMBuildAnd(builder, indices, const2, ""); + sel_hi = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, sel_hi, const2); + rgba = lp_build_select(&bld32, sel_hi, color2, color0); + + /* fix up alpha */ + if (format == PIPE_FORMAT_DXT1_RGB || + format == PIPE_FORMAT_DXT1_SRGB) { + rgba = LLVMBuildOr(builder, rgba, a, ""); + } + return LLVMBuildBitCast(builder, rgba, bld8.vec_type, ""); +} + + +static LLVMValueRef +s3tc_dxt1_to_rgba_aos(struct gallivm_state *gallivm, + unsigned n, + enum pipe_format format, + LLVMValueRef colors, + LLVMValueRef codewords, + LLVMValueRef i, + LLVMValueRef j) +{ + return s3tc_dxt1_full_to_rgba_aos(gallivm, n, format, + colors, codewords, i, j); +} + + +/** + * Convert from s3tc dxt3 to <4n x i8> RGBA AoS + 
* @param colors is a vector with n x 2x16bit colors + * @param codewords is a vector containing the codewords + * @param alphas is a vector containing the alpha values + * @param i is a vector with the x pixel coordinate (0 to 3) + * @param j is a vector with the y pixel coordinate (0 to 3) + */ +static LLVMValueRef +s3tc_dxt3_to_rgba_aos(struct gallivm_state *gallivm, + unsigned n, + enum pipe_format format, + LLVMValueRef colors, + LLVMValueRef codewords, + LLVMValueRef alpha_low, + LLVMValueRef alpha_hi, + LLVMValueRef i, + LLVMValueRef j) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef rgba, tmp, tmp2; + LLVMValueRef bit_pos, sel_mask; + struct lp_type type, type8; + struct lp_build_context bld; + + memset(&type, 0, sizeof type); + type.width = 32; + type.length = n; + + memset(&type8, 0, sizeof type8); + type8.width = 8; + type8.length = n*4; + + assert(lp_check_value(type, i)); + assert(lp_check_value(type, j)); + + lp_build_context_init(&bld, gallivm, type); + + rgba = s3tc_dxt1_to_rgba_aos(gallivm, n, format, + colors, codewords, i, j); + + rgba = LLVMBuildBitCast(builder, rgba, bld.vec_type, ""); + + /* + * Extract alpha values. Since we now need to select from + * which 32bit vector values are fetched, construct selection + * mask from highest bit of bit_pos, and use select, then shift + * according to the bit_pos (without the highest bit). + * Note this is pointless for n == 1 case. Could just + * directly use 64bit arithmetic if we'd extract 64bit + * alpha value instead of 2x32... 
+ */ + /* pos = 4*(4j+i) */ + bit_pos = LLVMBuildShl(builder, j, lp_build_const_int_vec(gallivm, type, 2), ""); + bit_pos = LLVMBuildAdd(builder, bit_pos, i, ""); + bit_pos = LLVMBuildShl(builder, bit_pos, + lp_build_const_int_vec(gallivm, type, 2), ""); + sel_mask = LLVMBuildLShr(builder, bit_pos, + lp_build_const_int_vec(gallivm, type, 5), ""); + sel_mask = LLVMBuildSub(builder, sel_mask, bld.one, ""); + tmp = lp_build_select(&bld, sel_mask, alpha_low, alpha_hi); + bit_pos = LLVMBuildAnd(builder, bit_pos, + lp_build_const_int_vec(gallivm, type, 0xffffffdf), ""); + /* Warning: slow shift with per element count */ + /* + * Could do pshufb here as well - just use appropriate 2 bits in bit_pos + * to select the right byte with pshufb. Then for the remaining one bit + * just do shift/select. + */ + tmp = LLVMBuildLShr(builder, tmp, bit_pos, ""); + + /* combined expand from a4 to a8 and shift into position */ + tmp = LLVMBuildShl(builder, tmp, lp_build_const_int_vec(gallivm, type, 28), ""); + tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 4), ""); + tmp = LLVMBuildOr(builder, tmp, tmp2, ""); + + rgba = LLVMBuildOr(builder, tmp, rgba, ""); + + return LLVMBuildBitCast(builder, rgba, lp_build_vec_type(gallivm, type8), ""); +} + +static LLVMValueRef +lp_build_lerpdxta(struct gallivm_state *gallivm, + LLVMValueRef alpha0, + LLVMValueRef alpha1, + LLVMValueRef code, + LLVMValueRef sel_mask, + unsigned n) +{ + /* + * note we're doing lerp in 16bit since 32bit pmulld is only available in sse41 + * (plus pmullw is actually faster...) + * we just pretend our 32bit values (which are really only 8bit) are 16bits. + * Note that this is obviously a disaster for the scalar case. 
+ */ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef delta, ainterp; + LLVMValueRef weight5, weight7, weight; + struct lp_type type32, type16, type8; + struct lp_build_context bld16; + + memset(&type32, 0, sizeof type32); + type32.width = 32; + type32.length = n; + memset(&type16, 0, sizeof type16); + type16.width = 16; + type16.length = 2*n; + type16.sign = TRUE; + memset(&type8, 0, sizeof type8); + type8.width = 8; + type8.length = 4*n; + + lp_build_context_init(&bld16, gallivm, type16); + /* 255/7 is a bit off - increase accuracy at the expense of shift later */ + sel_mask = LLVMBuildBitCast(builder, sel_mask, bld16.vec_type, ""); + weight5 = lp_build_const_int_vec(gallivm, type16, 255*64/5); + weight7 = lp_build_const_int_vec(gallivm, type16, 255*64/7); + weight = lp_build_select(&bld16, sel_mask, weight7, weight5); + + alpha0 = LLVMBuildBitCast(builder, alpha0, bld16.vec_type, ""); + alpha1 = LLVMBuildBitCast(builder, alpha1, bld16.vec_type, ""); + code = LLVMBuildBitCast(builder, code, bld16.vec_type, ""); + /* we'll get garbage in the elements which had code 0 (or larger than 5 or 7) + but we don't care */ + code = LLVMBuildSub(builder, code, bld16.one, ""); + + weight = LLVMBuildMul(builder, weight, code, ""); + weight = LLVMBuildLShr(builder, weight, + lp_build_const_int_vec(gallivm, type16, 6), ""); + + delta = LLVMBuildSub(builder, alpha1, alpha0, ""); + + ainterp = LLVMBuildMul(builder, delta, weight, ""); + ainterp = LLVMBuildLShr(builder, ainterp, + lp_build_const_int_vec(gallivm, type16, 8), ""); + + ainterp = LLVMBuildBitCast(builder, ainterp, lp_build_vec_type(gallivm, type8), ""); + alpha0 = LLVMBuildBitCast(builder, alpha0, lp_build_vec_type(gallivm, type8), ""); + ainterp = LLVMBuildAdd(builder, alpha0, ainterp, ""); + ainterp = LLVMBuildBitCast(builder, ainterp, lp_build_vec_type(gallivm, type32), ""); + + return ainterp; +} + +/** + * Convert from s3tc dxt5 to <4n x i8> RGBA AoS + * @param colors is a vector with n x 2x16bit colors 
+ * @param codewords is a vector containing the codewords + * @param alphas is a vector containing the alpha values + * @param i is a vector with the x pixel coordinate (0 to 3) + * @param j is a vector with the y pixel coordinate (0 to 3) + */ +static LLVMValueRef +s3tc_dxt5_full_to_rgba_aos(struct gallivm_state *gallivm, + unsigned n, + enum pipe_format format, + LLVMValueRef colors, + LLVMValueRef codewords, + LLVMValueRef alpha_lo, + LLVMValueRef alpha_hi, + LLVMValueRef i, + LLVMValueRef j) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef rgba, tmp, alpha0, alpha1, alphac, alphac0, bit_pos, shift; + LLVMValueRef sel_mask, tmp_mask, alpha, alpha64, code_s; + LLVMValueRef mask6, mask7, ainterp; + LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context); + LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); + struct lp_type type, type8; + struct lp_build_context bld32; + + memset(&type, 0, sizeof type); + type.width = 32; + type.length = n; + + memset(&type8, 0, sizeof type8); + type8.width = 8; + type8.length = n*4; + + assert(lp_check_value(type, i)); + assert(lp_check_value(type, j)); + + lp_build_context_init(&bld32, gallivm, type); + + assert(lp_check_value(type, i)); + assert(lp_check_value(type, j)); + + rgba = s3tc_dxt1_to_rgba_aos(gallivm, n, format, + colors, codewords, i, j); + + rgba = LLVMBuildBitCast(builder, rgba, bld32.vec_type, ""); + + /* this looks pretty complex for vectorization: + * extract a0/a1 values + * extract code + * select weights for interpolation depending on a0 > a1 + * mul weights by code - 1 + * lerp a0/a1/weights + * use selects for getting either a0, a1, interp a, interp a/0.0, interp a/1.0 + */ + + alpha0 = LLVMBuildAnd(builder, alpha_lo, + lp_build_const_int_vec(gallivm, type, 0xff), ""); + alpha1 = LLVMBuildLShr(builder, alpha_lo, + lp_build_const_int_vec(gallivm, type, 8), ""); + alpha1 = LLVMBuildAnd(builder, alpha1, + lp_build_const_int_vec(gallivm, type, 0xff), ""); + + /* pos = 3*(4j+i) */ + 
bit_pos = LLVMBuildShl(builder, j, lp_build_const_int_vec(gallivm, type, 2), ""); + bit_pos = LLVMBuildAdd(builder, bit_pos, i, ""); + tmp = LLVMBuildAdd(builder, bit_pos, bit_pos, ""); + bit_pos = LLVMBuildAdd(builder, bit_pos, tmp, ""); + /* get rid of first 2 bytes - saves shifts of alpha_lo/hi */ + bit_pos = LLVMBuildAdd(builder, bit_pos, + lp_build_const_int_vec(gallivm, type, 16), ""); + + if (n == 1) { + struct lp_type type64; + memset(&type64, 0, sizeof type64); + type64.width = 64; + type64.length = 1; + /* This is pretty pointless could avoid by just directly extracting + 64bit in the first place but makes it more complicated elsewhere */ + alpha_lo = LLVMBuildZExt(builder, alpha_lo, i64t, ""); + alpha_hi = LLVMBuildZExt(builder, alpha_hi, i64t, ""); + alphac0 = LLVMBuildShl(builder, alpha_hi, + lp_build_const_int_vec(gallivm, type64, 32), ""); + alphac0 = LLVMBuildOr(builder, alpha_lo, alphac0, ""); + + shift = LLVMBuildZExt(builder, bit_pos, i64t, ""); + alphac0 = LLVMBuildLShr(builder, alphac0, shift, ""); + alphac0 = LLVMBuildTrunc(builder, alphac0, i32t, ""); + alphac = LLVMBuildAnd(builder, alphac0, + lp_build_const_int_vec(gallivm, type, 0x7), ""); + } + else { + /* + * Using non-native vector length here (actually, with avx2 and + * n == 4 llvm will indeed expand to ymm regs...) + * At least newer llvm versions handle that ok. + * llvm 3.7+ will even handle the emulated 64bit shift with variable + * shift count without extraction (and it's actually easier to + * emulate than the 32bit one). 
+ */ + alpha64 = LLVMBuildShuffleVector(builder, alpha_lo, alpha_hi, + lp_build_const_unpackx2_shuffle(gallivm, n), ""); + + alpha64 = LLVMBuildBitCast(builder, alpha64, LLVMVectorType(i64t, n), ""); + shift = LLVMBuildZExt(builder, bit_pos, LLVMVectorType(i64t, n), ""); + alphac = LLVMBuildLShr(builder, alpha64, shift, ""); + alphac = LLVMBuildTrunc(builder, alphac, bld32.vec_type, ""); + + alphac = LLVMBuildAnd(builder, alphac, + lp_build_const_int_vec(gallivm, type, 0x7), ""); + } + + /* signed compare is faster saves some xors */ + type.sign = TRUE; + /* alpha0 > alpha1 selection */ + sel_mask = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, + alpha0, alpha1); + ainterp = lp_build_lerpdxta(gallivm, alpha0, alpha1, alphac, sel_mask, n); + + /* + * if a0 > a1 then we select a0 for case 0, a1 for case 1, interp otherwise. + * else we select a0 for case 0, a1 for case 1, + * interp for case 2-5, 00 for 6 and 0xff(ffffff) for 7 + * a = (c == 0) ? a0 : a1 + * a = (c > 1) ? ainterp : a + * Finally handle case 6/7 for !(a0 > a1) + * a = (!(a0 > a1) && c == 6) ? 0 : a (andnot with mask) + * a = (!(a0 > a1) && c == 7) ? 
0xffffffff : a (or with mask) + */ + tmp_mask = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, + alphac, bld32.zero); + alpha = lp_build_select(&bld32, tmp_mask, alpha0, alpha1); + tmp_mask = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, + alphac, bld32.one); + alpha = lp_build_select(&bld32, tmp_mask, ainterp, alpha); + + code_s = LLVMBuildAnd(builder, alphac, + LLVMBuildNot(builder, sel_mask, ""), ""); + mask6 = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, + code_s, lp_build_const_int_vec(gallivm, type, 6)); + mask7 = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, + code_s, lp_build_const_int_vec(gallivm, type, 7)); + alpha = LLVMBuildAnd(builder, alpha, LLVMBuildNot(builder, mask6, ""), ""); + alpha = LLVMBuildOr(builder, alpha, mask7, ""); + + alpha = LLVMBuildShl(builder, alpha, lp_build_const_int_vec(gallivm, type, 24), ""); + rgba = LLVMBuildOr(builder, alpha, rgba, ""); + + return LLVMBuildBitCast(builder, rgba, lp_build_vec_type(gallivm, type8), ""); +} + + +static void +lp_build_gather_s3tc_simple_scalar(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + LLVMValueRef *dxt_block, + LLVMValueRef ptr) +{ + LLVMBuilderRef builder = gallivm->builder; + unsigned block_bits = format_desc->block.bits; + LLVMValueRef elem, shuf; + LLVMTypeRef type32 = LLVMIntTypeInContext(gallivm->context, 32); + LLVMTypeRef src_type = LLVMIntTypeInContext(gallivm->context, block_bits); + LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); + LLVMTypeRef type32_4 = LLVMVectorType(type32, 4); + + assert(block_bits == 64 || block_bits == 128); + + ptr = LLVMBuildBitCast(builder, ptr, src_ptr_type, ""); + elem = LLVMBuildLoad(builder, ptr, ""); + + if (block_bits == 128) { + /* just return block as is */ + *dxt_block = LLVMBuildBitCast(builder, elem, type32_4, ""); + } + else { + LLVMTypeRef type32_2 = LLVMVectorType(type32, 2); + shuf = lp_build_const_extend_shuffle(gallivm, 2, 4); + elem = LLVMBuildBitCast(builder, elem, type32_2, 
""); + *dxt_block = LLVMBuildShuffleVector(builder, elem, + LLVMGetUndef(type32_2), shuf, ""); + } +} + + +static void +s3tc_store_cached_block(struct gallivm_state *gallivm, + LLVMValueRef *col, + LLVMValueRef tag_value, + LLVMValueRef hash_index, + LLVMValueRef cache) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef ptr, indices[3]; + LLVMTypeRef type_ptr4x32; + unsigned count; + + type_ptr4x32 = LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), 0); + indices[0] = lp_build_const_int32(gallivm, 0); + indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS); + indices[2] = hash_index; + ptr = LLVMBuildGEP(builder, cache, indices, ARRAY_SIZE(indices), ""); + LLVMBuildStore(builder, tag_value, ptr); + + indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA); + hash_index = LLVMBuildMul(builder, hash_index, + lp_build_const_int32(gallivm, 16), ""); + for (count = 0; count < 4; count++) { + indices[2] = hash_index; + ptr = LLVMBuildGEP(builder, cache, indices, ARRAY_SIZE(indices), ""); + ptr = LLVMBuildBitCast(builder, ptr, type_ptr4x32, ""); + LLVMBuildStore(builder, col[count], ptr); + hash_index = LLVMBuildAdd(builder, hash_index, + lp_build_const_int32(gallivm, 4), ""); + } +} + +static LLVMValueRef +s3tc_lookup_cached_pixel(struct gallivm_state *gallivm, + LLVMValueRef ptr, + LLVMValueRef index) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef member_ptr, indices[3]; + + indices[0] = lp_build_const_int32(gallivm, 0); + indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA); + indices[2] = index; + member_ptr = LLVMBuildGEP(builder, ptr, indices, ARRAY_SIZE(indices), ""); + return LLVMBuildLoad(builder, member_ptr, "cache_data"); +} + +static LLVMValueRef +s3tc_lookup_tag_data(struct gallivm_state *gallivm, + LLVMValueRef ptr, + LLVMValueRef index) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef member_ptr, indices[3]; + + 
indices[0] = lp_build_const_int32(gallivm, 0); + indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS); + indices[2] = index; + member_ptr = LLVMBuildGEP(builder, ptr, indices, ARRAY_SIZE(indices), ""); + return LLVMBuildLoad(builder, member_ptr, "tag_data"); +} + +#if LP_BUILD_FORMAT_CACHE_DEBUG +static void +s3tc_update_cache_access(struct gallivm_state *gallivm, + LLVMValueRef ptr, + unsigned count, + unsigned index) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef member_ptr, cache_access; + + assert(index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL || + index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); + + member_ptr = lp_build_struct_get_ptr(gallivm, ptr, index, ""); + cache_access = LLVMBuildLoad(builder, member_ptr, "cache_access"); + cache_access = LLVMBuildAdd(builder, cache_access, + LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), + count, 0), ""); + LLVMBuildStore(builder, cache_access, member_ptr); +} +#endif + +/** + * Calculate 1/3(v1-v0) + v0 and 2*1/3(v1-v0) + v0. + * The lerp is performed between the first 2 32bit colors + * in the source vector, both results are returned packed in result vector. 
+ */ +static LLVMValueRef +lp_build_lerp23_single(struct lp_build_context *bld, + LLVMValueRef v01) +{ + struct gallivm_state *gallivm = bld->gallivm; + LLVMValueRef x, mul, delta, res, v0, v1, elems[8]; + const struct lp_type type = bld->type; + LLVMBuilderRef builder = bld->gallivm->builder; + struct lp_type i16_type = lp_wider_type(type); + struct lp_type i32_type = lp_wider_type(i16_type); + struct lp_build_context bld2; + + assert(!type.floating && !type.fixed && !type.norm && type.width == 8); + + lp_build_context_init(&bld2, gallivm, i16_type); + bld2.type.sign = TRUE; + + /* weights 256/3, 256*2/3, with correct rounding */ + elems[0] = elems[1] = elems[2] = elems[3] = + lp_build_const_elem(gallivm, i16_type, 255*1/3); + elems[4] = elems[5] = elems[6] = elems[7] = + lp_build_const_elem(gallivm, i16_type, 171); + x = LLVMConstVector(elems, 8); + + /* + * v01 has col0 in 32bit elem 0, col1 in elem 1. + * Interleave/unpack will give us separate v0/v1 vectors. + */ + v01 = lp_build_interleave2(gallivm, i32_type, v01, v01, 0); + v01 = LLVMBuildBitCast(builder, v01, bld->vec_type, ""); + + lp_build_unpack2(gallivm, type, i16_type, v01, &v0, &v1); + delta = lp_build_sub(&bld2, v1, v0); + + mul = LLVMBuildMul(builder, x, delta, ""); + + mul = LLVMBuildLShr(builder, mul, lp_build_const_int_vec(gallivm, i16_type, 8), ""); + /* lerp optimization: pack now, do add afterwards */ + res = lp_build_pack2(gallivm, i16_type, type, mul, bld2.undef); + /* only lower 2 elems are valid - for these v0 is really v0 */ + return lp_build_add(bld, res, v01); +} + +/* + * decode one dxt1 block. 
+ */ +static void +s3tc_decode_block_dxt1(struct gallivm_state *gallivm, + enum pipe_format format, + LLVMValueRef dxt_block, + LLVMValueRef *col) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef color01, color23, color01_16, color0123; + LLVMValueRef rgba, tmp, a, sel_mask, indices, code, const2; + struct lp_type type8, type32, type16, type64; + struct lp_build_context bld8, bld32, bld16, bld64; + unsigned i; + boolean is_dxt1_variant = format_dxt1_variant(format); + + memset(&type32, 0, sizeof type32); + type32.width = 32; + type32.length = 4; + type32.sign = TRUE; + + memset(&type8, 0, sizeof type8); + type8.width = 8; + type8.length = 16; + + memset(&type16, 0, sizeof type16); + type16.width = 16; + type16.length = 8; + + memset(&type64, 0, sizeof type64); + type64.width = 64; + type64.length = 2; + + a = lp_build_const_int_vec(gallivm, type32, 0xff000000); + const2 = lp_build_const_int_vec(gallivm, type32, 2); + + lp_build_context_init(&bld32, gallivm, type32); + lp_build_context_init(&bld16, gallivm, type16); + lp_build_context_init(&bld8, gallivm, type8); + lp_build_context_init(&bld64, gallivm, type64); + + if (is_dxt1_variant) { + color01 = lp_build_shuffle1undef(gallivm, dxt_block, 0, 4); + code = lp_build_shuffle1undef(gallivm, dxt_block, 1, 4); + } else { + color01 = lp_build_shuffle1undef(gallivm, dxt_block, 2, 4); + code = lp_build_shuffle1undef(gallivm, dxt_block, 3, 4); + } + code = LLVMBuildBitCast(builder, code, bld8.vec_type, ""); + /* expand bytes to dwords */ + code = lp_build_interleave2(gallivm, type8, code, code, 0); + code = lp_build_interleave2(gallivm, type8, code, code, 0); + + + /* + * works as follows: + * - expand color0/color1 to rgba8888 + * - calculate color2/3 (interpolation) according to color0 < color1 rules + * - calculate color2/3 according to color0 >= color1 rules + * - do selection of color2/3 according to comparison of color0/1 + * - extract indices. + * - use compare/select to select the correct color. 
Since we have 2bit + * indices (and 4 colors), needs at least three compare/selects. + */ + + /* + * expand the two colors + */ + color01 = LLVMBuildBitCast(builder, color01, bld16.vec_type, ""); + color01 = lp_build_interleave2(gallivm, type16, color01, + bld16.zero, 0); + color01_16 = LLVMBuildBitCast(builder, color01, bld32.vec_type, ""); + color01 = color_expand_565_to_8888(gallivm, 4, color01_16); + + /* + * interpolate colors + * color2_1 is 2/3 color0 + 1/3 color1 + * color3_1 is 1/3 color0 + 2/3 color1 + * color2_2 is 1/2 color0 + 1/2 color1 + * color3_2 is 0 + */ + + /* TODO: since this is now always scalar, should + * probably just use control flow here instead of calculating + * both cases and then selection + */ + if (format == PIPE_FORMAT_DXT1_RGBA || + format == PIPE_FORMAT_DXT1_SRGBA) { + color01 = LLVMBuildOr(builder, color01, a, ""); + } + /* can combine 2 lerps into one mostly */ + color23 = lp_build_lerp23_single(&bld8, color01); + color23 = LLVMBuildBitCast(builder, color23, bld32.vec_type, ""); + + /* dxt3/5 always use 4-color encoding */ + if (is_dxt1_variant) { + LLVMValueRef color23_2, color2_2; + + if (util_cpu_caps.has_sse2) { + LLVMValueRef intrargs[2]; + intrargs[0] = LLVMBuildBitCast(builder, color01, bld8.vec_type, ""); + /* same interleave as for lerp23 - correct result in 2nd element */ + intrargs[1] = lp_build_interleave2(gallivm, type32, color01, color01, 0); + intrargs[1] = LLVMBuildBitCast(builder, intrargs[1], bld8.vec_type, ""); + color2_2 = lp_build_pavgb(&bld8, intrargs[0], intrargs[1]); + } + else { + LLVMValueRef v01, v0, v1, vhalf; + /* + * This isn't as expensive as it looks (the unpack is the same as + * for lerp23, which is the reason why we do the pointless + * interleave2 too), with correct rounding (the two lower elements + * will be the same). 
+ */ + v01 = lp_build_interleave2(gallivm, type32, color01, color01, 0); + v01 = LLVMBuildBitCast(builder, v01, bld8.vec_type, ""); + lp_build_unpack2(gallivm, type8, type16, v01, &v0, &v1); + vhalf = lp_build_add(&bld16, v0, v1); + vhalf = LLVMBuildLShr(builder, vhalf, bld16.one, ""); + color2_2 = lp_build_pack2(gallivm, type16, type8, vhalf, bld16.undef); + } + /* shuffle in color 3 as elem 2 zero, color 2 elem 1 */ + color23_2 = LLVMBuildBitCast(builder, color2_2, bld64.vec_type, ""); + color23_2 = LLVMBuildLShr(builder, color23_2, + lp_build_const_int_vec(gallivm, type64, 32), ""); + color23_2 = LLVMBuildBitCast(builder, color23_2, bld32.vec_type, ""); + + tmp = LLVMBuildBitCast(builder, color01_16, bld64.vec_type, ""); + tmp = LLVMBuildLShr(builder, tmp, + lp_build_const_int_vec(gallivm, type64, 32), ""); + tmp = LLVMBuildBitCast(builder, tmp, bld32.vec_type, ""); + sel_mask = lp_build_compare(gallivm, type32, PIPE_FUNC_GREATER, + color01_16, tmp); + sel_mask = lp_build_interleave2(gallivm, type32, sel_mask, sel_mask, 0); + color23 = lp_build_select(&bld32, sel_mask, color23, color23_2); + } + + if (util_cpu_caps.has_ssse3) { + /* + * Use pshufb as mini-lut. (Only doable with intrinsics as the + * final shuffles are non-constant. pshufb is awesome!) + */ + LLVMValueRef shuf[16], low2mask; + LLVMValueRef intrargs[2], lut_ind, lut_adj; + + color01 = LLVMBuildBitCast(builder, color01, bld64.vec_type, ""); + color23 = LLVMBuildBitCast(builder, color23, bld64.vec_type, ""); + color0123 = lp_build_interleave2(gallivm, type64, color01, color23, 0); + color0123 = LLVMBuildBitCast(builder, color0123, bld32.vec_type, ""); + + if (format == PIPE_FORMAT_DXT1_RGB || + format == PIPE_FORMAT_DXT1_SRGB) { + color0123 = LLVMBuildOr(builder, color0123, a, ""); + } + + /* shuffle as r0r1r2r3g0g1... 
*/ + for (i = 0; i < 4; i++) { + shuf[4*i] = lp_build_const_int32(gallivm, 0 + i); + shuf[4*i+1] = lp_build_const_int32(gallivm, 4 + i); + shuf[4*i+2] = lp_build_const_int32(gallivm, 8 + i); + shuf[4*i+3] = lp_build_const_int32(gallivm, 12 + i); + } + color0123 = LLVMBuildBitCast(builder, color0123, bld8.vec_type, ""); + color0123 = LLVMBuildShuffleVector(builder, color0123, bld8.undef, + LLVMConstVector(shuf, 16), ""); + + /* lowest 2 bits of each 8 bit value contain index into "LUT" */ + low2mask = lp_build_const_int_vec(gallivm, type8, 3); + /* add 0/4/8/12 for r/g/b/a */ + lut_adj = lp_build_const_int_vec(gallivm, type32, 0x0c080400); + lut_adj = LLVMBuildBitCast(builder, lut_adj, bld8.vec_type, ""); + intrargs[0] = color0123; + for (i = 0; i < 4; i++) { + lut_ind = LLVMBuildAnd(builder, code, low2mask, ""); + lut_ind = LLVMBuildOr(builder, lut_ind, lut_adj, ""); + intrargs[1] = lut_ind; + col[i] = lp_build_intrinsic(builder, "llvm.x86.ssse3.pshuf.b.128", + bld8.vec_type, intrargs, 2, 0); + col[i] = LLVMBuildBitCast(builder, col[i], bld32.vec_type, ""); + code = LLVMBuildBitCast(builder, code, bld32.vec_type, ""); + code = LLVMBuildLShr(builder, code, const2, ""); + code = LLVMBuildBitCast(builder, code, bld8.vec_type, ""); + } + } + else { + /* Thanks to vectorization can do 4 texels in parallel */ + LLVMValueRef color0, color1, color2, color3; + if (format == PIPE_FORMAT_DXT1_RGB || + format == PIPE_FORMAT_DXT1_SRGB) { + color01 = LLVMBuildOr(builder, color01, a, ""); + color23 = LLVMBuildOr(builder, color23, a, ""); + } + color0 = LLVMBuildShuffleVector(builder, color01, bld32.undef, + lp_build_const_shuffle1(gallivm, 0, 4), ""); + color1 = LLVMBuildShuffleVector(builder, color01, bld32.undef, + lp_build_const_shuffle1(gallivm, 1, 4), ""); + color2 = LLVMBuildShuffleVector(builder, color23, bld32.undef, + lp_build_const_shuffle1(gallivm, 0, 4), ""); + color3 = LLVMBuildShuffleVector(builder, color23, bld32.undef, + lp_build_const_shuffle1(gallivm, 1, 4), 
""); + code = LLVMBuildBitCast(builder, code, bld32.vec_type, ""); + + for (i = 0; i < 4; i++) { + /* select the colors */ + LLVMValueRef selmasklo, rgba01, rgba23, bitlo; + bitlo = bld32.one; + indices = LLVMBuildAnd(builder, code, bitlo, ""); + selmasklo = lp_build_compare(gallivm, type32, PIPE_FUNC_EQUAL, + indices, bitlo); + rgba01 = lp_build_select(&bld32, selmasklo, color1, color0); + + LLVMValueRef selmaskhi; + indices = LLVMBuildAnd(builder, code, const2, ""); + selmaskhi = lp_build_compare(gallivm, type32, PIPE_FUNC_EQUAL, + indices, const2); + rgba23 = lp_build_select(&bld32, selmasklo, color3, color2); + rgba = lp_build_select(&bld32, selmaskhi, rgba23, rgba01); + + /* + * Note that this will give "wrong" order. + * col0 will be rgba0, rgba4, rgba8, rgba12, col1 rgba1, rgba5, ... + * This would be easily fixable by using different shuffle, bitlo/hi + * vectors above (and different shift), but seems slightly easier to + * deal with for dxt3/dxt5 alpha too. So instead change lookup. + */ + col[i] = rgba; + code = LLVMBuildLShr(builder, code, const2, ""); + } + } +} + +/* + * decode one dxt3 block. 
+ */ +static void +s3tc_decode_block_dxt3(struct gallivm_state *gallivm, + enum pipe_format format, + LLVMValueRef dxt_block, + LLVMValueRef *col) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef alpha, alphas0, alphas1, shift4_16, a[4], mask8hi; + struct lp_type type32, type8, type16; + unsigned i; + + memset(&type32, 0, sizeof type32); + type32.width = 32; + type32.length = 4; + + memset(&type8, 0, sizeof type8); + type8.width = 8; + type8.length = 16; + + memset(&type16, 0, sizeof type16); + type16.width = 16; + type16.length = 8; + + s3tc_decode_block_dxt1(gallivm, format, dxt_block, col); + + shift4_16 = lp_build_const_int_vec(gallivm, type16, 4); + mask8hi = lp_build_const_int_vec(gallivm, type32, 0xff000000); + + alpha = LLVMBuildBitCast(builder, dxt_block, + lp_build_vec_type(gallivm, type8), ""); + alpha = lp_build_interleave2(gallivm, type8, alpha, alpha, 0); + alpha = LLVMBuildBitCast(builder, alpha, + lp_build_vec_type(gallivm, type16), ""); + alpha = LLVMBuildAnd(builder, alpha, + lp_build_const_int_vec(gallivm, type16, 0xf00f), ""); + alphas0 = LLVMBuildLShr(builder, alpha, shift4_16, ""); + alphas1 = LLVMBuildShl(builder, alpha, shift4_16, ""); + alpha = LLVMBuildOr(builder, alphas0, alpha, ""); + alpha = LLVMBuildOr(builder, alphas1, alpha, ""); + alpha = LLVMBuildBitCast(builder, alpha, + lp_build_vec_type(gallivm, type32), ""); + /* + * alpha now contains elems 0,1,2,3,... (ubytes) + * we need 0,4,8,12, 1,5,9,13 etc. in dwords to match color (which + * is just as easy as "natural" order - 3 shift/and instead of 6 unpack). 
+ */ + a[0] = LLVMBuildShl(builder, alpha, + lp_build_const_int_vec(gallivm, type32, 24), ""); + a[1] = LLVMBuildShl(builder, alpha, + lp_build_const_int_vec(gallivm, type32, 16), ""); + a[1] = LLVMBuildAnd(builder, a[1], mask8hi, ""); + a[2] = LLVMBuildShl(builder, alpha, + lp_build_const_int_vec(gallivm, type32, 8), ""); + a[2] = LLVMBuildAnd(builder, a[2], mask8hi, ""); + a[3] = LLVMBuildAnd(builder, alpha, mask8hi, ""); + + for (i = 0; i < 4; i++) { + col[i] = LLVMBuildOr(builder, col[i], a[i], ""); + } +} + + +static LLVMValueRef +lp_build_lerpdxta_block(struct gallivm_state *gallivm, + LLVMValueRef alpha0, + LLVMValueRef alpha1, + LLVMValueRef code, + LLVMValueRef sel_mask) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef delta, ainterp; + LLVMValueRef weight5, weight7, weight; + struct lp_type type16; + struct lp_build_context bld; + + memset(&type16, 0, sizeof type16); + type16.width = 16; + type16.length = 8; + type16.sign = TRUE; + + lp_build_context_init(&bld, gallivm, type16); + /* + * 256/7 is only 36.57 so we'd lose quite some precision. Since it would + * actually be desirable to do this here with even higher accuracy than + * even 8 bit (more or less required for rgtc, albeit that's not handled + * here right now), shift the weights after multiplication by code. + */ + weight5 = lp_build_const_int_vec(gallivm, type16, 256*64/5); + weight7 = lp_build_const_int_vec(gallivm, type16, 256*64/7); + weight = lp_build_select(&bld, sel_mask, weight7, weight5); + + /* + * we'll get garbage in the elements which had code 0 (or larger than + * 5 or 7) but we don't care (or rather, need to fix up anyway). 
+ */ + code = LLVMBuildSub(builder, code, bld.one, ""); + + weight = LLVMBuildMul(builder, weight, code, ""); + weight = LLVMBuildLShr(builder, weight, + lp_build_const_int_vec(gallivm, type16, 6), ""); + + delta = LLVMBuildSub(builder, alpha1, alpha0, ""); + + ainterp = LLVMBuildMul(builder, delta, weight, ""); + ainterp = LLVMBuildLShr(builder, ainterp, + lp_build_const_int_vec(gallivm, type16, 8), ""); + + /* lerp is done later (with packed values) */ + + return ainterp; +} + + +/* + * decode one dxt5 block. + */ +static void +s3tc_decode_block_dxt5(struct gallivm_state *gallivm, + enum pipe_format format, + LLVMValueRef dxt_block, + LLVMValueRef *col) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef alpha, alpha0, alpha1, ares; + LLVMValueRef ainterp, ainterp0, ainterp1, shuffle1, sel_mask, sel_mask2; + LLVMValueRef a[4], acode, tmp0, tmp1; + LLVMTypeRef i64t, i32t; + struct lp_type type32, type64, type8, type16; + struct lp_build_context bld16, bld8; + unsigned i; + + memset(&type32, 0, sizeof type32); + type32.width = 32; + type32.length = 4; + + memset(&type64, 0, sizeof type64); + type64.width = 64; + type64.length = 2; + + memset(&type8, 0, sizeof type8); + type8.width = 8; + type8.length = 16; + + memset(&type16, 0, sizeof type16); + type16.width = 16; + type16.length = 8; + + lp_build_context_init(&bld16, gallivm, type16); + lp_build_context_init(&bld8, gallivm, type8); + + i64t = lp_build_vec_type(gallivm, type64); + i32t = lp_build_vec_type(gallivm, type32); + + s3tc_decode_block_dxt1(gallivm, format, dxt_block, col); + + /* + * three possible strategies for vectorizing alpha: + * 1) compute all 8 values then use scalar extraction + * (i.e. have all 8 alpha values packed in one 64bit scalar + * and do something like ax = vals >> (codex * 8) followed + * by inserting these values back into color) + * 2) same as 8 but just use pshufb as a mini-LUT for selection. 
+ * (without pshufb would need boatloads of cmp/selects trying to + * keep things vectorized for essentially scalar selection). + * 3) do something similar to the uncached case + * needs more calculations (need to calc 16 values instead of 8 though + * that's only an issue for the lerp which we need to do twice otherwise + * everything still fits into 128bit) but keeps things vectorized mostly. + * Trying 3) here though not sure it's really faster... + * With pshufb, we try 2) (cheaper and more accurate) + */ + + /* + * Ideally, we'd use 2 variable 16bit shifts here (byte shifts wouldn't + * help since code crosses 8bit boundaries). But variable shifts are + * AVX2 only, and even then only dword/quadword (intel _really_ hates + * shifts!). Instead, emulate by 16bit muls. + * Also, the required byte shuffles are essentially non-emulatable, so + * require ssse3 (albeit other archs might do them fine). + * This is not directly tied to ssse3 - just need sane byte shuffles. + * But ordering is going to be different below so use same condition. 
+ */ + + + /* vectorize alpha */ + alpha = LLVMBuildBitCast(builder, dxt_block, i64t, ""); + alpha0 = LLVMBuildAnd(builder, alpha, + lp_build_const_int_vec(gallivm, type64, 0xff), ""); + alpha0 = LLVMBuildBitCast(builder, alpha0, bld16.vec_type, ""); + alpha = LLVMBuildBitCast(builder, alpha, bld16.vec_type, ""); + alpha1 = LLVMBuildLShr(builder, alpha, + lp_build_const_int_vec(gallivm, type16, 8), ""); + alpha = LLVMBuildBitCast(builder, alpha, i64t, ""); + shuffle1 = lp_build_const_shuffle1(gallivm, 0, 8); + /* XXX this shuffle broken with LLVM 2.8 */ + alpha0 = LLVMBuildShuffleVector(builder, alpha0, alpha0, shuffle1, ""); + alpha1 = LLVMBuildShuffleVector(builder, alpha1, alpha1, shuffle1, ""); + + type16.sign = TRUE; + sel_mask = lp_build_compare(gallivm, type16, PIPE_FUNC_GREATER, + alpha0, alpha1); + type16.sign = FALSE; + sel_mask = LLVMBuildBitCast(builder, sel_mask, bld8.vec_type, ""); + + if (!util_cpu_caps.has_ssse3) { + LLVMValueRef acodeg, mask1, acode0, acode1; + + /* extraction of the 3 bit values into something more useful is HARD */ + /* first steps are actually scalar */ + acode = LLVMBuildLShr(builder, alpha, + lp_build_const_int_vec(gallivm, type64, 16), ""); + tmp0 = LLVMBuildAnd(builder, acode, + lp_build_const_int_vec(gallivm, type64, 0xffffff), ""); + tmp1 = LLVMBuildLShr(builder, acode, + lp_build_const_int_vec(gallivm, type64, 24), ""); + tmp0 = LLVMBuildBitCast(builder, tmp0, i32t, ""); + tmp1 = LLVMBuildBitCast(builder, tmp1, i32t, ""); + acode = lp_build_interleave2(gallivm, type32, tmp0, tmp1, 0); + /* now have 2x24bit in 4x32bit, order 01234567, 89..., undef, undef */ + tmp0 = LLVMBuildAnd(builder, acode, + lp_build_const_int_vec(gallivm, type32, 0xfff), ""); + tmp1 = LLVMBuildLShr(builder, acode, + lp_build_const_int_vec(gallivm, type32, 12), ""); + acode = lp_build_interleave2(gallivm, type32, tmp0, tmp1, 0); + /* now have 4x12bit in 4x32bit, order 0123, 4567, ,,, */ + tmp0 = LLVMBuildAnd(builder, acode, + 
lp_build_const_int_vec(gallivm, type32, 0x3f), ""); + tmp1 = LLVMBuildLShr(builder, acode, + lp_build_const_int_vec(gallivm, type32, 6), ""); + /* use signed pack doesn't matter and otherwise need sse41 */ + type32.sign = type16.sign = TRUE; + acode = lp_build_pack2(gallivm, type32, type16, tmp0, tmp1); + type32.sign = type16.sign = FALSE; + /* now have 8x6bit in 8x16bit, 01, 45, 89, ..., 23, 67, ... */ + acode0 = LLVMBuildAnd(builder, acode, + lp_build_const_int_vec(gallivm, type16, 0x7), ""); + acode1 = LLVMBuildLShr(builder, acode, + lp_build_const_int_vec(gallivm, type16, 3), ""); + acode = lp_build_pack2(gallivm, type16, type8, acode0, acode1); + /* acode0 contains elems 0,4,8,12,2,6,10,14, acode1 1,5,9,... */ + + acodeg = LLVMBuildAnd(builder, acode, + LLVMBuildNot(builder, sel_mask, ""), ""); + mask1 = lp_build_compare(gallivm, type8, PIPE_FUNC_EQUAL, + acode, bld8.one); + + sel_mask = LLVMBuildBitCast(builder, sel_mask, bld16.vec_type, ""); + ainterp0 = lp_build_lerpdxta_block(gallivm, alpha0, alpha1, acode0, sel_mask); + ainterp1 = lp_build_lerpdxta_block(gallivm, alpha0, alpha1, acode1, sel_mask); + sel_mask = LLVMBuildBitCast(builder, sel_mask, bld8.vec_type, ""); + ainterp = lp_build_pack2(gallivm, type16, type8, ainterp0, ainterp1); + alpha0 = lp_build_pack2(gallivm, type16, type8, alpha0, alpha0); + alpha1 = lp_build_pack2(gallivm, type16, type8, alpha1, alpha1); + ainterp = LLVMBuildAdd(builder, ainterp, alpha0, ""); + /* Fix up val01 */ + sel_mask2 = lp_build_compare(gallivm, type8, PIPE_FUNC_EQUAL, + acode, bld8.zero); + ainterp = lp_build_select(&bld8, sel_mask2, alpha0, ainterp); + ainterp = lp_build_select(&bld8, mask1, alpha1, ainterp); + + /* fix up val67 if a0 <= a1 */ + sel_mask2 = lp_build_compare(gallivm, type8, PIPE_FUNC_EQUAL, + acodeg, lp_build_const_int_vec(gallivm, type8, 6)); + ares = LLVMBuildAnd(builder, ainterp, LLVMBuildNot(builder, sel_mask2, ""), ""); + sel_mask2 = lp_build_compare(gallivm, type8, PIPE_FUNC_EQUAL, + acodeg, 
lp_build_const_int_vec(gallivm, type8, 7)); + ares = LLVMBuildOr(builder, ares, sel_mask2, ""); + + /* unpack in right order (0,4,8,12,1,5,..) */ + /* this gives us zero, a0, zero, a4, zero, a8, ... for tmp0 */ + tmp0 = lp_build_interleave2(gallivm, type8, bld8.zero, ares, 0); + tmp1 = lp_build_interleave2(gallivm, type8, bld8.zero, ares, 1); + tmp0 = LLVMBuildBitCast(builder, tmp0, bld16.vec_type, ""); + tmp1 = LLVMBuildBitCast(builder, tmp1, bld16.vec_type, ""); + + a[0] = lp_build_interleave2(gallivm, type16, bld16.zero, tmp0, 0); + a[1] = lp_build_interleave2(gallivm, type16, bld16.zero, tmp1, 0); + a[2] = lp_build_interleave2(gallivm, type16, bld16.zero, tmp0, 1); + a[3] = lp_build_interleave2(gallivm, type16, bld16.zero, tmp1, 1); + } + else { + LLVMValueRef elems[16], intrargs[2], shufa, mulclo, mulchi, mask8hi; + LLVMTypeRef type16s = LLVMInt16TypeInContext(gallivm->context); + LLVMTypeRef type8s = LLVMInt8TypeInContext(gallivm->context); + unsigned i, j; + /* + * Ideally, we'd use 2 variable 16bit shifts here (byte shifts wouldn't + * help since code crosses 8bit boundaries). But variable shifts are + * AVX2 only, and even then only dword/quadword (intel _really_ hates + * shifts!). Instead, emulate by 16bit muls. + * Also, the required byte shuffles are essentially non-emulatable, so + * require ssse3 (albeit other archs might do them fine, but the + * complete path is ssse3 only for now). 
+ */ + for (i = 0, j = 0; i < 16; i += 8, j += 3) { + elems[i+0] = elems[i+1] = elems[i+2] = lp_build_const_int32(gallivm, j+2); + elems[i+3] = elems[i+4] = lp_build_const_int32(gallivm, j+3); + elems[i+5] = elems[i+6] = elems[i+7] = lp_build_const_int32(gallivm, j+4); + } + shufa = LLVMConstVector(elems, 16); + alpha = LLVMBuildBitCast(builder, alpha, bld8.vec_type, ""); + acode = LLVMBuildShuffleVector(builder, alpha, bld8.undef, shufa, ""); + acode = LLVMBuildBitCast(builder, acode, bld16.vec_type, ""); + /* + * Put 0/2/4/6 into high 3 bits of 16 bits (save AND mask) + * Do the same for 1/3/5/7 (albeit still need mask there - ideally + * we'd place them into bits 4-7 so could save shift but impossible.) + */ + for (i = 0; i < 8; i += 4) { + elems[i+0] = LLVMConstInt(type16s, 1 << (13-0), 0); + elems[i+1] = LLVMConstInt(type16s, 1 << (13-6), 0); + elems[i+2] = LLVMConstInt(type16s, 1 << (13-4), 0); + elems[i+3] = LLVMConstInt(type16s, 1 << (13-2), 0); + } + mulclo = LLVMConstVector(elems, 8); + for (i = 0; i < 8; i += 4) { + elems[i+0] = LLVMConstInt(type16s, 1 << (13-3), 0); + elems[i+1] = LLVMConstInt(type16s, 1 << (13-9), 0); + elems[i+2] = LLVMConstInt(type16s, 1 << (13-7), 0); + elems[i+3] = LLVMConstInt(type16s, 1 << (13-5), 0); + } + mulchi = LLVMConstVector(elems, 8); + + tmp0 = LLVMBuildMul(builder, acode, mulclo, ""); + tmp1 = LLVMBuildMul(builder, acode, mulchi, ""); + tmp0 = LLVMBuildLShr(builder, tmp0, + lp_build_const_int_vec(gallivm, type16, 13), ""); + tmp1 = LLVMBuildLShr(builder, tmp1, + lp_build_const_int_vec(gallivm, type16, 5), ""); + tmp1 = LLVMBuildAnd(builder, tmp1, + lp_build_const_int_vec(gallivm, type16, 0x700), ""); + acode = LLVMBuildOr(builder, tmp0, tmp1, ""); + acode = LLVMBuildBitCast(builder, acode, bld8.vec_type, ""); + + /* + * Note that ordering is different here to non-ssse3 path: + * 0/1/2/3/4/5... 
+ */ + + LLVMValueRef weight0, weight1, weight, delta; + LLVMValueRef constff_elem7, const0_elem6; + /* weights, correctly rounded (round(256*x/7)) */ + elems[0] = LLVMConstInt(type16s, 256, 0); + elems[1] = LLVMConstInt(type16s, 0, 0); + elems[2] = LLVMConstInt(type16s, 219, 0); + elems[3] = LLVMConstInt(type16s, 183, 0); + elems[4] = LLVMConstInt(type16s, 146, 0); + elems[5] = LLVMConstInt(type16s, 110, 0); + elems[6] = LLVMConstInt(type16s, 73, 0); + elems[7] = LLVMConstInt(type16s, 37, 0); + weight0 = LLVMConstVector(elems, 8); + + elems[0] = LLVMConstInt(type16s, 256, 0); + elems[1] = LLVMConstInt(type16s, 0, 0); + elems[2] = LLVMConstInt(type16s, 205, 0); + elems[3] = LLVMConstInt(type16s, 154, 0); + elems[4] = LLVMConstInt(type16s, 102, 0); + elems[5] = LLVMConstInt(type16s, 51, 0); + elems[6] = LLVMConstInt(type16s, 0, 0); + elems[7] = LLVMConstInt(type16s, 0, 0); + weight1 = LLVMConstVector(elems, 8); + + weight0 = LLVMBuildBitCast(builder, weight0, bld8.vec_type, ""); + weight1 = LLVMBuildBitCast(builder, weight1, bld8.vec_type, ""); + weight = lp_build_select(&bld8, sel_mask, weight0, weight1); + weight = LLVMBuildBitCast(builder, weight, bld16.vec_type, ""); + + for (i = 0; i < 16; i++) { + elems[i] = LLVMConstNull(type8s); + } + elems[7] = LLVMConstInt(type8s, 255, 0); + constff_elem7 = LLVMConstVector(elems, 16); + + for (i = 0; i < 16; i++) { + elems[i] = LLVMConstInt(type8s, 255, 0); + } + elems[6] = LLVMConstInt(type8s, 0, 0); + const0_elem6 = LLVMConstVector(elems, 16); + + /* standard simple lerp - but the version we need isn't available */ + delta = LLVMBuildSub(builder, alpha0, alpha1, ""); + ainterp = LLVMBuildMul(builder, delta, weight, ""); + ainterp = LLVMBuildLShr(builder, ainterp, + lp_build_const_int_vec(gallivm, type16, 8), ""); + ainterp = LLVMBuildBitCast(builder, ainterp, bld8.vec_type, ""); + alpha1 = LLVMBuildBitCast(builder, alpha1, bld8.vec_type, ""); + ainterp = LLVMBuildAdd(builder, ainterp, alpha1, ""); + ainterp = 
LLVMBuildBitCast(builder, ainterp, bld16.vec_type, ""); + ainterp = lp_build_pack2(gallivm, type16, type8, ainterp, bld16.undef); + + /* fixing 0/0xff case is slightly more complex */ + constff_elem7 = LLVMBuildAnd(builder, constff_elem7, + LLVMBuildNot(builder, sel_mask, ""), ""); + const0_elem6 = LLVMBuildOr(builder, const0_elem6, sel_mask, ""); + ainterp = LLVMBuildOr(builder, ainterp, constff_elem7, ""); + ainterp = LLVMBuildAnd(builder, ainterp, const0_elem6, ""); + + /* now pick all 16 elements at once! */ + intrargs[0] = ainterp; + intrargs[1] = acode; + ares = lp_build_intrinsic(builder, "llvm.x86.ssse3.pshuf.b.128", + bld8.vec_type, intrargs, 2, 0); + + ares = LLVMBuildBitCast(builder, ares, i32t, ""); + mask8hi = lp_build_const_int_vec(gallivm, type32, 0xff000000); + a[0] = LLVMBuildShl(builder, ares, + lp_build_const_int_vec(gallivm, type32, 24), ""); + a[1] = LLVMBuildShl(builder, ares, + lp_build_const_int_vec(gallivm, type32, 16), ""); + a[1] = LLVMBuildAnd(builder, a[1], mask8hi, ""); + a[2] = LLVMBuildShl(builder, ares, + lp_build_const_int_vec(gallivm, type32, 8), ""); + a[2] = LLVMBuildAnd(builder, a[2], mask8hi, ""); + a[3] = LLVMBuildAnd(builder, ares, mask8hi, ""); + } + + for (i = 0; i < 4; i++) { + a[i] = LLVMBuildBitCast(builder, a[i], i32t, ""); + col[i] = LLVMBuildOr(builder, col[i], a[i], ""); + } +} + + +static void +generate_update_cache_one_block(struct gallivm_state *gallivm, + LLVMValueRef function, + const struct util_format_description *format_desc) +{ + LLVMBasicBlockRef block; + LLVMBuilderRef old_builder; + LLVMValueRef ptr_addr; + LLVMValueRef hash_index; + LLVMValueRef cache; + LLVMValueRef dxt_block, tag_value; + LLVMValueRef col[LP_MAX_VECTOR_LENGTH]; + + ptr_addr = LLVMGetParam(function, 0); + hash_index = LLVMGetParam(function, 1); + cache = LLVMGetParam(function, 2); + + lp_build_name(ptr_addr, "ptr_addr" ); + lp_build_name(hash_index, "hash_index"); + lp_build_name(cache, "cache_addr"); + + /* + * Function body + */ + + 
old_builder = gallivm->builder; + block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry"); + gallivm->builder = LLVMCreateBuilderInContext(gallivm->context); + LLVMPositionBuilderAtEnd(gallivm->builder, block); + + lp_build_gather_s3tc_simple_scalar(gallivm, format_desc, &dxt_block, + ptr_addr); + + switch (format_desc->format) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT1_SRGB: + case PIPE_FORMAT_DXT1_SRGBA: + s3tc_decode_block_dxt1(gallivm, format_desc->format, dxt_block, col); + break; + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT3_SRGBA: + s3tc_decode_block_dxt3(gallivm, format_desc->format, dxt_block, col); + break; + case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_DXT5_SRGBA: + s3tc_decode_block_dxt5(gallivm, format_desc->format, dxt_block, col); + break; + default: + assert(0); + s3tc_decode_block_dxt1(gallivm, format_desc->format, dxt_block, col); + break; + } + + tag_value = LLVMBuildPtrToInt(gallivm->builder, ptr_addr, + LLVMInt64TypeInContext(gallivm->context), ""); + s3tc_store_cached_block(gallivm, col, tag_value, hash_index, cache); + + LLVMBuildRetVoid(gallivm->builder); + + LLVMDisposeBuilder(gallivm->builder); + gallivm->builder = old_builder; + + gallivm_verify_function(gallivm, function); +} + + +static void +update_cached_block(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + LLVMValueRef ptr_addr, + LLVMValueRef hash_index, + LLVMValueRef cache) + +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMModuleRef module = gallivm->module; + char name[256]; + LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); + LLVMTypeRef pi8t = LLVMPointerType(i8t, 0); + LLVMValueRef function, inst; + LLVMBasicBlockRef bb; + LLVMValueRef args[3]; + + util_snprintf(name, sizeof name, "%s_update_cache_one_block", + format_desc->short_name); + function = LLVMGetNamedFunction(module, name); + + if (!function) { + LLVMTypeRef ret_type; + LLVMTypeRef 
arg_types[3]; + LLVMTypeRef function_type; + unsigned arg; + + /* + * Generate the function prototype. + */ + + ret_type = LLVMVoidTypeInContext(gallivm->context); + arg_types[0] = pi8t; + arg_types[1] = LLVMInt32TypeInContext(gallivm->context); + arg_types[2] = LLVMTypeOf(cache); // XXX: put right type here + function_type = LLVMFunctionType(ret_type, arg_types, ARRAY_SIZE(arg_types), 0); + function = LLVMAddFunction(module, name, function_type); + + for (arg = 0; arg < ARRAY_SIZE(arg_types); ++arg) + if (LLVMGetTypeKind(arg_types[arg]) == LLVMPointerTypeKind) + lp_add_function_attr(function, arg + 1, LP_FUNC_ATTR_NOALIAS); + + LLVMSetFunctionCallConv(function, LLVMFastCallConv); + LLVMSetVisibility(function, LLVMHiddenVisibility); + generate_update_cache_one_block(gallivm, function, format_desc); + } + + args[0] = ptr_addr; + args[1] = hash_index; + args[2] = cache; + + LLVMBuildCall(builder, function, args, ARRAY_SIZE(args), ""); + bb = LLVMGetInsertBlock(builder); + inst = LLVMGetLastInstruction(bb); + LLVMSetInstructionCallConv(inst, LLVMFastCallConv); +} + +/* + * cached lookup + */ +static LLVMValueRef +compressed_fetch_cached(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + unsigned n, + LLVMValueRef base_ptr, + LLVMValueRef offset, + LLVMValueRef i, + LLVMValueRef j, + LLVMValueRef cache) + +{ + LLVMBuilderRef builder = gallivm->builder; + unsigned count, low_bit, log2size; + LLVMValueRef color, offset_stored, addr, ptr_addrtrunc, tmp; + LLVMValueRef ij_index, hash_index, hash_mask, block_index; + LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); + LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); + LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context); + struct lp_type type; + struct lp_build_context bld32; + memset(&type, 0, sizeof type); + type.width = 32; + type.length = n; + + lp_build_context_init(&bld32, gallivm, type); + + /* + * compute hash - we use direct mapped cache, the hash 
function could + * be better but it needs to be simple + * per-element: + * compare offset with offset stored at tag (hash) + * if not equal extract block, store block, update tag + * extract color from cache + * assemble colors + */ + + low_bit = util_logbase2(format_desc->block.bits / 8); + log2size = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE); + addr = LLVMBuildPtrToInt(builder, base_ptr, i64t, ""); + ptr_addrtrunc = LLVMBuildPtrToInt(builder, base_ptr, i32t, ""); + ptr_addrtrunc = lp_build_broadcast_scalar(&bld32, ptr_addrtrunc); + /* For the hash function, first mask off the unused lowest bits. Then just + do some xor with address bits - only use lower 32bits */ + ptr_addrtrunc = LLVMBuildAdd(builder, offset, ptr_addrtrunc, ""); + ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc, + lp_build_const_int_vec(gallivm, type, low_bit), ""); + /* This only really makes sense for size 64,128,256 */ + hash_index = ptr_addrtrunc; + ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc, + lp_build_const_int_vec(gallivm, type, 2*log2size), ""); + hash_index = LLVMBuildXor(builder, ptr_addrtrunc, hash_index, ""); + tmp = LLVMBuildLShr(builder, hash_index, + lp_build_const_int_vec(gallivm, type, log2size), ""); + hash_index = LLVMBuildXor(builder, hash_index, tmp, ""); + + hash_mask = lp_build_const_int_vec(gallivm, type, LP_BUILD_FORMAT_CACHE_SIZE - 1); + hash_index = LLVMBuildAnd(builder, hash_index, hash_mask, ""); + ij_index = LLVMBuildShl(builder, i, lp_build_const_int_vec(gallivm, type, 2), ""); + ij_index = LLVMBuildAdd(builder, ij_index, j, ""); + block_index = LLVMBuildShl(builder, hash_index, + lp_build_const_int_vec(gallivm, type, 4), ""); + block_index = LLVMBuildAdd(builder, ij_index, block_index, ""); + + if (n > 1) { + color = bld32.undef; + for (count = 0; count < n; count++) { + LLVMValueRef index, cond, colorx; + LLVMValueRef block_indexx, hash_indexx, addrx, offsetx, ptr_addrx; + struct lp_build_if_state if_ctx; + + index = lp_build_const_int32(gallivm, 
count); + offsetx = LLVMBuildExtractElement(builder, offset, index, ""); + addrx = LLVMBuildZExt(builder, offsetx, i64t, ""); + addrx = LLVMBuildAdd(builder, addrx, addr, ""); + block_indexx = LLVMBuildExtractElement(builder, block_index, index, ""); + hash_indexx = LLVMBuildLShr(builder, block_indexx, + lp_build_const_int32(gallivm, 4), ""); + offset_stored = s3tc_lookup_tag_data(gallivm, cache, hash_indexx); + cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addrx, ""); + + lp_build_if(&if_ctx, gallivm, cond); + { + ptr_addrx = LLVMBuildIntToPtr(builder, addrx, + LLVMPointerType(i8t, 0), ""); + update_cached_block(gallivm, format_desc, ptr_addrx, hash_indexx, cache); +#if LP_BUILD_FORMAT_CACHE_DEBUG + s3tc_update_cache_access(gallivm, cache, 1, + LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); +#endif + } + lp_build_endif(&if_ctx); + + colorx = s3tc_lookup_cached_pixel(gallivm, cache, block_indexx); + + color = LLVMBuildInsertElement(builder, color, colorx, + lp_build_const_int32(gallivm, count), ""); + } + } + else { + LLVMValueRef cond; + struct lp_build_if_state if_ctx; + + tmp = LLVMBuildZExt(builder, offset, i64t, ""); + addr = LLVMBuildAdd(builder, tmp, addr, ""); + offset_stored = s3tc_lookup_tag_data(gallivm, cache, hash_index); + cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addr, ""); + + lp_build_if(&if_ctx, gallivm, cond); + { + tmp = LLVMBuildIntToPtr(builder, addr, LLVMPointerType(i8t, 0), ""); + update_cached_block(gallivm, format_desc, tmp, hash_index, cache); +#if LP_BUILD_FORMAT_CACHE_DEBUG + s3tc_update_cache_access(gallivm, cache, 1, + LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); +#endif + } + lp_build_endif(&if_ctx); + + color = s3tc_lookup_cached_pixel(gallivm, cache, block_index); + } +#if LP_BUILD_FORMAT_CACHE_DEBUG + s3tc_update_cache_access(gallivm, cache, n, + LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL); +#endif + return LLVMBuildBitCast(builder, color, LLVMVectorType(i8t, n * 4), ""); +} + + +static LLVMValueRef 
+s3tc_dxt5_to_rgba_aos(struct gallivm_state *gallivm, + unsigned n, + enum pipe_format format, + LLVMValueRef colors, + LLVMValueRef codewords, + LLVMValueRef alpha_lo, + LLVMValueRef alpha_hi, + LLVMValueRef i, + LLVMValueRef j) +{ + return s3tc_dxt5_full_to_rgba_aos(gallivm, n, format, colors, + codewords, alpha_lo, alpha_hi, i, j); +} + + +/** + * @param n number of pixels processed (usually n=4, but it should also work with n=1 + * and multiples of 4) + * @param base_ptr base pointer (32bit or 64bit pointer depending on the architecture) + * @param offset vector with the relative offsets of the S3TC blocks + * @param i is a vector with the x subpixel coordinate (0..3) + * @param j is a vector with the y subpixel coordinate (0..3) + * @return a <4*n x i8> vector with the pixel RGBA values in AoS + */ +LLVMValueRef +lp_build_fetch_s3tc_rgba_aos(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + unsigned n, + LLVMValueRef base_ptr, + LLVMValueRef offset, + LLVMValueRef i, + LLVMValueRef j, + LLVMValueRef cache) +{ + LLVMValueRef rgba; + LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); + LLVMBuilderRef builder = gallivm->builder; + + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC); + assert(format_desc->block.width == 4); + assert(format_desc->block.height == 4); + + assert((n == 1) || (n % 4 == 0)); + +/* debug_printf("format = %d\n", format_desc->format);*/ + if (cache) { + rgba = compressed_fetch_cached(gallivm, format_desc, n, + base_ptr, offset, i, j, cache); + return rgba; + } + + if (n > 4) { + unsigned count; + LLVMTypeRef i8_vectype = LLVMVectorType(i8t, 4 * n); + LLVMTypeRef i128_type = LLVMIntTypeInContext(gallivm->context, 128); + LLVMTypeRef i128_vectype = LLVMVectorType(i128_type, n / 4); + LLVMTypeRef i324_vectype = LLVMVectorType(LLVMInt32TypeInContext( + gallivm->context), 4); + LLVMValueRef offset4, i4, j4, rgba4[LP_MAX_VECTOR_LENGTH/16]; + struct lp_type lp_324_vectype = lp_type_uint_vec(32, 
128); + + assert(n / 4 <= ARRAY_SIZE(rgba4)); + + rgba = LLVMGetUndef(i128_vectype); + + for (count = 0; count < n / 4; count++) { + LLVMValueRef colors, codewords, alpha_lo, alpha_hi; + + i4 = lp_build_extract_range(gallivm, i, count * 4, 4); + j4 = lp_build_extract_range(gallivm, j, count * 4, 4); + offset4 = lp_build_extract_range(gallivm, offset, count * 4, 4); + + lp_build_gather_s3tc(gallivm, 4, format_desc, &colors, &codewords, + &alpha_lo, &alpha_hi, base_ptr, offset4); + + switch (format_desc->format) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT1_SRGB: + case PIPE_FORMAT_DXT1_SRGBA: + rgba4[count] = s3tc_dxt1_to_rgba_aos(gallivm, 4, format_desc->format, + colors, codewords, i4, j4); + break; + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT3_SRGBA: + rgba4[count] = s3tc_dxt3_to_rgba_aos(gallivm, 4, format_desc->format, colors, + codewords, alpha_lo, alpha_hi, i4, j4); + break; + case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_DXT5_SRGBA: + rgba4[count] = s3tc_dxt5_to_rgba_aos(gallivm, 4, format_desc->format, colors, + codewords, alpha_lo, alpha_hi, i4, j4); + break; + default: + assert(0); + rgba4[count] = LLVMGetUndef(LLVMVectorType(i8t, 4)); + break; + } + /* shuffles typically give best results with dword elements...*/ + rgba4[count] = LLVMBuildBitCast(builder, rgba4[count], i324_vectype, ""); + } + rgba = lp_build_concat(gallivm, rgba4, lp_324_vectype, n / 4); + rgba = LLVMBuildBitCast(builder, rgba, i8_vectype, ""); + } + else { + LLVMValueRef colors, codewords, alpha_lo, alpha_hi; + + lp_build_gather_s3tc(gallivm, n, format_desc, &colors, &codewords, + &alpha_lo, &alpha_hi, base_ptr, offset); + + switch (format_desc->format) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT1_SRGB: + case PIPE_FORMAT_DXT1_SRGBA: + rgba = s3tc_dxt1_to_rgba_aos(gallivm, n, format_desc->format, + colors, codewords, i, j); + break; + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT3_SRGBA: + rgba 
= s3tc_dxt3_to_rgba_aos(gallivm, n, format_desc->format, colors, + codewords, alpha_lo, alpha_hi, i, j); + break; + case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_DXT5_SRGBA: + rgba = s3tc_dxt5_to_rgba_aos(gallivm, n, format_desc->format, colors, + codewords, alpha_lo, alpha_hi, i, j); + break; + default: + assert(0); + rgba = LLVMGetUndef(LLVMVectorType(i8t, 4*n)); + break; + } + } + + /* always return just decompressed values - srgb conversion is done later */ + + return rgba; +} diff -Nru mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_intr.c mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_intr.c --- mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_intr.c 2018-01-29 17:10:31.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_intr.c 2019-03-31 23:16:37.000000000 +0000 @@ -241,6 +241,16 @@ function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args); + /* + * If llvm removes an intrinsic we use, we'll hit this abort (rather + * than a call to address zero in the jited code). + */ + if (LLVMGetIntrinsicID(function) == 0) { + _debug_printf("llvm (version 0x%x) found no intrinsic for %s, going to crash...\n", + HAVE_LLVM, name); + abort(); + } + if (!set_callsite_attrs) lp_add_func_attributes(function, attr_mask); diff -Nru mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c --- mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c 2019-03-31 23:16:37.000000000 +0000 @@ -132,68 +132,6 @@ /** - * Build LLVM code for texture coord wrapping, for nearest filtering, - * for float texcoords. 
- * \param coord the incoming texcoord (s,t or r) - * \param length the texture size along one dimension - * \param offset the texel offset along the coord axis - * \param is_pot if TRUE, length is a power of two - * \param wrap_mode one of PIPE_TEX_WRAP_x - * \param icoord the texcoord after wrapping, as int - */ -static void -lp_build_sample_wrap_nearest_float(struct lp_build_sample_context *bld, - LLVMValueRef coord, - LLVMValueRef length, - LLVMValueRef offset, - boolean is_pot, - unsigned wrap_mode, - LLVMValueRef *icoord) -{ - struct lp_build_context *coord_bld = &bld->coord_bld; - LLVMValueRef length_minus_one; - - switch(wrap_mode) { - case PIPE_TEX_WRAP_REPEAT: - if (offset) { - /* this is definitely not ideal for POT case */ - offset = lp_build_int_to_float(coord_bld, offset); - offset = lp_build_div(coord_bld, offset, length); - coord = lp_build_add(coord_bld, coord, offset); - } - /* take fraction, unnormalize */ - coord = lp_build_fract_safe(coord_bld, coord); - coord = lp_build_mul(coord_bld, coord, length); - *icoord = lp_build_itrunc(coord_bld, coord); - break; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one); - if (bld->static_sampler_state->normalized_coords) { - /* scale coord to length */ - coord = lp_build_mul(coord_bld, coord, length); - } - if (offset) { - offset = lp_build_int_to_float(coord_bld, offset); - coord = lp_build_add(coord_bld, coord, offset); - } - coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, - length_minus_one); - *icoord = lp_build_itrunc(coord_bld, coord); - break; - - case PIPE_TEX_WRAP_CLAMP: - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - case PIPE_TEX_WRAP_MIRROR_REPEAT: - case PIPE_TEX_WRAP_MIRROR_CLAMP: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - default: - assert(0); - } -} - - -/** * Helper to compute the first coord and the weight for * linear wrap repeat npot textures */ @@ -425,129 +363,6 @@ /** - * Build LLVM 
code for texture coord wrapping, for linear filtering, - * for float texcoords. - * \param block_length is the length of the pixel block along the - * coordinate axis - * \param coord the incoming texcoord (s,t or r) - * \param length the texture size along one dimension - * \param offset the texel offset along the coord axis - * \param is_pot if TRUE, length is a power of two - * \param wrap_mode one of PIPE_TEX_WRAP_x - * \param coord0 the first texcoord after wrapping, as int - * \param coord1 the second texcoord after wrapping, as int - * \param weight the filter weight as int (0-255) - * \param force_nearest if this coord actually uses nearest filtering - */ -static void -lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld, - unsigned block_length, - LLVMValueRef coord, - LLVMValueRef length, - LLVMValueRef offset, - boolean is_pot, - unsigned wrap_mode, - LLVMValueRef *coord0, - LLVMValueRef *coord1, - LLVMValueRef *weight, - unsigned force_nearest) -{ - struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - struct lp_build_context *coord_bld = &bld->coord_bld; - LLVMBuilderRef builder = bld->gallivm->builder; - LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5); - LLVMValueRef length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one); - - switch(wrap_mode) { - case PIPE_TEX_WRAP_REPEAT: - if (is_pot) { - /* mul by size and subtract 0.5 */ - coord = lp_build_mul(coord_bld, coord, length); - if (offset) { - offset = lp_build_int_to_float(coord_bld, offset); - coord = lp_build_add(coord_bld, coord, offset); - } - if (!force_nearest) - coord = lp_build_sub(coord_bld, coord, half); - *coord1 = lp_build_add(coord_bld, coord, coord_bld->one); - /* convert to int, compute lerp weight */ - lp_build_ifloor_fract(coord_bld, coord, coord0, weight); - *coord1 = lp_build_ifloor(coord_bld, *coord1); - /* repeat wrap */ - length_minus_one = lp_build_itrunc(coord_bld, length_minus_one); - *coord0 = 
LLVMBuildAnd(builder, *coord0, length_minus_one, ""); - *coord1 = LLVMBuildAnd(builder, *coord1, length_minus_one, ""); - } - else { - LLVMValueRef mask; - if (offset) { - offset = lp_build_int_to_float(coord_bld, offset); - offset = lp_build_div(coord_bld, offset, length); - coord = lp_build_add(coord_bld, coord, offset); - } - /* wrap with normalized floats is just fract */ - coord = lp_build_fract(coord_bld, coord); - /* unnormalize */ - coord = lp_build_mul(coord_bld, coord, length); - /* - * we avoided the 0.5/length division, have to fix up wrong - * edge cases with selects - */ - *coord1 = lp_build_add(coord_bld, coord, half); - coord = lp_build_sub(coord_bld, coord, half); - *weight = lp_build_fract(coord_bld, coord); - /* - * It is important for this comparison to be unordered - * (or need fract_safe above). - */ - mask = lp_build_compare(coord_bld->gallivm, coord_bld->type, - PIPE_FUNC_LESS, coord, coord_bld->zero); - *coord0 = lp_build_select(coord_bld, mask, length_minus_one, coord); - *coord0 = lp_build_itrunc(coord_bld, *coord0); - mask = lp_build_compare(coord_bld->gallivm, coord_bld->type, - PIPE_FUNC_LESS, *coord1, length); - *coord1 = lp_build_select(coord_bld, mask, *coord1, coord_bld->zero); - *coord1 = lp_build_itrunc(coord_bld, *coord1); - } - break; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - if (bld->static_sampler_state->normalized_coords) { - /* mul by tex size */ - coord = lp_build_mul(coord_bld, coord, length); - } - if (offset) { - offset = lp_build_int_to_float(coord_bld, offset); - coord = lp_build_add(coord_bld, coord, offset); - } - /* subtract 0.5 */ - if (!force_nearest) { - coord = lp_build_sub(coord_bld, coord, half); - } - /* clamp to [0, length - 1] */ - coord = lp_build_min_ext(coord_bld, coord, length_minus_one, - GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); - coord = lp_build_max(coord_bld, coord, coord_bld->zero); - *coord1 = lp_build_add(coord_bld, coord, coord_bld->one); - /* convert to int, compute lerp weight */ - 
lp_build_ifloor_fract(coord_bld, coord, coord0, weight); - /* coord1 = min(coord1, length-1) */ - *coord1 = lp_build_min(coord_bld, *coord1, length_minus_one); - *coord1 = lp_build_itrunc(coord_bld, *coord1); - break; - default: - assert(0); - *coord0 = int_coord_bld->zero; - *coord1 = int_coord_bld->zero; - *weight = coord_bld->zero; - break; - } - *weight = lp_build_mul_imm(coord_bld, *weight, 256); - *weight = lp_build_itrunc(coord_bld, *weight); - return; -} - - -/** * Fetch texels for image with nearest sampling. * Return filtered color as two vectors of 16-bit fixed point values. */ @@ -737,96 +552,6 @@ /** - * Sample a single texture image with nearest sampling. - * If sampling a cube texture, r = cube face in [0,5]. - * Return filtered color as two vectors of 16-bit fixed point values. - * Does address calcs (except offsets) with floats. - * Useful for AVX which has support for 8x32 floats but not 8x32 ints. - */ -static void -lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld, - LLVMValueRef int_size, - LLVMValueRef row_stride_vec, - LLVMValueRef img_stride_vec, - LLVMValueRef data_ptr, - LLVMValueRef mipoffsets, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef r, - const LLVMValueRef *offsets, - LLVMValueRef *colors) - { - const unsigned dims = bld->dims; - LLVMValueRef width_vec, height_vec, depth_vec; - LLVMValueRef offset; - LLVMValueRef x_subcoord, y_subcoord; - LLVMValueRef x_icoord = NULL, y_icoord = NULL, z_icoord = NULL; - LLVMValueRef flt_size; - - flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size); - - lp_build_extract_image_sizes(bld, - &bld->float_size_bld, - bld->coord_type, - flt_size, - &width_vec, - &height_vec, - &depth_vec); - - /* Do texcoord wrapping */ - lp_build_sample_wrap_nearest_float(bld, - s, width_vec, offsets[0], - bld->static_texture_state->pot_width, - bld->static_sampler_state->wrap_s, - &x_icoord); - - if (dims >= 2) { - lp_build_sample_wrap_nearest_float(bld, - t, height_vec, 
offsets[1], - bld->static_texture_state->pot_height, - bld->static_sampler_state->wrap_t, - &y_icoord); - - if (dims >= 3) { - lp_build_sample_wrap_nearest_float(bld, - r, depth_vec, offsets[2], - bld->static_texture_state->pot_depth, - bld->static_sampler_state->wrap_r, - &z_icoord); - } - } - if (has_layer_coord(bld->static_texture_state->target)) { - z_icoord = r; - } - - /* - * From here on we deal with ints, and we should split up the 256bit - * vectors manually for better generated code. - */ - - /* - * compute texel offsets - - * cannot do offset calc with floats, difficult for block-based formats, - * and not enough precision anyway. - */ - lp_build_sample_offset(&bld->int_coord_bld, - bld->format_desc, - x_icoord, y_icoord, - z_icoord, - row_stride_vec, img_stride_vec, - &offset, - &x_subcoord, &y_subcoord); - if (mipoffsets) { - offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets); - } - - lp_build_sample_fetch_image_nearest(bld, data_ptr, offset, - x_subcoord, y_subcoord, - colors); -} - - -/** * Fetch texels for image with linear sampling. * Return filtered color as two vectors of 16-bit fixed point values. */ @@ -1213,175 +938,6 @@ /** - * Sample a single texture image with (bi-)(tri-)linear sampling. - * Return filtered color as two vectors of 16-bit fixed point values. - * Does address calcs (except offsets) with floats. - * Useful for AVX which has support for 8x32 floats but not 8x32 ints. 
- */ -static void -lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld, - LLVMValueRef int_size, - LLVMValueRef row_stride_vec, - LLVMValueRef img_stride_vec, - LLVMValueRef data_ptr, - LLVMValueRef mipoffsets, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef r, - const LLVMValueRef *offsets, - LLVMValueRef *colors) -{ - const unsigned dims = bld->dims; - LLVMValueRef width_vec, height_vec, depth_vec; - LLVMValueRef s_fpart; - LLVMValueRef t_fpart = NULL; - LLVMValueRef r_fpart = NULL; - LLVMValueRef x_stride, y_stride, z_stride; - LLVMValueRef x_offset0, x_offset1; - LLVMValueRef y_offset0, y_offset1; - LLVMValueRef z_offset0, z_offset1; - LLVMValueRef offset[2][2][2]; /* [z][y][x] */ - LLVMValueRef x_subcoord[2], y_subcoord[2]; - LLVMValueRef flt_size; - LLVMValueRef x_icoord0, x_icoord1; - LLVMValueRef y_icoord0, y_icoord1; - LLVMValueRef z_icoord0, z_icoord1; - unsigned x, y, z; - - flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size); - - lp_build_extract_image_sizes(bld, - &bld->float_size_bld, - bld->coord_type, - flt_size, - &width_vec, - &height_vec, - &depth_vec); - - /* do texcoord wrapping and compute texel offsets */ - lp_build_sample_wrap_linear_float(bld, - bld->format_desc->block.width, - s, width_vec, offsets[0], - bld->static_texture_state->pot_width, - bld->static_sampler_state->wrap_s, - &x_icoord0, &x_icoord1, - &s_fpart, - bld->static_sampler_state->force_nearest_s); - - if (dims >= 2) { - lp_build_sample_wrap_linear_float(bld, - bld->format_desc->block.height, - t, height_vec, offsets[1], - bld->static_texture_state->pot_height, - bld->static_sampler_state->wrap_t, - &y_icoord0, &y_icoord1, - &t_fpart, - bld->static_sampler_state->force_nearest_t); - - if (dims >= 3) { - lp_build_sample_wrap_linear_float(bld, - 1, /* block length (depth) */ - r, depth_vec, offsets[2], - bld->static_texture_state->pot_depth, - bld->static_sampler_state->wrap_r, - &z_icoord0, &z_icoord1, - &r_fpart, 0); - } - } - - /* - * From 
here on we deal with ints, and we should split up the 256bit - * vectors manually for better generated code. - */ - - /* get pixel, row and image strides */ - x_stride = lp_build_const_vec(bld->gallivm, - bld->int_coord_bld.type, - bld->format_desc->block.bits/8); - y_stride = row_stride_vec; - z_stride = img_stride_vec; - - /* - * compute texel offset - - * cannot do offset calc with floats, difficult for block-based formats, - * and not enough precision anyway. - */ - lp_build_sample_partial_offset(&bld->int_coord_bld, - bld->format_desc->block.width, - x_icoord0, x_stride, - &x_offset0, &x_subcoord[0]); - lp_build_sample_partial_offset(&bld->int_coord_bld, - bld->format_desc->block.width, - x_icoord1, x_stride, - &x_offset1, &x_subcoord[1]); - - /* add potential cube/array/mip offsets now as they are constant per pixel */ - if (has_layer_coord(bld->static_texture_state->target)) { - LLVMValueRef z_offset; - z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec); - /* The r coord is the cube face in [0,5] or array layer */ - x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset); - x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset); - } - if (mipoffsets) { - x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets); - x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets); - } - - for (z = 0; z < 2; z++) { - for (y = 0; y < 2; y++) { - offset[z][y][0] = x_offset0; - offset[z][y][1] = x_offset1; - } - } - - if (dims >= 2) { - lp_build_sample_partial_offset(&bld->int_coord_bld, - bld->format_desc->block.height, - y_icoord0, y_stride, - &y_offset0, &y_subcoord[0]); - lp_build_sample_partial_offset(&bld->int_coord_bld, - bld->format_desc->block.height, - y_icoord1, y_stride, - &y_offset1, &y_subcoord[1]); - for (z = 0; z < 2; z++) { - for (x = 0; x < 2; x++) { - offset[z][0][x] = lp_build_add(&bld->int_coord_bld, - offset[z][0][x], y_offset0); - offset[z][1][x] = lp_build_add(&bld->int_coord_bld, - 
offset[z][1][x], y_offset1); - } - } - } - - if (dims >= 3) { - LLVMValueRef z_subcoord[2]; - lp_build_sample_partial_offset(&bld->int_coord_bld, - 1, - z_icoord0, z_stride, - &z_offset0, &z_subcoord[0]); - lp_build_sample_partial_offset(&bld->int_coord_bld, - 1, - z_icoord1, z_stride, - &z_offset1, &z_subcoord[1]); - for (y = 0; y < 2; y++) { - for (x = 0; x < 2; x++) { - offset[0][y][x] = lp_build_add(&bld->int_coord_bld, - offset[0][y][x], z_offset0); - offset[1][y][x] = lp_build_add(&bld->int_coord_bld, - offset[1][y][x], z_offset1); - } - } - } - - lp_build_sample_fetch_image_linear(bld, data_ptr, offset, - x_subcoord, y_subcoord, - s_fpart, t_fpart, r_fpart, - colors); -} - - -/** * Sample the texture/mipmap using given image filter and mip filter. * data0_ptr and data1_ptr point to the two mipmap levels to sample * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes. @@ -1413,9 +969,6 @@ LLVMValueRef mipoff1 = NULL; LLVMValueRef colors0; LLVMValueRef colors1; - boolean use_floats = util_cpu_caps.has_avx && - !util_cpu_caps.has_avx2 && - bld->coord_type.length > 4; /* sample the first mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel0, @@ -1430,39 +983,20 @@ mipoff0 = lp_build_get_mip_offsets(bld, ilevel0); } - if (use_floats) { - if (img_filter == PIPE_TEX_FILTER_NEAREST) { - lp_build_sample_image_nearest_afloat(bld, - size0, - row_stride0_vec, img_stride0_vec, - data_ptr0, mipoff0, s, t, r, offsets, - &colors0); - } - else { - assert(img_filter == PIPE_TEX_FILTER_LINEAR); - lp_build_sample_image_linear_afloat(bld, - size0, - row_stride0_vec, img_stride0_vec, - data_ptr0, mipoff0, s, t, r, offsets, - &colors0); - } + if (img_filter == PIPE_TEX_FILTER_NEAREST) { + lp_build_sample_image_nearest(bld, + size0, + row_stride0_vec, img_stride0_vec, + data_ptr0, mipoff0, s, t, r, offsets, + &colors0); } else { - if (img_filter == PIPE_TEX_FILTER_NEAREST) { - lp_build_sample_image_nearest(bld, - size0, - row_stride0_vec, img_stride0_vec, - 
data_ptr0, mipoff0, s, t, r, offsets, - &colors0); - } - else { - assert(img_filter == PIPE_TEX_FILTER_LINEAR); - lp_build_sample_image_linear(bld, - size0, - row_stride0_vec, img_stride0_vec, - data_ptr0, mipoff0, s, t, r, offsets, - &colors0); - } + assert(img_filter == PIPE_TEX_FILTER_LINEAR); + lp_build_sample_image_linear(bld, + size0, + row_stride0_vec, img_stride0_vec, + data_ptr0, mipoff0, s, t, r, offsets, + &colors0); } /* Store the first level's colors in the output variables */ @@ -1521,37 +1055,19 @@ mipoff1 = lp_build_get_mip_offsets(bld, ilevel1); } - if (use_floats) { - if (img_filter == PIPE_TEX_FILTER_NEAREST) { - lp_build_sample_image_nearest_afloat(bld, - size1, - row_stride1_vec, img_stride1_vec, - data_ptr1, mipoff1, s, t, r, offsets, - &colors1); - } - else { - lp_build_sample_image_linear_afloat(bld, - size1, - row_stride1_vec, img_stride1_vec, - data_ptr1, mipoff1, s, t, r, offsets, - &colors1); - } + if (img_filter == PIPE_TEX_FILTER_NEAREST) { + lp_build_sample_image_nearest(bld, + size1, + row_stride1_vec, img_stride1_vec, + data_ptr1, mipoff1, s, t, r, offsets, + &colors1); } else { - if (img_filter == PIPE_TEX_FILTER_NEAREST) { - lp_build_sample_image_nearest(bld, - size1, - row_stride1_vec, img_stride1_vec, - data_ptr1, mipoff1, s, t, r, offsets, - &colors1); - } - else { - lp_build_sample_image_linear(bld, - size1, - row_stride1_vec, img_stride1_vec, - data_ptr1, mipoff1, s, t, r, offsets, - &colors1); - } + lp_build_sample_image_linear(bld, + size1, + row_stride1_vec, img_stride1_vec, + data_ptr1, mipoff1, s, t, r, offsets, + &colors1); } /* interpolate samples from the two mipmap levels */ diff -Nru mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c --- mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 2019-03-31 23:16:37.000000000 +0000 @@ 
-3549,10 +3549,6 @@ const struct util_format_description *format_desc; format_desc = util_format_description(static_texture_state->format); if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { - /* - * This is not 100% correct, if we have cache but the - * util_format_s3tc_prefer is true the cache won't get used - * regardless (could hook up the block decode there...) */ need_cache = TRUE; } } diff -Nru mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c --- mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 2019-03-31 23:16:37.000000000 +0000 @@ -41,6 +41,7 @@ #include "util/u_debug.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_prim.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_info.h" @@ -1059,7 +1060,8 @@ static LLVMValueRef get_indirect_index(struct lp_build_tgsi_soa_context *bld, unsigned reg_file, unsigned reg_index, - const struct tgsi_ind_register *indirect_reg) + const struct tgsi_ind_register *indirect_reg, + int index_limit) { LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; struct lp_build_context *uint_bld = &bld->bld_base.uint_bld; @@ -1106,9 +1108,9 @@ * larger than the declared size but smaller than the buffer size. 
*/ if (reg_file != TGSI_FILE_CONSTANT) { + assert(index_limit > 0); max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm, - uint_bld->type, - bld->bld_base.info->file_max[reg_file]); + uint_bld->type, index_limit); assert(!uint_bld->type.sign); index = lp_build_min(uint_bld, index, max_index); @@ -1225,7 +1227,8 @@ indirect_index = get_indirect_index(bld, reg->Register.File, reg->Register.Index, - ®->Indirect); + ®->Indirect, + bld->bld_base.info->file_max[reg->Register.File]); /* All fetches are from the same constant buffer, so * we need to propagate the size to a vector to do a @@ -1364,7 +1367,8 @@ indirect_index = get_indirect_index(bld, reg->Register.File, reg->Register.Index, - ®->Indirect); + ®->Indirect, + bld->bld_base.info->file_max[reg->Register.File]); /* * Unlike for other reg classes, adding pixel offsets is unnecessary - * immediates are stored as full vectors (FIXME??? - might be better @@ -1438,7 +1442,8 @@ indirect_index = get_indirect_index(bld, reg->Register.File, reg->Register.Index, - ®->Indirect); + ®->Indirect, + bld->bld_base.info->file_max[reg->Register.File]); index_vec = get_soa_array_offsets(&bld_base->uint_bld, indirect_index, @@ -1524,19 +1529,33 @@ } if (reg->Register.Indirect) { + /* + * XXX: this is possibly not quite the right value, since file_max may be + * larger than the max attrib index, due to it being the max of declared + * inputs AND the max vertices per prim (which is 6 for tri adj). + * It should however be safe to use (since we always allocate + * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit). + */ + int index_limit = info->file_max[reg->Register.File]; attrib_index = get_indirect_index(bld, reg->Register.File, reg->Register.Index, - ®->Indirect); + ®->Indirect, + index_limit); } else { attrib_index = lp_build_const_int32(gallivm, reg->Register.Index); } if (reg->Dimension.Indirect) { + /* + * A fixed 6 should do as well (which is what we allocate). 
+ */ + int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]); vertex_index = get_indirect_index(bld, reg->Register.File, reg->Dimension.Index, - ®->DimIndirect); + ®->DimIndirect, + index_limit); } else { vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index); } @@ -1591,7 +1610,8 @@ indirect_index = get_indirect_index(bld, reg->Register.File, reg->Register.Index, - ®->Indirect); + ®->Indirect, + bld->bld_base.info->file_max[reg->Register.File]); index_vec = get_soa_array_offsets(&bld_base->uint_bld, indirect_index, @@ -1811,7 +1831,8 @@ indirect_index = get_indirect_index(bld, reg->Register.File, reg->Register.Index, - ®->Indirect); + ®->Indirect, + bld->bld_base.info->file_max[reg->Register.File]); } else { assert(reg->Register.Index <= bld_base->info->file_max[reg->Register.File]); diff -Nru mesa-18.3.3/src/gallium/auxiliary/Makefile.sources mesa-19.0.1/src/gallium/auxiliary/Makefile.sources --- mesa-18.3.3/src/gallium/auxiliary/Makefile.sources 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/Makefile.sources 2019-03-31 23:16:37.000000000 +0000 @@ -102,10 +102,6 @@ indices/u_indices_priv.h \ indices/u_primconvert.c \ indices/u_primconvert.h \ - os/os_memory_aligned.h \ - os/os_memory_debug.h \ - os/os_memory_stdc.h \ - os/os_memory.h \ os/os_mman.h \ os/os_process.c \ os/os_process.h \ @@ -290,7 +286,6 @@ util/u_linear.h \ util/u_log.c \ util/u_log.h \ - util/u_memory.h \ util/u_mm.c \ util/u_mm.h \ util/u_network.c \ @@ -423,11 +418,11 @@ gallivm/lp_bld_flow.h \ gallivm/lp_bld_format_aos_array.c \ gallivm/lp_bld_format_aos.c \ - gallivm/lp_bld_format_cached.c \ gallivm/lp_bld_format_float.c \ gallivm/lp_bld_format.c \ gallivm/lp_bld_format.h \ gallivm/lp_bld_format_soa.c \ + gallivm/lp_bld_format_s3tc.c \ gallivm/lp_bld_format_srgb.c \ gallivm/lp_bld_format_yuv.c \ gallivm/lp_bld_gather.c \ diff -Nru mesa-18.3.3/src/gallium/auxiliary/meson.build mesa-19.0.1/src/gallium/auxiliary/meson.build 
--- mesa-18.3.3/src/gallium/auxiliary/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -122,10 +122,6 @@ 'indices/u_indices_priv.h', 'indices/u_primconvert.c', 'indices/u_primconvert.h', - 'os/os_memory_aligned.h', - 'os/os_memory_debug.h', - 'os/os_memory_stdc.h', - 'os/os_memory.h', 'os/os_mman.h', 'os/os_process.c', 'os/os_process.h', @@ -310,7 +306,6 @@ 'util/u_linear.h', 'util/u_log.c', 'util/u_log.h', - 'util/u_memory.h', 'util/u_mm.c', 'util/u_mm.h', 'util/u_network.c', @@ -394,8 +389,8 @@ 'gallivm/lp_bld_flow.h', 'gallivm/lp_bld_format_aos_array.c', 'gallivm/lp_bld_format_aos.c', - 'gallivm/lp_bld_format_cached.c', 'gallivm/lp_bld_format_float.c', + 'gallivm/lp_bld_format_s3tc.c', 'gallivm/lp_bld_format.c', 'gallivm/lp_bld_format.h', 'gallivm/lp_bld_format_soa.c', diff -Nru mesa-18.3.3/src/gallium/auxiliary/nir/tgsi_to_nir.c mesa-19.0.1/src/gallium/auxiliary/nir/tgsi_to_nir.c --- mesa-18.3.3/src/gallium/auxiliary/nir/tgsi_to_nir.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/nir/tgsi_to_nir.c 2019-03-31 23:16:37.000000000 +0000 @@ -181,8 +181,8 @@ /* for arrays, we create variables instead of registers: */ nir_variable *var = rzalloc(b->shader, nir_variable); - var->type = glsl_array_type(glsl_vec4_type(), array_size); - var->data.mode = nir_var_global; + var->type = glsl_array_type(glsl_vec4_type(), array_size, 0); + var->data.mode = nir_var_shader_temp; var->name = ralloc_asprintf(var, "arr_%d", decl->Array.ArrayID); exec_list_push_tail(&b->shader->globals, &var->node); @@ -265,7 +265,7 @@ var->type = glsl_vec4_type(); if (is_array) - var->type = glsl_array_type(var->type, array_size); + var->type = glsl_array_type(var->type, array_size, 0); switch (file) { case TGSI_FILE_INPUT: @@ -516,8 +516,7 @@ c->scan->input_semantic_name[index] == TGSI_SEMANTIC_FACE) { nir_ssa_def *tgsi_frontface[4] = { nir_bcsel(&c->build, - 
nir_load_system_value(&c->build, - nir_intrinsic_load_front_face, 0), + nir_load_front_face(&c->build, 1), nir_imm_float(&c->build, 1.0), nir_imm_float(&c->build, -1.0)), nir_imm_float(&c->build, 0.0), @@ -938,9 +937,7 @@ static void ttn_kill_if(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) { - nir_ssa_def *cmp = nir_bany_inequal4(b, nir_flt(b, src[0], - nir_imm_float(b, 0.0)), - nir_imm_int(b, 0)); + nir_ssa_def *cmp = nir_bany(b, nir_flt(b, src[0], nir_imm_float(b, 0.0))); nir_intrinsic_instr *discard = nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if); discard->src[0] = nir_src_for_ssa(cmp); @@ -1471,10 +1468,10 @@ [TGSI_OPCODE_ENDSUB] = 0, /* XXX: no function calls */ [TGSI_OPCODE_NOP] = 0, - [TGSI_OPCODE_FSEQ] = nir_op_feq, - [TGSI_OPCODE_FSGE] = nir_op_fge, - [TGSI_OPCODE_FSLT] = nir_op_flt, - [TGSI_OPCODE_FSNE] = nir_op_fne, + [TGSI_OPCODE_FSEQ] = nir_op_feq32, + [TGSI_OPCODE_FSGE] = nir_op_fge32, + [TGSI_OPCODE_FSLT] = nir_op_flt32, + [TGSI_OPCODE_FSNE] = nir_op_fne32, [TGSI_OPCODE_KILL_IF] = 0, @@ -1485,9 +1482,9 @@ [TGSI_OPCODE_IMAX] = nir_op_imax, [TGSI_OPCODE_IMIN] = nir_op_imin, [TGSI_OPCODE_INEG] = nir_op_ineg, - [TGSI_OPCODE_ISGE] = nir_op_ige, + [TGSI_OPCODE_ISGE] = nir_op_ige32, [TGSI_OPCODE_ISHR] = nir_op_ishr, - [TGSI_OPCODE_ISLT] = nir_op_ilt, + [TGSI_OPCODE_ISLT] = nir_op_ilt32, [TGSI_OPCODE_F2U] = nir_op_f2u32, [TGSI_OPCODE_U2F] = nir_op_u2f32, [TGSI_OPCODE_UADD] = nir_op_iadd, @@ -1497,11 +1494,11 @@ [TGSI_OPCODE_UMIN] = nir_op_umin, [TGSI_OPCODE_UMOD] = nir_op_umod, [TGSI_OPCODE_UMUL] = nir_op_imul, - [TGSI_OPCODE_USEQ] = nir_op_ieq, - [TGSI_OPCODE_USGE] = nir_op_uge, + [TGSI_OPCODE_USEQ] = nir_op_ieq32, + [TGSI_OPCODE_USGE] = nir_op_uge32, [TGSI_OPCODE_USHR] = nir_op_ushr, - [TGSI_OPCODE_USLT] = nir_op_ult, - [TGSI_OPCODE_USNE] = nir_op_ine, + [TGSI_OPCODE_USLT] = nir_op_ult32, + [TGSI_OPCODE_USNE] = nir_op_ine32, [TGSI_OPCODE_SWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */ [TGSI_OPCODE_CASE] = 0, /* not 
emitted by glsl_to_tgsi.cpp */ diff -Nru mesa-18.3.3/src/gallium/auxiliary/os/os_memory_aligned.h mesa-19.0.1/src/gallium/auxiliary/os/os_memory_aligned.h --- mesa-18.3.3/src/gallium/auxiliary/os/os_memory_aligned.h 2017-11-14 18:46:21.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/os/os_memory_aligned.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,98 +0,0 @@ -/************************************************************************** - * - * Copyright 2008-2010 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/* - * Memory alignment wrappers. - */ - - -#ifndef _OS_MEMORY_H_ -#error "Must not be included directly. Include os_memory.h instead" -#endif - - -#include "pipe/p_compiler.h" - - - -/** - * Add two size_t values with integer overflow check. 
- * TODO: leverage __builtin_add_overflow where available - */ -static inline bool -add_overflow_size_t(size_t a, size_t b, size_t *res) -{ - *res = a + b; - return *res < a || *res < b; -} - - -/** - * Return memory on given byte alignment - */ -static inline void * -os_malloc_aligned(size_t size, size_t alignment) -{ - char *ptr, *buf; - size_t alloc_size; - - /* - * Calculate - * - * alloc_size = size + alignment + sizeof(void *) - * - * while checking for overflow. - */ - if (add_overflow_size_t(size, alignment, &alloc_size) || - add_overflow_size_t(alloc_size, sizeof(void *), &alloc_size)) { - return NULL; - } - - ptr = (char *) os_malloc(alloc_size); - if (!ptr) - return NULL; - - buf = (char *)(((uintptr_t)ptr + sizeof(void *) + alignment - 1) & ~((uintptr_t)(alignment - 1))); - *(char **)(buf - sizeof(void *)) = ptr; - - return buf; -} - - -/** - * Free memory returned by os_malloc_aligned(). - */ -static inline void -os_free_aligned(void *ptr) -{ - if (ptr) { - void **cubbyHole = (void **) ((char *) ptr - sizeof(void *)); - void *realAddr = *cubbyHole; - os_free(realAddr); - } -} diff -Nru mesa-18.3.3/src/gallium/auxiliary/os/os_memory_debug.h mesa-19.0.1/src/gallium/auxiliary/os/os_memory_debug.h --- mesa-18.3.3/src/gallium/auxiliary/os/os_memory_debug.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/os/os_memory_debug.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,92 +0,0 @@ -/************************************************************************** - * - * Copyright 2008-2010 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/* - * Debugging wrappers for OS memory management abstractions. - */ - - -#ifndef _OS_MEMORY_H_ -#error "Must not be included directly. 
Include os_memory.h instead" -#endif - - -#include "pipe/p_compiler.h" - - -#ifdef __cplusplus -extern "C" { -#endif - - -void * -debug_malloc(const char *file, unsigned line, const char *function, - size_t size); - -void * -debug_calloc(const char *file, unsigned line, const char *function, - size_t count, size_t size ); - -void -debug_free(const char *file, unsigned line, const char *function, - void *ptr); - -void * -debug_realloc(const char *file, unsigned line, const char *function, - void *old_ptr, size_t old_size, size_t new_size ); - -void -debug_memory_tag(void *ptr, unsigned tag); - -void -debug_memory_check_block(void *ptr); - -void -debug_memory_check(void); - - -#ifdef __cplusplus -} -#endif - - -#ifndef DEBUG_MEMORY_IMPLEMENTATION - -#define os_malloc( _size ) \ - debug_malloc( __FILE__, __LINE__, __FUNCTION__, _size ) -#define os_calloc( _count, _size ) \ - debug_calloc(__FILE__, __LINE__, __FUNCTION__, _count, _size ) -#define os_free( _ptr ) \ - debug_free( __FILE__, __LINE__, __FUNCTION__, _ptr ) -#define os_realloc( _ptr, _old_size, _new_size ) \ - debug_realloc( __FILE__, __LINE__, __FUNCTION__, _ptr, _old_size, _new_size ) - -/* TODO: wrap os_malloc_aligned() and os_free_aligned() too */ -#include "os_memory_aligned.h" - -#endif /* !DEBUG_MEMORY_IMPLEMENTATION */ diff -Nru mesa-18.3.3/src/gallium/auxiliary/os/os_memory.h mesa-19.0.1/src/gallium/auxiliary/os/os_memory.h --- mesa-18.3.3/src/gallium/auxiliary/os/os_memory.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/os/os_memory.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,80 +0,0 @@ -/************************************************************************** - * - * Copyright 2010 Vmware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -/* - * OS memory management abstractions - */ - - -#ifndef _OS_MEMORY_H_ -#define _OS_MEMORY_H_ - - -#include "pipe/p_config.h" -#include "pipe/p_compiler.h" - - -#if defined(PIPE_SUBSYSTEM_EMBEDDED) - -#ifdef __cplusplus -extern "C" { -#endif - -void * -os_malloc(size_t size); - -void * -os_calloc(size_t count, size_t size); - -void -os_free(void *ptr); - -void * -os_realloc(void *ptr, size_t old_size, size_t new_size); - -void * -os_malloc_aligned(size_t size, size_t alignment); - -void -os_free_aligned(void *ptr); - -#ifdef __cplusplus -} -#endif - -#elif defined(PIPE_OS_WINDOWS) && defined(DEBUG) && !defined(DEBUG_MEMORY_IMPLEMENTATION) - -# include "os_memory_debug.h" - -#else - -# include "os_memory_stdc.h" - -#endif - -#endif /* _OS_MEMORY_H_ */ diff -Nru mesa-18.3.3/src/gallium/auxiliary/os/os_memory_stdc.h mesa-19.0.1/src/gallium/auxiliary/os/os_memory_stdc.h --- mesa-18.3.3/src/gallium/auxiliary/os/os_memory_stdc.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/os/os_memory_stdc.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,76 +0,0 @@ -/************************************************************************** - * - * Copyright 2008-2010 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/* - * OS memory management abstractions for the standard C library. - */ - - -#ifndef _OS_MEMORY_H_ -#error "Must not be included directly. Include os_memory.h instead" -#endif - -#include - -#include "pipe/p_compiler.h" - - -#define os_malloc(_size) malloc(_size) -#define os_calloc(_count, _size ) calloc(_count, _size ) -#define os_free(_ptr) free(_ptr) - -#define os_realloc( _old_ptr, _old_size, _new_size) \ - realloc(_old_ptr, _new_size + 0*(_old_size)) - - -#if defined(HAVE_POSIX_MEMALIGN) - -static inline void * -os_malloc_aligned(size_t size, size_t alignment) -{ - void *ptr; - alignment = (alignment + sizeof(void*) - 1) & ~(sizeof(void*) - 1); - if(posix_memalign(&ptr, alignment, size) != 0) - return NULL; - return ptr; -} - -#define os_free_aligned(_ptr) free(_ptr) - -#elif defined(PIPE_OS_WINDOWS) - -#include - -#define os_malloc_aligned(_size, _align) _aligned_malloc(_size, _align) -#define os_free_aligned(_ptr) _aligned_free(_ptr) - -#else - -#include "os_memory_aligned.h" - -#endif diff -Nru mesa-18.3.3/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c mesa-19.0.1/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c --- mesa-18.3.3/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c 2019-03-31 23:16:37.000000000 +0000 @@ -107,11 +107,6 @@ .configuration = 
pipe_default_configuration_query, }, { - .driver_name = "pl111", - .create_screen = pipe_pl111_create_screen, - .configuration = pipe_default_configuration_query, - }, - { .driver_name = "virtio_gpu", .create_screen = pipe_virgl_create_screen, .configuration = pipe_default_configuration_query, @@ -132,16 +127,18 @@ .configuration = pipe_default_configuration_query, }, { - .driver_name = "imx-drm", - .create_screen = pipe_imx_drm_create_screen, - .configuration = pipe_default_configuration_query, - }, - { .driver_name = "tegra", .create_screen = pipe_tegra_create_screen, .configuration = pipe_default_configuration_query, }, }; + +static const struct drm_driver_descriptor default_driver_descriptor = { + .driver_name = "kmsro", + .create_screen = pipe_kmsro_create_screen, + .configuration = pipe_default_configuration_query, +}; + #endif static const struct drm_driver_descriptor * @@ -152,6 +149,7 @@ if (strcmp(driver_descriptors[i].driver_name, driver_name) == 0) return &driver_descriptors[i]; } + return &default_driver_descriptor; #else *plib = pipe_loader_find_module(driver_name, PIPE_SEARCH_DIR); if (!*plib) diff -Nru mesa-18.3.3/src/gallium/auxiliary/target-helpers/drm_helper.h mesa-19.0.1/src/gallium/auxiliary/target-helpers/drm_helper.h --- mesa-18.3.3/src/gallium/auxiliary/target-helpers/drm_helper.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/target-helpers/drm_helper.h 2019-03-31 23:16:37.000000000 +0000 @@ -83,24 +83,24 @@ #endif -#ifdef GALLIUM_PL111 -#include "pl111/drm/pl111_drm_public.h" +#ifdef GALLIUM_KMSRO +#include "kmsro/drm/kmsro_drm_public.h" struct pipe_screen * -pipe_pl111_create_screen(int fd, const struct pipe_screen_config *config) +pipe_kmsro_create_screen(int fd, const struct pipe_screen_config *config) { struct pipe_screen *screen; - screen = pl111_drm_screen_create(fd); + screen = kmsro_drm_screen_create(fd); return screen ? 
debug_screen_wrap(screen) : NULL; } #else struct pipe_screen * -pipe_pl111_create_screen(int fd, const struct pipe_screen_config *config) +pipe_kmsro_create_screen(int fd, const struct pipe_screen_config *config) { - fprintf(stderr, "pl111: driver missing\n"); + fprintf(stderr, "kmsro: driver missing\n"); return NULL; } @@ -248,7 +248,7 @@ { struct pipe_screen *screen; - screen = fd_drm_screen_create(fd); + screen = fd_drm_screen_create(fd, NULL); return screen ? debug_screen_wrap(screen) : NULL; } @@ -354,29 +354,6 @@ return NULL; } -#endif - -#ifdef GALLIUM_IMX -#include "imx/drm/imx_drm_public.h" - -struct pipe_screen * -pipe_imx_drm_create_screen(int fd, const struct pipe_screen_config *config) -{ - struct pipe_screen *screen; - - screen = imx_drm_screen_create(fd); - return screen ? debug_screen_wrap(screen) : NULL; -} - -#else - -struct pipe_screen * -pipe_imx_drm_create_screen(int fd, const struct pipe_screen_config *config) -{ - fprintf(stderr, "imx-drm: driver missing\n"); - return NULL; -} - #endif #ifdef GALLIUM_TEGRA diff -Nru mesa-18.3.3/src/gallium/auxiliary/target-helpers/drm_helper_public.h mesa-19.0.1/src/gallium/auxiliary/target-helpers/drm_helper_public.h --- mesa-18.3.3/src/gallium/auxiliary/target-helpers/drm_helper_public.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/target-helpers/drm_helper_public.h 2019-03-31 23:16:37.000000000 +0000 @@ -43,7 +43,7 @@ pipe_vc4_create_screen(int fd, const struct pipe_screen_config *config); struct pipe_screen * -pipe_pl111_create_screen(int fd, const struct pipe_screen_config *config); +pipe_kmsro_create_screen(int fd, const struct pipe_screen_config *config); struct pipe_screen * pipe_etna_create_screen(int fd, const struct pipe_screen_config *config); diff -Nru mesa-18.3.3/src/gallium/auxiliary/tgsi/tgsi_exec.c mesa-19.0.1/src/gallium/auxiliary/tgsi/tgsi_exec.c --- mesa-18.3.3/src/gallium/auxiliary/tgsi/tgsi_exec.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/auxiliary/tgsi/tgsi_exec.c 2019-03-31 23:16:37.000000000 +0000 @@ -4253,6 +4253,9 @@ if (val == value[0].u[0]) val = value2[0].u[0]; break; + case TGSI_OPCODE_ATOMFADD: + val = fui(r[0].f[0] + value[0].f[0]); + break; default: break; } @@ -5933,6 +5936,7 @@ case TGSI_OPCODE_ATOMUMAX: case TGSI_OPCODE_ATOMIMIN: case TGSI_OPCODE_ATOMIMAX: + case TGSI_OPCODE_ATOMFADD: exec_atomop(mach, inst); break; diff -Nru mesa-18.3.3/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h mesa-19.0.1/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h --- mesa-18.3.3/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h 2019-03-31 23:16:37.000000000 +0000 @@ -101,7 +101,7 @@ OPCODE(0, 0, NONE, BGNSUB, .post_indent = 1) OPCODE(0, 0, NONE, ENDLOOP, .is_branch = 1, .pre_dedent = 1) OPCODE(0, 0, NONE, ENDSUB, .pre_dedent = 1) -OPCODE_GAP(103) /* removed */ +OPCODE(1, 3, OTHR, ATOMFADD, .is_store = 1) OPCODE(1, 1, OTHR, TXQS, .is_tex = 1) OPCODE(1, 1, OTHR, RESQ) OPCODE(1, 1, COMP, READ_FIRST) diff -Nru mesa-18.3.3/src/gallium/auxiliary/tgsi/tgsi_scan.c mesa-19.0.1/src/gallium/auxiliary/tgsi/tgsi_scan.c --- mesa-18.3.3/src/gallium/auxiliary/tgsi/tgsi_scan.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/tgsi/tgsi_scan.c 2019-03-31 23:16:37.000000000 +0000 @@ -368,7 +368,19 @@ info->uses_bindless_samplers = true; break; case TGSI_OPCODE_RESQ: + if (tgsi_is_bindless_image_file(fullinst->Src[0].Register.File)) + info->uses_bindless_images = true; + break; case TGSI_OPCODE_LOAD: + if (tgsi_is_bindless_image_file(fullinst->Src[0].Register.File)) { + info->uses_bindless_images = true; + + if (fullinst->Memory.Texture == TGSI_TEXTURE_BUFFER) + info->uses_bindless_buffer_load = true; + else + info->uses_bindless_image_load = true; + } + break; case TGSI_OPCODE_ATOMUADD: case TGSI_OPCODE_ATOMXCHG: case TGSI_OPCODE_ATOMCAS: @@ -379,12 +391,25 @@ case 
TGSI_OPCODE_ATOMUMAX: case TGSI_OPCODE_ATOMIMIN: case TGSI_OPCODE_ATOMIMAX: - if (tgsi_is_bindless_image_file(fullinst->Src[0].Register.File)) + case TGSI_OPCODE_ATOMFADD: + if (tgsi_is_bindless_image_file(fullinst->Src[0].Register.File)) { info->uses_bindless_images = true; + + if (fullinst->Memory.Texture == TGSI_TEXTURE_BUFFER) + info->uses_bindless_buffer_atomic = true; + else + info->uses_bindless_image_atomic = true; + } break; case TGSI_OPCODE_STORE: - if (tgsi_is_bindless_image_file(fullinst->Dst[0].Register.File)) + if (tgsi_is_bindless_image_file(fullinst->Dst[0].Register.File)) { info->uses_bindless_images = true; + + if (fullinst->Memory.Texture == TGSI_TEXTURE_BUFFER) + info->uses_bindless_buffer_store = true; + else + info->uses_bindless_image_store = true; + } break; default: break; diff -Nru mesa-18.3.3/src/gallium/auxiliary/tgsi/tgsi_scan.h mesa-19.0.1/src/gallium/auxiliary/tgsi/tgsi_scan.h --- mesa-18.3.3/src/gallium/auxiliary/tgsi/tgsi_scan.h 2018-02-27 16:44:19.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/tgsi/tgsi_scan.h 2019-03-31 23:16:37.000000000 +0000 @@ -153,6 +153,13 @@ unsigned shader_buffers_load; /**< bitmask of shader buffers using loads */ unsigned shader_buffers_store; /**< bitmask of shader buffers using stores */ unsigned shader_buffers_atomic; /**< bitmask of shader buffers using atomics */ + bool uses_bindless_buffer_load; + bool uses_bindless_buffer_store; + bool uses_bindless_buffer_atomic; + bool uses_bindless_image_load; + bool uses_bindless_image_store; + bool uses_bindless_image_atomic; + /** * Bitmask indicating which register files are accessed with * indirect addressing. The bits are (1 << TGSI_FILE_x), etc. 
@@ -213,7 +220,9 @@ { return file != TGSI_FILE_IMAGE && file != TGSI_FILE_MEMORY && - file != TGSI_FILE_BUFFER; + file != TGSI_FILE_BUFFER && + file != TGSI_FILE_CONSTBUF && + file != TGSI_FILE_HW_ATOMIC; } #ifdef __cplusplus diff -Nru mesa-18.3.3/src/gallium/auxiliary/tgsi/tgsi_util.c mesa-19.0.1/src/gallium/auxiliary/tgsi/tgsi_util.c --- mesa-18.3.3/src/gallium/auxiliary/tgsi/tgsi_util.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/tgsi/tgsi_util.c 2019-03-31 23:16:37.000000000 +0000 @@ -385,6 +385,7 @@ case TGSI_OPCODE_ATOMUMAX: case TGSI_OPCODE_ATOMIMIN: case TGSI_OPCODE_ATOMIMAX: + case TGSI_OPCODE_ATOMFADD: if (src_idx == 0) { read_mask = TGSI_WRITEMASK_XY; /* bindless handle possible */ } else if (src_idx == 1) { diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_debug_memory.c mesa-19.0.1/src/gallium/auxiliary/util/u_debug_memory.c --- mesa-18.3.3/src/gallium/auxiliary/util/u_debug_memory.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_debug_memory.c 2019-03-31 23:16:37.000000000 +0000 @@ -36,14 +36,14 @@ #define DEBUG_MEMORY_IMPLEMENTATION -#include "os/os_memory.h" -#include "os/os_memory_debug.h" #include "os/os_thread.h" #include "util/u_debug.h" #include "util/u_debug_gallium.h" #include "util/u_debug_stack.h" #include "util/list.h" +#include "util/os_memory.h" +#include "util/os_memory_debug.h" #define DEBUG_MEMORY_MAGIC 0x6e34090aU diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_debug_stack_android.cpp mesa-19.0.1/src/gallium/auxiliary/util/u_debug_stack_android.cpp --- mesa-18.3.3/src/gallium/auxiliary/util/u_debug_stack_android.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_debug_stack_android.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -23,7 +23,7 @@ #include -#include "u_debug.h" +#include "util/u_debug.h" #include "u_debug_stack.h" #include "util/hash_table.h" #include "os/os_thread.h" diff -Nru 
mesa-18.3.3/src/gallium/auxiliary/util/u_format.c mesa-19.0.1/src/gallium/auxiliary/util/u_format.c --- mesa-18.3.3/src/gallium/auxiliary/util/u_format.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_format.c 2019-03-31 23:16:37.000000000 +0000 @@ -32,7 +32,7 @@ * @author Jose Fonseca */ -#include "u_memory.h" +#include "util/u_memory.h" #include "u_format.h" #include "u_format_s3tc.h" #include "u_surface.h" @@ -149,24 +149,25 @@ } /** - * Returns true if all non-void channels are normalized signed. + * Returns true if the format contains normalized signed channels. */ boolean util_format_is_snorm(enum pipe_format format) { const struct util_format_description *desc = util_format_description(format); - int i; - if (desc->is_mixed) - return FALSE; + return desc->is_snorm; +} - i = util_format_get_first_non_void_channel(format); - if (i == -1) - return FALSE; +/** + * Returns true if the format contains normalized unsigned channels. + */ +boolean +util_format_is_unorm(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); - return desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED && - !desc->channel[i].pure_integer && - desc->channel[i].normalized; + return desc->is_unorm; } boolean @@ -865,3 +866,43 @@ } } } + +enum pipe_format +util_format_snorm8_to_sint8(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_R8_SNORM: + return PIPE_FORMAT_R8_SINT; + case PIPE_FORMAT_R8G8_SNORM: + return PIPE_FORMAT_R8G8_SINT; + case PIPE_FORMAT_R8G8B8_SNORM: + return PIPE_FORMAT_R8G8B8_SINT; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return PIPE_FORMAT_R8G8B8A8_SINT; + + case PIPE_FORMAT_A8_SNORM: + return PIPE_FORMAT_A8_SINT; + case PIPE_FORMAT_L8_SNORM: + return PIPE_FORMAT_L8_SINT; + case PIPE_FORMAT_L8A8_SNORM: + return PIPE_FORMAT_L8A8_SINT; + case PIPE_FORMAT_I8_SNORM: + return PIPE_FORMAT_I8_SINT; + + case PIPE_FORMAT_R8G8B8X8_SNORM: + return PIPE_FORMAT_R8G8B8X8_SINT; + case 
PIPE_FORMAT_R8A8_SNORM: + return PIPE_FORMAT_R8A8_SINT; + case PIPE_FORMAT_A8L8_SNORM: + return PIPE_FORMAT_A8L8_SINT; + case PIPE_FORMAT_G8R8_SNORM: + return PIPE_FORMAT_G8R8_SINT; + case PIPE_FORMAT_A8B8G8R8_SNORM: + return PIPE_FORMAT_A8B8G8R8_SINT; + case PIPE_FORMAT_X8B8G8R8_SNORM: + return PIPE_FORMAT_X8B8G8R8_SINT; + + default: + return format; + } +} diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_format.csv mesa-19.0.1/src/gallium/auxiliary/util/u_format.csv --- mesa-18.3.3/src/gallium/auxiliary/util/u_format.csv 2018-02-08 14:40:56.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_format.csv 2019-03-31 23:16:37.000000000 +0000 @@ -45,7 +45,7 @@ # - optionally followed by 'p' if it is pure # - number of bits # - channel swizzle -# - color space: rgb, yub, sz +# - color space: rgb, srgb, yuv, zs # - (optional) channel encoding for big-endian targets # - (optional) channel swizzle for big-endian targets # @@ -114,6 +114,7 @@ # SRGB formats PIPE_FORMAT_L8_SRGB , plain, 1, 1, un8 , , , , xxx1, srgb +PIPE_FORMAT_R8_SRGB , plain, 1, 1, un8 , , , , x001, srgb PIPE_FORMAT_L8A8_SRGB , plain, 1, 1, un8 , un8 , , , xxxy, srgb PIPE_FORMAT_R8G8B8_SRGB , plain, 1, 1, un8 , un8 , un8 , , xyz1, srgb PIPE_FORMAT_R8G8B8A8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , xyzw, srgb @@ -433,13 +434,17 @@ PIPE_FORMAT_A8L8_UNORM , plain, 1, 1, un8 , un8 , , , yyyx, rgb PIPE_FORMAT_A8L8_SNORM , plain, 1, 1, sn8 , sn8 , , , yyyx, rgb +PIPE_FORMAT_A8L8_SINT , plain, 1, 1, sp8 , sp8 , , , yyyx, rgb PIPE_FORMAT_A8L8_SRGB , plain, 1, 1, un8 , un8 , , , yyyx, srgb PIPE_FORMAT_A16L16_UNORM , plain, 1, 1, un16, un16, , , yyyx, rgb PIPE_FORMAT_G8R8_UNORM , plain, 1, 1, un8 , un8 , , , yx01, rgb PIPE_FORMAT_G8R8_SNORM , plain, 1, 1, sn8 , sn8 , , , yx01, rgb +PIPE_FORMAT_G8R8_SINT , plain, 1, 1, sp8 , sp8 , , , yx01, rgb PIPE_FORMAT_G16R16_UNORM , plain, 1, 1, un16, un16, , , yx01, rgb PIPE_FORMAT_G16R16_SNORM , plain, 1, 1, sn16, sn16, , , yx01, rgb PIPE_FORMAT_A8B8G8R8_SNORM , 
plain, 1, 1, sn8 , sn8 , sn8 , sn8 , wzyx, rgb +PIPE_FORMAT_A8B8G8R8_SINT , plain, 1, 1, sp8 , sp8 , sp8 , sp8 , wzyx, rgb PIPE_FORMAT_X8B8G8R8_SNORM , plain, 1, 1, x8, sn8, sn8, sn8, wzy1, rgb +PIPE_FORMAT_X8B8G8R8_SINT , plain, 1, 1, x8, sp8, sp8, sp8, wzy1, rgb diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_format.h mesa-19.0.1/src/gallium/auxiliary/util/u_format.h --- mesa-18.3.3/src/gallium/auxiliary/util/u_format.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_format.h 2019-03-31 23:16:37.000000000 +0000 @@ -178,6 +178,16 @@ unsigned is_mixed:1; /** + * Whether the format contains UNORM channels + */ + unsigned is_unorm:1; + + /** + * Whether the format contains SNORM channels + */ + unsigned is_snorm:1; + + /** * Input channel description, in the order XYZW. * * Only valid for UTIL_FORMAT_LAYOUT_PLAIN formats. @@ -727,6 +737,9 @@ util_format_is_snorm(enum pipe_format format); boolean +util_format_is_unorm(enum pipe_format format); + +boolean util_format_is_snorm8(enum pipe_format format); /** @@ -925,6 +938,8 @@ switch (format) { case PIPE_FORMAT_L8_UNORM: return PIPE_FORMAT_L8_SRGB; + case PIPE_FORMAT_R8_UNORM: + return PIPE_FORMAT_R8_SRGB; case PIPE_FORMAT_L8A8_UNORM: return PIPE_FORMAT_L8A8_SRGB; case PIPE_FORMAT_R8G8B8_UNORM: @@ -1001,6 +1016,8 @@ switch (format) { case PIPE_FORMAT_L8_SRGB: return PIPE_FORMAT_L8_UNORM; + case PIPE_FORMAT_R8_SRGB: + return PIPE_FORMAT_R8_UNORM; case PIPE_FORMAT_L8A8_SRGB: return PIPE_FORMAT_L8A8_UNORM; case PIPE_FORMAT_R8G8B8_SRGB: @@ -1351,6 +1368,9 @@ void util_format_unswizzle_4f(float *dst, const float *src, const unsigned char swz[4]); +enum pipe_format +util_format_snorm8_to_sint8(enum pipe_format format); + #ifdef __cplusplus } // extern "C" { #endif diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_format_parse.py mesa-19.0.1/src/gallium/auxiliary/util/u_format_parse.py --- mesa-18.3.3/src/gallium/auxiliary/util/u_format_parse.py 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/auxiliary/util/u_format_parse.py 2019-03-31 23:16:37.000000000 +0000 @@ -187,6 +187,26 @@ return True return False + def is_compressed(self): + for channel in self.le_channels: + if channel.type != VOID: + return False + return True + + def is_unorm(self): + # Non-compressed formats all have unorm or srgb in their name. + for keyword in ['_UNORM', '_SRGB']: + if keyword in self.name: + return True + + # All the compressed formats in GLES3.2 and GL4.6 ("Table 8.14: Generic + # and specific compressed internal formats.") that aren't snorm for + # border colors are unorm, other than BPTC_*_FLOAT. + return self.is_compressed() and not ('FLOAT' in self.name or self.is_snorm()) + + def is_snorm(self): + return '_SNORM' in self.name + def is_pot(self): return is_pot(self.block_size()) diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_format_table.py mesa-19.0.1/src/gallium/auxiliary/util/u_format_table.py --- mesa-18.3.3/src/gallium/auxiliary/util/u_format_table.py 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_format_table.py 2019-03-31 23:16:37.000000000 +0000 @@ -136,6 +136,8 @@ print(" %s,\t/* is_array */" % (bool_map(format.is_array()),)) print(" %s,\t/* is_bitmask */" % (bool_map(format.is_bitmask()),)) print(" %s,\t/* is_mixed */" % (bool_map(format.is_mixed()),)) + print(" %s,\t/* is_unorm */" % (bool_map(format.is_unorm()),)) + print(" %s,\t/* is_snorm */" % (bool_map(format.is_snorm()),)) u_format_pack.print_channels(format, do_channel_array) u_format_pack.print_channels(format, do_swizzle_array) print(" %s," % (colorspace_map(format.colorspace),)) diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_format_tests.c mesa-19.0.1/src/gallium/auxiliary/util/u_format_tests.c --- mesa-18.3.3/src/gallium/auxiliary/util/u_format_tests.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_format_tests.c 2019-03-31 23:16:37.000000000 +0000 @@ -30,7 +30,7 @@ #include #include 
"pipe/p_config.h" -#include "u_memory.h" +#include "util/u_memory.h" #include "u_format_tests.h" @@ -236,6 +236,10 @@ {PIPE_FORMAT_L8_SRGB, PACKED_1x8(0xff), PACKED_1x8(0xbc), UNPACKED_1x1(0.502886458033, 0.502886458033, 0.502886458033, 1.0)}, {PIPE_FORMAT_L8_SRGB, PACKED_1x8(0xff), PACKED_1x8(0xff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + {PIPE_FORMAT_R8_SRGB, PACKED_1x8(0xff), PACKED_1x8(0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8_SRGB, PACKED_1x8(0xff), PACKED_1x8(0xbc), UNPACKED_1x1(0.502886458033, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8_SRGB, PACKED_1x8(0xff), PACKED_1x8(0xff), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_L8A8_SRGB, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, {PIPE_FORMAT_L8A8_SRGB, PACKED_1x16(0xffff), PACKED_1x16(0x00bc), UNPACKED_1x1(0.502886458033, 0.502886458033, 0.502886458033, 0.0)}, {PIPE_FORMAT_L8A8_SRGB, PACKED_1x16(0xffff), PACKED_1x16(0x00ff), UNPACKED_1x1(1.0, 1.0, 1.0, 0.0)}, diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_framebuffer.c mesa-19.0.1/src/gallium/auxiliary/util/u_framebuffer.c --- mesa-18.3.3/src/gallium/auxiliary/util/u_framebuffer.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_framebuffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -229,13 +229,19 @@ if (!(fb->nr_cbufs || fb->zsbuf)) return MAX2(fb->samples, 1); + /** + * If a driver doesn't advertise PIPE_CAP_SURFACE_SAMPLE_COUNT, + * pipe_surface::nr_samples will always be 0. 
+ */ for (i = 0; i < fb->nr_cbufs; i++) { if (fb->cbufs[i]) { - return MAX2(1, fb->cbufs[i]->texture->nr_samples); + return MAX3(1, fb->cbufs[i]->texture->nr_samples, + fb->cbufs[i]->nr_samples); } } if (fb->zsbuf) { - return MAX2(1, fb->zsbuf->texture->nr_samples); + return MAX3(1, fb->zsbuf->texture->nr_samples, + fb->zsbuf->nr_samples); } return 1; diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_helpers.c mesa-19.0.1/src/gallium/auxiliary/util/u_helpers.c --- mesa-18.3.3/src/gallium/auxiliary/util/u_helpers.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_helpers.c 2019-03-31 23:16:37.000000000 +0000 @@ -121,43 +121,6 @@ return *out_buffer != NULL; } -#ifdef HAVE_PTHREAD_SETAFFINITY - -static unsigned L3_cache_number; -static once_flag thread_pinning_once_flag = ONCE_FLAG_INIT; - -static void -util_set_full_cpu_affinity(void) -{ - cpu_set_t cpuset; - - CPU_ZERO(&cpuset); - for (unsigned i = 0; i < CPU_SETSIZE; i++) - CPU_SET(i, &cpuset); - - pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset); -} - -static void -util_init_thread_pinning(void) -{ - /* Get a semi-random number. */ - int64_t t = os_time_get_nano(); - L3_cache_number = (t ^ (t >> 8) ^ (t >> 16)); - - /* Reset thread affinity for all child processes to prevent them from - * inheriting the current thread's affinity. - * - * XXX: If the driver is unloaded after this, and the app later calls - * fork(), the child process will likely crash before fork() returns, - * because the address where util_set_full_cpu_affinity was located - * will either be unmapped or point to random other contents. - */ - pthread_atfork(NULL, NULL, util_set_full_cpu_affinity); -} - -#endif - /** * Called by MakeCurrent. Used to notify the driver that the application * thread may have been changed. @@ -170,30 +133,21 @@ * pinned. 
*/ void -util_context_thread_changed(struct pipe_context *ctx, thrd_t *upper_thread) +util_pin_driver_threads_to_random_L3(struct pipe_context *ctx, + thrd_t *upper_thread) { -#ifdef HAVE_PTHREAD_SETAFFINITY /* If pinning has no effect, don't do anything. */ if (util_cpu_caps.nr_cpus == util_cpu_caps.cores_per_L3) return; - thrd_t current = thrd_current(); - int cache = util_get_L3_for_pinned_thread(current, - util_cpu_caps.cores_per_L3); - - call_once(&thread_pinning_once_flag, util_init_thread_pinning); - - /* If the main thread is not pinned, choose the L3 cache. */ - if (cache == -1) { - unsigned num_L3_caches = util_cpu_caps.nr_cpus / - util_cpu_caps.cores_per_L3; - - /* Choose a different L3 cache for each subsequent MakeCurrent. */ - cache = p_atomic_inc_return(&L3_cache_number) % num_L3_caches; - util_pin_thread_to_L3(current, cache, util_cpu_caps.cores_per_L3); - } + unsigned num_L3_caches = util_cpu_caps.nr_cpus / + util_cpu_caps.cores_per_L3; + + /* Get a semi-random number. */ + int64_t t = os_time_get_nano(); + unsigned cache = (t ^ (t >> 8) ^ (t >> 16)) % num_L3_caches; - /* Tell the driver to pin its threads to the same L3 cache. */ + /* Tell the driver to pin its threads to the selected L3 cache. */ if (ctx->set_context_param) { ctx->set_context_param(ctx, PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE, cache); @@ -202,7 +156,6 @@ /* Do the same for the upper level thread if there is any (e.g. glthread) */ if (upper_thread) util_pin_thread_to_L3(*upper_thread, cache, util_cpu_caps.cores_per_L3); -#endif } /* This is a helper for hardware bring-up. Don't remove. 
*/ diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_helpers.h mesa-19.0.1/src/gallium/auxiliary/util/u_helpers.h --- mesa-18.3.3/src/gallium/auxiliary/util/u_helpers.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_helpers.h 2019-03-31 23:16:37.000000000 +0000 @@ -52,7 +52,8 @@ unsigned *out_offset); void -util_context_thread_changed(struct pipe_context *ctx, thrd_t *upper_thread); +util_pin_driver_threads_to_random_L3(struct pipe_context *ctx, + thrd_t *upper_thread); struct pipe_query * util_begin_pipestat_query(struct pipe_context *ctx); diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_log.c mesa-19.0.1/src/gallium/auxiliary/util/u_log.c --- mesa-18.3.3/src/gallium/auxiliary/util/u_log.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_log.c 2019-03-31 23:16:37.000000000 +0000 @@ -23,7 +23,7 @@ #include "u_log.h" -#include "u_memory.h" +#include "util/u_memory.h" #include "util/u_string.h" struct page_entry { diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_memory.h mesa-19.0.1/src/gallium/auxiliary/util/u_memory.h --- mesa-18.3.3/src/gallium/auxiliary/util/u_memory.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_memory.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,100 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -/* - * Memory functions - */ - - -#ifndef U_MEMORY_H -#define U_MEMORY_H - - -#include "util/u_pointer.h" -#include "util/u_debug.h" -#include "os/os_memory.h" - - -#ifdef __cplusplus -extern "C" { -#endif - - -#define MALLOC(_size) os_malloc(_size) - -#define CALLOC(_count, _size) os_calloc(_count, _size) - -#define FREE(_ptr ) os_free(_ptr) - -#define REALLOC(_ptr, _old_size, _size) os_realloc(_ptr, _old_size, _size) - -#define MALLOC_STRUCT(T) (struct T *) MALLOC(sizeof(struct T)) - -#define CALLOC_STRUCT(T) (struct T *) CALLOC(1, sizeof(struct T)) - -#define CALLOC_VARIANT_LENGTH_STRUCT(T,more_size) ((struct T *) CALLOC(1, sizeof(struct T) + more_size)) - - -#define align_malloc(_size, _alignment) os_malloc_aligned(_size, _alignment) -#define align_free(_ptr) os_free_aligned(_ptr) - -static inline void * -align_calloc(size_t size, unsigned long alignment) -{ - void *ptr = align_malloc(size, alignment); - if (ptr) - memset(ptr, 0, size); - return ptr; -} - -/** - * Duplicate a block of memory. - */ -static inline void * -mem_dup(const void *src, uint size) -{ - void *dup = MALLOC(size); - if (dup) - memcpy(dup, src, size); - return dup; -} - - -/** - * Offset of a field in a struct, in bytes. 
- */ -#define Offset(TYPE, MEMBER) ((uintptr_t)&(((TYPE *)NULL)->MEMBER)) - - - -#ifdef __cplusplus -} -#endif - - -#endif /* U_MEMORY_H */ diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_prim_restart.c mesa-19.0.1/src/gallium/auxiliary/util/u_prim_restart.c --- mesa-18.3.3/src/gallium/auxiliary/util/u_prim_restart.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_prim_restart.c 2019-03-31 23:16:37.000000000 +0000 @@ -26,7 +26,7 @@ #include "u_inlines.h" -#include "u_memory.h" +#include "util/u_memory.h" #include "u_prim_restart.h" diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_screen.c mesa-19.0.1/src/gallium/auxiliary/util/u_screen.c --- mesa-18.3.3/src/gallium/auxiliary/util/u_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -77,6 +77,7 @@ case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: case PIPE_CAP_SEAMLESS_CUBE_MAP: case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND: return 0; case PIPE_CAP_MIN_TEXEL_OFFSET: @@ -145,6 +146,7 @@ return 1; case PIPE_CAP_QUERY_PIPELINE_STATISTICS: + case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE: case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: return 0; @@ -261,6 +263,9 @@ case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: return 1; + case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS: + return 0; + case PIPE_CAP_TGSI_FS_FBFETCH: case PIPE_CAP_TGSI_MUL_ZERO_WINS: case PIPE_CAP_DOUBLES: @@ -311,6 +316,7 @@ case PIPE_CAP_MAX_COMBINED_SHADER_BUFFERS: case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTERS: case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS: + case PIPE_CAP_TGSI_ATOMFADD: return 0; case PIPE_CAP_MAX_GS_INVOCATIONS: @@ -326,6 +332,11 @@ case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET: return 2047; + case PIPE_CAP_SURFACE_SAMPLE_COUNT: + return 0; + case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL: + return 1; + default: unreachable("bad PIPE_CAP_*"); } diff -Nru 
mesa-18.3.3/src/gallium/auxiliary/util/u_tests.c mesa-19.0.1/src/gallium/auxiliary/util/u_tests.c --- mesa-18.3.3/src/gallium/auxiliary/util/u_tests.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_tests.c 2019-03-31 23:16:37.000000000 +0000 @@ -787,6 +787,80 @@ util_report_result_helper(pass, name); } +static void +test_compute_clear_image(struct pipe_context *ctx) +{ + struct cso_context *cso; + struct pipe_resource *cb; + const char *text; + + cso = cso_create_context(ctx, 0); + cb = util_create_texture2d(ctx->screen, 256, 256, + PIPE_FORMAT_R8G8B8A8_UNORM, 1); + + /* Compute shader. */ + text = "COMP\n" + "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n" + "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n" + "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n" + "DCL SV[0], THREAD_ID\n" + "DCL SV[1], BLOCK_ID\n" + "DCL IMAGE[0], 2D, PIPE_FORMAT_R8G8B8A8_UNORM, WR\n" + "DCL TEMP[0]\n" + "IMM[0] UINT32 { 8, 8, 0, 0}\n" + "IMM[1] FLT32 { 1, 0, 0, 0}\n" + + /* TEMP[0].xy = SV[1] * IMM[0] + SV[0]; */ + "UMAD TEMP[0].xy, SV[1], IMM[0], SV[0]\n" + "STORE IMAGE[0], TEMP[0], IMM[1], 2D, PIPE_FORMAT_R8G8B8A8_UNORM\n" + "END\n"; + + struct tgsi_token tokens[1000]; + if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) { + assert(0); + util_report_result(FAIL); + return; + } + + struct pipe_compute_state state = {0}; + state.ir_type = PIPE_SHADER_IR_TGSI; + state.prog = tokens; + + void *compute_shader = ctx->create_compute_state(ctx, &state); + cso_set_compute_shader_handle(cso, compute_shader); + + /* Bind the image. */ + struct pipe_image_view image = {0}; + image.resource = cb; + image.shader_access = image.access = PIPE_IMAGE_ACCESS_READ_WRITE; + image.format = cb->format; + + ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, &image); + + /* Dispatch compute. 
*/ + struct pipe_grid_info info = {0}; + info.block[0] = 8; + info.block[1] = 8; + info.block[2] = 1; + info.grid[0] = cb->width0 / 8; + info.grid[1] = cb->height0 / 8; + info.grid[2] = 1; + + ctx->launch_grid(ctx, &info); + + /* Check pixels. */ + static const float expected[] = {1.0, 0.0, 0.0, 0.0}; + bool pass = util_probe_rect_rgba(ctx, cb, 0, 0, + cb->width0, cb->height0, expected); + + /* Cleanup. */ + cso_destroy_context(cso); + ctx->delete_compute_state(ctx, compute_shader); + pipe_resource_reference(&cb, NULL); + + util_report_result(pass); +} + /** * Run all tests. This should be run with a clean context after * context_create. @@ -808,6 +882,8 @@ for (int i = 1; i <= 8; i = i * 2) test_texture_barrier(ctx, true, i); + test_compute_clear_image(ctx); + ctx->destroy(ctx); puts("Done. Exiting.."); diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_threaded_context.c mesa-19.0.1/src/gallium/auxiliary/util/u_threaded_context.c --- mesa-18.3.3/src/gallium/auxiliary/util/u_threaded_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_threaded_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -1524,7 +1524,8 @@ if (ttrans->staging) { struct pipe_box src_box; - u_box_1d(ttrans->offset + box->x % tc->map_buffer_alignment, + u_box_1d(ttrans->offset + ttrans->b.box.x % tc->map_buffer_alignment + + (box->x - ttrans->b.box.x), box->width, &src_box); /* Copy the staging buffer into the original one. */ diff -Nru mesa-18.3.3/src/gallium/docs/source/context.rst mesa-19.0.1/src/gallium/docs/source/context.rst --- mesa-18.3.3/src/gallium/docs/source/context.rst 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/docs/source/context.rst 2019-03-31 23:16:37.000000000 +0000 @@ -491,6 +491,11 @@ If a shader type is not supported by the device/driver, the corresponding values should be set to 0. +``PIPE_QUERY_PIPELINE_STATISTICS_SINGLE`` returns a single counter from +the ``PIPE_QUERY_PIPELINE_STATISTICS`` group. 
The specific counter must +be selected when calling ``create_query`` by passing one of the +``PIPE_STAT_QUERY`` enums as the query's ``index``. + Gallium does not guarantee the availability of any query types; one must always check the capabilities of the :ref:`Screen` first. diff -Nru mesa-18.3.3/src/gallium/docs/source/screen.rst mesa-19.0.1/src/gallium/docs/source/screen.rst --- mesa-18.3.3/src/gallium/docs/source/screen.rst 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/docs/source/screen.rst 2019-03-31 23:16:37.000000000 +0000 @@ -409,7 +409,7 @@ for a driver that does not support multiple output streams (i.e., ``PIPE_CAP_MAX_VERTEX_STREAMS`` is 1), both query types are identical. * ``PIPE_CAP_MEMOBJ``: Whether operations on memory objects are supported. -* ``PIPE_CAP_LOAD_CONSTBUF``: True if the driver supports TGSI_OPCODE_LOAD use +* ``PIPE_CAP_LOAD_CONSTBUF``: True if the driver supports ``TGSI_OPCODE_LOAD`` use with constant buffers. * ``PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS``: Any TGSI register can be used as an address for indirect register indexing. @@ -434,7 +434,7 @@ Whether pipe_vertex_buffer::buffer_offset is treated as signed. The u_vbuf module needs this for optimal performance in workstation applications. * ``PIPE_CAP_CONTEXT_PRIORITY_MASK``: For drivers that support per-context - priorities, this returns a bitmask of PIPE_CONTEXT_PRIORITY_x for the + priorities, this returns a bitmask of ``PIPE_CONTEXT_PRIORITY_x`` for the supported priority levels. A driver that does not support prioritized contexts can return 0. * ``PIPE_CAP_FENCE_SIGNAL``: True if the driver supports signaling semaphores @@ -446,17 +446,17 @@ * ``PIPE_CAP_PACKED_UNIFORMS``: True if the driver supports packed uniforms as opposed to padding to vec4s. * ``PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES``: Whether the - PIPE_CONSERVATIVE_RASTER_POST_SNAP mode is supported for triangles. + ``PIPE_CONSERVATIVE_RASTER_POST_SNAP`` mode is supported for triangles. 
* ``PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES``: Whether the -PIPE_CONSERVATIVE_RASTER_POST_SNAP mode is supported for points and lines. + ``PIPE_CONSERVATIVE_RASTER_POST_SNAP`` mode is supported for points and lines. * ``PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES``: Whether the -PIPE_CONSERVATIVE_RASTER_PRE_SNAP mode is supported for triangles. + ``PIPE_CONSERVATIVE_RASTER_PRE_SNAP`` mode is supported for triangles. * ``PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES``: Whether the -PIPE_CONSERVATIVE_RASTER_PRE_SNAP mode is supported for points and lines. -* ``PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE``: Whether PIPE_CAP_POST_DEPTH_COVERAGE -works with conservative rasterization. + ``PIPE_CONSERVATIVE_RASTER_PRE_SNAP`` mode is supported for points and lines. +* ``PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE``: Whether + ``PIPE_CAP_POST_DEPTH_COVERAGE`` works with conservative rasterization. * ``PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS``: The maximum -subpixel precision bias in bits during conservative rasterization. + subpixel precision bias in bits during conservative rasterization. * ``PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS``: True is the driver supports programmable sample location through ```get_sample_pixel_grid``` and ```set_sample_locations```. @@ -472,11 +472,25 @@ * ``PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS``: Maximum total number of atomic counter buffers. A value of 0 means the sum of all per-shader stage maximums (see ``PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS``). -* ``PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET: Maximum recommend memory size +* ``PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET``: Maximum recommend memory size for all active texture uploads combined. This is a performance hint. 0 means no limit. * ``PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET``: The maximum supported value for of pipe_vertex_element::src_offset. 
+* ``PIPE_CAP_SURFACE_SAMPLE_COUNT``: Whether the driver + supports pipe_surface overrides of resource nr_samples. If set, will + enable EXT_multisampled_render_to_texture. +* ``PIPE_CAP_TGSI_ATOMFADD``: Atomic floating point adds are supported on + images, buffers, and shared memory. +* ``PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND``: True if the driver needs blend state to use zero/one instead of destination alpha for RGB/XRGB formats. +* ``PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS``: True if the driver wants TESSINNER and TESSOUTER to be inputs (rather than system values) for tessellation evaluation shaders. +* ``PIPE_CAP_DEST_SURFACE_SRGB_CONTROL``: Indicates whether the drivers + supports switching the format between sRGB and linear for a surface that is + used as destination in draw and blit calls. +* ``PIPE_CAP_MAX_VARYINGS``: The maximum number of fragment shader + varyings. This will generally correspond to + ``PIPE_SHADER_CAP_MAX_INPUTS`` for the fragment shader, but in some + cases may be a smaller number. .. _pipe_capf: diff -Nru mesa-18.3.3/src/gallium/docs/source/tgsi.rst mesa-19.0.1/src/gallium/docs/source/tgsi.rst --- mesa-18.3.3/src/gallium/docs/source/tgsi.rst 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/docs/source/tgsi.rst 2019-03-31 23:16:37.000000000 +0000 @@ -2684,6 +2684,21 @@ resource[offset] = dst_x + src_x +.. opcode:: ATOMFADD - Atomic floating point addition + + Syntax: ``ATOMFADD dst, resource, offset, src`` + + Example: ``ATOMFADD TEMP[0], BUFFER[0], TEMP[1], TEMP[2]`` + + The following operation is performed atomically: + +.. math:: + + dst_x = resource[offset] + + resource[offset] = dst_x + src_x + + .. opcode:: ATOMXCHG - Atomic exchange Syntax: ``ATOMXCHG dst, resource, offset, src`` @@ -3190,24 +3205,6 @@ last vertex processing stage is used. -TGSI_SEMANTIC_CULLDIST -"""""""""""""""""""""" - -Used as distance to plane for performing application-defined culling -of individual primitives against a plane. 
When components of vertex -elements are given this label, these values are assumed to be a -float32 signed distance to a plane. Primitives will be completely -discarded if the plane distance for all of the vertices in the -primitive are < 0. If a vertex has a cull distance of NaN, that -vertex counts as "out" (as if its < 0); -The limits on both clip and cull distances are bound -by the PIPE_MAX_CLIP_OR_CULL_DISTANCE_COUNT define which defines -the maximum number of components that can be used to hold the -distances and by the PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT -which specifies the maximum number of registers which can be -annotated with those semantics. - - TGSI_SEMANTIC_CLIPDIST """""""""""""""""""""" diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_blend.c mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_blend.c --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_blend.c 2017-11-23 00:32:52.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_blend.c 2019-03-31 23:16:37.000000000 +0000 @@ -114,10 +114,11 @@ struct pipe_blend_state *pblend = ctx->blend; struct etna_blend_state *blend = etna_blend_state(pblend); const struct pipe_rt_blend_state *rt0 = &pblend->rt[0]; + const struct util_format_description *desc; uint32_t colormask; if (pfb->cbufs[0] && - translate_rs_format_rb_swap(pfb->cbufs[0]->texture->format)) { + translate_rs_format_rb_swap(pfb->cbufs[0]->format)) { colormask = rt0->colormask & (PIPE_MASK_A | PIPE_MASK_G); if (rt0->colormask & PIPE_MASK_R) colormask |= PIPE_MASK_B; @@ -128,11 +129,13 @@ } /* If the complete render target is written, set full_overwrite: - * - The color mask is 1111 - * - No blending is used + * - The color mask covers all channels of the render target + * - No blending or logicop is used */ - bool full_overwrite = ((rt0->colormask == 0xf) && blend->fo_allowed) || - !pfb->cbufs[0]; + if (pfb->cbufs[0]) + desc = util_format_description(pfb->cbufs[0]->format); + bool full_overwrite = 
!pfb->cbufs[0] || ((blend->fo_allowed && + util_format_colormask_full(desc, colormask))); blend->PE_COLOR_FORMAT = VIVS_PE_COLOR_FORMAT_COMPONENTS(colormask) | COND(full_overwrite, VIVS_PE_COLOR_FORMAT_OVERWRITE); @@ -158,7 +161,7 @@ struct compiled_blend_color *cs = &ctx->blend_color; if (pfb->cbufs[0] && - translate_rs_format_rb_swap(pfb->cbufs[0]->texture->format)) { + translate_rs_format_rb_swap(pfb->cbufs[0]->format)) { cs->PE_ALPHA_BLEND_COLOR = VIVS_PE_ALPHA_BLEND_COLOR_R(etna_cfloat_to_uint8(cs->color[2])) | VIVS_PE_ALPHA_BLEND_COLOR_G(etna_cfloat_to_uint8(cs->color[1])) | diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_compiler.c mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_compiler.c --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_compiler.c 2017-12-02 01:35:56.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_compiler.c 2019-03-31 23:16:37.000000000 +0000 @@ -477,8 +477,7 @@ etna_compile_parse_declarations(struct etna_compile *c) { struct tgsi_parse_context ctx = { }; - unsigned status = TGSI_PARSE_OK; - status = tgsi_parse_init(&ctx, c->tokens); + MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens); assert(status == TGSI_PARSE_OK); while (!tgsi_parse_end_of_tokens(&ctx)) { @@ -530,8 +529,7 @@ etna_compile_pass_check_usage(struct etna_compile *c) { struct tgsi_parse_context ctx = { }; - unsigned status = TGSI_PARSE_OK; - status = tgsi_parse_init(&ctx, c->tokens); + MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens); assert(status == TGSI_PARSE_OK); for (int idx = 0; idx < c->total_decls; ++idx) { @@ -662,8 +660,7 @@ { struct tgsi_parse_context ctx = { }; int inst_idx = 0; - unsigned status = TGSI_PARSE_OK; - status = tgsi_parse_init(&ctx, c->tokens); + MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens); assert(status == TGSI_PARSE_OK); while (!tgsi_parse_end_of_tokens(&ctx)) { @@ -1812,7 +1809,7 @@ etna_compile_pass_generate_code(struct etna_compile *c) { struct tgsi_parse_context 
ctx = { }; - unsigned status = tgsi_parse_init(&ctx, c->tokens); + MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens); assert(status == TGSI_PARSE_OK); int inst_idx = 0; diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_context.c mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_context.c --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_context.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -60,6 +60,9 @@ { struct etna_context *ctx = etna_context(pctx); + if (ctx->dummy_rt) + etna_bo_del(ctx->dummy_rt); + util_copy_framebuffer_state(&ctx->framebuffer_s, NULL); if (ctx->primconvert) @@ -211,13 +214,8 @@ ctx->dirty |= ETNA_DIRTY_INDEX_BUFFER; struct etna_shader_key key = {}; - struct etna_surface *cbuf = etna_surface(pfb->cbufs[0]); - - if (cbuf) { - struct etna_resource *res = etna_resource(cbuf->base.texture); - - key.frag_rb_swap = !!translate_rs_format_rb_swap(res->base.format); - } + if (pfb->cbufs[0]) + key.frag_rb_swap = !!translate_rs_format_rb_swap(pfb->cbufs[0]->format); if (!etna_get_vs(ctx, key) || !etna_get_fs(ctx, key)) { BUG("compiled shaders are not okay"); @@ -488,6 +486,16 @@ slab_create_child(&ctx->transfer_pool, &screen->transfer_pool); list_inithead(&ctx->active_hw_queries); + /* create dummy RT buffer, used when rendering with no color buffer */ + ctx->dummy_rt = etna_bo_new(ctx->screen->dev, 64 * 64 * 4, + DRM_ETNA_GEM_CACHE_WC); + if (!ctx->dummy_rt) + goto fail; + + ctx->dummy_rt_reloc.bo = ctx->dummy_rt; + ctx->dummy_rt_reloc.offset = 0; + ctx->dummy_rt_reloc.flags = ETNA_RELOC_READ | ETNA_RELOC_WRITE; + return pctx; fail: diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_context.h mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_context.h --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_context.h 2017-12-02 01:35:56.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_context.h 2019-03-31 
23:16:37.000000000 +0000 @@ -190,6 +190,9 @@ /* list of active hardware queries */ struct list_head active_hw_queries; + + struct etna_bo *dummy_rt; + struct etna_reloc dummy_rt_reloc; }; static inline struct etna_context * diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_resource.c mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_resource.c --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_resource.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_resource.c 2019-03-31 23:16:37.000000000 +0000 @@ -176,10 +176,20 @@ return size; } +/* Is rs alignment needed? */ +static bool is_rs_align(struct etna_screen *screen, + const struct pipe_resource *tmpl) +{ + return screen->specs.use_blt ? false : ( + VIV_FEATURE(screen, chipMinorFeatures1, TEXTURE_HALIGN) || + !etna_resource_sampler_only(tmpl)); +} + /* Create a new resource object, using the given template info */ struct pipe_resource * etna_resource_alloc(struct pipe_screen *pscreen, unsigned layout, - uint64_t modifier, const struct pipe_resource *templat) + enum etna_resource_addressing_mode mode, uint64_t modifier, + const struct pipe_resource *templat) { struct etna_screen *screen = etna_screen(pscreen); struct etna_resource *rsc; @@ -217,11 +227,9 @@ * resolve engine's width. If not, we must not align resources used * only for textures. If this GPU uses the BLT engine, never do RS align. */ - bool rs_align = screen->specs.use_blt ? 
false : ( - VIV_FEATURE(screen, chipMinorFeatures1, TEXTURE_HALIGN) || - !etna_resource_sampler_only(templat)); - etna_layout_multiple(layout, screen->specs.pixel_pipes, rs_align, &paddingX, - &paddingY, &halign); + etna_layout_multiple(layout, screen->specs.pixel_pipes, + is_rs_align (screen, templat), + &paddingX, &paddingY, &halign); assert(paddingX && paddingY); } else { /* Compressed textures are padded to their block size, but we don't have @@ -273,6 +281,7 @@ rsc->base.nr_samples = nr_samples; rsc->layout = layout; rsc->halign = halign; + rsc->addressing_mode = mode; pipe_reference_init(&rsc->base.reference, 1); list_inithead(&rsc->list); @@ -309,12 +318,14 @@ { struct etna_screen *screen = etna_screen(pscreen); - /* Figure out what tiling to use -- for now, assume that texture cannot be linear. - * there is a capability LINEAR_TEXTURE_SUPPORT (supported on gc880 and - * gc2000 at least), but not sure how it works. + /* Figure out what tiling and address mode to use -- for now, assume that + * texture cannot be linear. there is a capability LINEAR_TEXTURE_SUPPORT + * (supported on gc880 and gc2000 at least), but not sure how it works. * Buffers always have LINEAR layout. */ unsigned layout = ETNA_LAYOUT_LINEAR; + enum etna_resource_addressing_mode mode = ETNA_ADDRESSING_MODE_TILED; + if (etna_resource_sampler_only(templat)) { /* The buffer is only used for texturing, so create something * directly compatible with the sampler. 
Such a buffer can @@ -357,7 +368,7 @@ layout = ETNA_LAYOUT_LINEAR; /* modifier is only used for scanout surfaces, so safe to use LINEAR here */ - return etna_resource_alloc(pscreen, layout, DRM_FORMAT_MOD_LINEAR, templat); + return etna_resource_alloc(pscreen, layout, mode, DRM_FORMAT_MOD_LINEAR, templat); } enum modifier_priority { @@ -438,7 +449,7 @@ tmpl.bind |= PIPE_BIND_SCANOUT; return etna_resource_alloc(pscreen, modifier_to_layout(modifier), - modifier, &tmpl); + ETNA_ADDRESSING_MODE_TILED, modifier, &tmpl); } static void @@ -511,6 +522,7 @@ rsc->seqno = 1; rsc->layout = modifier_to_layout(handle->modifier); rsc->halign = TEXTURE_HALIGN_FOUR; + rsc->addressing_mode = ETNA_ADDRESSING_MODE_TILED; level->width = tmpl->width0; @@ -519,7 +531,7 @@ /* Determine padding of the imported resource. */ unsigned paddingX = 0, paddingY = 0; etna_layout_multiple(rsc->layout, screen->specs.pixel_pipes, - VIV_FEATURE(screen, chipMinorFeatures1, TEXTURE_HALIGN), + is_rs_align(screen, tmpl), &paddingX, &paddingY, &rsc->halign); if (!screen->specs.use_blt) diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_resource.h mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_resource.h --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_resource.h 2017-11-16 18:44:33.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_resource.h 2019-03-31 23:16:37.000000000 +0000 @@ -49,6 +49,11 @@ bool ts_valid; }; +enum etna_resource_addressing_mode { + ETNA_ADDRESSING_MODE_TILED = 0, + ETNA_ADDRESSING_MODE_LINEAR, +}; + /* status of queued up but not flushed reads and write operations. * In _transfer_map() we need to know if queued up rendering needs * to be flushed to preserve the order of cpu and gpu access. */ @@ -66,6 +71,7 @@ /* only lod 0 used for non-texture buffers */ /* Layout for surface (tiled, multitiled, split tiled, ...) 
*/ enum etna_surface_layout layout; + enum etna_resource_addressing_mode addressing_mode; /* Horizontal alignment for texture unit (TEXTURE_HALIGN_*) */ unsigned halign; struct etna_bo *bo; /* Surface video memory */ @@ -155,7 +161,8 @@ struct pipe_resource * etna_resource_alloc(struct pipe_screen *pscreen, unsigned layout, - uint64_t modifier, const struct pipe_resource *templat); + enum etna_resource_addressing_mode mode, uint64_t modifier, + const struct pipe_resource *templat); void etna_resource_screen_init(struct pipe_screen *pscreen); diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_screen.c mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_screen.c --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -63,7 +63,7 @@ {"no_autodisable", ETNA_DBG_NO_AUTODISABLE, "Disable autodisable"}, {"no_supertile", ETNA_DBG_NO_SUPERTILE, "Disable supertiles"}, {"no_early_z", ETNA_DBG_NO_EARLY_Z, "Disable early z"}, - {"cflush_all", ETNA_DBG_CFLUSH_ALL, "Flush every cash before state update"}, + {"cflush_all", ETNA_DBG_CFLUSH_ALL, "Flush every cache before state update"}, {"msaa2x", ETNA_DBG_MSAA_2X, "Force 2x msaa"}, {"msaa4x", ETNA_DBG_MSAA_4X, "Force 4x msaa"}, {"flush_all", ETNA_DBG_FLUSH_ALL, "Flush after every rendered primitive"}, @@ -360,6 +360,9 @@ case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: return 0; + case PIPE_CAP_MAX_VARYINGS: + return screen->specs.max_varyings; + case PIPE_CAP_PCI_GROUP: case PIPE_CAP_PCI_BUS: case PIPE_CAP_PCI_DEVICE: diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_shader.c mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_shader.c --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_shader.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_shader.c 2019-03-31 23:16:37.000000000 +0000 @@ -230,8 +230,7 @@ } static bool 
-etna_shader_update_vs_inputs(struct etna_context *ctx, - struct compiled_shader_state *cs, +etna_shader_update_vs_inputs(struct compiled_shader_state *cs, const struct etna_shader_variant *vs, const struct compiled_vertex_elements_state *ves) { @@ -246,7 +245,7 @@ num_vs_inputs = MAX2(ves->num_elements, vs->infile.num_reg); if (num_vs_inputs != ves->num_elements) { BUG("Number of elements %u does not match the number of VS inputs %zu", - ctx->vertex_elements->num_elements, ctx->shader.vs->infile.num_reg); + ves->num_elements, vs->infile.num_reg); return false; } @@ -312,7 +311,7 @@ bool etna_shader_update_vertex(struct etna_context *ctx) { - return etna_shader_update_vs_inputs(ctx, &ctx->shader_state, ctx->shader.vs, + return etna_shader_update_vs_inputs(&ctx->shader_state, ctx->shader.vs, ctx->vertex_elements); } diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_state.c mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_state.c --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_state.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_state.c 2019-03-31 23:16:37.000000000 +0000 @@ -190,8 +190,9 @@ cs->TS_COLOR_STATUS_BASE.bo = NULL; cs->TS_COLOR_SURFACE_BASE.bo = NULL; - for (int i = 0; i < ETNA_MAX_PIXELPIPES; i++) - cs->PE_PIPE_COLOR_ADDR[i].bo = NULL; + cs->PE_COLOR_ADDR = ctx->dummy_rt_reloc; + for (int i = 0; i < ctx->specs.pixel_pipes; i++) + cs->PE_PIPE_COLOR_ADDR[i] = ctx->dummy_rt_reloc; } if (sv->zsbuf != NULL) { diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_texture.c mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_texture.c --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_texture.c 2017-12-02 01:35:56.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_texture.c 2019-03-31 23:16:37.000000000 +0000 @@ -172,7 +172,9 @@ if (res->layout == ETNA_LAYOUT_SUPER_TILED && VIV_FEATURE(screen, chipMinorFeatures2, SUPERTILED_TEXTURE)) return true; - /* TODO: LINEAR_TEXTURE_SUPPORT */ + /* 
This GPU supports texturing from linear textures? */ + if (res->layout == ETNA_LAYOUT_LINEAR && VIV_FEATURE(screen, chipMinorFeatures1, LINEAR_TEXTURE_SUPPORT)) + return true; /* Otherwise, only support tiled layouts */ if (res->layout != ETNA_LAYOUT_TILED) @@ -203,6 +205,7 @@ PIPE_BIND_BLENDABLE); res->texture = etna_resource_alloc(pctx->screen, ETNA_LAYOUT_TILED, + ETNA_ADDRESSING_MODE_TILED, DRM_FORMAT_MOD_LINEAR, &templat); } diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_texture_state.c mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_texture_state.c --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_texture_state.c 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_texture_state.c 2019-03-31 23:16:37.000000000 +0000 @@ -131,6 +131,17 @@ return NULL; } + if (res->addressing_mode == ETNA_ADDRESSING_MODE_LINEAR) { + sv->TE_SAMPLER_CONFIG0 |= VIVS_TE_SAMPLER_CONFIG0_ADDRESSING_MODE(TEXTURE_ADDRESSING_MODE_LINEAR); + + for (int lod = 0; lod <= res->base.last_level; ++lod) + sv->TE_SAMPLER_LINEAR_STRIDE[lod] = res->levels[lod].stride; + + } else { + sv->TE_SAMPLER_CONFIG0 |= VIVS_TE_SAMPLER_CONFIG0_ADDRESSING_MODE(TEXTURE_ADDRESSING_MODE_TILED); + memset(&sv->TE_SAMPLER_LINEAR_STRIDE, 0, sizeof(sv->TE_SAMPLER_LINEAR_STRIDE)); + } + sv->TE_SAMPLER_CONFIG1 = COND(ext, VIVS_TE_SAMPLER_CONFIG1_FORMAT_EXT(format)) | COND(astc, VIVS_TE_SAMPLER_CONFIG1_FORMAT_EXT(TEXTURE_FORMAT_EXT_ASTC)) | VIVS_TE_SAMPLER_CONFIG1_HALIGN(res->halign) | swiz; @@ -293,6 +304,16 @@ } } } + } + if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS))) { + for (int y = 0; y < VIVS_TE_SAMPLER_LINEAR_STRIDE__LEN; ++y) { + for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) { + if ((1 << x) & active_samplers) { + struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]); + /*02C00*/ EMIT_STATE(TE_SAMPLER_LINEAR_STRIDE(x, y), sv->TE_SAMPLER_LINEAR_STRIDE[y]); + } + } + } } if (unlikely(ctx->specs.tex_astc && (dirty & (ETNA_DIRTY_SAMPLER_VIEWS)))) { 
for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) { diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_texture_state.h mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_texture_state.h --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_texture_state.h 2017-12-02 01:35:56.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_texture_state.h 2019-03-31 23:16:37.000000000 +0000 @@ -62,6 +62,7 @@ uint32_t TE_SAMPLER_SIZE; uint32_t TE_SAMPLER_LOG_SIZE; uint32_t TE_SAMPLER_ASTC0; + uint32_t TE_SAMPLER_LINEAR_STRIDE[VIVS_TE_SAMPLER_LINEAR_STRIDE__LEN]; struct etna_reloc TE_SAMPLER_LOD_ADDR[VIVS_TE_SAMPLER_LOD_ADDR__LEN]; unsigned min_lod, max_lod; /* 5.5 fixp */ diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_transfer.c mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_transfer.c --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_transfer.c 2017-12-02 01:35:56.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_transfer.c 2019-03-31 23:16:37.000000000 +0000 @@ -208,7 +208,8 @@ templ.bind = PIPE_BIND_RENDER_TARGET; trans->rsc = etna_resource_alloc(pctx->screen, ETNA_LAYOUT_LINEAR, - DRM_FORMAT_MOD_LINEAR, &templ); + ETNA_ADDRESSING_MODE_TILED, DRM_FORMAT_MOD_LINEAR, + &templ); if (!trans->rsc) { slab_free(&ctx->transfer_pool, trans); return NULL; diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/hw/cmdstream.xml.h mesa-19.0.1/src/gallium/drivers/etnaviv/hw/cmdstream.xml.h --- mesa-18.3.3/src/gallium/drivers/etnaviv/hw/cmdstream.xml.h 2017-11-14 18:46:21.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/hw/cmdstream.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -8,11 +8,11 @@ git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- cmdstream.xml ( 16929 bytes, from 2017-10-13 12:22:46) -- copyright.xml ( 1597 bytes, from 2016-10-29 07:29:22) -- common.xml ( 26187 bytes, from 2017-10-31 19:05:01) +- cmdstream.xml ( 16930 bytes, from 2019-01-04 11:37:39) +- copyright.xml ( 1597 
bytes, from 2018-02-10 13:09:26) +- common.xml ( 35468 bytes, from 2018-02-10 13:09:26) -Copyright (C) 2012-2017 by the following authors: +Copyright (C) 2012-2019 by the following authors: - Wladimir J. van der Laan - Christian Gmeiner - Lucas Stach diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/hw/common_3d.xml.h mesa-19.0.1/src/gallium/drivers/etnaviv/hw/common_3d.xml.h --- mesa-18.3.3/src/gallium/drivers/etnaviv/hw/common_3d.xml.h 2017-11-16 18:44:33.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/hw/common_3d.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -8,12 +8,12 @@ git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- texdesc_3d.xml ( 3183 bytes, from 2017-10-31 19:05:01) -- copyright.xml ( 1597 bytes, from 2016-10-29 07:29:22) -- common.xml ( 26187 bytes, from 2017-10-31 19:05:01) -- common_3d.xml ( 14615 bytes, from 2017-11-04 14:03:35) +- texdesc_3d.xml ( 3183 bytes, from 2018-02-10 13:09:26) +- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) +- common.xml ( 35468 bytes, from 2018-02-10 13:09:26) +- common_3d.xml ( 14843 bytes, from 2019-01-18 10:13:41) -Copyright (C) 2012-2017 by the following authors: +Copyright (C) 2012-2019 by the following authors: - Wladimir J. 
van der Laan - Christian Gmeiner - Lucas Stach @@ -127,6 +127,8 @@ #define TEXTURE_HALIGN_SPLIT_SUPER_TILED 0x00000004 #define TS_CACHE_MODE_128 0x00000000 #define TS_CACHE_MODE_256 0x00000001 +#define TEXTURE_ADDRESSING_MODE_TILED 0x00000000 +#define TEXTURE_ADDRESSING_MODE_LINEAR 0x00000003 #define COLOR_COMPRESSION_FORMAT_A4R4G4B4 0x00000000 #define COLOR_COMPRESSION_FORMAT_A1R5G5B5 0x00000001 #define COLOR_COMPRESSION_FORMAT_R5G6B5 0x00000002 diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/hw/common.xml.h mesa-19.0.1/src/gallium/drivers/etnaviv/hw/common.xml.h --- mesa-18.3.3/src/gallium/drivers/etnaviv/hw/common.xml.h 2017-11-16 18:44:33.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/hw/common.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -8,12 +8,12 @@ git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- texdesc_3d.xml ( 3183 bytes, from 2017-10-31 19:05:01) -- copyright.xml ( 1597 bytes, from 2016-10-29 07:29:22) -- common.xml ( 26187 bytes, from 2017-10-31 19:05:01) -- common_3d.xml ( 14615 bytes, from 2017-11-04 14:03:35) +- texdesc_3d.xml ( 3183 bytes, from 2018-02-10 13:09:26) +- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) +- common.xml ( 35468 bytes, from 2018-02-10 13:09:26) +- common_3d.xml ( 14843 bytes, from 2019-01-18 10:13:41) -Copyright (C) 2012-2017 by the following authors: +Copyright (C) 2012-2018 by the following authors: - Wladimir J. 
van der Laan - Christian Gmeiner - Lucas Stach @@ -320,5 +320,166 @@ #define chipMinorFeatures6_DEC 0x00000004 #define chipMinorFeatures6_VS_TILE_NV12 0x00000008 #define chipMinorFeatures6_VS_TILE_NV12_10BIT 0x00000010 +#define chipMinorFeatures6_RENDER_TARGET_8 0x00000020 +#define chipMinorFeatures6_TEX_LOD_FLOW_CORR 0x00000040 +#define chipMinorFeatures6_FACE_LOD 0x00000080 +#define chipMinorFeatures6_MULTI_CORE_SEMAPHORE_STALL_V2 0x00000100 +#define chipMinorFeatures6_VMSAA 0x00000200 +#define chipMinorFeatures6_CHIP_ENABLE_LINK 0x00000400 +#define chipMinorFeatures6_MULTI_SRC_BLT_1_5_ENHANCEMENT 0x00000800 +#define chipMinorFeatures6_MULTI_SRC_BLT_BILINEAR_FILTER 0x00001000 +#define chipMinorFeatures6_RA_HZEZ_CLOCK_CONTROL 0x00002000 +#define chipMinorFeatures6_CACHE128B256BPERLINE 0x00004000 +#define chipMinorFeatures6_V4_COMPRESSION 0x00008000 +#define chipMinorFeatures6_PE2D_MAJOR_SUPER_TILE 0x00010000 +#define chipMinorFeatures6_PE_32BPC_COLORMASK_FIX 0x00020000 +#define chipMinorFeatures6_ALPHA_BLENDING_OPT 0x00040000 +#define chipMinorFeatures6_NEW_GPIPE 0x00080000 +#define chipMinorFeatures6_PIPELINE_32_ATTRIBUTES 0x00100000 +#define chipMinorFeatures6_MSAA_SHADING 0x00200000 +#define chipMinorFeatures6_NO_ANISTRO_FILTER 0x00400000 +#define chipMinorFeatures6_NO_ASTC 0x00800000 +#define chipMinorFeatures6_NO_DXT 0x01000000 +#define chipMinorFeatures6_HWTFB 0x02000000 +#define chipMinorFeatures6_RA_DEPTH_WRITE_MSAA1X_FIX 0x04000000 +#define chipMinorFeatures6_EZHZ_CLOCKGATE_FIX 0x08000000 +#define chipMinorFeatures6_SH_SNAP2PAGE_FIX 0x10000000 +#define chipMinorFeatures6_SH_HALFDEPENDENCY_FIX 0x20000000 +#define chipMinorFeatures6_USC_MCFILL_FIX 0x40000000 +#define chipMinorFeatures6_TPG_TCPERF_FIX 0x80000000 +#define chipMinorFeatures7_USC_MDFIFO_OVERFLOW_FIX 0x00000001 +#define chipMinorFeatures7_SH_TEXLD_BARRIER_IN_CS_FIX 0x00000002 +#define chipMinorFeatures7_RS_NEW_BASEADDR 0x00000004 +#define chipMinorFeatures7_PE_8BPP_DUALPIPE_FIX 0x00000008 
+#define chipMinorFeatures7_SH_ADVANCED_INSTR 0x00000010 +#define chipMinorFeatures7_SH_FLAT_INTERPOLATION_DUAL16_FIX 0x00000020 +#define chipMinorFeatures7_USC_CONTINUOUS_FLUS_FIX 0x00000040 +#define chipMinorFeatures7_SH_SUPPORT_V4 0x00000080 +#define chipMinorFeatures7_SH_SUPPORT_ALPHA_KILL 0x00000100 +#define chipMinorFeatures7_PE_NO_ALPHA_TEST 0x00000200 +#define chipMinorFeatures7_TX_LOD_NEAREST_SELECT 0x00000400 +#define chipMinorFeatures7_SH_FIX_LDEXP 0x00000800 +#define chipMinorFeatures7_SUPPORT_MOVAI 0x00001000 +#define chipMinorFeatures7_SH_SNAP2PAGE_MAXPAGES_FIX 0x00002000 +#define chipMinorFeatures7_PE_RGBA16I_FIX 0x00004000 +#define chipMinorFeatures7_BLT_8bpp_256TILE_FC_FIX 0x00008000 +#define chipMinorFeatures7_PE_64BIT_FENCE_FIX 0x00010000 +#define chipMinorFeatures7_USC_FULL_CACHE_FIX 0x00020000 +#define chipMinorFeatures7_TX_YUV_ASSEMBLER_10BIT 0x00040000 +#define chipMinorFeatures7_FE_32BIT_INDEX_FIX 0x00080000 +#define chipMinorFeatures7_BLT_64BPP_MASKED_CLEAR_FIX 0x00100000 +#define chipMinorFeatures7_BIT_SECURITY 0x00200000 +#define chipMinorFeatures7_BIT_ROBUSTNESS 0x00400000 +#define chipMinorFeatures7_USC_ATOMIC_FIX 0x00800000 +#define chipMinorFeatures7_SH_PSO_MSAA1x_FIX 0x01000000 +#define chipMinorFeatures7_BIT_USC_VX_PERF_FIX 0x02000000 +#define chipMinorFeatures7_EVIS_NO_ABSDIFF 0x04000000 +#define chipMinorFeatures7_EVIS_NO_BITREPLACE 0x08000000 +#define chipMinorFeatures7_EVIS_NO_BOXFILTER 0x10000000 +#define chipMinorFeatures7_EVIS_NO_CORDIAC 0x20000000 +#define chipMinorFeatures7_EVIS_NO_DP32 0x40000000 +#define chipMinorFeatures7_EVIS_NO_FILTER 0x80000000 +#define chipMinorFeatures8_EVIS_NO_IADD 0x00000001 +#define chipMinorFeatures8_EVIS_NO_SELECTADD 0x00000002 +#define chipMinorFeatures8_EVIS_LERP_7OUTPUT 0x00000004 +#define chipMinorFeatures8_EVIS_ACCSQ_8OUTPUT 0x00000008 +#define chipMinorFeatures8_USC_GOS_ADDR_FIX 0x00000010 +#define chipMinorFeatures8_TX_8BIT_UVFRAC 0x00000020 +#define 
chipMinorFeatures8_TX_DESC_CACHE_CLOCKGATE_FIX 0x00000040 +#define chipMinorFeatures8_RSBLT_MSAA_DECOMPRESSION 0x00000080 +#define chipMinorFeatures8_TX_INTEGER_COORDINATE 0x00000100 +#define chipMinorFeatures8_DRAWID 0x00000200 +#define chipMinorFeatures8_PSIO_SAMPLEMASK_IN_R0ZW_FIX 0x00000400 +#define chipMinorFeatures8_TX_INTEGER_COORDINATE_V2 0x00000800 +#define chipMinorFeatures8_MULTI_CORE_BLOCK_SET_CONFIG 0x00001000 +#define chipMinorFeatures8_VG_RESOLVE_ENGINE 0x00002000 +#define chipMinorFeatures8_VG_PE_COLOR_KEY 0x00004000 +#define chipMinorFeatures8_VG_IM_INDEX_FORMAT 0x00008000 +#define chipMinorFeatures8_SNAPPAGE_CMD 0x00010000 +#define chipMinorFeatures8_SH_NO_INDEX_CONST_ON_A0 0x00020000 +#define chipMinorFeatures8_SH_NO_ONECONST_LIMIT 0x00040000 +#define chipMinorFeatures8_SH_IMG_LDST_ON_TEMP 0x00080000 +#define chipMinorFeatures8_COMPUTE_ONLY 0x00100000 +#define chipMinorFeatures8_SH_IMG_LDST_CLAMP 0x00200000 +#define chipMinorFeatures8_SH_ICACHE_ALLOC_COUNT_FIX 0x00400000 +#define chipMinorFeatures8_SH_ICACHE_PREFETCH 0x00800000 +#define chipMinorFeatures8_PE2D_SEPARATE_CACHE 0x01000000 +#define chipMinorFeatures8_VG_AYUV_INPUT_OUTPUT 0x02000000 +#define chipMinorFeatures8_VG_DOUBLE_IMAGE 0x04000000 +#define chipMinorFeatures8_VG_RECTANGLE_STRIPE_MODE 0x08000000 +#define chipMinorFeatures8_VG_MMU 0x10000000 +#define chipMinorFeatures8_VG_IM_FILTER 0x20000000 +#define chipMinorFeatures8_VG_IM_YUV_PACKET 0x40000000 +#define chipMinorFeatures8_VG_IM_YUV_PLANAR 0x80000000 +#define chipMinorFeatures9_VG_PE_YUV_PACKET 0x00000001 +#define chipMinorFeatures9_VG_COLOR_PRECISION_8_BIT 0x00000002 +#define chipMinorFeatures9_PE_MSAA_OQ_FIX 0x00000004 +#define chipMinorFeatures9_PSIO_MSAA_CL_FIX 0x00000008 +#define chipMinorFeatures9_USC_DEFER_FILL_FIX 0x00000010 +#define chipMinorFeatures9_SH_CLOCK_GATE_FIX 0x00000020 +#define chipMinorFeatures9_FE_NEED_DUMMYDRAW 0x00000040 +#define chipMinorFeatures9_PE2D_LINEAR_YUV420_OUTPUT 0x00000080 +#define 
chipMinorFeatures9_PE2D_LINEAR_YUV420_10BIT 0x00000100 +#define chipMinorFeatures9_MULTI_CLUSTER 0x00000200 +#define chipMinorFeatures9_VG_TS_CULLING 0x00000400 +#define chipMinorFeatures9_VG_FP25 0x00000800 +#define chipMinorFeatures9_SH_MULTI_WG_PACK 0x00001000 +#define chipMinorFeatures9_SH_DUAL16_SAMPLEMASK_ZW 0x00002000 +#define chipMinorFeatures9_TPG_TRIVIAL_MODE_FIX 0x00004000 +#define chipMinorFeatures9_TX_ASTC_MULTISLICE_FIX 0x00008000 +#define chipMinorFeatures9_FE_ROBUST_FIX 0x00010000 +#define chipMinorFeatures9_SH_GPIPE_ACCESS_FULLTEMPS 0x00020000 +#define chipMinorFeatures9_PSIO_INTERLOCK 0x00040000 +#define chipMinorFeatures9_PA_WIDELINE_FIX 0x00080000 +#define chipMinorFeatures9_WIDELINE_HELPER_FIX 0x00100000 +#define chipMinorFeatures9_G2D_3RD_PARTY_COMPRESSION_1_1 0x00200000 +#define chipMinorFeatures9_TX_FLUSH_L1CACHE 0x00400000 +#define chipMinorFeatures9_PE_DITHER_FIX2 0x00800000 +#define chipMinorFeatures9_G2D_DEC400 0x01000000 +#define chipMinorFeatures9_SH_TEXLD_U_FIX 0x02000000 +#define chipMinorFeatures9_MC_FCCACHE_BYTEMASK 0x04000000 +#define chipMinorFeatures9_SH_MULTI_WG_PACK_FIX 0x08000000 +#define chipMinorFeatures9_DC_OVERLAY_SCALING 0x10000000 +#define chipMinorFeatures9_DC_SOURCE_ROTATION 0x20000000 +#define chipMinorFeatures9_DC_TILED 0x40000000 +#define chipMinorFeatures9_DC_YUV_L1 0x80000000 +#define chipMinorFeatures10_DC_D30_OUTPUT 0x00000001 +#define chipMinorFeatures10_DC_MMU 0x00000002 +#define chipMinorFeatures10_DC_COMPRESSION 0x00000004 +#define chipMinorFeatures10_DC_QOS 0x00000008 +#define chipMinorFeatures10_PE_ADVANCE_BLEND_PART0 0x00000010 +#define chipMinorFeatures10_FE_PATCHLIST_FETCH_FIX 0x00000020 +#define chipMinorFeatures10_RA_CG_FIX 0x00000040 +#define chipMinorFeatures10_EVIS_VX2 0x00000080 +#define chipMinorFeatures10_NN_FLOAT 0x00000100 +#define chipMinorFeatures10_DEC400 0x00000200 +#define chipMinorFeatures10_LS_SUPPORT_PERCOMP_DEPENDENCY 0x00000400 +#define chipMinorFeatures10_TP_ENGINE 0x00000800 
+#define chipMinorFeatures10_MULTI_CORE_BLOCK_SET_CONFIG2 0x00001000 +#define chipMinorFeatures10_PE_VMSAA_COVERAGE_CACHE_FIX 0x00002000 +#define chipMinorFeatures10_SECURITY_AHB 0x00004000 +#define chipMinorFeatures10_MULTICORE_SEMAPHORESTALL_V3 0x00008000 +#define chipMinorFeatures10_SMALLBATCH 0x00010000 +#define chipMinorFeatures10_SH_CMPLX 0x00020000 +#define chipMinorFeatures10_SH_IDIV0_SWZL_EHS 0x00040000 +#define chipMinorFeatures10_TX_LERP_LESS_BIT 0x00080000 +#define chipMinorFeatures10_SH_GM_ENDIAN 0x00100000 +#define chipMinorFeatures10_SH_GM_USC_UNALLOC 0x00200000 +#define chipMinorFeatures10_SH_END_OF_BB 0x00400000 +#define chipMinorFeatures10_VIP_V7 0x00800000 +#define chipMinorFeatures10_TX_BORDER_CLAMP_FIX 0x01000000 +#define chipMinorFeatures10_SH_IMG_LD_LASTPIXEL_FIX 0x02000000 +#define chipMinorFeatures10_ASYNC_BLT 0x04000000 +#define chipMinorFeatures10_ASYNC_FE_FENCE_FIX 0x08000000 +#define chipMinorFeatures10_PSCS_THROTTLE 0x10000000 +#define chipMinorFeatures10_SEPARATE_LS 0x20000000 +#define chipMinorFeatures10_MCFE 0x40000000 +#define chipMinorFeatures10_WIDELINE_TRIANGLE_EMU 0x80000000 +#define chipMinorFeatures11_VG_RESOLUTION_8K 0x00000001 +#define chipMinorFeatures11_FENCE_32BIT 0x00000002 +#define chipMinorFeatures11_FENCE_64BIT 0x00000004 +#define chipMinorFeatures11_NN_INTERLEVE8 0x00000008 +#define chipMinorFeatures11_TP_REORDER 0x00000010 +#define chipMinorFeatures11_PE_DEPTH_ONLY_OQFIX 0x00000020 #endif /* COMMON_XML */ diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/hw/isa.xml.h mesa-19.0.1/src/gallium/drivers/etnaviv/hw/isa.xml.h --- mesa-18.3.3/src/gallium/drivers/etnaviv/hw/isa.xml.h 2017-11-14 18:46:21.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/hw/isa.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -8,10 +8,10 @@ git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- isa.xml ( 37079 bytes, from 2017-10-19 09:48:25) -- copyright.xml ( 1597 bytes, from 
2016-10-29 07:29:22) +- isa.xml ( 37079 bytes, from 2018-02-10 13:09:26) +- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) -Copyright (C) 2012-2017 by the following authors: +Copyright (C) 2012-2018 by the following authors: - Wladimir J. van der Laan - Christian Gmeiner - Lucas Stach diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/hw/state_3d.xml.h mesa-19.0.1/src/gallium/drivers/etnaviv/hw/state_3d.xml.h --- mesa-18.3.3/src/gallium/drivers/etnaviv/hw/state_3d.xml.h 2017-11-16 18:44:33.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/hw/state_3d.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -8,17 +8,17 @@ git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- state.xml ( 26087 bytes, from 2017-10-30 13:44:54) -- common.xml ( 26187 bytes, from 2017-10-31 19:05:01) -- common_3d.xml ( 14615 bytes, from 2017-11-04 14:03:35) -- state_hi.xml ( 27733 bytes, from 2017-10-02 19:00:30) -- copyright.xml ( 1597 bytes, from 2016-10-29 07:29:22) -- state_2d.xml ( 51552 bytes, from 2016-10-29 07:29:22) -- state_3d.xml ( 79992 bytes, from 2017-11-07 10:44:35) -- state_blt.xml ( 13405 bytes, from 2017-10-16 17:42:46) -- state_vg.xml ( 5975 bytes, from 2016-10-29 07:29:22) +- state.xml ( 26087 bytes, from 2018-02-10 13:09:26) +- common.xml ( 35468 bytes, from 2018-02-10 13:09:26) +- common_3d.xml ( 14843 bytes, from 2019-01-18 10:13:41) +- state_hi.xml ( 30232 bytes, from 2018-03-30 07:48:22) +- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) +- state_2d.xml ( 51552 bytes, from 2018-02-10 13:09:26) +- state_3d.xml ( 79992 bytes, from 2019-01-18 10:10:57) +- state_blt.xml ( 13405 bytes, from 2018-02-10 13:09:26) +- state_vg.xml ( 5975 bytes, from 2018-02-10 13:09:26) -Copyright (C) 2012-2017 by the following authors: +Copyright (C) 2012-2019 by the following authors: - Wladimir J. 
van der Laan - Christian Gmeiner - Lucas Stach @@ -1400,6 +1400,9 @@ #define VIVS_TE_SAMPLER_CONFIG0_FORMAT__SHIFT 13 #define VIVS_TE_SAMPLER_CONFIG0_FORMAT(x) (((x) << VIVS_TE_SAMPLER_CONFIG0_FORMAT__SHIFT) & VIVS_TE_SAMPLER_CONFIG0_FORMAT__MASK) #define VIVS_TE_SAMPLER_CONFIG0_ROUND_UV 0x00080000 +#define VIVS_TE_SAMPLER_CONFIG0_ADDRESSING_MODE__MASK 0x00300000 +#define VIVS_TE_SAMPLER_CONFIG0_ADDRESSING_MODE__SHIFT 20 +#define VIVS_TE_SAMPLER_CONFIG0_ADDRESSING_MODE(x) (((x) << VIVS_TE_SAMPLER_CONFIG0_ADDRESSING_MODE__SHIFT) & VIVS_TE_SAMPLER_CONFIG0_ADDRESSING_MODE__MASK) #define VIVS_TE_SAMPLER_CONFIG0_ENDIAN__MASK 0x00c00000 #define VIVS_TE_SAMPLER_CONFIG0_ENDIAN__SHIFT 22 #define VIVS_TE_SAMPLER_CONFIG0_ENDIAN(x) (((x) << VIVS_TE_SAMPLER_CONFIG0_ENDIAN__SHIFT) & VIVS_TE_SAMPLER_CONFIG0_ENDIAN__MASK) @@ -1520,6 +1523,9 @@ #define VIVS_NTE_SAMPLER_CONFIG0_FORMAT__SHIFT 13 #define VIVS_NTE_SAMPLER_CONFIG0_FORMAT(x) (((x) << VIVS_NTE_SAMPLER_CONFIG0_FORMAT__SHIFT) & VIVS_NTE_SAMPLER_CONFIG0_FORMAT__MASK) #define VIVS_NTE_SAMPLER_CONFIG0_ROUND_UV 0x00080000 +#define VIVS_NTE_SAMPLER_CONFIG0_ADDRESSING_MODE__MASK 0x00300000 +#define VIVS_NTE_SAMPLER_CONFIG0_ADDRESSING_MODE__SHIFT 20 +#define VIVS_NTE_SAMPLER_CONFIG0_ADDRESSING_MODE(x) (((x) << VIVS_NTE_SAMPLER_CONFIG0_ADDRESSING_MODE__SHIFT) & VIVS_NTE_SAMPLER_CONFIG0_ADDRESSING_MODE__MASK) #define VIVS_NTE_SAMPLER_CONFIG0_ENDIAN__MASK 0x00c00000 #define VIVS_NTE_SAMPLER_CONFIG0_ENDIAN__SHIFT 22 #define VIVS_NTE_SAMPLER_CONFIG0_ENDIAN(x) (((x) << VIVS_NTE_SAMPLER_CONFIG0_ENDIAN__SHIFT) & VIVS_NTE_SAMPLER_CONFIG0_ENDIAN__MASK) diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/hw/state_blt.xml.h mesa-19.0.1/src/gallium/drivers/etnaviv/hw/state_blt.xml.h --- mesa-18.3.3/src/gallium/drivers/etnaviv/hw/state_blt.xml.h 2017-11-16 18:44:33.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/hw/state_blt.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -8,17 +8,17 @@ git clone git://0x04.net/rules-ng-ng The 
rules-ng-ng source files this header was generated from are: -- state.xml ( 26087 bytes, from 2017-10-30 13:44:54) -- common.xml ( 26187 bytes, from 2017-10-31 19:05:01) -- common_3d.xml ( 14615 bytes, from 2017-11-04 14:03:35) -- state_hi.xml ( 27733 bytes, from 2017-10-02 19:00:30) -- copyright.xml ( 1597 bytes, from 2016-10-29 07:29:22) -- state_2d.xml ( 51552 bytes, from 2016-10-29 07:29:22) -- state_3d.xml ( 79992 bytes, from 2017-11-07 10:44:35) -- state_blt.xml ( 13405 bytes, from 2017-10-16 17:42:46) -- state_vg.xml ( 5975 bytes, from 2016-10-29 07:29:22) +- state.xml ( 26087 bytes, from 2018-02-10 13:09:26) +- common.xml ( 35468 bytes, from 2018-02-10 13:09:26) +- common_3d.xml ( 14843 bytes, from 2019-01-18 10:13:41) +- state_hi.xml ( 30232 bytes, from 2018-03-30 07:48:22) +- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) +- state_2d.xml ( 51552 bytes, from 2018-02-10 13:09:26) +- state_3d.xml ( 79992 bytes, from 2019-01-18 10:10:57) +- state_blt.xml ( 13405 bytes, from 2018-02-10 13:09:26) +- state_vg.xml ( 5975 bytes, from 2018-02-10 13:09:26) -Copyright (C) 2012-2017 by the following authors: +Copyright (C) 2012-2018 by the following authors: - Wladimir J. 
van der Laan - Christian Gmeiner - Lucas Stach diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/hw/state.xml.h mesa-19.0.1/src/gallium/drivers/etnaviv/hw/state.xml.h --- mesa-18.3.3/src/gallium/drivers/etnaviv/hw/state.xml.h 2017-11-16 18:44:33.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/hw/state.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -8,17 +8,17 @@ git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- state.xml ( 26087 bytes, from 2017-10-30 13:44:54) -- common.xml ( 26187 bytes, from 2017-10-31 19:05:01) -- common_3d.xml ( 14615 bytes, from 2017-11-04 14:03:35) -- state_hi.xml ( 27733 bytes, from 2017-10-02 19:00:30) -- copyright.xml ( 1597 bytes, from 2016-10-29 07:29:22) -- state_2d.xml ( 51552 bytes, from 2016-10-29 07:29:22) -- state_3d.xml ( 79992 bytes, from 2017-11-07 10:44:35) -- state_blt.xml ( 13405 bytes, from 2017-10-16 17:42:46) -- state_vg.xml ( 5975 bytes, from 2016-10-29 07:29:22) +- state.xml ( 26087 bytes, from 2018-02-10 13:09:26) +- common.xml ( 35468 bytes, from 2018-02-10 13:09:26) +- common_3d.xml ( 14843 bytes, from 2019-01-18 10:13:41) +- state_hi.xml ( 30232 bytes, from 2018-03-30 07:48:22) +- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) +- state_2d.xml ( 51552 bytes, from 2018-02-10 13:09:26) +- state_3d.xml ( 79992 bytes, from 2019-01-18 10:10:57) +- state_blt.xml ( 13405 bytes, from 2018-02-10 13:09:26) +- state_vg.xml ( 5975 bytes, from 2018-02-10 13:09:26) -Copyright (C) 2012-2017 by the following authors: +Copyright (C) 2012-2018 by the following authors: - Wladimir J. 
van der Laan - Christian Gmeiner - Lucas Stach diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/hw/texdesc_3d.xml.h mesa-19.0.1/src/gallium/drivers/etnaviv/hw/texdesc_3d.xml.h --- mesa-18.3.3/src/gallium/drivers/etnaviv/hw/texdesc_3d.xml.h 2017-11-16 18:44:33.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/hw/texdesc_3d.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -8,12 +8,12 @@ git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- texdesc_3d.xml ( 3183 bytes, from 2017-10-31 19:05:01) -- copyright.xml ( 1597 bytes, from 2016-10-29 07:29:22) -- common.xml ( 26187 bytes, from 2017-10-31 19:05:01) -- common_3d.xml ( 14615 bytes, from 2017-11-04 14:03:35) +- texdesc_3d.xml ( 3183 bytes, from 2018-02-10 13:09:26) +- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) +- common.xml ( 35468 bytes, from 2018-02-10 13:09:26) +- common_3d.xml ( 14843 bytes, from 2019-01-18 10:13:41) -Copyright (C) 2012-2017 by the following authors: +Copyright (C) 2012-2018 by the following authors: - Wladimir J. 
van der Laan - Christian Gmeiner - Lucas Stach @@ -67,6 +67,9 @@ #define TEXDESC_CONFIG0_FORMAT__SHIFT 13 #define TEXDESC_CONFIG0_FORMAT(x) (((x) << TEXDESC_CONFIG0_FORMAT__SHIFT) & TEXDESC_CONFIG0_FORMAT__MASK) #define TEXDESC_CONFIG0_ROUND_UV 0x00080000 +#define TEXDESC_CONFIG0_ADDRESSING_MODE__MASK 0x00300000 +#define TEXDESC_CONFIG0_ADDRESSING_MODE__SHIFT 20 +#define TEXDESC_CONFIG0_ADDRESSING_MODE(x) (((x) << TEXDESC_CONFIG0_ADDRESSING_MODE__SHIFT) & TEXDESC_CONFIG0_ADDRESSING_MODE__MASK) #define TEXDESC_CONFIG0_ENDIAN__MASK 0x00c00000 #define TEXDESC_CONFIG0_ENDIAN__SHIFT 22 #define TEXDESC_CONFIG0_ENDIAN(x) (((x) << TEXDESC_CONFIG0_ENDIAN__SHIFT) & TEXDESC_CONFIG0_ENDIAN__MASK) diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/meson.build mesa-19.0.1/src/gallium/drivers/etnaviv/meson.build --- mesa-18.3.3/src/gallium/drivers/etnaviv/meson.build 2018-06-01 16:49:01.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -101,7 +101,8 @@ include_directories : [inc_include, inc_src, inc_gallium, inc_gallium_aux], link_with : [libmesa_util, libgallium, libetnaviv], dependencies : [dep_libdrm_etnaviv], - build_by_default : false, + build_by_default : with_tools.contains('etnaviv'), + install : with_tools.contains('etnaviv'), ) driver_etnaviv = declare_dependency( diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,1880 +0,0 @@ -#ifndef A2XX_XML -#define A2XX_XML - -/* Autogenerated file, DO NOT EDIT manually! 
- -This file was generated by the rules-ng-ng headergen tool in this git repository: -http://github.com/freedreno/envytools/ -git clone https://github.com/freedreno/envytools.git - -The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 37936 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14201 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 42864 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 140514 bytes, from 2018-10-08 21:57:35) -- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) -- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) - -Copyright (C) 2013-2018 by the following authors: -- Rob Clark (robclark) -- Ilia Mirkin (imirkin) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial 
-portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - - -enum a2xx_rb_dither_type { - DITHER_PIXEL = 0, - DITHER_SUBPIXEL = 1, -}; - -enum a2xx_colorformatx { - COLORX_4_4_4_4 = 0, - COLORX_1_5_5_5 = 1, - COLORX_5_6_5 = 2, - COLORX_8 = 3, - COLORX_8_8 = 4, - COLORX_8_8_8_8 = 5, - COLORX_S8_8_8_8 = 6, - COLORX_16_FLOAT = 7, - COLORX_16_16_FLOAT = 8, - COLORX_16_16_16_16_FLOAT = 9, - COLORX_32_FLOAT = 10, - COLORX_32_32_FLOAT = 11, - COLORX_32_32_32_32_FLOAT = 12, - COLORX_2_3_3 = 13, - COLORX_8_8_8 = 14, -}; - -enum a2xx_sq_surfaceformat { - FMT_1_REVERSE = 0, - FMT_1 = 1, - FMT_8 = 2, - FMT_1_5_5_5 = 3, - FMT_5_6_5 = 4, - FMT_6_5_5 = 5, - FMT_8_8_8_8 = 6, - FMT_2_10_10_10 = 7, - FMT_8_A = 8, - FMT_8_B = 9, - FMT_8_8 = 10, - FMT_Cr_Y1_Cb_Y0 = 11, - FMT_Y1_Cr_Y0_Cb = 12, - FMT_5_5_5_1 = 13, - FMT_8_8_8_8_A = 14, - FMT_4_4_4_4 = 15, - FMT_8_8_8 = 16, - FMT_DXT1 = 18, - FMT_DXT2_3 = 19, - FMT_DXT4_5 = 20, - FMT_10_10_10_2 = 21, - FMT_24_8 = 22, - FMT_16 = 24, - FMT_16_16 = 25, - FMT_16_16_16_16 = 26, - FMT_16_EXPAND = 27, - FMT_16_16_EXPAND = 28, - FMT_16_16_16_16_EXPAND = 29, - FMT_16_FLOAT = 30, - FMT_16_16_FLOAT = 31, - FMT_16_16_16_16_FLOAT = 32, - FMT_32 = 33, - FMT_32_32 = 34, - FMT_32_32_32_32 = 35, - FMT_32_FLOAT = 36, - FMT_32_32_FLOAT = 37, - FMT_32_32_32_32_FLOAT = 38, - FMT_ATI_TC_RGB = 39, - FMT_ATI_TC_RGBA = 40, - FMT_ATI_TC_555_565_RGB = 41, - FMT_ATI_TC_555_565_RGBA = 42, - FMT_ATI_TC_RGBA_INTERP = 43, - FMT_ATI_TC_555_565_RGBA_INTERP = 44, - FMT_ETC1_RGBA_INTERP = 46, - FMT_ETC1_RGB = 47, - 
FMT_ETC1_RGBA = 48, - FMT_DXN = 49, - FMT_2_3_3 = 51, - FMT_2_10_10_10_AS_16_16_16_16 = 54, - FMT_10_10_10_2_AS_16_16_16_16 = 55, - FMT_32_32_32_FLOAT = 57, - FMT_DXT3A = 58, - FMT_DXT5A = 59, - FMT_CTX1 = 60, -}; - -enum a2xx_sq_ps_vtx_mode { - POSITION_1_VECTOR = 0, - POSITION_2_VECTORS_UNUSED = 1, - POSITION_2_VECTORS_SPRITE = 2, - POSITION_2_VECTORS_EDGE = 3, - POSITION_2_VECTORS_KILL = 4, - POSITION_2_VECTORS_SPRITE_KILL = 5, - POSITION_2_VECTORS_EDGE_KILL = 6, - MULTIPASS = 7, -}; - -enum a2xx_sq_sample_cntl { - CENTROIDS_ONLY = 0, - CENTERS_ONLY = 1, - CENTROIDS_AND_CENTERS = 2, -}; - -enum a2xx_dx_clip_space { - DXCLIP_OPENGL = 0, - DXCLIP_DIRECTX = 1, -}; - -enum a2xx_pa_su_sc_polymode { - POLY_DISABLED = 0, - POLY_DUALMODE = 1, -}; - -enum a2xx_rb_edram_mode { - EDRAM_NOP = 0, - COLOR_DEPTH = 4, - DEPTH_ONLY = 5, - EDRAM_COPY = 6, -}; - -enum a2xx_pa_sc_pattern_bit_order { - LITTLE = 0, - BIG = 1, -}; - -enum a2xx_pa_sc_auto_reset_cntl { - NEVER = 0, - EACH_PRIMITIVE = 1, - EACH_PACKET = 2, -}; - -enum a2xx_pa_pixcenter { - PIXCENTER_D3D = 0, - PIXCENTER_OGL = 1, -}; - -enum a2xx_pa_roundmode { - TRUNCATE = 0, - ROUND = 1, - ROUNDTOEVEN = 2, - ROUNDTOODD = 3, -}; - -enum a2xx_pa_quantmode { - ONE_SIXTEENTH = 0, - ONE_EIGTH = 1, - ONE_QUARTER = 2, - ONE_HALF = 3, - ONE = 4, -}; - -enum a2xx_rb_copy_sample_select { - SAMPLE_0 = 0, - SAMPLE_1 = 1, - SAMPLE_2 = 2, - SAMPLE_3 = 3, - SAMPLE_01 = 4, - SAMPLE_23 = 5, - SAMPLE_0123 = 6, -}; - -enum a2xx_rb_blend_opcode { - BLEND2_DST_PLUS_SRC = 0, - BLEND2_SRC_MINUS_DST = 1, - BLEND2_MIN_DST_SRC = 2, - BLEND2_MAX_DST_SRC = 3, - BLEND2_DST_MINUS_SRC = 4, - BLEND2_DST_PLUS_SRC_BIAS = 5, -}; - -enum adreno_mmu_clnt_beh { - BEH_NEVR = 0, - BEH_TRAN_RNG = 1, - BEH_TRAN_FLT = 2, -}; - -enum sq_tex_clamp { - SQ_TEX_WRAP = 0, - SQ_TEX_MIRROR = 1, - SQ_TEX_CLAMP_LAST_TEXEL = 2, - SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 3, - SQ_TEX_CLAMP_HALF_BORDER = 4, - SQ_TEX_MIRROR_ONCE_HALF_BORDER = 5, - SQ_TEX_CLAMP_BORDER = 6, - 
SQ_TEX_MIRROR_ONCE_BORDER = 7, -}; - -enum sq_tex_swiz { - SQ_TEX_X = 0, - SQ_TEX_Y = 1, - SQ_TEX_Z = 2, - SQ_TEX_W = 3, - SQ_TEX_ZERO = 4, - SQ_TEX_ONE = 5, -}; - -enum sq_tex_filter { - SQ_TEX_FILTER_POINT = 0, - SQ_TEX_FILTER_BILINEAR = 1, - SQ_TEX_FILTER_BICUBIC = 2, -}; - -#define REG_A2XX_RBBM_PATCH_RELEASE 0x00000001 - -#define REG_A2XX_RBBM_CNTL 0x0000003b - -#define REG_A2XX_RBBM_SOFT_RESET 0x0000003c - -#define REG_A2XX_CP_PFP_UCODE_ADDR 0x000000c0 - -#define REG_A2XX_CP_PFP_UCODE_DATA 0x000000c1 - -#define REG_A2XX_MH_MMU_CONFIG 0x00000040 -#define A2XX_MH_MMU_CONFIG_MMU_ENABLE 0x00000001 -#define A2XX_MH_MMU_CONFIG_SPLIT_MODE_ENABLE 0x00000002 -#define A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__MASK 0x00000030 -#define A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__SHIFT 4 -static inline uint32_t A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -{ - return ((val) << A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__MASK; -} -#define A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__MASK 0x000000c0 -#define A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__SHIFT 6 -static inline uint32_t A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -{ - return ((val) << A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__MASK; -} -#define A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__MASK 0x00000300 -#define A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__SHIFT 8 -static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -{ - return ((val) << A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__MASK; -} -#define A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__MASK 0x00000c00 -#define A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__SHIFT 10 -static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -{ - return ((val) << A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__MASK; -} 
-#define A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__MASK 0x00003000 -#define A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__SHIFT 12 -static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -{ - return ((val) << A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__MASK; -} -#define A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__MASK 0x0000c000 -#define A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__SHIFT 14 -static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -{ - return ((val) << A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__MASK; -} -#define A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__MASK 0x00030000 -#define A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__SHIFT 16 -static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -{ - return ((val) << A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__MASK; -} -#define A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__MASK 0x000c0000 -#define A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__SHIFT 18 -static inline uint32_t A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -{ - return ((val) << A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__MASK; -} -#define A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__MASK 0x00300000 -#define A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__SHIFT 20 -static inline uint32_t A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -{ - return ((val) << A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__MASK; -} -#define A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__MASK 0x00c00000 -#define A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__SHIFT 22 -static inline uint32_t A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -{ - return ((val) << A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__SHIFT) & 
A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__MASK; -} -#define A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__MASK 0x03000000 -#define A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__SHIFT 24 -static inline uint32_t A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -{ - return ((val) << A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__MASK; -} - -#define REG_A2XX_MH_MMU_VA_RANGE 0x00000041 -#define A2XX_MH_MMU_VA_RANGE_NUM_64KB_REGIONS__MASK 0x00000fff -#define A2XX_MH_MMU_VA_RANGE_NUM_64KB_REGIONS__SHIFT 0 -static inline uint32_t A2XX_MH_MMU_VA_RANGE_NUM_64KB_REGIONS(uint32_t val) -{ - return ((val) << A2XX_MH_MMU_VA_RANGE_NUM_64KB_REGIONS__SHIFT) & A2XX_MH_MMU_VA_RANGE_NUM_64KB_REGIONS__MASK; -} -#define A2XX_MH_MMU_VA_RANGE_VA_BASE__MASK 0xfffff000 -#define A2XX_MH_MMU_VA_RANGE_VA_BASE__SHIFT 12 -static inline uint32_t A2XX_MH_MMU_VA_RANGE_VA_BASE(uint32_t val) -{ - return ((val) << A2XX_MH_MMU_VA_RANGE_VA_BASE__SHIFT) & A2XX_MH_MMU_VA_RANGE_VA_BASE__MASK; -} - -#define REG_A2XX_MH_MMU_PT_BASE 0x00000042 - -#define REG_A2XX_MH_MMU_PAGE_FAULT 0x00000043 - -#define REG_A2XX_MH_MMU_TRAN_ERROR 0x00000044 - -#define REG_A2XX_MH_MMU_INVALIDATE 0x00000045 -#define A2XX_MH_MMU_INVALIDATE_INVALIDATE_ALL 0x00000001 -#define A2XX_MH_MMU_INVALIDATE_INVALIDATE_TC 0x00000002 - -#define REG_A2XX_MH_MMU_MPU_BASE 0x00000046 - -#define REG_A2XX_MH_MMU_MPU_END 0x00000047 - -#define REG_A2XX_NQWAIT_UNTIL 0x00000394 - -#define REG_A2XX_RBBM_PERFCOUNTER1_SELECT 0x00000395 - -#define REG_A2XX_RBBM_PERFCOUNTER1_LO 0x00000397 - -#define REG_A2XX_RBBM_PERFCOUNTER1_HI 0x00000398 - -#define REG_A2XX_RBBM_DEBUG 0x0000039b - -#define REG_A2XX_RBBM_PM_OVERRIDE1 0x0000039c -#define A2XX_RBBM_PM_OVERRIDE1_RBBM_AHBCLK_PM_OVERRIDE 0x00000001 -#define A2XX_RBBM_PM_OVERRIDE1_SC_REG_SCLK_PM_OVERRIDE 0x00000002 -#define A2XX_RBBM_PM_OVERRIDE1_SC_SCLK_PM_OVERRIDE 0x00000004 -#define A2XX_RBBM_PM_OVERRIDE1_SP_TOP_SCLK_PM_OVERRIDE 0x00000008 -#define 
A2XX_RBBM_PM_OVERRIDE1_SP_V0_SCLK_PM_OVERRIDE 0x00000010 -#define A2XX_RBBM_PM_OVERRIDE1_SQ_REG_SCLK_PM_OVERRIDE 0x00000020 -#define A2XX_RBBM_PM_OVERRIDE1_SQ_REG_FIFOS_SCLK_PM_OVERRIDE 0x00000040 -#define A2XX_RBBM_PM_OVERRIDE1_SQ_CONST_MEM_SCLK_PM_OVERRIDE 0x00000080 -#define A2XX_RBBM_PM_OVERRIDE1_SQ_SQ_SCLK_PM_OVERRIDE 0x00000100 -#define A2XX_RBBM_PM_OVERRIDE1_SX_SCLK_PM_OVERRIDE 0x00000200 -#define A2XX_RBBM_PM_OVERRIDE1_SX_REG_SCLK_PM_OVERRIDE 0x00000400 -#define A2XX_RBBM_PM_OVERRIDE1_TCM_TCO_SCLK_PM_OVERRIDE 0x00000800 -#define A2XX_RBBM_PM_OVERRIDE1_TCM_TCM_SCLK_PM_OVERRIDE 0x00001000 -#define A2XX_RBBM_PM_OVERRIDE1_TCM_TCD_SCLK_PM_OVERRIDE 0x00002000 -#define A2XX_RBBM_PM_OVERRIDE1_TCM_REG_SCLK_PM_OVERRIDE 0x00004000 -#define A2XX_RBBM_PM_OVERRIDE1_TPC_TPC_SCLK_PM_OVERRIDE 0x00008000 -#define A2XX_RBBM_PM_OVERRIDE1_TPC_REG_SCLK_PM_OVERRIDE 0x00010000 -#define A2XX_RBBM_PM_OVERRIDE1_TCF_TCA_SCLK_PM_OVERRIDE 0x00020000 -#define A2XX_RBBM_PM_OVERRIDE1_TCF_TCB_SCLK_PM_OVERRIDE 0x00040000 -#define A2XX_RBBM_PM_OVERRIDE1_TCF_TCB_READ_SCLK_PM_OVERRIDE 0x00080000 -#define A2XX_RBBM_PM_OVERRIDE1_TP_TP_SCLK_PM_OVERRIDE 0x00100000 -#define A2XX_RBBM_PM_OVERRIDE1_TP_REG_SCLK_PM_OVERRIDE 0x00200000 -#define A2XX_RBBM_PM_OVERRIDE1_CP_G_SCLK_PM_OVERRIDE 0x00400000 -#define A2XX_RBBM_PM_OVERRIDE1_CP_REG_SCLK_PM_OVERRIDE 0x00800000 -#define A2XX_RBBM_PM_OVERRIDE1_CP_G_REG_SCLK_PM_OVERRIDE 0x01000000 -#define A2XX_RBBM_PM_OVERRIDE1_SPI_SCLK_PM_OVERRIDE 0x02000000 -#define A2XX_RBBM_PM_OVERRIDE1_RB_REG_SCLK_PM_OVERRIDE 0x04000000 -#define A2XX_RBBM_PM_OVERRIDE1_RB_SCLK_PM_OVERRIDE 0x08000000 -#define A2XX_RBBM_PM_OVERRIDE1_MH_MH_SCLK_PM_OVERRIDE 0x10000000 -#define A2XX_RBBM_PM_OVERRIDE1_MH_REG_SCLK_PM_OVERRIDE 0x20000000 -#define A2XX_RBBM_PM_OVERRIDE1_MH_MMU_SCLK_PM_OVERRIDE 0x40000000 -#define A2XX_RBBM_PM_OVERRIDE1_MH_TCROQ_SCLK_PM_OVERRIDE 0x80000000 - -#define REG_A2XX_RBBM_PM_OVERRIDE2 0x0000039d - -#define REG_A2XX_RBBM_DEBUG_OUT 0x000003a0 - -#define 
REG_A2XX_RBBM_DEBUG_CNTL 0x000003a1 - -#define REG_A2XX_RBBM_READ_ERROR 0x000003b3 - -#define REG_A2XX_RBBM_INT_CNTL 0x000003b4 -#define A2XX_RBBM_INT_CNTL_RDERR_INT_MASK 0x00000001 -#define A2XX_RBBM_INT_CNTL_DISPLAY_UPDATE_INT_MASK 0x00000002 -#define A2XX_RBBM_INT_CNTL_GUI_IDLE_INT_MASK 0x00080000 - -#define REG_A2XX_RBBM_INT_STATUS 0x000003b5 - -#define REG_A2XX_RBBM_INT_ACK 0x000003b6 - -#define REG_A2XX_MASTER_INT_SIGNAL 0x000003b7 -#define A2XX_MASTER_INT_SIGNAL_MH_INT_STAT 0x00000020 -#define A2XX_MASTER_INT_SIGNAL_SQ_INT_STAT 0x04000000 -#define A2XX_MASTER_INT_SIGNAL_CP_INT_STAT 0x40000000 -#define A2XX_MASTER_INT_SIGNAL_RBBM_INT_STAT 0x80000000 - -#define REG_A2XX_RBBM_PERIPHID1 0x000003f9 - -#define REG_A2XX_RBBM_PERIPHID2 0x000003fa - -#define REG_A2XX_CP_PERFMON_CNTL 0x00000444 - -#define REG_A2XX_CP_PERFCOUNTER_SELECT 0x00000445 - -#define REG_A2XX_CP_PERFCOUNTER_LO 0x00000446 - -#define REG_A2XX_CP_PERFCOUNTER_HI 0x00000447 - -#define REG_A2XX_RBBM_STATUS 0x000005d0 -#define A2XX_RBBM_STATUS_CMDFIFO_AVAIL__MASK 0x0000001f -#define A2XX_RBBM_STATUS_CMDFIFO_AVAIL__SHIFT 0 -static inline uint32_t A2XX_RBBM_STATUS_CMDFIFO_AVAIL(uint32_t val) -{ - return ((val) << A2XX_RBBM_STATUS_CMDFIFO_AVAIL__SHIFT) & A2XX_RBBM_STATUS_CMDFIFO_AVAIL__MASK; -} -#define A2XX_RBBM_STATUS_TC_BUSY 0x00000020 -#define A2XX_RBBM_STATUS_HIRQ_PENDING 0x00000100 -#define A2XX_RBBM_STATUS_CPRQ_PENDING 0x00000200 -#define A2XX_RBBM_STATUS_CFRQ_PENDING 0x00000400 -#define A2XX_RBBM_STATUS_PFRQ_PENDING 0x00000800 -#define A2XX_RBBM_STATUS_VGT_BUSY_NO_DMA 0x00001000 -#define A2XX_RBBM_STATUS_RBBM_WU_BUSY 0x00004000 -#define A2XX_RBBM_STATUS_CP_NRT_BUSY 0x00010000 -#define A2XX_RBBM_STATUS_MH_BUSY 0x00040000 -#define A2XX_RBBM_STATUS_MH_COHERENCY_BUSY 0x00080000 -#define A2XX_RBBM_STATUS_SX_BUSY 0x00200000 -#define A2XX_RBBM_STATUS_TPC_BUSY 0x00400000 -#define A2XX_RBBM_STATUS_SC_CNTX_BUSY 0x01000000 -#define A2XX_RBBM_STATUS_PA_BUSY 0x02000000 -#define A2XX_RBBM_STATUS_VGT_BUSY 
0x04000000 -#define A2XX_RBBM_STATUS_SQ_CNTX17_BUSY 0x08000000 -#define A2XX_RBBM_STATUS_SQ_CNTX0_BUSY 0x10000000 -#define A2XX_RBBM_STATUS_RB_CNTX_BUSY 0x40000000 -#define A2XX_RBBM_STATUS_GUI_ACTIVE 0x80000000 - -#define REG_A2XX_MH_ARBITER_CONFIG 0x00000a40 -#define A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT__MASK 0x0000003f -#define A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT__SHIFT 0 -static inline uint32_t A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT(uint32_t val) -{ - return ((val) << A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT__SHIFT) & A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT__MASK; -} -#define A2XX_MH_ARBITER_CONFIG_SAME_PAGE_GRANULARITY 0x00000040 -#define A2XX_MH_ARBITER_CONFIG_L1_ARB_ENABLE 0x00000080 -#define A2XX_MH_ARBITER_CONFIG_L1_ARB_HOLD_ENABLE 0x00000100 -#define A2XX_MH_ARBITER_CONFIG_L2_ARB_CONTROL 0x00000200 -#define A2XX_MH_ARBITER_CONFIG_PAGE_SIZE__MASK 0x00001c00 -#define A2XX_MH_ARBITER_CONFIG_PAGE_SIZE__SHIFT 10 -static inline uint32_t A2XX_MH_ARBITER_CONFIG_PAGE_SIZE(uint32_t val) -{ - return ((val) << A2XX_MH_ARBITER_CONFIG_PAGE_SIZE__SHIFT) & A2XX_MH_ARBITER_CONFIG_PAGE_SIZE__MASK; -} -#define A2XX_MH_ARBITER_CONFIG_TC_REORDER_ENABLE 0x00002000 -#define A2XX_MH_ARBITER_CONFIG_TC_ARB_HOLD_ENABLE 0x00004000 -#define A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT_ENABLE 0x00008000 -#define A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT__MASK 0x003f0000 -#define A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT__SHIFT 16 -static inline uint32_t A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT(uint32_t val) -{ - return ((val) << A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT__SHIFT) & A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT__MASK; -} -#define A2XX_MH_ARBITER_CONFIG_CP_CLNT_ENABLE 0x00400000 -#define A2XX_MH_ARBITER_CONFIG_VGT_CLNT_ENABLE 0x00800000 -#define A2XX_MH_ARBITER_CONFIG_TC_CLNT_ENABLE 0x01000000 -#define A2XX_MH_ARBITER_CONFIG_RB_CLNT_ENABLE 0x02000000 -#define A2XX_MH_ARBITER_CONFIG_PA_CLNT_ENABLE 0x04000000 - -#define REG_A2XX_MH_INTERRUPT_MASK 0x00000a42 -#define 
A2XX_MH_INTERRUPT_MASK_AXI_READ_ERROR 0x00000001 -#define A2XX_MH_INTERRUPT_MASK_AXI_WRITE_ERROR 0x00000002 -#define A2XX_MH_INTERRUPT_MASK_MMU_PAGE_FAULT 0x00000004 - -#define REG_A2XX_MH_INTERRUPT_STATUS 0x00000a43 - -#define REG_A2XX_MH_INTERRUPT_CLEAR 0x00000a44 - -#define REG_A2XX_MH_CLNT_INTF_CTRL_CONFIG1 0x00000a54 - -#define REG_A2XX_MH_CLNT_INTF_CTRL_CONFIG2 0x00000a55 - -#define REG_A2XX_A220_VSC_BIN_SIZE 0x00000c01 -#define A2XX_A220_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f -#define A2XX_A220_VSC_BIN_SIZE_WIDTH__SHIFT 0 -static inline uint32_t A2XX_A220_VSC_BIN_SIZE_WIDTH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A2XX_A220_VSC_BIN_SIZE_WIDTH__SHIFT) & A2XX_A220_VSC_BIN_SIZE_WIDTH__MASK; -} -#define A2XX_A220_VSC_BIN_SIZE_HEIGHT__MASK 0x000003e0 -#define A2XX_A220_VSC_BIN_SIZE_HEIGHT__SHIFT 5 -static inline uint32_t A2XX_A220_VSC_BIN_SIZE_HEIGHT(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A2XX_A220_VSC_BIN_SIZE_HEIGHT__SHIFT) & A2XX_A220_VSC_BIN_SIZE_HEIGHT__MASK; -} - -static inline uint32_t REG_A2XX_VSC_PIPE(uint32_t i0) { return 0x00000c06 + 0x3*i0; } - -static inline uint32_t REG_A2XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c06 + 0x3*i0; } - -static inline uint32_t REG_A2XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c07 + 0x3*i0; } - -static inline uint32_t REG_A2XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c08 + 0x3*i0; } - -#define REG_A2XX_PC_DEBUG_CNTL 0x00000c38 - -#define REG_A2XX_PC_DEBUG_DATA 0x00000c39 - -#define REG_A2XX_PA_SC_VIZ_QUERY_STATUS 0x00000c44 - -#define REG_A2XX_GRAS_DEBUG_CNTL 0x00000c80 - -#define REG_A2XX_PA_SU_DEBUG_CNTL 0x00000c80 - -#define REG_A2XX_GRAS_DEBUG_DATA 0x00000c81 - -#define REG_A2XX_PA_SU_DEBUG_DATA 0x00000c81 - -#define REG_A2XX_PA_SU_FACE_DATA 0x00000c86 -#define A2XX_PA_SU_FACE_DATA_BASE_ADDR__MASK 0xffffffe0 -#define A2XX_PA_SU_FACE_DATA_BASE_ADDR__SHIFT 5 -static inline uint32_t A2XX_PA_SU_FACE_DATA_BASE_ADDR(uint32_t val) -{ - return ((val) << 
A2XX_PA_SU_FACE_DATA_BASE_ADDR__SHIFT) & A2XX_PA_SU_FACE_DATA_BASE_ADDR__MASK; -} - -#define REG_A2XX_SQ_GPR_MANAGEMENT 0x00000d00 -#define A2XX_SQ_GPR_MANAGEMENT_REG_DYNAMIC 0x00000001 -#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__MASK 0x00000ff0 -#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__SHIFT 4 -static inline uint32_t A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX(uint32_t val) -{ - return ((val) << A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__SHIFT) & A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__MASK; -} -#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__MASK 0x000ff000 -#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__SHIFT 12 -static inline uint32_t A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX(uint32_t val) -{ - return ((val) << A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__SHIFT) & A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__MASK; -} - -#define REG_A2XX_SQ_FLOW_CONTROL 0x00000d01 - -#define REG_A2XX_SQ_INST_STORE_MANAGMENT 0x00000d02 -#define A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__MASK 0x00000fff -#define A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__SHIFT 0 -static inline uint32_t A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX(uint32_t val) -{ - return ((val) << A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__SHIFT) & A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__MASK; -} -#define A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__MASK 0x0fff0000 -#define A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__SHIFT 16 -static inline uint32_t A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX(uint32_t val) -{ - return ((val) << A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__SHIFT) & A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__MASK; -} - -#define REG_A2XX_SQ_DEBUG_MISC 0x00000d05 - -#define REG_A2XX_SQ_INT_CNTL 0x00000d34 - -#define REG_A2XX_SQ_INT_STATUS 0x00000d35 - -#define REG_A2XX_SQ_INT_ACK 0x00000d36 - -#define REG_A2XX_SQ_DEBUG_INPUT_FSM 0x00000dae - -#define REG_A2XX_SQ_DEBUG_CONST_MGR_FSM 0x00000daf - -#define REG_A2XX_SQ_DEBUG_TP_FSM 0x00000db0 - -#define REG_A2XX_SQ_DEBUG_FSM_ALU_0 0x00000db1 - -#define REG_A2XX_SQ_DEBUG_FSM_ALU_1 
0x00000db2 - -#define REG_A2XX_SQ_DEBUG_EXP_ALLOC 0x00000db3 - -#define REG_A2XX_SQ_DEBUG_PTR_BUFF 0x00000db4 - -#define REG_A2XX_SQ_DEBUG_GPR_VTX 0x00000db5 - -#define REG_A2XX_SQ_DEBUG_GPR_PIX 0x00000db6 - -#define REG_A2XX_SQ_DEBUG_TB_STATUS_SEL 0x00000db7 - -#define REG_A2XX_SQ_DEBUG_VTX_TB_0 0x00000db8 - -#define REG_A2XX_SQ_DEBUG_VTX_TB_1 0x00000db9 - -#define REG_A2XX_SQ_DEBUG_VTX_TB_STATUS_REG 0x00000dba - -#define REG_A2XX_SQ_DEBUG_VTX_TB_STATE_MEM 0x00000dbb - -#define REG_A2XX_SQ_DEBUG_PIX_TB_0 0x00000dbc - -#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_0 0x00000dbd - -#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_1 0x00000dbe - -#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_2 0x00000dbf - -#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_3 0x00000dc0 - -#define REG_A2XX_SQ_DEBUG_PIX_TB_STATE_MEM 0x00000dc1 - -#define REG_A2XX_TC_CNTL_STATUS 0x00000e00 -#define A2XX_TC_CNTL_STATUS_L2_INVALIDATE 0x00000001 - -#define REG_A2XX_TP0_CHICKEN 0x00000e1e - -#define REG_A2XX_RB_BC_CONTROL 0x00000f01 -#define A2XX_RB_BC_CONTROL_ACCUM_LINEAR_MODE_ENABLE 0x00000001 -#define A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__MASK 0x00000006 -#define A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__SHIFT 1 -static inline uint32_t A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT(uint32_t val) -{ - return ((val) << A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__SHIFT) & A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__MASK; -} -#define A2XX_RB_BC_CONTROL_DISABLE_EDRAM_CAM 0x00000008 -#define A2XX_RB_BC_CONTROL_DISABLE_EZ_FAST_CONTEXT_SWITCH 0x00000010 -#define A2XX_RB_BC_CONTROL_DISABLE_EZ_NULL_ZCMD_DROP 0x00000020 -#define A2XX_RB_BC_CONTROL_DISABLE_LZ_NULL_ZCMD_DROP 0x00000040 -#define A2XX_RB_BC_CONTROL_ENABLE_AZ_THROTTLE 0x00000080 -#define A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__MASK 0x00001f00 -#define A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__SHIFT 8 -static inline uint32_t A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT(uint32_t val) -{ - return ((val) << A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__SHIFT) & 
A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__MASK; -} -#define A2XX_RB_BC_CONTROL_ENABLE_CRC_UPDATE 0x00004000 -#define A2XX_RB_BC_CONTROL_CRC_MODE 0x00008000 -#define A2XX_RB_BC_CONTROL_DISABLE_SAMPLE_COUNTERS 0x00010000 -#define A2XX_RB_BC_CONTROL_DISABLE_ACCUM 0x00020000 -#define A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__MASK 0x003c0000 -#define A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__SHIFT 18 -static inline uint32_t A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK(uint32_t val) -{ - return ((val) << A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__SHIFT) & A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__MASK; -} -#define A2XX_RB_BC_CONTROL_LINEAR_PERFORMANCE_ENABLE 0x00400000 -#define A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__MASK 0x07800000 -#define A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__SHIFT 23 -static inline uint32_t A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT(uint32_t val) -{ - return ((val) << A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__SHIFT) & A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__MASK; -} -#define A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__MASK 0x18000000 -#define A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__SHIFT 27 -static inline uint32_t A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT(uint32_t val) -{ - return ((val) << A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__SHIFT) & A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__MASK; -} -#define A2XX_RB_BC_CONTROL_MEM_EXPORT_LINEAR_MODE_ENABLE 0x20000000 -#define A2XX_RB_BC_CONTROL_CRC_SYSTEM 0x40000000 -#define A2XX_RB_BC_CONTROL_RESERVED6 0x80000000 - -#define REG_A2XX_RB_EDRAM_INFO 0x00000f02 - -#define REG_A2XX_RB_DEBUG_CNTL 0x00000f26 - -#define REG_A2XX_RB_DEBUG_DATA 0x00000f27 - -#define REG_A2XX_RB_SURFACE_INFO 0x00002000 - -#define REG_A2XX_RB_COLOR_INFO 0x00002001 -#define A2XX_RB_COLOR_INFO_FORMAT__MASK 0x0000000f -#define A2XX_RB_COLOR_INFO_FORMAT__SHIFT 0 -static inline uint32_t A2XX_RB_COLOR_INFO_FORMAT(enum a2xx_colorformatx val) -{ - return ((val) << A2XX_RB_COLOR_INFO_FORMAT__SHIFT) & A2XX_RB_COLOR_INFO_FORMAT__MASK; -} -#define 
A2XX_RB_COLOR_INFO_ROUND_MODE__MASK 0x00000030 -#define A2XX_RB_COLOR_INFO_ROUND_MODE__SHIFT 4 -static inline uint32_t A2XX_RB_COLOR_INFO_ROUND_MODE(uint32_t val) -{ - return ((val) << A2XX_RB_COLOR_INFO_ROUND_MODE__SHIFT) & A2XX_RB_COLOR_INFO_ROUND_MODE__MASK; -} -#define A2XX_RB_COLOR_INFO_LINEAR 0x00000040 -#define A2XX_RB_COLOR_INFO_ENDIAN__MASK 0x00000180 -#define A2XX_RB_COLOR_INFO_ENDIAN__SHIFT 7 -static inline uint32_t A2XX_RB_COLOR_INFO_ENDIAN(uint32_t val) -{ - return ((val) << A2XX_RB_COLOR_INFO_ENDIAN__SHIFT) & A2XX_RB_COLOR_INFO_ENDIAN__MASK; -} -#define A2XX_RB_COLOR_INFO_SWAP__MASK 0x00000600 -#define A2XX_RB_COLOR_INFO_SWAP__SHIFT 9 -static inline uint32_t A2XX_RB_COLOR_INFO_SWAP(uint32_t val) -{ - return ((val) << A2XX_RB_COLOR_INFO_SWAP__SHIFT) & A2XX_RB_COLOR_INFO_SWAP__MASK; -} -#define A2XX_RB_COLOR_INFO_BASE__MASK 0xfffff000 -#define A2XX_RB_COLOR_INFO_BASE__SHIFT 12 -static inline uint32_t A2XX_RB_COLOR_INFO_BASE(uint32_t val) -{ - assert(!(val & 0x3ff)); - return ((val >> 10) << A2XX_RB_COLOR_INFO_BASE__SHIFT) & A2XX_RB_COLOR_INFO_BASE__MASK; -} - -#define REG_A2XX_RB_DEPTH_INFO 0x00002002 -#define A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK 0x00000001 -#define A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT 0 -static inline uint32_t A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum adreno_rb_depth_format val) -{ - return ((val) << A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT) & A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK; -} -#define A2XX_RB_DEPTH_INFO_DEPTH_BASE__MASK 0xfffff000 -#define A2XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT 12 -static inline uint32_t A2XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << A2XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A2XX_RB_DEPTH_INFO_DEPTH_BASE__MASK; -} - -#define REG_A2XX_A225_RB_COLOR_INFO3 0x00002005 - -#define REG_A2XX_COHER_DEST_BASE_0 0x00002006 - -#define REG_A2XX_PA_SC_SCREEN_SCISSOR_TL 0x0000200e -#define A2XX_PA_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define 
A2XX_PA_SC_SCREEN_SCISSOR_TL_X__MASK 0x00007fff -#define A2XX_PA_SC_SCREEN_SCISSOR_TL_X__SHIFT 0 -static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_TL_X(uint32_t val) -{ - return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_TL_X__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_TL_X__MASK; -} -#define A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__MASK 0x7fff0000 -#define A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__SHIFT 16 -static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_TL_Y(uint32_t val) -{ - return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__MASK; -} - -#define REG_A2XX_PA_SC_SCREEN_SCISSOR_BR 0x0000200f -#define A2XX_PA_SC_SCREEN_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A2XX_PA_SC_SCREEN_SCISSOR_BR_X__MASK 0x00007fff -#define A2XX_PA_SC_SCREEN_SCISSOR_BR_X__SHIFT 0 -static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_BR_X(uint32_t val) -{ - return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_BR_X__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_BR_X__MASK; -} -#define A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__MASK 0x7fff0000 -#define A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__SHIFT 16 -static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(uint32_t val) -{ - return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__MASK; -} - -#define REG_A2XX_PA_SC_WINDOW_OFFSET 0x00002080 -#define A2XX_PA_SC_WINDOW_OFFSET_X__MASK 0x00007fff -#define A2XX_PA_SC_WINDOW_OFFSET_X__SHIFT 0 -static inline uint32_t A2XX_PA_SC_WINDOW_OFFSET_X(int32_t val) -{ - return ((val) << A2XX_PA_SC_WINDOW_OFFSET_X__SHIFT) & A2XX_PA_SC_WINDOW_OFFSET_X__MASK; -} -#define A2XX_PA_SC_WINDOW_OFFSET_Y__MASK 0x7fff0000 -#define A2XX_PA_SC_WINDOW_OFFSET_Y__SHIFT 16 -static inline uint32_t A2XX_PA_SC_WINDOW_OFFSET_Y(int32_t val) -{ - return ((val) << A2XX_PA_SC_WINDOW_OFFSET_Y__SHIFT) & A2XX_PA_SC_WINDOW_OFFSET_Y__MASK; -} -#define A2XX_PA_SC_WINDOW_OFFSET_DISABLE 0x80000000 - -#define REG_A2XX_PA_SC_WINDOW_SCISSOR_TL 0x00002081 -#define A2XX_PA_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define 
A2XX_PA_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff -#define A2XX_PA_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 -static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_TL_X(uint32_t val) -{ - return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_TL_X__MASK; -} -#define A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 -#define A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 -static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) -{ - return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__MASK; -} - -#define REG_A2XX_PA_SC_WINDOW_SCISSOR_BR 0x00002082 -#define A2XX_PA_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A2XX_PA_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff -#define A2XX_PA_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 -static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_BR_X(uint32_t val) -{ - return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_BR_X__MASK; -} -#define A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 -#define A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 -static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) -{ - return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__MASK; -} - -#define REG_A2XX_UNKNOWN_2010 0x00002010 - -#define REG_A2XX_VGT_MAX_VTX_INDX 0x00002100 - -#define REG_A2XX_VGT_MIN_VTX_INDX 0x00002101 - -#define REG_A2XX_VGT_INDX_OFFSET 0x00002102 - -#define REG_A2XX_A225_PC_MULTI_PRIM_IB_RESET_INDX 0x00002103 - -#define REG_A2XX_RB_COLOR_MASK 0x00002104 -#define A2XX_RB_COLOR_MASK_WRITE_RED 0x00000001 -#define A2XX_RB_COLOR_MASK_WRITE_GREEN 0x00000002 -#define A2XX_RB_COLOR_MASK_WRITE_BLUE 0x00000004 -#define A2XX_RB_COLOR_MASK_WRITE_ALPHA 0x00000008 - -#define REG_A2XX_RB_BLEND_RED 0x00002105 - -#define REG_A2XX_RB_BLEND_GREEN 0x00002106 - -#define REG_A2XX_RB_BLEND_BLUE 0x00002107 - -#define REG_A2XX_RB_BLEND_ALPHA 0x00002108 - -#define REG_A2XX_RB_FOG_COLOR 0x00002109 -#define A2XX_RB_FOG_COLOR_FOG_RED__MASK 
0x000000ff -#define A2XX_RB_FOG_COLOR_FOG_RED__SHIFT 0 -static inline uint32_t A2XX_RB_FOG_COLOR_FOG_RED(uint32_t val) -{ - return ((val) << A2XX_RB_FOG_COLOR_FOG_RED__SHIFT) & A2XX_RB_FOG_COLOR_FOG_RED__MASK; -} -#define A2XX_RB_FOG_COLOR_FOG_GREEN__MASK 0x0000ff00 -#define A2XX_RB_FOG_COLOR_FOG_GREEN__SHIFT 8 -static inline uint32_t A2XX_RB_FOG_COLOR_FOG_GREEN(uint32_t val) -{ - return ((val) << A2XX_RB_FOG_COLOR_FOG_GREEN__SHIFT) & A2XX_RB_FOG_COLOR_FOG_GREEN__MASK; -} -#define A2XX_RB_FOG_COLOR_FOG_BLUE__MASK 0x00ff0000 -#define A2XX_RB_FOG_COLOR_FOG_BLUE__SHIFT 16 -static inline uint32_t A2XX_RB_FOG_COLOR_FOG_BLUE(uint32_t val) -{ - return ((val) << A2XX_RB_FOG_COLOR_FOG_BLUE__SHIFT) & A2XX_RB_FOG_COLOR_FOG_BLUE__MASK; -} - -#define REG_A2XX_RB_STENCILREFMASK_BF 0x0000210c -#define A2XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff -#define A2XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0 -static inline uint32_t A2XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val) -{ - return ((val) << A2XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A2XX_RB_STENCILREFMASK_BF_STENCILREF__MASK; -} -#define A2XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00 -#define A2XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8 -static inline uint32_t A2XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val) -{ - return ((val) << A2XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A2XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK; -} -#define A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000 -#define A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16 -static inline uint32_t A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val) -{ - return ((val) << A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK; -} - -#define REG_A2XX_RB_STENCILREFMASK 0x0000210d -#define A2XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff -#define A2XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0 -static inline uint32_t A2XX_RB_STENCILREFMASK_STENCILREF(uint32_t val) -{ - return 
((val) << A2XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A2XX_RB_STENCILREFMASK_STENCILREF__MASK; -} -#define A2XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00 -#define A2XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8 -static inline uint32_t A2XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val) -{ - return ((val) << A2XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A2XX_RB_STENCILREFMASK_STENCILMASK__MASK; -} -#define A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000 -#define A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16 -static inline uint32_t A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val) -{ - return ((val) << A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK; -} - -#define REG_A2XX_RB_ALPHA_REF 0x0000210e - -#define REG_A2XX_PA_CL_VPORT_XSCALE 0x0000210f -#define A2XX_PA_CL_VPORT_XSCALE__MASK 0xffffffff -#define A2XX_PA_CL_VPORT_XSCALE__SHIFT 0 -static inline uint32_t A2XX_PA_CL_VPORT_XSCALE(float val) -{ - return ((fui(val)) << A2XX_PA_CL_VPORT_XSCALE__SHIFT) & A2XX_PA_CL_VPORT_XSCALE__MASK; -} - -#define REG_A2XX_PA_CL_VPORT_XOFFSET 0x00002110 -#define A2XX_PA_CL_VPORT_XOFFSET__MASK 0xffffffff -#define A2XX_PA_CL_VPORT_XOFFSET__SHIFT 0 -static inline uint32_t A2XX_PA_CL_VPORT_XOFFSET(float val) -{ - return ((fui(val)) << A2XX_PA_CL_VPORT_XOFFSET__SHIFT) & A2XX_PA_CL_VPORT_XOFFSET__MASK; -} - -#define REG_A2XX_PA_CL_VPORT_YSCALE 0x00002111 -#define A2XX_PA_CL_VPORT_YSCALE__MASK 0xffffffff -#define A2XX_PA_CL_VPORT_YSCALE__SHIFT 0 -static inline uint32_t A2XX_PA_CL_VPORT_YSCALE(float val) -{ - return ((fui(val)) << A2XX_PA_CL_VPORT_YSCALE__SHIFT) & A2XX_PA_CL_VPORT_YSCALE__MASK; -} - -#define REG_A2XX_PA_CL_VPORT_YOFFSET 0x00002112 -#define A2XX_PA_CL_VPORT_YOFFSET__MASK 0xffffffff -#define A2XX_PA_CL_VPORT_YOFFSET__SHIFT 0 -static inline uint32_t A2XX_PA_CL_VPORT_YOFFSET(float val) -{ - return ((fui(val)) << A2XX_PA_CL_VPORT_YOFFSET__SHIFT) & A2XX_PA_CL_VPORT_YOFFSET__MASK; -} - -#define 
REG_A2XX_PA_CL_VPORT_ZSCALE 0x00002113 -#define A2XX_PA_CL_VPORT_ZSCALE__MASK 0xffffffff -#define A2XX_PA_CL_VPORT_ZSCALE__SHIFT 0 -static inline uint32_t A2XX_PA_CL_VPORT_ZSCALE(float val) -{ - return ((fui(val)) << A2XX_PA_CL_VPORT_ZSCALE__SHIFT) & A2XX_PA_CL_VPORT_ZSCALE__MASK; -} - -#define REG_A2XX_PA_CL_VPORT_ZOFFSET 0x00002114 -#define A2XX_PA_CL_VPORT_ZOFFSET__MASK 0xffffffff -#define A2XX_PA_CL_VPORT_ZOFFSET__SHIFT 0 -static inline uint32_t A2XX_PA_CL_VPORT_ZOFFSET(float val) -{ - return ((fui(val)) << A2XX_PA_CL_VPORT_ZOFFSET__SHIFT) & A2XX_PA_CL_VPORT_ZOFFSET__MASK; -} - -#define REG_A2XX_SQ_PROGRAM_CNTL 0x00002180 -#define A2XX_SQ_PROGRAM_CNTL_VS_REGS__MASK 0x000000ff -#define A2XX_SQ_PROGRAM_CNTL_VS_REGS__SHIFT 0 -static inline uint32_t A2XX_SQ_PROGRAM_CNTL_VS_REGS(uint32_t val) -{ - return ((val) << A2XX_SQ_PROGRAM_CNTL_VS_REGS__SHIFT) & A2XX_SQ_PROGRAM_CNTL_VS_REGS__MASK; -} -#define A2XX_SQ_PROGRAM_CNTL_PS_REGS__MASK 0x0000ff00 -#define A2XX_SQ_PROGRAM_CNTL_PS_REGS__SHIFT 8 -static inline uint32_t A2XX_SQ_PROGRAM_CNTL_PS_REGS(uint32_t val) -{ - return ((val) << A2XX_SQ_PROGRAM_CNTL_PS_REGS__SHIFT) & A2XX_SQ_PROGRAM_CNTL_PS_REGS__MASK; -} -#define A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE 0x00010000 -#define A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE 0x00020000 -#define A2XX_SQ_PROGRAM_CNTL_PARAM_GEN 0x00040000 -#define A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_PIX 0x00080000 -#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__MASK 0x00f00000 -#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__SHIFT 20 -static inline uint32_t A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(uint32_t val) -{ - return ((val) << A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__SHIFT) & A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__MASK; -} -#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__MASK 0x07000000 -#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__SHIFT 24 -static inline uint32_t A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE(enum a2xx_sq_ps_vtx_mode val) -{ - return ((val) << A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__SHIFT) & 
A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__MASK; -} -#define A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__MASK 0x78000000 -#define A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__SHIFT 27 -static inline uint32_t A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(uint32_t val) -{ - return ((val) << A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__SHIFT) & A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__MASK; -} -#define A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_VTX 0x80000000 - -#define REG_A2XX_SQ_CONTEXT_MISC 0x00002181 -#define A2XX_SQ_CONTEXT_MISC_INST_PRED_OPTIMIZE 0x00000001 -#define A2XX_SQ_CONTEXT_MISC_SC_OUTPUT_SCREEN_XY 0x00000002 -#define A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__MASK 0x0000000c -#define A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__SHIFT 2 -static inline uint32_t A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(enum a2xx_sq_sample_cntl val) -{ - return ((val) << A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__SHIFT) & A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__MASK; -} -#define A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__MASK 0x0000ff00 -#define A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__SHIFT 8 -static inline uint32_t A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS(uint32_t val) -{ - return ((val) << A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__SHIFT) & A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__MASK; -} -#define A2XX_SQ_CONTEXT_MISC_PERFCOUNTER_REF 0x00010000 -#define A2XX_SQ_CONTEXT_MISC_YEILD_OPTIMIZE 0x00020000 -#define A2XX_SQ_CONTEXT_MISC_TX_CACHE_SEL 0x00040000 - -#define REG_A2XX_SQ_INTERPOLATOR_CNTL 0x00002182 -#define A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__MASK 0x0000ffff -#define A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__SHIFT 0 -static inline uint32_t A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE(uint32_t val) -{ - return ((val) << A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__SHIFT) & A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__MASK; -} -#define A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__MASK 0xffff0000 -#define A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__SHIFT 16 -static inline uint32_t A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN(uint32_t val) -{ - return ((val) << 
A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__SHIFT) & A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__MASK; -} - -#define REG_A2XX_SQ_WRAPPING_0 0x00002183 -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__MASK 0x0000000f -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__SHIFT 0 -static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_0(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__MASK; -} -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__MASK 0x000000f0 -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__SHIFT 4 -static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_1(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__MASK; -} -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__MASK 0x00000f00 -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__SHIFT 8 -static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_2(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__MASK; -} -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__MASK 0x0000f000 -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__SHIFT 12 -static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_3(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__MASK; -} -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__MASK 0x000f0000 -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__SHIFT 16 -static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_4(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__MASK; -} -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__MASK 0x00f00000 -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__SHIFT 20 -static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_5(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__MASK; -} -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__MASK 0x0f000000 -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__SHIFT 24 -static inline uint32_t 
A2XX_SQ_WRAPPING_0_PARAM_WRAP_6(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__MASK; -} -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__MASK 0xf0000000 -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__SHIFT 28 -static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_7(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__MASK; -} - -#define REG_A2XX_SQ_WRAPPING_1 0x00002184 -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__MASK 0x0000000f -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__SHIFT 0 -static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_8(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__MASK; -} -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__MASK 0x000000f0 -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__SHIFT 4 -static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_9(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__MASK; -} -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__MASK 0x00000f00 -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__SHIFT 8 -static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_10(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__MASK; -} -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__MASK 0x0000f000 -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__SHIFT 12 -static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_11(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__MASK; -} -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__MASK 0x000f0000 -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__SHIFT 16 -static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_12(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__MASK; -} -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__MASK 0x00f00000 -#define 
A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__SHIFT 20 -static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_13(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__MASK; -} -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__MASK 0x0f000000 -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__SHIFT 24 -static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_14(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__MASK; -} -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__MASK 0xf0000000 -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__SHIFT 28 -static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_15(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__MASK; -} - -#define REG_A2XX_SQ_PS_PROGRAM 0x000021f6 -#define A2XX_SQ_PS_PROGRAM_BASE__MASK 0x00000fff -#define A2XX_SQ_PS_PROGRAM_BASE__SHIFT 0 -static inline uint32_t A2XX_SQ_PS_PROGRAM_BASE(uint32_t val) -{ - return ((val) << A2XX_SQ_PS_PROGRAM_BASE__SHIFT) & A2XX_SQ_PS_PROGRAM_BASE__MASK; -} -#define A2XX_SQ_PS_PROGRAM_SIZE__MASK 0x00fff000 -#define A2XX_SQ_PS_PROGRAM_SIZE__SHIFT 12 -static inline uint32_t A2XX_SQ_PS_PROGRAM_SIZE(uint32_t val) -{ - return ((val) << A2XX_SQ_PS_PROGRAM_SIZE__SHIFT) & A2XX_SQ_PS_PROGRAM_SIZE__MASK; -} - -#define REG_A2XX_SQ_VS_PROGRAM 0x000021f7 -#define A2XX_SQ_VS_PROGRAM_BASE__MASK 0x00000fff -#define A2XX_SQ_VS_PROGRAM_BASE__SHIFT 0 -static inline uint32_t A2XX_SQ_VS_PROGRAM_BASE(uint32_t val) -{ - return ((val) << A2XX_SQ_VS_PROGRAM_BASE__SHIFT) & A2XX_SQ_VS_PROGRAM_BASE__MASK; -} -#define A2XX_SQ_VS_PROGRAM_SIZE__MASK 0x00fff000 -#define A2XX_SQ_VS_PROGRAM_SIZE__SHIFT 12 -static inline uint32_t A2XX_SQ_VS_PROGRAM_SIZE(uint32_t val) -{ - return ((val) << A2XX_SQ_VS_PROGRAM_SIZE__SHIFT) & A2XX_SQ_VS_PROGRAM_SIZE__MASK; -} - -#define REG_A2XX_VGT_EVENT_INITIATOR 0x000021f9 - -#define REG_A2XX_VGT_DRAW_INITIATOR 0x000021fc -#define 
A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE__MASK 0x0000003f -#define A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE__SHIFT 0 -static inline uint32_t A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE(enum pc_di_primtype val) -{ - return ((val) << A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE__SHIFT) & A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE__MASK; -} -#define A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__MASK 0x000000c0 -#define A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__SHIFT 6 -static inline uint32_t A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT(enum pc_di_src_sel val) -{ - return ((val) << A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__SHIFT) & A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__MASK; -} -#define A2XX_VGT_DRAW_INITIATOR_VIS_CULL__MASK 0x00000600 -#define A2XX_VGT_DRAW_INITIATOR_VIS_CULL__SHIFT 9 -static inline uint32_t A2XX_VGT_DRAW_INITIATOR_VIS_CULL(enum pc_di_vis_cull_mode val) -{ - return ((val) << A2XX_VGT_DRAW_INITIATOR_VIS_CULL__SHIFT) & A2XX_VGT_DRAW_INITIATOR_VIS_CULL__MASK; -} -#define A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE__MASK 0x00000800 -#define A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE__SHIFT 11 -static inline uint32_t A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE(enum pc_di_index_size val) -{ - return ((val) << A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE__SHIFT) & A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE__MASK; -} -#define A2XX_VGT_DRAW_INITIATOR_NOT_EOP 0x00001000 -#define A2XX_VGT_DRAW_INITIATOR_SMALL_INDEX 0x00002000 -#define A2XX_VGT_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE 0x00004000 -#define A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__MASK 0xff000000 -#define A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__SHIFT 24 -static inline uint32_t A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES(uint32_t val) -{ - return ((val) << A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__SHIFT) & A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__MASK; -} - -#define REG_A2XX_VGT_IMMED_DATA 0x000021fd - -#define REG_A2XX_RB_DEPTHCONTROL 0x00002200 -#define A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE 0x00000001 -#define A2XX_RB_DEPTHCONTROL_Z_ENABLE 0x00000002 -#define A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE 0x00000004 -#define 
A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE 0x00000008 -#define A2XX_RB_DEPTHCONTROL_ZFUNC__MASK 0x00000070 -#define A2XX_RB_DEPTHCONTROL_ZFUNC__SHIFT 4 -static inline uint32_t A2XX_RB_DEPTHCONTROL_ZFUNC(enum adreno_compare_func val) -{ - return ((val) << A2XX_RB_DEPTHCONTROL_ZFUNC__SHIFT) & A2XX_RB_DEPTHCONTROL_ZFUNC__MASK; -} -#define A2XX_RB_DEPTHCONTROL_BACKFACE_ENABLE 0x00000080 -#define A2XX_RB_DEPTHCONTROL_STENCILFUNC__MASK 0x00000700 -#define A2XX_RB_DEPTHCONTROL_STENCILFUNC__SHIFT 8 -static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFUNC(enum adreno_compare_func val) -{ - return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFUNC__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFUNC__MASK; -} -#define A2XX_RB_DEPTHCONTROL_STENCILFAIL__MASK 0x00003800 -#define A2XX_RB_DEPTHCONTROL_STENCILFAIL__SHIFT 11 -static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFAIL(enum adreno_stencil_op val) -{ - return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFAIL__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFAIL__MASK; -} -#define A2XX_RB_DEPTHCONTROL_STENCILZPASS__MASK 0x0001c000 -#define A2XX_RB_DEPTHCONTROL_STENCILZPASS__SHIFT 14 -static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILZPASS(enum adreno_stencil_op val) -{ - return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZPASS__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZPASS__MASK; -} -#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL__MASK 0x000e0000 -#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL__SHIFT 17 -static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILZFAIL(enum adreno_stencil_op val) -{ - return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZFAIL__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZFAIL__MASK; -} -#define A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__MASK 0x00700000 -#define A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__SHIFT 20 -static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF(enum adreno_compare_func val) -{ - return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__MASK; -} -#define A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__MASK 0x03800000 -#define 
A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__SHIFT 23 -static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF(enum adreno_stencil_op val) -{ - return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__MASK; -} -#define A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__MASK 0x1c000000 -#define A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__SHIFT 26 -static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF(enum adreno_stencil_op val) -{ - return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__MASK; -} -#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__MASK 0xe0000000 -#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__SHIFT 29 -static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF(enum adreno_stencil_op val) -{ - return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__MASK; -} - -#define REG_A2XX_RB_BLEND_CONTROL 0x00002201 -#define A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__MASK 0x0000001f -#define A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__SHIFT 0 -static inline uint32_t A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(enum adreno_rb_blend_factor val) -{ - return ((val) << A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__MASK; -} -#define A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__MASK 0x000000e0 -#define A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__SHIFT 5 -static inline uint32_t A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(enum a2xx_rb_blend_opcode val) -{ - return ((val) << A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__SHIFT) & A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__MASK; -} -#define A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__MASK 0x00001f00 -#define A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__SHIFT 8 -static inline uint32_t A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(enum adreno_rb_blend_factor val) -{ - return ((val) << A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__MASK; -} -#define A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__MASK 0x001f0000 
-#define A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__SHIFT 16 -static inline uint32_t A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(enum adreno_rb_blend_factor val) -{ - return ((val) << A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__MASK; -} -#define A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__MASK 0x00e00000 -#define A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__SHIFT 21 -static inline uint32_t A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(enum a2xx_rb_blend_opcode val) -{ - return ((val) << A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__SHIFT) & A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__MASK; -} -#define A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__MASK 0x1f000000 -#define A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__SHIFT 24 -static inline uint32_t A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(enum adreno_rb_blend_factor val) -{ - return ((val) << A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__MASK; -} -#define A2XX_RB_BLEND_CONTROL_BLEND_FORCE_ENABLE 0x20000000 -#define A2XX_RB_BLEND_CONTROL_BLEND_FORCE 0x40000000 - -#define REG_A2XX_RB_COLORCONTROL 0x00002202 -#define A2XX_RB_COLORCONTROL_ALPHA_FUNC__MASK 0x00000007 -#define A2XX_RB_COLORCONTROL_ALPHA_FUNC__SHIFT 0 -static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_FUNC(enum adreno_compare_func val) -{ - return ((val) << A2XX_RB_COLORCONTROL_ALPHA_FUNC__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_FUNC__MASK; -} -#define A2XX_RB_COLORCONTROL_ALPHA_TEST_ENABLE 0x00000008 -#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_ENABLE 0x00000010 -#define A2XX_RB_COLORCONTROL_BLEND_DISABLE 0x00000020 -#define A2XX_RB_COLORCONTROL_VOB_ENABLE 0x00000040 -#define A2XX_RB_COLORCONTROL_VS_EXPORTS_FOG 0x00000080 -#define A2XX_RB_COLORCONTROL_ROP_CODE__MASK 0x00000f00 -#define A2XX_RB_COLORCONTROL_ROP_CODE__SHIFT 8 -static inline uint32_t A2XX_RB_COLORCONTROL_ROP_CODE(uint32_t val) -{ - return ((val) << A2XX_RB_COLORCONTROL_ROP_CODE__SHIFT) & A2XX_RB_COLORCONTROL_ROP_CODE__MASK; -} -#define A2XX_RB_COLORCONTROL_DITHER_MODE__MASK 
0x00003000 -#define A2XX_RB_COLORCONTROL_DITHER_MODE__SHIFT 12 -static inline uint32_t A2XX_RB_COLORCONTROL_DITHER_MODE(enum adreno_rb_dither_mode val) -{ - return ((val) << A2XX_RB_COLORCONTROL_DITHER_MODE__SHIFT) & A2XX_RB_COLORCONTROL_DITHER_MODE__MASK; -} -#define A2XX_RB_COLORCONTROL_DITHER_TYPE__MASK 0x0000c000 -#define A2XX_RB_COLORCONTROL_DITHER_TYPE__SHIFT 14 -static inline uint32_t A2XX_RB_COLORCONTROL_DITHER_TYPE(enum a2xx_rb_dither_type val) -{ - return ((val) << A2XX_RB_COLORCONTROL_DITHER_TYPE__SHIFT) & A2XX_RB_COLORCONTROL_DITHER_TYPE__MASK; -} -#define A2XX_RB_COLORCONTROL_PIXEL_FOG 0x00010000 -#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__MASK 0x03000000 -#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__SHIFT 24 -static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0(uint32_t val) -{ - return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__MASK; -} -#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__MASK 0x0c000000 -#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__SHIFT 26 -static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1(uint32_t val) -{ - return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__MASK; -} -#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__MASK 0x30000000 -#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__SHIFT 28 -static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2(uint32_t val) -{ - return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__MASK; -} -#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__MASK 0xc0000000 -#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__SHIFT 30 -static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3(uint32_t val) -{ - return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__MASK; -} - -#define 
REG_A2XX_VGT_CURRENT_BIN_ID_MAX 0x00002203 -#define A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__MASK 0x00000007 -#define A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__SHIFT 0 -static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN(uint32_t val) -{ - return ((val) << A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__MASK; -} -#define A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__MASK 0x00000038 -#define A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__SHIFT 3 -static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MAX_ROW(uint32_t val) -{ - return ((val) << A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__MASK; -} -#define A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__MASK 0x000001c0 -#define A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__SHIFT 6 -static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK(uint32_t val) -{ - return ((val) << A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__MASK; -} - -#define REG_A2XX_PA_CL_CLIP_CNTL 0x00002204 -#define A2XX_PA_CL_CLIP_CNTL_CLIP_DISABLE 0x00010000 -#define A2XX_PA_CL_CLIP_CNTL_BOUNDARY_EDGE_FLAG_ENA 0x00040000 -#define A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__MASK 0x00080000 -#define A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__SHIFT 19 -static inline uint32_t A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF(enum a2xx_dx_clip_space val) -{ - return ((val) << A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__SHIFT) & A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__MASK; -} -#define A2XX_PA_CL_CLIP_CNTL_DIS_CLIP_ERR_DETECT 0x00100000 -#define A2XX_PA_CL_CLIP_CNTL_VTX_KILL_OR 0x00200000 -#define A2XX_PA_CL_CLIP_CNTL_XY_NAN_RETAIN 0x00400000 -#define A2XX_PA_CL_CLIP_CNTL_Z_NAN_RETAIN 0x00800000 -#define A2XX_PA_CL_CLIP_CNTL_W_NAN_RETAIN 0x01000000 - -#define REG_A2XX_PA_SU_SC_MODE_CNTL 0x00002205 -#define A2XX_PA_SU_SC_MODE_CNTL_CULL_FRONT 0x00000001 -#define A2XX_PA_SU_SC_MODE_CNTL_CULL_BACK 0x00000002 -#define A2XX_PA_SU_SC_MODE_CNTL_FACE 0x00000004 -#define 
A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__MASK 0x00000018 -#define A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__SHIFT 3 -static inline uint32_t A2XX_PA_SU_SC_MODE_CNTL_POLYMODE(enum a2xx_pa_su_sc_polymode val) -{ - return ((val) << A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__SHIFT) & A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__MASK; -} -#define A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__MASK 0x000000e0 -#define A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__SHIFT 5 -static inline uint32_t A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__SHIFT) & A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__MASK; -} -#define A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__MASK 0x00000700 -#define A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__SHIFT 8 -static inline uint32_t A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__SHIFT) & A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__MASK; -} -#define A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_FRONT_ENABLE 0x00000800 -#define A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_BACK_ENABLE 0x00001000 -#define A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_PARA_ENABLE 0x00002000 -#define A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE 0x00008000 -#define A2XX_PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE 0x00010000 -#define A2XX_PA_SU_SC_MODE_CNTL_LINE_STIPPLE_ENABLE 0x00040000 -#define A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST 0x00080000 -#define A2XX_PA_SU_SC_MODE_CNTL_PERSP_CORR_DIS 0x00100000 -#define A2XX_PA_SU_SC_MODE_CNTL_MULTI_PRIM_IB_ENA 0x00200000 -#define A2XX_PA_SU_SC_MODE_CNTL_QUAD_ORDER_ENABLE 0x00800000 -#define A2XX_PA_SU_SC_MODE_CNTL_WAIT_RB_IDLE_ALL_TRI 0x02000000 -#define A2XX_PA_SU_SC_MODE_CNTL_WAIT_RB_IDLE_FIRST_TRI_NEW_STATE 0x04000000 -#define A2XX_PA_SU_SC_MODE_CNTL_CLAMPED_FACENESS 0x10000000 -#define A2XX_PA_SU_SC_MODE_CNTL_ZERO_AREA_FACENESS 0x20000000 -#define A2XX_PA_SU_SC_MODE_CNTL_FACE_KILL_ENABLE 0x40000000 -#define A2XX_PA_SU_SC_MODE_CNTL_FACE_WRITE_ENABLE 0x80000000 - -#define REG_A2XX_PA_CL_VTE_CNTL 
0x00002206 -#define A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA 0x00000001 -#define A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA 0x00000002 -#define A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA 0x00000004 -#define A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA 0x00000008 -#define A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA 0x00000010 -#define A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA 0x00000020 -#define A2XX_PA_CL_VTE_CNTL_VTX_XY_FMT 0x00000100 -#define A2XX_PA_CL_VTE_CNTL_VTX_Z_FMT 0x00000200 -#define A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT 0x00000400 -#define A2XX_PA_CL_VTE_CNTL_PERFCOUNTER_REF 0x00000800 - -#define REG_A2XX_VGT_CURRENT_BIN_ID_MIN 0x00002207 -#define A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__MASK 0x00000007 -#define A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__SHIFT 0 -static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN(uint32_t val) -{ - return ((val) << A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__MASK; -} -#define A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__MASK 0x00000038 -#define A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__SHIFT 3 -static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MIN_ROW(uint32_t val) -{ - return ((val) << A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__MASK; -} -#define A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__MASK 0x000001c0 -#define A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__SHIFT 6 -static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK(uint32_t val) -{ - return ((val) << A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__MASK; -} - -#define REG_A2XX_RB_MODECONTROL 0x00002208 -#define A2XX_RB_MODECONTROL_EDRAM_MODE__MASK 0x00000007 -#define A2XX_RB_MODECONTROL_EDRAM_MODE__SHIFT 0 -static inline uint32_t A2XX_RB_MODECONTROL_EDRAM_MODE(enum a2xx_rb_edram_mode val) -{ - return ((val) << A2XX_RB_MODECONTROL_EDRAM_MODE__SHIFT) & A2XX_RB_MODECONTROL_EDRAM_MODE__MASK; -} - -#define REG_A2XX_A220_RB_LRZ_VSC_CONTROL 0x00002209 - -#define REG_A2XX_RB_SAMPLE_POS 0x0000220a - 
-#define REG_A2XX_CLEAR_COLOR 0x0000220b -#define A2XX_CLEAR_COLOR_RED__MASK 0x000000ff -#define A2XX_CLEAR_COLOR_RED__SHIFT 0 -static inline uint32_t A2XX_CLEAR_COLOR_RED(uint32_t val) -{ - return ((val) << A2XX_CLEAR_COLOR_RED__SHIFT) & A2XX_CLEAR_COLOR_RED__MASK; -} -#define A2XX_CLEAR_COLOR_GREEN__MASK 0x0000ff00 -#define A2XX_CLEAR_COLOR_GREEN__SHIFT 8 -static inline uint32_t A2XX_CLEAR_COLOR_GREEN(uint32_t val) -{ - return ((val) << A2XX_CLEAR_COLOR_GREEN__SHIFT) & A2XX_CLEAR_COLOR_GREEN__MASK; -} -#define A2XX_CLEAR_COLOR_BLUE__MASK 0x00ff0000 -#define A2XX_CLEAR_COLOR_BLUE__SHIFT 16 -static inline uint32_t A2XX_CLEAR_COLOR_BLUE(uint32_t val) -{ - return ((val) << A2XX_CLEAR_COLOR_BLUE__SHIFT) & A2XX_CLEAR_COLOR_BLUE__MASK; -} -#define A2XX_CLEAR_COLOR_ALPHA__MASK 0xff000000 -#define A2XX_CLEAR_COLOR_ALPHA__SHIFT 24 -static inline uint32_t A2XX_CLEAR_COLOR_ALPHA(uint32_t val) -{ - return ((val) << A2XX_CLEAR_COLOR_ALPHA__SHIFT) & A2XX_CLEAR_COLOR_ALPHA__MASK; -} - -#define REG_A2XX_A220_GRAS_CONTROL 0x00002210 - -#define REG_A2XX_PA_SU_POINT_SIZE 0x00002280 -#define A2XX_PA_SU_POINT_SIZE_HEIGHT__MASK 0x0000ffff -#define A2XX_PA_SU_POINT_SIZE_HEIGHT__SHIFT 0 -static inline uint32_t A2XX_PA_SU_POINT_SIZE_HEIGHT(float val) -{ - return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_SIZE_HEIGHT__SHIFT) & A2XX_PA_SU_POINT_SIZE_HEIGHT__MASK; -} -#define A2XX_PA_SU_POINT_SIZE_WIDTH__MASK 0xffff0000 -#define A2XX_PA_SU_POINT_SIZE_WIDTH__SHIFT 16 -static inline uint32_t A2XX_PA_SU_POINT_SIZE_WIDTH(float val) -{ - return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_SIZE_WIDTH__SHIFT) & A2XX_PA_SU_POINT_SIZE_WIDTH__MASK; -} - -#define REG_A2XX_PA_SU_POINT_MINMAX 0x00002281 -#define A2XX_PA_SU_POINT_MINMAX_MIN__MASK 0x0000ffff -#define A2XX_PA_SU_POINT_MINMAX_MIN__SHIFT 0 -static inline uint32_t A2XX_PA_SU_POINT_MINMAX_MIN(float val) -{ - return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_MINMAX_MIN__SHIFT) & A2XX_PA_SU_POINT_MINMAX_MIN__MASK; -} -#define 
A2XX_PA_SU_POINT_MINMAX_MAX__MASK 0xffff0000 -#define A2XX_PA_SU_POINT_MINMAX_MAX__SHIFT 16 -static inline uint32_t A2XX_PA_SU_POINT_MINMAX_MAX(float val) -{ - return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_MINMAX_MAX__SHIFT) & A2XX_PA_SU_POINT_MINMAX_MAX__MASK; -} - -#define REG_A2XX_PA_SU_LINE_CNTL 0x00002282 -#define A2XX_PA_SU_LINE_CNTL_WIDTH__MASK 0x0000ffff -#define A2XX_PA_SU_LINE_CNTL_WIDTH__SHIFT 0 -static inline uint32_t A2XX_PA_SU_LINE_CNTL_WIDTH(float val) -{ - return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_LINE_CNTL_WIDTH__SHIFT) & A2XX_PA_SU_LINE_CNTL_WIDTH__MASK; -} - -#define REG_A2XX_PA_SC_LINE_STIPPLE 0x00002283 -#define A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__MASK 0x0000ffff -#define A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__SHIFT 0 -static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN(uint32_t val) -{ - return ((val) << A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__MASK; -} -#define A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__MASK 0x00ff0000 -#define A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__SHIFT 16 -static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT(uint32_t val) -{ - return ((val) << A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__MASK; -} -#define A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__MASK 0x10000000 -#define A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__SHIFT 28 -static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER(enum a2xx_pa_sc_pattern_bit_order val) -{ - return ((val) << A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__MASK; -} -#define A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__MASK 0x60000000 -#define A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__SHIFT 29 -static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL(enum a2xx_pa_sc_auto_reset_cntl val) -{ - return ((val) << A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__MASK; -} - -#define 
REG_A2XX_PA_SC_VIZ_QUERY 0x00002293 -#define A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ENA 0x00000001 -#define A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__MASK 0x0000007e -#define A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__SHIFT 1 -static inline uint32_t A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID(uint32_t val) -{ - return ((val) << A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__SHIFT) & A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__MASK; -} -#define A2XX_PA_SC_VIZ_QUERY_KILL_PIX_POST_EARLY_Z 0x00000100 - -#define REG_A2XX_VGT_ENHANCE 0x00002294 - -#define REG_A2XX_PA_SC_LINE_CNTL 0x00002300 -#define A2XX_PA_SC_LINE_CNTL_BRES_CNTL__MASK 0x0000ffff -#define A2XX_PA_SC_LINE_CNTL_BRES_CNTL__SHIFT 0 -static inline uint32_t A2XX_PA_SC_LINE_CNTL_BRES_CNTL(uint32_t val) -{ - return ((val) << A2XX_PA_SC_LINE_CNTL_BRES_CNTL__SHIFT) & A2XX_PA_SC_LINE_CNTL_BRES_CNTL__MASK; -} -#define A2XX_PA_SC_LINE_CNTL_USE_BRES_CNTL 0x00000100 -#define A2XX_PA_SC_LINE_CNTL_EXPAND_LINE_WIDTH 0x00000200 -#define A2XX_PA_SC_LINE_CNTL_LAST_PIXEL 0x00000400 - -#define REG_A2XX_PA_SC_AA_CONFIG 0x00002301 -#define A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__MASK 0x00000007 -#define A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__SHIFT 0 -static inline uint32_t A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES(uint32_t val) -{ - return ((val) << A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__SHIFT) & A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__MASK; -} -#define A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__MASK 0x0001e000 -#define A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__SHIFT 13 -static inline uint32_t A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST(uint32_t val) -{ - return ((val) << A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__SHIFT) & A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__MASK; -} - -#define REG_A2XX_PA_SU_VTX_CNTL 0x00002302 -#define A2XX_PA_SU_VTX_CNTL_PIX_CENTER__MASK 0x00000001 -#define A2XX_PA_SU_VTX_CNTL_PIX_CENTER__SHIFT 0 -static inline uint32_t A2XX_PA_SU_VTX_CNTL_PIX_CENTER(enum a2xx_pa_pixcenter val) -{ - return ((val) << A2XX_PA_SU_VTX_CNTL_PIX_CENTER__SHIFT) & A2XX_PA_SU_VTX_CNTL_PIX_CENTER__MASK; -} 
-#define A2XX_PA_SU_VTX_CNTL_ROUND_MODE__MASK 0x00000006 -#define A2XX_PA_SU_VTX_CNTL_ROUND_MODE__SHIFT 1 -static inline uint32_t A2XX_PA_SU_VTX_CNTL_ROUND_MODE(enum a2xx_pa_roundmode val) -{ - return ((val) << A2XX_PA_SU_VTX_CNTL_ROUND_MODE__SHIFT) & A2XX_PA_SU_VTX_CNTL_ROUND_MODE__MASK; -} -#define A2XX_PA_SU_VTX_CNTL_QUANT_MODE__MASK 0x00000380 -#define A2XX_PA_SU_VTX_CNTL_QUANT_MODE__SHIFT 7 -static inline uint32_t A2XX_PA_SU_VTX_CNTL_QUANT_MODE(enum a2xx_pa_quantmode val) -{ - return ((val) << A2XX_PA_SU_VTX_CNTL_QUANT_MODE__SHIFT) & A2XX_PA_SU_VTX_CNTL_QUANT_MODE__MASK; -} - -#define REG_A2XX_PA_CL_GB_VERT_CLIP_ADJ 0x00002303 -#define A2XX_PA_CL_GB_VERT_CLIP_ADJ__MASK 0xffffffff -#define A2XX_PA_CL_GB_VERT_CLIP_ADJ__SHIFT 0 -static inline uint32_t A2XX_PA_CL_GB_VERT_CLIP_ADJ(float val) -{ - return ((fui(val)) << A2XX_PA_CL_GB_VERT_CLIP_ADJ__SHIFT) & A2XX_PA_CL_GB_VERT_CLIP_ADJ__MASK; -} - -#define REG_A2XX_PA_CL_GB_VERT_DISC_ADJ 0x00002304 -#define A2XX_PA_CL_GB_VERT_DISC_ADJ__MASK 0xffffffff -#define A2XX_PA_CL_GB_VERT_DISC_ADJ__SHIFT 0 -static inline uint32_t A2XX_PA_CL_GB_VERT_DISC_ADJ(float val) -{ - return ((fui(val)) << A2XX_PA_CL_GB_VERT_DISC_ADJ__SHIFT) & A2XX_PA_CL_GB_VERT_DISC_ADJ__MASK; -} - -#define REG_A2XX_PA_CL_GB_HORZ_CLIP_ADJ 0x00002305 -#define A2XX_PA_CL_GB_HORZ_CLIP_ADJ__MASK 0xffffffff -#define A2XX_PA_CL_GB_HORZ_CLIP_ADJ__SHIFT 0 -static inline uint32_t A2XX_PA_CL_GB_HORZ_CLIP_ADJ(float val) -{ - return ((fui(val)) << A2XX_PA_CL_GB_HORZ_CLIP_ADJ__SHIFT) & A2XX_PA_CL_GB_HORZ_CLIP_ADJ__MASK; -} - -#define REG_A2XX_PA_CL_GB_HORZ_DISC_ADJ 0x00002306 -#define A2XX_PA_CL_GB_HORZ_DISC_ADJ__MASK 0xffffffff -#define A2XX_PA_CL_GB_HORZ_DISC_ADJ__SHIFT 0 -static inline uint32_t A2XX_PA_CL_GB_HORZ_DISC_ADJ(float val) -{ - return ((fui(val)) << A2XX_PA_CL_GB_HORZ_DISC_ADJ__SHIFT) & A2XX_PA_CL_GB_HORZ_DISC_ADJ__MASK; -} - -#define REG_A2XX_SQ_VS_CONST 0x00002307 -#define A2XX_SQ_VS_CONST_BASE__MASK 0x000001ff -#define A2XX_SQ_VS_CONST_BASE__SHIFT 0 
-static inline uint32_t A2XX_SQ_VS_CONST_BASE(uint32_t val) -{ - return ((val) << A2XX_SQ_VS_CONST_BASE__SHIFT) & A2XX_SQ_VS_CONST_BASE__MASK; -} -#define A2XX_SQ_VS_CONST_SIZE__MASK 0x001ff000 -#define A2XX_SQ_VS_CONST_SIZE__SHIFT 12 -static inline uint32_t A2XX_SQ_VS_CONST_SIZE(uint32_t val) -{ - return ((val) << A2XX_SQ_VS_CONST_SIZE__SHIFT) & A2XX_SQ_VS_CONST_SIZE__MASK; -} - -#define REG_A2XX_SQ_PS_CONST 0x00002308 -#define A2XX_SQ_PS_CONST_BASE__MASK 0x000001ff -#define A2XX_SQ_PS_CONST_BASE__SHIFT 0 -static inline uint32_t A2XX_SQ_PS_CONST_BASE(uint32_t val) -{ - return ((val) << A2XX_SQ_PS_CONST_BASE__SHIFT) & A2XX_SQ_PS_CONST_BASE__MASK; -} -#define A2XX_SQ_PS_CONST_SIZE__MASK 0x001ff000 -#define A2XX_SQ_PS_CONST_SIZE__SHIFT 12 -static inline uint32_t A2XX_SQ_PS_CONST_SIZE(uint32_t val) -{ - return ((val) << A2XX_SQ_PS_CONST_SIZE__SHIFT) & A2XX_SQ_PS_CONST_SIZE__MASK; -} - -#define REG_A2XX_SQ_DEBUG_MISC_0 0x00002309 - -#define REG_A2XX_SQ_DEBUG_MISC_1 0x0000230a - -#define REG_A2XX_PA_SC_AA_MASK 0x00002312 - -#define REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL 0x00002316 -#define A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__MASK 0x00000007 -#define A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__SHIFT 0 -static inline uint32_t A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH(uint32_t val) -{ - return ((val) << A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__SHIFT) & A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__MASK; -} - -#define REG_A2XX_VGT_OUT_DEALLOC_CNTL 0x00002317 -#define A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__MASK 0x00000003 -#define A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__SHIFT 0 -static inline uint32_t A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST(uint32_t val) -{ - return ((val) << A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__SHIFT) & A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__MASK; -} - -#define REG_A2XX_RB_COPY_CONTROL 0x00002318 -#define A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__MASK 0x00000007 -#define A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__SHIFT 
0 -static inline uint32_t A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT(enum a2xx_rb_copy_sample_select val) -{ - return ((val) << A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__SHIFT) & A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__MASK; -} -#define A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE 0x00000008 -#define A2XX_RB_COPY_CONTROL_CLEAR_MASK__MASK 0x000000f0 -#define A2XX_RB_COPY_CONTROL_CLEAR_MASK__SHIFT 4 -static inline uint32_t A2XX_RB_COPY_CONTROL_CLEAR_MASK(uint32_t val) -{ - return ((val) << A2XX_RB_COPY_CONTROL_CLEAR_MASK__SHIFT) & A2XX_RB_COPY_CONTROL_CLEAR_MASK__MASK; -} - -#define REG_A2XX_RB_COPY_DEST_BASE 0x00002319 - -#define REG_A2XX_RB_COPY_DEST_PITCH 0x0000231a -#define A2XX_RB_COPY_DEST_PITCH__MASK 0xffffffff -#define A2XX_RB_COPY_DEST_PITCH__SHIFT 0 -static inline uint32_t A2XX_RB_COPY_DEST_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A2XX_RB_COPY_DEST_PITCH__SHIFT) & A2XX_RB_COPY_DEST_PITCH__MASK; -} - -#define REG_A2XX_RB_COPY_DEST_INFO 0x0000231b -#define A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__MASK 0x00000007 -#define A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__SHIFT 0 -static inline uint32_t A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN(enum adreno_rb_surface_endian val) -{ - return ((val) << A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__SHIFT) & A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__MASK; -} -#define A2XX_RB_COPY_DEST_INFO_LINEAR 0x00000008 -#define A2XX_RB_COPY_DEST_INFO_FORMAT__MASK 0x000000f0 -#define A2XX_RB_COPY_DEST_INFO_FORMAT__SHIFT 4 -static inline uint32_t A2XX_RB_COPY_DEST_INFO_FORMAT(enum a2xx_colorformatx val) -{ - return ((val) << A2XX_RB_COPY_DEST_INFO_FORMAT__SHIFT) & A2XX_RB_COPY_DEST_INFO_FORMAT__MASK; -} -#define A2XX_RB_COPY_DEST_INFO_SWAP__MASK 0x00000300 -#define A2XX_RB_COPY_DEST_INFO_SWAP__SHIFT 8 -static inline uint32_t A2XX_RB_COPY_DEST_INFO_SWAP(uint32_t val) -{ - return ((val) << A2XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & A2XX_RB_COPY_DEST_INFO_SWAP__MASK; -} -#define A2XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK 0x00000c00 -#define 
A2XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT 10 -static inline uint32_t A2XX_RB_COPY_DEST_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) -{ - return ((val) << A2XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT) & A2XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK; -} -#define A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__MASK 0x00003000 -#define A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__SHIFT 12 -static inline uint32_t A2XX_RB_COPY_DEST_INFO_DITHER_TYPE(enum a2xx_rb_dither_type val) -{ - return ((val) << A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__SHIFT) & A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__MASK; -} -#define A2XX_RB_COPY_DEST_INFO_WRITE_RED 0x00004000 -#define A2XX_RB_COPY_DEST_INFO_WRITE_GREEN 0x00008000 -#define A2XX_RB_COPY_DEST_INFO_WRITE_BLUE 0x00010000 -#define A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA 0x00020000 - -#define REG_A2XX_RB_COPY_DEST_OFFSET 0x0000231c -#define A2XX_RB_COPY_DEST_OFFSET_X__MASK 0x00001fff -#define A2XX_RB_COPY_DEST_OFFSET_X__SHIFT 0 -static inline uint32_t A2XX_RB_COPY_DEST_OFFSET_X(uint32_t val) -{ - return ((val) << A2XX_RB_COPY_DEST_OFFSET_X__SHIFT) & A2XX_RB_COPY_DEST_OFFSET_X__MASK; -} -#define A2XX_RB_COPY_DEST_OFFSET_Y__MASK 0x03ffe000 -#define A2XX_RB_COPY_DEST_OFFSET_Y__SHIFT 13 -static inline uint32_t A2XX_RB_COPY_DEST_OFFSET_Y(uint32_t val) -{ - return ((val) << A2XX_RB_COPY_DEST_OFFSET_Y__SHIFT) & A2XX_RB_COPY_DEST_OFFSET_Y__MASK; -} - -#define REG_A2XX_RB_DEPTH_CLEAR 0x0000231d - -#define REG_A2XX_RB_SAMPLE_COUNT_CTL 0x00002324 - -#define REG_A2XX_RB_COLOR_DEST_MASK 0x00002326 - -#define REG_A2XX_A225_GRAS_UCP0X 0x00002340 - -#define REG_A2XX_A225_GRAS_UCP5W 0x00002357 - -#define REG_A2XX_A225_GRAS_UCP_ENABLED 0x00002360 - -#define REG_A2XX_PA_SU_POLY_OFFSET_FRONT_SCALE 0x00002380 - -#define REG_A2XX_PA_SU_POLY_OFFSET_BACK_OFFSET 0x00002383 - -#define REG_A2XX_SQ_CONSTANT_0 0x00004000 - -#define REG_A2XX_SQ_FETCH_0 0x00004800 - -#define REG_A2XX_SQ_CF_BOOLEANS 0x00004900 - -#define REG_A2XX_SQ_CF_LOOP 0x00004908 - -#define REG_A2XX_COHER_SIZE_PM4 0x00000a29 - -#define 
REG_A2XX_COHER_BASE_PM4 0x00000a2a - -#define REG_A2XX_COHER_STATUS_PM4 0x00000a2b - -#define REG_A2XX_SQ_TEX_0 0x00000000 -#define A2XX_SQ_TEX_0_CLAMP_X__MASK 0x00001c00 -#define A2XX_SQ_TEX_0_CLAMP_X__SHIFT 10 -static inline uint32_t A2XX_SQ_TEX_0_CLAMP_X(enum sq_tex_clamp val) -{ - return ((val) << A2XX_SQ_TEX_0_CLAMP_X__SHIFT) & A2XX_SQ_TEX_0_CLAMP_X__MASK; -} -#define A2XX_SQ_TEX_0_CLAMP_Y__MASK 0x0000e000 -#define A2XX_SQ_TEX_0_CLAMP_Y__SHIFT 13 -static inline uint32_t A2XX_SQ_TEX_0_CLAMP_Y(enum sq_tex_clamp val) -{ - return ((val) << A2XX_SQ_TEX_0_CLAMP_Y__SHIFT) & A2XX_SQ_TEX_0_CLAMP_Y__MASK; -} -#define A2XX_SQ_TEX_0_CLAMP_Z__MASK 0x00070000 -#define A2XX_SQ_TEX_0_CLAMP_Z__SHIFT 16 -static inline uint32_t A2XX_SQ_TEX_0_CLAMP_Z(enum sq_tex_clamp val) -{ - return ((val) << A2XX_SQ_TEX_0_CLAMP_Z__SHIFT) & A2XX_SQ_TEX_0_CLAMP_Z__MASK; -} -#define A2XX_SQ_TEX_0_PITCH__MASK 0xffc00000 -#define A2XX_SQ_TEX_0_PITCH__SHIFT 22 -static inline uint32_t A2XX_SQ_TEX_0_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A2XX_SQ_TEX_0_PITCH__SHIFT) & A2XX_SQ_TEX_0_PITCH__MASK; -} - -#define REG_A2XX_SQ_TEX_1 0x00000001 - -#define REG_A2XX_SQ_TEX_2 0x00000002 -#define A2XX_SQ_TEX_2_WIDTH__MASK 0x00001fff -#define A2XX_SQ_TEX_2_WIDTH__SHIFT 0 -static inline uint32_t A2XX_SQ_TEX_2_WIDTH(uint32_t val) -{ - return ((val) << A2XX_SQ_TEX_2_WIDTH__SHIFT) & A2XX_SQ_TEX_2_WIDTH__MASK; -} -#define A2XX_SQ_TEX_2_HEIGHT__MASK 0x03ffe000 -#define A2XX_SQ_TEX_2_HEIGHT__SHIFT 13 -static inline uint32_t A2XX_SQ_TEX_2_HEIGHT(uint32_t val) -{ - return ((val) << A2XX_SQ_TEX_2_HEIGHT__SHIFT) & A2XX_SQ_TEX_2_HEIGHT__MASK; -} - -#define REG_A2XX_SQ_TEX_3 0x00000003 -#define A2XX_SQ_TEX_3_SWIZ_X__MASK 0x0000000e -#define A2XX_SQ_TEX_3_SWIZ_X__SHIFT 1 -static inline uint32_t A2XX_SQ_TEX_3_SWIZ_X(enum sq_tex_swiz val) -{ - return ((val) << A2XX_SQ_TEX_3_SWIZ_X__SHIFT) & A2XX_SQ_TEX_3_SWIZ_X__MASK; -} -#define A2XX_SQ_TEX_3_SWIZ_Y__MASK 0x00000070 -#define 
A2XX_SQ_TEX_3_SWIZ_Y__SHIFT 4 -static inline uint32_t A2XX_SQ_TEX_3_SWIZ_Y(enum sq_tex_swiz val) -{ - return ((val) << A2XX_SQ_TEX_3_SWIZ_Y__SHIFT) & A2XX_SQ_TEX_3_SWIZ_Y__MASK; -} -#define A2XX_SQ_TEX_3_SWIZ_Z__MASK 0x00000380 -#define A2XX_SQ_TEX_3_SWIZ_Z__SHIFT 7 -static inline uint32_t A2XX_SQ_TEX_3_SWIZ_Z(enum sq_tex_swiz val) -{ - return ((val) << A2XX_SQ_TEX_3_SWIZ_Z__SHIFT) & A2XX_SQ_TEX_3_SWIZ_Z__MASK; -} -#define A2XX_SQ_TEX_3_SWIZ_W__MASK 0x00001c00 -#define A2XX_SQ_TEX_3_SWIZ_W__SHIFT 10 -static inline uint32_t A2XX_SQ_TEX_3_SWIZ_W(enum sq_tex_swiz val) -{ - return ((val) << A2XX_SQ_TEX_3_SWIZ_W__SHIFT) & A2XX_SQ_TEX_3_SWIZ_W__MASK; -} -#define A2XX_SQ_TEX_3_XY_MAG_FILTER__MASK 0x00180000 -#define A2XX_SQ_TEX_3_XY_MAG_FILTER__SHIFT 19 -static inline uint32_t A2XX_SQ_TEX_3_XY_MAG_FILTER(enum sq_tex_filter val) -{ - return ((val) << A2XX_SQ_TEX_3_XY_MAG_FILTER__SHIFT) & A2XX_SQ_TEX_3_XY_MAG_FILTER__MASK; -} -#define A2XX_SQ_TEX_3_XY_MIN_FILTER__MASK 0x00600000 -#define A2XX_SQ_TEX_3_XY_MIN_FILTER__SHIFT 21 -static inline uint32_t A2XX_SQ_TEX_3_XY_MIN_FILTER(enum sq_tex_filter val) -{ - return ((val) << A2XX_SQ_TEX_3_XY_MIN_FILTER__SHIFT) & A2XX_SQ_TEX_3_XY_MIN_FILTER__MASK; -} - - -#endif /* A2XX_XML */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/disasm-a2xx.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/disasm-a2xx.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/disasm-a2xx.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/disasm-a2xx.c 2019-03-31 23:16:37.000000000 +0000 @@ -96,17 +96,17 @@ } } -static void print_export_comment(uint32_t num, enum shader_t type) +static void print_export_comment(uint32_t num, gl_shader_stage type) { const char *name = NULL; switch (type) { - case SHADER_VERTEX: + case MESA_SHADER_VERTEX: switch (num) { case 62: name = "gl_Position"; break; case 63: name = "gl_PointSize"; break; } break; - case SHADER_FRAGMENT: + case MESA_SHADER_FRAGMENT: switch (num) { case 0: 
name = "gl_FragColor"; break; } @@ -212,7 +212,7 @@ }; static int disasm_alu(uint32_t *dwords, uint32_t alu_off, - int level, int sync, enum shader_t type) + int level, int sync, gl_shader_stage type) { instr_alu_t *alu = (instr_alu_t *)dwords; @@ -592,7 +592,7 @@ * 2) ALU and FETCH instructions */ -int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, enum shader_t type) +int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, gl_shader_stage type) { instr_cf_t *cfs = (instr_cf_t *)dwords; int idx, max_idx; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,1119 +0,0 @@ -/* - * Copyright (C) 2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include "pipe/p_state.h" -#include "util/u_string.h" -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_ureg.h" -#include "tgsi/tgsi_info.h" -#include "tgsi/tgsi_strings.h" -#include "tgsi/tgsi_dump.h" - -#include "fd2_compiler.h" -#include "fd2_program.h" -#include "fd2_util.h" - -#include "instr-a2xx.h" -#include "ir-a2xx.h" - -struct fd2_compile_context { - struct fd_program_stateobj *prog; - struct fd2_shader_stateobj *so; - - struct tgsi_parse_context parser; - unsigned type; - - /* predicate stack: */ - int pred_depth; - enum ir2_pred pred_stack[8]; - - /* Internal-Temporary and Predicate register assignment: - * - * Some TGSI instructions which translate into multiple actual - * instructions need one or more temporary registers, which are not - * assigned from TGSI perspective (ie. not TGSI_FILE_TEMPORARY). - * And some instructions (texture fetch) cannot write directly to - * output registers. We could be more clever and re-use dst or a - * src register in some cases. But for now don't try to be clever. - * Eventually we should implement an optimization pass that re- - * juggles the register usage and gets rid of unneeded temporaries. - * - * The predicate register must be valid across multiple TGSI - * instructions, but internal temporary's do not. For this reason, - * once the predicate register is requested, until it is no longer - * needed, it gets the first register slot after after the TGSI - * assigned temporaries (ie. num_regs[TGSI_FILE_TEMPORARY]), and the - * internal temporaries get the register slots above this. 
- */ - - int pred_reg; - int num_internal_temps; - - uint8_t num_regs[TGSI_FILE_COUNT]; - - /* maps input register idx to prog->export_linkage idx: */ - uint8_t input_export_idx[64]; - - /* maps output register idx to prog->export_linkage idx: */ - uint8_t output_export_idx[64]; - - /* idx/slot for last compiler generated immediate */ - unsigned immediate_idx; - - // TODO we can skip emit exports in the VS that the FS doesn't need.. - // and get rid perhaps of num_param.. - unsigned num_position, num_param; - unsigned position, psize; - - uint64_t need_sync; -}; - -static int -semantic_idx(struct tgsi_declaration_semantic *semantic) -{ - int idx = semantic->Name; - if (idx == TGSI_SEMANTIC_GENERIC) - idx = TGSI_SEMANTIC_COUNT + semantic->Index; - return idx; -} - -/* assign/get the input/export register # for given semantic idx as - * returned by semantic_idx(): - */ -static int -export_linkage(struct fd2_compile_context *ctx, int idx) -{ - struct fd_program_stateobj *prog = ctx->prog; - - /* if first time we've seen this export, assign the next available slot: */ - if (prog->export_linkage[idx] == 0xff) - prog->export_linkage[idx] = prog->num_exports++; - - return prog->export_linkage[idx]; -} - -static unsigned -compile_init(struct fd2_compile_context *ctx, struct fd_program_stateobj *prog, - struct fd2_shader_stateobj *so) -{ - unsigned ret; - - ctx->prog = prog; - ctx->so = so; - ctx->pred_depth = 0; - - ret = tgsi_parse_init(&ctx->parser, so->tokens); - if (ret != TGSI_PARSE_OK) - return ret; - - ctx->type = ctx->parser.FullHeader.Processor.Processor; - ctx->position = ~0; - ctx->psize = ~0; - ctx->num_position = 0; - ctx->num_param = 0; - ctx->need_sync = 0; - ctx->immediate_idx = 0; - ctx->pred_reg = -1; - ctx->num_internal_temps = 0; - - memset(ctx->num_regs, 0, sizeof(ctx->num_regs)); - memset(ctx->input_export_idx, 0, sizeof(ctx->input_export_idx)); - memset(ctx->output_export_idx, 0, sizeof(ctx->output_export_idx)); - - /* do first pass to extract 
declarations: */ - while (!tgsi_parse_end_of_tokens(&ctx->parser)) { - tgsi_parse_token(&ctx->parser); - - switch (ctx->parser.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: { - struct tgsi_full_declaration *decl = - &ctx->parser.FullToken.FullDeclaration; - if (decl->Declaration.File == TGSI_FILE_OUTPUT) { - unsigned name = decl->Semantic.Name; - - assert(decl->Declaration.Semantic); // TODO is this ever not true? - - ctx->output_export_idx[decl->Range.First] = - semantic_idx(&decl->Semantic); - - if (ctx->type == PIPE_SHADER_VERTEX) { - switch (name) { - case TGSI_SEMANTIC_POSITION: - ctx->position = ctx->num_regs[TGSI_FILE_OUTPUT]; - ctx->num_position++; - break; - case TGSI_SEMANTIC_PSIZE: - ctx->psize = ctx->num_regs[TGSI_FILE_OUTPUT]; - ctx->num_position++; - break; - case TGSI_SEMANTIC_COLOR: - case TGSI_SEMANTIC_GENERIC: - ctx->num_param++; - break; - default: - DBG("unknown VS semantic name: %s", - tgsi_semantic_names[name]); - assert(0); - } - } else { - switch (name) { - case TGSI_SEMANTIC_COLOR: - case TGSI_SEMANTIC_GENERIC: - ctx->num_param++; - break; - default: - DBG("unknown PS semantic name: %s", - tgsi_semantic_names[name]); - assert(0); - } - } - } else if (decl->Declaration.File == TGSI_FILE_INPUT) { - ctx->input_export_idx[decl->Range.First] = - semantic_idx(&decl->Semantic); - } - ctx->num_regs[decl->Declaration.File] = - MAX2(ctx->num_regs[decl->Declaration.File], decl->Range.Last + 1); - break; - } - case TGSI_TOKEN_TYPE_IMMEDIATE: { - struct tgsi_full_immediate *imm = - &ctx->parser.FullToken.FullImmediate; - unsigned n = ctx->so->num_immediates++; - memcpy(ctx->so->immediates[n].val, imm->u, 16); - break; - } - default: - break; - } - } - - /* TGSI generated immediates are always entire vec4's, ones we - * generate internally are not: - */ - ctx->immediate_idx = ctx->so->num_immediates * 4; - - ctx->so->first_immediate = ctx->num_regs[TGSI_FILE_CONSTANT]; - - tgsi_parse_free(&ctx->parser); - - return 
tgsi_parse_init(&ctx->parser, so->tokens); -} - -static void -compile_free(struct fd2_compile_context *ctx) -{ - tgsi_parse_free(&ctx->parser); -} - -static void -compile_vtx_fetch(struct fd2_compile_context *ctx) -{ - struct ir2_instruction **vfetch_instrs = ctx->so->vfetch_instrs; - int i; - for (i = 0; i < ctx->num_regs[TGSI_FILE_INPUT]; i++) { - struct ir2_instruction *instr = ir2_instr_create( - ctx->so->ir, IR2_FETCH); - instr->fetch.opc = VTX_FETCH; - - ctx->need_sync |= 1 << (i+1); - - ir2_dst_create(instr, i+1, "xyzw", 0); - ir2_reg_create(instr, 0, "x", IR2_REG_INPUT); - - if (i == 0) - instr->sync = true; - - vfetch_instrs[i] = instr; - } - ctx->so->num_vfetch_instrs = i; -} - -/* - * For vertex shaders (VS): - * --- ------ ------------- - * - * Inputs: R1-R(num_input) - * Constants: C0-C(num_const-1) - * Immediates: C(num_const)-C(num_const+num_imm-1) - * Outputs: export0-export(n) and export62, export63 - * n is # of outputs minus gl_Position (export62) and gl_PointSize (export63) - * Temps: R(num_input+1)-R(num_input+num_temps) - * - * R0 could be clobbered after the vertex fetch instructions.. so we - * could use it for one of the temporaries. - * - * TODO: maybe the vertex fetch part could fetch first input into R0 as - * the last vtx fetch instruction, which would let us use the same - * register layout in either case.. although this is not what the blob - * compiler does. - * - * - * For frag shaders (PS): - * --- ---- ------------- - * - * Inputs: R0-R(num_input-1) - * Constants: same as VS - * Immediates: same as VS - * Outputs: export0-export(num_outputs) - * Temps: R(num_input)-R(num_input+num_temps-1) - * - * In either case, immediates are are postpended to the constants - * (uniforms). 
- * - */ - -static unsigned -get_temp_gpr(struct fd2_compile_context *ctx, int idx) -{ - unsigned num = idx + ctx->num_regs[TGSI_FILE_INPUT]; - if (ctx->type == PIPE_SHADER_VERTEX) - num++; - return num; -} - -static struct ir2_dst_register * -add_dst_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu, - const struct tgsi_dst_register *dst) -{ - unsigned flags = 0, num = 0; - char swiz[5]; - - switch (dst->File) { - case TGSI_FILE_OUTPUT: - flags |= IR2_REG_EXPORT; - if (ctx->type == PIPE_SHADER_VERTEX) { - if (dst->Index == ctx->position) { - num = 62; - } else if (dst->Index == ctx->psize) { - num = 63; - } else { - num = export_linkage(ctx, - ctx->output_export_idx[dst->Index]); - } - } else { - num = dst->Index; - } - break; - case TGSI_FILE_TEMPORARY: - num = get_temp_gpr(ctx, dst->Index); - break; - default: - DBG("unsupported dst register file: %s", - tgsi_file_name(dst->File)); - assert(0); - break; - } - - swiz[0] = (dst->WriteMask & TGSI_WRITEMASK_X) ? 'x' : '_'; - swiz[1] = (dst->WriteMask & TGSI_WRITEMASK_Y) ? 'y' : '_'; - swiz[2] = (dst->WriteMask & TGSI_WRITEMASK_Z) ? 'z' : '_'; - swiz[3] = (dst->WriteMask & TGSI_WRITEMASK_W) ? 
'w' : '_'; - swiz[4] = '\0'; - - return ir2_dst_create(alu, num, swiz, flags); -} - -static struct ir2_src_register * -add_src_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu, - const struct tgsi_src_register *src) -{ - static const char swiz_vals[] = { - 'x', 'y', 'z', 'w', - }; - char swiz[5]; - unsigned flags = 0, num = 0; - - switch (src->File) { - case TGSI_FILE_CONSTANT: - num = src->Index; - flags |= IR2_REG_CONST; - break; - case TGSI_FILE_INPUT: - if (ctx->type == PIPE_SHADER_VERTEX) { - num = src->Index + 1; - } else { - flags |= IR2_REG_INPUT; - num = export_linkage(ctx, - ctx->input_export_idx[src->Index]); - } - break; - case TGSI_FILE_TEMPORARY: - num = get_temp_gpr(ctx, src->Index); - break; - case TGSI_FILE_IMMEDIATE: - num = src->Index + ctx->num_regs[TGSI_FILE_CONSTANT]; - flags |= IR2_REG_CONST; - break; - default: - DBG("unsupported src register file: %s", - tgsi_file_name(src->File)); - assert(0); - break; - } - - if (src->Absolute) - flags |= IR2_REG_ABS; - if (src->Negate) - flags |= IR2_REG_NEGATE; - - swiz[0] = swiz_vals[src->SwizzleX]; - swiz[1] = swiz_vals[src->SwizzleY]; - swiz[2] = swiz_vals[src->SwizzleZ]; - swiz[3] = swiz_vals[src->SwizzleW]; - swiz[4] = '\0'; - - if ((ctx->need_sync & ((uint64_t)1 << num)) && - !(flags & IR2_REG_CONST)) { - alu->sync = true; - ctx->need_sync &= ~((uint64_t)1 << num); - } - - return ir2_reg_create(alu, num, swiz, flags); -} - -static void -add_vector_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu) -{ - if (inst->Instruction.Saturate) { - alu->alu_vector.clamp = true; - } -} - -static void -add_scalar_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu) -{ - if (inst->Instruction.Saturate) { - alu->alu_scalar.clamp = true; - } -} - -static void -add_regs_vector_1(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, struct ir2_instruction *alu) -{ - assert(inst->Instruction.NumSrcRegs == 1); - assert(inst->Instruction.NumDstRegs 
== 1); - - add_dst_reg(ctx, alu, &inst->Dst[0].Register); - add_src_reg(ctx, alu, &inst->Src[0].Register); - add_src_reg(ctx, alu, &inst->Src[0].Register); - add_vector_clamp(inst, alu); -} - -static void -add_regs_vector_2(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, struct ir2_instruction *alu) -{ - assert(inst->Instruction.NumSrcRegs == 2); - assert(inst->Instruction.NumDstRegs == 1); - - add_dst_reg(ctx, alu, &inst->Dst[0].Register); - add_src_reg(ctx, alu, &inst->Src[0].Register); - add_src_reg(ctx, alu, &inst->Src[1].Register); - add_vector_clamp(inst, alu); -} - -static void -add_regs_vector_3(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, struct ir2_instruction *alu) -{ - assert(inst->Instruction.NumSrcRegs == 3); - assert(inst->Instruction.NumDstRegs == 1); - - add_dst_reg(ctx, alu, &inst->Dst[0].Register); - add_src_reg(ctx, alu, &inst->Src[0].Register); - add_src_reg(ctx, alu, &inst->Src[1].Register); - add_src_reg(ctx, alu, &inst->Src[2].Register); - add_vector_clamp(inst, alu); -} - -static void -add_regs_scalar_1(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, struct ir2_instruction *alu) -{ - assert(inst->Instruction.NumSrcRegs == 1); - assert(inst->Instruction.NumDstRegs == 1); - - add_dst_reg(ctx, alu, &inst->Dst[0].Register); - add_src_reg(ctx, alu, &inst->Src[0].Register); - add_scalar_clamp(inst, alu); -} - -/* - * Helpers for TGSI instructions that don't map to a single shader instr: - */ - -static void -src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst) -{ - src->File = dst->File; - src->Indirect = dst->Indirect; - src->Dimension = dst->Dimension; - src->Index = dst->Index; - src->Absolute = 0; - src->Negate = 0; - src->SwizzleX = TGSI_SWIZZLE_X; - src->SwizzleY = TGSI_SWIZZLE_Y; - src->SwizzleZ = TGSI_SWIZZLE_Z; - src->SwizzleW = TGSI_SWIZZLE_W; -} - -/* Get internal-temp src/dst to use for a sequence of instructions - * generated by a single TGSI 
op. - */ -static void -get_internal_temp(struct fd2_compile_context *ctx, - struct tgsi_dst_register *tmp_dst, - struct tgsi_src_register *tmp_src) -{ - int n; - - tmp_dst->File = TGSI_FILE_TEMPORARY; - tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW; - tmp_dst->Indirect = 0; - tmp_dst->Dimension = 0; - - /* assign next temporary: */ - n = ctx->num_internal_temps++; - if (ctx->pred_reg != -1) - n++; - - tmp_dst->Index = ctx->num_regs[TGSI_FILE_TEMPORARY] + n; - - src_from_dst(tmp_src, tmp_dst); -} - -static void -get_predicate(struct fd2_compile_context *ctx, struct tgsi_dst_register *dst, - struct tgsi_src_register *src) -{ - assert(ctx->pred_reg != -1); - - dst->File = TGSI_FILE_TEMPORARY; - dst->WriteMask = TGSI_WRITEMASK_W; - dst->Indirect = 0; - dst->Dimension = 0; - dst->Index = get_temp_gpr(ctx, ctx->pred_reg); - - if (src) { - src_from_dst(src, dst); - src->SwizzleX = TGSI_SWIZZLE_W; - src->SwizzleY = TGSI_SWIZZLE_W; - src->SwizzleZ = TGSI_SWIZZLE_W; - src->SwizzleW = TGSI_SWIZZLE_W; - } -} - -static void -push_predicate(struct fd2_compile_context *ctx, struct tgsi_src_register *src) -{ - struct ir2_instruction *alu; - struct tgsi_dst_register pred_dst; - - if (ctx->pred_depth == 0) { - /* assign predicate register: */ - ctx->pred_reg = ctx->num_regs[TGSI_FILE_TEMPORARY]; - - get_predicate(ctx, &pred_dst, NULL); - - alu = ir2_instr_create_alu_s(ctx->so->ir, PRED_SETNEs); - add_dst_reg(ctx, alu, &pred_dst); - add_src_reg(ctx, alu, src); - } else { - struct tgsi_src_register pred_src; - - get_predicate(ctx, &pred_dst, &pred_src); - - alu = ir2_instr_create_alu_v(ctx->so->ir, MULv); - add_dst_reg(ctx, alu, &pred_dst); - add_src_reg(ctx, alu, &pred_src); - add_src_reg(ctx, alu, src); - - // XXX need to make PRED_SETE_PUSHv IR2_PRED_NONE.. but need to make - // sure src reg is valid if it was calculated with a predicate - // condition.. 
- alu->pred = IR2_PRED_NONE; - } - - /* save previous pred state to restore in pop_predicate(): */ - ctx->pred_stack[ctx->pred_depth++] = ctx->so->ir->pred; -} - -static void -pop_predicate(struct fd2_compile_context *ctx) -{ - /* restore previous predicate state: */ - ctx->so->ir->pred = ctx->pred_stack[--ctx->pred_depth]; - - if (ctx->pred_depth != 0) { - struct ir2_instruction *alu; - struct tgsi_dst_register pred_dst; - struct tgsi_src_register pred_src; - - get_predicate(ctx, &pred_dst, &pred_src); - - alu = ir2_instr_create_alu_s(ctx->so->ir, PRED_SET_POPs); - add_dst_reg(ctx, alu, &pred_dst); - add_src_reg(ctx, alu, &pred_src); - alu->pred = IR2_PRED_NONE; - } else { - /* predicate register no longer needed: */ - ctx->pred_reg = -1; - } -} - -static void -get_immediate(struct fd2_compile_context *ctx, - struct tgsi_src_register *reg, uint32_t val) -{ - unsigned neg, swiz, idx, i; - /* actually maps 1:1 currently.. not sure if that is safe to rely on: */ - static const unsigned swiz2tgsi[] = { - TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, - }; - - for (i = 0; i < ctx->immediate_idx; i++) { - swiz = i % 4; - idx = i / 4; - - if (ctx->so->immediates[idx].val[swiz] == val) { - neg = 0; - break; - } - - if (ctx->so->immediates[idx].val[swiz] == -val) { - neg = 1; - break; - } - } - - if (i == ctx->immediate_idx) { - /* need to generate a new immediate: */ - swiz = i % 4; - idx = i / 4; - neg = 0; - ctx->so->immediates[idx].val[swiz] = val; - ctx->so->num_immediates = idx + 1; - ctx->immediate_idx++; - } - - reg->File = TGSI_FILE_IMMEDIATE; - reg->Indirect = 0; - reg->Dimension = 0; - reg->Index = idx; - reg->Absolute = 0; - reg->Negate = neg; - reg->SwizzleX = swiz2tgsi[swiz]; - reg->SwizzleY = swiz2tgsi[swiz]; - reg->SwizzleZ = swiz2tgsi[swiz]; - reg->SwizzleW = swiz2tgsi[swiz]; -} - -/* POW(a,b) = EXP2(b * LOG2(a)) */ -static void -translate_pow(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct 
tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; - struct ir2_instruction *alu; - - get_internal_temp(ctx, &tmp_dst, &tmp_src); - - alu = ir2_instr_create_alu_s(ctx->so->ir, LOG_CLAMP); - add_dst_reg(ctx, alu, &tmp_dst); - add_src_reg(ctx, alu, &inst->Src[0].Register); - - alu = ir2_instr_create_alu_v(ctx->so->ir, MULv); - add_dst_reg(ctx, alu, &tmp_dst); - add_src_reg(ctx, alu, &tmp_src); - add_src_reg(ctx, alu, &inst->Src[1].Register); - - /* NOTE: some of the instructions, like EXP_IEEE, seem hard- - * coded to take their input from the w component. - */ - switch(inst->Dst[0].Register.WriteMask) { - case TGSI_WRITEMASK_X: - tmp_src.SwizzleW = TGSI_SWIZZLE_X; - break; - case TGSI_WRITEMASK_Y: - tmp_src.SwizzleW = TGSI_SWIZZLE_Y; - break; - case TGSI_WRITEMASK_Z: - tmp_src.SwizzleW = TGSI_SWIZZLE_Z; - break; - case TGSI_WRITEMASK_W: - tmp_src.SwizzleW = TGSI_SWIZZLE_W; - break; - default: - DBG("invalid writemask!"); - assert(0); - break; - } - - alu = ir2_instr_create_alu_s(ctx->so->ir, EXP_IEEE); - add_dst_reg(ctx, alu, &inst->Dst[0].Register); - add_src_reg(ctx, alu, &tmp_src); - add_scalar_clamp(inst, alu); -} - -static void -translate_tex(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, unsigned opc) -{ - struct ir2_instruction *instr; - struct ir2_src_register *reg; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; - const struct tgsi_src_register *coord; - bool using_temp = (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) || - inst->Instruction.Saturate; - int idx; - - if (using_temp || (opc == TGSI_OPCODE_TXP)) - get_internal_temp(ctx, &tmp_dst, &tmp_src); - - if (opc == TGSI_OPCODE_TXP) { - static const char *swiz[] = { - [TGSI_SWIZZLE_X] = "xxxx", - [TGSI_SWIZZLE_Y] = "yyyy", - [TGSI_SWIZZLE_Z] = "zzzz", - [TGSI_SWIZZLE_W] = "wwww", - }; - - /* TXP - Projective Texture Lookup: - * - * coord.x = src0.x / src.w - * coord.y = src0.y / src.w - * coord.z = src0.z / src.w - * coord.w = src0.w - * bias = 
0.0 - * - * dst = texture_sample(unit, coord, bias) - */ - - instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv); - add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "___w"; - add_src_reg(ctx, instr, &inst->Src[0].Register); - add_src_reg(ctx, instr, &inst->Src[0].Register); - - instr = ir2_instr_create_alu_s(ctx->so->ir, RECIP_IEEE); - add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "x___"; - memcpy(add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle, - swiz[inst->Src[0].Register.SwizzleW], 4); - - instr = ir2_instr_create_alu_v(ctx->so->ir, MULv); - add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "xyz_"; - add_src_reg(ctx, instr, &tmp_src)->swizzle = "xxxx"; - add_src_reg(ctx, instr, &inst->Src[0].Register); - - coord = &tmp_src; - } else { - coord = &inst->Src[0].Register; - } - - instr = ir2_instr_create(ctx->so->ir, IR2_FETCH); - instr->fetch.opc = TEX_FETCH; - instr->fetch.is_cube = (inst->Texture.Texture == TGSI_TEXTURE_3D); - instr->fetch.is_rect = (inst->Texture.Texture == TGSI_TEXTURE_RECT); - assert(inst->Texture.NumOffsets <= 1); // TODO what to do in other cases? - - /* save off the tex fetch to be patched later with correct const_idx: */ - idx = ctx->so->num_tfetch_instrs++; - ctx->so->tfetch_instrs[idx].samp_id = inst->Src[1].Register.Index; - ctx->so->tfetch_instrs[idx].instr = instr; - - add_dst_reg(ctx, instr, using_temp ? &tmp_dst : &inst->Dst[0].Register); - reg = add_src_reg(ctx, instr, coord); - - /* blob compiler always sets 3rd component to same as 1st for 2d: */ - if (inst->Texture.Texture == TGSI_TEXTURE_2D || inst->Texture.Texture == TGSI_TEXTURE_RECT) - reg->swizzle[2] = reg->swizzle[0]; - - /* dst register needs to be marked for sync: */ - ctx->need_sync |= 1 << instr->dst_reg.num; - - /* TODO we need some way to know if the tex fetch needs to sync on alu pipe.. 
*/ - instr->sync = true; - - if (using_temp) { - /* texture fetch can't write directly to export, so if tgsi - * is telling us the dst register is in output file, we load - * the texture to a temp and the use ALU instruction to move - * to output - */ - instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv); - - add_dst_reg(ctx, instr, &inst->Dst[0].Register); - add_src_reg(ctx, instr, &tmp_src); - add_src_reg(ctx, instr, &tmp_src); - add_vector_clamp(inst, instr); - } -} - -/* SGE(a,b) = GTE((b - a), 1.0, 0.0) */ -/* SLT(a,b) = GTE((b - a), 0.0, 1.0) */ -/* SEQ(a,b) = EQU((b - a), 1.0, 0.0) */ -/* SNE(a,b) = EQU((b - a), 0.0, 1.0) */ -static void -translate_sge_slt_seq_sne(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, unsigned opc) -{ - struct ir2_instruction *instr; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; - struct tgsi_src_register tmp_const; - float c0, c1; - instr_vector_opc_t vopc; - - switch (opc) { - default: - assert(0); - case TGSI_OPCODE_SGE: - c0 = 1.0; - c1 = 0.0; - vopc = CNDGTEv; - break; - case TGSI_OPCODE_SLT: - c0 = 0.0; - c1 = 1.0; - vopc = CNDGTEv; - break; - case TGSI_OPCODE_SEQ: - c0 = 0.0; - c1 = 1.0; - vopc = CNDEv; - break; - case TGSI_OPCODE_SNE: - c0 = 1.0; - c1 = 0.0; - vopc = CNDEv; - break; - } - - get_internal_temp(ctx, &tmp_dst, &tmp_src); - - instr = ir2_instr_create_alu_v(ctx->so->ir, ADDv); - add_dst_reg(ctx, instr, &tmp_dst); - add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE; - add_src_reg(ctx, instr, &inst->Src[1].Register); - - instr = ir2_instr_create_alu_v(ctx->so->ir, vopc); - add_dst_reg(ctx, instr, &inst->Dst[0].Register); - add_src_reg(ctx, instr, &tmp_src); - get_immediate(ctx, &tmp_const, fui(c1)); - add_src_reg(ctx, instr, &tmp_const); - get_immediate(ctx, &tmp_const, fui(c0)); - add_src_reg(ctx, instr, &tmp_const); -} - -/* LRP(a,b,c) = (a * b) + ((1 - a) * c) */ -static void -translate_lrp(struct fd2_compile_context *ctx, - struct 
tgsi_full_instruction *inst, - unsigned opc) -{ - struct ir2_instruction *instr; - struct tgsi_dst_register tmp_dst1, tmp_dst2; - struct tgsi_src_register tmp_src1, tmp_src2; - struct tgsi_src_register tmp_const; - - get_internal_temp(ctx, &tmp_dst1, &tmp_src1); - get_internal_temp(ctx, &tmp_dst2, &tmp_src2); - - get_immediate(ctx, &tmp_const, fui(1.0)); - - /* tmp1 = (a * b) */ - instr = ir2_instr_create_alu_v(ctx->so->ir, MULv); - add_dst_reg(ctx, instr, &tmp_dst1); - add_src_reg(ctx, instr, &inst->Src[0].Register); - add_src_reg(ctx, instr, &inst->Src[1].Register); - - /* tmp2 = (1 - a) */ - instr = ir2_instr_create_alu_v(ctx->so->ir, ADDv); - add_dst_reg(ctx, instr, &tmp_dst2); - add_src_reg(ctx, instr, &tmp_const); - add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE; - - /* tmp2 = tmp2 * c */ - instr = ir2_instr_create_alu_v(ctx->so->ir, MULv); - add_dst_reg(ctx, instr, &tmp_dst2); - add_src_reg(ctx, instr, &tmp_src2); - add_src_reg(ctx, instr, &inst->Src[2].Register); - - /* dst = tmp1 + tmp2 */ - instr = ir2_instr_create_alu_v(ctx->so->ir, ADDv); - add_dst_reg(ctx, instr, &inst->Dst[0].Register); - add_src_reg(ctx, instr, &tmp_src1); - add_src_reg(ctx, instr, &tmp_src2); -} - -static void -translate_trig(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, - unsigned opc) -{ - struct ir2_instruction *instr; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; - struct tgsi_src_register tmp_const; - instr_scalar_opc_t op; - - switch (opc) { - default: - assert(0); - case TGSI_OPCODE_SIN: - op = SIN; - break; - case TGSI_OPCODE_COS: - op = COS; - break; - } - - get_internal_temp(ctx, &tmp_dst, &tmp_src); - - tmp_dst.WriteMask = TGSI_WRITEMASK_X; - tmp_src.SwizzleX = tmp_src.SwizzleY = - tmp_src.SwizzleZ = tmp_src.SwizzleW = TGSI_SWIZZLE_X; - - instr = ir2_instr_create_alu_v(ctx->so->ir, MULADDv); - add_dst_reg(ctx, instr, &tmp_dst); - add_src_reg(ctx, instr, &inst->Src[0].Register); - 
get_immediate(ctx, &tmp_const, fui(0.159155)); - add_src_reg(ctx, instr, &tmp_const); - get_immediate(ctx, &tmp_const, fui(0.5)); - add_src_reg(ctx, instr, &tmp_const); - - instr = ir2_instr_create_alu_v(ctx->so->ir, FRACv); - add_dst_reg(ctx, instr, &tmp_dst); - add_src_reg(ctx, instr, &tmp_src); - add_src_reg(ctx, instr, &tmp_src); - - instr = ir2_instr_create_alu_v(ctx->so->ir, MULADDv); - add_dst_reg(ctx, instr, &tmp_dst); - add_src_reg(ctx, instr, &tmp_src); - get_immediate(ctx, &tmp_const, fui(6.283185)); - add_src_reg(ctx, instr, &tmp_const); - get_immediate(ctx, &tmp_const, fui(-3.141593)); - add_src_reg(ctx, instr, &tmp_const); - - instr = ir2_instr_create_alu_s(ctx->so->ir, op); - add_dst_reg(ctx, instr, &inst->Dst[0].Register); - add_src_reg(ctx, instr, &tmp_src); -} - -static void -translate_dp2(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, - unsigned opc) -{ - struct tgsi_src_register tmp_const; - struct ir2_instruction *instr; - /* DP2ADD c,a,b -> dot2(a,b) + c */ - /* for c we use the constant 0.0 */ - instr = ir2_instr_create_alu_v(ctx->so->ir, DOT2ADDv); - add_dst_reg(ctx, instr, &inst->Dst[0].Register); - add_src_reg(ctx, instr, &inst->Src[0].Register); - add_src_reg(ctx, instr, &inst->Src[1].Register); - get_immediate(ctx, &tmp_const, fui(0.0f)); - add_src_reg(ctx, instr, &tmp_const); - add_vector_clamp(inst, instr); -} - -/* - * Main part of compiler/translator: - */ - -static void -translate_instruction(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - unsigned opc = inst->Instruction.Opcode; - struct ir2_instruction *instr; - - if (opc == TGSI_OPCODE_END) - return; - - /* TODO turn this into a table: */ - switch (opc) { - case TGSI_OPCODE_MOV: - instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv); - add_regs_vector_1(ctx, inst, instr); - break; - case TGSI_OPCODE_RCP: - instr = ir2_instr_create_alu_s(ctx->so->ir, RECIP_IEEE); - add_regs_scalar_1(ctx, inst, instr); - break; - case 
TGSI_OPCODE_RSQ: - instr = ir2_instr_create_alu_s(ctx->so->ir, RECIPSQ_IEEE); - add_regs_scalar_1(ctx, inst, instr); - break; - case TGSI_OPCODE_SQRT: - instr = ir2_instr_create_alu_s(ctx->so->ir, SQRT_IEEE); - add_regs_scalar_1(ctx, inst, instr); - break; - case TGSI_OPCODE_MUL: - instr = ir2_instr_create_alu_v(ctx->so->ir, MULv); - add_regs_vector_2(ctx, inst, instr); - break; - case TGSI_OPCODE_ADD: - instr = ir2_instr_create_alu_v(ctx->so->ir, ADDv); - add_regs_vector_2(ctx, inst, instr); - break; - case TGSI_OPCODE_DP2: - translate_dp2(ctx, inst, opc); - break; - case TGSI_OPCODE_DP3: - instr = ir2_instr_create_alu_v(ctx->so->ir, DOT3v); - add_regs_vector_2(ctx, inst, instr); - break; - case TGSI_OPCODE_DP4: - instr = ir2_instr_create_alu_v(ctx->so->ir, DOT4v); - add_regs_vector_2(ctx, inst, instr); - break; - case TGSI_OPCODE_MIN: - instr = ir2_instr_create_alu_v(ctx->so->ir, MINv); - add_regs_vector_2(ctx, inst, instr); - break; - case TGSI_OPCODE_MAX: - instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv); - add_regs_vector_2(ctx, inst, instr); - break; - case TGSI_OPCODE_SLT: - case TGSI_OPCODE_SGE: - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_SNE: - translate_sge_slt_seq_sne(ctx, inst, opc); - break; - case TGSI_OPCODE_MAD: - instr = ir2_instr_create_alu_v(ctx->so->ir, MULADDv); - add_regs_vector_3(ctx, inst, instr); - break; - case TGSI_OPCODE_LRP: - translate_lrp(ctx, inst, opc); - break; - case TGSI_OPCODE_FRC: - instr = ir2_instr_create_alu_v(ctx->so->ir, FRACv); - add_regs_vector_1(ctx, inst, instr); - break; - case TGSI_OPCODE_FLR: - instr = ir2_instr_create_alu_v(ctx->so->ir, FLOORv); - add_regs_vector_1(ctx, inst, instr); - break; - case TGSI_OPCODE_EX2: - instr = ir2_instr_create_alu_s(ctx->so->ir, EXP_IEEE); - add_regs_scalar_1(ctx, inst, instr); - break; - case TGSI_OPCODE_POW: - translate_pow(ctx, inst); - break; - case TGSI_OPCODE_COS: - case TGSI_OPCODE_SIN: - translate_trig(ctx, inst, opc); - break; - case TGSI_OPCODE_TEX: - case 
TGSI_OPCODE_TXP: - translate_tex(ctx, inst, opc); - break; - case TGSI_OPCODE_CMP: - instr = ir2_instr_create_alu_v(ctx->so->ir, CNDGTEv); - add_regs_vector_3(ctx, inst, instr); - instr->src_reg[0].flags ^= IR2_REG_NEGATE; /* src1 */ - break; - case TGSI_OPCODE_IF: - push_predicate(ctx, &inst->Src[0].Register); - ctx->so->ir->pred = IR2_PRED_EQ; - break; - case TGSI_OPCODE_ELSE: - ctx->so->ir->pred = IR2_PRED_NE; - break; - case TGSI_OPCODE_ENDIF: - pop_predicate(ctx); - break; - case TGSI_OPCODE_F2I: - instr = ir2_instr_create_alu_v(ctx->so->ir, TRUNCv); - add_regs_vector_1(ctx, inst, instr); - break; - default: - DBG("unknown TGSI opc: %s", tgsi_get_opcode_name(opc)); - tgsi_dump(ctx->so->tokens, 0); - assert(0); - break; - } - - /* internal temporaries are only valid for the duration of a single - * TGSI instruction: - */ - ctx->num_internal_temps = 0; -} - -static void -compile_instructions(struct fd2_compile_context *ctx) -{ - while (!tgsi_parse_end_of_tokens(&ctx->parser)) { - tgsi_parse_token(&ctx->parser); - - switch (ctx->parser.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_INSTRUCTION: - translate_instruction(ctx, - &ctx->parser.FullToken.FullInstruction); - break; - default: - break; - } - } -} - -int -fd2_compile_shader(struct fd_program_stateobj *prog, - struct fd2_shader_stateobj *so) -{ - struct fd2_compile_context ctx; - - ir2_shader_destroy(so->ir); - so->ir = ir2_shader_create(); - so->num_vfetch_instrs = so->num_tfetch_instrs = so->num_immediates = 0; - - if (compile_init(&ctx, prog, so) != TGSI_PARSE_OK) - return -1; - - if (ctx.type == PIPE_SHADER_VERTEX) { - compile_vtx_fetch(&ctx); - } else if (ctx.type == PIPE_SHADER_FRAGMENT) { - prog->num_exports = 0; - memset(prog->export_linkage, 0xff, - sizeof(prog->export_linkage)); - } - - compile_instructions(&ctx); - - compile_free(&ctx); - - return 0; -} - diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_compiler.h mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_compiler.h --- 
mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_compiler.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_compiler.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,36 +0,0 @@ -/* - * Copyright (C) 2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#ifndef FD2_COMPILER_H_ -#define FD2_COMPILER_H_ - -#include "fd2_program.h" -#include "fd2_util.h" - -int fd2_compile_shader(struct fd_program_stateobj *prog, - struct fd2_shader_stateobj *so); - -#endif /* FD2_COMPILER_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_context.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_context.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -31,6 +31,7 @@ #include "fd2_emit.h" #include "fd2_gmem.h" #include "fd2_program.h" +#include "fd2_query.h" #include "fd2_rasterizer.h" #include "fd2_texture.h" #include "fd2_zsa.h" @@ -46,17 +47,18 @@ create_solid_vertexbuf(struct pipe_context *pctx) { static const float init_shader_const[] = { - /* for clear/gmem2mem: */ - -1.000000, +1.000000, +1.000000, +1.100000, - +1.000000, +1.000000, -1.000000, -1.100000, - +1.000000, +1.100000, -1.100000, +1.000000, - /* for mem2gmem: (vertices) */ - -1.000000, +1.000000, +1.000000, +1.000000, - +1.000000, +1.000000, -1.000000, -1.000000, - +1.000000, +1.000000, -1.000000, +1.000000, + /* for clear/gmem2mem/mem2gmem (vertices): */ + -1.000000, +1.000000, +1.000000, + +1.000000, +1.000000, +1.000000, + -1.000000, -1.000000, +1.000000, /* for mem2gmem: (tex coords) */ - +0.000000, +0.000000, +1.000000, +0.000000, - +0.000000, +1.000000, +1.000000, +1.000000, + +0.000000, +0.000000, + +1.000000, +0.000000, + +0.000000, +1.000000, + /* SCREEN_SCISSOR_BR value (must be at 60 byte offset in page) */ + 0.0, + /* zero indices dummy draw workaround (3 16-bit zeros) */ + 0.0, 0.0, }; struct pipe_resource *prsc = pipe_buffer_create(pctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(init_shader_const)); @@ -119,5 +121,7 @@ /* construct vertex state used for solid ops (clear, and gmem<->mem) */ fd2_ctx->solid_vertexbuf = 
create_solid_vertexbuf(pctx); + fd2_query_context_init(pctx); + return pctx; } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_draw.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_draw.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_draw.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -75,31 +75,43 @@ // CONST(20,0) (or CONST(26,0) in soliv_vp) fd2_emit_vertex_bufs(ctx->batch->draw, 0x78, bufs, vtx->num_elements); + fd2_emit_vertex_bufs(ctx->batch->binning, 0x78, bufs, vtx->num_elements); } -static bool -fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, - unsigned index_offset) +static void +draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info, + struct fd_ringbuffer *ring, unsigned index_offset, bool binning) { - struct fd_ringbuffer *ring = ctx->batch->draw; - - if (ctx->dirty & FD_DIRTY_VTXBUF) - emit_vertexbufs(ctx); - - fd2_emit_state(ctx, ctx->dirty); - OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET)); - OUT_RING(ring, info->start); - - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); - OUT_RING(ring, 0x0000003b); + OUT_RING(ring, info->index_size ? 0 : info->start); OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1); OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE); - if (!is_a20x(ctx->screen)) { + if (is_a20x(ctx->screen)) { + /* wait for DMA to finish and + * dummy draw one triangle with indexes 0,0,0. + * with PRE_FETCH_CULL_ENABLE | GRP_CULL_ENABLE. 
+ * + * this workaround is for a HW bug related to DMA alignment: + * it is necessary for indexed draws and possibly also + * draws that read binning data + */ + OUT_PKT3(ring, CP_WAIT_REG_EQ, 4); + OUT_RING(ring, 0x000005d0); /* RBBM_STATUS */ + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00001000); /* bit: 12: VGT_BUSY_NO_DMA */ + OUT_RING(ring, 0x00000001); + + OUT_PKT3(ring, CP_DRAW_INDX_BIN, 6); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x0003c004); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000003); + OUT_RELOC(ring, fd_resource(fd2_context(ctx)->solid_vertexbuf)->bo, 64, 0, 0); + OUT_RING(ring, 0x00000006); + } else { OUT_WFI (ring); OUT_PKT3(ring, CP_SET_CONSTANT, 3); @@ -108,134 +120,132 @@ OUT_RING(ring, info->min_index); /* VGT_MIN_VTX_INDX */ } + /* binning shader will take offset from C64 */ + if (binning && is_a20x(ctx->screen)) { + OUT_PKT3(ring, CP_SET_CONSTANT, 5); + OUT_RING(ring, 0x00000180); + OUT_RING(ring, fui(ctx->batch->num_vertices)); + OUT_RING(ring, fui(0.0f)); + OUT_RING(ring, fui(0.0f)); + OUT_RING(ring, fui(0.0f)); + } + + enum pc_di_vis_cull_mode vismode = USE_VISIBILITY; + if (binning || info->mode == PIPE_PRIM_POINTS) + vismode = IGNORE_VISIBILITY; + fd_draw_emit(ctx->batch, ring, ctx->primtypes[info->mode], - IGNORE_VISIBILITY, info, index_offset); + vismode, info, index_offset); - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010)); - OUT_RING(ring, 0x00000000); + if (is_a20x(ctx->screen)) { + /* not sure why this is required, but it fixes some hangs */ + OUT_WFI(ring); + } else { + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010)); + OUT_RING(ring, 0x00000000); + } emit_cacheflush(ring); +} + + +static bool +fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *pinfo, + unsigned index_offset) +{ + if (!ctx->prog.fp || !ctx->prog.vp) + return false; + + if (ctx->dirty & FD_DIRTY_VTXBUF) + emit_vertexbufs(ctx); + + if (fd_binning_enabled) 
+ fd2_emit_state_binning(ctx, ctx->dirty); + + fd2_emit_state(ctx, ctx->dirty); + + /* a2xx can draw only 65535 vertices at once + * on a22x the field in the draw command is 32bits but seems limited too + * using a limit of 32k because it fixes an unexplained hang + * 32766 works for all primitives (multiple of 2 and 3) + */ + if (pinfo->count > 32766) { + static const uint16_t step_tbl[PIPE_PRIM_MAX] = { + [0 ... PIPE_PRIM_MAX - 1] = 32766, + [PIPE_PRIM_LINE_STRIP] = 32765, + [PIPE_PRIM_TRIANGLE_STRIP] = 32764, + + /* needs more work */ + [PIPE_PRIM_TRIANGLE_FAN] = 0, + [PIPE_PRIM_LINE_LOOP] = 0, + }; + + struct pipe_draw_info info = *pinfo; + unsigned count = info.count; + unsigned step = step_tbl[info.mode]; + unsigned num_vertices = ctx->batch->num_vertices; + + if (!step) + return false; + + for (; count + step > 32766; count -= step) { + info.count = MIN2(count, 32766); + draw_impl(ctx, &info, ctx->batch->draw, index_offset, false); + draw_impl(ctx, &info, ctx->batch->binning, index_offset, true); + info.start += step; + ctx->batch->num_vertices += step; + } + /* changing this value is a hack, restore it */ + ctx->batch->num_vertices = num_vertices; + } else { + draw_impl(ctx, pinfo, ctx->batch->draw, index_offset, false); + draw_impl(ctx, pinfo, ctx->batch->binning, index_offset, true); + } fd_context_all_clean(ctx); return true; } - -static bool -fd2_clear(struct fd_context *ctx, unsigned buffers, - const union pipe_color_union *color, double depth, unsigned stencil) +static void +clear_state(struct fd_batch *batch, struct fd_ringbuffer *ring, + unsigned buffers, bool fast_clear) { + struct fd_context *ctx = batch->ctx; struct fd2_context *fd2_ctx = fd2_context(ctx); - struct fd_ringbuffer *ring = ctx->batch->draw; - struct pipe_framebuffer_state *fb = &ctx->batch->framebuffer; - uint32_t reg, colr = 0; - - if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs) - colr = pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, color->f); - - /* emit generic state now: */ - 
fd2_emit_state(ctx, ctx->dirty & - (FD_DIRTY_BLEND | FD_DIRTY_VIEWPORT | - FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR)); + uint32_t reg; fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) { - { .prsc = fd2_ctx->solid_vertexbuf, .size = 48 }, + { .prsc = fd2_ctx->solid_vertexbuf, .size = 36 }, }, 1); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET)); OUT_RING(ring, 0); - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); - OUT_RING(ring, 0x0000028f); - - fd2_program_emit(ring, &ctx->solid_prog); + fd2_program_emit(ctx, ring, &ctx->solid_prog); OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1); OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE); - if (is_a20x(ctx->screen)) { - OUT_PKT3(ring, CP_SET_CONSTANT, 5); - OUT_RING(ring, 0x00000480); - OUT_RING(ring, color->ui[0]); - OUT_RING(ring, color->ui[1]); - OUT_RING(ring, color->ui[2]); - OUT_RING(ring, color->ui[3]); - } else { - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR)); - OUT_RING(ring, colr); - } - - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL)); - OUT_RING(ring, 0x00000084); - - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL)); - reg = 0; - if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { - reg |= A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE; - switch (fd_pipe2depth(fb->zsbuf->format)) { - case DEPTHX_24_8: - if (buffers & PIPE_CLEAR_DEPTH) - reg |= A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xe); - if (buffers & PIPE_CLEAR_STENCIL) - reg |= A2XX_RB_COPY_CONTROL_CLEAR_MASK(0x1); - break; - case DEPTHX_16: - if (buffers & PIPE_CLEAR_DEPTH) - reg |= A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xf); - break; - default: - debug_assert(0); - break; - } - } - OUT_RING(ring, reg); - - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR)); - reg = 0; if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) 
{ - switch (fd_pipe2depth(fb->zsbuf->format)) { - case DEPTHX_24_8: - reg = (((uint32_t)(0xffffff * depth)) << 8) | - (stencil & 0xff); - break; - case DEPTHX_16: - reg = (uint32_t)(0xffffffff * depth); - break; - default: - debug_assert(0); - break; - } - } - OUT_RING(ring, reg); - - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL)); - reg = 0; - if (buffers & PIPE_CLEAR_DEPTH) { - reg |= A2XX_RB_DEPTHCONTROL_ZFUNC(FUNC_ALWAYS) | + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL)); + reg = 0; + if (buffers & PIPE_CLEAR_DEPTH) { + reg |= A2XX_RB_DEPTHCONTROL_ZFUNC(FUNC_ALWAYS) | A2XX_RB_DEPTHCONTROL_Z_ENABLE | A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE | A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE; + } + if (buffers & PIPE_CLEAR_STENCIL) { + reg |= A2XX_RB_DEPTHCONTROL_STENCILFUNC(FUNC_ALWAYS) | + A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE | + A2XX_RB_DEPTHCONTROL_STENCILZPASS(STENCIL_REPLACE); + } + OUT_RING(ring, reg); } - if (buffers & PIPE_CLEAR_STENCIL) { - reg |= A2XX_RB_DEPTHCONTROL_STENCILFUNC(FUNC_ALWAYS) | - A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE | - A2XX_RB_DEPTHCONTROL_STENCILZPASS(STENCIL_REPLACE); - } - OUT_RING(ring, reg); - - OUT_PKT3(ring, CP_SET_CONSTANT, 3); - OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF)); - OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff)); - OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL)); @@ -250,18 +260,19 @@ OUT_RING(ring, 0x00000000); /* PA_CL_CLIP_CNTL */ OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */ A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) | - A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES)); + A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES) | + (fast_clear ? 
A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE : 0)); + + if (fast_clear) { + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG)); + OUT_RING(ring, A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES(3)); + } OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK)); OUT_RING(ring, 0x0000ffff); - OUT_PKT3(ring, CP_SET_CONSTANT, 3); - OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL)); - OUT_RING(ring, xy2d(0,0)); /* PA_SC_WINDOW_SCISSOR_TL */ - OUT_RING(ring, xy2d(fb->width, /* PA_SC_WINDOW_SCISSOR_BR */ - fb->height)); - OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK)); if (buffers & PIPE_CLEAR_COLOR) { @@ -273,24 +284,325 @@ OUT_RING(ring, 0x0); } - if (!is_a20x(ctx->screen)) { - OUT_PKT3(ring, CP_SET_CONSTANT, 3); - OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX)); - OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */ - OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */ - } + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL)); + OUT_RING(ring, 0); - fd_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, - DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL); + if (is_a20x(batch->ctx->screen)) + return; + + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX)); + OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */ + OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */ + + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF)); + OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff)); + OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL)); - OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000084); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); + OUT_RING(ring, 0x0000028f); +} + +static void +clear_state_restore(struct 
fd_context *ctx, struct fd_ringbuffer *ring) +{ + if (is_a20x(ctx->screen)) + return; OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL)); OUT_RING(ring, 0x00000000); + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL)); + OUT_RING(ring, 0x00000000); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); + OUT_RING(ring, 0x0000003b); +} + +static void +clear_fast(struct fd_batch *batch, struct fd_ringbuffer *ring, + uint32_t color_clear, uint32_t depth_clear, unsigned patch_type) +{ + BEGIN_RING(ring, 8); /* preallocate next 2 packets (for patching) */ + + /* zero values are patched in */ + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR)); + OUT_RINGP(ring, patch_type, &batch->gmem_patches); + + OUT_PKT3(ring, CP_SET_CONSTANT, 4); + OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO)); + OUT_RING(ring, 0x8000 | 32); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + + /* set fill values */ + if (!is_a20x(batch->ctx->screen)) { + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR)); + OUT_RING(ring, color_clear); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL)); + OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE | + A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xf)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR)); + OUT_RING(ring, depth_clear); + } else { + const float sc = 1.0f / 255.0f; + + OUT_PKT3(ring, CP_SET_CONSTANT, 5); + OUT_RING(ring, 0x00000480); + OUT_RING(ring, fui((float) (color_clear >> 0 & 0xff) * sc)); + OUT_RING(ring, fui((float) (color_clear >> 8 & 0xff) * sc)); + OUT_RING(ring, fui((float) (color_clear >> 16 & 0xff) * sc)); + OUT_RING(ring, fui((float) (color_clear >> 24 & 0xff) * sc)); + + // XXX if using float the rounding error breaks it.. 
+ float depth = ((double) (depth_clear >> 8)) * (1.0/(double) 0xffffff); + assert((unsigned) (((double) depth * (double) 0xffffff)) == + (depth_clear >> 8)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE)); + OUT_RING(ring, fui(0.0f)); + OUT_RING(ring, fui(depth)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF)); + OUT_RING(ring, 0xff000000 | + A2XX_RB_STENCILREFMASK_BF_STENCILREF(depth_clear & 0xff) | + A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff)); + OUT_RING(ring, 0xff000000 | + A2XX_RB_STENCILREFMASK_STENCILREF(depth_clear & 0xff) | + A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); + } + + fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, + DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL); +} + +static bool +fd2_clear_fast(struct fd_context *ctx, unsigned buffers, + const union pipe_color_union *color, double depth, unsigned stencil) +{ + /* using 4x MSAA allows clearing ~2x faster + * then we can use higher bpp clearing to clear lower bpp + * 1 "pixel" can clear 64 bits (rgba8+depth24+stencil8) + * note: its possible to clear with 32_32_32_32 format but its not faster + * note: fast clear doesn't work with sysmem rendering + * (sysmem rendering is disabled when clear is used) + * + * we only have 16-bit / 32-bit color formats + * and 16-bit / 32-bit depth formats + * so there are only a few possible combinations + * + * if the bpp of the color/depth doesn't match + * we clear with depth/color individually + */ + struct fd2_context *fd2_ctx = fd2_context(ctx); + struct fd_batch *batch = ctx->batch; + struct fd_ringbuffer *ring = batch->draw; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + uint32_t color_clear = 0, depth_clear = 0; + enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); + int depth_size = -1; /* -1: no clear, 0: clear 16-bit, 1: clear 32-bit */ + int color_size = -1; + + /* TODO: need to test performance on a22x 
*/ + if (!is_a20x(ctx->screen)) + return false; + + if (buffers & PIPE_CLEAR_COLOR) + color_size = util_format_get_blocksizebits(format) == 32; + + if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) + depth_size = fd_pipe2depth(pfb->zsbuf->format) == DEPTHX_24_8; + + assert(color_size >= 0 || depth_size >= 0); + + /* when clearing 24_8, depth/stencil must be both cleared + * TODO: if buffer isn't attached we can clear it anyway + */ + if (depth_size == 1 && !(buffers & PIPE_CLEAR_STENCIL) != !(buffers & PIPE_CLEAR_DEPTH)) + return false; + + if (color_size == 0) { + color_clear = pack_rgba(format, color->f); + color_clear = (color_clear << 16) | (color_clear & 0xffff); + } else if (color_size == 1) { + color_clear = pack_rgba(format, color->f); + } + + if (depth_size == 0) { + depth_clear = (uint32_t)(0xffff * depth); + depth_clear |= depth_clear << 16; + } else if (depth_size == 1) { + depth_clear = (((uint32_t)(0xffffff * depth)) << 8); + depth_clear |= (stencil & 0xff); + } + + /* disable "window" scissor.. 
*/ + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL)); + OUT_RING(ring, xy2d(0, 0)); + OUT_RING(ring, xy2d(0x7fff, 0x7fff)); + + /* make sure we fill all "pixels" (in SCREEN_SCISSOR) */ + OUT_PKT3(ring, CP_SET_CONSTANT, 5); + OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE)); + OUT_RING(ring, fui(4096.0)); + OUT_RING(ring, fui(4096.0)); + OUT_RING(ring, fui(4096.0)); + OUT_RING(ring, fui(4096.0)); + + clear_state(batch, ring, ~0u, true); + + if (color_size >= 0 && depth_size != color_size) + clear_fast(batch, ring, color_clear, color_clear, GMEM_PATCH_FASTCLEAR_COLOR); + + if (depth_size >= 0 && depth_size != color_size) + clear_fast(batch, ring, depth_clear, depth_clear, GMEM_PATCH_FASTCLEAR_DEPTH); + + if (depth_size == color_size) + clear_fast(batch, ring, color_clear, depth_clear, GMEM_PATCH_FASTCLEAR_COLOR_DEPTH); + + clear_state_restore(ctx, ring); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG)); + OUT_RING(ring, 0); + + /* can't patch in SCREEN_SCISSOR_BR as it can be different for each tile. 
+ * MEM_WRITE the value in tile_renderprep, and use CP_LOAD_CONSTANT_CONTEXT + * the value is read from byte offset 60 in the given bo + */ + OUT_PKT3(ring, CP_LOAD_CONSTANT_CONTEXT, 3); + OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0, 0, 0); + OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR)); + OUT_RING(ring, 1); + + OUT_PKT3(ring, CP_SET_CONSTANT, 4); + OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO)); + OUT_RINGP(ring, GMEM_PATCH_RESTORE_INFO, &batch->gmem_patches); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + return true; +} + +static bool +fd2_clear(struct fd_context *ctx, unsigned buffers, + const union pipe_color_union *color, double depth, unsigned stencil) +{ + struct fd_ringbuffer *ring = ctx->batch->draw; + struct pipe_framebuffer_state *fb = &ctx->batch->framebuffer; + + if (fd2_clear_fast(ctx, buffers, color, depth, stencil)) + goto dirty; + + /* set clear value */ + if (is_a20x(ctx->screen)) { + if (buffers & PIPE_CLEAR_COLOR) { + /* C0 used by fragment shader */ + OUT_PKT3(ring, CP_SET_CONSTANT, 5); + OUT_RING(ring, 0x00000480); + OUT_RING(ring, color->ui[0]); + OUT_RING(ring, color->ui[1]); + OUT_RING(ring, color->ui[2]); + OUT_RING(ring, color->ui[3]); + } + + if (buffers & PIPE_CLEAR_DEPTH) { + /* use viewport to set depth value */ + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE)); + OUT_RING(ring, fui(0.0f)); + OUT_RING(ring, fui(depth)); + } + + if (buffers & PIPE_CLEAR_STENCIL) { + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF)); + OUT_RING(ring, 0xff000000 | + A2XX_RB_STENCILREFMASK_BF_STENCILREF(stencil) | + A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff)); + OUT_RING(ring, 0xff000000 | + A2XX_RB_STENCILREFMASK_STENCILREF(stencil) | + A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); + } + } else { + if (buffers & PIPE_CLEAR_COLOR) { + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR)); + OUT_RING(ring, 
pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, color->f)); + } + + if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { + uint32_t clear_mask, depth_clear; + if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { + switch (fd_pipe2depth(fb->zsbuf->format)) { + case DEPTHX_24_8: + clear_mask = ((buffers & PIPE_CLEAR_DEPTH) ? 0xe : 0) | + ((buffers & PIPE_CLEAR_STENCIL) ? 0x1 : 0); + depth_clear = (((uint32_t)(0xffffff * depth)) << 8) | + (stencil & 0xff); + break; + case DEPTHX_16: + clear_mask = 0xf; + depth_clear = (uint32_t)(0xffffffff * depth); + break; + default: + debug_assert(0); + break; + } + } + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL)); + OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE | + A2XX_RB_COPY_CONTROL_CLEAR_MASK(clear_mask)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR)); + OUT_RING(ring, depth_clear); + } + } + + /* scissor state */ + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL)); + OUT_RING(ring, xy2d(0, 0)); + OUT_RING(ring, xy2d(fb->width, fb->height)); + + /* viewport state */ + OUT_PKT3(ring, CP_SET_CONSTANT, 5); + OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE)); + OUT_RING(ring, fui((float) fb->width / 2.0)); + OUT_RING(ring, fui((float) fb->width / 2.0)); + OUT_RING(ring, fui((float) fb->height / 2.0)); + OUT_RING(ring, fui((float) fb->height / 2.0)); + + /* common state */ + clear_state(ctx->batch, ring, buffers, false); + + fd_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, + DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL); + + clear_state_restore(ctx, ring); + +dirty: ctx->dirty |= FD_DIRTY_ZSA | FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | @@ -298,7 +610,8 @@ FD_DIRTY_PROG | FD_DIRTY_CONST | FD_DIRTY_BLEND | - FD_DIRTY_FRAMEBUFFER; + FD_DIRTY_FRAMEBUFFER | + FD_DIRTY_SCISSOR; ctx->dirty_shader[PIPE_SHADER_VERTEX] |= FD_DIRTY_SHADER_PROG; 
ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_draw.h mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_draw.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_draw.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_draw.h 2019-03-31 23:16:37.000000000 +0000 @@ -33,4 +33,11 @@ void fd2_draw_init(struct pipe_context *pctx); +enum { + GMEM_PATCH_FASTCLEAR_COLOR, + GMEM_PATCH_FASTCLEAR_DEPTH, + GMEM_PATCH_FASTCLEAR_COLOR_DEPTH, + GMEM_PATCH_RESTORE_INFO, +}; + #endif /* FD2_DRAW_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_emit.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_emit.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_emit.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_emit.c 2019-03-31 23:16:37.000000000 +0000 @@ -118,6 +118,7 @@ static const struct fd2_pipe_sampler_view dummy_view = {}; const struct fd2_sampler_stateobj *sampler; const struct fd2_pipe_sampler_view *view; + struct fd_resource *rsc; if (emitted & (1 << const_idx)) return 0; @@ -129,19 +130,25 @@ fd2_pipe_sampler_view(tex->textures[samp_id]) : &dummy_view; + rsc = view->base.texture ? 
fd_resource(view->base.texture) : NULL; + OUT_PKT3(ring, CP_SET_CONSTANT, 7); OUT_RING(ring, 0x00010000 + (0x6 * const_idx)); OUT_RING(ring, sampler->tex0 | view->tex0); - if (view->base.texture) - OUT_RELOC(ring, fd_resource(view->base.texture)->bo, 0, view->fmt, 0); + if (rsc) + OUT_RELOC(ring, rsc->bo, fd_resource_offset(rsc, 0, 0), view->tex1, 0); else OUT_RING(ring, 0); OUT_RING(ring, view->tex2); OUT_RING(ring, sampler->tex3 | view->tex3); - OUT_RING(ring, sampler->tex4); - OUT_RING(ring, sampler->tex5); + OUT_RING(ring, sampler->tex4 | view->tex4); + + if (rsc && rsc->base.last_level) + OUT_RELOC(ring, rsc->bo, fd_resource_offset(rsc, 1, 0), view->tex5, 0); + else + OUT_RING(ring, view->tex5); return (1 << const_idx); } @@ -179,10 +186,63 @@ } void +fd2_emit_state_binning(struct fd_context *ctx, const enum fd_dirty_3d_state dirty) +{ + struct fd2_blend_stateobj *blend = fd2_blend_stateobj(ctx->blend); + struct fd_ringbuffer *ring = ctx->batch->binning; + + /* subset of fd2_emit_state needed for hw binning on a20x */ + + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE)) + fd2_program_emit(ctx, ring, &ctx->prog); + + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONST)) { + emit_constants(ring, VS_CONST_BASE * 4, + &ctx->constbuf[PIPE_SHADER_VERTEX], + (dirty & FD_DIRTY_PROG) ? 
ctx->prog.vp : NULL); + } + + if (dirty & FD_DIRTY_VIEWPORT) { + OUT_PKT3(ring, CP_SET_CONSTANT, 9); + OUT_RING(ring, 0x00000184); + OUT_RING(ring, fui(ctx->viewport.translate[0])); + OUT_RING(ring, fui(ctx->viewport.translate[1])); + OUT_RING(ring, fui(ctx->viewport.translate[2])); + OUT_RING(ring, fui(0.0f)); + OUT_RING(ring, fui(ctx->viewport.scale[0])); + OUT_RING(ring, fui(ctx->viewport.scale[1])); + OUT_RING(ring, fui(ctx->viewport.scale[2])); + OUT_RING(ring, fui(0.0f)); + } + + /* not sure why this is needed */ + if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) { + enum pipe_format format = + pipe_surface_format(ctx->batch->framebuffer.cbufs[0]); + bool has_alpha = util_format_has_alpha(format); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL)); + OUT_RING(ring, blend->rb_blendcontrol_alpha | + COND(has_alpha, blend->rb_blendcontrol_rgb) | + COND(!has_alpha, blend->rb_blendcontrol_no_alpha_rgb)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK)); + OUT_RING(ring, blend->rb_colormask); + } + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL)); + OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_FACE_KILL_ENABLE); +} + +void fd2_emit_state(struct fd_context *ctx, const enum fd_dirty_3d_state dirty) { struct fd2_blend_stateobj *blend = fd2_blend_stateobj(ctx->blend); struct fd2_zsa_stateobj *zsa = fd2_zsa_stateobj(ctx->zsa); + struct fd2_shader_stateobj *fp = ctx->prog.fp; struct fd_ringbuffer *ring = ctx->batch->draw; /* NOTE: we probably want to eventually refactor this so each state @@ -198,12 +258,16 @@ OUT_RING(ring, ctx->sample_mask); } - if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) { + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF | FD_DIRTY_PROG)) { struct pipe_stencil_ref *sr = &ctx->stencil_ref; + uint32_t val = zsa->rb_depthcontrol; + + if (fp->has_kill) + val &= ~A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE; OUT_PKT3(ring, 
CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL)); - OUT_RING(ring, zsa->rb_depthcontrol); + OUT_RING(ring, val); OUT_PKT3(ring, CP_SET_CONSTANT, 4); OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF)); @@ -266,21 +330,23 @@ OUT_RING(ring, fui(ctx->viewport.scale[2])); /* PA_CL_VPORT_ZSCALE */ OUT_RING(ring, fui(ctx->viewport.translate[2])); /* PA_CL_VPORT_ZOFFSET */ - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL)); - OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT | - A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA | - A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA | - A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA | - A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA | - A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA | - A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA); + /* set viewport in C65/C66, for a20x hw binning and fragcoord.z */ + OUT_PKT3(ring, CP_SET_CONSTANT, 9); + OUT_RING(ring, 0x00000184); + + OUT_RING(ring, fui(ctx->viewport.translate[0])); + OUT_RING(ring, fui(ctx->viewport.translate[1])); + OUT_RING(ring, fui(ctx->viewport.translate[2])); + OUT_RING(ring, fui(0.0f)); + + OUT_RING(ring, fui(ctx->viewport.scale[0])); + OUT_RING(ring, fui(ctx->viewport.scale[1])); + OUT_RING(ring, fui(ctx->viewport.scale[2])); + OUT_RING(ring, fui(0.0f)); } - if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE | FD_DIRTY_TEXSTATE)) { - fd2_program_validate(ctx); - fd2_program_emit(ring, &ctx->prog); - } + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE | FD_DIRTY_TEXSTATE)) + fd2_program_emit(ctx, ring, &ctx->prog); if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONST)) { emit_constants(ring, VS_CONST_BASE * 4, @@ -294,7 +360,7 @@ if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_ZSA)) { OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL)); - OUT_RING(ring, blend ? 
zsa->rb_colorcontrol | blend->rb_colorcontrol : 0); + OUT_RING(ring, zsa->rb_colorcontrol | blend->rb_colorcontrol); } if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) { @@ -304,13 +370,13 @@ OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL)); - OUT_RING(ring, blend ? blend->rb_blendcontrol_alpha | + OUT_RING(ring, blend->rb_blendcontrol_alpha | COND(has_alpha, blend->rb_blendcontrol_rgb) | - COND(!has_alpha, blend->rb_blendcontrol_no_alpha_rgb) : 0); + COND(!has_alpha, blend->rb_blendcontrol_no_alpha_rgb)); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK)); - OUT_RING(ring, blend ? blend->rb_colormask : 0xf); + OUT_RING(ring, blend->rb_colormask); } if (dirty & FD_DIRTY_BLEND_COLOR) { @@ -339,8 +405,34 @@ A2XX_RB_BC_CONTROL_ENABLE_CRC_UPDATE | A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT(8) | A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT(3)); + + /* not sure why this is required */ + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_VIZ_QUERY)); + OUT_RING(ring, A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID(16)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); + OUT_RING(ring, 0x00000002); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_OUT_DEALLOC_CNTL)); + OUT_RING(ring, 0x00000002); + } else { + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); + OUT_RING(ring, 0x0000003b); } + /* enable perfcntrs */ + OUT_PKT0(ring, REG_A2XX_CP_PERFMON_CNTL, 1); + OUT_RING(ring, COND(fd_mesa_debug & FD_DBG_PERFC, 1)); + + /* note: perfcntrs don't work without the PM_OVERRIDE bit */ + OUT_PKT0(ring, REG_A2XX_RBBM_PM_OVERRIDE1, 2); + OUT_RING(ring, 0xffffffff); + OUT_RING(ring, 0x00000fff); + OUT_PKT0(ring, REG_A2XX_TP0_CHICKEN, 1); OUT_RING(ring, 0x00000002); @@ -367,10 +459,6 @@ OUT_RING(ring, 0x00000000); OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, 
CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); - OUT_RING(ring, 0x0000003b); - - OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC)); OUT_RING(ring, A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY)); @@ -463,6 +551,16 @@ OUT_RING(ring, 0x00000000); /* RB_BLEND_GREEN */ OUT_RING(ring, 0x00000000); /* RB_BLEND_BLUE */ OUT_RING(ring, 0x000000ff); /* RB_BLEND_ALPHA */ + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL)); + OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT | + A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA | + A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA | + A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA | + A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA | + A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA | + A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA); } static void diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_emit.h mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_emit.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_emit.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_emit.h 2019-03-31 23:16:37.000000000 +0000 @@ -40,7 +40,8 @@ void fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val, struct fd2_vertex_buf *vbufs, uint32_t n); -void fd2_emit_state(struct fd_context *ctx, enum fd_dirty_3d_state dirty); +void fd2_emit_state_binning(struct fd_context *ctx, const enum fd_dirty_3d_state dirty); +void fd2_emit_state(struct fd_context *ctx, const enum fd_dirty_3d_state dirty); void fd2_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring); void fd2_emit_init(struct pipe_context *pctx); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c 2019-03-31 23:16:37.000000000 +0000 @@ -39,6 +39,8 @@ #include "fd2_program.h" 
#include "fd2_util.h" #include "fd2_zsa.h" +#include "fd2_draw.h" +#include "instr-a2xx.h" static uint32_t fmt2swap(enum pipe_format format) { @@ -57,6 +59,28 @@ } } +static bool +use_hw_binning(struct fd_batch *batch) +{ + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + + /* we hardcoded a limit of 8 "pipes", we can increase this limit + * at the cost of a slightly larger command stream + * however very few cases will need more than 8 + * gmem->num_vsc_pipes == 0 means empty batch (TODO: does it still happen?) + */ + if (gmem->num_vsc_pipes > 8 || !gmem->num_vsc_pipes) + return false; + + /* only a20x hw binning is implement + * a22x is more like a3xx, but perhaps the a20x works? (TODO) + */ + if (!is_a20x(batch->ctx->screen)) + return false; + + return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2); +} + /* transfer from gmem to system memory (ie. normal RAM) */ static void @@ -66,6 +90,13 @@ struct fd_ringbuffer *ring = batch->gmem; struct fd_resource *rsc = fd_resource(psurf->texture); uint32_t swap = fmt2swap(psurf->format); + struct fd_resource_slice *slice = + fd_resource_slice(rsc, psurf->u.tex.level); + uint32_t offset = + fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer); + + assert((slice->pitch & 31) == 0); + assert((offset & 0xfff) == 0); if (!rsc->valid) return; @@ -79,8 +110,8 @@ OUT_PKT3(ring, CP_SET_CONSTANT, 5); OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL)); OUT_RING(ring, 0x00000000); /* RB_COPY_CONTROL */ - OUT_RELOCW(ring, rsc->bo, 0, 0, 0); /* RB_COPY_DEST_BASE */ - OUT_RING(ring, rsc->slices[0].pitch >> 5); /* RB_COPY_DEST_PITCH */ + OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* RB_COPY_DEST_BASE */ + OUT_RING(ring, slice->pitch >> 5); /* RB_COPY_DEST_PITCH */ OUT_RING(ring, /* RB_COPY_DEST_INFO */ A2XX_RB_COPY_DEST_INFO_FORMAT(fd2_pipe2color(psurf->format)) | A2XX_RB_COPY_DEST_INFO_LINEAR | @@ -108,11 +139,12 @@ { struct fd_context *ctx = batch->ctx; struct fd2_context *fd2_ctx = fd2_context(ctx); + 
struct fd_gmem_stateobj *gmem = &ctx->gmem; struct fd_ringbuffer *ring = batch->gmem; struct pipe_framebuffer_state *pfb = &batch->framebuffer; fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) { - { .prsc = fd2_ctx->solid_vertexbuf, .size = 48 }, + { .prsc = fd2_ctx->solid_vertexbuf, .size = 36 }, }, 1); OUT_PKT3(ring, CP_SET_CONSTANT, 2); @@ -123,11 +155,13 @@ OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET)); OUT_RING(ring, 0); - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); - OUT_RING(ring, 0x0000028f); + if (!is_a20x(ctx->screen)) { + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); + OUT_RING(ring, 0x0000028f); + } - fd2_program_emit(ring, &ctx->solid_prog); + fd2_program_emit(ctx, ring, &ctx->solid_prog); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK)); @@ -149,17 +183,16 @@ OUT_RING(ring, xy2d(pfb->width, pfb->height)); /* PA_SC_WINDOW_SCISSOR_BR */ OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL)); - OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT | - A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA | - A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA | - A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA | - A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA); - - OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL)); OUT_RING(ring, 0x00000000); + OUT_PKT3(ring, CP_SET_CONSTANT, 5); + OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE)); + OUT_RING(ring, fui((float) tile->bin_w / 2.0)); /* XSCALE */ + OUT_RING(ring, fui((float) tile->bin_w / 2.0)); /* XOFFSET */ + OUT_RING(ring, fui((float) tile->bin_h / 2.0)); /* YSCALE */ + OUT_RING(ring, fui((float) tile->bin_h / 2.0)); /* YOFFSET */ + OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL)); OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(EDRAM_COPY)); @@ -170,14 +203,20 @@ A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff)); 
if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) - emit_gmem2mem_surf(batch, tile->bin_w * tile->bin_h, pfb->zsbuf); + emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf); if (batch->resolve & FD_BUFFER_COLOR) - emit_gmem2mem_surf(batch, 0, pfb->cbufs[0]); + emit_gmem2mem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL)); OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH)); + + if (!is_a20x(ctx->screen)) { + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); + OUT_RING(ring, 0x0000003b); + } } /* transfer from system memory to gmem */ @@ -188,6 +227,10 @@ { struct fd_ringbuffer *ring = batch->gmem; struct fd_resource *rsc = fd_resource(psurf->texture); + struct fd_resource_slice *slice = + fd_resource_slice(rsc, psurf->u.tex.level); + uint32_t offset = + fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer); uint32_t swiz; OUT_PKT3(ring, CP_SET_CONSTANT, 2); @@ -205,17 +248,18 @@ OUT_RING(ring, A2XX_SQ_TEX_0_CLAMP_X(SQ_TEX_WRAP) | A2XX_SQ_TEX_0_CLAMP_Y(SQ_TEX_WRAP) | A2XX_SQ_TEX_0_CLAMP_Z(SQ_TEX_WRAP) | - A2XX_SQ_TEX_0_PITCH(rsc->slices[0].pitch)); - OUT_RELOC(ring, rsc->bo, 0, - fd2_pipe2surface(psurf->format) | 0x800, 0); + A2XX_SQ_TEX_0_PITCH(slice->pitch)); + OUT_RELOC(ring, rsc->bo, offset, + fd2_pipe2surface(psurf->format) | + A2XX_SQ_TEX_1_CLAMP_POLICY(SQ_TEX_CLAMP_POLICY_OGL), 0); OUT_RING(ring, A2XX_SQ_TEX_2_WIDTH(psurf->width - 1) | A2XX_SQ_TEX_2_HEIGHT(psurf->height - 1)); - OUT_RING(ring, 0x01000000 | // XXX + OUT_RING(ring, A2XX_SQ_TEX_3_MIP_FILTER(SQ_TEX_FILTER_BASEMAP) | swiz | A2XX_SQ_TEX_3_XY_MAG_FILTER(SQ_TEX_FILTER_POINT) | A2XX_SQ_TEX_3_XY_MIN_FILTER(SQ_TEX_FILTER_POINT)); OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000200); + OUT_RING(ring, A2XX_SQ_TEX_5_DIMENSION(SQ_TEX_DIMENSION_2D)); if (!is_a20x(batch->ctx->screen)) { OUT_PKT3(ring, CP_SET_CONSTANT, 3); @@ -233,6 
+277,7 @@ { struct fd_context *ctx = batch->ctx; struct fd2_context *fd2_ctx = fd2_context(ctx); + struct fd_gmem_stateobj *gmem = &ctx->gmem; struct fd_ringbuffer *ring = batch->gmem; struct pipe_framebuffer_state *pfb = &batch->framebuffer; unsigned bin_w = tile->bin_w; @@ -240,8 +285,8 @@ float x0, y0, x1, y1; fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) { - { .prsc = fd2_ctx->solid_vertexbuf, .size = 48, .offset = 0x30 }, - { .prsc = fd2_ctx->solid_vertexbuf, .size = 32, .offset = 0x60 }, + { .prsc = fd2_ctx->solid_vertexbuf, .size = 36 }, + { .prsc = fd2_ctx->solid_vertexbuf, .size = 24, .offset = 36 }, }, 2); /* write texture coordinates to vertexbuf: */ @@ -249,26 +294,20 @@ x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width); y0 = ((float)tile->yoff) / ((float)pfb->height); y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height); - OUT_PKT3(ring, CP_MEM_WRITE, 9); - OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0x60, 0, 0); + OUT_PKT3(ring, CP_MEM_WRITE, 7); + OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 36, 0, 0); OUT_RING(ring, fui(x0)); OUT_RING(ring, fui(y0)); OUT_RING(ring, fui(x1)); OUT_RING(ring, fui(y0)); OUT_RING(ring, fui(x0)); OUT_RING(ring, fui(y1)); - OUT_RING(ring, fui(x1)); - OUT_RING(ring, fui(y1)); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET)); OUT_RING(ring, 0); - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); - OUT_RING(ring, 0x0000003b); - - fd2_program_emit(ring, &ctx->blit_prog[0]); + fd2_program_emit(ctx, ring, &ctx->blit_prog[0]); OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1); OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE); @@ -331,14 +370,107 @@ OUT_RING(ring, 0x00000000); if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) - emit_mem2gmem_surf(batch, bin_w * bin_h, pfb->zsbuf); + emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf); if (fd_gmem_needs_restore(batch, 
tile, FD_BUFFER_COLOR)) - emit_mem2gmem_surf(batch, 0, pfb->cbufs[0]); + emit_mem2gmem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL)); + OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT | + A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA | + A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA | + A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA | + A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA | + A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA | + A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA); /* TODO blob driver seems to toss in a CACHE_FLUSH after each DRAW_INDX.. */ } +static void +patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode) +{ + unsigned i; + + if (!is_a20x(batch->ctx->screen)) { + /* identical to a3xx */ + for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) { + struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i); + *patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0); + } + util_dynarray_resize(&batch->draw_patches, 0); + return; + } + + if (vismode == USE_VISIBILITY) + return; + + for (i = 0; i < batch->draw_patches.size / sizeof(uint32_t*); i++) { + uint32_t *ptr = *util_dynarray_element(&batch->draw_patches, uint32_t*, i); + unsigned cnt = ptr[0] >> 16 & 0xfff; /* 5 with idx buffer, 3 without */ + + /* convert CP_DRAW_INDX_BIN to a CP_DRAW_INDX + * replace first two DWORDS with NOP and move the rest down + * (we don't want to have to move the idx buffer reloc) + */ + ptr[0] = CP_TYPE3_PKT | (CP_NOP << 8); + ptr[1] = 0x00000000; + + ptr[4] = ptr[2] & ~(1 << 14 | 1 << 15); /* remove cull_enable bits */ + ptr[2] = CP_TYPE3_PKT | ((cnt-2) << 16) | (CP_DRAW_INDX << 8); + ptr[3] = 0x00000000; + } +} + +static void +fd2_emit_sysmem_prep(struct fd_batch *batch) +{ + struct fd_context *ctx = batch->ctx; + struct fd_ringbuffer *ring = batch->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + struct pipe_surface *psurf = pfb->cbufs[0]; + + if (!psurf) + return; + 
+ struct fd_resource *rsc = fd_resource(psurf->texture); + struct fd_resource_slice *slice = + fd_resource_slice(rsc, psurf->u.tex.level); + uint32_t offset = + fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer); + + assert((slice->pitch & 31) == 0); + assert((offset & 0xfff) == 0); + + fd2_emit_restore(ctx, ring); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO)); + OUT_RING(ring, A2XX_RB_SURFACE_INFO_SURFACE_PITCH(slice->pitch)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO)); + OUT_RELOCW(ring, rsc->bo, offset, A2XX_RB_COLOR_INFO_LINEAR | + A2XX_RB_COLOR_INFO_SWAP(fmt2swap(psurf->format)) | + A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)), 0); + + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL)); + OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE); + OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(pfb->width) | + A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(pfb->height)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET)); + OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(0) | + A2XX_PA_SC_WINDOW_OFFSET_Y(0)); + + patch_draws(batch, IGNORE_VISIBILITY); + util_dynarray_resize(&batch->draw_patches, 0); + util_dynarray_resize(&batch->shader_patches, 0); +} + /* before first tile */ static void fd2_emit_tile_init(struct fd_batch *batch) @@ -357,10 +489,168 @@ OUT_RING(ring, gmem->bin_w); /* RB_SURFACE_INFO */ OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) | A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format))); - reg = A2XX_RB_DEPTH_INFO_DEPTH_BASE(align(gmem->bin_w * gmem->bin_h, 4)); + reg = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]); if (pfb->zsbuf) reg |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format)); OUT_RING(ring, reg); /* RB_DEPTH_INFO */ + + /* fast clear patches */ + int depth_size = -1; + int color_size = -1; + + if 
(pfb->cbufs[0]) + color_size = util_format_get_blocksizebits(format) == 32 ? 4 : 2; + + if (pfb->zsbuf) + depth_size = fd_pipe2depth(pfb->zsbuf->format) == 1 ? 4 : 2; + + for (int i = 0; i < fd_patch_num_elements(&batch->gmem_patches); i++) { + struct fd_cs_patch *patch = fd_patch_element(&batch->gmem_patches, i); + uint32_t color_base = 0, depth_base = gmem->zsbuf_base[0]; + uint32_t size, lines; + + /* note: 1 "line" is 512 bytes in both color/depth areas (1K total) */ + switch (patch->val) { + case GMEM_PATCH_FASTCLEAR_COLOR: + size = align(gmem->bin_w * gmem->bin_h * color_size, 0x8000); + lines = size / 1024; + depth_base = size / 2; + break; + case GMEM_PATCH_FASTCLEAR_DEPTH: + size = align(gmem->bin_w * gmem->bin_h * depth_size, 0x8000); + lines = size / 1024; + color_base = depth_base; + depth_base = depth_base + size / 2; + break; + case GMEM_PATCH_FASTCLEAR_COLOR_DEPTH: + lines = align(gmem->bin_w * gmem->bin_h * color_size * 2, 0x8000) / 1024; + break; + case GMEM_PATCH_RESTORE_INFO: + patch->cs[0] = gmem->bin_w; + patch->cs[1] = A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) | + A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)); + patch->cs[2] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]); + if (pfb->zsbuf) + patch->cs[2] |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format)); + continue; + default: + continue; + } + + patch->cs[0] = A2XX_PA_SC_SCREEN_SCISSOR_BR_X(32) | + A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(lines); + patch->cs[4] = A2XX_RB_COLOR_INFO_BASE(color_base) | + A2XX_RB_COLOR_INFO_FORMAT(COLORX_8_8_8_8); + patch->cs[5] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base) | + A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(1); + } + util_dynarray_resize(&batch->gmem_patches, 0); + + /* set to zero, for some reason hardware doesn't like certain values */ + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN)); + OUT_RING(ring, 0); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, 
CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX)); + OUT_RING(ring, 0); + + if (use_hw_binning(batch)) { + /* patch out unneeded memory exports by changing EXEC CF to EXEC_END + * + * in the shader compiler, we guarantee that the shader ends with + * a specific pattern of ALLOC/EXEC CF pairs for the hw binning exports + * + * the since patches point only to dwords and CFs are 1.5 dwords + * the patch is aligned and might point to a ALLOC CF + */ + for (int i = 0; i < batch->shader_patches.size / sizeof(void*); i++) { + instr_cf_t *cf = + *util_dynarray_element(&batch->shader_patches, instr_cf_t*, i); + if (cf->opc == ALLOC) + cf++; + assert(cf->opc == EXEC); + assert(cf[ctx->screen->num_vsc_pipes*2-2].opc == EXEC_END); + cf[2*(gmem->num_vsc_pipes-1)].opc = EXEC_END; + } + + patch_draws(batch, USE_VISIBILITY); + + /* initialize shader constants for the binning memexport */ + OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 4); + OUT_RING(ring, 0x0000000C); + + for (int i = 0; i < gmem->num_vsc_pipes; i++) { + struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; + + /* XXX we know how large this needs to be.. + * should do some sort of realloc + * it should be ctx->batch->num_vertices bytes large + * with this size it will break with more than 256k vertices.. + */ + if (!pipe->bo) { + pipe->bo = fd_bo_new(ctx->dev, 0x40000, + DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i); + } + + /* memory export address (export32): + * .x: (base_address >> 2) | 0x40000000 (?) + * .y: index (float) - set by shader + * .z: 0x4B00D000 (?) + * .w: 0x4B000000 (?) | max_index (?) 
+ */ + OUT_RELOCW(ring, pipe->bo, 0, 0x40000000, -2); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x4B00D000); + OUT_RING(ring, 0x4B000000 | 0x40000); + } + + OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 8); + OUT_RING(ring, 0x0000018C); + + for (int i = 0; i < gmem->num_vsc_pipes; i++) { + struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; + float off_x, off_y, mul_x, mul_y; + + /* const to tranform from [-1,1] to bin coordinates for this pipe + * for x/y, [0,256/2040] = 0, [256/2040,512/2040] = 1, etc + * 8 possible values on x/y axis, + * to clip at binning stage: only use center 6x6 + * TODO: set the z parameters too so that hw binning + * can clip primitives in Z too + */ + + mul_x = 1.0f / (float) (gmem->bin_w * 8); + mul_y = 1.0f / (float) (gmem->bin_h * 8); + off_x = -pipe->x * (1.0/8.0f) + 0.125f - mul_x * gmem->minx; + off_y = -pipe->y * (1.0/8.0f) + 0.125f - mul_y * gmem->miny; + + OUT_RING(ring, fui(off_x * (256.0f/255.0f))); + OUT_RING(ring, fui(off_y * (256.0f/255.0f))); + OUT_RING(ring, 0x3f000000); + OUT_RING(ring, fui(0.0f)); + + OUT_RING(ring, fui(mul_x * (256.0f/255.0f))); + OUT_RING(ring, fui(mul_y * (256.0f/255.0f))); + OUT_RING(ring, fui(0.0f)); + OUT_RING(ring, fui(0.0f)); + } + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); + OUT_RING(ring, 0); + + ctx->emit_ib(ring, batch->binning); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); + OUT_RING(ring, 0x00000002); + } else { + patch_draws(batch, IGNORE_VISIBILITY); + } + + util_dynarray_resize(&batch->draw_patches, 0); + util_dynarray_resize(&batch->shader_patches, 0); } /* before mem2gmem */ @@ -389,6 +679,8 @@ static void fd2_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile) { + struct fd_context *ctx = batch->ctx; + struct fd2_context *fd2_ctx = fd2_context(ctx); struct fd_ringbuffer *ring = batch->gmem; struct pipe_framebuffer_state *pfb = 
&batch->framebuffer; enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); @@ -405,6 +697,38 @@ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET)); OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(-tile->xoff) | A2XX_PA_SC_WINDOW_OFFSET_Y(-tile->yoff)); + + /* write SCISSOR_BR to memory so fast clear path can restore from it */ + OUT_PKT3(ring, CP_MEM_WRITE, 2); + OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 60, 0, 0); + OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) | + A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h)); + + /* tile offset for gl_FragCoord on a20x (C64 in fragment shader) */ + if (is_a20x(ctx->screen)) { + OUT_PKT3(ring, CP_SET_CONSTANT, 5); + OUT_RING(ring, 0x00000580); + OUT_RING(ring, fui(tile->xoff)); + OUT_RING(ring, fui(tile->yoff)); + OUT_RING(ring, fui(0.0f)); + OUT_RING(ring, fui(0.0f)); + } + + if (use_hw_binning(batch)) { + struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[tile->p]; + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN)); + OUT_RING(ring, tile->n); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX)); + OUT_RING(ring, tile->n); + + /* TODO only emit this when tile->p changes */ + OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1); + OUT_RELOC(ring, pipe->bo, 0, 0, 0); + } } void @@ -412,6 +736,7 @@ { struct fd_context *ctx = fd_context(pctx); + ctx->emit_sysmem_prep = fd2_emit_sysmem_prep; ctx->emit_tile_init = fd2_emit_tile_init; ctx->emit_tile_prep = fd2_emit_tile_prep; ctx->emit_tile_mem2gmem = fd2_emit_tile_mem2gmem; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_perfcntr.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_perfcntr.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_perfcntr.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_perfcntr.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,813 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * 
Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Jonathan Marek + * Rob Clark + */ + +#include "freedreno_perfcntr.h" +#include "freedreno_util.h" +#include "a2xx.xml.h" + +#define REG(_x) REG_A2XX_ ## _x + +#define COUNTER(_sel, _lo, _hi) { \ + .select_reg = REG(_sel), \ + .counter_reg_lo = REG(_lo), \ + .counter_reg_hi = REG(_hi), \ +} + +#define COUNTABLE(_selector, _query_type, _result_type) { \ + .name = #_selector, \ + .selector = _selector, \ + .query_type = PIPE_DRIVER_QUERY_TYPE_ ## _query_type, \ + .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_ ## _result_type, \ +} + +#define GROUP(_name, _counters, _countables) { \ + .name = _name, \ + .num_counters = ARRAY_SIZE(_counters), \ + .counters = _counters, \ + .num_countables = ARRAY_SIZE(_countables), \ + .countables = _countables, \ +} + +static const struct fd_perfcntr_countable pa_su_countables[] = { + COUNTABLE(PERF_PAPC_PASX_REQ, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_FIRST_VECTOR, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_SECOND_VECTOR, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_FIRST_DEAD, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_SECOND_DEAD, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_VTX_KILL_DISCARD, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_VTX_NAN_DISCARD, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PA_INPUT_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PA_INPUT_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PA_INPUT_EVENT_FLAG, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PA_INPUT_FIRST_PRIM_SLOT, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PA_INPUT_END_OF_PACKET, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_VV_CULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_VTX_KILL_CULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_VTX_NAN_CULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CULL_TO_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_VV_CLIP_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_POINT_CLIP_CANDIDATE, UINT64, AVERAGE), + 
COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_1, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_2, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_3, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_4, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_5, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_6, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_NEAR, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_FAR, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_LEFT, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_RIGHT, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_TOP, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_BOTTOM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_TOTALLY_VISIBLE_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_CLIP_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_CULL_TO_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_1, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_2, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_3, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_4, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_5, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_6_7, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_NON_TRIVIAL_CULL, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_INPUT_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_INPUT_CLIP_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_INPUT_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_ZERO_AREA_CULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_BACK_FACE_CULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_FRONT_FACE_CULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_POLYMODE_FACE_CULL, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_POLYMODE_BACK_CULL, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_POLYMODE_FRONT_CULL, UINT64, AVERAGE), + 
COUNTABLE(PERF_PAPC_SU_POLYMODE_INVALID_FILL, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_CLIP_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_EVENT_FLAG, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_FIRST_PRIM_SLOT, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_END_OF_PACKET, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_POLYMODE_FACE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_POLYMODE_BACK, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_POLYMODE_FRONT, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUT_CLIP_POLYMODE_FACE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUT_CLIP_POLYMODE_BACK, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUT_CLIP_POLYMODE_FRONT, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REQ_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REQ_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REQ_STALLED, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REC_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REC_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REC_STARVED_SX, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REC_STALLED, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REC_STALLED_POS_MEM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REC_STALLED_CCGSM_IN, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CCGSM_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CCGSM_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CCGSM_STALLED, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPRIM_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPRIM_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPRIM_STALLED, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPRIM_STARVED_CCGSM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPSM_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPSM_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPSM_WAIT_CLIP_VERT_ENGH, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPSM_WAIT_HIGH_PRI_SEQ, UINT64, AVERAGE), + 
COUNTABLE(PERF_PAPC_CLIPSM_WAIT_CLIPGA, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPSM_WAIT_AVAIL_VTE_CLIP, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPSM_WAIT_CLIP_OUTSM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPGA_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPGA_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPGA_STARVED_VTE_CLIP, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPGA_STALLED, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIP_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIP_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_STARVED_CLIP, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_STALLED_SC, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_FACENESS_CULL, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_countable pa_sc_countables[] = { + COUNTABLE(SC_SR_WINDOW_VALID, UINT64, AVERAGE), + COUNTABLE(SC_CW_WINDOW_VALID, UINT64, AVERAGE), + COUNTABLE(SC_QM_WINDOW_VALID, UINT64, AVERAGE), + COUNTABLE(SC_FW_WINDOW_VALID, UINT64, AVERAGE), + COUNTABLE(SC_EZ_WINDOW_VALID, UINT64, AVERAGE), + COUNTABLE(SC_IT_WINDOW_VALID, UINT64, AVERAGE), + COUNTABLE(SC_STARVED_BY_PA, UINT64, AVERAGE), + COUNTABLE(SC_STALLED_BY_RB_TILE, UINT64, AVERAGE), + COUNTABLE(SC_STALLED_BY_RB_SAMP, UINT64, AVERAGE), + COUNTABLE(SC_STARVED_BY_RB_EZ, UINT64, AVERAGE), + COUNTABLE(SC_STALLED_BY_SAMPLE_FF, UINT64, AVERAGE), + COUNTABLE(SC_STALLED_BY_SQ, UINT64, AVERAGE), + COUNTABLE(SC_STALLED_BY_SP, UINT64, AVERAGE), + COUNTABLE(SC_TOTAL_NO_PRIMS, UINT64, AVERAGE), + COUNTABLE(SC_NON_EMPTY_PRIMS, UINT64, AVERAGE), + COUNTABLE(SC_NO_TILES_PASSING_QM, UINT64, AVERAGE), + COUNTABLE(SC_NO_PIXELS_PRE_EZ, UINT64, AVERAGE), + COUNTABLE(SC_NO_PIXELS_POST_EZ, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_countable vgt_countables[] = { + COUNTABLE(VGT_SQ_EVENT_WINDOW_ACTIVE, UINT64, AVERAGE), + COUNTABLE(VGT_SQ_SEND, UINT64, AVERAGE), + COUNTABLE(VGT_SQ_STALLED, UINT64, AVERAGE), + 
COUNTABLE(VGT_SQ_STARVED_BUSY, UINT64, AVERAGE), + COUNTABLE(VGT_SQ_STARVED_IDLE, UINT64, AVERAGE), + COUNTABLE(VGT_SQ_STATIC, UINT64, AVERAGE), + COUNTABLE(VGT_PA_EVENT_WINDOW_ACTIVE, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_V_SEND, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_V_STALLED, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_V_STARVED_BUSY, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_V_STARVED_IDLE, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_V_STATIC, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_P_SEND, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_P_STALLED, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_P_STARVED_BUSY, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_P_STARVED_IDLE, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_P_STATIC, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_S_SEND, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_S_STALLED, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_S_STARVED_BUSY, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_S_STARVED_IDLE, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_S_STATIC, UINT64, AVERAGE), + COUNTABLE(RBIU_FIFOS_EVENT_WINDOW_ACTIVE, UINT64, AVERAGE), + COUNTABLE(RBIU_IMMED_DATA_FIFO_STARVED, UINT64, AVERAGE), + COUNTABLE(RBIU_IMMED_DATA_FIFO_STALLED, UINT64, AVERAGE), + COUNTABLE(RBIU_DMA_REQUEST_FIFO_STARVED, UINT64, AVERAGE), + COUNTABLE(RBIU_DMA_REQUEST_FIFO_STALLED, UINT64, AVERAGE), + COUNTABLE(RBIU_DRAW_INITIATOR_FIFO_STARVED, UINT64, AVERAGE), + COUNTABLE(RBIU_DRAW_INITIATOR_FIFO_STALLED, UINT64, AVERAGE), + COUNTABLE(BIN_PRIM_NEAR_CULL, UINT64, AVERAGE), + COUNTABLE(BIN_PRIM_ZERO_CULL, UINT64, AVERAGE), + COUNTABLE(BIN_PRIM_FAR_CULL, UINT64, AVERAGE), + COUNTABLE(BIN_PRIM_BIN_CULL, UINT64, AVERAGE), + COUNTABLE(BIN_PRIM_FACE_CULL, UINT64, AVERAGE), + COUNTABLE(SPARE34, UINT64, AVERAGE), + COUNTABLE(SPARE35, UINT64, AVERAGE), + COUNTABLE(SPARE36, UINT64, AVERAGE), + COUNTABLE(SPARE37, UINT64, AVERAGE), + COUNTABLE(SPARE38, UINT64, AVERAGE), + COUNTABLE(SPARE39, UINT64, AVERAGE), + COUNTABLE(TE_SU_IN_VALID, UINT64, AVERAGE), + 
COUNTABLE(TE_SU_IN_READ, UINT64, AVERAGE), + COUNTABLE(TE_SU_IN_PRIM, UINT64, AVERAGE), + COUNTABLE(TE_SU_IN_EOP, UINT64, AVERAGE), + COUNTABLE(TE_SU_IN_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(TE_WK_IN_VALID, UINT64, AVERAGE), + COUNTABLE(TE_WK_IN_READ, UINT64, AVERAGE), + COUNTABLE(TE_OUT_PRIM_VALID, UINT64, AVERAGE), + COUNTABLE(TE_OUT_PRIM_READ, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_countable tcr_countables[] = { + COUNTABLE(DGMMPD_IPMUX0_STALL, UINT64, AVERAGE), + COUNTABLE(DGMMPD_IPMUX_ALL_STALL, UINT64, AVERAGE), + COUNTABLE(OPMUX0_L2_WRITES, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_countable tp0_countables[] = { + COUNTABLE(POINT_QUADS, UINT64, AVERAGE), + COUNTABLE(BILIN_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_QUADS, UINT64, AVERAGE), + COUNTABLE(MIP_QUADS, UINT64, AVERAGE), + COUNTABLE(VOL_QUADS, UINT64, AVERAGE), + COUNTABLE(MIP_VOL_QUADS, UINT64, AVERAGE), + COUNTABLE(MIP_ANISO_QUADS, UINT64, AVERAGE), + COUNTABLE(VOL_ANISO_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_2_1_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_4_1_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_6_1_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_8_1_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_10_1_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_12_1_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_14_1_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_16_1_QUADS, UINT64, AVERAGE), + COUNTABLE(MIP_VOL_ANISO_QUADS, UINT64, AVERAGE), + COUNTABLE(ALIGN_2_QUADS, UINT64, AVERAGE), + COUNTABLE(ALIGN_4_QUADS, UINT64, AVERAGE), + COUNTABLE(PIX_0_QUAD, UINT64, AVERAGE), + COUNTABLE(PIX_1_QUAD, UINT64, AVERAGE), + COUNTABLE(PIX_2_QUAD, UINT64, AVERAGE), + COUNTABLE(PIX_3_QUAD, UINT64, AVERAGE), + COUNTABLE(PIX_4_QUAD, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD0, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD1, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD2, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD3, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD4, UINT64, AVERAGE), + 
COUNTABLE(TP_MIPMAP_LOD5, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD6, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD7, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD8, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD9, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD10, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD11, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD12, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD13, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD14, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_countable tcm_countables[] = { + COUNTABLE(QUAD0_RD_LAT_FIFO_EMPTY, UINT64, AVERAGE), + COUNTABLE(QUAD0_RD_LAT_FIFO_4TH_FULL, UINT64, AVERAGE), + COUNTABLE(QUAD0_RD_LAT_FIFO_HALF_FULL, UINT64, AVERAGE), + COUNTABLE(QUAD0_RD_LAT_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(QUAD0_RD_LAT_FIFO_LT_4TH_FULL, UINT64, AVERAGE), + COUNTABLE(READ_STARVED_QUAD0, UINT64, AVERAGE), + COUNTABLE(READ_STARVED, UINT64, AVERAGE), + COUNTABLE(READ_STALLED_QUAD0, UINT64, AVERAGE), + COUNTABLE(READ_STALLED, UINT64, AVERAGE), + COUNTABLE(VALID_READ_QUAD0, UINT64, AVERAGE), + COUNTABLE(TC_TP_STARVED_QUAD0, UINT64, AVERAGE), + COUNTABLE(TC_TP_STARVED, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_countable tcf_countables[] = { + COUNTABLE(VALID_CYCLES, UINT64, AVERAGE), + COUNTABLE(SINGLE_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_PHASES, UINT64, AVERAGE), + COUNTABLE(MIP_PHASES, UINT64, AVERAGE), + COUNTABLE(VOL_PHASES, UINT64, AVERAGE), + COUNTABLE(MIP_VOL_PHASES, UINT64, AVERAGE), + COUNTABLE(MIP_ANISO_PHASES, UINT64, AVERAGE), + COUNTABLE(VOL_ANISO_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_2_1_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_4_1_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_6_1_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_8_1_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_10_1_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_12_1_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_14_1_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_16_1_PHASES, UINT64, AVERAGE), + 
COUNTABLE(MIP_VOL_ANISO_PHASES, UINT64, AVERAGE), + COUNTABLE(ALIGN_2_PHASES, UINT64, AVERAGE), + COUNTABLE(ALIGN_4_PHASES, UINT64, AVERAGE), + COUNTABLE(TPC_BUSY, UINT64, AVERAGE), + COUNTABLE(TPC_STALLED, UINT64, AVERAGE), + COUNTABLE(TPC_STARVED, UINT64, AVERAGE), + COUNTABLE(TPC_WORKING, UINT64, AVERAGE), + COUNTABLE(TPC_WALKER_BUSY, UINT64, AVERAGE), + COUNTABLE(TPC_WALKER_STALLED, UINT64, AVERAGE), + COUNTABLE(TPC_WALKER_WORKING, UINT64, AVERAGE), + COUNTABLE(TPC_ALIGNER_BUSY, UINT64, AVERAGE), + COUNTABLE(TPC_ALIGNER_STALLED, UINT64, AVERAGE), + COUNTABLE(TPC_ALIGNER_STALLED_BY_BLEND, UINT64, AVERAGE), + COUNTABLE(TPC_ALIGNER_STALLED_BY_CACHE, UINT64, AVERAGE), + COUNTABLE(TPC_ALIGNER_WORKING, UINT64, AVERAGE), + COUNTABLE(TPC_BLEND_BUSY, UINT64, AVERAGE), + COUNTABLE(TPC_BLEND_SYNC, UINT64, AVERAGE), + COUNTABLE(TPC_BLEND_STARVED, UINT64, AVERAGE), + COUNTABLE(TPC_BLEND_WORKING, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x00, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x01, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x04, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x10, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x11, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x12, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x13, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x18, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x19, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x1A, UINT64, AVERAGE), + COUNTABLE(OPCODE_OTHER, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_0_EMPTY, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_0_LT_HALF_FULL, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_0_HALF_FULL, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_0_FULL, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_TPC_EMPTY, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_TPC_LT_HALF_FULL, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_TPC_HALF_FULL, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_TPC_FULL, UINT64, AVERAGE), + COUNTABLE(TPC_TC_XFC, UINT64, AVERAGE), + COUNTABLE(TPC_TC_STATE, UINT64, AVERAGE), + COUNTABLE(TC_STALL, UINT64, AVERAGE), + COUNTABLE(QUAD0_TAPS, UINT64, AVERAGE), + COUNTABLE(QUADS, UINT64, 
AVERAGE), + COUNTABLE(TCA_SYNC_STALL, UINT64, AVERAGE), + COUNTABLE(TAG_STALL, UINT64, AVERAGE), + COUNTABLE(TCB_SYNC_STALL, UINT64, AVERAGE), + COUNTABLE(TCA_VALID, UINT64, AVERAGE), + COUNTABLE(PROBES_VALID, UINT64, AVERAGE), + COUNTABLE(MISS_STALL, UINT64, AVERAGE), + COUNTABLE(FETCH_FIFO_STALL, UINT64, AVERAGE), + COUNTABLE(TCO_STALL, UINT64, AVERAGE), + COUNTABLE(ANY_STALL, UINT64, AVERAGE), + COUNTABLE(TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(TAG_HITS, UINT64, AVERAGE), + COUNTABLE(SUB_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET0_INVALIDATES, UINT64, AVERAGE), + COUNTABLE(SET1_INVALIDATES, UINT64, AVERAGE), + COUNTABLE(SET2_INVALIDATES, UINT64, AVERAGE), + COUNTABLE(SET3_INVALIDATES, UINT64, AVERAGE), + COUNTABLE(SET0_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET1_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET2_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET3_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET0_TAG_HITS, UINT64, AVERAGE), + COUNTABLE(SET1_TAG_HITS, UINT64, AVERAGE), + COUNTABLE(SET2_TAG_HITS, UINT64, AVERAGE), + COUNTABLE(SET3_TAG_HITS, UINT64, AVERAGE), + COUNTABLE(SET0_SUB_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET1_SUB_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET2_SUB_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET3_SUB_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT1, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT2, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT3, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT4, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT5, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT6, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT7, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT8, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT1, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT2, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT3, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT4, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT5, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT6, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT7, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT8, UINT64, AVERAGE), + 
COUNTABLE(SET2_EVICT1, UINT64, AVERAGE), + COUNTABLE(SET2_EVICT2, UINT64, AVERAGE), + COUNTABLE(SET2_EVICT3, UINT64, AVERAGE), + COUNTABLE(SET2_EVICT4, UINT64, AVERAGE), + COUNTABLE(SET2_EVICT5, UINT64, AVERAGE), + COUNTABLE(SET2_EVICT6, UINT64, AVERAGE), + COUNTABLE(SET2_EVICT7, UINT64, AVERAGE), + COUNTABLE(SET2_EVICT8, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT1, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT2, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT3, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT4, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT5, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT6, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT7, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT8, UINT64, AVERAGE), + COUNTABLE(FF_EMPTY, UINT64, AVERAGE), + COUNTABLE(FF_LT_HALF_FULL, UINT64, AVERAGE), + COUNTABLE(FF_HALF_FULL, UINT64, AVERAGE), + COUNTABLE(FF_FULL, UINT64, AVERAGE), + COUNTABLE(FF_XFC, UINT64, AVERAGE), + COUNTABLE(FF_STALLED, UINT64, AVERAGE), + COUNTABLE(FG_MASKS, UINT64, AVERAGE), + COUNTABLE(FG_LEFT_MASKS, UINT64, AVERAGE), + COUNTABLE(FG_LEFT_MASK_STALLED, UINT64, AVERAGE), + COUNTABLE(FG_LEFT_NOT_DONE_STALL, UINT64, AVERAGE), + COUNTABLE(FG_LEFT_FG_STALL, UINT64, AVERAGE), + COUNTABLE(FG_LEFT_SECTORS, UINT64, AVERAGE), + COUNTABLE(FG0_REQUESTS, UINT64, AVERAGE), + COUNTABLE(FG0_STALLED, UINT64, AVERAGE), + COUNTABLE(MEM_REQ512, UINT64, AVERAGE), + COUNTABLE(MEM_REQ_SENT, UINT64, AVERAGE), + COUNTABLE(MEM_LOCAL_READ_REQ, UINT64, AVERAGE), + COUNTABLE(TC0_MH_STALLED, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_countable sq_countables[] = { + COUNTABLE(SQ_PIXEL_VECTORS_SUB, UINT64, AVERAGE), + COUNTABLE(SQ_VERTEX_VECTORS_SUB, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_ACTIVE_VTX_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_ACTIVE_PIX_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_ACTIVE_VTX_SIMD1, UINT64, 
AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_ACTIVE_PIX_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_EXPORT_CYCLES, UINT64, AVERAGE), + COUNTABLE(SQ_ALU_CST_WRITTEN, UINT64, AVERAGE), + COUNTABLE(SQ_TEX_CST_WRITTEN, UINT64, AVERAGE), + COUNTABLE(SQ_ALU_CST_STALL, UINT64, AVERAGE), + COUNTABLE(SQ_ALU_TEX_STALL, UINT64, AVERAGE), + COUNTABLE(SQ_INST_WRITTEN, UINT64, AVERAGE), + COUNTABLE(SQ_BOOLEAN_WRITTEN, UINT64, AVERAGE), + COUNTABLE(SQ_LOOPS_WRITTEN, UINT64, AVERAGE), + COUNTABLE(SQ_PIXEL_SWAP_IN, UINT64, AVERAGE), + COUNTABLE(SQ_PIXEL_SWAP_OUT, UINT64, AVERAGE), + COUNTABLE(SQ_VERTEX_SWAP_IN, UINT64, AVERAGE), + COUNTABLE(SQ_VERTEX_SWAP_OUT, UINT64, AVERAGE), + COUNTABLE(SQ_ALU_VTX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_TEX_VTX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_VC_VTX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_CF_VTX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_ALU_PIX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_TEX_PIX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_VC_PIX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_CF_PIX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_ALU_NOPS, UINT64, AVERAGE), + COUNTABLE(SQ_PRED_SKIP, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_ALU_STALL_SIMD0_VTX, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_ALU_STALL_SIMD1_VTX, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_TEX_STALL_VTX, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_VC_STALL_VTX, UINT64, AVERAGE), + COUNTABLE(SQ_CONSTANTS_USED_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_CONSTANTS_SENT_SP_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_GPR_STALL_VTX, UINT64, AVERAGE), + COUNTABLE(SQ_GPR_STALL_PIX, UINT64, AVERAGE), + COUNTABLE(SQ_VTX_RS_STALL, UINT64, AVERAGE), + COUNTABLE(SQ_PIX_RS_STALL, UINT64, AVERAGE), + COUNTABLE(SQ_SX_PC_FULL, 
UINT64, AVERAGE), + COUNTABLE(SQ_SX_EXP_BUFF_FULL, UINT64, AVERAGE), + COUNTABLE(SQ_SX_POS_BUFF_FULL, UINT64, AVERAGE), + COUNTABLE(SQ_INTERP_QUADS, UINT64, AVERAGE), + COUNTABLE(SQ_INTERP_ACTIVE, UINT64, AVERAGE), + COUNTABLE(SQ_IN_PIXEL_STALL, UINT64, AVERAGE), + COUNTABLE(SQ_IN_VTX_STALL, UINT64, AVERAGE), + COUNTABLE(SQ_VTX_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_VTX_VECTOR2, UINT64, AVERAGE), + COUNTABLE(SQ_VTX_VECTOR3, UINT64, AVERAGE), + COUNTABLE(SQ_VTX_VECTOR4, UINT64, AVERAGE), + COUNTABLE(SQ_PIXEL_VECTOR1, UINT64, AVERAGE), + COUNTABLE(SQ_PIXEL_VECTOR23, UINT64, AVERAGE), + COUNTABLE(SQ_PIXEL_VECTOR4, UINT64, AVERAGE), + COUNTABLE(SQ_CONSTANTS_USED_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_CONSTANTS_SENT_SP_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_SX_MEM_EXP_FULL, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_ACTIVE_VTX_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_ACTIVE_PIX_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD3, UINT64, AVERAGE), + COUNTABLE(SQ_PERFCOUNT_VTX_QUAL_TP_DONE, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD3, UINT64, AVERAGE), + COUNTABLE(SQ_PERFCOUNT_PIX_QUAL_TP_DONE, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD3, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD3, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_ALU_STALL_SIMD2_VTX, UINT64, AVERAGE), + COUNTABLE(SQ_PERFCOUNT_VTX_POP_THREAD, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_ALU_STALL_SIMD0_PIX, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_ALU_STALL_SIMD1_PIX, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_ALU_STALL_SIMD2_PIX, UINT64, AVERAGE), + COUNTABLE(SQ_PERFCOUNT_PIX_POP_THREAD, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_TEX_STALL_PIX, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_VC_STALL_PIX, UINT64, AVERAGE), + COUNTABLE(SQ_CONSTANTS_USED_SIMD2, UINT64, AVERAGE), + 
COUNTABLE(SQ_CONSTANTS_SENT_SP_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_PERFCOUNT_VTX_DEALLOC_ACK, UINT64, AVERAGE), + COUNTABLE(SQ_PERFCOUNT_PIX_DEALLOC_ACK, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD3, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD3, UINT64, AVERAGE), + COUNTABLE(VC_PERF_STATIC, UINT64, AVERAGE), + COUNTABLE(VC_PERF_STALLED, UINT64, AVERAGE), + COUNTABLE(VC_PERF_STARVED, UINT64, AVERAGE), + COUNTABLE(VC_PERF_SEND, UINT64, AVERAGE), + COUNTABLE(VC_PERF_ACTUAL_STARVED, UINT64, AVERAGE), + COUNTABLE(PIXEL_THREAD_0_ACTIVE, UINT64, AVERAGE), + COUNTABLE(VERTEX_THREAD_0_ACTIVE, UINT64, AVERAGE), + COUNTABLE(PIXEL_THREAD_0_NUMBER, UINT64, AVERAGE), + COUNTABLE(VERTEX_THREAD_0_NUMBER, UINT64, AVERAGE), + COUNTABLE(VERTEX_EVENT_NUMBER, UINT64, AVERAGE), + COUNTABLE(PIXEL_EVENT_NUMBER, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_EF_PUSH, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_EF_POP_EVENT, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_EF_POP_NEW_VTX, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_EF_POP_DEALLOC, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_EF_POP_PVECTOR, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_EF_POP_PVECTOR_X, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_EF_POP_PVECTOR_VNZ, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_PB_DEALLOC, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_PI_STATE_PPB_POP, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_PI_RTR, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_PI_READ_EN, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_PI_BUFF_SWAP, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_SQ_FREE_BUFF, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_SQ_DEC, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_SC_VALID_CNTL_EVENT, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_SC_VALID_IJ_XFER, 
UINT64, AVERAGE), + COUNTABLE(PTRBUFF_SC_NEW_VECTOR_1_Q, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_QUAL_NEW_VECTOR, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_QUAL_EVENT, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_END_BUFFER, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_FILL_QUAD, UINT64, AVERAGE), + COUNTABLE(VERTS_WRITTEN_SPI, UINT64, AVERAGE), + COUNTABLE(TP_FETCH_INSTR_EXEC, UINT64, AVERAGE), + COUNTABLE(TP_FETCH_INSTR_REQ, UINT64, AVERAGE), + COUNTABLE(TP_DATA_RETURN, UINT64, AVERAGE), + COUNTABLE(SPI_WRITE_CYCLES_SP, UINT64, AVERAGE), + COUNTABLE(SPI_WRITES_SP, UINT64, AVERAGE), + COUNTABLE(SP_ALU_INSTR_EXEC, UINT64, AVERAGE), + COUNTABLE(SP_CONST_ADDR_TO_SQ, UINT64, AVERAGE), + COUNTABLE(SP_PRED_KILLS_TO_SQ, UINT64, AVERAGE), + COUNTABLE(SP_EXPORT_CYCLES_TO_SX, UINT64, AVERAGE), + COUNTABLE(SP_EXPORTS_TO_SX, UINT64, AVERAGE), + COUNTABLE(SQ_CYCLES_ELAPSED, UINT64, AVERAGE), + COUNTABLE(SQ_TCFS_OPT_ALLOC_EXEC, UINT64, AVERAGE), + COUNTABLE(SQ_TCFS_NO_OPT_ALLOC, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_NO_OPT_ALLOC, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_NO_OPT_ALLOC, UINT64, AVERAGE), + COUNTABLE(SQ_TCFS_ARB_XFC_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ARB_XFC_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_ARB_XFC_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_TCFS_CFS_UPDATE_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_CFS_UPDATE_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_CFS_UPDATE_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_VTX_PUSH_THREAD_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_VTX_POP_THREAD_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_PIX_PUSH_THREAD_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_PIX_POP_THREAD_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_PIX_TOTAL, UINT64, AVERAGE), + COUNTABLE(SQ_PIX_KILLED, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_countable sx_countables[] = { + COUNTABLE(SX_EXPORT_VECTORS, UINT64, AVERAGE), + COUNTABLE(SX_DUMMY_QUADS, UINT64, AVERAGE), + COUNTABLE(SX_ALPHA_FAIL, UINT64, AVERAGE), + COUNTABLE(SX_RB_QUAD_BUSY, UINT64, AVERAGE), + COUNTABLE(SX_RB_COLOR_BUSY, UINT64, 
AVERAGE), + COUNTABLE(SX_RB_QUAD_STALL, UINT64, AVERAGE), + COUNTABLE(SX_RB_COLOR_STALL, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_countable rb_countables[] = { + COUNTABLE(RBPERF_CNTX_BUSY, UINT64, AVERAGE), + COUNTABLE(RBPERF_CNTX_BUSY_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_SX_QUAD_STARVED, UINT64, AVERAGE), + COUNTABLE(RBPERF_SX_QUAD_STARVED_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_GA_GC_CH0_SYS_REQ, UINT64, AVERAGE), + COUNTABLE(RBPERF_GA_GC_CH0_SYS_REQ_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_GA_GC_CH1_SYS_REQ, UINT64, AVERAGE), + COUNTABLE(RBPERF_GA_GC_CH1_SYS_REQ_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_MH_STARVED, UINT64, AVERAGE), + COUNTABLE(RBPERF_MH_STARVED_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_AZ_BC_COLOR_BUSY, UINT64, AVERAGE), + COUNTABLE(RBPERF_AZ_BC_COLOR_BUSY_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_AZ_BC_Z_BUSY, UINT64, AVERAGE), + COUNTABLE(RBPERF_AZ_BC_Z_BUSY_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SC_TILE_RTR_N, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SC_TILE_RTR_N_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SC_SAMP_RTR_N, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SC_SAMP_RTR_N_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SX_QUAD_RTR_N, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SX_QUAD_RTR_N_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SX_COLOR_RTR_N, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SX_COLOR_RTR_N_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SC_SAMP_LZ_BUSY, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SC_SAMP_LZ_BUSY_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_ZXP_STALL, UINT64, AVERAGE), + COUNTABLE(RBPERF_ZXP_STALL_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_EVENT_PENDING, UINT64, AVERAGE), + COUNTABLE(RBPERF_EVENT_PENDING_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_MH_VALID, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_MH_VALID_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_SX_RB_QUAD_SEND, UINT64, AVERAGE), + COUNTABLE(RBPERF_SX_RB_COLOR_SEND, UINT64, AVERAGE), + COUNTABLE(RBPERF_SC_RB_TILE_SEND, UINT64, AVERAGE), + 
COUNTABLE(RBPERF_SC_RB_SAMPLE_SEND, UINT64, AVERAGE), + COUNTABLE(RBPERF_SX_RB_MEM_EXPORT, UINT64, AVERAGE), + COUNTABLE(RBPERF_SX_RB_QUAD_EVENT, UINT64, AVERAGE), + COUNTABLE(RBPERF_SC_RB_TILE_EVENT_FILTERED, UINT64, AVERAGE), + COUNTABLE(RBPERF_SC_RB_TILE_EVENT_ALL, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SC_EZ_SEND, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SX_INDEX_SEND, UINT64, AVERAGE), + COUNTABLE(RBPERF_GMEM_INTFO_RD, UINT64, AVERAGE), + COUNTABLE(RBPERF_GMEM_INTF1_RD, UINT64, AVERAGE), + COUNTABLE(RBPERF_GMEM_INTFO_WR, UINT64, AVERAGE), + COUNTABLE(RBPERF_GMEM_INTF1_WR, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_CP_CONTEXT_DONE, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_CP_CACHE_FLUSH, UINT64, AVERAGE), + COUNTABLE(RBPERF_ZPASS_DONE, UINT64, AVERAGE), + COUNTABLE(RBPERF_ZCMD_VALID, UINT64, AVERAGE), + COUNTABLE(RBPERF_CCMD_VALID, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_GRANT, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_C0_GRANT, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_C1_GRANT, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_FULL_BE_WR, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_REQUEST_NO_GRANT, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_TIMEOUT_PULSE, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_LIN_TIMEOUT_PULSE, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_CAM_HIT_FLUSHING, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_counter pa_su_counters[] = { + COUNTER(PA_SU_PERFCOUNTER0_SELECT, PA_SU_PERFCOUNTER0_LOW, PA_SU_PERFCOUNTER0_HI), + COUNTER(PA_SU_PERFCOUNTER1_SELECT, PA_SU_PERFCOUNTER1_LOW, PA_SU_PERFCOUNTER1_HI), + COUNTER(PA_SU_PERFCOUNTER2_SELECT, PA_SU_PERFCOUNTER2_LOW, PA_SU_PERFCOUNTER2_HI), + COUNTER(PA_SU_PERFCOUNTER3_SELECT, PA_SU_PERFCOUNTER3_LOW, PA_SU_PERFCOUNTER3_HI), +}; + +static const struct fd_perfcntr_counter pa_sc_counters[] = { + COUNTER(PA_SC_PERFCOUNTER0_SELECT, PA_SC_PERFCOUNTER0_LOW, PA_SC_PERFCOUNTER0_HI), +}; + +static const struct fd_perfcntr_counter vgt_counters[] = { + COUNTER(VGT_PERFCOUNTER0_SELECT, VGT_PERFCOUNTER0_LOW, 
VGT_PERFCOUNTER0_HI), + COUNTER(VGT_PERFCOUNTER1_SELECT, VGT_PERFCOUNTER1_LOW, VGT_PERFCOUNTER1_HI), + COUNTER(VGT_PERFCOUNTER2_SELECT, VGT_PERFCOUNTER2_LOW, VGT_PERFCOUNTER2_HI), + COUNTER(VGT_PERFCOUNTER3_SELECT, VGT_PERFCOUNTER3_LOW, VGT_PERFCOUNTER3_HI), +}; + +static const struct fd_perfcntr_counter tcr_counters[] = { + COUNTER(TCR_PERFCOUNTER0_SELECT, TCR_PERFCOUNTER0_LOW, TCR_PERFCOUNTER0_HI), + COUNTER(TCR_PERFCOUNTER1_SELECT, TCR_PERFCOUNTER1_LOW, TCR_PERFCOUNTER1_HI), +}; + +static const struct fd_perfcntr_counter tp0_counters[] = { + COUNTER(TP0_PERFCOUNTER0_SELECT, TP0_PERFCOUNTER0_LOW, TP0_PERFCOUNTER0_HI), + COUNTER(TP0_PERFCOUNTER1_SELECT, TP0_PERFCOUNTER1_LOW, TP0_PERFCOUNTER1_HI), +}; + +static const struct fd_perfcntr_counter tcm_counters[] = { + COUNTER(TCM_PERFCOUNTER0_SELECT, TCM_PERFCOUNTER0_LOW, TCM_PERFCOUNTER0_HI), + COUNTER(TCM_PERFCOUNTER1_SELECT, TCM_PERFCOUNTER1_LOW, TCM_PERFCOUNTER1_HI), +}; + +static const struct fd_perfcntr_counter tcf_counters[] = { + COUNTER(TCF_PERFCOUNTER0_SELECT, TCF_PERFCOUNTER0_LOW, TCF_PERFCOUNTER0_HI), + COUNTER(TCF_PERFCOUNTER1_SELECT, TCF_PERFCOUNTER1_LOW, TCF_PERFCOUNTER1_HI), + COUNTER(TCF_PERFCOUNTER2_SELECT, TCF_PERFCOUNTER2_LOW, TCF_PERFCOUNTER2_HI), + COUNTER(TCF_PERFCOUNTER3_SELECT, TCF_PERFCOUNTER3_LOW, TCF_PERFCOUNTER3_HI), + COUNTER(TCF_PERFCOUNTER4_SELECT, TCF_PERFCOUNTER4_LOW, TCF_PERFCOUNTER4_HI), + COUNTER(TCF_PERFCOUNTER5_SELECT, TCF_PERFCOUNTER5_LOW, TCF_PERFCOUNTER5_HI), + COUNTER(TCF_PERFCOUNTER6_SELECT, TCF_PERFCOUNTER6_LOW, TCF_PERFCOUNTER6_HI), + COUNTER(TCF_PERFCOUNTER7_SELECT, TCF_PERFCOUNTER7_LOW, TCF_PERFCOUNTER7_HI), + COUNTER(TCF_PERFCOUNTER8_SELECT, TCF_PERFCOUNTER8_LOW, TCF_PERFCOUNTER8_HI), + COUNTER(TCF_PERFCOUNTER9_SELECT, TCF_PERFCOUNTER9_LOW, TCF_PERFCOUNTER9_HI), + COUNTER(TCF_PERFCOUNTER10_SELECT, TCF_PERFCOUNTER10_LOW, TCF_PERFCOUNTER10_HI), + COUNTER(TCF_PERFCOUNTER11_SELECT, TCF_PERFCOUNTER11_LOW, TCF_PERFCOUNTER11_HI), +}; + +static const struct fd_perfcntr_counter 
sq_counters[] = { + COUNTER(SQ_PERFCOUNTER0_SELECT, SQ_PERFCOUNTER0_LOW, SQ_PERFCOUNTER0_HI), + COUNTER(SQ_PERFCOUNTER1_SELECT, SQ_PERFCOUNTER1_LOW, SQ_PERFCOUNTER1_HI), + COUNTER(SQ_PERFCOUNTER2_SELECT, SQ_PERFCOUNTER2_LOW, SQ_PERFCOUNTER2_HI), + COUNTER(SQ_PERFCOUNTER3_SELECT, SQ_PERFCOUNTER3_LOW, SQ_PERFCOUNTER3_HI), +}; + +static const struct fd_perfcntr_countable rbbm_countables[] = { + COUNTABLE(RBBM1_COUNT, UINT64, AVERAGE), + COUNTABLE(RBBM1_NRT_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_RB_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_SQ_CNTX0_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_SQ_CNTX17_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_VGT_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_VGT_NODMA_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_PA_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_SC_CNTX_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_TPC_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_TC_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_SX_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_CP_COHER_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_CP_NRT_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_GFX_IDLE_STALL, UINT64, AVERAGE), + COUNTABLE(RBBM1_INTERRUPT, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_countable cp_countables[] = { + COUNTABLE(ALWAYS_COUNT, UINT64, AVERAGE), + COUNTABLE(TRANS_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(TRANS_FIFO_AF, UINT64, AVERAGE), + COUNTABLE(RCIU_PFPTRANS_WAIT, UINT64, AVERAGE), + COUNTABLE(RCIU_NRTTRANS_WAIT, UINT64, AVERAGE), + COUNTABLE(CSF_NRT_READ_WAIT, UINT64, AVERAGE), + COUNTABLE(CSF_I1_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(CSF_I2_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(CSF_ST_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(CSF_RING_ROQ_FULL, UINT64, AVERAGE), + COUNTABLE(CSF_I1_ROQ_FULL, UINT64, AVERAGE), + COUNTABLE(CSF_I2_ROQ_FULL, UINT64, AVERAGE), + COUNTABLE(CSF_ST_ROQ_FULL, UINT64, AVERAGE), + COUNTABLE(MIU_TAG_MEM_FULL, UINT64, AVERAGE), + COUNTABLE(MIU_WRITECLEAN, UINT64, AVERAGE), + COUNTABLE(MIU_NRT_WRITE_STALLED, UINT64, AVERAGE), + 
COUNTABLE(MIU_NRT_READ_STALLED, UINT64, AVERAGE), + COUNTABLE(ME_WRITE_CONFIRM_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(ME_VS_DEALLOC_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(ME_PS_DEALLOC_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(ME_REGS_VS_EVENT_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(ME_REGS_PS_EVENT_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(ME_REGS_CF_EVENT_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(ME_MICRO_RB_STARVED, UINT64, AVERAGE), + COUNTABLE(ME_MICRO_I1_STARVED, UINT64, AVERAGE), + COUNTABLE(ME_MICRO_I2_STARVED, UINT64, AVERAGE), + COUNTABLE(ME_MICRO_ST_STARVED, UINT64, AVERAGE), + COUNTABLE(RCIU_RBBM_DWORD_SENT, UINT64, AVERAGE), + COUNTABLE(ME_BUSY_CLOCKS, UINT64, AVERAGE), + COUNTABLE(ME_WAIT_CONTEXT_AVAIL, UINT64, AVERAGE), + COUNTABLE(PFP_TYPE0_PACKET, UINT64, AVERAGE), + COUNTABLE(PFP_TYPE3_PACKET, UINT64, AVERAGE), + COUNTABLE(CSF_RB_WPTR_NEQ_RPTR, UINT64, AVERAGE), + COUNTABLE(CSF_I1_SIZE_NEQ_ZERO, UINT64, AVERAGE), + COUNTABLE(CSF_I2_SIZE_NEQ_ZERO, UINT64, AVERAGE), + COUNTABLE(CSF_RBI1I2_FETCHING, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_counter sx_counters[] = { + COUNTER(SX_PERFCOUNTER0_SELECT, SX_PERFCOUNTER0_LOW, SX_PERFCOUNTER0_HI), +}; + +// We don't have the enums for MH perfcntrs +#if 0 +static const struct fd_perfcntr_counter mh_counters[] = { + COUNTER(MH_PERFCOUNTER0_SELECT, MH_PERFCOUNTER0_LOW, MH_PERFCOUNTER0_HI), + COUNTER(MH_PERFCOUNTER1_SELECT, MH_PERFCOUNTER1_LOW, MH_PERFCOUNTER1_HI), +}; +#endif + +static const struct fd_perfcntr_counter rbbm_counters[] = { + COUNTER(RBBM_PERFCOUNTER1_SELECT, RBBM_PERFCOUNTER1_LO, RBBM_PERFCOUNTER1_HI), +}; + +static const struct fd_perfcntr_counter cp_counters[] = { + COUNTER(CP_PERFCOUNTER_SELECT, CP_PERFCOUNTER_LO, CP_PERFCOUNTER_HI), +}; + +static const struct fd_perfcntr_counter rb_counters[] = { + COUNTER(RB_PERFCOUNTER0_SELECT, RB_PERFCOUNTER0_LOW, RB_PERFCOUNTER0_HI), +}; + +const struct fd_perfcntr_group a2xx_perfcntr_groups[] = { + GROUP("PA_SU", pa_su_counters, 
pa_su_countables), + GROUP("PA_SC", pa_sc_counters, pa_sc_countables), + GROUP("VGT", vgt_counters, vgt_countables), + GROUP("TCR", tcr_counters, tcr_countables), + GROUP("TP0", tp0_counters, tp0_countables), + GROUP("TCM", tcm_counters, tcm_countables), + GROUP("TCF", tcf_counters, tcf_countables), + GROUP("SQ", sq_counters, sq_countables), + GROUP("SX", sx_counters, sx_countables), +// GROUP("MH", mh_counters, mh_countables), + GROUP("RBBM", rbbm_counters, rbbm_countables), + GROUP("CP", cp_counters, cp_countables), + GROUP("RB", rb_counters, rb_countables), +}; + +const unsigned a2xx_num_perfcntr_groups = ARRAY_SIZE(a2xx_perfcntr_groups); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_program.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_program.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_program.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_program.c 2019-03-31 23:16:37.000000000 +0000 @@ -22,6 +22,7 @@ * * Authors: * Rob Clark + * Jonathan Marek */ #include "pipe/p_state.h" @@ -34,18 +35,20 @@ #include "freedreno_program.h" +#include "ir2.h" #include "fd2_program.h" -#include "fd2_compiler.h" #include "fd2_texture.h" #include "fd2_util.h" +#include "instr-a2xx.h" static struct fd2_shader_stateobj * -create_shader(enum shader_t type) +create_shader(struct pipe_context *pctx, gl_shader_stage type) { struct fd2_shader_stateobj *so = CALLOC_STRUCT(fd2_shader_stateobj); if (!so) return NULL; so->type = type; + so->is_a20x = is_a20x(fd_context(pctx)->screen); return so; } @@ -54,89 +57,71 @@ { if (!so) return; - ir2_shader_destroy(so->ir); - free(so->tokens); - free(so->bin); + ralloc_free(so->nir); + for (int i = 0; i < ARRAY_SIZE(so->variant); i++) + free(so->variant[i].info.dwords); free(so); } -static struct fd2_shader_stateobj * -assemble(struct fd2_shader_stateobj *so) +static void +emit(struct fd_ringbuffer *ring, gl_shader_stage type, + struct ir2_shader_info *info, struct 
util_dynarray *patches) { - free(so->bin); - so->bin = ir2_shader_assemble(so->ir, &so->info); - if (!so->bin) - goto fail; + unsigned i; - if (fd_mesa_debug & FD_DBG_DISASM) { - DBG("disassemble: type=%d", so->type); - disasm_a2xx(so->bin, so->info.sizedwords, 0, so->type); - } + assert(info->sizedwords); - return so; + OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + info->sizedwords); + OUT_RING(ring, type == MESA_SHADER_FRAGMENT); + OUT_RING(ring, info->sizedwords); -fail: - debug_error("assemble failed!"); - delete_shader(so); - return NULL; + if (patches) + util_dynarray_append(patches, uint32_t*, &ring->cur[info->mem_export_ptr]); + + for (i = 0; i < info->sizedwords; i++) + OUT_RING(ring, info->dwords[i]); } -static struct fd2_shader_stateobj * -compile(struct fd_program_stateobj *prog, struct fd2_shader_stateobj *so) +static int +ir2_glsl_type_size(const struct glsl_type *type) { - int ret; + return glsl_count_attribute_slots(type, false); +} - if (fd_mesa_debug & FD_DBG_DISASM) { - DBG("dump tgsi: type=%d", so->type); - tgsi_dump(so->tokens, 0); +static void * +fd2_fp_state_create(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_FRAGMENT); + if (!so) + return NULL; + + if (cso->type == PIPE_SHADER_IR_NIR) { + so->nir = cso->ir.nir; + NIR_PASS_V(so->nir, nir_lower_io, nir_var_all, ir2_glsl_type_size, + (nir_lower_io_options)0); + } else { + assert(cso->type == PIPE_SHADER_IR_TGSI); + so->nir = ir2_tgsi_to_nir(cso->tokens); } - ret = fd2_compile_shader(prog, so); - if (ret) + if (ir2_optimize_nir(so->nir, true)) goto fail; - /* NOTE: we don't assemble yet because for VS we don't know the - * type information for vertex fetch yet.. so those need to be - * patched up later before assembling. 
- */ + so->first_immediate = so->nir->num_uniforms; - so->info.sizedwords = 0; + ir2_compile(so, 0, NULL); + ralloc_free(so->nir); + so->nir = NULL; return so; fail: - debug_error("compile failed!"); delete_shader(so); return NULL; } static void -emit(struct fd_ringbuffer *ring, struct fd2_shader_stateobj *so) -{ - unsigned i; - - if (so->info.sizedwords == 0) - assemble(so); - - OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + so->info.sizedwords); - OUT_RING(ring, (so->type == SHADER_VERTEX) ? 0 : 1); - OUT_RING(ring, so->info.sizedwords); - for (i = 0; i < so->info.sizedwords; i++) - OUT_RING(ring, so->bin[i]); -} - -static void * -fd2_fp_state_create(struct pipe_context *pctx, - const struct pipe_shader_state *cso) -{ - struct fd2_shader_stateobj *so = create_shader(SHADER_FRAGMENT); - if (!so) - return NULL; - so->tokens = tgsi_dup_tokens(cso->tokens); - return so; -} - -static void fd2_fp_state_delete(struct pipe_context *pctx, void *hwcso) { struct fd2_shader_stateobj *so = hwcso; @@ -147,11 +132,32 @@ fd2_vp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { - struct fd2_shader_stateobj *so = create_shader(SHADER_VERTEX); + struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_VERTEX); if (!so) return NULL; - so->tokens = tgsi_dup_tokens(cso->tokens); + + if (cso->type == PIPE_SHADER_IR_NIR) { + so->nir = cso->ir.nir; + NIR_PASS_V(so->nir, nir_lower_io, nir_var_all, ir2_glsl_type_size, + (nir_lower_io_options)0); + } else { + assert(cso->type == PIPE_SHADER_IR_TGSI); + so->nir = ir2_tgsi_to_nir(cso->tokens); + } + + if (ir2_optimize_nir(so->nir, true)) + goto fail; + + so->first_immediate = so->nir->num_uniforms; + + /* compile binning variant now */ + ir2_compile(so, 0, NULL); + return so; + +fail: + delete_shader(so); + return NULL; } static void @@ -162,277 +168,146 @@ } static void -patch_vtx_fetches(struct fd_context *ctx, struct fd2_shader_stateobj *so, - struct fd_vertex_stateobj *vtx) +patch_vtx_fetch(struct fd_context 
*ctx, struct pipe_vertex_element *elem, + instr_fetch_vtx_t *instr, uint16_t dst_swiz) { - unsigned i; - - assert(so->num_vfetch_instrs == vtx->num_elements); - - /* update vtx fetch instructions: */ - for (i = 0; i < so->num_vfetch_instrs; i++) { - struct ir2_instruction *instr = so->vfetch_instrs[i]; - struct pipe_vertex_element *elem = &vtx->pipe[i]; - struct pipe_vertex_buffer *vb = + struct pipe_vertex_buffer *vb = &ctx->vtx.vertexbuf.vb[elem->vertex_buffer_index]; - enum pipe_format format = elem->src_format; - const struct util_format_description *desc = - util_format_description(format); - unsigned j; - - /* Find the first non-VOID channel. */ - for (j = 0; j < 4; j++) - if (desc->channel[j].type != UTIL_FORMAT_TYPE_VOID) - break; - - /* CI/CIS can probably be set in compiler instead: */ - instr->fetch.const_idx = 20 + (i / 3); - instr->fetch.const_idx_sel = i % 3; - - instr->fetch.fmt = fd2_pipe2surface(format); - instr->fetch.is_normalized = desc->channel[j].normalized; - instr->fetch.is_signed = - desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED; - instr->fetch.stride = vb->stride ? : 1; - instr->fetch.offset = elem->src_offset; - - for (j = 0; j < 4; j++) - instr->dst_reg.swizzle[j] = "xyzw01__"[desc->swizzle[j]]; - - assert(instr->fetch.fmt != ~0); - - DBG("vtx[%d]: %s (%d), ci=%d, cis=%d, id=%d, swizzle=%s, " - "stride=%d, offset=%d", - i, util_format_name(format), - instr->fetch.fmt, - instr->fetch.const_idx, - instr->fetch.const_idx_sel, - elem->instance_divisor, - instr->dst_reg.swizzle, - instr->fetch.stride, - instr->fetch.offset); + enum pipe_format format = elem->src_format; + const struct util_format_description *desc = + util_format_description(format); + unsigned j; + + /* Find the first non-VOID channel. 
*/ + for (j = 0; j < 4; j++) + if (desc->channel[j].type != UTIL_FORMAT_TYPE_VOID) + break; + + instr->format = fd2_pipe2surface(format); + instr->num_format_all = !desc->channel[j].normalized; + instr->format_comp_all = desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED; + instr->stride = vb->stride; + instr->offset = elem->src_offset; + + unsigned swiz = 0; + for (int i = 0; i < 4; i++) { + unsigned s = dst_swiz >> i*3 & 7; + swiz |= (s >= 4 ? s : desc->swizzle[s]) << i*3; } - - /* trigger re-assemble: */ - so->info.sizedwords = 0; + instr->dst_swiz = swiz; } static void -patch_tex_fetches(struct fd_context *ctx, struct fd2_shader_stateobj *so, - struct fd_texture_stateobj *tex) +patch_fetches(struct fd_context *ctx, struct ir2_shader_info *info, + struct fd_vertex_stateobj *vtx, struct fd_texture_stateobj *tex) { - unsigned i; + for (int i = 0; i < info->num_fetch_instrs; i++) { + struct ir2_fetch_info *fi = &info->fetch_info[i]; - /* update tex fetch instructions: */ - for (i = 0; i < so->num_tfetch_instrs; i++) { - struct ir2_instruction *instr = so->tfetch_instrs[i].instr; - unsigned samp_id = so->tfetch_instrs[i].samp_id; - unsigned const_idx = fd2_get_const_idx(ctx, tex, samp_id); - - if (const_idx != instr->fetch.const_idx) { - instr->fetch.const_idx = const_idx; - /* trigger re-assemble: */ - so->info.sizedwords = 0; + instr_fetch_t *instr = (instr_fetch_t*) &info->dwords[fi->offset]; + if (instr->opc == VTX_FETCH) { + unsigned idx = (instr->vtx.const_index - 20) * 3 + + instr->vtx.const_index_sel; + patch_vtx_fetch(ctx, &vtx->pipe[idx], &instr->vtx, fi->vtx.dst_swiz); + continue; } - } -} -void -fd2_program_validate(struct fd_context *ctx) -{ - struct fd_program_stateobj *prog = &ctx->prog; - bool dirty_fp = !!(ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_PROG); - bool dirty_vp = !!(ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_PROG); - - /* if vertex or frag shader is dirty, we may need to recompile. 
Compile - * frag shader first, as that assigns the register slots for exports - * from the vertex shader. And therefore if frag shader has changed we - * need to recompile both vert and frag shader. - */ - if (dirty_fp) - compile(prog, prog->fp); - - if (dirty_fp || dirty_vp) - compile(prog, prog->vp); - - /* if necessary, fix up vertex fetch instructions: */ - if (ctx->dirty & (FD_DIRTY_VTXSTATE | FD_DIRTY_PROG)) - patch_vtx_fetches(ctx, prog->vp, ctx->vtx.vtx); - - /* if necessary, fix up texture fetch instructions: */ - if (ctx->dirty & (FD_DIRTY_TEXSTATE | FD_DIRTY_PROG)) { - patch_tex_fetches(ctx, prog->vp, &ctx->tex[PIPE_SHADER_VERTEX]); - patch_tex_fetches(ctx, prog->fp, &ctx->tex[PIPE_SHADER_FRAGMENT]); + assert(instr->opc == TEX_FETCH); + instr->tex.const_idx = fd2_get_const_idx(ctx, tex, fi->tex.samp_id); + instr->tex.src_swiz = fi->tex.src_swiz; + if (fd2_texture_swap_xy(tex, fi->tex.samp_id)) { + unsigned x = instr->tex.src_swiz; + instr->tex.src_swiz = (x & 0x30) | (x & 3) << 2 | (x >> 2 & 3); + } } } void -fd2_program_emit(struct fd_ringbuffer *ring, +fd2_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd_program_stateobj *prog) { - struct ir2_shader_info *vsi = - &((struct fd2_shader_stateobj *)prog->vp)->info; - struct ir2_shader_info *fsi = - &((struct fd2_shader_stateobj *)prog->fp)->info; - uint8_t vs_gprs, fs_gprs, vs_export; - - emit(ring, prog->vp); - emit(ring, prog->fp); - - vs_gprs = (vsi->max_reg < 0) ? 0x80 : vsi->max_reg; - fs_gprs = (fsi->max_reg < 0) ? 
0x80 : fsi->max_reg; - vs_export = MAX2(1, prog->num_exports) - 1; - - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_SQ_PROGRAM_CNTL)); - OUT_RING(ring, A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(POSITION_2_VECTORS_SPRITE) | - A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE | - A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE | - A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export) | - A2XX_SQ_PROGRAM_CNTL_PS_REGS(fs_gprs) | - A2XX_SQ_PROGRAM_CNTL_VS_REGS(vs_gprs)); -} - -/* Creates shader: - * EXEC ADDR(0x2) CNT(0x1) - * (S)FETCH: SAMPLE R0.xyzw = R0.xyx CONST(0) LOCATION(CENTER) - * ALLOC PARAM/PIXEL SIZE(0x0) - * EXEC_END ADDR(0x3) CNT(0x1) - * ALU: MAXv export0 = R0, R0 ; gl_FragColor - * NOP - */ -static struct fd2_shader_stateobj * -create_blit_fp(void) -{ - struct fd2_shader_stateobj *so = create_shader(SHADER_FRAGMENT); - struct ir2_instruction *instr; - - if (!so) - return NULL; - - so->ir = ir2_shader_create(); - - instr = ir2_instr_create_tex_fetch(so->ir, 0); - ir2_dst_create(instr, 0, "xyzw", 0); - ir2_reg_create(instr, 0, "xyx", IR2_REG_INPUT); - instr->sync = true; - - instr = ir2_instr_create_alu_v(so->ir, MAXv); - ir2_dst_create(instr, 0, NULL, IR2_REG_EXPORT); - ir2_reg_create(instr, 0, NULL, 0); - ir2_reg_create(instr, 0, NULL, 0); - - return assemble(so); -} - -/* Creates shader: -* EXEC ADDR(0x3) CNT(0x2) -* FETCH: VERTEX R1.xy01 = R0.x FMT_32_32_FLOAT UNSIGNED STRIDE(8) CONST(26, 1) -* FETCH: VERTEX R2.xyz1 = R0.x FMT_32_32_32_FLOAT UNSIGNED STRIDE(12) CONST(26, 0) -* ALLOC POSITION SIZE(0x0) -* EXEC ADDR(0x5) CNT(0x1) -* ALU: MAXv export62 = R2, R2 ; gl_Position -* ALLOC PARAM/PIXEL SIZE(0x0) -* EXEC_END ADDR(0x6) CNT(0x1) -* ALU: MAXv export0 = R1, R1 -* NOP - */ -static struct fd2_shader_stateobj * -create_blit_vp(void) -{ - struct fd2_shader_stateobj *so = create_shader(SHADER_VERTEX); - struct ir2_instruction *instr; - - if (!so) - return NULL; - - so->ir = ir2_shader_create(); - - instr = ir2_instr_create_vtx_fetch(so->ir, 26, 1, FMT_32_32_FLOAT, false, 8); 
- instr->fetch.is_normalized = true; - ir2_dst_create(instr, 1, "xy01", 0); - ir2_reg_create(instr, 0, "x", IR2_REG_INPUT); - - instr = ir2_instr_create_vtx_fetch(so->ir, 26, 0, FMT_32_32_32_FLOAT, false, 12); - instr->fetch.is_normalized = true; - ir2_dst_create(instr, 2, "xyz1", 0); - ir2_reg_create(instr, 0, "x", IR2_REG_INPUT); - - instr = ir2_instr_create_alu_v(so->ir, MAXv); - ir2_dst_create(instr, 62, NULL, IR2_REG_EXPORT); - ir2_reg_create(instr, 2, NULL, 0); - ir2_reg_create(instr, 2, NULL, 0); - - instr = ir2_instr_create_alu_v(so->ir, MAXv); - ir2_dst_create(instr, 0, NULL, IR2_REG_EXPORT); - ir2_reg_create(instr, 1, NULL, 0); - ir2_reg_create(instr, 1, NULL, 0); - - return assemble(so); -} - -/* Creates shader: - * ALLOC PARAM/PIXEL SIZE(0x0) - * EXEC_END ADDR(0x1) CNT(0x1) - * ALU: MAXv export0 = C0, C0 ; gl_FragColor - */ -static struct fd2_shader_stateobj * -create_solid_fp(void) -{ - struct fd2_shader_stateobj *so = create_shader(SHADER_FRAGMENT); - struct ir2_instruction *instr; - - if (!so) - return NULL; + struct fd2_shader_stateobj *fp = NULL, *vp; + struct ir2_shader_info *fpi, *vpi; + struct ir2_frag_linkage *f; + uint8_t vs_gprs, fs_gprs = 0, vs_export = 0; + enum a2xx_sq_ps_vtx_mode mode = POSITION_1_VECTOR; + bool binning = (ctx->batch && ring == ctx->batch->binning); + unsigned variant = 0; + + vp = prog->vp; + + /* find variant matching the linked fragment shader */ + if (!binning) { + fp = prog->fp; + for (variant = 1; variant < ARRAY_SIZE(vp->variant); variant++) { + /* if checked all variants, compile a new variant */ + if (!vp->variant[variant].info.sizedwords) { + ir2_compile(vp, variant, fp); + break; + } - so->ir = ir2_shader_create(); + /* check if fragment shader linkage matches */ + if (!memcmp(&vp->variant[variant].f, &fp->variant[0].f, + sizeof(struct ir2_frag_linkage))) + break; + } + assert(variant < ARRAY_SIZE(vp->variant)); + } - instr = ir2_instr_create_alu_v(so->ir, MAXv); - ir2_dst_create(instr, 0, NULL, 
IR2_REG_EXPORT); - ir2_reg_create(instr, 0, NULL, IR2_REG_CONST); - ir2_reg_create(instr, 0, NULL, IR2_REG_CONST); - - return assemble(so); -} - -/* Creates shader: - * EXEC ADDR(0x3) CNT(0x1) - * (S)FETCH: VERTEX R1.xyz1 = R0.x FMT_32_32_32_FLOAT - * UNSIGNED STRIDE(12) CONST(26, 0) - * ALLOC POSITION SIZE(0x0) - * EXEC ADDR(0x4) CNT(0x1) - * ALU: MAXv export62 = R1, R1 ; gl_Position - * ALLOC PARAM/PIXEL SIZE(0x0) - * EXEC_END ADDR(0x5) CNT(0x0) - */ -static struct fd2_shader_stateobj * -create_solid_vp(void) -{ - struct fd2_shader_stateobj *so = create_shader(SHADER_VERTEX); - struct ir2_instruction *instr; + vpi = &vp->variant[variant].info; + fpi = &fp->variant[0].info; + f = &fp->variant[0].f; + + /* clear/gmem2mem/mem2gmem need to be changed to remove this condition */ + if (prog != &ctx->solid_prog && prog != &ctx->blit_prog[0]) { + patch_fetches(ctx, vpi, ctx->vtx.vtx, &ctx->tex[PIPE_SHADER_VERTEX]); + if (fp) + patch_fetches(ctx, fpi, NULL, &ctx->tex[PIPE_SHADER_FRAGMENT]); + } - if (!so) - return NULL; + emit(ring, MESA_SHADER_VERTEX, vpi, + binning ? &ctx->batch->shader_patches : NULL); - so->ir = ir2_shader_create(); + if (fp) { + emit(ring, MESA_SHADER_FRAGMENT, fpi, NULL); + fs_gprs = (fpi->max_reg < 0) ? 0x80 : fpi->max_reg; + vs_export = MAX2(1, f->inputs_count) - 1; + } - instr = ir2_instr_create_vtx_fetch(so->ir, 26, 0, FMT_32_32_32_FLOAT, false, 12); - ir2_dst_create(instr, 1, "xyz1", 0); - ir2_reg_create(instr, 0, "x", IR2_REG_INPUT); + vs_gprs = (vpi->max_reg < 0) ? 
0x80 : vpi->max_reg; - instr = ir2_instr_create_alu_v(so->ir, MAXv); - ir2_dst_create(instr, 62, NULL, IR2_REG_EXPORT); - ir2_reg_create(instr, 1, NULL, 0); - ir2_reg_create(instr, 1, NULL, 0); + if (vp->writes_psize && !binning) + mode = POSITION_2_VECTORS_SPRITE; + /* set register to use for param (fragcoord/pointcoord/frontfacing) */ + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC)); + OUT_RING(ring, A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY) | + COND(fp, A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS(f->inputs_count)) | + /* we need SCREEN_XY for both fragcoord and frontfacing */ + A2XX_SQ_CONTEXT_MISC_SC_OUTPUT_SCREEN_XY); - return assemble(so); + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_SQ_PROGRAM_CNTL)); + OUT_RING(ring, A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(2) | + A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE(mode) | + A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE | + A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE | + A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export) | + A2XX_SQ_PROGRAM_CNTL_PS_REGS(fs_gprs) | + A2XX_SQ_PROGRAM_CNTL_VS_REGS(vs_gprs) | + COND(fp && fp->need_param, A2XX_SQ_PROGRAM_CNTL_PARAM_GEN) | + COND(!fp, A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_VTX)); } void fd2_prog_init(struct pipe_context *pctx) { struct fd_context *ctx = fd_context(pctx); + struct fd_program_stateobj *prog; + struct fd2_shader_stateobj *so; + struct ir2_shader_info *info; + instr_fetch_vtx_t *instr; pctx->create_fs_state = fd2_fp_state_create; pctx->delete_fs_state = fd2_fp_state_delete; @@ -442,8 +317,47 @@ fd_prog_init(pctx); - ctx->solid_prog.fp = create_solid_fp(); - ctx->solid_prog.vp = create_solid_vp(); - ctx->blit_prog[0].fp = create_blit_fp(); - ctx->blit_prog[0].vp = create_blit_vp(); + /* XXX maybe its possible to reuse patch_vtx_fetch somehow? 
*/ + + prog = &ctx->solid_prog; + so = prog->vp; + ir2_compile(prog->vp, 1, prog->fp); + +#define IR2_FETCH_SWIZ_XY01 0xb08 +#define IR2_FETCH_SWIZ_XYZ1 0xa88 + + info = &so->variant[1].info; + + instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[0].offset]; + instr->const_index = 26; + instr->const_index_sel = 0; + instr->format = FMT_32_32_32_FLOAT; + instr->format_comp_all = false; + instr->stride = 12; + instr->num_format_all = true; + instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1; + + prog = &ctx->blit_prog[0]; + so = prog->vp; + ir2_compile(prog->vp, 1, prog->fp); + + info = &so->variant[1].info; + + instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[0].offset]; + instr->const_index = 26; + instr->const_index_sel = 1; + instr->format = FMT_32_32_FLOAT; + instr->format_comp_all = false; + instr->stride = 8; + instr->num_format_all = false; + instr->dst_swiz = IR2_FETCH_SWIZ_XY01; + + instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[1].offset]; + instr->const_index = 26; + instr->const_index_sel = 0; + instr->format = FMT_32_32_32_FLOAT; + instr->format_comp_all = false; + instr->stride = 12; + instr->num_format_all = false; + instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1; } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_program.h mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_program.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_program.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_program.h 2019-03-31 23:16:37.000000000 +0000 @@ -31,48 +31,39 @@ #include "freedreno_context.h" -#include "ir-a2xx.h" +#include "ir2.h" #include "disasm.h" struct fd2_shader_stateobj { - enum shader_t type; + nir_shader *nir; + gl_shader_stage type; + bool is_a20x; - uint32_t *bin; - - struct tgsi_token *tokens; - - /* note that we defer compiling shader until we know both vs and ps.. 
- * and if one changes, we potentially need to recompile in order to - * get varying linkages correct: - */ - struct ir2_shader_info info; - struct ir2_shader *ir; - - /* for vertex shaders, the fetch instructions which need to be - * patched up before assembly: - */ - unsigned num_vfetch_instrs; - struct ir2_instruction *vfetch_instrs[64]; - - /* for all shaders, any tex fetch instructions which need to be - * patched before assembly: + /* note: using same set of immediates for all variants + * it doesn't matter, other than the slightly larger command stream */ - unsigned num_tfetch_instrs; - struct { - unsigned samp_id; - struct ir2_instruction *instr; - } tfetch_instrs[64]; - unsigned first_immediate; /* const reg # of first immediate */ unsigned num_immediates; struct { uint32_t val[4]; + unsigned ncomp; } immediates[64]; + + bool writes_psize; + bool need_param; + bool has_kill; + + /* note: + * fragment shader only has one variant + * first vertex shader variant is always binning shader + * we should use a dynamic array but in normal case there is + * only 2 variants (and 3 sometimes with GALLIUM_HUD) + */ + struct ir2_shader_variant variant[8]; }; -void fd2_program_emit(struct fd_ringbuffer *ring, +void fd2_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd_program_stateobj *prog); -void fd2_program_validate(struct fd_context *ctx); void fd2_prog_init(struct pipe_context *pctx); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_query.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_query.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_query.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_query.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,244 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the 
Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Jonathan Marek + * Rob Clark + */ + +/* NOTE: perfcntrs are 48-bits but we only have 32-bit accumulate (?) + * so we work with 32-bits only. we accumulate start/stop separately, + * which differs from a5xx but works with only accumulate (no add/neg) + */ + +#include "freedreno_query_acc.h" +#include "freedreno_resource.h" + +#include "fd2_context.h" +#include "fd2_query.h" + +struct PACKED fd2_query_sample { + uint32_t start; + uint32_t stop; +}; + +/* offset of a single field of an array of fd2_query_sample: */ +#define query_sample_idx(aq, idx, field) \ + fd_resource((aq)->prsc)->bo, \ + (idx * sizeof(struct fd2_query_sample)) + \ + offsetof(struct fd2_query_sample, field), \ + 0, 0 + +/* offset of a single field of fd2_query_sample: */ +#define query_sample(aq, field) \ + query_sample_idx(aq, 0, field) + +/* + * Performance Counter (batch) queries: + * + * Only one of these is active at a time, per design of the gallium + * batch_query API design. 
On perfcntr query tracks N query_types, + * each of which has a 'fd_batch_query_entry' that maps it back to + * the associated group and counter. + */ + +struct fd_batch_query_entry { + uint8_t gid; /* group-id */ + uint8_t cid; /* countable-id within the group */ +}; + +struct fd_batch_query_data { + struct fd_screen *screen; + unsigned num_query_entries; + struct fd_batch_query_entry query_entries[]; +}; + +static void +perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) +{ + struct fd_batch_query_data *data = aq->query_data; + struct fd_screen *screen = data->screen; + struct fd_ringbuffer *ring = batch->draw; + + unsigned counters_per_group[screen->num_perfcntr_groups]; + memset(counters_per_group, 0, sizeof(counters_per_group)); + + fd_wfi(batch, ring); + + /* configure performance counters for the requested queries: */ + for (unsigned i = 0; i < data->num_query_entries; i++) { + struct fd_batch_query_entry *entry = &data->query_entries[i]; + const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid]; + unsigned counter_idx = counters_per_group[entry->gid]++; + + debug_assert(counter_idx < g->num_counters); + + OUT_PKT0(ring, g->counters[counter_idx].select_reg, 1); + OUT_RING(ring, g->countables[entry->cid].selector); + } + + memset(counters_per_group, 0, sizeof(counters_per_group)); + + /* and snapshot the start values */ + for (unsigned i = 0; i < data->num_query_entries; i++) { + struct fd_batch_query_entry *entry = &data->query_entries[i]; + const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid]; + unsigned counter_idx = counters_per_group[entry->gid]++; + const struct fd_perfcntr_counter *counter = &g->counters[counter_idx]; + + OUT_PKT3(ring, CP_REG_TO_MEM, 2); + OUT_RING(ring, counter->counter_reg_lo | CP_MEM_TO_REG_0_ACCUMULATE); + OUT_RELOCW(ring, query_sample_idx(aq, i, start)); + } +} + +static void +perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) +{ + struct fd_batch_query_data *data = 
aq->query_data; + struct fd_screen *screen = data->screen; + struct fd_ringbuffer *ring = batch->draw; + + unsigned counters_per_group[screen->num_perfcntr_groups]; + memset(counters_per_group, 0, sizeof(counters_per_group)); + + fd_wfi(batch, ring); + + /* TODO do we need to bother to turn anything off? */ + + /* snapshot the end values: */ + for (unsigned i = 0; i < data->num_query_entries; i++) { + struct fd_batch_query_entry *entry = &data->query_entries[i]; + const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid]; + unsigned counter_idx = counters_per_group[entry->gid]++; + const struct fd_perfcntr_counter *counter = &g->counters[counter_idx]; + + OUT_PKT3(ring, CP_REG_TO_MEM, 2); + OUT_RING(ring, counter->counter_reg_lo | CP_MEM_TO_REG_0_ACCUMULATE); + OUT_RELOCW(ring, query_sample_idx(aq, i, stop)); + } +} + +static void +perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf, + union pipe_query_result *result) +{ + struct fd_batch_query_data *data = aq->query_data; + struct fd2_query_sample *sp = buf; + + for (unsigned i = 0; i < data->num_query_entries; i++) + result->batch[i].u64 = sp[i].stop - sp[i].start; +} + +static const struct fd_acc_sample_provider perfcntr = { + .query_type = FD_QUERY_FIRST_PERFCNTR, + .active = FD_STAGE_DRAW | FD_STAGE_CLEAR, + .resume = perfcntr_resume, + .pause = perfcntr_pause, + .result = perfcntr_accumulate_result, +}; + +static struct pipe_query * +fd2_create_batch_query(struct pipe_context *pctx, + unsigned num_queries, unsigned *query_types) +{ + struct fd_context *ctx = fd_context(pctx); + struct fd_screen *screen = ctx->screen; + struct fd_query *q; + struct fd_acc_query *aq; + struct fd_batch_query_data *data; + + data = CALLOC_VARIANT_LENGTH_STRUCT(fd_batch_query_data, + num_queries * sizeof(data->query_entries[0])); + + data->screen = screen; + data->num_query_entries = num_queries; + + /* validate the requested query_types and ensure we don't try + * to request more query_types of a given 
group than we have + * counters: + */ + unsigned counters_per_group[screen->num_perfcntr_groups]; + memset(counters_per_group, 0, sizeof(counters_per_group)); + + for (unsigned i = 0; i < num_queries; i++) { + unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR; + + /* verify valid query_type, ie. is it actually a perfcntr? */ + if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) || + (idx >= screen->num_perfcntr_queries)) { + debug_printf("invalid batch query query_type: %u\n", query_types[i]); + goto error; + } + + struct fd_batch_query_entry *entry = &data->query_entries[i]; + struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx]; + + entry->gid = pq->group_id; + + /* the perfcntr_queries[] table flattens all the countables + * for each group in series, ie: + * + * (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ... + * + * So to find the countable index just step back through the + * table to find the first entry with the same group-id. + */ + while (pq > screen->perfcntr_queries) { + pq--; + if (pq->group_id == entry->gid) + entry->cid++; + } + + if (counters_per_group[entry->gid] >= + screen->perfcntr_groups[entry->gid].num_counters) { + debug_printf("too many counters for group %u\n", entry->gid); + goto error; + } + + counters_per_group[entry->gid]++; + } + + q = fd_acc_create_query2(ctx, 0, &perfcntr); + aq = fd_acc_query(q); + + /* sample buffer size is based on # of queries: */ + aq->size = num_queries * sizeof(struct fd2_query_sample); + aq->query_data = data; + + return (struct pipe_query *)q; + +error: + free(data); + return NULL; +} + +void +fd2_query_context_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + + ctx->create_query = fd_acc_create_query; + ctx->query_set_stage = fd_acc_query_set_stage; + + pctx->create_batch_query = fd2_create_batch_query; +} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_query.h mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_query.h --- 
mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_query.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_query.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2019 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Jonathan Marek + * Rob Clark + */ + +#ifndef FD2_QUERY_H_ +#define FD2_QUERY_H_ + +#include "pipe/p_context.h" + +void fd2_query_context_init(struct pipe_context *pctx); + +#endif /* FD2_QUERY_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.c 2019-03-31 23:16:37.000000000 +0000 @@ -47,7 +47,7 @@ if (cso->point_size_per_vertex) { psize_min = util_get_min_point_size(cso); - psize_max = 8192; + psize_max = 8192.0 - 0.0625; } else { /* Force the point size to be as if the vertex output was disabled. */ psize_min = cso->point_size; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_resource.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_resource.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_resource.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_resource.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Jonathan Marek + */ + +#include "fd2_resource.h" + +uint32_t +fd2_setup_slices(struct fd_resource *rsc) +{ + struct pipe_resource *prsc = &rsc->base; + enum pipe_format format = rsc->base.format; + uint32_t level, size = 0; + uint32_t width = prsc->width0; + uint32_t height = prsc->height0; + uint32_t depth = prsc->depth0; + + for (level = 0; level <= prsc->last_level; level++) { + struct fd_resource_slice *slice = fd_resource_slice(rsc, level); + uint32_t blocks; + + /* 32 * 32 block alignment */ + switch (prsc->target) { + default: assert(0); + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_CUBE: + height = align(height, 32 * util_format_get_blockheight(format)); + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + width = align(width, 32 * util_format_get_blockwidth(format)); + case PIPE_BUFFER: + break; + } + + /* mipmaps have power of two sizes in memory */ + if (level) { + width = util_next_power_of_two(width); + height = util_next_power_of_two(height); + } + + slice->pitch = width; + slice->offset = size; + + blocks = util_format_get_nblocks(format, width, height); + + /* 4k aligned size */ + slice->size0 = align(blocks * rsc->cpp, 4096); + + size += slice->size0 * depth * prsc->array_size; + + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); + } + return size; +} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_resource.h 
mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_resource.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_resource.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_resource.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Jonathan Marek + */ + +#ifndef FD2_RESOURCE_H_ +#define FD2_RESOURCE_H_ + +#include "freedreno_resource.h" + +uint32_t fd2_setup_slices(struct fd_resource *rsc); + +#endif /* FD2_RESOURCE_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_screen.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_screen.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -30,6 +30,7 @@ #include "fd2_screen.h" #include "fd2_context.h" #include "fd2_util.h" +#include "fd2_resource.h" static boolean fd2_screen_is_format_supported(struct pipe_screen *pscreen, @@ -104,10 +105,21 @@ return retval == usage; } +extern const struct fd_perfcntr_group a2xx_perfcntr_groups[]; +extern const unsigned a2xx_num_perfcntr_groups; + void fd2_screen_init(struct pipe_screen *pscreen) { - fd_screen(pscreen)->max_rts = 1; + struct fd_screen *screen = fd_screen(pscreen); + + screen->max_rts = 1; pscreen->context_create = fd2_context_create; pscreen->is_format_supported = fd2_screen_is_format_supported; + screen->setup_slices = fd2_setup_slices; + + if (fd_mesa_debug & FD_DBG_PERFC) { + screen->perfcntr_groups = a2xx_perfcntr_groups; + screen->num_perfcntr_groups = a2xx_num_perfcntr_groups; + } } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_texture.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_texture.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_texture.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_texture.c 2019-03-31 23:16:37.000000000 +0000 @@ -72,6 +72,22 @@ } } +static enum sq_tex_filter +mip_filter(unsigned filter) +{ + switch (filter) { + case PIPE_TEX_MIPFILTER_NONE: + return SQ_TEX_FILTER_BASEMAP; + case PIPE_TEX_MIPFILTER_NEAREST: + return SQ_TEX_FILTER_POINT; + case PIPE_TEX_MIPFILTER_LINEAR: + return SQ_TEX_FILTER_BILINEAR; + 
default: + DBG("invalid filter: %u", filter); + return 0; + } +} + static void * fd2_sampler_state_create(struct pipe_context *pctx, const struct pipe_sampler_state *cso) @@ -83,6 +99,11 @@ so->base = *cso; + /* TODO + * cso->max_anisotropy + * cso->normalized_coords (dealt with by shader for rect textures?) + */ + /* SQ_TEX0_PITCH() must be OR'd in later when we know the bound texture: */ so->tex0 = A2XX_SQ_TEX_0_CLAMP_X(tex_clamp(cso->wrap_s)) | @@ -91,10 +112,12 @@ so->tex3 = A2XX_SQ_TEX_3_XY_MAG_FILTER(tex_filter(cso->mag_img_filter)) | - A2XX_SQ_TEX_3_XY_MIN_FILTER(tex_filter(cso->min_img_filter)); + A2XX_SQ_TEX_3_XY_MIN_FILTER(tex_filter(cso->min_img_filter)) | + A2XX_SQ_TEX_3_MIP_FILTER(mip_filter(cso->min_mip_filter)); - so->tex4 = 0x00000000; /* ??? */ - so->tex5 = 0x00000200; /* ??? */ + so->tex4 = 0; + if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) + so->tex4 = A2XX_SQ_TEX_4_LOD_BIAS(cso->lod_bias); return so; } @@ -121,6 +144,26 @@ fd_sampler_states_bind(pctx, shader, start, nr, hwcso); } +static enum sq_tex_dimension +tex_dimension(unsigned target) +{ + switch (target) { + default: + assert(0); + case PIPE_TEXTURE_1D: + assert(0); /* TODO */ + return SQ_TEX_DIMENSION_1D; + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D: + return SQ_TEX_DIMENSION_2D; + case PIPE_TEXTURE_3D: + assert(0); /* TODO */ + return SQ_TEX_DIMENSION_3D; + case PIPE_TEXTURE_CUBE: + return SQ_TEX_DIMENSION_CUBE; + } +} + static struct pipe_sampler_view * fd2_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, const struct pipe_sampler_view *cso) @@ -137,15 +180,22 @@ so->base.reference.count = 1; so->base.context = pctx; - so->fmt = fd2_pipe2surface(cso->format); - so->tex0 = A2XX_SQ_TEX_0_PITCH(rsc->slices[0].pitch); + so->tex1 = + A2XX_SQ_TEX_1_FORMAT(fd2_pipe2surface(cso->format)) | + A2XX_SQ_TEX_1_CLAMP_POLICY(SQ_TEX_CLAMP_POLICY_OGL); so->tex2 = A2XX_SQ_TEX_2_HEIGHT(prsc->height0 - 1) | A2XX_SQ_TEX_2_WIDTH(prsc->width0 - 1); so->tex3 = 
fd2_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g, cso->swizzle_b, cso->swizzle_a); + so->tex4 = + A2XX_SQ_TEX_4_MIP_MIN_LEVEL(fd_sampler_first_level(cso)) | + A2XX_SQ_TEX_4_MIP_MAX_LEVEL(fd_sampler_last_level(cso)); + + so->tex5 = A2XX_SQ_TEX_5_DIMENSION(tex_dimension(prsc->target)); + return &so->base; } @@ -188,6 +238,13 @@ return samp_id + ctx->tex[PIPE_SHADER_FRAGMENT].num_samplers; } +/* for reasons unknown, it appears ETC1 cubemap needs swapped xy coordinates */ +bool fd2_texture_swap_xy(struct fd_texture_stateobj *tex, unsigned samp_id) +{ + return tex->textures[samp_id]->format == PIPE_FORMAT_ETC1_RGB8 && + tex->textures[samp_id]->texture->target == PIPE_TEXTURE_CUBE; +} + void fd2_texture_init(struct pipe_context *pctx) { diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_texture.h mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_texture.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_texture.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_texture.h 2019-03-31 23:16:37.000000000 +0000 @@ -37,7 +37,7 @@ struct fd2_sampler_stateobj { struct pipe_sampler_state base; - uint32_t tex0, tex3, tex4, tex5; + uint32_t tex0, tex3, tex4; }; static inline struct fd2_sampler_stateobj * @@ -48,8 +48,7 @@ struct fd2_pipe_sampler_view { struct pipe_sampler_view base; - enum a2xx_sq_surfaceformat fmt; - uint32_t tex0, tex2, tex3; + uint32_t tex0, tex1, tex2, tex3, tex4, tex5; }; static inline struct fd2_pipe_sampler_view * @@ -61,6 +60,8 @@ unsigned fd2_get_const_idx(struct fd_context *ctx, struct fd_texture_stateobj *tex, unsigned samp_id); +bool fd2_texture_swap_xy(struct fd_texture_stateobj *tex, unsigned samp_id); + void fd2_texture_init(struct pipe_context *pctx); #endif /* FD2_TEXTURE_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_zsa.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_zsa.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_zsa.c 2018-12-07 
18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_zsa.c 2019-03-31 23:16:37.000000000 +0000 @@ -49,7 +49,8 @@ A2XX_RB_DEPTHCONTROL_ZFUNC(cso->depth.func); /* maps 1:1 */ if (cso->depth.enabled) - so->rb_depthcontrol |= A2XX_RB_DEPTHCONTROL_Z_ENABLE; + so->rb_depthcontrol |= A2XX_RB_DEPTHCONTROL_Z_ENABLE | + COND(!cso->alpha.enabled, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE); if (cso->depth.writemask) so->rb_depthcontrol |= A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/instr-a2xx.h mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/instr-a2xx.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/instr-a2xx.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/instr-a2xx.h 2019-03-31 23:16:37.000000000 +0000 @@ -87,6 +87,7 @@ SIN = 48, COS = 49, RETAIN_PREV = 50, + SCALAR_NONE = 63, } instr_scalar_opc_t; typedef enum { @@ -120,6 +121,7 @@ KILLNEv = 27, DSTv = 28, MOVAv = 29, + VECTOR_NONE = 31, } instr_vector_opc_t; typedef struct PACKED { @@ -161,9 +163,9 @@ }; /* constants have full 8-bit index */ struct { - uint8_t src3_reg_const : 8; - uint8_t src2_reg_const : 8; - uint8_t src1_reg_const : 8; + uint8_t src3_reg_byte : 8; + uint8_t src2_reg_byte : 8; + uint8_t src1_reg_byte : 8; }; }; instr_vector_opc_t vector_opc : 5; @@ -389,10 +391,17 @@ instr_fetch_opc_t opc : 5; uint32_t dummy0 : 27; /* dword1: */ - uint32_t dummy1 : 32; + uint32_t dummy1 : 31; + uint8_t pred_select : 1; /* dword2: */ - uint32_t dummy2 : 32; + uint32_t dummy2 : 31; + uint8_t pred_condition : 1; }; } instr_fetch_t; +typedef union PACKED { + instr_alu_t alu; + instr_fetch_t fetch; +} instr_t; + #endif /* INSTR_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_assemble.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_assemble.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_assemble.c 1970-01-01 00:00:00.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_assemble.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,548 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Jonathan Marek + */ + +#include "ir2_private.h" + +static unsigned +src_swizzle(struct ir2_context *ctx, struct ir2_src *src, unsigned ncomp) +{ + struct ir2_reg_component *comps; + unsigned swiz = 0; + + switch (src->type) { + case IR2_SRC_SSA: + case IR2_SRC_REG: + break; + default: + return src->swizzle; + } + /* we need to take into account where the components were allocated */ + comps = get_reg_src(ctx, src)->comp; + for (int i = 0; i < ncomp; i++) { + swiz |= swiz_set(comps[swiz_get(src->swizzle, i)].c, i); + } + return swiz; +} + +/* alu instr need to take into how the output components are allocated */ + +/* scalar doesn't need to take into account dest swizzle */ + +static unsigned +alu_swizzle_scalar(struct ir2_context *ctx, struct ir2_src *reg) +{ + /* hardware seems to take from W, but swizzle everywhere just in case */ + return swiz_merge(src_swizzle(ctx, reg, 1), IR2_SWIZZLE_XXXX); +} + +static unsigned +alu_swizzle(struct ir2_context *ctx, struct ir2_instr *instr, struct ir2_src *src) +{ + struct ir2_reg_component *comp = get_reg(instr)->comp; + unsigned swiz0 = src_swizzle(ctx, src, src_ncomp(instr)); + unsigned swiz = 0; + + /* non per component special cases */ + switch (instr->alu.vector_opc) { + case PRED_SETE_PUSHv ... 
PRED_SETGTE_PUSHv: + return alu_swizzle_scalar(ctx, src); + case DOT2ADDv: + case DOT3v: + case DOT4v: + case CUBEv: + return swiz0; + default: + break; + } + + for (int i = 0, j = 0; i < dst_ncomp(instr); j++) { + if (instr->alu.write_mask & 1 << j) { + if (comp[j].c != 7) + swiz |= swiz_set(i, comp[j].c); + i++; + } + } + return swiz_merge(swiz0, swiz); +} + +static unsigned +alu_swizzle_scalar2(struct ir2_context *ctx, struct ir2_src *src, unsigned s1) +{ + /* hardware seems to take from ZW, but swizzle everywhere (ABAB) */ + unsigned s0 = swiz_get(src_swizzle(ctx, src, 1), 0); + return swiz_merge(swiz_set(s0, 0) | swiz_set(s1, 1), IR2_SWIZZLE_XYXY); +} + +/* write_mask needs to be transformed by allocation information */ + +static unsigned +alu_write_mask(struct ir2_context *ctx, struct ir2_instr *instr) +{ + struct ir2_reg_component *comp = get_reg(instr)->comp; + unsigned write_mask = 0; + + for (int i = 0; i < 4; i++) { + if (instr->alu.write_mask & 1 << i) + write_mask |= 1 << comp[i].c; + } + + return write_mask; +} + +/* fetch instructions can swizzle dest, but src swizzle needs conversion */ + +static unsigned +fetch_swizzle(struct ir2_context *ctx, struct ir2_src *src, unsigned ncomp) +{ + unsigned alu_swiz = src_swizzle(ctx, src, ncomp); + unsigned swiz = 0; + for (int i = 0; i < ncomp; i++) + swiz |= swiz_get(alu_swiz, i) << i * 2; + return swiz; +} + +static unsigned +fetch_dst_swiz(struct ir2_context *ctx, struct ir2_instr *instr) +{ + struct ir2_reg_component *comp = get_reg(instr)->comp; + unsigned dst_swiz = 0xfff; + for (int i = 0; i < dst_ncomp(instr); i++) { + dst_swiz &= ~(7 << comp[i].c * 3); + dst_swiz |= i << comp[i].c * 3; + } + return dst_swiz; +} + +/* register / export # for instr */ +static unsigned +dst_to_reg(struct ir2_context *ctx, struct ir2_instr *instr) +{ + if (is_export(instr)) + return instr->alu.export; + + return get_reg(instr)->idx; +} + +/* register # for src */ +static unsigned src_to_reg(struct ir2_context *ctx, struct 
ir2_src *src) +{ + return get_reg_src(ctx, src)->idx; +} + +static unsigned src_reg_byte(struct ir2_context *ctx, struct ir2_src *src) +{ + if (src->type == IR2_SRC_CONST) { + assert(!src->abs); /* no abs bit for const */ + return src->num; + } + return src_to_reg(ctx, src) | (src->abs ? 0x80 : 0); +} + +/* produce the 12 byte binary instruction for a given sched_instr */ +static void +fill_instr(struct ir2_context *ctx, struct ir2_sched_instr *sched, + instr_t *bc, bool * is_fetch) +{ + struct ir2_instr *instr = sched->instr, *instr_s, *instr_v; + + *bc = (instr_t) {}; + + if (instr && instr->type == IR2_FETCH) { + *is_fetch = true; + + bc->fetch.opc = instr->fetch.opc; + bc->fetch.pred_select = !!instr->pred; + bc->fetch.pred_condition = instr->pred & 1; + + struct ir2_src *src = instr->src; + + if (instr->fetch.opc == VTX_FETCH) { + instr_fetch_vtx_t *vtx = &bc->fetch.vtx; + + assert(instr->fetch.vtx.const_idx <= 0x1f); + assert(instr->fetch.vtx.const_idx_sel <= 0x3); + + vtx->src_reg = src_to_reg(ctx, src); + vtx->src_swiz = fetch_swizzle(ctx, src, 1); + vtx->dst_reg = dst_to_reg(ctx, instr); + vtx->dst_swiz = fetch_dst_swiz(ctx, instr); + + vtx->must_be_one = 1; + vtx->const_index = instr->fetch.vtx.const_idx; + vtx->const_index_sel = instr->fetch.vtx.const_idx_sel; + + /* other fields will be patched */ + + /* XXX seems like every FETCH but the first has + * this bit set: + */ + vtx->reserved3 = instr->idx ? 0x1 : 0x0; + vtx->reserved0 = instr->idx ? 
0x2 : 0x3; + } else if (instr->fetch.opc == TEX_FETCH) { + instr_fetch_tex_t *tex = &bc->fetch.tex; + + tex->src_reg = src_to_reg(ctx, src); + tex->src_swiz = fetch_swizzle(ctx, src, 3); + tex->dst_reg = dst_to_reg(ctx, instr); + tex->dst_swiz = fetch_dst_swiz(ctx, instr); + /* tex->const_idx = patch_fetches */ + tex->mag_filter = TEX_FILTER_USE_FETCH_CONST; + tex->min_filter = TEX_FILTER_USE_FETCH_CONST; + tex->mip_filter = TEX_FILTER_USE_FETCH_CONST; + tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST; + tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST; + tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST; + tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST; + tex->use_comp_lod = ctx->so->type == MESA_SHADER_FRAGMENT; + tex->use_reg_lod = instr->src_count == 2; + tex->sample_location = SAMPLE_CENTER; + tex->tx_coord_denorm = instr->fetch.tex.is_rect; + } else if (instr->fetch.opc == TEX_SET_TEX_LOD) { + instr_fetch_tex_t *tex = &bc->fetch.tex; + + tex->src_reg = src_to_reg(ctx, src); + tex->src_swiz = fetch_swizzle(ctx, src, 1); + tex->dst_reg = 0; + tex->dst_swiz = 0xfff; + + tex->mag_filter = TEX_FILTER_USE_FETCH_CONST; + tex->min_filter = TEX_FILTER_USE_FETCH_CONST; + tex->mip_filter = TEX_FILTER_USE_FETCH_CONST; + tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST; + tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST; + tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST; + tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST; + tex->use_comp_lod = 1; + tex->use_reg_lod = 0; + tex->sample_location = SAMPLE_CENTER; + } else { + assert(0); + } + return; + } + + instr_v = sched->instr; + instr_s = sched->instr_s; + + if (instr_v) { + struct ir2_src src1, src2, *src3; + + src1 = instr_v->src[0]; + src2 = instr_v->src[instr_v->src_count > 1]; + src3 = instr_v->src_count == 3 ? 
&instr_v->src[2] : NULL; + + bc->alu.vector_opc = instr_v->alu.vector_opc; + bc->alu.vector_write_mask = alu_write_mask(ctx, instr_v); + bc->alu.vector_dest = dst_to_reg(ctx, instr_v); + bc->alu.vector_clamp = instr_v->alu.saturate; + bc->alu.export_data = instr_v->alu.export >= 0; + + /* single operand SETEv, use 0.0f as src2 */ + if (instr_v->src_count == 1 && + (bc->alu.vector_opc == SETEv || + bc->alu.vector_opc == SETNEv || + bc->alu.vector_opc == SETGTv || + bc->alu.vector_opc == SETGTEv)) + src2 = ir2_zero(ctx); + + /* export32 instr for a20x hw binning has this bit set.. + * it seems to do more than change the base address of constants + * XXX this is a hack + */ + bc->alu.relative_addr = + (bc->alu.export_data && bc->alu.vector_dest == 32); + + bc->alu.src1_reg_byte = src_reg_byte(ctx, &src1); + bc->alu.src1_swiz = alu_swizzle(ctx, instr_v, &src1); + bc->alu.src1_reg_negate = src1.negate; + bc->alu.src1_sel = src1.type != IR2_SRC_CONST; + + bc->alu.src2_reg_byte = src_reg_byte(ctx, &src2); + bc->alu.src2_swiz = alu_swizzle(ctx, instr_v, &src2); + bc->alu.src2_reg_negate = src2.negate; + bc->alu.src2_sel = src2.type != IR2_SRC_CONST; + + if (src3) { + bc->alu.src3_reg_byte = src_reg_byte(ctx, src3); + bc->alu.src3_swiz = alu_swizzle(ctx, instr_v, src3); + bc->alu.src3_reg_negate = src3->negate; + bc->alu.src3_sel = src3->type != IR2_SRC_CONST; + } + + bc->alu.pred_select = instr_v->pred; + } + + if (instr_s) { + struct ir2_src *src = instr_s->src; + + bc->alu.scalar_opc = instr_s->alu.scalar_opc; + bc->alu.scalar_write_mask = alu_write_mask(ctx, instr_s); + bc->alu.scalar_dest = dst_to_reg(ctx, instr_s); + bc->alu.scalar_clamp = instr_s->alu.saturate; + bc->alu.export_data = instr_s->alu.export >= 0; + + if (instr_s->src_count == 1) { + bc->alu.src3_reg_byte = src_reg_byte(ctx, src); + bc->alu.src3_swiz = alu_swizzle_scalar(ctx, src); + bc->alu.src3_reg_negate = src->negate; + bc->alu.src3_sel = src->type != IR2_SRC_CONST; + } else { + 
assert(instr_s->src_count == 2); + + bc->alu.src3_reg_byte = src_reg_byte(ctx, src); + bc->alu.src3_swiz = alu_swizzle_scalar2(ctx, src, instr_s->alu.src1_swizzle); + bc->alu.src3_reg_negate = src->negate; + bc->alu.src3_sel = src->type != IR2_SRC_CONST;; + } + + if (instr_v) + assert(instr_s->pred == instr_v->pred); + bc->alu.pred_select = instr_s->pred; + } + + *is_fetch = false; + return; +} + +static unsigned +write_cfs(struct ir2_context *ctx, instr_cf_t * cfs, unsigned cf_idx, + instr_cf_alloc_t *alloc, instr_cf_exec_t *exec) +{ + assert(exec->count); + + if (alloc) + cfs[cf_idx++].alloc = *alloc; + + /* for memory alloc offset for patching */ + if (alloc && alloc->buffer_select == SQ_MEMORY && + ctx->info->mem_export_ptr == -1) + ctx->info->mem_export_ptr = cf_idx / 2 * 3; + + cfs[cf_idx++].exec = *exec; + exec->address += exec->count; + exec->serialize = 0; + exec->count = 0; + + return cf_idx; +} + +/* assemble the final shader */ +void assemble(struct ir2_context *ctx, bool binning) +{ + /* hw seems to have a limit of 384 (num_cf/2+num_instr <= 384) + * address is 9 bits so could it be 512 ? 
+ */ + instr_cf_t cfs[384]; + instr_t bytecode[384], bc; + unsigned block_addr[128]; + unsigned num_cf = 0; + + /* CF instr state */ + instr_cf_exec_t exec = {.opc = EXEC}; + instr_cf_alloc_t alloc = {.opc = ALLOC}; + + int sync_id, sync_id_prev = -1; + bool is_fetch = false; + bool need_sync = true; + bool need_alloc = false; + unsigned block_idx = 0; + + ctx->info->mem_export_ptr = -1; + ctx->info->num_fetch_instrs = 0; + + /* vertex shader always needs to allocate at least one parameter + * if it will never happen, + */ + if (ctx->so->type == MESA_SHADER_VERTEX && ctx->f->inputs_count == 0) { + alloc.buffer_select = SQ_PARAMETER_PIXEL; + cfs[num_cf++].alloc = alloc; + } + + block_addr[0] = 0; + + for (int i = 0, j = 0; j < ctx->instr_sched_count; j++) { + struct ir2_instr *instr = ctx->instr_sched[j].instr; + + /* catch IR2_CF since it isn't a regular instruction */ + if (instr && instr->type == IR2_CF) { + assert(!need_alloc); /* XXX */ + + /* flush any exec cf before inserting jmp */ + if (exec.count) + num_cf = write_cfs(ctx, cfs, num_cf, NULL, &exec); + + cfs[num_cf++].jmp_call = (instr_cf_jmp_call_t) { + .opc = COND_JMP, + .address = instr->cf.block_idx, /* will be fixed later */ + .force_call = !instr->pred, + .predicated_jmp = 1, + .direction = instr->cf.block_idx > instr->block_idx, + .condition = instr->pred & 1, + }; + continue; + } + + /* fill the 3 dwords for the instruction */ + fill_instr(ctx, &ctx->instr_sched[j], &bc, &is_fetch); + + /* we need to sync between ALU/VTX_FETCH/TEX_FETCH types */ + sync_id = 0; + if (is_fetch) + sync_id = bc.fetch.opc == VTX_FETCH ? 
1 : 2; + + need_sync = sync_id != sync_id_prev; + sync_id_prev = sync_id; + + unsigned block; + { + + if (ctx->instr_sched[j].instr) + block = ctx->instr_sched[j].instr->block_idx; + else + block = ctx->instr_sched[j].instr_s->block_idx; + + assert(block_idx <= block); + } + + /* info for patching */ + if (is_fetch) { + struct ir2_fetch_info *info = + &ctx->info->fetch_info[ctx->info->num_fetch_instrs++]; + info->offset = i * 3; /* add cf offset later */ + + if (bc.fetch.opc == VTX_FETCH) { + info->vtx.dst_swiz = bc.fetch.vtx.dst_swiz; + } else if (bc.fetch.opc == TEX_FETCH) { + info->tex.samp_id = instr->fetch.tex.samp_id; + info->tex.src_swiz = bc.fetch.tex.src_swiz; + } else { + ctx->info->num_fetch_instrs--; + } + } + + /* exec cf after 6 instr or when switching between fetch / alu */ + if (exec.count == 6 || (exec.count && (need_sync || block != block_idx))) { + num_cf = write_cfs(ctx, cfs, num_cf, need_alloc ? &alloc : NULL, &exec); + need_alloc = false; + } + + /* update block_addrs for jmp patching */ + while (block_idx < block) + block_addr[++block_idx] = num_cf; + + /* export - fill alloc cf */ + if (!is_fetch && bc.alu.export_data) { + /* get the export buffer from either vector/scalar dest */ + instr_alloc_type_t buffer = + export_buf(bc.alu.vector_dest); + if (bc.alu.scalar_write_mask) { + if (bc.alu.vector_write_mask) + assert(buffer == export_buf(bc.alu.scalar_dest)); + buffer = export_buf(bc.alu.scalar_dest); + } + + /* flush previous alloc if the buffer changes */ + bool need_new_alloc = buffer != alloc.buffer_select; + + /* memory export always in 32/33 pair, new alloc on 32 */ + if (bc.alu.vector_dest == 32) + need_new_alloc = true; + + if (need_new_alloc && exec.count) { + num_cf = write_cfs(ctx, cfs, num_cf, need_alloc ? 
&alloc : NULL, &exec); + need_alloc = false; + } + + need_alloc |= need_new_alloc; + + alloc.size = 0; + alloc.buffer_select = buffer; + + if (buffer == SQ_PARAMETER_PIXEL && ctx->so->type == MESA_SHADER_VERTEX) + alloc.size = ctx->f->inputs_count - 1; + + if (buffer == SQ_POSITION) + alloc.size = ctx->so->writes_psize; + } + + if (is_fetch) + exec.serialize |= 0x1 << exec.count * 2; + if (need_sync) + exec.serialize |= 0x2 << exec.count * 2; + + need_sync = false; + exec.count += 1; + bytecode[i++] = bc; + } + + /* final exec cf */ + exec.opc = EXEC_END; + num_cf = + write_cfs(ctx, cfs, num_cf, need_alloc ? &alloc : NULL, &exec); + + /* insert nop to get an even # of CFs */ + if (num_cf % 2) + cfs[num_cf++] = (instr_cf_t) { + .opc = NOP}; + + /* patch cf addrs */ + for (int idx = 0; idx < num_cf; idx++) { + switch (cfs[idx].opc) { + case NOP: + case ALLOC: + break; + case EXEC: + case EXEC_END: + cfs[idx].exec.address += num_cf / 2; + break; + case COND_JMP: + cfs[idx].jmp_call.address = block_addr[cfs[idx].jmp_call.address]; + break; + default: + assert(0); + } + } + + /* concatenate cfs and alu/fetch */ + uint32_t cfdwords = num_cf / 2 * 3; + uint32_t alufetchdwords = exec.address * 3; + uint32_t sizedwords = cfdwords + alufetchdwords; + uint32_t *dwords = malloc(sizedwords * 4); + assert(dwords); + memcpy(dwords, cfs, cfdwords * 4); + memcpy(&dwords[cfdwords], bytecode, alufetchdwords * 4); + + /* finalize ir2_shader_info */ + ctx->info->dwords = dwords; + ctx->info->sizedwords = sizedwords; + for (int i = 0; i < ctx->info->num_fetch_instrs; i++) + ctx->info->fetch_info[i].offset += cfdwords; + + if (fd_mesa_debug & FD_DBG_DISASM) { + DBG("disassemble: type=%d", ctx->so->type); + disasm_a2xx(dwords, sizedwords, 0, ctx->so->type); + } +} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2.c 1970-01-01 00:00:00.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,442 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Jonathan Marek + */ + +#include "ir2_private.h" + +static bool scalar_possible(struct ir2_instr *instr) +{ + if (instr->alu.scalar_opc == SCALAR_NONE) + return false; + + return src_ncomp(instr) == 1; +} + +static bool is_alu_compatible(struct ir2_instr *a, struct ir2_instr *b) +{ + if (!a) + return true; + + /* dont use same instruction twice */ + if (a == b) + return false; + + /* PRED_SET must be alone */ + if (b->alu.scalar_opc >= PRED_SETEs && + b->alu.scalar_opc <= PRED_SET_RESTOREs) + return false; + + /* must write to same export (issues otherwise?) 
*/ + return a->alu.export == b->alu.export; +} + +/* priority of vector instruction for scheduling (lower=higher prio) */ +static unsigned alu_vector_prio(struct ir2_instr *instr) +{ + if (instr->alu.vector_opc == VECTOR_NONE) + return ~0u; + + if (is_export(instr)) + return 4; + + /* TODO check src type and ncomps */ + if (instr->src_count == 3) + return 0; + + if (!scalar_possible(instr)) + return 1; + + return instr->src_count == 2 ? 2 : 3; +} + +/* priority of scalar instruction for scheduling (lower=higher prio) */ +static unsigned alu_scalar_prio(struct ir2_instr *instr) +{ + if (!scalar_possible(instr)) + return ~0u; + + /* this case is dealt with later */ + if (instr->src_count > 1) + return ~0u; + + if (is_export(instr)) + return 4; + + /* PRED to end of block */ + if (instr->alu.scalar_opc >= PRED_SETEs && + instr->alu.scalar_opc <= PRED_SET_RESTOREs) + return 5; + + /* scalar only have highest priority */ + return instr->alu.vector_opc == VECTOR_NONE ? 0 : 3; +} + +/* this is a bit messy: + * we want to find a slot where we can insert a scalar MOV with + * a vector instruction that was already scheduled + */ +static struct ir2_sched_instr* +insert(struct ir2_context *ctx, unsigned block_idx, unsigned reg_idx, + struct ir2_src src1, unsigned *comp) +{ + struct ir2_sched_instr *sched = NULL, *s; + unsigned i, mask = 0xf; + + /* go first earliest point where the mov can be inserted */ + for (i = ctx->instr_sched_count-1; i > 0; i--) { + s = &ctx->instr_sched[i - 1]; + + if (s->instr && s->instr->block_idx != block_idx) + break; + if (s->instr_s && s->instr_s->block_idx != block_idx) + break; + + if (src1.type == IR2_SRC_SSA) { + if ((s->instr && s->instr->idx == src1.num) || + (s->instr_s && s->instr_s->idx == src1.num)) + break; + } + + unsigned mr = ~(s->reg_state[reg_idx/8] >> reg_idx%8*4 & 0xf); + if ((mask & mr) == 0) + break; + + mask &= mr; + if (s->instr_s || s->instr->src_count == 3) + continue; + + if (s->instr->type != IR2_ALU || 
s->instr->alu.export >= 0) + continue; + + sched = s; + } + *comp = ffs(mask) - 1; + return sched; +} + +/* case1: + * in this case, insert a mov to place the 2nd src into to same reg + * (scalar sources come from the same register) + * + * this is a common case which works when one of the srcs is input/const + * but for instrs which have 2 ssa/reg srcs, then its not ideal + */ +static bool +scalarize_case1(struct ir2_context *ctx, struct ir2_instr *instr, bool order) +{ + struct ir2_src src0 = instr->src[ order]; + struct ir2_src src1 = instr->src[!order]; + struct ir2_sched_instr *sched; + struct ir2_instr *ins; + struct ir2_reg *reg; + unsigned idx, comp; + + switch (src0.type) { + case IR2_SRC_CONST: + case IR2_SRC_INPUT: + return false; + default: + break; + } + + /* TODO, insert needs logic for this */ + if (src1.type == IR2_SRC_REG) + return false; + + /* we could do something if they match src1.. */ + if (src0.negate || src0.abs) + return false; + + reg = get_reg_src(ctx, &src0); + + /* result not used more since we will overwrite */ + for (int i = 0; i < 4; i++) + if (reg->comp[i].ref_count != !!(instr->alu.write_mask & 1 << i)) + return false; + + /* find a place to insert the mov */ + sched = insert(ctx, instr->block_idx, reg->idx, src1, &comp); + if (!sched) + return false; + + ins = &ctx->instr[idx = ctx->instr_count++]; + ins->idx = idx; + ins->type = IR2_ALU; + ins->src[0] = src1; + ins->src_count = 1; + ins->is_ssa = true; + ins->ssa.idx = reg->idx; + ins->ssa.ncomp = 1; + ins->ssa.comp[0].c = comp; + ins->alu.scalar_opc = MAXs; + ins->alu.export = -1; + ins->alu.write_mask = 1; + ins->pred = instr->pred; + ins->block_idx = instr->block_idx; + + instr->src[0] = src0; + instr->alu.src1_swizzle = comp; + + sched->instr_s = ins; + return true; +} + +/* fill sched with next fetch or (vector and/or scalar) alu instruction */ +static int sched_next(struct ir2_context *ctx, struct ir2_sched_instr *sched) +{ + struct ir2_instr *avail[0x100], *instr_v = 
NULL, *instr_s = NULL; + unsigned avail_count = 0; + + instr_alloc_type_t export = ~0u; + int block_idx = -1; + + /* XXX merge this loop with the other one somehow? */ + ir2_foreach_instr(instr, ctx) { + if (!instr->need_emit) + continue; + if (is_export(instr)) + export = MIN2(export, export_buf(instr->alu.export)); + } + + ir2_foreach_instr(instr, ctx) { + if (!instr->need_emit) + continue; + + /* dont mix exports */ + if (is_export(instr) && export_buf(instr->alu.export) != export) + continue; + + if (block_idx < 0) + block_idx = instr->block_idx; + else if (block_idx != instr->block_idx || /* must be same block */ + instr->type == IR2_CF || /* CF/MEM must be alone */ + (is_export(instr) && export == SQ_MEMORY)) + break; + /* it works because IR2_CF is always at end of block + * and somewhat same idea with MEM exports, which might not be alone + * but will end up in-order at least + */ + + /* check if dependencies are satisfied */ + bool is_ok = true; + ir2_foreach_src(src, instr) { + if (src->type == IR2_SRC_REG) { + /* need to check if all previous instructions in the block + * which write the reg have been emitted + * slow.. 
+ * XXX: check components instead of whole register + */ + struct ir2_reg *reg = get_reg_src(ctx, src); + ir2_foreach_instr(p, ctx) { + if (!p->is_ssa && p->reg == reg && p->idx < instr->idx) + is_ok &= !p->need_emit; + } + } else if (src->type == IR2_SRC_SSA) { + /* in this case its easy, just check need_emit */ + is_ok &= !ctx->instr[src->num].need_emit; + } + } + if (!is_ok) + continue; + + avail[avail_count++] = instr; + } + + if (!avail_count) { + assert(block_idx == -1); + return -1; + } + + /* priority to FETCH instructions */ + ir2_foreach_avail(instr) { + if (instr->type == IR2_ALU) + continue; + + ra_src_free(ctx, instr); + ra_reg(ctx, get_reg(instr), -1, false, 0); + + instr->need_emit = false; + sched->instr = instr; + sched->instr_s = NULL; + return block_idx; + } + + /* TODO precompute priorities */ + + unsigned prio_v = ~0u, prio_s = ~0u, prio; + ir2_foreach_avail(instr) { + prio = alu_vector_prio(instr); + if (prio < prio_v) { + instr_v = instr; + prio_v = prio; + } + } + + /* TODO can still insert scalar if src_count=3, if smart about it */ + if (!instr_v || instr_v->src_count < 3) { + ir2_foreach_avail(instr) { + bool compat = is_alu_compatible(instr_v, instr); + + prio = alu_scalar_prio(instr); + if (prio >= prio_v && !compat) + continue; + + if (prio < prio_s) { + instr_s = instr; + prio_s = prio; + if (!compat) + instr_v = NULL; + } + } + } + + assert(instr_v || instr_s); + + /* now, we try more complex insertion of vector instruction as scalar + * TODO: if we are smart we can still insert if instr_v->src_count==3 + */ + if (!instr_s && instr_v->src_count < 3) { + ir2_foreach_avail(instr) { + if (!is_alu_compatible(instr_v, instr) || !scalar_possible(instr)) + continue; + + /* at this point, src_count should always be 2 */ + assert(instr->src_count == 2); + + if (scalarize_case1(ctx, instr, 0)) { + instr_s = instr; + break; + } + if (scalarize_case1(ctx, instr, 1)) { + instr_s = instr; + break; + } + } + } + + /* free src registers */ + if 
(instr_v) { + instr_v->need_emit = false; + ra_src_free(ctx, instr_v); + } + + if (instr_s) { + instr_s->need_emit = false; + ra_src_free(ctx, instr_s); + } + + /* allocate dst registers */ + if (instr_v) + ra_reg(ctx, get_reg(instr_v), -1, is_export(instr_v), instr_v->alu.write_mask); + + if (instr_s) + ra_reg(ctx, get_reg(instr_s), -1, is_export(instr_s), instr_s->alu.write_mask); + + sched->instr = instr_v; + sched->instr_s = instr_s; + return block_idx; +} + +/* scheduling: determine order of instructions */ +static void schedule_instrs(struct ir2_context *ctx) +{ + struct ir2_sched_instr *sched; + int block_idx; + + /* allocate input registers */ + for (unsigned idx = 0; idx < ARRAY_SIZE(ctx->input); idx++) + if (ctx->input[idx].initialized) + ra_reg(ctx, &ctx->input[idx], idx, false, 0); + + for (;;) { + sched = &ctx->instr_sched[ctx->instr_sched_count++]; + block_idx = sched_next(ctx, sched); + if (block_idx < 0) + break; + memcpy(sched->reg_state, ctx->reg_state, sizeof(ctx->reg_state)); + + /* catch texture fetch after scheduling and insert the + * SET_TEX_LOD right before it if necessary + * TODO clean this up + */ + struct ir2_instr *instr = sched->instr, *tex_lod; + if (instr && instr->type == IR2_FETCH && + instr->fetch.opc == TEX_FETCH && instr->src_count == 2) { + /* generate the SET_LOD instruction */ + tex_lod = &ctx->instr[ctx->instr_count++]; + tex_lod->type = IR2_FETCH; + tex_lod->block_idx = instr->block_idx; + tex_lod->pred = instr->pred; + tex_lod->fetch.opc = TEX_SET_TEX_LOD; + tex_lod->src[0] = instr->src[1]; + tex_lod->src_count = 1; + + sched[1] = sched[0]; + sched->instr = tex_lod; + ctx->instr_sched_count++; + } + + bool free_block = true; + ir2_foreach_instr(instr, ctx) + free_block &= instr->block_idx != block_idx; + if (free_block) + ra_block_free(ctx, block_idx); + }; + ctx->instr_sched_count--; +} + +void +ir2_compile(struct fd2_shader_stateobj *so, unsigned variant, + struct fd2_shader_stateobj *fp) +{ + struct ir2_context ctx = { 
}; + bool binning = !fp && so->type == MESA_SHADER_VERTEX; + + if (fp) + so->variant[variant].f = fp->variant[0].f; + + ctx.so = so; + ctx.info = &so->variant[variant].info; + ctx.f = &so->variant[variant].f; + ctx.info->max_reg = -1; + + /* convert nir to internal representation */ + ir2_nir_compile(&ctx, binning); + + /* copy propagate srcs */ + cp_src(&ctx); + + /* get ref_counts and kill non-needed instructions */ + ra_count_refs(&ctx); + + /* remove movs used to write outputs */ + cp_export(&ctx); + + /* instruction order.. and vector->scalar conversions */ + schedule_instrs(&ctx); + + /* finally, assemble to bitcode */ + assemble(&ctx, binning); +} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_cp.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_cp.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_cp.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_cp.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,225 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Jonathan Marek + */ + +#include "ir2_private.h" + +static bool is_mov(struct ir2_instr *instr) +{ + return instr->type == IR2_ALU && instr->alu.vector_opc == MAXv && + instr->src_count == 1; +} + +static void src_combine(struct ir2_src *src, struct ir2_src b) +{ + src->num = b.num; + src->type = b.type; + src->swizzle = swiz_merge(b.swizzle, src->swizzle); + if (!src->abs) /* if we have abs we don't care about previous negate */ + src->negate ^= b.negate; + src->abs |= b.abs; +} + +/* cp_src: replace src regs when they refer to a mov instruction + * example: + * ALU: MAXv R7 = C7, C7 + * ALU: MULADDv R7 = R7, R10, R0.xxxx + * becomes: + * ALU: MULADDv R7 = C7, R10, R0.xxxx + */ +void cp_src(struct ir2_context *ctx) +{ + struct ir2_instr *p; + + ir2_foreach_instr(instr, ctx) { + ir2_foreach_src(src, instr) { + /* loop to replace recursively */ + do { + if (src->type != IR2_SRC_SSA) + break; + + p = &ctx->instr[src->num]; + /* don't work across blocks to avoid possible issues */ + if (p->block_idx != instr->block_idx) + break; + + if (!is_mov(p)) + break; + + /* cant apply abs to const src, const src only for alu */ + if (p->src[0].type == IR2_SRC_CONST && + (src->abs || instr->type != IR2_ALU)) + break; + + src_combine(src, p->src[0]); + } while (1); + } + } +} + +/* cp_export: replace mov to export when possible + * in the cp_src pass we bypass any mov instructions related + * to the src registers, but for exports for need something different + * example: + * ALU: MAXv R3.x___ = C9.x???, C9.x??? + * ALU: MAXv R3._y__ = R0.?x??, C8.?x?? 
+ * ALU: MAXv export0 = R3.yyyx, R3.yyyx + * becomes: + * ALU: MAXv export0.___w = C9.???x, C9.???x + * ALU: MAXv export0.xyz_ = R0.xxx?, C8.xxx? + * + */ +void cp_export(struct ir2_context *ctx) +{ + struct ir2_instr *c[4], *ins[4]; + struct ir2_src *src; + struct ir2_reg *reg; + unsigned ncomp; + + ir2_foreach_instr(instr, ctx) { + if (!is_export(instr)) /* TODO */ + continue; + + if (!is_mov(instr)) + continue; + + src = &instr->src[0]; + + if (src->negate || src->abs) /* TODO handle these cases */ + continue; + + if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST) + continue; + + reg = get_reg_src(ctx, src); + ncomp = dst_ncomp(instr); + + unsigned reswiz[4] = {}; + unsigned num_instr = 0; + + /* fill array c with pointers to instrs that write each component */ + if (src->type == IR2_SRC_SSA) { + struct ir2_instr *instr = &ctx->instr[src->num]; + + if (instr->type != IR2_ALU) + continue; + + for (int i = 0; i < ncomp; i++) + c[i] = instr; + + ins[num_instr++] = instr; + reswiz[0] = src->swizzle; + } else { + bool ok = true; + unsigned write_mask = 0; + + ir2_foreach_instr(instr, ctx) { + if (instr->is_ssa || instr->reg != reg) + continue; + + /* set by non-ALU */ + if (instr->type != IR2_ALU) { + ok = false; + break; + } + + /* component written more than once */ + if (write_mask & instr->alu.write_mask) { + ok = false; + break; + } + + write_mask |= instr->alu.write_mask; + + /* src pointers for components */ + for (int i = 0, j = 0; i < 4; i++) { + unsigned k = swiz_get(src->swizzle, i); + if (instr->alu.write_mask & 1 << k) { + c[i] = instr; + + /* reswiz = compressed src->swizzle */ + unsigned x = 0; + for (int i = 0; i < k; i++) + x += !!(instr->alu.write_mask & 1 << i); + + assert(src->swizzle || x == j); + reswiz[num_instr] |= swiz_set(x, j++); + } + } + ins[num_instr++] = instr; + } + if (!ok) + continue; + } + + bool redirect = true; + + /* must all be in same block */ + for (int i = 0; i < ncomp; i++) + redirect &= (c[i]->block_idx == 
instr->block_idx); + + /* no other instr using the value */ + ir2_foreach_instr(p, ctx) { + if (p == instr) + continue; + ir2_foreach_src(src, p) + redirect &= reg != get_reg_src(ctx, src); + } + + if (!redirect) + continue; + + /* redirect the instructions writing to the register */ + for (int i = 0; i < num_instr; i++) { + struct ir2_instr *p = ins[i]; + + p->alu.export = instr->alu.export; + p->alu.write_mask = 0; + p->is_ssa = true; + p->ssa.ncomp = 0; + memset(p->ssa.comp, 0, sizeof(p->ssa.comp)); + + switch (instr->alu.vector_opc) { + case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv: + case DOT2ADDv: + case DOT3v: + case DOT4v: + case CUBEv: + continue; + default: + break; + } + ir2_foreach_src(s, p) + swiz_merge_p(&s->swizzle, reswiz[i]); + } + + for (int i = 0; i < ncomp; i++) { + c[i]->alu.write_mask |= (1 << i); + c[i]->ssa.ncomp++; + } + instr->type = IR2_NONE; + instr->need_emit = false; + } +} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2.h mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Jonathan Marek + */ + +#ifndef IR2_H_ +#define IR2_H_ + +#include "compiler/nir/nir.h" + +struct ir2_fetch_info { + /* dword offset of the fetch instruction */ + uint16_t offset; + union { + /* swizzle to merge with tgsi swizzle */ + struct { + uint16_t dst_swiz; + } vtx; + /* sampler id to patch const_idx */ + struct { + uint16_t samp_id; + uint8_t src_swiz; + } tex; + }; +}; + +struct ir2_shader_info { + /* compiler shader */ + uint32_t *dwords; + + /* size of the compiled shader in dwords */ + uint16_t sizedwords; + + /* highest GPR # used by shader */ + int8_t max_reg; + + /* offset in dwords of first MEMORY export CF (for a20x hw binning) */ + int16_t mem_export_ptr; + + /* fetch instruction info for patching */ + uint16_t num_fetch_instrs; + struct ir2_fetch_info fetch_info[64]; +}; + +struct ir2_frag_linkage { + unsigned inputs_count; + struct { + uint8_t slot; + uint8_t ncomp; + } inputs[16]; + + /* driver_location of fragcoord.zw, -1 if not used */ + int fragcoord; +}; + +struct ir2_shader_variant { + struct ir2_shader_info info; + struct ir2_frag_linkage f; +}; + +struct fd2_shader_stateobj; +struct tgsi_token; + +void ir2_compile(struct fd2_shader_stateobj *so, unsigned variant, + struct fd2_shader_stateobj *fp); + +struct nir_shader *ir2_tgsi_to_nir(const struct tgsi_token *tokens); + +const nir_shader_compiler_options *ir2_get_compiler_options(void); + +int ir2_optimize_nir(nir_shader *s, bool lower); + +#endif /* IR2_H_ */ diff -Nru 
mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_nir.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_nir.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_nir.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_nir.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,1174 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Jonathan Marek + */ + +#include "ir2_private.h" +#include "nir/tgsi_to_nir.h" + +#include "freedreno_util.h" +#include "fd2_program.h" + +static const nir_shader_compiler_options options = { + .lower_fpow = true, + .lower_flrp32 = true, + .lower_fmod32 = true, + .lower_fdiv = true, + .lower_fceil = true, + .fuse_ffma = true, + /* .fdot_replicates = true, it is replicated, but it makes things worse */ + .lower_all_io_to_temps = true, + .vertex_id_zero_based = true, /* its not implemented anyway */ +}; + +struct nir_shader * +ir2_tgsi_to_nir(const struct tgsi_token *tokens) +{ + return tgsi_to_nir(tokens, &options); +} + +const nir_shader_compiler_options * +ir2_get_compiler_options(void) +{ + return &options; +} + +#define OPT(nir, pass, ...) ({ \ + bool this_progress = false; \ + NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ + this_progress; \ +}) +#define OPT_V(nir, pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__) + +static void +ir2_optimize_loop(nir_shader *s) +{ + bool progress; + do { + progress = false; + + OPT_V(s, nir_lower_vars_to_ssa); + progress |= OPT(s, nir_opt_copy_prop_vars); + progress |= OPT(s, nir_copy_prop); + progress |= OPT(s, nir_opt_dce); + progress |= OPT(s, nir_opt_cse); + /* progress |= OPT(s, nir_opt_gcm, true); */ + progress |= OPT(s, nir_opt_peephole_select, UINT_MAX, true); + progress |= OPT(s, nir_opt_intrinsics); + progress |= OPT(s, nir_opt_algebraic); + progress |= OPT(s, nir_opt_constant_folding); + progress |= OPT(s, nir_opt_dead_cf); + if (OPT(s, nir_opt_trivial_continues)) { + progress |= true; + /* If nir_opt_trivial_continues makes progress, then we need to clean + * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll + * to make progress. 
+ */ + OPT(s, nir_copy_prop); + OPT(s, nir_opt_dce); + } + progress |= OPT(s, nir_opt_loop_unroll, nir_var_all); + progress |= OPT(s, nir_opt_if); + progress |= OPT(s, nir_opt_remove_phis); + progress |= OPT(s, nir_opt_undef); + + } + while (progress); +} + +/* trig workarounds is the same as ir3.. but we don't want to include ir3 */ +bool ir3_nir_apply_trig_workarounds(nir_shader * shader); + +int +ir2_optimize_nir(nir_shader *s, bool lower) +{ + struct nir_lower_tex_options tex_options = { + .lower_txp = ~0u, + .lower_rect = 0, + }; + + if (fd_mesa_debug & FD_DBG_DISASM) { + debug_printf("----------------------\n"); + nir_print_shader(s, stdout); + debug_printf("----------------------\n"); + } + + OPT_V(s, nir_opt_global_to_local); + OPT_V(s, nir_lower_regs_to_ssa); + OPT_V(s, nir_lower_vars_to_ssa); + OPT_V(s, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out); + + if (lower) { + OPT_V(s, ir3_nir_apply_trig_workarounds); + OPT_V(s, nir_lower_tex, &tex_options); + } + + ir2_optimize_loop(s); + + OPT_V(s, nir_remove_dead_variables, nir_var_function_temp); + OPT_V(s, nir_move_load_const); + + /* TODO we dont want to get shaders writing to depth for depth textures */ + if (s->info.stage == MESA_SHADER_FRAGMENT) { + nir_foreach_variable(var, &s->outputs) { + if (var->data.location == FRAG_RESULT_DEPTH) + return -1; + } + } + + return 0; +} + +static struct ir2_src +load_const(struct ir2_context *ctx, float *value_f, unsigned ncomp) +{ + struct fd2_shader_stateobj *so = ctx->so; + unsigned imm_ncomp, swiz, idx, i, j; + uint32_t *value = (uint32_t*) value_f; + + /* try to merge with existing immediate (TODO: try with neg) */ + for (idx = 0; idx < so->num_immediates; idx++) { + swiz = 0; + imm_ncomp = so->immediates[idx].ncomp; + for (i = 0; i < ncomp; i++) { + for (j = 0; j < imm_ncomp; j++) { + if (value[i] == so->immediates[idx].val[j]) + break; + } + if (j == imm_ncomp) { + if (j == 4) + break; + so->immediates[idx].val[imm_ncomp++] = value[i]; + } + 
swiz |= swiz_set(j, i); + } + /* matched all components */ + if (i == ncomp) + break; + } + + /* need to allocate new immediate */ + if (idx == so->num_immediates) { + swiz = 0; + imm_ncomp = 0; + for (i = 0; i < ncomp; i++) { + for (j = 0; j < imm_ncomp; j++) { + if (value[i] == ctx->so->immediates[idx].val[j]) + break; + } + if (j == imm_ncomp) { + so->immediates[idx].val[imm_ncomp++] = value[i]; + } + swiz |= swiz_set(j, i); + } + so->num_immediates++; + } + so->immediates[idx].ncomp = imm_ncomp; + + if (ncomp == 1) + swiz = swiz_merge(swiz, IR2_SWIZZLE_XXXX); + + return ir2_src(so->first_immediate + idx, swiz, IR2_SRC_CONST); +} + +struct ir2_src +ir2_zero(struct ir2_context *ctx) +{ + return load_const(ctx, (float[]) {0.0f}, 1); +} + +static void +update_range(struct ir2_context *ctx, struct ir2_reg *reg) +{ + if (!reg->initialized) { + reg->initialized = true; + reg->loop_depth = ctx->loop_depth; + } + + if (ctx->loop_depth > reg->loop_depth) { + reg->block_idx_free = ctx->loop_last_block[reg->loop_depth + 1]; + } else { + reg->loop_depth = ctx->loop_depth; + reg->block_idx_free = -1; + } + + /* for regs we want to free at the end of the loop in any case + * XXX dont do this for ssa + */ + if (reg->loop_depth) + reg->block_idx_free = ctx->loop_last_block[reg->loop_depth]; +} + +static struct ir2_src +make_src(struct ir2_context *ctx, nir_src src) +{ + struct ir2_src res = {}; + struct ir2_reg *reg; + + nir_const_value *const_value = nir_src_as_const_value(src); + + if (const_value) { + assert(src.is_ssa); + return load_const(ctx, &const_value->f32[0], src.ssa->num_components); + } + + if (!src.is_ssa) { + res.num = src.reg.reg->index; + res.type = IR2_SRC_REG; + reg = &ctx->reg[res.num]; + } else { + assert(ctx->ssa_map[src.ssa->index] >= 0); + res.num = ctx->ssa_map[src.ssa->index]; + res.type = IR2_SRC_SSA; + reg = &ctx->instr[res.num].ssa; + } + + update_range(ctx, reg); + return res; +} + +static void +set_index(struct ir2_context *ctx, nir_dest * dst, + 
struct ir2_instr *instr) +{ + struct ir2_reg *reg = &instr->ssa; + + if (dst->is_ssa) { + ctx->ssa_map[dst->ssa.index] = instr->idx; + } else { + assert(instr->is_ssa); + reg = &ctx->reg[dst->reg.reg->index]; + + instr->is_ssa = false; + instr->reg = reg; + } + update_range(ctx, reg); +} + +static struct ir2_instr * +ir2_instr_create(struct ir2_context *ctx, int type) +{ + struct ir2_instr *instr; + + instr = &ctx->instr[ctx->instr_count++]; + instr->idx = ctx->instr_count - 1; + instr->type = type; + instr->block_idx = ctx->block_idx; + instr->pred = ctx->pred; + instr->is_ssa = true; + return instr; +} + +static struct ir2_instr * +instr_create_alu(struct ir2_context *ctx, nir_op opcode, unsigned ncomp) +{ + /* emit_alu will fixup instrs that don't map directly */ + static const struct ir2_opc { + int8_t scalar, vector; + } nir_ir2_opc[nir_num_opcodes+1] = { + [0 ... nir_num_opcodes - 1] = {-1, -1}, + + [nir_op_fmov] = {MAXs, MAXv}, + [nir_op_fsign] = {-1, CNDGTEv}, + [nir_op_fnot] = {SETEs, SETEv}, + [nir_op_for] = {MAXs, MAXv}, + [nir_op_fand] = {MINs, MINv}, + [nir_op_fxor] = {-1, SETNEv}, + [nir_op_fadd] = {ADDs, ADDv}, + [nir_op_fsub] = {ADDs, ADDv}, + [nir_op_fmul] = {MULs, MULv}, + [nir_op_ffma] = {-1, MULADDv}, + [nir_op_fmax] = {MAXs, MAXv}, + [nir_op_fmin] = {MINs, MINv}, + [nir_op_ffloor] = {FLOORs, FLOORv}, + [nir_op_ffract] = {FRACs, FRACv}, + [nir_op_ftrunc] = {TRUNCs, TRUNCv}, + [nir_op_fdot2] = {-1, DOT2ADDv}, + [nir_op_fdot3] = {-1, DOT3v}, + [nir_op_fdot4] = {-1, DOT4v}, + [nir_op_sge] = {-1, SETGTEv}, + [nir_op_slt] = {-1, SETGTv}, + [nir_op_sne] = {-1, SETNEv}, + [nir_op_seq] = {-1, SETEv}, + [nir_op_fcsel] = {-1, CNDEv}, + [nir_op_frsq] = {RECIPSQ_IEEE, -1}, + [nir_op_frcp] = {RECIP_IEEE, -1}, + [nir_op_flog2] = {LOG_IEEE, -1}, + [nir_op_fexp2] = {EXP_IEEE, -1}, + [nir_op_fsqrt] = {SQRT_IEEE, -1}, + [nir_op_fcos] = {COS, -1}, + [nir_op_fsin] = {SIN, -1}, + /* no fsat, fneg, fabs since source mods deal with those */ + + /* some nir passes 
still generate nir_op_imov */ + [nir_op_imov] = {MAXs, MAXv}, + + /* so we can use this function with non-nir op */ +#define ir2_op_cube nir_num_opcodes + [ir2_op_cube] = {-1, CUBEv}, + }; + + struct ir2_opc op = nir_ir2_opc[opcode]; + assert(op.vector >= 0 || op.scalar >= 0); + + struct ir2_instr *instr = ir2_instr_create(ctx, IR2_ALU); + instr->alu.vector_opc = op.vector; + instr->alu.scalar_opc = op.scalar; + instr->alu.export = -1; + instr->alu.write_mask = (1 << ncomp) - 1; + instr->src_count = opcode == ir2_op_cube ? 2 : + nir_op_infos[opcode].num_inputs; + instr->ssa.ncomp = ncomp; + return instr; +} + +static struct ir2_instr * +instr_create_alu_reg(struct ir2_context *ctx, nir_op opcode, + uint8_t write_mask, struct ir2_instr *share_reg) +{ + struct ir2_instr *instr; + struct ir2_reg *reg; + + reg = share_reg ? share_reg->reg : &ctx->reg[ctx->reg_count++]; + reg->ncomp = MAX2(reg->ncomp, util_logbase2(write_mask) + 1); + + instr = instr_create_alu(ctx, opcode, util_bitcount(write_mask)); + instr->alu.write_mask = write_mask; + instr->reg = reg; + instr->is_ssa = false; + return instr; +} + + +static struct ir2_instr * +instr_create_alu_dest(struct ir2_context *ctx, nir_op opcode, nir_dest *dst) +{ + struct ir2_instr *instr; + instr = instr_create_alu(ctx, opcode, nir_dest_num_components(*dst)); + set_index(ctx, dst, instr); + return instr; +} + +static struct ir2_instr * +ir2_instr_create_fetch(struct ir2_context *ctx, nir_dest *dst, + instr_fetch_opc_t opc) +{ + struct ir2_instr *instr = ir2_instr_create(ctx, IR2_FETCH); + instr->fetch.opc = opc; + instr->src_count = 1; + instr->ssa.ncomp = nir_dest_num_components(*dst); + set_index(ctx, dst, instr); + return instr; +} + +static struct ir2_src +make_src_noconst(struct ir2_context *ctx, nir_src src) +{ + struct ir2_instr *instr; + + if (nir_src_as_const_value(src)) { + assert(src.is_ssa); + instr = instr_create_alu(ctx, nir_op_fmov, src.ssa->num_components); + instr->src[0] = make_src(ctx, src); + return 
ir2_src(instr->idx, 0, IR2_SRC_SSA); + } + + return make_src(ctx, src); +} + +static void +emit_alu(struct ir2_context *ctx, nir_alu_instr * alu) +{ + const nir_op_info *info = &nir_op_infos[alu->op]; + nir_dest *dst = &alu->dest.dest; + struct ir2_instr *instr; + struct ir2_src tmp; + unsigned ncomp; + + /* get the number of dst components */ + if (dst->is_ssa) { + ncomp = dst->ssa.num_components; + } else { + ncomp = 0; + for (int i = 0; i < 4; i++) + ncomp += !!(alu->dest.write_mask & 1 << i); + } + + instr = instr_create_alu(ctx, alu->op, ncomp); + set_index(ctx, dst, instr); + instr->alu.saturate = alu->dest.saturate; + instr->alu.write_mask = alu->dest.write_mask; + + for (int i = 0; i < info->num_inputs; i++) { + nir_alu_src *src = &alu->src[i]; + + /* compress swizzle with writemask when applicable */ + unsigned swiz = 0, j = 0; + for (int i = 0; i < 4; i++) { + if (!(alu->dest.write_mask & 1 << i) && !info->output_size) + continue; + swiz |= swiz_set(src->swizzle[i], j++); + } + + instr->src[i] = make_src(ctx, src->src); + instr->src[i].swizzle = swiz_merge(instr->src[i].swizzle, swiz); + instr->src[i].negate = src->negate; + instr->src[i].abs = src->abs; + } + + /* workarounds for NIR ops that don't map directly to a2xx ops */ + switch (alu->op) { + case nir_op_slt: + tmp = instr->src[0]; + instr->src[0] = instr->src[1]; + instr->src[1] = tmp; + break; + case nir_op_fcsel: + tmp = instr->src[1]; + instr->src[1] = instr->src[2]; + instr->src[2] = tmp; + break; + case nir_op_fsub: + instr->src[1].negate = !instr->src[1].negate; + break; + case nir_op_fdot2: + instr->src_count = 3; + instr->src[2] = ir2_zero(ctx); + break; + case nir_op_fsign: { + /* we need an extra instruction to deal with the zero case */ + struct ir2_instr *tmp; + + /* tmp = x == 0 ? 0 : 1 */ + tmp = instr_create_alu(ctx, nir_op_fcsel, ncomp); + tmp->src[0] = instr->src[0]; + tmp->src[1] = ir2_zero(ctx); + tmp->src[2] = load_const(ctx, (float[]) {1.0f}, 1); + + /* result = x >= 0 ? 
tmp : -tmp */ + instr->src[1] = ir2_src(tmp->idx, 0, IR2_SRC_SSA); + instr->src[2] = instr->src[1]; + instr->src[2].negate = true; + instr->src_count = 3; + } break; + default: + break; + } +} + +static void +load_input(struct ir2_context *ctx, nir_dest *dst, unsigned idx) +{ + struct ir2_instr *instr; + int slot = -1; + + if (ctx->so->type == MESA_SHADER_VERTEX) { + instr = ir2_instr_create_fetch(ctx, dst, 0); + instr->src[0] = ir2_src(0, 0, IR2_SRC_INPUT); + instr->fetch.vtx.const_idx = 20 + (idx / 3); + instr->fetch.vtx.const_idx_sel = idx % 3; + return; + } + + /* get slot from idx */ + nir_foreach_variable(var, &ctx->nir->inputs) { + if (var->data.driver_location == idx) { + slot = var->data.location; + break; + } + } + assert(slot >= 0); + + switch (slot) { + case VARYING_SLOT_PNTC: + /* need to extract with abs and invert y */ + instr = instr_create_alu_dest(ctx, nir_op_ffma, dst); + instr->src[0] = ir2_src(ctx->f->inputs_count, IR2_SWIZZLE_ZW, IR2_SRC_INPUT); + instr->src[0].abs = true; + instr->src[1] = load_const(ctx, (float[]) {1.0f, -1.0f}, 2); + instr->src[2] = load_const(ctx, (float[]) {0.0f, 1.0f}, 2); + break; + case VARYING_SLOT_POS: + /* need to extract xy with abs and add tile offset on a20x + * zw from fragcoord input (w inverted in fragment shader) + * TODO: only components that are required by fragment shader + */ + instr = instr_create_alu_reg(ctx, + ctx->so->is_a20x ? 
nir_op_fadd : nir_op_fmov, 3, NULL); + instr->src[0] = ir2_src(ctx->f->inputs_count, 0, IR2_SRC_INPUT); + instr->src[0].abs = true; + /* on a20x, C64 contains the tile offset */ + instr->src[1] = ir2_src(64, 0, IR2_SRC_CONST); + + instr = instr_create_alu_reg(ctx, nir_op_fmov, 4, instr); + instr->src[0] = ir2_src(ctx->f->fragcoord, 0, IR2_SRC_INPUT); + + instr = instr_create_alu_reg(ctx, nir_op_frcp, 8, instr); + instr->src[0] = ir2_src(ctx->f->fragcoord, IR2_SWIZZLE_Y, IR2_SRC_INPUT); + + unsigned reg_idx = instr->reg - ctx->reg; /* XXX */ + instr = instr_create_alu_dest(ctx, nir_op_fmov, dst); + instr->src[0] = ir2_src(reg_idx, 0, IR2_SRC_REG); + break; + default: + instr = instr_create_alu_dest(ctx, nir_op_fmov, dst); + instr->src[0] = ir2_src(idx, 0, IR2_SRC_INPUT); + break; + } +} + +static unsigned +output_slot(struct ir2_context *ctx, nir_intrinsic_instr *intr) +{ + int slot = -1; + unsigned idx = nir_intrinsic_base(intr); + nir_foreach_variable(var, &ctx->nir->outputs) { + if (var->data.driver_location == idx) { + slot = var->data.location; + break; + } + } + assert(slot != -1); + return slot; +} + +static void +store_output(struct ir2_context *ctx, nir_src src, unsigned slot, unsigned ncomp) +{ + struct ir2_instr *instr; + unsigned idx = 0; + + if (ctx->so->type == MESA_SHADER_VERTEX) { + switch (slot) { + case VARYING_SLOT_POS: + ctx->position = make_src(ctx, src); + idx = 62; + break; + case VARYING_SLOT_PSIZ: + ctx->so->writes_psize = true; + idx = 63; + break; + default: + /* find matching slot from fragment shader input */ + for (idx = 0; idx < ctx->f->inputs_count; idx++) + if (ctx->f->inputs[idx].slot == slot) + break; + if (idx == ctx->f->inputs_count) + return; + } + } else if (slot != FRAG_RESULT_COLOR && slot != FRAG_RESULT_DATA0) { + /* only color output is implemented */ + return; + } + + instr = instr_create_alu(ctx, nir_op_fmov, ncomp); + instr->src[0] = make_src(ctx, src); + instr->alu.export = idx; +} + +static void +emit_intrinsic(struct 
ir2_context *ctx, nir_intrinsic_instr *intr) +{ + struct ir2_instr *instr; + nir_const_value *const_offset; + nir_deref_instr *deref; + unsigned idx; + + switch (intr->intrinsic) { + case nir_intrinsic_load_input: + load_input(ctx, &intr->dest, nir_intrinsic_base(intr)); + break; + case nir_intrinsic_store_output: + store_output(ctx, intr->src[0], output_slot(ctx, intr), intr->num_components); + break; + case nir_intrinsic_load_deref: + deref = nir_src_as_deref(intr->src[0]); + assert(deref->deref_type == nir_deref_type_var); + load_input(ctx, &intr->dest, deref->var->data.driver_location); + break; + case nir_intrinsic_store_deref: + deref = nir_src_as_deref(intr->src[0]); + assert(deref->deref_type == nir_deref_type_var); + store_output(ctx, intr->src[1], deref->var->data.location, intr->num_components); + break; + case nir_intrinsic_load_uniform: + const_offset = nir_src_as_const_value(intr->src[0]); + assert(const_offset); /* TODO can be false in ES2? */ + idx = nir_intrinsic_base(intr); + idx += (uint32_t) nir_src_as_const_value(intr->src[0])->f32[0]; + instr = instr_create_alu_dest(ctx, nir_op_fmov, &intr->dest); + instr->src[0] = ir2_src(idx, 0, IR2_SRC_CONST); + break; + case nir_intrinsic_discard: + case nir_intrinsic_discard_if: + instr = ir2_instr_create(ctx, IR2_ALU); + instr->alu.vector_opc = VECTOR_NONE; + if (intr->intrinsic == nir_intrinsic_discard_if) { + instr->alu.scalar_opc = KILLNEs; + instr->src[0] = make_src(ctx, intr->src[0]); + } else { + instr->alu.scalar_opc = KILLEs; + instr->src[0] = ir2_zero(ctx); + } + instr->alu.export = -1; + instr->src_count = 1; + ctx->so->has_kill = true; + break; + case nir_intrinsic_load_front_face: + /* gl_FrontFacing is in the sign of param.x + * rcp required because otherwise we can't differentiate -0.0 and +0.0 + */ + ctx->so->need_param = true; + + struct ir2_instr *tmp = instr_create_alu(ctx, nir_op_frcp, 1); + tmp->src[0] = ir2_src(ctx->f->inputs_count, 0, IR2_SRC_INPUT); + + instr = 
instr_create_alu_dest(ctx, nir_op_sge, &intr->dest); + instr->src[0] = ir2_src(tmp->idx, 0, IR2_SRC_SSA); + instr->src[1] = ir2_zero(ctx); + break; + default: + compile_error(ctx, "unimplemented intr %d\n", intr->intrinsic); + break; + } +} + +static void +emit_tex(struct ir2_context *ctx, nir_tex_instr * tex) +{ + bool is_rect = false, is_cube = false; + struct ir2_instr *instr; + nir_src *coord, *lod_bias; + + coord = lod_bias = NULL; + + for (unsigned i = 0; i < tex->num_srcs; i++) { + switch (tex->src[i].src_type) { + case nir_tex_src_coord: + coord = &tex->src[i].src; + break; + case nir_tex_src_bias: + case nir_tex_src_lod: + assert(!lod_bias); + lod_bias = &tex->src[i].src; + break; + default: + compile_error(ctx, "Unhandled NIR tex src type: %d\n", + tex->src[i].src_type); + return; + } + } + + switch (tex->op) { + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_txl: + break; + default: + compile_error(ctx, "unimplemented texop %d\n", tex->op); + return; + } + + switch (tex->sampler_dim) { + case GLSL_SAMPLER_DIM_2D: + break; + case GLSL_SAMPLER_DIM_RECT: + is_rect = true; + break; + case GLSL_SAMPLER_DIM_CUBE: + is_cube = true; + break; + default: + compile_error(ctx, "unimplemented sampler %d\n", tex->sampler_dim); + return; + } + + struct ir2_src src_coord = make_src_noconst(ctx, *coord); + + /* for cube maps + * tmp = cube(coord) + * tmp.xy = tmp.xy / |tmp.z| + 1.5 + * coord = tmp.xyw + */ + if (is_cube) { + struct ir2_instr *rcp, *coord_xy; + unsigned reg_idx; + + instr = instr_create_alu_reg(ctx, ir2_op_cube, 15, NULL); + instr->src[0] = src_coord; + instr->src[0].swizzle = IR2_SWIZZLE_ZZXY; + instr->src[1] = src_coord; + instr->src[1].swizzle = IR2_SWIZZLE_YXZZ; + + reg_idx = instr->reg - ctx->reg; /* hacky */ + + rcp = instr_create_alu(ctx, nir_op_frcp, 1); + rcp->src[0] = ir2_src(reg_idx, IR2_SWIZZLE_Z, IR2_SRC_REG); + rcp->src[0].abs = true; + + coord_xy = instr_create_alu_reg(ctx, nir_op_ffma, 3, instr); + coord_xy->src[0] = 
ir2_src(reg_idx, 0, IR2_SRC_REG); + coord_xy->src[1] = ir2_src(rcp->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA); + coord_xy->src[2] = load_const(ctx, (float[]) {1.5f}, 1); + + src_coord = ir2_src(reg_idx, 0, IR2_SRC_REG); + /* TODO: lod/bias transformed by src_coord.z ? */ + } + + instr = ir2_instr_create_fetch(ctx, &tex->dest, TEX_FETCH); + instr->src[0] = src_coord; + instr->src[0].swizzle = is_cube ? IR2_SWIZZLE_XYW : 0; + instr->fetch.tex.is_cube = is_cube; + instr->fetch.tex.is_rect = is_rect; + instr->fetch.tex.samp_id = tex->sampler_index; + + /* for lod/bias, we insert an extra src for the backend to deal with */ + if (lod_bias) { + instr->src[1] = make_src_noconst(ctx, *lod_bias); + /* backend will use 2-3 components so apply swizzle */ + swiz_merge_p(&instr->src[1].swizzle, IR2_SWIZZLE_XXXX); + instr->src_count = 2; + } +} + +static void +setup_input(struct ir2_context *ctx, nir_variable * in) +{ + struct fd2_shader_stateobj *so = ctx->so; + unsigned array_len = MAX2(glsl_get_length(in->type), 1); + unsigned n = in->data.driver_location; + unsigned slot = in->data.location; + + assert(array_len == 1); + + /* handle later */ + if (ctx->so->type == MESA_SHADER_VERTEX) + return; + + if (ctx->so->type != MESA_SHADER_FRAGMENT) + compile_error(ctx, "unknown shader type: %d\n", ctx->so->type); + + if (slot == VARYING_SLOT_PNTC) { + so->need_param = true; + return; + } + + n = ctx->f->inputs_count++; + + /* half of fragcoord from param reg, half from a varying */ + if (slot == VARYING_SLOT_POS) { + ctx->f->fragcoord = n; + so->need_param = true; + } + + ctx->f->inputs[n].slot = slot; + ctx->f->inputs[n].ncomp = glsl_get_components(in->type); + + /* in->data.interpolation? 
+ * opengl ES 2.0 can't do flat mode, but we still get it from GALLIUM_HUD + */ +} + +static void +emit_undef(struct ir2_context *ctx, nir_ssa_undef_instr * undef) +{ + /* TODO we don't want to emit anything for undefs */ + + struct ir2_instr *instr; + + instr = instr_create_alu_dest(ctx, nir_op_fmov, + &(nir_dest) {.ssa = undef->def,.is_ssa = true}); + instr->src[0] = ir2_src(0, 0, IR2_SRC_CONST); +} + +static void +emit_instr(struct ir2_context *ctx, nir_instr * instr) +{ + switch (instr->type) { + case nir_instr_type_alu: + emit_alu(ctx, nir_instr_as_alu(instr)); + break; + case nir_instr_type_deref: + /* ignored, handled as part of the intrinsic they are src to */ + break; + case nir_instr_type_intrinsic: + emit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); + break; + case nir_instr_type_load_const: + /* dealt with when using nir_src */ + break; + case nir_instr_type_tex: + emit_tex(ctx, nir_instr_as_tex(instr)); + break; + case nir_instr_type_jump: + ctx->block_has_jump[ctx->block_idx] = true; + break; + case nir_instr_type_ssa_undef: + emit_undef(ctx, nir_instr_as_ssa_undef(instr)); + break; + default: + break; + } +} + +/* fragcoord.zw and a20x hw binning outputs */ +static void +extra_position_exports(struct ir2_context *ctx, bool binning) +{ + struct ir2_instr *instr, *rcp, *sc, *wincoord, *off; + + if (ctx->f->fragcoord < 0 && !binning) + return; + + instr = instr_create_alu(ctx, nir_op_fmax, 1); + instr->src[0] = ctx->position; + instr->src[0].swizzle = IR2_SWIZZLE_W; + instr->src[1] = ir2_zero(ctx); + + rcp = instr_create_alu(ctx, nir_op_frcp, 1); + rcp->src[0] = ir2_src(instr->idx, 0, IR2_SRC_SSA); + + sc = instr_create_alu(ctx, nir_op_fmul, 4); + sc->src[0] = ctx->position; + sc->src[1] = ir2_src(rcp->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA); + + wincoord = instr_create_alu(ctx, nir_op_ffma, 4); + wincoord->src[0] = ir2_src(66, 0, IR2_SRC_CONST); + wincoord->src[1] = ir2_src(sc->idx, 0, IR2_SRC_SSA); + wincoord->src[2] = ir2_src(65, 0, IR2_SRC_CONST); + + 
/* fragcoord z/w */ + if (ctx->f->fragcoord >= 0 && !binning) { + instr = instr_create_alu(ctx, nir_op_fmov, 1); + instr->src[0] = ir2_src(wincoord->idx, IR2_SWIZZLE_Z, IR2_SRC_SSA); + instr->alu.export = ctx->f->fragcoord; + + instr = instr_create_alu(ctx, nir_op_fmov, 1); + instr->src[0] = ctx->position; + instr->src[0].swizzle = IR2_SWIZZLE_W; + instr->alu.export = ctx->f->fragcoord; + instr->alu.write_mask = 2; + } + + if (!binning) + return; + + off = instr_create_alu(ctx, nir_op_fadd, 1); + off->src[0] = ir2_src(64, 0, IR2_SRC_CONST); + off->src[1] = ir2_src(2, 0, IR2_SRC_INPUT); + + /* 8 max set in freedreno_screen.. unneeded instrs patched out */ + for (int i = 0; i < 8; i++) { + instr = instr_create_alu(ctx, nir_op_ffma, 4); + instr->src[0] = ir2_src(1, IR2_SWIZZLE_WYWW, IR2_SRC_CONST); + instr->src[1] = ir2_src(off->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA); + instr->src[2] = ir2_src(3 + i, 0, IR2_SRC_CONST); + instr->alu.export = 32; + + instr = instr_create_alu(ctx, nir_op_ffma, 4); + instr->src[0] = ir2_src(68 + i * 2, 0, IR2_SRC_CONST); + instr->src[1] = ir2_src(wincoord->idx, 0, IR2_SRC_SSA); + instr->src[2] = ir2_src(67 + i * 2, 0, IR2_SRC_CONST); + instr->alu.export = 33; + } +} + +static bool emit_cf_list(struct ir2_context *ctx, struct exec_list *list); + +static bool +emit_block(struct ir2_context *ctx, nir_block * block) +{ + struct ir2_instr *instr; + nir_block *succs = block->successors[0]; + + ctx->block_idx = block->index; + + nir_foreach_instr(instr, block) + emit_instr(ctx, instr); + + if (!succs || !succs->index) + return false; + + /* we want to be smart and always jump and have the backend cleanup + * but we are not, so there are two cases where jump is needed: + * loops (succs index lower) + * jumps (jump instruction seen in block) + */ + if (succs->index > block->index && !ctx->block_has_jump[block->index]) + return false; + + assert(block->successors[1] == NULL); + + instr = ir2_instr_create(ctx, IR2_CF); + instr->cf.block_idx = 
succs->index; + /* XXX can't jump to a block with different predicate */ + return true; +} + +static void +emit_if(struct ir2_context *ctx, nir_if * nif) +{ + unsigned pred = ctx->pred, pred_idx = ctx->pred_idx; + struct ir2_instr *instr; + + /* XXX: blob seems to always use same register for condition */ + + instr = ir2_instr_create(ctx, IR2_ALU); + instr->src[0] = make_src(ctx, nif->condition); + instr->src_count = 1; + instr->ssa.ncomp = 1; + instr->alu.vector_opc = VECTOR_NONE; + instr->alu.scalar_opc = SCALAR_NONE; + instr->alu.export = -1; + instr->alu.write_mask = 1; + instr->pred = 0; + + /* if nested, use PRED_SETNE_PUSHv */ + if (pred) { + instr->alu.vector_opc = PRED_SETNE_PUSHv; + instr->src[1] = instr->src[0]; + instr->src[0] = ir2_src(pred_idx, 0, IR2_SRC_SSA); + instr->src[0].swizzle = IR2_SWIZZLE_XXXX; + instr->src[1].swizzle = IR2_SWIZZLE_XXXX; + instr->src_count = 2; + } else { + instr->alu.scalar_opc = PRED_SETNEs; + } + + ctx->pred_idx = instr->idx; + ctx->pred = 3; + + emit_cf_list(ctx, &nif->then_list); + + /* TODO: if these is no else branch we don't need this + * and if the else branch is simple, can just flip ctx->pred instead + */ + instr = ir2_instr_create(ctx, IR2_ALU); + instr->src[0] = ir2_src(ctx->pred_idx, 0, IR2_SRC_SSA); + instr->src_count = 1; + instr->ssa.ncomp = 1; + instr->alu.vector_opc = VECTOR_NONE; + instr->alu.scalar_opc = PRED_SET_INVs; + instr->alu.export = -1; + instr->alu.write_mask = 1; + instr->pred = 0; + ctx->pred_idx = instr->idx; + + emit_cf_list(ctx, &nif->else_list); + + /* restore predicate for nested predicates */ + if (pred) { + instr = ir2_instr_create(ctx, IR2_ALU); + instr->src[0] = ir2_src(ctx->pred_idx, 0, IR2_SRC_SSA); + instr->src_count = 1; + instr->ssa.ncomp = 1; + instr->alu.vector_opc = VECTOR_NONE; + instr->alu.scalar_opc = PRED_SET_POPs; + instr->alu.export = -1; + instr->alu.write_mask = 1; + instr->pred = 0; + ctx->pred_idx = instr->idx; + } + + /* restore ctx->pred */ + ctx->pred = pred; +} + 
+/* get the highest block idx in the loop, so we know when + * we can free registers that are allocated outside the loop + */ +static unsigned +loop_last_block(struct exec_list *list) +{ + nir_cf_node *node = + exec_node_data(nir_cf_node, exec_list_get_tail(list), node); + switch (node->type) { + case nir_cf_node_block: + return nir_cf_node_as_block(node)->index; + case nir_cf_node_if: + assert(0); /* XXX could this ever happen? */ + return 0; + case nir_cf_node_loop: + return loop_last_block(&nir_cf_node_as_loop(node)->body); + default: + compile_error(ctx, "Not supported\n"); + return 0; + } +} + +static void +emit_loop(struct ir2_context *ctx, nir_loop *nloop) +{ + ctx->loop_last_block[++ctx->loop_depth] = loop_last_block(&nloop->body); + emit_cf_list(ctx, &nloop->body); + ctx->loop_depth--; +} + +static bool +emit_cf_list(struct ir2_context *ctx, struct exec_list *list) +{ + bool ret = false; + foreach_list_typed(nir_cf_node, node, node, list) { + ret = false; + switch (node->type) { + case nir_cf_node_block: + ret = emit_block(ctx, nir_cf_node_as_block(node)); + break; + case nir_cf_node_if: + emit_if(ctx, nir_cf_node_as_if(node)); + break; + case nir_cf_node_loop: + emit_loop(ctx, nir_cf_node_as_loop(node)); + break; + case nir_cf_node_function: + compile_error(ctx, "Not supported\n"); + break; + } + } + return ret; +} + +static void cleanup_binning(struct ir2_context *ctx) +{ + assert(ctx->so->type == MESA_SHADER_VERTEX); + + /* kill non-position outputs for binning variant */ + nir_foreach_block(block, nir_shader_get_entrypoint(ctx->nir)) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + unsigned slot; + switch (intr->intrinsic) { + case nir_intrinsic_store_deref: { + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + assert(deref->deref_type == nir_deref_type_var); + slot = deref->var->data.location; + } break; + case 
nir_intrinsic_store_output: + slot = output_slot(ctx, intr); + break; + default: + continue; + } + + if (slot != VARYING_SLOT_POS) + nir_instr_remove(instr); + } + } + + ir2_optimize_nir(ctx->nir, false); +} + +void +ir2_nir_compile(struct ir2_context *ctx, bool binning) +{ + struct fd2_shader_stateobj *so = ctx->so; + + memset(ctx->ssa_map, 0xff, sizeof(ctx->ssa_map)); + + ctx->nir = nir_shader_clone(NULL, so->nir); + + if (binning) + cleanup_binning(ctx); + + /* postprocess */ + OPT_V(ctx->nir, nir_opt_algebraic_late); + + OPT_V(ctx->nir, nir_lower_to_source_mods, nir_lower_all_source_mods); + OPT_V(ctx->nir, nir_copy_prop); + OPT_V(ctx->nir, nir_opt_dce); + OPT_V(ctx->nir, nir_opt_move_comparisons); + + OPT_V(ctx->nir, nir_lower_bool_to_float); + + /* lower to scalar instructions that can only be scalar on a2xx */ + OPT_V(ctx->nir, ir2_nir_lower_scalar); + + OPT_V(ctx->nir, nir_lower_locals_to_regs); + + OPT_V(ctx->nir, nir_convert_from_ssa, true); + + OPT_V(ctx->nir, nir_move_vec_src_uses_to_dest); + OPT_V(ctx->nir, nir_lower_vec_to_movs); + + OPT_V(ctx->nir, nir_opt_dce); + + nir_sweep(ctx->nir); + + if (fd_mesa_debug & FD_DBG_DISASM) { + debug_printf("----------------------\n"); + nir_print_shader(ctx->nir, stdout); + debug_printf("----------------------\n"); + } + + /* fd2_shader_stateobj init */ + if (so->type == MESA_SHADER_FRAGMENT) { + ctx->f->fragcoord = -1; + ctx->f->inputs_count = 0; + memset(ctx->f->inputs, 0, sizeof(ctx->f->inputs)); + } + + /* Setup inputs: */ + nir_foreach_variable(in, &ctx->nir->inputs) + setup_input(ctx, in); + + if (so->type == MESA_SHADER_FRAGMENT) { + unsigned idx; + for (idx = 0; idx < ctx->f->inputs_count; idx++) { + ctx->input[idx].ncomp = ctx->f->inputs[idx].ncomp; + update_range(ctx, &ctx->input[idx]); + } + /* assume we have param input and kill it later if not */ + ctx->input[idx].ncomp = 4; + update_range(ctx, &ctx->input[idx]); + } else { + ctx->input[0].ncomp = 1; + ctx->input[2].ncomp = 1; + update_range(ctx, 
&ctx->input[0]); + update_range(ctx, &ctx->input[2]); + } + + /* And emit the body: */ + nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->nir); + + nir_foreach_register(reg, &fxn->registers) { + ctx->reg[reg->index].ncomp = reg->num_components; + ctx->reg_count = MAX2(ctx->reg_count, reg->index + 1); + } + + nir_metadata_require(fxn, nir_metadata_block_index); + emit_cf_list(ctx, &fxn->body); + /* TODO emit_block(ctx, fxn->end_block); */ + + if (so->type == MESA_SHADER_VERTEX) + extra_position_exports(ctx, binning); + + ralloc_free(ctx->nir); + + /* kill unused param input */ + if (so->type == MESA_SHADER_FRAGMENT && !so->need_param) + ctx->input[ctx->f->inputs_count].initialized = false; +} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_nir_lower_scalar.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_nir_lower_scalar.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_nir_lower_scalar.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_nir_lower_scalar.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,174 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Jonathan Marek + */ + +/* some operations can only be scalar on a2xx: + * rsq, rcp, log2, exp2, cos, sin, sqrt + * mostly copy-pasted from nir_lower_alu_to_scalar.c + */ + +#include "ir2_private.h" +#include "compiler/nir/nir_builder.h" + +static void +nir_alu_ssa_dest_init(nir_alu_instr * instr, unsigned num_components, + unsigned bit_size) +{ + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, + bit_size, NULL); + instr->dest.write_mask = (1 << num_components) - 1; +} + +static void +lower_reduction(nir_alu_instr * instr, nir_op chan_op, nir_op merge_op, + nir_builder * builder) +{ + unsigned num_components = nir_op_infos[instr->op].input_sizes[0]; + + nir_ssa_def *last = NULL; + for (unsigned i = 0; i < num_components; i++) { + nir_alu_instr *chan = + nir_alu_instr_create(builder->shader, chan_op); + nir_alu_ssa_dest_init(chan, 1, instr->dest.dest.ssa.bit_size); + nir_alu_src_copy(&chan->src[0], &instr->src[0], chan); + chan->src[0].swizzle[0] = chan->src[0].swizzle[i]; + if (nir_op_infos[chan_op].num_inputs > 1) { + assert(nir_op_infos[chan_op].num_inputs == 2); + nir_alu_src_copy(&chan->src[1], &instr->src[1], chan); + chan->src[1].swizzle[0] = chan->src[1].swizzle[i]; + } + chan->exact = instr->exact; + + nir_builder_instr_insert(builder, &chan->instr); + + if (i == 0) { + last = &chan->dest.dest.ssa; + } else { + last = nir_build_alu(builder, merge_op, + last, &chan->dest.dest.ssa, NULL, NULL); + } + } + + assert(instr->dest.write_mask == 1); + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(last)); + nir_instr_remove(&instr->instr); +} + +static bool lower_scalar(nir_alu_instr * instr, nir_builder * b) +{ + 
assert(instr->dest.dest.is_ssa); + assert(instr->dest.write_mask != 0); + + b->cursor = nir_before_instr(&instr->instr); + b->exact = instr->exact; + +#define LOWER_REDUCTION(name, chan, merge) \ + case name##2: \ + case name##3: \ + case name##4: \ + lower_reduction(instr, chan, merge, b); \ + return true; + + switch (instr->op) { + /* TODO: handle these instead of lowering */ + LOWER_REDUCTION(nir_op_fall_equal, nir_op_seq, nir_op_fand); + LOWER_REDUCTION(nir_op_fany_nequal, nir_op_sne, nir_op_for); + + default: + return false; + case nir_op_frsq: + case nir_op_frcp: + case nir_op_flog2: + case nir_op_fexp2: + case nir_op_fcos: + case nir_op_fsin: + case nir_op_fsqrt: + break; + } + + assert(nir_op_infos[instr->op].num_inputs == 1); + + unsigned num_components = instr->dest.dest.ssa.num_components; + nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS] = { NULL }; + unsigned chan; + + if (num_components == 1) + return false; + + for (chan = 0; chan < num_components; chan++) { + assert(instr->dest.write_mask & (1 << chan)); + + nir_alu_instr *lower = nir_alu_instr_create(b->shader, instr->op); + + nir_alu_src_copy(&lower->src[0], &instr->src[0], lower); + lower->src[0].swizzle[0] = instr->src[0].swizzle[chan]; + + nir_alu_ssa_dest_init(lower, 1, instr->dest.dest.ssa.bit_size); + lower->dest.saturate = instr->dest.saturate; + comps[chan] = &lower->dest.dest.ssa; + lower->exact = instr->exact; + + nir_builder_instr_insert(b, &lower->instr); + } + + nir_ssa_def *vec = nir_vec(b, comps, num_components); + + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec)); + + nir_instr_remove(&instr->instr); + return true; +} + +static bool lower_scalar_impl(nir_function_impl * impl) +{ + nir_builder builder; + nir_builder_init(&builder, impl); + bool progress = false; + + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type == nir_instr_type_alu) + progress = lower_scalar(nir_instr_as_alu(instr), &builder) + || progress; + } + } + + 
nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + return progress; +} + +bool ir2_nir_lower_scalar(nir_shader * shader) +{ + bool progress = false; + + nir_foreach_function(function, shader) { + if (function->impl) + progress = lower_scalar_impl(function->impl) || progress; + } + + return progress; +} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_private.h mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_private.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_private.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_private.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,396 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Jonathan Marek + */ + +#include +#include +#include +#include +#include + +#include "ir2.h" +#include "fd2_program.h" +#include "instr-a2xx.h" + +enum ir2_src_type { + IR2_SRC_SSA, + IR2_SRC_REG, + IR2_SRC_INPUT, + IR2_SRC_CONST, +}; + +struct ir2_src { + /* num can mean different things + * ssa: index of instruction + * reg: index in ctx->reg array + * input: index in ctx->input array + * const: constant index (C0, C1, etc) + */ + uint16_t num; + uint8_t swizzle; + enum ir2_src_type type : 2; + uint8_t abs : 1; + uint8_t negate : 1; + uint8_t : 4; +}; + +struct ir2_reg_component { + uint8_t c : 3; /* assigned x/y/z/w (7=dont write, for fetch instr) */ + bool alloc : 1; /* is it currently allocated */ + uint8_t ref_count; /* for ra */ +}; + +struct ir2_reg { + uint8_t idx; /* assigned hardware register */ + uint8_t ncomp; + + uint8_t loop_depth; + bool initialized; + /* block_idx to free on (-1 = free on ref_count==0) */ + int block_idx_free; + struct ir2_reg_component comp[4]; +}; + +struct ir2_instr { + unsigned idx; + + unsigned block_idx; + + enum { + IR2_NONE, + IR2_FETCH, + IR2_ALU, + IR2_CF, + } type : 2; + + /* instruction needs to be emitted (for scheduling) */ + bool need_emit : 1; + + /* predicate value - (usually) same for entire block */ + uint8_t pred : 2; + + /* src */ + uint8_t src_count; + struct ir2_src src[4]; + + /* dst */ + bool is_ssa; + union { + struct ir2_reg ssa; + struct ir2_reg *reg; + }; + + /* type-specific */ + union { + struct { + instr_fetch_opc_t opc : 5; + union { + struct { + uint8_t const_idx; + uint8_t const_idx_sel; + } vtx; + struct { + bool is_cube : 1; + bool is_rect : 1; + uint8_t samp_id; + } tex; + }; + } fetch; + struct { + /* store possible opcs, then we can choose vector/scalar instr */ + instr_scalar_opc_t scalar_opc : 6; + instr_vector_opc_t vector_opc : 5; + /* same as nir */ + uint8_t write_mask : 4; + bool saturate : 1; + + /* export idx (-1 no export) */ + int8_t export; + + /* for 
scalarized 2 src instruction */ + uint8_t src1_swizzle; + } alu; + struct { + /* jmp dst block_idx */ + uint8_t block_idx; + } cf; + }; +}; + +struct ir2_sched_instr { + uint32_t reg_state[8]; + struct ir2_instr *instr, *instr_s; +}; + +struct ir2_context { + struct fd2_shader_stateobj *so; + + unsigned block_idx, pred_idx; + uint8_t pred; + bool block_has_jump[64]; + + unsigned loop_last_block[64]; + unsigned loop_depth; + + nir_shader *nir; + + /* ssa index of position output */ + struct ir2_src position; + + /* to translate SSA ids to instruction ids */ + int16_t ssa_map[1024]; + + struct ir2_shader_info *info; + struct ir2_frag_linkage *f; + + int prev_export; + + /* RA state */ + struct ir2_reg* live_regs[64]; + uint32_t reg_state[256/32]; /* 64*4 bits */ + + /* inputs */ + struct ir2_reg input[16 + 1]; /* 16 + param */ + + /* non-ssa regs */ + struct ir2_reg reg[64]; + unsigned reg_count; + + struct ir2_instr instr[0x300]; + unsigned instr_count; + + struct ir2_sched_instr instr_sched[0x180]; + unsigned instr_sched_count; +}; + +void assemble(struct ir2_context *ctx, bool binning); + +void ir2_nir_compile(struct ir2_context *ctx, bool binning); +bool ir2_nir_lower_scalar(nir_shader * shader); + +void ra_count_refs(struct ir2_context *ctx); +void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx, + bool export, uint8_t export_writemask); +void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr); +void ra_block_free(struct ir2_context *ctx, unsigned block); + +void cp_src(struct ir2_context *ctx); +void cp_export(struct ir2_context *ctx); + +/* utils */ +enum { + IR2_SWIZZLE_Y = 1 << 0, + IR2_SWIZZLE_Z = 2 << 0, + IR2_SWIZZLE_W = 3 << 0, + + IR2_SWIZZLE_ZW = 2 << 0 | 2 << 2, + + IR2_SWIZZLE_XYW = 0 << 0 | 0 << 2 | 1 << 4, + + IR2_SWIZZLE_XXXX = 0 << 0 | 3 << 2 | 2 << 4 | 1 << 6, + IR2_SWIZZLE_YYYY = 1 << 0 | 0 << 2 | 3 << 4 | 2 << 6, + IR2_SWIZZLE_ZZZZ = 2 << 0 | 1 << 2 | 0 << 4 | 3 << 6, + IR2_SWIZZLE_WWWW = 3 << 0 | 2 << 2 | 1 << 4 
| 0 << 6, + IR2_SWIZZLE_WYWW = 3 << 0 | 0 << 2 | 1 << 4 | 0 << 6, + IR2_SWIZZLE_XYXY = 0 << 0 | 0 << 2 | 2 << 4 | 2 << 6, + IR2_SWIZZLE_ZZXY = 2 << 0 | 1 << 2 | 2 << 4 | 2 << 6, + IR2_SWIZZLE_YXZZ = 1 << 0 | 3 << 2 | 0 << 4 | 3 << 6, +}; + +#define compile_error(ctx, args...) ({ \ + printf(args); \ + assert(0); \ +}) + +static inline struct ir2_src +ir2_src(uint16_t num, uint8_t swizzle, enum ir2_src_type type) +{ + return (struct ir2_src) { + .num = num, + .swizzle = swizzle, + .type = type + }; +} + +/* ir2_assemble uses it .. */ +struct ir2_src ir2_zero(struct ir2_context *ctx); + +#define ir2_foreach_instr(it, ctx) \ + for (struct ir2_instr *it = (ctx)->instr; ({ \ + while (it != &(ctx)->instr[(ctx)->instr_count] && it->type == IR2_NONE) it++; \ + it != &(ctx)->instr[(ctx)->instr_count]; }); it++) + +#define ir2_foreach_live_reg(it, ctx) \ + for (struct ir2_reg **__ptr = (ctx)->live_regs, *it; ({ \ + while (__ptr != &(ctx)->live_regs[64] && *__ptr == NULL) __ptr++; \ + __ptr != &(ctx)->live_regs[64] ? (it=*__ptr) : NULL; }); it++) + +#define ir2_foreach_avail(it) \ + for (struct ir2_instr **__instrp = avail, *it; \ + it = *__instrp, __instrp != &avail[avail_count]; __instrp++) + +#define ir2_foreach_src(it, instr) \ + for (struct ir2_src *it = instr->src; \ + it != &instr->src[instr->src_count]; it++) + +/* mask for register allocation + * 64 registers with 4 components each = 256 bits + */ +/* typedef struct { + uint64_t data[4]; +} regmask_t; */ + +static inline bool mask_isset(uint32_t * mask, unsigned num) +{ + return ! 
!(mask[num / 32] & 1 << num % 32); +} + +static inline void mask_set(uint32_t * mask, unsigned num) +{ + mask[num / 32] |= 1 << num % 32; +} + +static inline void mask_unset(uint32_t * mask, unsigned num) +{ + mask[num / 32] &= ~(1 << num % 32); +} + +static inline unsigned mask_reg(uint32_t * mask, unsigned num) +{ + return mask[num / 8] >> num % 8 * 4 & 0xf; +} + +static inline bool is_export(struct ir2_instr *instr) +{ + return instr->type == IR2_ALU && instr->alu.export >= 0; +} + +static inline instr_alloc_type_t export_buf(unsigned num) +{ + return num < 32 ? SQ_PARAMETER_PIXEL : + num >= 62 ? SQ_POSITION : SQ_MEMORY; +} + +/* component c for channel i */ +static inline unsigned swiz_set(unsigned c, unsigned i) +{ + return ((c - i) & 3) << i * 2; +} + +/* get swizzle in channel i */ +static inline unsigned swiz_get(unsigned swiz, unsigned i) +{ + return ((swiz >> i * 2) + i) & 3; +} + +static inline unsigned swiz_merge(unsigned swiz0, unsigned swiz1) +{ + unsigned swiz = 0; + for (int i = 0; i < 4; i++) + swiz |= swiz_set(swiz_get(swiz0, swiz_get(swiz1, i)), i); + return swiz; +} + +static inline void swiz_merge_p(uint8_t *swiz0, unsigned swiz1) +{ + unsigned swiz = 0; + for (int i = 0; i < 4; i++) + swiz |= swiz_set(swiz_get(*swiz0, swiz_get(swiz1, i)), i); + *swiz0 = swiz; +} + +static inline struct ir2_reg * get_reg(struct ir2_instr *instr) +{ + return instr->is_ssa ? 
&instr->ssa : instr->reg; +} + +static inline struct ir2_reg * +get_reg_src(struct ir2_context *ctx, struct ir2_src *src) +{ + switch (src->type) { + case IR2_SRC_INPUT: + return &ctx->input[src->num]; + case IR2_SRC_SSA: + return &ctx->instr[src->num].ssa; + case IR2_SRC_REG: + return &ctx->reg[src->num]; + default: + return NULL; + } +} + +/* gets a ncomp value for the dst */ +static inline unsigned dst_ncomp(struct ir2_instr *instr) +{ + if (instr->is_ssa) + return instr->ssa.ncomp; + + if (instr->type == IR2_FETCH) + return instr->reg->ncomp; + + assert(instr->type == IR2_ALU); + + unsigned ncomp = 0; + for (int i = 0; i < instr->reg->ncomp; i++) + ncomp += !!(instr->alu.write_mask & 1 << i); + return ncomp; +} + +/* gets a ncomp value for the src registers */ +static inline unsigned src_ncomp(struct ir2_instr *instr) +{ + if (instr->type == IR2_FETCH) { + switch (instr->fetch.opc) { + case VTX_FETCH: + return 1; + case TEX_FETCH: + return instr->fetch.tex.is_cube ? 3 : 2; + case TEX_SET_TEX_LOD: + return 1; + default: + assert(0); + } + } + + switch (instr->alu.scalar_opc) { + case PRED_SETEs ... 
KILLONEs: + return 1; + default: + break; + } + + switch (instr->alu.vector_opc) { + case DOT2ADDv: + return 2; + case DOT3v: + return 3; + case DOT4v: + case CUBEv: + case PRED_SETE_PUSHv: + return 4; + default: + return dst_ncomp(instr); + } +} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_ra.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_ra.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_ra.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_ra.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,226 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Jonathan Marek + */ + +#include "ir2_private.h" + +/* if an instruction has side effects, we should never kill it */ +static bool has_side_effects(struct ir2_instr *instr) +{ + if (instr->type == IR2_CF) + return true; + else if (instr->type == IR2_FETCH) + return false; + + switch (instr->alu.scalar_opc) { + case PRED_SETEs ... KILLONEs: + return true; + default: + break; + } + + switch (instr->alu.vector_opc) { + case PRED_SETE_PUSHv ... KILLNEv: + return true; + default: + break; + } + + return instr->alu.export >= 0; +} + +/* mark an instruction as required, and all its sources recursively */ +static void set_need_emit(struct ir2_context *ctx, struct ir2_instr *instr) +{ + struct ir2_reg *reg; + + /* don't repeat work already done */ + if (instr->need_emit) + return; + + instr->need_emit = true; + + ir2_foreach_src(src, instr) { + switch (src->type) { + case IR2_SRC_SSA: + set_need_emit(ctx, &ctx->instr[src->num]); + break; + case IR2_SRC_REG: + /* slow .. */ + reg = get_reg_src(ctx, src); + ir2_foreach_instr(instr, ctx) { + if (!instr->is_ssa && instr->reg == reg) + set_need_emit(ctx, instr); + } + default: + break; + } + } +} + +/* get current bit mask of allocated components for a register */ +static unsigned reg_mask(struct ir2_context *ctx, unsigned idx) +{ + return ctx->reg_state[idx/8] >> idx%8*4 & 0xf; +} + +static void reg_setmask(struct ir2_context *ctx, unsigned idx, unsigned c) +{ + idx = idx * 4 + c; + ctx->reg_state[idx/32] |= 1 << idx%32; +} + +static void reg_freemask(struct ir2_context *ctx, unsigned idx, unsigned c) +{ + idx = idx * 4 + c; + ctx->reg_state[idx/32] &= ~(1 << idx%32); +} + +void ra_count_refs(struct ir2_context *ctx) +{ + struct ir2_reg *reg; + + /* mark instructions as needed + * need to do this because "substitutions" pass makes many movs not needed + */ + ir2_foreach_instr(instr, ctx) { + if (has_side_effects(instr)) + set_need_emit(ctx, instr); + } + + /* compute ref_counts */ + ir2_foreach_instr(instr, 
ctx) { + /* kill non-needed so they can be skipped */ + if (!instr->need_emit) { + instr->type = IR2_NONE; + continue; + } + + ir2_foreach_src(src, instr) { + if (src->type == IR2_SRC_CONST) + continue; + + reg = get_reg_src(ctx, src); + for (int i = 0; i < src_ncomp(instr); i++) + reg->comp[swiz_get(src->swizzle, i)].ref_count++; + } + } +} + +void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx, + bool export, uint8_t export_writemask) +{ + /* for export, don't allocate anything but set component layout */ + if (export) { + for (int i = 0; i < 4; i++) + reg->comp[i].c = i; + return; + } + + unsigned idx = force_idx; + + /* TODO: allocate into the same register if theres room + * note: the blob doesn't do it, so verify that it is indeed better + * also, doing it would conflict with scalar mov insertion + */ + + /* check if already allocated */ + for (int i = 0; i < reg->ncomp; i++) { + if (reg->comp[i].alloc) + return; + } + + if (force_idx < 0) { + for (idx = 0; idx < 64; idx++) { + if (reg_mask(ctx, idx) == 0) + break; + } + } + assert(idx != 64); /* TODO ran out of register space.. */ + + /* update max_reg value */ + ctx->info->max_reg = MAX2(ctx->info->max_reg, (int) idx); + + unsigned mask = reg_mask(ctx, idx); + + for (int i = 0; i < reg->ncomp; i++) { + /* don't allocate never used values */ + if (reg->comp[i].ref_count == 0) { + reg->comp[i].c = 7; + continue; + } + + /* TODO */ + unsigned c = 1 ? 
i : (ffs(~mask) - 1); + mask |= 1 << c; + reg->comp[i].c = c; + reg_setmask(ctx, idx, c); + reg->comp[i].alloc = true; + } + + reg->idx = idx; + ctx->live_regs[reg->idx] = reg; +} + +/* reduce srcs ref_count and free if needed */ +void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr) +{ + struct ir2_reg *reg; + struct ir2_reg_component *comp; + + ir2_foreach_src(src, instr) { + if (src->type == IR2_SRC_CONST) + continue; + + reg = get_reg_src(ctx, src); + /* XXX use before write case */ + + for (int i = 0; i < src_ncomp(instr); i++) { + comp = ®->comp[swiz_get(src->swizzle, i)]; + if (!--comp->ref_count && reg->block_idx_free < 0) { + reg_freemask(ctx, reg->idx, comp->c); + comp->alloc = false; + } + } + } +} + +/* free any regs left for a block */ +void ra_block_free(struct ir2_context *ctx, unsigned block) +{ + ir2_foreach_live_reg(reg, ctx) { + if (reg->block_idx_free != block) + continue; + + for (int i = 0; i < reg->ncomp; i++) { + if (!reg->comp[i].alloc) /* XXX should never be true? 
*/ + continue; + + reg_freemask(ctx, reg->idx, reg->comp[i].c); + reg->comp[i].alloc = false; + } + ctx->live_regs[reg->idx] = NULL; + } +} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,809 +0,0 @@ -/* - * Copyright (c) 2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ir-a2xx.h" - -#include -#include -#include -#include - -#include "freedreno_util.h" -#include "instr-a2xx.h" - -#define DEBUG_MSG(f, ...) do { if (0) DBG(f, ##__VA_ARGS__); } while (0) -#define WARN_MSG(f, ...) DBG("WARN: "f, ##__VA_ARGS__) -#define ERROR_MSG(f, ...) 
DBG("ERROR: "f, ##__VA_ARGS__) - -static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords, - uint32_t idx, struct ir2_shader_info *info); - -static uint32_t reg_fetch_src_swiz(struct ir2_src_register *reg, uint32_t n); -static uint32_t reg_fetch_dst_swiz(struct ir2_dst_register *reg); -static uint32_t reg_alu_dst_swiz(struct ir2_dst_register *reg); -static uint32_t reg_alu_src_swiz(struct ir2_src_register *reg); - -/* simple allocator to carve allocations out of an up-front allocated heap, - * so that we can free everything easily in one shot. - */ -static void * ir2_alloc(struct ir2_shader *shader, int sz) -{ - void *ptr = &shader->heap[shader->heap_idx]; - shader->heap_idx += align(sz, 4) / 4; - return ptr; -} - -static char * ir2_strdup(struct ir2_shader *shader, const char *str) -{ - char *ptr = NULL; - if (str) { - int len = strlen(str); - ptr = ir2_alloc(shader, len+1); - memcpy(ptr, str, len); - ptr[len] = '\0'; - } - return ptr; -} - -struct ir2_shader * ir2_shader_create(void) -{ - DEBUG_MSG(""); - struct ir2_shader *shader = calloc(1, sizeof(struct ir2_shader)); - shader->max_reg = -1; - return shader; -} - -void ir2_shader_destroy(struct ir2_shader *shader) -{ - DEBUG_MSG(""); - free(shader); -} - -/* check if an instruction is a simple MOV - */ -static struct ir2_instruction * simple_mov(struct ir2_instruction *instr, - bool output) -{ - struct ir2_src_register *src_reg = instr->src_reg; - struct ir2_dst_register *dst_reg = &instr->dst_reg; - struct ir2_register *reg; - unsigned i; - - /* MAXv used for MOV */ - if (instr->instr_type != IR2_ALU_VECTOR || - instr->alu_vector.opc != MAXv) - return NULL; - - /* non identical srcs */ - if (src_reg[0].num != src_reg[1].num) - return NULL; - - /* flags */ - int flags = IR2_REG_NEGATE | IR2_REG_ABS; - if (output) - flags |= IR2_REG_INPUT | IR2_REG_CONST; - if ((src_reg[0].flags & flags) || (src_reg[1].flags & flags)) - return NULL; - - /* clamping */ - if (instr->alu_vector.clamp) - return NULL; - 
- /* swizzling */ - for (i = 0; i < 4; i++) { - char swiz = (dst_reg->swizzle ? dst_reg->swizzle : "xyzw")[i]; - if (swiz == '_') - continue; - - if (swiz != (src_reg[0].swizzle ? src_reg[0].swizzle : "xyzw")[i] || - swiz != (src_reg[1].swizzle ? src_reg[1].swizzle : "xyzw")[i]) - return NULL; - } - - if (output) - reg = &instr->shader->reg[src_reg[0].num]; - else - reg = &instr->shader->reg[dst_reg->num]; - - assert(reg->write_idx >= 0); - if (reg->write_idx != reg->write_idx2) - return NULL; - - if (!output) - return instr; - - instr = instr->shader->instr[reg->write_idx]; - return instr->instr_type != IR2_ALU_VECTOR ? NULL : instr; -} - -static int src_to_reg(struct ir2_instruction *instr, - struct ir2_src_register *reg) -{ - if (reg->flags & IR2_REG_CONST) - return reg->num; - - return instr->shader->reg[reg->num].reg; -} - -static int dst_to_reg(struct ir2_instruction *instr, - struct ir2_dst_register *reg) -{ - if (reg->flags & IR2_REG_EXPORT) - return reg->num; - - return instr->shader->reg[reg->num].reg; -} - -static bool mask_get(uint32_t *mask, unsigned index) -{ - return !!(mask[index / 32] & 1 << index % 32); -} - -static void mask_set(uint32_t *mask, struct ir2_register *reg, int index) -{ - if (reg) { - unsigned i; - for (i = 0; i < ARRAY_SIZE(reg->regmask); i++) - mask[i] |= reg->regmask[i]; - } - if (index >= 0) - mask[index / 32] |= 1 << index % 32; -} - -static bool sets_pred(struct ir2_instruction *instr) -{ - return instr->instr_type == IR2_ALU_SCALAR && - instr->alu_scalar.opc >= PRED_SETEs && - instr->alu_scalar.opc <= PRED_SET_RESTOREs; -} - - - -void* ir2_shader_assemble(struct ir2_shader *shader, - struct ir2_shader_info *info) -{ - /* NOTES - * blob compiler seems to always puts PRED_* instrs in a CF by - * themselves, and wont combine EQ/NE in the same CF - * (not doing this - doesn't seem to make a difference) - * - * TODO: implement scheduling for combining vector+scalar instructions - * -some vector instructions can be replaced by 
scalar - */ - - /* first step: - * 1. remove "NOP" MOV instructions generated by TGSI for input/output: - * 2. track information for register allocation, and to remove - * the dead code when some exports are not needed - * 3. add additional instructions for a20x hw binning if needed - * NOTE: modifies the shader instrs - * this step could be done as instructions are added by compiler instead - */ - - /* mask of exports that must be generated - * used to avoid calculating ps exports with hw binning - */ - uint64_t export = ~0ull; - /* bitmask of variables required for exports defined by "export" */ - uint32_t export_mask[REG_MASK/32+1] = {}; - - unsigned idx, reg_idx; - unsigned max_input = 0; - int export_size = -1; - - for (idx = 0; idx < shader->instr_count; idx++) { - struct ir2_instruction *instr = shader->instr[idx], *prev; - struct ir2_dst_register dst_reg = instr->dst_reg; - - if (dst_reg.flags & IR2_REG_EXPORT) { - if (dst_reg.num < 32) - export_size++; - - if ((prev = simple_mov(instr, true))) { - /* copy instruction but keep dst */ - *instr = *prev; - instr->dst_reg = dst_reg; - } - } - - for (reg_idx = 0; reg_idx < instr->src_reg_count; reg_idx++) { - struct ir2_src_register *src_reg = &instr->src_reg[reg_idx]; - struct ir2_register *reg; - int num; - - if (src_reg->flags & IR2_REG_CONST) - continue; - - num = src_reg->num; - reg = &shader->reg[num]; - reg->read_idx = idx; - - if (src_reg->flags & IR2_REG_INPUT) { - max_input = MAX2(max_input, num); - } else { - /* bypass simple mov used to set src_reg */ - assert(reg->write_idx >= 0); - prev = shader->instr[reg->write_idx]; - if (simple_mov(prev, false)) { - *src_reg = prev->src_reg[0]; - /* process same src_reg again */ - reg_idx -= 1; - continue; - } - } - - /* update dependencies */ - uint32_t *mask = (dst_reg.flags & IR2_REG_EXPORT) ? 
- export_mask : shader->reg[dst_reg.num].regmask; - mask_set(mask, reg, num); - if (sets_pred(instr)) - mask_set(export_mask, reg, num); - } - } - - /* second step: - * emit instructions (with CFs) + RA - */ - instr_cf_t cfs[128], *cf = cfs; - uint32_t alufetch[3*256], *af = alufetch; - - /* RA is done on write, so inputs must be allocated here */ - for (reg_idx = 0; reg_idx <= max_input; reg_idx++) - shader->reg[reg_idx].reg = reg_idx; - info->max_reg = max_input; - - /* CF instr state */ - instr_cf_exec_t exec = { .opc = EXEC }; - instr_cf_alloc_t alloc = { .opc = ALLOC }; - bool need_alloc = 0; - bool pos_export = 0; - - export_size = MAX2(export_size, 0); - - for (idx = 0; idx < shader->instr_count; idx++) { - struct ir2_instruction *instr = shader->instr[idx]; - struct ir2_dst_register *dst_reg = &instr->dst_reg; - unsigned num = dst_reg->num; - struct ir2_register *reg; - - /* a2xx only has 64 registers, so we can use a single 64-bit mask */ - uint64_t regmask = 0ull; - - /* compute the current regmask */ - for (reg_idx = 0; (int) reg_idx <= shader->max_reg; reg_idx++) { - reg = &shader->reg[reg_idx]; - if ((int) idx > reg->write_idx && idx < reg->read_idx) - regmask |= (1ull << reg->reg); - } - - if (dst_reg->flags & IR2_REG_EXPORT) { - /* skip if export is not needed */ - if (!(export & (1ull << num))) - continue; - - /* ALLOC CF: - * want to alloc all < 32 at once - * 32/33 and 62/63 come in pairs - * XXX assuming all 3 types are never interleaved - */ - if (num < 32) { - alloc.size = export_size; - alloc.buffer_select = SQ_PARAMETER_PIXEL; - need_alloc = export_size >= 0; - export_size = -1; - } else if (num == 32 || num == 33) { - alloc.size = 0; - alloc.buffer_select = SQ_MEMORY; - need_alloc = num != 33; - } else { - alloc.size = 0; - alloc.buffer_select = SQ_POSITION; - need_alloc = !pos_export; - pos_export = true; - } - - } else { - /* skip if dst register not needed to compute exports */ - if (!mask_get(export_mask, num)) - continue; - - /* RA on 
first write */ - reg = &shader->reg[num]; - if (reg->write_idx == idx) { - reg->reg = ffsll(~regmask) - 1; - info->max_reg = MAX2(info->max_reg, reg->reg); - } - } - - if (exec.count == 6 || (exec.count && need_alloc)) { - *cf++ = *(instr_cf_t*) &exec; - exec.address += exec.count; - exec.serialize = 0; - exec.count = 0; - } - - if (need_alloc) { - *cf++ = *(instr_cf_t*) &alloc; - need_alloc = false; - } - - int ret = instr_emit(instr, af, idx, info); af += 3; - assert(!ret); - - if (instr->instr_type == IR2_FETCH) - exec.serialize |= 0x1 << exec.count * 2; - if (instr->sync) - exec.serialize |= 0x2 << exec.count * 2; - exec.count += 1; - } - - - exec.opc = !export_size ? EXEC : EXEC_END; - *cf++ = *(instr_cf_t*) &exec; - exec.address += exec.count; - exec.serialize = 0; - exec.count = 0; - - /* GPU will hang without at least one pixel alloc */ - if (!export_size) { - alloc.size = 0; - alloc.buffer_select = SQ_PARAMETER_PIXEL; - *cf++ = *(instr_cf_t*) &alloc; - - exec.opc = EXEC_END; - *cf++ = *(instr_cf_t*) &exec; - } - - unsigned num_cfs = cf - cfs; - - /* insert nop to get an even # of CFs */ - if (num_cfs % 2) { - *cf++ = (instr_cf_t) { .opc = NOP }; - num_cfs++; - } - - /* offset cf addrs */ - for (idx = 0; idx < num_cfs; idx++) { - switch (cfs[idx].opc) { - case EXEC: - case EXEC_END: - cfs[idx].exec.address += num_cfs / 2; - break; - default: - break; - /* XXX and any other address using cf that gets implemented */ - } - } - - /* concatenate cfs+alufetchs */ - uint32_t cfdwords = num_cfs / 2 * 3; - uint32_t alufetchdwords = exec.address * 3; - info->sizedwords = cfdwords + alufetchdwords; - uint32_t *dwords = malloc(info->sizedwords * 4); - assert(dwords); - memcpy(dwords, cfs, cfdwords * 4); - memcpy(&dwords[cfdwords], alufetch, alufetchdwords * 4); - return dwords; -} - -struct ir2_instruction * ir2_instr_create(struct ir2_shader *shader, - int instr_type) -{ - struct ir2_instruction *instr = - ir2_alloc(shader, sizeof(struct ir2_instruction)); - 
DEBUG_MSG("%d", instr_type); - instr->shader = shader; - instr->idx = shader->instr_count; - instr->pred = shader->pred; - instr->instr_type = instr_type; - shader->instr[shader->instr_count++] = instr; - return instr; -} - - -/* - * FETCH instructions: - */ - -static int instr_emit_fetch(struct ir2_instruction *instr, - uint32_t *dwords, uint32_t idx, - struct ir2_shader_info *info) -{ - instr_fetch_t *fetch = (instr_fetch_t *)dwords; - struct ir2_dst_register *dst_reg = &instr->dst_reg; - struct ir2_src_register *src_reg = &instr->src_reg[0]; - - memset(fetch, 0, sizeof(*fetch)); - - fetch->opc = instr->fetch.opc; - - if (instr->fetch.opc == VTX_FETCH) { - instr_fetch_vtx_t *vtx = &fetch->vtx; - - assert(instr->fetch.stride <= 0xff); - assert(instr->fetch.fmt <= 0x3f); - assert(instr->fetch.const_idx <= 0x1f); - assert(instr->fetch.const_idx_sel <= 0x3); - - vtx->src_reg = src_to_reg(instr, src_reg); - vtx->src_swiz = reg_fetch_src_swiz(src_reg, 1); - vtx->dst_reg = dst_to_reg(instr, dst_reg); - vtx->dst_swiz = reg_fetch_dst_swiz(dst_reg); - vtx->must_be_one = 1; - vtx->const_index = instr->fetch.const_idx; - vtx->const_index_sel = instr->fetch.const_idx_sel; - vtx->format_comp_all = !!instr->fetch.is_signed; - vtx->num_format_all = !instr->fetch.is_normalized; - vtx->format = instr->fetch.fmt; - vtx->stride = instr->fetch.stride; - vtx->offset = instr->fetch.offset; - - if (instr->pred != IR2_PRED_NONE) { - vtx->pred_select = 1; - vtx->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0; - } - - /* XXX seems like every FETCH but the first has - * this bit set: - */ - vtx->reserved3 = (idx > 0) ? 0x1 : 0x0; - vtx->reserved0 = (idx > 0) ? 
0x2 : 0x3; - } else if (instr->fetch.opc == TEX_FETCH) { - instr_fetch_tex_t *tex = &fetch->tex; - - assert(instr->fetch.const_idx <= 0x1f); - - tex->src_reg = src_to_reg(instr, src_reg); - tex->src_swiz = reg_fetch_src_swiz(src_reg, 3); - tex->dst_reg = dst_to_reg(instr, dst_reg); - tex->dst_swiz = reg_fetch_dst_swiz(dst_reg); - tex->const_idx = instr->fetch.const_idx; - tex->mag_filter = TEX_FILTER_USE_FETCH_CONST; - tex->min_filter = TEX_FILTER_USE_FETCH_CONST; - tex->mip_filter = TEX_FILTER_USE_FETCH_CONST; - tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST; - tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST; - tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST; - tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST; - tex->use_comp_lod = 1; - tex->use_reg_lod = !instr->fetch.is_cube; - tex->sample_location = SAMPLE_CENTER; - tex->tx_coord_denorm = instr->fetch.is_rect; - - if (instr->pred != IR2_PRED_NONE) { - tex->pred_select = 1; - tex->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0; - } - - } else { - ERROR_MSG("invalid fetch opc: %d\n", instr->fetch.opc); - return -1; - } - - return 0; -} - -/* - * ALU instructions: - */ - -static int instr_emit_alu(struct ir2_instruction *instr_v, - struct ir2_instruction *instr_s, uint32_t *dwords, - struct ir2_shader_info *info) -{ - instr_alu_t *alu = (instr_alu_t *)dwords; - struct ir2_dst_register *vdst_reg, *sdst_reg; - struct ir2_src_register *src1_reg, *src2_reg, *src3_reg; - struct ir2_shader *shader = instr_v ? 
instr_v->shader : instr_s->shader; - enum ir2_pred pred = IR2_PRED_NONE; - - memset(alu, 0, sizeof(*alu)); - - vdst_reg = NULL; - sdst_reg = NULL; - src1_reg = NULL; - src2_reg = NULL; - src3_reg = NULL; - - if (instr_v) { - vdst_reg = &instr_v->dst_reg; - assert(instr_v->src_reg_count >= 2); - src1_reg = &instr_v->src_reg[0]; - src2_reg = &instr_v->src_reg[1]; - if (instr_v->src_reg_count > 2) - src3_reg = &instr_v->src_reg[2]; - pred = instr_v->pred; - } - - if (instr_s) { - sdst_reg = &instr_s->dst_reg; - assert(instr_s->src_reg_count == 1); - assert(!instr_v || vdst_reg->flags == sdst_reg->flags); - assert(!instr_v || pred == instr_s->pred); - if (src3_reg) { - assert(src3_reg->flags == instr_s->src_reg[0].flags); - assert(src3_reg->num == instr_s->src_reg[0].num); - assert(!strcmp(src3_reg->swizzle, instr_s->src_reg[0].swizzle)); - } - src3_reg = &instr_s->src_reg[0]; - pred = instr_s->pred; - } - - if (vdst_reg) { - assert((vdst_reg->flags & ~IR2_REG_EXPORT) == 0); - assert(!vdst_reg->swizzle || (strlen(vdst_reg->swizzle) == 4)); - alu->vector_opc = instr_v->alu_vector.opc; - alu->vector_write_mask = reg_alu_dst_swiz(vdst_reg); - alu->vector_dest = dst_to_reg(instr_v, vdst_reg); - } else { - alu->vector_opc = MAXv; - } - - if (sdst_reg) { - alu->scalar_opc = instr_s->alu_scalar.opc; - alu->scalar_write_mask = reg_alu_dst_swiz(sdst_reg); - alu->scalar_dest = dst_to_reg(instr_s, sdst_reg); - } else { - /* not sure if this is required, but adreno compiler seems - * to always set scalar opc to MAXs if it is not used: - */ - alu->scalar_opc = MAXs; - } - - alu->export_data = - !!((instr_v ? vdst_reg : sdst_reg)->flags & IR2_REG_EXPORT); - - /* export32 has this bit set.. 
it seems to do more than just set - * the base address of the constants used to zero - * TODO make this less of a hack - */ - if (alu->export_data && alu->vector_dest == 32) { - assert(!instr_s); - alu->relative_addr = 1; - } - - if (src1_reg) { - if (src1_reg->flags & IR2_REG_CONST) { - assert(!(src1_reg->flags & IR2_REG_ABS)); - alu->src1_reg_const = src1_reg->num; - } else { - alu->src1_reg = shader->reg[src1_reg->num].reg; - alu->src1_reg_abs = !!(src1_reg->flags & IR2_REG_ABS); - } - alu->src1_swiz = reg_alu_src_swiz(src1_reg); - alu->src1_reg_negate = !!(src1_reg->flags & IR2_REG_NEGATE); - alu->src1_sel = !(src1_reg->flags & IR2_REG_CONST); - } else { - alu->src1_sel = 1; - } - - if (src2_reg) { - if (src2_reg->flags & IR2_REG_CONST) { - assert(!(src2_reg->flags & IR2_REG_ABS)); - alu->src2_reg_const = src2_reg->num; - } else { - alu->src2_reg = shader->reg[src2_reg->num].reg; - alu->src2_reg_abs = !!(src2_reg->flags & IR2_REG_ABS); - } - alu->src2_swiz = reg_alu_src_swiz(src2_reg); - alu->src2_reg_negate = !!(src2_reg->flags & IR2_REG_NEGATE); - alu->src2_sel = !(src2_reg->flags & IR2_REG_CONST); - } else { - alu->src2_sel = 1; - } - - if (src3_reg) { - if (src3_reg->flags & IR2_REG_CONST) { - assert(!(src3_reg->flags & IR2_REG_ABS)); - alu->src3_reg_const = src3_reg->num; - } else { - alu->src3_reg = shader->reg[src3_reg->num].reg; - alu->src3_reg_abs = !!(src3_reg->flags & IR2_REG_ABS); - } - alu->src3_swiz = reg_alu_src_swiz(src3_reg); - alu->src3_reg_negate = !!(src3_reg->flags & IR2_REG_NEGATE); - alu->src3_sel = !(src3_reg->flags & IR2_REG_CONST); - } else { - /* not sure if this is required, but adreno compiler seems - * to always set register bank for 3rd src if unused: - */ - alu->src3_sel = 1; - } - - alu->vector_clamp = instr_v ? instr_v->alu_vector.clamp : 0; - alu->scalar_clamp = instr_s ? instr_s->alu_scalar.clamp : 0; - - if (pred != IR2_PRED_NONE) - alu->pred_select = (pred == IR2_PRED_EQ) ? 
3 : 2; - - return 0; -} - -static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords, - uint32_t idx, struct ir2_shader_info *info) -{ - switch (instr->instr_type) { - case IR2_FETCH: return instr_emit_fetch(instr, dwords, idx, info); - case IR2_ALU_VECTOR: return instr_emit_alu(instr, NULL, dwords, info); - case IR2_ALU_SCALAR: return instr_emit_alu(NULL, instr, dwords, info); - } - return -1; -} - -struct ir2_dst_register * ir2_dst_create(struct ir2_instruction *instr, - int num, const char *swizzle, int flags) -{ - if (!(flags & IR2_REG_EXPORT)) { - struct ir2_register *reg = &instr->shader->reg[num]; - - unsigned i; - for (i = instr->shader->max_reg + 1; i <= num; i++) - instr->shader->reg[i].write_idx = -1; - instr->shader->max_reg = i - 1; - - if (reg->write_idx < 0) - reg->write_idx = instr->idx; - reg->write_idx2 = instr->idx; - } - - struct ir2_dst_register *reg = &instr->dst_reg; - reg->flags = flags; - reg->num = num; - reg->swizzle = ir2_strdup(instr->shader, swizzle); - return reg; -} - -struct ir2_src_register * ir2_reg_create(struct ir2_instruction *instr, - int num, const char *swizzle, int flags) -{ - assert(instr->src_reg_count + 1 <= ARRAY_SIZE(instr->src_reg)); - if (!(flags & IR2_REG_CONST)) { - struct ir2_register *reg = &instr->shader->reg[num]; - - reg->read_idx = instr->idx; - - unsigned i; - for (i = instr->shader->max_reg + 1; i <= num; i++) - instr->shader->reg[i].write_idx = -1; - instr->shader->max_reg = i - 1; - } - - struct ir2_src_register *reg = &instr->src_reg[instr->src_reg_count++]; - reg->flags = flags; - reg->num = num; - reg->swizzle = ir2_strdup(instr->shader, swizzle); - return reg; -} - -static uint32_t reg_fetch_src_swiz(struct ir2_src_register *reg, uint32_t n) -{ - uint32_t swiz = 0; - int i; - - assert((reg->flags & ~IR2_REG_INPUT) == 0); - assert(reg->swizzle); - - DEBUG_MSG("fetch src R%d.%s", reg->num, reg->swizzle); - - for (i = n-1; i >= 0; i--) { - swiz <<= 2; - switch (reg->swizzle[i]) { - default: - 
ERROR_MSG("invalid fetch src swizzle: %s", reg->swizzle); - case 'x': swiz |= 0x0; break; - case 'y': swiz |= 0x1; break; - case 'z': swiz |= 0x2; break; - case 'w': swiz |= 0x3; break; - } - } - - return swiz; -} - -static uint32_t reg_fetch_dst_swiz(struct ir2_dst_register *reg) -{ - uint32_t swiz = 0; - int i; - - assert(reg->flags == 0); - assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); - - DEBUG_MSG("fetch dst R%d.%s", reg->num, reg->swizzle); - - if (reg->swizzle) { - for (i = 3; i >= 0; i--) { - swiz <<= 3; - switch (reg->swizzle[i]) { - default: - ERROR_MSG("invalid dst swizzle: %s", reg->swizzle); - case 'x': swiz |= 0x0; break; - case 'y': swiz |= 0x1; break; - case 'z': swiz |= 0x2; break; - case 'w': swiz |= 0x3; break; - case '0': swiz |= 0x4; break; - case '1': swiz |= 0x5; break; - case '_': swiz |= 0x7; break; - } - } - } else { - swiz = 0x688; - } - - return swiz; -} - -/* actually, a write-mask */ -static uint32_t reg_alu_dst_swiz(struct ir2_dst_register *reg) -{ - uint32_t swiz = 0; - int i; - - assert((reg->flags & ~IR2_REG_EXPORT) == 0); - assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); - - DEBUG_MSG("alu dst R%d.%s", reg->num, reg->swizzle); - - if (reg->swizzle) { - for (i = 3; i >= 0; i--) { - swiz <<= 1; - if (reg->swizzle[i] == "xyzw"[i]) { - swiz |= 0x1; - } else if (reg->swizzle[i] != '_') { - ERROR_MSG("invalid dst swizzle: %s", reg->swizzle); - break; - } - } - } else { - swiz = 0xf; - } - - return swiz; -} - -static uint32_t reg_alu_src_swiz(struct ir2_src_register *reg) -{ - uint32_t swiz = 0; - int i; - - assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); - - DEBUG_MSG("vector src R%d.%s", reg->num, reg->swizzle); - - if (reg->swizzle) { - for (i = 3; i >= 0; i--) { - swiz <<= 2; - switch (reg->swizzle[i]) { - default: - ERROR_MSG("invalid vector src swizzle: %s", reg->swizzle); - case 'x': swiz |= (0x0 - i) & 0x3; break; - case 'y': swiz |= (0x1 - i) & 0x3; break; - case 'z': swiz |= (0x2 - i) & 0x3; break; - case 
'w': swiz |= (0x3 - i) & 0x3; break; - } - } - } else { - swiz = 0x0; - } - - return swiz; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir-a2xx.h mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir-a2xx.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir-a2xx.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir-a2xx.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,188 +0,0 @@ -/* - * Copyright (c) 2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef IR2_H_ -#define IR2_H_ - -#include -#include - -#include "instr-a2xx.h" - -/* low level intermediate representation of an adreno a2xx shader program */ - -struct ir2_shader; - -#define REG_MASK 0xff - -struct ir2_shader_info { - uint16_t sizedwords; - int8_t max_reg; /* highest GPR # used by shader */ -}; - -struct ir2_register { - int16_t write_idx, write_idx2, read_idx, reg; - /* bitmask of variables on which this one depends - * XXX: use bitmask util? - */ - uint32_t regmask[REG_MASK/32+1]; -}; - -struct ir2_src_register { - enum { - IR2_REG_INPUT = 0x1, - IR2_REG_CONST = 0x2, - IR2_REG_NEGATE = 0x4, - IR2_REG_ABS = 0x8, - } flags; - int num; - char *swizzle; -}; - -struct ir2_dst_register { - enum { - IR2_REG_EXPORT = 0x1, - } flags; - int num; - char *swizzle; -}; - -enum ir2_pred { - IR2_PRED_NONE = 0, - IR2_PRED_EQ = 1, - IR2_PRED_NE = 2, -}; - -struct ir2_instruction { - struct ir2_shader *shader; - unsigned idx; - enum { - IR2_FETCH, - IR2_ALU_VECTOR, - IR2_ALU_SCALAR, - } instr_type; - enum ir2_pred pred; - int sync; - unsigned src_reg_count; - struct ir2_dst_register dst_reg; - struct ir2_src_register src_reg[3]; - union { - /* FETCH specific: */ - struct { - instr_fetch_opc_t opc; - unsigned const_idx; - /* texture fetch specific: */ - bool is_cube : 1; - bool is_rect : 1; - /* vertex fetch specific: */ - unsigned const_idx_sel; - enum a2xx_sq_surfaceformat fmt; - bool is_signed : 1; - bool is_normalized : 1; - uint32_t stride; - uint32_t offset; - } fetch; - /* ALU-Vector specific: */ - struct { - instr_vector_opc_t opc; - bool clamp; - } alu_vector; - /* ALU-Scalar specific: */ - struct { - instr_scalar_opc_t opc; - bool clamp; - } alu_scalar; - }; -}; - -struct ir2_shader { - unsigned instr_count; - int max_reg; - struct ir2_register reg[REG_MASK+1]; - - struct ir2_instruction *instr[0x200]; - uint32_t heap[100 * 4096]; - unsigned heap_idx; - - enum ir2_pred pred; /* pred inherited by newly created instrs */ -}; - -struct ir2_shader * 
ir2_shader_create(void); -void ir2_shader_destroy(struct ir2_shader *shader); -void * ir2_shader_assemble(struct ir2_shader *shader, - struct ir2_shader_info *info); - -struct ir2_instruction * ir2_instr_create(struct ir2_shader *shader, - int instr_type); - -struct ir2_dst_register * ir2_dst_create(struct ir2_instruction *instr, - int num, const char *swizzle, int flags); -struct ir2_src_register * ir2_reg_create(struct ir2_instruction *instr, - int num, const char *swizzle, int flags); - -/* some helper fxns: */ - -static inline struct ir2_instruction * -ir2_instr_create_alu_v(struct ir2_shader *shader, instr_vector_opc_t vop) -{ - struct ir2_instruction *instr = ir2_instr_create(shader, IR2_ALU_VECTOR); - if (!instr) - return instr; - instr->alu_vector.opc = vop; - return instr; -} - -static inline struct ir2_instruction * -ir2_instr_create_alu_s(struct ir2_shader *shader, instr_scalar_opc_t sop) -{ - struct ir2_instruction *instr = ir2_instr_create(shader, IR2_ALU_SCALAR); - if (!instr) - return instr; - instr->alu_scalar.opc = sop; - return instr; -} - -static inline struct ir2_instruction * -ir2_instr_create_vtx_fetch(struct ir2_shader *shader, int ci, int cis, - enum a2xx_sq_surfaceformat fmt, bool is_signed, int stride) -{ - struct ir2_instruction *instr = ir2_instr_create(shader, IR2_FETCH); - instr->fetch.opc = VTX_FETCH; - instr->fetch.const_idx = ci; - instr->fetch.const_idx_sel = cis; - instr->fetch.fmt = fmt; - instr->fetch.is_signed = is_signed; - instr->fetch.stride = stride; - return instr; -} -static inline struct ir2_instruction * -ir2_instr_create_tex_fetch(struct ir2_shader *shader, int ci) -{ - struct ir2_instruction *instr = ir2_instr_create(shader, IR2_FETCH); - instr->fetch.opc = TEX_FETCH; - instr->fetch.const_idx = ci; - return instr; -} - - -#endif /* IR2_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h --- 
mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,3239 +0,0 @@ -#ifndef A3XX_XML -#define A3XX_XML - -/* Autogenerated file, DO NOT EDIT manually! - -This file was generated by the rules-ng-ng headergen tool in this git repository: -http://github.com/freedreno/envytools/ -git clone https://github.com/freedreno/envytools.git - -The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 37936 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14201 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 42864 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 140514 bytes, from 2018-10-08 21:57:35) -- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) -- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) - -Copyright (C) 2013-2018 by the following authors: -- Rob Clark (robclark) -- Ilia Mirkin (imirkin) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, 
sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - - -enum a3xx_tile_mode { - LINEAR = 0, - TILE_32X32 = 2, -}; - -enum a3xx_state_block_id { - HLSQ_BLOCK_ID_TP_TEX = 2, - HLSQ_BLOCK_ID_TP_MIPMAP = 3, - HLSQ_BLOCK_ID_SP_VS = 4, - HLSQ_BLOCK_ID_SP_FS = 6, -}; - -enum a3xx_cache_opcode { - INVALIDATE = 1, -}; - -enum a3xx_vtx_fmt { - VFMT_32_FLOAT = 0, - VFMT_32_32_FLOAT = 1, - VFMT_32_32_32_FLOAT = 2, - VFMT_32_32_32_32_FLOAT = 3, - VFMT_16_FLOAT = 4, - VFMT_16_16_FLOAT = 5, - VFMT_16_16_16_FLOAT = 6, - VFMT_16_16_16_16_FLOAT = 7, - VFMT_32_FIXED = 8, - VFMT_32_32_FIXED = 9, - VFMT_32_32_32_FIXED = 10, - VFMT_32_32_32_32_FIXED = 11, - VFMT_16_SINT = 16, - VFMT_16_16_SINT = 17, - VFMT_16_16_16_SINT = 18, - VFMT_16_16_16_16_SINT = 19, - VFMT_16_UINT = 20, - VFMT_16_16_UINT = 21, - VFMT_16_16_16_UINT = 22, - VFMT_16_16_16_16_UINT = 23, - VFMT_16_SNORM = 24, - VFMT_16_16_SNORM = 25, - VFMT_16_16_16_SNORM = 26, - VFMT_16_16_16_16_SNORM = 27, - VFMT_16_UNORM = 28, - VFMT_16_16_UNORM = 29, - VFMT_16_16_16_UNORM = 30, - VFMT_16_16_16_16_UNORM = 31, - VFMT_32_UINT = 32, - VFMT_32_32_UINT = 33, - VFMT_32_32_32_UINT = 34, - VFMT_32_32_32_32_UINT = 35, - VFMT_32_SINT = 36, - VFMT_32_32_SINT = 37, - VFMT_32_32_32_SINT = 38, - 
VFMT_32_32_32_32_SINT = 39, - VFMT_8_UINT = 40, - VFMT_8_8_UINT = 41, - VFMT_8_8_8_UINT = 42, - VFMT_8_8_8_8_UINT = 43, - VFMT_8_UNORM = 44, - VFMT_8_8_UNORM = 45, - VFMT_8_8_8_UNORM = 46, - VFMT_8_8_8_8_UNORM = 47, - VFMT_8_SINT = 48, - VFMT_8_8_SINT = 49, - VFMT_8_8_8_SINT = 50, - VFMT_8_8_8_8_SINT = 51, - VFMT_8_SNORM = 52, - VFMT_8_8_SNORM = 53, - VFMT_8_8_8_SNORM = 54, - VFMT_8_8_8_8_SNORM = 55, - VFMT_10_10_10_2_UINT = 56, - VFMT_10_10_10_2_UNORM = 57, - VFMT_10_10_10_2_SINT = 58, - VFMT_10_10_10_2_SNORM = 59, - VFMT_2_10_10_10_UINT = 60, - VFMT_2_10_10_10_UNORM = 61, - VFMT_2_10_10_10_SINT = 62, - VFMT_2_10_10_10_SNORM = 63, -}; - -enum a3xx_tex_fmt { - TFMT_5_6_5_UNORM = 4, - TFMT_5_5_5_1_UNORM = 5, - TFMT_4_4_4_4_UNORM = 7, - TFMT_Z16_UNORM = 9, - TFMT_X8Z24_UNORM = 10, - TFMT_Z32_FLOAT = 11, - TFMT_UV_64X32 = 16, - TFMT_VU_64X32 = 17, - TFMT_Y_64X32 = 18, - TFMT_NV12_64X32 = 19, - TFMT_UV_LINEAR = 20, - TFMT_VU_LINEAR = 21, - TFMT_Y_LINEAR = 22, - TFMT_NV12_LINEAR = 23, - TFMT_I420_Y = 24, - TFMT_I420_U = 26, - TFMT_I420_V = 27, - TFMT_ATC_RGB = 32, - TFMT_ATC_RGBA_EXPLICIT = 33, - TFMT_ETC1 = 34, - TFMT_ATC_RGBA_INTERPOLATED = 35, - TFMT_DXT1 = 36, - TFMT_DXT3 = 37, - TFMT_DXT5 = 38, - TFMT_2_10_10_10_UNORM = 40, - TFMT_10_10_10_2_UNORM = 41, - TFMT_9_9_9_E5_FLOAT = 42, - TFMT_11_11_10_FLOAT = 43, - TFMT_A8_UNORM = 44, - TFMT_L8_UNORM = 45, - TFMT_L8_A8_UNORM = 47, - TFMT_8_UNORM = 48, - TFMT_8_8_UNORM = 49, - TFMT_8_8_8_UNORM = 50, - TFMT_8_8_8_8_UNORM = 51, - TFMT_8_SNORM = 52, - TFMT_8_8_SNORM = 53, - TFMT_8_8_8_SNORM = 54, - TFMT_8_8_8_8_SNORM = 55, - TFMT_8_UINT = 56, - TFMT_8_8_UINT = 57, - TFMT_8_8_8_UINT = 58, - TFMT_8_8_8_8_UINT = 59, - TFMT_8_SINT = 60, - TFMT_8_8_SINT = 61, - TFMT_8_8_8_SINT = 62, - TFMT_8_8_8_8_SINT = 63, - TFMT_16_FLOAT = 64, - TFMT_16_16_FLOAT = 65, - TFMT_16_16_16_16_FLOAT = 67, - TFMT_16_UINT = 68, - TFMT_16_16_UINT = 69, - TFMT_16_16_16_16_UINT = 71, - TFMT_16_SINT = 72, - TFMT_16_16_SINT = 73, - TFMT_16_16_16_16_SINT = 
75, - TFMT_16_UNORM = 76, - TFMT_16_16_UNORM = 77, - TFMT_16_16_16_16_UNORM = 79, - TFMT_16_SNORM = 80, - TFMT_16_16_SNORM = 81, - TFMT_16_16_16_16_SNORM = 83, - TFMT_32_FLOAT = 84, - TFMT_32_32_FLOAT = 85, - TFMT_32_32_32_32_FLOAT = 87, - TFMT_32_UINT = 88, - TFMT_32_32_UINT = 89, - TFMT_32_32_32_32_UINT = 91, - TFMT_32_SINT = 92, - TFMT_32_32_SINT = 93, - TFMT_32_32_32_32_SINT = 95, - TFMT_2_10_10_10_UINT = 96, - TFMT_10_10_10_2_UINT = 97, - TFMT_ETC2_RG11_SNORM = 112, - TFMT_ETC2_RG11_UNORM = 113, - TFMT_ETC2_R11_SNORM = 114, - TFMT_ETC2_R11_UNORM = 115, - TFMT_ETC2_RGBA8 = 116, - TFMT_ETC2_RGB8A1 = 117, - TFMT_ETC2_RGB8 = 118, -}; - -enum a3xx_tex_fetchsize { - TFETCH_DISABLE = 0, - TFETCH_1_BYTE = 1, - TFETCH_2_BYTE = 2, - TFETCH_4_BYTE = 3, - TFETCH_8_BYTE = 4, - TFETCH_16_BYTE = 5, -}; - -enum a3xx_color_fmt { - RB_R5G6B5_UNORM = 0, - RB_R5G5B5A1_UNORM = 1, - RB_R4G4B4A4_UNORM = 3, - RB_R8G8B8_UNORM = 4, - RB_R8G8B8A8_UNORM = 8, - RB_R8G8B8A8_SNORM = 9, - RB_R8G8B8A8_UINT = 10, - RB_R8G8B8A8_SINT = 11, - RB_R8G8_UNORM = 12, - RB_R8G8_SNORM = 13, - RB_R8_UINT = 14, - RB_R8_SINT = 15, - RB_R10G10B10A2_UNORM = 16, - RB_A2R10G10B10_UNORM = 17, - RB_R10G10B10A2_UINT = 18, - RB_A2R10G10B10_UINT = 19, - RB_A8_UNORM = 20, - RB_R8_UNORM = 21, - RB_R16_FLOAT = 24, - RB_R16G16_FLOAT = 25, - RB_R16G16B16A16_FLOAT = 27, - RB_R11G11B10_FLOAT = 28, - RB_R16_SNORM = 32, - RB_R16G16_SNORM = 33, - RB_R16G16B16A16_SNORM = 35, - RB_R16_UNORM = 36, - RB_R16G16_UNORM = 37, - RB_R16G16B16A16_UNORM = 39, - RB_R16_SINT = 40, - RB_R16G16_SINT = 41, - RB_R16G16B16A16_SINT = 43, - RB_R16_UINT = 44, - RB_R16G16_UINT = 45, - RB_R16G16B16A16_UINT = 47, - RB_R32_FLOAT = 48, - RB_R32G32_FLOAT = 49, - RB_R32G32B32A32_FLOAT = 51, - RB_R32_SINT = 52, - RB_R32G32_SINT = 53, - RB_R32G32B32A32_SINT = 55, - RB_R32_UINT = 56, - RB_R32G32_UINT = 57, - RB_R32G32B32A32_UINT = 59, -}; - -enum a3xx_cp_perfcounter_select { - CP_ALWAYS_COUNT = 0, - CP_AHB_PFPTRANS_WAIT = 3, - CP_AHB_NRTTRANS_WAIT = 6, - 
CP_CSF_NRT_READ_WAIT = 8, - CP_CSF_I1_FIFO_FULL = 9, - CP_CSF_I2_FIFO_FULL = 10, - CP_CSF_ST_FIFO_FULL = 11, - CP_RESERVED_12 = 12, - CP_CSF_RING_ROQ_FULL = 13, - CP_CSF_I1_ROQ_FULL = 14, - CP_CSF_I2_ROQ_FULL = 15, - CP_CSF_ST_ROQ_FULL = 16, - CP_RESERVED_17 = 17, - CP_MIU_TAG_MEM_FULL = 18, - CP_MIU_NRT_WRITE_STALLED = 22, - CP_MIU_NRT_READ_STALLED = 23, - CP_ME_REGS_RB_DONE_FIFO_FULL = 26, - CP_ME_REGS_VS_EVENT_FIFO_FULL = 27, - CP_ME_REGS_PS_EVENT_FIFO_FULL = 28, - CP_ME_REGS_CF_EVENT_FIFO_FULL = 29, - CP_ME_MICRO_RB_STARVED = 30, - CP_AHB_RBBM_DWORD_SENT = 40, - CP_ME_BUSY_CLOCKS = 41, - CP_ME_WAIT_CONTEXT_AVAIL = 42, - CP_PFP_TYPE0_PACKET = 43, - CP_PFP_TYPE3_PACKET = 44, - CP_CSF_RB_WPTR_NEQ_RPTR = 45, - CP_CSF_I1_SIZE_NEQ_ZERO = 46, - CP_CSF_I2_SIZE_NEQ_ZERO = 47, - CP_CSF_RBI1I2_FETCHING = 48, -}; - -enum a3xx_gras_tse_perfcounter_select { - GRAS_TSEPERF_INPUT_PRIM = 0, - GRAS_TSEPERF_INPUT_NULL_PRIM = 1, - GRAS_TSEPERF_TRIVAL_REJ_PRIM = 2, - GRAS_TSEPERF_CLIPPED_PRIM = 3, - GRAS_TSEPERF_NEW_PRIM = 4, - GRAS_TSEPERF_ZERO_AREA_PRIM = 5, - GRAS_TSEPERF_FACENESS_CULLED_PRIM = 6, - GRAS_TSEPERF_ZERO_PIXEL_PRIM = 7, - GRAS_TSEPERF_OUTPUT_NULL_PRIM = 8, - GRAS_TSEPERF_OUTPUT_VISIBLE_PRIM = 9, - GRAS_TSEPERF_PRE_CLIP_PRIM = 10, - GRAS_TSEPERF_POST_CLIP_PRIM = 11, - GRAS_TSEPERF_WORKING_CYCLES = 12, - GRAS_TSEPERF_PC_STARVE = 13, - GRAS_TSERASPERF_STALL = 14, -}; - -enum a3xx_gras_ras_perfcounter_select { - GRAS_RASPERF_16X16_TILES = 0, - GRAS_RASPERF_8X8_TILES = 1, - GRAS_RASPERF_4X4_TILES = 2, - GRAS_RASPERF_WORKING_CYCLES = 3, - GRAS_RASPERF_STALL_CYCLES_BY_RB = 4, - GRAS_RASPERF_STALL_CYCLES_BY_VSC = 5, - GRAS_RASPERF_STARVE_CYCLES_BY_TSE = 6, -}; - -enum a3xx_hlsq_perfcounter_select { - HLSQ_PERF_SP_VS_CONSTANT = 0, - HLSQ_PERF_SP_VS_INSTRUCTIONS = 1, - HLSQ_PERF_SP_FS_CONSTANT = 2, - HLSQ_PERF_SP_FS_INSTRUCTIONS = 3, - HLSQ_PERF_TP_STATE = 4, - HLSQ_PERF_QUADS = 5, - HLSQ_PERF_PIXELS = 6, - HLSQ_PERF_VERTICES = 7, - HLSQ_PERF_FS8_THREADS = 8, - 
HLSQ_PERF_FS16_THREADS = 9, - HLSQ_PERF_FS32_THREADS = 10, - HLSQ_PERF_VS8_THREADS = 11, - HLSQ_PERF_VS16_THREADS = 12, - HLSQ_PERF_SP_VS_DATA_BYTES = 13, - HLSQ_PERF_SP_FS_DATA_BYTES = 14, - HLSQ_PERF_ACTIVE_CYCLES = 15, - HLSQ_PERF_STALL_CYCLES_SP_STATE = 16, - HLSQ_PERF_STALL_CYCLES_SP_VS = 17, - HLSQ_PERF_STALL_CYCLES_SP_FS = 18, - HLSQ_PERF_STALL_CYCLES_UCHE = 19, - HLSQ_PERF_RBBM_LOAD_CYCLES = 20, - HLSQ_PERF_DI_TO_VS_START_SP0 = 21, - HLSQ_PERF_DI_TO_FS_START_SP0 = 22, - HLSQ_PERF_VS_START_TO_DONE_SP0 = 23, - HLSQ_PERF_FS_START_TO_DONE_SP0 = 24, - HLSQ_PERF_SP_STATE_COPY_CYCLES_VS = 25, - HLSQ_PERF_SP_STATE_COPY_CYCLES_FS = 26, - HLSQ_PERF_UCHE_LATENCY_CYCLES = 27, - HLSQ_PERF_UCHE_LATENCY_COUNT = 28, -}; - -enum a3xx_pc_perfcounter_select { - PC_PCPERF_VISIBILITY_STREAMS = 0, - PC_PCPERF_TOTAL_INSTANCES = 1, - PC_PCPERF_PRIMITIVES_PC_VPC = 2, - PC_PCPERF_PRIMITIVES_KILLED_BY_VS = 3, - PC_PCPERF_PRIMITIVES_VISIBLE_BY_VS = 4, - PC_PCPERF_DRAWCALLS_KILLED_BY_VS = 5, - PC_PCPERF_DRAWCALLS_VISIBLE_BY_VS = 6, - PC_PCPERF_VERTICES_TO_VFD = 7, - PC_PCPERF_REUSED_VERTICES = 8, - PC_PCPERF_CYCLES_STALLED_BY_VFD = 9, - PC_PCPERF_CYCLES_STALLED_BY_TSE = 10, - PC_PCPERF_CYCLES_STALLED_BY_VBIF = 11, - PC_PCPERF_CYCLES_IS_WORKING = 12, -}; - -enum a3xx_rb_perfcounter_select { - RB_RBPERF_ACTIVE_CYCLES_ANY = 0, - RB_RBPERF_ACTIVE_CYCLES_ALL = 1, - RB_RBPERF_STARVE_CYCLES_BY_SP = 2, - RB_RBPERF_STARVE_CYCLES_BY_RAS = 3, - RB_RBPERF_STARVE_CYCLES_BY_MARB = 4, - RB_RBPERF_STALL_CYCLES_BY_MARB = 5, - RB_RBPERF_STALL_CYCLES_BY_HLSQ = 6, - RB_RBPERF_RB_MARB_DATA = 7, - RB_RBPERF_SP_RB_QUAD = 8, - RB_RBPERF_RAS_EARLY_Z_QUADS = 9, - RB_RBPERF_GMEM_CH0_READ = 10, - RB_RBPERF_GMEM_CH1_READ = 11, - RB_RBPERF_GMEM_CH0_WRITE = 12, - RB_RBPERF_GMEM_CH1_WRITE = 13, - RB_RBPERF_CP_CONTEXT_DONE = 14, - RB_RBPERF_CP_CACHE_FLUSH = 15, - RB_RBPERF_CP_ZPASS_DONE = 16, -}; - -enum a3xx_rbbm_perfcounter_select { - RBBM_ALAWYS_ON = 0, - RBBM_VBIF_BUSY = 1, - RBBM_TSE_BUSY = 2, - RBBM_RAS_BUSY = 
3, - RBBM_PC_DCALL_BUSY = 4, - RBBM_PC_VSD_BUSY = 5, - RBBM_VFD_BUSY = 6, - RBBM_VPC_BUSY = 7, - RBBM_UCHE_BUSY = 8, - RBBM_VSC_BUSY = 9, - RBBM_HLSQ_BUSY = 10, - RBBM_ANY_RB_BUSY = 11, - RBBM_ANY_TEX_BUSY = 12, - RBBM_ANY_USP_BUSY = 13, - RBBM_ANY_MARB_BUSY = 14, - RBBM_ANY_ARB_BUSY = 15, - RBBM_AHB_STATUS_BUSY = 16, - RBBM_AHB_STATUS_STALLED = 17, - RBBM_AHB_STATUS_TXFR = 18, - RBBM_AHB_STATUS_TXFR_SPLIT = 19, - RBBM_AHB_STATUS_TXFR_ERROR = 20, - RBBM_AHB_STATUS_LONG_STALL = 21, - RBBM_RBBM_STATUS_MASKED = 22, -}; - -enum a3xx_sp_perfcounter_select { - SP_LM_LOAD_INSTRUCTIONS = 0, - SP_LM_STORE_INSTRUCTIONS = 1, - SP_LM_ATOMICS = 2, - SP_UCHE_LOAD_INSTRUCTIONS = 3, - SP_UCHE_STORE_INSTRUCTIONS = 4, - SP_UCHE_ATOMICS = 5, - SP_VS_TEX_INSTRUCTIONS = 6, - SP_VS_CFLOW_INSTRUCTIONS = 7, - SP_VS_EFU_INSTRUCTIONS = 8, - SP_VS_FULL_ALU_INSTRUCTIONS = 9, - SP_VS_HALF_ALU_INSTRUCTIONS = 10, - SP_FS_TEX_INSTRUCTIONS = 11, - SP_FS_CFLOW_INSTRUCTIONS = 12, - SP_FS_EFU_INSTRUCTIONS = 13, - SP_FS_FULL_ALU_INSTRUCTIONS = 14, - SP_FS_HALF_ALU_INSTRUCTIONS = 15, - SP_FS_BARY_INSTRUCTIONS = 16, - SP_VS_INSTRUCTIONS = 17, - SP_FS_INSTRUCTIONS = 18, - SP_ADDR_LOCK_COUNT = 19, - SP_UCHE_READ_TRANS = 20, - SP_UCHE_WRITE_TRANS = 21, - SP_EXPORT_VPC_TRANS = 22, - SP_EXPORT_RB_TRANS = 23, - SP_PIXELS_KILLED = 24, - SP_ICL1_REQUESTS = 25, - SP_ICL1_MISSES = 26, - SP_ICL0_REQUESTS = 27, - SP_ICL0_MISSES = 28, - SP_ALU_ACTIVE_CYCLES = 29, - SP_EFU_ACTIVE_CYCLES = 30, - SP_STALL_CYCLES_BY_VPC = 31, - SP_STALL_CYCLES_BY_TP = 32, - SP_STALL_CYCLES_BY_UCHE = 33, - SP_STALL_CYCLES_BY_RB = 34, - SP_ACTIVE_CYCLES_ANY = 35, - SP_ACTIVE_CYCLES_ALL = 36, -}; - -enum a3xx_tp_perfcounter_select { - TPL1_TPPERF_L1_REQUESTS = 0, - TPL1_TPPERF_TP0_L1_REQUESTS = 1, - TPL1_TPPERF_TP0_L1_MISSES = 2, - TPL1_TPPERF_TP1_L1_REQUESTS = 3, - TPL1_TPPERF_TP1_L1_MISSES = 4, - TPL1_TPPERF_TP2_L1_REQUESTS = 5, - TPL1_TPPERF_TP2_L1_MISSES = 6, - TPL1_TPPERF_TP3_L1_REQUESTS = 7, - TPL1_TPPERF_TP3_L1_MISSES = 8, - 
TPL1_TPPERF_OUTPUT_TEXELS_POINT = 9, - TPL1_TPPERF_OUTPUT_TEXELS_BILINEAR = 10, - TPL1_TPPERF_OUTPUT_TEXELS_MIP = 11, - TPL1_TPPERF_OUTPUT_TEXELS_ANISO = 12, - TPL1_TPPERF_BILINEAR_OPS = 13, - TPL1_TPPERF_QUADSQUADS_OFFSET = 14, - TPL1_TPPERF_QUADQUADS_SHADOW = 15, - TPL1_TPPERF_QUADS_ARRAY = 16, - TPL1_TPPERF_QUADS_PROJECTION = 17, - TPL1_TPPERF_QUADS_GRADIENT = 18, - TPL1_TPPERF_QUADS_1D2D = 19, - TPL1_TPPERF_QUADS_3DCUBE = 20, - TPL1_TPPERF_ZERO_LOD = 21, - TPL1_TPPERF_OUTPUT_TEXELS = 22, - TPL1_TPPERF_ACTIVE_CYCLES_ANY = 23, - TPL1_TPPERF_ACTIVE_CYCLES_ALL = 24, - TPL1_TPPERF_STALL_CYCLES_BY_ARB = 25, - TPL1_TPPERF_LATENCY = 26, - TPL1_TPPERF_LATENCY_TRANS = 27, -}; - -enum a3xx_vfd_perfcounter_select { - VFD_PERF_UCHE_BYTE_FETCHED = 0, - VFD_PERF_UCHE_TRANS = 1, - VFD_PERF_VPC_BYPASS_COMPONENTS = 2, - VFD_PERF_FETCH_INSTRUCTIONS = 3, - VFD_PERF_DECODE_INSTRUCTIONS = 4, - VFD_PERF_ACTIVE_CYCLES = 5, - VFD_PERF_STALL_CYCLES_UCHE = 6, - VFD_PERF_STALL_CYCLES_HLSQ = 7, - VFD_PERF_STALL_CYCLES_VPC_BYPASS = 8, - VFD_PERF_STALL_CYCLES_VPC_ALLOC = 9, -}; - -enum a3xx_vpc_perfcounter_select { - VPC_PERF_SP_LM_PRIMITIVES = 0, - VPC_PERF_COMPONENTS_FROM_SP = 1, - VPC_PERF_SP_LM_COMPONENTS = 2, - VPC_PERF_ACTIVE_CYCLES = 3, - VPC_PERF_STALL_CYCLES_LM = 4, - VPC_PERF_STALL_CYCLES_RAS = 5, -}; - -enum a3xx_uche_perfcounter_select { - UCHE_UCHEPERF_VBIF_READ_BEATS_TP = 0, - UCHE_UCHEPERF_VBIF_READ_BEATS_VFD = 1, - UCHE_UCHEPERF_VBIF_READ_BEATS_HLSQ = 2, - UCHE_UCHEPERF_VBIF_READ_BEATS_MARB = 3, - UCHE_UCHEPERF_VBIF_READ_BEATS_SP = 4, - UCHE_UCHEPERF_READ_REQUESTS_TP = 8, - UCHE_UCHEPERF_READ_REQUESTS_VFD = 9, - UCHE_UCHEPERF_READ_REQUESTS_HLSQ = 10, - UCHE_UCHEPERF_READ_REQUESTS_MARB = 11, - UCHE_UCHEPERF_READ_REQUESTS_SP = 12, - UCHE_UCHEPERF_WRITE_REQUESTS_MARB = 13, - UCHE_UCHEPERF_WRITE_REQUESTS_SP = 14, - UCHE_UCHEPERF_TAG_CHECK_FAILS = 15, - UCHE_UCHEPERF_EVICTS = 16, - UCHE_UCHEPERF_FLUSHES = 17, - UCHE_UCHEPERF_VBIF_LATENCY_CYCLES = 18, - 
UCHE_UCHEPERF_VBIF_LATENCY_SAMPLES = 19, - UCHE_UCHEPERF_ACTIVE_CYCLES = 20, -}; - -enum a3xx_intp_mode { - SMOOTH = 0, - FLAT = 1, - ZERO = 2, - ONE = 3, -}; - -enum a3xx_repl_mode { - S = 1, - T = 2, - ONE_T = 3, -}; - -enum a3xx_tex_filter { - A3XX_TEX_NEAREST = 0, - A3XX_TEX_LINEAR = 1, - A3XX_TEX_ANISO = 2, -}; - -enum a3xx_tex_clamp { - A3XX_TEX_REPEAT = 0, - A3XX_TEX_CLAMP_TO_EDGE = 1, - A3XX_TEX_MIRROR_REPEAT = 2, - A3XX_TEX_CLAMP_TO_BORDER = 3, - A3XX_TEX_MIRROR_CLAMP = 4, -}; - -enum a3xx_tex_aniso { - A3XX_TEX_ANISO_1 = 0, - A3XX_TEX_ANISO_2 = 1, - A3XX_TEX_ANISO_4 = 2, - A3XX_TEX_ANISO_8 = 3, - A3XX_TEX_ANISO_16 = 4, -}; - -enum a3xx_tex_swiz { - A3XX_TEX_X = 0, - A3XX_TEX_Y = 1, - A3XX_TEX_Z = 2, - A3XX_TEX_W = 3, - A3XX_TEX_ZERO = 4, - A3XX_TEX_ONE = 5, -}; - -enum a3xx_tex_type { - A3XX_TEX_1D = 0, - A3XX_TEX_2D = 1, - A3XX_TEX_CUBE = 2, - A3XX_TEX_3D = 3, -}; - -enum a3xx_tex_msaa { - A3XX_TPL1_MSAA1X = 0, - A3XX_TPL1_MSAA2X = 1, - A3XX_TPL1_MSAA4X = 2, - A3XX_TPL1_MSAA8X = 3, -}; - -#define A3XX_INT0_RBBM_GPU_IDLE 0x00000001 -#define A3XX_INT0_RBBM_AHB_ERROR 0x00000002 -#define A3XX_INT0_RBBM_REG_TIMEOUT 0x00000004 -#define A3XX_INT0_RBBM_ME_MS_TIMEOUT 0x00000008 -#define A3XX_INT0_RBBM_PFP_MS_TIMEOUT 0x00000010 -#define A3XX_INT0_RBBM_ATB_BUS_OVERFLOW 0x00000020 -#define A3XX_INT0_VFD_ERROR 0x00000040 -#define A3XX_INT0_CP_SW_INT 0x00000080 -#define A3XX_INT0_CP_T0_PACKET_IN_IB 0x00000100 -#define A3XX_INT0_CP_OPCODE_ERROR 0x00000200 -#define A3XX_INT0_CP_RESERVED_BIT_ERROR 0x00000400 -#define A3XX_INT0_CP_HW_FAULT 0x00000800 -#define A3XX_INT0_CP_DMA 0x00001000 -#define A3XX_INT0_CP_IB2_INT 0x00002000 -#define A3XX_INT0_CP_IB1_INT 0x00004000 -#define A3XX_INT0_CP_RB_INT 0x00008000 -#define A3XX_INT0_CP_REG_PROTECT_FAULT 0x00010000 -#define A3XX_INT0_CP_RB_DONE_TS 0x00020000 -#define A3XX_INT0_CP_VS_DONE_TS 0x00040000 -#define A3XX_INT0_CP_PS_DONE_TS 0x00080000 -#define A3XX_INT0_CACHE_FLUSH_TS 0x00100000 -#define A3XX_INT0_CP_AHB_ERROR_HALT 
0x00200000 -#define A3XX_INT0_MISC_HANG_DETECT 0x01000000 -#define A3XX_INT0_UCHE_OOB_ACCESS 0x02000000 -#define REG_A3XX_RBBM_HW_VERSION 0x00000000 - -#define REG_A3XX_RBBM_HW_RELEASE 0x00000001 - -#define REG_A3XX_RBBM_HW_CONFIGURATION 0x00000002 - -#define REG_A3XX_RBBM_CLOCK_CTL 0x00000010 - -#define REG_A3XX_RBBM_SP_HYST_CNT 0x00000012 - -#define REG_A3XX_RBBM_SW_RESET_CMD 0x00000018 - -#define REG_A3XX_RBBM_AHB_CTL0 0x00000020 - -#define REG_A3XX_RBBM_AHB_CTL1 0x00000021 - -#define REG_A3XX_RBBM_AHB_CMD 0x00000022 - -#define REG_A3XX_RBBM_AHB_ERROR_STATUS 0x00000027 - -#define REG_A3XX_RBBM_GPR0_CTL 0x0000002e - -#define REG_A3XX_RBBM_STATUS 0x00000030 -#define A3XX_RBBM_STATUS_HI_BUSY 0x00000001 -#define A3XX_RBBM_STATUS_CP_ME_BUSY 0x00000002 -#define A3XX_RBBM_STATUS_CP_PFP_BUSY 0x00000004 -#define A3XX_RBBM_STATUS_CP_NRT_BUSY 0x00004000 -#define A3XX_RBBM_STATUS_VBIF_BUSY 0x00008000 -#define A3XX_RBBM_STATUS_TSE_BUSY 0x00010000 -#define A3XX_RBBM_STATUS_RAS_BUSY 0x00020000 -#define A3XX_RBBM_STATUS_RB_BUSY 0x00040000 -#define A3XX_RBBM_STATUS_PC_DCALL_BUSY 0x00080000 -#define A3XX_RBBM_STATUS_PC_VSD_BUSY 0x00100000 -#define A3XX_RBBM_STATUS_VFD_BUSY 0x00200000 -#define A3XX_RBBM_STATUS_VPC_BUSY 0x00400000 -#define A3XX_RBBM_STATUS_UCHE_BUSY 0x00800000 -#define A3XX_RBBM_STATUS_SP_BUSY 0x01000000 -#define A3XX_RBBM_STATUS_TPL1_BUSY 0x02000000 -#define A3XX_RBBM_STATUS_MARB_BUSY 0x04000000 -#define A3XX_RBBM_STATUS_VSC_BUSY 0x08000000 -#define A3XX_RBBM_STATUS_ARB_BUSY 0x10000000 -#define A3XX_RBBM_STATUS_HLSQ_BUSY 0x20000000 -#define A3XX_RBBM_STATUS_GPU_BUSY_NOHC 0x40000000 -#define A3XX_RBBM_STATUS_GPU_BUSY 0x80000000 - -#define REG_A3XX_RBBM_NQWAIT_UNTIL 0x00000040 - -#define REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x00000033 - -#define REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL 0x00000050 - -#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL0 0x00000051 - -#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL1 0x00000054 - -#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL2 
0x00000057 - -#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL3 0x0000005a - -#define REG_A3XX_RBBM_INT_SET_CMD 0x00000060 - -#define REG_A3XX_RBBM_INT_CLEAR_CMD 0x00000061 - -#define REG_A3XX_RBBM_INT_0_MASK 0x00000063 - -#define REG_A3XX_RBBM_INT_0_STATUS 0x00000064 - -#define REG_A3XX_RBBM_PERFCTR_CTL 0x00000080 -#define A3XX_RBBM_PERFCTR_CTL_ENABLE 0x00000001 - -#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD0 0x00000081 - -#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD1 0x00000082 - -#define REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000084 - -#define REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000085 - -#define REG_A3XX_RBBM_PERFCOUNTER0_SELECT 0x00000086 - -#define REG_A3XX_RBBM_PERFCOUNTER1_SELECT 0x00000087 - -#define REG_A3XX_RBBM_GPU_BUSY_MASKED 0x00000088 - -#define REG_A3XX_RBBM_PERFCTR_CP_0_LO 0x00000090 - -#define REG_A3XX_RBBM_PERFCTR_CP_0_HI 0x00000091 - -#define REG_A3XX_RBBM_PERFCTR_RBBM_0_LO 0x00000092 - -#define REG_A3XX_RBBM_PERFCTR_RBBM_0_HI 0x00000093 - -#define REG_A3XX_RBBM_PERFCTR_RBBM_1_LO 0x00000094 - -#define REG_A3XX_RBBM_PERFCTR_RBBM_1_HI 0x00000095 - -#define REG_A3XX_RBBM_PERFCTR_PC_0_LO 0x00000096 - -#define REG_A3XX_RBBM_PERFCTR_PC_0_HI 0x00000097 - -#define REG_A3XX_RBBM_PERFCTR_PC_1_LO 0x00000098 - -#define REG_A3XX_RBBM_PERFCTR_PC_1_HI 0x00000099 - -#define REG_A3XX_RBBM_PERFCTR_PC_2_LO 0x0000009a - -#define REG_A3XX_RBBM_PERFCTR_PC_2_HI 0x0000009b - -#define REG_A3XX_RBBM_PERFCTR_PC_3_LO 0x0000009c - -#define REG_A3XX_RBBM_PERFCTR_PC_3_HI 0x0000009d - -#define REG_A3XX_RBBM_PERFCTR_VFD_0_LO 0x0000009e - -#define REG_A3XX_RBBM_PERFCTR_VFD_0_HI 0x0000009f - -#define REG_A3XX_RBBM_PERFCTR_VFD_1_LO 0x000000a0 - -#define REG_A3XX_RBBM_PERFCTR_VFD_1_HI 0x000000a1 - -#define REG_A3XX_RBBM_PERFCTR_HLSQ_0_LO 0x000000a2 - -#define REG_A3XX_RBBM_PERFCTR_HLSQ_0_HI 0x000000a3 - -#define REG_A3XX_RBBM_PERFCTR_HLSQ_1_LO 0x000000a4 - -#define REG_A3XX_RBBM_PERFCTR_HLSQ_1_HI 0x000000a5 - -#define REG_A3XX_RBBM_PERFCTR_HLSQ_2_LO 0x000000a6 - -#define 
REG_A3XX_RBBM_PERFCTR_HLSQ_2_HI 0x000000a7 - -#define REG_A3XX_RBBM_PERFCTR_HLSQ_3_LO 0x000000a8 - -#define REG_A3XX_RBBM_PERFCTR_HLSQ_3_HI 0x000000a9 - -#define REG_A3XX_RBBM_PERFCTR_HLSQ_4_LO 0x000000aa - -#define REG_A3XX_RBBM_PERFCTR_HLSQ_4_HI 0x000000ab - -#define REG_A3XX_RBBM_PERFCTR_HLSQ_5_LO 0x000000ac - -#define REG_A3XX_RBBM_PERFCTR_HLSQ_5_HI 0x000000ad - -#define REG_A3XX_RBBM_PERFCTR_VPC_0_LO 0x000000ae - -#define REG_A3XX_RBBM_PERFCTR_VPC_0_HI 0x000000af - -#define REG_A3XX_RBBM_PERFCTR_VPC_1_LO 0x000000b0 - -#define REG_A3XX_RBBM_PERFCTR_VPC_1_HI 0x000000b1 - -#define REG_A3XX_RBBM_PERFCTR_TSE_0_LO 0x000000b2 - -#define REG_A3XX_RBBM_PERFCTR_TSE_0_HI 0x000000b3 - -#define REG_A3XX_RBBM_PERFCTR_TSE_1_LO 0x000000b4 - -#define REG_A3XX_RBBM_PERFCTR_TSE_1_HI 0x000000b5 - -#define REG_A3XX_RBBM_PERFCTR_RAS_0_LO 0x000000b6 - -#define REG_A3XX_RBBM_PERFCTR_RAS_0_HI 0x000000b7 - -#define REG_A3XX_RBBM_PERFCTR_RAS_1_LO 0x000000b8 - -#define REG_A3XX_RBBM_PERFCTR_RAS_1_HI 0x000000b9 - -#define REG_A3XX_RBBM_PERFCTR_UCHE_0_LO 0x000000ba - -#define REG_A3XX_RBBM_PERFCTR_UCHE_0_HI 0x000000bb - -#define REG_A3XX_RBBM_PERFCTR_UCHE_1_LO 0x000000bc - -#define REG_A3XX_RBBM_PERFCTR_UCHE_1_HI 0x000000bd - -#define REG_A3XX_RBBM_PERFCTR_UCHE_2_LO 0x000000be - -#define REG_A3XX_RBBM_PERFCTR_UCHE_2_HI 0x000000bf - -#define REG_A3XX_RBBM_PERFCTR_UCHE_3_LO 0x000000c0 - -#define REG_A3XX_RBBM_PERFCTR_UCHE_3_HI 0x000000c1 - -#define REG_A3XX_RBBM_PERFCTR_UCHE_4_LO 0x000000c2 - -#define REG_A3XX_RBBM_PERFCTR_UCHE_4_HI 0x000000c3 - -#define REG_A3XX_RBBM_PERFCTR_UCHE_5_LO 0x000000c4 - -#define REG_A3XX_RBBM_PERFCTR_UCHE_5_HI 0x000000c5 - -#define REG_A3XX_RBBM_PERFCTR_TP_0_LO 0x000000c6 - -#define REG_A3XX_RBBM_PERFCTR_TP_0_HI 0x000000c7 - -#define REG_A3XX_RBBM_PERFCTR_TP_1_LO 0x000000c8 - -#define REG_A3XX_RBBM_PERFCTR_TP_1_HI 0x000000c9 - -#define REG_A3XX_RBBM_PERFCTR_TP_2_LO 0x000000ca - -#define REG_A3XX_RBBM_PERFCTR_TP_2_HI 0x000000cb - -#define 
REG_A3XX_RBBM_PERFCTR_TP_3_LO 0x000000cc - -#define REG_A3XX_RBBM_PERFCTR_TP_3_HI 0x000000cd - -#define REG_A3XX_RBBM_PERFCTR_TP_4_LO 0x000000ce - -#define REG_A3XX_RBBM_PERFCTR_TP_4_HI 0x000000cf - -#define REG_A3XX_RBBM_PERFCTR_TP_5_LO 0x000000d0 - -#define REG_A3XX_RBBM_PERFCTR_TP_5_HI 0x000000d1 - -#define REG_A3XX_RBBM_PERFCTR_SP_0_LO 0x000000d2 - -#define REG_A3XX_RBBM_PERFCTR_SP_0_HI 0x000000d3 - -#define REG_A3XX_RBBM_PERFCTR_SP_1_LO 0x000000d4 - -#define REG_A3XX_RBBM_PERFCTR_SP_1_HI 0x000000d5 - -#define REG_A3XX_RBBM_PERFCTR_SP_2_LO 0x000000d6 - -#define REG_A3XX_RBBM_PERFCTR_SP_2_HI 0x000000d7 - -#define REG_A3XX_RBBM_PERFCTR_SP_3_LO 0x000000d8 - -#define REG_A3XX_RBBM_PERFCTR_SP_3_HI 0x000000d9 - -#define REG_A3XX_RBBM_PERFCTR_SP_4_LO 0x000000da - -#define REG_A3XX_RBBM_PERFCTR_SP_4_HI 0x000000db - -#define REG_A3XX_RBBM_PERFCTR_SP_5_LO 0x000000dc - -#define REG_A3XX_RBBM_PERFCTR_SP_5_HI 0x000000dd - -#define REG_A3XX_RBBM_PERFCTR_SP_6_LO 0x000000de - -#define REG_A3XX_RBBM_PERFCTR_SP_6_HI 0x000000df - -#define REG_A3XX_RBBM_PERFCTR_SP_7_LO 0x000000e0 - -#define REG_A3XX_RBBM_PERFCTR_SP_7_HI 0x000000e1 - -#define REG_A3XX_RBBM_PERFCTR_RB_0_LO 0x000000e2 - -#define REG_A3XX_RBBM_PERFCTR_RB_0_HI 0x000000e3 - -#define REG_A3XX_RBBM_PERFCTR_RB_1_LO 0x000000e4 - -#define REG_A3XX_RBBM_PERFCTR_RB_1_HI 0x000000e5 - -#define REG_A3XX_RBBM_PERFCTR_PWR_0_LO 0x000000ea - -#define REG_A3XX_RBBM_PERFCTR_PWR_0_HI 0x000000eb - -#define REG_A3XX_RBBM_PERFCTR_PWR_1_LO 0x000000ec - -#define REG_A3XX_RBBM_PERFCTR_PWR_1_HI 0x000000ed - -#define REG_A3XX_RBBM_RBBM_CTL 0x00000100 - -#define REG_A3XX_RBBM_DEBUG_BUS_CTL 0x00000111 - -#define REG_A3XX_RBBM_DEBUG_BUS_DATA_STATUS 0x00000112 - -#define REG_A3XX_CP_PFP_UCODE_ADDR 0x000001c9 - -#define REG_A3XX_CP_PFP_UCODE_DATA 0x000001ca - -#define REG_A3XX_CP_ROQ_ADDR 0x000001cc - -#define REG_A3XX_CP_ROQ_DATA 0x000001cd - -#define REG_A3XX_CP_MERCIU_ADDR 0x000001d1 - -#define REG_A3XX_CP_MERCIU_DATA 0x000001d2 - -#define 
REG_A3XX_CP_MERCIU_DATA2 0x000001d3 - -#define REG_A3XX_CP_MEQ_ADDR 0x000001da - -#define REG_A3XX_CP_MEQ_DATA 0x000001db - -#define REG_A3XX_CP_WFI_PEND_CTR 0x000001f5 - -#define REG_A3XX_RBBM_PM_OVERRIDE2 0x0000039d - -#define REG_A3XX_CP_PERFCOUNTER_SELECT 0x00000445 - -#define REG_A3XX_CP_HW_FAULT 0x0000045c - -#define REG_A3XX_CP_PROTECT_CTRL 0x0000045e - -#define REG_A3XX_CP_PROTECT_STATUS 0x0000045f - -static inline uint32_t REG_A3XX_CP_PROTECT(uint32_t i0) { return 0x00000460 + 0x1*i0; } - -static inline uint32_t REG_A3XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000460 + 0x1*i0; } - -#define REG_A3XX_CP_AHB_FAULT 0x0000054d - -#define REG_A3XX_SQ_GPR_MANAGEMENT 0x00000d00 - -#define REG_A3XX_SQ_INST_STORE_MANAGMENT 0x00000d02 - -#define REG_A3XX_TP0_CHICKEN 0x00000e1e - -#define REG_A3XX_SP_GLOBAL_MEM_SIZE 0x00000e22 - -#define REG_A3XX_SP_GLOBAL_MEM_ADDR 0x00000e23 - -#define REG_A3XX_GRAS_CL_CLIP_CNTL 0x00002040 -#define A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER 0x00001000 -#define A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00010000 -#define A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE 0x00020000 -#define A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE 0x00080000 -#define A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE 0x00100000 -#define A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE 0x00200000 -#define A3XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000 -#define A3XX_GRAS_CL_CLIP_CNTL_ZCOORD 0x00800000 -#define A3XX_GRAS_CL_CLIP_CNTL_WCOORD 0x01000000 -#define A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE 0x02000000 -#define A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__MASK 0x1c000000 -#define A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__SHIFT 26 -static inline uint32_t A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(uint32_t val) -{ - return ((val) << A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__SHIFT) & A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__MASK; -} - -#define REG_A3XX_GRAS_CL_GB_CLIP_ADJ 0x00002044 -#define A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK 0x000003ff -#define 
A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT 0 -static inline uint32_t A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(uint32_t val) -{ - return ((val) << A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT) & A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK; -} -#define A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK 0x000ffc00 -#define A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT 10 -static inline uint32_t A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(uint32_t val) -{ - return ((val) << A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT) & A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK; -} - -#define REG_A3XX_GRAS_CL_VPORT_XOFFSET 0x00002048 -#define A3XX_GRAS_CL_VPORT_XOFFSET__MASK 0xffffffff -#define A3XX_GRAS_CL_VPORT_XOFFSET__SHIFT 0 -static inline uint32_t A3XX_GRAS_CL_VPORT_XOFFSET(float val) -{ - return ((fui(val)) << A3XX_GRAS_CL_VPORT_XOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_XOFFSET__MASK; -} - -#define REG_A3XX_GRAS_CL_VPORT_XSCALE 0x00002049 -#define A3XX_GRAS_CL_VPORT_XSCALE__MASK 0xffffffff -#define A3XX_GRAS_CL_VPORT_XSCALE__SHIFT 0 -static inline uint32_t A3XX_GRAS_CL_VPORT_XSCALE(float val) -{ - return ((fui(val)) << A3XX_GRAS_CL_VPORT_XSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_XSCALE__MASK; -} - -#define REG_A3XX_GRAS_CL_VPORT_YOFFSET 0x0000204a -#define A3XX_GRAS_CL_VPORT_YOFFSET__MASK 0xffffffff -#define A3XX_GRAS_CL_VPORT_YOFFSET__SHIFT 0 -static inline uint32_t A3XX_GRAS_CL_VPORT_YOFFSET(float val) -{ - return ((fui(val)) << A3XX_GRAS_CL_VPORT_YOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_YOFFSET__MASK; -} - -#define REG_A3XX_GRAS_CL_VPORT_YSCALE 0x0000204b -#define A3XX_GRAS_CL_VPORT_YSCALE__MASK 0xffffffff -#define A3XX_GRAS_CL_VPORT_YSCALE__SHIFT 0 -static inline uint32_t A3XX_GRAS_CL_VPORT_YSCALE(float val) -{ - return ((fui(val)) << A3XX_GRAS_CL_VPORT_YSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_YSCALE__MASK; -} - -#define REG_A3XX_GRAS_CL_VPORT_ZOFFSET 0x0000204c -#define A3XX_GRAS_CL_VPORT_ZOFFSET__MASK 0xffffffff -#define A3XX_GRAS_CL_VPORT_ZOFFSET__SHIFT 0 -static inline uint32_t A3XX_GRAS_CL_VPORT_ZOFFSET(float val) -{ - return ((fui(val)) << 
A3XX_GRAS_CL_VPORT_ZOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_ZOFFSET__MASK; -} - -#define REG_A3XX_GRAS_CL_VPORT_ZSCALE 0x0000204d -#define A3XX_GRAS_CL_VPORT_ZSCALE__MASK 0xffffffff -#define A3XX_GRAS_CL_VPORT_ZSCALE__SHIFT 0 -static inline uint32_t A3XX_GRAS_CL_VPORT_ZSCALE(float val) -{ - return ((fui(val)) << A3XX_GRAS_CL_VPORT_ZSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_ZSCALE__MASK; -} - -#define REG_A3XX_GRAS_SU_POINT_MINMAX 0x00002068 -#define A3XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff -#define A3XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0 -static inline uint32_t A3XX_GRAS_SU_POINT_MINMAX_MIN(float val) -{ - return ((((uint32_t)(val * 16.0))) << A3XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A3XX_GRAS_SU_POINT_MINMAX_MIN__MASK; -} -#define A3XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000 -#define A3XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16 -static inline uint32_t A3XX_GRAS_SU_POINT_MINMAX_MAX(float val) -{ - return ((((uint32_t)(val * 16.0))) << A3XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A3XX_GRAS_SU_POINT_MINMAX_MAX__MASK; -} - -#define REG_A3XX_GRAS_SU_POINT_SIZE 0x00002069 -#define A3XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff -#define A3XX_GRAS_SU_POINT_SIZE__SHIFT 0 -static inline uint32_t A3XX_GRAS_SU_POINT_SIZE(float val) -{ - return ((((int32_t)(val * 16.0))) << A3XX_GRAS_SU_POINT_SIZE__SHIFT) & A3XX_GRAS_SU_POINT_SIZE__MASK; -} - -#define REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x0000206c -#define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK 0x00ffffff -#define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT 0 -static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(float val) -{ - return ((((int32_t)(val * 1048576.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK; -} - -#define REG_A3XX_GRAS_SU_POLY_OFFSET_OFFSET 0x0000206d -#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff -#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0 -static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) -{ - return ((((int32_t)(val * 64.0))) << 
A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; -} - -#define REG_A3XX_GRAS_SU_MODE_CONTROL 0x00002070 -#define A3XX_GRAS_SU_MODE_CONTROL_CULL_FRONT 0x00000001 -#define A3XX_GRAS_SU_MODE_CONTROL_CULL_BACK 0x00000002 -#define A3XX_GRAS_SU_MODE_CONTROL_FRONT_CW 0x00000004 -#define A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK 0x000007f8 -#define A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT 3 -static inline uint32_t A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(float val) -{ - return ((((int32_t)(val * 4.0))) << A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT) & A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK; -} -#define A3XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET 0x00000800 - -#define REG_A3XX_GRAS_SC_CONTROL 0x00002072 -#define A3XX_GRAS_SC_CONTROL_RENDER_MODE__MASK 0x000000f0 -#define A3XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT 4 -static inline uint32_t A3XX_GRAS_SC_CONTROL_RENDER_MODE(enum a3xx_render_mode val) -{ - return ((val) << A3XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT) & A3XX_GRAS_SC_CONTROL_RENDER_MODE__MASK; -} -#define A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK 0x00000f00 -#define A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT 8 -static inline uint32_t A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT) & A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK; -} -#define A3XX_GRAS_SC_CONTROL_RASTER_MODE__MASK 0x0000f000 -#define A3XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT 12 -static inline uint32_t A3XX_GRAS_SC_CONTROL_RASTER_MODE(uint32_t val) -{ - return ((val) << A3XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT) & A3XX_GRAS_SC_CONTROL_RASTER_MODE__MASK; -} - -#define REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL 0x00002074 -#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK 0x00007fff -#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT 0 -static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(uint32_t val) -{ - return ((val) << 
A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK; -} -#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK 0x7fff0000 -#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT 16 -static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(uint32_t val) -{ - return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK; -} - -#define REG_A3XX_GRAS_SC_SCREEN_SCISSOR_BR 0x00002075 -#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK 0x00007fff -#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT 0 -static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(uint32_t val) -{ - return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK; -} -#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK 0x7fff0000 -#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT 16 -static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(uint32_t val) -{ - return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK; -} - -#define REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL 0x00002079 -#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff -#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 -static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val) -{ - return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK; -} -#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 -#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 -static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) -{ - return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK; -} - -#define REG_A3XX_GRAS_SC_WINDOW_SCISSOR_BR 0x0000207a -#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff -#define 
A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 -static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val) -{ - return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK; -} -#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 -#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 -static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) -{ - return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK; -} - -#define REG_A3XX_RB_MODE_CONTROL 0x000020c0 -#define A3XX_RB_MODE_CONTROL_GMEM_BYPASS 0x00000080 -#define A3XX_RB_MODE_CONTROL_RENDER_MODE__MASK 0x00000700 -#define A3XX_RB_MODE_CONTROL_RENDER_MODE__SHIFT 8 -static inline uint32_t A3XX_RB_MODE_CONTROL_RENDER_MODE(enum a3xx_render_mode val) -{ - return ((val) << A3XX_RB_MODE_CONTROL_RENDER_MODE__SHIFT) & A3XX_RB_MODE_CONTROL_RENDER_MODE__MASK; -} -#define A3XX_RB_MODE_CONTROL_MRT__MASK 0x00003000 -#define A3XX_RB_MODE_CONTROL_MRT__SHIFT 12 -static inline uint32_t A3XX_RB_MODE_CONTROL_MRT(uint32_t val) -{ - return ((val) << A3XX_RB_MODE_CONTROL_MRT__SHIFT) & A3XX_RB_MODE_CONTROL_MRT__MASK; -} -#define A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE 0x00008000 -#define A3XX_RB_MODE_CONTROL_PACKER_TIMER_ENABLE 0x00010000 - -#define REG_A3XX_RB_RENDER_CONTROL 0x000020c1 -#define A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE 0x00000001 -#define A3XX_RB_RENDER_CONTROL_YUV_IN_ENABLE 0x00000002 -#define A3XX_RB_RENDER_CONTROL_COV_VALUE_INPUT_ENABLE 0x00000004 -#define A3XX_RB_RENDER_CONTROL_FACENESS 0x00000008 -#define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK 0x00000ff0 -#define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT 4 -static inline uint32_t A3XX_RB_RENDER_CONTROL_BIN_WIDTH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT) & A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK; -} -#define A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE 0x00001000 -#define A3XX_RB_RENDER_CONTROL_ENABLE_GMEM 
0x00002000 -#define A3XX_RB_RENDER_CONTROL_XCOORD 0x00004000 -#define A3XX_RB_RENDER_CONTROL_YCOORD 0x00008000 -#define A3XX_RB_RENDER_CONTROL_ZCOORD 0x00010000 -#define A3XX_RB_RENDER_CONTROL_WCOORD 0x00020000 -#define A3XX_RB_RENDER_CONTROL_I_CLAMP_ENABLE 0x00080000 -#define A3XX_RB_RENDER_CONTROL_COV_VALUE_OUTPUT_ENABLE 0x00100000 -#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST 0x00400000 -#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK 0x07000000 -#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT 24 -static inline uint32_t A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val) -{ - return ((val) << A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK; -} -#define A3XX_RB_RENDER_CONTROL_ALPHA_TO_COVERAGE 0x40000000 -#define A3XX_RB_RENDER_CONTROL_ALPHA_TO_ONE 0x80000000 - -#define REG_A3XX_RB_MSAA_CONTROL 0x000020c2 -#define A3XX_RB_MSAA_CONTROL_DISABLE 0x00000400 -#define A3XX_RB_MSAA_CONTROL_SAMPLES__MASK 0x0000f000 -#define A3XX_RB_MSAA_CONTROL_SAMPLES__SHIFT 12 -static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A3XX_RB_MSAA_CONTROL_SAMPLES__SHIFT) & A3XX_RB_MSAA_CONTROL_SAMPLES__MASK; -} -#define A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__MASK 0xffff0000 -#define A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__SHIFT 16 -static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(uint32_t val) -{ - return ((val) << A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__SHIFT) & A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__MASK; -} - -#define REG_A3XX_RB_ALPHA_REF 0x000020c3 -#define A3XX_RB_ALPHA_REF_UINT__MASK 0x0000ff00 -#define A3XX_RB_ALPHA_REF_UINT__SHIFT 8 -static inline uint32_t A3XX_RB_ALPHA_REF_UINT(uint32_t val) -{ - return ((val) << A3XX_RB_ALPHA_REF_UINT__SHIFT) & A3XX_RB_ALPHA_REF_UINT__MASK; -} -#define A3XX_RB_ALPHA_REF_FLOAT__MASK 0xffff0000 -#define A3XX_RB_ALPHA_REF_FLOAT__SHIFT 16 -static inline uint32_t A3XX_RB_ALPHA_REF_FLOAT(float val) -{ - return ((util_float_to_half(val)) << 
A3XX_RB_ALPHA_REF_FLOAT__SHIFT) & A3XX_RB_ALPHA_REF_FLOAT__MASK; -} - -static inline uint32_t REG_A3XX_RB_MRT(uint32_t i0) { return 0x000020c4 + 0x4*i0; } - -static inline uint32_t REG_A3XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020c4 + 0x4*i0; } -#define A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008 -#define A3XX_RB_MRT_CONTROL_BLEND 0x00000010 -#define A3XX_RB_MRT_CONTROL_BLEND2 0x00000020 -#define A3XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000f00 -#define A3XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 8 -static inline uint32_t A3XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) -{ - return ((val) << A3XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A3XX_RB_MRT_CONTROL_ROP_CODE__MASK; -} -#define A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK 0x00003000 -#define A3XX_RB_MRT_CONTROL_DITHER_MODE__SHIFT 12 -static inline uint32_t A3XX_RB_MRT_CONTROL_DITHER_MODE(enum adreno_rb_dither_mode val) -{ - return ((val) << A3XX_RB_MRT_CONTROL_DITHER_MODE__SHIFT) & A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK; -} -#define A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x0f000000 -#define A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 24 -static inline uint32_t A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) -{ - return ((val) << A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; -} - -static inline uint32_t REG_A3XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x000020c5 + 0x4*i0; } -#define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x0000003f -#define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a3xx_color_fmt val) -{ - return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK; -} -#define A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x000000c0 -#define A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 6 -static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a3xx_tile_mode val) -{ - return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & 
A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK; -} -#define A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00000c00 -#define A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 10 -static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK; -} -#define A3XX_RB_MRT_BUF_INFO_COLOR_SRGB 0x00004000 -#define A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK 0xfffe0000 -#define A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT 17 -static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK; -} - -static inline uint32_t REG_A3XX_RB_MRT_BUF_BASE(uint32_t i0) { return 0x000020c6 + 0x4*i0; } -#define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK 0xfffffff0 -#define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT 4 -static inline uint32_t A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT) & A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK; -} - -static inline uint32_t REG_A3XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x000020c7 + 0x4*i0; } -#define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f -#define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 -static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK; -} -#define A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0 -#define A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5 -static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) -{ - return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK; -} -#define 
A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00 -#define A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8 -static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK; -} -#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000 -#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16 -static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK; -} -#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000 -#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21 -static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) -{ - return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK; -} -#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000 -#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24 -static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK; -} -#define A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE 0x20000000 - -#define REG_A3XX_RB_BLEND_RED 0x000020e4 -#define A3XX_RB_BLEND_RED_UINT__MASK 0x000000ff -#define A3XX_RB_BLEND_RED_UINT__SHIFT 0 -static inline uint32_t A3XX_RB_BLEND_RED_UINT(uint32_t val) -{ - return ((val) << A3XX_RB_BLEND_RED_UINT__SHIFT) & A3XX_RB_BLEND_RED_UINT__MASK; -} -#define A3XX_RB_BLEND_RED_FLOAT__MASK 0xffff0000 -#define A3XX_RB_BLEND_RED_FLOAT__SHIFT 16 -static inline uint32_t A3XX_RB_BLEND_RED_FLOAT(float val) -{ - return ((util_float_to_half(val)) << 
A3XX_RB_BLEND_RED_FLOAT__SHIFT) & A3XX_RB_BLEND_RED_FLOAT__MASK; -} - -#define REG_A3XX_RB_BLEND_GREEN 0x000020e5 -#define A3XX_RB_BLEND_GREEN_UINT__MASK 0x000000ff -#define A3XX_RB_BLEND_GREEN_UINT__SHIFT 0 -static inline uint32_t A3XX_RB_BLEND_GREEN_UINT(uint32_t val) -{ - return ((val) << A3XX_RB_BLEND_GREEN_UINT__SHIFT) & A3XX_RB_BLEND_GREEN_UINT__MASK; -} -#define A3XX_RB_BLEND_GREEN_FLOAT__MASK 0xffff0000 -#define A3XX_RB_BLEND_GREEN_FLOAT__SHIFT 16 -static inline uint32_t A3XX_RB_BLEND_GREEN_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A3XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A3XX_RB_BLEND_GREEN_FLOAT__MASK; -} - -#define REG_A3XX_RB_BLEND_BLUE 0x000020e6 -#define A3XX_RB_BLEND_BLUE_UINT__MASK 0x000000ff -#define A3XX_RB_BLEND_BLUE_UINT__SHIFT 0 -static inline uint32_t A3XX_RB_BLEND_BLUE_UINT(uint32_t val) -{ - return ((val) << A3XX_RB_BLEND_BLUE_UINT__SHIFT) & A3XX_RB_BLEND_BLUE_UINT__MASK; -} -#define A3XX_RB_BLEND_BLUE_FLOAT__MASK 0xffff0000 -#define A3XX_RB_BLEND_BLUE_FLOAT__SHIFT 16 -static inline uint32_t A3XX_RB_BLEND_BLUE_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A3XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A3XX_RB_BLEND_BLUE_FLOAT__MASK; -} - -#define REG_A3XX_RB_BLEND_ALPHA 0x000020e7 -#define A3XX_RB_BLEND_ALPHA_UINT__MASK 0x000000ff -#define A3XX_RB_BLEND_ALPHA_UINT__SHIFT 0 -static inline uint32_t A3XX_RB_BLEND_ALPHA_UINT(uint32_t val) -{ - return ((val) << A3XX_RB_BLEND_ALPHA_UINT__SHIFT) & A3XX_RB_BLEND_ALPHA_UINT__MASK; -} -#define A3XX_RB_BLEND_ALPHA_FLOAT__MASK 0xffff0000 -#define A3XX_RB_BLEND_ALPHA_FLOAT__SHIFT 16 -static inline uint32_t A3XX_RB_BLEND_ALPHA_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A3XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A3XX_RB_BLEND_ALPHA_FLOAT__MASK; -} - -#define REG_A3XX_RB_CLEAR_COLOR_DW0 0x000020e8 - -#define REG_A3XX_RB_CLEAR_COLOR_DW1 0x000020e9 - -#define REG_A3XX_RB_CLEAR_COLOR_DW2 0x000020ea - -#define REG_A3XX_RB_CLEAR_COLOR_DW3 0x000020eb - -#define REG_A3XX_RB_COPY_CONTROL 
0x000020ec -#define A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK 0x00000003 -#define A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT 0 -static inline uint32_t A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(enum a3xx_msaa_samples val) -{ - return ((val) << A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT) & A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK; -} -#define A3XX_RB_COPY_CONTROL_DEPTHCLEAR 0x00000008 -#define A3XX_RB_COPY_CONTROL_MODE__MASK 0x00000070 -#define A3XX_RB_COPY_CONTROL_MODE__SHIFT 4 -static inline uint32_t A3XX_RB_COPY_CONTROL_MODE(enum adreno_rb_copy_control_mode val) -{ - return ((val) << A3XX_RB_COPY_CONTROL_MODE__SHIFT) & A3XX_RB_COPY_CONTROL_MODE__MASK; -} -#define A3XX_RB_COPY_CONTROL_MSAA_SRGB_DOWNSAMPLE 0x00000080 -#define A3XX_RB_COPY_CONTROL_FASTCLEAR__MASK 0x00000f00 -#define A3XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT 8 -static inline uint32_t A3XX_RB_COPY_CONTROL_FASTCLEAR(uint32_t val) -{ - return ((val) << A3XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT) & A3XX_RB_COPY_CONTROL_FASTCLEAR__MASK; -} -#define A3XX_RB_COPY_CONTROL_DEPTH32_RESOLVE 0x00001000 -#define A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK 0xffffc000 -#define A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT 14 -static inline uint32_t A3XX_RB_COPY_CONTROL_GMEM_BASE(uint32_t val) -{ - assert(!(val & 0x3fff)); - return ((val >> 14) << A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT) & A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK; -} - -#define REG_A3XX_RB_COPY_DEST_BASE 0x000020ed -#define A3XX_RB_COPY_DEST_BASE_BASE__MASK 0xfffffff0 -#define A3XX_RB_COPY_DEST_BASE_BASE__SHIFT 4 -static inline uint32_t A3XX_RB_COPY_DEST_BASE_BASE(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A3XX_RB_COPY_DEST_BASE_BASE__SHIFT) & A3XX_RB_COPY_DEST_BASE_BASE__MASK; -} - -#define REG_A3XX_RB_COPY_DEST_PITCH 0x000020ee -#define A3XX_RB_COPY_DEST_PITCH_PITCH__MASK 0xffffffff -#define A3XX_RB_COPY_DEST_PITCH_PITCH__SHIFT 0 -static inline uint32_t A3XX_RB_COPY_DEST_PITCH_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << 
A3XX_RB_COPY_DEST_PITCH_PITCH__SHIFT) & A3XX_RB_COPY_DEST_PITCH_PITCH__MASK; -} - -#define REG_A3XX_RB_COPY_DEST_INFO 0x000020ef -#define A3XX_RB_COPY_DEST_INFO_TILE__MASK 0x00000003 -#define A3XX_RB_COPY_DEST_INFO_TILE__SHIFT 0 -static inline uint32_t A3XX_RB_COPY_DEST_INFO_TILE(enum a3xx_tile_mode val) -{ - return ((val) << A3XX_RB_COPY_DEST_INFO_TILE__SHIFT) & A3XX_RB_COPY_DEST_INFO_TILE__MASK; -} -#define A3XX_RB_COPY_DEST_INFO_FORMAT__MASK 0x000000fc -#define A3XX_RB_COPY_DEST_INFO_FORMAT__SHIFT 2 -static inline uint32_t A3XX_RB_COPY_DEST_INFO_FORMAT(enum a3xx_color_fmt val) -{ - return ((val) << A3XX_RB_COPY_DEST_INFO_FORMAT__SHIFT) & A3XX_RB_COPY_DEST_INFO_FORMAT__MASK; -} -#define A3XX_RB_COPY_DEST_INFO_SWAP__MASK 0x00000300 -#define A3XX_RB_COPY_DEST_INFO_SWAP__SHIFT 8 -static inline uint32_t A3XX_RB_COPY_DEST_INFO_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A3XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & A3XX_RB_COPY_DEST_INFO_SWAP__MASK; -} -#define A3XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK 0x00000c00 -#define A3XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT 10 -static inline uint32_t A3XX_RB_COPY_DEST_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) -{ - return ((val) << A3XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT) & A3XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK; -} -#define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK 0x0003c000 -#define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT 14 -static inline uint32_t A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(uint32_t val) -{ - return ((val) << A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT) & A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK; -} -#define A3XX_RB_COPY_DEST_INFO_ENDIAN__MASK 0x001c0000 -#define A3XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT 18 -static inline uint32_t A3XX_RB_COPY_DEST_INFO_ENDIAN(enum adreno_rb_surface_endian val) -{ - return ((val) << A3XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT) & A3XX_RB_COPY_DEST_INFO_ENDIAN__MASK; -} - -#define REG_A3XX_RB_DEPTH_CONTROL 0x00002100 -#define A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z 
0x00000001 -#define A3XX_RB_DEPTH_CONTROL_Z_ENABLE 0x00000002 -#define A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE 0x00000004 -#define A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE 0x00000008 -#define A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK 0x00000070 -#define A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT 4 -static inline uint32_t A3XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val) -{ - return ((val) << A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK; -} -#define A3XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE 0x00000080 -#define A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE 0x80000000 - -#define REG_A3XX_RB_DEPTH_CLEAR 0x00002101 - -#define REG_A3XX_RB_DEPTH_INFO 0x00002102 -#define A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK 0x00000003 -#define A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT 0 -static inline uint32_t A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum adreno_rb_depth_format val) -{ - return ((val) << A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT) & A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK; -} -#define A3XX_RB_DEPTH_INFO_DEPTH_BASE__MASK 0xfffff800 -#define A3XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT 11 -static inline uint32_t A3XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << A3XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A3XX_RB_DEPTH_INFO_DEPTH_BASE__MASK; -} - -#define REG_A3XX_RB_DEPTH_PITCH 0x00002103 -#define A3XX_RB_DEPTH_PITCH__MASK 0xffffffff -#define A3XX_RB_DEPTH_PITCH__SHIFT 0 -static inline uint32_t A3XX_RB_DEPTH_PITCH(uint32_t val) -{ - assert(!(val & 0x7)); - return ((val >> 3) << A3XX_RB_DEPTH_PITCH__SHIFT) & A3XX_RB_DEPTH_PITCH__MASK; -} - -#define REG_A3XX_RB_STENCIL_CONTROL 0x00002104 -#define A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001 -#define A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000002 -#define A3XX_RB_STENCIL_CONTROL_STENCIL_READ 0x00000004 -#define A3XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700 -#define A3XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8 -static inline uint32_t A3XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val) -{ - return 
((val) << A3XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A3XX_RB_STENCIL_CONTROL_FUNC__MASK; -} -#define A3XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800 -#define A3XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11 -static inline uint32_t A3XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val) -{ - return ((val) << A3XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A3XX_RB_STENCIL_CONTROL_FAIL__MASK; -} -#define A3XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000 -#define A3XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14 -static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val) -{ - return ((val) << A3XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZPASS__MASK; -} -#define A3XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000 -#define A3XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17 -static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val) -{ - return ((val) << A3XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZFAIL__MASK; -} -#define A3XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000 -#define A3XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20 -static inline uint32_t A3XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val) -{ - return ((val) << A3XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_FUNC_BF__MASK; -} -#define A3XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000 -#define A3XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23 -static inline uint32_t A3XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val) -{ - return ((val) << A3XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_FAIL_BF__MASK; -} -#define A3XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000 -#define A3XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26 -static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val) -{ - return ((val) << A3XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK; -} -#define A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000 -#define A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29 -static inline uint32_t 
A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val) -{ - return ((val) << A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK; -} - -#define REG_A3XX_RB_STENCIL_CLEAR 0x00002105 - -#define REG_A3XX_RB_STENCIL_INFO 0x00002106 -#define A3XX_RB_STENCIL_INFO_STENCIL_BASE__MASK 0xfffff800 -#define A3XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT 11 -static inline uint32_t A3XX_RB_STENCIL_INFO_STENCIL_BASE(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << A3XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT) & A3XX_RB_STENCIL_INFO_STENCIL_BASE__MASK; -} - -#define REG_A3XX_RB_STENCIL_PITCH 0x00002107 -#define A3XX_RB_STENCIL_PITCH__MASK 0xffffffff -#define A3XX_RB_STENCIL_PITCH__SHIFT 0 -static inline uint32_t A3XX_RB_STENCIL_PITCH(uint32_t val) -{ - assert(!(val & 0x7)); - return ((val >> 3) << A3XX_RB_STENCIL_PITCH__SHIFT) & A3XX_RB_STENCIL_PITCH__MASK; -} - -#define REG_A3XX_RB_STENCILREFMASK 0x00002108 -#define A3XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff -#define A3XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0 -static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILREF(uint32_t val) -{ - return ((val) << A3XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILREF__MASK; -} -#define A3XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00 -#define A3XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8 -static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val) -{ - return ((val) << A3XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILMASK__MASK; -} -#define A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000 -#define A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16 -static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val) -{ - return ((val) << A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK; -} - -#define REG_A3XX_RB_STENCILREFMASK_BF 0x00002109 -#define A3XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff -#define 
A3XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0 -static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val) -{ - return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILREF__MASK; -} -#define A3XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00 -#define A3XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8 -static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val) -{ - return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK; -} -#define A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000 -#define A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16 -static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val) -{ - return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK; -} - -#define REG_A3XX_RB_LRZ_VSC_CONTROL 0x0000210c -#define A3XX_RB_LRZ_VSC_CONTROL_BINNING_ENABLE 0x00000002 - -#define REG_A3XX_RB_WINDOW_OFFSET 0x0000210e -#define A3XX_RB_WINDOW_OFFSET_X__MASK 0x0000ffff -#define A3XX_RB_WINDOW_OFFSET_X__SHIFT 0 -static inline uint32_t A3XX_RB_WINDOW_OFFSET_X(uint32_t val) -{ - return ((val) << A3XX_RB_WINDOW_OFFSET_X__SHIFT) & A3XX_RB_WINDOW_OFFSET_X__MASK; -} -#define A3XX_RB_WINDOW_OFFSET_Y__MASK 0xffff0000 -#define A3XX_RB_WINDOW_OFFSET_Y__SHIFT 16 -static inline uint32_t A3XX_RB_WINDOW_OFFSET_Y(uint32_t val) -{ - return ((val) << A3XX_RB_WINDOW_OFFSET_Y__SHIFT) & A3XX_RB_WINDOW_OFFSET_Y__MASK; -} - -#define REG_A3XX_RB_SAMPLE_COUNT_CONTROL 0x00002110 -#define A3XX_RB_SAMPLE_COUNT_CONTROL_RESET 0x00000001 -#define A3XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002 - -#define REG_A3XX_RB_SAMPLE_COUNT_ADDR 0x00002111 - -#define REG_A3XX_RB_Z_CLAMP_MIN 0x00002114 - -#define REG_A3XX_RB_Z_CLAMP_MAX 0x00002115 - -#define REG_A3XX_VGT_BIN_BASE 0x000021e1 - -#define REG_A3XX_VGT_BIN_SIZE 0x000021e2 - -#define REG_A3XX_PC_VSTREAM_CONTROL 0x000021e4 -#define 
A3XX_PC_VSTREAM_CONTROL_SIZE__MASK 0x003f0000 -#define A3XX_PC_VSTREAM_CONTROL_SIZE__SHIFT 16 -static inline uint32_t A3XX_PC_VSTREAM_CONTROL_SIZE(uint32_t val) -{ - return ((val) << A3XX_PC_VSTREAM_CONTROL_SIZE__SHIFT) & A3XX_PC_VSTREAM_CONTROL_SIZE__MASK; -} -#define A3XX_PC_VSTREAM_CONTROL_N__MASK 0x07c00000 -#define A3XX_PC_VSTREAM_CONTROL_N__SHIFT 22 -static inline uint32_t A3XX_PC_VSTREAM_CONTROL_N(uint32_t val) -{ - return ((val) << A3XX_PC_VSTREAM_CONTROL_N__SHIFT) & A3XX_PC_VSTREAM_CONTROL_N__MASK; -} - -#define REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL 0x000021ea - -#define REG_A3XX_PC_PRIM_VTX_CNTL 0x000021ec -#define A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__MASK 0x0000001f -#define A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__SHIFT 0 -static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(uint32_t val) -{ - return ((val) << A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__MASK; -} -#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__MASK 0x000000e0 -#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__SHIFT 5 -static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__MASK; -} -#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__MASK 0x00000700 -#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__SHIFT 8 -static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__MASK; -} -#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_ENABLE 0x00001000 -#define A3XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART 0x00100000 -#define A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST 0x02000000 -#define A3XX_PC_PRIM_VTX_CNTL_PSIZE 0x04000000 - -#define REG_A3XX_PC_RESTART_INDEX 0x000021ed - -#define REG_A3XX_HLSQ_CONTROL_0_REG 0x00002200 -#define 
A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000030 -#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 4 -static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK; -} -#define A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE 0x00000040 -#define A3XX_HLSQ_CONTROL_0_REG_COMPUTEMODE 0x00000100 -#define A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART 0x00000200 -#define A3XX_HLSQ_CONTROL_0_REG_RESERVED2 0x00000400 -#define A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__MASK 0x00fff000 -#define A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__SHIFT 12 -static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__MASK; -} -#define A3XX_HLSQ_CONTROL_0_REG_FSONLYTEX 0x02000000 -#define A3XX_HLSQ_CONTROL_0_REG_CHUNKDISABLE 0x04000000 -#define A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK 0x08000000 -#define A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT 27 -static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK; -} -#define A3XX_HLSQ_CONTROL_0_REG_LAZYUPDATEDISABLE 0x10000000 -#define A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE 0x20000000 -#define A3XX_HLSQ_CONTROL_0_REG_TPFULLUPDATE 0x40000000 -#define A3XX_HLSQ_CONTROL_0_REG_SINGLECONTEXT 0x80000000 - -#define REG_A3XX_HLSQ_CONTROL_1_REG 0x00002201 -#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK 0x000000c0 -#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT 6 -static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK; -} -#define A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE 0x00000100 -#define 
A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__MASK 0x00ff0000 -#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__SHIFT 16 -static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__MASK; -} -#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__MASK 0xff000000 -#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__SHIFT 24 -static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__MASK; -} - -#define REG_A3XX_HLSQ_CONTROL_2_REG 0x00002202 -#define A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__MASK 0x000003fc -#define A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__SHIFT 2 -static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__MASK; -} -#define A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__MASK 0x03fc0000 -#define A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__SHIFT 18 -static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__MASK; -} -#define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK 0xfc000000 -#define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT 26 -static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK; -} - -#define REG_A3XX_HLSQ_CONTROL_3_REG 0x00002203 -#define A3XX_HLSQ_CONTROL_3_REG_REGID__MASK 0x000000ff -#define A3XX_HLSQ_CONTROL_3_REG_REGID__SHIFT 0 -static inline uint32_t A3XX_HLSQ_CONTROL_3_REG_REGID(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONTROL_3_REG_REGID__SHIFT) & A3XX_HLSQ_CONTROL_3_REG_REGID__MASK; -} - 
-#define REG_A3XX_HLSQ_VS_CONTROL_REG 0x00002204 -#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK 0x000003ff -#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT 0 -static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(uint32_t val) -{ - return ((val) << A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK; -} -#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x001ff000 -#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT 12 -static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val) -{ - return ((val) << A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK; -} -#define A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 -#define A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT 24 -static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(uint32_t val) -{ - return ((val) << A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK; -} - -#define REG_A3XX_HLSQ_FS_CONTROL_REG 0x00002205 -#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK 0x000003ff -#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT 0 -static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(uint32_t val) -{ - return ((val) << A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK; -} -#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x001ff000 -#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT 12 -static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val) -{ - return ((val) << A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK; -} -#define A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 -#define A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT 24 -static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(uint32_t val) -{ - return ((val) << A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT) & 
A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK; -} - -#define REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG 0x00002206 -#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK 0x000001ff -#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT 0 -static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK; -} -#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK 0x01ff0000 -#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__SHIFT 16 -static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__SHIFT) & A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK; -} - -#define REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x00002207 -#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK 0x000001ff -#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT 0 -static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK; -} -#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK 0x01ff0000 -#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__SHIFT 16 -static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__SHIFT) & A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK; -} - -#define REG_A3XX_HLSQ_CL_NDRANGE_0_REG 0x0000220a -#define A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__MASK 0x00000003 -#define A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__SHIFT 0 -static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__MASK; -} -#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__MASK 0x00000ffc -#define 
A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__SHIFT 2 -static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__MASK; -} -#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__MASK 0x003ff000 -#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__SHIFT 12 -static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__MASK; -} -#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__MASK 0xffc00000 -#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__SHIFT 22 -static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__MASK; -} - -static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK(uint32_t i0) { return 0x0000220b + 0x2*i0; } - -static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK_SIZE(uint32_t i0) { return 0x0000220b + 0x2*i0; } - -static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK_OFFSET(uint32_t i0) { return 0x0000220c + 0x2*i0; } - -#define REG_A3XX_HLSQ_CL_CONTROL_0_REG 0x00002211 - -#define REG_A3XX_HLSQ_CL_CONTROL_1_REG 0x00002212 - -#define REG_A3XX_HLSQ_CL_KERNEL_CONST_REG 0x00002214 - -static inline uint32_t REG_A3XX_HLSQ_CL_KERNEL_GROUP(uint32_t i0) { return 0x00002215 + 0x1*i0; } - -static inline uint32_t REG_A3XX_HLSQ_CL_KERNEL_GROUP_RATIO(uint32_t i0) { return 0x00002215 + 0x1*i0; } - -#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG 0x00002216 - -#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG 0x00002217 - -#define REG_A3XX_HLSQ_CL_WG_OFFSET_REG 0x0000221a - -#define REG_A3XX_VFD_CONTROL_0 0x00002240 -#define A3XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK 0x0003ffff -#define A3XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT 0 -static inline uint32_t A3XX_VFD_CONTROL_0_TOTALATTRTOVS(uint32_t val) -{ - return ((val) << 
A3XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT) & A3XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK; -} -#define A3XX_VFD_CONTROL_0_PACKETSIZE__MASK 0x003c0000 -#define A3XX_VFD_CONTROL_0_PACKETSIZE__SHIFT 18 -static inline uint32_t A3XX_VFD_CONTROL_0_PACKETSIZE(uint32_t val) -{ - return ((val) << A3XX_VFD_CONTROL_0_PACKETSIZE__SHIFT) & A3XX_VFD_CONTROL_0_PACKETSIZE__MASK; -} -#define A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK 0x07c00000 -#define A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT 22 -static inline uint32_t A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(uint32_t val) -{ - return ((val) << A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT) & A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK; -} -#define A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK 0xf8000000 -#define A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT 27 -static inline uint32_t A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val) -{ - return ((val) << A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT) & A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK; -} - -#define REG_A3XX_VFD_CONTROL_1 0x00002241 -#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000000f -#define A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT 0 -static inline uint32_t A3XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val) -{ - return ((val) << A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT) & A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK; -} -#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK 0x000000f0 -#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT 4 -static inline uint32_t A3XX_VFD_CONTROL_1_MAXTHRESHOLD(uint32_t val) -{ - return ((val) << A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK; -} -#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK 0x00000f00 -#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT 8 -static inline uint32_t A3XX_VFD_CONTROL_1_MINTHRESHOLD(uint32_t val) -{ - return ((val) << A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK; -} -#define A3XX_VFD_CONTROL_1_REGID4VTX__MASK 0x00ff0000 -#define A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT 16 -static inline uint32_t 
A3XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) -{ - return ((val) << A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A3XX_VFD_CONTROL_1_REGID4VTX__MASK; -} -#define A3XX_VFD_CONTROL_1_REGID4INST__MASK 0xff000000 -#define A3XX_VFD_CONTROL_1_REGID4INST__SHIFT 24 -static inline uint32_t A3XX_VFD_CONTROL_1_REGID4INST(uint32_t val) -{ - return ((val) << A3XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A3XX_VFD_CONTROL_1_REGID4INST__MASK; -} - -#define REG_A3XX_VFD_INDEX_MIN 0x00002242 - -#define REG_A3XX_VFD_INDEX_MAX 0x00002243 - -#define REG_A3XX_VFD_INSTANCEID_OFFSET 0x00002244 - -#define REG_A3XX_VFD_INDEX_OFFSET 0x00002245 - -#define REG_A3XX_VFD_INDEX_OFFSET 0x00002245 - -static inline uint32_t REG_A3XX_VFD_FETCH(uint32_t i0) { return 0x00002246 + 0x2*i0; } - -static inline uint32_t REG_A3XX_VFD_FETCH_INSTR_0(uint32_t i0) { return 0x00002246 + 0x2*i0; } -#define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK 0x0000007f -#define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT 0 -static inline uint32_t A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(uint32_t val) -{ - return ((val) << A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK; -} -#define A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK 0x0000ff80 -#define A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT 7 -static inline uint32_t A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(uint32_t val) -{ - return ((val) << A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK; -} -#define A3XX_VFD_FETCH_INSTR_0_INSTANCED 0x00010000 -#define A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT 0x00020000 -#define A3XX_VFD_FETCH_INSTR_0_INDEXCODE__MASK 0x00fc0000 -#define A3XX_VFD_FETCH_INSTR_0_INDEXCODE__SHIFT 18 -static inline uint32_t A3XX_VFD_FETCH_INSTR_0_INDEXCODE(uint32_t val) -{ - return ((val) << A3XX_VFD_FETCH_INSTR_0_INDEXCODE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_INDEXCODE__MASK; -} -#define A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK 0xff000000 -#define A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT 24 -static inline uint32_t A3XX_VFD_FETCH_INSTR_0_STEPRATE(uint32_t val) 
-{ - return ((val) << A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK; -} - -static inline uint32_t REG_A3XX_VFD_FETCH_INSTR_1(uint32_t i0) { return 0x00002247 + 0x2*i0; } - -static inline uint32_t REG_A3XX_VFD_DECODE(uint32_t i0) { return 0x00002266 + 0x1*i0; } - -static inline uint32_t REG_A3XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x00002266 + 0x1*i0; } -#define A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK 0x0000000f -#define A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT 0 -static inline uint32_t A3XX_VFD_DECODE_INSTR_WRITEMASK(uint32_t val) -{ - return ((val) << A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT) & A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK; -} -#define A3XX_VFD_DECODE_INSTR_CONSTFILL 0x00000010 -#define A3XX_VFD_DECODE_INSTR_FORMAT__MASK 0x00000fc0 -#define A3XX_VFD_DECODE_INSTR_FORMAT__SHIFT 6 -static inline uint32_t A3XX_VFD_DECODE_INSTR_FORMAT(enum a3xx_vtx_fmt val) -{ - return ((val) << A3XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A3XX_VFD_DECODE_INSTR_FORMAT__MASK; -} -#define A3XX_VFD_DECODE_INSTR_REGID__MASK 0x000ff000 -#define A3XX_VFD_DECODE_INSTR_REGID__SHIFT 12 -static inline uint32_t A3XX_VFD_DECODE_INSTR_REGID(uint32_t val) -{ - return ((val) << A3XX_VFD_DECODE_INSTR_REGID__SHIFT) & A3XX_VFD_DECODE_INSTR_REGID__MASK; -} -#define A3XX_VFD_DECODE_INSTR_INT 0x00100000 -#define A3XX_VFD_DECODE_INSTR_SWAP__MASK 0x00c00000 -#define A3XX_VFD_DECODE_INSTR_SWAP__SHIFT 22 -static inline uint32_t A3XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A3XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A3XX_VFD_DECODE_INSTR_SWAP__MASK; -} -#define A3XX_VFD_DECODE_INSTR_SHIFTCNT__MASK 0x1f000000 -#define A3XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT 24 -static inline uint32_t A3XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val) -{ - return ((val) << A3XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT) & A3XX_VFD_DECODE_INSTR_SHIFTCNT__MASK; -} -#define A3XX_VFD_DECODE_INSTR_LASTCOMPVALID 0x20000000 -#define A3XX_VFD_DECODE_INSTR_SWITCHNEXT 0x40000000 - -#define 
REG_A3XX_VFD_VS_THREADING_THRESHOLD 0x0000227e -#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__MASK 0x0000000f -#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__SHIFT 0 -static inline uint32_t A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(uint32_t val) -{ - return ((val) << A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__SHIFT) & A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__MASK; -} -#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__MASK 0x0000ff00 -#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__SHIFT 8 -static inline uint32_t A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(uint32_t val) -{ - return ((val) << A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__SHIFT) & A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__MASK; -} - -#define REG_A3XX_VPC_ATTR 0x00002280 -#define A3XX_VPC_ATTR_TOTALATTR__MASK 0x000001ff -#define A3XX_VPC_ATTR_TOTALATTR__SHIFT 0 -static inline uint32_t A3XX_VPC_ATTR_TOTALATTR(uint32_t val) -{ - return ((val) << A3XX_VPC_ATTR_TOTALATTR__SHIFT) & A3XX_VPC_ATTR_TOTALATTR__MASK; -} -#define A3XX_VPC_ATTR_PSIZE 0x00000200 -#define A3XX_VPC_ATTR_THRDASSIGN__MASK 0x0ffff000 -#define A3XX_VPC_ATTR_THRDASSIGN__SHIFT 12 -static inline uint32_t A3XX_VPC_ATTR_THRDASSIGN(uint32_t val) -{ - return ((val) << A3XX_VPC_ATTR_THRDASSIGN__SHIFT) & A3XX_VPC_ATTR_THRDASSIGN__MASK; -} -#define A3XX_VPC_ATTR_LMSIZE__MASK 0xf0000000 -#define A3XX_VPC_ATTR_LMSIZE__SHIFT 28 -static inline uint32_t A3XX_VPC_ATTR_LMSIZE(uint32_t val) -{ - return ((val) << A3XX_VPC_ATTR_LMSIZE__SHIFT) & A3XX_VPC_ATTR_LMSIZE__MASK; -} - -#define REG_A3XX_VPC_PACK 0x00002281 -#define A3XX_VPC_PACK_NUMFPNONPOSVAR__MASK 0x0000ff00 -#define A3XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT 8 -static inline uint32_t A3XX_VPC_PACK_NUMFPNONPOSVAR(uint32_t val) -{ - return ((val) << A3XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT) & A3XX_VPC_PACK_NUMFPNONPOSVAR__MASK; -} -#define A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK 0x00ff0000 -#define A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT 16 -static inline uint32_t 
A3XX_VPC_PACK_NUMNONPOSVSVAR(uint32_t val) -{ - return ((val) << A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT) & A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK; -} - -static inline uint32_t REG_A3XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x00002282 + 0x1*i0; } - -static inline uint32_t REG_A3XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x00002282 + 0x1*i0; } -#define A3XX_VPC_VARYING_INTERP_MODE_C0__MASK 0x00000003 -#define A3XX_VPC_VARYING_INTERP_MODE_C0__SHIFT 0 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C0(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C0__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C0__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_C1__MASK 0x0000000c -#define A3XX_VPC_VARYING_INTERP_MODE_C1__SHIFT 2 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C1(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C1__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C1__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_C2__MASK 0x00000030 -#define A3XX_VPC_VARYING_INTERP_MODE_C2__SHIFT 4 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C2(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C2__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C2__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_C3__MASK 0x000000c0 -#define A3XX_VPC_VARYING_INTERP_MODE_C3__SHIFT 6 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C3(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C3__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C3__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_C4__MASK 0x00000300 -#define A3XX_VPC_VARYING_INTERP_MODE_C4__SHIFT 8 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C4(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C4__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C4__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_C5__MASK 0x00000c00 -#define A3XX_VPC_VARYING_INTERP_MODE_C5__SHIFT 10 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C5(enum 
a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C5__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C5__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_C6__MASK 0x00003000 -#define A3XX_VPC_VARYING_INTERP_MODE_C6__SHIFT 12 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C6(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C6__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C6__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_C7__MASK 0x0000c000 -#define A3XX_VPC_VARYING_INTERP_MODE_C7__SHIFT 14 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C7(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C7__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C7__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_C8__MASK 0x00030000 -#define A3XX_VPC_VARYING_INTERP_MODE_C8__SHIFT 16 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C8(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C8__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C8__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_C9__MASK 0x000c0000 -#define A3XX_VPC_VARYING_INTERP_MODE_C9__SHIFT 18 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C9(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C9__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C9__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_CA__MASK 0x00300000 -#define A3XX_VPC_VARYING_INTERP_MODE_CA__SHIFT 20 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CA(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CA__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CA__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_CB__MASK 0x00c00000 -#define A3XX_VPC_VARYING_INTERP_MODE_CB__SHIFT 22 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CB(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CB__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CB__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_CC__MASK 0x03000000 -#define 
A3XX_VPC_VARYING_INTERP_MODE_CC__SHIFT 24 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CC(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CC__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CC__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_CD__MASK 0x0c000000 -#define A3XX_VPC_VARYING_INTERP_MODE_CD__SHIFT 26 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CD(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CD__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CD__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_CE__MASK 0x30000000 -#define A3XX_VPC_VARYING_INTERP_MODE_CE__SHIFT 28 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CE(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CE__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CE__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_CF__MASK 0xc0000000 -#define A3XX_VPC_VARYING_INTERP_MODE_CF__SHIFT 30 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CF(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CF__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CF__MASK; -} - -static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x00002286 + 0x1*i0; } - -static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x00002286 + 0x1*i0; } -#define A3XX_VPC_VARYING_PS_REPL_MODE_C0__MASK 0x00000003 -#define A3XX_VPC_VARYING_PS_REPL_MODE_C0__SHIFT 0 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C0(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C0__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C0__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_C1__MASK 0x0000000c -#define A3XX_VPC_VARYING_PS_REPL_MODE_C1__SHIFT 2 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C1(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C1__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C1__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_C2__MASK 0x00000030 -#define 
A3XX_VPC_VARYING_PS_REPL_MODE_C2__SHIFT 4 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C2(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C2__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C2__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_C3__MASK 0x000000c0 -#define A3XX_VPC_VARYING_PS_REPL_MODE_C3__SHIFT 6 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C3(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C3__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C3__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_C4__MASK 0x00000300 -#define A3XX_VPC_VARYING_PS_REPL_MODE_C4__SHIFT 8 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C4(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C4__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C4__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_C5__MASK 0x00000c00 -#define A3XX_VPC_VARYING_PS_REPL_MODE_C5__SHIFT 10 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C5(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C5__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C5__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_C6__MASK 0x00003000 -#define A3XX_VPC_VARYING_PS_REPL_MODE_C6__SHIFT 12 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C6(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C6__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C6__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_C7__MASK 0x0000c000 -#define A3XX_VPC_VARYING_PS_REPL_MODE_C7__SHIFT 14 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C7(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C7__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C7__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_C8__MASK 0x00030000 -#define A3XX_VPC_VARYING_PS_REPL_MODE_C8__SHIFT 16 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C8(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C8__SHIFT) & 
A3XX_VPC_VARYING_PS_REPL_MODE_C8__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_C9__MASK 0x000c0000 -#define A3XX_VPC_VARYING_PS_REPL_MODE_C9__SHIFT 18 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C9(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C9__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C9__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_CA__MASK 0x00300000 -#define A3XX_VPC_VARYING_PS_REPL_MODE_CA__SHIFT 20 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CA(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CA__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CA__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_CB__MASK 0x00c00000 -#define A3XX_VPC_VARYING_PS_REPL_MODE_CB__SHIFT 22 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CB(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CB__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CB__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_CC__MASK 0x03000000 -#define A3XX_VPC_VARYING_PS_REPL_MODE_CC__SHIFT 24 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CC(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CC__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CC__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_CD__MASK 0x0c000000 -#define A3XX_VPC_VARYING_PS_REPL_MODE_CD__SHIFT 26 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CD(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CD__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CD__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_CE__MASK 0x30000000 -#define A3XX_VPC_VARYING_PS_REPL_MODE_CE__SHIFT 28 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CE(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CE__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CE__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_CF__MASK 0xc0000000 -#define A3XX_VPC_VARYING_PS_REPL_MODE_CF__SHIFT 30 -static inline uint32_t 
A3XX_VPC_VARYING_PS_REPL_MODE_CF(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CF__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CF__MASK; -} - -#define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0 0x0000228a - -#define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_1 0x0000228b - -#define REG_A3XX_SP_SP_CTRL_REG 0x000022c0 -#define A3XX_SP_SP_CTRL_REG_RESOLVE 0x00010000 -#define A3XX_SP_SP_CTRL_REG_CONSTMODE__MASK 0x00040000 -#define A3XX_SP_SP_CTRL_REG_CONSTMODE__SHIFT 18 -static inline uint32_t A3XX_SP_SP_CTRL_REG_CONSTMODE(uint32_t val) -{ - return ((val) << A3XX_SP_SP_CTRL_REG_CONSTMODE__SHIFT) & A3XX_SP_SP_CTRL_REG_CONSTMODE__MASK; -} -#define A3XX_SP_SP_CTRL_REG_BINNING 0x00080000 -#define A3XX_SP_SP_CTRL_REG_SLEEPMODE__MASK 0x00300000 -#define A3XX_SP_SP_CTRL_REG_SLEEPMODE__SHIFT 20 -static inline uint32_t A3XX_SP_SP_CTRL_REG_SLEEPMODE(uint32_t val) -{ - return ((val) << A3XX_SP_SP_CTRL_REG_SLEEPMODE__SHIFT) & A3XX_SP_SP_CTRL_REG_SLEEPMODE__MASK; -} -#define A3XX_SP_SP_CTRL_REG_L0MODE__MASK 0x00c00000 -#define A3XX_SP_SP_CTRL_REG_L0MODE__SHIFT 22 -static inline uint32_t A3XX_SP_SP_CTRL_REG_L0MODE(uint32_t val) -{ - return ((val) << A3XX_SP_SP_CTRL_REG_L0MODE__SHIFT) & A3XX_SP_SP_CTRL_REG_L0MODE__MASK; -} - -#define REG_A3XX_SP_VS_CTRL_REG0 0x000022c4 -#define A3XX_SP_VS_CTRL_REG0_THREADMODE__MASK 0x00000001 -#define A3XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT 0 -static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val) -{ - return ((val) << A3XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT) & A3XX_SP_VS_CTRL_REG0_THREADMODE__MASK; -} -#define A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__MASK 0x00000002 -#define A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__SHIFT 1 -static inline uint32_t A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffermode val) -{ - return ((val) << A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__SHIFT) & A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__MASK; -} -#define A3XX_SP_VS_CTRL_REG0_CACHEINVALID 0x00000004 -#define A3XX_SP_VS_CTRL_REG0_ALUSCHMODE 
0x00000008 -#define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00100000 -#define A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 20 -static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK; -} -#define A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE 0x00200000 -#define A3XX_SP_VS_CTRL_REG0_LENGTH__MASK 0xff000000 -#define A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT 24 -static inline uint32_t A3XX_SP_VS_CTRL_REG0_LENGTH(uint32_t val) -{ - return ((val) << A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT) & A3XX_SP_VS_CTRL_REG0_LENGTH__MASK; -} - -#define REG_A3XX_SP_VS_CTRL_REG1 0x000022c5 -#define A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK 0x000003ff -#define A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT 0 -static inline uint32_t A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(uint32_t val) -{ - return ((val) << A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT) & A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK; -} -#define A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__MASK 0x000ffc00 -#define A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__SHIFT 10 -static inline uint32_t A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(uint32_t val) -{ - return ((val) << A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__MASK; -} -#define A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK 0x7f000000 -#define 
A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT 24 -static inline uint32_t A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val) -{ - return ((val) << A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK; -} - -#define REG_A3XX_SP_VS_PARAM_REG 0x000022c6 -#define A3XX_SP_VS_PARAM_REG_POSREGID__MASK 0x000000ff -#define A3XX_SP_VS_PARAM_REG_POSREGID__SHIFT 0 -static inline uint32_t A3XX_SP_VS_PARAM_REG_POSREGID(uint32_t val) -{ - return ((val) << A3XX_SP_VS_PARAM_REG_POSREGID__SHIFT) & A3XX_SP_VS_PARAM_REG_POSREGID__MASK; -} -#define A3XX_SP_VS_PARAM_REG_PSIZEREGID__MASK 0x0000ff00 -#define A3XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT 8 -static inline uint32_t A3XX_SP_VS_PARAM_REG_PSIZEREGID(uint32_t val) -{ - return ((val) << A3XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT) & A3XX_SP_VS_PARAM_REG_PSIZEREGID__MASK; -} -#define A3XX_SP_VS_PARAM_REG_POS2DMODE 0x00010000 -#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK 0x01f00000 -#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT 20 -static inline uint32_t A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val) -{ - return ((val) << A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT) & A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK; -} - -static inline uint32_t REG_A3XX_SP_VS_OUT(uint32_t i0) { return 0x000022c7 + 0x1*i0; } - -static inline uint32_t REG_A3XX_SP_VS_OUT_REG(uint32_t i0) { return 0x000022c7 + 0x1*i0; } -#define A3XX_SP_VS_OUT_REG_A_REGID__MASK 0x000000ff -#define A3XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 -static inline uint32_t A3XX_SP_VS_OUT_REG_A_REGID(uint32_t val) -{ - return ((val) << A3XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A3XX_SP_VS_OUT_REG_A_REGID__MASK; -} -#define A3XX_SP_VS_OUT_REG_A_HALF 0x00000100 -#define A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00001e00 -#define A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 9 -static inline uint32_t A3XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val) -{ - return ((val) << A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK; -} -#define 
A3XX_SP_VS_OUT_REG_B_REGID__MASK 0x00ff0000 -#define A3XX_SP_VS_OUT_REG_B_REGID__SHIFT 16 -static inline uint32_t A3XX_SP_VS_OUT_REG_B_REGID(uint32_t val) -{ - return ((val) << A3XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A3XX_SP_VS_OUT_REG_B_REGID__MASK; -} -#define A3XX_SP_VS_OUT_REG_B_HALF 0x01000000 -#define A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x1e000000 -#define A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 25 -static inline uint32_t A3XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) -{ - return ((val) << A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK; -} - -static inline uint32_t REG_A3XX_SP_VS_VPC_DST(uint32_t i0) { return 0x000022d0 + 0x1*i0; } - -static inline uint32_t REG_A3XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x000022d0 + 0x1*i0; } -#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x0000007f -#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 -static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) -{ - return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK; -} -#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x00007f00 -#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8 -static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val) -{ - return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK; -} -#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x007f0000 -#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16 -static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val) -{ - return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK; -} -#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0x7f000000 -#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24 -static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) -{ - return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK; -} - -#define REG_A3XX_SP_VS_OBJ_OFFSET_REG 0x000022d4 -#define A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK 
0x0000ffff -#define A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT 0 -static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET(uint32_t val) -{ - return ((val) << A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT) & A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK; -} -#define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 -#define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 -static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; -} -#define A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 -#define A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 -static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; -} - -#define REG_A3XX_SP_VS_OBJ_START_REG 0x000022d5 - -#define REG_A3XX_SP_VS_PVT_MEM_PARAM_REG 0x000022d6 -#define A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK 0x000000ff -#define A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT 0 -static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM(uint32_t val) -{ - return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK; -} -#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK 0x00ffff00 -#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT 8 -static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET(uint32_t val) -{ - return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK; -} -#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK 0xff000000 -#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT 24 -static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD(uint32_t val) 
-{ - return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK; -} - -#define REG_A3XX_SP_VS_PVT_MEM_ADDR_REG 0x000022d7 -#define A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__MASK 0x0000001f -#define A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT 0 -static inline uint32_t A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN(uint32_t val) -{ - return ((val) << A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT) & A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__MASK; -} -#define A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK 0xffffffe0 -#define A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT 5 -static inline uint32_t A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT) & A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK; -} - -#define REG_A3XX_SP_VS_PVT_MEM_SIZE_REG 0x000022d8 - -#define REG_A3XX_SP_VS_LENGTH_REG 0x000022df -#define A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__MASK 0xffffffff -#define A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__SHIFT 0 -static inline uint32_t A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(uint32_t val) -{ - return ((val) << A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__SHIFT) & A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__MASK; -} - -#define REG_A3XX_SP_FS_CTRL_REG0 0x000022e0 -#define A3XX_SP_FS_CTRL_REG0_THREADMODE__MASK 0x00000001 -#define A3XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT 0 -static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val) -{ - return ((val) << A3XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT) & A3XX_SP_FS_CTRL_REG0_THREADMODE__MASK; -} -#define A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__MASK 0x00000002 -#define A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__SHIFT 1 -static inline uint32_t A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffermode val) -{ - return ((val) << A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__SHIFT) & A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__MASK; -} -#define 
A3XX_SP_FS_CTRL_REG0_CACHEINVALID 0x00000004 -#define A3XX_SP_FS_CTRL_REG0_ALUSCHMODE 0x00000008 -#define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A3XX_SP_FS_CTRL_REG0_FSBYPASSENABLE 0x00020000 -#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP 0x00040000 -#define A3XX_SP_FS_CTRL_REG0_OUTORDERED 0x00080000 -#define A3XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00100000 -#define A3XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 20 -static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A3XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A3XX_SP_FS_CTRL_REG0_THREADSIZE__MASK; -} -#define A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE 0x00200000 -#define A3XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x00400000 -#define A3XX_SP_FS_CTRL_REG0_COMPUTEMODE 0x00800000 -#define A3XX_SP_FS_CTRL_REG0_LENGTH__MASK 0xff000000 -#define A3XX_SP_FS_CTRL_REG0_LENGTH__SHIFT 24 -static inline uint32_t A3XX_SP_FS_CTRL_REG0_LENGTH(uint32_t val) -{ - return ((val) << A3XX_SP_FS_CTRL_REG0_LENGTH__SHIFT) & A3XX_SP_FS_CTRL_REG0_LENGTH__MASK; -} - -#define REG_A3XX_SP_FS_CTRL_REG1 0x000022e1 -#define A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK 0x000003ff -#define A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT 0 -static inline uint32_t A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(uint32_t val) -{ - return ((val) << A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT) & A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK; -} -#define 
A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__MASK 0x000ffc00 -#define A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__SHIFT 10 -static inline uint32_t A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(uint32_t val) -{ - return ((val) << A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__MASK; -} -#define A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__MASK 0x00f00000 -#define A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__SHIFT 20 -static inline uint32_t A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val) -{ - return ((val) << A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__MASK; -} -#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK 0x7f000000 -#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__SHIFT 24 -static inline uint32_t A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(uint32_t val) -{ - return ((val) << A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__SHIFT) & A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK; -} - -#define REG_A3XX_SP_FS_OBJ_OFFSET_REG 0x000022e2 -#define A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK 0x0000ffff -#define A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT 0 -static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET(uint32_t val) -{ - return ((val) << A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT) & A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK; -} -#define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 -#define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 -static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; -} -#define A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 -#define A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 -static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & 
A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; -} - -#define REG_A3XX_SP_FS_OBJ_START_REG 0x000022e3 - -#define REG_A3XX_SP_FS_PVT_MEM_PARAM_REG 0x000022e4 -#define A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK 0x000000ff -#define A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT 0 -static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM(uint32_t val) -{ - return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK; -} -#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK 0x00ffff00 -#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT 8 -static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET(uint32_t val) -{ - return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK; -} -#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK 0xff000000 -#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT 24 -static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD(uint32_t val) -{ - return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK; -} - -#define REG_A3XX_SP_FS_PVT_MEM_ADDR_REG 0x000022e5 -#define A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__MASK 0x0000001f -#define A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT 0 -static inline uint32_t A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN(uint32_t val) -{ - return ((val) << A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT) & A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__MASK; -} -#define A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK 0xffffffe0 -#define A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT 5 -static inline uint32_t A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT) & A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK; -} - -#define 
REG_A3XX_SP_FS_PVT_MEM_SIZE_REG 0x000022e6 - -#define REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x000022e8 - -#define REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_1 0x000022e9 - -#define REG_A3XX_SP_FS_OUTPUT_REG 0x000022ec -#define A3XX_SP_FS_OUTPUT_REG_MRT__MASK 0x00000003 -#define A3XX_SP_FS_OUTPUT_REG_MRT__SHIFT 0 -static inline uint32_t A3XX_SP_FS_OUTPUT_REG_MRT(uint32_t val) -{ - return ((val) << A3XX_SP_FS_OUTPUT_REG_MRT__SHIFT) & A3XX_SP_FS_OUTPUT_REG_MRT__MASK; -} -#define A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE 0x00000080 -#define A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK 0x0000ff00 -#define A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT 8 -static inline uint32_t A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(uint32_t val) -{ - return ((val) << A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT) & A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK; -} - -static inline uint32_t REG_A3XX_SP_FS_MRT(uint32_t i0) { return 0x000022f0 + 0x1*i0; } - -static inline uint32_t REG_A3XX_SP_FS_MRT_REG(uint32_t i0) { return 0x000022f0 + 0x1*i0; } -#define A3XX_SP_FS_MRT_REG_REGID__MASK 0x000000ff -#define A3XX_SP_FS_MRT_REG_REGID__SHIFT 0 -static inline uint32_t A3XX_SP_FS_MRT_REG_REGID(uint32_t val) -{ - return ((val) << A3XX_SP_FS_MRT_REG_REGID__SHIFT) & A3XX_SP_FS_MRT_REG_REGID__MASK; -} -#define A3XX_SP_FS_MRT_REG_HALF_PRECISION 0x00000100 -#define A3XX_SP_FS_MRT_REG_SINT 0x00000400 -#define A3XX_SP_FS_MRT_REG_UINT 0x00000800 - -static inline uint32_t REG_A3XX_SP_FS_IMAGE_OUTPUT(uint32_t i0) { return 0x000022f4 + 0x1*i0; } - -static inline uint32_t REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(uint32_t i0) { return 0x000022f4 + 0x1*i0; } -#define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK 0x0000003f -#define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT 0 -static inline uint32_t A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(enum a3xx_color_fmt val) -{ - return ((val) << A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT) & A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK; -} - -#define REG_A3XX_SP_FS_LENGTH_REG 0x000022ff -#define 
A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__MASK 0xffffffff -#define A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__SHIFT 0 -static inline uint32_t A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(uint32_t val) -{ - return ((val) << A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__SHIFT) & A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__MASK; -} - -#define REG_A3XX_PA_SC_AA_CONFIG 0x00002301 - -#define REG_A3XX_TPL1_TP_VS_TEX_OFFSET 0x00002340 -#define A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__MASK 0x000000ff -#define A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__SHIFT 0 -static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET(uint32_t val) -{ - return ((val) << A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__SHIFT) & A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__MASK; -} -#define A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__MASK 0x0000ff00 -#define A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__SHIFT 8 -static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET(uint32_t val) -{ - return ((val) << A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__SHIFT) & A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__MASK; -} -#define A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__MASK 0xffff0000 -#define A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__SHIFT 16 -static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR(uint32_t val) -{ - return ((val) << A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__SHIFT) & A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__MASK; -} - -#define REG_A3XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR 0x00002341 - -#define REG_A3XX_TPL1_TP_FS_TEX_OFFSET 0x00002342 -#define A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__MASK 0x000000ff -#define A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__SHIFT 0 -static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET(uint32_t val) -{ - return ((val) << A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__SHIFT) & A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__MASK; -} -#define A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__MASK 0x0000ff00 -#define A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__SHIFT 8 -static inline uint32_t 
A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET(uint32_t val) -{ - return ((val) << A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__SHIFT) & A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__MASK; -} -#define A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__MASK 0xffff0000 -#define A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__SHIFT 16 -static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR(uint32_t val) -{ - return ((val) << A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__SHIFT) & A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__MASK; -} - -#define REG_A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR 0x00002343 - -#define REG_A3XX_VBIF_CLKON 0x00003001 - -#define REG_A3XX_VBIF_FIXED_SORT_EN 0x0000300c - -#define REG_A3XX_VBIF_FIXED_SORT_SEL0 0x0000300d - -#define REG_A3XX_VBIF_FIXED_SORT_SEL1 0x0000300e - -#define REG_A3XX_VBIF_ABIT_SORT 0x0000301c - -#define REG_A3XX_VBIF_ABIT_SORT_CONF 0x0000301d - -#define REG_A3XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a - -#define REG_A3XX_VBIF_IN_RD_LIM_CONF0 0x0000302c - -#define REG_A3XX_VBIF_IN_RD_LIM_CONF1 0x0000302d - -#define REG_A3XX_VBIF_IN_WR_LIM_CONF0 0x00003030 - -#define REG_A3XX_VBIF_IN_WR_LIM_CONF1 0x00003031 - -#define REG_A3XX_VBIF_OUT_RD_LIM_CONF0 0x00003034 - -#define REG_A3XX_VBIF_OUT_WR_LIM_CONF0 0x00003035 - -#define REG_A3XX_VBIF_DDR_OUT_MAX_BURST 0x00003036 - -#define REG_A3XX_VBIF_ARB_CTL 0x0000303c - -#define REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB 0x00003049 - -#define REG_A3XX_VBIF_OUT_AXI_AMEMTYPE_CONF0 0x00003058 - -#define REG_A3XX_VBIF_OUT_AXI_AOOO_EN 0x0000305e - -#define REG_A3XX_VBIF_OUT_AXI_AOOO 0x0000305f - -#define REG_A3XX_VBIF_PERF_CNT_EN 0x00003070 -#define A3XX_VBIF_PERF_CNT_EN_CNT0 0x00000001 -#define A3XX_VBIF_PERF_CNT_EN_CNT1 0x00000002 -#define A3XX_VBIF_PERF_CNT_EN_PWRCNT0 0x00000004 -#define A3XX_VBIF_PERF_CNT_EN_PWRCNT1 0x00000008 -#define A3XX_VBIF_PERF_CNT_EN_PWRCNT2 0x00000010 - -#define REG_A3XX_VBIF_PERF_CNT_CLR 0x00003071 -#define A3XX_VBIF_PERF_CNT_CLR_CNT0 0x00000001 -#define A3XX_VBIF_PERF_CNT_CLR_CNT1 0x00000002 -#define 
A3XX_VBIF_PERF_CNT_CLR_PWRCNT0 0x00000004 -#define A3XX_VBIF_PERF_CNT_CLR_PWRCNT1 0x00000008 -#define A3XX_VBIF_PERF_CNT_CLR_PWRCNT2 0x00000010 - -#define REG_A3XX_VBIF_PERF_CNT_SEL 0x00003072 - -#define REG_A3XX_VBIF_PERF_CNT0_LO 0x00003073 - -#define REG_A3XX_VBIF_PERF_CNT0_HI 0x00003074 - -#define REG_A3XX_VBIF_PERF_CNT1_LO 0x00003075 - -#define REG_A3XX_VBIF_PERF_CNT1_HI 0x00003076 - -#define REG_A3XX_VBIF_PERF_PWR_CNT0_LO 0x00003077 - -#define REG_A3XX_VBIF_PERF_PWR_CNT0_HI 0x00003078 - -#define REG_A3XX_VBIF_PERF_PWR_CNT1_LO 0x00003079 - -#define REG_A3XX_VBIF_PERF_PWR_CNT1_HI 0x0000307a - -#define REG_A3XX_VBIF_PERF_PWR_CNT2_LO 0x0000307b - -#define REG_A3XX_VBIF_PERF_PWR_CNT2_HI 0x0000307c - -#define REG_A3XX_VSC_BIN_SIZE 0x00000c01 -#define A3XX_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f -#define A3XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 -static inline uint32_t A3XX_VSC_BIN_SIZE_WIDTH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A3XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A3XX_VSC_BIN_SIZE_WIDTH__MASK; -} -#define A3XX_VSC_BIN_SIZE_HEIGHT__MASK 0x000003e0 -#define A3XX_VSC_BIN_SIZE_HEIGHT__SHIFT 5 -static inline uint32_t A3XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A3XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A3XX_VSC_BIN_SIZE_HEIGHT__MASK; -} - -#define REG_A3XX_VSC_SIZE_ADDRESS 0x00000c02 - -static inline uint32_t REG_A3XX_VSC_PIPE(uint32_t i0) { return 0x00000c06 + 0x3*i0; } - -static inline uint32_t REG_A3XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c06 + 0x3*i0; } -#define A3XX_VSC_PIPE_CONFIG_X__MASK 0x000003ff -#define A3XX_VSC_PIPE_CONFIG_X__SHIFT 0 -static inline uint32_t A3XX_VSC_PIPE_CONFIG_X(uint32_t val) -{ - return ((val) << A3XX_VSC_PIPE_CONFIG_X__SHIFT) & A3XX_VSC_PIPE_CONFIG_X__MASK; -} -#define A3XX_VSC_PIPE_CONFIG_Y__MASK 0x000ffc00 -#define A3XX_VSC_PIPE_CONFIG_Y__SHIFT 10 -static inline uint32_t A3XX_VSC_PIPE_CONFIG_Y(uint32_t val) -{ - return ((val) << A3XX_VSC_PIPE_CONFIG_Y__SHIFT) & 
A3XX_VSC_PIPE_CONFIG_Y__MASK; -} -#define A3XX_VSC_PIPE_CONFIG_W__MASK 0x00f00000 -#define A3XX_VSC_PIPE_CONFIG_W__SHIFT 20 -static inline uint32_t A3XX_VSC_PIPE_CONFIG_W(uint32_t val) -{ - return ((val) << A3XX_VSC_PIPE_CONFIG_W__SHIFT) & A3XX_VSC_PIPE_CONFIG_W__MASK; -} -#define A3XX_VSC_PIPE_CONFIG_H__MASK 0x0f000000 -#define A3XX_VSC_PIPE_CONFIG_H__SHIFT 24 -static inline uint32_t A3XX_VSC_PIPE_CONFIG_H(uint32_t val) -{ - return ((val) << A3XX_VSC_PIPE_CONFIG_H__SHIFT) & A3XX_VSC_PIPE_CONFIG_H__MASK; -} - -static inline uint32_t REG_A3XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c07 + 0x3*i0; } - -static inline uint32_t REG_A3XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c08 + 0x3*i0; } - -#define REG_A3XX_VSC_BIN_CONTROL 0x00000c3c -#define A3XX_VSC_BIN_CONTROL_BINNING_ENABLE 0x00000001 - -#define REG_A3XX_UNKNOWN_0C3D 0x00000c3d - -#define REG_A3XX_PC_PERFCOUNTER0_SELECT 0x00000c48 - -#define REG_A3XX_PC_PERFCOUNTER1_SELECT 0x00000c49 - -#define REG_A3XX_PC_PERFCOUNTER2_SELECT 0x00000c4a - -#define REG_A3XX_PC_PERFCOUNTER3_SELECT 0x00000c4b - -#define REG_A3XX_GRAS_TSE_DEBUG_ECO 0x00000c81 - -#define REG_A3XX_GRAS_PERFCOUNTER0_SELECT 0x00000c88 - -#define REG_A3XX_GRAS_PERFCOUNTER1_SELECT 0x00000c89 - -#define REG_A3XX_GRAS_PERFCOUNTER2_SELECT 0x00000c8a - -#define REG_A3XX_GRAS_PERFCOUNTER3_SELECT 0x00000c8b - -static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE(uint32_t i0) { return 0x00000ca0 + 0x4*i0; } - -static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_X(uint32_t i0) { return 0x00000ca0 + 0x4*i0; } - -static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_Y(uint32_t i0) { return 0x00000ca1 + 0x4*i0; } - -static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_Z(uint32_t i0) { return 0x00000ca2 + 0x4*i0; } - -static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_W(uint32_t i0) { return 0x00000ca3 + 0x4*i0; } - -#define REG_A3XX_RB_GMEM_BASE_ADDR 0x00000cc0 - -#define REG_A3XX_RB_DEBUG_ECO_CONTROLS_ADDR 0x00000cc1 - -#define 
REG_A3XX_RB_PERFCOUNTER0_SELECT 0x00000cc6 - -#define REG_A3XX_RB_PERFCOUNTER1_SELECT 0x00000cc7 - -#define REG_A3XX_RB_FRAME_BUFFER_DIMENSION 0x00000ce0 -#define A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__MASK 0x00003fff -#define A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__SHIFT 0 -static inline uint32_t A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(uint32_t val) -{ - return ((val) << A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__SHIFT) & A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__MASK; -} -#define A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__MASK 0x0fffc000 -#define A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__SHIFT 14 -static inline uint32_t A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(uint32_t val) -{ - return ((val) << A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__SHIFT) & A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__MASK; -} - -#define REG_A3XX_HLSQ_PERFCOUNTER0_SELECT 0x00000e00 - -#define REG_A3XX_HLSQ_PERFCOUNTER1_SELECT 0x00000e01 - -#define REG_A3XX_HLSQ_PERFCOUNTER2_SELECT 0x00000e02 - -#define REG_A3XX_HLSQ_PERFCOUNTER3_SELECT 0x00000e03 - -#define REG_A3XX_HLSQ_PERFCOUNTER4_SELECT 0x00000e04 - -#define REG_A3XX_HLSQ_PERFCOUNTER5_SELECT 0x00000e05 - -#define REG_A3XX_UNKNOWN_0E43 0x00000e43 - -#define REG_A3XX_VFD_PERFCOUNTER0_SELECT 0x00000e44 - -#define REG_A3XX_VFD_PERFCOUNTER1_SELECT 0x00000e45 - -#define REG_A3XX_VPC_VPC_DEBUG_RAM_SEL 0x00000e61 - -#define REG_A3XX_VPC_VPC_DEBUG_RAM_READ 0x00000e62 - -#define REG_A3XX_VPC_PERFCOUNTER0_SELECT 0x00000e64 - -#define REG_A3XX_VPC_PERFCOUNTER1_SELECT 0x00000e65 - -#define REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG 0x00000e82 - -#define REG_A3XX_UCHE_PERFCOUNTER0_SELECT 0x00000e84 - -#define REG_A3XX_UCHE_PERFCOUNTER1_SELECT 0x00000e85 - -#define REG_A3XX_UCHE_PERFCOUNTER2_SELECT 0x00000e86 - -#define REG_A3XX_UCHE_PERFCOUNTER3_SELECT 0x00000e87 - -#define REG_A3XX_UCHE_PERFCOUNTER4_SELECT 0x00000e88 - -#define REG_A3XX_UCHE_PERFCOUNTER5_SELECT 0x00000e89 - -#define REG_A3XX_UCHE_CACHE_INVALIDATE0_REG 0x00000ea0 -#define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK 
0x0fffffff -#define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT 0 -static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(uint32_t val) -{ - return ((val) << A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT) & A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK; -} - -#define REG_A3XX_UCHE_CACHE_INVALIDATE1_REG 0x00000ea1 -#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__MASK 0x0fffffff -#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__SHIFT 0 -static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(uint32_t val) -{ - return ((val) << A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__SHIFT) & A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__MASK; -} -#define A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__MASK 0x30000000 -#define A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__SHIFT 28 -static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(enum a3xx_cache_opcode val) -{ - return ((val) << A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__SHIFT) & A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__MASK; -} -#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE 0x80000000 - -#define REG_A3XX_UNKNOWN_0EA6 0x00000ea6 - -#define REG_A3XX_SP_PERFCOUNTER0_SELECT 0x00000ec4 - -#define REG_A3XX_SP_PERFCOUNTER1_SELECT 0x00000ec5 - -#define REG_A3XX_SP_PERFCOUNTER2_SELECT 0x00000ec6 - -#define REG_A3XX_SP_PERFCOUNTER3_SELECT 0x00000ec7 - -#define REG_A3XX_SP_PERFCOUNTER4_SELECT 0x00000ec8 - -#define REG_A3XX_SP_PERFCOUNTER5_SELECT 0x00000ec9 - -#define REG_A3XX_SP_PERFCOUNTER6_SELECT 0x00000eca - -#define REG_A3XX_SP_PERFCOUNTER7_SELECT 0x00000ecb - -#define REG_A3XX_UNKNOWN_0EE0 0x00000ee0 - -#define REG_A3XX_UNKNOWN_0F03 0x00000f03 - -#define REG_A3XX_TP_PERFCOUNTER0_SELECT 0x00000f04 - -#define REG_A3XX_TP_PERFCOUNTER1_SELECT 0x00000f05 - -#define REG_A3XX_TP_PERFCOUNTER2_SELECT 0x00000f06 - -#define REG_A3XX_TP_PERFCOUNTER3_SELECT 0x00000f07 - -#define REG_A3XX_TP_PERFCOUNTER4_SELECT 0x00000f08 - -#define REG_A3XX_TP_PERFCOUNTER5_SELECT 0x00000f09 - -#define REG_A3XX_VGT_CL_INITIATOR 0x000021f0 - -#define REG_A3XX_VGT_EVENT_INITIATOR 
0x000021f9 - -#define REG_A3XX_VGT_DRAW_INITIATOR 0x000021fc -#define A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE__MASK 0x0000003f -#define A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE__SHIFT 0 -static inline uint32_t A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE(enum pc_di_primtype val) -{ - return ((val) << A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE__SHIFT) & A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE__MASK; -} -#define A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__MASK 0x000000c0 -#define A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__SHIFT 6 -static inline uint32_t A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT(enum pc_di_src_sel val) -{ - return ((val) << A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__SHIFT) & A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__MASK; -} -#define A3XX_VGT_DRAW_INITIATOR_VIS_CULL__MASK 0x00000600 -#define A3XX_VGT_DRAW_INITIATOR_VIS_CULL__SHIFT 9 -static inline uint32_t A3XX_VGT_DRAW_INITIATOR_VIS_CULL(enum pc_di_vis_cull_mode val) -{ - return ((val) << A3XX_VGT_DRAW_INITIATOR_VIS_CULL__SHIFT) & A3XX_VGT_DRAW_INITIATOR_VIS_CULL__MASK; -} -#define A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE__MASK 0x00000800 -#define A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE__SHIFT 11 -static inline uint32_t A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE(enum pc_di_index_size val) -{ - return ((val) << A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE__SHIFT) & A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE__MASK; -} -#define A3XX_VGT_DRAW_INITIATOR_NOT_EOP 0x00001000 -#define A3XX_VGT_DRAW_INITIATOR_SMALL_INDEX 0x00002000 -#define A3XX_VGT_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE 0x00004000 -#define A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__MASK 0xff000000 -#define A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__SHIFT 24 -static inline uint32_t A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES(uint32_t val) -{ - return ((val) << A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__SHIFT) & A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__MASK; -} - -#define REG_A3XX_VGT_IMMED_DATA 0x000021fd - -#define REG_A3XX_TEX_SAMP_0 0x00000000 -#define A3XX_TEX_SAMP_0_CLAMPENABLE 0x00000001 -#define A3XX_TEX_SAMP_0_MIPFILTER_LINEAR 0x00000002 -#define 
A3XX_TEX_SAMP_0_XY_MAG__MASK 0x0000000c -#define A3XX_TEX_SAMP_0_XY_MAG__SHIFT 2 -static inline uint32_t A3XX_TEX_SAMP_0_XY_MAG(enum a3xx_tex_filter val) -{ - return ((val) << A3XX_TEX_SAMP_0_XY_MAG__SHIFT) & A3XX_TEX_SAMP_0_XY_MAG__MASK; -} -#define A3XX_TEX_SAMP_0_XY_MIN__MASK 0x00000030 -#define A3XX_TEX_SAMP_0_XY_MIN__SHIFT 4 -static inline uint32_t A3XX_TEX_SAMP_0_XY_MIN(enum a3xx_tex_filter val) -{ - return ((val) << A3XX_TEX_SAMP_0_XY_MIN__SHIFT) & A3XX_TEX_SAMP_0_XY_MIN__MASK; -} -#define A3XX_TEX_SAMP_0_WRAP_S__MASK 0x000001c0 -#define A3XX_TEX_SAMP_0_WRAP_S__SHIFT 6 -static inline uint32_t A3XX_TEX_SAMP_0_WRAP_S(enum a3xx_tex_clamp val) -{ - return ((val) << A3XX_TEX_SAMP_0_WRAP_S__SHIFT) & A3XX_TEX_SAMP_0_WRAP_S__MASK; -} -#define A3XX_TEX_SAMP_0_WRAP_T__MASK 0x00000e00 -#define A3XX_TEX_SAMP_0_WRAP_T__SHIFT 9 -static inline uint32_t A3XX_TEX_SAMP_0_WRAP_T(enum a3xx_tex_clamp val) -{ - return ((val) << A3XX_TEX_SAMP_0_WRAP_T__SHIFT) & A3XX_TEX_SAMP_0_WRAP_T__MASK; -} -#define A3XX_TEX_SAMP_0_WRAP_R__MASK 0x00007000 -#define A3XX_TEX_SAMP_0_WRAP_R__SHIFT 12 -static inline uint32_t A3XX_TEX_SAMP_0_WRAP_R(enum a3xx_tex_clamp val) -{ - return ((val) << A3XX_TEX_SAMP_0_WRAP_R__SHIFT) & A3XX_TEX_SAMP_0_WRAP_R__MASK; -} -#define A3XX_TEX_SAMP_0_ANISO__MASK 0x00038000 -#define A3XX_TEX_SAMP_0_ANISO__SHIFT 15 -static inline uint32_t A3XX_TEX_SAMP_0_ANISO(enum a3xx_tex_aniso val) -{ - return ((val) << A3XX_TEX_SAMP_0_ANISO__SHIFT) & A3XX_TEX_SAMP_0_ANISO__MASK; -} -#define A3XX_TEX_SAMP_0_COMPARE_FUNC__MASK 0x00700000 -#define A3XX_TEX_SAMP_0_COMPARE_FUNC__SHIFT 20 -static inline uint32_t A3XX_TEX_SAMP_0_COMPARE_FUNC(enum adreno_compare_func val) -{ - return ((val) << A3XX_TEX_SAMP_0_COMPARE_FUNC__SHIFT) & A3XX_TEX_SAMP_0_COMPARE_FUNC__MASK; -} -#define A3XX_TEX_SAMP_0_CUBEMAPSEAMLESSFILTOFF 0x01000000 -#define A3XX_TEX_SAMP_0_UNNORM_COORDS 0x80000000 - -#define REG_A3XX_TEX_SAMP_1 0x00000001 -#define A3XX_TEX_SAMP_1_LOD_BIAS__MASK 0x000007ff -#define 
A3XX_TEX_SAMP_1_LOD_BIAS__SHIFT 0 -static inline uint32_t A3XX_TEX_SAMP_1_LOD_BIAS(float val) -{ - return ((((int32_t)(val * 64.0))) << A3XX_TEX_SAMP_1_LOD_BIAS__SHIFT) & A3XX_TEX_SAMP_1_LOD_BIAS__MASK; -} -#define A3XX_TEX_SAMP_1_MAX_LOD__MASK 0x003ff000 -#define A3XX_TEX_SAMP_1_MAX_LOD__SHIFT 12 -static inline uint32_t A3XX_TEX_SAMP_1_MAX_LOD(float val) -{ - return ((((uint32_t)(val * 64.0))) << A3XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A3XX_TEX_SAMP_1_MAX_LOD__MASK; -} -#define A3XX_TEX_SAMP_1_MIN_LOD__MASK 0xffc00000 -#define A3XX_TEX_SAMP_1_MIN_LOD__SHIFT 22 -static inline uint32_t A3XX_TEX_SAMP_1_MIN_LOD(float val) -{ - return ((((uint32_t)(val * 64.0))) << A3XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A3XX_TEX_SAMP_1_MIN_LOD__MASK; -} - -#define REG_A3XX_TEX_CONST_0 0x00000000 -#define A3XX_TEX_CONST_0_TILED 0x00000001 -#define A3XX_TEX_CONST_0_SRGB 0x00000004 -#define A3XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070 -#define A3XX_TEX_CONST_0_SWIZ_X__SHIFT 4 -static inline uint32_t A3XX_TEX_CONST_0_SWIZ_X(enum a3xx_tex_swiz val) -{ - return ((val) << A3XX_TEX_CONST_0_SWIZ_X__SHIFT) & A3XX_TEX_CONST_0_SWIZ_X__MASK; -} -#define A3XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380 -#define A3XX_TEX_CONST_0_SWIZ_Y__SHIFT 7 -static inline uint32_t A3XX_TEX_CONST_0_SWIZ_Y(enum a3xx_tex_swiz val) -{ - return ((val) << A3XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A3XX_TEX_CONST_0_SWIZ_Y__MASK; -} -#define A3XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00 -#define A3XX_TEX_CONST_0_SWIZ_Z__SHIFT 10 -static inline uint32_t A3XX_TEX_CONST_0_SWIZ_Z(enum a3xx_tex_swiz val) -{ - return ((val) << A3XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A3XX_TEX_CONST_0_SWIZ_Z__MASK; -} -#define A3XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000 -#define A3XX_TEX_CONST_0_SWIZ_W__SHIFT 13 -static inline uint32_t A3XX_TEX_CONST_0_SWIZ_W(enum a3xx_tex_swiz val) -{ - return ((val) << A3XX_TEX_CONST_0_SWIZ_W__SHIFT) & A3XX_TEX_CONST_0_SWIZ_W__MASK; -} -#define A3XX_TEX_CONST_0_MIPLVLS__MASK 0x000f0000 -#define A3XX_TEX_CONST_0_MIPLVLS__SHIFT 16 -static inline uint32_t 
A3XX_TEX_CONST_0_MIPLVLS(uint32_t val) -{ - return ((val) << A3XX_TEX_CONST_0_MIPLVLS__SHIFT) & A3XX_TEX_CONST_0_MIPLVLS__MASK; -} -#define A3XX_TEX_CONST_0_MSAATEX__MASK 0x00300000 -#define A3XX_TEX_CONST_0_MSAATEX__SHIFT 20 -static inline uint32_t A3XX_TEX_CONST_0_MSAATEX(enum a3xx_tex_msaa val) -{ - return ((val) << A3XX_TEX_CONST_0_MSAATEX__SHIFT) & A3XX_TEX_CONST_0_MSAATEX__MASK; -} -#define A3XX_TEX_CONST_0_FMT__MASK 0x1fc00000 -#define A3XX_TEX_CONST_0_FMT__SHIFT 22 -static inline uint32_t A3XX_TEX_CONST_0_FMT(enum a3xx_tex_fmt val) -{ - return ((val) << A3XX_TEX_CONST_0_FMT__SHIFT) & A3XX_TEX_CONST_0_FMT__MASK; -} -#define A3XX_TEX_CONST_0_NOCONVERT 0x20000000 -#define A3XX_TEX_CONST_0_TYPE__MASK 0xc0000000 -#define A3XX_TEX_CONST_0_TYPE__SHIFT 30 -static inline uint32_t A3XX_TEX_CONST_0_TYPE(enum a3xx_tex_type val) -{ - return ((val) << A3XX_TEX_CONST_0_TYPE__SHIFT) & A3XX_TEX_CONST_0_TYPE__MASK; -} - -#define REG_A3XX_TEX_CONST_1 0x00000001 -#define A3XX_TEX_CONST_1_HEIGHT__MASK 0x00003fff -#define A3XX_TEX_CONST_1_HEIGHT__SHIFT 0 -static inline uint32_t A3XX_TEX_CONST_1_HEIGHT(uint32_t val) -{ - return ((val) << A3XX_TEX_CONST_1_HEIGHT__SHIFT) & A3XX_TEX_CONST_1_HEIGHT__MASK; -} -#define A3XX_TEX_CONST_1_WIDTH__MASK 0x0fffc000 -#define A3XX_TEX_CONST_1_WIDTH__SHIFT 14 -static inline uint32_t A3XX_TEX_CONST_1_WIDTH(uint32_t val) -{ - return ((val) << A3XX_TEX_CONST_1_WIDTH__SHIFT) & A3XX_TEX_CONST_1_WIDTH__MASK; -} -#define A3XX_TEX_CONST_1_FETCHSIZE__MASK 0xf0000000 -#define A3XX_TEX_CONST_1_FETCHSIZE__SHIFT 28 -static inline uint32_t A3XX_TEX_CONST_1_FETCHSIZE(enum a3xx_tex_fetchsize val) -{ - return ((val) << A3XX_TEX_CONST_1_FETCHSIZE__SHIFT) & A3XX_TEX_CONST_1_FETCHSIZE__MASK; -} - -#define REG_A3XX_TEX_CONST_2 0x00000002 -#define A3XX_TEX_CONST_2_INDX__MASK 0x000001ff -#define A3XX_TEX_CONST_2_INDX__SHIFT 0 -static inline uint32_t A3XX_TEX_CONST_2_INDX(uint32_t val) -{ - return ((val) << A3XX_TEX_CONST_2_INDX__SHIFT) & A3XX_TEX_CONST_2_INDX__MASK; 
-} -#define A3XX_TEX_CONST_2_PITCH__MASK 0x3ffff000 -#define A3XX_TEX_CONST_2_PITCH__SHIFT 12 -static inline uint32_t A3XX_TEX_CONST_2_PITCH(uint32_t val) -{ - return ((val) << A3XX_TEX_CONST_2_PITCH__SHIFT) & A3XX_TEX_CONST_2_PITCH__MASK; -} -#define A3XX_TEX_CONST_2_SWAP__MASK 0xc0000000 -#define A3XX_TEX_CONST_2_SWAP__SHIFT 30 -static inline uint32_t A3XX_TEX_CONST_2_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A3XX_TEX_CONST_2_SWAP__SHIFT) & A3XX_TEX_CONST_2_SWAP__MASK; -} - -#define REG_A3XX_TEX_CONST_3 0x00000003 -#define A3XX_TEX_CONST_3_LAYERSZ1__MASK 0x0001ffff -#define A3XX_TEX_CONST_3_LAYERSZ1__SHIFT 0 -static inline uint32_t A3XX_TEX_CONST_3_LAYERSZ1(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << A3XX_TEX_CONST_3_LAYERSZ1__SHIFT) & A3XX_TEX_CONST_3_LAYERSZ1__MASK; -} -#define A3XX_TEX_CONST_3_DEPTH__MASK 0x0ffe0000 -#define A3XX_TEX_CONST_3_DEPTH__SHIFT 17 -static inline uint32_t A3XX_TEX_CONST_3_DEPTH(uint32_t val) -{ - return ((val) << A3XX_TEX_CONST_3_DEPTH__SHIFT) & A3XX_TEX_CONST_3_DEPTH__MASK; -} -#define A3XX_TEX_CONST_3_LAYERSZ2__MASK 0xf0000000 -#define A3XX_TEX_CONST_3_LAYERSZ2__SHIFT 28 -static inline uint32_t A3XX_TEX_CONST_3_LAYERSZ2(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << A3XX_TEX_CONST_3_LAYERSZ2__SHIFT) & A3XX_TEX_CONST_3_LAYERSZ2__MASK; -} - - -#endif /* A3XX_XML */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_context.c mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_context.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -101,13 +101,13 @@ fd_hw_query_init(pctx); fd3_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "vs_pvt"); fd3_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, 
"fs_pvt"); fd3_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size"); fd_context_setup_common_vbos(&fd3_ctx->base); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_context.h mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_context.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_context.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -31,7 +31,7 @@ #include "freedreno_context.h" -#include "ir3_shader.h" +#include "ir3/ir3_shader.h" struct fd3_context { diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_emit.c mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_emit.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_emit.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_emit.c 2019-03-31 23:16:37.000000000 +0000 @@ -44,8 +44,8 @@ #include "fd3_zsa.h" static const enum adreno_state_block sb[] = { - [SHADER_VERTEX] = SB_VERT_SHADER, - [SHADER_FRAGMENT] = SB_FRAG_SHADER, + [MESA_SHADER_VERTEX] = SB_VERT_SHADER, + [MESA_SHADER_FRAGMENT] = SB_FRAG_SHADER, }; /* regid: base const register @@ -53,7 +53,7 @@ * sizedwords: size of const value buffer */ static void -fd3_emit_const(struct fd_ringbuffer *ring, enum shader_t type, +fd3_emit_const(struct fd_ringbuffer *ring, gl_shader_stage type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc) { @@ -91,7 +91,7 @@ } static void -fd3_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, +fd3_emit_const_bo(struct fd_ringbuffer *ring, gl_shader_stage type, boolean write, uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets) { uint32_t anum = align(num, 4); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_emit.h mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_emit.h --- 
mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_emit.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_emit.h 2019-03-31 23:16:37.000000000 +0000 @@ -32,7 +32,7 @@ #include "freedreno_context.h" #include "fd3_format.h" #include "fd3_program.h" -#include "ir3_shader.h" +#include "ir3_gallium.h" struct fd_ringbuffer; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c 2019-03-31 23:16:37.000000000 +0000 @@ -785,7 +785,7 @@ if (!pipe->bo) { pipe->bo = fd_bo_new(ctx->dev, 0x40000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i); } OUT_PKT0(ring, REG_A3XX_VSC_PIPE(i), 3); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_program.c mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_program.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_program.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_program.c 2019-03-31 23:16:37.000000000 +0000 @@ -40,7 +40,7 @@ static struct ir3_shader * create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso, - enum shader_t type) + gl_shader_stage type) { struct fd_context *ctx = fd_context(pctx); struct ir3_compiler *compiler = ctx->screen->compiler; @@ -51,7 +51,7 @@ fd3_fp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { - return create_shader_stateobj(pctx, cso, SHADER_FRAGMENT); + return create_shader_stateobj(pctx, cso, MESA_SHADER_FRAGMENT); } static void @@ -65,7 +65,7 @@ fd3_vp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { - return create_shader_stateobj(pctx, cso, SHADER_VERTEX); + return create_shader_stateobj(pctx, cso, MESA_SHADER_VERTEX); } static void @@ -97,7 
+97,7 @@ enum adreno_state_src src; uint32_t i, sz, *bin; - if (so->type == SHADER_VERTEX) { + if (so->type == MESA_SHADER_VERTEX) { sb = SB_VERT_SHADER; } else { sb = SB_FRAG_SHADER; @@ -122,7 +122,7 @@ OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER)); } else { - OUT_RELOC(ring, so->bo, 0, + OUT_RELOCD(ring, so->bo, 0, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0); } for (i = 0; i < sz; i++) { @@ -226,6 +226,7 @@ OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6); OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) | + A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE | A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) | /* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe * flush some caches? I think we only need to set those diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_program.h mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_program.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_program.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_program.h 2019-03-31 23:16:37.000000000 +0000 @@ -29,7 +29,8 @@ #include "pipe/p_context.h" #include "freedreno_context.h" -#include "ir3_shader.h" + +#include "ir3/ir3_shader.h" struct fd3_emit; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_screen.c mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_screen.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -30,7 +30,8 @@ #include "fd3_screen.h" #include "fd3_context.h" #include "fd3_format.h" -#include "ir3_compiler.h" + +#include "ir3/ir3_compiler.h" static boolean fd3_screen_is_format_supported(struct pipe_screen *pscreen, diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h --- 
mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,4257 +0,0 @@ -#ifndef A4XX_XML -#define A4XX_XML - -/* Autogenerated file, DO NOT EDIT manually! - -This file was generated by the rules-ng-ng headergen tool in this git repository: -http://github.com/freedreno/envytools/ -git clone https://github.com/freedreno/envytools.git - -The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 37936 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14201 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 42864 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 140514 bytes, from 2018-10-08 21:57:35) -- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) -- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) - -Copyright (C) 2013-2018 by the following authors: -- Rob Clark (robclark) -- Ilia Mirkin (imirkin) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, 
sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - - -enum a4xx_color_fmt { - RB4_A8_UNORM = 1, - RB4_R8_UNORM = 2, - RB4_R8_SNORM = 3, - RB4_R8_UINT = 4, - RB4_R8_SINT = 5, - RB4_R4G4B4A4_UNORM = 8, - RB4_R5G5B5A1_UNORM = 10, - RB4_R5G6B5_UNORM = 14, - RB4_R8G8_UNORM = 15, - RB4_R8G8_SNORM = 16, - RB4_R8G8_UINT = 17, - RB4_R8G8_SINT = 18, - RB4_R16_UNORM = 19, - RB4_R16_SNORM = 20, - RB4_R16_FLOAT = 21, - RB4_R16_UINT = 22, - RB4_R16_SINT = 23, - RB4_R8G8B8_UNORM = 25, - RB4_R8G8B8A8_UNORM = 26, - RB4_R8G8B8A8_SNORM = 28, - RB4_R8G8B8A8_UINT = 29, - RB4_R8G8B8A8_SINT = 30, - RB4_R10G10B10A2_UNORM = 31, - RB4_R10G10B10A2_UINT = 34, - RB4_R11G11B10_FLOAT = 39, - RB4_R16G16_UNORM = 40, - RB4_R16G16_SNORM = 41, - RB4_R16G16_FLOAT = 42, - RB4_R16G16_UINT = 43, - RB4_R16G16_SINT = 44, - RB4_R32_FLOAT = 45, - RB4_R32_UINT = 46, - RB4_R32_SINT = 47, - RB4_R16G16B16A16_UNORM = 52, - RB4_R16G16B16A16_SNORM = 53, - RB4_R16G16B16A16_FLOAT = 54, - RB4_R16G16B16A16_UINT = 55, - RB4_R16G16B16A16_SINT = 56, - RB4_R32G32_FLOAT = 57, - RB4_R32G32_UINT = 58, - RB4_R32G32_SINT = 59, - RB4_R32G32B32A32_FLOAT = 60, - RB4_R32G32B32A32_UINT = 61, - RB4_R32G32B32A32_SINT = 62, -}; - -enum a4xx_tile_mode { - TILE4_LINEAR = 0, - TILE4_2 = 2, - TILE4_3 = 
3, -}; - -enum a4xx_vtx_fmt { - VFMT4_32_FLOAT = 1, - VFMT4_32_32_FLOAT = 2, - VFMT4_32_32_32_FLOAT = 3, - VFMT4_32_32_32_32_FLOAT = 4, - VFMT4_16_FLOAT = 5, - VFMT4_16_16_FLOAT = 6, - VFMT4_16_16_16_FLOAT = 7, - VFMT4_16_16_16_16_FLOAT = 8, - VFMT4_32_FIXED = 9, - VFMT4_32_32_FIXED = 10, - VFMT4_32_32_32_FIXED = 11, - VFMT4_32_32_32_32_FIXED = 12, - VFMT4_11_11_10_FLOAT = 13, - VFMT4_16_SINT = 16, - VFMT4_16_16_SINT = 17, - VFMT4_16_16_16_SINT = 18, - VFMT4_16_16_16_16_SINT = 19, - VFMT4_16_UINT = 20, - VFMT4_16_16_UINT = 21, - VFMT4_16_16_16_UINT = 22, - VFMT4_16_16_16_16_UINT = 23, - VFMT4_16_SNORM = 24, - VFMT4_16_16_SNORM = 25, - VFMT4_16_16_16_SNORM = 26, - VFMT4_16_16_16_16_SNORM = 27, - VFMT4_16_UNORM = 28, - VFMT4_16_16_UNORM = 29, - VFMT4_16_16_16_UNORM = 30, - VFMT4_16_16_16_16_UNORM = 31, - VFMT4_32_UINT = 32, - VFMT4_32_32_UINT = 33, - VFMT4_32_32_32_UINT = 34, - VFMT4_32_32_32_32_UINT = 35, - VFMT4_32_SINT = 36, - VFMT4_32_32_SINT = 37, - VFMT4_32_32_32_SINT = 38, - VFMT4_32_32_32_32_SINT = 39, - VFMT4_8_UINT = 40, - VFMT4_8_8_UINT = 41, - VFMT4_8_8_8_UINT = 42, - VFMT4_8_8_8_8_UINT = 43, - VFMT4_8_UNORM = 44, - VFMT4_8_8_UNORM = 45, - VFMT4_8_8_8_UNORM = 46, - VFMT4_8_8_8_8_UNORM = 47, - VFMT4_8_SINT = 48, - VFMT4_8_8_SINT = 49, - VFMT4_8_8_8_SINT = 50, - VFMT4_8_8_8_8_SINT = 51, - VFMT4_8_SNORM = 52, - VFMT4_8_8_SNORM = 53, - VFMT4_8_8_8_SNORM = 54, - VFMT4_8_8_8_8_SNORM = 55, - VFMT4_10_10_10_2_UINT = 56, - VFMT4_10_10_10_2_UNORM = 57, - VFMT4_10_10_10_2_SINT = 58, - VFMT4_10_10_10_2_SNORM = 59, - VFMT4_2_10_10_10_UINT = 60, - VFMT4_2_10_10_10_UNORM = 61, - VFMT4_2_10_10_10_SINT = 62, - VFMT4_2_10_10_10_SNORM = 63, -}; - -enum a4xx_tex_fmt { - TFMT4_A8_UNORM = 3, - TFMT4_8_UNORM = 4, - TFMT4_8_SNORM = 5, - TFMT4_8_UINT = 6, - TFMT4_8_SINT = 7, - TFMT4_4_4_4_4_UNORM = 8, - TFMT4_5_5_5_1_UNORM = 9, - TFMT4_5_6_5_UNORM = 11, - TFMT4_L8_A8_UNORM = 13, - TFMT4_8_8_UNORM = 14, - TFMT4_8_8_SNORM = 15, - TFMT4_8_8_UINT = 16, - TFMT4_8_8_SINT = 17, - 
TFMT4_16_UNORM = 18, - TFMT4_16_SNORM = 19, - TFMT4_16_FLOAT = 20, - TFMT4_16_UINT = 21, - TFMT4_16_SINT = 22, - TFMT4_8_8_8_8_UNORM = 28, - TFMT4_8_8_8_8_SNORM = 29, - TFMT4_8_8_8_8_UINT = 30, - TFMT4_8_8_8_8_SINT = 31, - TFMT4_9_9_9_E5_FLOAT = 32, - TFMT4_10_10_10_2_UNORM = 33, - TFMT4_10_10_10_2_UINT = 34, - TFMT4_11_11_10_FLOAT = 37, - TFMT4_16_16_UNORM = 38, - TFMT4_16_16_SNORM = 39, - TFMT4_16_16_FLOAT = 40, - TFMT4_16_16_UINT = 41, - TFMT4_16_16_SINT = 42, - TFMT4_32_FLOAT = 43, - TFMT4_32_UINT = 44, - TFMT4_32_SINT = 45, - TFMT4_16_16_16_16_UNORM = 51, - TFMT4_16_16_16_16_SNORM = 52, - TFMT4_16_16_16_16_FLOAT = 53, - TFMT4_16_16_16_16_UINT = 54, - TFMT4_16_16_16_16_SINT = 55, - TFMT4_32_32_FLOAT = 56, - TFMT4_32_32_UINT = 57, - TFMT4_32_32_SINT = 58, - TFMT4_32_32_32_FLOAT = 59, - TFMT4_32_32_32_UINT = 60, - TFMT4_32_32_32_SINT = 61, - TFMT4_32_32_32_32_FLOAT = 63, - TFMT4_32_32_32_32_UINT = 64, - TFMT4_32_32_32_32_SINT = 65, - TFMT4_X8Z24_UNORM = 71, - TFMT4_DXT1 = 86, - TFMT4_DXT3 = 87, - TFMT4_DXT5 = 88, - TFMT4_RGTC1_UNORM = 90, - TFMT4_RGTC1_SNORM = 91, - TFMT4_RGTC2_UNORM = 94, - TFMT4_RGTC2_SNORM = 95, - TFMT4_BPTC_UFLOAT = 97, - TFMT4_BPTC_FLOAT = 98, - TFMT4_BPTC = 99, - TFMT4_ATC_RGB = 100, - TFMT4_ATC_RGBA_EXPLICIT = 101, - TFMT4_ATC_RGBA_INTERPOLATED = 102, - TFMT4_ETC2_RG11_UNORM = 103, - TFMT4_ETC2_RG11_SNORM = 104, - TFMT4_ETC2_R11_UNORM = 105, - TFMT4_ETC2_R11_SNORM = 106, - TFMT4_ETC1 = 107, - TFMT4_ETC2_RGB8 = 108, - TFMT4_ETC2_RGBA8 = 109, - TFMT4_ETC2_RGB8A1 = 110, - TFMT4_ASTC_4x4 = 111, - TFMT4_ASTC_5x4 = 112, - TFMT4_ASTC_5x5 = 113, - TFMT4_ASTC_6x5 = 114, - TFMT4_ASTC_6x6 = 115, - TFMT4_ASTC_8x5 = 116, - TFMT4_ASTC_8x6 = 117, - TFMT4_ASTC_8x8 = 118, - TFMT4_ASTC_10x5 = 119, - TFMT4_ASTC_10x6 = 120, - TFMT4_ASTC_10x8 = 121, - TFMT4_ASTC_10x10 = 122, - TFMT4_ASTC_12x10 = 123, - TFMT4_ASTC_12x12 = 124, -}; - -enum a4xx_tex_fetchsize { - TFETCH4_1_BYTE = 0, - TFETCH4_2_BYTE = 1, - TFETCH4_4_BYTE = 2, - TFETCH4_8_BYTE = 3, - 
TFETCH4_16_BYTE = 4, -}; - -enum a4xx_depth_format { - DEPTH4_NONE = 0, - DEPTH4_16 = 1, - DEPTH4_24_8 = 2, - DEPTH4_32 = 3, -}; - -enum a4xx_ccu_perfcounter_select { - CCU_BUSY_CYCLES = 0, - CCU_RB_DEPTH_RETURN_STALL = 2, - CCU_RB_COLOR_RETURN_STALL = 3, - CCU_DEPTH_BLOCKS = 6, - CCU_COLOR_BLOCKS = 7, - CCU_DEPTH_BLOCK_HIT = 8, - CCU_COLOR_BLOCK_HIT = 9, - CCU_DEPTH_FLAG1_COUNT = 10, - CCU_DEPTH_FLAG2_COUNT = 11, - CCU_DEPTH_FLAG3_COUNT = 12, - CCU_DEPTH_FLAG4_COUNT = 13, - CCU_COLOR_FLAG1_COUNT = 14, - CCU_COLOR_FLAG2_COUNT = 15, - CCU_COLOR_FLAG3_COUNT = 16, - CCU_COLOR_FLAG4_COUNT = 17, - CCU_PARTIAL_BLOCK_READ = 18, -}; - -enum a4xx_cp_perfcounter_select { - CP_ALWAYS_COUNT = 0, - CP_BUSY = 1, - CP_PFP_IDLE = 2, - CP_PFP_BUSY_WORKING = 3, - CP_PFP_STALL_CYCLES_ANY = 4, - CP_PFP_STARVE_CYCLES_ANY = 5, - CP_PFP_STARVED_PER_LOAD_ADDR = 6, - CP_PFP_STALLED_PER_STORE_ADDR = 7, - CP_PFP_PC_PROFILE = 8, - CP_PFP_MATCH_PM4_PKT_PROFILE = 9, - CP_PFP_COND_INDIRECT_DISCARDED = 10, - CP_LONG_RESUMPTIONS = 11, - CP_RESUME_CYCLES = 12, - CP_RESUME_TO_BOUNDARY_CYCLES = 13, - CP_LONG_PREEMPTIONS = 14, - CP_PREEMPT_CYCLES = 15, - CP_PREEMPT_TO_BOUNDARY_CYCLES = 16, - CP_ME_FIFO_EMPTY_PFP_IDLE = 17, - CP_ME_FIFO_EMPTY_PFP_BUSY = 18, - CP_ME_FIFO_NOT_EMPTY_NOT_FULL = 19, - CP_ME_FIFO_FULL_ME_BUSY = 20, - CP_ME_FIFO_FULL_ME_NON_WORKING = 21, - CP_ME_WAITING_FOR_PACKETS = 22, - CP_ME_BUSY_WORKING = 23, - CP_ME_STARVE_CYCLES_ANY = 24, - CP_ME_STARVE_CYCLES_PER_PROFILE = 25, - CP_ME_STALL_CYCLES_PER_PROFILE = 26, - CP_ME_PC_PROFILE = 27, - CP_RCIU_FIFO_EMPTY = 28, - CP_RCIU_FIFO_NOT_EMPTY_NOT_FULL = 29, - CP_RCIU_FIFO_FULL = 30, - CP_RCIU_FIFO_FULL_NO_CONTEXT = 31, - CP_RCIU_FIFO_FULL_AHB_MASTER = 32, - CP_RCIU_FIFO_FULL_OTHER = 33, - CP_AHB_IDLE = 34, - CP_AHB_STALL_ON_GRANT_NO_SPLIT = 35, - CP_AHB_STALL_ON_GRANT_SPLIT = 36, - CP_AHB_STALL_ON_GRANT_SPLIT_PROFILE = 37, - CP_AHB_BUSY_WORKING = 38, - CP_AHB_BUSY_STALL_ON_HRDY = 39, - CP_AHB_BUSY_STALL_ON_HRDY_PROFILE = 40, -}; - -enum 
a4xx_gras_ras_perfcounter_select { - RAS_SUPER_TILES = 0, - RAS_8X8_TILES = 1, - RAS_4X4_TILES = 2, - RAS_BUSY_CYCLES = 3, - RAS_STALL_CYCLES_BY_RB = 4, - RAS_STALL_CYCLES_BY_VSC = 5, - RAS_STARVE_CYCLES_BY_TSE = 6, - RAS_SUPERTILE_CYCLES = 7, - RAS_TILE_CYCLES = 8, - RAS_FULLY_COVERED_SUPER_TILES = 9, - RAS_FULLY_COVERED_8X8_TILES = 10, - RAS_4X4_PRIM = 11, - RAS_8X4_4X8_PRIM = 12, - RAS_8X8_PRIM = 13, -}; - -enum a4xx_gras_tse_perfcounter_select { - TSE_INPUT_PRIM = 0, - TSE_INPUT_NULL_PRIM = 1, - TSE_TRIVAL_REJ_PRIM = 2, - TSE_CLIPPED_PRIM = 3, - TSE_NEW_PRIM = 4, - TSE_ZERO_AREA_PRIM = 5, - TSE_FACENESS_CULLED_PRIM = 6, - TSE_ZERO_PIXEL_PRIM = 7, - TSE_OUTPUT_NULL_PRIM = 8, - TSE_OUTPUT_VISIBLE_PRIM = 9, - TSE_PRE_CLIP_PRIM = 10, - TSE_POST_CLIP_PRIM = 11, - TSE_BUSY_CYCLES = 12, - TSE_PC_STARVE = 13, - TSE_RAS_STALL = 14, - TSE_STALL_BARYPLANE_FIFO_FULL = 15, - TSE_STALL_ZPLANE_FIFO_FULL = 16, -}; - -enum a4xx_hlsq_perfcounter_select { - HLSQ_SP_VS_STAGE_CONSTANT = 0, - HLSQ_SP_VS_STAGE_INSTRUCTIONS = 1, - HLSQ_SP_FS_STAGE_CONSTANT = 2, - HLSQ_SP_FS_STAGE_INSTRUCTIONS = 3, - HLSQ_TP_STATE = 4, - HLSQ_QUADS = 5, - HLSQ_PIXELS = 6, - HLSQ_VERTICES = 7, - HLSQ_SP_VS_STAGE_DATA_BYTES = 13, - HLSQ_SP_FS_STAGE_DATA_BYTES = 14, - HLSQ_BUSY_CYCLES = 15, - HLSQ_STALL_CYCLES_SP_STATE = 16, - HLSQ_STALL_CYCLES_SP_VS_STAGE = 17, - HLSQ_STALL_CYCLES_SP_FS_STAGE = 18, - HLSQ_STALL_CYCLES_UCHE = 19, - HLSQ_RBBM_LOAD_CYCLES = 20, - HLSQ_DI_TO_VS_START_SP = 21, - HLSQ_DI_TO_FS_START_SP = 22, - HLSQ_VS_STAGE_START_TO_DONE_SP = 23, - HLSQ_FS_STAGE_START_TO_DONE_SP = 24, - HLSQ_SP_STATE_COPY_CYCLES_VS_STAGE = 25, - HLSQ_SP_STATE_COPY_CYCLES_FS_STAGE = 26, - HLSQ_UCHE_LATENCY_CYCLES = 27, - HLSQ_UCHE_LATENCY_COUNT = 28, - HLSQ_STARVE_CYCLES_VFD = 29, -}; - -enum a4xx_pc_perfcounter_select { - PC_VIS_STREAMS_LOADED = 0, - PC_VPC_PRIMITIVES = 2, - PC_DEAD_PRIM = 3, - PC_LIVE_PRIM = 4, - PC_DEAD_DRAWCALLS = 5, - PC_LIVE_DRAWCALLS = 6, - PC_VERTEX_MISSES = 7, - PC_STALL_CYCLES_VFD = 
9, - PC_STALL_CYCLES_TSE = 10, - PC_STALL_CYCLES_UCHE = 11, - PC_WORKING_CYCLES = 12, - PC_IA_VERTICES = 13, - PC_GS_PRIMITIVES = 14, - PC_HS_INVOCATIONS = 15, - PC_DS_INVOCATIONS = 16, - PC_DS_PRIMITIVES = 17, - PC_STARVE_CYCLES_FOR_INDEX = 20, - PC_STARVE_CYCLES_FOR_TESS_FACTOR = 21, - PC_STARVE_CYCLES_FOR_VIZ_STREAM = 22, - PC_STALL_CYCLES_TESS = 23, - PC_STARVE_CYCLES_FOR_POSITION = 24, - PC_MODE0_DRAWCALL = 25, - PC_MODE1_DRAWCALL = 26, - PC_MODE2_DRAWCALL = 27, - PC_MODE3_DRAWCALL = 28, - PC_MODE4_DRAWCALL = 29, - PC_PREDICATED_DEAD_DRAWCALL = 30, - PC_STALL_CYCLES_BY_TSE_ONLY = 31, - PC_STALL_CYCLES_BY_VPC_ONLY = 32, - PC_VPC_POS_DATA_TRANSACTION = 33, - PC_BUSY_CYCLES = 34, - PC_STARVE_CYCLES_DI = 35, - PC_STALL_CYCLES_VPC = 36, - TESS_WORKING_CYCLES = 37, - TESS_NUM_CYCLES_SETUP_WORKING = 38, - TESS_NUM_CYCLES_PTGEN_WORKING = 39, - TESS_NUM_CYCLES_CONNGEN_WORKING = 40, - TESS_BUSY_CYCLES = 41, - TESS_STARVE_CYCLES_PC = 42, - TESS_STALL_CYCLES_PC = 43, -}; - -enum a4xx_pwr_perfcounter_select { - PWR_CORE_CLOCK_CYCLES = 0, - PWR_BUSY_CLOCK_CYCLES = 1, -}; - -enum a4xx_rb_perfcounter_select { - RB_BUSY_CYCLES = 0, - RB_BUSY_CYCLES_BINNING = 1, - RB_BUSY_CYCLES_RENDERING = 2, - RB_BUSY_CYCLES_RESOLVE = 3, - RB_STARVE_CYCLES_BY_SP = 4, - RB_STARVE_CYCLES_BY_RAS = 5, - RB_STARVE_CYCLES_BY_MARB = 6, - RB_STALL_CYCLES_BY_MARB = 7, - RB_STALL_CYCLES_BY_HLSQ = 8, - RB_RB_RB_MARB_DATA = 9, - RB_SP_RB_QUAD = 10, - RB_RAS_RB_Z_QUADS = 11, - RB_GMEM_CH0_READ = 12, - RB_GMEM_CH1_READ = 13, - RB_GMEM_CH0_WRITE = 14, - RB_GMEM_CH1_WRITE = 15, - RB_CP_CONTEXT_DONE = 16, - RB_CP_CACHE_FLUSH = 17, - RB_CP_ZPASS_DONE = 18, - RB_STALL_FIFO0_FULL = 19, - RB_STALL_FIFO1_FULL = 20, - RB_STALL_FIFO2_FULL = 21, - RB_STALL_FIFO3_FULL = 22, - RB_RB_HLSQ_TRANSACTIONS = 23, - RB_Z_READ = 24, - RB_Z_WRITE = 25, - RB_C_READ = 26, - RB_C_WRITE = 27, - RB_C_READ_LATENCY = 28, - RB_Z_READ_LATENCY = 29, - RB_STALL_BY_UCHE = 30, - RB_MARB_UCHE_TRANSACTIONS = 31, - RB_CACHE_STALL_MISS = 32, - 
RB_CACHE_STALL_FIFO_FULL = 33, - RB_8BIT_BLENDER_UNITS_ACTIVE = 34, - RB_16BIT_BLENDER_UNITS_ACTIVE = 35, - RB_SAMPLER_UNITS_ACTIVE = 36, - RB_TOTAL_PASS = 38, - RB_Z_PASS = 39, - RB_Z_FAIL = 40, - RB_S_FAIL = 41, - RB_POWER0 = 42, - RB_POWER1 = 43, - RB_POWER2 = 44, - RB_POWER3 = 45, - RB_POWER4 = 46, - RB_POWER5 = 47, - RB_POWER6 = 48, - RB_POWER7 = 49, -}; - -enum a4xx_rbbm_perfcounter_select { - RBBM_ALWAYS_ON = 0, - RBBM_VBIF_BUSY = 1, - RBBM_TSE_BUSY = 2, - RBBM_RAS_BUSY = 3, - RBBM_PC_DCALL_BUSY = 4, - RBBM_PC_VSD_BUSY = 5, - RBBM_VFD_BUSY = 6, - RBBM_VPC_BUSY = 7, - RBBM_UCHE_BUSY = 8, - RBBM_VSC_BUSY = 9, - RBBM_HLSQ_BUSY = 10, - RBBM_ANY_RB_BUSY = 11, - RBBM_ANY_TPL1_BUSY = 12, - RBBM_ANY_SP_BUSY = 13, - RBBM_ANY_MARB_BUSY = 14, - RBBM_ANY_ARB_BUSY = 15, - RBBM_AHB_STATUS_BUSY = 16, - RBBM_AHB_STATUS_STALLED = 17, - RBBM_AHB_STATUS_TXFR = 18, - RBBM_AHB_STATUS_TXFR_SPLIT = 19, - RBBM_AHB_STATUS_TXFR_ERROR = 20, - RBBM_AHB_STATUS_LONG_STALL = 21, - RBBM_STATUS_MASKED = 22, - RBBM_CP_BUSY_GFX_CORE_IDLE = 23, - RBBM_TESS_BUSY = 24, - RBBM_COM_BUSY = 25, - RBBM_DCOM_BUSY = 32, - RBBM_ANY_CCU_BUSY = 33, - RBBM_DPM_BUSY = 34, -}; - -enum a4xx_sp_perfcounter_select { - SP_LM_LOAD_INSTRUCTIONS = 0, - SP_LM_STORE_INSTRUCTIONS = 1, - SP_LM_ATOMICS = 2, - SP_GM_LOAD_INSTRUCTIONS = 3, - SP_GM_STORE_INSTRUCTIONS = 4, - SP_GM_ATOMICS = 5, - SP_VS_STAGE_TEX_INSTRUCTIONS = 6, - SP_VS_STAGE_CFLOW_INSTRUCTIONS = 7, - SP_VS_STAGE_EFU_INSTRUCTIONS = 8, - SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = 9, - SP_VS_STAGE_HALF_ALU_INSTRUCTIONS = 10, - SP_FS_STAGE_TEX_INSTRUCTIONS = 11, - SP_FS_STAGE_CFLOW_INSTRUCTIONS = 12, - SP_FS_STAGE_EFU_INSTRUCTIONS = 13, - SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = 14, - SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = 15, - SP_VS_INSTRUCTIONS = 17, - SP_FS_INSTRUCTIONS = 18, - SP_ADDR_LOCK_COUNT = 19, - SP_UCHE_READ_TRANS = 20, - SP_UCHE_WRITE_TRANS = 21, - SP_EXPORT_VPC_TRANS = 22, - SP_EXPORT_RB_TRANS = 23, - SP_PIXELS_KILLED = 24, - SP_ICL1_REQUESTS = 25, - 
SP_ICL1_MISSES = 26, - SP_ICL0_REQUESTS = 27, - SP_ICL0_MISSES = 28, - SP_ALU_WORKING_CYCLES = 29, - SP_EFU_WORKING_CYCLES = 30, - SP_STALL_CYCLES_BY_VPC = 31, - SP_STALL_CYCLES_BY_TP = 32, - SP_STALL_CYCLES_BY_UCHE = 33, - SP_STALL_CYCLES_BY_RB = 34, - SP_BUSY_CYCLES = 35, - SP_HS_INSTRUCTIONS = 36, - SP_DS_INSTRUCTIONS = 37, - SP_GS_INSTRUCTIONS = 38, - SP_CS_INSTRUCTIONS = 39, - SP_SCHEDULER_NON_WORKING = 40, - SP_WAVE_CONTEXTS = 41, - SP_WAVE_CONTEXT_CYCLES = 42, - SP_POWER0 = 43, - SP_POWER1 = 44, - SP_POWER2 = 45, - SP_POWER3 = 46, - SP_POWER4 = 47, - SP_POWER5 = 48, - SP_POWER6 = 49, - SP_POWER7 = 50, - SP_POWER8 = 51, - SP_POWER9 = 52, - SP_POWER10 = 53, - SP_POWER11 = 54, - SP_POWER12 = 55, - SP_POWER13 = 56, - SP_POWER14 = 57, - SP_POWER15 = 58, -}; - -enum a4xx_tp_perfcounter_select { - TP_L1_REQUESTS = 0, - TP_L1_MISSES = 1, - TP_QUADS_OFFSET = 8, - TP_QUAD_SHADOW = 9, - TP_QUADS_ARRAY = 10, - TP_QUADS_GRADIENT = 11, - TP_QUADS_1D2D = 12, - TP_QUADS_3DCUBE = 13, - TP_BUSY_CYCLES = 16, - TP_STALL_CYCLES_BY_ARB = 17, - TP_STATE_CACHE_REQUESTS = 20, - TP_STATE_CACHE_MISSES = 21, - TP_POWER0 = 22, - TP_POWER1 = 23, - TP_POWER2 = 24, - TP_POWER3 = 25, - TP_POWER4 = 26, - TP_POWER5 = 27, - TP_POWER6 = 28, - TP_POWER7 = 29, -}; - -enum a4xx_uche_perfcounter_select { - UCHE_VBIF_READ_BEATS_TP = 0, - UCHE_VBIF_READ_BEATS_VFD = 1, - UCHE_VBIF_READ_BEATS_HLSQ = 2, - UCHE_VBIF_READ_BEATS_MARB = 3, - UCHE_VBIF_READ_BEATS_SP = 4, - UCHE_READ_REQUESTS_TP = 5, - UCHE_READ_REQUESTS_VFD = 6, - UCHE_READ_REQUESTS_HLSQ = 7, - UCHE_READ_REQUESTS_MARB = 8, - UCHE_READ_REQUESTS_SP = 9, - UCHE_WRITE_REQUESTS_MARB = 10, - UCHE_WRITE_REQUESTS_SP = 11, - UCHE_TAG_CHECK_FAILS = 12, - UCHE_EVICTS = 13, - UCHE_FLUSHES = 14, - UCHE_VBIF_LATENCY_CYCLES = 15, - UCHE_VBIF_LATENCY_SAMPLES = 16, - UCHE_BUSY_CYCLES = 17, - UCHE_VBIF_READ_BEATS_PC = 18, - UCHE_READ_REQUESTS_PC = 19, - UCHE_WRITE_REQUESTS_VPC = 20, - UCHE_STALL_BY_VBIF = 21, - UCHE_WRITE_REQUESTS_VSC = 22, - UCHE_POWER0 = 
23, - UCHE_POWER1 = 24, - UCHE_POWER2 = 25, - UCHE_POWER3 = 26, - UCHE_POWER4 = 27, - UCHE_POWER5 = 28, - UCHE_POWER6 = 29, - UCHE_POWER7 = 30, -}; - -enum a4xx_vbif_perfcounter_select { - AXI_READ_REQUESTS_ID_0 = 0, - AXI_READ_REQUESTS_ID_1 = 1, - AXI_READ_REQUESTS_ID_2 = 2, - AXI_READ_REQUESTS_ID_3 = 3, - AXI_READ_REQUESTS_ID_4 = 4, - AXI_READ_REQUESTS_ID_5 = 5, - AXI_READ_REQUESTS_ID_6 = 6, - AXI_READ_REQUESTS_ID_7 = 7, - AXI_READ_REQUESTS_ID_8 = 8, - AXI_READ_REQUESTS_ID_9 = 9, - AXI_READ_REQUESTS_ID_10 = 10, - AXI_READ_REQUESTS_ID_11 = 11, - AXI_READ_REQUESTS_ID_12 = 12, - AXI_READ_REQUESTS_ID_13 = 13, - AXI_READ_REQUESTS_ID_14 = 14, - AXI_READ_REQUESTS_ID_15 = 15, - AXI0_READ_REQUESTS_TOTAL = 16, - AXI1_READ_REQUESTS_TOTAL = 17, - AXI2_READ_REQUESTS_TOTAL = 18, - AXI3_READ_REQUESTS_TOTAL = 19, - AXI_READ_REQUESTS_TOTAL = 20, - AXI_WRITE_REQUESTS_ID_0 = 21, - AXI_WRITE_REQUESTS_ID_1 = 22, - AXI_WRITE_REQUESTS_ID_2 = 23, - AXI_WRITE_REQUESTS_ID_3 = 24, - AXI_WRITE_REQUESTS_ID_4 = 25, - AXI_WRITE_REQUESTS_ID_5 = 26, - AXI_WRITE_REQUESTS_ID_6 = 27, - AXI_WRITE_REQUESTS_ID_7 = 28, - AXI_WRITE_REQUESTS_ID_8 = 29, - AXI_WRITE_REQUESTS_ID_9 = 30, - AXI_WRITE_REQUESTS_ID_10 = 31, - AXI_WRITE_REQUESTS_ID_11 = 32, - AXI_WRITE_REQUESTS_ID_12 = 33, - AXI_WRITE_REQUESTS_ID_13 = 34, - AXI_WRITE_REQUESTS_ID_14 = 35, - AXI_WRITE_REQUESTS_ID_15 = 36, - AXI0_WRITE_REQUESTS_TOTAL = 37, - AXI1_WRITE_REQUESTS_TOTAL = 38, - AXI2_WRITE_REQUESTS_TOTAL = 39, - AXI3_WRITE_REQUESTS_TOTAL = 40, - AXI_WRITE_REQUESTS_TOTAL = 41, - AXI_TOTAL_REQUESTS = 42, - AXI_READ_DATA_BEATS_ID_0 = 43, - AXI_READ_DATA_BEATS_ID_1 = 44, - AXI_READ_DATA_BEATS_ID_2 = 45, - AXI_READ_DATA_BEATS_ID_3 = 46, - AXI_READ_DATA_BEATS_ID_4 = 47, - AXI_READ_DATA_BEATS_ID_5 = 48, - AXI_READ_DATA_BEATS_ID_6 = 49, - AXI_READ_DATA_BEATS_ID_7 = 50, - AXI_READ_DATA_BEATS_ID_8 = 51, - AXI_READ_DATA_BEATS_ID_9 = 52, - AXI_READ_DATA_BEATS_ID_10 = 53, - AXI_READ_DATA_BEATS_ID_11 = 54, - AXI_READ_DATA_BEATS_ID_12 = 55, - 
AXI_READ_DATA_BEATS_ID_13 = 56, - AXI_READ_DATA_BEATS_ID_14 = 57, - AXI_READ_DATA_BEATS_ID_15 = 58, - AXI0_READ_DATA_BEATS_TOTAL = 59, - AXI1_READ_DATA_BEATS_TOTAL = 60, - AXI2_READ_DATA_BEATS_TOTAL = 61, - AXI3_READ_DATA_BEATS_TOTAL = 62, - AXI_READ_DATA_BEATS_TOTAL = 63, - AXI_WRITE_DATA_BEATS_ID_0 = 64, - AXI_WRITE_DATA_BEATS_ID_1 = 65, - AXI_WRITE_DATA_BEATS_ID_2 = 66, - AXI_WRITE_DATA_BEATS_ID_3 = 67, - AXI_WRITE_DATA_BEATS_ID_4 = 68, - AXI_WRITE_DATA_BEATS_ID_5 = 69, - AXI_WRITE_DATA_BEATS_ID_6 = 70, - AXI_WRITE_DATA_BEATS_ID_7 = 71, - AXI_WRITE_DATA_BEATS_ID_8 = 72, - AXI_WRITE_DATA_BEATS_ID_9 = 73, - AXI_WRITE_DATA_BEATS_ID_10 = 74, - AXI_WRITE_DATA_BEATS_ID_11 = 75, - AXI_WRITE_DATA_BEATS_ID_12 = 76, - AXI_WRITE_DATA_BEATS_ID_13 = 77, - AXI_WRITE_DATA_BEATS_ID_14 = 78, - AXI_WRITE_DATA_BEATS_ID_15 = 79, - AXI0_WRITE_DATA_BEATS_TOTAL = 80, - AXI1_WRITE_DATA_BEATS_TOTAL = 81, - AXI2_WRITE_DATA_BEATS_TOTAL = 82, - AXI3_WRITE_DATA_BEATS_TOTAL = 83, - AXI_WRITE_DATA_BEATS_TOTAL = 84, - AXI_DATA_BEATS_TOTAL = 85, - CYCLES_HELD_OFF_ID_0 = 86, - CYCLES_HELD_OFF_ID_1 = 87, - CYCLES_HELD_OFF_ID_2 = 88, - CYCLES_HELD_OFF_ID_3 = 89, - CYCLES_HELD_OFF_ID_4 = 90, - CYCLES_HELD_OFF_ID_5 = 91, - CYCLES_HELD_OFF_ID_6 = 92, - CYCLES_HELD_OFF_ID_7 = 93, - CYCLES_HELD_OFF_ID_8 = 94, - CYCLES_HELD_OFF_ID_9 = 95, - CYCLES_HELD_OFF_ID_10 = 96, - CYCLES_HELD_OFF_ID_11 = 97, - CYCLES_HELD_OFF_ID_12 = 98, - CYCLES_HELD_OFF_ID_13 = 99, - CYCLES_HELD_OFF_ID_14 = 100, - CYCLES_HELD_OFF_ID_15 = 101, - AXI_READ_REQUEST_HELD_OFF = 102, - AXI_WRITE_REQUEST_HELD_OFF = 103, - AXI_REQUEST_HELD_OFF = 104, - AXI_WRITE_DATA_HELD_OFF = 105, - OCMEM_AXI_READ_REQUEST_HELD_OFF = 106, - OCMEM_AXI_WRITE_REQUEST_HELD_OFF = 107, - OCMEM_AXI_REQUEST_HELD_OFF = 108, - OCMEM_AXI_WRITE_DATA_HELD_OFF = 109, - ELAPSED_CYCLES_DDR = 110, - ELAPSED_CYCLES_OCMEM = 111, -}; - -enum a4xx_vfd_perfcounter_select { - VFD_UCHE_BYTE_FETCHED = 0, - VFD_UCHE_TRANS = 1, - VFD_FETCH_INSTRUCTIONS = 3, - VFD_BUSY_CYCLES = 5, 
- VFD_STALL_CYCLES_UCHE = 6, - VFD_STALL_CYCLES_HLSQ = 7, - VFD_STALL_CYCLES_VPC_BYPASS = 8, - VFD_STALL_CYCLES_VPC_ALLOC = 9, - VFD_MODE_0_FIBERS = 13, - VFD_MODE_1_FIBERS = 14, - VFD_MODE_2_FIBERS = 15, - VFD_MODE_3_FIBERS = 16, - VFD_MODE_4_FIBERS = 17, - VFD_BFIFO_STALL = 18, - VFD_NUM_VERTICES_TOTAL = 19, - VFD_PACKER_FULL = 20, - VFD_UCHE_REQUEST_FIFO_FULL = 21, - VFD_STARVE_CYCLES_PC = 22, - VFD_STARVE_CYCLES_UCHE = 23, -}; - -enum a4xx_vpc_perfcounter_select { - VPC_SP_LM_COMPONENTS = 2, - VPC_SP0_LM_BYTES = 3, - VPC_SP1_LM_BYTES = 4, - VPC_SP2_LM_BYTES = 5, - VPC_SP3_LM_BYTES = 6, - VPC_WORKING_CYCLES = 7, - VPC_STALL_CYCLES_LM = 8, - VPC_STARVE_CYCLES_RAS = 9, - VPC_STREAMOUT_CYCLES = 10, - VPC_UCHE_TRANSACTIONS = 12, - VPC_STALL_CYCLES_UCHE = 13, - VPC_BUSY_CYCLES = 14, - VPC_STARVE_CYCLES_SP = 15, -}; - -enum a4xx_vsc_perfcounter_select { - VSC_BUSY_CYCLES = 0, - VSC_WORKING_CYCLES = 1, - VSC_STALL_CYCLES_UCHE = 2, - VSC_STARVE_CYCLES_RAS = 3, - VSC_EOT_NUM = 4, -}; - -enum a4xx_tex_filter { - A4XX_TEX_NEAREST = 0, - A4XX_TEX_LINEAR = 1, - A4XX_TEX_ANISO = 2, -}; - -enum a4xx_tex_clamp { - A4XX_TEX_REPEAT = 0, - A4XX_TEX_CLAMP_TO_EDGE = 1, - A4XX_TEX_MIRROR_REPEAT = 2, - A4XX_TEX_CLAMP_TO_BORDER = 3, - A4XX_TEX_MIRROR_CLAMP = 4, -}; - -enum a4xx_tex_aniso { - A4XX_TEX_ANISO_1 = 0, - A4XX_TEX_ANISO_2 = 1, - A4XX_TEX_ANISO_4 = 2, - A4XX_TEX_ANISO_8 = 3, - A4XX_TEX_ANISO_16 = 4, -}; - -enum a4xx_tex_swiz { - A4XX_TEX_X = 0, - A4XX_TEX_Y = 1, - A4XX_TEX_Z = 2, - A4XX_TEX_W = 3, - A4XX_TEX_ZERO = 4, - A4XX_TEX_ONE = 5, -}; - -enum a4xx_tex_type { - A4XX_TEX_1D = 0, - A4XX_TEX_2D = 1, - A4XX_TEX_CUBE = 2, - A4XX_TEX_3D = 3, -}; - -#define A4XX_CGC_HLSQ_EARLY_CYC__MASK 0x00700000 -#define A4XX_CGC_HLSQ_EARLY_CYC__SHIFT 20 -static inline uint32_t A4XX_CGC_HLSQ_EARLY_CYC(uint32_t val) -{ - return ((val) << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT) & A4XX_CGC_HLSQ_EARLY_CYC__MASK; -} -#define A4XX_INT0_RBBM_GPU_IDLE 0x00000001 -#define A4XX_INT0_RBBM_AHB_ERROR 0x00000002 
-#define A4XX_INT0_RBBM_REG_TIMEOUT 0x00000004 -#define A4XX_INT0_RBBM_ME_MS_TIMEOUT 0x00000008 -#define A4XX_INT0_RBBM_PFP_MS_TIMEOUT 0x00000010 -#define A4XX_INT0_RBBM_ATB_BUS_OVERFLOW 0x00000020 -#define A4XX_INT0_VFD_ERROR 0x00000040 -#define A4XX_INT0_CP_SW_INT 0x00000080 -#define A4XX_INT0_CP_T0_PACKET_IN_IB 0x00000100 -#define A4XX_INT0_CP_OPCODE_ERROR 0x00000200 -#define A4XX_INT0_CP_RESERVED_BIT_ERROR 0x00000400 -#define A4XX_INT0_CP_HW_FAULT 0x00000800 -#define A4XX_INT0_CP_DMA 0x00001000 -#define A4XX_INT0_CP_IB2_INT 0x00002000 -#define A4XX_INT0_CP_IB1_INT 0x00004000 -#define A4XX_INT0_CP_RB_INT 0x00008000 -#define A4XX_INT0_CP_REG_PROTECT_FAULT 0x00010000 -#define A4XX_INT0_CP_RB_DONE_TS 0x00020000 -#define A4XX_INT0_CP_VS_DONE_TS 0x00040000 -#define A4XX_INT0_CP_PS_DONE_TS 0x00080000 -#define A4XX_INT0_CACHE_FLUSH_TS 0x00100000 -#define A4XX_INT0_CP_AHB_ERROR_HALT 0x00200000 -#define A4XX_INT0_MISC_HANG_DETECT 0x01000000 -#define A4XX_INT0_UCHE_OOB_ACCESS 0x02000000 -#define REG_A4XX_RB_GMEM_BASE_ADDR 0x00000cc0 - -#define REG_A4XX_RB_PERFCTR_RB_SEL_0 0x00000cc7 - -#define REG_A4XX_RB_PERFCTR_RB_SEL_1 0x00000cc8 - -#define REG_A4XX_RB_PERFCTR_RB_SEL_2 0x00000cc9 - -#define REG_A4XX_RB_PERFCTR_RB_SEL_3 0x00000cca - -#define REG_A4XX_RB_PERFCTR_RB_SEL_4 0x00000ccb - -#define REG_A4XX_RB_PERFCTR_RB_SEL_5 0x00000ccc - -#define REG_A4XX_RB_PERFCTR_RB_SEL_6 0x00000ccd - -#define REG_A4XX_RB_PERFCTR_RB_SEL_7 0x00000cce - -#define REG_A4XX_RB_PERFCTR_CCU_SEL_0 0x00000ccf - -#define REG_A4XX_RB_PERFCTR_CCU_SEL_1 0x00000cd0 - -#define REG_A4XX_RB_PERFCTR_CCU_SEL_2 0x00000cd1 - -#define REG_A4XX_RB_PERFCTR_CCU_SEL_3 0x00000cd2 - -#define REG_A4XX_RB_FRAME_BUFFER_DIMENSION 0x00000ce0 -#define A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__MASK 0x00003fff -#define A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__SHIFT 0 -static inline uint32_t A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(uint32_t val) -{ - return ((val) << A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__SHIFT) & 
A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__MASK; -} -#define A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__MASK 0x3fff0000 -#define A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__SHIFT 16 -static inline uint32_t A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(uint32_t val) -{ - return ((val) << A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__SHIFT) & A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__MASK; -} - -#define REG_A4XX_RB_CLEAR_COLOR_DW0 0x000020cc - -#define REG_A4XX_RB_CLEAR_COLOR_DW1 0x000020cd - -#define REG_A4XX_RB_CLEAR_COLOR_DW2 0x000020ce - -#define REG_A4XX_RB_CLEAR_COLOR_DW3 0x000020cf - -#define REG_A4XX_RB_MODE_CONTROL 0x000020a0 -#define A4XX_RB_MODE_CONTROL_WIDTH__MASK 0x0000003f -#define A4XX_RB_MODE_CONTROL_WIDTH__SHIFT 0 -static inline uint32_t A4XX_RB_MODE_CONTROL_WIDTH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A4XX_RB_MODE_CONTROL_WIDTH__SHIFT) & A4XX_RB_MODE_CONTROL_WIDTH__MASK; -} -#define A4XX_RB_MODE_CONTROL_HEIGHT__MASK 0x00003f00 -#define A4XX_RB_MODE_CONTROL_HEIGHT__SHIFT 8 -static inline uint32_t A4XX_RB_MODE_CONTROL_HEIGHT(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A4XX_RB_MODE_CONTROL_HEIGHT__SHIFT) & A4XX_RB_MODE_CONTROL_HEIGHT__MASK; -} -#define A4XX_RB_MODE_CONTROL_ENABLE_GMEM 0x00010000 - -#define REG_A4XX_RB_RENDER_CONTROL 0x000020a1 -#define A4XX_RB_RENDER_CONTROL_BINNING_PASS 0x00000001 -#define A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE 0x00000020 - -#define REG_A4XX_RB_MSAA_CONTROL 0x000020a2 -#define A4XX_RB_MSAA_CONTROL_DISABLE 0x00001000 -#define A4XX_RB_MSAA_CONTROL_SAMPLES__MASK 0x0000e000 -#define A4XX_RB_MSAA_CONTROL_SAMPLES__SHIFT 13 -static inline uint32_t A4XX_RB_MSAA_CONTROL_SAMPLES(uint32_t val) -{ - return ((val) << A4XX_RB_MSAA_CONTROL_SAMPLES__SHIFT) & A4XX_RB_MSAA_CONTROL_SAMPLES__MASK; -} - -#define REG_A4XX_RB_RENDER_CONTROL2 0x000020a3 -#define A4XX_RB_RENDER_CONTROL2_XCOORD 0x00000001 -#define A4XX_RB_RENDER_CONTROL2_YCOORD 0x00000002 -#define A4XX_RB_RENDER_CONTROL2_ZCOORD 0x00000004 -#define 
A4XX_RB_RENDER_CONTROL2_WCOORD 0x00000008 -#define A4XX_RB_RENDER_CONTROL2_SAMPLEMASK 0x00000010 -#define A4XX_RB_RENDER_CONTROL2_FACENESS 0x00000020 -#define A4XX_RB_RENDER_CONTROL2_SAMPLEID 0x00000040 -#define A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__MASK 0x00000380 -#define A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__SHIFT 7 -static inline uint32_t A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES(uint32_t val) -{ - return ((val) << A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__SHIFT) & A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__MASK; -} -#define A4XX_RB_RENDER_CONTROL2_SAMPLEID_HR 0x00000800 -#define A4XX_RB_RENDER_CONTROL2_VARYING 0x00001000 - -static inline uint32_t REG_A4XX_RB_MRT(uint32_t i0) { return 0x000020a4 + 0x5*i0; } - -static inline uint32_t REG_A4XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020a4 + 0x5*i0; } -#define A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008 -#define A4XX_RB_MRT_CONTROL_BLEND 0x00000010 -#define A4XX_RB_MRT_CONTROL_BLEND2 0x00000020 -#define A4XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000040 -#define A4XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000f00 -#define A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 8 -static inline uint32_t A4XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) -{ - return ((val) << A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A4XX_RB_MRT_CONTROL_ROP_CODE__MASK; -} -#define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x0f000000 -#define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 24 -static inline uint32_t A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) -{ - return ((val) << A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; -} - -static inline uint32_t REG_A4XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x000020a5 + 0x5*i0; } -#define A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x0000003f -#define A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a4xx_color_fmt val) -{ - return ((val) << A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK; -} 
-#define A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x000000c0 -#define A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 6 -static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a4xx_tile_mode val) -{ - return ((val) << A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK; -} -#define A4XX_RB_MRT_BUF_INFO_DITHER_MODE__MASK 0x00000600 -#define A4XX_RB_MRT_BUF_INFO_DITHER_MODE__SHIFT 9 -static inline uint32_t A4XX_RB_MRT_BUF_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) -{ - return ((val) << A4XX_RB_MRT_BUF_INFO_DITHER_MODE__SHIFT) & A4XX_RB_MRT_BUF_INFO_DITHER_MODE__MASK; -} -#define A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00001800 -#define A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 11 -static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK; -} -#define A4XX_RB_MRT_BUF_INFO_COLOR_SRGB 0x00002000 -#define A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK 0xffffc000 -#define A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT 14 -static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val) -{ - assert(!(val & 0xf)); - return ((val >> 4) << A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK; -} - -static inline uint32_t REG_A4XX_RB_MRT_BASE(uint32_t i0) { return 0x000020a6 + 0x5*i0; } - -static inline uint32_t REG_A4XX_RB_MRT_CONTROL3(uint32_t i0) { return 0x000020a7 + 0x5*i0; } -#define A4XX_RB_MRT_CONTROL3_STRIDE__MASK 0x03fffff8 -#define A4XX_RB_MRT_CONTROL3_STRIDE__SHIFT 3 -static inline uint32_t A4XX_RB_MRT_CONTROL3_STRIDE(uint32_t val) -{ - return ((val) << A4XX_RB_MRT_CONTROL3_STRIDE__SHIFT) & A4XX_RB_MRT_CONTROL3_STRIDE__MASK; -} - -static inline uint32_t REG_A4XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x000020a8 + 0x5*i0; } -#define A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f -#define A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 
-static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK; -} -#define A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0 -#define A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5 -static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) -{ - return ((val) << A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK; -} -#define A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00 -#define A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8 -static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK; -} -#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000 -#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16 -static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK; -} -#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000 -#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21 -static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) -{ - return ((val) << A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK; -} -#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000 -#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24 -static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & 
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK; -} - -#define REG_A4XX_RB_BLEND_RED 0x000020f0 -#define A4XX_RB_BLEND_RED_UINT__MASK 0x000000ff -#define A4XX_RB_BLEND_RED_UINT__SHIFT 0 -static inline uint32_t A4XX_RB_BLEND_RED_UINT(uint32_t val) -{ - return ((val) << A4XX_RB_BLEND_RED_UINT__SHIFT) & A4XX_RB_BLEND_RED_UINT__MASK; -} -#define A4XX_RB_BLEND_RED_SINT__MASK 0x0000ff00 -#define A4XX_RB_BLEND_RED_SINT__SHIFT 8 -static inline uint32_t A4XX_RB_BLEND_RED_SINT(uint32_t val) -{ - return ((val) << A4XX_RB_BLEND_RED_SINT__SHIFT) & A4XX_RB_BLEND_RED_SINT__MASK; -} -#define A4XX_RB_BLEND_RED_FLOAT__MASK 0xffff0000 -#define A4XX_RB_BLEND_RED_FLOAT__SHIFT 16 -static inline uint32_t A4XX_RB_BLEND_RED_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A4XX_RB_BLEND_RED_FLOAT__SHIFT) & A4XX_RB_BLEND_RED_FLOAT__MASK; -} - -#define REG_A4XX_RB_BLEND_RED_F32 0x000020f1 -#define A4XX_RB_BLEND_RED_F32__MASK 0xffffffff -#define A4XX_RB_BLEND_RED_F32__SHIFT 0 -static inline uint32_t A4XX_RB_BLEND_RED_F32(float val) -{ - return ((fui(val)) << A4XX_RB_BLEND_RED_F32__SHIFT) & A4XX_RB_BLEND_RED_F32__MASK; -} - -#define REG_A4XX_RB_BLEND_GREEN 0x000020f2 -#define A4XX_RB_BLEND_GREEN_UINT__MASK 0x000000ff -#define A4XX_RB_BLEND_GREEN_UINT__SHIFT 0 -static inline uint32_t A4XX_RB_BLEND_GREEN_UINT(uint32_t val) -{ - return ((val) << A4XX_RB_BLEND_GREEN_UINT__SHIFT) & A4XX_RB_BLEND_GREEN_UINT__MASK; -} -#define A4XX_RB_BLEND_GREEN_SINT__MASK 0x0000ff00 -#define A4XX_RB_BLEND_GREEN_SINT__SHIFT 8 -static inline uint32_t A4XX_RB_BLEND_GREEN_SINT(uint32_t val) -{ - return ((val) << A4XX_RB_BLEND_GREEN_SINT__SHIFT) & A4XX_RB_BLEND_GREEN_SINT__MASK; -} -#define A4XX_RB_BLEND_GREEN_FLOAT__MASK 0xffff0000 -#define A4XX_RB_BLEND_GREEN_FLOAT__SHIFT 16 -static inline uint32_t A4XX_RB_BLEND_GREEN_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A4XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A4XX_RB_BLEND_GREEN_FLOAT__MASK; -} - -#define REG_A4XX_RB_BLEND_GREEN_F32 0x000020f3 -#define 
A4XX_RB_BLEND_GREEN_F32__MASK 0xffffffff -#define A4XX_RB_BLEND_GREEN_F32__SHIFT 0 -static inline uint32_t A4XX_RB_BLEND_GREEN_F32(float val) -{ - return ((fui(val)) << A4XX_RB_BLEND_GREEN_F32__SHIFT) & A4XX_RB_BLEND_GREEN_F32__MASK; -} - -#define REG_A4XX_RB_BLEND_BLUE 0x000020f4 -#define A4XX_RB_BLEND_BLUE_UINT__MASK 0x000000ff -#define A4XX_RB_BLEND_BLUE_UINT__SHIFT 0 -static inline uint32_t A4XX_RB_BLEND_BLUE_UINT(uint32_t val) -{ - return ((val) << A4XX_RB_BLEND_BLUE_UINT__SHIFT) & A4XX_RB_BLEND_BLUE_UINT__MASK; -} -#define A4XX_RB_BLEND_BLUE_SINT__MASK 0x0000ff00 -#define A4XX_RB_BLEND_BLUE_SINT__SHIFT 8 -static inline uint32_t A4XX_RB_BLEND_BLUE_SINT(uint32_t val) -{ - return ((val) << A4XX_RB_BLEND_BLUE_SINT__SHIFT) & A4XX_RB_BLEND_BLUE_SINT__MASK; -} -#define A4XX_RB_BLEND_BLUE_FLOAT__MASK 0xffff0000 -#define A4XX_RB_BLEND_BLUE_FLOAT__SHIFT 16 -static inline uint32_t A4XX_RB_BLEND_BLUE_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A4XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A4XX_RB_BLEND_BLUE_FLOAT__MASK; -} - -#define REG_A4XX_RB_BLEND_BLUE_F32 0x000020f5 -#define A4XX_RB_BLEND_BLUE_F32__MASK 0xffffffff -#define A4XX_RB_BLEND_BLUE_F32__SHIFT 0 -static inline uint32_t A4XX_RB_BLEND_BLUE_F32(float val) -{ - return ((fui(val)) << A4XX_RB_BLEND_BLUE_F32__SHIFT) & A4XX_RB_BLEND_BLUE_F32__MASK; -} - -#define REG_A4XX_RB_BLEND_ALPHA 0x000020f6 -#define A4XX_RB_BLEND_ALPHA_UINT__MASK 0x000000ff -#define A4XX_RB_BLEND_ALPHA_UINT__SHIFT 0 -static inline uint32_t A4XX_RB_BLEND_ALPHA_UINT(uint32_t val) -{ - return ((val) << A4XX_RB_BLEND_ALPHA_UINT__SHIFT) & A4XX_RB_BLEND_ALPHA_UINT__MASK; -} -#define A4XX_RB_BLEND_ALPHA_SINT__MASK 0x0000ff00 -#define A4XX_RB_BLEND_ALPHA_SINT__SHIFT 8 -static inline uint32_t A4XX_RB_BLEND_ALPHA_SINT(uint32_t val) -{ - return ((val) << A4XX_RB_BLEND_ALPHA_SINT__SHIFT) & A4XX_RB_BLEND_ALPHA_SINT__MASK; -} -#define A4XX_RB_BLEND_ALPHA_FLOAT__MASK 0xffff0000 -#define A4XX_RB_BLEND_ALPHA_FLOAT__SHIFT 16 -static inline uint32_t 
A4XX_RB_BLEND_ALPHA_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A4XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A4XX_RB_BLEND_ALPHA_FLOAT__MASK; -} - -#define REG_A4XX_RB_BLEND_ALPHA_F32 0x000020f7 -#define A4XX_RB_BLEND_ALPHA_F32__MASK 0xffffffff -#define A4XX_RB_BLEND_ALPHA_F32__SHIFT 0 -static inline uint32_t A4XX_RB_BLEND_ALPHA_F32(float val) -{ - return ((fui(val)) << A4XX_RB_BLEND_ALPHA_F32__SHIFT) & A4XX_RB_BLEND_ALPHA_F32__MASK; -} - -#define REG_A4XX_RB_ALPHA_CONTROL 0x000020f8 -#define A4XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK 0x000000ff -#define A4XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT 0 -static inline uint32_t A4XX_RB_ALPHA_CONTROL_ALPHA_REF(uint32_t val) -{ - return ((val) << A4XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT) & A4XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK; -} -#define A4XX_RB_ALPHA_CONTROL_ALPHA_TEST 0x00000100 -#define A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK 0x00000e00 -#define A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT 9 -static inline uint32_t A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val) -{ - return ((val) << A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK; -} - -#define REG_A4XX_RB_FS_OUTPUT 0x000020f9 -#define A4XX_RB_FS_OUTPUT_ENABLE_BLEND__MASK 0x000000ff -#define A4XX_RB_FS_OUTPUT_ENABLE_BLEND__SHIFT 0 -static inline uint32_t A4XX_RB_FS_OUTPUT_ENABLE_BLEND(uint32_t val) -{ - return ((val) << A4XX_RB_FS_OUTPUT_ENABLE_BLEND__SHIFT) & A4XX_RB_FS_OUTPUT_ENABLE_BLEND__MASK; -} -#define A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND 0x00000100 -#define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK 0xffff0000 -#define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT 16 -static inline uint32_t A4XX_RB_FS_OUTPUT_SAMPLE_MASK(uint32_t val) -{ - return ((val) << A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT) & A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK; -} - -#define REG_A4XX_RB_SAMPLE_COUNT_CONTROL 0x000020fa -#define A4XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002 -#define A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__MASK 0xfffffffc -#define 
A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__SHIFT 2 -static inline uint32_t A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__SHIFT) & A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__MASK; -} - -#define REG_A4XX_RB_RENDER_COMPONENTS 0x000020fb -#define A4XX_RB_RENDER_COMPONENTS_RT0__MASK 0x0000000f -#define A4XX_RB_RENDER_COMPONENTS_RT0__SHIFT 0 -static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT0(uint32_t val) -{ - return ((val) << A4XX_RB_RENDER_COMPONENTS_RT0__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT0__MASK; -} -#define A4XX_RB_RENDER_COMPONENTS_RT1__MASK 0x000000f0 -#define A4XX_RB_RENDER_COMPONENTS_RT1__SHIFT 4 -static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT1(uint32_t val) -{ - return ((val) << A4XX_RB_RENDER_COMPONENTS_RT1__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT1__MASK; -} -#define A4XX_RB_RENDER_COMPONENTS_RT2__MASK 0x00000f00 -#define A4XX_RB_RENDER_COMPONENTS_RT2__SHIFT 8 -static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT2(uint32_t val) -{ - return ((val) << A4XX_RB_RENDER_COMPONENTS_RT2__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT2__MASK; -} -#define A4XX_RB_RENDER_COMPONENTS_RT3__MASK 0x0000f000 -#define A4XX_RB_RENDER_COMPONENTS_RT3__SHIFT 12 -static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT3(uint32_t val) -{ - return ((val) << A4XX_RB_RENDER_COMPONENTS_RT3__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT3__MASK; -} -#define A4XX_RB_RENDER_COMPONENTS_RT4__MASK 0x000f0000 -#define A4XX_RB_RENDER_COMPONENTS_RT4__SHIFT 16 -static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT4(uint32_t val) -{ - return ((val) << A4XX_RB_RENDER_COMPONENTS_RT4__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT4__MASK; -} -#define A4XX_RB_RENDER_COMPONENTS_RT5__MASK 0x00f00000 -#define A4XX_RB_RENDER_COMPONENTS_RT5__SHIFT 20 -static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT5(uint32_t val) -{ - return ((val) << A4XX_RB_RENDER_COMPONENTS_RT5__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT5__MASK; -} -#define A4XX_RB_RENDER_COMPONENTS_RT6__MASK 
0x0f000000 -#define A4XX_RB_RENDER_COMPONENTS_RT6__SHIFT 24 -static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT6(uint32_t val) -{ - return ((val) << A4XX_RB_RENDER_COMPONENTS_RT6__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT6__MASK; -} -#define A4XX_RB_RENDER_COMPONENTS_RT7__MASK 0xf0000000 -#define A4XX_RB_RENDER_COMPONENTS_RT7__SHIFT 28 -static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT7(uint32_t val) -{ - return ((val) << A4XX_RB_RENDER_COMPONENTS_RT7__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT7__MASK; -} - -#define REG_A4XX_RB_COPY_CONTROL 0x000020fc -#define A4XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK 0x00000003 -#define A4XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT 0 -static inline uint32_t A4XX_RB_COPY_CONTROL_MSAA_RESOLVE(enum a3xx_msaa_samples val) -{ - return ((val) << A4XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT) & A4XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK; -} -#define A4XX_RB_COPY_CONTROL_MODE__MASK 0x00000070 -#define A4XX_RB_COPY_CONTROL_MODE__SHIFT 4 -static inline uint32_t A4XX_RB_COPY_CONTROL_MODE(enum adreno_rb_copy_control_mode val) -{ - return ((val) << A4XX_RB_COPY_CONTROL_MODE__SHIFT) & A4XX_RB_COPY_CONTROL_MODE__MASK; -} -#define A4XX_RB_COPY_CONTROL_FASTCLEAR__MASK 0x00000f00 -#define A4XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT 8 -static inline uint32_t A4XX_RB_COPY_CONTROL_FASTCLEAR(uint32_t val) -{ - return ((val) << A4XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT) & A4XX_RB_COPY_CONTROL_FASTCLEAR__MASK; -} -#define A4XX_RB_COPY_CONTROL_GMEM_BASE__MASK 0xffffc000 -#define A4XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT 14 -static inline uint32_t A4XX_RB_COPY_CONTROL_GMEM_BASE(uint32_t val) -{ - assert(!(val & 0x3fff)); - return ((val >> 14) << A4XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT) & A4XX_RB_COPY_CONTROL_GMEM_BASE__MASK; -} - -#define REG_A4XX_RB_COPY_DEST_BASE 0x000020fd -#define A4XX_RB_COPY_DEST_BASE_BASE__MASK 0xffffffe0 -#define A4XX_RB_COPY_DEST_BASE_BASE__SHIFT 5 -static inline uint32_t A4XX_RB_COPY_DEST_BASE_BASE(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << 
A4XX_RB_COPY_DEST_BASE_BASE__SHIFT) & A4XX_RB_COPY_DEST_BASE_BASE__MASK; -} - -#define REG_A4XX_RB_COPY_DEST_PITCH 0x000020fe -#define A4XX_RB_COPY_DEST_PITCH_PITCH__MASK 0xffffffff -#define A4XX_RB_COPY_DEST_PITCH_PITCH__SHIFT 0 -static inline uint32_t A4XX_RB_COPY_DEST_PITCH_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A4XX_RB_COPY_DEST_PITCH_PITCH__SHIFT) & A4XX_RB_COPY_DEST_PITCH_PITCH__MASK; -} - -#define REG_A4XX_RB_COPY_DEST_INFO 0x000020ff -#define A4XX_RB_COPY_DEST_INFO_FORMAT__MASK 0x000000fc -#define A4XX_RB_COPY_DEST_INFO_FORMAT__SHIFT 2 -static inline uint32_t A4XX_RB_COPY_DEST_INFO_FORMAT(enum a4xx_color_fmt val) -{ - return ((val) << A4XX_RB_COPY_DEST_INFO_FORMAT__SHIFT) & A4XX_RB_COPY_DEST_INFO_FORMAT__MASK; -} -#define A4XX_RB_COPY_DEST_INFO_SWAP__MASK 0x00000300 -#define A4XX_RB_COPY_DEST_INFO_SWAP__SHIFT 8 -static inline uint32_t A4XX_RB_COPY_DEST_INFO_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A4XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & A4XX_RB_COPY_DEST_INFO_SWAP__MASK; -} -#define A4XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK 0x00000c00 -#define A4XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT 10 -static inline uint32_t A4XX_RB_COPY_DEST_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) -{ - return ((val) << A4XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT) & A4XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK; -} -#define A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK 0x0003c000 -#define A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT 14 -static inline uint32_t A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(uint32_t val) -{ - return ((val) << A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT) & A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK; -} -#define A4XX_RB_COPY_DEST_INFO_ENDIAN__MASK 0x001c0000 -#define A4XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT 18 -static inline uint32_t A4XX_RB_COPY_DEST_INFO_ENDIAN(enum adreno_rb_surface_endian val) -{ - return ((val) << A4XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT) & A4XX_RB_COPY_DEST_INFO_ENDIAN__MASK; -} -#define 
A4XX_RB_COPY_DEST_INFO_TILE__MASK 0x03000000 -#define A4XX_RB_COPY_DEST_INFO_TILE__SHIFT 24 -static inline uint32_t A4XX_RB_COPY_DEST_INFO_TILE(enum a4xx_tile_mode val) -{ - return ((val) << A4XX_RB_COPY_DEST_INFO_TILE__SHIFT) & A4XX_RB_COPY_DEST_INFO_TILE__MASK; -} - -#define REG_A4XX_RB_FS_OUTPUT_REG 0x00002100 -#define A4XX_RB_FS_OUTPUT_REG_MRT__MASK 0x0000000f -#define A4XX_RB_FS_OUTPUT_REG_MRT__SHIFT 0 -static inline uint32_t A4XX_RB_FS_OUTPUT_REG_MRT(uint32_t val) -{ - return ((val) << A4XX_RB_FS_OUTPUT_REG_MRT__SHIFT) & A4XX_RB_FS_OUTPUT_REG_MRT__MASK; -} -#define A4XX_RB_FS_OUTPUT_REG_FRAG_WRITES_Z 0x00000020 - -#define REG_A4XX_RB_DEPTH_CONTROL 0x00002101 -#define A4XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z 0x00000001 -#define A4XX_RB_DEPTH_CONTROL_Z_ENABLE 0x00000002 -#define A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE 0x00000004 -#define A4XX_RB_DEPTH_CONTROL_ZFUNC__MASK 0x00000070 -#define A4XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT 4 -static inline uint32_t A4XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val) -{ - return ((val) << A4XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A4XX_RB_DEPTH_CONTROL_ZFUNC__MASK; -} -#define A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE 0x00000080 -#define A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE 0x00010000 -#define A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS 0x00020000 -#define A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE 0x80000000 - -#define REG_A4XX_RB_DEPTH_CLEAR 0x00002102 - -#define REG_A4XX_RB_DEPTH_INFO 0x00002103 -#define A4XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK 0x00000003 -#define A4XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT 0 -static inline uint32_t A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum a4xx_depth_format val) -{ - return ((val) << A4XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT) & A4XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK; -} -#define A4XX_RB_DEPTH_INFO_DEPTH_BASE__MASK 0xfffff000 -#define A4XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT 12 -static inline uint32_t A4XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << 
A4XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A4XX_RB_DEPTH_INFO_DEPTH_BASE__MASK; -} - -#define REG_A4XX_RB_DEPTH_PITCH 0x00002104 -#define A4XX_RB_DEPTH_PITCH__MASK 0xffffffff -#define A4XX_RB_DEPTH_PITCH__SHIFT 0 -static inline uint32_t A4XX_RB_DEPTH_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A4XX_RB_DEPTH_PITCH__SHIFT) & A4XX_RB_DEPTH_PITCH__MASK; -} - -#define REG_A4XX_RB_DEPTH_PITCH2 0x00002105 -#define A4XX_RB_DEPTH_PITCH2__MASK 0xffffffff -#define A4XX_RB_DEPTH_PITCH2__SHIFT 0 -static inline uint32_t A4XX_RB_DEPTH_PITCH2(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A4XX_RB_DEPTH_PITCH2__SHIFT) & A4XX_RB_DEPTH_PITCH2__MASK; -} - -#define REG_A4XX_RB_STENCIL_CONTROL 0x00002106 -#define A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001 -#define A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000002 -#define A4XX_RB_STENCIL_CONTROL_STENCIL_READ 0x00000004 -#define A4XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700 -#define A4XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8 -static inline uint32_t A4XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val) -{ - return ((val) << A4XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A4XX_RB_STENCIL_CONTROL_FUNC__MASK; -} -#define A4XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800 -#define A4XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11 -static inline uint32_t A4XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val) -{ - return ((val) << A4XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A4XX_RB_STENCIL_CONTROL_FAIL__MASK; -} -#define A4XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000 -#define A4XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14 -static inline uint32_t A4XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val) -{ - return ((val) << A4XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A4XX_RB_STENCIL_CONTROL_ZPASS__MASK; -} -#define A4XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000 -#define A4XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17 -static inline uint32_t A4XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val) -{ - return ((val) << 
A4XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & A4XX_RB_STENCIL_CONTROL_ZFAIL__MASK; -} -#define A4XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000 -#define A4XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20 -static inline uint32_t A4XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val) -{ - return ((val) << A4XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A4XX_RB_STENCIL_CONTROL_FUNC_BF__MASK; -} -#define A4XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000 -#define A4XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23 -static inline uint32_t A4XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val) -{ - return ((val) << A4XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A4XX_RB_STENCIL_CONTROL_FAIL_BF__MASK; -} -#define A4XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000 -#define A4XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26 -static inline uint32_t A4XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val) -{ - return ((val) << A4XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A4XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK; -} -#define A4XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000 -#define A4XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29 -static inline uint32_t A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val) -{ - return ((val) << A4XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A4XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK; -} - -#define REG_A4XX_RB_STENCIL_CONTROL2 0x00002107 -#define A4XX_RB_STENCIL_CONTROL2_STENCIL_BUFFER 0x00000001 - -#define REG_A4XX_RB_STENCIL_INFO 0x00002108 -#define A4XX_RB_STENCIL_INFO_SEPARATE_STENCIL 0x00000001 -#define A4XX_RB_STENCIL_INFO_STENCIL_BASE__MASK 0xfffff000 -#define A4XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT 12 -static inline uint32_t A4XX_RB_STENCIL_INFO_STENCIL_BASE(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << A4XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT) & A4XX_RB_STENCIL_INFO_STENCIL_BASE__MASK; -} - -#define REG_A4XX_RB_STENCIL_PITCH 0x00002109 -#define A4XX_RB_STENCIL_PITCH__MASK 0xffffffff -#define A4XX_RB_STENCIL_PITCH__SHIFT 0 -static inline uint32_t 
A4XX_RB_STENCIL_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A4XX_RB_STENCIL_PITCH__SHIFT) & A4XX_RB_STENCIL_PITCH__MASK; -} - -#define REG_A4XX_RB_STENCILREFMASK 0x0000210b -#define A4XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff -#define A4XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0 -static inline uint32_t A4XX_RB_STENCILREFMASK_STENCILREF(uint32_t val) -{ - return ((val) << A4XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A4XX_RB_STENCILREFMASK_STENCILREF__MASK; -} -#define A4XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00 -#define A4XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8 -static inline uint32_t A4XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val) -{ - return ((val) << A4XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A4XX_RB_STENCILREFMASK_STENCILMASK__MASK; -} -#define A4XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000 -#define A4XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16 -static inline uint32_t A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val) -{ - return ((val) << A4XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A4XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK; -} - -#define REG_A4XX_RB_STENCILREFMASK_BF 0x0000210c -#define A4XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff -#define A4XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0 -static inline uint32_t A4XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val) -{ - return ((val) << A4XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A4XX_RB_STENCILREFMASK_BF_STENCILREF__MASK; -} -#define A4XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00 -#define A4XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8 -static inline uint32_t A4XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val) -{ - return ((val) << A4XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A4XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK; -} -#define A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000 -#define A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16 -static inline uint32_t 
A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val) -{ - return ((val) << A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK; -} - -#define REG_A4XX_RB_BIN_OFFSET 0x0000210d -#define A4XX_RB_BIN_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 -#define A4XX_RB_BIN_OFFSET_X__MASK 0x00007fff -#define A4XX_RB_BIN_OFFSET_X__SHIFT 0 -static inline uint32_t A4XX_RB_BIN_OFFSET_X(uint32_t val) -{ - return ((val) << A4XX_RB_BIN_OFFSET_X__SHIFT) & A4XX_RB_BIN_OFFSET_X__MASK; -} -#define A4XX_RB_BIN_OFFSET_Y__MASK 0x7fff0000 -#define A4XX_RB_BIN_OFFSET_Y__SHIFT 16 -static inline uint32_t A4XX_RB_BIN_OFFSET_Y(uint32_t val) -{ - return ((val) << A4XX_RB_BIN_OFFSET_Y__SHIFT) & A4XX_RB_BIN_OFFSET_Y__MASK; -} - -static inline uint32_t REG_A4XX_RB_VPORT_Z_CLAMP(uint32_t i0) { return 0x00002120 + 0x2*i0; } - -static inline uint32_t REG_A4XX_RB_VPORT_Z_CLAMP_MIN(uint32_t i0) { return 0x00002120 + 0x2*i0; } - -static inline uint32_t REG_A4XX_RB_VPORT_Z_CLAMP_MAX(uint32_t i0) { return 0x00002121 + 0x2*i0; } - -#define REG_A4XX_RBBM_HW_VERSION 0x00000000 - -#define REG_A4XX_RBBM_HW_CONFIGURATION 0x00000002 - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_TP(uint32_t i0) { return 0x00000004 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_TP_REG(uint32_t i0) { return 0x00000004 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_TP(uint32_t i0) { return 0x00000008 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_TP_REG(uint32_t i0) { return 0x00000008 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_TP(uint32_t i0) { return 0x0000000c + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_TP_REG(uint32_t i0) { return 0x0000000c + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_TP(uint32_t i0) { return 0x00000010 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_TP_REG(uint32_t i0) { return 0x00000010 + 0x1*i0; } - -#define REG_A4XX_RBBM_CLOCK_CTL_UCHE 
0x00000014 - -#define REG_A4XX_RBBM_CLOCK_CTL2_UCHE 0x00000015 - -#define REG_A4XX_RBBM_CLOCK_CTL3_UCHE 0x00000016 - -#define REG_A4XX_RBBM_CLOCK_CTL4_UCHE 0x00000017 - -#define REG_A4XX_RBBM_CLOCK_HYST_UCHE 0x00000018 - -#define REG_A4XX_RBBM_CLOCK_DELAY_UCHE 0x00000019 - -#define REG_A4XX_RBBM_CLOCK_MODE_GPC 0x0000001a - -#define REG_A4XX_RBBM_CLOCK_DELAY_GPC 0x0000001b - -#define REG_A4XX_RBBM_CLOCK_HYST_GPC 0x0000001c - -#define REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM 0x0000001d - -#define REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x0000001e - -#define REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x0000001f - -#define REG_A4XX_RBBM_CLOCK_CTL 0x00000020 - -#define REG_A4XX_RBBM_SP_HYST_CNT 0x00000021 - -#define REG_A4XX_RBBM_SW_RESET_CMD 0x00000022 - -#define REG_A4XX_RBBM_AHB_CTL0 0x00000023 - -#define REG_A4XX_RBBM_AHB_CTL1 0x00000024 - -#define REG_A4XX_RBBM_AHB_CMD 0x00000025 - -#define REG_A4XX_RBBM_RB_SUB_BLOCK_SEL_CTL 0x00000026 - -#define REG_A4XX_RBBM_RAM_ACC_63_32 0x00000028 - -#define REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x0000002b - -#define REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL 0x0000002f - -#define REG_A4XX_RBBM_INTERFACE_HANG_MASK_CTL4 0x00000034 - -#define REG_A4XX_RBBM_INT_CLEAR_CMD 0x00000036 - -#define REG_A4XX_RBBM_INT_0_MASK 0x00000037 - -#define REG_A4XX_RBBM_RBBM_CTL 0x0000003e - -#define REG_A4XX_RBBM_AHB_DEBUG_CTL 0x0000003f - -#define REG_A4XX_RBBM_VBIF_DEBUG_CTL 0x00000041 - -#define REG_A4XX_RBBM_CLOCK_CTL2 0x00000042 - -#define REG_A4XX_RBBM_BLOCK_SW_RESET_CMD 0x00000045 - -#define REG_A4XX_RBBM_RESET_CYCLES 0x00000047 - -#define REG_A4XX_RBBM_EXT_TRACE_BUS_CTL 0x00000049 - -#define REG_A4XX_RBBM_CFG_DEBBUS_SEL_A 0x0000004a - -#define REG_A4XX_RBBM_CFG_DEBBUS_SEL_B 0x0000004b - -#define REG_A4XX_RBBM_CFG_DEBBUS_SEL_C 0x0000004c - -#define REG_A4XX_RBBM_CFG_DEBBUS_SEL_D 0x0000004d - -#define REG_A4XX_RBBM_POWER_CNTL_IP 0x00000098 -#define A4XX_RBBM_POWER_CNTL_IP_SW_COLLAPSE 0x00000001 -#define A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON 0x00100000 - 
-#define REG_A4XX_RBBM_PERFCTR_CP_0_LO 0x0000009c - -#define REG_A4XX_RBBM_PERFCTR_CP_0_HI 0x0000009d - -#define REG_A4XX_RBBM_PERFCTR_CP_1_LO 0x0000009e - -#define REG_A4XX_RBBM_PERFCTR_CP_1_HI 0x0000009f - -#define REG_A4XX_RBBM_PERFCTR_CP_2_LO 0x000000a0 - -#define REG_A4XX_RBBM_PERFCTR_CP_2_HI 0x000000a1 - -#define REG_A4XX_RBBM_PERFCTR_CP_3_LO 0x000000a2 - -#define REG_A4XX_RBBM_PERFCTR_CP_3_HI 0x000000a3 - -#define REG_A4XX_RBBM_PERFCTR_CP_4_LO 0x000000a4 - -#define REG_A4XX_RBBM_PERFCTR_CP_4_HI 0x000000a5 - -#define REG_A4XX_RBBM_PERFCTR_CP_5_LO 0x000000a6 - -#define REG_A4XX_RBBM_PERFCTR_CP_5_HI 0x000000a7 - -#define REG_A4XX_RBBM_PERFCTR_CP_6_LO 0x000000a8 - -#define REG_A4XX_RBBM_PERFCTR_CP_6_HI 0x000000a9 - -#define REG_A4XX_RBBM_PERFCTR_CP_7_LO 0x000000aa - -#define REG_A4XX_RBBM_PERFCTR_CP_7_HI 0x000000ab - -#define REG_A4XX_RBBM_PERFCTR_RBBM_0_LO 0x000000ac - -#define REG_A4XX_RBBM_PERFCTR_RBBM_0_HI 0x000000ad - -#define REG_A4XX_RBBM_PERFCTR_RBBM_1_LO 0x000000ae - -#define REG_A4XX_RBBM_PERFCTR_RBBM_1_HI 0x000000af - -#define REG_A4XX_RBBM_PERFCTR_RBBM_2_LO 0x000000b0 - -#define REG_A4XX_RBBM_PERFCTR_RBBM_2_HI 0x000000b1 - -#define REG_A4XX_RBBM_PERFCTR_RBBM_3_LO 0x000000b2 - -#define REG_A4XX_RBBM_PERFCTR_RBBM_3_HI 0x000000b3 - -#define REG_A4XX_RBBM_PERFCTR_PC_0_LO 0x000000b4 - -#define REG_A4XX_RBBM_PERFCTR_PC_0_HI 0x000000b5 - -#define REG_A4XX_RBBM_PERFCTR_PC_1_LO 0x000000b6 - -#define REG_A4XX_RBBM_PERFCTR_PC_1_HI 0x000000b7 - -#define REG_A4XX_RBBM_PERFCTR_PC_2_LO 0x000000b8 - -#define REG_A4XX_RBBM_PERFCTR_PC_2_HI 0x000000b9 - -#define REG_A4XX_RBBM_PERFCTR_PC_3_LO 0x000000ba - -#define REG_A4XX_RBBM_PERFCTR_PC_3_HI 0x000000bb - -#define REG_A4XX_RBBM_PERFCTR_PC_4_LO 0x000000bc - -#define REG_A4XX_RBBM_PERFCTR_PC_4_HI 0x000000bd - -#define REG_A4XX_RBBM_PERFCTR_PC_5_LO 0x000000be - -#define REG_A4XX_RBBM_PERFCTR_PC_5_HI 0x000000bf - -#define REG_A4XX_RBBM_PERFCTR_PC_6_LO 0x000000c0 - -#define REG_A4XX_RBBM_PERFCTR_PC_6_HI 0x000000c1 - 
-#define REG_A4XX_RBBM_PERFCTR_PC_7_LO 0x000000c2 - -#define REG_A4XX_RBBM_PERFCTR_PC_7_HI 0x000000c3 - -#define REG_A4XX_RBBM_PERFCTR_VFD_0_LO 0x000000c4 - -#define REG_A4XX_RBBM_PERFCTR_VFD_0_HI 0x000000c5 - -#define REG_A4XX_RBBM_PERFCTR_VFD_1_LO 0x000000c6 - -#define REG_A4XX_RBBM_PERFCTR_VFD_1_HI 0x000000c7 - -#define REG_A4XX_RBBM_PERFCTR_VFD_2_LO 0x000000c8 - -#define REG_A4XX_RBBM_PERFCTR_VFD_2_HI 0x000000c9 - -#define REG_A4XX_RBBM_PERFCTR_VFD_3_LO 0x000000ca - -#define REG_A4XX_RBBM_PERFCTR_VFD_3_HI 0x000000cb - -#define REG_A4XX_RBBM_PERFCTR_VFD_4_LO 0x000000cc - -#define REG_A4XX_RBBM_PERFCTR_VFD_4_HI 0x000000cd - -#define REG_A4XX_RBBM_PERFCTR_VFD_5_LO 0x000000ce - -#define REG_A4XX_RBBM_PERFCTR_VFD_5_HI 0x000000cf - -#define REG_A4XX_RBBM_PERFCTR_VFD_6_LO 0x000000d0 - -#define REG_A4XX_RBBM_PERFCTR_VFD_6_HI 0x000000d1 - -#define REG_A4XX_RBBM_PERFCTR_VFD_7_LO 0x000000d2 - -#define REG_A4XX_RBBM_PERFCTR_VFD_7_HI 0x000000d3 - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_0_LO 0x000000d4 - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_0_HI 0x000000d5 - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_1_LO 0x000000d6 - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_1_HI 0x000000d7 - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_2_LO 0x000000d8 - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_2_HI 0x000000d9 - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_3_LO 0x000000da - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_3_HI 0x000000db - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_4_LO 0x000000dc - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_4_HI 0x000000dd - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_5_LO 0x000000de - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_5_HI 0x000000df - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_6_LO 0x000000e0 - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_6_HI 0x000000e1 - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_7_LO 0x000000e2 - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_7_HI 0x000000e3 - -#define REG_A4XX_RBBM_PERFCTR_VPC_0_LO 0x000000e4 - -#define REG_A4XX_RBBM_PERFCTR_VPC_0_HI 0x000000e5 - -#define REG_A4XX_RBBM_PERFCTR_VPC_1_LO 0x000000e6 - -#define 
REG_A4XX_RBBM_PERFCTR_VPC_1_HI 0x000000e7 - -#define REG_A4XX_RBBM_PERFCTR_VPC_2_LO 0x000000e8 - -#define REG_A4XX_RBBM_PERFCTR_VPC_2_HI 0x000000e9 - -#define REG_A4XX_RBBM_PERFCTR_VPC_3_LO 0x000000ea - -#define REG_A4XX_RBBM_PERFCTR_VPC_3_HI 0x000000eb - -#define REG_A4XX_RBBM_PERFCTR_CCU_0_LO 0x000000ec - -#define REG_A4XX_RBBM_PERFCTR_CCU_0_HI 0x000000ed - -#define REG_A4XX_RBBM_PERFCTR_CCU_1_LO 0x000000ee - -#define REG_A4XX_RBBM_PERFCTR_CCU_1_HI 0x000000ef - -#define REG_A4XX_RBBM_PERFCTR_CCU_2_LO 0x000000f0 - -#define REG_A4XX_RBBM_PERFCTR_CCU_2_HI 0x000000f1 - -#define REG_A4XX_RBBM_PERFCTR_CCU_3_LO 0x000000f2 - -#define REG_A4XX_RBBM_PERFCTR_CCU_3_HI 0x000000f3 - -#define REG_A4XX_RBBM_PERFCTR_TSE_0_LO 0x000000f4 - -#define REG_A4XX_RBBM_PERFCTR_TSE_0_HI 0x000000f5 - -#define REG_A4XX_RBBM_PERFCTR_TSE_1_LO 0x000000f6 - -#define REG_A4XX_RBBM_PERFCTR_TSE_1_HI 0x000000f7 - -#define REG_A4XX_RBBM_PERFCTR_TSE_2_LO 0x000000f8 - -#define REG_A4XX_RBBM_PERFCTR_TSE_2_HI 0x000000f9 - -#define REG_A4XX_RBBM_PERFCTR_TSE_3_LO 0x000000fa - -#define REG_A4XX_RBBM_PERFCTR_TSE_3_HI 0x000000fb - -#define REG_A4XX_RBBM_PERFCTR_RAS_0_LO 0x000000fc - -#define REG_A4XX_RBBM_PERFCTR_RAS_0_HI 0x000000fd - -#define REG_A4XX_RBBM_PERFCTR_RAS_1_LO 0x000000fe - -#define REG_A4XX_RBBM_PERFCTR_RAS_1_HI 0x000000ff - -#define REG_A4XX_RBBM_PERFCTR_RAS_2_LO 0x00000100 - -#define REG_A4XX_RBBM_PERFCTR_RAS_2_HI 0x00000101 - -#define REG_A4XX_RBBM_PERFCTR_RAS_3_LO 0x00000102 - -#define REG_A4XX_RBBM_PERFCTR_RAS_3_HI 0x00000103 - -#define REG_A4XX_RBBM_PERFCTR_UCHE_0_LO 0x00000104 - -#define REG_A4XX_RBBM_PERFCTR_UCHE_0_HI 0x00000105 - -#define REG_A4XX_RBBM_PERFCTR_UCHE_1_LO 0x00000106 - -#define REG_A4XX_RBBM_PERFCTR_UCHE_1_HI 0x00000107 - -#define REG_A4XX_RBBM_PERFCTR_UCHE_2_LO 0x00000108 - -#define REG_A4XX_RBBM_PERFCTR_UCHE_2_HI 0x00000109 - -#define REG_A4XX_RBBM_PERFCTR_UCHE_3_LO 0x0000010a - -#define REG_A4XX_RBBM_PERFCTR_UCHE_3_HI 0x0000010b - -#define 
REG_A4XX_RBBM_PERFCTR_UCHE_4_LO 0x0000010c - -#define REG_A4XX_RBBM_PERFCTR_UCHE_4_HI 0x0000010d - -#define REG_A4XX_RBBM_PERFCTR_UCHE_5_LO 0x0000010e - -#define REG_A4XX_RBBM_PERFCTR_UCHE_5_HI 0x0000010f - -#define REG_A4XX_RBBM_PERFCTR_UCHE_6_LO 0x00000110 - -#define REG_A4XX_RBBM_PERFCTR_UCHE_6_HI 0x00000111 - -#define REG_A4XX_RBBM_PERFCTR_UCHE_7_LO 0x00000112 - -#define REG_A4XX_RBBM_PERFCTR_UCHE_7_HI 0x00000113 - -#define REG_A4XX_RBBM_PERFCTR_TP_0_LO 0x00000114 - -#define REG_A4XX_RBBM_PERFCTR_TP_0_HI 0x00000115 - -#define REG_A4XX_RBBM_PERFCTR_TP_0_LO 0x00000114 - -#define REG_A4XX_RBBM_PERFCTR_TP_0_HI 0x00000115 - -#define REG_A4XX_RBBM_PERFCTR_TP_1_LO 0x00000116 - -#define REG_A4XX_RBBM_PERFCTR_TP_1_HI 0x00000117 - -#define REG_A4XX_RBBM_PERFCTR_TP_2_LO 0x00000118 - -#define REG_A4XX_RBBM_PERFCTR_TP_2_HI 0x00000119 - -#define REG_A4XX_RBBM_PERFCTR_TP_3_LO 0x0000011a - -#define REG_A4XX_RBBM_PERFCTR_TP_3_HI 0x0000011b - -#define REG_A4XX_RBBM_PERFCTR_TP_4_LO 0x0000011c - -#define REG_A4XX_RBBM_PERFCTR_TP_4_HI 0x0000011d - -#define REG_A4XX_RBBM_PERFCTR_TP_5_LO 0x0000011e - -#define REG_A4XX_RBBM_PERFCTR_TP_5_HI 0x0000011f - -#define REG_A4XX_RBBM_PERFCTR_TP_6_LO 0x00000120 - -#define REG_A4XX_RBBM_PERFCTR_TP_6_HI 0x00000121 - -#define REG_A4XX_RBBM_PERFCTR_TP_7_LO 0x00000122 - -#define REG_A4XX_RBBM_PERFCTR_TP_7_HI 0x00000123 - -#define REG_A4XX_RBBM_PERFCTR_SP_0_LO 0x00000124 - -#define REG_A4XX_RBBM_PERFCTR_SP_0_HI 0x00000125 - -#define REG_A4XX_RBBM_PERFCTR_SP_1_LO 0x00000126 - -#define REG_A4XX_RBBM_PERFCTR_SP_1_HI 0x00000127 - -#define REG_A4XX_RBBM_PERFCTR_SP_2_LO 0x00000128 - -#define REG_A4XX_RBBM_PERFCTR_SP_2_HI 0x00000129 - -#define REG_A4XX_RBBM_PERFCTR_SP_3_LO 0x0000012a - -#define REG_A4XX_RBBM_PERFCTR_SP_3_HI 0x0000012b - -#define REG_A4XX_RBBM_PERFCTR_SP_4_LO 0x0000012c - -#define REG_A4XX_RBBM_PERFCTR_SP_4_HI 0x0000012d - -#define REG_A4XX_RBBM_PERFCTR_SP_5_LO 0x0000012e - -#define REG_A4XX_RBBM_PERFCTR_SP_5_HI 0x0000012f - -#define 
REG_A4XX_RBBM_PERFCTR_SP_6_LO 0x00000130 - -#define REG_A4XX_RBBM_PERFCTR_SP_6_HI 0x00000131 - -#define REG_A4XX_RBBM_PERFCTR_SP_7_LO 0x00000132 - -#define REG_A4XX_RBBM_PERFCTR_SP_7_HI 0x00000133 - -#define REG_A4XX_RBBM_PERFCTR_SP_8_LO 0x00000134 - -#define REG_A4XX_RBBM_PERFCTR_SP_8_HI 0x00000135 - -#define REG_A4XX_RBBM_PERFCTR_SP_9_LO 0x00000136 - -#define REG_A4XX_RBBM_PERFCTR_SP_9_HI 0x00000137 - -#define REG_A4XX_RBBM_PERFCTR_SP_10_LO 0x00000138 - -#define REG_A4XX_RBBM_PERFCTR_SP_10_HI 0x00000139 - -#define REG_A4XX_RBBM_PERFCTR_SP_11_LO 0x0000013a - -#define REG_A4XX_RBBM_PERFCTR_SP_11_HI 0x0000013b - -#define REG_A4XX_RBBM_PERFCTR_RB_0_LO 0x0000013c - -#define REG_A4XX_RBBM_PERFCTR_RB_0_HI 0x0000013d - -#define REG_A4XX_RBBM_PERFCTR_RB_1_LO 0x0000013e - -#define REG_A4XX_RBBM_PERFCTR_RB_1_HI 0x0000013f - -#define REG_A4XX_RBBM_PERFCTR_RB_2_LO 0x00000140 - -#define REG_A4XX_RBBM_PERFCTR_RB_2_HI 0x00000141 - -#define REG_A4XX_RBBM_PERFCTR_RB_3_LO 0x00000142 - -#define REG_A4XX_RBBM_PERFCTR_RB_3_HI 0x00000143 - -#define REG_A4XX_RBBM_PERFCTR_RB_4_LO 0x00000144 - -#define REG_A4XX_RBBM_PERFCTR_RB_4_HI 0x00000145 - -#define REG_A4XX_RBBM_PERFCTR_RB_5_LO 0x00000146 - -#define REG_A4XX_RBBM_PERFCTR_RB_5_HI 0x00000147 - -#define REG_A4XX_RBBM_PERFCTR_RB_6_LO 0x00000148 - -#define REG_A4XX_RBBM_PERFCTR_RB_6_HI 0x00000149 - -#define REG_A4XX_RBBM_PERFCTR_RB_7_LO 0x0000014a - -#define REG_A4XX_RBBM_PERFCTR_RB_7_HI 0x0000014b - -#define REG_A4XX_RBBM_PERFCTR_VSC_0_LO 0x0000014c - -#define REG_A4XX_RBBM_PERFCTR_VSC_0_HI 0x0000014d - -#define REG_A4XX_RBBM_PERFCTR_VSC_1_LO 0x0000014e - -#define REG_A4XX_RBBM_PERFCTR_VSC_1_HI 0x0000014f - -#define REG_A4XX_RBBM_PERFCTR_PWR_0_LO 0x00000166 - -#define REG_A4XX_RBBM_PERFCTR_PWR_0_HI 0x00000167 - -#define REG_A4XX_RBBM_PERFCTR_PWR_1_LO 0x00000168 - -#define REG_A4XX_RBBM_PERFCTR_PWR_1_HI 0x00000169 - -#define REG_A4XX_RBBM_ALWAYSON_COUNTER_LO 0x0000016e - -#define REG_A4XX_RBBM_ALWAYSON_COUNTER_HI 0x0000016f - -static 
inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP(uint32_t i0) { return 0x00000068 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP_REG(uint32_t i0) { return 0x00000068 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_SP(uint32_t i0) { return 0x0000006c + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_SP_REG(uint32_t i0) { return 0x0000006c + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_SP(uint32_t i0) { return 0x00000070 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_SP_REG(uint32_t i0) { return 0x00000070 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_SP(uint32_t i0) { return 0x00000074 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_SP_REG(uint32_t i0) { return 0x00000074 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_RB(uint32_t i0) { return 0x00000078 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_RB_REG(uint32_t i0) { return 0x00000078 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_RB(uint32_t i0) { return 0x0000007c + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_RB_REG(uint32_t i0) { return 0x0000007c + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(uint32_t i0) { return 0x00000082 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU_REG(uint32_t i0) { return 0x00000082 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(uint32_t i0) { return 0x00000086 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU_REG(uint32_t i0) { return 0x00000086 + 0x1*i0; } - -#define REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM 0x00000080 - -#define REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM 0x00000081 - -#define REG_A4XX_RBBM_CLOCK_CTL_HLSQ 0x0000008a - -#define REG_A4XX_RBBM_CLOCK_HYST_HLSQ 0x0000008b - -#define REG_A4XX_RBBM_CLOCK_DELAY_HLSQ 0x0000008c - -#define REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM 0x0000008d - -static inline uint32_t 
REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(uint32_t i0) { return 0x0000008e + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0) { return 0x0000008e + 0x1*i0; } - -#define REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0 0x00000099 - -#define REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1 0x0000009a - -#define REG_A4XX_RBBM_PERFCTR_PWR_1_LO 0x00000168 - -#define REG_A4XX_RBBM_PERFCTR_CTL 0x00000170 - -#define REG_A4XX_RBBM_PERFCTR_LOAD_CMD0 0x00000171 - -#define REG_A4XX_RBBM_PERFCTR_LOAD_CMD1 0x00000172 - -#define REG_A4XX_RBBM_PERFCTR_LOAD_CMD2 0x00000173 - -#define REG_A4XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000174 - -#define REG_A4XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000175 - -#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_0 0x00000176 - -#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_1 0x00000177 - -#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_2 0x00000178 - -#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_3 0x00000179 - -#define REG_A4XX_RBBM_GPU_BUSY_MASKED 0x0000017a - -#define REG_A4XX_RBBM_INT_0_STATUS 0x0000017d - -#define REG_A4XX_RBBM_CLOCK_STATUS 0x00000182 - -#define REG_A4XX_RBBM_AHB_STATUS 0x00000189 - -#define REG_A4XX_RBBM_AHB_ME_SPLIT_STATUS 0x0000018c - -#define REG_A4XX_RBBM_AHB_PFP_SPLIT_STATUS 0x0000018d - -#define REG_A4XX_RBBM_AHB_ERROR_STATUS 0x0000018f - -#define REG_A4XX_RBBM_STATUS 0x00000191 -#define A4XX_RBBM_STATUS_HI_BUSY 0x00000001 -#define A4XX_RBBM_STATUS_CP_ME_BUSY 0x00000002 -#define A4XX_RBBM_STATUS_CP_PFP_BUSY 0x00000004 -#define A4XX_RBBM_STATUS_CP_NRT_BUSY 0x00004000 -#define A4XX_RBBM_STATUS_VBIF_BUSY 0x00008000 -#define A4XX_RBBM_STATUS_TSE_BUSY 0x00010000 -#define A4XX_RBBM_STATUS_RAS_BUSY 0x00020000 -#define A4XX_RBBM_STATUS_RB_BUSY 0x00040000 -#define A4XX_RBBM_STATUS_PC_DCALL_BUSY 0x00080000 -#define A4XX_RBBM_STATUS_PC_VSD_BUSY 0x00100000 -#define A4XX_RBBM_STATUS_VFD_BUSY 0x00200000 -#define A4XX_RBBM_STATUS_VPC_BUSY 0x00400000 -#define A4XX_RBBM_STATUS_UCHE_BUSY 0x00800000 -#define A4XX_RBBM_STATUS_SP_BUSY 0x01000000 -#define 
A4XX_RBBM_STATUS_TPL1_BUSY 0x02000000 -#define A4XX_RBBM_STATUS_MARB_BUSY 0x04000000 -#define A4XX_RBBM_STATUS_VSC_BUSY 0x08000000 -#define A4XX_RBBM_STATUS_ARB_BUSY 0x10000000 -#define A4XX_RBBM_STATUS_HLSQ_BUSY 0x20000000 -#define A4XX_RBBM_STATUS_GPU_BUSY_NOHC 0x40000000 -#define A4XX_RBBM_STATUS_GPU_BUSY 0x80000000 - -#define REG_A4XX_RBBM_INTERFACE_RRDY_STATUS5 0x0000019f - -#define REG_A4XX_RBBM_POWER_STATUS 0x000001b0 -#define A4XX_RBBM_POWER_STATUS_SP_TP_PWR_ON 0x00100000 - -#define REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2 0x000001b8 - -#define REG_A4XX_CP_SCRATCH_UMASK 0x00000228 - -#define REG_A4XX_CP_SCRATCH_ADDR 0x00000229 - -#define REG_A4XX_CP_RB_BASE 0x00000200 - -#define REG_A4XX_CP_RB_CNTL 0x00000201 - -#define REG_A4XX_CP_RB_WPTR 0x00000205 - -#define REG_A4XX_CP_RB_RPTR_ADDR 0x00000203 - -#define REG_A4XX_CP_RB_RPTR 0x00000204 - -#define REG_A4XX_CP_IB1_BASE 0x00000206 - -#define REG_A4XX_CP_IB1_BUFSZ 0x00000207 - -#define REG_A4XX_CP_IB2_BASE 0x00000208 - -#define REG_A4XX_CP_IB2_BUFSZ 0x00000209 - -#define REG_A4XX_CP_ME_NRT_ADDR 0x0000020c - -#define REG_A4XX_CP_ME_NRT_DATA 0x0000020d - -#define REG_A4XX_CP_ME_RB_DONE_DATA 0x00000217 - -#define REG_A4XX_CP_QUEUE_THRESH2 0x00000219 - -#define REG_A4XX_CP_MERCIU_SIZE 0x0000021b - -#define REG_A4XX_CP_ROQ_ADDR 0x0000021c - -#define REG_A4XX_CP_ROQ_DATA 0x0000021d - -#define REG_A4XX_CP_MEQ_ADDR 0x0000021e - -#define REG_A4XX_CP_MEQ_DATA 0x0000021f - -#define REG_A4XX_CP_MERCIU_ADDR 0x00000220 - -#define REG_A4XX_CP_MERCIU_DATA 0x00000221 - -#define REG_A4XX_CP_MERCIU_DATA2 0x00000222 - -#define REG_A4XX_CP_PFP_UCODE_ADDR 0x00000223 - -#define REG_A4XX_CP_PFP_UCODE_DATA 0x00000224 - -#define REG_A4XX_CP_ME_RAM_WADDR 0x00000225 - -#define REG_A4XX_CP_ME_RAM_RADDR 0x00000226 - -#define REG_A4XX_CP_ME_RAM_DATA 0x00000227 - -#define REG_A4XX_CP_PREEMPT 0x0000022a - -#define REG_A4XX_CP_CNTL 0x0000022c - -#define REG_A4XX_CP_ME_CNTL 0x0000022d - -#define REG_A4XX_CP_DEBUG 0x0000022e - -#define 
REG_A4XX_CP_DEBUG_ECO_CONTROL 0x00000231 - -#define REG_A4XX_CP_DRAW_STATE_ADDR 0x00000232 - -static inline uint32_t REG_A4XX_CP_PROTECT(uint32_t i0) { return 0x00000240 + 0x1*i0; } - -static inline uint32_t REG_A4XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000240 + 0x1*i0; } -#define A4XX_CP_PROTECT_REG_BASE_ADDR__MASK 0x0001ffff -#define A4XX_CP_PROTECT_REG_BASE_ADDR__SHIFT 0 -static inline uint32_t A4XX_CP_PROTECT_REG_BASE_ADDR(uint32_t val) -{ - return ((val) << A4XX_CP_PROTECT_REG_BASE_ADDR__SHIFT) & A4XX_CP_PROTECT_REG_BASE_ADDR__MASK; -} -#define A4XX_CP_PROTECT_REG_MASK_LEN__MASK 0x1f000000 -#define A4XX_CP_PROTECT_REG_MASK_LEN__SHIFT 24 -static inline uint32_t A4XX_CP_PROTECT_REG_MASK_LEN(uint32_t val) -{ - return ((val) << A4XX_CP_PROTECT_REG_MASK_LEN__SHIFT) & A4XX_CP_PROTECT_REG_MASK_LEN__MASK; -} -#define A4XX_CP_PROTECT_REG_TRAP_WRITE 0x20000000 -#define A4XX_CP_PROTECT_REG_TRAP_READ 0x40000000 - -#define REG_A4XX_CP_PROTECT_CTRL 0x00000250 - -#define REG_A4XX_CP_ST_BASE 0x000004c0 - -#define REG_A4XX_CP_STQ_AVAIL 0x000004ce - -#define REG_A4XX_CP_MERCIU_STAT 0x000004d0 - -#define REG_A4XX_CP_WFI_PEND_CTR 0x000004d2 - -#define REG_A4XX_CP_HW_FAULT 0x000004d8 - -#define REG_A4XX_CP_PROTECT_STATUS 0x000004da - -#define REG_A4XX_CP_EVENTS_IN_FLIGHT 0x000004dd - -#define REG_A4XX_CP_PERFCTR_CP_SEL_0 0x00000500 - -#define REG_A4XX_CP_PERFCTR_CP_SEL_1 0x00000501 - -#define REG_A4XX_CP_PERFCTR_CP_SEL_2 0x00000502 - -#define REG_A4XX_CP_PERFCTR_CP_SEL_3 0x00000503 - -#define REG_A4XX_CP_PERFCTR_CP_SEL_4 0x00000504 - -#define REG_A4XX_CP_PERFCTR_CP_SEL_5 0x00000505 - -#define REG_A4XX_CP_PERFCTR_CP_SEL_6 0x00000506 - -#define REG_A4XX_CP_PERFCTR_CP_SEL_7 0x00000507 - -#define REG_A4XX_CP_PERFCOMBINER_SELECT 0x0000050b - -static inline uint32_t REG_A4XX_CP_SCRATCH(uint32_t i0) { return 0x00000578 + 0x1*i0; } - -static inline uint32_t REG_A4XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000578 + 0x1*i0; } - -#define REG_A4XX_SP_VS_STATUS 0x00000ec0 - -#define 
REG_A4XX_SP_MODE_CONTROL 0x00000ec3 - -#define REG_A4XX_SP_PERFCTR_SP_SEL_0 0x00000ec4 - -#define REG_A4XX_SP_PERFCTR_SP_SEL_1 0x00000ec5 - -#define REG_A4XX_SP_PERFCTR_SP_SEL_2 0x00000ec6 - -#define REG_A4XX_SP_PERFCTR_SP_SEL_3 0x00000ec7 - -#define REG_A4XX_SP_PERFCTR_SP_SEL_4 0x00000ec8 - -#define REG_A4XX_SP_PERFCTR_SP_SEL_5 0x00000ec9 - -#define REG_A4XX_SP_PERFCTR_SP_SEL_6 0x00000eca - -#define REG_A4XX_SP_PERFCTR_SP_SEL_7 0x00000ecb - -#define REG_A4XX_SP_PERFCTR_SP_SEL_8 0x00000ecc - -#define REG_A4XX_SP_PERFCTR_SP_SEL_9 0x00000ecd - -#define REG_A4XX_SP_PERFCTR_SP_SEL_10 0x00000ece - -#define REG_A4XX_SP_PERFCTR_SP_SEL_11 0x00000ecf - -#define REG_A4XX_SP_SP_CTRL_REG 0x000022c0 -#define A4XX_SP_SP_CTRL_REG_BINNING_PASS 0x00080000 - -#define REG_A4XX_SP_INSTR_CACHE_CTRL 0x000022c1 -#define A4XX_SP_INSTR_CACHE_CTRL_VS_BUFFER 0x00000080 -#define A4XX_SP_INSTR_CACHE_CTRL_FS_BUFFER 0x00000100 -#define A4XX_SP_INSTR_CACHE_CTRL_INSTR_BUFFER 0x00000400 - -#define REG_A4XX_SP_VS_CTRL_REG0 0x000022c4 -#define A4XX_SP_VS_CTRL_REG0_THREADMODE__MASK 0x00000001 -#define A4XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT 0 -static inline uint32_t A4XX_SP_VS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val) -{ - return ((val) << A4XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT) & A4XX_SP_VS_CTRL_REG0_THREADMODE__MASK; -} -#define A4XX_SP_VS_CTRL_REG0_VARYING 0x00000002 -#define A4XX_SP_VS_CTRL_REG0_CACHEINVALID 0x00000004 -#define A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & 
A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK 0x000c0000 -#define A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT 18 -static inline uint32_t A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val) -{ - return ((val) << A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK; -} -#define A4XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00100000 -#define A4XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 20 -static inline uint32_t A4XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A4XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A4XX_SP_VS_CTRL_REG0_THREADSIZE__MASK; -} -#define A4XX_SP_VS_CTRL_REG0_SUPERTHREADMODE 0x00200000 -#define A4XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x00400000 - -#define REG_A4XX_SP_VS_CTRL_REG1 0x000022c5 -#define A4XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK 0x000000ff -#define A4XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT 0 -static inline uint32_t A4XX_SP_VS_CTRL_REG1_CONSTLENGTH(uint32_t val) -{ - return ((val) << A4XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT) & A4XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK; -} -#define A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK 0x7f000000 -#define A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT 24 -static inline uint32_t A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val) -{ - return ((val) << A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK; -} - -#define REG_A4XX_SP_VS_PARAM_REG 0x000022c6 -#define A4XX_SP_VS_PARAM_REG_POSREGID__MASK 0x000000ff -#define A4XX_SP_VS_PARAM_REG_POSREGID__SHIFT 0 -static inline uint32_t A4XX_SP_VS_PARAM_REG_POSREGID(uint32_t val) -{ - return ((val) << A4XX_SP_VS_PARAM_REG_POSREGID__SHIFT) & A4XX_SP_VS_PARAM_REG_POSREGID__MASK; -} -#define A4XX_SP_VS_PARAM_REG_PSIZEREGID__MASK 0x0000ff00 -#define A4XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT 8 -static inline uint32_t A4XX_SP_VS_PARAM_REG_PSIZEREGID(uint32_t val) -{ - return ((val) << A4XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT) & 
A4XX_SP_VS_PARAM_REG_PSIZEREGID__MASK; -} -#define A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK 0xfff00000 -#define A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT 20 -static inline uint32_t A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val) -{ - return ((val) << A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT) & A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK; -} - -static inline uint32_t REG_A4XX_SP_VS_OUT(uint32_t i0) { return 0x000022c7 + 0x1*i0; } - -static inline uint32_t REG_A4XX_SP_VS_OUT_REG(uint32_t i0) { return 0x000022c7 + 0x1*i0; } -#define A4XX_SP_VS_OUT_REG_A_REGID__MASK 0x000001ff -#define A4XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 -static inline uint32_t A4XX_SP_VS_OUT_REG_A_REGID(uint32_t val) -{ - return ((val) << A4XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A4XX_SP_VS_OUT_REG_A_REGID__MASK; -} -#define A4XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00001e00 -#define A4XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 9 -static inline uint32_t A4XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val) -{ - return ((val) << A4XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A4XX_SP_VS_OUT_REG_A_COMPMASK__MASK; -} -#define A4XX_SP_VS_OUT_REG_B_REGID__MASK 0x01ff0000 -#define A4XX_SP_VS_OUT_REG_B_REGID__SHIFT 16 -static inline uint32_t A4XX_SP_VS_OUT_REG_B_REGID(uint32_t val) -{ - return ((val) << A4XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A4XX_SP_VS_OUT_REG_B_REGID__MASK; -} -#define A4XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x1e000000 -#define A4XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 25 -static inline uint32_t A4XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) -{ - return ((val) << A4XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A4XX_SP_VS_OUT_REG_B_COMPMASK__MASK; -} - -static inline uint32_t REG_A4XX_SP_VS_VPC_DST(uint32_t i0) { return 0x000022d8 + 0x1*i0; } - -static inline uint32_t REG_A4XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x000022d8 + 0x1*i0; } -#define A4XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff -#define A4XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 -static inline uint32_t A4XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) -{ - return ((val) << 
A4XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A4XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK; -} -#define A4XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 -#define A4XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8 -static inline uint32_t A4XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val) -{ - return ((val) << A4XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A4XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK; -} -#define A4XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 -#define A4XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16 -static inline uint32_t A4XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val) -{ - return ((val) << A4XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A4XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK; -} -#define A4XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 -#define A4XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24 -static inline uint32_t A4XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) -{ - return ((val) << A4XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A4XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK; -} - -#define REG_A4XX_SP_VS_OBJ_OFFSET_REG 0x000022e0 -#define A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 -#define A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 -static inline uint32_t A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; -} -#define A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 -#define A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 -static inline uint32_t A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; -} - -#define REG_A4XX_SP_VS_OBJ_START 0x000022e1 - -#define REG_A4XX_SP_VS_PVT_MEM_PARAM 0x000022e2 - -#define REG_A4XX_SP_VS_PVT_MEM_ADDR 0x000022e3 - -#define REG_A4XX_SP_VS_LENGTH_REG 0x000022e5 - -#define REG_A4XX_SP_FS_CTRL_REG0 0x000022e8 -#define A4XX_SP_FS_CTRL_REG0_THREADMODE__MASK 0x00000001 -#define 
A4XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT 0 -static inline uint32_t A4XX_SP_FS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val) -{ - return ((val) << A4XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT) & A4XX_SP_FS_CTRL_REG0_THREADMODE__MASK; -} -#define A4XX_SP_FS_CTRL_REG0_VARYING 0x00000002 -#define A4XX_SP_FS_CTRL_REG0_CACHEINVALID 0x00000004 -#define A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK 0x000c0000 -#define A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT 18 -static inline uint32_t A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val) -{ - return ((val) << A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK; -} -#define A4XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00100000 -#define A4XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 20 -static inline uint32_t A4XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A4XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A4XX_SP_FS_CTRL_REG0_THREADSIZE__MASK; -} -#define A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE 0x00200000 -#define A4XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x00400000 - -#define REG_A4XX_SP_FS_CTRL_REG1 0x000022e9 -#define A4XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK 0x000000ff -#define A4XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT 0 -static inline uint32_t A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(uint32_t val) -{ - return ((val) << A4XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT) & 
A4XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK; -} -#define A4XX_SP_FS_CTRL_REG1_FACENESS 0x00080000 -#define A4XX_SP_FS_CTRL_REG1_VARYING 0x00100000 -#define A4XX_SP_FS_CTRL_REG1_FRAGCOORD 0x00200000 - -#define REG_A4XX_SP_FS_OBJ_OFFSET_REG 0x000022ea -#define A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 -#define A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 -static inline uint32_t A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; -} -#define A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 -#define A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 -static inline uint32_t A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; -} - -#define REG_A4XX_SP_FS_OBJ_START 0x000022eb - -#define REG_A4XX_SP_FS_PVT_MEM_PARAM 0x000022ec - -#define REG_A4XX_SP_FS_PVT_MEM_ADDR 0x000022ed - -#define REG_A4XX_SP_FS_LENGTH_REG 0x000022ef - -#define REG_A4XX_SP_FS_OUTPUT_REG 0x000022f0 -#define A4XX_SP_FS_OUTPUT_REG_MRT__MASK 0x0000000f -#define A4XX_SP_FS_OUTPUT_REG_MRT__SHIFT 0 -static inline uint32_t A4XX_SP_FS_OUTPUT_REG_MRT(uint32_t val) -{ - return ((val) << A4XX_SP_FS_OUTPUT_REG_MRT__SHIFT) & A4XX_SP_FS_OUTPUT_REG_MRT__MASK; -} -#define A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE 0x00000080 -#define A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK 0x0000ff00 -#define A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT 8 -static inline uint32_t A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(uint32_t val) -{ - return ((val) << A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT) & A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK; -} -#define A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__MASK 0xff000000 -#define A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__SHIFT 24 -static inline uint32_t A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID(uint32_t val) -{ - return ((val) << 
A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__SHIFT) & A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__MASK; -} - -static inline uint32_t REG_A4XX_SP_FS_MRT(uint32_t i0) { return 0x000022f1 + 0x1*i0; } - -static inline uint32_t REG_A4XX_SP_FS_MRT_REG(uint32_t i0) { return 0x000022f1 + 0x1*i0; } -#define A4XX_SP_FS_MRT_REG_REGID__MASK 0x000000ff -#define A4XX_SP_FS_MRT_REG_REGID__SHIFT 0 -static inline uint32_t A4XX_SP_FS_MRT_REG_REGID(uint32_t val) -{ - return ((val) << A4XX_SP_FS_MRT_REG_REGID__SHIFT) & A4XX_SP_FS_MRT_REG_REGID__MASK; -} -#define A4XX_SP_FS_MRT_REG_HALF_PRECISION 0x00000100 -#define A4XX_SP_FS_MRT_REG_MRTFORMAT__MASK 0x0003f000 -#define A4XX_SP_FS_MRT_REG_MRTFORMAT__SHIFT 12 -static inline uint32_t A4XX_SP_FS_MRT_REG_MRTFORMAT(enum a4xx_color_fmt val) -{ - return ((val) << A4XX_SP_FS_MRT_REG_MRTFORMAT__SHIFT) & A4XX_SP_FS_MRT_REG_MRTFORMAT__MASK; -} -#define A4XX_SP_FS_MRT_REG_COLOR_SRGB 0x00040000 - -#define REG_A4XX_SP_CS_CTRL_REG0 0x00002300 - -#define REG_A4XX_SP_CS_OBJ_OFFSET_REG 0x00002301 - -#define REG_A4XX_SP_CS_OBJ_START 0x00002302 - -#define REG_A4XX_SP_CS_PVT_MEM_PARAM 0x00002303 - -#define REG_A4XX_SP_CS_PVT_MEM_ADDR 0x00002304 - -#define REG_A4XX_SP_CS_PVT_MEM_SIZE 0x00002305 - -#define REG_A4XX_SP_CS_LENGTH_REG 0x00002306 - -#define REG_A4XX_SP_HS_OBJ_OFFSET_REG 0x0000230d -#define A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 -#define A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 -static inline uint32_t A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; -} -#define A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 -#define A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 -static inline uint32_t A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; -} - 
-#define REG_A4XX_SP_HS_OBJ_START 0x0000230e - -#define REG_A4XX_SP_HS_PVT_MEM_PARAM 0x0000230f - -#define REG_A4XX_SP_HS_PVT_MEM_ADDR 0x00002310 - -#define REG_A4XX_SP_HS_LENGTH_REG 0x00002312 - -#define REG_A4XX_SP_DS_PARAM_REG 0x0000231a -#define A4XX_SP_DS_PARAM_REG_POSREGID__MASK 0x000000ff -#define A4XX_SP_DS_PARAM_REG_POSREGID__SHIFT 0 -static inline uint32_t A4XX_SP_DS_PARAM_REG_POSREGID(uint32_t val) -{ - return ((val) << A4XX_SP_DS_PARAM_REG_POSREGID__SHIFT) & A4XX_SP_DS_PARAM_REG_POSREGID__MASK; -} -#define A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__MASK 0xfff00000 -#define A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__SHIFT 20 -static inline uint32_t A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR(uint32_t val) -{ - return ((val) << A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__SHIFT) & A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__MASK; -} - -static inline uint32_t REG_A4XX_SP_DS_OUT(uint32_t i0) { return 0x0000231b + 0x1*i0; } - -static inline uint32_t REG_A4XX_SP_DS_OUT_REG(uint32_t i0) { return 0x0000231b + 0x1*i0; } -#define A4XX_SP_DS_OUT_REG_A_REGID__MASK 0x000001ff -#define A4XX_SP_DS_OUT_REG_A_REGID__SHIFT 0 -static inline uint32_t A4XX_SP_DS_OUT_REG_A_REGID(uint32_t val) -{ - return ((val) << A4XX_SP_DS_OUT_REG_A_REGID__SHIFT) & A4XX_SP_DS_OUT_REG_A_REGID__MASK; -} -#define A4XX_SP_DS_OUT_REG_A_COMPMASK__MASK 0x00001e00 -#define A4XX_SP_DS_OUT_REG_A_COMPMASK__SHIFT 9 -static inline uint32_t A4XX_SP_DS_OUT_REG_A_COMPMASK(uint32_t val) -{ - return ((val) << A4XX_SP_DS_OUT_REG_A_COMPMASK__SHIFT) & A4XX_SP_DS_OUT_REG_A_COMPMASK__MASK; -} -#define A4XX_SP_DS_OUT_REG_B_REGID__MASK 0x01ff0000 -#define A4XX_SP_DS_OUT_REG_B_REGID__SHIFT 16 -static inline uint32_t A4XX_SP_DS_OUT_REG_B_REGID(uint32_t val) -{ - return ((val) << A4XX_SP_DS_OUT_REG_B_REGID__SHIFT) & A4XX_SP_DS_OUT_REG_B_REGID__MASK; -} -#define A4XX_SP_DS_OUT_REG_B_COMPMASK__MASK 0x1e000000 -#define A4XX_SP_DS_OUT_REG_B_COMPMASK__SHIFT 25 -static inline uint32_t A4XX_SP_DS_OUT_REG_B_COMPMASK(uint32_t val) -{ - return ((val) << 
A4XX_SP_DS_OUT_REG_B_COMPMASK__SHIFT) & A4XX_SP_DS_OUT_REG_B_COMPMASK__MASK; -} - -static inline uint32_t REG_A4XX_SP_DS_VPC_DST(uint32_t i0) { return 0x0000232c + 0x1*i0; } - -static inline uint32_t REG_A4XX_SP_DS_VPC_DST_REG(uint32_t i0) { return 0x0000232c + 0x1*i0; } -#define A4XX_SP_DS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff -#define A4XX_SP_DS_VPC_DST_REG_OUTLOC0__SHIFT 0 -static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC0(uint32_t val) -{ - return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC0__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC0__MASK; -} -#define A4XX_SP_DS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 -#define A4XX_SP_DS_VPC_DST_REG_OUTLOC1__SHIFT 8 -static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC1(uint32_t val) -{ - return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC1__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC1__MASK; -} -#define A4XX_SP_DS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 -#define A4XX_SP_DS_VPC_DST_REG_OUTLOC2__SHIFT 16 -static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC2(uint32_t val) -{ - return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC2__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC2__MASK; -} -#define A4XX_SP_DS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 -#define A4XX_SP_DS_VPC_DST_REG_OUTLOC3__SHIFT 24 -static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC3(uint32_t val) -{ - return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC3__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC3__MASK; -} - -#define REG_A4XX_SP_DS_OBJ_OFFSET_REG 0x00002334 -#define A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 -#define A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 -static inline uint32_t A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; -} -#define A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 -#define A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 -static inline uint32_t A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) -{ 
- return ((val) << A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; -} - -#define REG_A4XX_SP_DS_OBJ_START 0x00002335 - -#define REG_A4XX_SP_DS_PVT_MEM_PARAM 0x00002336 - -#define REG_A4XX_SP_DS_PVT_MEM_ADDR 0x00002337 - -#define REG_A4XX_SP_DS_LENGTH_REG 0x00002339 - -#define REG_A4XX_SP_GS_PARAM_REG 0x00002341 -#define A4XX_SP_GS_PARAM_REG_POSREGID__MASK 0x000000ff -#define A4XX_SP_GS_PARAM_REG_POSREGID__SHIFT 0 -static inline uint32_t A4XX_SP_GS_PARAM_REG_POSREGID(uint32_t val) -{ - return ((val) << A4XX_SP_GS_PARAM_REG_POSREGID__SHIFT) & A4XX_SP_GS_PARAM_REG_POSREGID__MASK; -} -#define A4XX_SP_GS_PARAM_REG_PRIMREGID__MASK 0x0000ff00 -#define A4XX_SP_GS_PARAM_REG_PRIMREGID__SHIFT 8 -static inline uint32_t A4XX_SP_GS_PARAM_REG_PRIMREGID(uint32_t val) -{ - return ((val) << A4XX_SP_GS_PARAM_REG_PRIMREGID__SHIFT) & A4XX_SP_GS_PARAM_REG_PRIMREGID__MASK; -} -#define A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__MASK 0xfff00000 -#define A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__SHIFT 20 -static inline uint32_t A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR(uint32_t val) -{ - return ((val) << A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__SHIFT) & A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__MASK; -} - -static inline uint32_t REG_A4XX_SP_GS_OUT(uint32_t i0) { return 0x00002342 + 0x1*i0; } - -static inline uint32_t REG_A4XX_SP_GS_OUT_REG(uint32_t i0) { return 0x00002342 + 0x1*i0; } -#define A4XX_SP_GS_OUT_REG_A_REGID__MASK 0x000001ff -#define A4XX_SP_GS_OUT_REG_A_REGID__SHIFT 0 -static inline uint32_t A4XX_SP_GS_OUT_REG_A_REGID(uint32_t val) -{ - return ((val) << A4XX_SP_GS_OUT_REG_A_REGID__SHIFT) & A4XX_SP_GS_OUT_REG_A_REGID__MASK; -} -#define A4XX_SP_GS_OUT_REG_A_COMPMASK__MASK 0x00001e00 -#define A4XX_SP_GS_OUT_REG_A_COMPMASK__SHIFT 9 -static inline uint32_t A4XX_SP_GS_OUT_REG_A_COMPMASK(uint32_t val) -{ - return ((val) << A4XX_SP_GS_OUT_REG_A_COMPMASK__SHIFT) & A4XX_SP_GS_OUT_REG_A_COMPMASK__MASK; -} -#define A4XX_SP_GS_OUT_REG_B_REGID__MASK 0x01ff0000 -#define 
A4XX_SP_GS_OUT_REG_B_REGID__SHIFT 16 -static inline uint32_t A4XX_SP_GS_OUT_REG_B_REGID(uint32_t val) -{ - return ((val) << A4XX_SP_GS_OUT_REG_B_REGID__SHIFT) & A4XX_SP_GS_OUT_REG_B_REGID__MASK; -} -#define A4XX_SP_GS_OUT_REG_B_COMPMASK__MASK 0x1e000000 -#define A4XX_SP_GS_OUT_REG_B_COMPMASK__SHIFT 25 -static inline uint32_t A4XX_SP_GS_OUT_REG_B_COMPMASK(uint32_t val) -{ - return ((val) << A4XX_SP_GS_OUT_REG_B_COMPMASK__SHIFT) & A4XX_SP_GS_OUT_REG_B_COMPMASK__MASK; -} - -static inline uint32_t REG_A4XX_SP_GS_VPC_DST(uint32_t i0) { return 0x00002353 + 0x1*i0; } - -static inline uint32_t REG_A4XX_SP_GS_VPC_DST_REG(uint32_t i0) { return 0x00002353 + 0x1*i0; } -#define A4XX_SP_GS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff -#define A4XX_SP_GS_VPC_DST_REG_OUTLOC0__SHIFT 0 -static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC0(uint32_t val) -{ - return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC0__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC0__MASK; -} -#define A4XX_SP_GS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 -#define A4XX_SP_GS_VPC_DST_REG_OUTLOC1__SHIFT 8 -static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC1(uint32_t val) -{ - return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC1__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC1__MASK; -} -#define A4XX_SP_GS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 -#define A4XX_SP_GS_VPC_DST_REG_OUTLOC2__SHIFT 16 -static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC2(uint32_t val) -{ - return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC2__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC2__MASK; -} -#define A4XX_SP_GS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 -#define A4XX_SP_GS_VPC_DST_REG_OUTLOC3__SHIFT 24 -static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC3(uint32_t val) -{ - return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC3__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC3__MASK; -} - -#define REG_A4XX_SP_GS_OBJ_OFFSET_REG 0x0000235b -#define A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 -#define A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 -static inline uint32_t 
A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; -} -#define A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 -#define A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 -static inline uint32_t A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; -} - -#define REG_A4XX_SP_GS_OBJ_START 0x0000235c - -#define REG_A4XX_SP_GS_PVT_MEM_PARAM 0x0000235d - -#define REG_A4XX_SP_GS_PVT_MEM_ADDR 0x0000235e - -#define REG_A4XX_SP_GS_LENGTH_REG 0x00002360 - -#define REG_A4XX_VPC_DEBUG_RAM_SEL 0x00000e60 - -#define REG_A4XX_VPC_DEBUG_RAM_READ 0x00000e61 - -#define REG_A4XX_VPC_DEBUG_ECO_CONTROL 0x00000e64 - -#define REG_A4XX_VPC_PERFCTR_VPC_SEL_0 0x00000e65 - -#define REG_A4XX_VPC_PERFCTR_VPC_SEL_1 0x00000e66 - -#define REG_A4XX_VPC_PERFCTR_VPC_SEL_2 0x00000e67 - -#define REG_A4XX_VPC_PERFCTR_VPC_SEL_3 0x00000e68 - -#define REG_A4XX_VPC_ATTR 0x00002140 -#define A4XX_VPC_ATTR_TOTALATTR__MASK 0x000001ff -#define A4XX_VPC_ATTR_TOTALATTR__SHIFT 0 -static inline uint32_t A4XX_VPC_ATTR_TOTALATTR(uint32_t val) -{ - return ((val) << A4XX_VPC_ATTR_TOTALATTR__SHIFT) & A4XX_VPC_ATTR_TOTALATTR__MASK; -} -#define A4XX_VPC_ATTR_PSIZE 0x00000200 -#define A4XX_VPC_ATTR_THRDASSIGN__MASK 0x00003000 -#define A4XX_VPC_ATTR_THRDASSIGN__SHIFT 12 -static inline uint32_t A4XX_VPC_ATTR_THRDASSIGN(uint32_t val) -{ - return ((val) << A4XX_VPC_ATTR_THRDASSIGN__SHIFT) & A4XX_VPC_ATTR_THRDASSIGN__MASK; -} -#define A4XX_VPC_ATTR_ENABLE 0x02000000 - -#define REG_A4XX_VPC_PACK 0x00002141 -#define A4XX_VPC_PACK_NUMBYPASSVAR__MASK 0x000000ff -#define A4XX_VPC_PACK_NUMBYPASSVAR__SHIFT 0 -static inline uint32_t A4XX_VPC_PACK_NUMBYPASSVAR(uint32_t val) -{ - return ((val) << A4XX_VPC_PACK_NUMBYPASSVAR__SHIFT) & A4XX_VPC_PACK_NUMBYPASSVAR__MASK; -} 
-#define A4XX_VPC_PACK_NUMFPNONPOSVAR__MASK 0x0000ff00 -#define A4XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT 8 -static inline uint32_t A4XX_VPC_PACK_NUMFPNONPOSVAR(uint32_t val) -{ - return ((val) << A4XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT) & A4XX_VPC_PACK_NUMFPNONPOSVAR__MASK; -} -#define A4XX_VPC_PACK_NUMNONPOSVSVAR__MASK 0x00ff0000 -#define A4XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT 16 -static inline uint32_t A4XX_VPC_PACK_NUMNONPOSVSVAR(uint32_t val) -{ - return ((val) << A4XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT) & A4XX_VPC_PACK_NUMNONPOSVSVAR__MASK; -} - -static inline uint32_t REG_A4XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x00002142 + 0x1*i0; } - -static inline uint32_t REG_A4XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x00002142 + 0x1*i0; } - -static inline uint32_t REG_A4XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x0000214a + 0x1*i0; } - -static inline uint32_t REG_A4XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x0000214a + 0x1*i0; } - -#define REG_A4XX_VPC_SO_FLUSH_WADDR_3 0x0000216e - -#define REG_A4XX_VSC_BIN_SIZE 0x00000c00 -#define A4XX_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f -#define A4XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 -static inline uint32_t A4XX_VSC_BIN_SIZE_WIDTH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A4XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A4XX_VSC_BIN_SIZE_WIDTH__MASK; -} -#define A4XX_VSC_BIN_SIZE_HEIGHT__MASK 0x000003e0 -#define A4XX_VSC_BIN_SIZE_HEIGHT__SHIFT 5 -static inline uint32_t A4XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A4XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A4XX_VSC_BIN_SIZE_HEIGHT__MASK; -} - -#define REG_A4XX_VSC_SIZE_ADDRESS 0x00000c01 - -#define REG_A4XX_VSC_SIZE_ADDRESS2 0x00000c02 - -#define REG_A4XX_VSC_DEBUG_ECO_CONTROL 0x00000c03 - -static inline uint32_t REG_A4XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c08 + 0x1*i0; } - -static inline uint32_t REG_A4XX_VSC_PIPE_CONFIG_REG(uint32_t i0) { return 0x00000c08 + 0x1*i0; } -#define A4XX_VSC_PIPE_CONFIG_REG_X__MASK 0x000003ff 
-#define A4XX_VSC_PIPE_CONFIG_REG_X__SHIFT 0 -static inline uint32_t A4XX_VSC_PIPE_CONFIG_REG_X(uint32_t val) -{ - return ((val) << A4XX_VSC_PIPE_CONFIG_REG_X__SHIFT) & A4XX_VSC_PIPE_CONFIG_REG_X__MASK; -} -#define A4XX_VSC_PIPE_CONFIG_REG_Y__MASK 0x000ffc00 -#define A4XX_VSC_PIPE_CONFIG_REG_Y__SHIFT 10 -static inline uint32_t A4XX_VSC_PIPE_CONFIG_REG_Y(uint32_t val) -{ - return ((val) << A4XX_VSC_PIPE_CONFIG_REG_Y__SHIFT) & A4XX_VSC_PIPE_CONFIG_REG_Y__MASK; -} -#define A4XX_VSC_PIPE_CONFIG_REG_W__MASK 0x00f00000 -#define A4XX_VSC_PIPE_CONFIG_REG_W__SHIFT 20 -static inline uint32_t A4XX_VSC_PIPE_CONFIG_REG_W(uint32_t val) -{ - return ((val) << A4XX_VSC_PIPE_CONFIG_REG_W__SHIFT) & A4XX_VSC_PIPE_CONFIG_REG_W__MASK; -} -#define A4XX_VSC_PIPE_CONFIG_REG_H__MASK 0x0f000000 -#define A4XX_VSC_PIPE_CONFIG_REG_H__SHIFT 24 -static inline uint32_t A4XX_VSC_PIPE_CONFIG_REG_H(uint32_t val) -{ - return ((val) << A4XX_VSC_PIPE_CONFIG_REG_H__SHIFT) & A4XX_VSC_PIPE_CONFIG_REG_H__MASK; -} - -static inline uint32_t REG_A4XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c10 + 0x1*i0; } - -static inline uint32_t REG_A4XX_VSC_PIPE_DATA_ADDRESS_REG(uint32_t i0) { return 0x00000c10 + 0x1*i0; } - -static inline uint32_t REG_A4XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c18 + 0x1*i0; } - -static inline uint32_t REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(uint32_t i0) { return 0x00000c18 + 0x1*i0; } - -#define REG_A4XX_VSC_PIPE_PARTIAL_POSN_1 0x00000c41 - -#define REG_A4XX_VSC_PERFCTR_VSC_SEL_0 0x00000c50 - -#define REG_A4XX_VSC_PERFCTR_VSC_SEL_1 0x00000c51 - -#define REG_A4XX_VFD_DEBUG_CONTROL 0x00000e40 - -#define REG_A4XX_VFD_PERFCTR_VFD_SEL_0 0x00000e43 - -#define REG_A4XX_VFD_PERFCTR_VFD_SEL_1 0x00000e44 - -#define REG_A4XX_VFD_PERFCTR_VFD_SEL_2 0x00000e45 - -#define REG_A4XX_VFD_PERFCTR_VFD_SEL_3 0x00000e46 - -#define REG_A4XX_VFD_PERFCTR_VFD_SEL_4 0x00000e47 - -#define REG_A4XX_VFD_PERFCTR_VFD_SEL_5 0x00000e48 - -#define REG_A4XX_VFD_PERFCTR_VFD_SEL_6 0x00000e49 - -#define 
REG_A4XX_VFD_PERFCTR_VFD_SEL_7 0x00000e4a - -#define REG_A4XX_VGT_CL_INITIATOR 0x000021d0 - -#define REG_A4XX_VGT_EVENT_INITIATOR 0x000021d9 - -#define REG_A4XX_VFD_CONTROL_0 0x00002200 -#define A4XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK 0x000000ff -#define A4XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT 0 -static inline uint32_t A4XX_VFD_CONTROL_0_TOTALATTRTOVS(uint32_t val) -{ - return ((val) << A4XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT) & A4XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK; -} -#define A4XX_VFD_CONTROL_0_BYPASSATTROVS__MASK 0x0001fe00 -#define A4XX_VFD_CONTROL_0_BYPASSATTROVS__SHIFT 9 -static inline uint32_t A4XX_VFD_CONTROL_0_BYPASSATTROVS(uint32_t val) -{ - return ((val) << A4XX_VFD_CONTROL_0_BYPASSATTROVS__SHIFT) & A4XX_VFD_CONTROL_0_BYPASSATTROVS__MASK; -} -#define A4XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK 0x03f00000 -#define A4XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT 20 -static inline uint32_t A4XX_VFD_CONTROL_0_STRMDECINSTRCNT(uint32_t val) -{ - return ((val) << A4XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT) & A4XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK; -} -#define A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK 0xfc000000 -#define A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT 26 -static inline uint32_t A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val) -{ - return ((val) << A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT) & A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK; -} - -#define REG_A4XX_VFD_CONTROL_1 0x00002201 -#define A4XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000ffff -#define A4XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT 0 -static inline uint32_t A4XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val) -{ - return ((val) << A4XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT) & A4XX_VFD_CONTROL_1_MAXSTORAGE__MASK; -} -#define A4XX_VFD_CONTROL_1_REGID4VTX__MASK 0x00ff0000 -#define A4XX_VFD_CONTROL_1_REGID4VTX__SHIFT 16 -static inline uint32_t A4XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) -{ - return ((val) << A4XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A4XX_VFD_CONTROL_1_REGID4VTX__MASK; -} -#define 
A4XX_VFD_CONTROL_1_REGID4INST__MASK 0xff000000 -#define A4XX_VFD_CONTROL_1_REGID4INST__SHIFT 24 -static inline uint32_t A4XX_VFD_CONTROL_1_REGID4INST(uint32_t val) -{ - return ((val) << A4XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A4XX_VFD_CONTROL_1_REGID4INST__MASK; -} - -#define REG_A4XX_VFD_CONTROL_2 0x00002202 - -#define REG_A4XX_VFD_CONTROL_3 0x00002203 -#define A4XX_VFD_CONTROL_3_REGID_VTXCNT__MASK 0x0000ff00 -#define A4XX_VFD_CONTROL_3_REGID_VTXCNT__SHIFT 8 -static inline uint32_t A4XX_VFD_CONTROL_3_REGID_VTXCNT(uint32_t val) -{ - return ((val) << A4XX_VFD_CONTROL_3_REGID_VTXCNT__SHIFT) & A4XX_VFD_CONTROL_3_REGID_VTXCNT__MASK; -} -#define A4XX_VFD_CONTROL_3_REGID_TESSX__MASK 0x00ff0000 -#define A4XX_VFD_CONTROL_3_REGID_TESSX__SHIFT 16 -static inline uint32_t A4XX_VFD_CONTROL_3_REGID_TESSX(uint32_t val) -{ - return ((val) << A4XX_VFD_CONTROL_3_REGID_TESSX__SHIFT) & A4XX_VFD_CONTROL_3_REGID_TESSX__MASK; -} -#define A4XX_VFD_CONTROL_3_REGID_TESSY__MASK 0xff000000 -#define A4XX_VFD_CONTROL_3_REGID_TESSY__SHIFT 24 -static inline uint32_t A4XX_VFD_CONTROL_3_REGID_TESSY(uint32_t val) -{ - return ((val) << A4XX_VFD_CONTROL_3_REGID_TESSY__SHIFT) & A4XX_VFD_CONTROL_3_REGID_TESSY__MASK; -} - -#define REG_A4XX_VFD_CONTROL_4 0x00002204 - -#define REG_A4XX_VFD_INDEX_OFFSET 0x00002208 - -static inline uint32_t REG_A4XX_VFD_FETCH(uint32_t i0) { return 0x0000220a + 0x4*i0; } - -static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_0(uint32_t i0) { return 0x0000220a + 0x4*i0; } -#define A4XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK 0x0000007f -#define A4XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT 0 -static inline uint32_t A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(uint32_t val) -{ - return ((val) << A4XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT) & A4XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK; -} -#define A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK 0x0001ff80 -#define A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT 7 -static inline uint32_t A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(uint32_t val) -{ - return ((val) << 
A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT) & A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK; -} -#define A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT 0x00080000 -#define A4XX_VFD_FETCH_INSTR_0_INSTANCED 0x00100000 - -static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_1(uint32_t i0) { return 0x0000220b + 0x4*i0; } - -static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_2(uint32_t i0) { return 0x0000220c + 0x4*i0; } -#define A4XX_VFD_FETCH_INSTR_2_SIZE__MASK 0xffffffff -#define A4XX_VFD_FETCH_INSTR_2_SIZE__SHIFT 0 -static inline uint32_t A4XX_VFD_FETCH_INSTR_2_SIZE(uint32_t val) -{ - return ((val) << A4XX_VFD_FETCH_INSTR_2_SIZE__SHIFT) & A4XX_VFD_FETCH_INSTR_2_SIZE__MASK; -} - -static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_3(uint32_t i0) { return 0x0000220d + 0x4*i0; } -#define A4XX_VFD_FETCH_INSTR_3_STEPRATE__MASK 0x000001ff -#define A4XX_VFD_FETCH_INSTR_3_STEPRATE__SHIFT 0 -static inline uint32_t A4XX_VFD_FETCH_INSTR_3_STEPRATE(uint32_t val) -{ - return ((val) << A4XX_VFD_FETCH_INSTR_3_STEPRATE__SHIFT) & A4XX_VFD_FETCH_INSTR_3_STEPRATE__MASK; -} - -static inline uint32_t REG_A4XX_VFD_DECODE(uint32_t i0) { return 0x0000228a + 0x1*i0; } - -static inline uint32_t REG_A4XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x0000228a + 0x1*i0; } -#define A4XX_VFD_DECODE_INSTR_WRITEMASK__MASK 0x0000000f -#define A4XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT 0 -static inline uint32_t A4XX_VFD_DECODE_INSTR_WRITEMASK(uint32_t val) -{ - return ((val) << A4XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT) & A4XX_VFD_DECODE_INSTR_WRITEMASK__MASK; -} -#define A4XX_VFD_DECODE_INSTR_CONSTFILL 0x00000010 -#define A4XX_VFD_DECODE_INSTR_FORMAT__MASK 0x00000fc0 -#define A4XX_VFD_DECODE_INSTR_FORMAT__SHIFT 6 -static inline uint32_t A4XX_VFD_DECODE_INSTR_FORMAT(enum a4xx_vtx_fmt val) -{ - return ((val) << A4XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A4XX_VFD_DECODE_INSTR_FORMAT__MASK; -} -#define A4XX_VFD_DECODE_INSTR_REGID__MASK 0x000ff000 -#define A4XX_VFD_DECODE_INSTR_REGID__SHIFT 12 -static inline uint32_t A4XX_VFD_DECODE_INSTR_REGID(uint32_t 
val) -{ - return ((val) << A4XX_VFD_DECODE_INSTR_REGID__SHIFT) & A4XX_VFD_DECODE_INSTR_REGID__MASK; -} -#define A4XX_VFD_DECODE_INSTR_INT 0x00100000 -#define A4XX_VFD_DECODE_INSTR_SWAP__MASK 0x00c00000 -#define A4XX_VFD_DECODE_INSTR_SWAP__SHIFT 22 -static inline uint32_t A4XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A4XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A4XX_VFD_DECODE_INSTR_SWAP__MASK; -} -#define A4XX_VFD_DECODE_INSTR_SHIFTCNT__MASK 0x1f000000 -#define A4XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT 24 -static inline uint32_t A4XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val) -{ - return ((val) << A4XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT) & A4XX_VFD_DECODE_INSTR_SHIFTCNT__MASK; -} -#define A4XX_VFD_DECODE_INSTR_LASTCOMPVALID 0x20000000 -#define A4XX_VFD_DECODE_INSTR_SWITCHNEXT 0x40000000 - -#define REG_A4XX_TPL1_DEBUG_ECO_CONTROL 0x00000f00 - -#define REG_A4XX_TPL1_TP_MODE_CONTROL 0x00000f03 - -#define REG_A4XX_TPL1_PERFCTR_TP_SEL_0 0x00000f04 - -#define REG_A4XX_TPL1_PERFCTR_TP_SEL_1 0x00000f05 - -#define REG_A4XX_TPL1_PERFCTR_TP_SEL_2 0x00000f06 - -#define REG_A4XX_TPL1_PERFCTR_TP_SEL_3 0x00000f07 - -#define REG_A4XX_TPL1_PERFCTR_TP_SEL_4 0x00000f08 - -#define REG_A4XX_TPL1_PERFCTR_TP_SEL_5 0x00000f09 - -#define REG_A4XX_TPL1_PERFCTR_TP_SEL_6 0x00000f0a - -#define REG_A4XX_TPL1_PERFCTR_TP_SEL_7 0x00000f0b - -#define REG_A4XX_TPL1_TP_TEX_OFFSET 0x00002380 - -#define REG_A4XX_TPL1_TP_TEX_COUNT 0x00002381 -#define A4XX_TPL1_TP_TEX_COUNT_VS__MASK 0x000000ff -#define A4XX_TPL1_TP_TEX_COUNT_VS__SHIFT 0 -static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_VS(uint32_t val) -{ - return ((val) << A4XX_TPL1_TP_TEX_COUNT_VS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_VS__MASK; -} -#define A4XX_TPL1_TP_TEX_COUNT_HS__MASK 0x0000ff00 -#define A4XX_TPL1_TP_TEX_COUNT_HS__SHIFT 8 -static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_HS(uint32_t val) -{ - return ((val) << A4XX_TPL1_TP_TEX_COUNT_HS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_HS__MASK; -} -#define A4XX_TPL1_TP_TEX_COUNT_DS__MASK 0x00ff0000 
-#define A4XX_TPL1_TP_TEX_COUNT_DS__SHIFT 16 -static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_DS(uint32_t val) -{ - return ((val) << A4XX_TPL1_TP_TEX_COUNT_DS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_DS__MASK; -} -#define A4XX_TPL1_TP_TEX_COUNT_GS__MASK 0xff000000 -#define A4XX_TPL1_TP_TEX_COUNT_GS__SHIFT 24 -static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val) -{ - return ((val) << A4XX_TPL1_TP_TEX_COUNT_GS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_GS__MASK; -} - -#define REG_A4XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR 0x00002384 - -#define REG_A4XX_TPL1_TP_HS_BORDER_COLOR_BASE_ADDR 0x00002387 - -#define REG_A4XX_TPL1_TP_DS_BORDER_COLOR_BASE_ADDR 0x0000238a - -#define REG_A4XX_TPL1_TP_GS_BORDER_COLOR_BASE_ADDR 0x0000238d - -#define REG_A4XX_TPL1_TP_FS_TEX_COUNT 0x000023a0 - -#define REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR 0x000023a1 - -#define REG_A4XX_TPL1_TP_CS_BORDER_COLOR_BASE_ADDR 0x000023a4 - -#define REG_A4XX_TPL1_TP_CS_SAMPLER_BASE_ADDR 0x000023a5 - -#define REG_A4XX_TPL1_TP_CS_TEXMEMOBJ_BASE_ADDR 0x000023a6 - -#define REG_A4XX_GRAS_TSE_STATUS 0x00000c80 - -#define REG_A4XX_GRAS_DEBUG_ECO_CONTROL 0x00000c81 - -#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_0 0x00000c88 - -#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_1 0x00000c89 - -#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_2 0x00000c8a - -#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c8b - -#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_0 0x00000c8c - -#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_1 0x00000c8d - -#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_2 0x00000c8e - -#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_3 0x00000c8f - -#define REG_A4XX_GRAS_CL_CLIP_CNTL 0x00002000 -#define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00008000 -#define A4XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE 0x00010000 -#define A4XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE 0x00020000 -#define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000 - -#define REG_A4XX_GRAS_CLEAR_CNTL 0x00002003 -#define A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR 0x00000001 - -#define REG_A4XX_GRAS_CL_GB_CLIP_ADJ 0x00002004 
-#define A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK 0x000003ff -#define A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT 0 -static inline uint32_t A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ(uint32_t val) -{ - return ((val) << A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT) & A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK; -} -#define A4XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK 0x000ffc00 -#define A4XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT 10 -static inline uint32_t A4XX_GRAS_CL_GB_CLIP_ADJ_VERT(uint32_t val) -{ - return ((val) << A4XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT) & A4XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK; -} - -#define REG_A4XX_GRAS_CL_VPORT_XOFFSET_0 0x00002008 -#define A4XX_GRAS_CL_VPORT_XOFFSET_0__MASK 0xffffffff -#define A4XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT 0 -static inline uint32_t A4XX_GRAS_CL_VPORT_XOFFSET_0(float val) -{ - return ((fui(val)) << A4XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT) & A4XX_GRAS_CL_VPORT_XOFFSET_0__MASK; -} - -#define REG_A4XX_GRAS_CL_VPORT_XSCALE_0 0x00002009 -#define A4XX_GRAS_CL_VPORT_XSCALE_0__MASK 0xffffffff -#define A4XX_GRAS_CL_VPORT_XSCALE_0__SHIFT 0 -static inline uint32_t A4XX_GRAS_CL_VPORT_XSCALE_0(float val) -{ - return ((fui(val)) << A4XX_GRAS_CL_VPORT_XSCALE_0__SHIFT) & A4XX_GRAS_CL_VPORT_XSCALE_0__MASK; -} - -#define REG_A4XX_GRAS_CL_VPORT_YOFFSET_0 0x0000200a -#define A4XX_GRAS_CL_VPORT_YOFFSET_0__MASK 0xffffffff -#define A4XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT 0 -static inline uint32_t A4XX_GRAS_CL_VPORT_YOFFSET_0(float val) -{ - return ((fui(val)) << A4XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT) & A4XX_GRAS_CL_VPORT_YOFFSET_0__MASK; -} - -#define REG_A4XX_GRAS_CL_VPORT_YSCALE_0 0x0000200b -#define A4XX_GRAS_CL_VPORT_YSCALE_0__MASK 0xffffffff -#define A4XX_GRAS_CL_VPORT_YSCALE_0__SHIFT 0 -static inline uint32_t A4XX_GRAS_CL_VPORT_YSCALE_0(float val) -{ - return ((fui(val)) << A4XX_GRAS_CL_VPORT_YSCALE_0__SHIFT) & A4XX_GRAS_CL_VPORT_YSCALE_0__MASK; -} - -#define REG_A4XX_GRAS_CL_VPORT_ZOFFSET_0 0x0000200c -#define A4XX_GRAS_CL_VPORT_ZOFFSET_0__MASK 0xffffffff -#define A4XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT 0 
-static inline uint32_t A4XX_GRAS_CL_VPORT_ZOFFSET_0(float val) -{ - return ((fui(val)) << A4XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT) & A4XX_GRAS_CL_VPORT_ZOFFSET_0__MASK; -} - -#define REG_A4XX_GRAS_CL_VPORT_ZSCALE_0 0x0000200d -#define A4XX_GRAS_CL_VPORT_ZSCALE_0__MASK 0xffffffff -#define A4XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT 0 -static inline uint32_t A4XX_GRAS_CL_VPORT_ZSCALE_0(float val) -{ - return ((fui(val)) << A4XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT) & A4XX_GRAS_CL_VPORT_ZSCALE_0__MASK; -} - -#define REG_A4XX_GRAS_SU_POINT_MINMAX 0x00002070 -#define A4XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff -#define A4XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0 -static inline uint32_t A4XX_GRAS_SU_POINT_MINMAX_MIN(float val) -{ - return ((((uint32_t)(val * 16.0))) << A4XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A4XX_GRAS_SU_POINT_MINMAX_MIN__MASK; -} -#define A4XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000 -#define A4XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16 -static inline uint32_t A4XX_GRAS_SU_POINT_MINMAX_MAX(float val) -{ - return ((((uint32_t)(val * 16.0))) << A4XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A4XX_GRAS_SU_POINT_MINMAX_MAX__MASK; -} - -#define REG_A4XX_GRAS_SU_POINT_SIZE 0x00002071 -#define A4XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff -#define A4XX_GRAS_SU_POINT_SIZE__SHIFT 0 -static inline uint32_t A4XX_GRAS_SU_POINT_SIZE(float val) -{ - return ((((int32_t)(val * 16.0))) << A4XX_GRAS_SU_POINT_SIZE__SHIFT) & A4XX_GRAS_SU_POINT_SIZE__MASK; -} - -#define REG_A4XX_GRAS_ALPHA_CONTROL 0x00002073 -#define A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE 0x00000004 -#define A4XX_GRAS_ALPHA_CONTROL_FORCE_FRAGZ_TO_FS 0x00000008 - -#define REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE 0x00002074 -#define A4XX_GRAS_SU_POLY_OFFSET_SCALE__MASK 0xffffffff -#define A4XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT 0 -static inline uint32_t A4XX_GRAS_SU_POLY_OFFSET_SCALE(float val) -{ - return ((fui(val)) << A4XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT) & A4XX_GRAS_SU_POLY_OFFSET_SCALE__MASK; -} - -#define REG_A4XX_GRAS_SU_POLY_OFFSET_OFFSET 
0x00002075 -#define A4XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff -#define A4XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0 -static inline uint32_t A4XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) -{ - return ((fui(val)) << A4XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A4XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; -} - -#define REG_A4XX_GRAS_SU_POLY_OFFSET_CLAMP 0x00002076 -#define A4XX_GRAS_SU_POLY_OFFSET_CLAMP__MASK 0xffffffff -#define A4XX_GRAS_SU_POLY_OFFSET_CLAMP__SHIFT 0 -static inline uint32_t A4XX_GRAS_SU_POLY_OFFSET_CLAMP(float val) -{ - return ((fui(val)) << A4XX_GRAS_SU_POLY_OFFSET_CLAMP__SHIFT) & A4XX_GRAS_SU_POLY_OFFSET_CLAMP__MASK; -} - -#define REG_A4XX_GRAS_DEPTH_CONTROL 0x00002077 -#define A4XX_GRAS_DEPTH_CONTROL_FORMAT__MASK 0x00000003 -#define A4XX_GRAS_DEPTH_CONTROL_FORMAT__SHIFT 0 -static inline uint32_t A4XX_GRAS_DEPTH_CONTROL_FORMAT(enum a4xx_depth_format val) -{ - return ((val) << A4XX_GRAS_DEPTH_CONTROL_FORMAT__SHIFT) & A4XX_GRAS_DEPTH_CONTROL_FORMAT__MASK; -} - -#define REG_A4XX_GRAS_SU_MODE_CONTROL 0x00002078 -#define A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT 0x00000001 -#define A4XX_GRAS_SU_MODE_CONTROL_CULL_BACK 0x00000002 -#define A4XX_GRAS_SU_MODE_CONTROL_FRONT_CW 0x00000004 -#define A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK 0x000007f8 -#define A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT 3 -static inline uint32_t A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(float val) -{ - return ((((int32_t)(val * 4.0))) << A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT) & A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK; -} -#define A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET 0x00000800 -#define A4XX_GRAS_SU_MODE_CONTROL_MSAA_ENABLE 0x00002000 -#define A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS 0x00100000 - -#define REG_A4XX_GRAS_SC_CONTROL 0x0000207b -#define A4XX_GRAS_SC_CONTROL_RENDER_MODE__MASK 0x0000000c -#define A4XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT 2 -static inline uint32_t A4XX_GRAS_SC_CONTROL_RENDER_MODE(enum a3xx_render_mode val) -{ - return ((val) << 
A4XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT) & A4XX_GRAS_SC_CONTROL_RENDER_MODE__MASK; -} -#define A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK 0x00000380 -#define A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT 7 -static inline uint32_t A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT) & A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK; -} -#define A4XX_GRAS_SC_CONTROL_MSAA_DISABLE 0x00000800 -#define A4XX_GRAS_SC_CONTROL_RASTER_MODE__MASK 0x0000f000 -#define A4XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT 12 -static inline uint32_t A4XX_GRAS_SC_CONTROL_RASTER_MODE(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT) & A4XX_GRAS_SC_CONTROL_RASTER_MODE__MASK; -} - -#define REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL 0x0000207c -#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK 0x00007fff -#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT 0 -static inline uint32_t A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT) & A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK; -} -#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK 0x7fff0000 -#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT 16 -static inline uint32_t A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT) & A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK; -} - -#define REG_A4XX_GRAS_SC_SCREEN_SCISSOR_BR 0x0000207d -#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK 0x00007fff -#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT 0 -static inline uint32_t A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT) & A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK; -} -#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK 0x7fff0000 -#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT 16 -static inline uint32_t 
A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT) & A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK; -} - -#define REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR 0x0000209c -#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff -#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 -static inline uint32_t A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK; -} -#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 -#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 -static inline uint32_t A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK; -} - -#define REG_A4XX_GRAS_SC_WINDOW_SCISSOR_TL 0x0000209d -#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff -#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 -static inline uint32_t A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK; -} -#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 -#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 -static inline uint32_t A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK; -} - -#define REG_A4XX_GRAS_SC_EXTENT_WINDOW_BR 0x0000209e -#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_X__MASK 0x00007fff -#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_X__SHIFT 0 -static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_BR_X(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_EXTENT_WINDOW_BR_X__SHIFT) & A4XX_GRAS_SC_EXTENT_WINDOW_BR_X__MASK; -} -#define 
A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y__MASK 0x7fff0000 -#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y__SHIFT 16 -static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y__SHIFT) & A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y__MASK; -} - -#define REG_A4XX_GRAS_SC_EXTENT_WINDOW_TL 0x0000209f -#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_X__MASK 0x00007fff -#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_X__SHIFT 0 -static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_X(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_EXTENT_WINDOW_TL_X__SHIFT) & A4XX_GRAS_SC_EXTENT_WINDOW_TL_X__MASK; -} -#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y__MASK 0x7fff0000 -#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y__SHIFT 16 -static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y__SHIFT) & A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y__MASK; -} - -#define REG_A4XX_UCHE_CACHE_MODE_CONTROL 0x00000e80 - -#define REG_A4XX_UCHE_TRAP_BASE_LO 0x00000e83 - -#define REG_A4XX_UCHE_TRAP_BASE_HI 0x00000e84 - -#define REG_A4XX_UCHE_CACHE_STATUS 0x00000e88 - -#define REG_A4XX_UCHE_INVALIDATE0 0x00000e8a - -#define REG_A4XX_UCHE_INVALIDATE1 0x00000e8b - -#define REG_A4XX_UCHE_CACHE_WAYS_VFD 0x00000e8c - -#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_0 0x00000e8e - -#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_1 0x00000e8f - -#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_2 0x00000e90 - -#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_3 0x00000e91 - -#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_4 0x00000e92 - -#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_5 0x00000e93 - -#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_6 0x00000e94 - -#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_7 0x00000e95 - -#define REG_A4XX_HLSQ_TIMEOUT_THRESHOLD 0x00000e00 - -#define REG_A4XX_HLSQ_DEBUG_ECO_CONTROL 0x00000e04 - -#define REG_A4XX_HLSQ_MODE_CONTROL 0x00000e05 - -#define REG_A4XX_HLSQ_PERF_PIPE_MASK 0x00000e0e - -#define 
REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_0 0x00000e06 - -#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_1 0x00000e07 - -#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_2 0x00000e08 - -#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_3 0x00000e09 - -#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_4 0x00000e0a - -#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_5 0x00000e0b - -#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_6 0x00000e0c - -#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_7 0x00000e0d - -#define REG_A4XX_HLSQ_CONTROL_0_REG 0x000023c0 -#define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000010 -#define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 4 -static inline uint32_t A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK; -} -#define A4XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE 0x00000040 -#define A4XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART 0x00000200 -#define A4XX_HLSQ_CONTROL_0_REG_RESERVED2 0x00000400 -#define A4XX_HLSQ_CONTROL_0_REG_CHUNKDISABLE 0x04000000 -#define A4XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK 0x08000000 -#define A4XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT 27 -static inline uint32_t A4XX_HLSQ_CONTROL_0_REG_CONSTMODE(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT) & A4XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK; -} -#define A4XX_HLSQ_CONTROL_0_REG_LAZYUPDATEDISABLE 0x10000000 -#define A4XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE 0x20000000 -#define A4XX_HLSQ_CONTROL_0_REG_TPFULLUPDATE 0x40000000 -#define A4XX_HLSQ_CONTROL_0_REG_SINGLECONTEXT 0x80000000 - -#define REG_A4XX_HLSQ_CONTROL_1_REG 0x000023c1 -#define A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK 0x00000040 -#define A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT 6 -static inline uint32_t A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT) & A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK; -} -#define A4XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE 
0x00000100 -#define A4XX_HLSQ_CONTROL_1_REG_RESERVED1 0x00000200 -#define A4XX_HLSQ_CONTROL_1_REG_COORDREGID__MASK 0x00ff0000 -#define A4XX_HLSQ_CONTROL_1_REG_COORDREGID__SHIFT 16 -static inline uint32_t A4XX_HLSQ_CONTROL_1_REG_COORDREGID(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CONTROL_1_REG_COORDREGID__SHIFT) & A4XX_HLSQ_CONTROL_1_REG_COORDREGID__MASK; -} -#define A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__MASK 0xff000000 -#define A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__SHIFT 24 -static inline uint32_t A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__SHIFT) & A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__MASK; -} - -#define REG_A4XX_HLSQ_CONTROL_2_REG 0x000023c2 -#define A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK 0xfc000000 -#define A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT 26 -static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT) & A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK; -} -#define A4XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK 0x000003fc -#define A4XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT 2 -static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_FACEREGID(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT) & A4XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK; -} -#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__MASK 0x0003fc00 -#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__SHIFT 10 -static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__SHIFT) & A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__MASK; -} -#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__MASK 0x03fc0000 -#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__SHIFT 18 -static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__SHIFT) & 
A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__MASK; -} - -#define REG_A4XX_HLSQ_CONTROL_3_REG 0x000023c3 -#define A4XX_HLSQ_CONTROL_3_REG_REGID__MASK 0x000000ff -#define A4XX_HLSQ_CONTROL_3_REG_REGID__SHIFT 0 -static inline uint32_t A4XX_HLSQ_CONTROL_3_REG_REGID(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CONTROL_3_REG_REGID__SHIFT) & A4XX_HLSQ_CONTROL_3_REG_REGID__MASK; -} - -#define REG_A4XX_HLSQ_CONTROL_4_REG 0x000023c4 - -#define REG_A4XX_HLSQ_VS_CONTROL_REG 0x000023c5 -#define A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff -#define A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT 0 -static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(uint32_t val) -{ - return ((val) << A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK; -} -#define A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 -#define A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 -static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; -} -#define A4XX_HLSQ_VS_CONTROL_REG_SSBO_ENABLE 0x00008000 -#define A4XX_HLSQ_VS_CONTROL_REG_ENABLED 0x00010000 -#define A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 -#define A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 -static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__MASK; -} -#define A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 -#define A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT 24 -static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(uint32_t val) -{ - return ((val) << A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK; -} - -#define REG_A4XX_HLSQ_FS_CONTROL_REG 0x000023c6 -#define A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff -#define 
A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT 0 -static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(uint32_t val) -{ - return ((val) << A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK; -} -#define A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 -#define A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 -static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; -} -#define A4XX_HLSQ_FS_CONTROL_REG_SSBO_ENABLE 0x00008000 -#define A4XX_HLSQ_FS_CONTROL_REG_ENABLED 0x00010000 -#define A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 -#define A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 -static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__MASK; -} -#define A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 -#define A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT 24 -static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(uint32_t val) -{ - return ((val) << A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK; -} - -#define REG_A4XX_HLSQ_HS_CONTROL_REG 0x000023c7 -#define A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff -#define A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH__SHIFT 0 -static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH(uint32_t val) -{ - return ((val) << A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH__MASK; -} -#define A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 -#define A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 -static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & 
A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; -} -#define A4XX_HLSQ_HS_CONTROL_REG_SSBO_ENABLE 0x00008000 -#define A4XX_HLSQ_HS_CONTROL_REG_ENABLED 0x00010000 -#define A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 -#define A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 -static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__MASK; -} -#define A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 -#define A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH__SHIFT 24 -static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH(uint32_t val) -{ - return ((val) << A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH__MASK; -} - -#define REG_A4XX_HLSQ_DS_CONTROL_REG 0x000023c8 -#define A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff -#define A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH__SHIFT 0 -static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH(uint32_t val) -{ - return ((val) << A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH__MASK; -} -#define A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 -#define A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 -static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; -} -#define A4XX_HLSQ_DS_CONTROL_REG_SSBO_ENABLE 0x00008000 -#define A4XX_HLSQ_DS_CONTROL_REG_ENABLED 0x00010000 -#define A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 -#define A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 -static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__MASK; -} -#define A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH__MASK 
0xff000000 -#define A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH__SHIFT 24 -static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH(uint32_t val) -{ - return ((val) << A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH__MASK; -} - -#define REG_A4XX_HLSQ_GS_CONTROL_REG 0x000023c9 -#define A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff -#define A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH__SHIFT 0 -static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH(uint32_t val) -{ - return ((val) << A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH__MASK; -} -#define A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 -#define A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 -static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; -} -#define A4XX_HLSQ_GS_CONTROL_REG_SSBO_ENABLE 0x00008000 -#define A4XX_HLSQ_GS_CONTROL_REG_ENABLED 0x00010000 -#define A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 -#define A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 -static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__MASK; -} -#define A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 -#define A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__SHIFT 24 -static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(uint32_t val) -{ - return ((val) << A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__MASK; -} - -#define REG_A4XX_HLSQ_CS_CONTROL_REG 0x000023ca -#define A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff -#define A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH__SHIFT 0 -static inline uint32_t A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH(uint32_t val) -{ - return ((val) << 
A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH__MASK; -} -#define A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 -#define A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 -static inline uint32_t A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; -} -#define A4XX_HLSQ_CS_CONTROL_REG_SSBO_ENABLE 0x00008000 -#define A4XX_HLSQ_CS_CONTROL_REG_ENABLED 0x00010000 -#define A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 -#define A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 -static inline uint32_t A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET__MASK; -} -#define A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 -#define A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH__SHIFT 24 -static inline uint32_t A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH__MASK; -} - -#define REG_A4XX_HLSQ_CL_NDRANGE_0 0x000023cd -#define A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM__MASK 0x00000003 -#define A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM__SHIFT 0 -static inline uint32_t A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM__SHIFT) & A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM__MASK; -} -#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX__MASK 0x00000ffc -#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX__SHIFT 2 -static inline uint32_t A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX__SHIFT) & A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX__MASK; -} -#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY__MASK 0x003ff000 -#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY__SHIFT 12 -static inline uint32_t A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY(uint32_t val) 
-{ - return ((val) << A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY__SHIFT) & A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY__MASK; -} -#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ__MASK 0xffc00000 -#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ__SHIFT 22 -static inline uint32_t A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ__SHIFT) & A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ__MASK; -} - -#define REG_A4XX_HLSQ_CL_NDRANGE_1 0x000023ce -#define A4XX_HLSQ_CL_NDRANGE_1_SIZE_X__MASK 0xffffffff -#define A4XX_HLSQ_CL_NDRANGE_1_SIZE_X__SHIFT 0 -static inline uint32_t A4XX_HLSQ_CL_NDRANGE_1_SIZE_X(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CL_NDRANGE_1_SIZE_X__SHIFT) & A4XX_HLSQ_CL_NDRANGE_1_SIZE_X__MASK; -} - -#define REG_A4XX_HLSQ_CL_NDRANGE_2 0x000023cf - -#define REG_A4XX_HLSQ_CL_NDRANGE_3 0x000023d0 -#define A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y__MASK 0xffffffff -#define A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y__SHIFT 0 -static inline uint32_t A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y__SHIFT) & A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y__MASK; -} - -#define REG_A4XX_HLSQ_CL_NDRANGE_4 0x000023d1 - -#define REG_A4XX_HLSQ_CL_NDRANGE_5 0x000023d2 -#define A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z__MASK 0xffffffff -#define A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z__SHIFT 0 -static inline uint32_t A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z__SHIFT) & A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z__MASK; -} - -#define REG_A4XX_HLSQ_CL_NDRANGE_6 0x000023d3 - -#define REG_A4XX_HLSQ_CL_CONTROL_0 0x000023d4 -#define A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID__MASK 0x000000ff -#define A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID__SHIFT 0 -static inline uint32_t A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID__SHIFT) & A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID__MASK; -} -#define A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID__MASK 0xff000000 -#define A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID__SHIFT 24 -static 
inline uint32_t A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID__SHIFT) & A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID__MASK; -} - -#define REG_A4XX_HLSQ_CL_CONTROL_1 0x000023d5 - -#define REG_A4XX_HLSQ_CL_KERNEL_CONST 0x000023d6 - -#define REG_A4XX_HLSQ_CL_KERNEL_GROUP_X 0x000023d7 - -#define REG_A4XX_HLSQ_CL_KERNEL_GROUP_Y 0x000023d8 - -#define REG_A4XX_HLSQ_CL_KERNEL_GROUP_Z 0x000023d9 - -#define REG_A4XX_HLSQ_CL_WG_OFFSET 0x000023da - -#define REG_A4XX_HLSQ_UPDATE_CONTROL 0x000023db - -#define REG_A4XX_PC_BINNING_COMMAND 0x00000d00 -#define A4XX_PC_BINNING_COMMAND_BINNING_ENABLE 0x00000001 - -#define REG_A4XX_PC_TESSFACTOR_ADDR 0x00000d08 - -#define REG_A4XX_PC_DRAWCALL_SETUP_OVERRIDE 0x00000d0c - -#define REG_A4XX_PC_PERFCTR_PC_SEL_0 0x00000d10 - -#define REG_A4XX_PC_PERFCTR_PC_SEL_1 0x00000d11 - -#define REG_A4XX_PC_PERFCTR_PC_SEL_2 0x00000d12 - -#define REG_A4XX_PC_PERFCTR_PC_SEL_3 0x00000d13 - -#define REG_A4XX_PC_PERFCTR_PC_SEL_4 0x00000d14 - -#define REG_A4XX_PC_PERFCTR_PC_SEL_5 0x00000d15 - -#define REG_A4XX_PC_PERFCTR_PC_SEL_6 0x00000d16 - -#define REG_A4XX_PC_PERFCTR_PC_SEL_7 0x00000d17 - -#define REG_A4XX_PC_BIN_BASE 0x000021c0 - -#define REG_A4XX_PC_VSTREAM_CONTROL 0x000021c2 -#define A4XX_PC_VSTREAM_CONTROL_SIZE__MASK 0x003f0000 -#define A4XX_PC_VSTREAM_CONTROL_SIZE__SHIFT 16 -static inline uint32_t A4XX_PC_VSTREAM_CONTROL_SIZE(uint32_t val) -{ - return ((val) << A4XX_PC_VSTREAM_CONTROL_SIZE__SHIFT) & A4XX_PC_VSTREAM_CONTROL_SIZE__MASK; -} -#define A4XX_PC_VSTREAM_CONTROL_N__MASK 0x07c00000 -#define A4XX_PC_VSTREAM_CONTROL_N__SHIFT 22 -static inline uint32_t A4XX_PC_VSTREAM_CONTROL_N(uint32_t val) -{ - return ((val) << A4XX_PC_VSTREAM_CONTROL_N__SHIFT) & A4XX_PC_VSTREAM_CONTROL_N__MASK; -} - -#define REG_A4XX_PC_PRIM_VTX_CNTL 0x000021c4 -#define A4XX_PC_PRIM_VTX_CNTL_VAROUT__MASK 0x0000000f -#define A4XX_PC_PRIM_VTX_CNTL_VAROUT__SHIFT 0 -static inline uint32_t A4XX_PC_PRIM_VTX_CNTL_VAROUT(uint32_t 
val) -{ - return ((val) << A4XX_PC_PRIM_VTX_CNTL_VAROUT__SHIFT) & A4XX_PC_PRIM_VTX_CNTL_VAROUT__MASK; -} -#define A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART 0x00100000 -#define A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST 0x02000000 -#define A4XX_PC_PRIM_VTX_CNTL_PSIZE 0x04000000 - -#define REG_A4XX_PC_PRIM_VTX_CNTL2 0x000021c5 -#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK 0x00000007 -#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT 0 -static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK; -} -#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK 0x00000038 -#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT 3 -static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK; -} -#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE 0x00000040 - -#define REG_A4XX_PC_RESTART_INDEX 0x000021c6 - -#define REG_A4XX_PC_GS_PARAM 0x000021e5 -#define A4XX_PC_GS_PARAM_MAX_VERTICES__MASK 0x000003ff -#define A4XX_PC_GS_PARAM_MAX_VERTICES__SHIFT 0 -static inline uint32_t A4XX_PC_GS_PARAM_MAX_VERTICES(uint32_t val) -{ - return ((val) << A4XX_PC_GS_PARAM_MAX_VERTICES__SHIFT) & A4XX_PC_GS_PARAM_MAX_VERTICES__MASK; -} -#define A4XX_PC_GS_PARAM_INVOCATIONS__MASK 0x0000f800 -#define A4XX_PC_GS_PARAM_INVOCATIONS__SHIFT 11 -static inline uint32_t A4XX_PC_GS_PARAM_INVOCATIONS(uint32_t val) -{ - return ((val) << A4XX_PC_GS_PARAM_INVOCATIONS__SHIFT) & A4XX_PC_GS_PARAM_INVOCATIONS__MASK; -} -#define A4XX_PC_GS_PARAM_PRIMTYPE__MASK 0x01800000 -#define A4XX_PC_GS_PARAM_PRIMTYPE__SHIFT 23 -static inline uint32_t A4XX_PC_GS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << A4XX_PC_GS_PARAM_PRIMTYPE__SHIFT) & 
A4XX_PC_GS_PARAM_PRIMTYPE__MASK; -} -#define A4XX_PC_GS_PARAM_LAYER 0x80000000 - -#define REG_A4XX_PC_HS_PARAM 0x000021e7 -#define A4XX_PC_HS_PARAM_VERTICES_OUT__MASK 0x0000003f -#define A4XX_PC_HS_PARAM_VERTICES_OUT__SHIFT 0 -static inline uint32_t A4XX_PC_HS_PARAM_VERTICES_OUT(uint32_t val) -{ - return ((val) << A4XX_PC_HS_PARAM_VERTICES_OUT__SHIFT) & A4XX_PC_HS_PARAM_VERTICES_OUT__MASK; -} -#define A4XX_PC_HS_PARAM_SPACING__MASK 0x00600000 -#define A4XX_PC_HS_PARAM_SPACING__SHIFT 21 -static inline uint32_t A4XX_PC_HS_PARAM_SPACING(enum a4xx_tess_spacing val) -{ - return ((val) << A4XX_PC_HS_PARAM_SPACING__SHIFT) & A4XX_PC_HS_PARAM_SPACING__MASK; -} -#define A4XX_PC_HS_PARAM_CW 0x00800000 -#define A4XX_PC_HS_PARAM_CONNECTED 0x01000000 - -#define REG_A4XX_VBIF_VERSION 0x00003000 - -#define REG_A4XX_VBIF_CLKON 0x00003001 -#define A4XX_VBIF_CLKON_FORCE_ON_TESTBUS 0x00000001 - -#define REG_A4XX_VBIF_ABIT_SORT 0x0000301c - -#define REG_A4XX_VBIF_ABIT_SORT_CONF 0x0000301d - -#define REG_A4XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a - -#define REG_A4XX_VBIF_IN_RD_LIM_CONF0 0x0000302c - -#define REG_A4XX_VBIF_IN_RD_LIM_CONF1 0x0000302d - -#define REG_A4XX_VBIF_IN_WR_LIM_CONF0 0x00003030 - -#define REG_A4XX_VBIF_IN_WR_LIM_CONF1 0x00003031 - -#define REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB 0x00003049 - -#define REG_A4XX_VBIF_PERF_CNT_EN0 0x000030c0 - -#define REG_A4XX_VBIF_PERF_CNT_EN1 0x000030c1 - -#define REG_A4XX_VBIF_PERF_CNT_EN2 0x000030c2 - -#define REG_A4XX_VBIF_PERF_CNT_EN3 0x000030c3 - -#define REG_A4XX_VBIF_PERF_CNT_SEL0 0x000030d0 - -#define REG_A4XX_VBIF_PERF_CNT_SEL1 0x000030d1 - -#define REG_A4XX_VBIF_PERF_CNT_SEL2 0x000030d2 - -#define REG_A4XX_VBIF_PERF_CNT_SEL3 0x000030d3 - -#define REG_A4XX_VBIF_PERF_CNT_LOW0 0x000030d8 - -#define REG_A4XX_VBIF_PERF_CNT_LOW1 0x000030d9 - -#define REG_A4XX_VBIF_PERF_CNT_LOW2 0x000030da - -#define REG_A4XX_VBIF_PERF_CNT_LOW3 0x000030db - -#define REG_A4XX_VBIF_PERF_CNT_HIGH0 0x000030e0 - -#define REG_A4XX_VBIF_PERF_CNT_HIGH1 0x000030e1 
- -#define REG_A4XX_VBIF_PERF_CNT_HIGH2 0x000030e2 - -#define REG_A4XX_VBIF_PERF_CNT_HIGH3 0x000030e3 - -#define REG_A4XX_VBIF_PERF_PWR_CNT_EN0 0x00003100 - -#define REG_A4XX_VBIF_PERF_PWR_CNT_EN1 0x00003101 - -#define REG_A4XX_VBIF_PERF_PWR_CNT_EN2 0x00003102 - -#define REG_A4XX_UNKNOWN_0CC5 0x00000cc5 - -#define REG_A4XX_UNKNOWN_0CC6 0x00000cc6 - -#define REG_A4XX_UNKNOWN_0D01 0x00000d01 - -#define REG_A4XX_UNKNOWN_0E42 0x00000e42 - -#define REG_A4XX_UNKNOWN_0EC2 0x00000ec2 - -#define REG_A4XX_UNKNOWN_2001 0x00002001 - -#define REG_A4XX_UNKNOWN_209B 0x0000209b - -#define REG_A4XX_UNKNOWN_20EF 0x000020ef - -#define REG_A4XX_UNKNOWN_2152 0x00002152 - -#define REG_A4XX_UNKNOWN_2153 0x00002153 - -#define REG_A4XX_UNKNOWN_2154 0x00002154 - -#define REG_A4XX_UNKNOWN_2155 0x00002155 - -#define REG_A4XX_UNKNOWN_2156 0x00002156 - -#define REG_A4XX_UNKNOWN_2157 0x00002157 - -#define REG_A4XX_UNKNOWN_21C3 0x000021c3 - -#define REG_A4XX_UNKNOWN_21E6 0x000021e6 - -#define REG_A4XX_UNKNOWN_2209 0x00002209 - -#define REG_A4XX_UNKNOWN_22D7 0x000022d7 - -#define REG_A4XX_UNKNOWN_2352 0x00002352 - -#define REG_A4XX_TEX_SAMP_0 0x00000000 -#define A4XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR 0x00000001 -#define A4XX_TEX_SAMP_0_XY_MAG__MASK 0x00000006 -#define A4XX_TEX_SAMP_0_XY_MAG__SHIFT 1 -static inline uint32_t A4XX_TEX_SAMP_0_XY_MAG(enum a4xx_tex_filter val) -{ - return ((val) << A4XX_TEX_SAMP_0_XY_MAG__SHIFT) & A4XX_TEX_SAMP_0_XY_MAG__MASK; -} -#define A4XX_TEX_SAMP_0_XY_MIN__MASK 0x00000018 -#define A4XX_TEX_SAMP_0_XY_MIN__SHIFT 3 -static inline uint32_t A4XX_TEX_SAMP_0_XY_MIN(enum a4xx_tex_filter val) -{ - return ((val) << A4XX_TEX_SAMP_0_XY_MIN__SHIFT) & A4XX_TEX_SAMP_0_XY_MIN__MASK; -} -#define A4XX_TEX_SAMP_0_WRAP_S__MASK 0x000000e0 -#define A4XX_TEX_SAMP_0_WRAP_S__SHIFT 5 -static inline uint32_t A4XX_TEX_SAMP_0_WRAP_S(enum a4xx_tex_clamp val) -{ - return ((val) << A4XX_TEX_SAMP_0_WRAP_S__SHIFT) & A4XX_TEX_SAMP_0_WRAP_S__MASK; -} -#define A4XX_TEX_SAMP_0_WRAP_T__MASK 0x00000700 
-#define A4XX_TEX_SAMP_0_WRAP_T__SHIFT 8 -static inline uint32_t A4XX_TEX_SAMP_0_WRAP_T(enum a4xx_tex_clamp val) -{ - return ((val) << A4XX_TEX_SAMP_0_WRAP_T__SHIFT) & A4XX_TEX_SAMP_0_WRAP_T__MASK; -} -#define A4XX_TEX_SAMP_0_WRAP_R__MASK 0x00003800 -#define A4XX_TEX_SAMP_0_WRAP_R__SHIFT 11 -static inline uint32_t A4XX_TEX_SAMP_0_WRAP_R(enum a4xx_tex_clamp val) -{ - return ((val) << A4XX_TEX_SAMP_0_WRAP_R__SHIFT) & A4XX_TEX_SAMP_0_WRAP_R__MASK; -} -#define A4XX_TEX_SAMP_0_ANISO__MASK 0x0001c000 -#define A4XX_TEX_SAMP_0_ANISO__SHIFT 14 -static inline uint32_t A4XX_TEX_SAMP_0_ANISO(enum a4xx_tex_aniso val) -{ - return ((val) << A4XX_TEX_SAMP_0_ANISO__SHIFT) & A4XX_TEX_SAMP_0_ANISO__MASK; -} -#define A4XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000 -#define A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19 -static inline uint32_t A4XX_TEX_SAMP_0_LOD_BIAS(float val) -{ - return ((((int32_t)(val * 256.0))) << A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A4XX_TEX_SAMP_0_LOD_BIAS__MASK; -} - -#define REG_A4XX_TEX_SAMP_1 0x00000001 -#define A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e -#define A4XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT 1 -static inline uint32_t A4XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val) -{ - return ((val) << A4XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK; -} -#define A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF 0x00000010 -#define A4XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020 -#define A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040 -#define A4XX_TEX_SAMP_1_MAX_LOD__MASK 0x000fff00 -#define A4XX_TEX_SAMP_1_MAX_LOD__SHIFT 8 -static inline uint32_t A4XX_TEX_SAMP_1_MAX_LOD(float val) -{ - return ((((uint32_t)(val * 256.0))) << A4XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A4XX_TEX_SAMP_1_MAX_LOD__MASK; -} -#define A4XX_TEX_SAMP_1_MIN_LOD__MASK 0xfff00000 -#define A4XX_TEX_SAMP_1_MIN_LOD__SHIFT 20 -static inline uint32_t A4XX_TEX_SAMP_1_MIN_LOD(float val) -{ - return ((((uint32_t)(val * 256.0))) << A4XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A4XX_TEX_SAMP_1_MIN_LOD__MASK; -} - -#define 
REG_A4XX_TEX_CONST_0 0x00000000 -#define A4XX_TEX_CONST_0_TILED 0x00000001 -#define A4XX_TEX_CONST_0_SRGB 0x00000004 -#define A4XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070 -#define A4XX_TEX_CONST_0_SWIZ_X__SHIFT 4 -static inline uint32_t A4XX_TEX_CONST_0_SWIZ_X(enum a4xx_tex_swiz val) -{ - return ((val) << A4XX_TEX_CONST_0_SWIZ_X__SHIFT) & A4XX_TEX_CONST_0_SWIZ_X__MASK; -} -#define A4XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380 -#define A4XX_TEX_CONST_0_SWIZ_Y__SHIFT 7 -static inline uint32_t A4XX_TEX_CONST_0_SWIZ_Y(enum a4xx_tex_swiz val) -{ - return ((val) << A4XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A4XX_TEX_CONST_0_SWIZ_Y__MASK; -} -#define A4XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00 -#define A4XX_TEX_CONST_0_SWIZ_Z__SHIFT 10 -static inline uint32_t A4XX_TEX_CONST_0_SWIZ_Z(enum a4xx_tex_swiz val) -{ - return ((val) << A4XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A4XX_TEX_CONST_0_SWIZ_Z__MASK; -} -#define A4XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000 -#define A4XX_TEX_CONST_0_SWIZ_W__SHIFT 13 -static inline uint32_t A4XX_TEX_CONST_0_SWIZ_W(enum a4xx_tex_swiz val) -{ - return ((val) << A4XX_TEX_CONST_0_SWIZ_W__SHIFT) & A4XX_TEX_CONST_0_SWIZ_W__MASK; -} -#define A4XX_TEX_CONST_0_MIPLVLS__MASK 0x000f0000 -#define A4XX_TEX_CONST_0_MIPLVLS__SHIFT 16 -static inline uint32_t A4XX_TEX_CONST_0_MIPLVLS(uint32_t val) -{ - return ((val) << A4XX_TEX_CONST_0_MIPLVLS__SHIFT) & A4XX_TEX_CONST_0_MIPLVLS__MASK; -} -#define A4XX_TEX_CONST_0_FMT__MASK 0x1fc00000 -#define A4XX_TEX_CONST_0_FMT__SHIFT 22 -static inline uint32_t A4XX_TEX_CONST_0_FMT(enum a4xx_tex_fmt val) -{ - return ((val) << A4XX_TEX_CONST_0_FMT__SHIFT) & A4XX_TEX_CONST_0_FMT__MASK; -} -#define A4XX_TEX_CONST_0_TYPE__MASK 0x60000000 -#define A4XX_TEX_CONST_0_TYPE__SHIFT 29 -static inline uint32_t A4XX_TEX_CONST_0_TYPE(enum a4xx_tex_type val) -{ - return ((val) << A4XX_TEX_CONST_0_TYPE__SHIFT) & A4XX_TEX_CONST_0_TYPE__MASK; -} - -#define REG_A4XX_TEX_CONST_1 0x00000001 -#define A4XX_TEX_CONST_1_HEIGHT__MASK 0x00007fff -#define 
A4XX_TEX_CONST_1_HEIGHT__SHIFT 0 -static inline uint32_t A4XX_TEX_CONST_1_HEIGHT(uint32_t val) -{ - return ((val) << A4XX_TEX_CONST_1_HEIGHT__SHIFT) & A4XX_TEX_CONST_1_HEIGHT__MASK; -} -#define A4XX_TEX_CONST_1_WIDTH__MASK 0x3fff8000 -#define A4XX_TEX_CONST_1_WIDTH__SHIFT 15 -static inline uint32_t A4XX_TEX_CONST_1_WIDTH(uint32_t val) -{ - return ((val) << A4XX_TEX_CONST_1_WIDTH__SHIFT) & A4XX_TEX_CONST_1_WIDTH__MASK; -} - -#define REG_A4XX_TEX_CONST_2 0x00000002 -#define A4XX_TEX_CONST_2_FETCHSIZE__MASK 0x0000000f -#define A4XX_TEX_CONST_2_FETCHSIZE__SHIFT 0 -static inline uint32_t A4XX_TEX_CONST_2_FETCHSIZE(enum a4xx_tex_fetchsize val) -{ - return ((val) << A4XX_TEX_CONST_2_FETCHSIZE__SHIFT) & A4XX_TEX_CONST_2_FETCHSIZE__MASK; -} -#define A4XX_TEX_CONST_2_PITCH__MASK 0x3ffffe00 -#define A4XX_TEX_CONST_2_PITCH__SHIFT 9 -static inline uint32_t A4XX_TEX_CONST_2_PITCH(uint32_t val) -{ - return ((val) << A4XX_TEX_CONST_2_PITCH__SHIFT) & A4XX_TEX_CONST_2_PITCH__MASK; -} -#define A4XX_TEX_CONST_2_SWAP__MASK 0xc0000000 -#define A4XX_TEX_CONST_2_SWAP__SHIFT 30 -static inline uint32_t A4XX_TEX_CONST_2_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A4XX_TEX_CONST_2_SWAP__SHIFT) & A4XX_TEX_CONST_2_SWAP__MASK; -} - -#define REG_A4XX_TEX_CONST_3 0x00000003 -#define A4XX_TEX_CONST_3_LAYERSZ__MASK 0x00003fff -#define A4XX_TEX_CONST_3_LAYERSZ__SHIFT 0 -static inline uint32_t A4XX_TEX_CONST_3_LAYERSZ(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << A4XX_TEX_CONST_3_LAYERSZ__SHIFT) & A4XX_TEX_CONST_3_LAYERSZ__MASK; -} -#define A4XX_TEX_CONST_3_DEPTH__MASK 0x7ffc0000 -#define A4XX_TEX_CONST_3_DEPTH__SHIFT 18 -static inline uint32_t A4XX_TEX_CONST_3_DEPTH(uint32_t val) -{ - return ((val) << A4XX_TEX_CONST_3_DEPTH__SHIFT) & A4XX_TEX_CONST_3_DEPTH__MASK; -} - -#define REG_A4XX_TEX_CONST_4 0x00000004 -#define A4XX_TEX_CONST_4_LAYERSZ__MASK 0x0000000f -#define A4XX_TEX_CONST_4_LAYERSZ__SHIFT 0 -static inline uint32_t A4XX_TEX_CONST_4_LAYERSZ(uint32_t val) -{ - 
assert(!(val & 0xfff)); - return ((val >> 12) << A4XX_TEX_CONST_4_LAYERSZ__SHIFT) & A4XX_TEX_CONST_4_LAYERSZ__MASK; -} -#define A4XX_TEX_CONST_4_BASE__MASK 0xffffffe0 -#define A4XX_TEX_CONST_4_BASE__SHIFT 5 -static inline uint32_t A4XX_TEX_CONST_4_BASE(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A4XX_TEX_CONST_4_BASE__SHIFT) & A4XX_TEX_CONST_4_BASE__MASK; -} - -#define REG_A4XX_TEX_CONST_5 0x00000005 - -#define REG_A4XX_TEX_CONST_6 0x00000006 - -#define REG_A4XX_TEX_CONST_7 0x00000007 - -#define REG_A4XX_SSBO_0_0 0x00000000 -#define A4XX_SSBO_0_0_BASE__MASK 0xffffffe0 -#define A4XX_SSBO_0_0_BASE__SHIFT 5 -static inline uint32_t A4XX_SSBO_0_0_BASE(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A4XX_SSBO_0_0_BASE__SHIFT) & A4XX_SSBO_0_0_BASE__MASK; -} - -#define REG_A4XX_SSBO_0_1 0x00000001 -#define A4XX_SSBO_0_1_PITCH__MASK 0x003fffff -#define A4XX_SSBO_0_1_PITCH__SHIFT 0 -static inline uint32_t A4XX_SSBO_0_1_PITCH(uint32_t val) -{ - return ((val) << A4XX_SSBO_0_1_PITCH__SHIFT) & A4XX_SSBO_0_1_PITCH__MASK; -} - -#define REG_A4XX_SSBO_0_2 0x00000002 -#define A4XX_SSBO_0_2_ARRAY_PITCH__MASK 0x03fff000 -#define A4XX_SSBO_0_2_ARRAY_PITCH__SHIFT 12 -static inline uint32_t A4XX_SSBO_0_2_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << A4XX_SSBO_0_2_ARRAY_PITCH__SHIFT) & A4XX_SSBO_0_2_ARRAY_PITCH__MASK; -} - -#define REG_A4XX_SSBO_0_3 0x00000003 -#define A4XX_SSBO_0_3_CPP__MASK 0x0000003f -#define A4XX_SSBO_0_3_CPP__SHIFT 0 -static inline uint32_t A4XX_SSBO_0_3_CPP(uint32_t val) -{ - return ((val) << A4XX_SSBO_0_3_CPP__SHIFT) & A4XX_SSBO_0_3_CPP__MASK; -} - -#define REG_A4XX_SSBO_1_0 0x00000000 -#define A4XX_SSBO_1_0_CPP__MASK 0x0000001f -#define A4XX_SSBO_1_0_CPP__SHIFT 0 -static inline uint32_t A4XX_SSBO_1_0_CPP(uint32_t val) -{ - return ((val) << A4XX_SSBO_1_0_CPP__SHIFT) & A4XX_SSBO_1_0_CPP__MASK; -} -#define A4XX_SSBO_1_0_FMT__MASK 0x0000ff00 -#define A4XX_SSBO_1_0_FMT__SHIFT 8 -static inline 
uint32_t A4XX_SSBO_1_0_FMT(enum a4xx_color_fmt val) -{ - return ((val) << A4XX_SSBO_1_0_FMT__SHIFT) & A4XX_SSBO_1_0_FMT__MASK; -} -#define A4XX_SSBO_1_0_WIDTH__MASK 0xffff0000 -#define A4XX_SSBO_1_0_WIDTH__SHIFT 16 -static inline uint32_t A4XX_SSBO_1_0_WIDTH(uint32_t val) -{ - return ((val) << A4XX_SSBO_1_0_WIDTH__SHIFT) & A4XX_SSBO_1_0_WIDTH__MASK; -} - -#define REG_A4XX_SSBO_1_1 0x00000001 -#define A4XX_SSBO_1_1_HEIGHT__MASK 0x0000ffff -#define A4XX_SSBO_1_1_HEIGHT__SHIFT 0 -static inline uint32_t A4XX_SSBO_1_1_HEIGHT(uint32_t val) -{ - return ((val) << A4XX_SSBO_1_1_HEIGHT__SHIFT) & A4XX_SSBO_1_1_HEIGHT__MASK; -} -#define A4XX_SSBO_1_1_DEPTH__MASK 0xffff0000 -#define A4XX_SSBO_1_1_DEPTH__SHIFT 16 -static inline uint32_t A4XX_SSBO_1_1_DEPTH(uint32_t val) -{ - return ((val) << A4XX_SSBO_1_1_DEPTH__SHIFT) & A4XX_SSBO_1_1_DEPTH__MASK; -} - - -#endif /* A4XX_XML */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_context.c mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_context.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -101,13 +101,13 @@ fd_hw_query_init(pctx); fd4_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "vs_pvt"); fd4_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "fs_pvt"); fd4_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size"); fd_context_setup_common_vbos(&fd4_ctx->base); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_context.h mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_context.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_context.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_context.h 2019-03-31 
23:16:37.000000000 +0000 @@ -31,7 +31,7 @@ #include "freedreno_context.h" -#include "ir3_shader.h" +#include "ir3/ir3_shader.h" struct fd4_context { struct fd_context base; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_emit.c mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_emit.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_emit.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_emit.c 2019-03-31 23:16:37.000000000 +0000 @@ -48,7 +48,7 @@ * sizedwords: size of const value buffer */ static void -fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type, +fd4_emit_const(struct fd_ringbuffer *ring, gl_shader_stage type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc) { @@ -86,7 +86,7 @@ } static void -fd4_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, +fd4_emit_const_bo(struct fd_ringbuffer *ring, gl_shader_stage type, boolean write, uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets) { uint32_t anum = align(num, 4); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_emit.h mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_emit.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_emit.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_emit.h 2019-03-31 23:16:37.000000000 +0000 @@ -32,7 +32,7 @@ #include "freedreno_context.h" #include "fd4_format.h" #include "fd4_program.h" -#include "ir3_shader.h" +#include "ir3_gallium.h" struct fd_ringbuffer; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c 2019-03-31 23:16:37.000000000 +0000 @@ -582,7 +582,7 @@ struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; 
if (!pipe->bo) { pipe->bo = fd_bo_new(ctx->dev, 0x40000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i); } OUT_RELOCW(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE_DATA_ADDRESS[i] */ } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_program.c mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_program.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_program.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_program.c 2019-03-31 23:16:37.000000000 +0000 @@ -39,7 +39,7 @@ static struct ir3_shader * create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso, - enum shader_t type) + gl_shader_stage type) { struct fd_context *ctx = fd_context(pctx); struct ir3_compiler *compiler = ctx->screen->compiler; @@ -50,7 +50,7 @@ fd4_fp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { - return create_shader_stateobj(pctx, cso, SHADER_FRAGMENT); + return create_shader_stateobj(pctx, cso, MESA_SHADER_FRAGMENT); } static void @@ -64,7 +64,7 @@ fd4_vp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { - return create_shader_stateobj(pctx, cso, SHADER_VERTEX); + return create_shader_stateobj(pctx, cso, MESA_SHADER_VERTEX); } static void @@ -101,7 +101,7 @@ OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) | CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER)); } else { - OUT_RELOC(ring, so->bo, 0, + OUT_RELOCD(ring, so->bo, 0, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER), 0); } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_program.h mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_program.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_program.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_program.h 2019-03-31 23:16:37.000000000 +0000 @@ -29,7 +29,8 @@ #include "pipe/p_context.h" #include "freedreno_context.h" -#include "ir3_shader.h" + +#include 
"ir3/ir3_shader.h" struct fd4_emit; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_screen.c mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_screen.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -30,7 +30,8 @@ #include "fd4_screen.h" #include "fd4_context.h" #include "fd4_format.h" -#include "ir3_compiler.h" + +#include "ir3/ir3_compiler.h" static boolean fd4_screen_is_format_supported(struct pipe_screen *pscreen, diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/a5xx.xml.h mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/a5xx.xml.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/a5xx.xml.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/a5xx.xml.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,5226 +0,0 @@ -#ifndef A5XX_XML -#define A5XX_XML - -/* Autogenerated file, DO NOT EDIT manually! 
- -This file was generated by the rules-ng-ng headergen tool in this git repository: -http://github.com/freedreno/envytools/ -git clone https://github.com/freedreno/envytools.git - -The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 37936 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14201 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 42864 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 140514 bytes, from 2018-10-08 21:57:35) -- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) -- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) - -Copyright (C) 2013-2018 by the following authors: -- Rob Clark (robclark) -- Ilia Mirkin (imirkin) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial 
-portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - - -enum a5xx_color_fmt { - RB5_A8_UNORM = 2, - RB5_R8_UNORM = 3, - RB5_R8_SNORM = 4, - RB5_R8_UINT = 5, - RB5_R8_SINT = 6, - RB5_R4G4B4A4_UNORM = 8, - RB5_R5G5B5A1_UNORM = 10, - RB5_R5G6B5_UNORM = 14, - RB5_R8G8_UNORM = 15, - RB5_R8G8_SNORM = 16, - RB5_R8G8_UINT = 17, - RB5_R8G8_SINT = 18, - RB5_R16_UNORM = 21, - RB5_R16_SNORM = 22, - RB5_R16_FLOAT = 23, - RB5_R16_UINT = 24, - RB5_R16_SINT = 25, - RB5_R8G8B8A8_UNORM = 48, - RB5_R8G8B8_UNORM = 49, - RB5_R8G8B8A8_SNORM = 50, - RB5_R8G8B8A8_UINT = 51, - RB5_R8G8B8A8_SINT = 52, - RB5_R10G10B10A2_UNORM = 55, - RB5_R10G10B10A2_UINT = 58, - RB5_R11G11B10_FLOAT = 66, - RB5_R16G16_UNORM = 67, - RB5_R16G16_SNORM = 68, - RB5_R16G16_FLOAT = 69, - RB5_R16G16_UINT = 70, - RB5_R16G16_SINT = 71, - RB5_R32_FLOAT = 74, - RB5_R32_UINT = 75, - RB5_R32_SINT = 76, - RB5_R16G16B16A16_UNORM = 96, - RB5_R16G16B16A16_SNORM = 97, - RB5_R16G16B16A16_FLOAT = 98, - RB5_R16G16B16A16_UINT = 99, - RB5_R16G16B16A16_SINT = 100, - RB5_R32G32_FLOAT = 103, - RB5_R32G32_UINT = 104, - RB5_R32G32_SINT = 105, - RB5_R32G32B32A32_FLOAT = 130, - RB5_R32G32B32A32_UINT = 131, - RB5_R32G32B32A32_SINT = 132, -}; - -enum a5xx_tile_mode { - TILE5_LINEAR = 0, - TILE5_2 = 2, - TILE5_3 = 3, -}; - -enum a5xx_vtx_fmt { - VFMT5_8_UNORM = 3, - VFMT5_8_SNORM = 4, - VFMT5_8_UINT = 5, - VFMT5_8_SINT = 6, - VFMT5_8_8_UNORM = 15, - VFMT5_8_8_SNORM = 16, - VFMT5_8_8_UINT = 17, - VFMT5_8_8_SINT = 18, - VFMT5_16_UNORM = 21, - VFMT5_16_SNORM = 22, - VFMT5_16_FLOAT = 23, - 
VFMT5_16_UINT = 24, - VFMT5_16_SINT = 25, - VFMT5_8_8_8_UNORM = 33, - VFMT5_8_8_8_SNORM = 34, - VFMT5_8_8_8_UINT = 35, - VFMT5_8_8_8_SINT = 36, - VFMT5_8_8_8_8_UNORM = 48, - VFMT5_8_8_8_8_SNORM = 50, - VFMT5_8_8_8_8_UINT = 51, - VFMT5_8_8_8_8_SINT = 52, - VFMT5_10_10_10_2_UNORM = 54, - VFMT5_10_10_10_2_SNORM = 57, - VFMT5_10_10_10_2_UINT = 58, - VFMT5_10_10_10_2_SINT = 59, - VFMT5_11_11_10_FLOAT = 66, - VFMT5_16_16_UNORM = 67, - VFMT5_16_16_SNORM = 68, - VFMT5_16_16_FLOAT = 69, - VFMT5_16_16_UINT = 70, - VFMT5_16_16_SINT = 71, - VFMT5_32_UNORM = 72, - VFMT5_32_SNORM = 73, - VFMT5_32_FLOAT = 74, - VFMT5_32_UINT = 75, - VFMT5_32_SINT = 76, - VFMT5_32_FIXED = 77, - VFMT5_16_16_16_UNORM = 88, - VFMT5_16_16_16_SNORM = 89, - VFMT5_16_16_16_FLOAT = 90, - VFMT5_16_16_16_UINT = 91, - VFMT5_16_16_16_SINT = 92, - VFMT5_16_16_16_16_UNORM = 96, - VFMT5_16_16_16_16_SNORM = 97, - VFMT5_16_16_16_16_FLOAT = 98, - VFMT5_16_16_16_16_UINT = 99, - VFMT5_16_16_16_16_SINT = 100, - VFMT5_32_32_UNORM = 101, - VFMT5_32_32_SNORM = 102, - VFMT5_32_32_FLOAT = 103, - VFMT5_32_32_UINT = 104, - VFMT5_32_32_SINT = 105, - VFMT5_32_32_FIXED = 106, - VFMT5_32_32_32_UNORM = 112, - VFMT5_32_32_32_SNORM = 113, - VFMT5_32_32_32_UINT = 114, - VFMT5_32_32_32_SINT = 115, - VFMT5_32_32_32_FLOAT = 116, - VFMT5_32_32_32_FIXED = 117, - VFMT5_32_32_32_32_UNORM = 128, - VFMT5_32_32_32_32_SNORM = 129, - VFMT5_32_32_32_32_FLOAT = 130, - VFMT5_32_32_32_32_UINT = 131, - VFMT5_32_32_32_32_SINT = 132, - VFMT5_32_32_32_32_FIXED = 133, -}; - -enum a5xx_tex_fmt { - TFMT5_A8_UNORM = 2, - TFMT5_8_UNORM = 3, - TFMT5_8_SNORM = 4, - TFMT5_8_UINT = 5, - TFMT5_8_SINT = 6, - TFMT5_4_4_4_4_UNORM = 8, - TFMT5_5_5_5_1_UNORM = 10, - TFMT5_5_6_5_UNORM = 14, - TFMT5_8_8_UNORM = 15, - TFMT5_8_8_SNORM = 16, - TFMT5_8_8_UINT = 17, - TFMT5_8_8_SINT = 18, - TFMT5_L8_A8_UNORM = 19, - TFMT5_16_UNORM = 21, - TFMT5_16_SNORM = 22, - TFMT5_16_FLOAT = 23, - TFMT5_16_UINT = 24, - TFMT5_16_SINT = 25, - TFMT5_8_8_8_8_UNORM = 48, - TFMT5_8_8_8_UNORM = 
49, - TFMT5_8_8_8_8_SNORM = 50, - TFMT5_8_8_8_8_UINT = 51, - TFMT5_8_8_8_8_SINT = 52, - TFMT5_9_9_9_E5_FLOAT = 53, - TFMT5_10_10_10_2_UNORM = 54, - TFMT5_10_10_10_2_UINT = 58, - TFMT5_11_11_10_FLOAT = 66, - TFMT5_16_16_UNORM = 67, - TFMT5_16_16_SNORM = 68, - TFMT5_16_16_FLOAT = 69, - TFMT5_16_16_UINT = 70, - TFMT5_16_16_SINT = 71, - TFMT5_32_FLOAT = 74, - TFMT5_32_UINT = 75, - TFMT5_32_SINT = 76, - TFMT5_16_16_16_16_UNORM = 96, - TFMT5_16_16_16_16_SNORM = 97, - TFMT5_16_16_16_16_FLOAT = 98, - TFMT5_16_16_16_16_UINT = 99, - TFMT5_16_16_16_16_SINT = 100, - TFMT5_32_32_FLOAT = 103, - TFMT5_32_32_UINT = 104, - TFMT5_32_32_SINT = 105, - TFMT5_32_32_32_UINT = 114, - TFMT5_32_32_32_SINT = 115, - TFMT5_32_32_32_FLOAT = 116, - TFMT5_32_32_32_32_FLOAT = 130, - TFMT5_32_32_32_32_UINT = 131, - TFMT5_32_32_32_32_SINT = 132, - TFMT5_X8Z24_UNORM = 160, - TFMT5_ETC2_RG11_UNORM = 171, - TFMT5_ETC2_RG11_SNORM = 172, - TFMT5_ETC2_R11_UNORM = 173, - TFMT5_ETC2_R11_SNORM = 174, - TFMT5_ETC1 = 175, - TFMT5_ETC2_RGB8 = 176, - TFMT5_ETC2_RGBA8 = 177, - TFMT5_ETC2_RGB8A1 = 178, - TFMT5_DXT1 = 179, - TFMT5_DXT3 = 180, - TFMT5_DXT5 = 181, - TFMT5_RGTC1_UNORM = 183, - TFMT5_RGTC1_SNORM = 184, - TFMT5_RGTC2_UNORM = 187, - TFMT5_RGTC2_SNORM = 188, - TFMT5_BPTC_UFLOAT = 190, - TFMT5_BPTC_FLOAT = 191, - TFMT5_BPTC = 192, - TFMT5_ASTC_4x4 = 193, - TFMT5_ASTC_5x4 = 194, - TFMT5_ASTC_5x5 = 195, - TFMT5_ASTC_6x5 = 196, - TFMT5_ASTC_6x6 = 197, - TFMT5_ASTC_8x5 = 198, - TFMT5_ASTC_8x6 = 199, - TFMT5_ASTC_8x8 = 200, - TFMT5_ASTC_10x5 = 201, - TFMT5_ASTC_10x6 = 202, - TFMT5_ASTC_10x8 = 203, - TFMT5_ASTC_10x10 = 204, - TFMT5_ASTC_12x10 = 205, - TFMT5_ASTC_12x12 = 206, -}; - -enum a5xx_tex_fetchsize { - TFETCH5_1_BYTE = 0, - TFETCH5_2_BYTE = 1, - TFETCH5_4_BYTE = 2, - TFETCH5_8_BYTE = 3, - TFETCH5_16_BYTE = 4, -}; - -enum a5xx_depth_format { - DEPTH5_NONE = 0, - DEPTH5_16 = 1, - DEPTH5_24_8 = 2, - DEPTH5_32 = 4, -}; - -enum a5xx_blit_buf { - BLIT_MRT0 = 0, - BLIT_MRT1 = 1, - BLIT_MRT2 = 2, - BLIT_MRT3 = 3, 
- BLIT_MRT4 = 4, - BLIT_MRT5 = 5, - BLIT_MRT6 = 6, - BLIT_MRT7 = 7, - BLIT_ZS = 8, - BLIT_S = 9, -}; - -enum a5xx_cp_perfcounter_select { - PERF_CP_ALWAYS_COUNT = 0, - PERF_CP_BUSY_GFX_CORE_IDLE = 1, - PERF_CP_BUSY_CYCLES = 2, - PERF_CP_PFP_IDLE = 3, - PERF_CP_PFP_BUSY_WORKING = 4, - PERF_CP_PFP_STALL_CYCLES_ANY = 5, - PERF_CP_PFP_STARVE_CYCLES_ANY = 6, - PERF_CP_PFP_ICACHE_MISS = 7, - PERF_CP_PFP_ICACHE_HIT = 8, - PERF_CP_PFP_MATCH_PM4_PKT_PROFILE = 9, - PERF_CP_ME_BUSY_WORKING = 10, - PERF_CP_ME_IDLE = 11, - PERF_CP_ME_STARVE_CYCLES_ANY = 12, - PERF_CP_ME_FIFO_EMPTY_PFP_IDLE = 13, - PERF_CP_ME_FIFO_EMPTY_PFP_BUSY = 14, - PERF_CP_ME_FIFO_FULL_ME_BUSY = 15, - PERF_CP_ME_FIFO_FULL_ME_NON_WORKING = 16, - PERF_CP_ME_STALL_CYCLES_ANY = 17, - PERF_CP_ME_ICACHE_MISS = 18, - PERF_CP_ME_ICACHE_HIT = 19, - PERF_CP_NUM_PREEMPTIONS = 20, - PERF_CP_PREEMPTION_REACTION_DELAY = 21, - PERF_CP_PREEMPTION_SWITCH_OUT_TIME = 22, - PERF_CP_PREEMPTION_SWITCH_IN_TIME = 23, - PERF_CP_DEAD_DRAWS_IN_BIN_RENDER = 24, - PERF_CP_PREDICATED_DRAWS_KILLED = 25, - PERF_CP_MODE_SWITCH = 26, - PERF_CP_ZPASS_DONE = 27, - PERF_CP_CONTEXT_DONE = 28, - PERF_CP_CACHE_FLUSH = 29, - PERF_CP_LONG_PREEMPTIONS = 30, -}; - -enum a5xx_rbbm_perfcounter_select { - PERF_RBBM_ALWAYS_COUNT = 0, - PERF_RBBM_ALWAYS_ON = 1, - PERF_RBBM_TSE_BUSY = 2, - PERF_RBBM_RAS_BUSY = 3, - PERF_RBBM_PC_DCALL_BUSY = 4, - PERF_RBBM_PC_VSD_BUSY = 5, - PERF_RBBM_STATUS_MASKED = 6, - PERF_RBBM_COM_BUSY = 7, - PERF_RBBM_DCOM_BUSY = 8, - PERF_RBBM_VBIF_BUSY = 9, - PERF_RBBM_VSC_BUSY = 10, - PERF_RBBM_TESS_BUSY = 11, - PERF_RBBM_UCHE_BUSY = 12, - PERF_RBBM_HLSQ_BUSY = 13, -}; - -enum a5xx_pc_perfcounter_select { - PERF_PC_BUSY_CYCLES = 0, - PERF_PC_WORKING_CYCLES = 1, - PERF_PC_STALL_CYCLES_VFD = 2, - PERF_PC_STALL_CYCLES_TSE = 3, - PERF_PC_STALL_CYCLES_VPC = 4, - PERF_PC_STALL_CYCLES_UCHE = 5, - PERF_PC_STALL_CYCLES_TESS = 6, - PERF_PC_STALL_CYCLES_TSE_ONLY = 7, - PERF_PC_STALL_CYCLES_VPC_ONLY = 8, - PERF_PC_PASS1_TF_STALL_CYCLES = 9, - 
PERF_PC_STARVE_CYCLES_FOR_INDEX = 10, - PERF_PC_STARVE_CYCLES_FOR_TESS_FACTOR = 11, - PERF_PC_STARVE_CYCLES_FOR_VIZ_STREAM = 12, - PERF_PC_STARVE_CYCLES_FOR_POSITION = 13, - PERF_PC_STARVE_CYCLES_DI = 14, - PERF_PC_VIS_STREAMS_LOADED = 15, - PERF_PC_INSTANCES = 16, - PERF_PC_VPC_PRIMITIVES = 17, - PERF_PC_DEAD_PRIM = 18, - PERF_PC_LIVE_PRIM = 19, - PERF_PC_VERTEX_HITS = 20, - PERF_PC_IA_VERTICES = 21, - PERF_PC_IA_PRIMITIVES = 22, - PERF_PC_GS_PRIMITIVES = 23, - PERF_PC_HS_INVOCATIONS = 24, - PERF_PC_DS_INVOCATIONS = 25, - PERF_PC_VS_INVOCATIONS = 26, - PERF_PC_GS_INVOCATIONS = 27, - PERF_PC_DS_PRIMITIVES = 28, - PERF_PC_VPC_POS_DATA_TRANSACTION = 29, - PERF_PC_3D_DRAWCALLS = 30, - PERF_PC_2D_DRAWCALLS = 31, - PERF_PC_NON_DRAWCALL_GLOBAL_EVENTS = 32, - PERF_TESS_BUSY_CYCLES = 33, - PERF_TESS_WORKING_CYCLES = 34, - PERF_TESS_STALL_CYCLES_PC = 35, - PERF_TESS_STARVE_CYCLES_PC = 36, -}; - -enum a5xx_vfd_perfcounter_select { - PERF_VFD_BUSY_CYCLES = 0, - PERF_VFD_STALL_CYCLES_UCHE = 1, - PERF_VFD_STALL_CYCLES_VPC_ALLOC = 2, - PERF_VFD_STALL_CYCLES_MISS_VB = 3, - PERF_VFD_STALL_CYCLES_MISS_Q = 4, - PERF_VFD_STALL_CYCLES_SP_INFO = 5, - PERF_VFD_STALL_CYCLES_SP_ATTR = 6, - PERF_VFD_STALL_CYCLES_VFDP_VB = 7, - PERF_VFD_STALL_CYCLES_VFDP_Q = 8, - PERF_VFD_DECODER_PACKER_STALL = 9, - PERF_VFD_STARVE_CYCLES_UCHE = 10, - PERF_VFD_RBUFFER_FULL = 11, - PERF_VFD_ATTR_INFO_FIFO_FULL = 12, - PERF_VFD_DECODED_ATTRIBUTE_BYTES = 13, - PERF_VFD_NUM_ATTRIBUTES = 14, - PERF_VFD_INSTRUCTIONS = 15, - PERF_VFD_UPPER_SHADER_FIBERS = 16, - PERF_VFD_LOWER_SHADER_FIBERS = 17, - PERF_VFD_MODE_0_FIBERS = 18, - PERF_VFD_MODE_1_FIBERS = 19, - PERF_VFD_MODE_2_FIBERS = 20, - PERF_VFD_MODE_3_FIBERS = 21, - PERF_VFD_MODE_4_FIBERS = 22, - PERF_VFD_TOTAL_VERTICES = 23, - PERF_VFD_NUM_ATTR_MISS = 24, - PERF_VFD_1_BURST_REQ = 25, - PERF_VFDP_STALL_CYCLES_VFD = 26, - PERF_VFDP_STALL_CYCLES_VFD_INDEX = 27, - PERF_VFDP_STALL_CYCLES_VFD_PROG = 28, - PERF_VFDP_STARVE_CYCLES_PC = 29, - 
PERF_VFDP_VS_STAGE_32_WAVES = 30, -}; - -enum a5xx_hlsq_perfcounter_select { - PERF_HLSQ_BUSY_CYCLES = 0, - PERF_HLSQ_STALL_CYCLES_UCHE = 1, - PERF_HLSQ_STALL_CYCLES_SP_STATE = 2, - PERF_HLSQ_STALL_CYCLES_SP_FS_STAGE = 3, - PERF_HLSQ_UCHE_LATENCY_CYCLES = 4, - PERF_HLSQ_UCHE_LATENCY_COUNT = 5, - PERF_HLSQ_FS_STAGE_32_WAVES = 6, - PERF_HLSQ_FS_STAGE_64_WAVES = 7, - PERF_HLSQ_QUADS = 8, - PERF_HLSQ_SP_STATE_COPY_TRANS_FS_STAGE = 9, - PERF_HLSQ_SP_STATE_COPY_TRANS_VS_STAGE = 10, - PERF_HLSQ_TP_STATE_COPY_TRANS_FS_STAGE = 11, - PERF_HLSQ_TP_STATE_COPY_TRANS_VS_STAGE = 12, - PERF_HLSQ_CS_INVOCATIONS = 13, - PERF_HLSQ_COMPUTE_DRAWCALLS = 14, -}; - -enum a5xx_vpc_perfcounter_select { - PERF_VPC_BUSY_CYCLES = 0, - PERF_VPC_WORKING_CYCLES = 1, - PERF_VPC_STALL_CYCLES_UCHE = 2, - PERF_VPC_STALL_CYCLES_VFD_WACK = 3, - PERF_VPC_STALL_CYCLES_HLSQ_PRIM_ALLOC = 4, - PERF_VPC_STALL_CYCLES_PC = 5, - PERF_VPC_STALL_CYCLES_SP_LM = 6, - PERF_VPC_POS_EXPORT_STALL_CYCLES = 7, - PERF_VPC_STARVE_CYCLES_SP = 8, - PERF_VPC_STARVE_CYCLES_LRZ = 9, - PERF_VPC_PC_PRIMITIVES = 10, - PERF_VPC_SP_COMPONENTS = 11, - PERF_VPC_SP_LM_PRIMITIVES = 12, - PERF_VPC_SP_LM_COMPONENTS = 13, - PERF_VPC_SP_LM_DWORDS = 14, - PERF_VPC_STREAMOUT_COMPONENTS = 15, - PERF_VPC_GRANT_PHASES = 16, -}; - -enum a5xx_tse_perfcounter_select { - PERF_TSE_BUSY_CYCLES = 0, - PERF_TSE_CLIPPING_CYCLES = 1, - PERF_TSE_STALL_CYCLES_RAS = 2, - PERF_TSE_STALL_CYCLES_LRZ_BARYPLANE = 3, - PERF_TSE_STALL_CYCLES_LRZ_ZPLANE = 4, - PERF_TSE_STARVE_CYCLES_PC = 5, - PERF_TSE_INPUT_PRIM = 6, - PERF_TSE_INPUT_NULL_PRIM = 7, - PERF_TSE_TRIVAL_REJ_PRIM = 8, - PERF_TSE_CLIPPED_PRIM = 9, - PERF_TSE_ZERO_AREA_PRIM = 10, - PERF_TSE_FACENESS_CULLED_PRIM = 11, - PERF_TSE_ZERO_PIXEL_PRIM = 12, - PERF_TSE_OUTPUT_NULL_PRIM = 13, - PERF_TSE_OUTPUT_VISIBLE_PRIM = 14, - PERF_TSE_CINVOCATION = 15, - PERF_TSE_CPRIMITIVES = 16, - PERF_TSE_2D_INPUT_PRIM = 17, - PERF_TSE_2D_ALIVE_CLCLES = 18, -}; - -enum a5xx_ras_perfcounter_select { - PERF_RAS_BUSY_CYCLES = 
0, - PERF_RAS_SUPERTILE_ACTIVE_CYCLES = 1, - PERF_RAS_STALL_CYCLES_LRZ = 2, - PERF_RAS_STARVE_CYCLES_TSE = 3, - PERF_RAS_SUPER_TILES = 4, - PERF_RAS_8X4_TILES = 5, - PERF_RAS_MASKGEN_ACTIVE = 6, - PERF_RAS_FULLY_COVERED_SUPER_TILES = 7, - PERF_RAS_FULLY_COVERED_8X4_TILES = 8, - PERF_RAS_PRIM_KILLED_INVISILBE = 9, -}; - -enum a5xx_lrz_perfcounter_select { - PERF_LRZ_BUSY_CYCLES = 0, - PERF_LRZ_STARVE_CYCLES_RAS = 1, - PERF_LRZ_STALL_CYCLES_RB = 2, - PERF_LRZ_STALL_CYCLES_VSC = 3, - PERF_LRZ_STALL_CYCLES_VPC = 4, - PERF_LRZ_STALL_CYCLES_FLAG_PREFETCH = 5, - PERF_LRZ_STALL_CYCLES_UCHE = 6, - PERF_LRZ_LRZ_READ = 7, - PERF_LRZ_LRZ_WRITE = 8, - PERF_LRZ_READ_LATENCY = 9, - PERF_LRZ_MERGE_CACHE_UPDATING = 10, - PERF_LRZ_PRIM_KILLED_BY_MASKGEN = 11, - PERF_LRZ_PRIM_KILLED_BY_LRZ = 12, - PERF_LRZ_VISIBLE_PRIM_AFTER_LRZ = 13, - PERF_LRZ_FULL_8X8_TILES = 14, - PERF_LRZ_PARTIAL_8X8_TILES = 15, - PERF_LRZ_TILE_KILLED = 16, - PERF_LRZ_TOTAL_PIXEL = 17, - PERF_LRZ_VISIBLE_PIXEL_AFTER_LRZ = 18, -}; - -enum a5xx_uche_perfcounter_select { - PERF_UCHE_BUSY_CYCLES = 0, - PERF_UCHE_STALL_CYCLES_VBIF = 1, - PERF_UCHE_VBIF_LATENCY_CYCLES = 2, - PERF_UCHE_VBIF_LATENCY_SAMPLES = 3, - PERF_UCHE_VBIF_READ_BEATS_TP = 4, - PERF_UCHE_VBIF_READ_BEATS_VFD = 5, - PERF_UCHE_VBIF_READ_BEATS_HLSQ = 6, - PERF_UCHE_VBIF_READ_BEATS_LRZ = 7, - PERF_UCHE_VBIF_READ_BEATS_SP = 8, - PERF_UCHE_READ_REQUESTS_TP = 9, - PERF_UCHE_READ_REQUESTS_VFD = 10, - PERF_UCHE_READ_REQUESTS_HLSQ = 11, - PERF_UCHE_READ_REQUESTS_LRZ = 12, - PERF_UCHE_READ_REQUESTS_SP = 13, - PERF_UCHE_WRITE_REQUESTS_LRZ = 14, - PERF_UCHE_WRITE_REQUESTS_SP = 15, - PERF_UCHE_WRITE_REQUESTS_VPC = 16, - PERF_UCHE_WRITE_REQUESTS_VSC = 17, - PERF_UCHE_EVICTS = 18, - PERF_UCHE_BANK_REQ0 = 19, - PERF_UCHE_BANK_REQ1 = 20, - PERF_UCHE_BANK_REQ2 = 21, - PERF_UCHE_BANK_REQ3 = 22, - PERF_UCHE_BANK_REQ4 = 23, - PERF_UCHE_BANK_REQ5 = 24, - PERF_UCHE_BANK_REQ6 = 25, - PERF_UCHE_BANK_REQ7 = 26, - PERF_UCHE_VBIF_READ_BEATS_CH0 = 27, - 
PERF_UCHE_VBIF_READ_BEATS_CH1 = 28, - PERF_UCHE_GMEM_READ_BEATS = 29, - PERF_UCHE_FLAG_COUNT = 30, -}; - -enum a5xx_tp_perfcounter_select { - PERF_TP_BUSY_CYCLES = 0, - PERF_TP_STALL_CYCLES_UCHE = 1, - PERF_TP_LATENCY_CYCLES = 2, - PERF_TP_LATENCY_TRANS = 3, - PERF_TP_FLAG_CACHE_REQUEST_SAMPLES = 4, - PERF_TP_FLAG_CACHE_REQUEST_LATENCY = 5, - PERF_TP_L1_CACHELINE_REQUESTS = 6, - PERF_TP_L1_CACHELINE_MISSES = 7, - PERF_TP_SP_TP_TRANS = 8, - PERF_TP_TP_SP_TRANS = 9, - PERF_TP_OUTPUT_PIXELS = 10, - PERF_TP_FILTER_WORKLOAD_16BIT = 11, - PERF_TP_FILTER_WORKLOAD_32BIT = 12, - PERF_TP_QUADS_RECEIVED = 13, - PERF_TP_QUADS_OFFSET = 14, - PERF_TP_QUADS_SHADOW = 15, - PERF_TP_QUADS_ARRAY = 16, - PERF_TP_QUADS_GRADIENT = 17, - PERF_TP_QUADS_1D = 18, - PERF_TP_QUADS_2D = 19, - PERF_TP_QUADS_BUFFER = 20, - PERF_TP_QUADS_3D = 21, - PERF_TP_QUADS_CUBE = 22, - PERF_TP_STATE_CACHE_REQUESTS = 23, - PERF_TP_STATE_CACHE_MISSES = 24, - PERF_TP_DIVERGENT_QUADS_RECEIVED = 25, - PERF_TP_BINDLESS_STATE_CACHE_REQUESTS = 26, - PERF_TP_BINDLESS_STATE_CACHE_MISSES = 27, - PERF_TP_PRT_NON_RESIDENT_EVENTS = 28, - PERF_TP_OUTPUT_PIXELS_POINT = 29, - PERF_TP_OUTPUT_PIXELS_BILINEAR = 30, - PERF_TP_OUTPUT_PIXELS_MIP = 31, - PERF_TP_OUTPUT_PIXELS_ANISO = 32, - PERF_TP_OUTPUT_PIXELS_ZERO_LOD = 33, - PERF_TP_FLAG_CACHE_REQUESTS = 34, - PERF_TP_FLAG_CACHE_MISSES = 35, - PERF_TP_L1_5_L2_REQUESTS = 36, - PERF_TP_2D_OUTPUT_PIXELS = 37, - PERF_TP_2D_OUTPUT_PIXELS_POINT = 38, - PERF_TP_2D_OUTPUT_PIXELS_BILINEAR = 39, - PERF_TP_2D_FILTER_WORKLOAD_16BIT = 40, - PERF_TP_2D_FILTER_WORKLOAD_32BIT = 41, -}; - -enum a5xx_sp_perfcounter_select { - PERF_SP_BUSY_CYCLES = 0, - PERF_SP_ALU_WORKING_CYCLES = 1, - PERF_SP_EFU_WORKING_CYCLES = 2, - PERF_SP_STALL_CYCLES_VPC = 3, - PERF_SP_STALL_CYCLES_TP = 4, - PERF_SP_STALL_CYCLES_UCHE = 5, - PERF_SP_STALL_CYCLES_RB = 6, - PERF_SP_SCHEDULER_NON_WORKING = 7, - PERF_SP_WAVE_CONTEXTS = 8, - PERF_SP_WAVE_CONTEXT_CYCLES = 9, - PERF_SP_FS_STAGE_WAVE_CYCLES = 10, - 
PERF_SP_FS_STAGE_WAVE_SAMPLES = 11, - PERF_SP_VS_STAGE_WAVE_CYCLES = 12, - PERF_SP_VS_STAGE_WAVE_SAMPLES = 13, - PERF_SP_FS_STAGE_DURATION_CYCLES = 14, - PERF_SP_VS_STAGE_DURATION_CYCLES = 15, - PERF_SP_WAVE_CTRL_CYCLES = 16, - PERF_SP_WAVE_LOAD_CYCLES = 17, - PERF_SP_WAVE_EMIT_CYCLES = 18, - PERF_SP_WAVE_NOP_CYCLES = 19, - PERF_SP_WAVE_WAIT_CYCLES = 20, - PERF_SP_WAVE_FETCH_CYCLES = 21, - PERF_SP_WAVE_IDLE_CYCLES = 22, - PERF_SP_WAVE_END_CYCLES = 23, - PERF_SP_WAVE_LONG_SYNC_CYCLES = 24, - PERF_SP_WAVE_SHORT_SYNC_CYCLES = 25, - PERF_SP_WAVE_JOIN_CYCLES = 26, - PERF_SP_LM_LOAD_INSTRUCTIONS = 27, - PERF_SP_LM_STORE_INSTRUCTIONS = 28, - PERF_SP_LM_ATOMICS = 29, - PERF_SP_GM_LOAD_INSTRUCTIONS = 30, - PERF_SP_GM_STORE_INSTRUCTIONS = 31, - PERF_SP_GM_ATOMICS = 32, - PERF_SP_VS_STAGE_TEX_INSTRUCTIONS = 33, - PERF_SP_VS_STAGE_CFLOW_INSTRUCTIONS = 34, - PERF_SP_VS_STAGE_EFU_INSTRUCTIONS = 35, - PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = 36, - PERF_SP_VS_STAGE_HALF_ALU_INSTRUCTIONS = 37, - PERF_SP_FS_STAGE_TEX_INSTRUCTIONS = 38, - PERF_SP_FS_STAGE_CFLOW_INSTRUCTIONS = 39, - PERF_SP_FS_STAGE_EFU_INSTRUCTIONS = 40, - PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = 41, - PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = 42, - PERF_SP_FS_STAGE_BARY_INSTRUCTIONS = 43, - PERF_SP_VS_INSTRUCTIONS = 44, - PERF_SP_FS_INSTRUCTIONS = 45, - PERF_SP_ADDR_LOCK_COUNT = 46, - PERF_SP_UCHE_READ_TRANS = 47, - PERF_SP_UCHE_WRITE_TRANS = 48, - PERF_SP_EXPORT_VPC_TRANS = 49, - PERF_SP_EXPORT_RB_TRANS = 50, - PERF_SP_PIXELS_KILLED = 51, - PERF_SP_ICL1_REQUESTS = 52, - PERF_SP_ICL1_MISSES = 53, - PERF_SP_ICL0_REQUESTS = 54, - PERF_SP_ICL0_MISSES = 55, - PERF_SP_HS_INSTRUCTIONS = 56, - PERF_SP_DS_INSTRUCTIONS = 57, - PERF_SP_GS_INSTRUCTIONS = 58, - PERF_SP_CS_INSTRUCTIONS = 59, - PERF_SP_GPR_READ = 60, - PERF_SP_GPR_WRITE = 61, - PERF_SP_LM_CH0_REQUESTS = 62, - PERF_SP_LM_CH1_REQUESTS = 63, - PERF_SP_LM_BANK_CONFLICTS = 64, -}; - -enum a5xx_rb_perfcounter_select { - PERF_RB_BUSY_CYCLES = 0, - 
PERF_RB_STALL_CYCLES_CCU = 1, - PERF_RB_STALL_CYCLES_HLSQ = 2, - PERF_RB_STALL_CYCLES_FIFO0_FULL = 3, - PERF_RB_STALL_CYCLES_FIFO1_FULL = 4, - PERF_RB_STALL_CYCLES_FIFO2_FULL = 5, - PERF_RB_STARVE_CYCLES_SP = 6, - PERF_RB_STARVE_CYCLES_LRZ_TILE = 7, - PERF_RB_STARVE_CYCLES_CCU = 8, - PERF_RB_STARVE_CYCLES_Z_PLANE = 9, - PERF_RB_STARVE_CYCLES_BARY_PLANE = 10, - PERF_RB_Z_WORKLOAD = 11, - PERF_RB_HLSQ_ACTIVE = 12, - PERF_RB_Z_READ = 13, - PERF_RB_Z_WRITE = 14, - PERF_RB_C_READ = 15, - PERF_RB_C_WRITE = 16, - PERF_RB_TOTAL_PASS = 17, - PERF_RB_Z_PASS = 18, - PERF_RB_Z_FAIL = 19, - PERF_RB_S_FAIL = 20, - PERF_RB_BLENDED_FXP_COMPONENTS = 21, - PERF_RB_BLENDED_FP16_COMPONENTS = 22, - RB_RESERVED = 23, - PERF_RB_2D_ALIVE_CYCLES = 24, - PERF_RB_2D_STALL_CYCLES_A2D = 25, - PERF_RB_2D_STARVE_CYCLES_SRC = 26, - PERF_RB_2D_STARVE_CYCLES_SP = 27, - PERF_RB_2D_STARVE_CYCLES_DST = 28, - PERF_RB_2D_VALID_PIXELS = 29, -}; - -enum a5xx_rb_samples_perfcounter_select { - TOTAL_SAMPLES = 0, - ZPASS_SAMPLES = 1, - ZFAIL_SAMPLES = 2, - SFAIL_SAMPLES = 3, -}; - -enum a5xx_vsc_perfcounter_select { - PERF_VSC_BUSY_CYCLES = 0, - PERF_VSC_WORKING_CYCLES = 1, - PERF_VSC_STALL_CYCLES_UCHE = 2, - PERF_VSC_EOT_NUM = 3, -}; - -enum a5xx_ccu_perfcounter_select { - PERF_CCU_BUSY_CYCLES = 0, - PERF_CCU_STALL_CYCLES_RB_DEPTH_RETURN = 1, - PERF_CCU_STALL_CYCLES_RB_COLOR_RETURN = 2, - PERF_CCU_STARVE_CYCLES_FLAG_RETURN = 3, - PERF_CCU_DEPTH_BLOCKS = 4, - PERF_CCU_COLOR_BLOCKS = 5, - PERF_CCU_DEPTH_BLOCK_HIT = 6, - PERF_CCU_COLOR_BLOCK_HIT = 7, - PERF_CCU_PARTIAL_BLOCK_READ = 8, - PERF_CCU_GMEM_READ = 9, - PERF_CCU_GMEM_WRITE = 10, - PERF_CCU_DEPTH_READ_FLAG0_COUNT = 11, - PERF_CCU_DEPTH_READ_FLAG1_COUNT = 12, - PERF_CCU_DEPTH_READ_FLAG2_COUNT = 13, - PERF_CCU_DEPTH_READ_FLAG3_COUNT = 14, - PERF_CCU_DEPTH_READ_FLAG4_COUNT = 15, - PERF_CCU_COLOR_READ_FLAG0_COUNT = 16, - PERF_CCU_COLOR_READ_FLAG1_COUNT = 17, - PERF_CCU_COLOR_READ_FLAG2_COUNT = 18, - PERF_CCU_COLOR_READ_FLAG3_COUNT = 19, - 
PERF_CCU_COLOR_READ_FLAG4_COUNT = 20, - PERF_CCU_2D_BUSY_CYCLES = 21, - PERF_CCU_2D_RD_REQ = 22, - PERF_CCU_2D_WR_REQ = 23, - PERF_CCU_2D_REORDER_STARVE_CYCLES = 24, - PERF_CCU_2D_PIXELS = 25, -}; - -enum a5xx_cmp_perfcounter_select { - PERF_CMPDECMP_STALL_CYCLES_VBIF = 0, - PERF_CMPDECMP_VBIF_LATENCY_CYCLES = 1, - PERF_CMPDECMP_VBIF_LATENCY_SAMPLES = 2, - PERF_CMPDECMP_VBIF_READ_DATA_CCU = 3, - PERF_CMPDECMP_VBIF_WRITE_DATA_CCU = 4, - PERF_CMPDECMP_VBIF_READ_REQUEST = 5, - PERF_CMPDECMP_VBIF_WRITE_REQUEST = 6, - PERF_CMPDECMP_VBIF_READ_DATA = 7, - PERF_CMPDECMP_VBIF_WRITE_DATA = 8, - PERF_CMPDECMP_FLAG_FETCH_CYCLES = 9, - PERF_CMPDECMP_FLAG_FETCH_SAMPLES = 10, - PERF_CMPDECMP_DEPTH_WRITE_FLAG1_COUNT = 11, - PERF_CMPDECMP_DEPTH_WRITE_FLAG2_COUNT = 12, - PERF_CMPDECMP_DEPTH_WRITE_FLAG3_COUNT = 13, - PERF_CMPDECMP_DEPTH_WRITE_FLAG4_COUNT = 14, - PERF_CMPDECMP_COLOR_WRITE_FLAG1_COUNT = 15, - PERF_CMPDECMP_COLOR_WRITE_FLAG2_COUNT = 16, - PERF_CMPDECMP_COLOR_WRITE_FLAG3_COUNT = 17, - PERF_CMPDECMP_COLOR_WRITE_FLAG4_COUNT = 18, - PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_REQ = 19, - PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_WR = 20, - PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_RETURN = 21, - PERF_CMPDECMP_2D_RD_DATA = 22, - PERF_CMPDECMP_2D_WR_DATA = 23, -}; - -enum a5xx_vbif_perfcounter_select { - AXI_READ_REQUESTS_ID_0 = 0, - AXI_READ_REQUESTS_ID_1 = 1, - AXI_READ_REQUESTS_ID_2 = 2, - AXI_READ_REQUESTS_ID_3 = 3, - AXI_READ_REQUESTS_ID_4 = 4, - AXI_READ_REQUESTS_ID_5 = 5, - AXI_READ_REQUESTS_ID_6 = 6, - AXI_READ_REQUESTS_ID_7 = 7, - AXI_READ_REQUESTS_ID_8 = 8, - AXI_READ_REQUESTS_ID_9 = 9, - AXI_READ_REQUESTS_ID_10 = 10, - AXI_READ_REQUESTS_ID_11 = 11, - AXI_READ_REQUESTS_ID_12 = 12, - AXI_READ_REQUESTS_ID_13 = 13, - AXI_READ_REQUESTS_ID_14 = 14, - AXI_READ_REQUESTS_ID_15 = 15, - AXI0_READ_REQUESTS_TOTAL = 16, - AXI1_READ_REQUESTS_TOTAL = 17, - AXI2_READ_REQUESTS_TOTAL = 18, - AXI3_READ_REQUESTS_TOTAL = 19, - AXI_READ_REQUESTS_TOTAL = 20, - AXI_WRITE_REQUESTS_ID_0 = 21, - 
AXI_WRITE_REQUESTS_ID_1 = 22, - AXI_WRITE_REQUESTS_ID_2 = 23, - AXI_WRITE_REQUESTS_ID_3 = 24, - AXI_WRITE_REQUESTS_ID_4 = 25, - AXI_WRITE_REQUESTS_ID_5 = 26, - AXI_WRITE_REQUESTS_ID_6 = 27, - AXI_WRITE_REQUESTS_ID_7 = 28, - AXI_WRITE_REQUESTS_ID_8 = 29, - AXI_WRITE_REQUESTS_ID_9 = 30, - AXI_WRITE_REQUESTS_ID_10 = 31, - AXI_WRITE_REQUESTS_ID_11 = 32, - AXI_WRITE_REQUESTS_ID_12 = 33, - AXI_WRITE_REQUESTS_ID_13 = 34, - AXI_WRITE_REQUESTS_ID_14 = 35, - AXI_WRITE_REQUESTS_ID_15 = 36, - AXI0_WRITE_REQUESTS_TOTAL = 37, - AXI1_WRITE_REQUESTS_TOTAL = 38, - AXI2_WRITE_REQUESTS_TOTAL = 39, - AXI3_WRITE_REQUESTS_TOTAL = 40, - AXI_WRITE_REQUESTS_TOTAL = 41, - AXI_TOTAL_REQUESTS = 42, - AXI_READ_DATA_BEATS_ID_0 = 43, - AXI_READ_DATA_BEATS_ID_1 = 44, - AXI_READ_DATA_BEATS_ID_2 = 45, - AXI_READ_DATA_BEATS_ID_3 = 46, - AXI_READ_DATA_BEATS_ID_4 = 47, - AXI_READ_DATA_BEATS_ID_5 = 48, - AXI_READ_DATA_BEATS_ID_6 = 49, - AXI_READ_DATA_BEATS_ID_7 = 50, - AXI_READ_DATA_BEATS_ID_8 = 51, - AXI_READ_DATA_BEATS_ID_9 = 52, - AXI_READ_DATA_BEATS_ID_10 = 53, - AXI_READ_DATA_BEATS_ID_11 = 54, - AXI_READ_DATA_BEATS_ID_12 = 55, - AXI_READ_DATA_BEATS_ID_13 = 56, - AXI_READ_DATA_BEATS_ID_14 = 57, - AXI_READ_DATA_BEATS_ID_15 = 58, - AXI0_READ_DATA_BEATS_TOTAL = 59, - AXI1_READ_DATA_BEATS_TOTAL = 60, - AXI2_READ_DATA_BEATS_TOTAL = 61, - AXI3_READ_DATA_BEATS_TOTAL = 62, - AXI_READ_DATA_BEATS_TOTAL = 63, - AXI_WRITE_DATA_BEATS_ID_0 = 64, - AXI_WRITE_DATA_BEATS_ID_1 = 65, - AXI_WRITE_DATA_BEATS_ID_2 = 66, - AXI_WRITE_DATA_BEATS_ID_3 = 67, - AXI_WRITE_DATA_BEATS_ID_4 = 68, - AXI_WRITE_DATA_BEATS_ID_5 = 69, - AXI_WRITE_DATA_BEATS_ID_6 = 70, - AXI_WRITE_DATA_BEATS_ID_7 = 71, - AXI_WRITE_DATA_BEATS_ID_8 = 72, - AXI_WRITE_DATA_BEATS_ID_9 = 73, - AXI_WRITE_DATA_BEATS_ID_10 = 74, - AXI_WRITE_DATA_BEATS_ID_11 = 75, - AXI_WRITE_DATA_BEATS_ID_12 = 76, - AXI_WRITE_DATA_BEATS_ID_13 = 77, - AXI_WRITE_DATA_BEATS_ID_14 = 78, - AXI_WRITE_DATA_BEATS_ID_15 = 79, - AXI0_WRITE_DATA_BEATS_TOTAL = 80, - 
AXI1_WRITE_DATA_BEATS_TOTAL = 81, - AXI2_WRITE_DATA_BEATS_TOTAL = 82, - AXI3_WRITE_DATA_BEATS_TOTAL = 83, - AXI_WRITE_DATA_BEATS_TOTAL = 84, - AXI_DATA_BEATS_TOTAL = 85, -}; - -enum a5xx_tex_filter { - A5XX_TEX_NEAREST = 0, - A5XX_TEX_LINEAR = 1, - A5XX_TEX_ANISO = 2, -}; - -enum a5xx_tex_clamp { - A5XX_TEX_REPEAT = 0, - A5XX_TEX_CLAMP_TO_EDGE = 1, - A5XX_TEX_MIRROR_REPEAT = 2, - A5XX_TEX_CLAMP_TO_BORDER = 3, - A5XX_TEX_MIRROR_CLAMP = 4, -}; - -enum a5xx_tex_aniso { - A5XX_TEX_ANISO_1 = 0, - A5XX_TEX_ANISO_2 = 1, - A5XX_TEX_ANISO_4 = 2, - A5XX_TEX_ANISO_8 = 3, - A5XX_TEX_ANISO_16 = 4, -}; - -enum a5xx_tex_swiz { - A5XX_TEX_X = 0, - A5XX_TEX_Y = 1, - A5XX_TEX_Z = 2, - A5XX_TEX_W = 3, - A5XX_TEX_ZERO = 4, - A5XX_TEX_ONE = 5, -}; - -enum a5xx_tex_type { - A5XX_TEX_1D = 0, - A5XX_TEX_2D = 1, - A5XX_TEX_CUBE = 2, - A5XX_TEX_3D = 3, -}; - -#define A5XX_INT0_RBBM_GPU_IDLE 0x00000001 -#define A5XX_INT0_RBBM_AHB_ERROR 0x00000002 -#define A5XX_INT0_RBBM_TRANSFER_TIMEOUT 0x00000004 -#define A5XX_INT0_RBBM_ME_MS_TIMEOUT 0x00000008 -#define A5XX_INT0_RBBM_PFP_MS_TIMEOUT 0x00000010 -#define A5XX_INT0_RBBM_ETS_MS_TIMEOUT 0x00000020 -#define A5XX_INT0_RBBM_ATB_ASYNC_OVERFLOW 0x00000040 -#define A5XX_INT0_RBBM_GPC_ERROR 0x00000080 -#define A5XX_INT0_CP_SW 0x00000100 -#define A5XX_INT0_CP_HW_ERROR 0x00000200 -#define A5XX_INT0_CP_CCU_FLUSH_DEPTH_TS 0x00000400 -#define A5XX_INT0_CP_CCU_FLUSH_COLOR_TS 0x00000800 -#define A5XX_INT0_CP_CCU_RESOLVE_TS 0x00001000 -#define A5XX_INT0_CP_IB2 0x00002000 -#define A5XX_INT0_CP_IB1 0x00004000 -#define A5XX_INT0_CP_RB 0x00008000 -#define A5XX_INT0_CP_UNUSED_1 0x00010000 -#define A5XX_INT0_CP_RB_DONE_TS 0x00020000 -#define A5XX_INT0_CP_WT_DONE_TS 0x00040000 -#define A5XX_INT0_UNKNOWN_1 0x00080000 -#define A5XX_INT0_CP_CACHE_FLUSH_TS 0x00100000 -#define A5XX_INT0_UNUSED_2 0x00200000 -#define A5XX_INT0_RBBM_ATB_BUS_OVERFLOW 0x00400000 -#define A5XX_INT0_MISC_HANG_DETECT 0x00800000 -#define A5XX_INT0_UCHE_OOB_ACCESS 0x01000000 -#define 
A5XX_INT0_UCHE_TRAP_INTR 0x02000000 -#define A5XX_INT0_DEBBUS_INTR_0 0x04000000 -#define A5XX_INT0_DEBBUS_INTR_1 0x08000000 -#define A5XX_INT0_GPMU_VOLTAGE_DROOP 0x10000000 -#define A5XX_INT0_GPMU_FIRMWARE 0x20000000 -#define A5XX_INT0_ISDB_CPU_IRQ 0x40000000 -#define A5XX_INT0_ISDB_UNDER_DEBUG 0x80000000 -#define A5XX_CP_INT_CP_OPCODE_ERROR 0x00000001 -#define A5XX_CP_INT_CP_RESERVED_BIT_ERROR 0x00000002 -#define A5XX_CP_INT_CP_HW_FAULT_ERROR 0x00000004 -#define A5XX_CP_INT_CP_DMA_ERROR 0x00000008 -#define A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR 0x00000010 -#define A5XX_CP_INT_CP_AHB_ERROR 0x00000020 -#define REG_A5XX_CP_RB_BASE 0x00000800 - -#define REG_A5XX_CP_RB_BASE_HI 0x00000801 - -#define REG_A5XX_CP_RB_CNTL 0x00000802 - -#define REG_A5XX_CP_RB_RPTR_ADDR 0x00000804 - -#define REG_A5XX_CP_RB_RPTR_ADDR_HI 0x00000805 - -#define REG_A5XX_CP_RB_RPTR 0x00000806 - -#define REG_A5XX_CP_RB_WPTR 0x00000807 - -#define REG_A5XX_CP_PFP_STAT_ADDR 0x00000808 - -#define REG_A5XX_CP_PFP_STAT_DATA 0x00000809 - -#define REG_A5XX_CP_DRAW_STATE_ADDR 0x0000080b - -#define REG_A5XX_CP_DRAW_STATE_DATA 0x0000080c - -#define REG_A5XX_CP_ME_NRT_ADDR_LO 0x0000080d - -#define REG_A5XX_CP_ME_NRT_ADDR_HI 0x0000080e - -#define REG_A5XX_CP_ME_NRT_DATA 0x00000810 - -#define REG_A5XX_CP_CRASH_SCRIPT_BASE_LO 0x00000817 - -#define REG_A5XX_CP_CRASH_SCRIPT_BASE_HI 0x00000818 - -#define REG_A5XX_CP_CRASH_DUMP_CNTL 0x00000819 - -#define REG_A5XX_CP_ME_STAT_ADDR 0x0000081a - -#define REG_A5XX_CP_ROQ_THRESHOLDS_1 0x0000081f - -#define REG_A5XX_CP_ROQ_THRESHOLDS_2 0x00000820 - -#define REG_A5XX_CP_ROQ_DBG_ADDR 0x00000821 - -#define REG_A5XX_CP_ROQ_DBG_DATA 0x00000822 - -#define REG_A5XX_CP_MEQ_DBG_ADDR 0x00000823 - -#define REG_A5XX_CP_MEQ_DBG_DATA 0x00000824 - -#define REG_A5XX_CP_MEQ_THRESHOLDS 0x00000825 - -#define REG_A5XX_CP_MERCIU_SIZE 0x00000826 - -#define REG_A5XX_CP_MERCIU_DBG_ADDR 0x00000827 - -#define REG_A5XX_CP_MERCIU_DBG_DATA_1 0x00000828 - -#define REG_A5XX_CP_MERCIU_DBG_DATA_2 
0x00000829 - -#define REG_A5XX_CP_PFP_UCODE_DBG_ADDR 0x0000082a - -#define REG_A5XX_CP_PFP_UCODE_DBG_DATA 0x0000082b - -#define REG_A5XX_CP_ME_UCODE_DBG_ADDR 0x0000082f - -#define REG_A5XX_CP_ME_UCODE_DBG_DATA 0x00000830 - -#define REG_A5XX_CP_CNTL 0x00000831 - -#define REG_A5XX_CP_PFP_ME_CNTL 0x00000832 - -#define REG_A5XX_CP_CHICKEN_DBG 0x00000833 - -#define REG_A5XX_CP_PFP_INSTR_BASE_LO 0x00000835 - -#define REG_A5XX_CP_PFP_INSTR_BASE_HI 0x00000836 - -#define REG_A5XX_CP_ME_INSTR_BASE_LO 0x00000838 - -#define REG_A5XX_CP_ME_INSTR_BASE_HI 0x00000839 - -#define REG_A5XX_CP_CONTEXT_SWITCH_CNTL 0x0000083b - -#define REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO 0x0000083c - -#define REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI 0x0000083d - -#define REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO 0x0000083e - -#define REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI 0x0000083f - -#define REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO 0x00000840 - -#define REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI 0x00000841 - -#define REG_A5XX_CP_ADDR_MODE_CNTL 0x00000860 - -#define REG_A5XX_CP_ME_STAT_DATA 0x00000b14 - -#define REG_A5XX_CP_WFI_PEND_CTR 0x00000b15 - -#define REG_A5XX_CP_INTERRUPT_STATUS 0x00000b18 - -#define REG_A5XX_CP_HW_FAULT 0x00000b1a - -#define REG_A5XX_CP_PROTECT_STATUS 0x00000b1c - -#define REG_A5XX_CP_IB1_BASE 0x00000b1f - -#define REG_A5XX_CP_IB1_BASE_HI 0x00000b20 - -#define REG_A5XX_CP_IB1_BUFSZ 0x00000b21 - -#define REG_A5XX_CP_IB2_BASE 0x00000b22 - -#define REG_A5XX_CP_IB2_BASE_HI 0x00000b23 - -#define REG_A5XX_CP_IB2_BUFSZ 0x00000b24 - -static inline uint32_t REG_A5XX_CP_SCRATCH(uint32_t i0) { return 0x00000b78 + 0x1*i0; } - -static inline uint32_t REG_A5XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000b78 + 0x1*i0; } - -static inline uint32_t REG_A5XX_CP_PROTECT(uint32_t i0) { return 0x00000880 + 0x1*i0; } - -static inline uint32_t REG_A5XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000880 + 0x1*i0; } -#define A5XX_CP_PROTECT_REG_BASE_ADDR__MASK 0x0001ffff -#define 
A5XX_CP_PROTECT_REG_BASE_ADDR__SHIFT 0 -static inline uint32_t A5XX_CP_PROTECT_REG_BASE_ADDR(uint32_t val) -{ - return ((val) << A5XX_CP_PROTECT_REG_BASE_ADDR__SHIFT) & A5XX_CP_PROTECT_REG_BASE_ADDR__MASK; -} -#define A5XX_CP_PROTECT_REG_MASK_LEN__MASK 0x1f000000 -#define A5XX_CP_PROTECT_REG_MASK_LEN__SHIFT 24 -static inline uint32_t A5XX_CP_PROTECT_REG_MASK_LEN(uint32_t val) -{ - return ((val) << A5XX_CP_PROTECT_REG_MASK_LEN__SHIFT) & A5XX_CP_PROTECT_REG_MASK_LEN__MASK; -} -#define A5XX_CP_PROTECT_REG_TRAP_WRITE 0x20000000 -#define A5XX_CP_PROTECT_REG_TRAP_READ 0x40000000 - -#define REG_A5XX_CP_PROTECT_CNTL 0x000008a0 - -#define REG_A5XX_CP_AHB_FAULT 0x00000b1b - -#define REG_A5XX_CP_PERFCTR_CP_SEL_0 0x00000bb0 - -#define REG_A5XX_CP_PERFCTR_CP_SEL_1 0x00000bb1 - -#define REG_A5XX_CP_PERFCTR_CP_SEL_2 0x00000bb2 - -#define REG_A5XX_CP_PERFCTR_CP_SEL_3 0x00000bb3 - -#define REG_A5XX_CP_PERFCTR_CP_SEL_4 0x00000bb4 - -#define REG_A5XX_CP_PERFCTR_CP_SEL_5 0x00000bb5 - -#define REG_A5XX_CP_PERFCTR_CP_SEL_6 0x00000bb6 - -#define REG_A5XX_CP_PERFCTR_CP_SEL_7 0x00000bb7 - -#define REG_A5XX_VSC_ADDR_MODE_CNTL 0x00000bc1 - -#define REG_A5XX_CP_POWERCTR_CP_SEL_0 0x00000bba - -#define REG_A5XX_CP_POWERCTR_CP_SEL_1 0x00000bbb - -#define REG_A5XX_CP_POWERCTR_CP_SEL_2 0x00000bbc - -#define REG_A5XX_CP_POWERCTR_CP_SEL_3 0x00000bbd - -#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_A 0x00000004 - -#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_B 0x00000005 - -#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_C 0x00000006 - -#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_D 0x00000007 - -#define REG_A5XX_RBBM_CFG_DBGBUS_CNTLT 0x00000008 - -#define REG_A5XX_RBBM_CFG_DBGBUS_CNTLM 0x00000009 - -#define REG_A5XX_RBBM_CFG_DEBBUS_CTLTM_ENABLE_SHIFT 0x00000018 - -#define REG_A5XX_RBBM_CFG_DBGBUS_OPL 0x0000000a - -#define REG_A5XX_RBBM_CFG_DBGBUS_OPE 0x0000000b - -#define REG_A5XX_RBBM_CFG_DBGBUS_IVTL_0 0x0000000c - -#define REG_A5XX_RBBM_CFG_DBGBUS_IVTL_1 0x0000000d - -#define REG_A5XX_RBBM_CFG_DBGBUS_IVTL_2 0x0000000e - -#define 
REG_A5XX_RBBM_CFG_DBGBUS_IVTL_3 0x0000000f - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_0 0x00000010 - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_1 0x00000011 - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_2 0x00000012 - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_3 0x00000013 - -#define REG_A5XX_RBBM_CFG_DBGBUS_BYTEL_0 0x00000014 - -#define REG_A5XX_RBBM_CFG_DBGBUS_BYTEL_1 0x00000015 - -#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_0 0x00000016 - -#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_1 0x00000017 - -#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_2 0x00000018 - -#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_3 0x00000019 - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_0 0x0000001a - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_1 0x0000001b - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_2 0x0000001c - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_3 0x0000001d - -#define REG_A5XX_RBBM_CFG_DBGBUS_NIBBLEE 0x0000001e - -#define REG_A5XX_RBBM_CFG_DBGBUS_PTRC0 0x0000001f - -#define REG_A5XX_RBBM_CFG_DBGBUS_PTRC1 0x00000020 - -#define REG_A5XX_RBBM_CFG_DBGBUS_LOADREG 0x00000021 - -#define REG_A5XX_RBBM_CFG_DBGBUS_IDX 0x00000022 - -#define REG_A5XX_RBBM_CFG_DBGBUS_CLRC 0x00000023 - -#define REG_A5XX_RBBM_CFG_DBGBUS_LOADIVT 0x00000024 - -#define REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL 0x0000002f - -#define REG_A5XX_RBBM_INT_CLEAR_CMD 0x00000037 - -#define REG_A5XX_RBBM_INT_0_MASK 0x00000038 -#define A5XX_RBBM_INT_0_MASK_RBBM_GPU_IDLE 0x00000001 -#define A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR 0x00000002 -#define A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT 0x00000004 -#define A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT 0x00000008 -#define A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT 0x00000010 -#define A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT 0x00000020 -#define A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW 0x00000040 -#define A5XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR 0x00000080 -#define A5XX_RBBM_INT_0_MASK_CP_SW 0x00000100 -#define A5XX_RBBM_INT_0_MASK_CP_HW_ERROR 0x00000200 -#define A5XX_RBBM_INT_0_MASK_CP_CCU_FLUSH_DEPTH_TS 0x00000400 -#define 
A5XX_RBBM_INT_0_MASK_CP_CCU_FLUSH_COLOR_TS 0x00000800 -#define A5XX_RBBM_INT_0_MASK_CP_CCU_RESOLVE_TS 0x00001000 -#define A5XX_RBBM_INT_0_MASK_CP_IB2 0x00002000 -#define A5XX_RBBM_INT_0_MASK_CP_IB1 0x00004000 -#define A5XX_RBBM_INT_0_MASK_CP_RB 0x00008000 -#define A5XX_RBBM_INT_0_MASK_CP_RB_DONE_TS 0x00020000 -#define A5XX_RBBM_INT_0_MASK_CP_WT_DONE_TS 0x00040000 -#define A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS 0x00100000 -#define A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW 0x00400000 -#define A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT 0x00800000 -#define A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS 0x01000000 -#define A5XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR 0x02000000 -#define A5XX_RBBM_INT_0_MASK_DEBBUS_INTR_0 0x04000000 -#define A5XX_RBBM_INT_0_MASK_DEBBUS_INTR_1 0x08000000 -#define A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP 0x10000000 -#define A5XX_RBBM_INT_0_MASK_GPMU_FIRMWARE 0x20000000 -#define A5XX_RBBM_INT_0_MASK_ISDB_CPU_IRQ 0x40000000 -#define A5XX_RBBM_INT_0_MASK_ISDB_UNDER_DEBUG 0x80000000 - -#define REG_A5XX_RBBM_AHB_DBG_CNTL 0x0000003f - -#define REG_A5XX_RBBM_EXT_VBIF_DBG_CNTL 0x00000041 - -#define REG_A5XX_RBBM_SW_RESET_CMD 0x00000043 - -#define REG_A5XX_RBBM_BLOCK_SW_RESET_CMD 0x00000045 - -#define REG_A5XX_RBBM_BLOCK_SW_RESET_CMD2 0x00000046 - -#define REG_A5XX_RBBM_DBG_LO_HI_GPIO 0x00000048 - -#define REG_A5XX_RBBM_EXT_TRACE_BUS_CNTL 0x00000049 - -#define REG_A5XX_RBBM_CLOCK_CNTL_TP0 0x0000004a - -#define REG_A5XX_RBBM_CLOCK_CNTL_TP1 0x0000004b - -#define REG_A5XX_RBBM_CLOCK_CNTL_TP2 0x0000004c - -#define REG_A5XX_RBBM_CLOCK_CNTL_TP3 0x0000004d - -#define REG_A5XX_RBBM_CLOCK_CNTL2_TP0 0x0000004e - -#define REG_A5XX_RBBM_CLOCK_CNTL2_TP1 0x0000004f - -#define REG_A5XX_RBBM_CLOCK_CNTL2_TP2 0x00000050 - -#define REG_A5XX_RBBM_CLOCK_CNTL2_TP3 0x00000051 - -#define REG_A5XX_RBBM_CLOCK_CNTL3_TP0 0x00000052 - -#define REG_A5XX_RBBM_CLOCK_CNTL3_TP1 0x00000053 - -#define REG_A5XX_RBBM_CLOCK_CNTL3_TP2 0x00000054 - -#define REG_A5XX_RBBM_CLOCK_CNTL3_TP3 0x00000055 - -#define 
REG_A5XX_RBBM_READ_AHB_THROUGH_DBG 0x00000059 - -#define REG_A5XX_RBBM_CLOCK_CNTL_UCHE 0x0000005a - -#define REG_A5XX_RBBM_CLOCK_CNTL2_UCHE 0x0000005b - -#define REG_A5XX_RBBM_CLOCK_CNTL3_UCHE 0x0000005c - -#define REG_A5XX_RBBM_CLOCK_CNTL4_UCHE 0x0000005d - -#define REG_A5XX_RBBM_CLOCK_HYST_UCHE 0x0000005e - -#define REG_A5XX_RBBM_CLOCK_DELAY_UCHE 0x0000005f - -#define REG_A5XX_RBBM_CLOCK_MODE_GPC 0x00000060 - -#define REG_A5XX_RBBM_CLOCK_DELAY_GPC 0x00000061 - -#define REG_A5XX_RBBM_CLOCK_HYST_GPC 0x00000062 - -#define REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM 0x00000063 - -#define REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x00000064 - -#define REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x00000065 - -#define REG_A5XX_RBBM_CLOCK_DELAY_HLSQ 0x00000066 - -#define REG_A5XX_RBBM_CLOCK_CNTL 0x00000067 - -#define REG_A5XX_RBBM_CLOCK_CNTL_SP0 0x00000068 - -#define REG_A5XX_RBBM_CLOCK_CNTL_SP1 0x00000069 - -#define REG_A5XX_RBBM_CLOCK_CNTL_SP2 0x0000006a - -#define REG_A5XX_RBBM_CLOCK_CNTL_SP3 0x0000006b - -#define REG_A5XX_RBBM_CLOCK_CNTL2_SP0 0x0000006c - -#define REG_A5XX_RBBM_CLOCK_CNTL2_SP1 0x0000006d - -#define REG_A5XX_RBBM_CLOCK_CNTL2_SP2 0x0000006e - -#define REG_A5XX_RBBM_CLOCK_CNTL2_SP3 0x0000006f - -#define REG_A5XX_RBBM_CLOCK_HYST_SP0 0x00000070 - -#define REG_A5XX_RBBM_CLOCK_HYST_SP1 0x00000071 - -#define REG_A5XX_RBBM_CLOCK_HYST_SP2 0x00000072 - -#define REG_A5XX_RBBM_CLOCK_HYST_SP3 0x00000073 - -#define REG_A5XX_RBBM_CLOCK_DELAY_SP0 0x00000074 - -#define REG_A5XX_RBBM_CLOCK_DELAY_SP1 0x00000075 - -#define REG_A5XX_RBBM_CLOCK_DELAY_SP2 0x00000076 - -#define REG_A5XX_RBBM_CLOCK_DELAY_SP3 0x00000077 - -#define REG_A5XX_RBBM_CLOCK_CNTL_RB0 0x00000078 - -#define REG_A5XX_RBBM_CLOCK_CNTL_RB1 0x00000079 - -#define REG_A5XX_RBBM_CLOCK_CNTL_RB2 0x0000007a - -#define REG_A5XX_RBBM_CLOCK_CNTL_RB3 0x0000007b - -#define REG_A5XX_RBBM_CLOCK_CNTL2_RB0 0x0000007c - -#define REG_A5XX_RBBM_CLOCK_CNTL2_RB1 0x0000007d - -#define REG_A5XX_RBBM_CLOCK_CNTL2_RB2 0x0000007e - -#define 
REG_A5XX_RBBM_CLOCK_CNTL2_RB3 0x0000007f - -#define REG_A5XX_RBBM_CLOCK_HYST_RAC 0x00000080 - -#define REG_A5XX_RBBM_CLOCK_DELAY_RAC 0x00000081 - -#define REG_A5XX_RBBM_CLOCK_CNTL_CCU0 0x00000082 - -#define REG_A5XX_RBBM_CLOCK_CNTL_CCU1 0x00000083 - -#define REG_A5XX_RBBM_CLOCK_CNTL_CCU2 0x00000084 - -#define REG_A5XX_RBBM_CLOCK_CNTL_CCU3 0x00000085 - -#define REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0 0x00000086 - -#define REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1 0x00000087 - -#define REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2 0x00000088 - -#define REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3 0x00000089 - -#define REG_A5XX_RBBM_CLOCK_CNTL_RAC 0x0000008a - -#define REG_A5XX_RBBM_CLOCK_CNTL2_RAC 0x0000008b - -#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0 0x0000008c - -#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1 0x0000008d - -#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2 0x0000008e - -#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3 0x0000008f - -#define REG_A5XX_RBBM_CLOCK_HYST_VFD 0x00000090 - -#define REG_A5XX_RBBM_CLOCK_MODE_VFD 0x00000091 - -#define REG_A5XX_RBBM_CLOCK_DELAY_VFD 0x00000092 - -#define REG_A5XX_RBBM_AHB_CNTL0 0x00000093 - -#define REG_A5XX_RBBM_AHB_CNTL1 0x00000094 - -#define REG_A5XX_RBBM_AHB_CNTL2 0x00000095 - -#define REG_A5XX_RBBM_AHB_CMD 0x00000096 - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11 0x0000009c - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12 0x0000009d - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13 0x0000009e - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14 0x0000009f - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15 0x000000a0 - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16 0x000000a1 - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17 0x000000a2 - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18 0x000000a3 - -#define REG_A5XX_RBBM_CLOCK_DELAY_TP0 0x000000a4 - -#define REG_A5XX_RBBM_CLOCK_DELAY_TP1 0x000000a5 - -#define REG_A5XX_RBBM_CLOCK_DELAY_TP2 0x000000a6 - -#define REG_A5XX_RBBM_CLOCK_DELAY_TP3 0x000000a7 - -#define 
REG_A5XX_RBBM_CLOCK_DELAY2_TP0 0x000000a8 - -#define REG_A5XX_RBBM_CLOCK_DELAY2_TP1 0x000000a9 - -#define REG_A5XX_RBBM_CLOCK_DELAY2_TP2 0x000000aa - -#define REG_A5XX_RBBM_CLOCK_DELAY2_TP3 0x000000ab - -#define REG_A5XX_RBBM_CLOCK_DELAY3_TP0 0x000000ac - -#define REG_A5XX_RBBM_CLOCK_DELAY3_TP1 0x000000ad - -#define REG_A5XX_RBBM_CLOCK_DELAY3_TP2 0x000000ae - -#define REG_A5XX_RBBM_CLOCK_DELAY3_TP3 0x000000af - -#define REG_A5XX_RBBM_CLOCK_HYST_TP0 0x000000b0 - -#define REG_A5XX_RBBM_CLOCK_HYST_TP1 0x000000b1 - -#define REG_A5XX_RBBM_CLOCK_HYST_TP2 0x000000b2 - -#define REG_A5XX_RBBM_CLOCK_HYST_TP3 0x000000b3 - -#define REG_A5XX_RBBM_CLOCK_HYST2_TP0 0x000000b4 - -#define REG_A5XX_RBBM_CLOCK_HYST2_TP1 0x000000b5 - -#define REG_A5XX_RBBM_CLOCK_HYST2_TP2 0x000000b6 - -#define REG_A5XX_RBBM_CLOCK_HYST2_TP3 0x000000b7 - -#define REG_A5XX_RBBM_CLOCK_HYST3_TP0 0x000000b8 - -#define REG_A5XX_RBBM_CLOCK_HYST3_TP1 0x000000b9 - -#define REG_A5XX_RBBM_CLOCK_HYST3_TP2 0x000000ba - -#define REG_A5XX_RBBM_CLOCK_HYST3_TP3 0x000000bb - -#define REG_A5XX_RBBM_CLOCK_CNTL_GPMU 0x000000c8 - -#define REG_A5XX_RBBM_CLOCK_DELAY_GPMU 0x000000c9 - -#define REG_A5XX_RBBM_CLOCK_HYST_GPMU 0x000000ca - -#define REG_A5XX_RBBM_PERFCTR_CP_0_LO 0x000003a0 - -#define REG_A5XX_RBBM_PERFCTR_CP_0_HI 0x000003a1 - -#define REG_A5XX_RBBM_PERFCTR_CP_1_LO 0x000003a2 - -#define REG_A5XX_RBBM_PERFCTR_CP_1_HI 0x000003a3 - -#define REG_A5XX_RBBM_PERFCTR_CP_2_LO 0x000003a4 - -#define REG_A5XX_RBBM_PERFCTR_CP_2_HI 0x000003a5 - -#define REG_A5XX_RBBM_PERFCTR_CP_3_LO 0x000003a6 - -#define REG_A5XX_RBBM_PERFCTR_CP_3_HI 0x000003a7 - -#define REG_A5XX_RBBM_PERFCTR_CP_4_LO 0x000003a8 - -#define REG_A5XX_RBBM_PERFCTR_CP_4_HI 0x000003a9 - -#define REG_A5XX_RBBM_PERFCTR_CP_5_LO 0x000003aa - -#define REG_A5XX_RBBM_PERFCTR_CP_5_HI 0x000003ab - -#define REG_A5XX_RBBM_PERFCTR_CP_6_LO 0x000003ac - -#define REG_A5XX_RBBM_PERFCTR_CP_6_HI 0x000003ad - -#define REG_A5XX_RBBM_PERFCTR_CP_7_LO 0x000003ae - -#define 
REG_A5XX_RBBM_PERFCTR_CP_7_HI 0x000003af - -#define REG_A5XX_RBBM_PERFCTR_RBBM_0_LO 0x000003b0 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_0_HI 0x000003b1 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_1_LO 0x000003b2 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_1_HI 0x000003b3 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_2_LO 0x000003b4 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_2_HI 0x000003b5 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_3_LO 0x000003b6 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_3_HI 0x000003b7 - -#define REG_A5XX_RBBM_PERFCTR_PC_0_LO 0x000003b8 - -#define REG_A5XX_RBBM_PERFCTR_PC_0_HI 0x000003b9 - -#define REG_A5XX_RBBM_PERFCTR_PC_1_LO 0x000003ba - -#define REG_A5XX_RBBM_PERFCTR_PC_1_HI 0x000003bb - -#define REG_A5XX_RBBM_PERFCTR_PC_2_LO 0x000003bc - -#define REG_A5XX_RBBM_PERFCTR_PC_2_HI 0x000003bd - -#define REG_A5XX_RBBM_PERFCTR_PC_3_LO 0x000003be - -#define REG_A5XX_RBBM_PERFCTR_PC_3_HI 0x000003bf - -#define REG_A5XX_RBBM_PERFCTR_PC_4_LO 0x000003c0 - -#define REG_A5XX_RBBM_PERFCTR_PC_4_HI 0x000003c1 - -#define REG_A5XX_RBBM_PERFCTR_PC_5_LO 0x000003c2 - -#define REG_A5XX_RBBM_PERFCTR_PC_5_HI 0x000003c3 - -#define REG_A5XX_RBBM_PERFCTR_PC_6_LO 0x000003c4 - -#define REG_A5XX_RBBM_PERFCTR_PC_6_HI 0x000003c5 - -#define REG_A5XX_RBBM_PERFCTR_PC_7_LO 0x000003c6 - -#define REG_A5XX_RBBM_PERFCTR_PC_7_HI 0x000003c7 - -#define REG_A5XX_RBBM_PERFCTR_VFD_0_LO 0x000003c8 - -#define REG_A5XX_RBBM_PERFCTR_VFD_0_HI 0x000003c9 - -#define REG_A5XX_RBBM_PERFCTR_VFD_1_LO 0x000003ca - -#define REG_A5XX_RBBM_PERFCTR_VFD_1_HI 0x000003cb - -#define REG_A5XX_RBBM_PERFCTR_VFD_2_LO 0x000003cc - -#define REG_A5XX_RBBM_PERFCTR_VFD_2_HI 0x000003cd - -#define REG_A5XX_RBBM_PERFCTR_VFD_3_LO 0x000003ce - -#define REG_A5XX_RBBM_PERFCTR_VFD_3_HI 0x000003cf - -#define REG_A5XX_RBBM_PERFCTR_VFD_4_LO 0x000003d0 - -#define REG_A5XX_RBBM_PERFCTR_VFD_4_HI 0x000003d1 - -#define REG_A5XX_RBBM_PERFCTR_VFD_5_LO 0x000003d2 - -#define REG_A5XX_RBBM_PERFCTR_VFD_5_HI 0x000003d3 - -#define REG_A5XX_RBBM_PERFCTR_VFD_6_LO 0x000003d4 - 
-#define REG_A5XX_RBBM_PERFCTR_VFD_6_HI 0x000003d5 - -#define REG_A5XX_RBBM_PERFCTR_VFD_7_LO 0x000003d6 - -#define REG_A5XX_RBBM_PERFCTR_VFD_7_HI 0x000003d7 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_0_LO 0x000003d8 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_0_HI 0x000003d9 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_1_LO 0x000003da - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_1_HI 0x000003db - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_2_LO 0x000003dc - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_2_HI 0x000003dd - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_3_LO 0x000003de - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_3_HI 0x000003df - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_4_LO 0x000003e0 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_4_HI 0x000003e1 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_5_LO 0x000003e2 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_5_HI 0x000003e3 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_6_LO 0x000003e4 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_6_HI 0x000003e5 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_7_LO 0x000003e6 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_7_HI 0x000003e7 - -#define REG_A5XX_RBBM_PERFCTR_VPC_0_LO 0x000003e8 - -#define REG_A5XX_RBBM_PERFCTR_VPC_0_HI 0x000003e9 - -#define REG_A5XX_RBBM_PERFCTR_VPC_1_LO 0x000003ea - -#define REG_A5XX_RBBM_PERFCTR_VPC_1_HI 0x000003eb - -#define REG_A5XX_RBBM_PERFCTR_VPC_2_LO 0x000003ec - -#define REG_A5XX_RBBM_PERFCTR_VPC_2_HI 0x000003ed - -#define REG_A5XX_RBBM_PERFCTR_VPC_3_LO 0x000003ee - -#define REG_A5XX_RBBM_PERFCTR_VPC_3_HI 0x000003ef - -#define REG_A5XX_RBBM_PERFCTR_CCU_0_LO 0x000003f0 - -#define REG_A5XX_RBBM_PERFCTR_CCU_0_HI 0x000003f1 - -#define REG_A5XX_RBBM_PERFCTR_CCU_1_LO 0x000003f2 - -#define REG_A5XX_RBBM_PERFCTR_CCU_1_HI 0x000003f3 - -#define REG_A5XX_RBBM_PERFCTR_CCU_2_LO 0x000003f4 - -#define REG_A5XX_RBBM_PERFCTR_CCU_2_HI 0x000003f5 - -#define REG_A5XX_RBBM_PERFCTR_CCU_3_LO 0x000003f6 - -#define REG_A5XX_RBBM_PERFCTR_CCU_3_HI 0x000003f7 - -#define REG_A5XX_RBBM_PERFCTR_TSE_0_LO 0x000003f8 - -#define REG_A5XX_RBBM_PERFCTR_TSE_0_HI 0x000003f9 - -#define 
REG_A5XX_RBBM_PERFCTR_TSE_1_LO 0x000003fa - -#define REG_A5XX_RBBM_PERFCTR_TSE_1_HI 0x000003fb - -#define REG_A5XX_RBBM_PERFCTR_TSE_2_LO 0x000003fc - -#define REG_A5XX_RBBM_PERFCTR_TSE_2_HI 0x000003fd - -#define REG_A5XX_RBBM_PERFCTR_TSE_3_LO 0x000003fe - -#define REG_A5XX_RBBM_PERFCTR_TSE_3_HI 0x000003ff - -#define REG_A5XX_RBBM_PERFCTR_RAS_0_LO 0x00000400 - -#define REG_A5XX_RBBM_PERFCTR_RAS_0_HI 0x00000401 - -#define REG_A5XX_RBBM_PERFCTR_RAS_1_LO 0x00000402 - -#define REG_A5XX_RBBM_PERFCTR_RAS_1_HI 0x00000403 - -#define REG_A5XX_RBBM_PERFCTR_RAS_2_LO 0x00000404 - -#define REG_A5XX_RBBM_PERFCTR_RAS_2_HI 0x00000405 - -#define REG_A5XX_RBBM_PERFCTR_RAS_3_LO 0x00000406 - -#define REG_A5XX_RBBM_PERFCTR_RAS_3_HI 0x00000407 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_0_LO 0x00000408 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_0_HI 0x00000409 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_1_LO 0x0000040a - -#define REG_A5XX_RBBM_PERFCTR_UCHE_1_HI 0x0000040b - -#define REG_A5XX_RBBM_PERFCTR_UCHE_2_LO 0x0000040c - -#define REG_A5XX_RBBM_PERFCTR_UCHE_2_HI 0x0000040d - -#define REG_A5XX_RBBM_PERFCTR_UCHE_3_LO 0x0000040e - -#define REG_A5XX_RBBM_PERFCTR_UCHE_3_HI 0x0000040f - -#define REG_A5XX_RBBM_PERFCTR_UCHE_4_LO 0x00000410 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_4_HI 0x00000411 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_5_LO 0x00000412 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_5_HI 0x00000413 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_6_LO 0x00000414 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_6_HI 0x00000415 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_7_LO 0x00000416 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_7_HI 0x00000417 - -#define REG_A5XX_RBBM_PERFCTR_TP_0_LO 0x00000418 - -#define REG_A5XX_RBBM_PERFCTR_TP_0_HI 0x00000419 - -#define REG_A5XX_RBBM_PERFCTR_TP_1_LO 0x0000041a - -#define REG_A5XX_RBBM_PERFCTR_TP_1_HI 0x0000041b - -#define REG_A5XX_RBBM_PERFCTR_TP_2_LO 0x0000041c - -#define REG_A5XX_RBBM_PERFCTR_TP_2_HI 0x0000041d - -#define REG_A5XX_RBBM_PERFCTR_TP_3_LO 0x0000041e - -#define REG_A5XX_RBBM_PERFCTR_TP_3_HI 
0x0000041f - -#define REG_A5XX_RBBM_PERFCTR_TP_4_LO 0x00000420 - -#define REG_A5XX_RBBM_PERFCTR_TP_4_HI 0x00000421 - -#define REG_A5XX_RBBM_PERFCTR_TP_5_LO 0x00000422 - -#define REG_A5XX_RBBM_PERFCTR_TP_5_HI 0x00000423 - -#define REG_A5XX_RBBM_PERFCTR_TP_6_LO 0x00000424 - -#define REG_A5XX_RBBM_PERFCTR_TP_6_HI 0x00000425 - -#define REG_A5XX_RBBM_PERFCTR_TP_7_LO 0x00000426 - -#define REG_A5XX_RBBM_PERFCTR_TP_7_HI 0x00000427 - -#define REG_A5XX_RBBM_PERFCTR_SP_0_LO 0x00000428 - -#define REG_A5XX_RBBM_PERFCTR_SP_0_HI 0x00000429 - -#define REG_A5XX_RBBM_PERFCTR_SP_1_LO 0x0000042a - -#define REG_A5XX_RBBM_PERFCTR_SP_1_HI 0x0000042b - -#define REG_A5XX_RBBM_PERFCTR_SP_2_LO 0x0000042c - -#define REG_A5XX_RBBM_PERFCTR_SP_2_HI 0x0000042d - -#define REG_A5XX_RBBM_PERFCTR_SP_3_LO 0x0000042e - -#define REG_A5XX_RBBM_PERFCTR_SP_3_HI 0x0000042f - -#define REG_A5XX_RBBM_PERFCTR_SP_4_LO 0x00000430 - -#define REG_A5XX_RBBM_PERFCTR_SP_4_HI 0x00000431 - -#define REG_A5XX_RBBM_PERFCTR_SP_5_LO 0x00000432 - -#define REG_A5XX_RBBM_PERFCTR_SP_5_HI 0x00000433 - -#define REG_A5XX_RBBM_PERFCTR_SP_6_LO 0x00000434 - -#define REG_A5XX_RBBM_PERFCTR_SP_6_HI 0x00000435 - -#define REG_A5XX_RBBM_PERFCTR_SP_7_LO 0x00000436 - -#define REG_A5XX_RBBM_PERFCTR_SP_7_HI 0x00000437 - -#define REG_A5XX_RBBM_PERFCTR_SP_8_LO 0x00000438 - -#define REG_A5XX_RBBM_PERFCTR_SP_8_HI 0x00000439 - -#define REG_A5XX_RBBM_PERFCTR_SP_9_LO 0x0000043a - -#define REG_A5XX_RBBM_PERFCTR_SP_9_HI 0x0000043b - -#define REG_A5XX_RBBM_PERFCTR_SP_10_LO 0x0000043c - -#define REG_A5XX_RBBM_PERFCTR_SP_10_HI 0x0000043d - -#define REG_A5XX_RBBM_PERFCTR_SP_11_LO 0x0000043e - -#define REG_A5XX_RBBM_PERFCTR_SP_11_HI 0x0000043f - -#define REG_A5XX_RBBM_PERFCTR_RB_0_LO 0x00000440 - -#define REG_A5XX_RBBM_PERFCTR_RB_0_HI 0x00000441 - -#define REG_A5XX_RBBM_PERFCTR_RB_1_LO 0x00000442 - -#define REG_A5XX_RBBM_PERFCTR_RB_1_HI 0x00000443 - -#define REG_A5XX_RBBM_PERFCTR_RB_2_LO 0x00000444 - -#define REG_A5XX_RBBM_PERFCTR_RB_2_HI 0x00000445 - 
-#define REG_A5XX_RBBM_PERFCTR_RB_3_LO 0x00000446 - -#define REG_A5XX_RBBM_PERFCTR_RB_3_HI 0x00000447 - -#define REG_A5XX_RBBM_PERFCTR_RB_4_LO 0x00000448 - -#define REG_A5XX_RBBM_PERFCTR_RB_4_HI 0x00000449 - -#define REG_A5XX_RBBM_PERFCTR_RB_5_LO 0x0000044a - -#define REG_A5XX_RBBM_PERFCTR_RB_5_HI 0x0000044b - -#define REG_A5XX_RBBM_PERFCTR_RB_6_LO 0x0000044c - -#define REG_A5XX_RBBM_PERFCTR_RB_6_HI 0x0000044d - -#define REG_A5XX_RBBM_PERFCTR_RB_7_LO 0x0000044e - -#define REG_A5XX_RBBM_PERFCTR_RB_7_HI 0x0000044f - -#define REG_A5XX_RBBM_PERFCTR_VSC_0_LO 0x00000450 - -#define REG_A5XX_RBBM_PERFCTR_VSC_0_HI 0x00000451 - -#define REG_A5XX_RBBM_PERFCTR_VSC_1_LO 0x00000452 - -#define REG_A5XX_RBBM_PERFCTR_VSC_1_HI 0x00000453 - -#define REG_A5XX_RBBM_PERFCTR_LRZ_0_LO 0x00000454 - -#define REG_A5XX_RBBM_PERFCTR_LRZ_0_HI 0x00000455 - -#define REG_A5XX_RBBM_PERFCTR_LRZ_1_LO 0x00000456 - -#define REG_A5XX_RBBM_PERFCTR_LRZ_1_HI 0x00000457 - -#define REG_A5XX_RBBM_PERFCTR_LRZ_2_LO 0x00000458 - -#define REG_A5XX_RBBM_PERFCTR_LRZ_2_HI 0x00000459 - -#define REG_A5XX_RBBM_PERFCTR_LRZ_3_LO 0x0000045a - -#define REG_A5XX_RBBM_PERFCTR_LRZ_3_HI 0x0000045b - -#define REG_A5XX_RBBM_PERFCTR_CMP_0_LO 0x0000045c - -#define REG_A5XX_RBBM_PERFCTR_CMP_0_HI 0x0000045d - -#define REG_A5XX_RBBM_PERFCTR_CMP_1_LO 0x0000045e - -#define REG_A5XX_RBBM_PERFCTR_CMP_1_HI 0x0000045f - -#define REG_A5XX_RBBM_PERFCTR_CMP_2_LO 0x00000460 - -#define REG_A5XX_RBBM_PERFCTR_CMP_2_HI 0x00000461 - -#define REG_A5XX_RBBM_PERFCTR_CMP_3_LO 0x00000462 - -#define REG_A5XX_RBBM_PERFCTR_CMP_3_HI 0x00000463 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0 0x0000046b - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_1 0x0000046c - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_2 0x0000046d - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_3 0x0000046e - -#define REG_A5XX_RBBM_ALWAYSON_COUNTER_LO 0x000004d2 - -#define REG_A5XX_RBBM_ALWAYSON_COUNTER_HI 0x000004d3 - -#define REG_A5XX_RBBM_STATUS 0x000004f5 -#define A5XX_RBBM_STATUS_GPU_BUSY_IGN_AHB 
0x80000000 -#define A5XX_RBBM_STATUS_GPU_BUSY_IGN_AHB_CP 0x40000000 -#define A5XX_RBBM_STATUS_HLSQ_BUSY 0x20000000 -#define A5XX_RBBM_STATUS_VSC_BUSY 0x10000000 -#define A5XX_RBBM_STATUS_TPL1_BUSY 0x08000000 -#define A5XX_RBBM_STATUS_SP_BUSY 0x04000000 -#define A5XX_RBBM_STATUS_UCHE_BUSY 0x02000000 -#define A5XX_RBBM_STATUS_VPC_BUSY 0x01000000 -#define A5XX_RBBM_STATUS_VFDP_BUSY 0x00800000 -#define A5XX_RBBM_STATUS_VFD_BUSY 0x00400000 -#define A5XX_RBBM_STATUS_TESS_BUSY 0x00200000 -#define A5XX_RBBM_STATUS_PC_VSD_BUSY 0x00100000 -#define A5XX_RBBM_STATUS_PC_DCALL_BUSY 0x00080000 -#define A5XX_RBBM_STATUS_GPMU_SLAVE_BUSY 0x00040000 -#define A5XX_RBBM_STATUS_DCOM_BUSY 0x00020000 -#define A5XX_RBBM_STATUS_COM_BUSY 0x00010000 -#define A5XX_RBBM_STATUS_LRZ_BUZY 0x00008000 -#define A5XX_RBBM_STATUS_A2D_DSP_BUSY 0x00004000 -#define A5XX_RBBM_STATUS_CCUFCHE_BUSY 0x00002000 -#define A5XX_RBBM_STATUS_RB_BUSY 0x00001000 -#define A5XX_RBBM_STATUS_RAS_BUSY 0x00000800 -#define A5XX_RBBM_STATUS_TSE_BUSY 0x00000400 -#define A5XX_RBBM_STATUS_VBIF_BUSY 0x00000200 -#define A5XX_RBBM_STATUS_GPU_BUSY_IGN_AHB_HYST 0x00000100 -#define A5XX_RBBM_STATUS_CP_BUSY_IGN_HYST 0x00000080 -#define A5XX_RBBM_STATUS_CP_BUSY 0x00000040 -#define A5XX_RBBM_STATUS_GPMU_MASTER_BUSY 0x00000020 -#define A5XX_RBBM_STATUS_CP_CRASH_BUSY 0x00000010 -#define A5XX_RBBM_STATUS_CP_ETS_BUSY 0x00000008 -#define A5XX_RBBM_STATUS_CP_PFP_BUSY 0x00000004 -#define A5XX_RBBM_STATUS_CP_ME_BUSY 0x00000002 -#define A5XX_RBBM_STATUS_HI_BUSY 0x00000001 - -#define REG_A5XX_RBBM_STATUS3 0x00000530 - -#define REG_A5XX_RBBM_INT_0_STATUS 0x000004e1 - -#define REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS 0x000004f0 - -#define REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS 0x000004f1 - -#define REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS 0x000004f3 - -#define REG_A5XX_RBBM_AHB_ERROR_STATUS 0x000004f4 - -#define REG_A5XX_RBBM_PERFCTR_CNTL 0x00000464 - -#define REG_A5XX_RBBM_PERFCTR_LOAD_CMD0 0x00000465 - -#define REG_A5XX_RBBM_PERFCTR_LOAD_CMD1 0x00000466 - -#define 
REG_A5XX_RBBM_PERFCTR_LOAD_CMD2 0x00000467 - -#define REG_A5XX_RBBM_PERFCTR_LOAD_CMD3 0x00000468 - -#define REG_A5XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000469 - -#define REG_A5XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x0000046a - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0 0x0000046b - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_1 0x0000046c - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_2 0x0000046d - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_3 0x0000046e - -#define REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED 0x0000046f - -#define REG_A5XX_RBBM_AHB_ERROR 0x000004ed - -#define REG_A5XX_RBBM_CFG_DBGBUS_EVENT_LOGIC 0x00000504 - -#define REG_A5XX_RBBM_CFG_DBGBUS_OVER 0x00000505 - -#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT0 0x00000506 - -#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT1 0x00000507 - -#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT2 0x00000508 - -#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT3 0x00000509 - -#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT4 0x0000050a - -#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT5 0x0000050b - -#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_ADDR 0x0000050c - -#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF0 0x0000050d - -#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1 0x0000050e - -#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2 0x0000050f - -#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF3 0x00000510 - -#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF4 0x00000511 - -#define REG_A5XX_RBBM_CFG_DBGBUS_MISR0 0x00000512 - -#define REG_A5XX_RBBM_CFG_DBGBUS_MISR1 0x00000513 - -#define REG_A5XX_RBBM_ISDB_CNT 0x00000533 - -#define REG_A5XX_RBBM_SECVID_TRUST_CONFIG 0x0000f000 - -#define REG_A5XX_RBBM_SECVID_TRUST_CNTL 0x0000f400 - -#define REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO 0x0000f800 - -#define REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0x0000f801 - -#define REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE 0x0000f802 - -#define REG_A5XX_RBBM_SECVID_TSB_CNTL 0x0000f803 - -#define REG_A5XX_RBBM_SECVID_TSB_COMP_STATUS_LO 0x0000f804 - -#define REG_A5XX_RBBM_SECVID_TSB_COMP_STATUS_HI 0x0000f805 - -#define REG_A5XX_RBBM_SECVID_TSB_UCHE_STATUS_LO 
0x0000f806 - -#define REG_A5XX_RBBM_SECVID_TSB_UCHE_STATUS_HI 0x0000f807 - -#define REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL 0x0000f810 - -#define REG_A5XX_VSC_BIN_SIZE 0x00000bc2 -#define A5XX_VSC_BIN_SIZE_WIDTH__MASK 0x000000ff -#define A5XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 -static inline uint32_t A5XX_VSC_BIN_SIZE_WIDTH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A5XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A5XX_VSC_BIN_SIZE_WIDTH__MASK; -} -#define A5XX_VSC_BIN_SIZE_HEIGHT__MASK 0x0001fe00 -#define A5XX_VSC_BIN_SIZE_HEIGHT__SHIFT 9 -static inline uint32_t A5XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A5XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A5XX_VSC_BIN_SIZE_HEIGHT__MASK; -} - -#define REG_A5XX_VSC_SIZE_ADDRESS_LO 0x00000bc3 - -#define REG_A5XX_VSC_SIZE_ADDRESS_HI 0x00000bc4 - -#define REG_A5XX_UNKNOWN_0BC5 0x00000bc5 - -#define REG_A5XX_UNKNOWN_0BC6 0x00000bc6 - -static inline uint32_t REG_A5XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000bd0 + 0x1*i0; } - -static inline uint32_t REG_A5XX_VSC_PIPE_CONFIG_REG(uint32_t i0) { return 0x00000bd0 + 0x1*i0; } -#define A5XX_VSC_PIPE_CONFIG_REG_X__MASK 0x000003ff -#define A5XX_VSC_PIPE_CONFIG_REG_X__SHIFT 0 -static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_X(uint32_t val) -{ - return ((val) << A5XX_VSC_PIPE_CONFIG_REG_X__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_X__MASK; -} -#define A5XX_VSC_PIPE_CONFIG_REG_Y__MASK 0x000ffc00 -#define A5XX_VSC_PIPE_CONFIG_REG_Y__SHIFT 10 -static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_Y(uint32_t val) -{ - return ((val) << A5XX_VSC_PIPE_CONFIG_REG_Y__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_Y__MASK; -} -#define A5XX_VSC_PIPE_CONFIG_REG_W__MASK 0x00f00000 -#define A5XX_VSC_PIPE_CONFIG_REG_W__SHIFT 20 -static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_W(uint32_t val) -{ - return ((val) << A5XX_VSC_PIPE_CONFIG_REG_W__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_W__MASK; -} -#define A5XX_VSC_PIPE_CONFIG_REG_H__MASK 0x0f000000 -#define A5XX_VSC_PIPE_CONFIG_REG_H__SHIFT 24 
-static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_H(uint32_t val) -{ - return ((val) << A5XX_VSC_PIPE_CONFIG_REG_H__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_H__MASK; -} - -static inline uint32_t REG_A5XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000be0 + 0x2*i0; } - -static inline uint32_t REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(uint32_t i0) { return 0x00000be0 + 0x2*i0; } - -static inline uint32_t REG_A5XX_VSC_PIPE_DATA_ADDRESS_HI(uint32_t i0) { return 0x00000be1 + 0x2*i0; } - -static inline uint32_t REG_A5XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c00 + 0x1*i0; } - -static inline uint32_t REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(uint32_t i0) { return 0x00000c00 + 0x1*i0; } - -#define REG_A5XX_VSC_PERFCTR_VSC_SEL_0 0x00000c60 - -#define REG_A5XX_VSC_PERFCTR_VSC_SEL_1 0x00000c61 - -#define REG_A5XX_VSC_RESOLVE_CNTL 0x00000cdd -#define A5XX_VSC_RESOLVE_CNTL_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_VSC_RESOLVE_CNTL_X__MASK 0x00007fff -#define A5XX_VSC_RESOLVE_CNTL_X__SHIFT 0 -static inline uint32_t A5XX_VSC_RESOLVE_CNTL_X(uint32_t val) -{ - return ((val) << A5XX_VSC_RESOLVE_CNTL_X__SHIFT) & A5XX_VSC_RESOLVE_CNTL_X__MASK; -} -#define A5XX_VSC_RESOLVE_CNTL_Y__MASK 0x7fff0000 -#define A5XX_VSC_RESOLVE_CNTL_Y__SHIFT 16 -static inline uint32_t A5XX_VSC_RESOLVE_CNTL_Y(uint32_t val) -{ - return ((val) << A5XX_VSC_RESOLVE_CNTL_Y__SHIFT) & A5XX_VSC_RESOLVE_CNTL_Y__MASK; -} - -#define REG_A5XX_GRAS_ADDR_MODE_CNTL 0x00000c81 - -#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_0 0x00000c90 - -#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_1 0x00000c91 - -#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_2 0x00000c92 - -#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c93 - -#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_0 0x00000c94 - -#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_1 0x00000c95 - -#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_2 0x00000c96 - -#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_3 0x00000c97 - -#define REG_A5XX_GRAS_PERFCTR_LRZ_SEL_0 0x00000c98 - -#define REG_A5XX_GRAS_PERFCTR_LRZ_SEL_1 0x00000c99 - -#define 
REG_A5XX_GRAS_PERFCTR_LRZ_SEL_2 0x00000c9a - -#define REG_A5XX_GRAS_PERFCTR_LRZ_SEL_3 0x00000c9b - -#define REG_A5XX_RB_DBG_ECO_CNTL 0x00000cc4 - -#define REG_A5XX_RB_ADDR_MODE_CNTL 0x00000cc5 - -#define REG_A5XX_RB_MODE_CNTL 0x00000cc6 - -#define REG_A5XX_RB_CCU_CNTL 0x00000cc7 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_0 0x00000cd0 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_1 0x00000cd1 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_2 0x00000cd2 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_3 0x00000cd3 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_4 0x00000cd4 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_5 0x00000cd5 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_6 0x00000cd6 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_7 0x00000cd7 - -#define REG_A5XX_RB_PERFCTR_CCU_SEL_0 0x00000cd8 - -#define REG_A5XX_RB_PERFCTR_CCU_SEL_1 0x00000cd9 - -#define REG_A5XX_RB_PERFCTR_CCU_SEL_2 0x00000cda - -#define REG_A5XX_RB_PERFCTR_CCU_SEL_3 0x00000cdb - -#define REG_A5XX_RB_POWERCTR_RB_SEL_0 0x00000ce0 - -#define REG_A5XX_RB_POWERCTR_RB_SEL_1 0x00000ce1 - -#define REG_A5XX_RB_POWERCTR_RB_SEL_2 0x00000ce2 - -#define REG_A5XX_RB_POWERCTR_RB_SEL_3 0x00000ce3 - -#define REG_A5XX_RB_POWERCTR_CCU_SEL_0 0x00000ce4 - -#define REG_A5XX_RB_POWERCTR_CCU_SEL_1 0x00000ce5 - -#define REG_A5XX_RB_PERFCTR_CMP_SEL_0 0x00000cec - -#define REG_A5XX_RB_PERFCTR_CMP_SEL_1 0x00000ced - -#define REG_A5XX_RB_PERFCTR_CMP_SEL_2 0x00000cee - -#define REG_A5XX_RB_PERFCTR_CMP_SEL_3 0x00000cef - -#define REG_A5XX_PC_DBG_ECO_CNTL 0x00000d00 -#define A5XX_PC_DBG_ECO_CNTL_TWOPASSUSEWFI 0x00000100 - -#define REG_A5XX_PC_ADDR_MODE_CNTL 0x00000d01 - -#define REG_A5XX_PC_MODE_CNTL 0x00000d02 - -#define REG_A5XX_PC_INDEX_BUF_LO 0x00000d04 - -#define REG_A5XX_PC_INDEX_BUF_HI 0x00000d05 - -#define REG_A5XX_PC_START_INDEX 0x00000d06 - -#define REG_A5XX_PC_MAX_INDEX 0x00000d07 - -#define REG_A5XX_PC_TESSFACTOR_ADDR_LO 0x00000d08 - -#define REG_A5XX_PC_TESSFACTOR_ADDR_HI 0x00000d09 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_0 0x00000d10 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_1 
0x00000d11 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_2 0x00000d12 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_3 0x00000d13 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_4 0x00000d14 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_5 0x00000d15 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_6 0x00000d16 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_7 0x00000d17 - -#define REG_A5XX_HLSQ_TIMEOUT_THRESHOLD_0 0x00000e00 - -#define REG_A5XX_HLSQ_TIMEOUT_THRESHOLD_1 0x00000e01 - -#define REG_A5XX_HLSQ_ADDR_MODE_CNTL 0x00000e05 - -#define REG_A5XX_HLSQ_MODE_CNTL 0x00000e06 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_0 0x00000e10 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_1 0x00000e11 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_2 0x00000e12 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_3 0x00000e13 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_4 0x00000e14 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_5 0x00000e15 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_6 0x00000e16 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_7 0x00000e17 - -#define REG_A5XX_HLSQ_SPTP_RDSEL 0x00000f08 - -#define REG_A5XX_HLSQ_DBG_READ_SEL 0x0000bc00 - -#define REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE 0x0000a000 - -#define REG_A5XX_VFD_ADDR_MODE_CNTL 0x00000e41 - -#define REG_A5XX_VFD_MODE_CNTL 0x00000e42 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_0 0x00000e50 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_1 0x00000e51 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_2 0x00000e52 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_3 0x00000e53 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_4 0x00000e54 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_5 0x00000e55 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_6 0x00000e56 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_7 0x00000e57 - -#define REG_A5XX_VPC_DBG_ECO_CNTL 0x00000e60 - -#define REG_A5XX_VPC_ADDR_MODE_CNTL 0x00000e61 - -#define REG_A5XX_VPC_MODE_CNTL 0x00000e62 -#define A5XX_VPC_MODE_CNTL_BINNING_PASS 0x00000001 - -#define REG_A5XX_VPC_PERFCTR_VPC_SEL_0 0x00000e64 - -#define REG_A5XX_VPC_PERFCTR_VPC_SEL_1 0x00000e65 - -#define REG_A5XX_VPC_PERFCTR_VPC_SEL_2 0x00000e66 - 
-#define REG_A5XX_VPC_PERFCTR_VPC_SEL_3 0x00000e67 - -#define REG_A5XX_UCHE_ADDR_MODE_CNTL 0x00000e80 - -#define REG_A5XX_UCHE_SVM_CNTL 0x00000e82 - -#define REG_A5XX_UCHE_WRITE_THRU_BASE_LO 0x00000e87 - -#define REG_A5XX_UCHE_WRITE_THRU_BASE_HI 0x00000e88 - -#define REG_A5XX_UCHE_TRAP_BASE_LO 0x00000e89 - -#define REG_A5XX_UCHE_TRAP_BASE_HI 0x00000e8a - -#define REG_A5XX_UCHE_GMEM_RANGE_MIN_LO 0x00000e8b - -#define REG_A5XX_UCHE_GMEM_RANGE_MIN_HI 0x00000e8c - -#define REG_A5XX_UCHE_GMEM_RANGE_MAX_LO 0x00000e8d - -#define REG_A5XX_UCHE_GMEM_RANGE_MAX_HI 0x00000e8e - -#define REG_A5XX_UCHE_DBG_ECO_CNTL_2 0x00000e8f - -#define REG_A5XX_UCHE_DBG_ECO_CNTL 0x00000e90 - -#define REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_LO 0x00000e91 - -#define REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_HI 0x00000e92 - -#define REG_A5XX_UCHE_CACHE_INVALIDATE_MAX_LO 0x00000e93 - -#define REG_A5XX_UCHE_CACHE_INVALIDATE_MAX_HI 0x00000e94 - -#define REG_A5XX_UCHE_CACHE_INVALIDATE 0x00000e95 - -#define REG_A5XX_UCHE_CACHE_WAYS 0x00000e96 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_0 0x00000ea0 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_1 0x00000ea1 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_2 0x00000ea2 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_3 0x00000ea3 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_4 0x00000ea4 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_5 0x00000ea5 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_6 0x00000ea6 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_7 0x00000ea7 - -#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_0 0x00000ea8 - -#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_1 0x00000ea9 - -#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_2 0x00000eaa - -#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_3 0x00000eab - -#define REG_A5XX_UCHE_TRAP_LOG_LO 0x00000eb1 - -#define REG_A5XX_UCHE_TRAP_LOG_HI 0x00000eb2 - -#define REG_A5XX_SP_DBG_ECO_CNTL 0x00000ec0 - -#define REG_A5XX_SP_ADDR_MODE_CNTL 0x00000ec1 - -#define REG_A5XX_SP_MODE_CNTL 0x00000ec2 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_0 0x00000ed0 - -#define 
REG_A5XX_SP_PERFCTR_SP_SEL_1 0x00000ed1 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_2 0x00000ed2 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_3 0x00000ed3 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_4 0x00000ed4 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_5 0x00000ed5 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_6 0x00000ed6 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_7 0x00000ed7 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_8 0x00000ed8 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_9 0x00000ed9 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_10 0x00000eda - -#define REG_A5XX_SP_PERFCTR_SP_SEL_11 0x00000edb - -#define REG_A5XX_SP_POWERCTR_SP_SEL_0 0x00000edc - -#define REG_A5XX_SP_POWERCTR_SP_SEL_1 0x00000edd - -#define REG_A5XX_SP_POWERCTR_SP_SEL_2 0x00000ede - -#define REG_A5XX_SP_POWERCTR_SP_SEL_3 0x00000edf - -#define REG_A5XX_TPL1_ADDR_MODE_CNTL 0x00000f01 - -#define REG_A5XX_TPL1_MODE_CNTL 0x00000f02 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_0 0x00000f10 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_1 0x00000f11 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_2 0x00000f12 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_3 0x00000f13 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_4 0x00000f14 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_5 0x00000f15 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_6 0x00000f16 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_7 0x00000f17 - -#define REG_A5XX_TPL1_POWERCTR_TP_SEL_0 0x00000f18 - -#define REG_A5XX_TPL1_POWERCTR_TP_SEL_1 0x00000f19 - -#define REG_A5XX_TPL1_POWERCTR_TP_SEL_2 0x00000f1a - -#define REG_A5XX_TPL1_POWERCTR_TP_SEL_3 0x00000f1b - -#define REG_A5XX_VBIF_VERSION 0x00003000 - -#define REG_A5XX_VBIF_CLKON 0x00003001 - -#define REG_A5XX_VBIF_ABIT_SORT 0x00003028 - -#define REG_A5XX_VBIF_ABIT_SORT_CONF 0x00003029 - -#define REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB 0x00003049 - -#define REG_A5XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a - -#define REG_A5XX_VBIF_IN_RD_LIM_CONF0 0x0000302c - -#define REG_A5XX_VBIF_IN_RD_LIM_CONF1 0x0000302d - -#define REG_A5XX_VBIF_XIN_HALT_CTRL0 0x00003080 - -#define REG_A5XX_VBIF_XIN_HALT_CTRL1 0x00003081 - 
-#define REG_A5XX_VBIF_TEST_BUS_OUT_CTRL 0x00003084 - -#define REG_A5XX_VBIF_TEST_BUS1_CTRL0 0x00003085 - -#define REG_A5XX_VBIF_TEST_BUS1_CTRL1 0x00003086 - -#define REG_A5XX_VBIF_TEST_BUS2_CTRL0 0x00003087 - -#define REG_A5XX_VBIF_TEST_BUS2_CTRL1 0x00003088 - -#define REG_A5XX_VBIF_TEST_BUS_OUT 0x0000308c - -#define REG_A5XX_VBIF_PERF_CNT_EN0 0x000030c0 - -#define REG_A5XX_VBIF_PERF_CNT_EN1 0x000030c1 - -#define REG_A5XX_VBIF_PERF_CNT_EN2 0x000030c2 - -#define REG_A5XX_VBIF_PERF_CNT_EN3 0x000030c3 - -#define REG_A5XX_VBIF_PERF_CNT_CLR0 0x000030c8 - -#define REG_A5XX_VBIF_PERF_CNT_CLR1 0x000030c9 - -#define REG_A5XX_VBIF_PERF_CNT_CLR2 0x000030ca - -#define REG_A5XX_VBIF_PERF_CNT_CLR3 0x000030cb - -#define REG_A5XX_VBIF_PERF_CNT_SEL0 0x000030d0 - -#define REG_A5XX_VBIF_PERF_CNT_SEL1 0x000030d1 - -#define REG_A5XX_VBIF_PERF_CNT_SEL2 0x000030d2 - -#define REG_A5XX_VBIF_PERF_CNT_SEL3 0x000030d3 - -#define REG_A5XX_VBIF_PERF_CNT_LOW0 0x000030d8 - -#define REG_A5XX_VBIF_PERF_CNT_LOW1 0x000030d9 - -#define REG_A5XX_VBIF_PERF_CNT_LOW2 0x000030da - -#define REG_A5XX_VBIF_PERF_CNT_LOW3 0x000030db - -#define REG_A5XX_VBIF_PERF_CNT_HIGH0 0x000030e0 - -#define REG_A5XX_VBIF_PERF_CNT_HIGH1 0x000030e1 - -#define REG_A5XX_VBIF_PERF_CNT_HIGH2 0x000030e2 - -#define REG_A5XX_VBIF_PERF_CNT_HIGH3 0x000030e3 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_EN0 0x00003100 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_EN1 0x00003101 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_EN2 0x00003102 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_LOW0 0x00003110 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_LOW1 0x00003111 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_LOW2 0x00003112 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_HIGH0 0x00003118 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_HIGH1 0x00003119 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_HIGH2 0x0000311a - -#define REG_A5XX_GPMU_INST_RAM_BASE 0x00008800 - -#define REG_A5XX_GPMU_DATA_RAM_BASE 0x00009800 - -#define REG_A5XX_GPMU_SP_POWER_CNTL 0x0000a881 - -#define REG_A5XX_GPMU_RBCCU_CLOCK_CNTL 0x0000a886 - 
-#define REG_A5XX_GPMU_RBCCU_POWER_CNTL 0x0000a887 - -#define REG_A5XX_GPMU_SP_PWR_CLK_STATUS 0x0000a88b -#define A5XX_GPMU_SP_PWR_CLK_STATUS_PWR_ON 0x00100000 - -#define REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS 0x0000a88d -#define A5XX_GPMU_RBCCU_PWR_CLK_STATUS_PWR_ON 0x00100000 - -#define REG_A5XX_GPMU_PWR_COL_STAGGER_DELAY 0x0000a891 - -#define REG_A5XX_GPMU_PWR_COL_INTER_FRAME_CTRL 0x0000a892 - -#define REG_A5XX_GPMU_PWR_COL_INTER_FRAME_HYST 0x0000a893 - -#define REG_A5XX_GPMU_PWR_COL_BINNING_CTRL 0x0000a894 - -#define REG_A5XX_GPMU_CLOCK_THROTTLE_CTRL 0x0000a8a3 - -#define REG_A5XX_GPMU_WFI_CONFIG 0x0000a8c1 - -#define REG_A5XX_GPMU_RBBM_INTR_INFO 0x0000a8d6 - -#define REG_A5XX_GPMU_CM3_SYSRESET 0x0000a8d8 - -#define REG_A5XX_GPMU_GENERAL_0 0x0000a8e0 - -#define REG_A5XX_GPMU_GENERAL_1 0x0000a8e1 - -#define REG_A5XX_SP_POWER_COUNTER_0_LO 0x0000a840 - -#define REG_A5XX_SP_POWER_COUNTER_0_HI 0x0000a841 - -#define REG_A5XX_SP_POWER_COUNTER_1_LO 0x0000a842 - -#define REG_A5XX_SP_POWER_COUNTER_1_HI 0x0000a843 - -#define REG_A5XX_SP_POWER_COUNTER_2_LO 0x0000a844 - -#define REG_A5XX_SP_POWER_COUNTER_2_HI 0x0000a845 - -#define REG_A5XX_SP_POWER_COUNTER_3_LO 0x0000a846 - -#define REG_A5XX_SP_POWER_COUNTER_3_HI 0x0000a847 - -#define REG_A5XX_TP_POWER_COUNTER_0_LO 0x0000a848 - -#define REG_A5XX_TP_POWER_COUNTER_0_HI 0x0000a849 - -#define REG_A5XX_TP_POWER_COUNTER_1_LO 0x0000a84a - -#define REG_A5XX_TP_POWER_COUNTER_1_HI 0x0000a84b - -#define REG_A5XX_TP_POWER_COUNTER_2_LO 0x0000a84c - -#define REG_A5XX_TP_POWER_COUNTER_2_HI 0x0000a84d - -#define REG_A5XX_TP_POWER_COUNTER_3_LO 0x0000a84e - -#define REG_A5XX_TP_POWER_COUNTER_3_HI 0x0000a84f - -#define REG_A5XX_RB_POWER_COUNTER_0_LO 0x0000a850 - -#define REG_A5XX_RB_POWER_COUNTER_0_HI 0x0000a851 - -#define REG_A5XX_RB_POWER_COUNTER_1_LO 0x0000a852 - -#define REG_A5XX_RB_POWER_COUNTER_1_HI 0x0000a853 - -#define REG_A5XX_RB_POWER_COUNTER_2_LO 0x0000a854 - -#define REG_A5XX_RB_POWER_COUNTER_2_HI 0x0000a855 - -#define 
REG_A5XX_RB_POWER_COUNTER_3_LO 0x0000a856 - -#define REG_A5XX_RB_POWER_COUNTER_3_HI 0x0000a857 - -#define REG_A5XX_CCU_POWER_COUNTER_0_LO 0x0000a858 - -#define REG_A5XX_CCU_POWER_COUNTER_0_HI 0x0000a859 - -#define REG_A5XX_CCU_POWER_COUNTER_1_LO 0x0000a85a - -#define REG_A5XX_CCU_POWER_COUNTER_1_HI 0x0000a85b - -#define REG_A5XX_UCHE_POWER_COUNTER_0_LO 0x0000a85c - -#define REG_A5XX_UCHE_POWER_COUNTER_0_HI 0x0000a85d - -#define REG_A5XX_UCHE_POWER_COUNTER_1_LO 0x0000a85e - -#define REG_A5XX_UCHE_POWER_COUNTER_1_HI 0x0000a85f - -#define REG_A5XX_UCHE_POWER_COUNTER_2_LO 0x0000a860 - -#define REG_A5XX_UCHE_POWER_COUNTER_2_HI 0x0000a861 - -#define REG_A5XX_UCHE_POWER_COUNTER_3_LO 0x0000a862 - -#define REG_A5XX_UCHE_POWER_COUNTER_3_HI 0x0000a863 - -#define REG_A5XX_CP_POWER_COUNTER_0_LO 0x0000a864 - -#define REG_A5XX_CP_POWER_COUNTER_0_HI 0x0000a865 - -#define REG_A5XX_CP_POWER_COUNTER_1_LO 0x0000a866 - -#define REG_A5XX_CP_POWER_COUNTER_1_HI 0x0000a867 - -#define REG_A5XX_CP_POWER_COUNTER_2_LO 0x0000a868 - -#define REG_A5XX_CP_POWER_COUNTER_2_HI 0x0000a869 - -#define REG_A5XX_CP_POWER_COUNTER_3_LO 0x0000a86a - -#define REG_A5XX_CP_POWER_COUNTER_3_HI 0x0000a86b - -#define REG_A5XX_GPMU_POWER_COUNTER_0_LO 0x0000a86c - -#define REG_A5XX_GPMU_POWER_COUNTER_0_HI 0x0000a86d - -#define REG_A5XX_GPMU_POWER_COUNTER_1_LO 0x0000a86e - -#define REG_A5XX_GPMU_POWER_COUNTER_1_HI 0x0000a86f - -#define REG_A5XX_GPMU_POWER_COUNTER_2_LO 0x0000a870 - -#define REG_A5XX_GPMU_POWER_COUNTER_2_HI 0x0000a871 - -#define REG_A5XX_GPMU_POWER_COUNTER_3_LO 0x0000a872 - -#define REG_A5XX_GPMU_POWER_COUNTER_3_HI 0x0000a873 - -#define REG_A5XX_GPMU_POWER_COUNTER_4_LO 0x0000a874 - -#define REG_A5XX_GPMU_POWER_COUNTER_4_HI 0x0000a875 - -#define REG_A5XX_GPMU_POWER_COUNTER_5_LO 0x0000a876 - -#define REG_A5XX_GPMU_POWER_COUNTER_5_HI 0x0000a877 - -#define REG_A5XX_GPMU_POWER_COUNTER_ENABLE 0x0000a878 - -#define REG_A5XX_GPMU_ALWAYS_ON_COUNTER_LO 0x0000a879 - -#define REG_A5XX_GPMU_ALWAYS_ON_COUNTER_HI 
0x0000a87a - -#define REG_A5XX_GPMU_ALWAYS_ON_COUNTER_RESET 0x0000a87b - -#define REG_A5XX_GPMU_POWER_COUNTER_SELECT_0 0x0000a87c - -#define REG_A5XX_GPMU_POWER_COUNTER_SELECT_1 0x0000a87d - -#define REG_A5XX_GPMU_CLOCK_THROTTLE_CTRL 0x0000a8a3 - -#define REG_A5XX_GPMU_THROTTLE_UNMASK_FORCE_CTRL 0x0000a8a8 - -#define REG_A5XX_GPMU_TEMP_SENSOR_ID 0x0000ac00 - -#define REG_A5XX_GPMU_TEMP_SENSOR_CONFIG 0x0000ac01 - -#define REG_A5XX_GPMU_TEMP_VAL 0x0000ac02 - -#define REG_A5XX_GPMU_DELTA_TEMP_THRESHOLD 0x0000ac03 - -#define REG_A5XX_GPMU_TEMP_THRESHOLD_INTR_STATUS 0x0000ac05 - -#define REG_A5XX_GPMU_TEMP_THRESHOLD_INTR_EN_MASK 0x0000ac06 - -#define REG_A5XX_GPMU_LEAKAGE_TEMP_COEFF_0_1 0x0000ac40 - -#define REG_A5XX_GPMU_LEAKAGE_TEMP_COEFF_2_3 0x0000ac41 - -#define REG_A5XX_GPMU_LEAKAGE_VTG_COEFF_0_1 0x0000ac42 - -#define REG_A5XX_GPMU_LEAKAGE_VTG_COEFF_2_3 0x0000ac43 - -#define REG_A5XX_GPMU_BASE_LEAKAGE 0x0000ac46 - -#define REG_A5XX_GPMU_GPMU_VOLTAGE 0x0000ac60 - -#define REG_A5XX_GPMU_GPMU_VOLTAGE_INTR_STATUS 0x0000ac61 - -#define REG_A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK 0x0000ac62 - -#define REG_A5XX_GPMU_GPMU_PWR_THRESHOLD 0x0000ac80 - -#define REG_A5XX_GPMU_GPMU_LLM_GLM_SLEEP_CTRL 0x0000acc4 - -#define REG_A5XX_GPMU_GPMU_LLM_GLM_SLEEP_STATUS 0x0000acc5 - -#define REG_A5XX_GDPM_CONFIG1 0x0000b80c - -#define REG_A5XX_GDPM_CONFIG2 0x0000b80d - -#define REG_A5XX_GDPM_INT_EN 0x0000b80f - -#define REG_A5XX_GDPM_INT_MASK 0x0000b811 - -#define REG_A5XX_GPMU_BEC_ENABLE 0x0000b9a0 - -#define REG_A5XX_GPU_CS_SENSOR_GENERAL_STATUS 0x0000c41a - -#define REG_A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_0 0x0000c41d - -#define REG_A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_2 0x0000c41f - -#define REG_A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_4 0x0000c421 - -#define REG_A5XX_GPU_CS_ENABLE_REG 0x0000c520 - -#define REG_A5XX_GPU_CS_AMP_CALIBRATION_CONTROL1 0x0000c557 - -#define REG_A5XX_GRAS_CL_CNTL 0x0000e000 -#define A5XX_GRAS_CL_CNTL_ZERO_GB_SCALE_Z 0x00000040 - -#define REG_A5XX_UNKNOWN_E001 
0x0000e001 - -#define REG_A5XX_UNKNOWN_E004 0x0000e004 - -#define REG_A5XX_GRAS_CNTL 0x0000e005 -#define A5XX_GRAS_CNTL_VARYING 0x00000001 -#define A5XX_GRAS_CNTL_UNK3 0x00000008 -#define A5XX_GRAS_CNTL_XCOORD 0x00000040 -#define A5XX_GRAS_CNTL_YCOORD 0x00000080 -#define A5XX_GRAS_CNTL_ZCOORD 0x00000100 -#define A5XX_GRAS_CNTL_WCOORD 0x00000200 - -#define REG_A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ 0x0000e006 -#define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK 0x000003ff -#define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__SHIFT 0 -static inline uint32_t A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(uint32_t val) -{ - return ((val) << A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__SHIFT) & A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK; -} -#define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__MASK 0x000ffc00 -#define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__SHIFT 10 -static inline uint32_t A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(uint32_t val) -{ - return ((val) << A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__SHIFT) & A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__MASK; -} - -#define REG_A5XX_GRAS_CL_VPORT_XOFFSET_0 0x0000e010 -#define A5XX_GRAS_CL_VPORT_XOFFSET_0__MASK 0xffffffff -#define A5XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT 0 -static inline uint32_t A5XX_GRAS_CL_VPORT_XOFFSET_0(float val) -{ - return ((fui(val)) << A5XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT) & A5XX_GRAS_CL_VPORT_XOFFSET_0__MASK; -} - -#define REG_A5XX_GRAS_CL_VPORT_XSCALE_0 0x0000e011 -#define A5XX_GRAS_CL_VPORT_XSCALE_0__MASK 0xffffffff -#define A5XX_GRAS_CL_VPORT_XSCALE_0__SHIFT 0 -static inline uint32_t A5XX_GRAS_CL_VPORT_XSCALE_0(float val) -{ - return ((fui(val)) << A5XX_GRAS_CL_VPORT_XSCALE_0__SHIFT) & A5XX_GRAS_CL_VPORT_XSCALE_0__MASK; -} - -#define REG_A5XX_GRAS_CL_VPORT_YOFFSET_0 0x0000e012 -#define A5XX_GRAS_CL_VPORT_YOFFSET_0__MASK 0xffffffff -#define A5XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT 0 -static inline uint32_t A5XX_GRAS_CL_VPORT_YOFFSET_0(float val) -{ - return ((fui(val)) << A5XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT) & A5XX_GRAS_CL_VPORT_YOFFSET_0__MASK; -} - 
-#define REG_A5XX_GRAS_CL_VPORT_YSCALE_0 0x0000e013 -#define A5XX_GRAS_CL_VPORT_YSCALE_0__MASK 0xffffffff -#define A5XX_GRAS_CL_VPORT_YSCALE_0__SHIFT 0 -static inline uint32_t A5XX_GRAS_CL_VPORT_YSCALE_0(float val) -{ - return ((fui(val)) << A5XX_GRAS_CL_VPORT_YSCALE_0__SHIFT) & A5XX_GRAS_CL_VPORT_YSCALE_0__MASK; -} - -#define REG_A5XX_GRAS_CL_VPORT_ZOFFSET_0 0x0000e014 -#define A5XX_GRAS_CL_VPORT_ZOFFSET_0__MASK 0xffffffff -#define A5XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT 0 -static inline uint32_t A5XX_GRAS_CL_VPORT_ZOFFSET_0(float val) -{ - return ((fui(val)) << A5XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT) & A5XX_GRAS_CL_VPORT_ZOFFSET_0__MASK; -} - -#define REG_A5XX_GRAS_CL_VPORT_ZSCALE_0 0x0000e015 -#define A5XX_GRAS_CL_VPORT_ZSCALE_0__MASK 0xffffffff -#define A5XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT 0 -static inline uint32_t A5XX_GRAS_CL_VPORT_ZSCALE_0(float val) -{ - return ((fui(val)) << A5XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT) & A5XX_GRAS_CL_VPORT_ZSCALE_0__MASK; -} - -#define REG_A5XX_GRAS_SU_CNTL 0x0000e090 -#define A5XX_GRAS_SU_CNTL_CULL_FRONT 0x00000001 -#define A5XX_GRAS_SU_CNTL_CULL_BACK 0x00000002 -#define A5XX_GRAS_SU_CNTL_FRONT_CW 0x00000004 -#define A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK 0x000007f8 -#define A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT 3 -static inline uint32_t A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(float val) -{ - return ((((int32_t)(val * 4.0))) << A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT) & A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK; -} -#define A5XX_GRAS_SU_CNTL_POLY_OFFSET 0x00000800 -#define A5XX_GRAS_SU_CNTL_MSAA_ENABLE 0x00002000 - -#define REG_A5XX_GRAS_SU_POINT_MINMAX 0x0000e091 -#define A5XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff -#define A5XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0 -static inline uint32_t A5XX_GRAS_SU_POINT_MINMAX_MIN(float val) -{ - return ((((uint32_t)(val * 16.0))) << A5XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A5XX_GRAS_SU_POINT_MINMAX_MIN__MASK; -} -#define A5XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000 -#define A5XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 
16 -static inline uint32_t A5XX_GRAS_SU_POINT_MINMAX_MAX(float val) -{ - return ((((uint32_t)(val * 16.0))) << A5XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A5XX_GRAS_SU_POINT_MINMAX_MAX__MASK; -} - -#define REG_A5XX_GRAS_SU_POINT_SIZE 0x0000e092 -#define A5XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff -#define A5XX_GRAS_SU_POINT_SIZE__SHIFT 0 -static inline uint32_t A5XX_GRAS_SU_POINT_SIZE(float val) -{ - return ((((int32_t)(val * 16.0))) << A5XX_GRAS_SU_POINT_SIZE__SHIFT) & A5XX_GRAS_SU_POINT_SIZE__MASK; -} - -#define REG_A5XX_GRAS_SU_LAYERED 0x0000e093 - -#define REG_A5XX_GRAS_SU_DEPTH_PLANE_CNTL 0x0000e094 -#define A5XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 -#define A5XX_GRAS_SU_DEPTH_PLANE_CNTL_UNK1 0x00000002 - -#define REG_A5XX_GRAS_SU_POLY_OFFSET_SCALE 0x0000e095 -#define A5XX_GRAS_SU_POLY_OFFSET_SCALE__MASK 0xffffffff -#define A5XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT 0 -static inline uint32_t A5XX_GRAS_SU_POLY_OFFSET_SCALE(float val) -{ - return ((fui(val)) << A5XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT) & A5XX_GRAS_SU_POLY_OFFSET_SCALE__MASK; -} - -#define REG_A5XX_GRAS_SU_POLY_OFFSET_OFFSET 0x0000e096 -#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff -#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0 -static inline uint32_t A5XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) -{ - return ((fui(val)) << A5XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A5XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; -} - -#define REG_A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP 0x0000e097 -#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__MASK 0xffffffff -#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__SHIFT 0 -static inline uint32_t A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(float val) -{ - return ((fui(val)) << A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__SHIFT) & A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__MASK; -} - -#define REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO 0x0000e098 -#define A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK 0x00000007 -#define A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT 0 -static inline uint32_t 
A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a5xx_depth_format val) -{ - return ((val) << A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT) & A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK; -} - -#define REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL 0x0000e099 - -#define REG_A5XX_GRAS_SC_CNTL 0x0000e0a0 -#define A5XX_GRAS_SC_CNTL_BINNING_PASS 0x00000001 -#define A5XX_GRAS_SC_CNTL_SAMPLES_PASSED 0x00008000 - -#define REG_A5XX_GRAS_SC_BIN_CNTL 0x0000e0a1 - -#define REG_A5XX_GRAS_SC_RAS_MSAA_CNTL 0x0000e0a2 -#define A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__MASK; -} - -#define REG_A5XX_GRAS_SC_DEST_MSAA_CNTL 0x0000e0a3 -#define A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__MASK; -} -#define A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 - -#define REG_A5XX_GRAS_SC_SCREEN_SCISSOR_CNTL 0x0000e0a4 - -#define REG_A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0 0x0000e0aa -#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__MASK 0x00007fff -#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__SHIFT) & A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__MASK; -} -#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__MASK 0x7fff0000 -#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__SHIFT 16 -static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(uint32_t val) -{ - return ((val) << 
A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__SHIFT) & A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__MASK; -} - -#define REG_A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0 0x0000e0ab -#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__MASK 0x00007fff -#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__SHIFT) & A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__MASK; -} -#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__MASK 0x7fff0000 -#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__SHIFT 16 -static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__SHIFT) & A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__MASK; -} - -#define REG_A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0 0x0000e0ca -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__MASK 0x00007fff -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__SHIFT) & A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__MASK; -} -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__MASK 0x7fff0000 -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__SHIFT 16 -static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__SHIFT) & A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__MASK; -} - -#define REG_A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0 0x0000e0cb -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__MASK 0x00007fff -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__SHIFT) & 
A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__MASK; -} -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__MASK 0x7fff0000 -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__SHIFT 16 -static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__SHIFT) & A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__MASK; -} - -#define REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL 0x0000e0ea -#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff -#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK; -} -#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 -#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 -static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK; -} - -#define REG_A5XX_GRAS_SC_WINDOW_SCISSOR_BR 0x0000e0eb -#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff -#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK; -} -#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 -#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 -static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK; -} - -#define REG_A5XX_GRAS_LRZ_CNTL 0x0000e100 -#define A5XX_GRAS_LRZ_CNTL_ENABLE 0x00000001 -#define A5XX_GRAS_LRZ_CNTL_LRZ_WRITE 0x00000002 -#define A5XX_GRAS_LRZ_CNTL_GREATER 0x00000004 - -#define 
REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO 0x0000e101 - -#define REG_A5XX_GRAS_LRZ_BUFFER_BASE_HI 0x0000e102 - -#define REG_A5XX_GRAS_LRZ_BUFFER_PITCH 0x0000e103 -#define A5XX_GRAS_LRZ_BUFFER_PITCH__MASK 0xffffffff -#define A5XX_GRAS_LRZ_BUFFER_PITCH__SHIFT 0 -static inline uint32_t A5XX_GRAS_LRZ_BUFFER_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A5XX_GRAS_LRZ_BUFFER_PITCH__SHIFT) & A5XX_GRAS_LRZ_BUFFER_PITCH__MASK; -} - -#define REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO 0x0000e104 - -#define REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI 0x0000e105 - -#define REG_A5XX_RB_CNTL 0x0000e140 -#define A5XX_RB_CNTL_WIDTH__MASK 0x000000ff -#define A5XX_RB_CNTL_WIDTH__SHIFT 0 -static inline uint32_t A5XX_RB_CNTL_WIDTH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A5XX_RB_CNTL_WIDTH__SHIFT) & A5XX_RB_CNTL_WIDTH__MASK; -} -#define A5XX_RB_CNTL_HEIGHT__MASK 0x0001fe00 -#define A5XX_RB_CNTL_HEIGHT__SHIFT 9 -static inline uint32_t A5XX_RB_CNTL_HEIGHT(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A5XX_RB_CNTL_HEIGHT__SHIFT) & A5XX_RB_CNTL_HEIGHT__MASK; -} -#define A5XX_RB_CNTL_BYPASS 0x00020000 - -#define REG_A5XX_RB_RENDER_CNTL 0x0000e141 -#define A5XX_RB_RENDER_CNTL_BINNING_PASS 0x00000001 -#define A5XX_RB_RENDER_CNTL_SAMPLES_PASSED 0x00000040 -#define A5XX_RB_RENDER_CNTL_DISABLE_COLOR_PIPE 0x00000080 -#define A5XX_RB_RENDER_CNTL_FLAG_DEPTH 0x00004000 -#define A5XX_RB_RENDER_CNTL_FLAG_DEPTH2 0x00008000 -#define A5XX_RB_RENDER_CNTL_FLAG_MRTS__MASK 0x00ff0000 -#define A5XX_RB_RENDER_CNTL_FLAG_MRTS__SHIFT 16 -static inline uint32_t A5XX_RB_RENDER_CNTL_FLAG_MRTS(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_CNTL_FLAG_MRTS__SHIFT) & A5XX_RB_RENDER_CNTL_FLAG_MRTS__MASK; -} -#define A5XX_RB_RENDER_CNTL_FLAG_MRTS2__MASK 0xff000000 -#define A5XX_RB_RENDER_CNTL_FLAG_MRTS2__SHIFT 24 -static inline uint32_t A5XX_RB_RENDER_CNTL_FLAG_MRTS2(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_CNTL_FLAG_MRTS2__SHIFT) & 
A5XX_RB_RENDER_CNTL_FLAG_MRTS2__MASK; -} - -#define REG_A5XX_RB_RAS_MSAA_CNTL 0x0000e142 -#define A5XX_RB_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A5XX_RB_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A5XX_RB_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A5XX_RB_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_RB_RAS_MSAA_CNTL_SAMPLES__MASK; -} - -#define REG_A5XX_RB_DEST_MSAA_CNTL 0x0000e143 -#define A5XX_RB_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A5XX_RB_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A5XX_RB_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A5XX_RB_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_RB_DEST_MSAA_CNTL_SAMPLES__MASK; -} -#define A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 - -#define REG_A5XX_RB_RENDER_CONTROL0 0x0000e144 -#define A5XX_RB_RENDER_CONTROL0_VARYING 0x00000001 -#define A5XX_RB_RENDER_CONTROL0_UNK3 0x00000008 -#define A5XX_RB_RENDER_CONTROL0_XCOORD 0x00000040 -#define A5XX_RB_RENDER_CONTROL0_YCOORD 0x00000080 -#define A5XX_RB_RENDER_CONTROL0_ZCOORD 0x00000100 -#define A5XX_RB_RENDER_CONTROL0_WCOORD 0x00000200 - -#define REG_A5XX_RB_RENDER_CONTROL1 0x0000e145 -#define A5XX_RB_RENDER_CONTROL1_SAMPLEMASK 0x00000001 -#define A5XX_RB_RENDER_CONTROL1_FACENESS 0x00000002 -#define A5XX_RB_RENDER_CONTROL1_SAMPLEID 0x00000004 - -#define REG_A5XX_RB_FS_OUTPUT_CNTL 0x0000e146 -#define A5XX_RB_FS_OUTPUT_CNTL_MRT__MASK 0x0000000f -#define A5XX_RB_FS_OUTPUT_CNTL_MRT__SHIFT 0 -static inline uint32_t A5XX_RB_FS_OUTPUT_CNTL_MRT(uint32_t val) -{ - return ((val) << A5XX_RB_FS_OUTPUT_CNTL_MRT__SHIFT) & A5XX_RB_FS_OUTPUT_CNTL_MRT__MASK; -} -#define A5XX_RB_FS_OUTPUT_CNTL_FRAG_WRITES_Z 0x00000020 - -#define REG_A5XX_RB_RENDER_COMPONENTS 0x0000e147 -#define A5XX_RB_RENDER_COMPONENTS_RT0__MASK 0x0000000f -#define A5XX_RB_RENDER_COMPONENTS_RT0__SHIFT 0 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT0(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT0__SHIFT) & 
A5XX_RB_RENDER_COMPONENTS_RT0__MASK; -} -#define A5XX_RB_RENDER_COMPONENTS_RT1__MASK 0x000000f0 -#define A5XX_RB_RENDER_COMPONENTS_RT1__SHIFT 4 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT1(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT1__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT1__MASK; -} -#define A5XX_RB_RENDER_COMPONENTS_RT2__MASK 0x00000f00 -#define A5XX_RB_RENDER_COMPONENTS_RT2__SHIFT 8 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT2(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT2__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT2__MASK; -} -#define A5XX_RB_RENDER_COMPONENTS_RT3__MASK 0x0000f000 -#define A5XX_RB_RENDER_COMPONENTS_RT3__SHIFT 12 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT3(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT3__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT3__MASK; -} -#define A5XX_RB_RENDER_COMPONENTS_RT4__MASK 0x000f0000 -#define A5XX_RB_RENDER_COMPONENTS_RT4__SHIFT 16 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT4(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT4__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT4__MASK; -} -#define A5XX_RB_RENDER_COMPONENTS_RT5__MASK 0x00f00000 -#define A5XX_RB_RENDER_COMPONENTS_RT5__SHIFT 20 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT5(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT5__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT5__MASK; -} -#define A5XX_RB_RENDER_COMPONENTS_RT6__MASK 0x0f000000 -#define A5XX_RB_RENDER_COMPONENTS_RT6__SHIFT 24 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT6(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT6__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT6__MASK; -} -#define A5XX_RB_RENDER_COMPONENTS_RT7__MASK 0xf0000000 -#define A5XX_RB_RENDER_COMPONENTS_RT7__SHIFT 28 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT7(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT7__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT7__MASK; -} - -static inline uint32_t 
REG_A5XX_RB_MRT(uint32_t i0) { return 0x0000e150 + 0x7*i0; } - -static inline uint32_t REG_A5XX_RB_MRT_CONTROL(uint32_t i0) { return 0x0000e150 + 0x7*i0; } -#define A5XX_RB_MRT_CONTROL_BLEND 0x00000001 -#define A5XX_RB_MRT_CONTROL_BLEND2 0x00000002 -#define A5XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000004 -#define A5XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000078 -#define A5XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 3 -static inline uint32_t A5XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) -{ - return ((val) << A5XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A5XX_RB_MRT_CONTROL_ROP_CODE__MASK; -} -#define A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x00000780 -#define A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 7 -static inline uint32_t A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) -{ - return ((val) << A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; -} - -static inline uint32_t REG_A5XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x0000e151 + 0x7*i0; } -#define A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f -#define A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 -static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK; -} -#define A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0 -#define A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5 -static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) -{ - return ((val) << A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK; -} -#define A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00 -#define A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8 -static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & 
A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK; -} -#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000 -#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16 -static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK; -} -#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000 -#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21 -static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) -{ - return ((val) << A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK; -} -#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000 -#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24 -static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK; -} - -static inline uint32_t REG_A5XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x0000e152 + 0x7*i0; } -#define A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) -{ - return ((val) << A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK; -} -#define A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x00000300 -#define A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 8 -static inline uint32_t A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a5xx_tile_mode val) -{ - return ((val) << A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK; -} -#define A5XX_RB_MRT_BUF_INFO_DITHER_MODE__MASK 0x00001800 -#define A5XX_RB_MRT_BUF_INFO_DITHER_MODE__SHIFT 11 -static inline uint32_t 
A5XX_RB_MRT_BUF_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) -{ - return ((val) << A5XX_RB_MRT_BUF_INFO_DITHER_MODE__SHIFT) & A5XX_RB_MRT_BUF_INFO_DITHER_MODE__MASK; -} -#define A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00006000 -#define A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 13 -static inline uint32_t A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK; -} -#define A5XX_RB_MRT_BUF_INFO_COLOR_SRGB 0x00008000 - -static inline uint32_t REG_A5XX_RB_MRT_PITCH(uint32_t i0) { return 0x0000e153 + 0x7*i0; } -#define A5XX_RB_MRT_PITCH__MASK 0xffffffff -#define A5XX_RB_MRT_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_MRT_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_MRT_PITCH__SHIFT) & A5XX_RB_MRT_PITCH__MASK; -} - -static inline uint32_t REG_A5XX_RB_MRT_ARRAY_PITCH(uint32_t i0) { return 0x0000e154 + 0x7*i0; } -#define A5XX_RB_MRT_ARRAY_PITCH__MASK 0xffffffff -#define A5XX_RB_MRT_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_MRT_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_MRT_ARRAY_PITCH__SHIFT) & A5XX_RB_MRT_ARRAY_PITCH__MASK; -} - -static inline uint32_t REG_A5XX_RB_MRT_BASE_LO(uint32_t i0) { return 0x0000e155 + 0x7*i0; } - -static inline uint32_t REG_A5XX_RB_MRT_BASE_HI(uint32_t i0) { return 0x0000e156 + 0x7*i0; } - -#define REG_A5XX_RB_BLEND_RED 0x0000e1a0 -#define A5XX_RB_BLEND_RED_UINT__MASK 0x000000ff -#define A5XX_RB_BLEND_RED_UINT__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_RED_UINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_RED_UINT__SHIFT) & A5XX_RB_BLEND_RED_UINT__MASK; -} -#define A5XX_RB_BLEND_RED_SINT__MASK 0x0000ff00 -#define A5XX_RB_BLEND_RED_SINT__SHIFT 8 -static inline uint32_t A5XX_RB_BLEND_RED_SINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_RED_SINT__SHIFT) & A5XX_RB_BLEND_RED_SINT__MASK; -} -#define A5XX_RB_BLEND_RED_FLOAT__MASK 0xffff0000 
-#define A5XX_RB_BLEND_RED_FLOAT__SHIFT 16 -static inline uint32_t A5XX_RB_BLEND_RED_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A5XX_RB_BLEND_RED_FLOAT__SHIFT) & A5XX_RB_BLEND_RED_FLOAT__MASK; -} - -#define REG_A5XX_RB_BLEND_RED_F32 0x0000e1a1 -#define A5XX_RB_BLEND_RED_F32__MASK 0xffffffff -#define A5XX_RB_BLEND_RED_F32__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_RED_F32(float val) -{ - return ((fui(val)) << A5XX_RB_BLEND_RED_F32__SHIFT) & A5XX_RB_BLEND_RED_F32__MASK; -} - -#define REG_A5XX_RB_BLEND_GREEN 0x0000e1a2 -#define A5XX_RB_BLEND_GREEN_UINT__MASK 0x000000ff -#define A5XX_RB_BLEND_GREEN_UINT__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_GREEN_UINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_GREEN_UINT__SHIFT) & A5XX_RB_BLEND_GREEN_UINT__MASK; -} -#define A5XX_RB_BLEND_GREEN_SINT__MASK 0x0000ff00 -#define A5XX_RB_BLEND_GREEN_SINT__SHIFT 8 -static inline uint32_t A5XX_RB_BLEND_GREEN_SINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_GREEN_SINT__SHIFT) & A5XX_RB_BLEND_GREEN_SINT__MASK; -} -#define A5XX_RB_BLEND_GREEN_FLOAT__MASK 0xffff0000 -#define A5XX_RB_BLEND_GREEN_FLOAT__SHIFT 16 -static inline uint32_t A5XX_RB_BLEND_GREEN_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A5XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A5XX_RB_BLEND_GREEN_FLOAT__MASK; -} - -#define REG_A5XX_RB_BLEND_GREEN_F32 0x0000e1a3 -#define A5XX_RB_BLEND_GREEN_F32__MASK 0xffffffff -#define A5XX_RB_BLEND_GREEN_F32__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_GREEN_F32(float val) -{ - return ((fui(val)) << A5XX_RB_BLEND_GREEN_F32__SHIFT) & A5XX_RB_BLEND_GREEN_F32__MASK; -} - -#define REG_A5XX_RB_BLEND_BLUE 0x0000e1a4 -#define A5XX_RB_BLEND_BLUE_UINT__MASK 0x000000ff -#define A5XX_RB_BLEND_BLUE_UINT__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_BLUE_UINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_BLUE_UINT__SHIFT) & A5XX_RB_BLEND_BLUE_UINT__MASK; -} -#define A5XX_RB_BLEND_BLUE_SINT__MASK 0x0000ff00 -#define A5XX_RB_BLEND_BLUE_SINT__SHIFT 8 -static 
inline uint32_t A5XX_RB_BLEND_BLUE_SINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_BLUE_SINT__SHIFT) & A5XX_RB_BLEND_BLUE_SINT__MASK; -} -#define A5XX_RB_BLEND_BLUE_FLOAT__MASK 0xffff0000 -#define A5XX_RB_BLEND_BLUE_FLOAT__SHIFT 16 -static inline uint32_t A5XX_RB_BLEND_BLUE_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A5XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A5XX_RB_BLEND_BLUE_FLOAT__MASK; -} - -#define REG_A5XX_RB_BLEND_BLUE_F32 0x0000e1a5 -#define A5XX_RB_BLEND_BLUE_F32__MASK 0xffffffff -#define A5XX_RB_BLEND_BLUE_F32__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_BLUE_F32(float val) -{ - return ((fui(val)) << A5XX_RB_BLEND_BLUE_F32__SHIFT) & A5XX_RB_BLEND_BLUE_F32__MASK; -} - -#define REG_A5XX_RB_BLEND_ALPHA 0x0000e1a6 -#define A5XX_RB_BLEND_ALPHA_UINT__MASK 0x000000ff -#define A5XX_RB_BLEND_ALPHA_UINT__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_ALPHA_UINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_ALPHA_UINT__SHIFT) & A5XX_RB_BLEND_ALPHA_UINT__MASK; -} -#define A5XX_RB_BLEND_ALPHA_SINT__MASK 0x0000ff00 -#define A5XX_RB_BLEND_ALPHA_SINT__SHIFT 8 -static inline uint32_t A5XX_RB_BLEND_ALPHA_SINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_ALPHA_SINT__SHIFT) & A5XX_RB_BLEND_ALPHA_SINT__MASK; -} -#define A5XX_RB_BLEND_ALPHA_FLOAT__MASK 0xffff0000 -#define A5XX_RB_BLEND_ALPHA_FLOAT__SHIFT 16 -static inline uint32_t A5XX_RB_BLEND_ALPHA_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A5XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A5XX_RB_BLEND_ALPHA_FLOAT__MASK; -} - -#define REG_A5XX_RB_BLEND_ALPHA_F32 0x0000e1a7 -#define A5XX_RB_BLEND_ALPHA_F32__MASK 0xffffffff -#define A5XX_RB_BLEND_ALPHA_F32__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_ALPHA_F32(float val) -{ - return ((fui(val)) << A5XX_RB_BLEND_ALPHA_F32__SHIFT) & A5XX_RB_BLEND_ALPHA_F32__MASK; -} - -#define REG_A5XX_RB_ALPHA_CONTROL 0x0000e1a8 -#define A5XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK 0x000000ff -#define A5XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT 0 -static inline uint32_t 
A5XX_RB_ALPHA_CONTROL_ALPHA_REF(uint32_t val) -{ - return ((val) << A5XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT) & A5XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK; -} -#define A5XX_RB_ALPHA_CONTROL_ALPHA_TEST 0x00000100 -#define A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK 0x00000e00 -#define A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT 9 -static inline uint32_t A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val) -{ - return ((val) << A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK; -} - -#define REG_A5XX_RB_BLEND_CNTL 0x0000e1a9 -#define A5XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK 0x000000ff -#define A5XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_CNTL_ENABLE_BLEND(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT) & A5XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK; -} -#define A5XX_RB_BLEND_CNTL_INDEPENDENT_BLEND 0x00000100 -#define A5XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE 0x00000400 -#define A5XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK 0xffff0000 -#define A5XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT 16 -static inline uint32_t A5XX_RB_BLEND_CNTL_SAMPLE_MASK(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT) & A5XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK; -} - -#define REG_A5XX_RB_DEPTH_PLANE_CNTL 0x0000e1b0 -#define A5XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 -#define A5XX_RB_DEPTH_PLANE_CNTL_UNK1 0x00000002 - -#define REG_A5XX_RB_DEPTH_CNTL 0x0000e1b1 -#define A5XX_RB_DEPTH_CNTL_Z_ENABLE 0x00000001 -#define A5XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE 0x00000002 -#define A5XX_RB_DEPTH_CNTL_ZFUNC__MASK 0x0000001c -#define A5XX_RB_DEPTH_CNTL_ZFUNC__SHIFT 2 -static inline uint32_t A5XX_RB_DEPTH_CNTL_ZFUNC(enum adreno_compare_func val) -{ - return ((val) << A5XX_RB_DEPTH_CNTL_ZFUNC__SHIFT) & A5XX_RB_DEPTH_CNTL_ZFUNC__MASK; -} -#define A5XX_RB_DEPTH_CNTL_Z_TEST_ENABLE 0x00000040 - -#define REG_A5XX_RB_DEPTH_BUFFER_INFO 0x0000e1b2 -#define A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK 0x00000007 
-#define A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT 0 -static inline uint32_t A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a5xx_depth_format val) -{ - return ((val) << A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT) & A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK; -} - -#define REG_A5XX_RB_DEPTH_BUFFER_BASE_LO 0x0000e1b3 - -#define REG_A5XX_RB_DEPTH_BUFFER_BASE_HI 0x0000e1b4 - -#define REG_A5XX_RB_DEPTH_BUFFER_PITCH 0x0000e1b5 -#define A5XX_RB_DEPTH_BUFFER_PITCH__MASK 0xffffffff -#define A5XX_RB_DEPTH_BUFFER_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_DEPTH_BUFFER_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_DEPTH_BUFFER_PITCH__SHIFT) & A5XX_RB_DEPTH_BUFFER_PITCH__MASK; -} - -#define REG_A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH 0x0000e1b6 -#define A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK 0xffffffff -#define A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT) & A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_RB_STENCIL_CONTROL 0x0000e1c0 -#define A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001 -#define A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000002 -#define A5XX_RB_STENCIL_CONTROL_STENCIL_READ 0x00000004 -#define A5XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700 -#define A5XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8 -static inline uint32_t A5XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A5XX_RB_STENCIL_CONTROL_FUNC__MASK; -} -#define A5XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800 -#define A5XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11 -static inline uint32_t A5XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A5XX_RB_STENCIL_CONTROL_FAIL__MASK; -} -#define A5XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000 -#define A5XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14 
-static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A5XX_RB_STENCIL_CONTROL_ZPASS__MASK; -} -#define A5XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000 -#define A5XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17 -static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & A5XX_RB_STENCIL_CONTROL_ZFAIL__MASK; -} -#define A5XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000 -#define A5XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20 -static inline uint32_t A5XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_FUNC_BF__MASK; -} -#define A5XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000 -#define A5XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23 -static inline uint32_t A5XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_FAIL_BF__MASK; -} -#define A5XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000 -#define A5XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26 -static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK; -} -#define A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000 -#define A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29 -static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK; -} - -#define REG_A5XX_RB_STENCIL_INFO 0x0000e1c1 -#define A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL 0x00000001 - -#define REG_A5XX_RB_STENCIL_BASE_LO 0x0000e1c2 - -#define REG_A5XX_RB_STENCIL_BASE_HI 0x0000e1c3 - -#define REG_A5XX_RB_STENCIL_PITCH 0x0000e1c4 -#define A5XX_RB_STENCIL_PITCH__MASK 0xffffffff -#define 
A5XX_RB_STENCIL_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_STENCIL_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_STENCIL_PITCH__SHIFT) & A5XX_RB_STENCIL_PITCH__MASK; -} - -#define REG_A5XX_RB_STENCIL_ARRAY_PITCH 0x0000e1c5 -#define A5XX_RB_STENCIL_ARRAY_PITCH__MASK 0xffffffff -#define A5XX_RB_STENCIL_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_STENCIL_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_STENCIL_ARRAY_PITCH__SHIFT) & A5XX_RB_STENCIL_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_RB_STENCILREFMASK 0x0000e1c6 -#define A5XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff -#define A5XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0 -static inline uint32_t A5XX_RB_STENCILREFMASK_STENCILREF(uint32_t val) -{ - return ((val) << A5XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A5XX_RB_STENCILREFMASK_STENCILREF__MASK; -} -#define A5XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00 -#define A5XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8 -static inline uint32_t A5XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val) -{ - return ((val) << A5XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A5XX_RB_STENCILREFMASK_STENCILMASK__MASK; -} -#define A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000 -#define A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16 -static inline uint32_t A5XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val) -{ - return ((val) << A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK; -} - -#define REG_A5XX_RB_STENCILREFMASK_BF 0x0000e1c7 -#define A5XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff -#define A5XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0 -static inline uint32_t A5XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val) -{ - return ((val) << A5XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A5XX_RB_STENCILREFMASK_BF_STENCILREF__MASK; -} -#define A5XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00 -#define 
A5XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8 -static inline uint32_t A5XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val) -{ - return ((val) << A5XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A5XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK; -} -#define A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000 -#define A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16 -static inline uint32_t A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val) -{ - return ((val) << A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK; -} - -#define REG_A5XX_RB_WINDOW_OFFSET 0x0000e1d0 -#define A5XX_RB_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_RB_WINDOW_OFFSET_X__MASK 0x00007fff -#define A5XX_RB_WINDOW_OFFSET_X__SHIFT 0 -static inline uint32_t A5XX_RB_WINDOW_OFFSET_X(uint32_t val) -{ - return ((val) << A5XX_RB_WINDOW_OFFSET_X__SHIFT) & A5XX_RB_WINDOW_OFFSET_X__MASK; -} -#define A5XX_RB_WINDOW_OFFSET_Y__MASK 0x7fff0000 -#define A5XX_RB_WINDOW_OFFSET_Y__SHIFT 16 -static inline uint32_t A5XX_RB_WINDOW_OFFSET_Y(uint32_t val) -{ - return ((val) << A5XX_RB_WINDOW_OFFSET_Y__SHIFT) & A5XX_RB_WINDOW_OFFSET_Y__MASK; -} - -#define REG_A5XX_RB_SAMPLE_COUNT_CONTROL 0x0000e1d1 -#define A5XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002 - -#define REG_A5XX_RB_BLIT_CNTL 0x0000e210 -#define A5XX_RB_BLIT_CNTL_BUF__MASK 0x0000000f -#define A5XX_RB_BLIT_CNTL_BUF__SHIFT 0 -static inline uint32_t A5XX_RB_BLIT_CNTL_BUF(enum a5xx_blit_buf val) -{ - return ((val) << A5XX_RB_BLIT_CNTL_BUF__SHIFT) & A5XX_RB_BLIT_CNTL_BUF__MASK; -} - -#define REG_A5XX_RB_RESOLVE_CNTL_1 0x0000e211 -#define A5XX_RB_RESOLVE_CNTL_1_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_RB_RESOLVE_CNTL_1_X__MASK 0x00007fff -#define A5XX_RB_RESOLVE_CNTL_1_X__SHIFT 0 -static inline uint32_t A5XX_RB_RESOLVE_CNTL_1_X(uint32_t val) -{ - return ((val) << A5XX_RB_RESOLVE_CNTL_1_X__SHIFT) & A5XX_RB_RESOLVE_CNTL_1_X__MASK; -} -#define A5XX_RB_RESOLVE_CNTL_1_Y__MASK 0x7fff0000 
-#define A5XX_RB_RESOLVE_CNTL_1_Y__SHIFT 16 -static inline uint32_t A5XX_RB_RESOLVE_CNTL_1_Y(uint32_t val) -{ - return ((val) << A5XX_RB_RESOLVE_CNTL_1_Y__SHIFT) & A5XX_RB_RESOLVE_CNTL_1_Y__MASK; -} - -#define REG_A5XX_RB_RESOLVE_CNTL_2 0x0000e212 -#define A5XX_RB_RESOLVE_CNTL_2_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_RB_RESOLVE_CNTL_2_X__MASK 0x00007fff -#define A5XX_RB_RESOLVE_CNTL_2_X__SHIFT 0 -static inline uint32_t A5XX_RB_RESOLVE_CNTL_2_X(uint32_t val) -{ - return ((val) << A5XX_RB_RESOLVE_CNTL_2_X__SHIFT) & A5XX_RB_RESOLVE_CNTL_2_X__MASK; -} -#define A5XX_RB_RESOLVE_CNTL_2_Y__MASK 0x7fff0000 -#define A5XX_RB_RESOLVE_CNTL_2_Y__SHIFT 16 -static inline uint32_t A5XX_RB_RESOLVE_CNTL_2_Y(uint32_t val) -{ - return ((val) << A5XX_RB_RESOLVE_CNTL_2_Y__SHIFT) & A5XX_RB_RESOLVE_CNTL_2_Y__MASK; -} - -#define REG_A5XX_RB_RESOLVE_CNTL_3 0x0000e213 -#define A5XX_RB_RESOLVE_CNTL_3_TILED 0x00000001 - -#define REG_A5XX_RB_BLIT_DST_LO 0x0000e214 - -#define REG_A5XX_RB_BLIT_DST_HI 0x0000e215 - -#define REG_A5XX_RB_BLIT_DST_PITCH 0x0000e216 -#define A5XX_RB_BLIT_DST_PITCH__MASK 0xffffffff -#define A5XX_RB_BLIT_DST_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_BLIT_DST_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_BLIT_DST_PITCH__SHIFT) & A5XX_RB_BLIT_DST_PITCH__MASK; -} - -#define REG_A5XX_RB_BLIT_DST_ARRAY_PITCH 0x0000e217 -#define A5XX_RB_BLIT_DST_ARRAY_PITCH__MASK 0xffffffff -#define A5XX_RB_BLIT_DST_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_BLIT_DST_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_BLIT_DST_ARRAY_PITCH__SHIFT) & A5XX_RB_BLIT_DST_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_RB_CLEAR_COLOR_DW0 0x0000e218 - -#define REG_A5XX_RB_CLEAR_COLOR_DW1 0x0000e219 - -#define REG_A5XX_RB_CLEAR_COLOR_DW2 0x0000e21a - -#define REG_A5XX_RB_CLEAR_COLOR_DW3 0x0000e21b - -#define REG_A5XX_RB_CLEAR_CNTL 0x0000e21c -#define A5XX_RB_CLEAR_CNTL_FAST_CLEAR 0x00000002 -#define 
A5XX_RB_CLEAR_CNTL_MSAA_RESOLVE 0x00000004 -#define A5XX_RB_CLEAR_CNTL_MASK__MASK 0x000000f0 -#define A5XX_RB_CLEAR_CNTL_MASK__SHIFT 4 -static inline uint32_t A5XX_RB_CLEAR_CNTL_MASK(uint32_t val) -{ - return ((val) << A5XX_RB_CLEAR_CNTL_MASK__SHIFT) & A5XX_RB_CLEAR_CNTL_MASK__MASK; -} - -#define REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO 0x0000e240 - -#define REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_HI 0x0000e241 - -#define REG_A5XX_RB_DEPTH_FLAG_BUFFER_PITCH 0x0000e242 - -static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER(uint32_t i0) { return 0x0000e243 + 0x4*i0; } - -static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_ADDR_LO(uint32_t i0) { return 0x0000e243 + 0x4*i0; } - -static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_ADDR_HI(uint32_t i0) { return 0x0000e244 + 0x4*i0; } - -static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_PITCH(uint32_t i0) { return 0x0000e245 + 0x4*i0; } -#define A5XX_RB_MRT_FLAG_BUFFER_PITCH__MASK 0xffffffff -#define A5XX_RB_MRT_FLAG_BUFFER_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_MRT_FLAG_BUFFER_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_MRT_FLAG_BUFFER_PITCH__SHIFT) & A5XX_RB_MRT_FLAG_BUFFER_PITCH__MASK; -} - -static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(uint32_t i0) { return 0x0000e246 + 0x4*i0; } -#define A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__MASK 0xffffffff -#define A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__SHIFT) & A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_RB_BLIT_FLAG_DST_LO 0x0000e263 - -#define REG_A5XX_RB_BLIT_FLAG_DST_HI 0x0000e264 - -#define REG_A5XX_RB_BLIT_FLAG_DST_PITCH 0x0000e265 -#define A5XX_RB_BLIT_FLAG_DST_PITCH__MASK 0xffffffff -#define A5XX_RB_BLIT_FLAG_DST_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_BLIT_FLAG_DST_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 
6) << A5XX_RB_BLIT_FLAG_DST_PITCH__SHIFT) & A5XX_RB_BLIT_FLAG_DST_PITCH__MASK; -} - -#define REG_A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH 0x0000e266 -#define A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__MASK 0xffffffff -#define A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__SHIFT) & A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO 0x0000e267 - -#define REG_A5XX_RB_SAMPLE_COUNT_ADDR_HI 0x0000e268 - -#define REG_A5XX_VPC_CNTL_0 0x0000e280 -#define A5XX_VPC_CNTL_0_STRIDE_IN_VPC__MASK 0x0000007f -#define A5XX_VPC_CNTL_0_STRIDE_IN_VPC__SHIFT 0 -static inline uint32_t A5XX_VPC_CNTL_0_STRIDE_IN_VPC(uint32_t val) -{ - return ((val) << A5XX_VPC_CNTL_0_STRIDE_IN_VPC__SHIFT) & A5XX_VPC_CNTL_0_STRIDE_IN_VPC__MASK; -} -#define A5XX_VPC_CNTL_0_VARYING 0x00000800 - -static inline uint32_t REG_A5XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x0000e282 + 0x1*i0; } - -static inline uint32_t REG_A5XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x0000e282 + 0x1*i0; } - -static inline uint32_t REG_A5XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x0000e28a + 0x1*i0; } - -static inline uint32_t REG_A5XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x0000e28a + 0x1*i0; } - -#define REG_A5XX_UNKNOWN_E292 0x0000e292 - -#define REG_A5XX_UNKNOWN_E293 0x0000e293 - -static inline uint32_t REG_A5XX_VPC_VAR(uint32_t i0) { return 0x0000e294 + 0x1*i0; } - -static inline uint32_t REG_A5XX_VPC_VAR_DISABLE(uint32_t i0) { return 0x0000e294 + 0x1*i0; } - -#define REG_A5XX_VPC_GS_SIV_CNTL 0x0000e298 - -#define REG_A5XX_UNKNOWN_E29A 0x0000e29a - -#define REG_A5XX_VPC_PACK 0x0000e29d -#define A5XX_VPC_PACK_NUMNONPOSVAR__MASK 0x000000ff -#define A5XX_VPC_PACK_NUMNONPOSVAR__SHIFT 0 -static inline uint32_t A5XX_VPC_PACK_NUMNONPOSVAR(uint32_t val) -{ - return ((val) << A5XX_VPC_PACK_NUMNONPOSVAR__SHIFT) & 
A5XX_VPC_PACK_NUMNONPOSVAR__MASK; -} -#define A5XX_VPC_PACK_PSIZELOC__MASK 0x0000ff00 -#define A5XX_VPC_PACK_PSIZELOC__SHIFT 8 -static inline uint32_t A5XX_VPC_PACK_PSIZELOC(uint32_t val) -{ - return ((val) << A5XX_VPC_PACK_PSIZELOC__SHIFT) & A5XX_VPC_PACK_PSIZELOC__MASK; -} - -#define REG_A5XX_VPC_FS_PRIMITIVEID_CNTL 0x0000e2a0 - -#define REG_A5XX_VPC_SO_BUF_CNTL 0x0000e2a1 -#define A5XX_VPC_SO_BUF_CNTL_BUF0 0x00000001 -#define A5XX_VPC_SO_BUF_CNTL_BUF1 0x00000008 -#define A5XX_VPC_SO_BUF_CNTL_BUF2 0x00000040 -#define A5XX_VPC_SO_BUF_CNTL_BUF3 0x00000200 -#define A5XX_VPC_SO_BUF_CNTL_ENABLE 0x00008000 - -#define REG_A5XX_VPC_SO_OVERRIDE 0x0000e2a2 -#define A5XX_VPC_SO_OVERRIDE_SO_DISABLE 0x00000001 - -#define REG_A5XX_VPC_SO_CNTL 0x0000e2a3 -#define A5XX_VPC_SO_CNTL_ENABLE 0x00010000 - -#define REG_A5XX_VPC_SO_PROG 0x0000e2a4 -#define A5XX_VPC_SO_PROG_A_BUF__MASK 0x00000003 -#define A5XX_VPC_SO_PROG_A_BUF__SHIFT 0 -static inline uint32_t A5XX_VPC_SO_PROG_A_BUF(uint32_t val) -{ - return ((val) << A5XX_VPC_SO_PROG_A_BUF__SHIFT) & A5XX_VPC_SO_PROG_A_BUF__MASK; -} -#define A5XX_VPC_SO_PROG_A_OFF__MASK 0x000007fc -#define A5XX_VPC_SO_PROG_A_OFF__SHIFT 2 -static inline uint32_t A5XX_VPC_SO_PROG_A_OFF(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << A5XX_VPC_SO_PROG_A_OFF__SHIFT) & A5XX_VPC_SO_PROG_A_OFF__MASK; -} -#define A5XX_VPC_SO_PROG_A_EN 0x00000800 -#define A5XX_VPC_SO_PROG_B_BUF__MASK 0x00003000 -#define A5XX_VPC_SO_PROG_B_BUF__SHIFT 12 -static inline uint32_t A5XX_VPC_SO_PROG_B_BUF(uint32_t val) -{ - return ((val) << A5XX_VPC_SO_PROG_B_BUF__SHIFT) & A5XX_VPC_SO_PROG_B_BUF__MASK; -} -#define A5XX_VPC_SO_PROG_B_OFF__MASK 0x007fc000 -#define A5XX_VPC_SO_PROG_B_OFF__SHIFT 14 -static inline uint32_t A5XX_VPC_SO_PROG_B_OFF(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << A5XX_VPC_SO_PROG_B_OFF__SHIFT) & A5XX_VPC_SO_PROG_B_OFF__MASK; -} -#define A5XX_VPC_SO_PROG_B_EN 0x00800000 - -static inline uint32_t REG_A5XX_VPC_SO(uint32_t i0) { 
return 0x0000e2a7 + 0x7*i0; } - -static inline uint32_t REG_A5XX_VPC_SO_BUFFER_BASE_LO(uint32_t i0) { return 0x0000e2a7 + 0x7*i0; } - -static inline uint32_t REG_A5XX_VPC_SO_BUFFER_BASE_HI(uint32_t i0) { return 0x0000e2a8 + 0x7*i0; } - -static inline uint32_t REG_A5XX_VPC_SO_BUFFER_SIZE(uint32_t i0) { return 0x0000e2a9 + 0x7*i0; } - -static inline uint32_t REG_A5XX_VPC_SO_NCOMP(uint32_t i0) { return 0x0000e2aa + 0x7*i0; } - -static inline uint32_t REG_A5XX_VPC_SO_BUFFER_OFFSET(uint32_t i0) { return 0x0000e2ab + 0x7*i0; } - -static inline uint32_t REG_A5XX_VPC_SO_FLUSH_BASE_LO(uint32_t i0) { return 0x0000e2ac + 0x7*i0; } - -static inline uint32_t REG_A5XX_VPC_SO_FLUSH_BASE_HI(uint32_t i0) { return 0x0000e2ad + 0x7*i0; } - -#define REG_A5XX_PC_PRIMITIVE_CNTL 0x0000e384 -#define A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__MASK 0x0000007f -#define A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__SHIFT 0 -static inline uint32_t A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC(uint32_t val) -{ - return ((val) << A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__SHIFT) & A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__MASK; -} -#define A5XX_PC_PRIMITIVE_CNTL_PRIMITIVE_RESTART 0x00000100 -#define A5XX_PC_PRIMITIVE_CNTL_COUNT_PRIMITIVES 0x00000200 -#define A5XX_PC_PRIMITIVE_CNTL_PROVOKING_VTX_LAST 0x00000400 - -#define REG_A5XX_PC_PRIM_VTX_CNTL 0x0000e385 -#define A5XX_PC_PRIM_VTX_CNTL_PSIZE 0x00000800 - -#define REG_A5XX_PC_RASTER_CNTL 0x0000e388 -#define A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__MASK 0x00000007 -#define A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__SHIFT 0 -static inline uint32_t A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__SHIFT) & A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__MASK; -} -#define A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__MASK 0x00000038 -#define A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__SHIFT 3 -static inline uint32_t A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << 
A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__SHIFT) & A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__MASK; -} -#define A5XX_PC_RASTER_CNTL_POLYMODE_ENABLE 0x00000040 - -#define REG_A5XX_UNKNOWN_E389 0x0000e389 - -#define REG_A5XX_PC_RESTART_INDEX 0x0000e38c - -#define REG_A5XX_PC_GS_LAYERED 0x0000e38d - -#define REG_A5XX_PC_GS_PARAM 0x0000e38e -#define A5XX_PC_GS_PARAM_MAX_VERTICES__MASK 0x000003ff -#define A5XX_PC_GS_PARAM_MAX_VERTICES__SHIFT 0 -static inline uint32_t A5XX_PC_GS_PARAM_MAX_VERTICES(uint32_t val) -{ - return ((val) << A5XX_PC_GS_PARAM_MAX_VERTICES__SHIFT) & A5XX_PC_GS_PARAM_MAX_VERTICES__MASK; -} -#define A5XX_PC_GS_PARAM_INVOCATIONS__MASK 0x0000f800 -#define A5XX_PC_GS_PARAM_INVOCATIONS__SHIFT 11 -static inline uint32_t A5XX_PC_GS_PARAM_INVOCATIONS(uint32_t val) -{ - return ((val) << A5XX_PC_GS_PARAM_INVOCATIONS__SHIFT) & A5XX_PC_GS_PARAM_INVOCATIONS__MASK; -} -#define A5XX_PC_GS_PARAM_PRIMTYPE__MASK 0x01800000 -#define A5XX_PC_GS_PARAM_PRIMTYPE__SHIFT 23 -static inline uint32_t A5XX_PC_GS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << A5XX_PC_GS_PARAM_PRIMTYPE__SHIFT) & A5XX_PC_GS_PARAM_PRIMTYPE__MASK; -} - -#define REG_A5XX_PC_HS_PARAM 0x0000e38f -#define A5XX_PC_HS_PARAM_VERTICES_OUT__MASK 0x0000003f -#define A5XX_PC_HS_PARAM_VERTICES_OUT__SHIFT 0 -static inline uint32_t A5XX_PC_HS_PARAM_VERTICES_OUT(uint32_t val) -{ - return ((val) << A5XX_PC_HS_PARAM_VERTICES_OUT__SHIFT) & A5XX_PC_HS_PARAM_VERTICES_OUT__MASK; -} -#define A5XX_PC_HS_PARAM_SPACING__MASK 0x00600000 -#define A5XX_PC_HS_PARAM_SPACING__SHIFT 21 -static inline uint32_t A5XX_PC_HS_PARAM_SPACING(enum a4xx_tess_spacing val) -{ - return ((val) << A5XX_PC_HS_PARAM_SPACING__SHIFT) & A5XX_PC_HS_PARAM_SPACING__MASK; -} -#define A5XX_PC_HS_PARAM_CW 0x00800000 -#define A5XX_PC_HS_PARAM_CONNECTED 0x01000000 - -#define REG_A5XX_PC_POWER_CNTL 0x0000e3b0 - -#define REG_A5XX_VFD_CONTROL_0 0x0000e400 -#define A5XX_VFD_CONTROL_0_VTXCNT__MASK 0x0000003f -#define 
A5XX_VFD_CONTROL_0_VTXCNT__SHIFT 0 -static inline uint32_t A5XX_VFD_CONTROL_0_VTXCNT(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_0_VTXCNT__SHIFT) & A5XX_VFD_CONTROL_0_VTXCNT__MASK; -} - -#define REG_A5XX_VFD_CONTROL_1 0x0000e401 -#define A5XX_VFD_CONTROL_1_REGID4VTX__MASK 0x000000ff -#define A5XX_VFD_CONTROL_1_REGID4VTX__SHIFT 0 -static inline uint32_t A5XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A5XX_VFD_CONTROL_1_REGID4VTX__MASK; -} -#define A5XX_VFD_CONTROL_1_REGID4INST__MASK 0x0000ff00 -#define A5XX_VFD_CONTROL_1_REGID4INST__SHIFT 8 -static inline uint32_t A5XX_VFD_CONTROL_1_REGID4INST(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A5XX_VFD_CONTROL_1_REGID4INST__MASK; -} -#define A5XX_VFD_CONTROL_1_REGID4PRIMID__MASK 0x00ff0000 -#define A5XX_VFD_CONTROL_1_REGID4PRIMID__SHIFT 16 -static inline uint32_t A5XX_VFD_CONTROL_1_REGID4PRIMID(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_1_REGID4PRIMID__SHIFT) & A5XX_VFD_CONTROL_1_REGID4PRIMID__MASK; -} - -#define REG_A5XX_VFD_CONTROL_2 0x0000e402 -#define A5XX_VFD_CONTROL_2_REGID_PATCHID__MASK 0x000000ff -#define A5XX_VFD_CONTROL_2_REGID_PATCHID__SHIFT 0 -static inline uint32_t A5XX_VFD_CONTROL_2_REGID_PATCHID(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_2_REGID_PATCHID__SHIFT) & A5XX_VFD_CONTROL_2_REGID_PATCHID__MASK; -} - -#define REG_A5XX_VFD_CONTROL_3 0x0000e403 -#define A5XX_VFD_CONTROL_3_REGID_PATCHID__MASK 0x0000ff00 -#define A5XX_VFD_CONTROL_3_REGID_PATCHID__SHIFT 8 -static inline uint32_t A5XX_VFD_CONTROL_3_REGID_PATCHID(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_3_REGID_PATCHID__SHIFT) & A5XX_VFD_CONTROL_3_REGID_PATCHID__MASK; -} -#define A5XX_VFD_CONTROL_3_REGID_TESSX__MASK 0x00ff0000 -#define A5XX_VFD_CONTROL_3_REGID_TESSX__SHIFT 16 -static inline uint32_t A5XX_VFD_CONTROL_3_REGID_TESSX(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_3_REGID_TESSX__SHIFT) & 
A5XX_VFD_CONTROL_3_REGID_TESSX__MASK; -} -#define A5XX_VFD_CONTROL_3_REGID_TESSY__MASK 0xff000000 -#define A5XX_VFD_CONTROL_3_REGID_TESSY__SHIFT 24 -static inline uint32_t A5XX_VFD_CONTROL_3_REGID_TESSY(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_3_REGID_TESSY__SHIFT) & A5XX_VFD_CONTROL_3_REGID_TESSY__MASK; -} - -#define REG_A5XX_VFD_CONTROL_4 0x0000e404 - -#define REG_A5XX_VFD_CONTROL_5 0x0000e405 - -#define REG_A5XX_VFD_INDEX_OFFSET 0x0000e408 - -#define REG_A5XX_VFD_INSTANCE_START_OFFSET 0x0000e409 - -static inline uint32_t REG_A5XX_VFD_FETCH(uint32_t i0) { return 0x0000e40a + 0x4*i0; } - -static inline uint32_t REG_A5XX_VFD_FETCH_BASE_LO(uint32_t i0) { return 0x0000e40a + 0x4*i0; } - -static inline uint32_t REG_A5XX_VFD_FETCH_BASE_HI(uint32_t i0) { return 0x0000e40b + 0x4*i0; } - -static inline uint32_t REG_A5XX_VFD_FETCH_SIZE(uint32_t i0) { return 0x0000e40c + 0x4*i0; } - -static inline uint32_t REG_A5XX_VFD_FETCH_STRIDE(uint32_t i0) { return 0x0000e40d + 0x4*i0; } - -static inline uint32_t REG_A5XX_VFD_DECODE(uint32_t i0) { return 0x0000e48a + 0x2*i0; } - -static inline uint32_t REG_A5XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x0000e48a + 0x2*i0; } -#define A5XX_VFD_DECODE_INSTR_IDX__MASK 0x0000001f -#define A5XX_VFD_DECODE_INSTR_IDX__SHIFT 0 -static inline uint32_t A5XX_VFD_DECODE_INSTR_IDX(uint32_t val) -{ - return ((val) << A5XX_VFD_DECODE_INSTR_IDX__SHIFT) & A5XX_VFD_DECODE_INSTR_IDX__MASK; -} -#define A5XX_VFD_DECODE_INSTR_INSTANCED 0x00020000 -#define A5XX_VFD_DECODE_INSTR_FORMAT__MASK 0x0ff00000 -#define A5XX_VFD_DECODE_INSTR_FORMAT__SHIFT 20 -static inline uint32_t A5XX_VFD_DECODE_INSTR_FORMAT(enum a5xx_vtx_fmt val) -{ - return ((val) << A5XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A5XX_VFD_DECODE_INSTR_FORMAT__MASK; -} -#define A5XX_VFD_DECODE_INSTR_SWAP__MASK 0x30000000 -#define A5XX_VFD_DECODE_INSTR_SWAP__SHIFT 28 -static inline uint32_t A5XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A5XX_VFD_DECODE_INSTR_SWAP__SHIFT) 
& A5XX_VFD_DECODE_INSTR_SWAP__MASK; -} -#define A5XX_VFD_DECODE_INSTR_UNK30 0x40000000 -#define A5XX_VFD_DECODE_INSTR_FLOAT 0x80000000 - -static inline uint32_t REG_A5XX_VFD_DECODE_STEP_RATE(uint32_t i0) { return 0x0000e48b + 0x2*i0; } - -static inline uint32_t REG_A5XX_VFD_DEST_CNTL(uint32_t i0) { return 0x0000e4ca + 0x1*i0; } - -static inline uint32_t REG_A5XX_VFD_DEST_CNTL_INSTR(uint32_t i0) { return 0x0000e4ca + 0x1*i0; } -#define A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__MASK 0x0000000f -#define A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__SHIFT 0 -static inline uint32_t A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK(uint32_t val) -{ - return ((val) << A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__SHIFT) & A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__MASK; -} -#define A5XX_VFD_DEST_CNTL_INSTR_REGID__MASK 0x00000ff0 -#define A5XX_VFD_DEST_CNTL_INSTR_REGID__SHIFT 4 -static inline uint32_t A5XX_VFD_DEST_CNTL_INSTR_REGID(uint32_t val) -{ - return ((val) << A5XX_VFD_DEST_CNTL_INSTR_REGID__SHIFT) & A5XX_VFD_DEST_CNTL_INSTR_REGID__MASK; -} - -#define REG_A5XX_VFD_POWER_CNTL 0x0000e4f0 - -#define REG_A5XX_SP_SP_CNTL 0x0000e580 - -#define REG_A5XX_SP_VS_CONFIG 0x0000e584 -#define A5XX_SP_VS_CONFIG_ENABLED 0x00000001 -#define A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_VS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_SP_FS_CONFIG 0x0000e585 -#define A5XX_SP_FS_CONFIG_ENABLED 0x00000001 -#define A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static 
inline uint32_t A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_FS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_SP_HS_CONFIG 0x0000e586 -#define A5XX_SP_HS_CONFIG_ENABLED 0x00000001 -#define A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_HS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_SP_DS_CONFIG 0x0000e587 -#define A5XX_SP_DS_CONFIG_ENABLED 0x00000001 -#define A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_DS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_SP_GS_CONFIG 0x0000e588 -#define A5XX_SP_GS_CONFIG_ENABLED 0x00000001 -#define A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__MASK 
0x000000fe -#define A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_GS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_SP_CS_CONFIG 0x0000e589 -#define A5XX_SP_CS_CONFIG_ENABLED 0x00000001 -#define A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_CS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_SP_VS_CONFIG_MAX_CONST 0x0000e58a - -#define REG_A5XX_SP_FS_CONFIG_MAX_CONST 0x0000e58b - -#define REG_A5XX_SP_VS_CTRL_REG0 0x0000e590 -#define A5XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00000008 -#define A5XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 3 -static inline uint32_t A5XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A5XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_VS_CTRL_REG0_THREADSIZE__MASK; -} -#define A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} 
-#define A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A5XX_SP_VS_CTRL_REG0_VARYING 0x00010000 -#define A5XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x00100000 -#define A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 -#define A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT 25 -static inline uint32_t A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK; -} - -#define REG_A5XX_SP_PRIMITIVE_CNTL 0x0000e592 -#define A5XX_SP_PRIMITIVE_CNTL_VSOUT__MASK 0x0000001f -#define A5XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT 0 -static inline uint32_t A5XX_SP_PRIMITIVE_CNTL_VSOUT(uint32_t val) -{ - return ((val) << A5XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT) & A5XX_SP_PRIMITIVE_CNTL_VSOUT__MASK; -} - -static inline uint32_t REG_A5XX_SP_VS_OUT(uint32_t i0) { return 0x0000e593 + 0x1*i0; } - -static inline uint32_t REG_A5XX_SP_VS_OUT_REG(uint32_t i0) { return 0x0000e593 + 0x1*i0; } -#define A5XX_SP_VS_OUT_REG_A_REGID__MASK 0x000000ff -#define A5XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 -static inline uint32_t A5XX_SP_VS_OUT_REG_A_REGID(uint32_t val) -{ - return ((val) << A5XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A5XX_SP_VS_OUT_REG_A_REGID__MASK; -} -#define A5XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00000f00 -#define A5XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 8 -static inline uint32_t A5XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val) -{ - return ((val) << A5XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A5XX_SP_VS_OUT_REG_A_COMPMASK__MASK; -} -#define A5XX_SP_VS_OUT_REG_B_REGID__MASK 0x00ff0000 -#define A5XX_SP_VS_OUT_REG_B_REGID__SHIFT 16 -static inline uint32_t A5XX_SP_VS_OUT_REG_B_REGID(uint32_t val) -{ - return ((val) << A5XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A5XX_SP_VS_OUT_REG_B_REGID__MASK; 
-} -#define A5XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x0f000000 -#define A5XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 24 -static inline uint32_t A5XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) -{ - return ((val) << A5XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A5XX_SP_VS_OUT_REG_B_COMPMASK__MASK; -} - -static inline uint32_t REG_A5XX_SP_VS_VPC_DST(uint32_t i0) { return 0x0000e5a3 + 0x1*i0; } - -static inline uint32_t REG_A5XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x0000e5a3 + 0x1*i0; } -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 -static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) -{ - return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK; -} -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8 -static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val) -{ - return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK; -} -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16 -static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val) -{ - return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK; -} -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24 -static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) -{ - return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK; -} - -#define REG_A5XX_UNKNOWN_E5AB 0x0000e5ab - -#define REG_A5XX_SP_VS_OBJ_START_LO 0x0000e5ac - -#define REG_A5XX_SP_VS_OBJ_START_HI 0x0000e5ad - -#define REG_A5XX_SP_FS_CTRL_REG0 0x0000e5c0 -#define A5XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00000008 -#define A5XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 3 -static inline uint32_t A5XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << 
A5XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_FS_CTRL_REG0_THREADSIZE__MASK; -} -#define A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A5XX_SP_FS_CTRL_REG0_VARYING 0x00010000 -#define A5XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x00100000 -#define A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 -#define A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT 25 -static inline uint32_t A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK; -} - -#define REG_A5XX_UNKNOWN_E5C2 0x0000e5c2 - -#define REG_A5XX_SP_FS_OBJ_START_LO 0x0000e5c3 - -#define REG_A5XX_SP_FS_OBJ_START_HI 0x0000e5c4 - -#define REG_A5XX_SP_BLEND_CNTL 0x0000e5c9 -#define A5XX_SP_BLEND_CNTL_ENABLED 0x00000001 -#define A5XX_SP_BLEND_CNTL_UNK8 0x00000100 -#define A5XX_SP_BLEND_CNTL_ALPHA_TO_COVERAGE 0x00000400 - -#define REG_A5XX_SP_FS_OUTPUT_CNTL 0x0000e5ca -#define A5XX_SP_FS_OUTPUT_CNTL_MRT__MASK 0x0000000f -#define A5XX_SP_FS_OUTPUT_CNTL_MRT__SHIFT 0 -static inline uint32_t A5XX_SP_FS_OUTPUT_CNTL_MRT(uint32_t val) -{ - return ((val) << A5XX_SP_FS_OUTPUT_CNTL_MRT__SHIFT) & A5XX_SP_FS_OUTPUT_CNTL_MRT__MASK; -} -#define A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__MASK 0x00001fe0 -#define A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__SHIFT 5 -static inline uint32_t A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID(uint32_t val) -{ - return ((val) << 
A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__SHIFT) & A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__MASK; -} -#define A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__MASK 0x001fe000 -#define A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__SHIFT 13 -static inline uint32_t A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID(uint32_t val) -{ - return ((val) << A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__SHIFT) & A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__MASK; -} - -static inline uint32_t REG_A5XX_SP_FS_OUTPUT(uint32_t i0) { return 0x0000e5cb + 0x1*i0; } - -static inline uint32_t REG_A5XX_SP_FS_OUTPUT_REG(uint32_t i0) { return 0x0000e5cb + 0x1*i0; } -#define A5XX_SP_FS_OUTPUT_REG_REGID__MASK 0x000000ff -#define A5XX_SP_FS_OUTPUT_REG_REGID__SHIFT 0 -static inline uint32_t A5XX_SP_FS_OUTPUT_REG_REGID(uint32_t val) -{ - return ((val) << A5XX_SP_FS_OUTPUT_REG_REGID__SHIFT) & A5XX_SP_FS_OUTPUT_REG_REGID__MASK; -} -#define A5XX_SP_FS_OUTPUT_REG_HALF_PRECISION 0x00000100 - -static inline uint32_t REG_A5XX_SP_FS_MRT(uint32_t i0) { return 0x0000e5d3 + 0x1*i0; } - -static inline uint32_t REG_A5XX_SP_FS_MRT_REG(uint32_t i0) { return 0x0000e5d3 + 0x1*i0; } -#define A5XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK 0x000000ff -#define A5XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A5XX_SP_FS_MRT_REG_COLOR_FORMAT(enum a5xx_color_fmt val) -{ - return ((val) << A5XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT) & A5XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK; -} -#define A5XX_SP_FS_MRT_REG_COLOR_SINT 0x00000100 -#define A5XX_SP_FS_MRT_REG_COLOR_UINT 0x00000200 -#define A5XX_SP_FS_MRT_REG_COLOR_SRGB 0x00000400 - -#define REG_A5XX_UNKNOWN_E5DB 0x0000e5db - -#define REG_A5XX_SP_CS_CTRL_REG0 0x0000e5f0 -#define A5XX_SP_CS_CTRL_REG0_THREADSIZE__MASK 0x00000008 -#define A5XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT 3 -static inline uint32_t A5XX_SP_CS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A5XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_CS_CTRL_REG0_THREADSIZE__MASK; -} -#define A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK 
0x000003f0 -#define A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A5XX_SP_CS_CTRL_REG0_VARYING 0x00010000 -#define A5XX_SP_CS_CTRL_REG0_PIXLODENABLE 0x00100000 -#define A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 -#define A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT 25 -static inline uint32_t A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK; -} - -#define REG_A5XX_UNKNOWN_E5F2 0x0000e5f2 - -#define REG_A5XX_SP_CS_OBJ_START_LO 0x0000e5f3 - -#define REG_A5XX_SP_CS_OBJ_START_HI 0x0000e5f4 - -#define REG_A5XX_SP_HS_CTRL_REG0 0x0000e600 -#define A5XX_SP_HS_CTRL_REG0_THREADSIZE__MASK 0x00000008 -#define A5XX_SP_HS_CTRL_REG0_THREADSIZE__SHIFT 3 -static inline uint32_t A5XX_SP_HS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A5XX_SP_HS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_HS_CTRL_REG0_THREADSIZE__MASK; -} -#define A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << 
A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A5XX_SP_HS_CTRL_REG0_VARYING 0x00010000 -#define A5XX_SP_HS_CTRL_REG0_PIXLODENABLE 0x00100000 -#define A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 -#define A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__SHIFT 25 -static inline uint32_t A5XX_SP_HS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__MASK; -} - -#define REG_A5XX_UNKNOWN_E602 0x0000e602 - -#define REG_A5XX_SP_HS_OBJ_START_LO 0x0000e603 - -#define REG_A5XX_SP_HS_OBJ_START_HI 0x0000e604 - -#define REG_A5XX_SP_DS_CTRL_REG0 0x0000e610 -#define A5XX_SP_DS_CTRL_REG0_THREADSIZE__MASK 0x00000008 -#define A5XX_SP_DS_CTRL_REG0_THREADSIZE__SHIFT 3 -static inline uint32_t A5XX_SP_DS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A5XX_SP_DS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_DS_CTRL_REG0_THREADSIZE__MASK; -} -#define A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A5XX_SP_DS_CTRL_REG0_VARYING 0x00010000 -#define A5XX_SP_DS_CTRL_REG0_PIXLODENABLE 0x00100000 -#define A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 -#define A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__SHIFT 25 -static inline uint32_t A5XX_SP_DS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__MASK; -} - 
-#define REG_A5XX_UNKNOWN_E62B 0x0000e62b - -#define REG_A5XX_SP_DS_OBJ_START_LO 0x0000e62c - -#define REG_A5XX_SP_DS_OBJ_START_HI 0x0000e62d - -#define REG_A5XX_SP_GS_CTRL_REG0 0x0000e640 -#define A5XX_SP_GS_CTRL_REG0_THREADSIZE__MASK 0x00000008 -#define A5XX_SP_GS_CTRL_REG0_THREADSIZE__SHIFT 3 -static inline uint32_t A5XX_SP_GS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A5XX_SP_GS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_GS_CTRL_REG0_THREADSIZE__MASK; -} -#define A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A5XX_SP_GS_CTRL_REG0_VARYING 0x00010000 -#define A5XX_SP_GS_CTRL_REG0_PIXLODENABLE 0x00100000 -#define A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 -#define A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__SHIFT 25 -static inline uint32_t A5XX_SP_GS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__MASK; -} - -#define REG_A5XX_UNKNOWN_E65B 0x0000e65b - -#define REG_A5XX_SP_GS_OBJ_START_LO 0x0000e65c - -#define REG_A5XX_SP_GS_OBJ_START_HI 0x0000e65d - -#define REG_A5XX_TPL1_TP_RAS_MSAA_CNTL 0x0000e704 -#define A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT) & 
A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__MASK; -} - -#define REG_A5XX_TPL1_TP_DEST_MSAA_CNTL 0x0000e705 -#define A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__MASK; -} -#define A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 - -#define REG_A5XX_TPL1_TP_BORDER_COLOR_BASE_ADDR_LO 0x0000e706 - -#define REG_A5XX_TPL1_TP_BORDER_COLOR_BASE_ADDR_HI 0x0000e707 - -#define REG_A5XX_TPL1_VS_TEX_COUNT 0x0000e700 - -#define REG_A5XX_TPL1_HS_TEX_COUNT 0x0000e701 - -#define REG_A5XX_TPL1_DS_TEX_COUNT 0x0000e702 - -#define REG_A5XX_TPL1_GS_TEX_COUNT 0x0000e703 - -#define REG_A5XX_TPL1_VS_TEX_SAMP_LO 0x0000e722 - -#define REG_A5XX_TPL1_VS_TEX_SAMP_HI 0x0000e723 - -#define REG_A5XX_TPL1_HS_TEX_SAMP_LO 0x0000e724 - -#define REG_A5XX_TPL1_HS_TEX_SAMP_HI 0x0000e725 - -#define REG_A5XX_TPL1_DS_TEX_SAMP_LO 0x0000e726 - -#define REG_A5XX_TPL1_DS_TEX_SAMP_HI 0x0000e727 - -#define REG_A5XX_TPL1_GS_TEX_SAMP_LO 0x0000e728 - -#define REG_A5XX_TPL1_GS_TEX_SAMP_HI 0x0000e729 - -#define REG_A5XX_TPL1_VS_TEX_CONST_LO 0x0000e72a - -#define REG_A5XX_TPL1_VS_TEX_CONST_HI 0x0000e72b - -#define REG_A5XX_TPL1_HS_TEX_CONST_LO 0x0000e72c - -#define REG_A5XX_TPL1_HS_TEX_CONST_HI 0x0000e72d - -#define REG_A5XX_TPL1_DS_TEX_CONST_LO 0x0000e72e - -#define REG_A5XX_TPL1_DS_TEX_CONST_HI 0x0000e72f - -#define REG_A5XX_TPL1_GS_TEX_CONST_LO 0x0000e730 - -#define REG_A5XX_TPL1_GS_TEX_CONST_HI 0x0000e731 - -#define REG_A5XX_TPL1_FS_TEX_COUNT 0x0000e750 - -#define REG_A5XX_TPL1_CS_TEX_COUNT 0x0000e751 - -#define REG_A5XX_TPL1_FS_TEX_SAMP_LO 0x0000e75a - -#define REG_A5XX_TPL1_FS_TEX_SAMP_HI 0x0000e75b - -#define REG_A5XX_TPL1_CS_TEX_SAMP_LO 0x0000e75c - -#define REG_A5XX_TPL1_CS_TEX_SAMP_HI 0x0000e75d - -#define REG_A5XX_TPL1_FS_TEX_CONST_LO 0x0000e75e - -#define 
REG_A5XX_TPL1_FS_TEX_CONST_HI 0x0000e75f - -#define REG_A5XX_TPL1_CS_TEX_CONST_LO 0x0000e760 - -#define REG_A5XX_TPL1_CS_TEX_CONST_HI 0x0000e761 - -#define REG_A5XX_TPL1_TP_FS_ROTATION_CNTL 0x0000e764 - -#define REG_A5XX_HLSQ_CONTROL_0_REG 0x0000e784 -#define A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000001 -#define A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK; -} -#define A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__MASK 0x00000004 -#define A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__SHIFT 2 -static inline uint32_t A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__SHIFT) & A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__MASK; -} - -#define REG_A5XX_HLSQ_CONTROL_1_REG 0x0000e785 -#define A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__MASK 0x0000003f -#define A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__SHIFT) & A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__MASK; -} - -#define REG_A5XX_HLSQ_CONTROL_2_REG 0x0000e786 -#define A5XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK 0x000000ff -#define A5XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CONTROL_2_REG_FACEREGID(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT) & A5XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK; -} -#define A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__MASK 0x0000ff00 -#define A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__SHIFT 8 -static inline uint32_t A5XX_HLSQ_CONTROL_2_REG_SAMPLEID(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__SHIFT) & A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__MASK; -} -#define A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__MASK 0x00ff0000 -#define 
A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__SHIFT 16 -static inline uint32_t A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__SHIFT) & A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__MASK; -} - -#define REG_A5XX_HLSQ_CONTROL_3_REG 0x0000e787 -#define A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK 0x000000ff -#define A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__SHIFT) & A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK; -} - -#define REG_A5XX_HLSQ_CONTROL_4_REG 0x0000e788 -#define A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__MASK 0x00ff0000 -#define A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__SHIFT 16 -static inline uint32_t A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__SHIFT) & A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__MASK; -} -#define A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__MASK 0xff000000 -#define A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__SHIFT 24 -static inline uint32_t A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__SHIFT) & A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__MASK; -} - -#define REG_A5XX_HLSQ_UPDATE_CNTL 0x0000e78a - -#define REG_A5XX_HLSQ_VS_CONFIG 0x0000e78b -#define A5XX_HLSQ_VS_CONFIG_ENABLED 0x00000001 -#define A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__SHIFT) & 
A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_HLSQ_FS_CONFIG 0x0000e78c -#define A5XX_HLSQ_FS_CONFIG_ENABLED 0x00000001 -#define A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_HLSQ_HS_CONFIG 0x0000e78d -#define A5XX_HLSQ_HS_CONFIG_ENABLED 0x00000001 -#define A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_HLSQ_DS_CONFIG 0x0000e78e -#define A5XX_HLSQ_DS_CONFIG_ENABLED 0x00000001 -#define A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__SHIFT 8 
-static inline uint32_t A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_HLSQ_GS_CONFIG 0x0000e78f -#define A5XX_HLSQ_GS_CONFIG_ENABLED 0x00000001 -#define A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_HLSQ_CS_CONFIG 0x0000e790 -#define A5XX_HLSQ_CS_CONFIG_ENABLED 0x00000001 -#define A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_HLSQ_VS_CNTL 0x0000e791 -#define A5XX_HLSQ_VS_CNTL_SSBO_ENABLE 0x00000001 -#define A5XX_HLSQ_VS_CNTL_INSTRLEN__MASK 0xfffffffe -#define A5XX_HLSQ_VS_CNTL_INSTRLEN__SHIFT 1 -static inline uint32_t A5XX_HLSQ_VS_CNTL_INSTRLEN(uint32_t val) -{ - return ((val) << A5XX_HLSQ_VS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_VS_CNTL_INSTRLEN__MASK; -} - -#define REG_A5XX_HLSQ_FS_CNTL 
0x0000e792 -#define A5XX_HLSQ_FS_CNTL_SSBO_ENABLE 0x00000001 -#define A5XX_HLSQ_FS_CNTL_INSTRLEN__MASK 0xfffffffe -#define A5XX_HLSQ_FS_CNTL_INSTRLEN__SHIFT 1 -static inline uint32_t A5XX_HLSQ_FS_CNTL_INSTRLEN(uint32_t val) -{ - return ((val) << A5XX_HLSQ_FS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_FS_CNTL_INSTRLEN__MASK; -} - -#define REG_A5XX_HLSQ_HS_CNTL 0x0000e793 -#define A5XX_HLSQ_HS_CNTL_SSBO_ENABLE 0x00000001 -#define A5XX_HLSQ_HS_CNTL_INSTRLEN__MASK 0xfffffffe -#define A5XX_HLSQ_HS_CNTL_INSTRLEN__SHIFT 1 -static inline uint32_t A5XX_HLSQ_HS_CNTL_INSTRLEN(uint32_t val) -{ - return ((val) << A5XX_HLSQ_HS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_HS_CNTL_INSTRLEN__MASK; -} - -#define REG_A5XX_HLSQ_DS_CNTL 0x0000e794 -#define A5XX_HLSQ_DS_CNTL_SSBO_ENABLE 0x00000001 -#define A5XX_HLSQ_DS_CNTL_INSTRLEN__MASK 0xfffffffe -#define A5XX_HLSQ_DS_CNTL_INSTRLEN__SHIFT 1 -static inline uint32_t A5XX_HLSQ_DS_CNTL_INSTRLEN(uint32_t val) -{ - return ((val) << A5XX_HLSQ_DS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_DS_CNTL_INSTRLEN__MASK; -} - -#define REG_A5XX_HLSQ_GS_CNTL 0x0000e795 -#define A5XX_HLSQ_GS_CNTL_SSBO_ENABLE 0x00000001 -#define A5XX_HLSQ_GS_CNTL_INSTRLEN__MASK 0xfffffffe -#define A5XX_HLSQ_GS_CNTL_INSTRLEN__SHIFT 1 -static inline uint32_t A5XX_HLSQ_GS_CNTL_INSTRLEN(uint32_t val) -{ - return ((val) << A5XX_HLSQ_GS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_GS_CNTL_INSTRLEN__MASK; -} - -#define REG_A5XX_HLSQ_CS_CNTL 0x0000e796 -#define A5XX_HLSQ_CS_CNTL_SSBO_ENABLE 0x00000001 -#define A5XX_HLSQ_CS_CNTL_INSTRLEN__MASK 0xfffffffe -#define A5XX_HLSQ_CS_CNTL_INSTRLEN__SHIFT 1 -static inline uint32_t A5XX_HLSQ_CS_CNTL_INSTRLEN(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_CS_CNTL_INSTRLEN__MASK; -} - -#define REG_A5XX_HLSQ_CS_KERNEL_GROUP_X 0x0000e7b9 - -#define REG_A5XX_HLSQ_CS_KERNEL_GROUP_Y 0x0000e7ba - -#define REG_A5XX_HLSQ_CS_KERNEL_GROUP_Z 0x0000e7bb - -#define REG_A5XX_HLSQ_CS_NDRANGE_0 0x0000e7b0 -#define A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK 
0x00000003 -#define A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK; -} -#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK 0x00000ffc -#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT 2 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK; -} -#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK 0x003ff000 -#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT 12 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK; -} -#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK 0xffc00000 -#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT 22 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK; -} - -#define REG_A5XX_HLSQ_CS_NDRANGE_1 0x0000e7b1 -#define A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__MASK 0xffffffff -#define A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__SHIFT) & A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__MASK; -} - -#define REG_A5XX_HLSQ_CS_NDRANGE_2 0x0000e7b2 -#define A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__MASK 0xffffffff -#define A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__SHIFT) & A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__MASK; -} - -#define REG_A5XX_HLSQ_CS_NDRANGE_3 0x0000e7b3 -#define A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__MASK 0xffffffff -#define A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__SHIFT 0 -static 
inline uint32_t A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__SHIFT) & A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__MASK; -} - -#define REG_A5XX_HLSQ_CS_NDRANGE_4 0x0000e7b4 -#define A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__MASK 0xffffffff -#define A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__SHIFT) & A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__MASK; -} - -#define REG_A5XX_HLSQ_CS_NDRANGE_5 0x0000e7b5 -#define A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__MASK 0xffffffff -#define A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__SHIFT) & A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__MASK; -} - -#define REG_A5XX_HLSQ_CS_NDRANGE_6 0x0000e7b6 -#define A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__MASK 0xffffffff -#define A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__SHIFT) & A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__MASK; -} - -#define REG_A5XX_HLSQ_CS_CNTL_0 0x0000e7b7 -#define A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK 0x000000ff -#define A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT) & A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK; -} -#define A5XX_HLSQ_CS_CNTL_0_UNK0__MASK 0x0000ff00 -#define A5XX_HLSQ_CS_CNTL_0_UNK0__SHIFT 8 -static inline uint32_t A5XX_HLSQ_CS_CNTL_0_UNK0(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_CNTL_0_UNK0__SHIFT) & A5XX_HLSQ_CS_CNTL_0_UNK0__MASK; -} -#define A5XX_HLSQ_CS_CNTL_0_UNK1__MASK 0x00ff0000 -#define A5XX_HLSQ_CS_CNTL_0_UNK1__SHIFT 16 -static inline uint32_t A5XX_HLSQ_CS_CNTL_0_UNK1(uint32_t val) -{ - return ((val) << 
A5XX_HLSQ_CS_CNTL_0_UNK1__SHIFT) & A5XX_HLSQ_CS_CNTL_0_UNK1__MASK; -} -#define A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK 0xff000000 -#define A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT 24 -static inline uint32_t A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT) & A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK; -} - -#define REG_A5XX_HLSQ_CS_CNTL_1 0x0000e7b8 - -#define REG_A5XX_UNKNOWN_E7C0 0x0000e7c0 - -#define REG_A5XX_HLSQ_VS_CONSTLEN 0x0000e7c3 - -#define REG_A5XX_HLSQ_VS_INSTRLEN 0x0000e7c4 - -#define REG_A5XX_UNKNOWN_E7C5 0x0000e7c5 - -#define REG_A5XX_HLSQ_HS_CONSTLEN 0x0000e7c8 - -#define REG_A5XX_HLSQ_HS_INSTRLEN 0x0000e7c9 - -#define REG_A5XX_UNKNOWN_E7CA 0x0000e7ca - -#define REG_A5XX_HLSQ_DS_CONSTLEN 0x0000e7cd - -#define REG_A5XX_HLSQ_DS_INSTRLEN 0x0000e7ce - -#define REG_A5XX_UNKNOWN_E7CF 0x0000e7cf - -#define REG_A5XX_HLSQ_GS_CONSTLEN 0x0000e7d2 - -#define REG_A5XX_HLSQ_GS_INSTRLEN 0x0000e7d3 - -#define REG_A5XX_UNKNOWN_E7D4 0x0000e7d4 - -#define REG_A5XX_HLSQ_FS_CONSTLEN 0x0000e7d7 - -#define REG_A5XX_HLSQ_FS_INSTRLEN 0x0000e7d8 - -#define REG_A5XX_UNKNOWN_E7D9 0x0000e7d9 - -#define REG_A5XX_HLSQ_CS_CONSTLEN 0x0000e7dc - -#define REG_A5XX_HLSQ_CS_INSTRLEN 0x0000e7dd - -#define REG_A5XX_RB_2D_BLIT_CNTL 0x00002100 - -#define REG_A5XX_RB_2D_SRC_SOLID_DW0 0x00002101 - -#define REG_A5XX_RB_2D_SRC_SOLID_DW1 0x00002102 - -#define REG_A5XX_RB_2D_SRC_SOLID_DW2 0x00002103 - -#define REG_A5XX_RB_2D_SRC_SOLID_DW3 0x00002104 - -#define REG_A5XX_RB_2D_SRC_INFO 0x00002107 -#define A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) -{ - return ((val) << A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__SHIFT) & A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__MASK; -} -#define A5XX_RB_2D_SRC_INFO_TILE_MODE__MASK 0x00000300 -#define A5XX_RB_2D_SRC_INFO_TILE_MODE__SHIFT 8 -static inline uint32_t 
A5XX_RB_2D_SRC_INFO_TILE_MODE(enum a5xx_tile_mode val) -{ - return ((val) << A5XX_RB_2D_SRC_INFO_TILE_MODE__SHIFT) & A5XX_RB_2D_SRC_INFO_TILE_MODE__MASK; -} -#define A5XX_RB_2D_SRC_INFO_COLOR_SWAP__MASK 0x00000c00 -#define A5XX_RB_2D_SRC_INFO_COLOR_SWAP__SHIFT 10 -static inline uint32_t A5XX_RB_2D_SRC_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A5XX_RB_2D_SRC_INFO_COLOR_SWAP__SHIFT) & A5XX_RB_2D_SRC_INFO_COLOR_SWAP__MASK; -} -#define A5XX_RB_2D_SRC_INFO_FLAGS 0x00001000 - -#define REG_A5XX_RB_2D_SRC_LO 0x00002108 - -#define REG_A5XX_RB_2D_SRC_HI 0x00002109 - -#define REG_A5XX_RB_2D_SRC_SIZE 0x0000210a -#define A5XX_RB_2D_SRC_SIZE_PITCH__MASK 0x0000ffff -#define A5XX_RB_2D_SRC_SIZE_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_2D_SRC_SIZE_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_2D_SRC_SIZE_PITCH__SHIFT) & A5XX_RB_2D_SRC_SIZE_PITCH__MASK; -} -#define A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__MASK 0xffff0000 -#define A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__SHIFT 16 -static inline uint32_t A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__SHIFT) & A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_RB_2D_DST_INFO 0x00002110 -#define A5XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A5XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A5XX_RB_2D_DST_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) -{ - return ((val) << A5XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT) & A5XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK; -} -#define A5XX_RB_2D_DST_INFO_TILE_MODE__MASK 0x00000300 -#define A5XX_RB_2D_DST_INFO_TILE_MODE__SHIFT 8 -static inline uint32_t A5XX_RB_2D_DST_INFO_TILE_MODE(enum a5xx_tile_mode val) -{ - return ((val) << A5XX_RB_2D_DST_INFO_TILE_MODE__SHIFT) & A5XX_RB_2D_DST_INFO_TILE_MODE__MASK; -} -#define A5XX_RB_2D_DST_INFO_COLOR_SWAP__MASK 0x00000c00 -#define A5XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT 10 -static inline uint32_t 
A5XX_RB_2D_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A5XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT) & A5XX_RB_2D_DST_INFO_COLOR_SWAP__MASK; -} -#define A5XX_RB_2D_DST_INFO_FLAGS 0x00001000 - -#define REG_A5XX_RB_2D_DST_LO 0x00002111 - -#define REG_A5XX_RB_2D_DST_HI 0x00002112 - -#define REG_A5XX_RB_2D_DST_SIZE 0x00002113 -#define A5XX_RB_2D_DST_SIZE_PITCH__MASK 0x0000ffff -#define A5XX_RB_2D_DST_SIZE_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_2D_DST_SIZE_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_2D_DST_SIZE_PITCH__SHIFT) & A5XX_RB_2D_DST_SIZE_PITCH__MASK; -} -#define A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__MASK 0xffff0000 -#define A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__SHIFT 16 -static inline uint32_t A5XX_RB_2D_DST_SIZE_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__SHIFT) & A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_RB_2D_SRC_FLAGS_LO 0x00002140 - -#define REG_A5XX_RB_2D_SRC_FLAGS_HI 0x00002141 - -#define REG_A5XX_RB_2D_DST_FLAGS_LO 0x00002143 - -#define REG_A5XX_RB_2D_DST_FLAGS_HI 0x00002144 - -#define REG_A5XX_GRAS_2D_BLIT_CNTL 0x00002180 - -#define REG_A5XX_GRAS_2D_SRC_INFO 0x00002181 -#define A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) -{ - return ((val) << A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__SHIFT) & A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__MASK; -} -#define A5XX_GRAS_2D_SRC_INFO_TILE_MODE__MASK 0x00000300 -#define A5XX_GRAS_2D_SRC_INFO_TILE_MODE__SHIFT 8 -static inline uint32_t A5XX_GRAS_2D_SRC_INFO_TILE_MODE(enum a5xx_tile_mode val) -{ - return ((val) << A5XX_GRAS_2D_SRC_INFO_TILE_MODE__SHIFT) & A5XX_GRAS_2D_SRC_INFO_TILE_MODE__MASK; -} -#define A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__MASK 0x00000c00 -#define A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__SHIFT 10 -static inline uint32_t 
A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__SHIFT) & A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__MASK; -} -#define A5XX_GRAS_2D_SRC_INFO_FLAGS 0x00001000 - -#define REG_A5XX_GRAS_2D_DST_INFO 0x00002182 -#define A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) -{ - return ((val) << A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__SHIFT) & A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__MASK; -} -#define A5XX_GRAS_2D_DST_INFO_TILE_MODE__MASK 0x00000300 -#define A5XX_GRAS_2D_DST_INFO_TILE_MODE__SHIFT 8 -static inline uint32_t A5XX_GRAS_2D_DST_INFO_TILE_MODE(enum a5xx_tile_mode val) -{ - return ((val) << A5XX_GRAS_2D_DST_INFO_TILE_MODE__SHIFT) & A5XX_GRAS_2D_DST_INFO_TILE_MODE__MASK; -} -#define A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__MASK 0x00000c00 -#define A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__SHIFT 10 -static inline uint32_t A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__SHIFT) & A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__MASK; -} -#define A5XX_GRAS_2D_DST_INFO_FLAGS 0x00001000 - -#define REG_A5XX_UNKNOWN_2100 0x00002100 - -#define REG_A5XX_UNKNOWN_2180 0x00002180 - -#define REG_A5XX_UNKNOWN_2184 0x00002184 - -#define REG_A5XX_TEX_SAMP_0 0x00000000 -#define A5XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR 0x00000001 -#define A5XX_TEX_SAMP_0_XY_MAG__MASK 0x00000006 -#define A5XX_TEX_SAMP_0_XY_MAG__SHIFT 1 -static inline uint32_t A5XX_TEX_SAMP_0_XY_MAG(enum a5xx_tex_filter val) -{ - return ((val) << A5XX_TEX_SAMP_0_XY_MAG__SHIFT) & A5XX_TEX_SAMP_0_XY_MAG__MASK; -} -#define A5XX_TEX_SAMP_0_XY_MIN__MASK 0x00000018 -#define A5XX_TEX_SAMP_0_XY_MIN__SHIFT 3 -static inline uint32_t A5XX_TEX_SAMP_0_XY_MIN(enum a5xx_tex_filter val) -{ - return ((val) << A5XX_TEX_SAMP_0_XY_MIN__SHIFT) & A5XX_TEX_SAMP_0_XY_MIN__MASK; -} -#define A5XX_TEX_SAMP_0_WRAP_S__MASK 
0x000000e0 -#define A5XX_TEX_SAMP_0_WRAP_S__SHIFT 5 -static inline uint32_t A5XX_TEX_SAMP_0_WRAP_S(enum a5xx_tex_clamp val) -{ - return ((val) << A5XX_TEX_SAMP_0_WRAP_S__SHIFT) & A5XX_TEX_SAMP_0_WRAP_S__MASK; -} -#define A5XX_TEX_SAMP_0_WRAP_T__MASK 0x00000700 -#define A5XX_TEX_SAMP_0_WRAP_T__SHIFT 8 -static inline uint32_t A5XX_TEX_SAMP_0_WRAP_T(enum a5xx_tex_clamp val) -{ - return ((val) << A5XX_TEX_SAMP_0_WRAP_T__SHIFT) & A5XX_TEX_SAMP_0_WRAP_T__MASK; -} -#define A5XX_TEX_SAMP_0_WRAP_R__MASK 0x00003800 -#define A5XX_TEX_SAMP_0_WRAP_R__SHIFT 11 -static inline uint32_t A5XX_TEX_SAMP_0_WRAP_R(enum a5xx_tex_clamp val) -{ - return ((val) << A5XX_TEX_SAMP_0_WRAP_R__SHIFT) & A5XX_TEX_SAMP_0_WRAP_R__MASK; -} -#define A5XX_TEX_SAMP_0_ANISO__MASK 0x0001c000 -#define A5XX_TEX_SAMP_0_ANISO__SHIFT 14 -static inline uint32_t A5XX_TEX_SAMP_0_ANISO(enum a5xx_tex_aniso val) -{ - return ((val) << A5XX_TEX_SAMP_0_ANISO__SHIFT) & A5XX_TEX_SAMP_0_ANISO__MASK; -} -#define A5XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000 -#define A5XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19 -static inline uint32_t A5XX_TEX_SAMP_0_LOD_BIAS(float val) -{ - return ((((int32_t)(val * 256.0))) << A5XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A5XX_TEX_SAMP_0_LOD_BIAS__MASK; -} - -#define REG_A5XX_TEX_SAMP_1 0x00000001 -#define A5XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e -#define A5XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT 1 -static inline uint32_t A5XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val) -{ - return ((val) << A5XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A5XX_TEX_SAMP_1_COMPARE_FUNC__MASK; -} -#define A5XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF 0x00000010 -#define A5XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020 -#define A5XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040 -#define A5XX_TEX_SAMP_1_MAX_LOD__MASK 0x000fff00 -#define A5XX_TEX_SAMP_1_MAX_LOD__SHIFT 8 -static inline uint32_t A5XX_TEX_SAMP_1_MAX_LOD(float val) -{ - return ((((uint32_t)(val * 256.0))) << A5XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A5XX_TEX_SAMP_1_MAX_LOD__MASK; -} -#define 
A5XX_TEX_SAMP_1_MIN_LOD__MASK 0xfff00000 -#define A5XX_TEX_SAMP_1_MIN_LOD__SHIFT 20 -static inline uint32_t A5XX_TEX_SAMP_1_MIN_LOD(float val) -{ - return ((((uint32_t)(val * 256.0))) << A5XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A5XX_TEX_SAMP_1_MIN_LOD__MASK; -} - -#define REG_A5XX_TEX_SAMP_2 0x00000002 -#define A5XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK 0xfffffff0 -#define A5XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT 4 -static inline uint32_t A5XX_TEX_SAMP_2_BCOLOR_OFFSET(uint32_t val) -{ - return ((val) << A5XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT) & A5XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK; -} - -#define REG_A5XX_TEX_SAMP_3 0x00000003 - -#define REG_A5XX_TEX_CONST_0 0x00000000 -#define A5XX_TEX_CONST_0_TILE_MODE__MASK 0x00000003 -#define A5XX_TEX_CONST_0_TILE_MODE__SHIFT 0 -static inline uint32_t A5XX_TEX_CONST_0_TILE_MODE(enum a5xx_tile_mode val) -{ - return ((val) << A5XX_TEX_CONST_0_TILE_MODE__SHIFT) & A5XX_TEX_CONST_0_TILE_MODE__MASK; -} -#define A5XX_TEX_CONST_0_SRGB 0x00000004 -#define A5XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070 -#define A5XX_TEX_CONST_0_SWIZ_X__SHIFT 4 -static inline uint32_t A5XX_TEX_CONST_0_SWIZ_X(enum a5xx_tex_swiz val) -{ - return ((val) << A5XX_TEX_CONST_0_SWIZ_X__SHIFT) & A5XX_TEX_CONST_0_SWIZ_X__MASK; -} -#define A5XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380 -#define A5XX_TEX_CONST_0_SWIZ_Y__SHIFT 7 -static inline uint32_t A5XX_TEX_CONST_0_SWIZ_Y(enum a5xx_tex_swiz val) -{ - return ((val) << A5XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A5XX_TEX_CONST_0_SWIZ_Y__MASK; -} -#define A5XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00 -#define A5XX_TEX_CONST_0_SWIZ_Z__SHIFT 10 -static inline uint32_t A5XX_TEX_CONST_0_SWIZ_Z(enum a5xx_tex_swiz val) -{ - return ((val) << A5XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A5XX_TEX_CONST_0_SWIZ_Z__MASK; -} -#define A5XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000 -#define A5XX_TEX_CONST_0_SWIZ_W__SHIFT 13 -static inline uint32_t A5XX_TEX_CONST_0_SWIZ_W(enum a5xx_tex_swiz val) -{ - return ((val) << A5XX_TEX_CONST_0_SWIZ_W__SHIFT) & A5XX_TEX_CONST_0_SWIZ_W__MASK; -} -#define 
A5XX_TEX_CONST_0_MIPLVLS__MASK 0x000f0000 -#define A5XX_TEX_CONST_0_MIPLVLS__SHIFT 16 -static inline uint32_t A5XX_TEX_CONST_0_MIPLVLS(uint32_t val) -{ - return ((val) << A5XX_TEX_CONST_0_MIPLVLS__SHIFT) & A5XX_TEX_CONST_0_MIPLVLS__MASK; -} -#define A5XX_TEX_CONST_0_SAMPLES__MASK 0x00300000 -#define A5XX_TEX_CONST_0_SAMPLES__SHIFT 20 -static inline uint32_t A5XX_TEX_CONST_0_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A5XX_TEX_CONST_0_SAMPLES__SHIFT) & A5XX_TEX_CONST_0_SAMPLES__MASK; -} -#define A5XX_TEX_CONST_0_FMT__MASK 0x3fc00000 -#define A5XX_TEX_CONST_0_FMT__SHIFT 22 -static inline uint32_t A5XX_TEX_CONST_0_FMT(enum a5xx_tex_fmt val) -{ - return ((val) << A5XX_TEX_CONST_0_FMT__SHIFT) & A5XX_TEX_CONST_0_FMT__MASK; -} -#define A5XX_TEX_CONST_0_SWAP__MASK 0xc0000000 -#define A5XX_TEX_CONST_0_SWAP__SHIFT 30 -static inline uint32_t A5XX_TEX_CONST_0_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A5XX_TEX_CONST_0_SWAP__SHIFT) & A5XX_TEX_CONST_0_SWAP__MASK; -} - -#define REG_A5XX_TEX_CONST_1 0x00000001 -#define A5XX_TEX_CONST_1_WIDTH__MASK 0x00007fff -#define A5XX_TEX_CONST_1_WIDTH__SHIFT 0 -static inline uint32_t A5XX_TEX_CONST_1_WIDTH(uint32_t val) -{ - return ((val) << A5XX_TEX_CONST_1_WIDTH__SHIFT) & A5XX_TEX_CONST_1_WIDTH__MASK; -} -#define A5XX_TEX_CONST_1_HEIGHT__MASK 0x3fff8000 -#define A5XX_TEX_CONST_1_HEIGHT__SHIFT 15 -static inline uint32_t A5XX_TEX_CONST_1_HEIGHT(uint32_t val) -{ - return ((val) << A5XX_TEX_CONST_1_HEIGHT__SHIFT) & A5XX_TEX_CONST_1_HEIGHT__MASK; -} - -#define REG_A5XX_TEX_CONST_2 0x00000002 -#define A5XX_TEX_CONST_2_FETCHSIZE__MASK 0x0000000f -#define A5XX_TEX_CONST_2_FETCHSIZE__SHIFT 0 -static inline uint32_t A5XX_TEX_CONST_2_FETCHSIZE(enum a5xx_tex_fetchsize val) -{ - return ((val) << A5XX_TEX_CONST_2_FETCHSIZE__SHIFT) & A5XX_TEX_CONST_2_FETCHSIZE__MASK; -} -#define A5XX_TEX_CONST_2_PITCH__MASK 0x1fffff80 -#define A5XX_TEX_CONST_2_PITCH__SHIFT 7 -static inline uint32_t A5XX_TEX_CONST_2_PITCH(uint32_t val) -{ - return 
((val) << A5XX_TEX_CONST_2_PITCH__SHIFT) & A5XX_TEX_CONST_2_PITCH__MASK; -} -#define A5XX_TEX_CONST_2_TYPE__MASK 0x60000000 -#define A5XX_TEX_CONST_2_TYPE__SHIFT 29 -static inline uint32_t A5XX_TEX_CONST_2_TYPE(enum a5xx_tex_type val) -{ - return ((val) << A5XX_TEX_CONST_2_TYPE__SHIFT) & A5XX_TEX_CONST_2_TYPE__MASK; -} - -#define REG_A5XX_TEX_CONST_3 0x00000003 -#define A5XX_TEX_CONST_3_ARRAY_PITCH__MASK 0x00003fff -#define A5XX_TEX_CONST_3_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A5XX_TEX_CONST_3_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << A5XX_TEX_CONST_3_ARRAY_PITCH__SHIFT) & A5XX_TEX_CONST_3_ARRAY_PITCH__MASK; -} -#define A5XX_TEX_CONST_3_FLAG 0x10000000 - -#define REG_A5XX_TEX_CONST_4 0x00000004 -#define A5XX_TEX_CONST_4_BASE_LO__MASK 0xffffffe0 -#define A5XX_TEX_CONST_4_BASE_LO__SHIFT 5 -static inline uint32_t A5XX_TEX_CONST_4_BASE_LO(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A5XX_TEX_CONST_4_BASE_LO__SHIFT) & A5XX_TEX_CONST_4_BASE_LO__MASK; -} - -#define REG_A5XX_TEX_CONST_5 0x00000005 -#define A5XX_TEX_CONST_5_BASE_HI__MASK 0x0001ffff -#define A5XX_TEX_CONST_5_BASE_HI__SHIFT 0 -static inline uint32_t A5XX_TEX_CONST_5_BASE_HI(uint32_t val) -{ - return ((val) << A5XX_TEX_CONST_5_BASE_HI__SHIFT) & A5XX_TEX_CONST_5_BASE_HI__MASK; -} -#define A5XX_TEX_CONST_5_DEPTH__MASK 0x3ffe0000 -#define A5XX_TEX_CONST_5_DEPTH__SHIFT 17 -static inline uint32_t A5XX_TEX_CONST_5_DEPTH(uint32_t val) -{ - return ((val) << A5XX_TEX_CONST_5_DEPTH__SHIFT) & A5XX_TEX_CONST_5_DEPTH__MASK; -} - -#define REG_A5XX_TEX_CONST_6 0x00000006 - -#define REG_A5XX_TEX_CONST_7 0x00000007 - -#define REG_A5XX_TEX_CONST_8 0x00000008 - -#define REG_A5XX_TEX_CONST_9 0x00000009 - -#define REG_A5XX_TEX_CONST_10 0x0000000a - -#define REG_A5XX_TEX_CONST_11 0x0000000b - -#define REG_A5XX_SSBO_0_0 0x00000000 -#define A5XX_SSBO_0_0_BASE_LO__MASK 0xffffffe0 -#define A5XX_SSBO_0_0_BASE_LO__SHIFT 5 -static inline uint32_t 
A5XX_SSBO_0_0_BASE_LO(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A5XX_SSBO_0_0_BASE_LO__SHIFT) & A5XX_SSBO_0_0_BASE_LO__MASK; -} - -#define REG_A5XX_SSBO_0_1 0x00000001 -#define A5XX_SSBO_0_1_PITCH__MASK 0x003fffff -#define A5XX_SSBO_0_1_PITCH__SHIFT 0 -static inline uint32_t A5XX_SSBO_0_1_PITCH(uint32_t val) -{ - return ((val) << A5XX_SSBO_0_1_PITCH__SHIFT) & A5XX_SSBO_0_1_PITCH__MASK; -} - -#define REG_A5XX_SSBO_0_2 0x00000002 -#define A5XX_SSBO_0_2_ARRAY_PITCH__MASK 0x03fff000 -#define A5XX_SSBO_0_2_ARRAY_PITCH__SHIFT 12 -static inline uint32_t A5XX_SSBO_0_2_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << A5XX_SSBO_0_2_ARRAY_PITCH__SHIFT) & A5XX_SSBO_0_2_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_SSBO_0_3 0x00000003 -#define A5XX_SSBO_0_3_CPP__MASK 0x0000003f -#define A5XX_SSBO_0_3_CPP__SHIFT 0 -static inline uint32_t A5XX_SSBO_0_3_CPP(uint32_t val) -{ - return ((val) << A5XX_SSBO_0_3_CPP__SHIFT) & A5XX_SSBO_0_3_CPP__MASK; -} - -#define REG_A5XX_SSBO_1_0 0x00000000 -#define A5XX_SSBO_1_0_FMT__MASK 0x0000ff00 -#define A5XX_SSBO_1_0_FMT__SHIFT 8 -static inline uint32_t A5XX_SSBO_1_0_FMT(enum a5xx_tex_fmt val) -{ - return ((val) << A5XX_SSBO_1_0_FMT__SHIFT) & A5XX_SSBO_1_0_FMT__MASK; -} -#define A5XX_SSBO_1_0_WIDTH__MASK 0xffff0000 -#define A5XX_SSBO_1_0_WIDTH__SHIFT 16 -static inline uint32_t A5XX_SSBO_1_0_WIDTH(uint32_t val) -{ - return ((val) << A5XX_SSBO_1_0_WIDTH__SHIFT) & A5XX_SSBO_1_0_WIDTH__MASK; -} - -#define REG_A5XX_SSBO_1_1 0x00000001 -#define A5XX_SSBO_1_1_HEIGHT__MASK 0x0000ffff -#define A5XX_SSBO_1_1_HEIGHT__SHIFT 0 -static inline uint32_t A5XX_SSBO_1_1_HEIGHT(uint32_t val) -{ - return ((val) << A5XX_SSBO_1_1_HEIGHT__SHIFT) & A5XX_SSBO_1_1_HEIGHT__MASK; -} -#define A5XX_SSBO_1_1_DEPTH__MASK 0xffff0000 -#define A5XX_SSBO_1_1_DEPTH__SHIFT 16 -static inline uint32_t A5XX_SSBO_1_1_DEPTH(uint32_t val) -{ - return ((val) << A5XX_SSBO_1_1_DEPTH__SHIFT) & A5XX_SSBO_1_1_DEPTH__MASK; -} - -#define 
REG_A5XX_SSBO_2_0 0x00000000 -#define A5XX_SSBO_2_0_BASE_LO__MASK 0xffffffff -#define A5XX_SSBO_2_0_BASE_LO__SHIFT 0 -static inline uint32_t A5XX_SSBO_2_0_BASE_LO(uint32_t val) -{ - return ((val) << A5XX_SSBO_2_0_BASE_LO__SHIFT) & A5XX_SSBO_2_0_BASE_LO__MASK; -} - -#define REG_A5XX_SSBO_2_1 0x00000001 -#define A5XX_SSBO_2_1_BASE_HI__MASK 0xffffffff -#define A5XX_SSBO_2_1_BASE_HI__SHIFT 0 -static inline uint32_t A5XX_SSBO_2_1_BASE_HI(uint32_t val) -{ - return ((val) << A5XX_SSBO_2_1_BASE_HI__SHIFT) & A5XX_SSBO_2_1_BASE_HI__MASK; -} - - -#endif /* A5XX_XML */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c 2019-03-31 23:16:37.000000000 +0000 @@ -122,7 +122,8 @@ debug_assert(info->dst.box.height >= 0); debug_assert(info->dst.box.depth >= 0); - if (info->dst.resource->nr_samples + info->src.resource->nr_samples) + if ((info->dst.resource->nr_samples > 1) || + (info->src.resource->nr_samples > 1)) return false; if (info->scissor_enable) @@ -449,14 +450,13 @@ } } -void +bool fd5_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info) { struct fd_batch *batch; if (!can_do_blit(info)) { - fd_blitter_blit(ctx, info); - return; + return false; } batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, true); @@ -482,6 +482,8 @@ batch->needs_flush = true; fd_batch_flush(batch, false, false); + + return true; } unsigned diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_blitter.h mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_blitter.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_blitter.h 2018-01-17 14:10:45.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_blitter.h 2019-03-31 23:16:37.000000000 +0000 @@ -31,7 +31,7 @@ #include "freedreno_context.h" -void 
fd5_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info); +bool fd5_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info); unsigned fd5_tile_mode(const struct pipe_resource *tmpl); #endif /* FD5_BLIT_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_context.c mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_context.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -48,8 +48,6 @@ fd_context_destroy(pctx); - fd_bo_del(fd5_ctx->vs_pvt_mem); - fd_bo_del(fd5_ctx->fs_pvt_mem); fd_bo_del(fd5_ctx->vsc_size_mem); fd_bo_del(fd5_ctx->blit_mem); @@ -105,17 +103,11 @@ util_blitter_set_texture_multisample(fd5_ctx->base.blitter, true); - fd5_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000, - DRM_FREEDRENO_GEM_TYPE_KMEM); - - fd5_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000, - DRM_FREEDRENO_GEM_TYPE_KMEM); - fd5_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size"); fd5_ctx->blit_mem = fd_bo_new(screen->dev, 0x1000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "blit"); fd_context_setup_common_vbos(&fd5_ctx->base); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_context.h mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_context.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_context.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -31,13 +31,11 @@ #include "freedreno_context.h" -#include "ir3_shader.h" +#include "ir3/ir3_shader.h" struct fd5_context { struct fd_context base; - struct fd_bo *vs_pvt_mem, *fs_pvt_mem; - /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We * could combine it with another allocation. 
*/ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_emit.c mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_emit.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_emit.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_emit.c 2019-03-31 23:16:37.000000000 +0000 @@ -51,7 +51,7 @@ * sizedwords: size of const value buffer */ static void -fd5_emit_const(struct fd_ringbuffer *ring, enum shader_t type, +fd5_emit_const(struct fd_ringbuffer *ring, gl_shader_stage type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc) { @@ -90,7 +90,7 @@ } static void -fd5_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, +fd5_emit_const_bo(struct fd_ringbuffer *ring, gl_shader_stage type, boolean write, uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets) { uint32_t anum = align(num, 2); @@ -704,7 +704,7 @@ if (!emit->binning_pass) ir3_emit_fs_consts(fp, ring, ctx); - struct pipe_stream_output_info *info = &vp->shader->stream_output; + struct ir3_stream_output_info *info = &vp->shader->stream_output; if (info->num_outputs) { struct fd_streamout_stateobj *so = &ctx->streamout; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_emit.h mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_emit.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_emit.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_emit.h 2019-03-31 23:16:37.000000000 +0000 @@ -34,7 +34,7 @@ #include "fd5_format.h" #include "fd5_program.h" #include "fd5_screen.h" -#include "ir3_shader.h" +#include "ir3_gallium.h" struct fd_ringbuffer; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c 2019-03-31 23:16:37.000000000 +0000 @@ -290,7 +290,7 @@ struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; if (!pipe->bo) { pipe->bo = fd_bo_new(ctx->dev, 0x20000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i); } OUT_RELOCW(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */ } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_program.c mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_program.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_program.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_program.c 2019-03-31 23:16:37.000000000 +0000 @@ -42,7 +42,7 @@ static struct ir3_shader * create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso, - enum shader_t type) + gl_shader_stage type) { struct fd_context *ctx = fd_context(pctx); struct ir3_compiler *compiler = ctx->screen->compiler; @@ -53,7 +53,7 @@ fd5_fp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { - return create_shader_stateobj(pctx, cso, SHADER_FRAGMENT); + return create_shader_stateobj(pctx, cso, MESA_SHADER_FRAGMENT); } static void @@ -67,7 +67,7 @@ fd5_vp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { - return create_shader_stateobj(pctx, cso, SHADER_VERTEX); + return create_shader_stateobj(pctx, cso, MESA_SHADER_VERTEX); } static void @@ -105,7 +105,7 @@ CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER)); OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); } else { - OUT_RELOC(ring, so->bo, 0, + OUT_RELOCD(ring, so->bo, 0, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER), 0); } @@ -125,14 +125,14 @@ static void link_stream_out(struct ir3_shader_linkage *l, const struct ir3_shader_variant *v) { - const struct pipe_stream_output_info *strmout = &v->shader->stream_output; + const struct ir3_stream_output_info *strmout = &v->shader->stream_output; /* * First, any 
stream-out varyings not already in linkage map (ie. also * consumed by frag shader) need to be added: */ for (unsigned i = 0; i < strmout->num_outputs; i++) { - const struct pipe_stream_output *out = &strmout->output[i]; + const struct ir3_stream_output *out = &strmout->output[i]; unsigned k = out->register_index; unsigned compmask = (1 << (out->num_components + out->start_component)) - 1; @@ -173,14 +173,14 @@ emit_stream_out(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v, struct ir3_shader_linkage *l) { - const struct pipe_stream_output_info *strmout = &v->shader->stream_output; + const struct ir3_stream_output_info *strmout = &v->shader->stream_output; unsigned ncomp[PIPE_MAX_SO_BUFFERS] = {0}; unsigned prog[align(l->max_loc, 2) / 2]; memset(prog, 0, sizeof(prog)); for (unsigned i = 0; i < strmout->num_outputs; i++) { - const struct pipe_stream_output *out = &strmout->output[i]; + const struct ir3_stream_output *out = &strmout->output[i]; unsigned k = out->register_index; unsigned idx; @@ -443,7 +443,7 @@ OUT_RING(ring, A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) | A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) | 0x6 | /* XXX seems to be always set? */ - A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. + A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(s[VS].v->branchstack) | COND(s[VS].v->num_samp > 0, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE)); struct ir3_shader_linkage l = {0}; @@ -567,7 +567,7 @@ A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) | A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) | A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) | - A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. 
+ A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(s[FS].v->branchstack) | COND(s[FS].v->num_samp > 0, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE)); OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_program.h mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_program.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_program.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_program.h 2019-03-31 23:16:37.000000000 +0000 @@ -29,7 +29,8 @@ #include "pipe/p_context.h" #include "freedreno_context.h" -#include "ir3_shader.h" + +#include "ir3/ir3_shader.h" struct fd5_emit; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_screen.c mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_screen.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_screen.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -33,7 +33,7 @@ #include "fd5_format.h" #include "fd5_resource.h" -#include "ir3_compiler.h" +#include "ir3/ir3_compiler.h" static bool valid_sample_count(unsigned sample_count) diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/a6xx.xml.h mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/a6xx.xml.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/a6xx.xml.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/a6xx.xml.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,5447 +0,0 @@ -#ifndef A6XX_XML -#define A6XX_XML - -/* Autogenerated file, DO NOT EDIT manually! 
- -This file was generated by the rules-ng-ng headergen tool in this git repository: -http://github.com/freedreno/envytools/ -git clone https://github.com/freedreno/envytools.git - -The rules-ng-ng source files this header was generated from are: -- /work/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-10 14:59:32) -- /work/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-10 14:59:32) -- /work/envytools/rnndb/adreno/a2xx.xml ( 37936 bytes, from 2018-10-08 20:10:47) -- /work/envytools/rnndb/adreno/adreno_common.xml ( 14201 bytes, from 2018-10-08 20:10:47) -- /work/envytools/rnndb/adreno/adreno_pm4.xml ( 42864 bytes, from 2018-10-08 20:14:26) -- /work/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-10 14:59:32) -- /work/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-10 14:59:32) -- /work/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-09-28 22:41:49) -- /work/envytools/rnndb/adreno/a6xx.xml ( 140642 bytes, from 2018-10-12 21:46:25) -- /work/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-28 22:41:49) -- /work/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-10 14:59:32) - -Copyright (C) 2013-2018 by the following authors: -- Rob Clark (robclark) -- Ilia Mirkin (imirkin) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - - -enum a6xx_color_fmt { - RB6_A8_UNORM = 2, - RB6_R8_UNORM = 3, - RB6_R8_SNORM = 4, - RB6_R8_UINT = 5, - RB6_R8_SINT = 6, - RB6_R4G4B4A4_UNORM = 8, - RB6_R5G5B5A1_UNORM = 10, - RB6_R5G6B5_UNORM = 14, - RB6_R8G8_UNORM = 15, - RB6_R8G8_SNORM = 16, - RB6_R8G8_UINT = 17, - RB6_R8G8_SINT = 18, - RB6_R16_UNORM = 21, - RB6_R16_SNORM = 22, - RB6_R16_FLOAT = 23, - RB6_R16_UINT = 24, - RB6_R16_SINT = 25, - RB6_R8G8B8A8_UNORM = 48, - RB6_R8G8B8_UNORM = 49, - RB6_R8G8B8A8_SNORM = 50, - RB6_R8G8B8A8_UINT = 51, - RB6_R8G8B8A8_SINT = 52, - RB6_R10G10B10A2_UNORM = 55, - RB6_R10G10B10A2_UINT = 58, - RB6_R11G11B10_FLOAT = 66, - RB6_R16G16_UNORM = 67, - RB6_R16G16_SNORM = 68, - RB6_R16G16_FLOAT = 69, - RB6_R16G16_UINT = 70, - RB6_R16G16_SINT = 71, - RB6_R32_FLOAT = 74, - RB6_R32_UINT = 75, - RB6_R32_SINT = 76, - RB6_R16G16B16A16_UNORM = 96, - RB6_R16G16B16A16_SNORM = 97, - RB6_R16G16B16A16_FLOAT = 98, - RB6_R16G16B16A16_UINT = 99, - RB6_R16G16B16A16_SINT = 100, - RB6_R32G32_FLOAT = 103, - RB6_R32G32_UINT = 104, - RB6_R32G32_SINT = 105, - RB6_R32G32B32A32_FLOAT = 130, - RB6_R32G32B32A32_UINT = 131, - RB6_R32G32B32A32_SINT = 132, - RB6_X8Z24_UNORM = 160, -}; - -enum a6xx_tile_mode { - TILE6_LINEAR = 0, - TILE6_2 = 2, - TILE6_3 = 3, -}; - -enum a6xx_vtx_fmt { - VFMT6_8_UNORM = 3, - VFMT6_8_SNORM = 4, - VFMT6_8_UINT = 5, - VFMT6_8_SINT = 6, - VFMT6_8_8_UNORM = 15, - VFMT6_8_8_SNORM = 16, - VFMT6_8_8_UINT = 17, - VFMT6_8_8_SINT = 18, - VFMT6_16_UNORM = 21, - VFMT6_16_SNORM = 22, - VFMT6_16_FLOAT = 23, - 
VFMT6_16_UINT = 24, - VFMT6_16_SINT = 25, - VFMT6_8_8_8_UNORM = 33, - VFMT6_8_8_8_SNORM = 34, - VFMT6_8_8_8_UINT = 35, - VFMT6_8_8_8_SINT = 36, - VFMT6_8_8_8_8_UNORM = 48, - VFMT6_8_8_8_8_SNORM = 50, - VFMT6_8_8_8_8_UINT = 51, - VFMT6_8_8_8_8_SINT = 52, - VFMT6_10_10_10_2_UNORM = 54, - VFMT6_10_10_10_2_SNORM = 57, - VFMT6_10_10_10_2_UINT = 58, - VFMT6_10_10_10_2_SINT = 59, - VFMT6_11_11_10_FLOAT = 66, - VFMT6_16_16_UNORM = 67, - VFMT6_16_16_SNORM = 68, - VFMT6_16_16_FLOAT = 69, - VFMT6_16_16_UINT = 70, - VFMT6_16_16_SINT = 71, - VFMT6_32_UNORM = 72, - VFMT6_32_SNORM = 73, - VFMT6_32_FLOAT = 74, - VFMT6_32_UINT = 75, - VFMT6_32_SINT = 76, - VFMT6_32_FIXED = 77, - VFMT6_16_16_16_UNORM = 88, - VFMT6_16_16_16_SNORM = 89, - VFMT6_16_16_16_FLOAT = 90, - VFMT6_16_16_16_UINT = 91, - VFMT6_16_16_16_SINT = 92, - VFMT6_16_16_16_16_UNORM = 96, - VFMT6_16_16_16_16_SNORM = 97, - VFMT6_16_16_16_16_FLOAT = 98, - VFMT6_16_16_16_16_UINT = 99, - VFMT6_16_16_16_16_SINT = 100, - VFMT6_32_32_UNORM = 101, - VFMT6_32_32_SNORM = 102, - VFMT6_32_32_FLOAT = 103, - VFMT6_32_32_UINT = 104, - VFMT6_32_32_SINT = 105, - VFMT6_32_32_FIXED = 106, - VFMT6_32_32_32_UNORM = 112, - VFMT6_32_32_32_SNORM = 113, - VFMT6_32_32_32_UINT = 114, - VFMT6_32_32_32_SINT = 115, - VFMT6_32_32_32_FLOAT = 116, - VFMT6_32_32_32_FIXED = 117, - VFMT6_32_32_32_32_UNORM = 128, - VFMT6_32_32_32_32_SNORM = 129, - VFMT6_32_32_32_32_FLOAT = 130, - VFMT6_32_32_32_32_UINT = 131, - VFMT6_32_32_32_32_SINT = 132, - VFMT6_32_32_32_32_FIXED = 133, -}; - -enum a6xx_tex_fmt { - TFMT6_A8_UNORM = 2, - TFMT6_8_UNORM = 3, - TFMT6_8_SNORM = 4, - TFMT6_8_UINT = 5, - TFMT6_8_SINT = 6, - TFMT6_4_4_4_4_UNORM = 8, - TFMT6_5_5_5_1_UNORM = 10, - TFMT6_5_6_5_UNORM = 14, - TFMT6_8_8_UNORM = 15, - TFMT6_8_8_SNORM = 16, - TFMT6_8_8_UINT = 17, - TFMT6_8_8_SINT = 18, - TFMT6_L8_A8_UNORM = 19, - TFMT6_16_UNORM = 21, - TFMT6_16_SNORM = 22, - TFMT6_16_FLOAT = 23, - TFMT6_16_UINT = 24, - TFMT6_16_SINT = 25, - TFMT6_8_8_8_8_UNORM = 48, - TFMT6_8_8_8_UNORM = 
49, - TFMT6_8_8_8_8_SNORM = 50, - TFMT6_8_8_8_8_UINT = 51, - TFMT6_8_8_8_8_SINT = 52, - TFMT6_9_9_9_E5_FLOAT = 53, - TFMT6_10_10_10_2_UNORM = 54, - TFMT6_10_10_10_2_UINT = 58, - TFMT6_11_11_10_FLOAT = 66, - TFMT6_16_16_UNORM = 67, - TFMT6_16_16_SNORM = 68, - TFMT6_16_16_FLOAT = 69, - TFMT6_16_16_UINT = 70, - TFMT6_16_16_SINT = 71, - TFMT6_32_FLOAT = 74, - TFMT6_32_UINT = 75, - TFMT6_32_SINT = 76, - TFMT6_16_16_16_16_UNORM = 96, - TFMT6_16_16_16_16_SNORM = 97, - TFMT6_16_16_16_16_FLOAT = 98, - TFMT6_16_16_16_16_UINT = 99, - TFMT6_16_16_16_16_SINT = 100, - TFMT6_32_32_FLOAT = 103, - TFMT6_32_32_UINT = 104, - TFMT6_32_32_SINT = 105, - TFMT6_32_32_32_UINT = 114, - TFMT6_32_32_32_SINT = 115, - TFMT6_32_32_32_FLOAT = 116, - TFMT6_32_32_32_32_FLOAT = 130, - TFMT6_32_32_32_32_UINT = 131, - TFMT6_32_32_32_32_SINT = 132, - TFMT6_X8Z24_UNORM = 160, - TFMT6_ETC2_RG11_UNORM = 171, - TFMT6_ETC2_RG11_SNORM = 172, - TFMT6_ETC2_R11_UNORM = 173, - TFMT6_ETC2_R11_SNORM = 174, - TFMT6_ETC1 = 175, - TFMT6_ETC2_RGB8 = 176, - TFMT6_ETC2_RGBA8 = 177, - TFMT6_ETC2_RGB8A1 = 178, - TFMT6_DXT1 = 179, - TFMT6_DXT3 = 180, - TFMT6_DXT5 = 181, - TFMT6_RGTC1_UNORM = 183, - TFMT6_RGTC1_SNORM = 184, - TFMT6_RGTC2_UNORM = 187, - TFMT6_RGTC2_SNORM = 188, - TFMT6_BPTC_UFLOAT = 190, - TFMT6_BPTC_FLOAT = 191, - TFMT6_BPTC = 192, - TFMT6_ASTC_4x4 = 193, - TFMT6_ASTC_5x4 = 194, - TFMT6_ASTC_5x5 = 195, - TFMT6_ASTC_6x5 = 196, - TFMT6_ASTC_6x6 = 197, - TFMT6_ASTC_8x5 = 198, - TFMT6_ASTC_8x6 = 199, - TFMT6_ASTC_8x8 = 200, - TFMT6_ASTC_10x5 = 201, - TFMT6_ASTC_10x6 = 202, - TFMT6_ASTC_10x8 = 203, - TFMT6_ASTC_10x10 = 204, - TFMT6_ASTC_12x10 = 205, - TFMT6_ASTC_12x12 = 206, -}; - -enum a6xx_tex_fetchsize { - TFETCH6_1_BYTE = 0, - TFETCH6_2_BYTE = 1, - TFETCH6_4_BYTE = 2, - TFETCH6_8_BYTE = 3, - TFETCH6_16_BYTE = 4, -}; - -enum a6xx_depth_format { - DEPTH6_NONE = 0, - DEPTH6_16 = 1, - DEPTH6_24_8 = 2, - DEPTH6_32 = 4, -}; - -enum a6xx_shader_id { - A6XX_TP0_TMO_DATA = 9, - A6XX_TP0_SMO_DATA = 10, - 
A6XX_TP0_MIPMAP_BASE_DATA = 11, - A6XX_TP1_TMO_DATA = 25, - A6XX_TP1_SMO_DATA = 26, - A6XX_TP1_MIPMAP_BASE_DATA = 27, - A6XX_SP_INST_DATA = 41, - A6XX_SP_LB_0_DATA = 42, - A6XX_SP_LB_1_DATA = 43, - A6XX_SP_LB_2_DATA = 44, - A6XX_SP_LB_3_DATA = 45, - A6XX_SP_LB_4_DATA = 46, - A6XX_SP_LB_5_DATA = 47, - A6XX_SP_CB_BINDLESS_DATA = 48, - A6XX_SP_CB_LEGACY_DATA = 49, - A6XX_SP_UAV_DATA = 50, - A6XX_SP_INST_TAG = 51, - A6XX_SP_CB_BINDLESS_TAG = 52, - A6XX_SP_TMO_UMO_TAG = 53, - A6XX_SP_SMO_TAG = 54, - A6XX_SP_STATE_DATA = 55, - A6XX_HLSQ_CHUNK_CVS_RAM = 73, - A6XX_HLSQ_CHUNK_CPS_RAM = 74, - A6XX_HLSQ_CHUNK_CVS_RAM_TAG = 75, - A6XX_HLSQ_CHUNK_CPS_RAM_TAG = 76, - A6XX_HLSQ_ICB_CVS_CB_BASE_TAG = 77, - A6XX_HLSQ_ICB_CPS_CB_BASE_TAG = 78, - A6XX_HLSQ_CVS_MISC_RAM = 80, - A6XX_HLSQ_CPS_MISC_RAM = 81, - A6XX_HLSQ_INST_RAM = 82, - A6XX_HLSQ_GFX_CVS_CONST_RAM = 83, - A6XX_HLSQ_GFX_CPS_CONST_RAM = 84, - A6XX_HLSQ_CVS_MISC_RAM_TAG = 85, - A6XX_HLSQ_CPS_MISC_RAM_TAG = 86, - A6XX_HLSQ_INST_RAM_TAG = 87, - A6XX_HLSQ_GFX_CVS_CONST_RAM_TAG = 88, - A6XX_HLSQ_GFX_CPS_CONST_RAM_TAG = 89, - A6XX_HLSQ_PWR_REST_RAM = 90, - A6XX_HLSQ_PWR_REST_TAG = 91, - A6XX_HLSQ_DATAPATH_META = 96, - A6XX_HLSQ_FRONTEND_META = 97, - A6XX_HLSQ_INDIRECT_META = 98, - A6XX_HLSQ_BACKEND_META = 99, -}; - -enum a6xx_debugbus_id { - A6XX_DBGBUS_CP = 1, - A6XX_DBGBUS_RBBM = 2, - A6XX_DBGBUS_VBIF = 3, - A6XX_DBGBUS_HLSQ = 4, - A6XX_DBGBUS_UCHE = 5, - A6XX_DBGBUS_DPM = 6, - A6XX_DBGBUS_TESS = 7, - A6XX_DBGBUS_PC = 8, - A6XX_DBGBUS_VFDP = 9, - A6XX_DBGBUS_VPC = 10, - A6XX_DBGBUS_TSE = 11, - A6XX_DBGBUS_RAS = 12, - A6XX_DBGBUS_VSC = 13, - A6XX_DBGBUS_COM = 14, - A6XX_DBGBUS_LRZ = 16, - A6XX_DBGBUS_A2D = 17, - A6XX_DBGBUS_CCUFCHE = 18, - A6XX_DBGBUS_GMU_CX = 19, - A6XX_DBGBUS_RBP = 20, - A6XX_DBGBUS_DCS = 21, - A6XX_DBGBUS_DBGC = 22, - A6XX_DBGBUS_CX = 23, - A6XX_DBGBUS_GMU_GX = 24, - A6XX_DBGBUS_TPFCHE = 25, - A6XX_DBGBUS_GBIF_GX = 26, - A6XX_DBGBUS_GPC = 29, - A6XX_DBGBUS_LARC = 30, - A6XX_DBGBUS_HLSQ_SPTP = 31, - 
A6XX_DBGBUS_RB_0 = 32, - A6XX_DBGBUS_RB_1 = 33, - A6XX_DBGBUS_UCHE_WRAPPER = 36, - A6XX_DBGBUS_CCU_0 = 40, - A6XX_DBGBUS_CCU_1 = 41, - A6XX_DBGBUS_VFD_0 = 56, - A6XX_DBGBUS_VFD_1 = 57, - A6XX_DBGBUS_VFD_2 = 58, - A6XX_DBGBUS_VFD_3 = 59, - A6XX_DBGBUS_SP_0 = 64, - A6XX_DBGBUS_SP_1 = 65, - A6XX_DBGBUS_TPL1_0 = 72, - A6XX_DBGBUS_TPL1_1 = 73, - A6XX_DBGBUS_TPL1_2 = 74, - A6XX_DBGBUS_TPL1_3 = 75, -}; - -enum a6xx_cp_perfcounter_select { - PERF_CP_ALWAYS_COUNT = 0, - PERF_CP_BUSY_GFX_CORE_IDLE = 1, - PERF_CP_BUSY_CYCLES = 2, - PERF_CP_NUM_PREEMPTIONS = 3, - PERF_CP_PREEMPTION_REACTION_DELAY = 4, - PERF_CP_PREEMPTION_SWITCH_OUT_TIME = 5, - PERF_CP_PREEMPTION_SWITCH_IN_TIME = 6, - PERF_CP_DEAD_DRAWS_IN_BIN_RENDER = 7, - PERF_CP_PREDICATED_DRAWS_KILLED = 8, - PERF_CP_MODE_SWITCH = 9, - PERF_CP_ZPASS_DONE = 10, - PERF_CP_CONTEXT_DONE = 11, - PERF_CP_CACHE_FLUSH = 12, - PERF_CP_LONG_PREEMPTIONS = 13, - PERF_CP_SQE_I_CACHE_STARVE = 14, - PERF_CP_SQE_IDLE = 15, - PERF_CP_SQE_PM4_STARVE_RB_IB = 16, - PERF_CP_SQE_PM4_STARVE_SDS = 17, - PERF_CP_SQE_MRB_STARVE = 18, - PERF_CP_SQE_RRB_STARVE = 19, - PERF_CP_SQE_VSD_STARVE = 20, - PERF_CP_VSD_DECODE_STARVE = 21, - PERF_CP_SQE_PIPE_OUT_STALL = 22, - PERF_CP_SQE_SYNC_STALL = 23, - PERF_CP_SQE_PM4_WFI_STALL = 24, - PERF_CP_SQE_SYS_WFI_STALL = 25, - PERF_CP_SQE_T4_EXEC = 26, - PERF_CP_SQE_LOAD_STATE_EXEC = 27, - PERF_CP_SQE_SAVE_SDS_STATE = 28, - PERF_CP_SQE_DRAW_EXEC = 29, - PERF_CP_SQE_CTXT_REG_BUNCH_EXEC = 30, - PERF_CP_SQE_EXEC_PROFILED = 31, - PERF_CP_MEMORY_POOL_EMPTY = 32, - PERF_CP_MEMORY_POOL_SYNC_STALL = 33, - PERF_CP_MEMORY_POOL_ABOVE_THRESH = 34, - PERF_CP_AHB_WR_STALL_PRE_DRAWS = 35, - PERF_CP_AHB_STALL_SQE_GMU = 36, - PERF_CP_AHB_STALL_SQE_WR_OTHER = 37, - PERF_CP_AHB_STALL_SQE_RD_OTHER = 38, - PERF_CP_CLUSTER0_EMPTY = 39, - PERF_CP_CLUSTER1_EMPTY = 40, - PERF_CP_CLUSTER2_EMPTY = 41, - PERF_CP_CLUSTER3_EMPTY = 42, - PERF_CP_CLUSTER4_EMPTY = 43, - PERF_CP_CLUSTER5_EMPTY = 44, - PERF_CP_PM4_DATA = 45, - PERF_CP_PM4_HEADERS = 
46, - PERF_CP_VBIF_READ_BEATS = 47, - PERF_CP_VBIF_WRITE_BEATS = 48, - PERF_CP_SQE_INSTR_COUNTER = 49, -}; - -enum a6xx_rbbm_perfcounter_select { - PERF_RBBM_ALWAYS_COUNT = 0, - PERF_RBBM_ALWAYS_ON = 1, - PERF_RBBM_TSE_BUSY = 2, - PERF_RBBM_RAS_BUSY = 3, - PERF_RBBM_PC_DCALL_BUSY = 4, - PERF_RBBM_PC_VSD_BUSY = 5, - PERF_RBBM_STATUS_MASKED = 6, - PERF_RBBM_COM_BUSY = 7, - PERF_RBBM_DCOM_BUSY = 8, - PERF_RBBM_VBIF_BUSY = 9, - PERF_RBBM_VSC_BUSY = 10, - PERF_RBBM_TESS_BUSY = 11, - PERF_RBBM_UCHE_BUSY = 12, - PERF_RBBM_HLSQ_BUSY = 13, -}; - -enum a6xx_pc_perfcounter_select { - PERF_PC_BUSY_CYCLES = 0, - PERF_PC_WORKING_CYCLES = 1, - PERF_PC_STALL_CYCLES_VFD = 2, - PERF_PC_STALL_CYCLES_TSE = 3, - PERF_PC_STALL_CYCLES_VPC = 4, - PERF_PC_STALL_CYCLES_UCHE = 5, - PERF_PC_STALL_CYCLES_TESS = 6, - PERF_PC_STALL_CYCLES_TSE_ONLY = 7, - PERF_PC_STALL_CYCLES_VPC_ONLY = 8, - PERF_PC_PASS1_TF_STALL_CYCLES = 9, - PERF_PC_STARVE_CYCLES_FOR_INDEX = 10, - PERF_PC_STARVE_CYCLES_FOR_TESS_FACTOR = 11, - PERF_PC_STARVE_CYCLES_FOR_VIZ_STREAM = 12, - PERF_PC_STARVE_CYCLES_FOR_POSITION = 13, - PERF_PC_STARVE_CYCLES_DI = 14, - PERF_PC_VIS_STREAMS_LOADED = 15, - PERF_PC_INSTANCES = 16, - PERF_PC_VPC_PRIMITIVES = 17, - PERF_PC_DEAD_PRIM = 18, - PERF_PC_LIVE_PRIM = 19, - PERF_PC_VERTEX_HITS = 20, - PERF_PC_IA_VERTICES = 21, - PERF_PC_IA_PRIMITIVES = 22, - PERF_PC_GS_PRIMITIVES = 23, - PERF_PC_HS_INVOCATIONS = 24, - PERF_PC_DS_INVOCATIONS = 25, - PERF_PC_VS_INVOCATIONS = 26, - PERF_PC_GS_INVOCATIONS = 27, - PERF_PC_DS_PRIMITIVES = 28, - PERF_PC_VPC_POS_DATA_TRANSACTION = 29, - PERF_PC_3D_DRAWCALLS = 30, - PERF_PC_2D_DRAWCALLS = 31, - PERF_PC_NON_DRAWCALL_GLOBAL_EVENTS = 32, - PERF_TESS_BUSY_CYCLES = 33, - PERF_TESS_WORKING_CYCLES = 34, - PERF_TESS_STALL_CYCLES_PC = 35, - PERF_TESS_STARVE_CYCLES_PC = 36, - PERF_PC_TSE_TRANSACTION = 37, - PERF_PC_TSE_VERTEX = 38, - PERF_PC_TESS_PC_UV_TRANS = 39, - PERF_PC_TESS_PC_UV_PATCHES = 40, - PERF_PC_TESS_FACTOR_TRANS = 41, -}; - -enum 
a6xx_vfd_perfcounter_select { - PERF_VFD_BUSY_CYCLES = 0, - PERF_VFD_STALL_CYCLES_UCHE = 1, - PERF_VFD_STALL_CYCLES_VPC_ALLOC = 2, - PERF_VFD_STALL_CYCLES_SP_INFO = 3, - PERF_VFD_STALL_CYCLES_SP_ATTR = 4, - PERF_VFD_STARVE_CYCLES_UCHE = 5, - PERF_VFD_RBUFFER_FULL = 6, - PERF_VFD_ATTR_INFO_FIFO_FULL = 7, - PERF_VFD_DECODED_ATTRIBUTE_BYTES = 8, - PERF_VFD_NUM_ATTRIBUTES = 9, - PERF_VFD_UPPER_SHADER_FIBERS = 10, - PERF_VFD_LOWER_SHADER_FIBERS = 11, - PERF_VFD_MODE_0_FIBERS = 12, - PERF_VFD_MODE_1_FIBERS = 13, - PERF_VFD_MODE_2_FIBERS = 14, - PERF_VFD_MODE_3_FIBERS = 15, - PERF_VFD_MODE_4_FIBERS = 16, - PERF_VFD_TOTAL_VERTICES = 17, - PERF_VFDP_STALL_CYCLES_VFD = 18, - PERF_VFDP_STALL_CYCLES_VFD_INDEX = 19, - PERF_VFDP_STALL_CYCLES_VFD_PROG = 20, - PERF_VFDP_STARVE_CYCLES_PC = 21, - PERF_VFDP_VS_STAGE_WAVES = 22, -}; - -enum a6xx_hlsq_perfcounter_select { - PERF_HLSQ_BUSY_CYCLES = 0, - PERF_HLSQ_STALL_CYCLES_UCHE = 1, - PERF_HLSQ_STALL_CYCLES_SP_STATE = 2, - PERF_HLSQ_STALL_CYCLES_SP_FS_STAGE = 3, - PERF_HLSQ_UCHE_LATENCY_CYCLES = 4, - PERF_HLSQ_UCHE_LATENCY_COUNT = 5, - PERF_HLSQ_FS_STAGE_1X_WAVES = 6, - PERF_HLSQ_FS_STAGE_2X_WAVES = 7, - PERF_HLSQ_QUADS = 8, - PERF_HLSQ_CS_INVOCATIONS = 9, - PERF_HLSQ_COMPUTE_DRAWCALLS = 10, - PERF_HLSQ_FS_DATA_WAIT_PROGRAMMING = 11, - PERF_HLSQ_DUAL_FS_PROG_ACTIVE = 12, - PERF_HLSQ_DUAL_VS_PROG_ACTIVE = 13, - PERF_HLSQ_FS_BATCH_COUNT_ZERO = 14, - PERF_HLSQ_VS_BATCH_COUNT_ZERO = 15, - PERF_HLSQ_WAVE_PENDING_NO_QUAD = 16, - PERF_HLSQ_WAVE_PENDING_NO_PRIM_BASE = 17, - PERF_HLSQ_STALL_CYCLES_VPC = 18, - PERF_HLSQ_PIXELS = 19, - PERF_HLSQ_DRAW_MODE_SWITCH_VSFS_SYNC = 20, -}; - -enum a6xx_vpc_perfcounter_select { - PERF_VPC_BUSY_CYCLES = 0, - PERF_VPC_WORKING_CYCLES = 1, - PERF_VPC_STALL_CYCLES_UCHE = 2, - PERF_VPC_STALL_CYCLES_VFD_WACK = 3, - PERF_VPC_STALL_CYCLES_HLSQ_PRIM_ALLOC = 4, - PERF_VPC_STALL_CYCLES_PC = 5, - PERF_VPC_STALL_CYCLES_SP_LM = 6, - PERF_VPC_STARVE_CYCLES_SP = 7, - PERF_VPC_STARVE_CYCLES_LRZ = 8, - 
PERF_VPC_PC_PRIMITIVES = 9, - PERF_VPC_SP_COMPONENTS = 10, - PERF_VPC_STALL_CYCLES_VPCRAM_POS = 11, - PERF_VPC_LRZ_ASSIGN_PRIMITIVES = 12, - PERF_VPC_RB_VISIBLE_PRIMITIVES = 13, - PERF_VPC_LM_TRANSACTION = 14, - PERF_VPC_STREAMOUT_TRANSACTION = 15, - PERF_VPC_VS_BUSY_CYCLES = 16, - PERF_VPC_PS_BUSY_CYCLES = 17, - PERF_VPC_VS_WORKING_CYCLES = 18, - PERF_VPC_PS_WORKING_CYCLES = 19, - PERF_VPC_STARVE_CYCLES_RB = 20, - PERF_VPC_NUM_VPCRAM_READ_POS = 21, - PERF_VPC_WIT_FULL_CYCLES = 22, - PERF_VPC_VPCRAM_FULL_CYCLES = 23, - PERF_VPC_LM_FULL_WAIT_FOR_INTP_END = 24, - PERF_VPC_NUM_VPCRAM_WRITE = 25, - PERF_VPC_NUM_VPCRAM_READ_SO = 26, - PERF_VPC_NUM_ATTR_REQ_LM = 27, -}; - -enum a6xx_tse_perfcounter_select { - PERF_TSE_BUSY_CYCLES = 0, - PERF_TSE_CLIPPING_CYCLES = 1, - PERF_TSE_STALL_CYCLES_RAS = 2, - PERF_TSE_STALL_CYCLES_LRZ_BARYPLANE = 3, - PERF_TSE_STALL_CYCLES_LRZ_ZPLANE = 4, - PERF_TSE_STARVE_CYCLES_PC = 5, - PERF_TSE_INPUT_PRIM = 6, - PERF_TSE_INPUT_NULL_PRIM = 7, - PERF_TSE_TRIVAL_REJ_PRIM = 8, - PERF_TSE_CLIPPED_PRIM = 9, - PERF_TSE_ZERO_AREA_PRIM = 10, - PERF_TSE_FACENESS_CULLED_PRIM = 11, - PERF_TSE_ZERO_PIXEL_PRIM = 12, - PERF_TSE_OUTPUT_NULL_PRIM = 13, - PERF_TSE_OUTPUT_VISIBLE_PRIM = 14, - PERF_TSE_CINVOCATION = 15, - PERF_TSE_CPRIMITIVES = 16, - PERF_TSE_2D_INPUT_PRIM = 17, - PERF_TSE_2D_ALIVE_CYCLES = 18, - PERF_TSE_CLIP_PLANES = 19, -}; - -enum a6xx_ras_perfcounter_select { - PERF_RAS_BUSY_CYCLES = 0, - PERF_RAS_SUPERTILE_ACTIVE_CYCLES = 1, - PERF_RAS_STALL_CYCLES_LRZ = 2, - PERF_RAS_STARVE_CYCLES_TSE = 3, - PERF_RAS_SUPER_TILES = 4, - PERF_RAS_8X4_TILES = 5, - PERF_RAS_MASKGEN_ACTIVE = 6, - PERF_RAS_FULLY_COVERED_SUPER_TILES = 7, - PERF_RAS_FULLY_COVERED_8X4_TILES = 8, - PERF_RAS_PRIM_KILLED_INVISILBE = 9, - PERF_RAS_SUPERTILE_GEN_ACTIVE_CYCLES = 10, - PERF_RAS_LRZ_INTF_WORKING_CYCLES = 11, - PERF_RAS_BLOCKS = 12, -}; - -enum a6xx_uche_perfcounter_select { - PERF_UCHE_BUSY_CYCLES = 0, - PERF_UCHE_STALL_CYCLES_ARBITER = 1, - PERF_UCHE_VBIF_LATENCY_CYCLES 
= 2, - PERF_UCHE_VBIF_LATENCY_SAMPLES = 3, - PERF_UCHE_VBIF_READ_BEATS_TP = 4, - PERF_UCHE_VBIF_READ_BEATS_VFD = 5, - PERF_UCHE_VBIF_READ_BEATS_HLSQ = 6, - PERF_UCHE_VBIF_READ_BEATS_LRZ = 7, - PERF_UCHE_VBIF_READ_BEATS_SP = 8, - PERF_UCHE_READ_REQUESTS_TP = 9, - PERF_UCHE_READ_REQUESTS_VFD = 10, - PERF_UCHE_READ_REQUESTS_HLSQ = 11, - PERF_UCHE_READ_REQUESTS_LRZ = 12, - PERF_UCHE_READ_REQUESTS_SP = 13, - PERF_UCHE_WRITE_REQUESTS_LRZ = 14, - PERF_UCHE_WRITE_REQUESTS_SP = 15, - PERF_UCHE_WRITE_REQUESTS_VPC = 16, - PERF_UCHE_WRITE_REQUESTS_VSC = 17, - PERF_UCHE_EVICTS = 18, - PERF_UCHE_BANK_REQ0 = 19, - PERF_UCHE_BANK_REQ1 = 20, - PERF_UCHE_BANK_REQ2 = 21, - PERF_UCHE_BANK_REQ3 = 22, - PERF_UCHE_BANK_REQ4 = 23, - PERF_UCHE_BANK_REQ5 = 24, - PERF_UCHE_BANK_REQ6 = 25, - PERF_UCHE_BANK_REQ7 = 26, - PERF_UCHE_VBIF_READ_BEATS_CH0 = 27, - PERF_UCHE_VBIF_READ_BEATS_CH1 = 28, - PERF_UCHE_GMEM_READ_BEATS = 29, - PERF_UCHE_TPH_REF_FULL = 30, - PERF_UCHE_TPH_VICTIM_FULL = 31, - PERF_UCHE_TPH_EXT_FULL = 32, - PERF_UCHE_VBIF_STALL_WRITE_DATA = 33, - PERF_UCHE_DCMP_LATENCY_SAMPLES = 34, - PERF_UCHE_DCMP_LATENCY_CYCLES = 35, - PERF_UCHE_VBIF_READ_BEATS_PC = 36, - PERF_UCHE_READ_REQUESTS_PC = 37, - PERF_UCHE_RAM_READ_REQ = 38, - PERF_UCHE_RAM_WRITE_REQ = 39, -}; - -enum a6xx_tp_perfcounter_select { - PERF_TP_BUSY_CYCLES = 0, - PERF_TP_STALL_CYCLES_UCHE = 1, - PERF_TP_LATENCY_CYCLES = 2, - PERF_TP_LATENCY_TRANS = 3, - PERF_TP_FLAG_CACHE_REQUEST_SAMPLES = 4, - PERF_TP_FLAG_CACHE_REQUEST_LATENCY = 5, - PERF_TP_L1_CACHELINE_REQUESTS = 6, - PERF_TP_L1_CACHELINE_MISSES = 7, - PERF_TP_SP_TP_TRANS = 8, - PERF_TP_TP_SP_TRANS = 9, - PERF_TP_OUTPUT_PIXELS = 10, - PERF_TP_FILTER_WORKLOAD_16BIT = 11, - PERF_TP_FILTER_WORKLOAD_32BIT = 12, - PERF_TP_QUADS_RECEIVED = 13, - PERF_TP_QUADS_OFFSET = 14, - PERF_TP_QUADS_SHADOW = 15, - PERF_TP_QUADS_ARRAY = 16, - PERF_TP_QUADS_GRADIENT = 17, - PERF_TP_QUADS_1D = 18, - PERF_TP_QUADS_2D = 19, - PERF_TP_QUADS_BUFFER = 20, - PERF_TP_QUADS_3D = 21, - 
PERF_TP_QUADS_CUBE = 22, - PERF_TP_DIVERGENT_QUADS_RECEIVED = 23, - PERF_TP_PRT_NON_RESIDENT_EVENTS = 24, - PERF_TP_OUTPUT_PIXELS_POINT = 25, - PERF_TP_OUTPUT_PIXELS_BILINEAR = 26, - PERF_TP_OUTPUT_PIXELS_MIP = 27, - PERF_TP_OUTPUT_PIXELS_ANISO = 28, - PERF_TP_OUTPUT_PIXELS_ZERO_LOD = 29, - PERF_TP_FLAG_CACHE_REQUESTS = 30, - PERF_TP_FLAG_CACHE_MISSES = 31, - PERF_TP_L1_5_L2_REQUESTS = 32, - PERF_TP_2D_OUTPUT_PIXELS = 33, - PERF_TP_2D_OUTPUT_PIXELS_POINT = 34, - PERF_TP_2D_OUTPUT_PIXELS_BILINEAR = 35, - PERF_TP_2D_FILTER_WORKLOAD_16BIT = 36, - PERF_TP_2D_FILTER_WORKLOAD_32BIT = 37, - PERF_TP_TPA2TPC_TRANS = 38, - PERF_TP_L1_MISSES_ASTC_1TILE = 39, - PERF_TP_L1_MISSES_ASTC_2TILE = 40, - PERF_TP_L1_MISSES_ASTC_4TILE = 41, - PERF_TP_L1_5_L2_COMPRESS_REQS = 42, - PERF_TP_L1_5_L2_COMPRESS_MISS = 43, - PERF_TP_L1_BANK_CONFLICT = 44, - PERF_TP_L1_5_MISS_LATENCY_CYCLES = 45, - PERF_TP_L1_5_MISS_LATENCY_TRANS = 46, - PERF_TP_QUADS_CONSTANT_MULTIPLIED = 47, - PERF_TP_FRONTEND_WORKING_CYCLES = 48, - PERF_TP_L1_TAG_WORKING_CYCLES = 49, - PERF_TP_L1_DATA_WRITE_WORKING_CYCLES = 50, - PERF_TP_PRE_L1_DECOM_WORKING_CYCLES = 51, - PERF_TP_BACKEND_WORKING_CYCLES = 52, - PERF_TP_FLAG_CACHE_WORKING_CYCLES = 53, - PERF_TP_L1_5_CACHE_WORKING_CYCLES = 54, - PERF_TP_STARVE_CYCLES_SP = 55, - PERF_TP_STARVE_CYCLES_UCHE = 56, -}; - -enum a6xx_sp_perfcounter_select { - PERF_SP_BUSY_CYCLES = 0, - PERF_SP_ALU_WORKING_CYCLES = 1, - PERF_SP_EFU_WORKING_CYCLES = 2, - PERF_SP_STALL_CYCLES_VPC = 3, - PERF_SP_STALL_CYCLES_TP = 4, - PERF_SP_STALL_CYCLES_UCHE = 5, - PERF_SP_STALL_CYCLES_RB = 6, - PERF_SP_NON_EXECUTION_CYCLES = 7, - PERF_SP_WAVE_CONTEXTS = 8, - PERF_SP_WAVE_CONTEXT_CYCLES = 9, - PERF_SP_FS_STAGE_WAVE_CYCLES = 10, - PERF_SP_FS_STAGE_WAVE_SAMPLES = 11, - PERF_SP_VS_STAGE_WAVE_CYCLES = 12, - PERF_SP_VS_STAGE_WAVE_SAMPLES = 13, - PERF_SP_FS_STAGE_DURATION_CYCLES = 14, - PERF_SP_VS_STAGE_DURATION_CYCLES = 15, - PERF_SP_WAVE_CTRL_CYCLES = 16, - PERF_SP_WAVE_LOAD_CYCLES = 17, - 
PERF_SP_WAVE_EMIT_CYCLES = 18, - PERF_SP_WAVE_NOP_CYCLES = 19, - PERF_SP_WAVE_WAIT_CYCLES = 20, - PERF_SP_WAVE_FETCH_CYCLES = 21, - PERF_SP_WAVE_IDLE_CYCLES = 22, - PERF_SP_WAVE_END_CYCLES = 23, - PERF_SP_WAVE_LONG_SYNC_CYCLES = 24, - PERF_SP_WAVE_SHORT_SYNC_CYCLES = 25, - PERF_SP_WAVE_JOIN_CYCLES = 26, - PERF_SP_LM_LOAD_INSTRUCTIONS = 27, - PERF_SP_LM_STORE_INSTRUCTIONS = 28, - PERF_SP_LM_ATOMICS = 29, - PERF_SP_GM_LOAD_INSTRUCTIONS = 30, - PERF_SP_GM_STORE_INSTRUCTIONS = 31, - PERF_SP_GM_ATOMICS = 32, - PERF_SP_VS_STAGE_TEX_INSTRUCTIONS = 33, - PERF_SP_VS_STAGE_EFU_INSTRUCTIONS = 34, - PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = 35, - PERF_SP_VS_STAGE_HALF_ALU_INSTRUCTIONS = 36, - PERF_SP_FS_STAGE_TEX_INSTRUCTIONS = 37, - PERF_SP_FS_STAGE_CFLOW_INSTRUCTIONS = 38, - PERF_SP_FS_STAGE_EFU_INSTRUCTIONS = 39, - PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = 40, - PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = 41, - PERF_SP_FS_STAGE_BARY_INSTRUCTIONS = 42, - PERF_SP_VS_INSTRUCTIONS = 43, - PERF_SP_FS_INSTRUCTIONS = 44, - PERF_SP_ADDR_LOCK_COUNT = 45, - PERF_SP_UCHE_READ_TRANS = 46, - PERF_SP_UCHE_WRITE_TRANS = 47, - PERF_SP_EXPORT_VPC_TRANS = 48, - PERF_SP_EXPORT_RB_TRANS = 49, - PERF_SP_PIXELS_KILLED = 50, - PERF_SP_ICL1_REQUESTS = 51, - PERF_SP_ICL1_MISSES = 52, - PERF_SP_HS_INSTRUCTIONS = 53, - PERF_SP_DS_INSTRUCTIONS = 54, - PERF_SP_GS_INSTRUCTIONS = 55, - PERF_SP_CS_INSTRUCTIONS = 56, - PERF_SP_GPR_READ = 57, - PERF_SP_GPR_WRITE = 58, - PERF_SP_FS_STAGE_HALF_EFU_INSTRUCTIONS = 59, - PERF_SP_VS_STAGE_HALF_EFU_INSTRUCTIONS = 60, - PERF_SP_LM_BANK_CONFLICTS = 61, - PERF_SP_TEX_CONTROL_WORKING_CYCLES = 62, - PERF_SP_LOAD_CONTROL_WORKING_CYCLES = 63, - PERF_SP_FLOW_CONTROL_WORKING_CYCLES = 64, - PERF_SP_LM_WORKING_CYCLES = 65, - PERF_SP_DISPATCHER_WORKING_CYCLES = 66, - PERF_SP_SEQUENCER_WORKING_CYCLES = 67, - PERF_SP_LOW_EFFICIENCY_STARVED_BY_TP = 68, - PERF_SP_STARVE_CYCLES_HLSQ = 69, - PERF_SP_NON_EXECUTION_LS_CYCLES = 70, - PERF_SP_WORKING_EU = 71, - PERF_SP_ANY_EU_WORKING = 72, 
- PERF_SP_WORKING_EU_FS_STAGE = 73, - PERF_SP_ANY_EU_WORKING_FS_STAGE = 74, - PERF_SP_WORKING_EU_VS_STAGE = 75, - PERF_SP_ANY_EU_WORKING_VS_STAGE = 76, - PERF_SP_WORKING_EU_CS_STAGE = 77, - PERF_SP_ANY_EU_WORKING_CS_STAGE = 78, - PERF_SP_GPR_READ_PREFETCH = 79, - PERF_SP_GPR_READ_CONFLICT = 80, - PERF_SP_GPR_WRITE_CONFLICT = 81, - PERF_SP_GM_LOAD_LATENCY_CYCLES = 82, - PERF_SP_GM_LOAD_LATENCY_SAMPLES = 83, - PERF_SP_EXECUTABLE_WAVES = 84, -}; - -enum a6xx_rb_perfcounter_select { - PERF_RB_BUSY_CYCLES = 0, - PERF_RB_STALL_CYCLES_HLSQ = 1, - PERF_RB_STALL_CYCLES_FIFO0_FULL = 2, - PERF_RB_STALL_CYCLES_FIFO1_FULL = 3, - PERF_RB_STALL_CYCLES_FIFO2_FULL = 4, - PERF_RB_STARVE_CYCLES_SP = 5, - PERF_RB_STARVE_CYCLES_LRZ_TILE = 6, - PERF_RB_STARVE_CYCLES_CCU = 7, - PERF_RB_STARVE_CYCLES_Z_PLANE = 8, - PERF_RB_STARVE_CYCLES_BARY_PLANE = 9, - PERF_RB_Z_WORKLOAD = 10, - PERF_RB_HLSQ_ACTIVE = 11, - PERF_RB_Z_READ = 12, - PERF_RB_Z_WRITE = 13, - PERF_RB_C_READ = 14, - PERF_RB_C_WRITE = 15, - PERF_RB_TOTAL_PASS = 16, - PERF_RB_Z_PASS = 17, - PERF_RB_Z_FAIL = 18, - PERF_RB_S_FAIL = 19, - PERF_RB_BLENDED_FXP_COMPONENTS = 20, - PERF_RB_BLENDED_FP16_COMPONENTS = 21, - PERF_RB_PS_INVOCATIONS = 22, - PERF_RB_2D_ALIVE_CYCLES = 23, - PERF_RB_2D_STALL_CYCLES_A2D = 24, - PERF_RB_2D_STARVE_CYCLES_SRC = 25, - PERF_RB_2D_STARVE_CYCLES_SP = 26, - PERF_RB_2D_STARVE_CYCLES_DST = 27, - PERF_RB_2D_VALID_PIXELS = 28, - PERF_RB_3D_PIXELS = 29, - PERF_RB_BLENDER_WORKING_CYCLES = 30, - PERF_RB_ZPROC_WORKING_CYCLES = 31, - PERF_RB_CPROC_WORKING_CYCLES = 32, - PERF_RB_SAMPLER_WORKING_CYCLES = 33, - PERF_RB_STALL_CYCLES_CCU_COLOR_READ = 34, - PERF_RB_STALL_CYCLES_CCU_COLOR_WRITE = 35, - PERF_RB_STALL_CYCLES_CCU_DEPTH_READ = 36, - PERF_RB_STALL_CYCLES_CCU_DEPTH_WRITE = 37, - PERF_RB_STALL_CYCLES_VPC = 38, - PERF_RB_2D_INPUT_TRANS = 39, - PERF_RB_2D_OUTPUT_RB_DST_TRANS = 40, - PERF_RB_2D_OUTPUT_RB_SRC_TRANS = 41, - PERF_RB_BLENDED_FP32_COMPONENTS = 42, - PERF_RB_COLOR_PIX_TILES = 43, - 
PERF_RB_STALL_CYCLES_CCU = 44, - PERF_RB_EARLY_Z_ARB3_GRANT = 45, - PERF_RB_LATE_Z_ARB3_GRANT = 46, - PERF_RB_EARLY_Z_SKIP_GRANT = 47, -}; - -enum a6xx_vsc_perfcounter_select { - PERF_VSC_BUSY_CYCLES = 0, - PERF_VSC_WORKING_CYCLES = 1, - PERF_VSC_STALL_CYCLES_UCHE = 2, - PERF_VSC_EOT_NUM = 3, - PERF_VSC_INPUT_TILES = 4, -}; - -enum a6xx_ccu_perfcounter_select { - PERF_CCU_BUSY_CYCLES = 0, - PERF_CCU_STALL_CYCLES_RB_DEPTH_RETURN = 1, - PERF_CCU_STALL_CYCLES_RB_COLOR_RETURN = 2, - PERF_CCU_STARVE_CYCLES_FLAG_RETURN = 3, - PERF_CCU_DEPTH_BLOCKS = 4, - PERF_CCU_COLOR_BLOCKS = 5, - PERF_CCU_DEPTH_BLOCK_HIT = 6, - PERF_CCU_COLOR_BLOCK_HIT = 7, - PERF_CCU_PARTIAL_BLOCK_READ = 8, - PERF_CCU_GMEM_READ = 9, - PERF_CCU_GMEM_WRITE = 10, - PERF_CCU_DEPTH_READ_FLAG0_COUNT = 11, - PERF_CCU_DEPTH_READ_FLAG1_COUNT = 12, - PERF_CCU_DEPTH_READ_FLAG2_COUNT = 13, - PERF_CCU_DEPTH_READ_FLAG3_COUNT = 14, - PERF_CCU_DEPTH_READ_FLAG4_COUNT = 15, - PERF_CCU_DEPTH_READ_FLAG5_COUNT = 16, - PERF_CCU_DEPTH_READ_FLAG6_COUNT = 17, - PERF_CCU_DEPTH_READ_FLAG8_COUNT = 18, - PERF_CCU_COLOR_READ_FLAG0_COUNT = 19, - PERF_CCU_COLOR_READ_FLAG1_COUNT = 20, - PERF_CCU_COLOR_READ_FLAG2_COUNT = 21, - PERF_CCU_COLOR_READ_FLAG3_COUNT = 22, - PERF_CCU_COLOR_READ_FLAG4_COUNT = 23, - PERF_CCU_COLOR_READ_FLAG5_COUNT = 24, - PERF_CCU_COLOR_READ_FLAG6_COUNT = 25, - PERF_CCU_COLOR_READ_FLAG8_COUNT = 26, - PERF_CCU_2D_RD_REQ = 27, - PERF_CCU_2D_WR_REQ = 28, -}; - -enum a6xx_lrz_perfcounter_select { - PERF_LRZ_BUSY_CYCLES = 0, - PERF_LRZ_STARVE_CYCLES_RAS = 1, - PERF_LRZ_STALL_CYCLES_RB = 2, - PERF_LRZ_STALL_CYCLES_VSC = 3, - PERF_LRZ_STALL_CYCLES_VPC = 4, - PERF_LRZ_STALL_CYCLES_FLAG_PREFETCH = 5, - PERF_LRZ_STALL_CYCLES_UCHE = 6, - PERF_LRZ_LRZ_READ = 7, - PERF_LRZ_LRZ_WRITE = 8, - PERF_LRZ_READ_LATENCY = 9, - PERF_LRZ_MERGE_CACHE_UPDATING = 10, - PERF_LRZ_PRIM_KILLED_BY_MASKGEN = 11, - PERF_LRZ_PRIM_KILLED_BY_LRZ = 12, - PERF_LRZ_VISIBLE_PRIM_AFTER_LRZ = 13, - PERF_LRZ_FULL_8X8_TILES = 14, - 
PERF_LRZ_PARTIAL_8X8_TILES = 15, - PERF_LRZ_TILE_KILLED = 16, - PERF_LRZ_TOTAL_PIXEL = 17, - PERF_LRZ_VISIBLE_PIXEL_AFTER_LRZ = 18, - PERF_LRZ_FULLY_COVERED_TILES = 19, - PERF_LRZ_PARTIAL_COVERED_TILES = 20, - PERF_LRZ_FEEDBACK_ACCEPT = 21, - PERF_LRZ_FEEDBACK_DISCARD = 22, - PERF_LRZ_FEEDBACK_STALL = 23, - PERF_LRZ_STALL_CYCLES_RB_ZPLANE = 24, - PERF_LRZ_STALL_CYCLES_RB_BPLANE = 25, - PERF_LRZ_STALL_CYCLES_VC = 26, - PERF_LRZ_RAS_MASK_TRANS = 27, -}; - -enum a6xx_cmp_perfcounter_select { - PERF_CMPDECMP_STALL_CYCLES_ARB = 0, - PERF_CMPDECMP_VBIF_LATENCY_CYCLES = 1, - PERF_CMPDECMP_VBIF_LATENCY_SAMPLES = 2, - PERF_CMPDECMP_VBIF_READ_DATA_CCU = 3, - PERF_CMPDECMP_VBIF_WRITE_DATA_CCU = 4, - PERF_CMPDECMP_VBIF_READ_REQUEST = 5, - PERF_CMPDECMP_VBIF_WRITE_REQUEST = 6, - PERF_CMPDECMP_VBIF_READ_DATA = 7, - PERF_CMPDECMP_VBIF_WRITE_DATA = 8, - PERF_CMPDECMP_FLAG_FETCH_CYCLES = 9, - PERF_CMPDECMP_FLAG_FETCH_SAMPLES = 10, - PERF_CMPDECMP_DEPTH_WRITE_FLAG1_COUNT = 11, - PERF_CMPDECMP_DEPTH_WRITE_FLAG2_COUNT = 12, - PERF_CMPDECMP_DEPTH_WRITE_FLAG3_COUNT = 13, - PERF_CMPDECMP_DEPTH_WRITE_FLAG4_COUNT = 14, - PERF_CMPDECMP_DEPTH_WRITE_FLAG5_COUNT = 15, - PERF_CMPDECMP_DEPTH_WRITE_FLAG6_COUNT = 16, - PERF_CMPDECMP_DEPTH_WRITE_FLAG8_COUNT = 17, - PERF_CMPDECMP_COLOR_WRITE_FLAG1_COUNT = 18, - PERF_CMPDECMP_COLOR_WRITE_FLAG2_COUNT = 19, - PERF_CMPDECMP_COLOR_WRITE_FLAG3_COUNT = 20, - PERF_CMPDECMP_COLOR_WRITE_FLAG4_COUNT = 21, - PERF_CMPDECMP_COLOR_WRITE_FLAG5_COUNT = 22, - PERF_CMPDECMP_COLOR_WRITE_FLAG6_COUNT = 23, - PERF_CMPDECMP_COLOR_WRITE_FLAG8_COUNT = 24, - PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_REQ = 25, - PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_WR = 26, - PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_RETURN = 27, - PERF_CMPDECMP_2D_RD_DATA = 28, - PERF_CMPDECMP_2D_WR_DATA = 29, - PERF_CMPDECMP_VBIF_READ_DATA_UCHE_CH0 = 30, - PERF_CMPDECMP_VBIF_READ_DATA_UCHE_CH1 = 31, - PERF_CMPDECMP_2D_OUTPUT_TRANS = 32, - PERF_CMPDECMP_VBIF_WRITE_DATA_UCHE = 33, - PERF_CMPDECMP_DEPTH_WRITE_FLAG0_COUNT = 34, 
- PERF_CMPDECMP_COLOR_WRITE_FLAG0_COUNT = 35, - PERF_CMPDECMP_COLOR_WRITE_FLAGALPHA_COUNT = 36, - PERF_CMPDECMP_2D_BUSY_CYCLES = 37, - PERF_CMPDECMP_2D_REORDER_STARVE_CYCLES = 38, - PERF_CMPDECMP_2D_PIXELS = 39, -}; - -enum a6xx_tex_filter { - A6XX_TEX_NEAREST = 0, - A6XX_TEX_LINEAR = 1, - A6XX_TEX_ANISO = 2, -}; - -enum a6xx_tex_clamp { - A6XX_TEX_REPEAT = 0, - A6XX_TEX_CLAMP_TO_EDGE = 1, - A6XX_TEX_MIRROR_REPEAT = 2, - A6XX_TEX_CLAMP_TO_BORDER = 3, - A6XX_TEX_MIRROR_CLAMP = 4, -}; - -enum a6xx_tex_aniso { - A6XX_TEX_ANISO_1 = 0, - A6XX_TEX_ANISO_2 = 1, - A6XX_TEX_ANISO_4 = 2, - A6XX_TEX_ANISO_8 = 3, - A6XX_TEX_ANISO_16 = 4, -}; - -enum a6xx_tex_swiz { - A6XX_TEX_X = 0, - A6XX_TEX_Y = 1, - A6XX_TEX_Z = 2, - A6XX_TEX_W = 3, - A6XX_TEX_ZERO = 4, - A6XX_TEX_ONE = 5, -}; - -enum a6xx_tex_type { - A6XX_TEX_1D = 0, - A6XX_TEX_2D = 1, - A6XX_TEX_CUBE = 2, - A6XX_TEX_3D = 3, -}; - -#define A6XX_RBBM_INT_0_MASK_RBBM_GPU_IDLE 0x00000001 -#define A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR 0x00000002 -#define A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW 0x00000040 -#define A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR 0x00000080 -#define A6XX_RBBM_INT_0_MASK_CP_SW 0x00000100 -#define A6XX_RBBM_INT_0_MASK_CP_HW_ERROR 0x00000200 -#define A6XX_RBBM_INT_0_MASK_CP_CCU_FLUSH_DEPTH_TS 0x00000400 -#define A6XX_RBBM_INT_0_MASK_CP_CCU_FLUSH_COLOR_TS 0x00000800 -#define A6XX_RBBM_INT_0_MASK_CP_CCU_RESOLVE_TS 0x00001000 -#define A6XX_RBBM_INT_0_MASK_CP_IB2 0x00002000 -#define A6XX_RBBM_INT_0_MASK_CP_IB1 0x00004000 -#define A6XX_RBBM_INT_0_MASK_CP_RB 0x00008000 -#define A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS 0x00020000 -#define A6XX_RBBM_INT_0_MASK_CP_WT_DONE_TS 0x00040000 -#define A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS 0x00100000 -#define A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW 0x00400000 -#define A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT 0x00800000 -#define A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS 0x01000000 -#define A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR 0x02000000 -#define A6XX_RBBM_INT_0_MASK_DEBBUS_INTR_0 
0x04000000 -#define A6XX_RBBM_INT_0_MASK_DEBBUS_INTR_1 0x08000000 -#define A6XX_RBBM_INT_0_MASK_ISDB_CPU_IRQ 0x40000000 -#define A6XX_RBBM_INT_0_MASK_ISDB_UNDER_DEBUG 0x80000000 -#define A6XX_CP_INT_CP_OPCODE_ERROR 0x00000001 -#define A6XX_CP_INT_CP_UCODE_ERROR 0x00000002 -#define A6XX_CP_INT_CP_HW_FAULT_ERROR 0x00000004 -#define A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR 0x00000010 -#define A6XX_CP_INT_CP_AHB_ERROR 0x00000020 -#define A6XX_CP_INT_CP_VSD_PARITY_ERROR 0x00000040 -#define A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR 0x00000080 -#define REG_A6XX_CP_RB_BASE 0x00000800 - -#define REG_A6XX_CP_RB_BASE_HI 0x00000801 - -#define REG_A6XX_CP_RB_CNTL 0x00000802 - -#define REG_A6XX_CP_RB_RPTR_ADDR_LO 0x00000804 - -#define REG_A6XX_CP_RB_RPTR_ADDR_HI 0x00000805 - -#define REG_A6XX_CP_RB_RPTR 0x00000806 - -#define REG_A6XX_CP_RB_WPTR 0x00000807 - -#define REG_A6XX_CP_SQE_CNTL 0x00000808 - -#define REG_A6XX_CP_HW_FAULT 0x00000821 - -#define REG_A6XX_CP_INTERRUPT_STATUS 0x00000823 - -#define REG_A6XX_CP_PROTECT_STATUS 0x00000824 - -#define REG_A6XX_CP_SQE_INSTR_BASE_LO 0x00000830 - -#define REG_A6XX_CP_SQE_INSTR_BASE_HI 0x00000831 - -#define REG_A6XX_CP_MISC_CNTL 0x00000840 - -#define REG_A6XX_CP_ROQ_THRESHOLDS_1 0x000008c1 - -#define REG_A6XX_CP_ROQ_THRESHOLDS_2 0x000008c2 - -#define REG_A6XX_CP_MEM_POOL_SIZE 0x000008c3 - -#define REG_A6XX_CP_CHICKEN_DBG 0x00000841 - -#define REG_A6XX_CP_ADDR_MODE_CNTL 0x00000842 - -#define REG_A6XX_CP_DBG_ECO_CNTL 0x00000843 - -#define REG_A6XX_CP_PROTECT_CNTL 0x0000084f - -static inline uint32_t REG_A6XX_CP_SCRATCH(uint32_t i0) { return 0x00000883 + 0x1*i0; } - -static inline uint32_t REG_A6XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000883 + 0x1*i0; } - -static inline uint32_t REG_A6XX_CP_PROTECT(uint32_t i0) { return 0x00000850 + 0x1*i0; } - -static inline uint32_t REG_A6XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000850 + 0x1*i0; } -#define A6XX_CP_PROTECT_REG_BASE_ADDR__MASK 0x0003ffff -#define A6XX_CP_PROTECT_REG_BASE_ADDR__SHIFT 0 
-static inline uint32_t A6XX_CP_PROTECT_REG_BASE_ADDR(uint32_t val) -{ - return ((val) << A6XX_CP_PROTECT_REG_BASE_ADDR__SHIFT) & A6XX_CP_PROTECT_REG_BASE_ADDR__MASK; -} -#define A6XX_CP_PROTECT_REG_MASK_LEN__MASK 0x7ffc0000 -#define A6XX_CP_PROTECT_REG_MASK_LEN__SHIFT 18 -static inline uint32_t A6XX_CP_PROTECT_REG_MASK_LEN(uint32_t val) -{ - return ((val) << A6XX_CP_PROTECT_REG_MASK_LEN__SHIFT) & A6XX_CP_PROTECT_REG_MASK_LEN__MASK; -} -#define A6XX_CP_PROTECT_REG_READ 0x80000000 - -#define REG_A6XX_CP_CONTEXT_SWITCH_CNTL 0x000008a0 - -#define REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO 0x000008a1 - -#define REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI 0x000008a2 - -#define REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO 0x000008a3 - -#define REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI 0x000008a4 - -#define REG_A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO 0x000008a5 - -#define REG_A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI 0x000008a6 - -#define REG_A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO 0x000008a7 - -#define REG_A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI 0x000008a8 - -#define REG_A6XX_CP_PERFCTR_CP_SEL_0 0x000008d0 - -#define REG_A6XX_CP_PERFCTR_CP_SEL_1 0x000008d1 - -#define REG_A6XX_CP_PERFCTR_CP_SEL_2 0x000008d2 - -#define REG_A6XX_CP_PERFCTR_CP_SEL_3 0x000008d3 - -#define REG_A6XX_CP_PERFCTR_CP_SEL_4 0x000008d4 - -#define REG_A6XX_CP_PERFCTR_CP_SEL_5 0x000008d5 - -#define REG_A6XX_CP_PERFCTR_CP_SEL_6 0x000008d6 - -#define REG_A6XX_CP_PERFCTR_CP_SEL_7 0x000008d7 - -#define REG_A6XX_CP_PERFCTR_CP_SEL_8 0x000008d8 - -#define REG_A6XX_CP_PERFCTR_CP_SEL_9 0x000008d9 - -#define REG_A6XX_CP_PERFCTR_CP_SEL_10 0x000008da - -#define REG_A6XX_CP_PERFCTR_CP_SEL_11 0x000008db - -#define REG_A6XX_CP_PERFCTR_CP_SEL_12 0x000008dc - -#define REG_A6XX_CP_PERFCTR_CP_SEL_13 0x000008dd - -#define REG_A6XX_CP_CRASH_SCRIPT_BASE_LO 0x00000900 - -#define REG_A6XX_CP_CRASH_SCRIPT_BASE_HI 0x00000901 - -#define REG_A6XX_CP_CRASH_DUMP_CNTL 
0x00000902 - -#define REG_A6XX_CP_CRASH_DUMP_STATUS 0x00000903 - -#define REG_A6XX_CP_SQE_STAT_ADDR 0x00000908 - -#define REG_A6XX_CP_SQE_STAT_DATA 0x00000909 - -#define REG_A6XX_CP_DRAW_STATE_ADDR 0x0000090a - -#define REG_A6XX_CP_DRAW_STATE_DATA 0x0000090b - -#define REG_A6XX_CP_ROQ_DBG_ADDR 0x0000090c - -#define REG_A6XX_CP_ROQ_DBG_DATA 0x0000090d - -#define REG_A6XX_CP_MEM_POOL_DBG_ADDR 0x0000090e - -#define REG_A6XX_CP_MEM_POOL_DBG_DATA 0x0000090f - -#define REG_A6XX_CP_SQE_UCODE_DBG_ADDR 0x00000910 - -#define REG_A6XX_CP_SQE_UCODE_DBG_DATA 0x00000911 - -#define REG_A6XX_CP_IB1_BASE 0x00000928 - -#define REG_A6XX_CP_IB1_BASE_HI 0x00000929 - -#define REG_A6XX_CP_IB1_REM_SIZE 0x0000092a - -#define REG_A6XX_CP_IB2_BASE 0x0000092b - -#define REG_A6XX_CP_IB2_BASE_HI 0x0000092c - -#define REG_A6XX_CP_IB2_REM_SIZE 0x0000092d - -#define REG_A6XX_CP_ALWAYS_ON_COUNTER_LO 0x00000980 - -#define REG_A6XX_CP_ALWAYS_ON_COUNTER_HI 0x00000981 - -#define REG_A6XX_CP_AHB_CNTL 0x0000098d - -#define REG_A6XX_CP_APERTURE_CNTL_HOST 0x00000a00 - -#define REG_A6XX_CP_APERTURE_CNTL_CD 0x00000a03 - -#define REG_A6XX_VSC_ADDR_MODE_CNTL 0x00000c01 - -#define REG_A6XX_RBBM_INT_0_STATUS 0x00000201 - -#define REG_A6XX_RBBM_STATUS 0x00000210 -#define A6XX_RBBM_STATUS_GPU_BUSY_IGN_AHB 0x00800000 -#define A6XX_RBBM_STATUS_GPU_BUSY_IGN_AHB_CP 0x00400000 -#define A6XX_RBBM_STATUS_HLSQ_BUSY 0x00200000 -#define A6XX_RBBM_STATUS_VSC_BUSY 0x00100000 -#define A6XX_RBBM_STATUS_TPL1_BUSY 0x00080000 -#define A6XX_RBBM_STATUS_SP_BUSY 0x00040000 -#define A6XX_RBBM_STATUS_UCHE_BUSY 0x00020000 -#define A6XX_RBBM_STATUS_VPC_BUSY 0x00010000 -#define A6XX_RBBM_STATUS_VFD_BUSY 0x00008000 -#define A6XX_RBBM_STATUS_TESS_BUSY 0x00004000 -#define A6XX_RBBM_STATUS_PC_VSD_BUSY 0x00002000 -#define A6XX_RBBM_STATUS_PC_DCALL_BUSY 0x00001000 -#define A6XX_RBBM_STATUS_COM_DCOM_BUSY 0x00000800 -#define A6XX_RBBM_STATUS_LRZ_BUSY 0x00000400 -#define A6XX_RBBM_STATUS_A2D_BUSY 0x00000200 -#define A6XX_RBBM_STATUS_CCU_BUSY 
0x00000100 -#define A6XX_RBBM_STATUS_RB_BUSY 0x00000080 -#define A6XX_RBBM_STATUS_RAS_BUSY 0x00000040 -#define A6XX_RBBM_STATUS_TSE_BUSY 0x00000020 -#define A6XX_RBBM_STATUS_VBIF_BUSY 0x00000010 -#define A6XX_RBBM_STATUS_GFX_DBGC_BUSY 0x00000008 -#define A6XX_RBBM_STATUS_CP_BUSY 0x00000004 -#define A6XX_RBBM_STATUS_CP_AHB_BUSY_CP_MASTER 0x00000002 -#define A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER 0x00000001 - -#define REG_A6XX_RBBM_STATUS3 0x00000213 - -#define REG_A6XX_RBBM_VBIF_GX_RESET_STATUS 0x00000215 - -#define REG_A6XX_RBBM_PERFCTR_CP_0_LO 0x00000400 - -#define REG_A6XX_RBBM_PERFCTR_CP_0_HI 0x00000401 - -#define REG_A6XX_RBBM_PERFCTR_CP_1_LO 0x00000402 - -#define REG_A6XX_RBBM_PERFCTR_CP_1_HI 0x00000403 - -#define REG_A6XX_RBBM_PERFCTR_CP_2_LO 0x00000404 - -#define REG_A6XX_RBBM_PERFCTR_CP_2_HI 0x00000405 - -#define REG_A6XX_RBBM_PERFCTR_CP_3_LO 0x00000406 - -#define REG_A6XX_RBBM_PERFCTR_CP_3_HI 0x00000407 - -#define REG_A6XX_RBBM_PERFCTR_CP_4_LO 0x00000408 - -#define REG_A6XX_RBBM_PERFCTR_CP_4_HI 0x00000409 - -#define REG_A6XX_RBBM_PERFCTR_CP_5_LO 0x0000040a - -#define REG_A6XX_RBBM_PERFCTR_CP_5_HI 0x0000040b - -#define REG_A6XX_RBBM_PERFCTR_CP_6_LO 0x0000040c - -#define REG_A6XX_RBBM_PERFCTR_CP_6_HI 0x0000040d - -#define REG_A6XX_RBBM_PERFCTR_CP_7_LO 0x0000040e - -#define REG_A6XX_RBBM_PERFCTR_CP_7_HI 0x0000040f - -#define REG_A6XX_RBBM_PERFCTR_CP_8_LO 0x00000410 - -#define REG_A6XX_RBBM_PERFCTR_CP_8_HI 0x00000411 - -#define REG_A6XX_RBBM_PERFCTR_CP_9_LO 0x00000412 - -#define REG_A6XX_RBBM_PERFCTR_CP_9_HI 0x00000413 - -#define REG_A6XX_RBBM_PERFCTR_CP_10_LO 0x00000414 - -#define REG_A6XX_RBBM_PERFCTR_CP_10_HI 0x00000415 - -#define REG_A6XX_RBBM_PERFCTR_CP_11_LO 0x00000416 - -#define REG_A6XX_RBBM_PERFCTR_CP_11_HI 0x00000417 - -#define REG_A6XX_RBBM_PERFCTR_CP_12_LO 0x00000418 - -#define REG_A6XX_RBBM_PERFCTR_CP_12_HI 0x00000419 - -#define REG_A6XX_RBBM_PERFCTR_CP_13_LO 0x0000041a - -#define REG_A6XX_RBBM_PERFCTR_CP_13_HI 0x0000041b - -#define 
REG_A6XX_RBBM_PERFCTR_RBBM_0_LO 0x0000041c - -#define REG_A6XX_RBBM_PERFCTR_RBBM_0_HI 0x0000041d - -#define REG_A6XX_RBBM_PERFCTR_RBBM_1_LO 0x0000041e - -#define REG_A6XX_RBBM_PERFCTR_RBBM_1_HI 0x0000041f - -#define REG_A6XX_RBBM_PERFCTR_RBBM_2_LO 0x00000420 - -#define REG_A6XX_RBBM_PERFCTR_RBBM_2_HI 0x00000421 - -#define REG_A6XX_RBBM_PERFCTR_RBBM_3_LO 0x00000422 - -#define REG_A6XX_RBBM_PERFCTR_RBBM_3_HI 0x00000423 - -#define REG_A6XX_RBBM_PERFCTR_PC_0_LO 0x00000424 - -#define REG_A6XX_RBBM_PERFCTR_PC_0_HI 0x00000425 - -#define REG_A6XX_RBBM_PERFCTR_PC_1_LO 0x00000426 - -#define REG_A6XX_RBBM_PERFCTR_PC_1_HI 0x00000427 - -#define REG_A6XX_RBBM_PERFCTR_PC_2_LO 0x00000428 - -#define REG_A6XX_RBBM_PERFCTR_PC_2_HI 0x00000429 - -#define REG_A6XX_RBBM_PERFCTR_PC_3_LO 0x0000042a - -#define REG_A6XX_RBBM_PERFCTR_PC_3_HI 0x0000042b - -#define REG_A6XX_RBBM_PERFCTR_PC_4_LO 0x0000042c - -#define REG_A6XX_RBBM_PERFCTR_PC_4_HI 0x0000042d - -#define REG_A6XX_RBBM_PERFCTR_PC_5_LO 0x0000042e - -#define REG_A6XX_RBBM_PERFCTR_PC_5_HI 0x0000042f - -#define REG_A6XX_RBBM_PERFCTR_PC_6_LO 0x00000430 - -#define REG_A6XX_RBBM_PERFCTR_PC_6_HI 0x00000431 - -#define REG_A6XX_RBBM_PERFCTR_PC_7_LO 0x00000432 - -#define REG_A6XX_RBBM_PERFCTR_PC_7_HI 0x00000433 - -#define REG_A6XX_RBBM_PERFCTR_VFD_0_LO 0x00000434 - -#define REG_A6XX_RBBM_PERFCTR_VFD_0_HI 0x00000435 - -#define REG_A6XX_RBBM_PERFCTR_VFD_1_LO 0x00000436 - -#define REG_A6XX_RBBM_PERFCTR_VFD_1_HI 0x00000437 - -#define REG_A6XX_RBBM_PERFCTR_VFD_2_LO 0x00000438 - -#define REG_A6XX_RBBM_PERFCTR_VFD_2_HI 0x00000439 - -#define REG_A6XX_RBBM_PERFCTR_VFD_3_LO 0x0000043a - -#define REG_A6XX_RBBM_PERFCTR_VFD_3_HI 0x0000043b - -#define REG_A6XX_RBBM_PERFCTR_VFD_4_LO 0x0000043c - -#define REG_A6XX_RBBM_PERFCTR_VFD_4_HI 0x0000043d - -#define REG_A6XX_RBBM_PERFCTR_VFD_5_LO 0x0000043e - -#define REG_A6XX_RBBM_PERFCTR_VFD_5_HI 0x0000043f - -#define REG_A6XX_RBBM_PERFCTR_VFD_6_LO 0x00000440 - -#define REG_A6XX_RBBM_PERFCTR_VFD_6_HI 0x00000441 - 
-#define REG_A6XX_RBBM_PERFCTR_VFD_7_LO 0x00000442 - -#define REG_A6XX_RBBM_PERFCTR_VFD_7_HI 0x00000443 - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_0_LO 0x00000444 - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_0_HI 0x00000445 - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_1_LO 0x00000446 - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_1_HI 0x00000447 - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_2_LO 0x00000448 - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_2_HI 0x00000449 - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_3_LO 0x0000044a - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_3_HI 0x0000044b - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_4_LO 0x0000044c - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_4_HI 0x0000044d - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_5_LO 0x0000044e - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_5_HI 0x0000044f - -#define REG_A6XX_RBBM_PERFCTR_VPC_0_LO 0x00000450 - -#define REG_A6XX_RBBM_PERFCTR_VPC_0_HI 0x00000451 - -#define REG_A6XX_RBBM_PERFCTR_VPC_1_LO 0x00000452 - -#define REG_A6XX_RBBM_PERFCTR_VPC_1_HI 0x00000453 - -#define REG_A6XX_RBBM_PERFCTR_VPC_2_LO 0x00000454 - -#define REG_A6XX_RBBM_PERFCTR_VPC_2_HI 0x00000455 - -#define REG_A6XX_RBBM_PERFCTR_VPC_3_LO 0x00000456 - -#define REG_A6XX_RBBM_PERFCTR_VPC_3_HI 0x00000457 - -#define REG_A6XX_RBBM_PERFCTR_VPC_4_LO 0x00000458 - -#define REG_A6XX_RBBM_PERFCTR_VPC_4_HI 0x00000459 - -#define REG_A6XX_RBBM_PERFCTR_VPC_5_LO 0x0000045a - -#define REG_A6XX_RBBM_PERFCTR_VPC_5_HI 0x0000045b - -#define REG_A6XX_RBBM_PERFCTR_CCU_0_LO 0x0000045c - -#define REG_A6XX_RBBM_PERFCTR_CCU_0_HI 0x0000045d - -#define REG_A6XX_RBBM_PERFCTR_CCU_1_LO 0x0000045e - -#define REG_A6XX_RBBM_PERFCTR_CCU_1_HI 0x0000045f - -#define REG_A6XX_RBBM_PERFCTR_CCU_2_LO 0x00000460 - -#define REG_A6XX_RBBM_PERFCTR_CCU_2_HI 0x00000461 - -#define REG_A6XX_RBBM_PERFCTR_CCU_3_LO 0x00000462 - -#define REG_A6XX_RBBM_PERFCTR_CCU_3_HI 0x00000463 - -#define REG_A6XX_RBBM_PERFCTR_CCU_4_LO 0x00000464 - -#define REG_A6XX_RBBM_PERFCTR_CCU_4_HI 0x00000465 - -#define REG_A6XX_RBBM_PERFCTR_TSE_0_LO 0x00000466 - -#define 
REG_A6XX_RBBM_PERFCTR_TSE_0_HI 0x00000467 - -#define REG_A6XX_RBBM_PERFCTR_TSE_1_LO 0x00000468 - -#define REG_A6XX_RBBM_PERFCTR_TSE_1_HI 0x00000469 - -#define REG_A6XX_RBBM_PERFCTR_TSE_2_LO 0x0000046a - -#define REG_A6XX_RBBM_PERFCTR_CCU_4_HI 0x00000465 - -#define REG_A6XX_RBBM_PERFCTR_TSE_0_LO 0x00000466 - -#define REG_A6XX_RBBM_PERFCTR_TSE_0_HI 0x00000467 - -#define REG_A6XX_RBBM_PERFCTR_TSE_1_LO 0x00000468 - -#define REG_A6XX_RBBM_PERFCTR_TSE_1_HI 0x00000469 - -#define REG_A6XX_RBBM_PERFCTR_TSE_2_LO 0x0000046a - -#define REG_A6XX_RBBM_PERFCTR_TSE_2_HI 0x0000046b - -#define REG_A6XX_RBBM_PERFCTR_TSE_3_LO 0x0000046c - -#define REG_A6XX_RBBM_PERFCTR_TSE_3_HI 0x0000046d - -#define REG_A6XX_RBBM_PERFCTR_RAS_0_LO 0x0000046e - -#define REG_A6XX_RBBM_PERFCTR_RAS_0_HI 0x0000046f - -#define REG_A6XX_RBBM_PERFCTR_RAS_1_LO 0x00000470 - -#define REG_A6XX_RBBM_PERFCTR_RAS_1_HI 0x00000471 - -#define REG_A6XX_RBBM_PERFCTR_RAS_2_LO 0x00000472 - -#define REG_A6XX_RBBM_PERFCTR_RAS_2_HI 0x00000473 - -#define REG_A6XX_RBBM_PERFCTR_RAS_3_LO 0x00000474 - -#define REG_A6XX_RBBM_PERFCTR_RAS_3_HI 0x00000475 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_0_LO 0x00000476 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_0_HI 0x00000477 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_1_LO 0x00000478 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_1_HI 0x00000479 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_2_LO 0x0000047a - -#define REG_A6XX_RBBM_PERFCTR_UCHE_2_HI 0x0000047b - -#define REG_A6XX_RBBM_PERFCTR_UCHE_3_LO 0x0000047c - -#define REG_A6XX_RBBM_PERFCTR_UCHE_3_HI 0x0000047d - -#define REG_A6XX_RBBM_PERFCTR_UCHE_4_LO 0x0000047e - -#define REG_A6XX_RBBM_PERFCTR_UCHE_4_HI 0x0000047f - -#define REG_A6XX_RBBM_PERFCTR_UCHE_5_LO 0x00000480 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_5_HI 0x00000481 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_6_LO 0x00000482 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_6_HI 0x00000483 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_7_LO 0x00000484 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_7_HI 0x00000485 - -#define 
REG_A6XX_RBBM_PERFCTR_UCHE_8_LO 0x00000486 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_8_HI 0x00000487 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_9_LO 0x00000488 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_9_HI 0x00000489 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_10_LO 0x0000048a - -#define REG_A6XX_RBBM_PERFCTR_UCHE_10_HI 0x0000048b - -#define REG_A6XX_RBBM_PERFCTR_UCHE_11_LO 0x0000048c - -#define REG_A6XX_RBBM_PERFCTR_UCHE_11_HI 0x0000048d - -#define REG_A6XX_RBBM_PERFCTR_TP_0_LO 0x0000048e - -#define REG_A6XX_RBBM_PERFCTR_TP_0_HI 0x0000048f - -#define REG_A6XX_RBBM_PERFCTR_TP_1_LO 0x00000490 - -#define REG_A6XX_RBBM_PERFCTR_TP_1_HI 0x00000491 - -#define REG_A6XX_RBBM_PERFCTR_TP_2_LO 0x00000492 - -#define REG_A6XX_RBBM_PERFCTR_TP_2_HI 0x00000493 - -#define REG_A6XX_RBBM_PERFCTR_TP_3_LO 0x00000494 - -#define REG_A6XX_RBBM_PERFCTR_TP_3_HI 0x00000495 - -#define REG_A6XX_RBBM_PERFCTR_TP_4_LO 0x00000496 - -#define REG_A6XX_RBBM_PERFCTR_TP_4_HI 0x00000497 - -#define REG_A6XX_RBBM_PERFCTR_TP_5_LO 0x00000498 - -#define REG_A6XX_RBBM_PERFCTR_TP_5_HI 0x00000499 - -#define REG_A6XX_RBBM_PERFCTR_TP_6_LO 0x0000049a - -#define REG_A6XX_RBBM_PERFCTR_TP_6_HI 0x0000049b - -#define REG_A6XX_RBBM_PERFCTR_TP_7_LO 0x0000049c - -#define REG_A6XX_RBBM_PERFCTR_TP_7_HI 0x0000049d - -#define REG_A6XX_RBBM_PERFCTR_TP_8_LO 0x0000049e - -#define REG_A6XX_RBBM_PERFCTR_TP_8_HI 0x0000049f - -#define REG_A6XX_RBBM_PERFCTR_TP_9_LO 0x000004a0 - -#define REG_A6XX_RBBM_PERFCTR_TP_9_HI 0x000004a1 - -#define REG_A6XX_RBBM_PERFCTR_TP_10_LO 0x000004a2 - -#define REG_A6XX_RBBM_PERFCTR_TP_10_HI 0x000004a3 - -#define REG_A6XX_RBBM_PERFCTR_TP_11_LO 0x000004a4 - -#define REG_A6XX_RBBM_PERFCTR_TP_11_HI 0x000004a5 - -#define REG_A6XX_RBBM_PERFCTR_SP_0_LO 0x000004a6 - -#define REG_A6XX_RBBM_PERFCTR_SP_0_HI 0x000004a7 - -#define REG_A6XX_RBBM_PERFCTR_SP_1_LO 0x000004a8 - -#define REG_A6XX_RBBM_PERFCTR_SP_1_HI 0x000004a9 - -#define REG_A6XX_RBBM_PERFCTR_SP_2_LO 0x000004aa - -#define REG_A6XX_RBBM_PERFCTR_SP_2_HI 0x000004ab - -#define 
REG_A6XX_RBBM_PERFCTR_SP_3_LO 0x000004ac - -#define REG_A6XX_RBBM_PERFCTR_SP_3_HI 0x000004ad - -#define REG_A6XX_RBBM_PERFCTR_SP_4_LO 0x000004ae - -#define REG_A6XX_RBBM_PERFCTR_SP_4_HI 0x000004af - -#define REG_A6XX_RBBM_PERFCTR_SP_5_LO 0x000004b0 - -#define REG_A6XX_RBBM_PERFCTR_SP_5_HI 0x000004b1 - -#define REG_A6XX_RBBM_PERFCTR_SP_6_LO 0x000004b2 - -#define REG_A6XX_RBBM_PERFCTR_SP_6_HI 0x000004b3 - -#define REG_A6XX_RBBM_PERFCTR_SP_7_LO 0x000004b4 - -#define REG_A6XX_RBBM_PERFCTR_SP_7_HI 0x000004b5 - -#define REG_A6XX_RBBM_PERFCTR_SP_8_LO 0x000004b6 - -#define REG_A6XX_RBBM_PERFCTR_SP_8_HI 0x000004b7 - -#define REG_A6XX_RBBM_PERFCTR_SP_9_LO 0x000004b8 - -#define REG_A6XX_RBBM_PERFCTR_SP_9_HI 0x000004b9 - -#define REG_A6XX_RBBM_PERFCTR_SP_10_LO 0x000004ba - -#define REG_A6XX_RBBM_PERFCTR_SP_10_HI 0x000004bb - -#define REG_A6XX_RBBM_PERFCTR_SP_11_LO 0x000004bc - -#define REG_A6XX_RBBM_PERFCTR_SP_11_HI 0x000004bd - -#define REG_A6XX_RBBM_PERFCTR_SP_12_LO 0x000004be - -#define REG_A6XX_RBBM_PERFCTR_SP_12_HI 0x000004bf - -#define REG_A6XX_RBBM_PERFCTR_SP_13_LO 0x000004c0 - -#define REG_A6XX_RBBM_PERFCTR_SP_13_HI 0x000004c1 - -#define REG_A6XX_RBBM_PERFCTR_SP_14_LO 0x000004c2 - -#define REG_A6XX_RBBM_PERFCTR_SP_14_HI 0x000004c3 - -#define REG_A6XX_RBBM_PERFCTR_SP_15_LO 0x000004c4 - -#define REG_A6XX_RBBM_PERFCTR_SP_15_HI 0x000004c5 - -#define REG_A6XX_RBBM_PERFCTR_SP_16_LO 0x000004c6 - -#define REG_A6XX_RBBM_PERFCTR_SP_16_HI 0x000004c7 - -#define REG_A6XX_RBBM_PERFCTR_SP_17_LO 0x000004c8 - -#define REG_A6XX_RBBM_PERFCTR_SP_17_HI 0x000004c9 - -#define REG_A6XX_RBBM_PERFCTR_SP_18_LO 0x000004ca - -#define REG_A6XX_RBBM_PERFCTR_SP_18_HI 0x000004cb - -#define REG_A6XX_RBBM_PERFCTR_SP_19_LO 0x000004cc - -#define REG_A6XX_RBBM_PERFCTR_SP_19_HI 0x000004cd - -#define REG_A6XX_RBBM_PERFCTR_SP_20_LO 0x000004ce - -#define REG_A6XX_RBBM_PERFCTR_SP_20_HI 0x000004cf - -#define REG_A6XX_RBBM_PERFCTR_SP_21_LO 0x000004d0 - -#define REG_A6XX_RBBM_PERFCTR_SP_21_HI 0x000004d1 - -#define 
REG_A6XX_RBBM_PERFCTR_SP_22_LO 0x000004d2 - -#define REG_A6XX_RBBM_PERFCTR_SP_22_HI 0x000004d3 - -#define REG_A6XX_RBBM_PERFCTR_SP_23_LO 0x000004d4 - -#define REG_A6XX_RBBM_PERFCTR_SP_23_HI 0x000004d5 - -#define REG_A6XX_RBBM_PERFCTR_RB_0_LO 0x000004d6 - -#define REG_A6XX_RBBM_PERFCTR_RB_0_HI 0x000004d7 - -#define REG_A6XX_RBBM_PERFCTR_RB_1_LO 0x000004d8 - -#define REG_A6XX_RBBM_PERFCTR_RB_1_HI 0x000004d9 - -#define REG_A6XX_RBBM_PERFCTR_RB_2_LO 0x000004da - -#define REG_A6XX_RBBM_PERFCTR_RB_2_HI 0x000004db - -#define REG_A6XX_RBBM_PERFCTR_RB_3_LO 0x000004dc - -#define REG_A6XX_RBBM_PERFCTR_RB_3_HI 0x000004dd - -#define REG_A6XX_RBBM_PERFCTR_RB_4_LO 0x000004de - -#define REG_A6XX_RBBM_PERFCTR_RB_4_HI 0x000004df - -#define REG_A6XX_RBBM_PERFCTR_RB_5_LO 0x000004e0 - -#define REG_A6XX_RBBM_PERFCTR_RB_5_HI 0x000004e1 - -#define REG_A6XX_RBBM_PERFCTR_RB_6_LO 0x000004e2 - -#define REG_A6XX_RBBM_PERFCTR_RB_6_HI 0x000004e3 - -#define REG_A6XX_RBBM_PERFCTR_RB_7_LO 0x000004e4 - -#define REG_A6XX_RBBM_PERFCTR_RB_7_HI 0x000004e5 - -#define REG_A6XX_RBBM_PERFCTR_VSC_0_LO 0x000004e6 - -#define REG_A6XX_RBBM_PERFCTR_VSC_0_HI 0x000004e7 - -#define REG_A6XX_RBBM_PERFCTR_VSC_1_LO 0x000004e8 - -#define REG_A6XX_RBBM_PERFCTR_VSC_1_HI 0x000004e9 - -#define REG_A6XX_RBBM_PERFCTR_LRZ_0_LO 0x000004ea - -#define REG_A6XX_RBBM_PERFCTR_LRZ_0_HI 0x000004eb - -#define REG_A6XX_RBBM_PERFCTR_LRZ_1_LO 0x000004ec - -#define REG_A6XX_RBBM_PERFCTR_LRZ_1_HI 0x000004ed - -#define REG_A6XX_RBBM_PERFCTR_LRZ_2_LO 0x000004ee - -#define REG_A6XX_RBBM_PERFCTR_LRZ_2_HI 0x000004ef - -#define REG_A6XX_RBBM_PERFCTR_LRZ_3_LO 0x000004f0 - -#define REG_A6XX_RBBM_PERFCTR_LRZ_3_HI 0x000004f1 - -#define REG_A6XX_RBBM_PERFCTR_CMP_0_LO 0x000004f2 - -#define REG_A6XX_RBBM_PERFCTR_CMP_0_HI 0x000004f3 - -#define REG_A6XX_RBBM_PERFCTR_CMP_1_LO 0x000004f4 - -#define REG_A6XX_RBBM_PERFCTR_CMP_1_HI 0x000004f5 - -#define REG_A6XX_RBBM_PERFCTR_CMP_2_LO 0x000004f6 - -#define REG_A6XX_RBBM_PERFCTR_CMP_2_HI 0x000004f7 - -#define 
REG_A6XX_RBBM_PERFCTR_CMP_3_LO 0x000004f8 - -#define REG_A6XX_RBBM_PERFCTR_CMP_3_HI 0x000004f9 - -#define REG_A6XX_RBBM_PERFCTR_CNTL 0x00000500 - -#define REG_A6XX_RBBM_PERFCTR_LOAD_CMD0 0x00000501 - -#define REG_A6XX_RBBM_PERFCTR_LOAD_CMD1 0x00000502 - -#define REG_A6XX_RBBM_PERFCTR_LOAD_CMD2 0x00000503 - -#define REG_A6XX_RBBM_PERFCTR_LOAD_CMD3 0x00000504 - -#define REG_A6XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000505 - -#define REG_A6XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000506 - -#define REG_A6XX_RBBM_PERFCTR_RBBM_SEL_0 0x00000507 - -#define REG_A6XX_RBBM_PERFCTR_RBBM_SEL_1 0x00000508 - -#define REG_A6XX_RBBM_PERFCTR_RBBM_SEL_2 0x00000509 - -#define REG_A6XX_RBBM_PERFCTR_RBBM_SEL_3 0x0000050a - -#define REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED 0x0000050b - -#define REG_A6XX_RBBM_ISDB_CNT 0x00000533 - -#define REG_A6XX_RBBM_SECVID_TRUST_CNTL 0x0000f400 - -#define REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO 0x0000f800 - -#define REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0x0000f801 - -#define REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE 0x0000f802 - -#define REG_A6XX_RBBM_SECVID_TSB_CNTL 0x0000f803 - -#define REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL 0x0000f810 - -#define REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL 0x00000010 - -#define REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL 0x0000001f - -#define REG_A6XX_RBBM_INT_CLEAR_CMD 0x00000037 - -#define REG_A6XX_RBBM_INT_0_MASK 0x00000038 - -#define REG_A6XX_RBBM_SP_HYST_CNT 0x00000042 - -#define REG_A6XX_RBBM_SW_RESET_CMD 0x00000043 - -#define REG_A6XX_RBBM_RAC_THRESHOLD_CNT 0x00000044 - -#define REG_A6XX_RBBM_BLOCK_SW_RESET_CMD 0x00000045 - -#define REG_A6XX_RBBM_BLOCK_SW_RESET_CMD2 0x00000046 - -#define REG_A6XX_RBBM_CLOCK_CNTL 0x000000ae - -#define REG_A6XX_RBBM_CLOCK_CNTL_SP0 0x000000b0 - -#define REG_A6XX_RBBM_CLOCK_CNTL_SP1 0x000000b1 - -#define REG_A6XX_RBBM_CLOCK_CNTL_SP2 0x000000b2 - -#define REG_A6XX_RBBM_CLOCK_CNTL_SP3 0x000000b3 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_SP0 0x000000b4 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_SP1 0x000000b5 - -#define 
REG_A6XX_RBBM_CLOCK_CNTL2_SP2 0x000000b6 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_SP3 0x000000b7 - -#define REG_A6XX_RBBM_CLOCK_DELAY_SP0 0x000000b8 - -#define REG_A6XX_RBBM_CLOCK_DELAY_SP1 0x000000b9 - -#define REG_A6XX_RBBM_CLOCK_DELAY_SP2 0x000000ba - -#define REG_A6XX_RBBM_CLOCK_DELAY_SP3 0x000000bb - -#define REG_A6XX_RBBM_CLOCK_HYST_SP0 0x000000bc - -#define REG_A6XX_RBBM_CLOCK_HYST_SP1 0x000000bd - -#define REG_A6XX_RBBM_CLOCK_HYST_SP2 0x000000be - -#define REG_A6XX_RBBM_CLOCK_HYST_SP3 0x000000bf - -#define REG_A6XX_RBBM_CLOCK_CNTL_TP0 0x000000c0 - -#define REG_A6XX_RBBM_CLOCK_CNTL_TP1 0x000000c1 - -#define REG_A6XX_RBBM_CLOCK_CNTL_TP2 0x000000c2 - -#define REG_A6XX_RBBM_CLOCK_CNTL_TP3 0x000000c3 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_TP0 0x000000c4 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_TP1 0x000000c5 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_TP2 0x000000c6 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_TP3 0x000000c7 - -#define REG_A6XX_RBBM_CLOCK_CNTL3_TP0 0x000000c8 - -#define REG_A6XX_RBBM_CLOCK_CNTL3_TP1 0x000000c9 - -#define REG_A6XX_RBBM_CLOCK_CNTL3_TP2 0x000000ca - -#define REG_A6XX_RBBM_CLOCK_CNTL3_TP3 0x000000cb - -#define REG_A6XX_RBBM_CLOCK_CNTL4_TP0 0x000000cc - -#define REG_A6XX_RBBM_CLOCK_CNTL4_TP1 0x000000cd - -#define REG_A6XX_RBBM_CLOCK_CNTL4_TP2 0x000000ce - -#define REG_A6XX_RBBM_CLOCK_CNTL4_TP3 0x000000cf - -#define REG_A6XX_RBBM_CLOCK_DELAY_TP0 0x000000d0 - -#define REG_A6XX_RBBM_CLOCK_DELAY_TP1 0x000000d1 - -#define REG_A6XX_RBBM_CLOCK_DELAY_TP2 0x000000d2 - -#define REG_A6XX_RBBM_CLOCK_DELAY_TP3 0x000000d3 - -#define REG_A6XX_RBBM_CLOCK_DELAY2_TP0 0x000000d4 - -#define REG_A6XX_RBBM_CLOCK_DELAY2_TP1 0x000000d5 - -#define REG_A6XX_RBBM_CLOCK_DELAY2_TP2 0x000000d6 - -#define REG_A6XX_RBBM_CLOCK_DELAY2_TP3 0x000000d7 - -#define REG_A6XX_RBBM_CLOCK_DELAY3_TP0 0x000000d8 - -#define REG_A6XX_RBBM_CLOCK_DELAY3_TP1 0x000000d9 - -#define REG_A6XX_RBBM_CLOCK_DELAY3_TP2 0x000000da - -#define REG_A6XX_RBBM_CLOCK_DELAY3_TP3 0x000000db - -#define 
REG_A6XX_RBBM_CLOCK_DELAY4_TP0 0x000000dc - -#define REG_A6XX_RBBM_CLOCK_DELAY4_TP1 0x000000dd - -#define REG_A6XX_RBBM_CLOCK_DELAY4_TP2 0x000000de - -#define REG_A6XX_RBBM_CLOCK_DELAY4_TP3 0x000000df - -#define REG_A6XX_RBBM_CLOCK_HYST_TP0 0x000000e0 - -#define REG_A6XX_RBBM_CLOCK_HYST_TP1 0x000000e1 - -#define REG_A6XX_RBBM_CLOCK_HYST_TP2 0x000000e2 - -#define REG_A6XX_RBBM_CLOCK_HYST_TP3 0x000000e3 - -#define REG_A6XX_RBBM_CLOCK_HYST2_TP0 0x000000e4 - -#define REG_A6XX_RBBM_CLOCK_HYST2_TP1 0x000000e5 - -#define REG_A6XX_RBBM_CLOCK_HYST2_TP2 0x000000e6 - -#define REG_A6XX_RBBM_CLOCK_HYST2_TP3 0x000000e7 - -#define REG_A6XX_RBBM_CLOCK_HYST3_TP0 0x000000e8 - -#define REG_A6XX_RBBM_CLOCK_HYST3_TP1 0x000000e9 - -#define REG_A6XX_RBBM_CLOCK_HYST3_TP2 0x000000ea - -#define REG_A6XX_RBBM_CLOCK_HYST3_TP3 0x000000eb - -#define REG_A6XX_RBBM_CLOCK_HYST4_TP0 0x000000ec - -#define REG_A6XX_RBBM_CLOCK_HYST4_TP1 0x000000ed - -#define REG_A6XX_RBBM_CLOCK_HYST4_TP2 0x000000ee - -#define REG_A6XX_RBBM_CLOCK_HYST4_TP3 0x000000ef - -#define REG_A6XX_RBBM_CLOCK_CNTL_RB0 0x000000f0 - -#define REG_A6XX_RBBM_CLOCK_CNTL_RB1 0x000000f1 - -#define REG_A6XX_RBBM_CLOCK_CNTL_RB2 0x000000f2 - -#define REG_A6XX_RBBM_CLOCK_CNTL_RB3 0x000000f3 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_RB0 0x000000f4 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_RB1 0x000000f5 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_RB2 0x000000f6 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_RB3 0x000000f7 - -#define REG_A6XX_RBBM_CLOCK_CNTL_CCU0 0x000000f8 - -#define REG_A6XX_RBBM_CLOCK_CNTL_CCU1 0x000000f9 - -#define REG_A6XX_RBBM_CLOCK_CNTL_CCU2 0x000000fa - -#define REG_A6XX_RBBM_CLOCK_CNTL_CCU3 0x000000fb - -#define REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0 0x00000100 - -#define REG_A6XX_RBBM_CLOCK_HYST_RB_CCU1 0x00000101 - -#define REG_A6XX_RBBM_CLOCK_HYST_RB_CCU2 0x00000102 - -#define REG_A6XX_RBBM_CLOCK_HYST_RB_CCU3 0x00000103 - -#define REG_A6XX_RBBM_CLOCK_CNTL_RAC 0x00000104 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_RAC 0x00000105 - -#define 
REG_A6XX_RBBM_CLOCK_DELAY_RAC 0x00000106 - -#define REG_A6XX_RBBM_CLOCK_HYST_RAC 0x00000107 - -#define REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM 0x00000108 - -#define REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x00000109 - -#define REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x0000010a - -#define REG_A6XX_RBBM_CLOCK_CNTL_UCHE 0x0000010b - -#define REG_A6XX_RBBM_CLOCK_CNTL2_UCHE 0x0000010c - -#define REG_A6XX_RBBM_CLOCK_CNTL3_UCHE 0x0000010d - -#define REG_A6XX_RBBM_CLOCK_CNTL4_UCHE 0x0000010e - -#define REG_A6XX_RBBM_CLOCK_DELAY_UCHE 0x0000010f - -#define REG_A6XX_RBBM_CLOCK_HYST_UCHE 0x00000110 - -#define REG_A6XX_RBBM_CLOCK_MODE_VFD 0x00000111 - -#define REG_A6XX_RBBM_CLOCK_DELAY_VFD 0x00000112 - -#define REG_A6XX_RBBM_CLOCK_HYST_VFD 0x00000113 - -#define REG_A6XX_RBBM_CLOCK_MODE_GPC 0x00000114 - -#define REG_A6XX_RBBM_CLOCK_DELAY_GPC 0x00000115 - -#define REG_A6XX_RBBM_CLOCK_HYST_GPC 0x00000116 - -#define REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2 0x00000117 - -#define REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX 0x00000118 - -#define REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX 0x00000119 - -#define REG_A6XX_RBBM_CLOCK_HYST_GMU_GX 0x0000011a - -#define REG_A6XX_RBBM_CLOCK_MODE_HLSQ 0x0000011b - -#define REG_A6XX_RBBM_CLOCK_DELAY_HLSQ 0x0000011c - -#define REG_A6XX_DBGC_CFG_DBGBUS_SEL_A 0x00000600 - -#define REG_A6XX_DBGC_CFG_DBGBUS_SEL_B 0x00000601 - -#define REG_A6XX_DBGC_CFG_DBGBUS_SEL_C 0x00000602 - -#define REG_A6XX_DBGC_CFG_DBGBUS_SEL_D 0x00000603 -#define A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__MASK 0x000000ff -#define A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__SHIFT 0 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__SHIFT) & A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__MASK 0x0000ff00 -#define A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__SHIFT 8 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(uint32_t val) -{ - return ((val) << 
A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__SHIFT) & A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__MASK; -} - -#define REG_A6XX_DBGC_CFG_DBGBUS_CNTLT 0x00000604 -#define A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__MASK 0x0000003f -#define A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__SHIFT 0 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__SHIFT) & A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU__MASK 0x00007000 -#define A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU__SHIFT 12 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU__SHIFT) & A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT__MASK 0xf0000000 -#define A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT__SHIFT 28 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT__SHIFT) & A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT__MASK; -} - -#define REG_A6XX_DBGC_CFG_DBGBUS_CNTLM 0x00000605 -#define A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__MASK 0x0f000000 -#define A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__SHIFT 24 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__SHIFT) & A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__MASK; -} - -#define REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0 0x00000608 - -#define REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1 0x00000609 - -#define REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2 0x0000060a - -#define REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3 0x0000060b - -#define REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0 0x0000060c - -#define REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1 0x0000060d - -#define REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2 0x0000060e - -#define REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3 0x0000060f - -#define REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0 0x00000610 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__MASK 0x0000000f -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__SHIFT 0 -static inline 
uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__MASK 0x000000f0 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__SHIFT 4 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__MASK 0x00000f00 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__SHIFT 8 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__MASK 0x0000f000 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__SHIFT 12 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__MASK 0x000f0000 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__SHIFT 16 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__MASK 0x00f00000 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__SHIFT 20 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__MASK 0x0f000000 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__SHIFT 24 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__MASK; 
-} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__MASK 0xf0000000 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__SHIFT 28 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__MASK; -} - -#define REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1 0x00000611 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__MASK 0x0000000f -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__SHIFT 0 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__MASK 0x000000f0 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__SHIFT 4 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__MASK 0x00000f00 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__SHIFT 8 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__MASK 0x0000f000 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__SHIFT 12 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__MASK 0x000f0000 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__SHIFT 16 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__MASK 0x00f00000 -#define 
A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__SHIFT 20 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__MASK 0x0f000000 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__SHIFT 24 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__MASK 0xf0000000 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__SHIFT 28 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__MASK; -} - -#define REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1 0x0000062f - -#define REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2 0x00000630 - -#define REG_A6XX_VSC_PERFCTR_VSC_SEL_0 0x00000cd8 - -#define REG_A6XX_VSC_PERFCTR_VSC_SEL_1 0x00000cd9 - -#define REG_A6XX_GRAS_ADDR_MODE_CNTL 0x00008601 - -#define REG_A6XX_GRAS_PERFCTR_TSE_SEL_0 0x00008610 - -#define REG_A6XX_GRAS_PERFCTR_TSE_SEL_1 0x00008611 - -#define REG_A6XX_GRAS_PERFCTR_TSE_SEL_2 0x00008612 - -#define REG_A6XX_GRAS_PERFCTR_TSE_SEL_3 0x00008613 - -#define REG_A6XX_GRAS_PERFCTR_RAS_SEL_0 0x00008614 - -#define REG_A6XX_GRAS_PERFCTR_RAS_SEL_1 0x00008615 - -#define REG_A6XX_GRAS_PERFCTR_RAS_SEL_2 0x00008616 - -#define REG_A6XX_GRAS_PERFCTR_RAS_SEL_3 0x00008617 - -#define REG_A6XX_GRAS_PERFCTR_LRZ_SEL_0 0x00008618 - -#define REG_A6XX_GRAS_PERFCTR_LRZ_SEL_1 0x00008619 - -#define REG_A6XX_GRAS_PERFCTR_LRZ_SEL_2 0x0000861a - -#define REG_A6XX_GRAS_PERFCTR_LRZ_SEL_3 0x0000861b - -#define REG_A6XX_RB_ADDR_MODE_CNTL 0x00008e05 - -#define REG_A6XX_RB_NC_MODE_CNTL 0x00008e08 - -#define REG_A6XX_RB_PERFCTR_RB_SEL_0 0x00008e10 - -#define REG_A6XX_RB_PERFCTR_RB_SEL_1 0x00008e11 - -#define 
REG_A6XX_RB_PERFCTR_RB_SEL_2 0x00008e12 - -#define REG_A6XX_RB_PERFCTR_RB_SEL_3 0x00008e13 - -#define REG_A6XX_RB_PERFCTR_RB_SEL_4 0x00008e14 - -#define REG_A6XX_RB_PERFCTR_RB_SEL_5 0x00008e15 - -#define REG_A6XX_RB_PERFCTR_RB_SEL_6 0x00008e16 - -#define REG_A6XX_RB_PERFCTR_RB_SEL_7 0x00008e17 - -#define REG_A6XX_RB_PERFCTR_CCU_SEL_0 0x00008e18 - -#define REG_A6XX_RB_PERFCTR_CCU_SEL_1 0x00008e19 - -#define REG_A6XX_RB_PERFCTR_CCU_SEL_2 0x00008e1a - -#define REG_A6XX_RB_PERFCTR_CCU_SEL_3 0x00008e1b - -#define REG_A6XX_RB_PERFCTR_CCU_SEL_4 0x00008e1c - -#define REG_A6XX_RB_PERFCTR_CMP_SEL_0 0x00008e2c - -#define REG_A6XX_RB_PERFCTR_CMP_SEL_1 0x00008e2d - -#define REG_A6XX_RB_PERFCTR_CMP_SEL_2 0x00008e2e - -#define REG_A6XX_RB_PERFCTR_CMP_SEL_3 0x00008e2f - -#define REG_A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD 0x00008e3d - -#define REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE 0x00008e50 - -#define REG_A6XX_PC_DBG_ECO_CNTL 0x00009e00 - -#define REG_A6XX_PC_ADDR_MODE_CNTL 0x00009e01 - -#define REG_A6XX_PC_PERFCTR_PC_SEL_0 0x00009e34 - -#define REG_A6XX_PC_PERFCTR_PC_SEL_1 0x00009e35 - -#define REG_A6XX_PC_PERFCTR_PC_SEL_2 0x00009e36 - -#define REG_A6XX_PC_PERFCTR_PC_SEL_3 0x00009e37 - -#define REG_A6XX_PC_PERFCTR_PC_SEL_4 0x00009e38 - -#define REG_A6XX_PC_PERFCTR_PC_SEL_5 0x00009e39 - -#define REG_A6XX_PC_PERFCTR_PC_SEL_6 0x00009e3a - -#define REG_A6XX_PC_PERFCTR_PC_SEL_7 0x00009e3b - -#define REG_A6XX_HLSQ_ADDR_MODE_CNTL 0x0000be05 - -#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_0 0x0000be10 - -#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_1 0x0000be11 - -#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_2 0x0000be12 - -#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_3 0x0000be13 - -#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_4 0x0000be14 - -#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_5 0x0000be15 - -#define REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE 0x0000c800 - -#define REG_A6XX_HLSQ_DBG_READ_SEL 0x0000d000 - -#define REG_A6XX_VFD_ADDR_MODE_CNTL 0x0000a601 - -#define REG_A6XX_VFD_PERFCTR_VFD_SEL_0 0x0000a610 - -#define 
REG_A6XX_VFD_PERFCTR_VFD_SEL_1 0x0000a611 - -#define REG_A6XX_VFD_PERFCTR_VFD_SEL_2 0x0000a612 - -#define REG_A6XX_VFD_PERFCTR_VFD_SEL_3 0x0000a613 - -#define REG_A6XX_VFD_PERFCTR_VFD_SEL_4 0x0000a614 - -#define REG_A6XX_VFD_PERFCTR_VFD_SEL_5 0x0000a615 - -#define REG_A6XX_VFD_PERFCTR_VFD_SEL_6 0x0000a616 - -#define REG_A6XX_VFD_PERFCTR_VFD_SEL_7 0x0000a617 - -#define REG_A6XX_VPC_ADDR_MODE_CNTL 0x00009601 - -#define REG_A6XX_VPC_PERFCTR_VPC_SEL_0 0x00009604 - -#define REG_A6XX_VPC_PERFCTR_VPC_SEL_1 0x00009605 - -#define REG_A6XX_VPC_PERFCTR_VPC_SEL_2 0x00009606 - -#define REG_A6XX_VPC_PERFCTR_VPC_SEL_3 0x00009607 - -#define REG_A6XX_VPC_PERFCTR_VPC_SEL_4 0x00009608 - -#define REG_A6XX_VPC_PERFCTR_VPC_SEL_5 0x00009609 - -#define REG_A6XX_UCHE_ADDR_MODE_CNTL 0x00000e00 - -#define REG_A6XX_UCHE_MODE_CNTL 0x00000e01 - -#define REG_A6XX_UCHE_WRITE_RANGE_MAX_LO 0x00000e05 - -#define REG_A6XX_UCHE_WRITE_RANGE_MAX_HI 0x00000e06 - -#define REG_A6XX_UCHE_WRITE_THRU_BASE_LO 0x00000e07 - -#define REG_A6XX_UCHE_WRITE_THRU_BASE_HI 0x00000e08 - -#define REG_A6XX_UCHE_TRAP_BASE_LO 0x00000e09 - -#define REG_A6XX_UCHE_TRAP_BASE_HI 0x00000e0a - -#define REG_A6XX_UCHE_GMEM_RANGE_MIN_LO 0x00000e0b - -#define REG_A6XX_UCHE_GMEM_RANGE_MIN_HI 0x00000e0c - -#define REG_A6XX_UCHE_GMEM_RANGE_MAX_LO 0x00000e0d - -#define REG_A6XX_UCHE_GMEM_RANGE_MAX_HI 0x00000e0e - -#define REG_A6XX_UCHE_CACHE_WAYS 0x00000e17 - -#define REG_A6XX_UCHE_FILTER_CNTL 0x00000e18 - -#define REG_A6XX_UCHE_CLIENT_PF 0x00000e19 -#define A6XX_UCHE_CLIENT_PF_PERFSEL__MASK 0x000000ff -#define A6XX_UCHE_CLIENT_PF_PERFSEL__SHIFT 0 -static inline uint32_t A6XX_UCHE_CLIENT_PF_PERFSEL(uint32_t val) -{ - return ((val) << A6XX_UCHE_CLIENT_PF_PERFSEL__SHIFT) & A6XX_UCHE_CLIENT_PF_PERFSEL__MASK; -} - -#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_0 0x00000e1c - -#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_1 0x00000e1d - -#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_2 0x00000e1e - -#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_3 0x00000e1f - -#define 
REG_A6XX_UCHE_PERFCTR_UCHE_SEL_4 0x00000e20 - -#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_5 0x00000e21 - -#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_6 0x00000e22 - -#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_7 0x00000e23 - -#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_8 0x00000e24 - -#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_9 0x00000e25 - -#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_10 0x00000e26 - -#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_11 0x00000e27 - -#define REG_A6XX_SP_ADDR_MODE_CNTL 0x0000ae01 - -#define REG_A6XX_SP_NC_MODE_CNTL 0x0000ae02 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_0 0x0000ae10 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_1 0x0000ae11 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_2 0x0000ae12 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_3 0x0000ae13 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_4 0x0000ae14 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_5 0x0000ae15 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_6 0x0000ae16 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_7 0x0000ae17 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_8 0x0000ae18 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_9 0x0000ae19 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_10 0x0000ae1a - -#define REG_A6XX_SP_PERFCTR_SP_SEL_11 0x0000ae1b - -#define REG_A6XX_SP_PERFCTR_SP_SEL_12 0x0000ae1c - -#define REG_A6XX_SP_PERFCTR_SP_SEL_13 0x0000ae1d - -#define REG_A6XX_SP_PERFCTR_SP_SEL_14 0x0000ae1e - -#define REG_A6XX_SP_PERFCTR_SP_SEL_15 0x0000ae1f - -#define REG_A6XX_SP_PERFCTR_SP_SEL_16 0x0000ae20 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_17 0x0000ae21 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_18 0x0000ae22 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_19 0x0000ae23 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_20 0x0000ae24 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_21 0x0000ae25 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_22 0x0000ae26 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_23 0x0000ae27 - -#define REG_A6XX_TPL1_ADDR_MODE_CNTL 0x0000b601 - -#define REG_A6XX_TPL1_NC_MODE_CNTL 0x0000b604 - -#define REG_A6XX_TPL1_PERFCTR_TP_SEL_0 0x0000b610 - -#define REG_A6XX_TPL1_PERFCTR_TP_SEL_1 0x0000b611 - -#define 
REG_A6XX_TPL1_PERFCTR_TP_SEL_2 0x0000b612 - -#define REG_A6XX_TPL1_PERFCTR_TP_SEL_3 0x0000b613 - -#define REG_A6XX_TPL1_PERFCTR_TP_SEL_4 0x0000b614 - -#define REG_A6XX_TPL1_PERFCTR_TP_SEL_5 0x0000b615 - -#define REG_A6XX_TPL1_PERFCTR_TP_SEL_6 0x0000b616 - -#define REG_A6XX_TPL1_PERFCTR_TP_SEL_7 0x0000b617 - -#define REG_A6XX_TPL1_PERFCTR_TP_SEL_8 0x0000b618 - -#define REG_A6XX_TPL1_PERFCTR_TP_SEL_9 0x0000b619 - -#define REG_A6XX_TPL1_PERFCTR_TP_SEL_10 0x0000b61a - -#define REG_A6XX_TPL1_PERFCTR_TP_SEL_11 0x0000b61b - -#define REG_A6XX_VBIF_VERSION 0x00003000 - -#define REG_A6XX_VBIF_CLKON 0x00003001 -#define A6XX_VBIF_CLKON_FORCE_ON_TESTBUS 0x00000002 - -#define REG_A6XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a - -#define REG_A6XX_VBIF_XIN_HALT_CTRL0 0x00003080 - -#define REG_A6XX_VBIF_XIN_HALT_CTRL1 0x00003081 - -#define REG_A6XX_VBIF_TEST_BUS_OUT_CTRL 0x00003084 - -#define REG_A6XX_VBIF_TEST_BUS1_CTRL0 0x00003085 - -#define REG_A6XX_VBIF_TEST_BUS1_CTRL1 0x00003086 -#define A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL__MASK 0x0000000f -#define A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL__SHIFT 0 -static inline uint32_t A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL(uint32_t val) -{ - return ((val) << A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL__SHIFT) & A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL__MASK; -} - -#define REG_A6XX_VBIF_TEST_BUS2_CTRL0 0x00003087 - -#define REG_A6XX_VBIF_TEST_BUS2_CTRL1 0x00003088 -#define A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL__MASK 0x000001ff -#define A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL__SHIFT 0 -static inline uint32_t A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL(uint32_t val) -{ - return ((val) << A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL__SHIFT) & A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL__MASK; -} - -#define REG_A6XX_VBIF_TEST_BUS_OUT 0x0000308c - -#define REG_A6XX_VBIF_PERF_CNT_SEL0 0x000030d0 - -#define REG_A6XX_VBIF_PERF_CNT_SEL1 0x000030d1 - -#define REG_A6XX_VBIF_PERF_CNT_SEL2 0x000030d2 - -#define REG_A6XX_VBIF_PERF_CNT_SEL3 0x000030d3 - -#define REG_A6XX_VBIF_PERF_CNT_LOW0 0x000030d8 - -#define 
REG_A6XX_VBIF_PERF_CNT_LOW1 0x000030d9 - -#define REG_A6XX_VBIF_PERF_CNT_LOW2 0x000030da - -#define REG_A6XX_VBIF_PERF_CNT_LOW3 0x000030db - -#define REG_A6XX_VBIF_PERF_CNT_HIGH0 0x000030e0 - -#define REG_A6XX_VBIF_PERF_CNT_HIGH1 0x000030e1 - -#define REG_A6XX_VBIF_PERF_CNT_HIGH2 0x000030e2 - -#define REG_A6XX_VBIF_PERF_CNT_HIGH3 0x000030e3 - -#define REG_A6XX_VBIF_PERF_PWR_CNT_EN0 0x00003100 - -#define REG_A6XX_VBIF_PERF_PWR_CNT_EN1 0x00003101 - -#define REG_A6XX_VBIF_PERF_PWR_CNT_EN2 0x00003102 - -#define REG_A6XX_VBIF_PERF_PWR_CNT_LOW0 0x00003110 - -#define REG_A6XX_VBIF_PERF_PWR_CNT_LOW1 0x00003111 - -#define REG_A6XX_VBIF_PERF_PWR_CNT_LOW2 0x00003112 - -#define REG_A6XX_VBIF_PERF_PWR_CNT_HIGH0 0x00003118 - -#define REG_A6XX_VBIF_PERF_PWR_CNT_HIGH1 0x00003119 - -#define REG_A6XX_VBIF_PERF_PWR_CNT_HIGH2 0x0000311a - -#define REG_A6XX_RB_WINDOW_OFFSET2 0x000088d4 -#define A6XX_RB_WINDOW_OFFSET2_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_RB_WINDOW_OFFSET2_X__MASK 0x00007fff -#define A6XX_RB_WINDOW_OFFSET2_X__SHIFT 0 -static inline uint32_t A6XX_RB_WINDOW_OFFSET2_X(uint32_t val) -{ - return ((val) << A6XX_RB_WINDOW_OFFSET2_X__SHIFT) & A6XX_RB_WINDOW_OFFSET2_X__MASK; -} -#define A6XX_RB_WINDOW_OFFSET2_Y__MASK 0x7fff0000 -#define A6XX_RB_WINDOW_OFFSET2_Y__SHIFT 16 -static inline uint32_t A6XX_RB_WINDOW_OFFSET2_Y(uint32_t val) -{ - return ((val) << A6XX_RB_WINDOW_OFFSET2_Y__SHIFT) & A6XX_RB_WINDOW_OFFSET2_Y__MASK; -} - -#define REG_A6XX_SP_WINDOW_OFFSET 0x0000b4d1 -#define A6XX_SP_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_SP_WINDOW_OFFSET_X__MASK 0x00007fff -#define A6XX_SP_WINDOW_OFFSET_X__SHIFT 0 -static inline uint32_t A6XX_SP_WINDOW_OFFSET_X(uint32_t val) -{ - return ((val) << A6XX_SP_WINDOW_OFFSET_X__SHIFT) & A6XX_SP_WINDOW_OFFSET_X__MASK; -} -#define A6XX_SP_WINDOW_OFFSET_Y__MASK 0x7fff0000 -#define A6XX_SP_WINDOW_OFFSET_Y__SHIFT 16 -static inline uint32_t A6XX_SP_WINDOW_OFFSET_Y(uint32_t val) -{ - return ((val) << 
A6XX_SP_WINDOW_OFFSET_Y__SHIFT) & A6XX_SP_WINDOW_OFFSET_Y__MASK; -} - -#define REG_A6XX_SP_TP_WINDOW_OFFSET 0x0000b307 -#define A6XX_SP_TP_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_SP_TP_WINDOW_OFFSET_X__MASK 0x00007fff -#define A6XX_SP_TP_WINDOW_OFFSET_X__SHIFT 0 -static inline uint32_t A6XX_SP_TP_WINDOW_OFFSET_X(uint32_t val) -{ - return ((val) << A6XX_SP_TP_WINDOW_OFFSET_X__SHIFT) & A6XX_SP_TP_WINDOW_OFFSET_X__MASK; -} -#define A6XX_SP_TP_WINDOW_OFFSET_Y__MASK 0x7fff0000 -#define A6XX_SP_TP_WINDOW_OFFSET_Y__SHIFT 16 -static inline uint32_t A6XX_SP_TP_WINDOW_OFFSET_Y(uint32_t val) -{ - return ((val) << A6XX_SP_TP_WINDOW_OFFSET_Y__SHIFT) & A6XX_SP_TP_WINDOW_OFFSET_Y__MASK; -} - -#define REG_A6XX_GRAS_BIN_CONTROL 0x000080a1 -#define A6XX_GRAS_BIN_CONTROL_BINW__MASK 0x000000ff -#define A6XX_GRAS_BIN_CONTROL_BINW__SHIFT 0 -static inline uint32_t A6XX_GRAS_BIN_CONTROL_BINW(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A6XX_GRAS_BIN_CONTROL_BINW__SHIFT) & A6XX_GRAS_BIN_CONTROL_BINW__MASK; -} -#define A6XX_GRAS_BIN_CONTROL_BINH__MASK 0x0001ff00 -#define A6XX_GRAS_BIN_CONTROL_BINH__SHIFT 8 -static inline uint32_t A6XX_GRAS_BIN_CONTROL_BINH(uint32_t val) -{ - assert(!(val & 0xf)); - return ((val >> 4) << A6XX_GRAS_BIN_CONTROL_BINH__SHIFT) & A6XX_GRAS_BIN_CONTROL_BINH__MASK; -} -#define A6XX_GRAS_BIN_CONTROL_BINNING_PASS 0x00040000 -#define A6XX_GRAS_BIN_CONTROL_USE_VIZ 0x00200000 - -#define REG_A6XX_RB_BIN_CONTROL2 0x000088d3 -#define A6XX_RB_BIN_CONTROL2_BINW__MASK 0x000000ff -#define A6XX_RB_BIN_CONTROL2_BINW__SHIFT 0 -static inline uint32_t A6XX_RB_BIN_CONTROL2_BINW(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A6XX_RB_BIN_CONTROL2_BINW__SHIFT) & A6XX_RB_BIN_CONTROL2_BINW__MASK; -} -#define A6XX_RB_BIN_CONTROL2_BINH__MASK 0x0001ff00 -#define A6XX_RB_BIN_CONTROL2_BINH__SHIFT 8 -static inline uint32_t A6XX_RB_BIN_CONTROL2_BINH(uint32_t val) -{ - assert(!(val & 0xf)); - return ((val >> 4) << 
A6XX_RB_BIN_CONTROL2_BINH__SHIFT) & A6XX_RB_BIN_CONTROL2_BINH__MASK; -} - -#define REG_A6XX_VSC_BIN_SIZE 0x00000c02 -#define A6XX_VSC_BIN_SIZE_WIDTH__MASK 0x000000ff -#define A6XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 -static inline uint32_t A6XX_VSC_BIN_SIZE_WIDTH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A6XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A6XX_VSC_BIN_SIZE_WIDTH__MASK; -} -#define A6XX_VSC_BIN_SIZE_HEIGHT__MASK 0x0001ff00 -#define A6XX_VSC_BIN_SIZE_HEIGHT__SHIFT 8 -static inline uint32_t A6XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) -{ - assert(!(val & 0xf)); - return ((val >> 4) << A6XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A6XX_VSC_BIN_SIZE_HEIGHT__MASK; -} - -#define REG_A6XX_VSC_SIZE_ADDRESS_LO 0x00000c03 - -#define REG_A6XX_VSC_SIZE_ADDRESS_HI 0x00000c04 - -#define REG_A6XX_VSC_BIN_COUNT 0x00000c06 -#define A6XX_VSC_BIN_COUNT_NX__MASK 0x000007fe -#define A6XX_VSC_BIN_COUNT_NX__SHIFT 1 -static inline uint32_t A6XX_VSC_BIN_COUNT_NX(uint32_t val) -{ - return ((val) << A6XX_VSC_BIN_COUNT_NX__SHIFT) & A6XX_VSC_BIN_COUNT_NX__MASK; -} -#define A6XX_VSC_BIN_COUNT_NY__MASK 0x001ff800 -#define A6XX_VSC_BIN_COUNT_NY__SHIFT 11 -static inline uint32_t A6XX_VSC_BIN_COUNT_NY(uint32_t val) -{ - return ((val) << A6XX_VSC_BIN_COUNT_NY__SHIFT) & A6XX_VSC_BIN_COUNT_NY__MASK; -} - -static inline uint32_t REG_A6XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c10 + 0x1*i0; } - -static inline uint32_t REG_A6XX_VSC_PIPE_CONFIG_REG(uint32_t i0) { return 0x00000c10 + 0x1*i0; } -#define A6XX_VSC_PIPE_CONFIG_REG_X__MASK 0x000003ff -#define A6XX_VSC_PIPE_CONFIG_REG_X__SHIFT 0 -static inline uint32_t A6XX_VSC_PIPE_CONFIG_REG_X(uint32_t val) -{ - return ((val) << A6XX_VSC_PIPE_CONFIG_REG_X__SHIFT) & A6XX_VSC_PIPE_CONFIG_REG_X__MASK; -} -#define A6XX_VSC_PIPE_CONFIG_REG_Y__MASK 0x000ffc00 -#define A6XX_VSC_PIPE_CONFIG_REG_Y__SHIFT 10 -static inline uint32_t A6XX_VSC_PIPE_CONFIG_REG_Y(uint32_t val) -{ - return ((val) << A6XX_VSC_PIPE_CONFIG_REG_Y__SHIFT) & A6XX_VSC_PIPE_CONFIG_REG_Y__MASK; -} 
-#define A6XX_VSC_PIPE_CONFIG_REG_W__MASK 0x03f00000 -#define A6XX_VSC_PIPE_CONFIG_REG_W__SHIFT 20 -static inline uint32_t A6XX_VSC_PIPE_CONFIG_REG_W(uint32_t val) -{ - return ((val) << A6XX_VSC_PIPE_CONFIG_REG_W__SHIFT) & A6XX_VSC_PIPE_CONFIG_REG_W__MASK; -} -#define A6XX_VSC_PIPE_CONFIG_REG_H__MASK 0xfc000000 -#define A6XX_VSC_PIPE_CONFIG_REG_H__SHIFT 26 -static inline uint32_t A6XX_VSC_PIPE_CONFIG_REG_H(uint32_t val) -{ - return ((val) << A6XX_VSC_PIPE_CONFIG_REG_H__SHIFT) & A6XX_VSC_PIPE_CONFIG_REG_H__MASK; -} - -#define REG_A6XX_VSC_PIPE_DATA2_ADDRESS_LO 0x00000c30 - -#define REG_A6XX_VSC_PIPE_DATA2_ADDRESS_HI 0x00000c31 - -#define REG_A6XX_VSC_PIPE_DATA2_PITCH 0x00000c32 - -#define REG_A6XX_VSC_PIPE_DATA2_ARRAY_PITCH 0x00000c33 -#define A6XX_VSC_PIPE_DATA2_ARRAY_PITCH__MASK 0xffffffff -#define A6XX_VSC_PIPE_DATA2_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A6XX_VSC_PIPE_DATA2_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0xf)); - return ((val >> 4) << A6XX_VSC_PIPE_DATA2_ARRAY_PITCH__SHIFT) & A6XX_VSC_PIPE_DATA2_ARRAY_PITCH__MASK; -} - -#define REG_A6XX_VSC_PIPE_DATA_ADDRESS_LO 0x00000c34 - -#define REG_A6XX_VSC_PIPE_DATA_ADDRESS_HI 0x00000c35 - -#define REG_A6XX_VSC_PIPE_DATA_PITCH 0x00000c36 - -#define REG_A6XX_VSC_PIPE_DATA_ARRAY_PITCH 0x00000c37 -#define A6XX_VSC_PIPE_DATA_ARRAY_PITCH__MASK 0xffffffff -#define A6XX_VSC_PIPE_DATA_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A6XX_VSC_PIPE_DATA_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0xf)); - return ((val >> 4) << A6XX_VSC_PIPE_DATA_ARRAY_PITCH__SHIFT) & A6XX_VSC_PIPE_DATA_ARRAY_PITCH__MASK; -} - -static inline uint32_t REG_A6XX_VSC_SIZE(uint32_t i0) { return 0x00000c78 + 0x1*i0; } - -static inline uint32_t REG_A6XX_VSC_SIZE_REG(uint32_t i0) { return 0x00000c78 + 0x1*i0; } - -#define REG_A6XX_UCHE_UNKNOWN_0E12 0x00000e12 - -#define REG_A6XX_GRAS_UNKNOWN_8000 0x00008000 - -#define REG_A6XX_GRAS_UNKNOWN_8001 0x00008001 - -#define REG_A6XX_GRAS_UNKNOWN_8004 0x00008004 - -#define REG_A6XX_GRAS_CNTL 
0x00008005 -#define A6XX_GRAS_CNTL_VARYING 0x00000001 -#define A6XX_GRAS_CNTL_UNK3 0x00000008 -#define A6XX_GRAS_CNTL_XCOORD 0x00000040 -#define A6XX_GRAS_CNTL_YCOORD 0x00000080 -#define A6XX_GRAS_CNTL_ZCOORD 0x00000100 -#define A6XX_GRAS_CNTL_WCOORD 0x00000200 - -#define REG_A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ 0x00008006 -#define A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK 0x000003ff -#define A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__SHIFT 0 -static inline uint32_t A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(uint32_t val) -{ - return ((val) << A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__SHIFT) & A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK; -} -#define A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__MASK 0x000ffc00 -#define A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__SHIFT 10 -static inline uint32_t A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(uint32_t val) -{ - return ((val) << A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__SHIFT) & A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__MASK; -} - -#define REG_A6XX_GRAS_CL_VPORT_XOFFSET_0 0x00008010 -#define A6XX_GRAS_CL_VPORT_XOFFSET_0__MASK 0xffffffff -#define A6XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT 0 -static inline uint32_t A6XX_GRAS_CL_VPORT_XOFFSET_0(float val) -{ - return ((fui(val)) << A6XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT) & A6XX_GRAS_CL_VPORT_XOFFSET_0__MASK; -} - -#define REG_A6XX_GRAS_CL_VPORT_XSCALE_0 0x00008011 -#define A6XX_GRAS_CL_VPORT_XSCALE_0__MASK 0xffffffff -#define A6XX_GRAS_CL_VPORT_XSCALE_0__SHIFT 0 -static inline uint32_t A6XX_GRAS_CL_VPORT_XSCALE_0(float val) -{ - return ((fui(val)) << A6XX_GRAS_CL_VPORT_XSCALE_0__SHIFT) & A6XX_GRAS_CL_VPORT_XSCALE_0__MASK; -} - -#define REG_A6XX_GRAS_CL_VPORT_YOFFSET_0 0x00008012 -#define A6XX_GRAS_CL_VPORT_YOFFSET_0__MASK 0xffffffff -#define A6XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT 0 -static inline uint32_t A6XX_GRAS_CL_VPORT_YOFFSET_0(float val) -{ - return ((fui(val)) << A6XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT) & A6XX_GRAS_CL_VPORT_YOFFSET_0__MASK; -} - -#define REG_A6XX_GRAS_CL_VPORT_YSCALE_0 0x00008013 -#define 
A6XX_GRAS_CL_VPORT_YSCALE_0__MASK 0xffffffff -#define A6XX_GRAS_CL_VPORT_YSCALE_0__SHIFT 0 -static inline uint32_t A6XX_GRAS_CL_VPORT_YSCALE_0(float val) -{ - return ((fui(val)) << A6XX_GRAS_CL_VPORT_YSCALE_0__SHIFT) & A6XX_GRAS_CL_VPORT_YSCALE_0__MASK; -} - -#define REG_A6XX_GRAS_CL_VPORT_ZOFFSET_0 0x00008014 -#define A6XX_GRAS_CL_VPORT_ZOFFSET_0__MASK 0xffffffff -#define A6XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT 0 -static inline uint32_t A6XX_GRAS_CL_VPORT_ZOFFSET_0(float val) -{ - return ((fui(val)) << A6XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT) & A6XX_GRAS_CL_VPORT_ZOFFSET_0__MASK; -} - -#define REG_A6XX_GRAS_CL_VPORT_ZSCALE_0 0x00008015 -#define A6XX_GRAS_CL_VPORT_ZSCALE_0__MASK 0xffffffff -#define A6XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT 0 -static inline uint32_t A6XX_GRAS_CL_VPORT_ZSCALE_0(float val) -{ - return ((fui(val)) << A6XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT) & A6XX_GRAS_CL_VPORT_ZSCALE_0__MASK; -} - -#define REG_A6XX_GRAS_SU_CNTL 0x00008090 -#define A6XX_GRAS_SU_CNTL_CULL_FRONT 0x00000001 -#define A6XX_GRAS_SU_CNTL_CULL_BACK 0x00000002 -#define A6XX_GRAS_SU_CNTL_FRONT_CW 0x00000004 -#define A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK 0x000007f8 -#define A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT 3 -static inline uint32_t A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(float val) -{ - return ((((int32_t)(val * 4.0))) << A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT) & A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK; -} -#define A6XX_GRAS_SU_CNTL_POLY_OFFSET 0x00000800 -#define A6XX_GRAS_SU_CNTL_MSAA_ENABLE 0x00002000 - -#define REG_A6XX_GRAS_SU_POINT_MINMAX 0x00008091 -#define A6XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff -#define A6XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0 -static inline uint32_t A6XX_GRAS_SU_POINT_MINMAX_MIN(float val) -{ - return ((((uint32_t)(val * 16.0))) << A6XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A6XX_GRAS_SU_POINT_MINMAX_MIN__MASK; -} -#define A6XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000 -#define A6XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16 -static inline uint32_t A6XX_GRAS_SU_POINT_MINMAX_MAX(float 
val) -{ - return ((((uint32_t)(val * 16.0))) << A6XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A6XX_GRAS_SU_POINT_MINMAX_MAX__MASK; -} - -#define REG_A6XX_GRAS_SU_POINT_SIZE 0x00008092 -#define A6XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff -#define A6XX_GRAS_SU_POINT_SIZE__SHIFT 0 -static inline uint32_t A6XX_GRAS_SU_POINT_SIZE(float val) -{ - return ((((int32_t)(val * 16.0))) << A6XX_GRAS_SU_POINT_SIZE__SHIFT) & A6XX_GRAS_SU_POINT_SIZE__MASK; -} - -#define REG_A6XX_GRAS_SU_DEPTH_PLANE_CNTL 0x00008094 -#define A6XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 - -#define REG_A6XX_GRAS_SU_POLY_OFFSET_SCALE 0x00008095 -#define A6XX_GRAS_SU_POLY_OFFSET_SCALE__MASK 0xffffffff -#define A6XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT 0 -static inline uint32_t A6XX_GRAS_SU_POLY_OFFSET_SCALE(float val) -{ - return ((fui(val)) << A6XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT) & A6XX_GRAS_SU_POLY_OFFSET_SCALE__MASK; -} - -#define REG_A6XX_GRAS_SU_POLY_OFFSET_OFFSET 0x00008096 -#define A6XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff -#define A6XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0 -static inline uint32_t A6XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) -{ - return ((fui(val)) << A6XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A6XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; -} - -#define REG_A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP 0x00008097 -#define A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__MASK 0xffffffff -#define A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__SHIFT 0 -static inline uint32_t A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(float val) -{ - return ((fui(val)) << A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__SHIFT) & A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__MASK; -} - -#define REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO 0x00008098 -#define A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK 0x00000007 -#define A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT 0 -static inline uint32_t A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a6xx_depth_format val) -{ - return ((val) << A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT) & 
A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK; -} - -#define REG_A6XX_GRAS_UNKNOWN_8099 0x00008099 - -#define REG_A6XX_GRAS_UNKNOWN_809B 0x0000809b - -#define REG_A6XX_GRAS_UNKNOWN_80A0 0x000080a0 - -#define REG_A6XX_GRAS_RAS_MSAA_CNTL 0x000080a2 -#define A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES__MASK; -} - -#define REG_A6XX_GRAS_DEST_MSAA_CNTL 0x000080a3 -#define A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES__MASK; -} -#define A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 - -#define REG_A6XX_GRAS_UNKNOWN_80A4 0x000080a4 - -#define REG_A6XX_GRAS_UNKNOWN_80A5 0x000080a5 - -#define REG_A6XX_GRAS_UNKNOWN_80A6 0x000080a6 - -#define REG_A6XX_GRAS_UNKNOWN_80AF 0x000080af - -#define REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0 0x000080b0 -#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__MASK 0x00007fff -#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__SHIFT 0 -static inline uint32_t A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__SHIFT) & A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__MASK; -} -#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__MASK 0x7fff0000 -#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__SHIFT 16 -static inline uint32_t A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__SHIFT) & A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__MASK; -} - -#define REG_A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0 0x000080b1 -#define 
A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__MASK 0x00007fff -#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__SHIFT 0 -static inline uint32_t A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__SHIFT) & A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__MASK; -} -#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__MASK 0x7fff0000 -#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__SHIFT 16 -static inline uint32_t A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__SHIFT) & A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__MASK; -} - -#define REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0 0x000080d0 -#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__MASK 0x00007fff -#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__SHIFT 0 -static inline uint32_t A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__SHIFT) & A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__MASK; -} -#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__MASK 0x7fff0000 -#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__SHIFT 16 -static inline uint32_t A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__SHIFT) & A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__MASK; -} - -#define REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0 0x000080d1 -#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__MASK 0x00007fff -#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__SHIFT 0 -static inline uint32_t A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__SHIFT) & A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__MASK; -} -#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__MASK 0x7fff0000 -#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__SHIFT 16 -static 
inline uint32_t A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__SHIFT) & A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__MASK; -} - -#define REG_A6XX_GRAS_SC_WINDOW_SCISSOR_TL 0x000080f0 -#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff -#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 -static inline uint32_t A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK; -} -#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 -#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 -static inline uint32_t A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK; -} - -#define REG_A6XX_GRAS_SC_WINDOW_SCISSOR_BR 0x000080f1 -#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff -#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 -static inline uint32_t A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK; -} -#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 -#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 -static inline uint32_t A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK; -} - -#define REG_A6XX_GRAS_LRZ_CNTL 0x00008100 -#define A6XX_GRAS_LRZ_CNTL_ENABLE 0x00000001 -#define A6XX_GRAS_LRZ_CNTL_LRZ_WRITE 0x00000002 -#define A6XX_GRAS_LRZ_CNTL_GREATER 0x00000004 -#define A6XX_GRAS_LRZ_CNTL_UNK3 0x00000008 -#define A6XX_GRAS_LRZ_CNTL_UNK4 0x00000010 - -#define REG_A6XX_GRAS_UNKNOWN_8101 0x00008101 - -#define REG_A6XX_GRAS_2D_BLIT_INFO 0x00008102 -#define 
A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) -{ - return ((val) << A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT__SHIFT) & A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT__MASK; -} - -#define REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO 0x00008103 - -#define REG_A6XX_GRAS_LRZ_BUFFER_BASE_HI 0x00008104 - -#define REG_A6XX_GRAS_LRZ_BUFFER_PITCH 0x00008105 -#define A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH__MASK 0x000007ff -#define A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH__SHIFT 0 -static inline uint32_t A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH__SHIFT) & A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH__MASK; -} -#define A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH__MASK 0x003ff800 -#define A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH__SHIFT 11 -static inline uint32_t A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH__SHIFT) & A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH__MASK; -} - -#define REG_A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO 0x00008106 - -#define REG_A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI 0x00008107 - -#define REG_A6XX_GRAS_UNKNOWN_8109 0x00008109 - -#define REG_A6XX_GRAS_UNKNOWN_8110 0x00008110 - -#define REG_A6XX_GRAS_2D_BLIT_CNTL 0x00008400 -#define A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT__MASK 0x0000ff00 -#define A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT__SHIFT 8 -static inline uint32_t A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT(enum a6xx_color_fmt val) -{ - return ((val) << A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT__SHIFT) & A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT__MASK; -} -#define A6XX_GRAS_2D_BLIT_CNTL_SCISSOR 0x00010000 - -#define REG_A6XX_GRAS_2D_SRC_TL_X 0x00008401 -#define A6XX_GRAS_2D_SRC_TL_X_X__MASK 0x00ffff00 -#define A6XX_GRAS_2D_SRC_TL_X_X__SHIFT 8 -static inline uint32_t A6XX_GRAS_2D_SRC_TL_X_X(uint32_t val) -{ - return 
((val) << A6XX_GRAS_2D_SRC_TL_X_X__SHIFT) & A6XX_GRAS_2D_SRC_TL_X_X__MASK; -} - -#define REG_A6XX_GRAS_2D_SRC_BR_X 0x00008402 -#define A6XX_GRAS_2D_SRC_BR_X_X__MASK 0x00ffff00 -#define A6XX_GRAS_2D_SRC_BR_X_X__SHIFT 8 -static inline uint32_t A6XX_GRAS_2D_SRC_BR_X_X(uint32_t val) -{ - return ((val) << A6XX_GRAS_2D_SRC_BR_X_X__SHIFT) & A6XX_GRAS_2D_SRC_BR_X_X__MASK; -} - -#define REG_A6XX_GRAS_2D_SRC_TL_Y 0x00008403 -#define A6XX_GRAS_2D_SRC_TL_Y_Y__MASK 0x00ffff00 -#define A6XX_GRAS_2D_SRC_TL_Y_Y__SHIFT 8 -static inline uint32_t A6XX_GRAS_2D_SRC_TL_Y_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_2D_SRC_TL_Y_Y__SHIFT) & A6XX_GRAS_2D_SRC_TL_Y_Y__MASK; -} - -#define REG_A6XX_GRAS_2D_SRC_BR_Y 0x00008404 -#define A6XX_GRAS_2D_SRC_BR_Y_Y__MASK 0x00ffff00 -#define A6XX_GRAS_2D_SRC_BR_Y_Y__SHIFT 8 -static inline uint32_t A6XX_GRAS_2D_SRC_BR_Y_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_2D_SRC_BR_Y_Y__SHIFT) & A6XX_GRAS_2D_SRC_BR_Y_Y__MASK; -} - -#define REG_A6XX_GRAS_2D_DST_TL 0x00008405 -#define A6XX_GRAS_2D_DST_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_GRAS_2D_DST_TL_X__MASK 0x00007fff -#define A6XX_GRAS_2D_DST_TL_X__SHIFT 0 -static inline uint32_t A6XX_GRAS_2D_DST_TL_X(uint32_t val) -{ - return ((val) << A6XX_GRAS_2D_DST_TL_X__SHIFT) & A6XX_GRAS_2D_DST_TL_X__MASK; -} -#define A6XX_GRAS_2D_DST_TL_Y__MASK 0x7fff0000 -#define A6XX_GRAS_2D_DST_TL_Y__SHIFT 16 -static inline uint32_t A6XX_GRAS_2D_DST_TL_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_2D_DST_TL_Y__SHIFT) & A6XX_GRAS_2D_DST_TL_Y__MASK; -} - -#define REG_A6XX_GRAS_2D_DST_BR 0x00008406 -#define A6XX_GRAS_2D_DST_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_GRAS_2D_DST_BR_X__MASK 0x00007fff -#define A6XX_GRAS_2D_DST_BR_X__SHIFT 0 -static inline uint32_t A6XX_GRAS_2D_DST_BR_X(uint32_t val) -{ - return ((val) << A6XX_GRAS_2D_DST_BR_X__SHIFT) & A6XX_GRAS_2D_DST_BR_X__MASK; -} -#define A6XX_GRAS_2D_DST_BR_Y__MASK 0x7fff0000 -#define A6XX_GRAS_2D_DST_BR_Y__SHIFT 16 -static inline uint32_t 
A6XX_GRAS_2D_DST_BR_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_2D_DST_BR_Y__SHIFT) & A6XX_GRAS_2D_DST_BR_Y__MASK; -} - -#define REG_A6XX_GRAS_RESOLVE_CNTL_1 0x0000840a -#define A6XX_GRAS_RESOLVE_CNTL_1_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_GRAS_RESOLVE_CNTL_1_X__MASK 0x00007fff -#define A6XX_GRAS_RESOLVE_CNTL_1_X__SHIFT 0 -static inline uint32_t A6XX_GRAS_RESOLVE_CNTL_1_X(uint32_t val) -{ - return ((val) << A6XX_GRAS_RESOLVE_CNTL_1_X__SHIFT) & A6XX_GRAS_RESOLVE_CNTL_1_X__MASK; -} -#define A6XX_GRAS_RESOLVE_CNTL_1_Y__MASK 0x7fff0000 -#define A6XX_GRAS_RESOLVE_CNTL_1_Y__SHIFT 16 -static inline uint32_t A6XX_GRAS_RESOLVE_CNTL_1_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_RESOLVE_CNTL_1_Y__SHIFT) & A6XX_GRAS_RESOLVE_CNTL_1_Y__MASK; -} - -#define REG_A6XX_GRAS_RESOLVE_CNTL_2 0x0000840b -#define A6XX_GRAS_RESOLVE_CNTL_2_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_GRAS_RESOLVE_CNTL_2_X__MASK 0x00007fff -#define A6XX_GRAS_RESOLVE_CNTL_2_X__SHIFT 0 -static inline uint32_t A6XX_GRAS_RESOLVE_CNTL_2_X(uint32_t val) -{ - return ((val) << A6XX_GRAS_RESOLVE_CNTL_2_X__SHIFT) & A6XX_GRAS_RESOLVE_CNTL_2_X__MASK; -} -#define A6XX_GRAS_RESOLVE_CNTL_2_Y__MASK 0x7fff0000 -#define A6XX_GRAS_RESOLVE_CNTL_2_Y__SHIFT 16 -static inline uint32_t A6XX_GRAS_RESOLVE_CNTL_2_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_RESOLVE_CNTL_2_Y__SHIFT) & A6XX_GRAS_RESOLVE_CNTL_2_Y__MASK; -} - -#define REG_A6XX_GRAS_UNKNOWN_8600 0x00008600 - -#define REG_A6XX_RB_BIN_CONTROL 0x00008800 -#define A6XX_RB_BIN_CONTROL_BINW__MASK 0x000000ff -#define A6XX_RB_BIN_CONTROL_BINW__SHIFT 0 -static inline uint32_t A6XX_RB_BIN_CONTROL_BINW(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A6XX_RB_BIN_CONTROL_BINW__SHIFT) & A6XX_RB_BIN_CONTROL_BINW__MASK; -} -#define A6XX_RB_BIN_CONTROL_BINH__MASK 0x0001ff00 -#define A6XX_RB_BIN_CONTROL_BINH__SHIFT 8 -static inline uint32_t A6XX_RB_BIN_CONTROL_BINH(uint32_t val) -{ - assert(!(val & 0xf)); - return ((val >> 4) << 
A6XX_RB_BIN_CONTROL_BINH__SHIFT) & A6XX_RB_BIN_CONTROL_BINH__MASK; -} -#define A6XX_RB_BIN_CONTROL_BINNING_PASS 0x00040000 -#define A6XX_RB_BIN_CONTROL_USE_VIZ 0x00200000 - -#define REG_A6XX_RB_RENDER_CNTL 0x00008801 -#define A6XX_RB_RENDER_CNTL_UNK4 0x00000010 -#define A6XX_RB_RENDER_CNTL_BINNING 0x00000080 -#define A6XX_RB_RENDER_CNTL_FLAG_DEPTH 0x00004000 -#define A6XX_RB_RENDER_CNTL_FLAG_MRTS__MASK 0x00ff0000 -#define A6XX_RB_RENDER_CNTL_FLAG_MRTS__SHIFT 16 -static inline uint32_t A6XX_RB_RENDER_CNTL_FLAG_MRTS(uint32_t val) -{ - return ((val) << A6XX_RB_RENDER_CNTL_FLAG_MRTS__SHIFT) & A6XX_RB_RENDER_CNTL_FLAG_MRTS__MASK; -} - -#define REG_A6XX_RB_RAS_MSAA_CNTL 0x00008802 -#define A6XX_RB_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A6XX_RB_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A6XX_RB_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A6XX_RB_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_RB_RAS_MSAA_CNTL_SAMPLES__MASK; -} - -#define REG_A6XX_RB_DEST_MSAA_CNTL 0x00008803 -#define A6XX_RB_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A6XX_RB_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A6XX_RB_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A6XX_RB_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_RB_DEST_MSAA_CNTL_SAMPLES__MASK; -} -#define A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 - -#define REG_A6XX_RB_UNKNOWN_8804 0x00008804 - -#define REG_A6XX_RB_UNKNOWN_8805 0x00008805 - -#define REG_A6XX_RB_UNKNOWN_8806 0x00008806 - -#define REG_A6XX_RB_RENDER_CONTROL0 0x00008809 -#define A6XX_RB_RENDER_CONTROL0_VARYING 0x00000001 -#define A6XX_RB_RENDER_CONTROL0_UNK3 0x00000008 -#define A6XX_RB_RENDER_CONTROL0_XCOORD 0x00000040 -#define A6XX_RB_RENDER_CONTROL0_YCOORD 0x00000080 -#define A6XX_RB_RENDER_CONTROL0_ZCOORD 0x00000100 -#define A6XX_RB_RENDER_CONTROL0_WCOORD 0x00000200 -#define A6XX_RB_RENDER_CONTROL0_UNK10 0x00000400 - -#define REG_A6XX_RB_RENDER_CONTROL1 0x0000880a -#define 
A6XX_RB_RENDER_CONTROL1_SAMPLEMASK 0x00000001 -#define A6XX_RB_RENDER_CONTROL1_FACENESS 0x00000002 -#define A6XX_RB_RENDER_CONTROL1_SAMPLEID 0x00000008 - -#define REG_A6XX_RB_FS_OUTPUT_CNTL0 0x0000880b -#define A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z 0x00000002 - -#define REG_A6XX_RB_FS_OUTPUT_CNTL1 0x0000880c -#define A6XX_RB_FS_OUTPUT_CNTL1_MRT__MASK 0x0000000f -#define A6XX_RB_FS_OUTPUT_CNTL1_MRT__SHIFT 0 -static inline uint32_t A6XX_RB_FS_OUTPUT_CNTL1_MRT(uint32_t val) -{ - return ((val) << A6XX_RB_FS_OUTPUT_CNTL1_MRT__SHIFT) & A6XX_RB_FS_OUTPUT_CNTL1_MRT__MASK; -} - -#define REG_A6XX_RB_RENDER_COMPONENTS 0x0000880d -#define A6XX_RB_RENDER_COMPONENTS_RT0__MASK 0x0000000f -#define A6XX_RB_RENDER_COMPONENTS_RT0__SHIFT 0 -static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT0(uint32_t val) -{ - return ((val) << A6XX_RB_RENDER_COMPONENTS_RT0__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT0__MASK; -} -#define A6XX_RB_RENDER_COMPONENTS_RT1__MASK 0x000000f0 -#define A6XX_RB_RENDER_COMPONENTS_RT1__SHIFT 4 -static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT1(uint32_t val) -{ - return ((val) << A6XX_RB_RENDER_COMPONENTS_RT1__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT1__MASK; -} -#define A6XX_RB_RENDER_COMPONENTS_RT2__MASK 0x00000f00 -#define A6XX_RB_RENDER_COMPONENTS_RT2__SHIFT 8 -static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT2(uint32_t val) -{ - return ((val) << A6XX_RB_RENDER_COMPONENTS_RT2__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT2__MASK; -} -#define A6XX_RB_RENDER_COMPONENTS_RT3__MASK 0x0000f000 -#define A6XX_RB_RENDER_COMPONENTS_RT3__SHIFT 12 -static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT3(uint32_t val) -{ - return ((val) << A6XX_RB_RENDER_COMPONENTS_RT3__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT3__MASK; -} -#define A6XX_RB_RENDER_COMPONENTS_RT4__MASK 0x000f0000 -#define A6XX_RB_RENDER_COMPONENTS_RT4__SHIFT 16 -static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT4(uint32_t val) -{ - return ((val) << A6XX_RB_RENDER_COMPONENTS_RT4__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT4__MASK; -} 
-#define A6XX_RB_RENDER_COMPONENTS_RT5__MASK 0x00f00000 -#define A6XX_RB_RENDER_COMPONENTS_RT5__SHIFT 20 -static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT5(uint32_t val) -{ - return ((val) << A6XX_RB_RENDER_COMPONENTS_RT5__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT5__MASK; -} -#define A6XX_RB_RENDER_COMPONENTS_RT6__MASK 0x0f000000 -#define A6XX_RB_RENDER_COMPONENTS_RT6__SHIFT 24 -static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT6(uint32_t val) -{ - return ((val) << A6XX_RB_RENDER_COMPONENTS_RT6__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT6__MASK; -} -#define A6XX_RB_RENDER_COMPONENTS_RT7__MASK 0xf0000000 -#define A6XX_RB_RENDER_COMPONENTS_RT7__SHIFT 28 -static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT7(uint32_t val) -{ - return ((val) << A6XX_RB_RENDER_COMPONENTS_RT7__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT7__MASK; -} - -#define REG_A6XX_RB_DITHER_CNTL 0x0000880e -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0__MASK 0x00000003 -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0__SHIFT 0 -static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0(enum adreno_rb_dither_mode val) -{ - return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0__MASK; -} -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1__MASK 0x0000000c -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1__SHIFT 2 -static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1(enum adreno_rb_dither_mode val) -{ - return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1__MASK; -} -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2__MASK 0x00000030 -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2__SHIFT 4 -static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2(enum adreno_rb_dither_mode val) -{ - return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2__MASK; -} -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3__MASK 0x000000c0 -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3__SHIFT 6 -static inline uint32_t 
A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3(enum adreno_rb_dither_mode val) -{ - return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3__MASK; -} -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4__MASK 0x00000300 -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4__SHIFT 8 -static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4(enum adreno_rb_dither_mode val) -{ - return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4__MASK; -} -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5__MASK 0x00000c00 -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5__SHIFT 10 -static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5(enum adreno_rb_dither_mode val) -{ - return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5__MASK; -} -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6__MASK 0x00001000 -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6__SHIFT 12 -static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6(enum adreno_rb_dither_mode val) -{ - return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6__MASK; -} -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7__MASK 0x0000c000 -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7__SHIFT 14 -static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7(enum adreno_rb_dither_mode val) -{ - return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7__MASK; -} - -#define REG_A6XX_RB_SRGB_CNTL 0x0000880f -#define A6XX_RB_SRGB_CNTL_SRGB_MRT0 0x00000001 -#define A6XX_RB_SRGB_CNTL_SRGB_MRT1 0x00000002 -#define A6XX_RB_SRGB_CNTL_SRGB_MRT2 0x00000004 -#define A6XX_RB_SRGB_CNTL_SRGB_MRT3 0x00000008 -#define A6XX_RB_SRGB_CNTL_SRGB_MRT4 0x00000010 -#define A6XX_RB_SRGB_CNTL_SRGB_MRT5 0x00000020 -#define A6XX_RB_SRGB_CNTL_SRGB_MRT6 0x00000040 -#define A6XX_RB_SRGB_CNTL_SRGB_MRT7 0x00000080 - -#define REG_A6XX_RB_UNKNOWN_8810 0x00008810 - -#define 
REG_A6XX_RB_UNKNOWN_8811 0x00008811 - -#define REG_A6XX_RB_UNKNOWN_8818 0x00008818 - -#define REG_A6XX_RB_UNKNOWN_8819 0x00008819 - -#define REG_A6XX_RB_UNKNOWN_881A 0x0000881a - -#define REG_A6XX_RB_UNKNOWN_881B 0x0000881b - -#define REG_A6XX_RB_UNKNOWN_881C 0x0000881c - -#define REG_A6XX_RB_UNKNOWN_881D 0x0000881d - -#define REG_A6XX_RB_UNKNOWN_881E 0x0000881e - -static inline uint32_t REG_A6XX_RB_MRT(uint32_t i0) { return 0x00008820 + 0x8*i0; } - -static inline uint32_t REG_A6XX_RB_MRT_CONTROL(uint32_t i0) { return 0x00008820 + 0x8*i0; } -#define A6XX_RB_MRT_CONTROL_BLEND 0x00000001 -#define A6XX_RB_MRT_CONTROL_BLEND2 0x00000002 -#define A6XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000004 -#define A6XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000078 -#define A6XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 3 -static inline uint32_t A6XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) -{ - return ((val) << A6XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A6XX_RB_MRT_CONTROL_ROP_CODE__MASK; -} -#define A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x00000780 -#define A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 7 -static inline uint32_t A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) -{ - return ((val) << A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; -} - -static inline uint32_t REG_A6XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x00008821 + 0x8*i0; } -#define A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f -#define A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 -static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK; -} -#define A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0 -#define A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5 -static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) -{ - return ((val) << 
A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK; -} -#define A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00 -#define A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8 -static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK; -} -#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000 -#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16 -static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK; -} -#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000 -#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21 -static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) -{ - return ((val) << A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK; -} -#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000 -#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24 -static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK; -} - -static inline uint32_t REG_A6XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x00008822 + 0x8*i0; } -#define A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) -{ - return ((val) << A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK; -} -#define 
A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x00000300 -#define A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 8 -static inline uint32_t A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a6xx_tile_mode val) -{ - return ((val) << A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK; -} -#define A6XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00006000 -#define A6XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 13 -static inline uint32_t A6XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A6XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A6XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK; -} - -static inline uint32_t REG_A6XX_RB_MRT_PITCH(uint32_t i0) { return 0x00008823 + 0x8*i0; } -#define A6XX_RB_MRT_PITCH__MASK 0xffffffff -#define A6XX_RB_MRT_PITCH__SHIFT 0 -static inline uint32_t A6XX_RB_MRT_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A6XX_RB_MRT_PITCH__SHIFT) & A6XX_RB_MRT_PITCH__MASK; -} - -static inline uint32_t REG_A6XX_RB_MRT_ARRAY_PITCH(uint32_t i0) { return 0x00008824 + 0x8*i0; } -#define A6XX_RB_MRT_ARRAY_PITCH__MASK 0xffffffff -#define A6XX_RB_MRT_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A6XX_RB_MRT_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A6XX_RB_MRT_ARRAY_PITCH__SHIFT) & A6XX_RB_MRT_ARRAY_PITCH__MASK; -} - -static inline uint32_t REG_A6XX_RB_MRT_BASE_LO(uint32_t i0) { return 0x00008825 + 0x8*i0; } - -static inline uint32_t REG_A6XX_RB_MRT_BASE_HI(uint32_t i0) { return 0x00008826 + 0x8*i0; } - -static inline uint32_t REG_A6XX_RB_MRT_BASE_GMEM(uint32_t i0) { return 0x00008827 + 0x8*i0; } - -#define REG_A6XX_RB_BLEND_RED_F32 0x00008860 -#define A6XX_RB_BLEND_RED_F32__MASK 0xffffffff -#define A6XX_RB_BLEND_RED_F32__SHIFT 0 -static inline uint32_t A6XX_RB_BLEND_RED_F32(float val) -{ - return ((fui(val)) << A6XX_RB_BLEND_RED_F32__SHIFT) & A6XX_RB_BLEND_RED_F32__MASK; -} - -#define REG_A6XX_RB_BLEND_GREEN_F32 0x00008861 -#define A6XX_RB_BLEND_GREEN_F32__MASK 0xffffffff -#define 
A6XX_RB_BLEND_GREEN_F32__SHIFT 0 -static inline uint32_t A6XX_RB_BLEND_GREEN_F32(float val) -{ - return ((fui(val)) << A6XX_RB_BLEND_GREEN_F32__SHIFT) & A6XX_RB_BLEND_GREEN_F32__MASK; -} - -#define REG_A6XX_RB_BLEND_BLUE_F32 0x00008862 -#define A6XX_RB_BLEND_BLUE_F32__MASK 0xffffffff -#define A6XX_RB_BLEND_BLUE_F32__SHIFT 0 -static inline uint32_t A6XX_RB_BLEND_BLUE_F32(float val) -{ - return ((fui(val)) << A6XX_RB_BLEND_BLUE_F32__SHIFT) & A6XX_RB_BLEND_BLUE_F32__MASK; -} - -#define REG_A6XX_RB_BLEND_ALPHA_F32 0x00008863 -#define A6XX_RB_BLEND_ALPHA_F32__MASK 0xffffffff -#define A6XX_RB_BLEND_ALPHA_F32__SHIFT 0 -static inline uint32_t A6XX_RB_BLEND_ALPHA_F32(float val) -{ - return ((fui(val)) << A6XX_RB_BLEND_ALPHA_F32__SHIFT) & A6XX_RB_BLEND_ALPHA_F32__MASK; -} - -#define REG_A6XX_RB_ALPHA_CONTROL 0x00008864 -#define A6XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK 0x000000ff -#define A6XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT 0 -static inline uint32_t A6XX_RB_ALPHA_CONTROL_ALPHA_REF(uint32_t val) -{ - return ((val) << A6XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT) & A6XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK; -} -#define A6XX_RB_ALPHA_CONTROL_ALPHA_TEST 0x00000100 -#define A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK 0x00000e00 -#define A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT 9 -static inline uint32_t A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val) -{ - return ((val) << A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK; -} - -#define REG_A6XX_RB_BLEND_CNTL 0x00008865 -#define A6XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK 0x000000ff -#define A6XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT 0 -static inline uint32_t A6XX_RB_BLEND_CNTL_ENABLE_BLEND(uint32_t val) -{ - return ((val) << A6XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT) & A6XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK; -} -#define A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND 0x00000100 -#define A6XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK 0xffff0000 -#define A6XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT 16 -static inline 
uint32_t A6XX_RB_BLEND_CNTL_SAMPLE_MASK(uint32_t val) -{ - return ((val) << A6XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT) & A6XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK; -} - -#define REG_A6XX_RB_DEPTH_PLANE_CNTL 0x00008870 -#define A6XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 - -#define REG_A6XX_RB_DEPTH_CNTL 0x00008871 -#define A6XX_RB_DEPTH_CNTL_Z_ENABLE 0x00000001 -#define A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE 0x00000002 -#define A6XX_RB_DEPTH_CNTL_ZFUNC__MASK 0x0000001c -#define A6XX_RB_DEPTH_CNTL_ZFUNC__SHIFT 2 -static inline uint32_t A6XX_RB_DEPTH_CNTL_ZFUNC(enum adreno_compare_func val) -{ - return ((val) << A6XX_RB_DEPTH_CNTL_ZFUNC__SHIFT) & A6XX_RB_DEPTH_CNTL_ZFUNC__MASK; -} -#define A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE 0x00000040 - -#define REG_A6XX_RB_DEPTH_BUFFER_INFO 0x00008872 -#define A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK 0x00000007 -#define A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT 0 -static inline uint32_t A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a6xx_depth_format val) -{ - return ((val) << A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT) & A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK; -} - -#define REG_A6XX_RB_DEPTH_BUFFER_PITCH 0x00008873 -#define A6XX_RB_DEPTH_BUFFER_PITCH__MASK 0xffffffff -#define A6XX_RB_DEPTH_BUFFER_PITCH__SHIFT 0 -static inline uint32_t A6XX_RB_DEPTH_BUFFER_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A6XX_RB_DEPTH_BUFFER_PITCH__SHIFT) & A6XX_RB_DEPTH_BUFFER_PITCH__MASK; -} - -#define REG_A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH 0x00008874 -#define A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK 0xffffffff -#define A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT) & A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK; -} - -#define REG_A6XX_RB_DEPTH_BUFFER_BASE_LO 0x00008875 - -#define REG_A6XX_RB_DEPTH_BUFFER_BASE_HI 0x00008876 - -#define REG_A6XX_RB_DEPTH_BUFFER_BASE_GMEM 
0x00008877 - -#define REG_A6XX_RB_UNKNOWN_8878 0x00008878 - -#define REG_A6XX_RB_UNKNOWN_8879 0x00008879 - -#define REG_A6XX_RB_STENCIL_CONTROL 0x00008880 -#define A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001 -#define A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000002 -#define A6XX_RB_STENCIL_CONTROL_STENCIL_READ 0x00000004 -#define A6XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700 -#define A6XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8 -static inline uint32_t A6XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val) -{ - return ((val) << A6XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A6XX_RB_STENCIL_CONTROL_FUNC__MASK; -} -#define A6XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800 -#define A6XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11 -static inline uint32_t A6XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val) -{ - return ((val) << A6XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A6XX_RB_STENCIL_CONTROL_FAIL__MASK; -} -#define A6XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000 -#define A6XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14 -static inline uint32_t A6XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val) -{ - return ((val) << A6XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A6XX_RB_STENCIL_CONTROL_ZPASS__MASK; -} -#define A6XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000 -#define A6XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17 -static inline uint32_t A6XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val) -{ - return ((val) << A6XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & A6XX_RB_STENCIL_CONTROL_ZFAIL__MASK; -} -#define A6XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000 -#define A6XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20 -static inline uint32_t A6XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val) -{ - return ((val) << A6XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A6XX_RB_STENCIL_CONTROL_FUNC_BF__MASK; -} -#define A6XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000 -#define A6XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23 -static inline uint32_t A6XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val) -{ - return ((val) << 
A6XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A6XX_RB_STENCIL_CONTROL_FAIL_BF__MASK; -} -#define A6XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000 -#define A6XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26 -static inline uint32_t A6XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val) -{ - return ((val) << A6XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A6XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK; -} -#define A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000 -#define A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29 -static inline uint32_t A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val) -{ - return ((val) << A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK; -} - -#define REG_A6XX_RB_STENCIL_INFO 0x00008881 -#define A6XX_RB_STENCIL_INFO_SEPARATE_STENCIL 0x00000001 - -#define REG_A6XX_RB_STENCIL_BUFFER_PITCH 0x00008882 -#define A6XX_RB_STENCIL_BUFFER_PITCH__MASK 0xffffffff -#define A6XX_RB_STENCIL_BUFFER_PITCH__SHIFT 0 -static inline uint32_t A6XX_RB_STENCIL_BUFFER_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A6XX_RB_STENCIL_BUFFER_PITCH__SHIFT) & A6XX_RB_STENCIL_BUFFER_PITCH__MASK; -} - -#define REG_A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH 0x00008883 -#define A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH__MASK 0xffffffff -#define A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH__SHIFT) & A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH__MASK; -} - -#define REG_A6XX_RB_STENCIL_BUFFER_BASE_LO 0x00008884 - -#define REG_A6XX_RB_STENCIL_BUFFER_BASE_HI 0x00008885 - -#define REG_A6XX_RB_STENCIL_BUFFER_BASE_GMEM 0x00008886 - -#define REG_A6XX_RB_STENCILREF 0x00008887 -#define A6XX_RB_STENCILREF_REF__MASK 0x000000ff -#define A6XX_RB_STENCILREF_REF__SHIFT 0 -static inline uint32_t A6XX_RB_STENCILREF_REF(uint32_t val) -{ - return ((val) << A6XX_RB_STENCILREF_REF__SHIFT) & A6XX_RB_STENCILREF_REF__MASK; -} 
-#define A6XX_RB_STENCILREF_BFREF__MASK 0x0000ff00 -#define A6XX_RB_STENCILREF_BFREF__SHIFT 8 -static inline uint32_t A6XX_RB_STENCILREF_BFREF(uint32_t val) -{ - return ((val) << A6XX_RB_STENCILREF_BFREF__SHIFT) & A6XX_RB_STENCILREF_BFREF__MASK; -} - -#define REG_A6XX_RB_STENCILMASK 0x00008888 -#define A6XX_RB_STENCILMASK_MASK__MASK 0x000000ff -#define A6XX_RB_STENCILMASK_MASK__SHIFT 0 -static inline uint32_t A6XX_RB_STENCILMASK_MASK(uint32_t val) -{ - return ((val) << A6XX_RB_STENCILMASK_MASK__SHIFT) & A6XX_RB_STENCILMASK_MASK__MASK; -} -#define A6XX_RB_STENCILMASK_BFMASK__MASK 0x0000ff00 -#define A6XX_RB_STENCILMASK_BFMASK__SHIFT 8 -static inline uint32_t A6XX_RB_STENCILMASK_BFMASK(uint32_t val) -{ - return ((val) << A6XX_RB_STENCILMASK_BFMASK__SHIFT) & A6XX_RB_STENCILMASK_BFMASK__MASK; -} - -#define REG_A6XX_RB_STENCILWRMASK 0x00008889 -#define A6XX_RB_STENCILWRMASK_WRMASK__MASK 0x000000ff -#define A6XX_RB_STENCILWRMASK_WRMASK__SHIFT 0 -static inline uint32_t A6XX_RB_STENCILWRMASK_WRMASK(uint32_t val) -{ - return ((val) << A6XX_RB_STENCILWRMASK_WRMASK__SHIFT) & A6XX_RB_STENCILWRMASK_WRMASK__MASK; -} -#define A6XX_RB_STENCILWRMASK_BFWRMASK__MASK 0x0000ff00 -#define A6XX_RB_STENCILWRMASK_BFWRMASK__SHIFT 8 -static inline uint32_t A6XX_RB_STENCILWRMASK_BFWRMASK(uint32_t val) -{ - return ((val) << A6XX_RB_STENCILWRMASK_BFWRMASK__SHIFT) & A6XX_RB_STENCILWRMASK_BFWRMASK__MASK; -} - -#define REG_A6XX_RB_WINDOW_OFFSET 0x00008890 -#define A6XX_RB_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_RB_WINDOW_OFFSET_X__MASK 0x00007fff -#define A6XX_RB_WINDOW_OFFSET_X__SHIFT 0 -static inline uint32_t A6XX_RB_WINDOW_OFFSET_X(uint32_t val) -{ - return ((val) << A6XX_RB_WINDOW_OFFSET_X__SHIFT) & A6XX_RB_WINDOW_OFFSET_X__MASK; -} -#define A6XX_RB_WINDOW_OFFSET_Y__MASK 0x7fff0000 -#define A6XX_RB_WINDOW_OFFSET_Y__SHIFT 16 -static inline uint32_t A6XX_RB_WINDOW_OFFSET_Y(uint32_t val) -{ - return ((val) << A6XX_RB_WINDOW_OFFSET_Y__SHIFT) & A6XX_RB_WINDOW_OFFSET_Y__MASK; -} 
- -#define REG_A6XX_RB_SAMPLE_COUNT_CONTROL 0x00008891 -#define A6XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002 - -#define REG_A6XX_RB_LRZ_CNTL 0x00008898 -#define A6XX_RB_LRZ_CNTL_ENABLE 0x00000001 - -#define REG_A6XX_RB_UNKNOWN_88D0 0x000088d0 - -#define REG_A6XX_RB_BLIT_SCISSOR_TL 0x000088d1 -#define A6XX_RB_BLIT_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_RB_BLIT_SCISSOR_TL_X__MASK 0x00007fff -#define A6XX_RB_BLIT_SCISSOR_TL_X__SHIFT 0 -static inline uint32_t A6XX_RB_BLIT_SCISSOR_TL_X(uint32_t val) -{ - return ((val) << A6XX_RB_BLIT_SCISSOR_TL_X__SHIFT) & A6XX_RB_BLIT_SCISSOR_TL_X__MASK; -} -#define A6XX_RB_BLIT_SCISSOR_TL_Y__MASK 0x7fff0000 -#define A6XX_RB_BLIT_SCISSOR_TL_Y__SHIFT 16 -static inline uint32_t A6XX_RB_BLIT_SCISSOR_TL_Y(uint32_t val) -{ - return ((val) << A6XX_RB_BLIT_SCISSOR_TL_Y__SHIFT) & A6XX_RB_BLIT_SCISSOR_TL_Y__MASK; -} - -#define REG_A6XX_RB_BLIT_SCISSOR_BR 0x000088d2 -#define A6XX_RB_BLIT_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_RB_BLIT_SCISSOR_BR_X__MASK 0x00007fff -#define A6XX_RB_BLIT_SCISSOR_BR_X__SHIFT 0 -static inline uint32_t A6XX_RB_BLIT_SCISSOR_BR_X(uint32_t val) -{ - return ((val) << A6XX_RB_BLIT_SCISSOR_BR_X__SHIFT) & A6XX_RB_BLIT_SCISSOR_BR_X__MASK; -} -#define A6XX_RB_BLIT_SCISSOR_BR_Y__MASK 0x7fff0000 -#define A6XX_RB_BLIT_SCISSOR_BR_Y__SHIFT 16 -static inline uint32_t A6XX_RB_BLIT_SCISSOR_BR_Y(uint32_t val) -{ - return ((val) << A6XX_RB_BLIT_SCISSOR_BR_Y__SHIFT) & A6XX_RB_BLIT_SCISSOR_BR_Y__MASK; -} - -#define REG_A6XX_RB_BLIT_BASE_GMEM 0x000088d6 - -#define REG_A6XX_RB_BLIT_DST_INFO 0x000088d7 -#define A6XX_RB_BLIT_DST_INFO_TILE_MODE__MASK 0x00000003 -#define A6XX_RB_BLIT_DST_INFO_TILE_MODE__SHIFT 0 -static inline uint32_t A6XX_RB_BLIT_DST_INFO_TILE_MODE(enum a6xx_tile_mode val) -{ - return ((val) << A6XX_RB_BLIT_DST_INFO_TILE_MODE__SHIFT) & A6XX_RB_BLIT_DST_INFO_TILE_MODE__MASK; -} -#define A6XX_RB_BLIT_DST_INFO_FLAGS 0x00000004 -#define A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT__MASK 0x00007f80 
-#define A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT__SHIFT 7 -static inline uint32_t A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) -{ - return ((val) << A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT__SHIFT) & A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT__MASK; -} -#define A6XX_RB_BLIT_DST_INFO_COLOR_SWAP__MASK 0x00000060 -#define A6XX_RB_BLIT_DST_INFO_COLOR_SWAP__SHIFT 5 -static inline uint32_t A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A6XX_RB_BLIT_DST_INFO_COLOR_SWAP__SHIFT) & A6XX_RB_BLIT_DST_INFO_COLOR_SWAP__MASK; -} - -#define REG_A6XX_RB_BLIT_DST_LO 0x000088d8 - -#define REG_A6XX_RB_BLIT_DST_HI 0x000088d9 - -#define REG_A6XX_RB_BLIT_DST_PITCH 0x000088da -#define A6XX_RB_BLIT_DST_PITCH__MASK 0xffffffff -#define A6XX_RB_BLIT_DST_PITCH__SHIFT 0 -static inline uint32_t A6XX_RB_BLIT_DST_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A6XX_RB_BLIT_DST_PITCH__SHIFT) & A6XX_RB_BLIT_DST_PITCH__MASK; -} - -#define REG_A6XX_RB_BLIT_DST_ARRAY_PITCH 0x000088db -#define A6XX_RB_BLIT_DST_ARRAY_PITCH__MASK 0xffffffff -#define A6XX_RB_BLIT_DST_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A6XX_RB_BLIT_DST_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A6XX_RB_BLIT_DST_ARRAY_PITCH__SHIFT) & A6XX_RB_BLIT_DST_ARRAY_PITCH__MASK; -} - -#define REG_A6XX_RB_BLIT_FLAG_DST_LO 0x000088dc - -#define REG_A6XX_RB_BLIT_FLAG_DST_HI 0x000088dd - -#define REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0 0x000088df - -#define REG_A6XX_RB_BLIT_CLEAR_COLOR_DW1 0x000088e0 - -#define REG_A6XX_RB_BLIT_CLEAR_COLOR_DW2 0x000088e1 - -#define REG_A6XX_RB_BLIT_CLEAR_COLOR_DW3 0x000088e2 - -#define REG_A6XX_RB_BLIT_INFO 0x000088e3 -#define A6XX_RB_BLIT_INFO_UNK0 0x00000001 -#define A6XX_RB_BLIT_INFO_GMEM 0x00000002 -#define A6XX_RB_BLIT_INFO_INTEGER 0x00000004 -#define A6XX_RB_BLIT_INFO_DEPTH 0x00000008 -#define A6XX_RB_BLIT_INFO_CLEAR_MASK__MASK 0x000000f0 -#define A6XX_RB_BLIT_INFO_CLEAR_MASK__SHIFT 4 -static inline uint32_t 
A6XX_RB_BLIT_INFO_CLEAR_MASK(uint32_t val) -{ - return ((val) << A6XX_RB_BLIT_INFO_CLEAR_MASK__SHIFT) & A6XX_RB_BLIT_INFO_CLEAR_MASK__MASK; -} - -#define REG_A6XX_RB_UNKNOWN_88F0 0x000088f0 - -#define REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE_LO 0x00008900 - -#define REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE_HI 0x00008901 - -#define REG_A6XX_RB_DEPTH_FLAG_BUFFER_PITCH 0x00008902 - -static inline uint32_t REG_A6XX_RB_MRT_FLAG_BUFFER(uint32_t i0) { return 0x00008903 + 0x3*i0; } - -static inline uint32_t REG_A6XX_RB_MRT_FLAG_BUFFER_ADDR_LO(uint32_t i0) { return 0x00008903 + 0x3*i0; } - -static inline uint32_t REG_A6XX_RB_MRT_FLAG_BUFFER_ADDR_HI(uint32_t i0) { return 0x00008904 + 0x3*i0; } - -static inline uint32_t REG_A6XX_RB_MRT_FLAG_BUFFER_PITCH(uint32_t i0) { return 0x00008905 + 0x3*i0; } -#define A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH__MASK 0x000007ff -#define A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH__SHIFT 0 -static inline uint32_t A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH__SHIFT) & A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH__MASK; -} -#define A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH__MASK 0x003ff800 -#define A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH__SHIFT 11 -static inline uint32_t A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH__SHIFT) & A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH__MASK; -} - -#define REG_A6XX_RB_SAMPLE_COUNT_ADDR_LO 0x00008927 - -#define REG_A6XX_RB_SAMPLE_COUNT_ADDR_HI 0x00008928 - -#define REG_A6XX_RB_2D_BLIT_CNTL 0x00008c00 -#define A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT__MASK 0x0000ff00 -#define A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT__SHIFT 8 -static inline uint32_t A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(enum a6xx_color_fmt val) -{ - return ((val) << A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT__SHIFT) & A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT__MASK; -} -#define A6XX_RB_2D_BLIT_CNTL_SCISSOR 0x00010000 - 
-#define REG_A6XX_RB_UNKNOWN_8C01 0x00008c01 - -#define REG_A6XX_RB_2D_DST_INFO 0x00008c17 -#define A6XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A6XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A6XX_RB_2D_DST_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) -{ - return ((val) << A6XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT) & A6XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK; -} -#define A6XX_RB_2D_DST_INFO_TILE_MODE__MASK 0x00000300 -#define A6XX_RB_2D_DST_INFO_TILE_MODE__SHIFT 8 -static inline uint32_t A6XX_RB_2D_DST_INFO_TILE_MODE(enum a6xx_tile_mode val) -{ - return ((val) << A6XX_RB_2D_DST_INFO_TILE_MODE__SHIFT) & A6XX_RB_2D_DST_INFO_TILE_MODE__MASK; -} -#define A6XX_RB_2D_DST_INFO_COLOR_SWAP__MASK 0x00000c00 -#define A6XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT 10 -static inline uint32_t A6XX_RB_2D_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A6XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT) & A6XX_RB_2D_DST_INFO_COLOR_SWAP__MASK; -} -#define A6XX_RB_2D_DST_INFO_FLAGS 0x00001000 - -#define REG_A6XX_RB_2D_DST_LO 0x00008c18 - -#define REG_A6XX_RB_2D_DST_HI 0x00008c19 - -#define REG_A6XX_RB_2D_DST_SIZE 0x00008c1a -#define A6XX_RB_2D_DST_SIZE_PITCH__MASK 0x0000ffff -#define A6XX_RB_2D_DST_SIZE_PITCH__SHIFT 0 -static inline uint32_t A6XX_RB_2D_DST_SIZE_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A6XX_RB_2D_DST_SIZE_PITCH__SHIFT) & A6XX_RB_2D_DST_SIZE_PITCH__MASK; -} - -#define REG_A6XX_RB_2D_DST_FLAGS_LO 0x00008c20 - -#define REG_A6XX_RB_2D_DST_FLAGS_HI 0x00008c21 - -#define REG_A6XX_RB_2D_SRC_SOLID_C0 0x00008c2c - -#define REG_A6XX_RB_2D_SRC_SOLID_C1 0x00008c2d - -#define REG_A6XX_RB_2D_SRC_SOLID_C2 0x00008c2e - -#define REG_A6XX_RB_2D_SRC_SOLID_C3 0x00008c2f - -#define REG_A6XX_RB_UNKNOWN_8E01 0x00008e01 - -#define REG_A6XX_RB_UNKNOWN_8E04 0x00008e04 - -#define REG_A6XX_RB_CCU_CNTL 0x00008e07 - -#define REG_A6XX_VPC_UNKNOWN_9101 0x00009101 - -#define REG_A6XX_VPC_GS_SIV_CNTL 0x00009104 - -#define 
REG_A6XX_VPC_UNKNOWN_9107 0x00009107 - -#define REG_A6XX_VPC_UNKNOWN_9108 0x00009108 - -static inline uint32_t REG_A6XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x00009200 + 0x1*i0; } - -static inline uint32_t REG_A6XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x00009200 + 0x1*i0; } - -static inline uint32_t REG_A6XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x00009208 + 0x1*i0; } - -static inline uint32_t REG_A6XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x00009208 + 0x1*i0; } - -#define REG_A6XX_VPC_UNKNOWN_9210 0x00009210 - -#define REG_A6XX_VPC_UNKNOWN_9211 0x00009211 - -static inline uint32_t REG_A6XX_VPC_VAR(uint32_t i0) { return 0x00009212 + 0x1*i0; } - -static inline uint32_t REG_A6XX_VPC_VAR_DISABLE(uint32_t i0) { return 0x00009212 + 0x1*i0; } - -#define REG_A6XX_VPC_SO_CNTL 0x00009216 -#define A6XX_VPC_SO_CNTL_ENABLE 0x00010000 - -#define REG_A6XX_VPC_SO_PROG 0x00009217 -#define A6XX_VPC_SO_PROG_A_BUF__MASK 0x00000003 -#define A6XX_VPC_SO_PROG_A_BUF__SHIFT 0 -static inline uint32_t A6XX_VPC_SO_PROG_A_BUF(uint32_t val) -{ - return ((val) << A6XX_VPC_SO_PROG_A_BUF__SHIFT) & A6XX_VPC_SO_PROG_A_BUF__MASK; -} -#define A6XX_VPC_SO_PROG_A_OFF__MASK 0x000007fc -#define A6XX_VPC_SO_PROG_A_OFF__SHIFT 2 -static inline uint32_t A6XX_VPC_SO_PROG_A_OFF(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << A6XX_VPC_SO_PROG_A_OFF__SHIFT) & A6XX_VPC_SO_PROG_A_OFF__MASK; -} -#define A6XX_VPC_SO_PROG_A_EN 0x00000800 -#define A6XX_VPC_SO_PROG_B_BUF__MASK 0x00003000 -#define A6XX_VPC_SO_PROG_B_BUF__SHIFT 12 -static inline uint32_t A6XX_VPC_SO_PROG_B_BUF(uint32_t val) -{ - return ((val) << A6XX_VPC_SO_PROG_B_BUF__SHIFT) & A6XX_VPC_SO_PROG_B_BUF__MASK; -} -#define A6XX_VPC_SO_PROG_B_OFF__MASK 0x007fc000 -#define A6XX_VPC_SO_PROG_B_OFF__SHIFT 14 -static inline uint32_t A6XX_VPC_SO_PROG_B_OFF(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << A6XX_VPC_SO_PROG_B_OFF__SHIFT) & A6XX_VPC_SO_PROG_B_OFF__MASK; -} -#define A6XX_VPC_SO_PROG_B_EN 
0x00800000 - -static inline uint32_t REG_A6XX_VPC_SO(uint32_t i0) { return 0x0000921a + 0x7*i0; } - -static inline uint32_t REG_A6XX_VPC_SO_BUFFER_BASE_LO(uint32_t i0) { return 0x0000921a + 0x7*i0; } - -static inline uint32_t REG_A6XX_VPC_SO_BUFFER_BASE_HI(uint32_t i0) { return 0x0000921b + 0x7*i0; } - -static inline uint32_t REG_A6XX_VPC_SO_BUFFER_SIZE(uint32_t i0) { return 0x0000921c + 0x7*i0; } - -static inline uint32_t REG_A6XX_VPC_SO_NCOMP(uint32_t i0) { return 0x0000921d + 0x7*i0; } - -static inline uint32_t REG_A6XX_VPC_SO_BUFFER_OFFSET(uint32_t i0) { return 0x0000921e + 0x7*i0; } - -static inline uint32_t REG_A6XX_VPC_SO_FLUSH_BASE_LO(uint32_t i0) { return 0x0000921f + 0x7*i0; } - -static inline uint32_t REG_A6XX_VPC_SO_FLUSH_BASE_HI(uint32_t i0) { return 0x00009220 + 0x7*i0; } - -#define REG_A6XX_VPC_UNKNOWN_9236 0x00009236 - -#define REG_A6XX_VPC_UNKNOWN_9300 0x00009300 - -#define REG_A6XX_VPC_PACK 0x00009301 -#define A6XX_VPC_PACK_STRIDE_IN_VPC__MASK 0x000000ff -#define A6XX_VPC_PACK_STRIDE_IN_VPC__SHIFT 0 -static inline uint32_t A6XX_VPC_PACK_STRIDE_IN_VPC(uint32_t val) -{ - return ((val) << A6XX_VPC_PACK_STRIDE_IN_VPC__SHIFT) & A6XX_VPC_PACK_STRIDE_IN_VPC__MASK; -} -#define A6XX_VPC_PACK_NUMNONPOSVAR__MASK 0x0000ff00 -#define A6XX_VPC_PACK_NUMNONPOSVAR__SHIFT 8 -static inline uint32_t A6XX_VPC_PACK_NUMNONPOSVAR(uint32_t val) -{ - return ((val) << A6XX_VPC_PACK_NUMNONPOSVAR__SHIFT) & A6XX_VPC_PACK_NUMNONPOSVAR__MASK; -} -#define A6XX_VPC_PACK_PSIZELOC__MASK 0x00ff0000 -#define A6XX_VPC_PACK_PSIZELOC__SHIFT 16 -static inline uint32_t A6XX_VPC_PACK_PSIZELOC(uint32_t val) -{ - return ((val) << A6XX_VPC_PACK_PSIZELOC__SHIFT) & A6XX_VPC_PACK_PSIZELOC__MASK; -} - -#define REG_A6XX_VPC_CNTL_0 0x00009304 -#define A6XX_VPC_CNTL_0_NUMNONPOSVAR__MASK 0x000000ff -#define A6XX_VPC_CNTL_0_NUMNONPOSVAR__SHIFT 0 -static inline uint32_t A6XX_VPC_CNTL_0_NUMNONPOSVAR(uint32_t val) -{ - return ((val) << A6XX_VPC_CNTL_0_NUMNONPOSVAR__SHIFT) & 
A6XX_VPC_CNTL_0_NUMNONPOSVAR__MASK; -} -#define A6XX_VPC_CNTL_0_VARYING 0x00010000 - -#define REG_A6XX_VPC_SO_BUF_CNTL 0x00009305 -#define A6XX_VPC_SO_BUF_CNTL_BUF0 0x00000001 -#define A6XX_VPC_SO_BUF_CNTL_BUF1 0x00000008 -#define A6XX_VPC_SO_BUF_CNTL_BUF2 0x00000040 -#define A6XX_VPC_SO_BUF_CNTL_BUF3 0x00000200 -#define A6XX_VPC_SO_BUF_CNTL_ENABLE 0x00008000 - -#define REG_A6XX_VPC_SO_OVERRIDE 0x00009306 -#define A6XX_VPC_SO_OVERRIDE_SO_DISABLE 0x00000001 - -#define REG_A6XX_VPC_UNKNOWN_9600 0x00009600 - -#define REG_A6XX_VPC_UNKNOWN_9602 0x00009602 - -#define REG_A6XX_PC_UNKNOWN_9801 0x00009801 - -#define REG_A6XX_PC_RESTART_INDEX 0x00009803 - -#define REG_A6XX_PC_MODE_CNTL 0x00009804 - -#define REG_A6XX_PC_UNKNOWN_9805 0x00009805 - -#define REG_A6XX_PC_UNKNOWN_9806 0x00009806 - -#define REG_A6XX_PC_UNKNOWN_9980 0x00009980 - -#define REG_A6XX_PC_UNKNOWN_9981 0x00009981 - -#define REG_A6XX_PC_UNKNOWN_9990 0x00009990 - -#define REG_A6XX_PC_PRIMITIVE_CNTL_0 0x00009b00 -#define A6XX_PC_PRIMITIVE_CNTL_0_PRIMITIVE_RESTART 0x00000001 -#define A6XX_PC_PRIMITIVE_CNTL_0_PROVOKING_VTX_LAST 0x00000002 - -#define REG_A6XX_PC_PRIMITIVE_CNTL_1 0x00009b01 -#define A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC__MASK 0x0000007f -#define A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC__SHIFT 0 -static inline uint32_t A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC(uint32_t val) -{ - return ((val) << A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC__SHIFT) & A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC__MASK; -} -#define A6XX_PC_PRIMITIVE_CNTL_1_PSIZE 0x00000100 - -#define REG_A6XX_PC_UNKNOWN_9B06 0x00009b06 - -#define REG_A6XX_PC_UNKNOWN_9B07 0x00009b07 - -#define REG_A6XX_PC_TESSFACTOR_ADDR_LO 0x00009e08 - -#define REG_A6XX_PC_TESSFACTOR_ADDR_HI 0x00009e09 - -#define REG_A6XX_PC_UNKNOWN_9E72 0x00009e72 - -#define REG_A6XX_VFD_CONTROL_0 0x0000a000 -#define A6XX_VFD_CONTROL_0_VTXCNT__MASK 0x0000003f -#define A6XX_VFD_CONTROL_0_VTXCNT__SHIFT 0 -static inline uint32_t A6XX_VFD_CONTROL_0_VTXCNT(uint32_t val) -{ - return 
((val) << A6XX_VFD_CONTROL_0_VTXCNT__SHIFT) & A6XX_VFD_CONTROL_0_VTXCNT__MASK; -} - -#define REG_A6XX_VFD_CONTROL_1 0x0000a001 -#define A6XX_VFD_CONTROL_1_REGID4VTX__MASK 0x000000ff -#define A6XX_VFD_CONTROL_1_REGID4VTX__SHIFT 0 -static inline uint32_t A6XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) -{ - return ((val) << A6XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A6XX_VFD_CONTROL_1_REGID4VTX__MASK; -} -#define A6XX_VFD_CONTROL_1_REGID4INST__MASK 0x0000ff00 -#define A6XX_VFD_CONTROL_1_REGID4INST__SHIFT 8 -static inline uint32_t A6XX_VFD_CONTROL_1_REGID4INST(uint32_t val) -{ - return ((val) << A6XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A6XX_VFD_CONTROL_1_REGID4INST__MASK; -} -#define A6XX_VFD_CONTROL_1_REGID4PRIMID__MASK 0x00ff0000 -#define A6XX_VFD_CONTROL_1_REGID4PRIMID__SHIFT 16 -static inline uint32_t A6XX_VFD_CONTROL_1_REGID4PRIMID(uint32_t val) -{ - return ((val) << A6XX_VFD_CONTROL_1_REGID4PRIMID__SHIFT) & A6XX_VFD_CONTROL_1_REGID4PRIMID__MASK; -} - -#define REG_A6XX_VFD_CONTROL_2 0x0000a002 -#define A6XX_VFD_CONTROL_2_REGID_PATCHID__MASK 0x000000ff -#define A6XX_VFD_CONTROL_2_REGID_PATCHID__SHIFT 0 -static inline uint32_t A6XX_VFD_CONTROL_2_REGID_PATCHID(uint32_t val) -{ - return ((val) << A6XX_VFD_CONTROL_2_REGID_PATCHID__SHIFT) & A6XX_VFD_CONTROL_2_REGID_PATCHID__MASK; -} - -#define REG_A6XX_VFD_CONTROL_3 0x0000a003 -#define A6XX_VFD_CONTROL_3_REGID_PATCHID__MASK 0x0000ff00 -#define A6XX_VFD_CONTROL_3_REGID_PATCHID__SHIFT 8 -static inline uint32_t A6XX_VFD_CONTROL_3_REGID_PATCHID(uint32_t val) -{ - return ((val) << A6XX_VFD_CONTROL_3_REGID_PATCHID__SHIFT) & A6XX_VFD_CONTROL_3_REGID_PATCHID__MASK; -} -#define A6XX_VFD_CONTROL_3_REGID_TESSX__MASK 0x00ff0000 -#define A6XX_VFD_CONTROL_3_REGID_TESSX__SHIFT 16 -static inline uint32_t A6XX_VFD_CONTROL_3_REGID_TESSX(uint32_t val) -{ - return ((val) << A6XX_VFD_CONTROL_3_REGID_TESSX__SHIFT) & A6XX_VFD_CONTROL_3_REGID_TESSX__MASK; -} -#define A6XX_VFD_CONTROL_3_REGID_TESSY__MASK 0xff000000 -#define 
A6XX_VFD_CONTROL_3_REGID_TESSY__SHIFT 24 -static inline uint32_t A6XX_VFD_CONTROL_3_REGID_TESSY(uint32_t val) -{ - return ((val) << A6XX_VFD_CONTROL_3_REGID_TESSY__SHIFT) & A6XX_VFD_CONTROL_3_REGID_TESSY__MASK; -} - -#define REG_A6XX_VFD_CONTROL_4 0x0000a004 - -#define REG_A6XX_VFD_CONTROL_5 0x0000a005 - -#define REG_A6XX_VFD_CONTROL_6 0x0000a006 - -#define REG_A6XX_VFD_MODE_CNTL 0x0000a007 -#define A6XX_VFD_MODE_CNTL_BINNING_PASS 0x00000001 - -#define REG_A6XX_VFD_UNKNOWN_A008 0x0000a008 - -#define REG_A6XX_VFD_UNKNOWN_A009 0x0000a009 - -#define REG_A6XX_VFD_INDEX_OFFSET 0x0000a00e - -#define REG_A6XX_VFD_INSTANCE_START_OFFSET 0x0000a00f - -static inline uint32_t REG_A6XX_VFD_FETCH(uint32_t i0) { return 0x0000a010 + 0x4*i0; } - -static inline uint32_t REG_A6XX_VFD_FETCH_BASE_LO(uint32_t i0) { return 0x0000a010 + 0x4*i0; } - -static inline uint32_t REG_A6XX_VFD_FETCH_BASE_HI(uint32_t i0) { return 0x0000a011 + 0x4*i0; } - -static inline uint32_t REG_A6XX_VFD_FETCH_SIZE(uint32_t i0) { return 0x0000a012 + 0x4*i0; } - -static inline uint32_t REG_A6XX_VFD_FETCH_STRIDE(uint32_t i0) { return 0x0000a013 + 0x4*i0; } - -static inline uint32_t REG_A6XX_VFD_DECODE(uint32_t i0) { return 0x0000a090 + 0x2*i0; } - -static inline uint32_t REG_A6XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x0000a090 + 0x2*i0; } -#define A6XX_VFD_DECODE_INSTR_IDX__MASK 0x0000001f -#define A6XX_VFD_DECODE_INSTR_IDX__SHIFT 0 -static inline uint32_t A6XX_VFD_DECODE_INSTR_IDX(uint32_t val) -{ - return ((val) << A6XX_VFD_DECODE_INSTR_IDX__SHIFT) & A6XX_VFD_DECODE_INSTR_IDX__MASK; -} -#define A6XX_VFD_DECODE_INSTR_INSTANCED 0x00020000 -#define A6XX_VFD_DECODE_INSTR_FORMAT__MASK 0x0ff00000 -#define A6XX_VFD_DECODE_INSTR_FORMAT__SHIFT 20 -static inline uint32_t A6XX_VFD_DECODE_INSTR_FORMAT(enum a6xx_vtx_fmt val) -{ - return ((val) << A6XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A6XX_VFD_DECODE_INSTR_FORMAT__MASK; -} -#define A6XX_VFD_DECODE_INSTR_SWAP__MASK 0x30000000 -#define A6XX_VFD_DECODE_INSTR_SWAP__SHIFT 28 
-static inline uint32_t A6XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A6XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A6XX_VFD_DECODE_INSTR_SWAP__MASK; -} -#define A6XX_VFD_DECODE_INSTR_UNK30 0x40000000 -#define A6XX_VFD_DECODE_INSTR_FLOAT 0x80000000 - -static inline uint32_t REG_A6XX_VFD_DECODE_STEP_RATE(uint32_t i0) { return 0x0000a091 + 0x2*i0; } - -static inline uint32_t REG_A6XX_VFD_DEST_CNTL(uint32_t i0) { return 0x0000a0d0 + 0x1*i0; } - -static inline uint32_t REG_A6XX_VFD_DEST_CNTL_INSTR(uint32_t i0) { return 0x0000a0d0 + 0x1*i0; } -#define A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK__MASK 0x0000000f -#define A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK__SHIFT 0 -static inline uint32_t A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK(uint32_t val) -{ - return ((val) << A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK__SHIFT) & A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK__MASK; -} -#define A6XX_VFD_DEST_CNTL_INSTR_REGID__MASK 0x00000ff0 -#define A6XX_VFD_DEST_CNTL_INSTR_REGID__SHIFT 4 -static inline uint32_t A6XX_VFD_DEST_CNTL_INSTR_REGID(uint32_t val) -{ - return ((val) << A6XX_VFD_DEST_CNTL_INSTR_REGID__SHIFT) & A6XX_VFD_DEST_CNTL_INSTR_REGID__MASK; -} - -#define REG_A6XX_SP_UNKNOWN_A0F8 0x0000a0f8 - -#define REG_A6XX_SP_PRIMITIVE_CNTL 0x0000a802 -#define A6XX_SP_PRIMITIVE_CNTL_VSOUT__MASK 0x0000001f -#define A6XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT 0 -static inline uint32_t A6XX_SP_PRIMITIVE_CNTL_VSOUT(uint32_t val) -{ - return ((val) << A6XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT) & A6XX_SP_PRIMITIVE_CNTL_VSOUT__MASK; -} - -static inline uint32_t REG_A6XX_SP_VS_OUT(uint32_t i0) { return 0x0000a803 + 0x1*i0; } - -static inline uint32_t REG_A6XX_SP_VS_OUT_REG(uint32_t i0) { return 0x0000a803 + 0x1*i0; } -#define A6XX_SP_VS_OUT_REG_A_REGID__MASK 0x000000ff -#define A6XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 -static inline uint32_t A6XX_SP_VS_OUT_REG_A_REGID(uint32_t val) -{ - return ((val) << A6XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A6XX_SP_VS_OUT_REG_A_REGID__MASK; -} -#define A6XX_SP_VS_OUT_REG_A_COMPMASK__MASK 
0x00000f00 -#define A6XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 8 -static inline uint32_t A6XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val) -{ - return ((val) << A6XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A6XX_SP_VS_OUT_REG_A_COMPMASK__MASK; -} -#define A6XX_SP_VS_OUT_REG_B_REGID__MASK 0x00ff0000 -#define A6XX_SP_VS_OUT_REG_B_REGID__SHIFT 16 -static inline uint32_t A6XX_SP_VS_OUT_REG_B_REGID(uint32_t val) -{ - return ((val) << A6XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A6XX_SP_VS_OUT_REG_B_REGID__MASK; -} -#define A6XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x0f000000 -#define A6XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 24 -static inline uint32_t A6XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) -{ - return ((val) << A6XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A6XX_SP_VS_OUT_REG_B_COMPMASK__MASK; -} - -static inline uint32_t REG_A6XX_SP_VS_VPC_DST(uint32_t i0) { return 0x0000a813 + 0x1*i0; } - -static inline uint32_t REG_A6XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x0000a813 + 0x1*i0; } -#define A6XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff -#define A6XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 -static inline uint32_t A6XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) -{ - return ((val) << A6XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A6XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK; -} -#define A6XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 -#define A6XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8 -static inline uint32_t A6XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val) -{ - return ((val) << A6XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A6XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK; -} -#define A6XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 -#define A6XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16 -static inline uint32_t A6XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val) -{ - return ((val) << A6XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A6XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK; -} -#define A6XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 -#define A6XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24 -static inline uint32_t A6XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) -{ - return ((val) << 
A6XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A6XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK; -} - -#define REG_A6XX_SP_VS_CTRL_REG0 0x0000a800 -#define A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e -#define A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 -static inline uint32_t A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 -#define A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 -static inline uint32_t A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A6XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 -#define A6XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT 14 -static inline uint32_t A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A6XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK; -} -#define A6XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00100000 -#define A6XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 20 -static inline uint32_t A6XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A6XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_VS_CTRL_REG0_THREADSIZE__MASK; -} -#define A6XX_SP_VS_CTRL_REG0_VARYING 0x00400000 -#define A6XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x04000000 -#define A6XX_SP_VS_CTRL_REG0_MERGEDREGS 0x80000000 - -#define REG_A6XX_SP_UNKNOWN_A81B 0x0000a81b - -#define REG_A6XX_SP_VS_OBJ_START_LO 0x0000a81c - -#define REG_A6XX_SP_VS_OBJ_START_HI 0x0000a81d - -#define REG_A6XX_SP_VS_TEX_COUNT 0x0000a822 - -#define REG_A6XX_SP_VS_CONFIG 0x0000a823 -#define A6XX_SP_VS_CONFIG_ENABLED 0x00000100 -#define A6XX_SP_VS_CONFIG_NTEX__MASK 0x0001fe00 -#define A6XX_SP_VS_CONFIG_NTEX__SHIFT 9 -static inline uint32_t A6XX_SP_VS_CONFIG_NTEX(uint32_t val) -{ - return ((val) << A6XX_SP_VS_CONFIG_NTEX__SHIFT) & 
A6XX_SP_VS_CONFIG_NTEX__MASK; -} -#define A6XX_SP_VS_CONFIG_NSAMP__MASK 0x01fe0000 -#define A6XX_SP_VS_CONFIG_NSAMP__SHIFT 17 -static inline uint32_t A6XX_SP_VS_CONFIG_NSAMP(uint32_t val) -{ - return ((val) << A6XX_SP_VS_CONFIG_NSAMP__SHIFT) & A6XX_SP_VS_CONFIG_NSAMP__MASK; -} - -#define REG_A6XX_SP_VS_INSTRLEN 0x0000a824 - -#define REG_A6XX_SP_HS_CTRL_REG0 0x0000a830 -#define A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e -#define A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 -static inline uint32_t A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 -#define A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 -static inline uint32_t A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A6XX_SP_HS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 -#define A6XX_SP_HS_CTRL_REG0_BRANCHSTACK__SHIFT 14 -static inline uint32_t A6XX_SP_HS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A6XX_SP_HS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_HS_CTRL_REG0_BRANCHSTACK__MASK; -} -#define A6XX_SP_HS_CTRL_REG0_THREADSIZE__MASK 0x00100000 -#define A6XX_SP_HS_CTRL_REG0_THREADSIZE__SHIFT 20 -static inline uint32_t A6XX_SP_HS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A6XX_SP_HS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_HS_CTRL_REG0_THREADSIZE__MASK; -} -#define A6XX_SP_HS_CTRL_REG0_VARYING 0x00400000 -#define A6XX_SP_HS_CTRL_REG0_PIXLODENABLE 0x04000000 -#define A6XX_SP_HS_CTRL_REG0_MERGEDREGS 0x80000000 - -#define REG_A6XX_SP_HS_UNKNOWN_A831 0x0000a831 - -#define REG_A6XX_SP_HS_OBJ_START_LO 0x0000a834 - -#define REG_A6XX_SP_HS_OBJ_START_HI 0x0000a835 - -#define REG_A6XX_SP_HS_TEX_COUNT 0x0000a83a - -#define REG_A6XX_SP_HS_CONFIG 0x0000a83b -#define 
A6XX_SP_HS_CONFIG_ENABLED 0x00000100 -#define A6XX_SP_HS_CONFIG_NTEX__MASK 0x0001fe00 -#define A6XX_SP_HS_CONFIG_NTEX__SHIFT 9 -static inline uint32_t A6XX_SP_HS_CONFIG_NTEX(uint32_t val) -{ - return ((val) << A6XX_SP_HS_CONFIG_NTEX__SHIFT) & A6XX_SP_HS_CONFIG_NTEX__MASK; -} -#define A6XX_SP_HS_CONFIG_NSAMP__MASK 0x01fe0000 -#define A6XX_SP_HS_CONFIG_NSAMP__SHIFT 17 -static inline uint32_t A6XX_SP_HS_CONFIG_NSAMP(uint32_t val) -{ - return ((val) << A6XX_SP_HS_CONFIG_NSAMP__SHIFT) & A6XX_SP_HS_CONFIG_NSAMP__MASK; -} - -#define REG_A6XX_SP_HS_INSTRLEN 0x0000a83c - -#define REG_A6XX_SP_DS_CTRL_REG0 0x0000a840 -#define A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e -#define A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 -static inline uint32_t A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 -#define A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 -static inline uint32_t A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A6XX_SP_DS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 -#define A6XX_SP_DS_CTRL_REG0_BRANCHSTACK__SHIFT 14 -static inline uint32_t A6XX_SP_DS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A6XX_SP_DS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_DS_CTRL_REG0_BRANCHSTACK__MASK; -} -#define A6XX_SP_DS_CTRL_REG0_THREADSIZE__MASK 0x00100000 -#define A6XX_SP_DS_CTRL_REG0_THREADSIZE__SHIFT 20 -static inline uint32_t A6XX_SP_DS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A6XX_SP_DS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_DS_CTRL_REG0_THREADSIZE__MASK; -} -#define A6XX_SP_DS_CTRL_REG0_VARYING 0x00400000 -#define A6XX_SP_DS_CTRL_REG0_PIXLODENABLE 0x04000000 -#define A6XX_SP_DS_CTRL_REG0_MERGEDREGS 0x80000000 - -#define 
REG_A6XX_SP_DS_OBJ_START_LO 0x0000a85c - -#define REG_A6XX_SP_DS_OBJ_START_HI 0x0000a85d - -#define REG_A6XX_SP_DS_TEX_COUNT 0x0000a862 - -#define REG_A6XX_SP_DS_CONFIG 0x0000a863 -#define A6XX_SP_DS_CONFIG_ENABLED 0x00000100 -#define A6XX_SP_DS_CONFIG_NTEX__MASK 0x0001fe00 -#define A6XX_SP_DS_CONFIG_NTEX__SHIFT 9 -static inline uint32_t A6XX_SP_DS_CONFIG_NTEX(uint32_t val) -{ - return ((val) << A6XX_SP_DS_CONFIG_NTEX__SHIFT) & A6XX_SP_DS_CONFIG_NTEX__MASK; -} -#define A6XX_SP_DS_CONFIG_NSAMP__MASK 0x01fe0000 -#define A6XX_SP_DS_CONFIG_NSAMP__SHIFT 17 -static inline uint32_t A6XX_SP_DS_CONFIG_NSAMP(uint32_t val) -{ - return ((val) << A6XX_SP_DS_CONFIG_NSAMP__SHIFT) & A6XX_SP_DS_CONFIG_NSAMP__MASK; -} - -#define REG_A6XX_SP_DS_INSTRLEN 0x0000a864 - -#define REG_A6XX_SP_GS_CTRL_REG0 0x0000a870 -#define A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e -#define A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 -static inline uint32_t A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 -#define A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 -static inline uint32_t A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A6XX_SP_GS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 -#define A6XX_SP_GS_CTRL_REG0_BRANCHSTACK__SHIFT 14 -static inline uint32_t A6XX_SP_GS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A6XX_SP_GS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_GS_CTRL_REG0_BRANCHSTACK__MASK; -} -#define A6XX_SP_GS_CTRL_REG0_THREADSIZE__MASK 0x00100000 -#define A6XX_SP_GS_CTRL_REG0_THREADSIZE__SHIFT 20 -static inline uint32_t A6XX_SP_GS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A6XX_SP_GS_CTRL_REG0_THREADSIZE__SHIFT) & 
A6XX_SP_GS_CTRL_REG0_THREADSIZE__MASK; -} -#define A6XX_SP_GS_CTRL_REG0_VARYING 0x00400000 -#define A6XX_SP_GS_CTRL_REG0_PIXLODENABLE 0x04000000 -#define A6XX_SP_GS_CTRL_REG0_MERGEDREGS 0x80000000 - -#define REG_A6XX_SP_GS_UNKNOWN_A871 0x0000a871 - -#define REG_A6XX_SP_GS_OBJ_START_LO 0x0000a88d - -#define REG_A6XX_SP_GS_OBJ_START_HI 0x0000a88e - -#define REG_A6XX_SP_GS_TEX_COUNT 0x0000a893 - -#define REG_A6XX_SP_GS_CONFIG 0x0000a894 -#define A6XX_SP_GS_CONFIG_ENABLED 0x00000100 -#define A6XX_SP_GS_CONFIG_NTEX__MASK 0x0001fe00 -#define A6XX_SP_GS_CONFIG_NTEX__SHIFT 9 -static inline uint32_t A6XX_SP_GS_CONFIG_NTEX(uint32_t val) -{ - return ((val) << A6XX_SP_GS_CONFIG_NTEX__SHIFT) & A6XX_SP_GS_CONFIG_NTEX__MASK; -} -#define A6XX_SP_GS_CONFIG_NSAMP__MASK 0x01fe0000 -#define A6XX_SP_GS_CONFIG_NSAMP__SHIFT 17 -static inline uint32_t A6XX_SP_GS_CONFIG_NSAMP(uint32_t val) -{ - return ((val) << A6XX_SP_GS_CONFIG_NSAMP__SHIFT) & A6XX_SP_GS_CONFIG_NSAMP__MASK; -} - -#define REG_A6XX_SP_GS_INSTRLEN 0x0000a895 - -#define REG_A6XX_SP_VS_TEX_SAMP_LO 0x0000a8a0 - -#define REG_A6XX_SP_VS_TEX_SAMP_HI 0x0000a8a1 - -#define REG_A6XX_SP_HS_TEX_SAMP_LO 0x0000a8a2 - -#define REG_A6XX_SP_HS_TEX_SAMP_HI 0x0000a8a3 - -#define REG_A6XX_SP_DS_TEX_SAMP_LO 0x0000a8a4 - -#define REG_A6XX_SP_DS_TEX_SAMP_HI 0x0000a8a5 - -#define REG_A6XX_SP_GS_TEX_SAMP_LO 0x0000a8a6 - -#define REG_A6XX_SP_GS_TEX_SAMP_HI 0x0000a8a7 - -#define REG_A6XX_SP_VS_TEX_CONST_LO 0x0000a8a8 - -#define REG_A6XX_SP_VS_TEX_CONST_HI 0x0000a8a9 - -#define REG_A6XX_SP_HS_TEX_CONST_LO 0x0000a8aa - -#define REG_A6XX_SP_HS_TEX_CONST_HI 0x0000a8ab - -#define REG_A6XX_SP_DS_TEX_CONST_LO 0x0000a8ac - -#define REG_A6XX_SP_DS_TEX_CONST_HI 0x0000a8ad - -#define REG_A6XX_SP_GS_TEX_CONST_LO 0x0000a8ae - -#define REG_A6XX_SP_GS_TEX_CONST_HI 0x0000a8af - -#define REG_A6XX_SP_FS_CTRL_REG0 0x0000a980 -#define A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e -#define A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 -static inline uint32_t 
A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 -#define A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 -static inline uint32_t A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A6XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 -#define A6XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT 14 -static inline uint32_t A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A6XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK; -} -#define A6XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00100000 -#define A6XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 20 -static inline uint32_t A6XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A6XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_FS_CTRL_REG0_THREADSIZE__MASK; -} -#define A6XX_SP_FS_CTRL_REG0_VARYING 0x00400000 -#define A6XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x04000000 -#define A6XX_SP_FS_CTRL_REG0_MERGEDREGS 0x80000000 - -#define REG_A6XX_SP_UNKNOWN_A982 0x0000a982 - -#define REG_A6XX_SP_FS_OBJ_START_LO 0x0000a983 - -#define REG_A6XX_SP_FS_OBJ_START_HI 0x0000a984 - -#define REG_A6XX_SP_BLEND_CNTL 0x0000a989 -#define A6XX_SP_BLEND_CNTL_ENABLED 0x00000001 -#define A6XX_SP_BLEND_CNTL_UNK8 0x00000100 - -#define REG_A6XX_SP_SRGB_CNTL 0x0000a98a -#define A6XX_SP_SRGB_CNTL_SRGB_MRT0 0x00000001 -#define A6XX_SP_SRGB_CNTL_SRGB_MRT1 0x00000002 -#define A6XX_SP_SRGB_CNTL_SRGB_MRT2 0x00000004 -#define A6XX_SP_SRGB_CNTL_SRGB_MRT3 0x00000008 -#define A6XX_SP_SRGB_CNTL_SRGB_MRT4 0x00000010 -#define A6XX_SP_SRGB_CNTL_SRGB_MRT5 0x00000020 -#define A6XX_SP_SRGB_CNTL_SRGB_MRT6 0x00000040 -#define A6XX_SP_SRGB_CNTL_SRGB_MRT7 0x00000080 - -#define REG_A6XX_SP_FS_RENDER_COMPONENTS 0x0000a98b 
-#define A6XX_SP_FS_RENDER_COMPONENTS_RT0__MASK 0x0000000f -#define A6XX_SP_FS_RENDER_COMPONENTS_RT0__SHIFT 0 -static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT0(uint32_t val) -{ - return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT0__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT0__MASK; -} -#define A6XX_SP_FS_RENDER_COMPONENTS_RT1__MASK 0x000000f0 -#define A6XX_SP_FS_RENDER_COMPONENTS_RT1__SHIFT 4 -static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT1(uint32_t val) -{ - return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT1__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT1__MASK; -} -#define A6XX_SP_FS_RENDER_COMPONENTS_RT2__MASK 0x00000f00 -#define A6XX_SP_FS_RENDER_COMPONENTS_RT2__SHIFT 8 -static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT2(uint32_t val) -{ - return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT2__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT2__MASK; -} -#define A6XX_SP_FS_RENDER_COMPONENTS_RT3__MASK 0x0000f000 -#define A6XX_SP_FS_RENDER_COMPONENTS_RT3__SHIFT 12 -static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT3(uint32_t val) -{ - return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT3__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT3__MASK; -} -#define A6XX_SP_FS_RENDER_COMPONENTS_RT4__MASK 0x000f0000 -#define A6XX_SP_FS_RENDER_COMPONENTS_RT4__SHIFT 16 -static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT4(uint32_t val) -{ - return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT4__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT4__MASK; -} -#define A6XX_SP_FS_RENDER_COMPONENTS_RT5__MASK 0x00f00000 -#define A6XX_SP_FS_RENDER_COMPONENTS_RT5__SHIFT 20 -static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT5(uint32_t val) -{ - return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT5__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT5__MASK; -} -#define A6XX_SP_FS_RENDER_COMPONENTS_RT6__MASK 0x0f000000 -#define A6XX_SP_FS_RENDER_COMPONENTS_RT6__SHIFT 24 -static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT6(uint32_t val) -{ - return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT6__SHIFT) & 
A6XX_SP_FS_RENDER_COMPONENTS_RT6__MASK; -} -#define A6XX_SP_FS_RENDER_COMPONENTS_RT7__MASK 0xf0000000 -#define A6XX_SP_FS_RENDER_COMPONENTS_RT7__SHIFT 28 -static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT7(uint32_t val) -{ - return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT7__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT7__MASK; -} - -#define REG_A6XX_SP_FS_OUTPUT_CNTL0 0x0000a98c -#define A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID__MASK 0x0000ff00 -#define A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID__SHIFT 8 -static inline uint32_t A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(uint32_t val) -{ - return ((val) << A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID__SHIFT) & A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID__MASK; -} - -#define REG_A6XX_SP_FS_OUTPUT_CNTL1 0x0000a98d -#define A6XX_SP_FS_OUTPUT_CNTL1_MRT__MASK 0x0000000f -#define A6XX_SP_FS_OUTPUT_CNTL1_MRT__SHIFT 0 -static inline uint32_t A6XX_SP_FS_OUTPUT_CNTL1_MRT(uint32_t val) -{ - return ((val) << A6XX_SP_FS_OUTPUT_CNTL1_MRT__SHIFT) & A6XX_SP_FS_OUTPUT_CNTL1_MRT__MASK; -} - -static inline uint32_t REG_A6XX_SP_FS_MRT(uint32_t i0) { return 0x0000a996 + 0x1*i0; } - -static inline uint32_t REG_A6XX_SP_FS_MRT_REG(uint32_t i0) { return 0x0000a996 + 0x1*i0; } -#define A6XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK 0x000000ff -#define A6XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A6XX_SP_FS_MRT_REG_COLOR_FORMAT(enum a6xx_color_fmt val) -{ - return ((val) << A6XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT) & A6XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK; -} -#define A6XX_SP_FS_MRT_REG_COLOR_SINT 0x00000100 -#define A6XX_SP_FS_MRT_REG_COLOR_UINT 0x00000200 - -#define REG_A6XX_SP_UNKNOWN_A99E 0x0000a99e - -#define REG_A6XX_SP_FS_TEX_COUNT 0x0000a9a7 - -#define REG_A6XX_SP_UNKNOWN_A9A8 0x0000a9a8 - -#define REG_A6XX_SP_FS_TEX_SAMP_LO 0x0000a9e0 - -#define REG_A6XX_SP_FS_TEX_SAMP_HI 0x0000a9e1 - -#define REG_A6XX_SP_CS_TEX_SAMP_LO 0x0000a9e2 - -#define REG_A6XX_SP_CS_TEX_SAMP_HI 0x0000a9e3 - -#define REG_A6XX_SP_FS_TEX_CONST_LO 0x0000a9e4 - -#define 
REG_A6XX_SP_FS_TEX_CONST_HI 0x0000a9e5 - -#define REG_A6XX_SP_CS_TEX_CONST_LO 0x0000a9e6 - -#define REG_A6XX_SP_CS_TEX_CONST_HI 0x0000a9e7 - -static inline uint32_t REG_A6XX_SP_FS_OUTPUT(uint32_t i0) { return 0x0000a98e + 0x1*i0; } - -static inline uint32_t REG_A6XX_SP_FS_OUTPUT_REG(uint32_t i0) { return 0x0000a98e + 0x1*i0; } -#define A6XX_SP_FS_OUTPUT_REG_REGID__MASK 0x000000ff -#define A6XX_SP_FS_OUTPUT_REG_REGID__SHIFT 0 -static inline uint32_t A6XX_SP_FS_OUTPUT_REG_REGID(uint32_t val) -{ - return ((val) << A6XX_SP_FS_OUTPUT_REG_REGID__SHIFT) & A6XX_SP_FS_OUTPUT_REG_REGID__MASK; -} -#define A6XX_SP_FS_OUTPUT_REG_HALF_PRECISION 0x00000100 - -#define REG_A6XX_SP_CS_CTRL_REG0 0x0000a9b0 -#define A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e -#define A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 -static inline uint32_t A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 -#define A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 -static inline uint32_t A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A6XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 -#define A6XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT 14 -static inline uint32_t A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A6XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK; -} -#define A6XX_SP_CS_CTRL_REG0_THREADSIZE__MASK 0x00100000 -#define A6XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT 20 -static inline uint32_t A6XX_SP_CS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A6XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_CS_CTRL_REG0_THREADSIZE__MASK; -} -#define A6XX_SP_CS_CTRL_REG0_VARYING 0x00400000 -#define 
A6XX_SP_CS_CTRL_REG0_PIXLODENABLE 0x04000000 -#define A6XX_SP_CS_CTRL_REG0_MERGEDREGS 0x80000000 - -#define REG_A6XX_SP_CS_OBJ_START_LO 0x0000a9b4 - -#define REG_A6XX_SP_CS_OBJ_START_HI 0x0000a9b5 - -#define REG_A6XX_SP_CS_INSTRLEN 0x0000a9bc - -#define REG_A6XX_SP_UNKNOWN_AB00 0x0000ab00 - -#define REG_A6XX_SP_FS_CONFIG 0x0000ab04 -#define A6XX_SP_FS_CONFIG_ENABLED 0x00000100 -#define A6XX_SP_FS_CONFIG_NTEX__MASK 0x0001fe00 -#define A6XX_SP_FS_CONFIG_NTEX__SHIFT 9 -static inline uint32_t A6XX_SP_FS_CONFIG_NTEX(uint32_t val) -{ - return ((val) << A6XX_SP_FS_CONFIG_NTEX__SHIFT) & A6XX_SP_FS_CONFIG_NTEX__MASK; -} -#define A6XX_SP_FS_CONFIG_NSAMP__MASK 0x01fe0000 -#define A6XX_SP_FS_CONFIG_NSAMP__SHIFT 17 -static inline uint32_t A6XX_SP_FS_CONFIG_NSAMP(uint32_t val) -{ - return ((val) << A6XX_SP_FS_CONFIG_NSAMP__SHIFT) & A6XX_SP_FS_CONFIG_NSAMP__MASK; -} - -#define REG_A6XX_SP_FS_INSTRLEN 0x0000ab05 - -#define REG_A6XX_SP_UNKNOWN_AB20 0x0000ab20 - -#define REG_A6XX_SP_UNKNOWN_ACC0 0x0000acc0 - -#define REG_A6XX_SP_UNKNOWN_AE00 0x0000ae00 - -#define REG_A6XX_SP_UNKNOWN_AE03 0x0000ae03 - -#define REG_A6XX_SP_UNKNOWN_AE04 0x0000ae04 - -#define REG_A6XX_SP_UNKNOWN_AE0F 0x0000ae0f - -#define REG_A6XX_SP_UNKNOWN_B182 0x0000b182 - -#define REG_A6XX_SP_UNKNOWN_B183 0x0000b183 - -#define REG_A6XX_SP_TP_RAS_MSAA_CNTL 0x0000b300 -#define A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES__MASK; -} - -#define REG_A6XX_SP_TP_DEST_MSAA_CNTL 0x0000b301 -#define A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES__MASK; -} 
-#define A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 - -#define REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR_LO 0x0000b302 - -#define REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR_HI 0x0000b303 - -#define REG_A6XX_SP_TP_UNKNOWN_B304 0x0000b304 - -#define REG_A6XX_SP_TP_UNKNOWN_B309 0x0000b309 - -#define REG_A6XX_SP_PS_2D_SRC_INFO 0x0000b4c0 -#define A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) -{ - return ((val) << A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__SHIFT) & A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__MASK; -} -#define A6XX_SP_PS_2D_SRC_INFO_TILE_MODE__MASK 0x00000300 -#define A6XX_SP_PS_2D_SRC_INFO_TILE_MODE__SHIFT 8 -static inline uint32_t A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(enum a6xx_tile_mode val) -{ - return ((val) << A6XX_SP_PS_2D_SRC_INFO_TILE_MODE__SHIFT) & A6XX_SP_PS_2D_SRC_INFO_TILE_MODE__MASK; -} -#define A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP__MASK 0x00000c00 -#define A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP__SHIFT 10 -static inline uint32_t A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP__SHIFT) & A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP__MASK; -} -#define A6XX_SP_PS_2D_SRC_INFO_FLAGS 0x00001000 -#define A6XX_SP_PS_2D_SRC_INFO_FILTER 0x00010000 - -#define REG_A6XX_SP_PS_2D_SRC_SIZE 0x0000b4c1 -#define A6XX_SP_PS_2D_SRC_SIZE_WIDTH__MASK 0x00007fff -#define A6XX_SP_PS_2D_SRC_SIZE_WIDTH__SHIFT 0 -static inline uint32_t A6XX_SP_PS_2D_SRC_SIZE_WIDTH(uint32_t val) -{ - return ((val) << A6XX_SP_PS_2D_SRC_SIZE_WIDTH__SHIFT) & A6XX_SP_PS_2D_SRC_SIZE_WIDTH__MASK; -} -#define A6XX_SP_PS_2D_SRC_SIZE_HEIGHT__MASK 0x3fff8000 -#define A6XX_SP_PS_2D_SRC_SIZE_HEIGHT__SHIFT 15 -static inline uint32_t A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(uint32_t val) -{ - return ((val) << A6XX_SP_PS_2D_SRC_SIZE_HEIGHT__SHIFT) & A6XX_SP_PS_2D_SRC_SIZE_HEIGHT__MASK; -} - -#define REG_A6XX_SP_PS_2D_SRC_LO 0x0000b4c2 - 
-#define REG_A6XX_SP_PS_2D_SRC_HI 0x0000b4c3 - -#define REG_A6XX_SP_PS_2D_SRC_PITCH 0x0000b4c4 -#define A6XX_SP_PS_2D_SRC_PITCH_PITCH__MASK 0x01fffe00 -#define A6XX_SP_PS_2D_SRC_PITCH_PITCH__SHIFT 9 -static inline uint32_t A6XX_SP_PS_2D_SRC_PITCH_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A6XX_SP_PS_2D_SRC_PITCH_PITCH__SHIFT) & A6XX_SP_PS_2D_SRC_PITCH_PITCH__MASK; -} - -#define REG_A6XX_SP_PS_2D_SRC_FLAGS_LO 0x0000b4ca - -#define REG_A6XX_SP_PS_2D_SRC_FLAGS_HI 0x0000b4cb - -#define REG_A6XX_SP_UNKNOWN_B600 0x0000b600 - -#define REG_A6XX_SP_UNKNOWN_B605 0x0000b605 - -#define REG_A6XX_HLSQ_VS_CNTL 0x0000b800 -#define A6XX_HLSQ_VS_CNTL_CONSTLEN__MASK 0x000000ff -#define A6XX_HLSQ_VS_CNTL_CONSTLEN__SHIFT 0 -static inline uint32_t A6XX_HLSQ_VS_CNTL_CONSTLEN(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << A6XX_HLSQ_VS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_VS_CNTL_CONSTLEN__MASK; -} - -#define REG_A6XX_HLSQ_HS_CNTL 0x0000b801 -#define A6XX_HLSQ_HS_CNTL_CONSTLEN__MASK 0x000000ff -#define A6XX_HLSQ_HS_CNTL_CONSTLEN__SHIFT 0 -static inline uint32_t A6XX_HLSQ_HS_CNTL_CONSTLEN(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << A6XX_HLSQ_HS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_HS_CNTL_CONSTLEN__MASK; -} - -#define REG_A6XX_HLSQ_DS_CNTL 0x0000b802 -#define A6XX_HLSQ_DS_CNTL_CONSTLEN__MASK 0x000000ff -#define A6XX_HLSQ_DS_CNTL_CONSTLEN__SHIFT 0 -static inline uint32_t A6XX_HLSQ_DS_CNTL_CONSTLEN(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << A6XX_HLSQ_DS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_DS_CNTL_CONSTLEN__MASK; -} - -#define REG_A6XX_HLSQ_GS_CNTL 0x0000b803 -#define A6XX_HLSQ_GS_CNTL_CONSTLEN__MASK 0x000000ff -#define A6XX_HLSQ_GS_CNTL_CONSTLEN__SHIFT 0 -static inline uint32_t A6XX_HLSQ_GS_CNTL_CONSTLEN(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << A6XX_HLSQ_GS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_GS_CNTL_CONSTLEN__MASK; -} - -#define REG_A6XX_HLSQ_UNKNOWN_B980 0x0000b980 - -#define 
REG_A6XX_HLSQ_CONTROL_1_REG 0x0000b982 - -#define REG_A6XX_HLSQ_CONTROL_2_REG 0x0000b983 -#define A6XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK 0x000000ff -#define A6XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT 0 -static inline uint32_t A6XX_HLSQ_CONTROL_2_REG_FACEREGID(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT) & A6XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK; -} -#define A6XX_HLSQ_CONTROL_2_REG_SAMPLEID__MASK 0x0000ff00 -#define A6XX_HLSQ_CONTROL_2_REG_SAMPLEID__SHIFT 8 -static inline uint32_t A6XX_HLSQ_CONTROL_2_REG_SAMPLEID(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CONTROL_2_REG_SAMPLEID__SHIFT) & A6XX_HLSQ_CONTROL_2_REG_SAMPLEID__MASK; -} -#define A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__MASK 0x00ff0000 -#define A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__SHIFT 16 -static inline uint32_t A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__SHIFT) & A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__MASK; -} - -#define REG_A6XX_HLSQ_CONTROL_3_REG 0x0000b984 -#define A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK 0x000000ff -#define A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__SHIFT 0 -static inline uint32_t A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__SHIFT) & A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK; -} - -#define REG_A6XX_HLSQ_CONTROL_4_REG 0x0000b985 -#define A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__MASK 0x00ff0000 -#define A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__SHIFT 16 -static inline uint32_t A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__SHIFT) & A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__MASK; -} -#define A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__MASK 0xff000000 -#define A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__SHIFT 24 -static inline uint32_t A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__SHIFT) & 
A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__MASK; -} - -#define REG_A6XX_HLSQ_CONTROL_5_REG 0x0000b986 - -#define REG_A6XX_HLSQ_CS_NDRANGE_0 0x0000b990 -#define A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK 0x00000003 -#define A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT 0 -static inline uint32_t A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT) & A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK; -} -#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK 0x00000ffc -#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT 2 -static inline uint32_t A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT) & A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK; -} -#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK 0x003ff000 -#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT 12 -static inline uint32_t A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT) & A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK; -} -#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK 0xffc00000 -#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT 22 -static inline uint32_t A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT) & A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK; -} - -#define REG_A6XX_HLSQ_CS_NDRANGE_1 0x0000b991 -#define A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__MASK 0xffffffff -#define A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__SHIFT 0 -static inline uint32_t A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__SHIFT) & A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__MASK; -} - -#define REG_A6XX_HLSQ_CS_NDRANGE_2 0x0000b992 -#define A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__MASK 0xffffffff -#define A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__SHIFT 0 -static inline uint32_t A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__SHIFT) & 
A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__MASK; -} - -#define REG_A6XX_HLSQ_CS_NDRANGE_3 0x0000b993 -#define A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__MASK 0xffffffff -#define A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__SHIFT 0 -static inline uint32_t A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__SHIFT) & A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__MASK; -} - -#define REG_A6XX_HLSQ_CS_NDRANGE_4 0x0000b994 -#define A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__MASK 0xffffffff -#define A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__SHIFT 0 -static inline uint32_t A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__SHIFT) & A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__MASK; -} - -#define REG_A6XX_HLSQ_CS_NDRANGE_5 0x0000b995 -#define A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__MASK 0xffffffff -#define A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__SHIFT 0 -static inline uint32_t A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__SHIFT) & A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__MASK; -} - -#define REG_A6XX_HLSQ_CS_NDRANGE_6 0x0000b996 -#define A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__MASK 0xffffffff -#define A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__SHIFT 0 -static inline uint32_t A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__SHIFT) & A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__MASK; -} - -#define REG_A6XX_HLSQ_CS_CNTL_0 0x0000b997 -#define A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK 0x000000ff -#define A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT 0 -static inline uint32_t A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT) & A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK; -} -#define A6XX_HLSQ_CS_CNTL_0_UNK0__MASK 0x0000ff00 -#define A6XX_HLSQ_CS_CNTL_0_UNK0__SHIFT 8 -static inline uint32_t A6XX_HLSQ_CS_CNTL_0_UNK0(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_CNTL_0_UNK0__SHIFT) & 
A6XX_HLSQ_CS_CNTL_0_UNK0__MASK; -} -#define A6XX_HLSQ_CS_CNTL_0_UNK1__MASK 0x00ff0000 -#define A6XX_HLSQ_CS_CNTL_0_UNK1__SHIFT 16 -static inline uint32_t A6XX_HLSQ_CS_CNTL_0_UNK1(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_CNTL_0_UNK1__SHIFT) & A6XX_HLSQ_CS_CNTL_0_UNK1__MASK; -} -#define A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK 0xff000000 -#define A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT 24 -static inline uint32_t A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT) & A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK; -} - -#define REG_A6XX_HLSQ_CS_KERNEL_GROUP_X 0x0000b999 - -#define REG_A6XX_HLSQ_CS_KERNEL_GROUP_Y 0x0000b99a - -#define REG_A6XX_HLSQ_CS_KERNEL_GROUP_Z 0x0000b99b - -#define REG_A6XX_HLSQ_UPDATE_CNTL 0x0000bb08 - -#define REG_A6XX_HLSQ_FS_CNTL 0x0000bb10 -#define A6XX_HLSQ_FS_CNTL_CONSTLEN__MASK 0x000000ff -#define A6XX_HLSQ_FS_CNTL_CONSTLEN__SHIFT 0 -static inline uint32_t A6XX_HLSQ_FS_CNTL_CONSTLEN(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << A6XX_HLSQ_FS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_FS_CNTL_CONSTLEN__MASK; -} - -#define REG_A6XX_HLSQ_UNKNOWN_BB11 0x0000bb11 - -#define REG_A6XX_HLSQ_UNKNOWN_BE00 0x0000be00 - -#define REG_A6XX_HLSQ_UNKNOWN_BE01 0x0000be01 - -#define REG_A6XX_HLSQ_UNKNOWN_BE04 0x0000be04 - -#define REG_A6XX_TEX_SAMP_0 0x00000000 -#define A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR 0x00000001 -#define A6XX_TEX_SAMP_0_XY_MAG__MASK 0x00000006 -#define A6XX_TEX_SAMP_0_XY_MAG__SHIFT 1 -static inline uint32_t A6XX_TEX_SAMP_0_XY_MAG(enum a6xx_tex_filter val) -{ - return ((val) << A6XX_TEX_SAMP_0_XY_MAG__SHIFT) & A6XX_TEX_SAMP_0_XY_MAG__MASK; -} -#define A6XX_TEX_SAMP_0_XY_MIN__MASK 0x00000018 -#define A6XX_TEX_SAMP_0_XY_MIN__SHIFT 3 -static inline uint32_t A6XX_TEX_SAMP_0_XY_MIN(enum a6xx_tex_filter val) -{ - return ((val) << A6XX_TEX_SAMP_0_XY_MIN__SHIFT) & A6XX_TEX_SAMP_0_XY_MIN__MASK; -} -#define A6XX_TEX_SAMP_0_WRAP_S__MASK 0x000000e0 -#define 
A6XX_TEX_SAMP_0_WRAP_S__SHIFT 5 -static inline uint32_t A6XX_TEX_SAMP_0_WRAP_S(enum a6xx_tex_clamp val) -{ - return ((val) << A6XX_TEX_SAMP_0_WRAP_S__SHIFT) & A6XX_TEX_SAMP_0_WRAP_S__MASK; -} -#define A6XX_TEX_SAMP_0_WRAP_T__MASK 0x00000700 -#define A6XX_TEX_SAMP_0_WRAP_T__SHIFT 8 -static inline uint32_t A6XX_TEX_SAMP_0_WRAP_T(enum a6xx_tex_clamp val) -{ - return ((val) << A6XX_TEX_SAMP_0_WRAP_T__SHIFT) & A6XX_TEX_SAMP_0_WRAP_T__MASK; -} -#define A6XX_TEX_SAMP_0_WRAP_R__MASK 0x00003800 -#define A6XX_TEX_SAMP_0_WRAP_R__SHIFT 11 -static inline uint32_t A6XX_TEX_SAMP_0_WRAP_R(enum a6xx_tex_clamp val) -{ - return ((val) << A6XX_TEX_SAMP_0_WRAP_R__SHIFT) & A6XX_TEX_SAMP_0_WRAP_R__MASK; -} -#define A6XX_TEX_SAMP_0_ANISO__MASK 0x0001c000 -#define A6XX_TEX_SAMP_0_ANISO__SHIFT 14 -static inline uint32_t A6XX_TEX_SAMP_0_ANISO(enum a6xx_tex_aniso val) -{ - return ((val) << A6XX_TEX_SAMP_0_ANISO__SHIFT) & A6XX_TEX_SAMP_0_ANISO__MASK; -} -#define A6XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000 -#define A6XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19 -static inline uint32_t A6XX_TEX_SAMP_0_LOD_BIAS(float val) -{ - return ((((int32_t)(val * 256.0))) << A6XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A6XX_TEX_SAMP_0_LOD_BIAS__MASK; -} - -#define REG_A6XX_TEX_SAMP_1 0x00000001 -#define A6XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e -#define A6XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT 1 -static inline uint32_t A6XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val) -{ - return ((val) << A6XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A6XX_TEX_SAMP_1_COMPARE_FUNC__MASK; -} -#define A6XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF 0x00000010 -#define A6XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020 -#define A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040 -#define A6XX_TEX_SAMP_1_MAX_LOD__MASK 0x000fff00 -#define A6XX_TEX_SAMP_1_MAX_LOD__SHIFT 8 -static inline uint32_t A6XX_TEX_SAMP_1_MAX_LOD(float val) -{ - return ((((uint32_t)(val * 256.0))) << A6XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A6XX_TEX_SAMP_1_MAX_LOD__MASK; -} -#define 
A6XX_TEX_SAMP_1_MIN_LOD__MASK 0xfff00000 -#define A6XX_TEX_SAMP_1_MIN_LOD__SHIFT 20 -static inline uint32_t A6XX_TEX_SAMP_1_MIN_LOD(float val) -{ - return ((((uint32_t)(val * 256.0))) << A6XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A6XX_TEX_SAMP_1_MIN_LOD__MASK; -} - -#define REG_A6XX_TEX_SAMP_2 0x00000002 -#define A6XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK 0xfffffff0 -#define A6XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT 4 -static inline uint32_t A6XX_TEX_SAMP_2_BCOLOR_OFFSET(uint32_t val) -{ - return ((val) << A6XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT) & A6XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK; -} - -#define REG_A6XX_TEX_SAMP_3 0x00000003 - -#define REG_A6XX_TEX_CONST_0 0x00000000 -#define A6XX_TEX_CONST_0_TILE_MODE__MASK 0x00000003 -#define A6XX_TEX_CONST_0_TILE_MODE__SHIFT 0 -static inline uint32_t A6XX_TEX_CONST_0_TILE_MODE(enum a6xx_tile_mode val) -{ - return ((val) << A6XX_TEX_CONST_0_TILE_MODE__SHIFT) & A6XX_TEX_CONST_0_TILE_MODE__MASK; -} -#define A6XX_TEX_CONST_0_SRGB 0x00000004 -#define A6XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070 -#define A6XX_TEX_CONST_0_SWIZ_X__SHIFT 4 -static inline uint32_t A6XX_TEX_CONST_0_SWIZ_X(enum a6xx_tex_swiz val) -{ - return ((val) << A6XX_TEX_CONST_0_SWIZ_X__SHIFT) & A6XX_TEX_CONST_0_SWIZ_X__MASK; -} -#define A6XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380 -#define A6XX_TEX_CONST_0_SWIZ_Y__SHIFT 7 -static inline uint32_t A6XX_TEX_CONST_0_SWIZ_Y(enum a6xx_tex_swiz val) -{ - return ((val) << A6XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A6XX_TEX_CONST_0_SWIZ_Y__MASK; -} -#define A6XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00 -#define A6XX_TEX_CONST_0_SWIZ_Z__SHIFT 10 -static inline uint32_t A6XX_TEX_CONST_0_SWIZ_Z(enum a6xx_tex_swiz val) -{ - return ((val) << A6XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A6XX_TEX_CONST_0_SWIZ_Z__MASK; -} -#define A6XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000 -#define A6XX_TEX_CONST_0_SWIZ_W__SHIFT 13 -static inline uint32_t A6XX_TEX_CONST_0_SWIZ_W(enum a6xx_tex_swiz val) -{ - return ((val) << A6XX_TEX_CONST_0_SWIZ_W__SHIFT) & A6XX_TEX_CONST_0_SWIZ_W__MASK; -} -#define 
A6XX_TEX_CONST_0_MIPLVLS__MASK 0x000f0000 -#define A6XX_TEX_CONST_0_MIPLVLS__SHIFT 16 -static inline uint32_t A6XX_TEX_CONST_0_MIPLVLS(uint32_t val) -{ - return ((val) << A6XX_TEX_CONST_0_MIPLVLS__SHIFT) & A6XX_TEX_CONST_0_MIPLVLS__MASK; -} -#define A6XX_TEX_CONST_0_FMT__MASK 0x3fc00000 -#define A6XX_TEX_CONST_0_FMT__SHIFT 22 -static inline uint32_t A6XX_TEX_CONST_0_FMT(enum a6xx_tex_fmt val) -{ - return ((val) << A6XX_TEX_CONST_0_FMT__SHIFT) & A6XX_TEX_CONST_0_FMT__MASK; -} -#define A6XX_TEX_CONST_0_SWAP__MASK 0xc0000000 -#define A6XX_TEX_CONST_0_SWAP__SHIFT 30 -static inline uint32_t A6XX_TEX_CONST_0_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A6XX_TEX_CONST_0_SWAP__SHIFT) & A6XX_TEX_CONST_0_SWAP__MASK; -} - -#define REG_A6XX_TEX_CONST_1 0x00000001 -#define A6XX_TEX_CONST_1_WIDTH__MASK 0x00007fff -#define A6XX_TEX_CONST_1_WIDTH__SHIFT 0 -static inline uint32_t A6XX_TEX_CONST_1_WIDTH(uint32_t val) -{ - return ((val) << A6XX_TEX_CONST_1_WIDTH__SHIFT) & A6XX_TEX_CONST_1_WIDTH__MASK; -} -#define A6XX_TEX_CONST_1_HEIGHT__MASK 0x3fff8000 -#define A6XX_TEX_CONST_1_HEIGHT__SHIFT 15 -static inline uint32_t A6XX_TEX_CONST_1_HEIGHT(uint32_t val) -{ - return ((val) << A6XX_TEX_CONST_1_HEIGHT__SHIFT) & A6XX_TEX_CONST_1_HEIGHT__MASK; -} - -#define REG_A6XX_TEX_CONST_2 0x00000002 -#define A6XX_TEX_CONST_2_FETCHSIZE__MASK 0x0000000f -#define A6XX_TEX_CONST_2_FETCHSIZE__SHIFT 0 -static inline uint32_t A6XX_TEX_CONST_2_FETCHSIZE(enum a6xx_tex_fetchsize val) -{ - return ((val) << A6XX_TEX_CONST_2_FETCHSIZE__SHIFT) & A6XX_TEX_CONST_2_FETCHSIZE__MASK; -} -#define A6XX_TEX_CONST_2_PITCH__MASK 0x1fffff80 -#define A6XX_TEX_CONST_2_PITCH__SHIFT 7 -static inline uint32_t A6XX_TEX_CONST_2_PITCH(uint32_t val) -{ - return ((val) << A6XX_TEX_CONST_2_PITCH__SHIFT) & A6XX_TEX_CONST_2_PITCH__MASK; -} -#define A6XX_TEX_CONST_2_TYPE__MASK 0x60000000 -#define A6XX_TEX_CONST_2_TYPE__SHIFT 29 -static inline uint32_t A6XX_TEX_CONST_2_TYPE(enum a6xx_tex_type val) -{ - return ((val) << 
A6XX_TEX_CONST_2_TYPE__SHIFT) & A6XX_TEX_CONST_2_TYPE__MASK; -} - -#define REG_A6XX_TEX_CONST_3 0x00000003 -#define A6XX_TEX_CONST_3_ARRAY_PITCH__MASK 0x00003fff -#define A6XX_TEX_CONST_3_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A6XX_TEX_CONST_3_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << A6XX_TEX_CONST_3_ARRAY_PITCH__SHIFT) & A6XX_TEX_CONST_3_ARRAY_PITCH__MASK; -} -#define A6XX_TEX_CONST_3_FLAG 0x10000000 - -#define REG_A6XX_TEX_CONST_4 0x00000004 -#define A6XX_TEX_CONST_4_BASE_LO__MASK 0xffffffe0 -#define A6XX_TEX_CONST_4_BASE_LO__SHIFT 5 -static inline uint32_t A6XX_TEX_CONST_4_BASE_LO(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A6XX_TEX_CONST_4_BASE_LO__SHIFT) & A6XX_TEX_CONST_4_BASE_LO__MASK; -} - -#define REG_A6XX_TEX_CONST_5 0x00000005 -#define A6XX_TEX_CONST_5_BASE_HI__MASK 0x0001ffff -#define A6XX_TEX_CONST_5_BASE_HI__SHIFT 0 -static inline uint32_t A6XX_TEX_CONST_5_BASE_HI(uint32_t val) -{ - return ((val) << A6XX_TEX_CONST_5_BASE_HI__SHIFT) & A6XX_TEX_CONST_5_BASE_HI__MASK; -} -#define A6XX_TEX_CONST_5_DEPTH__MASK 0x3ffe0000 -#define A6XX_TEX_CONST_5_DEPTH__SHIFT 17 -static inline uint32_t A6XX_TEX_CONST_5_DEPTH(uint32_t val) -{ - return ((val) << A6XX_TEX_CONST_5_DEPTH__SHIFT) & A6XX_TEX_CONST_5_DEPTH__MASK; -} - -#define REG_A6XX_TEX_CONST_6 0x00000006 - -#define REG_A6XX_TEX_CONST_7 0x00000007 -#define A6XX_TEX_CONST_7_FLAG_LO__MASK 0xffffffe0 -#define A6XX_TEX_CONST_7_FLAG_LO__SHIFT 5 -static inline uint32_t A6XX_TEX_CONST_7_FLAG_LO(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A6XX_TEX_CONST_7_FLAG_LO__SHIFT) & A6XX_TEX_CONST_7_FLAG_LO__MASK; -} - -#define REG_A6XX_TEX_CONST_8 0x00000008 -#define A6XX_TEX_CONST_8_FLAG_HI__MASK 0x0001ffff -#define A6XX_TEX_CONST_8_FLAG_HI__SHIFT 0 -static inline uint32_t A6XX_TEX_CONST_8_FLAG_HI(uint32_t val) -{ - return ((val) << A6XX_TEX_CONST_8_FLAG_HI__SHIFT) & A6XX_TEX_CONST_8_FLAG_HI__MASK; -} - -#define REG_A6XX_TEX_CONST_9 
0x00000009 - -#define REG_A6XX_TEX_CONST_10 0x0000000a - -#define REG_A6XX_TEX_CONST_11 0x0000000b - -#define REG_A6XX_TEX_CONST_12 0x0000000c - -#define REG_A6XX_TEX_CONST_13 0x0000000d - -#define REG_A6XX_TEX_CONST_14 0x0000000e - -#define REG_A6XX_TEX_CONST_15 0x0000000f - -#define REG_A6XX_PDC_GPU_ENABLE_PDC 0x00001140 - -#define REG_A6XX_PDC_GPU_SEQ_START_ADDR 0x00001148 - -#define REG_A6XX_PDC_GPU_TCS0_CONTROL 0x00001540 - -#define REG_A6XX_PDC_GPU_TCS0_CMD_ENABLE_BANK 0x00001541 - -#define REG_A6XX_PDC_GPU_TCS0_CMD_WAIT_FOR_CMPL_BANK 0x00001542 - -#define REG_A6XX_PDC_GPU_TCS0_CMD0_MSGID 0x00001543 - -#define REG_A6XX_PDC_GPU_TCS0_CMD0_ADDR 0x00001544 - -#define REG_A6XX_PDC_GPU_TCS0_CMD0_DATA 0x00001545 - -#define REG_A6XX_PDC_GPU_TCS1_CONTROL 0x00001572 - -#define REG_A6XX_PDC_GPU_TCS1_CMD_ENABLE_BANK 0x00001573 - -#define REG_A6XX_PDC_GPU_TCS1_CMD_WAIT_FOR_CMPL_BANK 0x00001574 - -#define REG_A6XX_PDC_GPU_TCS1_CMD0_MSGID 0x00001575 - -#define REG_A6XX_PDC_GPU_TCS1_CMD0_ADDR 0x00001576 - -#define REG_A6XX_PDC_GPU_TCS1_CMD0_DATA 0x00001577 - -#define REG_A6XX_PDC_GPU_TCS2_CONTROL 0x000015a4 - -#define REG_A6XX_PDC_GPU_TCS2_CMD_ENABLE_BANK 0x000015a5 - -#define REG_A6XX_PDC_GPU_TCS2_CMD_WAIT_FOR_CMPL_BANK 0x000015a6 - -#define REG_A6XX_PDC_GPU_TCS2_CMD0_MSGID 0x000015a7 - -#define REG_A6XX_PDC_GPU_TCS2_CMD0_ADDR 0x000015a8 - -#define REG_A6XX_PDC_GPU_TCS2_CMD0_DATA 0x000015a9 - -#define REG_A6XX_PDC_GPU_TCS3_CONTROL 0x000015d6 - -#define REG_A6XX_PDC_GPU_TCS3_CMD_ENABLE_BANK 0x000015d7 - -#define REG_A6XX_PDC_GPU_TCS3_CMD_WAIT_FOR_CMPL_BANK 0x000015d8 - -#define REG_A6XX_PDC_GPU_TCS3_CMD0_MSGID 0x000015d9 - -#define REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR 0x000015da - -#define REG_A6XX_PDC_GPU_TCS3_CMD0_DATA 0x000015db - -#define REG_A6XX_PDC_GPU_SEQ_MEM_0 0x00000000 - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A 0x00000000 -#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX__MASK 0x000000ff -#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX__SHIFT 0 -static inline 
uint32_t A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL__MASK 0x0000ff00 -#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL__SHIFT 8 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL__MASK; -} - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B 0x00000001 - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C 0x00000002 - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D 0x00000003 - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT 0x00000004 -#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__MASK 0x0000003f -#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__SHIFT 0 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU__MASK 0x00007000 -#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU__SHIFT 12 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT__MASK 0xf0000000 -#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT__SHIFT 28 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT__MASK; -} - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM 0x00000005 -#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__MASK 0x0f000000 -#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__SHIFT 24 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__SHIFT) & 
A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__MASK; -} - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0 0x00000008 - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1 0x00000009 - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2 0x0000000a - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3 0x0000000b - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0 0x0000000c - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1 0x0000000d - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2 0x0000000e - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3 0x0000000f - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0 0x00000010 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__MASK 0x0000000f -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__SHIFT 0 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__MASK 0x000000f0 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__SHIFT 4 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__MASK 0x00000f00 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__SHIFT 8 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__MASK 0x0000f000 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__SHIFT 12 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__MASK 0x000f0000 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__SHIFT 16 -static inline uint32_t 
A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__MASK 0x00f00000 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__SHIFT 20 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__MASK 0x0f000000 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__SHIFT 24 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__MASK 0xf0000000 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__SHIFT 28 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__MASK; -} - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1 0x00000011 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__MASK 0x0000000f -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__SHIFT 0 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__MASK 0x000000f0 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__SHIFT 4 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__MASK 0x00000f00 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__SHIFT 8 -static inline uint32_t 
A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__MASK 0x0000f000 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__SHIFT 12 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__MASK 0x000f0000 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__SHIFT 16 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__MASK 0x00f00000 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__SHIFT 20 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__MASK 0x0f000000 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__SHIFT 24 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__MASK 0xf0000000 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__SHIFT 28 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__MASK; -} - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1 0x0000002f - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2 0x00000030 - -#define REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0 0x00000001 - -#define 
REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1 0x00000002 - - -#endif /* A6XX_XML */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_blend.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_blend.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_blend.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_blend.c 2019-03-31 23:16:37.000000000 +0000 @@ -138,8 +138,10 @@ } so->rb_blend_cntl = A6XX_RB_BLEND_CNTL_ENABLE_BLEND(mrt_blend) | + COND(cso->alpha_to_coverage, A6XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE) | COND(cso->independent_blend_enable, A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND); so->sp_blend_cntl = A6XX_SP_BLEND_CNTL_UNK8 | + COND(cso->alpha_to_coverage, A6XX_SP_BLEND_CNTL_ALPHA_TO_COVERAGE) | COND(mrt_blend, A6XX_SP_BLEND_CNTL_ENABLED); return so; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c 2019-03-31 23:16:37.000000000 +0000 @@ -47,12 +47,24 @@ r->target == PIPE_TEXTURE_3D ? u_minify(r->depth0, lvl) : r->array_size; - return (b->x >= 0) && (b->x + b->width <= u_minify(r->width0, lvl)) && (b->y >= 0) && (b->y + b->height <= u_minify(r->height0, lvl)) && (b->z >= 0) && (b->z + b->depth <= last_layer); } +static bool +ok_format(enum pipe_format pfmt) +{ + enum a6xx_color_fmt fmt = fd6_pipe2color(pfmt); + if (fmt == ~0) + return false; + + if (fd6_ifmt(fmt) == 0) + return false; + + return true; +} + #define DEBUG_BLIT_FALLBACK 0 #define fail_if(cond) \ do { \ @@ -82,19 +94,14 @@ fail_if(util_format_is_compressed(info->src.format) != util_format_is_compressed(info->src.format)); + /* Fail if unsupported format: */ + fail_if(!ok_format(info->src.format)); + fail_if(!ok_format(info->dst.format)); + /* ... but only if they're the same compression format. 
*/ fail_if(util_format_is_compressed(info->src.format) && info->src.format != info->dst.format); - /* hw ignores {SRC,DST}_INFO.COLOR_SWAP if {SRC,DST}_INFO.TILE_MODE - * is set (not linear). We can kind of get around that when tiling/ - * untiling by setting both src and dst COLOR_SWAP=WZYX, but that - * means the formats must match: - */ - fail_if((fd_resource(info->dst.resource)->tile_mode || - fd_resource(info->src.resource)->tile_mode) && - info->dst.format != info->src.format); - /* src box can be inverted, which we don't support.. dst box cannot: */ fail_if((info->src.box.width < 0) || (info->src.box.height < 0)); @@ -106,7 +113,9 @@ debug_assert(info->dst.box.height >= 0); debug_assert(info->dst.box.depth >= 0); - fail_if(info->dst.resource->nr_samples + info->src.resource->nr_samples > 2); + /* non-multisampled could either have nr_samples == 0 or == 1 */ + fail_if(info->dst.resource->nr_samples > 1); + fail_if(info->src.resource->nr_samples > 1); fail_if(info->window_rectangle_include); @@ -139,6 +148,15 @@ OUT_RING(ring, 0x10000000); } +static uint32_t +blit_control(enum a6xx_color_fmt fmt) +{ + unsigned blit_cntl = 0xf00000; + blit_cntl |= A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt); + blit_cntl |= A6XX_RB_2D_BLIT_CNTL_IFMT(fd6_ifmt(fmt)); + return blit_cntl; +} + /* buffers need to be handled specially since x/width can exceed the bounds * supported by hw.. 
if necessary decompose into (potentially) two 2D blits */ @@ -198,7 +216,7 @@ OUT_PKT7(ring, CP_SET_MARKER, 1); OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE)); - uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(RB6_R8_UNORM) | 0x20f00000; + uint32_t blit_cntl = blit_control(RB6_R8_UNORM) | 0x20000000; OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1); OUT_RING(ring, blit_cntl); @@ -271,13 +289,13 @@ OUT_RING(ring, 0x3f); OUT_WFI5(ring); - OUT_PKT4(ring, 0x8c01, 1); + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8C01, 1); OUT_RING(ring, 0); - OUT_PKT4(ring, 0xacc0, 1); + OUT_PKT4(ring, REG_A6XX_SP_2D_SRC_FORMAT, 1); OUT_RING(ring, 0xf180); - OUT_PKT4(ring, 0x8e04, 1); + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1); OUT_RING(ring, 0x01000000); OUT_PKT7(ring, CP_BLIT, 1); @@ -285,7 +303,7 @@ OUT_WFI5(ring); - OUT_PKT4(ring, 0x8e04, 1); + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1); OUT_RING(ring, 0); } } @@ -333,8 +351,8 @@ dtile = fd_resource_level_linear(info->dst.resource, info->dst.level) ? TILE6_LINEAR : dst->tile_mode; - sswap = fd6_pipe2swap(info->src.format); - dswap = fd6_pipe2swap(info->dst.format); + sswap = stile ? WZYX : fd6_pipe2swap(info->src.format); + dswap = dtile ? WZYX : fd6_pipe2swap(info->dst.format); if (util_format_is_compressed(info->src.format)) { debug_assert(info->src.format == info->dst.format); @@ -361,20 +379,10 @@ uint32_t width = DIV_ROUND_UP(u_minify(src->base.width0, info->src.level), blockwidth) * nelements; uint32_t height = DIV_ROUND_UP(u_minify(src->base.height0, info->src.level), blockheight); - /* if dtile, then dswap ignored by hw, and likewise if stile then sswap - * ignored by hw.. 
but in this case we have already rejected the blit - * if src and dst formats differ, so juse use WZYX for both src and - * dst swap mode (so we don't change component order) - */ - if (stile || dtile) { - debug_assert(info->src.format == info->dst.format); - sswap = dswap = WZYX; - } - OUT_PKT7(ring, CP_SET_MARKER, 1); OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE)); - uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(dfmt) | 0xf00000; + uint32_t blit_cntl = blit_control(dfmt); if (dtile != stile) blit_cntl |= 0x20000000; @@ -430,7 +438,7 @@ OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(dfmt) | A6XX_RB_2D_DST_INFO_TILE_MODE(dtile) | A6XX_RB_2D_DST_INFO_COLOR_SWAP(dswap)); - OUT_RELOC(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */ + OUT_RELOCW(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */ OUT_RING(ring, A6XX_RB_2D_DST_SIZE_PITCH(dpitch)); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); @@ -455,13 +463,25 @@ OUT_RING(ring, 0x3f); OUT_WFI5(ring); - OUT_PKT4(ring, 0x8c01, 1); + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8C01, 1); OUT_RING(ring, 0); - OUT_PKT4(ring, 0xacc0, 1); - OUT_RING(ring, 0xf180); + OUT_PKT4(ring, REG_A6XX_SP_2D_SRC_FORMAT, 1); + OUT_RING(ring, A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(sfmt) | + COND(util_format_is_pure_sint(info->src.format), + A6XX_SP_2D_SRC_FORMAT_SINT) | + COND(util_format_is_pure_uint(info->src.format), + A6XX_SP_2D_SRC_FORMAT_UINT) | + COND(util_format_is_snorm(info->src.format), + A6XX_SP_2D_SRC_FORMAT_SINT | + A6XX_SP_2D_SRC_FORMAT_NORM) | + COND(util_format_is_unorm(info->src.format), +// TODO sometimes blob uses UINT+NORM but dEQP seems unhappy about that +// A6XX_SP_2D_SRC_FORMAT_UINT | + A6XX_SP_2D_SRC_FORMAT_NORM) | + 0xf000); - OUT_PKT4(ring, 0x8e04, 1); + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1); OUT_RING(ring, 0x01000000); OUT_PKT7(ring, CP_BLIT, 1); @@ -469,25 +489,17 @@ OUT_WFI5(ring); - OUT_PKT4(ring, 0x8e04, 1); + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1); OUT_RING(ring, 0); } } static 
void -fd6_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) +emit_blit(struct fd_context *ctx, const struct pipe_blit_info *info) { - struct fd_context *ctx = fd_context(pctx); struct fd_batch *batch; - if (!can_do_blit(info)) { - fd_blitter_pipe_begin(ctx, info->render_condition_enable, false, FD_STAGE_BLIT); - fd_blitter_blit(ctx, info); - fd_blitter_pipe_end(ctx); - return; - } - - fd_fence_ref(pctx->screen, &ctx->last_fence, NULL); + fd_fence_ref(ctx->base.screen, &ctx->last_fence, NULL); batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, true); @@ -526,40 +538,16 @@ fd_batch_reference(&batch, NULL); } -static void -fd6_resource_copy_region(struct pipe_context *pctx, - struct pipe_resource *dst, - unsigned dst_level, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, - unsigned src_level, - const struct pipe_box *src_box) +static bool +fd6_blit(struct fd_context *ctx, const struct pipe_blit_info *info) { - struct pipe_blit_info info; - - debug_assert(src->format == dst->format); + if (!can_do_blit(info)) { + return false; + } - memset(&info, 0, sizeof info); - info.dst.resource = dst; - info.dst.level = dst_level; - info.dst.box.x = dstx; - info.dst.box.y = dsty; - info.dst.box.z = dstz; - info.dst.box.width = src_box->width; - info.dst.box.height = src_box->height; - assert(info.dst.box.width >= 0); - assert(info.dst.box.height >= 0); - info.dst.box.depth = 1; - info.dst.format = dst->format; - info.src.resource = src; - info.src.level = src_level; - info.src.box = *src_box; - info.src.format = src->format; - info.mask = util_format_get_mask(src->format); - info.filter = PIPE_TEX_FILTER_NEAREST; - info.scissor_enable = 0; + emit_blit(ctx, info); - fd6_blit(pctx, &info); + return true; } void @@ -568,8 +556,7 @@ if (fd_mesa_debug & FD_DBG_NOBLIT) return; - pctx->resource_copy_region = fd6_resource_copy_region; - pctx->blit = fd6_blit; + fd_context(pctx)->blit = fd6_blit; } unsigned @@ -578,5 +565,8 @@ /* 
basically just has to be a format we can blit, so uploads/downloads * via linear staging buffer works: */ - return TILE6_3; + if (ok_format(tmpl->format)) + return TILE6_3; + + return TILE6_LINEAR; } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_context.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_context.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -48,8 +48,6 @@ fd_context_destroy(pctx); - fd_bo_del(fd6_ctx->vs_pvt_mem); - fd_bo_del(fd6_ctx->fs_pvt_mem); fd_bo_del(fd6_ctx->vsc_data); fd_bo_del(fd6_ctx->vsc_data2); fd_bo_del(fd6_ctx->blit_mem); @@ -104,27 +102,23 @@ if (!pctx) return NULL; + util_blitter_set_texture_multisample(fd6_ctx->base.blitter, true); + /* fd_context_init overwrites delete_rasterizer_state, so set this * here. */ pctx->delete_rasterizer_state = fd6_rasterizer_state_delete; pctx->delete_depth_stencil_alpha_state = fd6_depth_stencil_alpha_state_delete; - fd6_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000, - DRM_FREEDRENO_GEM_TYPE_KMEM); - - fd6_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000, - DRM_FREEDRENO_GEM_TYPE_KMEM); - fd6_ctx->vsc_data = fd_bo_new(screen->dev, (A6XX_VSC_DATA_PITCH * 32) + 0x100, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_data"); fd6_ctx->vsc_data2 = fd_bo_new(screen->dev, A6XX_VSC_DATA2_PITCH * 32, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_data2"); fd6_ctx->blit_mem = fd_bo_new(screen->dev, 0x1000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "blit"); fd_context_setup_common_vbos(&fd6_ctx->base); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_context.h mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_context.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_context.h 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -32,15 +32,13 @@ #include "freedreno_context.h" -#include "ir3_shader.h" +#include "ir3/ir3_shader.h" #include "a6xx.xml.h" struct fd6_context { struct fd_context base; - struct fd_bo *vs_pvt_mem, *fs_pvt_mem; - /* Two buffers related to hw binning / visibility stream (VSC). * Compared to previous generations * (1) we cannot specify individual buffers per VSC, instead diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_draw.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_draw.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_draw.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -254,18 +254,6 @@ return true; } -static bool is_z32(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - case PIPE_FORMAT_Z32_UNORM: - case PIPE_FORMAT_Z32_FLOAT: - return true; - default: - return false; - } -} - static void fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) { @@ -317,7 +305,7 @@ OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_ACC0, 1); + OUT_PKT4(ring, REG_A6XX_SP_2D_SRC_FORMAT, 1); OUT_RING(ring, 0x0000f410); OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); @@ -383,135 +371,48 @@ fd6_cache_flush(batch, ring); } +static bool is_z32(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + case PIPE_FORMAT_Z32_UNORM: + case PIPE_FORMAT_Z32_FLOAT: + return true; + default: + return false; + } +} + static bool fd6_clear(struct fd_context *ctx, unsigned buffers, const union pipe_color_union *color, double depth, unsigned stencil) { struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; - struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); - struct fd_ringbuffer *ring = ctx->batch->draw; - - if ((buffers & 
(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) && - is_z32(pfb->zsbuf->format)) + const bool has_depth = pfb->zsbuf; + unsigned color_buffers = buffers >> 2; + unsigned i; + + /* If we're clearing after draws, fallback to 3D pipe clears. We could + * use blitter clears in the draw batch but then we'd have to patch up the + * gmem offsets. This doesn't seem like a useful thing to optimize for + * however.*/ + if (ctx->batch->num_draws > 0) return false; - OUT_PKT4(ring, REG_A6XX_RB_BLIT_SCISSOR_TL, 2); - OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_TL_X(scissor->minx) | - A6XX_RB_BLIT_SCISSOR_TL_Y(scissor->miny)); - OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_BR_X(scissor->maxx - 1) | - A6XX_RB_BLIT_SCISSOR_BR_Y(scissor->maxy - 1)); - - if (buffers & PIPE_CLEAR_COLOR) { - for (int i = 0; i < pfb->nr_cbufs; i++) { - union util_color uc = {0}; - - if (!pfb->cbufs[i]) - continue; - - if (!(buffers & (PIPE_CLEAR_COLOR0 << i))) - continue; - - enum pipe_format pfmt = pfb->cbufs[i]->format; - - // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP?? 
- union pipe_color_union swapped; - switch (fd6_pipe2swap(pfmt)) { - case WZYX: - swapped.ui[0] = color->ui[0]; - swapped.ui[1] = color->ui[1]; - swapped.ui[2] = color->ui[2]; - swapped.ui[3] = color->ui[3]; - break; - case WXYZ: - swapped.ui[2] = color->ui[0]; - swapped.ui[1] = color->ui[1]; - swapped.ui[0] = color->ui[2]; - swapped.ui[3] = color->ui[3]; - break; - case ZYXW: - swapped.ui[3] = color->ui[0]; - swapped.ui[0] = color->ui[1]; - swapped.ui[1] = color->ui[2]; - swapped.ui[2] = color->ui[3]; - break; - case XYZW: - swapped.ui[3] = color->ui[0]; - swapped.ui[2] = color->ui[1]; - swapped.ui[1] = color->ui[2]; - swapped.ui[0] = color->ui[3]; - break; - } - - if (util_format_is_pure_uint(pfmt)) { - util_format_write_4ui(pfmt, swapped.ui, 0, &uc, 0, 0, 0, 1, 1); - } else if (util_format_is_pure_sint(pfmt)) { - util_format_write_4i(pfmt, swapped.i, 0, &uc, 0, 0, 0, 1, 1); - } else { - util_pack_color(swapped.f, pfmt, &uc); - } - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1); - OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) | - A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt))); - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); - OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM | - A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf)); - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1); - OUT_RINGP(ring, i, &ctx->batch->gmem_patches); - - OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1); - OUT_RING(ring, 0); - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4); - OUT_RING(ring, uc.ui[0]); - OUT_RING(ring, uc.ui[1]); - OUT_RING(ring, uc.ui[2]); - OUT_RING(ring, uc.ui[3]); - - fd6_emit_blit(ctx->batch, ring); - } - } - - if (pfb->zsbuf && (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { - enum pipe_format pfmt = pfb->zsbuf->format; - uint32_t clear = util_pack_z_stencil(pfmt, depth, stencil); - uint32_t mask = 0; - - if (buffers & PIPE_CLEAR_DEPTH) - mask |= 0x1; - - if (buffers & PIPE_CLEAR_STENCIL) - mask |= 0x2; - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1); - 
OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) | - A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt))); - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); - OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM | - // XXX UNK0 for separate stencil ?? - A6XX_RB_BLIT_INFO_DEPTH | - A6XX_RB_BLIT_INFO_CLEAR_MASK(mask)); - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1); - OUT_RINGP(ring, MAX_RENDER_TARGETS, &ctx->batch->gmem_patches); - - OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1); - OUT_RING(ring, 0); - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1); - OUT_RING(ring, clear); - - fd6_emit_blit(ctx->batch, ring); - - if (pfb->zsbuf && (buffers & PIPE_CLEAR_DEPTH)) { - struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture); - if (zsbuf->lrz) { - zsbuf->lrz_valid = true; - fd6_clear_lrz(ctx->batch, zsbuf, depth); - } + foreach_bit(i, color_buffers) + ctx->batch->clear_color[i] = *color; + if (buffers & PIPE_CLEAR_DEPTH) + ctx->batch->clear_depth = depth; + if (buffers & PIPE_CLEAR_STENCIL) + ctx->batch->clear_stencil = stencil; + + ctx->batch->fast_cleared |= buffers; + + if (has_depth && (buffers & PIPE_CLEAR_DEPTH)) { + struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture); + if (zsbuf->lrz && !is_z32(pfb->zsbuf->format)) { + zsbuf->lrz_valid = true; + fd6_clear_lrz(ctx->batch, zsbuf, depth); } } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_emit.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_emit.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_emit.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_emit.c 2019-03-31 23:16:37.000000000 +0000 @@ -46,16 +46,17 @@ #include "fd6_zsa.h" static uint32_t -shader_t_to_opcode(enum shader_t type) +shader_t_to_opcode(gl_shader_stage type) { switch (type) { - case SHADER_VERTEX: - case SHADER_TCS: - case SHADER_TES: - case SHADER_GEOM: + case MESA_SHADER_VERTEX: + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + case 
MESA_SHADER_GEOMETRY: return CP_LOAD_STATE6_GEOM; - case SHADER_FRAGMENT: - case SHADER_COMPUTE: + case MESA_SHADER_FRAGMENT: + case MESA_SHADER_COMPUTE: + case MESA_SHADER_KERNEL: return CP_LOAD_STATE6_FRAG; default: unreachable("bad shader type"); @@ -67,7 +68,7 @@ * sizedwords: size of const value buffer */ static void -fd6_emit_const(struct fd_ringbuffer *ring, enum shader_t type, +fd6_emit_const(struct fd_ringbuffer *ring, gl_shader_stage type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc) { @@ -105,7 +106,7 @@ } static void -fd6_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, +fd6_emit_const_bo(struct fd_ringbuffer *ring, gl_shader_stage type, boolean write, uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets) { uint32_t anum = align(num, 2); @@ -394,13 +395,8 @@ static const struct fd6_pipe_sampler_view dummy_view = {}; const struct fd6_pipe_sampler_view *view = tex->textures[i] ? 
fd6_pipe_sampler_view(tex->textures[i]) : &dummy_view; - enum a6xx_tile_mode tile_mode = TILE6_LINEAR; - if (view->base.texture) - tile_mode = fd_resource(view->base.texture)->tile_mode; - - OUT_RING(state, view->texconst0 | - A6XX_TEX_CONST_0_TILE_MODE(tile_mode)); + OUT_RING(state, view->texconst0); OUT_RING(state, view->texconst1); OUT_RING(state, view->texconst2); OUT_RING(state, view->texconst3); @@ -677,12 +673,6 @@ OUT_RING(ring, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(scissor->maxx - 1) | A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(scissor->maxy - 1)); - OUT_PKT4(ring, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0, 2); - OUT_RING(ring, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->minx) | - A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->miny)); - OUT_RING(ring, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->maxx - 1) | - A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->maxy - 1)); - ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx); ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny); ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx); @@ -690,7 +680,8 @@ } if (dirty & FD_DIRTY_VIEWPORT) { - fd_wfi(ctx->batch, ring); + struct pipe_scissor_state *scissor = &ctx->viewport_scissor; + OUT_PKT4(ring, REG_A6XX_GRAS_CL_VPORT_XOFFSET_0, 6); OUT_RING(ring, A6XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0])); OUT_RING(ring, A6XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0])); @@ -698,6 +689,19 @@ OUT_RING(ring, A6XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1])); OUT_RING(ring, A6XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2])); OUT_RING(ring, A6XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2])); + + OUT_PKT4(ring, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0, 2); + OUT_RING(ring, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->minx) | + A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->miny)); + OUT_RING(ring, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->maxx - 1) | + 
A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->maxy - 1)); + + unsigned guardband_x = fd_calc_guardband(scissor->maxx - scissor->minx); + unsigned guardband_y = fd_calc_guardband(scissor->maxy - scissor->miny); + + OUT_PKT4(ring, REG_A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ, 1); + OUT_RING(ring, A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(guardband_x) | + A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(guardband_y)); } if (dirty & FD_DIRTY_PROG) { @@ -733,7 +737,6 @@ OUT_PKT4(ring, REG_A6XX_VFD_UNKNOWN_A008, 1); OUT_RING(ring, 0); - OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_0, 1); OUT_RING(ring, rasterizer->pc_primitive_cntl | COND(emit->info->primitive_restart && emit->info->index_size, @@ -761,6 +764,7 @@ struct fd_ringbuffer *vsconstobj = fd_submit_new_ringbuffer( ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING); + OUT_WFI5(vsconstobj); ir3_emit_vs_consts(vp, vsconstobj, ctx, emit->info); fd6_emit_add_group(emit, vsconstobj, FD6_GROUP_VS_CONST, 0x7); fd_ringbuffer_del(vsconstobj); @@ -770,12 +774,13 @@ struct fd_ringbuffer *fsconstobj = fd_submit_new_ringbuffer( ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING); + OUT_WFI5(fsconstobj); ir3_emit_fs_consts(fp, fsconstobj, ctx); fd6_emit_add_group(emit, fsconstobj, FD6_GROUP_FS_CONST, 0x6); fd_ringbuffer_del(fsconstobj); } - struct pipe_stream_output_info *info = &vp->shader->stream_output; + struct ir3_stream_output_info *info = &vp->shader->stream_output; if (info->num_outputs) { struct fd_streamout_stateobj *so = &ctx->streamout; @@ -871,14 +876,18 @@ OUT_RING(ring, blend_control); } - OUT_PKT4(ring, REG_A6XX_RB_BLEND_CNTL, 1); - OUT_RING(ring, blend->rb_blend_cntl | - A6XX_RB_BLEND_CNTL_SAMPLE_MASK(0xffff)); - OUT_PKT4(ring, REG_A6XX_SP_BLEND_CNTL, 1); OUT_RING(ring, blend->sp_blend_cntl); } + if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_SAMPLE_MASK)) { + struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend); + + OUT_PKT4(ring, REG_A6XX_RB_BLEND_CNTL, 1); + OUT_RING(ring, blend->rb_blend_cntl | + 
A6XX_RB_BLEND_CNTL_SAMPLE_MASK(ctx->sample_mask)); + } + if (dirty & FD_DIRTY_BLEND_COLOR) { struct pipe_blend_color *bcolor = &ctx->blend_color; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_emit.h mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_emit.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_emit.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_emit.h 2019-03-31 23:16:37.000000000 +0000 @@ -34,7 +34,7 @@ #include "fd6_context.h" #include "fd6_format.h" #include "fd6_program.h" -#include "ir3_shader.h" +#include "ir3_gallium.h" struct fd_ringbuffer; @@ -155,14 +155,15 @@ } static inline enum a6xx_state_block -fd6_stage2shadersb(enum shader_t type) +fd6_stage2shadersb(gl_shader_stage type) { switch (type) { - case SHADER_VERTEX: + case MESA_SHADER_VERTEX: return SB6_VS_SHADER; - case SHADER_FRAGMENT: + case MESA_SHADER_FRAGMENT: return SB6_FS_SHADER; - case SHADER_COMPUTE: + case MESA_SHADER_COMPUTE: + case MESA_SHADER_KERNEL: return SB6_CS_SHADER; default: unreachable("bad shader type"); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_format.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_format.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_format.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_format.c 2019-03-31 23:16:37.000000000 +0000 @@ -29,6 +29,7 @@ #include "util/u_format.h" #include "fd6_format.h" +#include "freedreno_resource.h" /* Specifies the table of all the formats and their features. 
Also supplies @@ -419,8 +420,8 @@ } } -static inline enum a6xx_tex_swiz -tex_swiz(unsigned swiz) +enum a6xx_tex_swiz +fd6_pipe2swiz(unsigned swiz) { switch (swiz) { default: @@ -434,19 +435,37 @@ } uint32_t -fd6_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g, +fd6_tex_swiz(struct pipe_resource *prsc, unsigned swizzle_r, unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a) { const struct util_format_description *desc = - util_format_description(format); + util_format_description(prsc->format); unsigned char swiz[4] = { swizzle_r, swizzle_g, swizzle_b, swizzle_a, - }, rswiz[4]; + }, rswiz[4], *swizp; util_format_compose_swizzles(desc->swizzle, swiz, rswiz); - return A6XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) | - A6XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) | - A6XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) | - A6XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3])); + if (fd_resource(prsc)->tile_mode) { + /* for tiled modes, we don't get SWAP, so manually apply that + * extra step of swizzle: + */ + enum a3xx_color_swap swap = fd6_pipe2swap(prsc->format); + unsigned char swapswiz[][4] = { + [WZYX] = { 0, 1, 2, 3 }, + [WXYZ] = { 2, 1, 0, 3 }, + [ZYXW] = { 3, 0, 1, 2 }, + [XYZW] = { 3, 2, 1, 0 }, + }; + + util_format_compose_swizzles(swapswiz[swap], rswiz, swiz); + swizp = swiz; + } else { + swizp = rswiz; + } + + return A6XX_TEX_CONST_0_SWIZ_X(fd6_pipe2swiz(swizp[0])) | + A6XX_TEX_CONST_0_SWIZ_Y(fd6_pipe2swiz(swizp[1])) | + A6XX_TEX_CONST_0_SWIZ_Z(fd6_pipe2swiz(swizp[2])) | + A6XX_TEX_CONST_0_SWIZ_W(fd6_pipe2swiz(swizp[3])); } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_format.h mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_format.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_format.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_format.h 2019-03-31 23:16:37.000000000 +0000 @@ -38,8 +38,78 @@ enum a3xx_color_swap fd6_pipe2swap(enum pipe_format format); enum 
a6xx_tex_fetchsize fd6_pipe2fetchsize(enum pipe_format format); enum a6xx_depth_format fd6_pipe2depth(enum pipe_format format); +enum a6xx_tex_swiz fd6_pipe2swiz(unsigned swiz); -uint32_t fd6_tex_swiz(enum pipe_format format, unsigned swizzle_r, +uint32_t fd6_tex_swiz(struct pipe_resource *prsc, unsigned swizzle_r, unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a); +static inline enum a6xx_2d_ifmt +fd6_ifmt(enum a6xx_color_fmt fmt) +{ + switch (fmt) { + case RB6_A8_UNORM: + case RB6_R8_UNORM: + case RB6_R8_SNORM: + case RB6_R8G8_UNORM: + case RB6_R8G8_SNORM: + case RB6_R8G8B8A8_UNORM: + case RB6_R8G8B8_UNORM: + case RB6_R8G8B8A8_SNORM: + return R2D_UNORM8; + + case RB6_R32_UINT: + case RB6_R32_SINT: + case RB6_R32G32_UINT: + case RB6_R32G32_SINT: + case RB6_R32G32B32A32_UINT: + case RB6_R32G32B32A32_SINT: + return R2D_INT32; + + case RB6_R16_UINT: + case RB6_R16_SINT: + case RB6_R16G16_UINT: + case RB6_R16G16_SINT: + case RB6_R16G16B16A16_UINT: + case RB6_R16G16B16A16_SINT: + return R2D_INT16; + + case RB6_R8_UINT: + case RB6_R8_SINT: + case RB6_R8G8_UINT: + case RB6_R8G8_SINT: + case RB6_R8G8B8A8_UINT: + case RB6_R8G8B8A8_SINT: + return R2D_INT8; + + case RB6_R16_UNORM: + case RB6_R16_SNORM: + case RB6_R16G16_UNORM: + case RB6_R16G16_SNORM: + case RB6_R16G16B16A16_UNORM: + case RB6_R16G16B16A16_SNORM: + case RB6_R32_FLOAT: + case RB6_R32G32_FLOAT: + case RB6_R32G32B32A32_FLOAT: + return R2D_FLOAT32; + + case RB6_R16_FLOAT: + case RB6_R16G16_FLOAT: + case RB6_R16G16B16A16_FLOAT: + return R2D_FLOAT16; + + case RB6_R4G4B4A4_UNORM: + case RB6_R5G5B5A1_UNORM: + case RB6_R5G6B5_UNORM: + case RB6_R10G10B10A2_UNORM: + case RB6_R10G10B10A2_UINT: + case RB6_R11G11B10_FLOAT: + case RB6_X8Z24_UNORM: + // ??? 
+ return 0; + default: + unreachable("bad format"); + return 0; + } +} + #endif /* FD6_UTIL_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c 2019-03-31 23:16:37.000000000 +0000 @@ -64,6 +64,7 @@ struct fd_resource_slice *slice = NULL; uint32_t stride = 0; uint32_t offset = 0; + uint32_t tile_mode; if (!pfb->cbufs[i]) continue; @@ -79,7 +80,6 @@ uint32_t base = gmem ? gmem->cbuf_base[i] : 0; slice = fd_resource_slice(rsc, psurf->u.tex.level); format = fd6_pipe2color(pformat); - swap = fd6_pipe2swap(pformat); sint = util_format_is_pure_sint(pformat); uint = util_format_is_pure_uint(pformat); @@ -89,14 +89,21 @@ offset = fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer); - stride = slice->pitch * rsc->cpp; + stride = slice->pitch * rsc->cpp * pfb->samples; + swap = rsc->tile_mode ? 
WZYX : fd6_pipe2swap(pformat); + + if (rsc->tile_mode && + fd_resource_level_linear(psurf->texture, psurf->u.tex.level)) + tile_mode = TILE6_LINEAR; + else + tile_mode = rsc->tile_mode; debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); debug_assert((offset + slice->size0) <= fd_bo_size(rsc->bo)); OUT_PKT4(ring, REG_A6XX_RB_MRT_BUF_INFO(i), 6); OUT_RING(ring, A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) | - A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(rsc->tile_mode) | + A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) | A6XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap)); OUT_RING(ring, A6XX_RB_MRT_PITCH(stride)); OUT_RING(ring, A6XX_RB_MRT_ARRAY_PITCH(slice->size0)); @@ -254,22 +261,6 @@ } static void -patch_gmem_bases(struct fd_batch *batch) -{ - struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; - unsigned i; - - for (i = 0; i < fd_patch_num_elements(&batch->gmem_patches); i++) { - struct fd_cs_patch *patch = fd_patch_element(&batch->gmem_patches, i); - if (patch->val < MAX_RENDER_TARGETS) - *patch->cs = gmem->cbuf_base[patch->val]; - else - *patch->cs = gmem->zsbuf_base[0]; - } - util_dynarray_resize(&batch->gmem_patches, 0); -} - -static void update_render_cntl(struct fd_batch *batch, bool binning) { struct fd_ringbuffer *ring = batch->gmem; @@ -292,14 +283,13 @@ struct fd6_context *fd6_ctx = fd6_context(ctx); struct fd_gmem_stateobj *gmem = &ctx->gmem; struct fd_ringbuffer *ring = batch->gmem; - unsigned n = gmem->nbins_x * gmem->nbins_y; int i; OUT_PKT4(ring, REG_A6XX_VSC_BIN_SIZE, 3); OUT_RING(ring, A6XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) | A6XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h)); OUT_RELOCW(ring, fd6_ctx->vsc_data, - n * A6XX_VSC_DATA_PITCH, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */ + 32 * A6XX_VSC_DATA_PITCH, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */ OUT_PKT4(ring, REG_A6XX_VSC_BIN_COUNT, 1); OUT_RING(ring, A6XX_VSC_BIN_COUNT_NX(gmem->nbins_x) | @@ -431,25 +421,32 @@ } static void -disable_msaa(struct fd_ringbuffer *ring) +emit_msaa(struct fd_ringbuffer *ring, unsigned nr) { - 
// TODO MSAA + enum a3xx_msaa_samples samples = fd_msaa_samples(nr); + OUT_PKT4(ring, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2); - OUT_RING(ring, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE)); - OUT_RING(ring, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) | - A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE); + OUT_RING(ring, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(samples)); + OUT_RING(ring, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) | + COND(samples == MSAA_ONE, A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE)); OUT_PKT4(ring, REG_A6XX_GRAS_RAS_MSAA_CNTL, 2); - OUT_RING(ring, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE)); - OUT_RING(ring, A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) | - A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE); + OUT_RING(ring, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(samples)); + OUT_RING(ring, A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(samples) | + COND(samples == MSAA_ONE, A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE)); OUT_PKT4(ring, REG_A6XX_RB_RAS_MSAA_CNTL, 2); - OUT_RING(ring, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE)); - OUT_RING(ring, A6XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) | - A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE); + OUT_RING(ring, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(samples)); + OUT_RING(ring, A6XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) | + COND(samples == MSAA_ONE, A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE)); + + OUT_PKT4(ring, REG_A6XX_RB_MSAA_CNTL, 1); + OUT_RING(ring, A6XX_RB_MSAA_CNTL_SAMPLES(samples)); } +static void prepare_tile_setup_ib(struct fd_batch *batch); +static void prepare_tile_fini_ib(struct fd_batch *batch); + /* before first tile */ static void fd6_emit_tile_init(struct fd_batch *batch) @@ -468,6 +465,9 @@ fd6_cache_flush(batch, ring); + prepare_tile_setup_ib(batch); + prepare_tile_fini_ib(batch); + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); OUT_RING(ring, 0x0); @@ -478,10 +478,7 @@ emit_zs(ring, pfb->zsbuf, &ctx->gmem); emit_mrt(ring, pfb, &ctx->gmem); - - patch_gmem_bases(batch); - - disable_msaa(ring); + emit_msaa(ring, pfb->samples); if (use_hw_binning(batch)) { set_bin_size(ring, gmem->bin_w, 
gmem->bin_h, @@ -552,9 +549,7 @@ OUT_RING(ring, A6XX_VPC_SO_OVERRIDE_SO_DISABLE); if (use_hw_binning(batch)) { - struct fd_gmem_stateobj *gmem = &ctx->gmem; struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[tile->p]; - unsigned n = gmem->nbins_x * gmem->nbins_y; OUT_PKT7(ring, CP_WAIT_FOR_ME, 0); @@ -570,7 +565,7 @@ OUT_RELOC(ring, fd6_ctx->vsc_data, /* VSC_PIPE[p].DATA_ADDRESS */ (tile->p * A6XX_VSC_DATA_PITCH), 0, 0); OUT_RELOC(ring, fd6_ctx->vsc_data, /* VSC_SIZE_ADDRESS + (p * 4) */ - (tile->p * 4) + (n * A6XX_VSC_DATA_PITCH), 0, 0); + (tile->p * 4) + (32 * A6XX_VSC_DATA_PITCH), 0, 0); OUT_RELOC(ring, fd6_ctx->vsc_data2, (tile->p * A6XX_VSC_DATA2_PITCH), 0, 0); } else { @@ -583,9 +578,8 @@ } static void -set_blit_scissor(struct fd_batch *batch) +set_blit_scissor(struct fd_batch *batch, struct fd_ringbuffer *ring) { - struct fd_ringbuffer *ring = batch->gmem; struct pipe_scissor_state blit_scissor; struct pipe_framebuffer_state *pfb = &batch->framebuffer; @@ -604,34 +598,47 @@ } static void -emit_blit(struct fd_batch *batch, uint32_t base, +emit_blit(struct fd_batch *batch, + struct fd_ringbuffer *ring, + uint32_t base, struct pipe_surface *psurf, - struct fd_resource *rsc) + bool stencil) { - struct fd_ringbuffer *ring = batch->gmem; struct fd_resource_slice *slice; + struct fd_resource *rsc = fd_resource(psurf->texture); + enum pipe_format pfmt = psurf->format; uint32_t offset; + /* separate stencil case: */ + if (stencil) { + rsc = rsc->stencil; + pfmt = rsc->base.format; + } + slice = fd_resource_slice(rsc, psurf->u.tex.level); offset = fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer); debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); - enum pipe_format pfmt = psurf->format; enum a6xx_color_fmt format = fd6_pipe2color(pfmt); uint32_t stride = slice->pitch * rsc->cpp; uint32_t size = slice->size0; - enum a3xx_color_swap swap = fd6_pipe2swap(pfmt); - - // TODO: tile mode - // bool tiled; - // tiled = rsc->tile_mode && - // 
!fd_resource_level_linear(psurf->texture, psurf->u.tex.level); + enum a3xx_color_swap swap = rsc->tile_mode ? WZYX : fd6_pipe2swap(pfmt); + enum a3xx_msaa_samples samples = + fd_msaa_samples(rsc->base.nr_samples); + uint32_t tile_mode; + + if (rsc->tile_mode && + fd_resource_level_linear(&rsc->base, psurf->u.tex.level)) + tile_mode = TILE6_LINEAR; + else + tile_mode = rsc->tile_mode; OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 5); OUT_RING(ring, - A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) | + A6XX_RB_BLIT_DST_INFO_TILE_MODE(tile_mode) | + A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) | A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format) | A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(swap)); OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* RB_BLIT_DST_LO/HI */ @@ -645,13 +652,14 @@ } static void -emit_restore_blit(struct fd_batch *batch, uint32_t base, +emit_restore_blit(struct fd_batch *batch, + struct fd_ringbuffer *ring, + uint32_t base, struct pipe_surface *psurf, - struct fd_resource *rsc, unsigned buffer) { - struct fd_ringbuffer *ring = batch->gmem; uint32_t info = 0; + bool stencil = false; switch (buffer) { case FD_BUFFER_COLOR: @@ -659,6 +667,7 @@ break; case FD_BUFFER_STENCIL: info |= A6XX_RB_BLIT_INFO_UNK0; + stencil = true; break; case FD_BUFFER_DEPTH: info |= A6XX_RB_BLIT_INFO_DEPTH | A6XX_RB_BLIT_INFO_UNK0; @@ -671,64 +680,244 @@ OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); OUT_RING(ring, info | A6XX_RB_BLIT_INFO_GMEM); - emit_blit(batch, base, psurf, rsc); + emit_blit(batch, ring, base, psurf, stencil); +} + +static void +emit_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) +{ + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + enum a3xx_msaa_samples samples = fd_msaa_samples(pfb->samples); + + uint32_t buffers = batch->fast_cleared; + + if (buffers & PIPE_CLEAR_COLOR) { + + for (int i = 0; i < pfb->nr_cbufs; i++) { + union pipe_color_union *color = &batch->clear_color[i]; + union util_color uc = {0}; + + if 
(!pfb->cbufs[i]) + continue; + + if (!(buffers & (PIPE_CLEAR_COLOR0 << i))) + continue; + + enum pipe_format pfmt = pfb->cbufs[i]->format; + + // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP?? + union pipe_color_union swapped; + switch (fd6_pipe2swap(pfmt)) { + case WZYX: + swapped.ui[0] = color->ui[0]; + swapped.ui[1] = color->ui[1]; + swapped.ui[2] = color->ui[2]; + swapped.ui[3] = color->ui[3]; + break; + case WXYZ: + swapped.ui[2] = color->ui[0]; + swapped.ui[1] = color->ui[1]; + swapped.ui[0] = color->ui[2]; + swapped.ui[3] = color->ui[3]; + break; + case ZYXW: + swapped.ui[3] = color->ui[0]; + swapped.ui[0] = color->ui[1]; + swapped.ui[1] = color->ui[2]; + swapped.ui[2] = color->ui[3]; + break; + case XYZW: + swapped.ui[3] = color->ui[0]; + swapped.ui[2] = color->ui[1]; + swapped.ui[1] = color->ui[2]; + swapped.ui[0] = color->ui[3]; + break; + } + + if (util_format_is_pure_uint(pfmt)) { + util_format_write_4ui(pfmt, swapped.ui, 0, &uc, 0, 0, 0, 1, 1); + } else if (util_format_is_pure_sint(pfmt)) { + util_format_write_4i(pfmt, swapped.i, 0, &uc, 0, 0, 0, 1, 1); + } else { + util_pack_color(swapped.f, pfmt, &uc); + } + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1); + OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) | + A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) | + A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt))); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); + OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM | + A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf)); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1); + OUT_RING(ring, gmem->cbuf_base[i]); + + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4); + OUT_RING(ring, uc.ui[0]); + OUT_RING(ring, uc.ui[1]); + OUT_RING(ring, uc.ui[2]); + OUT_RING(ring, uc.ui[3]); + + fd6_emit_blit(batch, ring); + } + } + + const bool has_depth = pfb->zsbuf; + const bool has_separate_stencil = + has_depth && 
fd_resource(pfb->zsbuf->texture)->stencil; + + /* First clear depth or combined depth/stencil. */ + if ((has_depth && (buffers & PIPE_CLEAR_DEPTH)) || + (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) { + enum pipe_format pfmt = pfb->zsbuf->format; + uint32_t clear_value; + uint32_t mask = 0; + + if (has_separate_stencil) { + pfmt = util_format_get_depth_only(pfb->zsbuf->format); + clear_value = util_pack_z(pfmt, batch->clear_depth); + } else { + pfmt = pfb->zsbuf->format; + clear_value = util_pack_z_stencil(pfmt, batch->clear_depth, + batch->clear_stencil); + } + + if (buffers & PIPE_CLEAR_DEPTH) + mask |= 0x1; + + if (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) + mask |= 0x2; + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1); + OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) | + A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) | + A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt))); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); + OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM | + // XXX UNK0 for separate stencil ?? + A6XX_RB_BLIT_INFO_DEPTH | + A6XX_RB_BLIT_INFO_CLEAR_MASK(mask)); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1); + OUT_RING(ring, gmem->zsbuf_base[0]); + + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1); + OUT_RING(ring, clear_value); + + fd6_emit_blit(batch, ring); + } + + /* Then clear the separate stencil buffer in case of 32 bit depth + * formats with separate stencil. 
*/ + if (has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) { + OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1); + OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) | + A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) | + A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(RB6_R8_UINT)); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); + OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM | + //A6XX_RB_BLIT_INFO_UNK0 | + A6XX_RB_BLIT_INFO_DEPTH | + A6XX_RB_BLIT_INFO_CLEAR_MASK(0x1)); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1); + OUT_RING(ring, gmem->zsbuf_base[1]); + + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1); + OUT_RING(ring, batch->clear_stencil & 0xff); + + fd6_emit_blit(batch, ring); + } } /* * transfer from system memory to gmem */ static void -fd6_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile) +emit_restore_blits(struct fd_batch *batch, struct fd_ringbuffer *ring) { struct fd_context *ctx = batch->ctx; struct fd_gmem_stateobj *gmem = &ctx->gmem; struct pipe_framebuffer_state *pfb = &batch->framebuffer; - set_blit_scissor(batch); - - if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) { + if (batch->restore & FD_BUFFER_COLOR) { unsigned i; for (i = 0; i < pfb->nr_cbufs; i++) { if (!pfb->cbufs[i]) continue; if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i))) continue; - emit_restore_blit(batch, gmem->cbuf_base[i], pfb->cbufs[i], - fd_resource(pfb->cbufs[i]->texture), + emit_restore_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i], FD_BUFFER_COLOR); } } - if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { + if (batch->restore & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); - if (!rsc->stencil || fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH)) { - emit_restore_blit(batch, gmem->zsbuf_base[0], pfb->zsbuf, rsc, + if (!rsc->stencil || (batch->restore & FD_BUFFER_DEPTH)) { + emit_restore_blit(batch, 
ring, gmem->zsbuf_base[0], pfb->zsbuf, FD_BUFFER_DEPTH); } - if (rsc->stencil && fd_gmem_needs_restore(batch, tile, FD_BUFFER_STENCIL)) { - emit_restore_blit(batch, gmem->zsbuf_base[1], pfb->zsbuf, rsc->stencil, + if (rsc->stencil && (batch->restore & FD_BUFFER_STENCIL)) { + emit_restore_blit(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf, FD_BUFFER_STENCIL); } } } +static void +prepare_tile_setup_ib(struct fd_batch *batch) +{ + batch->tile_setup = fd_submit_new_ringbuffer(batch->submit, 0x1000, + FD_RINGBUFFER_STREAMING); + + set_blit_scissor(batch, batch->tile_setup); + + emit_restore_blits(batch, batch->tile_setup); + emit_clears(batch, batch->tile_setup); +} + +/* + * transfer from system memory to gmem + */ +static void +fd6_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile) +{ +} + /* before IB to rendering cmds: */ static void fd6_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile) { + fd6_emit_ib(batch->gmem, batch->tile_setup); } static void -emit_resolve_blit(struct fd_batch *batch, uint32_t base, +emit_resolve_blit(struct fd_batch *batch, + struct fd_ringbuffer *ring, + uint32_t base, struct pipe_surface *psurf, - struct fd_resource *rsc, unsigned buffer) { - struct fd_ringbuffer *ring = batch->gmem; uint32_t info = 0; + bool stencil = false; - if (!rsc->valid) + if (!fd_resource(psurf->texture)->valid) return; switch (buffer) { @@ -736,6 +925,7 @@ break; case FD_BUFFER_STENCIL: info |= A6XX_RB_BLIT_INFO_UNK0; + stencil = true; break; case FD_BUFFER_DEPTH: info |= A6XX_RB_BLIT_INFO_DEPTH; @@ -748,7 +938,7 @@ OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); OUT_RING(ring, info); - emit_blit(batch, base, psurf, rsc); + emit_blit(batch, ring, base, psurf, stencil); } /* @@ -756,12 +946,16 @@ */ static void -fd6_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile) +prepare_tile_fini_ib(struct fd_batch *batch) { struct fd_context *ctx = batch->ctx; struct fd_gmem_stateobj *gmem = &ctx->gmem; struct pipe_framebuffer_state *pfb = 
&batch->framebuffer; - struct fd_ringbuffer *ring = batch->gmem; + struct fd_ringbuffer *ring; + + batch->tile_fini = fd_submit_new_ringbuffer(batch->submit, 0x1000, + FD_RINGBUFFER_STREAMING); + ring = batch->tile_fini; if (use_hw_binning(batch)) { OUT_PKT7(ring, CP_SET_MARKER, 1); @@ -783,17 +977,19 @@ OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10); emit_marker6(ring, 7); - set_blit_scissor(batch); + set_blit_scissor(batch, ring); if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH)) { - emit_resolve_blit(batch, gmem->zsbuf_base[0], pfb->zsbuf, rsc, + emit_resolve_blit(batch, ring, + gmem->zsbuf_base[0], pfb->zsbuf, FD_BUFFER_DEPTH); } if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL)) { - emit_resolve_blit(batch, gmem->zsbuf_base[1], pfb->zsbuf, rsc->stencil, + emit_resolve_blit(batch, ring, + gmem->zsbuf_base[1], pfb->zsbuf, FD_BUFFER_STENCIL); } } @@ -805,14 +1001,19 @@ continue; if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i))) continue; - emit_resolve_blit(batch, gmem->cbuf_base[i], pfb->cbufs[i], - fd_resource(pfb->cbufs[i]->texture), + emit_resolve_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i], FD_BUFFER_COLOR); } } } static void +fd6_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile) +{ + fd6_emit_ib(batch->gmem, batch->tile_fini); +} + +static void fd6_emit_tile_fini(struct fd_batch *batch) { struct fd_ringbuffer *ring = batch->gmem; @@ -874,8 +1075,7 @@ emit_zs(ring, pfb->zsbuf, NULL); emit_mrt(ring, pfb, NULL); - - disable_msaa(ring); + emit_msaa(ring, pfb->samples); } static void diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_image.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_image.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_image.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_image.c 2019-03-31 
23:16:37.000000000 +0000 @@ -43,6 +43,7 @@ }; struct fd6_image { + struct pipe_resource *prsc; enum pipe_format pfmt; enum a6xx_tex_fmt fmt; enum a6xx_tex_fetchsize fetchsize; @@ -70,6 +71,7 @@ return; } + img->prsc = prsc; img->pfmt = format; img->fmt = fd6_pipe2tex(format); img->fetchsize = fd6_pipe2fetchsize(format); @@ -112,7 +114,7 @@ OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); OUT_RING(ring, A6XX_TEX_CONST_0_FMT(img->fmt) | - fd6_tex_swiz(img->pfmt, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, + fd6_tex_swiz(img->prsc, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W) | COND(img->srgb, A6XX_TEX_CONST_0_SRGB)); OUT_RING(ring, A6XX_TEX_CONST_1_WIDTH(img->width) | diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_program.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_program.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_program.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_program.c 2019-03-31 23:16:37.000000000 +0000 @@ -41,7 +41,7 @@ static struct ir3_shader * create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso, - enum shader_t type) + gl_shader_stage type) { struct fd_context *ctx = fd_context(pctx); struct ir3_compiler *compiler = ctx->screen->compiler; @@ -52,7 +52,7 @@ fd6_fp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { - return create_shader_stateobj(pctx, cso, SHADER_FRAGMENT); + return create_shader_stateobj(pctx, cso, MESA_SHADER_FRAGMENT); } static void @@ -68,7 +68,7 @@ fd6_vp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { - return create_shader_stateobj(pctx, cso, SHADER_VERTEX); + return create_shader_stateobj(pctx, cso, MESA_SHADER_VERTEX); } static void @@ -100,11 +100,12 @@ } switch (so->type) { - case SHADER_VERTEX: + case MESA_SHADER_VERTEX: opcode = CP_LOAD_STATE6_GEOM; break; - case SHADER_FRAGMENT: - case SHADER_COMPUTE: + case MESA_SHADER_FRAGMENT: + case 
MESA_SHADER_COMPUTE: + case MESA_SHADER_KERNEL: opcode = CP_LOAD_STATE6_FRAG; break; default: @@ -121,7 +122,7 @@ OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); } else { - OUT_RELOC(ring, so->bo, 0, 0, 0); + OUT_RELOCD(ring, so->bo, 0, 0, 0); } /* for how clever coverity is, it is sometimes rather dull, and @@ -140,14 +141,14 @@ static void link_stream_out(struct ir3_shader_linkage *l, const struct ir3_shader_variant *v) { - const struct pipe_stream_output_info *strmout = &v->shader->stream_output; + const struct ir3_stream_output_info *strmout = &v->shader->stream_output; /* * First, any stream-out varyings not already in linkage map (ie. also * consumed by frag shader) need to be added: */ for (unsigned i = 0; i < strmout->num_outputs; i++) { - const struct pipe_stream_output *out = &strmout->output[i]; + const struct ir3_stream_output *out = &strmout->output[i]; unsigned k = out->register_index; unsigned compmask = (1 << (out->num_components + out->start_component)) - 1; @@ -185,7 +186,7 @@ setup_stream_out(struct fd6_program_state *state, const struct ir3_shader_variant *v, struct ir3_shader_linkage *l) { - const struct pipe_stream_output_info *strmout = &v->shader->stream_output; + const struct ir3_stream_output_info *strmout = &v->shader->stream_output; struct fd6_streamout_state *tf = &state->tf; memset(tf, 0, sizeof(*tf)); @@ -195,7 +196,7 @@ debug_assert(tf->prog_count < ARRAY_SIZE(tf->prog)); for (unsigned i = 0; i < strmout->num_outputs; i++) { - const struct pipe_stream_output *out = &strmout->output[i]; + const struct ir3_stream_output *out = &strmout->output[i]; unsigned k = out->register_index; unsigned idx; @@ -402,7 +403,7 @@ OUT_RING(ring, A6XX_SP_VS_CTRL_REG0_THREADSIZE(fssz) | A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) | A6XX_SP_VS_CTRL_REG0_MERGEDREGS | - A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. 
+ A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(s[VS].v->branchstack) | COND(s[VS].v->num_samp > 0, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE)); struct ir3_shader_linkage l = {0}; @@ -524,7 +525,7 @@ 0x1000000 | A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) | A6XX_SP_FS_CTRL_REG0_MERGEDREGS | - A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. + A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(s[FS].v->branchstack) | COND(s[FS].v->num_samp > 0, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE)); OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A982, 1); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_program.h mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_program.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_program.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_program.h 2019-03-31 23:16:37.000000000 +0000 @@ -30,7 +30,8 @@ #include "pipe/p_context.h" #include "freedreno_context.h" -#include "ir3_shader.h" + +#include "ir3/ir3_shader.h" #include "ir3_cache.h" struct fd6_streamout_state { diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.c 2019-03-31 23:16:37.000000000 +0000 @@ -69,7 +69,9 @@ A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(cso->offset_clamp); so->gras_su_cntl = - A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(cso->line_width/2.0); + A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(cso->line_width/2.0) | + COND(cso->multisample, A6XX_GRAS_SU_CNTL_MSAA_ENABLE); + #if 0 so->pc_raster_cntl = A6XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) | diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_resource.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_resource.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_resource.c 2018-12-07 
18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_resource.c 2019-03-31 23:16:37.000000000 +0000 @@ -27,22 +27,30 @@ #include "fd6_resource.h" -/* indexed by cpp: */ +/* indexed by cpp, including msaa 2x and 4x: */ static const struct { unsigned pitchalign; unsigned heightalign; } tile_alignment[] = { [1] = { 128, 32 }, - [2] = { 128, 16 }, - [3] = { 128, 16 }, + [2] = { 64, 32 }, + [3] = { 64, 32 }, [4] = { 64, 16 }, + [6] = { 64, 16 }, [8] = { 64, 16 }, [12] = { 64, 16 }, [16] = { 64, 16 }, + [24] = { 64, 16 }, + [32] = { 64, 16 }, + [48] = { 64, 16 }, + [64] = { 64, 16 }, + + /* special cases for r16: */ + [0] = { 128, 16 }, }; /* NOTE: good way to test this is: (for example) - * piglit/bin/texelFetch fs sampler2D 100x100x1-100x300x1 + * piglit/bin/texelFetch fs sampler3D 100x100x8 */ static uint32_t setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format) @@ -51,41 +59,61 @@ struct fd_screen *screen = fd_screen(prsc->screen); enum util_format_layout layout = util_format_description(format)->layout; uint32_t pitchalign = screen->gmem_alignw; - uint32_t heightalign; uint32_t level, size = 0; - uint32_t width = prsc->width0; - uint32_t height = prsc->height0; uint32_t depth = prsc->depth0; + /* linear dimensions: */ + uint32_t lwidth = prsc->width0; + uint32_t lheight = prsc->height0; + /* tile_mode dimensions: */ + uint32_t twidth = util_next_power_of_two(lwidth); + uint32_t theight = util_next_power_of_two(lheight); /* in layer_first layout, the level (slice) contains just one * layer (since in fact the layer contains the slices) */ uint32_t layers_in_level = rsc->layer_first ? 
1 : prsc->array_size; + int ta = rsc->cpp; + + /* The z16/r16 formats seem to not play by the normal tiling rules: */ + if ((rsc->cpp == 2) && (util_format_get_nr_components(format) == 1)) + ta = 0; - heightalign = tile_alignment[rsc->cpp].heightalign; + debug_assert(ta < ARRAY_SIZE(tile_alignment)); + debug_assert(tile_alignment[ta].pitchalign); for (level = 0; level <= prsc->last_level; level++) { struct fd_resource_slice *slice = fd_resource_slice(rsc, level); bool linear_level = fd_resource_level_linear(prsc, level); + uint32_t width, height; + + /* tiled levels of 3D textures are rounded up to PoT dimensions: */ + if ((prsc->target == PIPE_TEXTURE_3D) && rsc->tile_mode && !linear_level) { + width = twidth; + height = theight; + } else { + width = lwidth; + height = lheight; + } uint32_t aligned_height = height; uint32_t blocks; if (rsc->tile_mode && !linear_level) { - pitchalign = tile_alignment[rsc->cpp].pitchalign; - aligned_height = align(aligned_height, heightalign); + pitchalign = tile_alignment[ta].pitchalign; + aligned_height = align(aligned_height, + tile_alignment[ta].heightalign); } else { pitchalign = 64; - - /* The blits used for mem<->gmem work at a granularity of - * 32x32, which can cause faults due to over-fetch on the - * last level. The simple solution is to over-allocate a - * bit the last level to ensure any over-fetch is harmless. - * The pitch is already sufficiently aligned, but height - * may not be: - */ - if ((level == prsc->last_level) && (prsc->target != PIPE_BUFFER)) - aligned_height = align(aligned_height, 32); } + /* The blits used for mem<->gmem work at a granularity of + * 32x32, which can cause faults due to over-fetch on the + * last level. The simple solution is to over-allocate a + * bit the last level to ensure any over-fetch is harmless. 
+ * The pitch is already sufficiently aligned, but height + * may not be: + */ + if ((level == prsc->last_level) && (prsc->target != PIPE_BUFFER)) + aligned_height = align(aligned_height, 32); + if (layout == UTIL_FORMAT_LAYOUT_ASTC) slice->pitch = util_align_npot(width, pitchalign * util_format_get_blockwidth(format)); @@ -96,34 +124,36 @@ blocks = util_format_get_nblocks(format, slice->pitch, aligned_height); /* 1d array and 2d array textures must all have the same layer size - * for each miplevel on a3xx. 3d textures can have different layer + * for each miplevel on a6xx. 3d textures can have different layer * sizes for high levels, but the hw auto-sizer is buggy (or at least * different than what this code does), so as soon as the layer size * range gets into range, we stop reducing it. */ - if (prsc->target == PIPE_TEXTURE_3D && ( - level == 1 || - (level > 1 && rsc->slices[level - 1].size0 > 0xf000))) - slice->size0 = align(blocks * rsc->cpp, alignment); - else if (level == 0 || rsc->layer_first || alignment == 1) + if (prsc->target == PIPE_TEXTURE_3D) { + if (level <= 1 || (rsc->slices[level - 1].size0 > 0xf000)) { + slice->size0 = align(blocks * rsc->cpp, alignment); + } else { + slice->size0 = rsc->slices[level - 1].size0; + } + } else { slice->size0 = align(blocks * rsc->cpp, alignment); - else - slice->size0 = rsc->slices[level - 1].size0; + } + + size += slice->size0 * depth * layers_in_level; #if 0 - debug_printf("%s: %ux%ux%u@%u: %2u: stride=%4u, size=%7u, aligned_height=%3u\n", + debug_printf("%s: %ux%ux%u@%u:\t%2u: stride=%4u, size=%6u,%7u, aligned_height=%3u, blocks=%u\n", util_format_name(prsc->format), - prsc->width0, prsc->height0, prsc->depth0, rsc->cpp, + width, height, depth, rsc->cpp, level, slice->pitch * rsc->cpp, - slice->size0 * depth * layers_in_level, - aligned_height); + slice->size0, size, aligned_height, blocks); #endif - size += slice->size0 * depth * layers_in_level; - - width = u_minify(width, 1); - height = u_minify(height, 1); 
depth = u_minify(depth, 1); + lwidth = u_minify(lwidth, 1); + lheight = u_minify(lheight, 1); + twidth = u_minify(twidth, 1); + theight = u_minify(theight, 1); } return size; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_screen.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_screen.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -29,11 +29,30 @@ #include "util/u_format.h" #include "fd6_screen.h" +#include "fd6_blitter.h" #include "fd6_context.h" #include "fd6_format.h" #include "fd6_resource.h" -#include "ir3_compiler.h" +#include "ir3/ir3_compiler.h" + +static bool +valid_sample_count(unsigned sample_count) +{ + switch (sample_count) { + case 0: + case 1: + case 2: + case 4: +// TODO seems 8x works, but increases lrz width or height.. but the +// blob I have doesn't seem to expose any egl configs w/ 8x, so +// just hide it for now and revisit later. 
+// case 8: + return true; + default: + return false; + } +} static boolean fd6_screen_is_format_supported(struct pipe_screen *pscreen, @@ -46,7 +65,7 @@ unsigned retval = 0; if ((target >= PIPE_MAX_TEXTURE_TYPES) || - (sample_count > 1)) { /* TODO add MSAA */ + !valid_sample_count(sample_count)) { DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x", util_format_name(format), target, sample_count, usage); return FALSE; @@ -60,11 +79,11 @@ retval |= PIPE_BIND_VERTEX_BUFFER; } - if ((usage & PIPE_BIND_SAMPLER_VIEW) && + if ((usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE)) && (target == PIPE_BUFFER || util_format_get_blocksize(format) != 12) && (fd6_pipe2tex(format) != (enum a6xx_tex_fmt)~0)) { - retval |= PIPE_BIND_SAMPLER_VIEW; + retval |= usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE); } if ((usage & (PIPE_BIND_RENDER_TARGET | @@ -116,4 +135,5 @@ pscreen->is_format_supported = fd6_screen_is_format_supported; screen->setup_slices = fd6_setup_slices; + screen->tile_mode = fd6_tile_mode; } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_texture.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_texture.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_texture.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_texture.c 2019-03-31 23:16:37.000000000 +0000 @@ -245,7 +245,8 @@ so->texconst0 = A6XX_TEX_CONST_0_FMT(fd6_pipe2tex(format)) | - fd6_tex_swiz(format, cso->swizzle_r, cso->swizzle_g, + A6XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) | + fd6_tex_swiz(prsc, cso->swizzle_r, cso->swizzle_g, cso->swizzle_b, cso->swizzle_a); /* NOTE: since we sample z24s8 using 8888_UINT format, the swizzle @@ -256,8 +257,12 @@ * Note that gallium expects stencil sampler to return (s,s,s,s) * which isn't quite true. To make that happen we'd have to massage * the swizzle. But in practice only the .x component is used. 
+ * + * Skip this in the tile case because tiled formats are not swapped + * and we have already applied the inverse swap in fd6_tex_swiz() + * to componsate for that. */ - if (format == PIPE_FORMAT_X24S8_UINT) { + if ((format == PIPE_FORMAT_X24S8_UINT) && !rsc->tile_mode) { so->texconst0 |= A6XX_TEX_CONST_0_SWAP(XYZW); } @@ -280,12 +285,17 @@ so->offset = cso->u.buf.offset; } else { unsigned miplevels; + enum a6xx_tile_mode tile_mode = TILE6_LINEAR; lvl = fd_sampler_first_level(cso); miplevels = fd_sampler_last_level(cso) - lvl; layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1; - so->texconst0 |= A6XX_TEX_CONST_0_MIPLVLS(miplevels); + if (!fd_resource_level_linear(prsc, lvl)) + tile_mode = fd_resource(prsc)->tile_mode; + + so->texconst0 |= A6XX_TEX_CONST_0_MIPLVLS(miplevels) | + A6XX_TEX_CONST_0_TILE_MODE(tile_mode); so->texconst1 = A6XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) | A6XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl)); @@ -324,12 +334,12 @@ break; case PIPE_TEXTURE_3D: so->texconst3 = + A6XX_TEX_CONST_3_MIN_LAYERSZ(rsc->slices[prsc->last_level].size0) | A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->slices[lvl].size0); so->texconst5 = A6XX_TEX_CONST_5_DEPTH(u_minify(prsc->depth0, lvl)); break; default: - so->texconst3 = 0x00000000; break; } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/adreno_common.xml.h mesa-19.0.1/src/gallium/drivers/freedreno/adreno_common.xml.h --- mesa-18.3.3/src/gallium/drivers/freedreno/adreno_common.xml.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/adreno_common.xml.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,535 +0,0 @@ -#ifndef ADRENO_COMMON_XML -#define ADRENO_COMMON_XML - -/* Autogenerated file, DO NOT EDIT manually! 
- -This file was generated by the rules-ng-ng headergen tool in this git repository: -http://github.com/freedreno/envytools/ -git clone https://github.com/freedreno/envytools.git - -The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 37936 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14201 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 42864 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 140514 bytes, from 2018-10-08 21:57:35) -- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) -- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) - -Copyright (C) 2013-2018 by the following authors: -- Rob Clark (robclark) -- Ilia Mirkin (imirkin) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial 
-portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - - -enum chip { - A2XX = 0, - A3XX = 0, - A4XX = 0, - A5XX = 0, - A6XX = 0, -}; - -enum adreno_pa_su_sc_draw { - PC_DRAW_POINTS = 0, - PC_DRAW_LINES = 1, - PC_DRAW_TRIANGLES = 2, -}; - -enum adreno_compare_func { - FUNC_NEVER = 0, - FUNC_LESS = 1, - FUNC_EQUAL = 2, - FUNC_LEQUAL = 3, - FUNC_GREATER = 4, - FUNC_NOTEQUAL = 5, - FUNC_GEQUAL = 6, - FUNC_ALWAYS = 7, -}; - -enum adreno_stencil_op { - STENCIL_KEEP = 0, - STENCIL_ZERO = 1, - STENCIL_REPLACE = 2, - STENCIL_INCR_CLAMP = 3, - STENCIL_DECR_CLAMP = 4, - STENCIL_INVERT = 5, - STENCIL_INCR_WRAP = 6, - STENCIL_DECR_WRAP = 7, -}; - -enum adreno_rb_blend_factor { - FACTOR_ZERO = 0, - FACTOR_ONE = 1, - FACTOR_SRC_COLOR = 4, - FACTOR_ONE_MINUS_SRC_COLOR = 5, - FACTOR_SRC_ALPHA = 6, - FACTOR_ONE_MINUS_SRC_ALPHA = 7, - FACTOR_DST_COLOR = 8, - FACTOR_ONE_MINUS_DST_COLOR = 9, - FACTOR_DST_ALPHA = 10, - FACTOR_ONE_MINUS_DST_ALPHA = 11, - FACTOR_CONSTANT_COLOR = 12, - FACTOR_ONE_MINUS_CONSTANT_COLOR = 13, - FACTOR_CONSTANT_ALPHA = 14, - FACTOR_ONE_MINUS_CONSTANT_ALPHA = 15, - FACTOR_SRC_ALPHA_SATURATE = 16, - FACTOR_SRC1_COLOR = 20, - FACTOR_ONE_MINUS_SRC1_COLOR = 21, - FACTOR_SRC1_ALPHA = 22, - FACTOR_ONE_MINUS_SRC1_ALPHA = 23, -}; - -enum adreno_rb_surface_endian { - ENDIAN_NONE = 0, - ENDIAN_8IN16 = 1, - ENDIAN_8IN32 = 2, - ENDIAN_16IN32 = 3, - ENDIAN_8IN64 = 4, - ENDIAN_8IN128 = 5, -}; - -enum adreno_rb_dither_mode { - DITHER_DISABLE = 0, - DITHER_ALWAYS = 1, - DITHER_IF_ALPHA_OFF = 2, -}; - -enum 
adreno_rb_depth_format { - DEPTHX_16 = 0, - DEPTHX_24_8 = 1, - DEPTHX_32 = 2, -}; - -enum adreno_rb_copy_control_mode { - RB_COPY_RESOLVE = 1, - RB_COPY_CLEAR = 2, - RB_COPY_DEPTH_STENCIL = 5, -}; - -enum a3xx_rop_code { - ROP_CLEAR = 0, - ROP_NOR = 1, - ROP_AND_INVERTED = 2, - ROP_COPY_INVERTED = 3, - ROP_AND_REVERSE = 4, - ROP_INVERT = 5, - ROP_XOR = 6, - ROP_NAND = 7, - ROP_AND = 8, - ROP_EQUIV = 9, - ROP_NOOP = 10, - ROP_OR_INVERTED = 11, - ROP_COPY = 12, - ROP_OR_REVERSE = 13, - ROP_OR = 14, - ROP_SET = 15, -}; - -enum a3xx_render_mode { - RB_RENDERING_PASS = 0, - RB_TILING_PASS = 1, - RB_RESOLVE_PASS = 2, - RB_COMPUTE_PASS = 3, -}; - -enum a3xx_msaa_samples { - MSAA_ONE = 0, - MSAA_TWO = 1, - MSAA_FOUR = 2, -}; - -enum a3xx_threadmode { - MULTI = 0, - SINGLE = 1, -}; - -enum a3xx_instrbuffermode { - CACHE = 0, - BUFFER = 1, -}; - -enum a3xx_threadsize { - TWO_QUADS = 0, - FOUR_QUADS = 1, -}; - -enum a3xx_color_swap { - WZYX = 0, - WXYZ = 1, - ZYXW = 2, - XYZW = 3, -}; - -enum a3xx_rb_blend_opcode { - BLEND_DST_PLUS_SRC = 0, - BLEND_SRC_MINUS_DST = 1, - BLEND_DST_MINUS_SRC = 2, - BLEND_MIN_DST_SRC = 3, - BLEND_MAX_DST_SRC = 4, -}; - -enum a4xx_tess_spacing { - EQUAL_SPACING = 0, - ODD_SPACING = 2, - EVEN_SPACING = 3, -}; - -#define REG_AXXX_CP_RB_BASE 0x000001c0 - -#define REG_AXXX_CP_RB_CNTL 0x000001c1 -#define AXXX_CP_RB_CNTL_BUFSZ__MASK 0x0000003f -#define AXXX_CP_RB_CNTL_BUFSZ__SHIFT 0 -static inline uint32_t AXXX_CP_RB_CNTL_BUFSZ(uint32_t val) -{ - return ((val) << AXXX_CP_RB_CNTL_BUFSZ__SHIFT) & AXXX_CP_RB_CNTL_BUFSZ__MASK; -} -#define AXXX_CP_RB_CNTL_BLKSZ__MASK 0x00003f00 -#define AXXX_CP_RB_CNTL_BLKSZ__SHIFT 8 -static inline uint32_t AXXX_CP_RB_CNTL_BLKSZ(uint32_t val) -{ - return ((val) << AXXX_CP_RB_CNTL_BLKSZ__SHIFT) & AXXX_CP_RB_CNTL_BLKSZ__MASK; -} -#define AXXX_CP_RB_CNTL_BUF_SWAP__MASK 0x00030000 -#define AXXX_CP_RB_CNTL_BUF_SWAP__SHIFT 16 -static inline uint32_t AXXX_CP_RB_CNTL_BUF_SWAP(uint32_t val) -{ - return ((val) << 
AXXX_CP_RB_CNTL_BUF_SWAP__SHIFT) & AXXX_CP_RB_CNTL_BUF_SWAP__MASK; -} -#define AXXX_CP_RB_CNTL_POLL_EN 0x00100000 -#define AXXX_CP_RB_CNTL_NO_UPDATE 0x08000000 -#define AXXX_CP_RB_CNTL_RPTR_WR_EN 0x80000000 - -#define REG_AXXX_CP_RB_RPTR_ADDR 0x000001c3 -#define AXXX_CP_RB_RPTR_ADDR_SWAP__MASK 0x00000003 -#define AXXX_CP_RB_RPTR_ADDR_SWAP__SHIFT 0 -static inline uint32_t AXXX_CP_RB_RPTR_ADDR_SWAP(uint32_t val) -{ - return ((val) << AXXX_CP_RB_RPTR_ADDR_SWAP__SHIFT) & AXXX_CP_RB_RPTR_ADDR_SWAP__MASK; -} -#define AXXX_CP_RB_RPTR_ADDR_ADDR__MASK 0xfffffffc -#define AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT 2 -static inline uint32_t AXXX_CP_RB_RPTR_ADDR_ADDR(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT) & AXXX_CP_RB_RPTR_ADDR_ADDR__MASK; -} - -#define REG_AXXX_CP_RB_RPTR 0x000001c4 - -#define REG_AXXX_CP_RB_WPTR 0x000001c5 - -#define REG_AXXX_CP_RB_WPTR_DELAY 0x000001c6 - -#define REG_AXXX_CP_RB_RPTR_WR 0x000001c7 - -#define REG_AXXX_CP_RB_WPTR_BASE 0x000001c8 - -#define REG_AXXX_CP_QUEUE_THRESHOLDS 0x000001d5 -#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__MASK 0x0000000f -#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__SHIFT 0 -static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(uint32_t val) -{ - return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__MASK; -} -#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__MASK 0x00000f00 -#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__SHIFT 8 -static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(uint32_t val) -{ - return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__MASK; -} -#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__MASK 0x000f0000 -#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__SHIFT 16 -static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(uint32_t val) -{ - return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__SHIFT) & 
AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__MASK; -} - -#define REG_AXXX_CP_MEQ_THRESHOLDS 0x000001d6 -#define AXXX_CP_MEQ_THRESHOLDS_MEQ_END__MASK 0x001f0000 -#define AXXX_CP_MEQ_THRESHOLDS_MEQ_END__SHIFT 16 -static inline uint32_t AXXX_CP_MEQ_THRESHOLDS_MEQ_END(uint32_t val) -{ - return ((val) << AXXX_CP_MEQ_THRESHOLDS_MEQ_END__SHIFT) & AXXX_CP_MEQ_THRESHOLDS_MEQ_END__MASK; -} -#define AXXX_CP_MEQ_THRESHOLDS_ROQ_END__MASK 0x1f000000 -#define AXXX_CP_MEQ_THRESHOLDS_ROQ_END__SHIFT 24 -static inline uint32_t AXXX_CP_MEQ_THRESHOLDS_ROQ_END(uint32_t val) -{ - return ((val) << AXXX_CP_MEQ_THRESHOLDS_ROQ_END__SHIFT) & AXXX_CP_MEQ_THRESHOLDS_ROQ_END__MASK; -} - -#define REG_AXXX_CP_CSQ_AVAIL 0x000001d7 -#define AXXX_CP_CSQ_AVAIL_RING__MASK 0x0000007f -#define AXXX_CP_CSQ_AVAIL_RING__SHIFT 0 -static inline uint32_t AXXX_CP_CSQ_AVAIL_RING(uint32_t val) -{ - return ((val) << AXXX_CP_CSQ_AVAIL_RING__SHIFT) & AXXX_CP_CSQ_AVAIL_RING__MASK; -} -#define AXXX_CP_CSQ_AVAIL_IB1__MASK 0x00007f00 -#define AXXX_CP_CSQ_AVAIL_IB1__SHIFT 8 -static inline uint32_t AXXX_CP_CSQ_AVAIL_IB1(uint32_t val) -{ - return ((val) << AXXX_CP_CSQ_AVAIL_IB1__SHIFT) & AXXX_CP_CSQ_AVAIL_IB1__MASK; -} -#define AXXX_CP_CSQ_AVAIL_IB2__MASK 0x007f0000 -#define AXXX_CP_CSQ_AVAIL_IB2__SHIFT 16 -static inline uint32_t AXXX_CP_CSQ_AVAIL_IB2(uint32_t val) -{ - return ((val) << AXXX_CP_CSQ_AVAIL_IB2__SHIFT) & AXXX_CP_CSQ_AVAIL_IB2__MASK; -} - -#define REG_AXXX_CP_STQ_AVAIL 0x000001d8 -#define AXXX_CP_STQ_AVAIL_ST__MASK 0x0000007f -#define AXXX_CP_STQ_AVAIL_ST__SHIFT 0 -static inline uint32_t AXXX_CP_STQ_AVAIL_ST(uint32_t val) -{ - return ((val) << AXXX_CP_STQ_AVAIL_ST__SHIFT) & AXXX_CP_STQ_AVAIL_ST__MASK; -} - -#define REG_AXXX_CP_MEQ_AVAIL 0x000001d9 -#define AXXX_CP_MEQ_AVAIL_MEQ__MASK 0x0000001f -#define AXXX_CP_MEQ_AVAIL_MEQ__SHIFT 0 -static inline uint32_t AXXX_CP_MEQ_AVAIL_MEQ(uint32_t val) -{ - return ((val) << AXXX_CP_MEQ_AVAIL_MEQ__SHIFT) & AXXX_CP_MEQ_AVAIL_MEQ__MASK; -} - -#define REG_AXXX_SCRATCH_UMSK 
0x000001dc -#define AXXX_SCRATCH_UMSK_UMSK__MASK 0x000000ff -#define AXXX_SCRATCH_UMSK_UMSK__SHIFT 0 -static inline uint32_t AXXX_SCRATCH_UMSK_UMSK(uint32_t val) -{ - return ((val) << AXXX_SCRATCH_UMSK_UMSK__SHIFT) & AXXX_SCRATCH_UMSK_UMSK__MASK; -} -#define AXXX_SCRATCH_UMSK_SWAP__MASK 0x00030000 -#define AXXX_SCRATCH_UMSK_SWAP__SHIFT 16 -static inline uint32_t AXXX_SCRATCH_UMSK_SWAP(uint32_t val) -{ - return ((val) << AXXX_SCRATCH_UMSK_SWAP__SHIFT) & AXXX_SCRATCH_UMSK_SWAP__MASK; -} - -#define REG_AXXX_SCRATCH_ADDR 0x000001dd - -#define REG_AXXX_CP_ME_RDADDR 0x000001ea - -#define REG_AXXX_CP_STATE_DEBUG_INDEX 0x000001ec - -#define REG_AXXX_CP_STATE_DEBUG_DATA 0x000001ed - -#define REG_AXXX_CP_INT_CNTL 0x000001f2 -#define AXXX_CP_INT_CNTL_SW_INT_MASK 0x00080000 -#define AXXX_CP_INT_CNTL_T0_PACKET_IN_IB_MASK 0x00800000 -#define AXXX_CP_INT_CNTL_OPCODE_ERROR_MASK 0x01000000 -#define AXXX_CP_INT_CNTL_PROTECTED_MODE_ERROR_MASK 0x02000000 -#define AXXX_CP_INT_CNTL_RESERVED_BIT_ERROR_MASK 0x04000000 -#define AXXX_CP_INT_CNTL_IB_ERROR_MASK 0x08000000 -#define AXXX_CP_INT_CNTL_IB2_INT_MASK 0x20000000 -#define AXXX_CP_INT_CNTL_IB1_INT_MASK 0x40000000 -#define AXXX_CP_INT_CNTL_RB_INT_MASK 0x80000000 - -#define REG_AXXX_CP_INT_STATUS 0x000001f3 - -#define REG_AXXX_CP_INT_ACK 0x000001f4 - -#define REG_AXXX_CP_ME_CNTL 0x000001f6 -#define AXXX_CP_ME_CNTL_BUSY 0x20000000 -#define AXXX_CP_ME_CNTL_HALT 0x10000000 - -#define REG_AXXX_CP_ME_STATUS 0x000001f7 - -#define REG_AXXX_CP_ME_RAM_WADDR 0x000001f8 - -#define REG_AXXX_CP_ME_RAM_RADDR 0x000001f9 - -#define REG_AXXX_CP_ME_RAM_DATA 0x000001fa - -#define REG_AXXX_CP_DEBUG 0x000001fc -#define AXXX_CP_DEBUG_PREDICATE_DISABLE 0x00800000 -#define AXXX_CP_DEBUG_PROG_END_PTR_ENABLE 0x01000000 -#define AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE 0x02000000 -#define AXXX_CP_DEBUG_PREFETCH_PASS_NOPS 0x04000000 -#define AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE 0x08000000 -#define AXXX_CP_DEBUG_PREFETCH_MATCH_DISABLE 0x10000000 -#define 
AXXX_CP_DEBUG_SIMPLE_ME_FLOW_CONTROL 0x40000000 -#define AXXX_CP_DEBUG_MIU_WRITE_PACK_DISABLE 0x80000000 - -#define REG_AXXX_CP_CSQ_RB_STAT 0x000001fd -#define AXXX_CP_CSQ_RB_STAT_RPTR__MASK 0x0000007f -#define AXXX_CP_CSQ_RB_STAT_RPTR__SHIFT 0 -static inline uint32_t AXXX_CP_CSQ_RB_STAT_RPTR(uint32_t val) -{ - return ((val) << AXXX_CP_CSQ_RB_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_RB_STAT_RPTR__MASK; -} -#define AXXX_CP_CSQ_RB_STAT_WPTR__MASK 0x007f0000 -#define AXXX_CP_CSQ_RB_STAT_WPTR__SHIFT 16 -static inline uint32_t AXXX_CP_CSQ_RB_STAT_WPTR(uint32_t val) -{ - return ((val) << AXXX_CP_CSQ_RB_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_RB_STAT_WPTR__MASK; -} - -#define REG_AXXX_CP_CSQ_IB1_STAT 0x000001fe -#define AXXX_CP_CSQ_IB1_STAT_RPTR__MASK 0x0000007f -#define AXXX_CP_CSQ_IB1_STAT_RPTR__SHIFT 0 -static inline uint32_t AXXX_CP_CSQ_IB1_STAT_RPTR(uint32_t val) -{ - return ((val) << AXXX_CP_CSQ_IB1_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_IB1_STAT_RPTR__MASK; -} -#define AXXX_CP_CSQ_IB1_STAT_WPTR__MASK 0x007f0000 -#define AXXX_CP_CSQ_IB1_STAT_WPTR__SHIFT 16 -static inline uint32_t AXXX_CP_CSQ_IB1_STAT_WPTR(uint32_t val) -{ - return ((val) << AXXX_CP_CSQ_IB1_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_IB1_STAT_WPTR__MASK; -} - -#define REG_AXXX_CP_CSQ_IB2_STAT 0x000001ff -#define AXXX_CP_CSQ_IB2_STAT_RPTR__MASK 0x0000007f -#define AXXX_CP_CSQ_IB2_STAT_RPTR__SHIFT 0 -static inline uint32_t AXXX_CP_CSQ_IB2_STAT_RPTR(uint32_t val) -{ - return ((val) << AXXX_CP_CSQ_IB2_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_IB2_STAT_RPTR__MASK; -} -#define AXXX_CP_CSQ_IB2_STAT_WPTR__MASK 0x007f0000 -#define AXXX_CP_CSQ_IB2_STAT_WPTR__SHIFT 16 -static inline uint32_t AXXX_CP_CSQ_IB2_STAT_WPTR(uint32_t val) -{ - return ((val) << AXXX_CP_CSQ_IB2_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_IB2_STAT_WPTR__MASK; -} - -#define REG_AXXX_CP_NON_PREFETCH_CNTRS 0x00000440 - -#define REG_AXXX_CP_STQ_ST_STAT 0x00000443 - -#define REG_AXXX_CP_ST_BASE 0x0000044d - -#define REG_AXXX_CP_ST_BUFSZ 0x0000044e - -#define REG_AXXX_CP_MEQ_STAT 0x0000044f - -#define 
REG_AXXX_CP_MIU_TAG_STAT 0x00000452 - -#define REG_AXXX_CP_BIN_MASK_LO 0x00000454 - -#define REG_AXXX_CP_BIN_MASK_HI 0x00000455 - -#define REG_AXXX_CP_BIN_SELECT_LO 0x00000456 - -#define REG_AXXX_CP_BIN_SELECT_HI 0x00000457 - -#define REG_AXXX_CP_IB1_BASE 0x00000458 - -#define REG_AXXX_CP_IB1_BUFSZ 0x00000459 - -#define REG_AXXX_CP_IB2_BASE 0x0000045a - -#define REG_AXXX_CP_IB2_BUFSZ 0x0000045b - -#define REG_AXXX_CP_STAT 0x0000047f -#define AXXX_CP_STAT_CP_BUSY 0x80000000 -#define AXXX_CP_STAT_VS_EVENT_FIFO_BUSY 0x40000000 -#define AXXX_CP_STAT_PS_EVENT_FIFO_BUSY 0x20000000 -#define AXXX_CP_STAT_CF_EVENT_FIFO_BUSY 0x10000000 -#define AXXX_CP_STAT_RB_EVENT_FIFO_BUSY 0x08000000 -#define AXXX_CP_STAT_ME_BUSY 0x04000000 -#define AXXX_CP_STAT_MIU_WR_C_BUSY 0x02000000 -#define AXXX_CP_STAT_CP_3D_BUSY 0x00800000 -#define AXXX_CP_STAT_CP_NRT_BUSY 0x00400000 -#define AXXX_CP_STAT_RBIU_SCRATCH_BUSY 0x00200000 -#define AXXX_CP_STAT_RCIU_ME_BUSY 0x00100000 -#define AXXX_CP_STAT_RCIU_PFP_BUSY 0x00080000 -#define AXXX_CP_STAT_MEQ_RING_BUSY 0x00040000 -#define AXXX_CP_STAT_PFP_BUSY 0x00020000 -#define AXXX_CP_STAT_ST_QUEUE_BUSY 0x00010000 -#define AXXX_CP_STAT_INDIRECT2_QUEUE_BUSY 0x00002000 -#define AXXX_CP_STAT_INDIRECTS_QUEUE_BUSY 0x00001000 -#define AXXX_CP_STAT_RING_QUEUE_BUSY 0x00000800 -#define AXXX_CP_STAT_CSF_BUSY 0x00000400 -#define AXXX_CP_STAT_CSF_ST_BUSY 0x00000200 -#define AXXX_CP_STAT_EVENT_BUSY 0x00000100 -#define AXXX_CP_STAT_CSF_INDIRECT2_BUSY 0x00000080 -#define AXXX_CP_STAT_CSF_INDIRECTS_BUSY 0x00000040 -#define AXXX_CP_STAT_CSF_RING_BUSY 0x00000020 -#define AXXX_CP_STAT_RCIU_BUSY 0x00000010 -#define AXXX_CP_STAT_RBIU_BUSY 0x00000008 -#define AXXX_CP_STAT_MIU_RD_RETURN_BUSY 0x00000004 -#define AXXX_CP_STAT_MIU_RD_REQ_BUSY 0x00000002 -#define AXXX_CP_STAT_MIU_WR_BUSY 0x00000001 - -#define REG_AXXX_CP_SCRATCH_REG0 0x00000578 - -#define REG_AXXX_CP_SCRATCH_REG1 0x00000579 - -#define REG_AXXX_CP_SCRATCH_REG2 0x0000057a - -#define REG_AXXX_CP_SCRATCH_REG3 
0x0000057b - -#define REG_AXXX_CP_SCRATCH_REG4 0x0000057c - -#define REG_AXXX_CP_SCRATCH_REG5 0x0000057d - -#define REG_AXXX_CP_SCRATCH_REG6 0x0000057e - -#define REG_AXXX_CP_SCRATCH_REG7 0x0000057f - -#define REG_AXXX_CP_ME_VS_EVENT_SRC 0x00000600 - -#define REG_AXXX_CP_ME_VS_EVENT_ADDR 0x00000601 - -#define REG_AXXX_CP_ME_VS_EVENT_DATA 0x00000602 - -#define REG_AXXX_CP_ME_VS_EVENT_ADDR_SWM 0x00000603 - -#define REG_AXXX_CP_ME_VS_EVENT_DATA_SWM 0x00000604 - -#define REG_AXXX_CP_ME_PS_EVENT_SRC 0x00000605 - -#define REG_AXXX_CP_ME_PS_EVENT_ADDR 0x00000606 - -#define REG_AXXX_CP_ME_PS_EVENT_DATA 0x00000607 - -#define REG_AXXX_CP_ME_PS_EVENT_ADDR_SWM 0x00000608 - -#define REG_AXXX_CP_ME_PS_EVENT_DATA_SWM 0x00000609 - -#define REG_AXXX_CP_ME_CF_EVENT_SRC 0x0000060a - -#define REG_AXXX_CP_ME_CF_EVENT_ADDR 0x0000060b - -#define REG_AXXX_CP_ME_CF_EVENT_DATA 0x0000060c - -#define REG_AXXX_CP_ME_NRT_ADDR 0x0000060d - -#define REG_AXXX_CP_ME_NRT_DATA 0x0000060e - -#define REG_AXXX_CP_ME_VS_FETCH_DONE_SRC 0x00000612 - -#define REG_AXXX_CP_ME_VS_FETCH_DONE_ADDR 0x00000613 - -#define REG_AXXX_CP_ME_VS_FETCH_DONE_DATA 0x00000614 - - -#endif /* ADRENO_COMMON_XML */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/adreno_pm4.xml.h mesa-19.0.1/src/gallium/drivers/freedreno/adreno_pm4.xml.h --- mesa-18.3.3/src/gallium/drivers/freedreno/adreno_pm4.xml.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/adreno_pm4.xml.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,1569 +0,0 @@ -#ifndef ADRENO_PM4_XML -#define ADRENO_PM4_XML - -/* Autogenerated file, DO NOT EDIT manually! 
- -This file was generated by the rules-ng-ng headergen tool in this git repository: -http://github.com/freedreno/envytools/ -git clone https://github.com/freedreno/envytools.git - -The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 37936 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14201 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 42864 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 140514 bytes, from 2018-10-08 21:57:35) -- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) -- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) - -Copyright (C) 2013-2018 by the following authors: -- Rob Clark (robclark) -- Ilia Mirkin (imirkin) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial 
-portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - - -enum vgt_event_type { - VS_DEALLOC = 0, - PS_DEALLOC = 1, - VS_DONE_TS = 2, - PS_DONE_TS = 3, - CACHE_FLUSH_TS = 4, - CONTEXT_DONE = 5, - CACHE_FLUSH = 6, - HLSQ_FLUSH = 7, - VIZQUERY_START = 7, - VIZQUERY_END = 8, - SC_WAIT_WC = 9, - RST_PIX_CNT = 13, - RST_VTX_CNT = 14, - TILE_FLUSH = 15, - STAT_EVENT = 16, - CACHE_FLUSH_AND_INV_TS_EVENT = 20, - ZPASS_DONE = 21, - CACHE_FLUSH_AND_INV_EVENT = 22, - PERFCOUNTER_START = 23, - PERFCOUNTER_STOP = 24, - VS_FETCH_DONE = 27, - FACENESS_FLUSH = 28, - FLUSH_SO_0 = 17, - FLUSH_SO_1 = 18, - FLUSH_SO_2 = 19, - FLUSH_SO_3 = 20, - PC_CCU_INVALIDATE_DEPTH = 24, - PC_CCU_INVALIDATE_COLOR = 25, - UNK_1C = 28, - UNK_1D = 29, - BLIT = 30, - UNK_25 = 37, - LRZ_FLUSH = 38, - UNK_2C = 44, - UNK_2D = 45, -}; - -enum pc_di_primtype { - DI_PT_NONE = 0, - DI_PT_POINTLIST_PSIZE = 1, - DI_PT_LINELIST = 2, - DI_PT_LINESTRIP = 3, - DI_PT_TRILIST = 4, - DI_PT_TRIFAN = 5, - DI_PT_TRISTRIP = 6, - DI_PT_LINELOOP = 7, - DI_PT_RECTLIST = 8, - DI_PT_POINTLIST = 9, - DI_PT_LINE_ADJ = 10, - DI_PT_LINESTRIP_ADJ = 11, - DI_PT_TRI_ADJ = 12, - DI_PT_TRISTRIP_ADJ = 13, -}; - -enum pc_di_src_sel { - DI_SRC_SEL_DMA = 0, - DI_SRC_SEL_IMMEDIATE = 1, - DI_SRC_SEL_AUTO_INDEX = 2, - DI_SRC_SEL_RESERVED = 3, -}; - -enum pc_di_face_cull_sel { - DI_FACE_CULL_NONE = 0, - DI_FACE_CULL_FETCH = 1, - DI_FACE_BACKFACE_CULL = 2, - DI_FACE_FRONTFACE_CULL = 3, -}; - -enum pc_di_index_size { - INDEX_SIZE_IGN = 0, - INDEX_SIZE_16_BIT = 0, - INDEX_SIZE_32_BIT = 1, - 
INDEX_SIZE_8_BIT = 2, - INDEX_SIZE_INVALID = 0, -}; - -enum pc_di_vis_cull_mode { - IGNORE_VISIBILITY = 0, - USE_VISIBILITY = 1, -}; - -enum adreno_pm4_packet_type { - CP_TYPE0_PKT = 0, - CP_TYPE1_PKT = 0x40000000, - CP_TYPE2_PKT = 0x80000000, - CP_TYPE3_PKT = 0xc0000000, - CP_TYPE4_PKT = 0x40000000, - CP_TYPE7_PKT = 0x70000000, -}; - -enum adreno_pm4_type3_packets { - CP_ME_INIT = 72, - CP_NOP = 16, - CP_PREEMPT_ENABLE = 28, - CP_PREEMPT_TOKEN = 30, - CP_INDIRECT_BUFFER = 63, - CP_INDIRECT_BUFFER_PFD = 55, - CP_WAIT_FOR_IDLE = 38, - CP_WAIT_REG_MEM = 60, - CP_WAIT_REG_EQ = 82, - CP_WAIT_REG_GTE = 83, - CP_WAIT_UNTIL_READ = 92, - CP_WAIT_IB_PFD_COMPLETE = 93, - CP_REG_RMW = 33, - CP_SET_BIN_DATA = 47, - CP_SET_BIN_DATA5 = 47, - CP_REG_TO_MEM = 62, - CP_MEM_WRITE = 61, - CP_MEM_WRITE_CNTR = 79, - CP_COND_EXEC = 68, - CP_COND_WRITE = 69, - CP_COND_WRITE5 = 69, - CP_EVENT_WRITE = 70, - CP_EVENT_WRITE_SHD = 88, - CP_EVENT_WRITE_CFL = 89, - CP_EVENT_WRITE_ZPD = 91, - CP_RUN_OPENCL = 49, - CP_DRAW_INDX = 34, - CP_DRAW_INDX_2 = 54, - CP_DRAW_INDX_BIN = 52, - CP_DRAW_INDX_2_BIN = 53, - CP_VIZ_QUERY = 35, - CP_SET_STATE = 37, - CP_SET_CONSTANT = 45, - CP_IM_LOAD = 39, - CP_IM_LOAD_IMMEDIATE = 43, - CP_LOAD_CONSTANT_CONTEXT = 46, - CP_INVALIDATE_STATE = 59, - CP_SET_SHADER_BASES = 74, - CP_SET_BIN_MASK = 80, - CP_SET_BIN_SELECT = 81, - CP_CONTEXT_UPDATE = 94, - CP_INTERRUPT = 64, - CP_IM_STORE = 44, - CP_SET_DRAW_INIT_FLAGS = 75, - CP_SET_PROTECTED_MODE = 95, - CP_BOOTSTRAP_UCODE = 111, - CP_LOAD_STATE = 48, - CP_LOAD_STATE4 = 48, - CP_COND_INDIRECT_BUFFER_PFE = 58, - CP_COND_INDIRECT_BUFFER_PFD = 50, - CP_INDIRECT_BUFFER_PFE = 63, - CP_SET_BIN = 76, - CP_TEST_TWO_MEMS = 113, - CP_REG_WR_NO_CTXT = 120, - CP_RECORD_PFP_TIMESTAMP = 17, - CP_SET_SECURE_MODE = 102, - CP_WAIT_FOR_ME = 19, - CP_SET_DRAW_STATE = 67, - CP_DRAW_INDX_OFFSET = 56, - CP_DRAW_INDIRECT = 40, - CP_DRAW_INDX_INDIRECT = 41, - CP_DRAW_AUTO = 36, - CP_UNKNOWN_19 = 25, - CP_UNKNOWN_1A = 26, - CP_UNKNOWN_4E = 
78, - CP_WIDE_REG_WRITE = 116, - CP_SCRATCH_TO_REG = 77, - CP_REG_TO_SCRATCH = 74, - CP_WAIT_MEM_WRITES = 18, - CP_COND_REG_EXEC = 71, - CP_MEM_TO_REG = 66, - CP_EXEC_CS_INDIRECT = 65, - CP_EXEC_CS = 51, - CP_PERFCOUNTER_ACTION = 80, - CP_SMMU_TABLE_UPDATE = 83, - CP_SET_MARKER = 101, - CP_SET_PSEUDO_REG = 86, - CP_CONTEXT_REG_BUNCH = 92, - CP_YIELD_ENABLE = 28, - CP_SKIP_IB2_ENABLE_GLOBAL = 29, - CP_SKIP_IB2_ENABLE_LOCAL = 35, - CP_SET_SUBDRAW_SIZE = 53, - CP_SET_VISIBILITY_OVERRIDE = 100, - CP_PREEMPT_ENABLE_GLOBAL = 105, - CP_PREEMPT_ENABLE_LOCAL = 106, - CP_CONTEXT_SWITCH_YIELD = 107, - CP_SET_RENDER_MODE = 108, - CP_COMPUTE_CHECKPOINT = 110, - CP_MEM_TO_MEM = 115, - CP_BLIT = 44, - CP_REG_TEST = 57, - CP_SET_MODE = 99, - CP_LOAD_STATE6_GEOM = 50, - CP_LOAD_STATE6_FRAG = 52, - IN_IB_PREFETCH_END = 23, - IN_SUBBLK_PREFETCH = 31, - IN_INSTR_PREFETCH = 32, - IN_INSTR_MATCH = 71, - IN_CONST_PREFETCH = 73, - IN_INCR_UPDT_STATE = 85, - IN_INCR_UPDT_CONST = 86, - IN_INCR_UPDT_INSTR = 87, - PKT4 = 4, - CP_UNK_A6XX_14 = 20, - CP_UNK_A6XX_36 = 54, - CP_UNK_A6XX_55 = 85, - CP_REG_WRITE = 109, -}; - -enum adreno_state_block { - SB_VERT_TEX = 0, - SB_VERT_MIPADDR = 1, - SB_FRAG_TEX = 2, - SB_FRAG_MIPADDR = 3, - SB_VERT_SHADER = 4, - SB_GEOM_SHADER = 5, - SB_FRAG_SHADER = 6, - SB_COMPUTE_SHADER = 7, -}; - -enum adreno_state_type { - ST_SHADER = 0, - ST_CONSTANTS = 1, -}; - -enum adreno_state_src { - SS_DIRECT = 0, - SS_INVALID_ALL_IC = 2, - SS_INVALID_PART_IC = 3, - SS_INDIRECT = 4, - SS_INDIRECT_TCM = 5, - SS_INDIRECT_STM = 6, -}; - -enum a4xx_state_block { - SB4_VS_TEX = 0, - SB4_HS_TEX = 1, - SB4_DS_TEX = 2, - SB4_GS_TEX = 3, - SB4_FS_TEX = 4, - SB4_CS_TEX = 5, - SB4_VS_SHADER = 8, - SB4_HS_SHADER = 9, - SB4_DS_SHADER = 10, - SB4_GS_SHADER = 11, - SB4_FS_SHADER = 12, - SB4_CS_SHADER = 13, - SB4_SSBO = 14, - SB4_CS_SSBO = 15, -}; - -enum a4xx_state_type { - ST4_SHADER = 0, - ST4_CONSTANTS = 1, -}; - -enum a4xx_state_src { - SS4_DIRECT = 0, - SS4_INDIRECT = 2, -}; - -enum 
a6xx_state_block { - SB6_VS_TEX = 0, - SB6_HS_TEX = 1, - SB6_DS_TEX = 2, - SB6_GS_TEX = 3, - SB6_FS_TEX = 4, - SB6_CS_TEX = 5, - SB6_VS_SHADER = 8, - SB6_HS_SHADER = 9, - SB6_DS_SHADER = 10, - SB6_GS_SHADER = 11, - SB6_FS_SHADER = 12, - SB6_CS_SHADER = 13, - SB6_SSBO = 14, - SB6_CS_SSBO = 15, -}; - -enum a6xx_state_type { - ST6_SHADER = 0, - ST6_CONSTANTS = 1, -}; - -enum a6xx_state_src { - SS6_DIRECT = 0, - SS6_INDIRECT = 2, -}; - -enum a4xx_index_size { - INDEX4_SIZE_8_BIT = 0, - INDEX4_SIZE_16_BIT = 1, - INDEX4_SIZE_32_BIT = 2, -}; - -enum cp_cond_function { - WRITE_ALWAYS = 0, - WRITE_LT = 1, - WRITE_LE = 2, - WRITE_EQ = 3, - WRITE_NE = 4, - WRITE_GE = 5, - WRITE_GT = 6, -}; - -enum render_mode_cmd { - BYPASS = 1, - BINNING = 2, - GMEM = 3, - BLIT2D = 5, - BLIT2DSCALE = 7, - END2D = 8, -}; - -enum cp_blit_cmd { - BLIT_OP_FILL = 0, - BLIT_OP_COPY = 1, - BLIT_OP_SCALE = 3, -}; - -enum a6xx_render_mode { - RM6_BYPASS = 1, - RM6_BINNING = 2, - RM6_GMEM = 4, - RM6_BLIT2D = 5, - RM6_RESOLVE = 6, - RM6_BLIT2DSCALE = 12, -}; - -enum pseudo_reg { - SMMU_INFO = 0, - NON_SECURE_SAVE_ADDR = 1, - SECURE_SAVE_ADDR = 2, - NON_PRIV_SAVE_ADDR = 3, - COUNTER = 4, -}; - -#define REG_CP_LOAD_STATE_0 0x00000000 -#define CP_LOAD_STATE_0_DST_OFF__MASK 0x0000ffff -#define CP_LOAD_STATE_0_DST_OFF__SHIFT 0 -static inline uint32_t CP_LOAD_STATE_0_DST_OFF(uint32_t val) -{ - return ((val) << CP_LOAD_STATE_0_DST_OFF__SHIFT) & CP_LOAD_STATE_0_DST_OFF__MASK; -} -#define CP_LOAD_STATE_0_STATE_SRC__MASK 0x00070000 -#define CP_LOAD_STATE_0_STATE_SRC__SHIFT 16 -static inline uint32_t CP_LOAD_STATE_0_STATE_SRC(enum adreno_state_src val) -{ - return ((val) << CP_LOAD_STATE_0_STATE_SRC__SHIFT) & CP_LOAD_STATE_0_STATE_SRC__MASK; -} -#define CP_LOAD_STATE_0_STATE_BLOCK__MASK 0x00380000 -#define CP_LOAD_STATE_0_STATE_BLOCK__SHIFT 19 -static inline uint32_t CP_LOAD_STATE_0_STATE_BLOCK(enum adreno_state_block val) -{ - return ((val) << CP_LOAD_STATE_0_STATE_BLOCK__SHIFT) & 
CP_LOAD_STATE_0_STATE_BLOCK__MASK; -} -#define CP_LOAD_STATE_0_NUM_UNIT__MASK 0xffc00000 -#define CP_LOAD_STATE_0_NUM_UNIT__SHIFT 22 -static inline uint32_t CP_LOAD_STATE_0_NUM_UNIT(uint32_t val) -{ - return ((val) << CP_LOAD_STATE_0_NUM_UNIT__SHIFT) & CP_LOAD_STATE_0_NUM_UNIT__MASK; -} - -#define REG_CP_LOAD_STATE_1 0x00000001 -#define CP_LOAD_STATE_1_STATE_TYPE__MASK 0x00000003 -#define CP_LOAD_STATE_1_STATE_TYPE__SHIFT 0 -static inline uint32_t CP_LOAD_STATE_1_STATE_TYPE(enum adreno_state_type val) -{ - return ((val) << CP_LOAD_STATE_1_STATE_TYPE__SHIFT) & CP_LOAD_STATE_1_STATE_TYPE__MASK; -} -#define CP_LOAD_STATE_1_EXT_SRC_ADDR__MASK 0xfffffffc -#define CP_LOAD_STATE_1_EXT_SRC_ADDR__SHIFT 2 -static inline uint32_t CP_LOAD_STATE_1_EXT_SRC_ADDR(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << CP_LOAD_STATE_1_EXT_SRC_ADDR__SHIFT) & CP_LOAD_STATE_1_EXT_SRC_ADDR__MASK; -} - -#define REG_CP_LOAD_STATE4_0 0x00000000 -#define CP_LOAD_STATE4_0_DST_OFF__MASK 0x00003fff -#define CP_LOAD_STATE4_0_DST_OFF__SHIFT 0 -static inline uint32_t CP_LOAD_STATE4_0_DST_OFF(uint32_t val) -{ - return ((val) << CP_LOAD_STATE4_0_DST_OFF__SHIFT) & CP_LOAD_STATE4_0_DST_OFF__MASK; -} -#define CP_LOAD_STATE4_0_STATE_SRC__MASK 0x00030000 -#define CP_LOAD_STATE4_0_STATE_SRC__SHIFT 16 -static inline uint32_t CP_LOAD_STATE4_0_STATE_SRC(enum a4xx_state_src val) -{ - return ((val) << CP_LOAD_STATE4_0_STATE_SRC__SHIFT) & CP_LOAD_STATE4_0_STATE_SRC__MASK; -} -#define CP_LOAD_STATE4_0_STATE_BLOCK__MASK 0x003c0000 -#define CP_LOAD_STATE4_0_STATE_BLOCK__SHIFT 18 -static inline uint32_t CP_LOAD_STATE4_0_STATE_BLOCK(enum a4xx_state_block val) -{ - return ((val) << CP_LOAD_STATE4_0_STATE_BLOCK__SHIFT) & CP_LOAD_STATE4_0_STATE_BLOCK__MASK; -} -#define CP_LOAD_STATE4_0_NUM_UNIT__MASK 0xffc00000 -#define CP_LOAD_STATE4_0_NUM_UNIT__SHIFT 22 -static inline uint32_t CP_LOAD_STATE4_0_NUM_UNIT(uint32_t val) -{ - return ((val) << CP_LOAD_STATE4_0_NUM_UNIT__SHIFT) & CP_LOAD_STATE4_0_NUM_UNIT__MASK; 
-} - -#define REG_CP_LOAD_STATE4_1 0x00000001 -#define CP_LOAD_STATE4_1_STATE_TYPE__MASK 0x00000003 -#define CP_LOAD_STATE4_1_STATE_TYPE__SHIFT 0 -static inline uint32_t CP_LOAD_STATE4_1_STATE_TYPE(enum a4xx_state_type val) -{ - return ((val) << CP_LOAD_STATE4_1_STATE_TYPE__SHIFT) & CP_LOAD_STATE4_1_STATE_TYPE__MASK; -} -#define CP_LOAD_STATE4_1_EXT_SRC_ADDR__MASK 0xfffffffc -#define CP_LOAD_STATE4_1_EXT_SRC_ADDR__SHIFT 2 -static inline uint32_t CP_LOAD_STATE4_1_EXT_SRC_ADDR(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << CP_LOAD_STATE4_1_EXT_SRC_ADDR__SHIFT) & CP_LOAD_STATE4_1_EXT_SRC_ADDR__MASK; -} - -#define REG_CP_LOAD_STATE4_2 0x00000002 -#define CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__MASK 0xffffffff -#define CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__SHIFT 0 -static inline uint32_t CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(uint32_t val) -{ - return ((val) << CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__SHIFT) & CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__MASK; -} - -#define REG_CP_LOAD_STATE6_0 0x00000000 -#define CP_LOAD_STATE6_0_DST_OFF__MASK 0x00003fff -#define CP_LOAD_STATE6_0_DST_OFF__SHIFT 0 -static inline uint32_t CP_LOAD_STATE6_0_DST_OFF(uint32_t val) -{ - return ((val) << CP_LOAD_STATE6_0_DST_OFF__SHIFT) & CP_LOAD_STATE6_0_DST_OFF__MASK; -} -#define CP_LOAD_STATE6_0_STATE_TYPE__MASK 0x00004000 -#define CP_LOAD_STATE6_0_STATE_TYPE__SHIFT 14 -static inline uint32_t CP_LOAD_STATE6_0_STATE_TYPE(enum a6xx_state_type val) -{ - return ((val) << CP_LOAD_STATE6_0_STATE_TYPE__SHIFT) & CP_LOAD_STATE6_0_STATE_TYPE__MASK; -} -#define CP_LOAD_STATE6_0_STATE_SRC__MASK 0x00030000 -#define CP_LOAD_STATE6_0_STATE_SRC__SHIFT 16 -static inline uint32_t CP_LOAD_STATE6_0_STATE_SRC(enum a6xx_state_src val) -{ - return ((val) << CP_LOAD_STATE6_0_STATE_SRC__SHIFT) & CP_LOAD_STATE6_0_STATE_SRC__MASK; -} -#define CP_LOAD_STATE6_0_STATE_BLOCK__MASK 0x003c0000 -#define CP_LOAD_STATE6_0_STATE_BLOCK__SHIFT 18 -static inline uint32_t CP_LOAD_STATE6_0_STATE_BLOCK(enum a6xx_state_block val) -{ - return ((val) 
<< CP_LOAD_STATE6_0_STATE_BLOCK__SHIFT) & CP_LOAD_STATE6_0_STATE_BLOCK__MASK; -} -#define CP_LOAD_STATE6_0_NUM_UNIT__MASK 0xffc00000 -#define CP_LOAD_STATE6_0_NUM_UNIT__SHIFT 22 -static inline uint32_t CP_LOAD_STATE6_0_NUM_UNIT(uint32_t val) -{ - return ((val) << CP_LOAD_STATE6_0_NUM_UNIT__SHIFT) & CP_LOAD_STATE6_0_NUM_UNIT__MASK; -} - -#define REG_CP_LOAD_STATE6_1 0x00000001 -#define CP_LOAD_STATE6_1_EXT_SRC_ADDR__MASK 0xfffffffc -#define CP_LOAD_STATE6_1_EXT_SRC_ADDR__SHIFT 2 -static inline uint32_t CP_LOAD_STATE6_1_EXT_SRC_ADDR(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << CP_LOAD_STATE6_1_EXT_SRC_ADDR__SHIFT) & CP_LOAD_STATE6_1_EXT_SRC_ADDR__MASK; -} - -#define REG_CP_LOAD_STATE6_2 0x00000002 -#define CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI__MASK 0xffffffff -#define CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI__SHIFT 0 -static inline uint32_t CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(uint32_t val) -{ - return ((val) << CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI__SHIFT) & CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI__MASK; -} - -#define REG_CP_DRAW_INDX_0 0x00000000 -#define CP_DRAW_INDX_0_VIZ_QUERY__MASK 0xffffffff -#define CP_DRAW_INDX_0_VIZ_QUERY__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_0_VIZ_QUERY(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_0_VIZ_QUERY__SHIFT) & CP_DRAW_INDX_0_VIZ_QUERY__MASK; -} - -#define REG_CP_DRAW_INDX_1 0x00000001 -#define CP_DRAW_INDX_1_PRIM_TYPE__MASK 0x0000003f -#define CP_DRAW_INDX_1_PRIM_TYPE__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_1_PRIM_TYPE(enum pc_di_primtype val) -{ - return ((val) << CP_DRAW_INDX_1_PRIM_TYPE__SHIFT) & CP_DRAW_INDX_1_PRIM_TYPE__MASK; -} -#define CP_DRAW_INDX_1_SOURCE_SELECT__MASK 0x000000c0 -#define CP_DRAW_INDX_1_SOURCE_SELECT__SHIFT 6 -static inline uint32_t CP_DRAW_INDX_1_SOURCE_SELECT(enum pc_di_src_sel val) -{ - return ((val) << CP_DRAW_INDX_1_SOURCE_SELECT__SHIFT) & CP_DRAW_INDX_1_SOURCE_SELECT__MASK; -} -#define CP_DRAW_INDX_1_VIS_CULL__MASK 0x00000600 -#define CP_DRAW_INDX_1_VIS_CULL__SHIFT 9 -static inline uint32_t 
CP_DRAW_INDX_1_VIS_CULL(enum pc_di_vis_cull_mode val) -{ - return ((val) << CP_DRAW_INDX_1_VIS_CULL__SHIFT) & CP_DRAW_INDX_1_VIS_CULL__MASK; -} -#define CP_DRAW_INDX_1_INDEX_SIZE__MASK 0x00000800 -#define CP_DRAW_INDX_1_INDEX_SIZE__SHIFT 11 -static inline uint32_t CP_DRAW_INDX_1_INDEX_SIZE(enum pc_di_index_size val) -{ - return ((val) << CP_DRAW_INDX_1_INDEX_SIZE__SHIFT) & CP_DRAW_INDX_1_INDEX_SIZE__MASK; -} -#define CP_DRAW_INDX_1_NOT_EOP 0x00001000 -#define CP_DRAW_INDX_1_SMALL_INDEX 0x00002000 -#define CP_DRAW_INDX_1_PRE_DRAW_INITIATOR_ENABLE 0x00004000 -#define CP_DRAW_INDX_1_NUM_INSTANCES__MASK 0xff000000 -#define CP_DRAW_INDX_1_NUM_INSTANCES__SHIFT 24 -static inline uint32_t CP_DRAW_INDX_1_NUM_INSTANCES(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_1_NUM_INSTANCES__SHIFT) & CP_DRAW_INDX_1_NUM_INSTANCES__MASK; -} - -#define REG_CP_DRAW_INDX_2 0x00000002 -#define CP_DRAW_INDX_2_NUM_INDICES__MASK 0xffffffff -#define CP_DRAW_INDX_2_NUM_INDICES__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_2_NUM_INDICES(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_2_NUM_INDICES__MASK; -} - -#define REG_CP_DRAW_INDX_3 0x00000003 -#define CP_DRAW_INDX_3_INDX_BASE__MASK 0xffffffff -#define CP_DRAW_INDX_3_INDX_BASE__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_3_INDX_BASE(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_3_INDX_BASE__SHIFT) & CP_DRAW_INDX_3_INDX_BASE__MASK; -} - -#define REG_CP_DRAW_INDX_4 0x00000004 -#define CP_DRAW_INDX_4_INDX_SIZE__MASK 0xffffffff -#define CP_DRAW_INDX_4_INDX_SIZE__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_4_INDX_SIZE(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_4_INDX_SIZE__SHIFT) & CP_DRAW_INDX_4_INDX_SIZE__MASK; -} - -#define REG_CP_DRAW_INDX_2_0 0x00000000 -#define CP_DRAW_INDX_2_0_VIZ_QUERY__MASK 0xffffffff -#define CP_DRAW_INDX_2_0_VIZ_QUERY__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_2_0_VIZ_QUERY(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_2_0_VIZ_QUERY__SHIFT) & 
CP_DRAW_INDX_2_0_VIZ_QUERY__MASK; -} - -#define REG_CP_DRAW_INDX_2_1 0x00000001 -#define CP_DRAW_INDX_2_1_PRIM_TYPE__MASK 0x0000003f -#define CP_DRAW_INDX_2_1_PRIM_TYPE__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_2_1_PRIM_TYPE(enum pc_di_primtype val) -{ - return ((val) << CP_DRAW_INDX_2_1_PRIM_TYPE__SHIFT) & CP_DRAW_INDX_2_1_PRIM_TYPE__MASK; -} -#define CP_DRAW_INDX_2_1_SOURCE_SELECT__MASK 0x000000c0 -#define CP_DRAW_INDX_2_1_SOURCE_SELECT__SHIFT 6 -static inline uint32_t CP_DRAW_INDX_2_1_SOURCE_SELECT(enum pc_di_src_sel val) -{ - return ((val) << CP_DRAW_INDX_2_1_SOURCE_SELECT__SHIFT) & CP_DRAW_INDX_2_1_SOURCE_SELECT__MASK; -} -#define CP_DRAW_INDX_2_1_VIS_CULL__MASK 0x00000600 -#define CP_DRAW_INDX_2_1_VIS_CULL__SHIFT 9 -static inline uint32_t CP_DRAW_INDX_2_1_VIS_CULL(enum pc_di_vis_cull_mode val) -{ - return ((val) << CP_DRAW_INDX_2_1_VIS_CULL__SHIFT) & CP_DRAW_INDX_2_1_VIS_CULL__MASK; -} -#define CP_DRAW_INDX_2_1_INDEX_SIZE__MASK 0x00000800 -#define CP_DRAW_INDX_2_1_INDEX_SIZE__SHIFT 11 -static inline uint32_t CP_DRAW_INDX_2_1_INDEX_SIZE(enum pc_di_index_size val) -{ - return ((val) << CP_DRAW_INDX_2_1_INDEX_SIZE__SHIFT) & CP_DRAW_INDX_2_1_INDEX_SIZE__MASK; -} -#define CP_DRAW_INDX_2_1_NOT_EOP 0x00001000 -#define CP_DRAW_INDX_2_1_SMALL_INDEX 0x00002000 -#define CP_DRAW_INDX_2_1_PRE_DRAW_INITIATOR_ENABLE 0x00004000 -#define CP_DRAW_INDX_2_1_NUM_INSTANCES__MASK 0xff000000 -#define CP_DRAW_INDX_2_1_NUM_INSTANCES__SHIFT 24 -static inline uint32_t CP_DRAW_INDX_2_1_NUM_INSTANCES(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_2_1_NUM_INSTANCES__SHIFT) & CP_DRAW_INDX_2_1_NUM_INSTANCES__MASK; -} - -#define REG_CP_DRAW_INDX_2_2 0x00000002 -#define CP_DRAW_INDX_2_2_NUM_INDICES__MASK 0xffffffff -#define CP_DRAW_INDX_2_2_NUM_INDICES__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_2_2_NUM_INDICES(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_2_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_2_2_NUM_INDICES__MASK; -} - -#define REG_CP_DRAW_INDX_OFFSET_0 0x00000000 -#define 
CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__MASK 0x0000003f -#define CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(enum pc_di_primtype val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__SHIFT) & CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__MASK; -} -#define CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__MASK 0x000000c0 -#define CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__SHIFT 6 -static inline uint32_t CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(enum pc_di_src_sel val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__SHIFT) & CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__MASK; -} -#define CP_DRAW_INDX_OFFSET_0_VIS_CULL__MASK 0x00000300 -#define CP_DRAW_INDX_OFFSET_0_VIS_CULL__SHIFT 8 -static inline uint32_t CP_DRAW_INDX_OFFSET_0_VIS_CULL(enum pc_di_vis_cull_mode val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_0_VIS_CULL__SHIFT) & CP_DRAW_INDX_OFFSET_0_VIS_CULL__MASK; -} -#define CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__MASK 0x00000c00 -#define CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__SHIFT 10 -static inline uint32_t CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(enum a4xx_index_size val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__SHIFT) & CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__MASK; -} -#define CP_DRAW_INDX_OFFSET_0_TESS_MODE__MASK 0x01f00000 -#define CP_DRAW_INDX_OFFSET_0_TESS_MODE__SHIFT 20 -static inline uint32_t CP_DRAW_INDX_OFFSET_0_TESS_MODE(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_0_TESS_MODE__SHIFT) & CP_DRAW_INDX_OFFSET_0_TESS_MODE__MASK; -} - -#define REG_CP_DRAW_INDX_OFFSET_1 0x00000001 -#define CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__MASK 0xffffffff -#define CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__SHIFT) & CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__MASK; -} - -#define REG_CP_DRAW_INDX_OFFSET_2 0x00000002 -#define CP_DRAW_INDX_OFFSET_2_NUM_INDICES__MASK 0xffffffff -#define CP_DRAW_INDX_OFFSET_2_NUM_INDICES__SHIFT 0 
-static inline uint32_t CP_DRAW_INDX_OFFSET_2_NUM_INDICES(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_OFFSET_2_NUM_INDICES__MASK; -} - -#define REG_CP_DRAW_INDX_OFFSET_3 0x00000003 - -#define REG_CP_DRAW_INDX_OFFSET_4 0x00000004 -#define CP_DRAW_INDX_OFFSET_4_INDX_BASE__MASK 0xffffffff -#define CP_DRAW_INDX_OFFSET_4_INDX_BASE__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_OFFSET_4_INDX_BASE(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_4_INDX_BASE__SHIFT) & CP_DRAW_INDX_OFFSET_4_INDX_BASE__MASK; -} - -#define REG_CP_DRAW_INDX_OFFSET_5 0x00000005 -#define CP_DRAW_INDX_OFFSET_5_INDX_SIZE__MASK 0xffffffff -#define CP_DRAW_INDX_OFFSET_5_INDX_SIZE__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_OFFSET_5_INDX_SIZE(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_5_INDX_SIZE__SHIFT) & CP_DRAW_INDX_OFFSET_5_INDX_SIZE__MASK; -} - -#define REG_A4XX_CP_DRAW_INDIRECT_0 0x00000000 -#define A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__MASK 0x0000003f -#define A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__SHIFT 0 -static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE(enum pc_di_primtype val) -{ - return ((val) << A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__MASK; -} -#define A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__MASK 0x000000c0 -#define A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__SHIFT 6 -static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT(enum pc_di_src_sel val) -{ - return ((val) << A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__MASK; -} -#define A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__MASK 0x00000300 -#define A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__SHIFT 8 -static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_VIS_CULL(enum pc_di_vis_cull_mode val) -{ - return ((val) << A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__MASK; -} -#define A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__MASK 0x00000c00 -#define A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__SHIFT 10 
-static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE(enum a4xx_index_size val) -{ - return ((val) << A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__MASK; -} -#define A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__MASK 0x01f00000 -#define A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__SHIFT 20 -static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_TESS_MODE(uint32_t val) -{ - return ((val) << A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__MASK; -} - -#define REG_A4XX_CP_DRAW_INDIRECT_1 0x00000001 -#define A4XX_CP_DRAW_INDIRECT_1_INDIRECT__MASK 0xffffffff -#define A4XX_CP_DRAW_INDIRECT_1_INDIRECT__SHIFT 0 -static inline uint32_t A4XX_CP_DRAW_INDIRECT_1_INDIRECT(uint32_t val) -{ - return ((val) << A4XX_CP_DRAW_INDIRECT_1_INDIRECT__SHIFT) & A4XX_CP_DRAW_INDIRECT_1_INDIRECT__MASK; -} - - -#define REG_A5XX_CP_DRAW_INDIRECT_2 0x00000002 -#define A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__MASK 0xffffffff -#define A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__SHIFT 0 -static inline uint32_t A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI(uint32_t val) -{ - return ((val) << A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__SHIFT) & A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__MASK; -} - -#define REG_A4XX_CP_DRAW_INDX_INDIRECT_0 0x00000000 -#define A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__MASK 0x0000003f -#define A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__SHIFT 0 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE(enum pc_di_primtype val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__MASK; -} -#define A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__MASK 0x000000c0 -#define A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__SHIFT 6 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT(enum pc_di_src_sel val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__MASK; -} -#define A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__MASK 0x00000300 -#define 
A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__SHIFT 8 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL(enum pc_di_vis_cull_mode val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__MASK; -} -#define A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__MASK 0x00000c00 -#define A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__SHIFT 10 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE(enum a4xx_index_size val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__MASK; -} -#define A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__MASK 0x01f00000 -#define A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__SHIFT 20 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE(uint32_t val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__MASK; -} - - -#define REG_A4XX_CP_DRAW_INDX_INDIRECT_1 0x00000001 -#define A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__MASK 0xffffffff -#define A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__SHIFT 0 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE(uint32_t val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__MASK; -} - -#define REG_A4XX_CP_DRAW_INDX_INDIRECT_2 0x00000002 -#define A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__MASK 0xffffffff -#define A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__SHIFT 0 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE(uint32_t val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__MASK; -} - -#define REG_A4XX_CP_DRAW_INDX_INDIRECT_3 0x00000003 -#define A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__MASK 0xffffffff -#define A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__SHIFT 0 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT(uint32_t val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__SHIFT) & 
A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__MASK; -} - - -#define REG_A5XX_CP_DRAW_INDX_INDIRECT_1 0x00000001 -#define A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__MASK 0xffffffff -#define A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__SHIFT 0 -static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO(uint32_t val) -{ - return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__MASK; -} - -#define REG_A5XX_CP_DRAW_INDX_INDIRECT_2 0x00000002 -#define A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__MASK 0xffffffff -#define A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__SHIFT 0 -static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI(uint32_t val) -{ - return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__MASK; -} - -#define REG_A5XX_CP_DRAW_INDX_INDIRECT_3 0x00000003 -#define A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__MASK 0xffffffff -#define A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__SHIFT 0 -static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES(uint32_t val) -{ - return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__MASK; -} - -#define REG_A5XX_CP_DRAW_INDX_INDIRECT_4 0x00000004 -#define A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__MASK 0xffffffff -#define A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__SHIFT 0 -static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO(uint32_t val) -{ - return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__MASK; -} - -#define REG_A5XX_CP_DRAW_INDX_INDIRECT_5 0x00000005 -#define A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__MASK 0xffffffff -#define A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__SHIFT 0 -static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI(uint32_t val) -{ - return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__MASK; -} - -static inline 
uint32_t REG_CP_SET_DRAW_STATE_(uint32_t i0) { return 0x00000000 + 0x3*i0; } - -static inline uint32_t REG_CP_SET_DRAW_STATE__0(uint32_t i0) { return 0x00000000 + 0x3*i0; } -#define CP_SET_DRAW_STATE__0_COUNT__MASK 0x0000ffff -#define CP_SET_DRAW_STATE__0_COUNT__SHIFT 0 -static inline uint32_t CP_SET_DRAW_STATE__0_COUNT(uint32_t val) -{ - return ((val) << CP_SET_DRAW_STATE__0_COUNT__SHIFT) & CP_SET_DRAW_STATE__0_COUNT__MASK; -} -#define CP_SET_DRAW_STATE__0_DIRTY 0x00010000 -#define CP_SET_DRAW_STATE__0_DISABLE 0x00020000 -#define CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS 0x00040000 -#define CP_SET_DRAW_STATE__0_LOAD_IMMED 0x00080000 -#define CP_SET_DRAW_STATE__0_ENABLE_MASK__MASK 0x00f00000 -#define CP_SET_DRAW_STATE__0_ENABLE_MASK__SHIFT 20 -static inline uint32_t CP_SET_DRAW_STATE__0_ENABLE_MASK(uint32_t val) -{ - return ((val) << CP_SET_DRAW_STATE__0_ENABLE_MASK__SHIFT) & CP_SET_DRAW_STATE__0_ENABLE_MASK__MASK; -} -#define CP_SET_DRAW_STATE__0_GROUP_ID__MASK 0x1f000000 -#define CP_SET_DRAW_STATE__0_GROUP_ID__SHIFT 24 -static inline uint32_t CP_SET_DRAW_STATE__0_GROUP_ID(uint32_t val) -{ - return ((val) << CP_SET_DRAW_STATE__0_GROUP_ID__SHIFT) & CP_SET_DRAW_STATE__0_GROUP_ID__MASK; -} - -static inline uint32_t REG_CP_SET_DRAW_STATE__1(uint32_t i0) { return 0x00000001 + 0x3*i0; } -#define CP_SET_DRAW_STATE__1_ADDR_LO__MASK 0xffffffff -#define CP_SET_DRAW_STATE__1_ADDR_LO__SHIFT 0 -static inline uint32_t CP_SET_DRAW_STATE__1_ADDR_LO(uint32_t val) -{ - return ((val) << CP_SET_DRAW_STATE__1_ADDR_LO__SHIFT) & CP_SET_DRAW_STATE__1_ADDR_LO__MASK; -} - -static inline uint32_t REG_CP_SET_DRAW_STATE__2(uint32_t i0) { return 0x00000002 + 0x3*i0; } -#define CP_SET_DRAW_STATE__2_ADDR_HI__MASK 0xffffffff -#define CP_SET_DRAW_STATE__2_ADDR_HI__SHIFT 0 -static inline uint32_t CP_SET_DRAW_STATE__2_ADDR_HI(uint32_t val) -{ - return ((val) << CP_SET_DRAW_STATE__2_ADDR_HI__SHIFT) & CP_SET_DRAW_STATE__2_ADDR_HI__MASK; -} - -#define REG_CP_SET_BIN_0 0x00000000 - -#define 
REG_CP_SET_BIN_1 0x00000001 -#define CP_SET_BIN_1_X1__MASK 0x0000ffff -#define CP_SET_BIN_1_X1__SHIFT 0 -static inline uint32_t CP_SET_BIN_1_X1(uint32_t val) -{ - return ((val) << CP_SET_BIN_1_X1__SHIFT) & CP_SET_BIN_1_X1__MASK; -} -#define CP_SET_BIN_1_Y1__MASK 0xffff0000 -#define CP_SET_BIN_1_Y1__SHIFT 16 -static inline uint32_t CP_SET_BIN_1_Y1(uint32_t val) -{ - return ((val) << CP_SET_BIN_1_Y1__SHIFT) & CP_SET_BIN_1_Y1__MASK; -} - -#define REG_CP_SET_BIN_2 0x00000002 -#define CP_SET_BIN_2_X2__MASK 0x0000ffff -#define CP_SET_BIN_2_X2__SHIFT 0 -static inline uint32_t CP_SET_BIN_2_X2(uint32_t val) -{ - return ((val) << CP_SET_BIN_2_X2__SHIFT) & CP_SET_BIN_2_X2__MASK; -} -#define CP_SET_BIN_2_Y2__MASK 0xffff0000 -#define CP_SET_BIN_2_Y2__SHIFT 16 -static inline uint32_t CP_SET_BIN_2_Y2(uint32_t val) -{ - return ((val) << CP_SET_BIN_2_Y2__SHIFT) & CP_SET_BIN_2_Y2__MASK; -} - -#define REG_CP_SET_BIN_DATA_0 0x00000000 -#define CP_SET_BIN_DATA_0_BIN_DATA_ADDR__MASK 0xffffffff -#define CP_SET_BIN_DATA_0_BIN_DATA_ADDR__SHIFT 0 -static inline uint32_t CP_SET_BIN_DATA_0_BIN_DATA_ADDR(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA_0_BIN_DATA_ADDR__SHIFT) & CP_SET_BIN_DATA_0_BIN_DATA_ADDR__MASK; -} - -#define REG_CP_SET_BIN_DATA_1 0x00000001 -#define CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__MASK 0xffffffff -#define CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__SHIFT 0 -static inline uint32_t CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__SHIFT) & CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__MASK; -} - -#define REG_CP_SET_BIN_DATA5_0 0x00000000 -#define CP_SET_BIN_DATA5_0_VSC_SIZE__MASK 0x003f0000 -#define CP_SET_BIN_DATA5_0_VSC_SIZE__SHIFT 16 -static inline uint32_t CP_SET_BIN_DATA5_0_VSC_SIZE(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA5_0_VSC_SIZE__SHIFT) & CP_SET_BIN_DATA5_0_VSC_SIZE__MASK; -} -#define CP_SET_BIN_DATA5_0_VSC_N__MASK 0x07c00000 -#define CP_SET_BIN_DATA5_0_VSC_N__SHIFT 22 -static inline uint32_t 
CP_SET_BIN_DATA5_0_VSC_N(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA5_0_VSC_N__SHIFT) & CP_SET_BIN_DATA5_0_VSC_N__MASK; -} - -#define REG_CP_SET_BIN_DATA5_1 0x00000001 -#define CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__MASK 0xffffffff -#define CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__SHIFT 0 -static inline uint32_t CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__SHIFT) & CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__MASK; -} - -#define REG_CP_SET_BIN_DATA5_2 0x00000002 -#define CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__MASK 0xffffffff -#define CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__SHIFT 0 -static inline uint32_t CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__SHIFT) & CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__MASK; -} - -#define REG_CP_SET_BIN_DATA5_3 0x00000003 -#define CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__MASK 0xffffffff -#define CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__SHIFT 0 -static inline uint32_t CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__SHIFT) & CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__MASK; -} - -#define REG_CP_SET_BIN_DATA5_4 0x00000004 -#define CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__MASK 0xffffffff -#define CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__SHIFT 0 -static inline uint32_t CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__SHIFT) & CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__MASK; -} - -#define REG_CP_SET_BIN_DATA5_5 0x00000005 -#define CP_SET_BIN_DATA5_5_BIN_DATA_ADDR2_LO__MASK 0xffffffff -#define CP_SET_BIN_DATA5_5_BIN_DATA_ADDR2_LO__SHIFT 0 -static inline uint32_t CP_SET_BIN_DATA5_5_BIN_DATA_ADDR2_LO(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA5_5_BIN_DATA_ADDR2_LO__SHIFT) & CP_SET_BIN_DATA5_5_BIN_DATA_ADDR2_LO__MASK; -} - -#define REG_CP_SET_BIN_DATA5_6 0x00000006 -#define 
CP_SET_BIN_DATA5_6_BIN_DATA_ADDR2_LO__MASK 0xffffffff -#define CP_SET_BIN_DATA5_6_BIN_DATA_ADDR2_LO__SHIFT 0 -static inline uint32_t CP_SET_BIN_DATA5_6_BIN_DATA_ADDR2_LO(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA5_6_BIN_DATA_ADDR2_LO__SHIFT) & CP_SET_BIN_DATA5_6_BIN_DATA_ADDR2_LO__MASK; -} - -#define REG_CP_REG_TO_MEM_0 0x00000000 -#define CP_REG_TO_MEM_0_REG__MASK 0x0000ffff -#define CP_REG_TO_MEM_0_REG__SHIFT 0 -static inline uint32_t CP_REG_TO_MEM_0_REG(uint32_t val) -{ - return ((val) << CP_REG_TO_MEM_0_REG__SHIFT) & CP_REG_TO_MEM_0_REG__MASK; -} -#define CP_REG_TO_MEM_0_CNT__MASK 0x3ff80000 -#define CP_REG_TO_MEM_0_CNT__SHIFT 19 -static inline uint32_t CP_REG_TO_MEM_0_CNT(uint32_t val) -{ - return ((val) << CP_REG_TO_MEM_0_CNT__SHIFT) & CP_REG_TO_MEM_0_CNT__MASK; -} -#define CP_REG_TO_MEM_0_64B 0x40000000 -#define CP_REG_TO_MEM_0_ACCUMULATE 0x80000000 - -#define REG_CP_REG_TO_MEM_1 0x00000001 -#define CP_REG_TO_MEM_1_DEST__MASK 0xffffffff -#define CP_REG_TO_MEM_1_DEST__SHIFT 0 -static inline uint32_t CP_REG_TO_MEM_1_DEST(uint32_t val) -{ - return ((val) << CP_REG_TO_MEM_1_DEST__SHIFT) & CP_REG_TO_MEM_1_DEST__MASK; -} - -#define REG_CP_REG_TO_MEM_2 0x00000002 -#define CP_REG_TO_MEM_2_DEST_HI__MASK 0xffffffff -#define CP_REG_TO_MEM_2_DEST_HI__SHIFT 0 -static inline uint32_t CP_REG_TO_MEM_2_DEST_HI(uint32_t val) -{ - return ((val) << CP_REG_TO_MEM_2_DEST_HI__SHIFT) & CP_REG_TO_MEM_2_DEST_HI__MASK; -} - -#define REG_CP_MEM_TO_REG_0 0x00000000 -#define CP_MEM_TO_REG_0_REG__MASK 0x0000ffff -#define CP_MEM_TO_REG_0_REG__SHIFT 0 -static inline uint32_t CP_MEM_TO_REG_0_REG(uint32_t val) -{ - return ((val) << CP_MEM_TO_REG_0_REG__SHIFT) & CP_MEM_TO_REG_0_REG__MASK; -} -#define CP_MEM_TO_REG_0_CNT__MASK 0x3ff80000 -#define CP_MEM_TO_REG_0_CNT__SHIFT 19 -static inline uint32_t CP_MEM_TO_REG_0_CNT(uint32_t val) -{ - return ((val) << CP_MEM_TO_REG_0_CNT__SHIFT) & CP_MEM_TO_REG_0_CNT__MASK; -} -#define CP_MEM_TO_REG_0_64B 0x40000000 -#define 
CP_MEM_TO_REG_0_ACCUMULATE 0x80000000 - -#define REG_CP_MEM_TO_REG_1 0x00000001 -#define CP_MEM_TO_REG_1_SRC__MASK 0xffffffff -#define CP_MEM_TO_REG_1_SRC__SHIFT 0 -static inline uint32_t CP_MEM_TO_REG_1_SRC(uint32_t val) -{ - return ((val) << CP_MEM_TO_REG_1_SRC__SHIFT) & CP_MEM_TO_REG_1_SRC__MASK; -} - -#define REG_CP_MEM_TO_REG_2 0x00000002 -#define CP_MEM_TO_REG_2_SRC_HI__MASK 0xffffffff -#define CP_MEM_TO_REG_2_SRC_HI__SHIFT 0 -static inline uint32_t CP_MEM_TO_REG_2_SRC_HI(uint32_t val) -{ - return ((val) << CP_MEM_TO_REG_2_SRC_HI__SHIFT) & CP_MEM_TO_REG_2_SRC_HI__MASK; -} - -#define REG_CP_MEM_TO_MEM_0 0x00000000 -#define CP_MEM_TO_MEM_0_NEG_A 0x00000001 -#define CP_MEM_TO_MEM_0_NEG_B 0x00000002 -#define CP_MEM_TO_MEM_0_NEG_C 0x00000004 -#define CP_MEM_TO_MEM_0_DOUBLE 0x20000000 - -#define REG_CP_COND_WRITE_0 0x00000000 -#define CP_COND_WRITE_0_FUNCTION__MASK 0x00000007 -#define CP_COND_WRITE_0_FUNCTION__SHIFT 0 -static inline uint32_t CP_COND_WRITE_0_FUNCTION(enum cp_cond_function val) -{ - return ((val) << CP_COND_WRITE_0_FUNCTION__SHIFT) & CP_COND_WRITE_0_FUNCTION__MASK; -} -#define CP_COND_WRITE_0_POLL_MEMORY 0x00000010 -#define CP_COND_WRITE_0_WRITE_MEMORY 0x00000100 - -#define REG_CP_COND_WRITE_1 0x00000001 -#define CP_COND_WRITE_1_POLL_ADDR__MASK 0xffffffff -#define CP_COND_WRITE_1_POLL_ADDR__SHIFT 0 -static inline uint32_t CP_COND_WRITE_1_POLL_ADDR(uint32_t val) -{ - return ((val) << CP_COND_WRITE_1_POLL_ADDR__SHIFT) & CP_COND_WRITE_1_POLL_ADDR__MASK; -} - -#define REG_CP_COND_WRITE_2 0x00000002 -#define CP_COND_WRITE_2_REF__MASK 0xffffffff -#define CP_COND_WRITE_2_REF__SHIFT 0 -static inline uint32_t CP_COND_WRITE_2_REF(uint32_t val) -{ - return ((val) << CP_COND_WRITE_2_REF__SHIFT) & CP_COND_WRITE_2_REF__MASK; -} - -#define REG_CP_COND_WRITE_3 0x00000003 -#define CP_COND_WRITE_3_MASK__MASK 0xffffffff -#define CP_COND_WRITE_3_MASK__SHIFT 0 -static inline uint32_t CP_COND_WRITE_3_MASK(uint32_t val) -{ - return ((val) << CP_COND_WRITE_3_MASK__SHIFT) & 
CP_COND_WRITE_3_MASK__MASK; -} - -#define REG_CP_COND_WRITE_4 0x00000004 -#define CP_COND_WRITE_4_WRITE_ADDR__MASK 0xffffffff -#define CP_COND_WRITE_4_WRITE_ADDR__SHIFT 0 -static inline uint32_t CP_COND_WRITE_4_WRITE_ADDR(uint32_t val) -{ - return ((val) << CP_COND_WRITE_4_WRITE_ADDR__SHIFT) & CP_COND_WRITE_4_WRITE_ADDR__MASK; -} - -#define REG_CP_COND_WRITE_5 0x00000005 -#define CP_COND_WRITE_5_WRITE_DATA__MASK 0xffffffff -#define CP_COND_WRITE_5_WRITE_DATA__SHIFT 0 -static inline uint32_t CP_COND_WRITE_5_WRITE_DATA(uint32_t val) -{ - return ((val) << CP_COND_WRITE_5_WRITE_DATA__SHIFT) & CP_COND_WRITE_5_WRITE_DATA__MASK; -} - -#define REG_CP_COND_WRITE5_0 0x00000000 -#define CP_COND_WRITE5_0_FUNCTION__MASK 0x00000007 -#define CP_COND_WRITE5_0_FUNCTION__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_0_FUNCTION(enum cp_cond_function val) -{ - return ((val) << CP_COND_WRITE5_0_FUNCTION__SHIFT) & CP_COND_WRITE5_0_FUNCTION__MASK; -} -#define CP_COND_WRITE5_0_POLL_MEMORY 0x00000010 -#define CP_COND_WRITE5_0_WRITE_MEMORY 0x00000100 - -#define REG_CP_COND_WRITE5_1 0x00000001 -#define CP_COND_WRITE5_1_POLL_ADDR_LO__MASK 0xffffffff -#define CP_COND_WRITE5_1_POLL_ADDR_LO__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_1_POLL_ADDR_LO(uint32_t val) -{ - return ((val) << CP_COND_WRITE5_1_POLL_ADDR_LO__SHIFT) & CP_COND_WRITE5_1_POLL_ADDR_LO__MASK; -} - -#define REG_CP_COND_WRITE5_2 0x00000002 -#define CP_COND_WRITE5_2_POLL_ADDR_HI__MASK 0xffffffff -#define CP_COND_WRITE5_2_POLL_ADDR_HI__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_2_POLL_ADDR_HI(uint32_t val) -{ - return ((val) << CP_COND_WRITE5_2_POLL_ADDR_HI__SHIFT) & CP_COND_WRITE5_2_POLL_ADDR_HI__MASK; -} - -#define REG_CP_COND_WRITE5_3 0x00000003 -#define CP_COND_WRITE5_3_REF__MASK 0xffffffff -#define CP_COND_WRITE5_3_REF__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_3_REF(uint32_t val) -{ - return ((val) << CP_COND_WRITE5_3_REF__SHIFT) & CP_COND_WRITE5_3_REF__MASK; -} - -#define REG_CP_COND_WRITE5_4 0x00000004 -#define 
CP_COND_WRITE5_4_MASK__MASK 0xffffffff -#define CP_COND_WRITE5_4_MASK__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_4_MASK(uint32_t val) -{ - return ((val) << CP_COND_WRITE5_4_MASK__SHIFT) & CP_COND_WRITE5_4_MASK__MASK; -} - -#define REG_CP_COND_WRITE5_5 0x00000005 -#define CP_COND_WRITE5_5_WRITE_ADDR_LO__MASK 0xffffffff -#define CP_COND_WRITE5_5_WRITE_ADDR_LO__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_5_WRITE_ADDR_LO(uint32_t val) -{ - return ((val) << CP_COND_WRITE5_5_WRITE_ADDR_LO__SHIFT) & CP_COND_WRITE5_5_WRITE_ADDR_LO__MASK; -} - -#define REG_CP_COND_WRITE5_6 0x00000006 -#define CP_COND_WRITE5_6_WRITE_ADDR_HI__MASK 0xffffffff -#define CP_COND_WRITE5_6_WRITE_ADDR_HI__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_6_WRITE_ADDR_HI(uint32_t val) -{ - return ((val) << CP_COND_WRITE5_6_WRITE_ADDR_HI__SHIFT) & CP_COND_WRITE5_6_WRITE_ADDR_HI__MASK; -} - -#define REG_CP_COND_WRITE5_7 0x00000007 -#define CP_COND_WRITE5_7_WRITE_DATA__MASK 0xffffffff -#define CP_COND_WRITE5_7_WRITE_DATA__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_7_WRITE_DATA(uint32_t val) -{ - return ((val) << CP_COND_WRITE5_7_WRITE_DATA__SHIFT) & CP_COND_WRITE5_7_WRITE_DATA__MASK; -} - -#define REG_CP_DISPATCH_COMPUTE_0 0x00000000 - -#define REG_CP_DISPATCH_COMPUTE_1 0x00000001 -#define CP_DISPATCH_COMPUTE_1_X__MASK 0xffffffff -#define CP_DISPATCH_COMPUTE_1_X__SHIFT 0 -static inline uint32_t CP_DISPATCH_COMPUTE_1_X(uint32_t val) -{ - return ((val) << CP_DISPATCH_COMPUTE_1_X__SHIFT) & CP_DISPATCH_COMPUTE_1_X__MASK; -} - -#define REG_CP_DISPATCH_COMPUTE_2 0x00000002 -#define CP_DISPATCH_COMPUTE_2_Y__MASK 0xffffffff -#define CP_DISPATCH_COMPUTE_2_Y__SHIFT 0 -static inline uint32_t CP_DISPATCH_COMPUTE_2_Y(uint32_t val) -{ - return ((val) << CP_DISPATCH_COMPUTE_2_Y__SHIFT) & CP_DISPATCH_COMPUTE_2_Y__MASK; -} - -#define REG_CP_DISPATCH_COMPUTE_3 0x00000003 -#define CP_DISPATCH_COMPUTE_3_Z__MASK 0xffffffff -#define CP_DISPATCH_COMPUTE_3_Z__SHIFT 0 -static inline uint32_t 
CP_DISPATCH_COMPUTE_3_Z(uint32_t val) -{ - return ((val) << CP_DISPATCH_COMPUTE_3_Z__SHIFT) & CP_DISPATCH_COMPUTE_3_Z__MASK; -} - -#define REG_CP_SET_RENDER_MODE_0 0x00000000 -#define CP_SET_RENDER_MODE_0_MODE__MASK 0x000001ff -#define CP_SET_RENDER_MODE_0_MODE__SHIFT 0 -static inline uint32_t CP_SET_RENDER_MODE_0_MODE(enum render_mode_cmd val) -{ - return ((val) << CP_SET_RENDER_MODE_0_MODE__SHIFT) & CP_SET_RENDER_MODE_0_MODE__MASK; -} - -#define REG_CP_SET_RENDER_MODE_1 0x00000001 -#define CP_SET_RENDER_MODE_1_ADDR_0_LO__MASK 0xffffffff -#define CP_SET_RENDER_MODE_1_ADDR_0_LO__SHIFT 0 -static inline uint32_t CP_SET_RENDER_MODE_1_ADDR_0_LO(uint32_t val) -{ - return ((val) << CP_SET_RENDER_MODE_1_ADDR_0_LO__SHIFT) & CP_SET_RENDER_MODE_1_ADDR_0_LO__MASK; -} - -#define REG_CP_SET_RENDER_MODE_2 0x00000002 -#define CP_SET_RENDER_MODE_2_ADDR_0_HI__MASK 0xffffffff -#define CP_SET_RENDER_MODE_2_ADDR_0_HI__SHIFT 0 -static inline uint32_t CP_SET_RENDER_MODE_2_ADDR_0_HI(uint32_t val) -{ - return ((val) << CP_SET_RENDER_MODE_2_ADDR_0_HI__SHIFT) & CP_SET_RENDER_MODE_2_ADDR_0_HI__MASK; -} - -#define REG_CP_SET_RENDER_MODE_3 0x00000003 -#define CP_SET_RENDER_MODE_3_VSC_ENABLE 0x00000008 -#define CP_SET_RENDER_MODE_3_GMEM_ENABLE 0x00000010 - -#define REG_CP_SET_RENDER_MODE_4 0x00000004 - -#define REG_CP_SET_RENDER_MODE_5 0x00000005 -#define CP_SET_RENDER_MODE_5_ADDR_1_LEN__MASK 0xffffffff -#define CP_SET_RENDER_MODE_5_ADDR_1_LEN__SHIFT 0 -static inline uint32_t CP_SET_RENDER_MODE_5_ADDR_1_LEN(uint32_t val) -{ - return ((val) << CP_SET_RENDER_MODE_5_ADDR_1_LEN__SHIFT) & CP_SET_RENDER_MODE_5_ADDR_1_LEN__MASK; -} - -#define REG_CP_SET_RENDER_MODE_6 0x00000006 -#define CP_SET_RENDER_MODE_6_ADDR_1_LO__MASK 0xffffffff -#define CP_SET_RENDER_MODE_6_ADDR_1_LO__SHIFT 0 -static inline uint32_t CP_SET_RENDER_MODE_6_ADDR_1_LO(uint32_t val) -{ - return ((val) << CP_SET_RENDER_MODE_6_ADDR_1_LO__SHIFT) & CP_SET_RENDER_MODE_6_ADDR_1_LO__MASK; -} - -#define REG_CP_SET_RENDER_MODE_7 0x00000007 
-#define CP_SET_RENDER_MODE_7_ADDR_1_HI__MASK 0xffffffff -#define CP_SET_RENDER_MODE_7_ADDR_1_HI__SHIFT 0 -static inline uint32_t CP_SET_RENDER_MODE_7_ADDR_1_HI(uint32_t val) -{ - return ((val) << CP_SET_RENDER_MODE_7_ADDR_1_HI__SHIFT) & CP_SET_RENDER_MODE_7_ADDR_1_HI__MASK; -} - -#define REG_CP_COMPUTE_CHECKPOINT_0 0x00000000 -#define CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__MASK 0xffffffff -#define CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__SHIFT 0 -static inline uint32_t CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO(uint32_t val) -{ - return ((val) << CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__SHIFT) & CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__MASK; -} - -#define REG_CP_COMPUTE_CHECKPOINT_1 0x00000001 -#define CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__MASK 0xffffffff -#define CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__SHIFT 0 -static inline uint32_t CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI(uint32_t val) -{ - return ((val) << CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__SHIFT) & CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__MASK; -} - -#define REG_CP_COMPUTE_CHECKPOINT_2 0x00000002 - -#define REG_CP_COMPUTE_CHECKPOINT_3 0x00000003 -#define CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__MASK 0xffffffff -#define CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__SHIFT 0 -static inline uint32_t CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN(uint32_t val) -{ - return ((val) << CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__SHIFT) & CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__MASK; -} - -#define REG_CP_COMPUTE_CHECKPOINT_4 0x00000004 - -#define REG_CP_COMPUTE_CHECKPOINT_5 0x00000005 -#define CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__MASK 0xffffffff -#define CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__SHIFT 0 -static inline uint32_t CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO(uint32_t val) -{ - return ((val) << CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__SHIFT) & CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__MASK; -} - -#define REG_CP_COMPUTE_CHECKPOINT_6 0x00000006 -#define CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__MASK 0xffffffff -#define CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__SHIFT 0 -static inline uint32_t CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI(uint32_t val) -{ - 
return ((val) << CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__SHIFT) & CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__MASK; -} - -#define REG_CP_COMPUTE_CHECKPOINT_7 0x00000007 - -#define REG_CP_PERFCOUNTER_ACTION_0 0x00000000 - -#define REG_CP_PERFCOUNTER_ACTION_1 0x00000001 -#define CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__MASK 0xffffffff -#define CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__SHIFT 0 -static inline uint32_t CP_PERFCOUNTER_ACTION_1_ADDR_0_LO(uint32_t val) -{ - return ((val) << CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__SHIFT) & CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__MASK; -} - -#define REG_CP_PERFCOUNTER_ACTION_2 0x00000002 -#define CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__MASK 0xffffffff -#define CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__SHIFT 0 -static inline uint32_t CP_PERFCOUNTER_ACTION_2_ADDR_0_HI(uint32_t val) -{ - return ((val) << CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__SHIFT) & CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__MASK; -} - -#define REG_CP_EVENT_WRITE_0 0x00000000 -#define CP_EVENT_WRITE_0_EVENT__MASK 0x000000ff -#define CP_EVENT_WRITE_0_EVENT__SHIFT 0 -static inline uint32_t CP_EVENT_WRITE_0_EVENT(enum vgt_event_type val) -{ - return ((val) << CP_EVENT_WRITE_0_EVENT__SHIFT) & CP_EVENT_WRITE_0_EVENT__MASK; -} -#define CP_EVENT_WRITE_0_TIMESTAMP 0x40000000 - -#define REG_CP_EVENT_WRITE_1 0x00000001 -#define CP_EVENT_WRITE_1_ADDR_0_LO__MASK 0xffffffff -#define CP_EVENT_WRITE_1_ADDR_0_LO__SHIFT 0 -static inline uint32_t CP_EVENT_WRITE_1_ADDR_0_LO(uint32_t val) -{ - return ((val) << CP_EVENT_WRITE_1_ADDR_0_LO__SHIFT) & CP_EVENT_WRITE_1_ADDR_0_LO__MASK; -} - -#define REG_CP_EVENT_WRITE_2 0x00000002 -#define CP_EVENT_WRITE_2_ADDR_0_HI__MASK 0xffffffff -#define CP_EVENT_WRITE_2_ADDR_0_HI__SHIFT 0 -static inline uint32_t CP_EVENT_WRITE_2_ADDR_0_HI(uint32_t val) -{ - return ((val) << CP_EVENT_WRITE_2_ADDR_0_HI__SHIFT) & CP_EVENT_WRITE_2_ADDR_0_HI__MASK; -} - -#define REG_CP_EVENT_WRITE_3 0x00000003 - -#define REG_CP_BLIT_0 0x00000000 -#define CP_BLIT_0_OP__MASK 0x0000000f -#define CP_BLIT_0_OP__SHIFT 0 -static 
inline uint32_t CP_BLIT_0_OP(enum cp_blit_cmd val) -{ - return ((val) << CP_BLIT_0_OP__SHIFT) & CP_BLIT_0_OP__MASK; -} - -#define REG_CP_BLIT_1 0x00000001 -#define CP_BLIT_1_SRC_X1__MASK 0x00003fff -#define CP_BLIT_1_SRC_X1__SHIFT 0 -static inline uint32_t CP_BLIT_1_SRC_X1(uint32_t val) -{ - return ((val) << CP_BLIT_1_SRC_X1__SHIFT) & CP_BLIT_1_SRC_X1__MASK; -} -#define CP_BLIT_1_SRC_Y1__MASK 0x3fff0000 -#define CP_BLIT_1_SRC_Y1__SHIFT 16 -static inline uint32_t CP_BLIT_1_SRC_Y1(uint32_t val) -{ - return ((val) << CP_BLIT_1_SRC_Y1__SHIFT) & CP_BLIT_1_SRC_Y1__MASK; -} - -#define REG_CP_BLIT_2 0x00000002 -#define CP_BLIT_2_SRC_X2__MASK 0x00003fff -#define CP_BLIT_2_SRC_X2__SHIFT 0 -static inline uint32_t CP_BLIT_2_SRC_X2(uint32_t val) -{ - return ((val) << CP_BLIT_2_SRC_X2__SHIFT) & CP_BLIT_2_SRC_X2__MASK; -} -#define CP_BLIT_2_SRC_Y2__MASK 0x3fff0000 -#define CP_BLIT_2_SRC_Y2__SHIFT 16 -static inline uint32_t CP_BLIT_2_SRC_Y2(uint32_t val) -{ - return ((val) << CP_BLIT_2_SRC_Y2__SHIFT) & CP_BLIT_2_SRC_Y2__MASK; -} - -#define REG_CP_BLIT_3 0x00000003 -#define CP_BLIT_3_DST_X1__MASK 0x00003fff -#define CP_BLIT_3_DST_X1__SHIFT 0 -static inline uint32_t CP_BLIT_3_DST_X1(uint32_t val) -{ - return ((val) << CP_BLIT_3_DST_X1__SHIFT) & CP_BLIT_3_DST_X1__MASK; -} -#define CP_BLIT_3_DST_Y1__MASK 0x3fff0000 -#define CP_BLIT_3_DST_Y1__SHIFT 16 -static inline uint32_t CP_BLIT_3_DST_Y1(uint32_t val) -{ - return ((val) << CP_BLIT_3_DST_Y1__SHIFT) & CP_BLIT_3_DST_Y1__MASK; -} - -#define REG_CP_BLIT_4 0x00000004 -#define CP_BLIT_4_DST_X2__MASK 0x00003fff -#define CP_BLIT_4_DST_X2__SHIFT 0 -static inline uint32_t CP_BLIT_4_DST_X2(uint32_t val) -{ - return ((val) << CP_BLIT_4_DST_X2__SHIFT) & CP_BLIT_4_DST_X2__MASK; -} -#define CP_BLIT_4_DST_Y2__MASK 0x3fff0000 -#define CP_BLIT_4_DST_Y2__SHIFT 16 -static inline uint32_t CP_BLIT_4_DST_Y2(uint32_t val) -{ - return ((val) << CP_BLIT_4_DST_Y2__SHIFT) & CP_BLIT_4_DST_Y2__MASK; -} - -#define REG_CP_EXEC_CS_0 0x00000000 - -#define 
REG_CP_EXEC_CS_1 0x00000001 -#define CP_EXEC_CS_1_NGROUPS_X__MASK 0xffffffff -#define CP_EXEC_CS_1_NGROUPS_X__SHIFT 0 -static inline uint32_t CP_EXEC_CS_1_NGROUPS_X(uint32_t val) -{ - return ((val) << CP_EXEC_CS_1_NGROUPS_X__SHIFT) & CP_EXEC_CS_1_NGROUPS_X__MASK; -} - -#define REG_CP_EXEC_CS_2 0x00000002 -#define CP_EXEC_CS_2_NGROUPS_Y__MASK 0xffffffff -#define CP_EXEC_CS_2_NGROUPS_Y__SHIFT 0 -static inline uint32_t CP_EXEC_CS_2_NGROUPS_Y(uint32_t val) -{ - return ((val) << CP_EXEC_CS_2_NGROUPS_Y__SHIFT) & CP_EXEC_CS_2_NGROUPS_Y__MASK; -} - -#define REG_CP_EXEC_CS_3 0x00000003 -#define CP_EXEC_CS_3_NGROUPS_Z__MASK 0xffffffff -#define CP_EXEC_CS_3_NGROUPS_Z__SHIFT 0 -static inline uint32_t CP_EXEC_CS_3_NGROUPS_Z(uint32_t val) -{ - return ((val) << CP_EXEC_CS_3_NGROUPS_Z__SHIFT) & CP_EXEC_CS_3_NGROUPS_Z__MASK; -} - -#define REG_A4XX_CP_EXEC_CS_INDIRECT_0 0x00000000 - - -#define REG_A4XX_CP_EXEC_CS_INDIRECT_1 0x00000001 -#define A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__MASK 0xffffffff -#define A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__SHIFT 0 -static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_1_ADDR(uint32_t val) -{ - return ((val) << A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__MASK; -} - -#define REG_A4XX_CP_EXEC_CS_INDIRECT_2 0x00000002 -#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__MASK 0x00000ffc -#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__SHIFT 2 -static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX(uint32_t val) -{ - return ((val) << A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__MASK; -} -#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__MASK 0x003ff000 -#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__SHIFT 12 -static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY(uint32_t val) -{ - return ((val) << A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__MASK; -} -#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__MASK 0xffc00000 -#define 
A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__SHIFT 22 -static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ(uint32_t val) -{ - return ((val) << A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__MASK; -} - - -#define REG_A5XX_CP_EXEC_CS_INDIRECT_1 0x00000001 -#define A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__MASK 0xffffffff -#define A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__SHIFT 0 -static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO(uint32_t val) -{ - return ((val) << A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__MASK; -} - -#define REG_A5XX_CP_EXEC_CS_INDIRECT_2 0x00000002 -#define A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__MASK 0xffffffff -#define A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__SHIFT 0 -static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI(uint32_t val) -{ - return ((val) << A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__MASK; -} - -#define REG_A5XX_CP_EXEC_CS_INDIRECT_3 0x00000003 -#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__MASK 0x00000ffc -#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__SHIFT 2 -static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX(uint32_t val) -{ - return ((val) << A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__MASK; -} -#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__MASK 0x003ff000 -#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__SHIFT 12 -static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY(uint32_t val) -{ - return ((val) << A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__MASK; -} -#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__MASK 0xffc00000 -#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__SHIFT 22 -static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ(uint32_t val) -{ - return ((val) << A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__MASK; -} - -#define REG_A2XX_CP_SET_MARKER_0 0x00000000 
-#define A2XX_CP_SET_MARKER_0_MARKER__MASK 0x0000000f -#define A2XX_CP_SET_MARKER_0_MARKER__SHIFT 0 -static inline uint32_t A2XX_CP_SET_MARKER_0_MARKER(uint32_t val) -{ - return ((val) << A2XX_CP_SET_MARKER_0_MARKER__SHIFT) & A2XX_CP_SET_MARKER_0_MARKER__MASK; -} -#define A2XX_CP_SET_MARKER_0_MODE__MASK 0x0000000f -#define A2XX_CP_SET_MARKER_0_MODE__SHIFT 0 -static inline uint32_t A2XX_CP_SET_MARKER_0_MODE(enum a6xx_render_mode val) -{ - return ((val) << A2XX_CP_SET_MARKER_0_MODE__SHIFT) & A2XX_CP_SET_MARKER_0_MODE__MASK; -} -#define A2XX_CP_SET_MARKER_0_IFPC 0x00000100 - -static inline uint32_t REG_A2XX_CP_SET_PSEUDO_REG_(uint32_t i0) { return 0x00000000 + 0x3*i0; } - -static inline uint32_t REG_A2XX_CP_SET_PSEUDO_REG__0(uint32_t i0) { return 0x00000000 + 0x3*i0; } -#define A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG__MASK 0x00000007 -#define A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG__SHIFT 0 -static inline uint32_t A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG(enum pseudo_reg val) -{ - return ((val) << A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG__SHIFT) & A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG__MASK; -} - -static inline uint32_t REG_A2XX_CP_SET_PSEUDO_REG__1(uint32_t i0) { return 0x00000001 + 0x3*i0; } -#define A2XX_CP_SET_PSEUDO_REG__1_LO__MASK 0xffffffff -#define A2XX_CP_SET_PSEUDO_REG__1_LO__SHIFT 0 -static inline uint32_t A2XX_CP_SET_PSEUDO_REG__1_LO(uint32_t val) -{ - return ((val) << A2XX_CP_SET_PSEUDO_REG__1_LO__SHIFT) & A2XX_CP_SET_PSEUDO_REG__1_LO__MASK; -} - -static inline uint32_t REG_A2XX_CP_SET_PSEUDO_REG__2(uint32_t i0) { return 0x00000002 + 0x3*i0; } -#define A2XX_CP_SET_PSEUDO_REG__2_HI__MASK 0xffffffff -#define A2XX_CP_SET_PSEUDO_REG__2_HI__SHIFT 0 -static inline uint32_t A2XX_CP_SET_PSEUDO_REG__2_HI(uint32_t val) -{ - return ((val) << A2XX_CP_SET_PSEUDO_REG__2_HI__SHIFT) & A2XX_CP_SET_PSEUDO_REG__2_HI__MASK; -} - -#define REG_A2XX_CP_REG_TEST_0 0x00000000 -#define A2XX_CP_REG_TEST_0_REG__MASK 0x00000fff -#define A2XX_CP_REG_TEST_0_REG__SHIFT 0 -static inline uint32_t 
A2XX_CP_REG_TEST_0_REG(uint32_t val) -{ - return ((val) << A2XX_CP_REG_TEST_0_REG__SHIFT) & A2XX_CP_REG_TEST_0_REG__MASK; -} -#define A2XX_CP_REG_TEST_0_BIT__MASK 0x01f00000 -#define A2XX_CP_REG_TEST_0_BIT__SHIFT 20 -static inline uint32_t A2XX_CP_REG_TEST_0_BIT(uint32_t val) -{ - return ((val) << A2XX_CP_REG_TEST_0_BIT__SHIFT) & A2XX_CP_REG_TEST_0_BIT__MASK; -} -#define A2XX_CP_REG_TEST_0_UNK25 0x02000000 - - -#endif /* ADRENO_PM4_XML */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/Android.mk mesa-19.0.1/src/gallium/drivers/freedreno/Android.mk --- mesa-18.3.3/src/gallium/drivers/freedreno/Android.mk 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/Android.mk 2019-03-31 23:16:37.000000000 +0000 @@ -27,7 +27,6 @@ LOCAL_SRC_FILES := \ $(C_SOURCES) \ - $(drm_SOURCES) \ $(a2xx_SOURCES) \ $(a3xx_SOURCES) \ $(a4xx_SOURCES) \ @@ -39,7 +38,8 @@ # -Wno-packed-bitfield-compat LOCAL_C_INCLUDES := \ - $(LOCAL_PATH)/ir3 + $(LOCAL_PATH)/ir3 \ + $(MESA_TOP)/include/drm-uapi LOCAL_GENERATED_SOURCES := $(MESA_GEN_NIR_H) diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/Automake.inc mesa-19.0.1/src/gallium/drivers/freedreno/Automake.inc --- mesa-18.3.3/src/gallium/drivers/freedreno/Automake.inc 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/Automake.inc 2019-03-31 23:16:37.000000000 +0000 @@ -5,6 +5,8 @@ TARGET_LIB_DEPS += \ $(top_builddir)/src/gallium/winsys/freedreno/drm/libfreedrenodrm.la \ $(top_builddir)/src/gallium/drivers/freedreno/libfreedreno.la \ + $(top_builddir)/src/freedreno/libfreedreno_drm.la \ + $(top_builddir)/src/freedreno/libfreedreno_ir3.la \ $(FREEDRENO_LIBS) \ $(LIBDRM_LIBS) diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/disasm.h mesa-19.0.1/src/gallium/drivers/freedreno/disasm.h --- mesa-18.3.3/src/gallium/drivers/freedreno/disasm.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/disasm.h 2019-03-31 23:16:37.000000000 +0000 @@ -27,66 +27,17 @@ 
#include #include +#include "compiler/shader_enums.h" #include "util/u_debug.h" -enum fd_shader_debug { - FD_DBG_SHADER_VS = 0x01, - FD_DBG_SHADER_FS = 0x02, - FD_DBG_SHADER_CS = 0x04, -}; - -extern enum fd_shader_debug fd_shader_debug; - -enum shader_t { - SHADER_VERTEX, - SHADER_TCS, - SHADER_TES, - SHADER_GEOM, - SHADER_FRAGMENT, - SHADER_COMPUTE, - SHADER_MAX, -}; - -static inline bool -shader_debug_enabled(enum shader_t type) -{ - switch (type) { - case SHADER_VERTEX: return !!(fd_shader_debug & FD_DBG_SHADER_VS); - case SHADER_FRAGMENT: return !!(fd_shader_debug & FD_DBG_SHADER_FS); - case SHADER_COMPUTE: return !!(fd_shader_debug & FD_DBG_SHADER_CS); - default: - debug_assert(0); - return false; - } -} - -static inline const char * -shader_stage_name(enum shader_t type) -{ - /* NOTE these names are chosen to match the INTEL_DEBUG output - * which frameretrace parses. Hurray accidental ABI! - */ - switch (type) { - case SHADER_VERTEX: return "vertex"; - case SHADER_TCS: return "tessellation control"; - case SHADER_TES: return "tessellation evaluation"; - case SHADER_GEOM: return "geometry"; - case SHADER_FRAGMENT: return "fragment"; - case SHADER_COMPUTE: return "compute"; - default: - debug_assert(0); - return NULL; - } -} - /* bitmask of debug flags */ enum debug_t { PRINT_RAW = 0x1, /* dump raw hexdump */ PRINT_VERBOSE = 0x2, }; -int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, enum shader_t type); -int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out); +int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, gl_shader_stage type); +int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id); void disasm_set_debug(enum debug_t debug); #endif /* DISASM_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_bo.c mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_bo.c --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_bo.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_bo.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,361 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#include "os/os_mman.h" - -#include "freedreno_drmif.h" -#include "freedreno_priv.h" - -pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER; -void bo_del(struct fd_bo *bo); - -/* set buffer name, and add to table, call w/ table_lock held: */ -static void set_name(struct fd_bo *bo, uint32_t name) -{ - bo->name = name; - /* add ourself into the handle table: */ - _mesa_hash_table_insert(bo->dev->name_table, &bo->name, bo); -} - -/* lookup a buffer, call w/ table_lock held: */ -static struct fd_bo * lookup_bo(struct hash_table *tbl, uint32_t key) -{ - struct fd_bo *bo = NULL; - struct hash_entry *entry = _mesa_hash_table_search(tbl, &key); - if (entry) { - /* found, incr refcnt and return: */ - bo = fd_bo_ref(entry->data); - - /* don't break the bucket if this bo was found in one */ - list_delinit(&bo->list); - } - return bo; -} - -/* allocate a new buffer object, call w/ table_lock held */ -static struct fd_bo * bo_from_handle(struct fd_device *dev, - uint32_t size, uint32_t handle) -{ - struct fd_bo *bo; - - bo = dev->funcs->bo_from_handle(dev, size, handle); - if (!bo) { - struct drm_gem_close req = { - .handle = handle, - }; - drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req); - return NULL; - } - bo->dev = fd_device_ref(dev); - bo->size = size; - bo->handle = handle; - p_atomic_set(&bo->refcnt, 1); - list_inithead(&bo->list); - /* add ourself into the handle table: */ - _mesa_hash_table_insert(dev->handle_table, &bo->handle, bo); - return bo; -} - -static struct fd_bo * -bo_new(struct fd_device *dev, uint32_t size, uint32_t flags, - struct fd_bo_cache *cache) -{ - struct fd_bo *bo = NULL; - uint32_t handle; - int ret; - - bo = fd_bo_cache_alloc(cache, &size, flags); - if (bo) - return bo; - - ret = dev->funcs->bo_new_handle(dev, size, flags, &handle); - if (ret) - return NULL; - - pthread_mutex_lock(&table_lock); - bo = bo_from_handle(dev, size, handle); - pthread_mutex_unlock(&table_lock); - - VG_BO_ALLOC(bo); - - return bo; 
-} - -struct fd_bo * -fd_bo_new(struct fd_device *dev, uint32_t size, uint32_t flags) -{ - struct fd_bo *bo = bo_new(dev, size, flags, &dev->bo_cache); - if (bo) - bo->bo_reuse = BO_CACHE; - return bo; -} - -/* internal function to allocate bo's that use the ringbuffer cache - * instead of the normal bo_cache. The purpose is, because cmdstream - * bo's get vmap'd on the kernel side, and that is expensive, we want - * to re-use cmdstream bo's for cmdstream and not unrelated purposes. - */ -struct fd_bo * -fd_bo_new_ring(struct fd_device *dev, uint32_t size, uint32_t flags) -{ - struct fd_bo *bo = bo_new(dev, size, flags, &dev->ring_cache); - if (bo) - bo->bo_reuse = RING_CACHE; - return bo; -} - -struct fd_bo * -fd_bo_from_handle(struct fd_device *dev, uint32_t handle, uint32_t size) -{ - struct fd_bo *bo = NULL; - - pthread_mutex_lock(&table_lock); - - bo = lookup_bo(dev->handle_table, handle); - if (bo) - goto out_unlock; - - bo = bo_from_handle(dev, size, handle); - - VG_BO_ALLOC(bo); - -out_unlock: - pthread_mutex_unlock(&table_lock); - - return bo; -} - -struct fd_bo * -fd_bo_from_dmabuf(struct fd_device *dev, int fd) -{ - int ret, size; - uint32_t handle; - struct fd_bo *bo; - - pthread_mutex_lock(&table_lock); - ret = drmPrimeFDToHandle(dev->fd, fd, &handle); - if (ret) { - pthread_mutex_unlock(&table_lock); - return NULL; - } - - bo = lookup_bo(dev->handle_table, handle); - if (bo) - goto out_unlock; - - /* lseek() to get bo size */ - size = lseek(fd, 0, SEEK_END); - lseek(fd, 0, SEEK_CUR); - - bo = bo_from_handle(dev, size, handle); - - VG_BO_ALLOC(bo); - -out_unlock: - pthread_mutex_unlock(&table_lock); - - return bo; -} - -struct fd_bo * fd_bo_from_name(struct fd_device *dev, uint32_t name) -{ - struct drm_gem_open req = { - .name = name, - }; - struct fd_bo *bo; - - pthread_mutex_lock(&table_lock); - - /* check name table first, to see if bo is already open: */ - bo = lookup_bo(dev->name_table, name); - if (bo) - goto out_unlock; - - if 
(drmIoctl(dev->fd, DRM_IOCTL_GEM_OPEN, &req)) { - ERROR_MSG("gem-open failed: %s", strerror(errno)); - goto out_unlock; - } - - bo = lookup_bo(dev->handle_table, req.handle); - if (bo) - goto out_unlock; - - bo = bo_from_handle(dev, req.size, req.handle); - if (bo) { - set_name(bo, name); - VG_BO_ALLOC(bo); - } - -out_unlock: - pthread_mutex_unlock(&table_lock); - - return bo; -} - -uint64_t fd_bo_get_iova(struct fd_bo *bo) -{ - if (!bo->iova) - bo->iova = bo->funcs->iova(bo); - return bo->iova; -} - -void fd_bo_put_iova(struct fd_bo *bo) -{ - /* currently a no-op */ -} - -struct fd_bo * fd_bo_ref(struct fd_bo *bo) -{ - p_atomic_inc(&bo->refcnt); - return bo; -} - -void fd_bo_del(struct fd_bo *bo) -{ - struct fd_device *dev = bo->dev; - - if (!atomic_dec_and_test(&bo->refcnt)) - return; - - pthread_mutex_lock(&table_lock); - - if ((bo->bo_reuse == BO_CACHE) && (fd_bo_cache_free(&dev->bo_cache, bo) == 0)) - goto out; - if ((bo->bo_reuse == RING_CACHE) && (fd_bo_cache_free(&dev->ring_cache, bo) == 0)) - goto out; - - bo_del(bo); - fd_device_del_locked(dev); -out: - pthread_mutex_unlock(&table_lock); -} - -/* Called under table_lock */ -void bo_del(struct fd_bo *bo) -{ - VG_BO_FREE(bo); - - if (bo->map) - os_munmap(bo->map, bo->size); - - /* TODO probably bo's in bucket list get removed from - * handle table?? 
- */ - - if (bo->handle) { - struct drm_gem_close req = { - .handle = bo->handle, - }; - _mesa_hash_table_remove_key(bo->dev->handle_table, &bo->handle); - if (bo->name) - _mesa_hash_table_remove_key(bo->dev->name_table, &bo->name); - drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &req); - } - - bo->funcs->destroy(bo); -} - -int fd_bo_get_name(struct fd_bo *bo, uint32_t *name) -{ - if (!bo->name) { - struct drm_gem_flink req = { - .handle = bo->handle, - }; - int ret; - - ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_FLINK, &req); - if (ret) { - return ret; - } - - pthread_mutex_lock(&table_lock); - set_name(bo, req.name); - pthread_mutex_unlock(&table_lock); - bo->bo_reuse = NO_CACHE; - } - - *name = bo->name; - - return 0; -} - -uint32_t fd_bo_handle(struct fd_bo *bo) -{ - return bo->handle; -} - -int fd_bo_dmabuf(struct fd_bo *bo) -{ - int ret, prime_fd; - - ret = drmPrimeHandleToFD(bo->dev->fd, bo->handle, DRM_CLOEXEC, - &prime_fd); - if (ret) { - ERROR_MSG("failed to get dmabuf fd: %d", ret); - return ret; - } - - bo->bo_reuse = NO_CACHE; - - return prime_fd; -} - -uint32_t fd_bo_size(struct fd_bo *bo) -{ - return bo->size; -} - -void * fd_bo_map(struct fd_bo *bo) -{ - if (!bo->map) { - uint64_t offset; - int ret; - - ret = bo->funcs->offset(bo, &offset); - if (ret) { - return NULL; - } - - bo->map = os_mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, - bo->dev->fd, offset); - if (bo->map == MAP_FAILED) { - ERROR_MSG("mmap failed: %s", strerror(errno)); - bo->map = NULL; - } - } - return bo->map; -} - -/* a bit odd to take the pipe as an arg, but it's a, umm, quirk of kgsl.. 
*/ -int fd_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op) -{ - return bo->funcs->cpu_prep(bo, pipe, op); -} - -void fd_bo_cpu_fini(struct fd_bo *bo) -{ - bo->funcs->cpu_fini(bo); -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_bo_cache.c mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_bo_cache.c --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_bo_cache.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_bo_cache.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,218 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#include "freedreno_drmif.h" -#include "freedreno_priv.h" - -void bo_del(struct fd_bo *bo); -extern pthread_mutex_t table_lock; - -static void -add_bucket(struct fd_bo_cache *cache, int size) -{ - unsigned int i = cache->num_buckets; - - assert(i < ARRAY_SIZE(cache->cache_bucket)); - - list_inithead(&cache->cache_bucket[i].list); - cache->cache_bucket[i].size = size; - cache->num_buckets++; -} - -/** - * @coarse: if true, only power-of-two bucket sizes, otherwise - * fill in for a bit smoother size curve.. - */ -void -fd_bo_cache_init(struct fd_bo_cache *cache, int coarse) -{ - unsigned long size, cache_max_size = 64 * 1024 * 1024; - - /* OK, so power of two buckets was too wasteful of memory. - * Give 3 other sizes between each power of two, to hopefully - * cover things accurately enough. (The alternative is - * probably to just go for exact matching of sizes, and assume - * that for things like composited window resize the tiled - * width/height alignment and rounding of sizes to pages will - * get us useful cache hit rates anyway) - */ - add_bucket(cache, 4096); - add_bucket(cache, 4096 * 2); - if (!coarse) - add_bucket(cache, 4096 * 3); - - /* Initialize the linked lists for BO reuse cache. */ - for (size = 4 * 4096; size <= cache_max_size; size *= 2) { - add_bucket(cache, size); - if (!coarse) { - add_bucket(cache, size + size * 1 / 4); - add_bucket(cache, size + size * 2 / 4); - add_bucket(cache, size + size * 3 / 4); - } - } -} - -/* Frees older cached buffers. 
Called under table_lock */ -void -fd_bo_cache_cleanup(struct fd_bo_cache *cache, time_t time) -{ - int i; - - if (cache->time == time) - return; - - for (i = 0; i < cache->num_buckets; i++) { - struct fd_bo_bucket *bucket = &cache->cache_bucket[i]; - struct fd_bo *bo; - - while (!LIST_IS_EMPTY(&bucket->list)) { - bo = LIST_ENTRY(struct fd_bo, bucket->list.next, list); - - /* keep things in cache for at least 1 second: */ - if (time && ((time - bo->free_time) <= 1)) - break; - - VG_BO_OBTAIN(bo); - list_del(&bo->list); - bo_del(bo); - } - } - - cache->time = time; -} - -static struct fd_bo_bucket * get_bucket(struct fd_bo_cache *cache, uint32_t size) -{ - int i; - - /* hmm, this is what intel does, but I suppose we could calculate our - * way to the correct bucket size rather than looping.. - */ - for (i = 0; i < cache->num_buckets; i++) { - struct fd_bo_bucket *bucket = &cache->cache_bucket[i]; - if (bucket->size >= size) { - return bucket; - } - } - - return NULL; -} - -static int is_idle(struct fd_bo *bo) -{ - return fd_bo_cpu_prep(bo, NULL, - DRM_FREEDRENO_PREP_READ | - DRM_FREEDRENO_PREP_WRITE | - DRM_FREEDRENO_PREP_NOSYNC) == 0; -} - -static struct fd_bo *find_in_bucket(struct fd_bo_bucket *bucket, uint32_t flags) -{ - struct fd_bo *bo = NULL; - - /* TODO .. if we had an ALLOC_FOR_RENDER flag like intel, we could - * skip the busy check.. if it is only going to be a render target - * then we probably don't need to stall.. - * - * NOTE that intel takes ALLOC_FOR_RENDER bo's from the list tail - * (MRU, since likely to be in GPU cache), rather than head (LRU).. - */ - pthread_mutex_lock(&table_lock); - if (!LIST_IS_EMPTY(&bucket->list)) { - bo = LIST_ENTRY(struct fd_bo, bucket->list.next, list); - /* TODO check for compatible flags? 
*/ - if (is_idle(bo)) { - list_del(&bo->list); - } else { - bo = NULL; - } - } - pthread_mutex_unlock(&table_lock); - - return bo; -} - -/* NOTE: size is potentially rounded up to bucket size: */ -struct fd_bo * -fd_bo_cache_alloc(struct fd_bo_cache *cache, uint32_t *size, uint32_t flags) -{ - struct fd_bo *bo = NULL; - struct fd_bo_bucket *bucket; - - *size = align(*size, 4096); - bucket = get_bucket(cache, *size); - - /* see if we can be green and recycle: */ -retry: - if (bucket) { - *size = bucket->size; - bo = find_in_bucket(bucket, flags); - if (bo) { - VG_BO_OBTAIN(bo); - if (bo->funcs->madvise(bo, TRUE) <= 0) { - /* we've lost the backing pages, delete and try again: */ - pthread_mutex_lock(&table_lock); - bo_del(bo); - pthread_mutex_unlock(&table_lock); - goto retry; - } - p_atomic_set(&bo->refcnt, 1); - fd_device_ref(bo->dev); - return bo; - } - } - - return NULL; -} - -int -fd_bo_cache_free(struct fd_bo_cache *cache, struct fd_bo *bo) -{ - struct fd_bo_bucket *bucket = get_bucket(cache, bo->size); - - /* see if we can be green and recycle: */ - if (bucket) { - struct timespec time; - - bo->funcs->madvise(bo, FALSE); - - clock_gettime(CLOCK_MONOTONIC, &time); - - bo->free_time = time.tv_sec; - VG_BO_RELEASE(bo); - list_addtail(&bo->list, &bucket->list); - fd_bo_cache_cleanup(cache, time.tv_sec); - - /* bo's in the bucket cache don't have a ref and - * don't hold a ref to the dev: - */ - fd_device_del_locked(bo->dev); - - return 0; - } - - return -1; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_device.c mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_device.c --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_device.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_device.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,156 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this 
software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#include -#include -#include - -#include "freedreno_drmif.h" -#include "freedreno_priv.h" - -static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER; - -static uint32_t -u32_hash(const void *key) -{ - return _mesa_hash_data(key, sizeof(uint32_t)); -} - -static bool -u32_equals(const void *key1, const void *key2) -{ - return *(const uint32_t *)key1 == *(const uint32_t *)key2; -} - - -struct fd_device * kgsl_device_new(int fd); -struct fd_device * msm_device_new(int fd); - -struct fd_device * fd_device_new(int fd) -{ - struct fd_device *dev; - drmVersionPtr version; - - /* figure out if we are kgsl or msm drm driver: */ - version = drmGetVersion(fd); - if (!version) { - ERROR_MSG("cannot get version: %s", strerror(errno)); - return NULL; - } - - if (!strcmp(version->name, "msm")) { - DEBUG_MSG("msm DRM device"); - if (version->version_major != 1) { - ERROR_MSG("unsupported version: %u.%u.%u", version->version_major, - version->version_minor, version->version_patchlevel); - dev = NULL; - goto out; - } - - dev = msm_device_new(fd); - dev->version = version->version_minor; -#if HAVE_FREEDRENO_KGSL - } else if (!strcmp(version->name, "kgsl")) { - DEBUG_MSG("kgsl DRM device"); - dev = kgsl_device_new(fd); -#endif - } else { - ERROR_MSG("unknown device: %s", version->name); - dev = NULL; - } - -out: - drmFreeVersion(version); - - if (!dev) - return NULL; - - p_atomic_set(&dev->refcnt, 1); - dev->fd = fd; - dev->handle_table = _mesa_hash_table_create(NULL, u32_hash, u32_equals); - dev->name_table = _mesa_hash_table_create(NULL, u32_hash, u32_equals); - fd_bo_cache_init(&dev->bo_cache, FALSE); - fd_bo_cache_init(&dev->ring_cache, TRUE); - - return dev; -} - -/* like fd_device_new() but creates it's own private dup() of the fd - * which is close()d when the device is finalized. 
- */ -struct fd_device * fd_device_new_dup(int fd) -{ - int dup_fd = dup(fd); - struct fd_device *dev = fd_device_new(dup_fd); - if (dev) - dev->closefd = 1; - else - close(dup_fd); - return dev; -} - -struct fd_device * fd_device_ref(struct fd_device *dev) -{ - p_atomic_inc(&dev->refcnt); - return dev; -} - -static void fd_device_del_impl(struct fd_device *dev) -{ - int close_fd = dev->closefd ? dev->fd : -1; - fd_bo_cache_cleanup(&dev->bo_cache, 0); - _mesa_hash_table_destroy(dev->handle_table, NULL); - _mesa_hash_table_destroy(dev->name_table, NULL); - dev->funcs->destroy(dev); - if (close_fd >= 0) - close(close_fd); -} - -void fd_device_del_locked(struct fd_device *dev) -{ - if (!atomic_dec_and_test(&dev->refcnt)) - return; - fd_device_del_impl(dev); -} - -void fd_device_del(struct fd_device *dev) -{ - if (!atomic_dec_and_test(&dev->refcnt)) - return; - pthread_mutex_lock(&table_lock); - fd_device_del_impl(dev); - pthread_mutex_unlock(&table_lock); -} - -int fd_device_fd(struct fd_device *dev) -{ - return dev->fd; -} - -enum fd_version fd_device_version(struct fd_device *dev) -{ - return dev->version; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_drmif.h mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_drmif.h --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_drmif.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_drmif.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,126 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - 
* The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#ifndef FREEDRENO_DRMIF_H_ -#define FREEDRENO_DRMIF_H_ - -#include - -struct fd_bo; -struct fd_pipe; -struct fd_device; - -enum fd_pipe_id { - FD_PIPE_3D = 1, - FD_PIPE_2D = 2, - /* some devices have two 2d blocks.. not really sure how to - * use that yet, so just ignoring the 2nd 2d pipe for now - */ - FD_PIPE_MAX -}; - -enum fd_param_id { - FD_DEVICE_ID, - FD_GMEM_SIZE, - FD_GPU_ID, - FD_CHIP_ID, - FD_MAX_FREQ, - FD_TIMESTAMP, - FD_NR_RINGS, /* # of rings == # of distinct priority levels */ -}; - -/* bo flags: */ -#define DRM_FREEDRENO_GEM_TYPE_SMI 0x00000001 -#define DRM_FREEDRENO_GEM_TYPE_KMEM 0x00000002 -#define DRM_FREEDRENO_GEM_TYPE_MEM_MASK 0x0000000f -#define DRM_FREEDRENO_GEM_CACHE_NONE 0x00000000 -#define DRM_FREEDRENO_GEM_CACHE_WCOMBINE 0x00100000 -#define DRM_FREEDRENO_GEM_CACHE_WTHROUGH 0x00200000 -#define DRM_FREEDRENO_GEM_CACHE_WBACK 0x00400000 -#define DRM_FREEDRENO_GEM_CACHE_WBACKWA 0x00800000 -#define DRM_FREEDRENO_GEM_CACHE_MASK 0x00f00000 -#define DRM_FREEDRENO_GEM_GPUREADONLY 0x01000000 - -/* bo access flags: (keep aligned to MSM_PREP_x) */ -#define DRM_FREEDRENO_PREP_READ 0x01 -#define DRM_FREEDRENO_PREP_WRITE 0x02 -#define DRM_FREEDRENO_PREP_NOSYNC 0x04 - -/* device functions: - */ - -struct fd_device * fd_device_new(int fd); -struct fd_device * 
fd_device_new_dup(int fd); -struct fd_device * fd_device_ref(struct fd_device *dev); -void fd_device_del(struct fd_device *dev); -int fd_device_fd(struct fd_device *dev); - -enum fd_version { - FD_VERSION_MADVISE = 1, /* kernel supports madvise */ - FD_VERSION_UNLIMITED_CMDS = 1, /* submits w/ >4 cmd buffers (growable ringbuffer) */ - FD_VERSION_FENCE_FD = 2, /* submit command supports in/out fences */ - FD_VERSION_SUBMIT_QUEUES = 3, /* submit queues and multiple priority levels */ - FD_VERSION_BO_IOVA = 3, /* supports fd_bo_get/put_iova() */ -}; -enum fd_version fd_device_version(struct fd_device *dev); - -/* pipe functions: - */ - -struct fd_pipe * fd_pipe_new(struct fd_device *dev, enum fd_pipe_id id); -struct fd_pipe * fd_pipe_new2(struct fd_device *dev, enum fd_pipe_id id, uint32_t prio); -struct fd_pipe * fd_pipe_ref(struct fd_pipe *pipe); -void fd_pipe_del(struct fd_pipe *pipe); -int fd_pipe_get_param(struct fd_pipe *pipe, enum fd_param_id param, - uint64_t *value); -int fd_pipe_wait(struct fd_pipe *pipe, uint32_t timestamp); -/* timeout in nanosec */ -int fd_pipe_wait_timeout(struct fd_pipe *pipe, uint32_t timestamp, - uint64_t timeout); - - -/* buffer-object functions: - */ - -struct fd_bo * fd_bo_new(struct fd_device *dev, - uint32_t size, uint32_t flags); -struct fd_bo *fd_bo_from_handle(struct fd_device *dev, - uint32_t handle, uint32_t size); -struct fd_bo * fd_bo_from_name(struct fd_device *dev, uint32_t name); -struct fd_bo * fd_bo_from_dmabuf(struct fd_device *dev, int fd); -uint64_t fd_bo_get_iova(struct fd_bo *bo); -void fd_bo_put_iova(struct fd_bo *bo); -struct fd_bo * fd_bo_ref(struct fd_bo *bo); -void fd_bo_del(struct fd_bo *bo); -int fd_bo_get_name(struct fd_bo *bo, uint32_t *name); -uint32_t fd_bo_handle(struct fd_bo *bo); -int fd_bo_dmabuf(struct fd_bo *bo); -uint32_t fd_bo_size(struct fd_bo *bo); -void * fd_bo_map(struct fd_bo *bo); -int fd_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op); -void fd_bo_cpu_fini(struct fd_bo 
*bo); - -#endif /* FREEDRENO_DRMIF_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_pipe.c mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_pipe.c --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_pipe.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_pipe.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,100 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#include "freedreno_drmif.h" -#include "freedreno_priv.h" - -/** - * priority of zero is highest priority, and higher numeric values are - * lower priorities - */ -struct fd_pipe * -fd_pipe_new2(struct fd_device *dev, enum fd_pipe_id id, uint32_t prio) -{ - struct fd_pipe *pipe; - uint64_t val; - - if (id > FD_PIPE_MAX) { - ERROR_MSG("invalid pipe id: %d", id); - return NULL; - } - - if ((prio != 1) && (fd_device_version(dev) < FD_VERSION_SUBMIT_QUEUES)) { - ERROR_MSG("invalid priority!"); - return NULL; - } - - pipe = dev->funcs->pipe_new(dev, id, prio); - if (!pipe) { - ERROR_MSG("allocation failed"); - return NULL; - } - - pipe->dev = dev; - pipe->id = id; - p_atomic_set(&pipe->refcnt, 1); - - fd_pipe_get_param(pipe, FD_GPU_ID, &val); - pipe->gpu_id = val; - - return pipe; -} - -struct fd_pipe * -fd_pipe_new(struct fd_device *dev, enum fd_pipe_id id) -{ - return fd_pipe_new2(dev, id, 1); -} - -struct fd_pipe * fd_pipe_ref(struct fd_pipe *pipe) -{ - p_atomic_inc(&pipe->refcnt); - return pipe; -} - -void fd_pipe_del(struct fd_pipe *pipe) -{ - if (!atomic_dec_and_test(&pipe->refcnt)) - return; - pipe->funcs->destroy(pipe); -} - -int fd_pipe_get_param(struct fd_pipe *pipe, - enum fd_param_id param, uint64_t *value) -{ - return pipe->funcs->get_param(pipe, param, value); -} - -int fd_pipe_wait(struct fd_pipe *pipe, uint32_t timestamp) -{ - return fd_pipe_wait_timeout(pipe, timestamp, ~0); -} - -int fd_pipe_wait_timeout(struct fd_pipe *pipe, uint32_t timestamp, - uint64_t timeout) -{ - return pipe->funcs->wait(pipe, timestamp, timeout); -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_priv.h mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_priv.h --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_priv.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_priv.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,258 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob 
Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#ifndef FREEDRENO_PRIV_H_ -#define FREEDRENO_PRIV_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "util/hash_table.h" -#include "util/list.h" -#include "util/u_debug.h" -#include "util/u_atomic.h" -#include "util/u_math.h" -#include "util/u_debug.h" - -#include "freedreno_drmif.h" -#include "freedreno_ringbuffer.h" - -#define atomic_dec_and_test(x) (__sync_add_and_fetch (x, -1) == 0) - -struct fd_device_funcs { - int (*bo_new_handle)(struct fd_device *dev, uint32_t size, - uint32_t flags, uint32_t *handle); - struct fd_bo * (*bo_from_handle)(struct fd_device *dev, - uint32_t size, uint32_t handle); - struct fd_pipe * (*pipe_new)(struct fd_device *dev, enum fd_pipe_id id, - unsigned prio); - void (*destroy)(struct fd_device *dev); -}; - -struct fd_bo_bucket { - uint32_t size; - struct list_head list; -}; - -struct fd_bo_cache { - struct fd_bo_bucket cache_bucket[14 * 4]; - int num_buckets; - time_t time; -}; - -struct fd_device { - int fd; - enum fd_version version; - int32_t refcnt; - - /* tables to keep track of bo's, to avoid "evil-twin" fd_bo objects: - * - * handle_table: maps handle to fd_bo - * name_table: maps flink name to fd_bo - * - * We end up needing two tables, because DRM_IOCTL_GEM_OPEN always - * returns a new handle. So we need to figure out if the bo is already - * open in the process first, before calling gem-open. 
- */ - struct hash_table *handle_table, *name_table; - - const struct fd_device_funcs *funcs; - - struct fd_bo_cache bo_cache; - struct fd_bo_cache ring_cache; - - int closefd; /* call close(fd) upon destruction */ - - /* just for valgrind: */ - int bo_size; -}; - -void fd_bo_cache_init(struct fd_bo_cache *cache, int coarse); -void fd_bo_cache_cleanup(struct fd_bo_cache *cache, time_t time); -struct fd_bo * fd_bo_cache_alloc(struct fd_bo_cache *cache, - uint32_t *size, uint32_t flags); -int fd_bo_cache_free(struct fd_bo_cache *cache, struct fd_bo *bo); - -/* for where @table_lock is already held: */ -void fd_device_del_locked(struct fd_device *dev); - -struct fd_pipe_funcs { - struct fd_ringbuffer * (*ringbuffer_new_object)(struct fd_pipe *pipe, uint32_t size); - struct fd_submit * (*submit_new)(struct fd_pipe *pipe); - int (*get_param)(struct fd_pipe *pipe, enum fd_param_id param, uint64_t *value); - int (*wait)(struct fd_pipe *pipe, uint32_t timestamp, uint64_t timeout); - void (*destroy)(struct fd_pipe *pipe); -}; - -struct fd_pipe { - struct fd_device *dev; - enum fd_pipe_id id; - uint32_t gpu_id; - int32_t refcnt; - const struct fd_pipe_funcs *funcs; -}; - -struct fd_submit_funcs { - struct fd_ringbuffer * (*new_ringbuffer)(struct fd_submit *submit, - uint32_t size, enum fd_ringbuffer_flags flags); - int (*flush)(struct fd_submit *submit, int in_fence_fd, - int *out_fence_fd, uint32_t *out_fence); - void (*destroy)(struct fd_submit *submit); -}; - -struct fd_submit { - struct fd_pipe *pipe; - const struct fd_submit_funcs *funcs; -}; - -struct fd_ringbuffer_funcs { - void (*grow)(struct fd_ringbuffer *ring, uint32_t size); - void (*emit_reloc)(struct fd_ringbuffer *ring, - const struct fd_reloc *reloc); - uint32_t (*emit_reloc_ring)(struct fd_ringbuffer *ring, - struct fd_ringbuffer *target, uint32_t cmd_idx); - uint32_t (*cmd_count)(struct fd_ringbuffer *ring); - void (*destroy)(struct fd_ringbuffer *ring); -}; - -struct fd_bo_funcs { - int (*offset)(struct 
fd_bo *bo, uint64_t *offset); - int (*cpu_prep)(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op); - void (*cpu_fini)(struct fd_bo *bo); - int (*madvise)(struct fd_bo *bo, int willneed); - uint64_t (*iova)(struct fd_bo *bo); - void (*destroy)(struct fd_bo *bo); -}; - -struct fd_bo { - struct fd_device *dev; - uint32_t size; - uint32_t handle; - uint32_t name; - int32_t refcnt; - uint64_t iova; - void *map; - const struct fd_bo_funcs *funcs; - - enum { - NO_CACHE = 0, - BO_CACHE = 1, - RING_CACHE = 2, - } bo_reuse; - - struct list_head list; /* bucket-list entry */ - time_t free_time; /* time when added to bucket-list */ -}; - -struct fd_bo *fd_bo_new_ring(struct fd_device *dev, - uint32_t size, uint32_t flags); - -#define enable_debug 0 /* TODO make dynamic */ - -#define INFO_MSG(fmt, ...) \ - do { debug_printf("[I] "fmt " (%s:%d)\n", \ - ##__VA_ARGS__, __FUNCTION__, __LINE__); } while (0) -#define DEBUG_MSG(fmt, ...) \ - do if (enable_debug) { debug_printf("[D] "fmt " (%s:%d)\n", \ - ##__VA_ARGS__, __FUNCTION__, __LINE__); } while (0) -#define WARN_MSG(fmt, ...) \ - do { debug_printf("[W] "fmt " (%s:%d)\n", \ - ##__VA_ARGS__, __FUNCTION__, __LINE__); } while (0) -#define ERROR_MSG(fmt, ...) \ - do { debug_printf("[E] " fmt " (%s:%d)\n", \ - ##__VA_ARGS__, __FUNCTION__, __LINE__); } while (0) - -#define U642VOID(x) ((void *)(unsigned long)(x)) -#define VOID2U64(x) ((uint64_t)(unsigned long)(x)) - -#if HAVE_VALGRIND -# include - -/* - * For tracking the backing memory (if valgrind enabled, we force a mmap - * for the purposes of tracking) - */ -static inline void VG_BO_ALLOC(struct fd_bo *bo) -{ - if (bo && RUNNING_ON_VALGRIND) { - VALGRIND_MALLOCLIKE_BLOCK(fd_bo_map(bo), bo->size, 0, 1); - } -} - -static inline void VG_BO_FREE(struct fd_bo *bo) -{ - VALGRIND_FREELIKE_BLOCK(bo->map, 0); -} - -/* - * For tracking bo structs that are in the buffer-cache, so that valgrind - * doesn't attribute ownership to the first one to allocate the recycled - * bo. 
- * - * Note that the list_head in fd_bo is used to track the buffers in cache - * so disable error reporting on the range while they are in cache so - * valgrind doesn't squawk about list traversal. - * - */ -static inline void VG_BO_RELEASE(struct fd_bo *bo) -{ - if (RUNNING_ON_VALGRIND) { - VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(bo, bo->dev->bo_size); - VALGRIND_MAKE_MEM_NOACCESS(bo, bo->dev->bo_size); - VALGRIND_FREELIKE_BLOCK(bo->map, 0); - } -} -static inline void VG_BO_OBTAIN(struct fd_bo *bo) -{ - if (RUNNING_ON_VALGRIND) { - VALGRIND_MAKE_MEM_DEFINED(bo, bo->dev->bo_size); - VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(bo, bo->dev->bo_size); - VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, 1); - } -} -#else -static inline void VG_BO_ALLOC(struct fd_bo *bo) {} -static inline void VG_BO_FREE(struct fd_bo *bo) {} -static inline void VG_BO_RELEASE(struct fd_bo *bo) {} -static inline void VG_BO_OBTAIN(struct fd_bo *bo) {} -#endif - -#define FD_DEFINE_CAST(parent, child) \ -static inline struct child * to_ ## child (struct parent *x) \ -{ return (struct child *)x; } - - -#endif /* FREEDRENO_PRIV_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_ringbuffer.c mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_ringbuffer.c --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_ringbuffer.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_ringbuffer.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,114 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the 
following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include - -#include "freedreno_drmif.h" -#include "freedreno_ringbuffer.h" -#include "freedreno_priv.h" - -struct fd_submit * -fd_submit_new(struct fd_pipe *pipe) -{ - return pipe->funcs->submit_new(pipe); -} - -void -fd_submit_del(struct fd_submit *submit) -{ - return submit->funcs->destroy(submit); -} - -int -fd_submit_flush(struct fd_submit *submit, int in_fence_fd, int *out_fence_fd, - uint32_t *out_fence) -{ - return submit->funcs->flush(submit, in_fence_fd, out_fence_fd, out_fence); -} - -struct fd_ringbuffer * -fd_submit_new_ringbuffer(struct fd_submit *submit, uint32_t size, - enum fd_ringbuffer_flags flags) -{ - debug_assert(!(flags & _FD_RINGBUFFER_OBJECT)); - if (flags & FD_RINGBUFFER_STREAMING) { - debug_assert(!(flags & FD_RINGBUFFER_GROWABLE)); - debug_assert(!(flags & FD_RINGBUFFER_PRIMARY)); - } - return submit->funcs->new_ringbuffer(submit, size, flags); -} - -struct fd_ringbuffer * -fd_ringbuffer_new_object(struct fd_pipe *pipe, uint32_t size) -{ - return pipe->funcs->ringbuffer_new_object(pipe, size); -} - -void fd_ringbuffer_del(struct fd_ringbuffer *ring) -{ - if (!atomic_dec_and_test(&ring->refcnt)) - return; - - ring->funcs->destroy(ring); -} - -struct fd_ringbuffer * -fd_ringbuffer_ref(struct fd_ringbuffer *ring) -{ - 
p_atomic_inc(&ring->refcnt); - return ring; -} - -void fd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords) -{ - assert(ring->funcs->grow); /* unsupported on kgsl */ - - /* there is an upper bound on IB size, which appears to be 0x100000 */ - if (ring->size < 0x100000) - ring->size *= 2; - - ring->funcs->grow(ring, ring->size); -} - -void fd_ringbuffer_reloc(struct fd_ringbuffer *ring, - const struct fd_reloc *reloc) -{ - ring->funcs->emit_reloc(ring, reloc); -} - -uint32_t fd_ringbuffer_cmd_count(struct fd_ringbuffer *ring) -{ - if (!ring->funcs->cmd_count) - return 1; - return ring->funcs->cmd_count(ring); -} - -uint32_t -fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer *ring, - struct fd_ringbuffer *target, uint32_t cmd_idx) -{ - return ring->funcs->emit_reloc_ring(ring, target, cmd_idx); -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_ringbuffer.h mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_ringbuffer.h --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_ringbuffer.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_ringbuffer.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,159 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#ifndef FREEDRENO_RINGBUFFER_H_ -#define FREEDRENO_RINGBUFFER_H_ - -#include "util/u_debug.h" - -#include "freedreno_drmif.h" - -struct fd_submit; -struct fd_ringbuffer; - -enum fd_ringbuffer_flags { - - /* Primary ringbuffer for a submit, ie. an IB1 level rb - * which kernel must setup RB->IB1 CP_INDIRECT_BRANCH - * packets. - */ - FD_RINGBUFFER_PRIMARY = 0x1, - - /* Hint that the stateobj will be used for streaming state - * that is used once or a few times and then discarded. - * - * For sub-allocation, non streaming stateobj's should be - * sub-allocated from a page size buffer, so one long lived - * state obj doesn't prevent other pages from being freed. - * (Ie. it would be no worse than allocating a page sized - * bo for each small non-streaming stateobj). - * - * But streaming stateobj's could be sub-allocated from a - * larger buffer to reduce the alloc/del overhead. - */ - FD_RINGBUFFER_STREAMING = 0x2, - - /* Indicates that "growable" cmdstream can be used, - * consisting of multiple physical cmdstream buffers - */ - FD_RINGBUFFER_GROWABLE = 0x4, - - /* Internal use only: */ - _FD_RINGBUFFER_OBJECT = 0x8, -}; - -/* A submit object manages/tracks all the state buildup for a "submit" - * ioctl to the kernel. Additionally, with the exception of long-lived - * non-STREAMING stateobj rb's, rb's are allocated from the submit. 
- */ -struct fd_submit * fd_submit_new(struct fd_pipe *pipe); - -/* NOTE: all ringbuffer's create from the submit should be unref'd - * before destroying the submit. - */ -void fd_submit_del(struct fd_submit *submit); - -/* Allocate a new rb from the submit. */ -struct fd_ringbuffer * fd_submit_new_ringbuffer(struct fd_submit *submit, - uint32_t size, enum fd_ringbuffer_flags flags); - -/* in_fence_fd: -1 for no in-fence, else fence fd - * out_fence_fd: NULL for no output-fence requested, else ptr to return out-fence - */ -int fd_submit_flush(struct fd_submit *submit, - int in_fence_fd, int *out_fence_fd, - uint32_t *out_fence); - -struct fd_ringbuffer_funcs; - -/* the ringbuffer object is not opaque so that OUT_RING() type stuff - * can be inlined. Note that users should not make assumptions about - * the size of this struct. - */ -struct fd_ringbuffer { - uint32_t *cur, *end, *start; - const struct fd_ringbuffer_funcs *funcs; - -// size or end coudl probably go away - int size; - int32_t refcnt; - enum fd_ringbuffer_flags flags; -}; - -/* Allocate a new long-lived state object, not associated with - * a submit: - */ -struct fd_ringbuffer * fd_ringbuffer_new_object(struct fd_pipe *pipe, - uint32_t size); - -struct fd_ringbuffer *fd_ringbuffer_ref(struct fd_ringbuffer *ring); -void fd_ringbuffer_del(struct fd_ringbuffer *ring); - -void fd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords); - -static inline void fd_ringbuffer_emit(struct fd_ringbuffer *ring, - uint32_t data) -{ - (*ring->cur++) = data; -} - -struct fd_reloc { - struct fd_bo *bo; -#define FD_RELOC_READ 0x0001 -#define FD_RELOC_WRITE 0x0002 - uint32_t flags; - uint32_t offset; - uint32_t or; - int32_t shift; - uint32_t orhi; /* used for a5xx+ */ -}; - -/* NOTE: relocs are 2 dwords on a5xx+ */ - -void fd_ringbuffer_reloc(struct fd_ringbuffer *ring, const struct fd_reloc *reloc); -uint32_t fd_ringbuffer_cmd_count(struct fd_ringbuffer *ring); -uint32_t fd_ringbuffer_emit_reloc_ring_full(struct 
fd_ringbuffer *ring, - struct fd_ringbuffer *target, uint32_t cmd_idx); - -static inline uint32_t -offset_bytes(void *end, void *start) -{ - return ((char *)end) - ((char *)start); -} - -static inline uint32_t -fd_ringbuffer_size(struct fd_ringbuffer *ring) -{ - /* only really needed for stateobj ringbuffers, and won't really - * do what you expect for growable rb's.. so lets just restrict - * this to stateobj's for now: - */ - debug_assert(!(ring->flags & FD_RINGBUFFER_GROWABLE)); - return offset_bytes(ring->cur, ring->start); -} - - -#endif /* FREEDRENO_RINGBUFFER_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_bo.c mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_bo.c --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_bo.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_bo.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,170 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include "msm_priv.h" - -static int bo_allocate(struct msm_bo *msm_bo) -{ - struct fd_bo *bo = &msm_bo->base; - if (!msm_bo->offset) { - struct drm_msm_gem_info req = { - .handle = bo->handle, - }; - int ret; - - /* if the buffer is already backed by pages then this - * doesn't actually do anything (other than giving us - * the offset) - */ - ret = drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_INFO, - &req, sizeof(req)); - if (ret) { - ERROR_MSG("alloc failed: %s", strerror(errno)); - return ret; - } - - msm_bo->offset = req.offset; - } - - return 0; -} - -static int msm_bo_offset(struct fd_bo *bo, uint64_t *offset) -{ - struct msm_bo *msm_bo = to_msm_bo(bo); - int ret = bo_allocate(msm_bo); - if (ret) - return ret; - *offset = msm_bo->offset; - return 0; -} - -static int msm_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op) -{ - struct drm_msm_gem_cpu_prep req = { - .handle = bo->handle, - .op = op, - }; - - get_abs_timeout(&req.timeout, 5000000000); - - return drmCommandWrite(bo->dev->fd, DRM_MSM_GEM_CPU_PREP, &req, sizeof(req)); -} - -static void msm_bo_cpu_fini(struct fd_bo *bo) -{ - struct drm_msm_gem_cpu_fini req = { - .handle = bo->handle, - }; - - drmCommandWrite(bo->dev->fd, DRM_MSM_GEM_CPU_FINI, &req, sizeof(req)); -} - -static int msm_bo_madvise(struct fd_bo *bo, int willneed) -{ - struct drm_msm_gem_madvise req = { - .handle = bo->handle, - .madv = willneed ? 
MSM_MADV_WILLNEED : MSM_MADV_DONTNEED, - }; - int ret; - - /* older kernels do not support this: */ - if (bo->dev->version < FD_VERSION_MADVISE) - return willneed; - - ret = drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_MADVISE, &req, sizeof(req)); - if (ret) - return ret; - - return req.retained; -} - -static uint64_t msm_bo_iova(struct fd_bo *bo) -{ - struct drm_msm_gem_info req = { - .handle = bo->handle, - .flags = MSM_INFO_IOVA, - }; - int ret; - - ret = drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req)); - debug_assert(ret == 0); - - return req.offset; -} - -static void msm_bo_destroy(struct fd_bo *bo) -{ - struct msm_bo *msm_bo = to_msm_bo(bo); - free(msm_bo); - -} - -static const struct fd_bo_funcs funcs = { - .offset = msm_bo_offset, - .cpu_prep = msm_bo_cpu_prep, - .cpu_fini = msm_bo_cpu_fini, - .madvise = msm_bo_madvise, - .iova = msm_bo_iova, - .destroy = msm_bo_destroy, -}; - -/* allocate a buffer handle: */ -int msm_bo_new_handle(struct fd_device *dev, - uint32_t size, uint32_t flags, uint32_t *handle) -{ - struct drm_msm_gem_new req = { - .size = size, - .flags = MSM_BO_WC, // TODO figure out proper flags.. 
- }; - int ret; - - ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, - &req, sizeof(req)); - if (ret) - return ret; - - *handle = req.handle; - - return 0; -} - -/* allocate a new buffer object */ -struct fd_bo * msm_bo_from_handle(struct fd_device *dev, - uint32_t size, uint32_t handle) -{ - struct msm_bo *msm_bo; - struct fd_bo *bo; - - msm_bo = calloc(1, sizeof(*msm_bo)); - if (!msm_bo) - return NULL; - - bo = &msm_bo->base; - bo->funcs = &funcs; - - return bo; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_device.c mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_device.c --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_device.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_device.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,61 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#include -#include -#include - -#include "msm_priv.h" - -static void msm_device_destroy(struct fd_device *dev) -{ - struct msm_device *msm_dev = to_msm_device(dev); - free(msm_dev); -} - -static const struct fd_device_funcs funcs = { - .bo_new_handle = msm_bo_new_handle, - .bo_from_handle = msm_bo_from_handle, - .pipe_new = msm_pipe_new, - .destroy = msm_device_destroy, -}; - -struct fd_device * msm_device_new(int fd) -{ - struct msm_device *msm_dev; - struct fd_device *dev; - - msm_dev = calloc(1, sizeof(*msm_dev)); - if (!msm_dev) - return NULL; - - dev = &msm_dev->base; - dev->funcs = &funcs; - - dev->bo_size = sizeof(struct msm_bo); - - return dev; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_drm.h mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_drm.h --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_drm.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_drm.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,308 +0,0 @@ -/* - * Copyright (C) 2013 Red Hat - * Author: Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __MSM_DRM_H__ -#define __MSM_DRM_H__ - -#include "drm.h" - -#if defined(__cplusplus) -extern "C" { -#endif - -/* Please note that modifications to all structs defined here are - * subject to backwards-compatibility constraints: - * 1) Do not use pointers, use __u64 instead for 32 bit / 64 bit - * user/kernel compatibility - * 2) Keep fields aligned to their size - * 3) Because of how drm_ioctl() works, we can add new fields at - * the end of an ioctl if some care is taken: drm_ioctl() will - * zero out the new fields at the tail of the ioctl, so a zero - * value should have a backwards compatible meaning. And for - * output params, userspace won't see the newly added output - * fields.. so that has to be somehow ok. - */ - -#define MSM_PIPE_NONE 0x00 -#define MSM_PIPE_2D0 0x01 -#define MSM_PIPE_2D1 0x02 -#define MSM_PIPE_3D0 0x10 - -/* The pipe-id just uses the lower bits, so can be OR'd with flags in - * the upper 16 bits (which could be extended further, if needed, maybe - * we extend/overload the pipe-id some day to deal with multiple rings, - * but even then I don't think we need the full lower 16 bits). - */ -#define MSM_PIPE_ID_MASK 0xffff -#define MSM_PIPE_ID(x) ((x) & MSM_PIPE_ID_MASK) -#define MSM_PIPE_FLAGS(x) ((x) & ~MSM_PIPE_ID_MASK) - -/* timeouts are specified in clock-monotonic absolute times (to simplify - * restarting interrupted ioctls). The following struct is logically the - * same as 'struct timespec' but 32/64b ABI safe. 
- */ -struct drm_msm_timespec { - __s64 tv_sec; /* seconds */ - __s64 tv_nsec; /* nanoseconds */ -}; - -#define MSM_PARAM_GPU_ID 0x01 -#define MSM_PARAM_GMEM_SIZE 0x02 -#define MSM_PARAM_CHIP_ID 0x03 -#define MSM_PARAM_MAX_FREQ 0x04 -#define MSM_PARAM_TIMESTAMP 0x05 -#define MSM_PARAM_GMEM_BASE 0x06 -#define MSM_PARAM_NR_RINGS 0x07 - -struct drm_msm_param { - __u32 pipe; /* in, MSM_PIPE_x */ - __u32 param; /* in, MSM_PARAM_x */ - __u64 value; /* out (get_param) or in (set_param) */ -}; - -/* - * GEM buffers: - */ - -#define MSM_BO_SCANOUT 0x00000001 /* scanout capable */ -#define MSM_BO_GPU_READONLY 0x00000002 -#define MSM_BO_CACHE_MASK 0x000f0000 -/* cache modes */ -#define MSM_BO_CACHED 0x00010000 -#define MSM_BO_WC 0x00020000 -#define MSM_BO_UNCACHED 0x00040000 - -#define MSM_BO_FLAGS (MSM_BO_SCANOUT | \ - MSM_BO_GPU_READONLY | \ - MSM_BO_CACHED | \ - MSM_BO_WC | \ - MSM_BO_UNCACHED) - -struct drm_msm_gem_new { - __u64 size; /* in */ - __u32 flags; /* in, mask of MSM_BO_x */ - __u32 handle; /* out */ -}; - -#define MSM_INFO_IOVA 0x01 - -#define MSM_INFO_FLAGS (MSM_INFO_IOVA) - -struct drm_msm_gem_info { - __u32 handle; /* in */ - __u32 flags; /* in - combination of MSM_INFO_* flags */ - __u64 offset; /* out, mmap() offset or iova */ -}; - -#define MSM_PREP_READ 0x01 -#define MSM_PREP_WRITE 0x02 -#define MSM_PREP_NOSYNC 0x04 - -#define MSM_PREP_FLAGS (MSM_PREP_READ | MSM_PREP_WRITE | MSM_PREP_NOSYNC) - -struct drm_msm_gem_cpu_prep { - __u32 handle; /* in */ - __u32 op; /* in, mask of MSM_PREP_x */ - struct drm_msm_timespec timeout; /* in */ -}; - -struct drm_msm_gem_cpu_fini { - __u32 handle; /* in */ -}; - -/* - * Cmdstream Submission: - */ - -/* The value written into the cmdstream is logically: - * - * ((relocbuf->gpuaddr + reloc_offset) << shift) | or - * - * When we have GPU's w/ >32bit ptrs, it should be possible to deal - * with this by emit'ing two reloc entries with appropriate shift - * values. Or a new MSM_SUBMIT_CMD_x type would also be an option. 
- * - * NOTE that reloc's must be sorted by order of increasing submit_offset, - * otherwise EINVAL. - */ -struct drm_msm_gem_submit_reloc { - __u32 submit_offset; /* in, offset from submit_bo */ - __u32 or; /* in, value OR'd with result */ - __s32 shift; /* in, amount of left shift (can be negative) */ - __u32 reloc_idx; /* in, index of reloc_bo buffer */ - __u64 reloc_offset; /* in, offset from start of reloc_bo */ -}; - -/* submit-types: - * BUF - this cmd buffer is executed normally. - * IB_TARGET_BUF - this cmd buffer is an IB target. Reloc's are - * processed normally, but the kernel does not setup an IB to - * this buffer in the first-level ringbuffer - * CTX_RESTORE_BUF - only executed if there has been a GPU context - * switch since the last SUBMIT ioctl - */ -#define MSM_SUBMIT_CMD_BUF 0x0001 -#define MSM_SUBMIT_CMD_IB_TARGET_BUF 0x0002 -#define MSM_SUBMIT_CMD_CTX_RESTORE_BUF 0x0003 -struct drm_msm_gem_submit_cmd { - __u32 type; /* in, one of MSM_SUBMIT_CMD_x */ - __u32 submit_idx; /* in, index of submit_bo cmdstream buffer */ - __u32 submit_offset; /* in, offset into submit_bo */ - __u32 size; /* in, cmdstream size */ - __u32 pad; - __u32 nr_relocs; /* in, number of submit_reloc's */ - __u64 relocs; /* in, ptr to array of submit_reloc's */ -}; - -/* Each buffer referenced elsewhere in the cmdstream submit (ie. the - * cmdstream buffer(s) themselves or reloc entries) has one (and only - * one) entry in the submit->bos[] table. - * - * As a optimization, the current buffer (gpu virtual address) can be - * passed back through the 'presumed' field. If on a subsequent reloc, - * userspace passes back a 'presumed' address that is still valid, - * then patching the cmdstream for this entry is skipped. This can - * avoid kernel needing to map/access the cmdstream bo in the common - * case. 
- */ -#define MSM_SUBMIT_BO_READ 0x0001 -#define MSM_SUBMIT_BO_WRITE 0x0002 - -#define MSM_SUBMIT_BO_FLAGS (MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE) - -struct drm_msm_gem_submit_bo { - __u32 flags; /* in, mask of MSM_SUBMIT_BO_x */ - __u32 handle; /* in, GEM handle */ - __u64 presumed; /* in/out, presumed buffer address */ -}; - -/* Valid submit ioctl flags: */ -#define MSM_SUBMIT_NO_IMPLICIT 0x80000000 /* disable implicit sync */ -#define MSM_SUBMIT_FENCE_FD_IN 0x40000000 /* enable input fence_fd */ -#define MSM_SUBMIT_FENCE_FD_OUT 0x20000000 /* enable output fence_fd */ -#define MSM_SUBMIT_SUDO 0x10000000 /* run submitted cmds from RB */ -#define MSM_SUBMIT_FLAGS ( \ - MSM_SUBMIT_NO_IMPLICIT | \ - MSM_SUBMIT_FENCE_FD_IN | \ - MSM_SUBMIT_FENCE_FD_OUT | \ - MSM_SUBMIT_SUDO | \ - 0) - -/* Each cmdstream submit consists of a table of buffers involved, and - * one or more cmdstream buffers. This allows for conditional execution - * (context-restore), and IB buffers needed for per tile/bin draw cmds. - */ -struct drm_msm_gem_submit { - __u32 flags; /* MSM_PIPE_x | MSM_SUBMIT_x */ - __u32 fence; /* out */ - __u32 nr_bos; /* in, number of submit_bo's */ - __u32 nr_cmds; /* in, number of submit_cmd's */ - __u64 bos; /* in, ptr to array of submit_bo's */ - __u64 cmds; /* in, ptr to array of submit_cmd's */ - __s32 fence_fd; /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */ - __u32 queueid; /* in, submitqueue id */ -}; - -/* The normal way to synchronize with the GPU is just to CPU_PREP on - * a buffer if you need to access it from the CPU (other cmdstream - * submission from same or other contexts, PAGE_FLIP ioctl, etc, all - * handle the required synchronization under the hood). This ioctl - * mainly just exists as a way to implement the gallium pipe_fence - * APIs without requiring a dummy bo to synchronize on. 
- */ -struct drm_msm_wait_fence { - __u32 fence; /* in */ - __u32 pad; - struct drm_msm_timespec timeout; /* in */ - __u32 queueid; /* in, submitqueue id */ -}; - -/* madvise provides a way to tell the kernel in case a buffers contents - * can be discarded under memory pressure, which is useful for userspace - * bo cache where we want to optimistically hold on to buffer allocate - * and potential mmap, but allow the pages to be discarded under memory - * pressure. - * - * Typical usage would involve madvise(DONTNEED) when buffer enters BO - * cache, and madvise(WILLNEED) if trying to recycle buffer from BO cache. - * In the WILLNEED case, 'retained' indicates to userspace whether the - * backing pages still exist. - */ -#define MSM_MADV_WILLNEED 0 /* backing pages are needed, status returned in 'retained' */ -#define MSM_MADV_DONTNEED 1 /* backing pages not needed */ -#define __MSM_MADV_PURGED 2 /* internal state */ - -struct drm_msm_gem_madvise { - __u32 handle; /* in, GEM handle */ - __u32 madv; /* in, MSM_MADV_x */ - __u32 retained; /* out, whether backing store still exists */ -}; - -/* - * Draw queues allow the user to set specific submission parameter. Command - * submissions specify a specific submitqueue to use. 
ID 0 is reserved for - * backwards compatibility as a "default" submitqueue - */ - -#define MSM_SUBMITQUEUE_FLAGS (0) - -struct drm_msm_submitqueue { - __u32 flags; /* in, MSM_SUBMITQUEUE_x */ - __u32 prio; /* in, Priority level */ - __u32 id; /* out, identifier */ -}; - -#define DRM_MSM_GET_PARAM 0x00 -/* placeholder: -#define DRM_MSM_SET_PARAM 0x01 - */ -#define DRM_MSM_GEM_NEW 0x02 -#define DRM_MSM_GEM_INFO 0x03 -#define DRM_MSM_GEM_CPU_PREP 0x04 -#define DRM_MSM_GEM_CPU_FINI 0x05 -#define DRM_MSM_GEM_SUBMIT 0x06 -#define DRM_MSM_WAIT_FENCE 0x07 -#define DRM_MSM_GEM_MADVISE 0x08 -/* placeholder: -#define DRM_MSM_GEM_SVM_NEW 0x09 - */ -#define DRM_MSM_SUBMITQUEUE_NEW 0x0A -#define DRM_MSM_SUBMITQUEUE_CLOSE 0x0B - -#define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param) -#define DRM_IOCTL_MSM_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new) -#define DRM_IOCTL_MSM_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_INFO, struct drm_msm_gem_info) -#define DRM_IOCTL_MSM_GEM_CPU_PREP DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_GEM_CPU_PREP, struct drm_msm_gem_cpu_prep) -#define DRM_IOCTL_MSM_GEM_CPU_FINI DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_GEM_CPU_FINI, struct drm_msm_gem_cpu_fini) -#define DRM_IOCTL_MSM_GEM_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_SUBMIT, struct drm_msm_gem_submit) -#define DRM_IOCTL_MSM_WAIT_FENCE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_WAIT_FENCE, struct drm_msm_wait_fence) -#define DRM_IOCTL_MSM_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_MADVISE, struct drm_msm_gem_madvise) -#define DRM_IOCTL_MSM_SUBMITQUEUE_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue) -#define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_CLOSE, __u32) - -#if defined(__cplusplus) -} -#endif - -#endif /* __MSM_DRM_H__ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_pipe.c 
mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_pipe.c --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_pipe.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_pipe.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,223 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#include "util/slab.h" - -#include "freedreno_util.h" -#include "msm_priv.h" - -static int query_param(struct fd_pipe *pipe, uint32_t param, - uint64_t *value) -{ - struct msm_pipe *msm_pipe = to_msm_pipe(pipe); - struct drm_msm_param req = { - .pipe = msm_pipe->pipe, - .param = param, - }; - int ret; - - ret = drmCommandWriteRead(pipe->dev->fd, DRM_MSM_GET_PARAM, - &req, sizeof(req)); - if (ret) - return ret; - - *value = req.value; - - return 0; -} - -static int msm_pipe_get_param(struct fd_pipe *pipe, - enum fd_param_id param, uint64_t *value) -{ - struct msm_pipe *msm_pipe = to_msm_pipe(pipe); - switch(param) { - case FD_DEVICE_ID: // XXX probably get rid of this.. - case FD_GPU_ID: - *value = msm_pipe->gpu_id; - return 0; - case FD_GMEM_SIZE: - *value = msm_pipe->gmem; - return 0; - case FD_CHIP_ID: - *value = msm_pipe->chip_id; - return 0; - case FD_MAX_FREQ: - return query_param(pipe, MSM_PARAM_MAX_FREQ, value); - case FD_TIMESTAMP: - return query_param(pipe, MSM_PARAM_TIMESTAMP, value); - case FD_NR_RINGS: - return query_param(pipe, MSM_PARAM_NR_RINGS, value); - default: - ERROR_MSG("invalid param id: %d", param); - return -1; - } -} - -static int msm_pipe_wait(struct fd_pipe *pipe, uint32_t timestamp, - uint64_t timeout) -{ - struct fd_device *dev = pipe->dev; - struct drm_msm_wait_fence req = { - .fence = timestamp, - .queueid = to_msm_pipe(pipe)->queue_id, - }; - int ret; - - get_abs_timeout(&req.timeout, timeout); - - ret = drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &req, sizeof(req)); - if (ret) { - ERROR_MSG("wait-fence failed! 
%d (%s)", ret, strerror(errno)); - return ret; - } - - return 0; -} - -static int open_submitqueue(struct fd_pipe *pipe, uint32_t prio) -{ - struct drm_msm_submitqueue req = { - .flags = 0, - .prio = prio, - }; - uint64_t nr_rings = 1; - int ret; - - if (fd_device_version(pipe->dev) < FD_VERSION_SUBMIT_QUEUES) { - to_msm_pipe(pipe)->queue_id = 0; - return 0; - } - - msm_pipe_get_param(pipe, FD_NR_RINGS, &nr_rings); - - req.prio = MIN2(req.prio, MAX2(nr_rings, 1) - 1); - - ret = drmCommandWriteRead(pipe->dev->fd, DRM_MSM_SUBMITQUEUE_NEW, - &req, sizeof(req)); - if (ret) { - ERROR_MSG("could not create submitqueue! %d (%s)", ret, strerror(errno)); - return ret; - } - - to_msm_pipe(pipe)->queue_id = req.id; - return 0; -} - -static void close_submitqueue(struct fd_pipe *pipe, uint32_t queue_id) -{ - if (fd_device_version(pipe->dev) < FD_VERSION_SUBMIT_QUEUES) - return; - - drmCommandWrite(pipe->dev->fd, DRM_MSM_SUBMITQUEUE_CLOSE, - &queue_id, sizeof(queue_id)); -} - -static void msm_pipe_destroy(struct fd_pipe *pipe) -{ - struct msm_pipe *msm_pipe = to_msm_pipe(pipe); - close_submitqueue(pipe, msm_pipe->queue_id); - free(msm_pipe); -} - -static const struct fd_pipe_funcs sp_funcs = { - .ringbuffer_new_object = msm_ringbuffer_sp_new_object, - .submit_new = msm_submit_sp_new, - .get_param = msm_pipe_get_param, - .wait = msm_pipe_wait, - .destroy = msm_pipe_destroy, -}; - -static const struct fd_pipe_funcs legacy_funcs = { - .ringbuffer_new_object = msm_ringbuffer_new_object, - .submit_new = msm_submit_new, - .get_param = msm_pipe_get_param, - .wait = msm_pipe_wait, - .destroy = msm_pipe_destroy, -}; - -static uint64_t get_param(struct fd_pipe *pipe, uint32_t param) -{ - uint64_t value; - int ret = query_param(pipe, param, &value); - if (ret) { - ERROR_MSG("get-param failed! 
%d (%s)", ret, strerror(errno)); - return 0; - } - return value; -} - -struct fd_pipe * msm_pipe_new(struct fd_device *dev, - enum fd_pipe_id id, uint32_t prio) -{ - static const uint32_t pipe_id[] = { - [FD_PIPE_3D] = MSM_PIPE_3D0, - [FD_PIPE_2D] = MSM_PIPE_2D0, - }; - struct msm_pipe *msm_pipe = NULL; - struct fd_pipe *pipe = NULL; - - msm_pipe = calloc(1, sizeof(*msm_pipe)); - if (!msm_pipe) { - ERROR_MSG("allocation failed"); - goto fail; - } - - pipe = &msm_pipe->base; - - // TODO once kernel changes are in place, this switch will be - // based on kernel version: - if (fd_mesa_debug & FD_DBG_SOFTPIN) { - pipe->funcs = &sp_funcs; - } else { - pipe->funcs = &legacy_funcs; - } - - /* initialize before get_param(): */ - pipe->dev = dev; - msm_pipe->pipe = pipe_id[id]; - - /* these params should be supported since the first version of drm/msm: */ - msm_pipe->gpu_id = get_param(pipe, MSM_PARAM_GPU_ID); - msm_pipe->gmem = get_param(pipe, MSM_PARAM_GMEM_SIZE); - msm_pipe->chip_id = get_param(pipe, MSM_PARAM_CHIP_ID); - - if (! 
msm_pipe->gpu_id) - goto fail; - - INFO_MSG("Pipe Info:"); - INFO_MSG(" GPU-id: %d", msm_pipe->gpu_id); - INFO_MSG(" Chip-id: 0x%08x", msm_pipe->chip_id); - INFO_MSG(" GMEM size: 0x%08x", msm_pipe->gmem); - - if (open_submitqueue(pipe, prio)) - goto fail; - - return pipe; -fail: - if (pipe) - fd_pipe_del(pipe); - return NULL; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_priv.h mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_priv.h --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_priv.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_priv.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,140 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#ifndef MSM_PRIV_H_ -#define MSM_PRIV_H_ - -#include "freedreno_priv.h" - -#ifndef __user -# define __user -#endif - -#include "msm_drm.h" - -struct msm_device { - struct fd_device base; - struct fd_bo_cache ring_cache; -}; -FD_DEFINE_CAST(fd_device, msm_device); - -struct fd_device * msm_device_new(int fd); - -struct msm_pipe { - struct fd_pipe base; - uint32_t pipe; - uint32_t gpu_id; - uint32_t gmem; - uint32_t chip_id; - uint32_t queue_id; -}; -FD_DEFINE_CAST(fd_pipe, msm_pipe); - -struct fd_pipe * msm_pipe_new(struct fd_device *dev, - enum fd_pipe_id id, uint32_t prio); - -struct fd_ringbuffer * msm_ringbuffer_new_object(struct fd_pipe *pipe, uint32_t size); -struct fd_ringbuffer * msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size); - -struct fd_submit * msm_submit_new(struct fd_pipe *pipe); -struct fd_submit * msm_submit_sp_new(struct fd_pipe *pipe); - -struct msm_bo { - struct fd_bo base; - uint64_t offset; - /* to avoid excess hashtable lookups, cache the ring this bo was - * last emitted on (since that will probably also be the next ring - * it is emitted on) - */ - unsigned current_submit_seqno; - uint32_t idx; -}; -FD_DEFINE_CAST(fd_bo, msm_bo); - -int msm_bo_new_handle(struct fd_device *dev, - uint32_t size, uint32_t flags, uint32_t *handle); -struct fd_bo * msm_bo_from_handle(struct fd_device *dev, - uint32_t size, uint32_t handle); - -static inline void -msm_dump_submit(struct drm_msm_gem_submit *req) -{ - for (unsigned i = 0; i < req->nr_bos; i++) { - struct drm_msm_gem_submit_bo *bos = U642VOID(req->bos); - struct drm_msm_gem_submit_bo *bo = &bos[i]; - ERROR_MSG(" bos[%d]: handle=%u, flags=%x", i, bo->handle, bo->flags); - } - for (unsigned i = 0; i < req->nr_cmds; i++) { - struct drm_msm_gem_submit_cmd *cmds = U642VOID(req->cmds); - struct drm_msm_gem_submit_cmd *cmd = &cmds[i]; - struct drm_msm_gem_submit_reloc *relocs = U642VOID(cmd->relocs); - ERROR_MSG(" cmd[%d]: type=%u, submit_idx=%u, 
submit_offset=%u, size=%u", - i, cmd->type, cmd->submit_idx, cmd->submit_offset, cmd->size); - for (unsigned j = 0; j < cmd->nr_relocs; j++) { - struct drm_msm_gem_submit_reloc *r = &relocs[j]; - ERROR_MSG(" reloc[%d]: submit_offset=%u, or=%08x, shift=%d, reloc_idx=%u" - ", reloc_offset=%"PRIu64, j, r->submit_offset, r->or, r->shift, - r->reloc_idx, r->reloc_offset); - } - } -} - -static inline void get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns) -{ - struct timespec t; - uint32_t s = ns / 1000000000; - clock_gettime(CLOCK_MONOTONIC, &t); - tv->tv_sec = t.tv_sec + s; - tv->tv_nsec = t.tv_nsec + ns - (s * 1000000000); -} - -/* - * Stupid/simple growable array implementation: - */ - -static inline void * -grow(void *ptr, uint16_t nr, uint16_t *max, uint16_t sz) -{ - if ((nr + 1) > *max) { - if ((*max * 2) < (nr + 1)) - *max = nr + 5; - else - *max = *max * 2; - ptr = realloc(ptr, *max * sz); - } - return ptr; -} - -#define DECLARE_ARRAY(type, name) \ - unsigned short nr_ ## name, max_ ## name; \ - type * name; - -#define APPEND(x, name) ({ \ - (x)->name = grow((x)->name, (x)->nr_ ## name, &(x)->max_ ## name, sizeof((x)->name[0])); \ - (x)->nr_ ## name ++; \ -}) - -#endif /* MSM_PRIV_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_ringbuffer.c mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_ringbuffer.c --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_ringbuffer.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_ringbuffer.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,724 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to 
whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include -#include - -#include "util/hash_table.h" -#include "util/set.h" -#include "util/slab.h" - -#include "drm/freedreno_ringbuffer.h" -#include "msm_priv.h" - -/* The legacy implementation of submit/ringbuffer, which still does the - * traditional reloc and cmd tracking - */ - - -#define INIT_SIZE 0x1000 - -static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER; - - -struct msm_submit { - struct fd_submit base; - - DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos); - DECLARE_ARRAY(struct fd_bo *, bos); - - unsigned seqno; - - /* maps fd_bo to idx in bos table: */ - struct hash_table *bo_table; - - struct slab_mempool ring_pool; - - /* hash-set of associated rings: */ - struct set *ring_set; - - struct fd_ringbuffer *primary; - - /* Allow for sub-allocation of stateobj ring buffers (ie. sharing - * the same underlying bo).. - * - * We also rely on previous stateobj having been fully constructed - * so we can reclaim extra space at it's end. - */ - struct fd_ringbuffer *suballoc_ring; -}; -FD_DEFINE_CAST(fd_submit, msm_submit); - -/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers - * and sizes. Ie. 
a finalized buffer can have no more commands appended to - * it. - */ -struct msm_cmd { - struct fd_bo *ring_bo; - unsigned size; - DECLARE_ARRAY(struct drm_msm_gem_submit_reloc, relocs); -}; - -static struct msm_cmd * -cmd_new(struct fd_bo *ring_bo) -{ - struct msm_cmd *cmd = malloc(sizeof(*cmd)); - cmd->ring_bo = fd_bo_ref(ring_bo); - cmd->size = 0; - cmd->nr_relocs = cmd->max_relocs = 0; - cmd->relocs = NULL; - return cmd; -} - -static void -cmd_free(struct msm_cmd *cmd) -{ - fd_bo_del(cmd->ring_bo); - free(cmd->relocs); - free(cmd); -} - -/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to - * later copy into the submit when the stateobj rb is later referenced by - * a regular rb: - */ -struct msm_reloc_bo { - struct fd_bo *bo; - unsigned flags; -}; - -struct msm_ringbuffer { - struct fd_ringbuffer base; - - /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */ - unsigned offset; - - union { - /* for _FD_RINGBUFFER_OBJECT case: */ - struct { - struct fd_pipe *pipe; - DECLARE_ARRAY(struct msm_reloc_bo, reloc_bos); - struct set *ring_set; - }; - /* for other cases: */ - struct { - struct fd_submit *submit; - DECLARE_ARRAY(struct msm_cmd *, cmds); - }; - } u; - - struct msm_cmd *cmd; /* current cmd */ - struct fd_bo *ring_bo; -}; -FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer); - -static void finalize_current_cmd(struct fd_ringbuffer *ring); -static struct fd_ringbuffer * msm_ringbuffer_init( - struct msm_ringbuffer *msm_ring, - uint32_t size, enum fd_ringbuffer_flags flags); - -/* add (if needed) bo to submit and return index: */ -static uint32_t -append_bo(struct msm_submit *submit, struct fd_bo *bo, uint32_t flags) -{ - struct msm_bo *msm_bo = to_msm_bo(bo); - uint32_t idx; - pthread_mutex_lock(&idx_lock); - if (likely(msm_bo->current_submit_seqno == submit->seqno)) { - idx = msm_bo->idx; - } else { - uint32_t hash = _mesa_hash_pointer(bo); - struct hash_entry *entry; - - entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, 
hash, bo); - if (entry) { - /* found */ - idx = (uint32_t)(uintptr_t)entry->data; - } else { - idx = APPEND(submit, submit_bos); - idx = APPEND(submit, bos); - - submit->submit_bos[idx].flags = 0; - submit->submit_bos[idx].handle = bo->handle; - submit->submit_bos[idx].presumed = 0; - - submit->bos[idx] = fd_bo_ref(bo); - - _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo, - (void *)(uintptr_t)idx); - } - msm_bo->current_submit_seqno = submit->seqno; - msm_bo->idx = idx; - } - pthread_mutex_unlock(&idx_lock); - if (flags & FD_RELOC_READ) - submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ; - if (flags & FD_RELOC_WRITE) - submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE; - return idx; -} - -static void -append_ring(struct set *set, struct fd_ringbuffer *ring) -{ - uint32_t hash = _mesa_hash_pointer(ring); - - if (!_mesa_set_search_pre_hashed(set, hash, ring)) { - fd_ringbuffer_ref(ring); - _mesa_set_add_pre_hashed(set, hash, ring); - } -} - -static void -msm_submit_suballoc_ring_bo(struct fd_submit *submit, - struct msm_ringbuffer *msm_ring, uint32_t size) -{ - struct msm_submit *msm_submit = to_msm_submit(submit); - unsigned suballoc_offset = 0; - struct fd_bo *suballoc_bo = NULL; - - if (msm_submit->suballoc_ring) { - struct msm_ringbuffer *suballoc_ring = - to_msm_ringbuffer(msm_submit->suballoc_ring); - - suballoc_bo = suballoc_ring->ring_bo; - suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) + - suballoc_ring->offset; - - suballoc_offset = align(suballoc_offset, 0x10); - - if ((size + suballoc_offset) > suballoc_bo->size) { - suballoc_bo = NULL; - } - } - - if (!suballoc_bo) { - // TODO possibly larger size for streaming bo? 
- msm_ring->ring_bo = fd_bo_new_ring( - submit->pipe->dev, 0x8000, 0); - msm_ring->offset = 0; - } else { - msm_ring->ring_bo = fd_bo_ref(suballoc_bo); - msm_ring->offset = suballoc_offset; - } - - struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring; - - msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base); - - if (old_suballoc_ring) - fd_ringbuffer_del(old_suballoc_ring); -} - -static struct fd_ringbuffer * -msm_submit_new_ringbuffer(struct fd_submit *submit, uint32_t size, - enum fd_ringbuffer_flags flags) -{ - struct msm_submit *msm_submit = to_msm_submit(submit); - struct msm_ringbuffer *msm_ring; - - msm_ring = slab_alloc_st(&msm_submit->ring_pool); - - msm_ring->u.submit = submit; - - /* NOTE: needs to be before _suballoc_ring_bo() since it could - * increment the refcnt of the current ring - */ - msm_ring->base.refcnt = 1; - - if (flags & FD_RINGBUFFER_STREAMING) { - msm_submit_suballoc_ring_bo(submit, msm_ring, size); - } else { - if (flags & FD_RINGBUFFER_GROWABLE) - size = INIT_SIZE; - - msm_ring->offset = 0; - msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size, 0); - } - - if (!msm_ringbuffer_init(msm_ring, size, flags)) - return NULL; - - if (flags & FD_RINGBUFFER_PRIMARY) { - debug_assert(!msm_submit->primary); - msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base); - } - - return &msm_ring->base; -} - -static struct drm_msm_gem_submit_reloc * -handle_stateobj_relocs(struct msm_submit *submit, struct msm_ringbuffer *ring) -{ - struct msm_cmd *cmd = ring->cmd; - struct drm_msm_gem_submit_reloc *relocs; - - relocs = malloc(cmd->nr_relocs * sizeof(*relocs)); - - for (unsigned i = 0; i < cmd->nr_relocs; i++) { - unsigned idx = cmd->relocs[i].reloc_idx; - struct fd_bo *bo = ring->u.reloc_bos[idx].bo; - unsigned flags = 0; - - if (ring->u.reloc_bos[idx].flags & MSM_SUBMIT_BO_READ) - flags |= FD_RELOC_READ; - if (ring->u.reloc_bos[idx].flags & MSM_SUBMIT_BO_WRITE) - flags |= FD_RELOC_WRITE; - - relocs[i] = cmd->relocs[i]; 
- relocs[i].reloc_idx = append_bo(submit, bo, flags); - } - - return relocs; -} - -static int -msm_submit_flush(struct fd_submit *submit, int in_fence_fd, - int *out_fence_fd, uint32_t *out_fence) -{ - struct msm_submit *msm_submit = to_msm_submit(submit); - struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe); - struct drm_msm_gem_submit req = { - .flags = msm_pipe->pipe, - .queueid = msm_pipe->queue_id, - }; - int ret; - - debug_assert(msm_submit->primary); - - finalize_current_cmd(msm_submit->primary); - append_ring(msm_submit->ring_set, msm_submit->primary); - - struct set_entry *entry; - unsigned nr_cmds = 0; - unsigned nr_objs = 0; - - set_foreach(msm_submit->ring_set, entry) { - struct fd_ringbuffer *ring = (void *)entry->key; - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - nr_cmds += 1; - nr_objs += 1; - } else { - if (ring != msm_submit->primary) - finalize_current_cmd(ring); - nr_cmds += to_msm_ringbuffer(ring)->u.nr_cmds; - } - } - - void *obj_relocs[nr_objs]; - struct drm_msm_gem_submit_cmd cmds[nr_cmds]; - unsigned i = 0, o = 0; - - set_foreach(msm_submit->ring_set, entry) { - struct fd_ringbuffer *ring = (void *)entry->key; - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - - debug_assert(i < nr_cmds); - - // TODO handle relocs: - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - - debug_assert(o < nr_objs); - - void *relocs = handle_stateobj_relocs(msm_submit, msm_ring); - obj_relocs[o++] = relocs; - - cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF; - cmds[i].submit_idx = - append_bo(msm_submit, msm_ring->ring_bo, FD_RELOC_READ); - cmds[i].submit_offset = msm_ring->offset; - cmds[i].size = offset_bytes(ring->cur, ring->start); - cmds[i].pad = 0; - cmds[i].nr_relocs = msm_ring->cmd->nr_relocs; - cmds[i].relocs = VOID2U64(relocs); - - i++; - } else { - for (unsigned j = 0; j < msm_ring->u.nr_cmds; j++) { - if (ring->flags & FD_RINGBUFFER_PRIMARY) { - cmds[i].type = MSM_SUBMIT_CMD_BUF; - } else { - cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF; - } - 
cmds[i].submit_idx = append_bo(msm_submit, - msm_ring->u.cmds[j]->ring_bo, FD_RELOC_READ); - cmds[i].submit_offset = msm_ring->offset; - cmds[i].size = msm_ring->u.cmds[j]->size; - cmds[i].pad = 0; - cmds[i].nr_relocs = msm_ring->u.cmds[j]->nr_relocs; - cmds[i].relocs = VOID2U64(msm_ring->u.cmds[j]->relocs); - - i++; - } - } - } - - if (in_fence_fd != -1) { - req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT; - req.fence_fd = in_fence_fd; - } - - if (out_fence_fd) { - req.flags |= MSM_SUBMIT_FENCE_FD_OUT; - } - - /* needs to be after get_cmd() as that could create bos/cmds table: */ - req.bos = VOID2U64(msm_submit->submit_bos), - req.nr_bos = msm_submit->nr_submit_bos; - req.cmds = VOID2U64(cmds), - req.nr_cmds = nr_cmds; - - DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos); - - ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT, - &req, sizeof(req)); - if (ret) { - ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno)); - msm_dump_submit(&req); - } else if (!ret) { - if (out_fence) - *out_fence = req.fence; - - if (out_fence_fd) - *out_fence_fd = req.fence_fd; - } - - for (unsigned o = 0; o < nr_objs; o++) - free(obj_relocs[o]); - - return ret; -} - -static void -unref_rings(struct set_entry *entry) -{ - struct fd_ringbuffer *ring = (void *)entry->key; - fd_ringbuffer_del(ring); -} - -static void -msm_submit_destroy(struct fd_submit *submit) -{ - struct msm_submit *msm_submit = to_msm_submit(submit); - - if (msm_submit->primary) - fd_ringbuffer_del(msm_submit->primary); - if (msm_submit->suballoc_ring) - fd_ringbuffer_del(msm_submit->suballoc_ring); - - _mesa_hash_table_destroy(msm_submit->bo_table, NULL); - _mesa_set_destroy(msm_submit->ring_set, unref_rings); - - // TODO it would be nice to have a way to debug_assert() if all - // rb's haven't been free'd back to the slab, because that is - // an indication that we are leaking bo's - slab_destroy(&msm_submit->ring_pool); - - for (unsigned i = 0; i < msm_submit->nr_bos; 
i++) - fd_bo_del(msm_submit->bos[i]); - - free(msm_submit->submit_bos); - free(msm_submit->bos); - free(msm_submit); -} - -static const struct fd_submit_funcs submit_funcs = { - .new_ringbuffer = msm_submit_new_ringbuffer, - .flush = msm_submit_flush, - .destroy = msm_submit_destroy, -}; - -struct fd_submit * -msm_submit_new(struct fd_pipe *pipe) -{ - struct msm_submit *msm_submit = calloc(1, sizeof(*msm_submit)); - struct fd_submit *submit; - static unsigned submit_cnt = 0; - - msm_submit->seqno = ++submit_cnt; - msm_submit->bo_table = _mesa_hash_table_create(NULL, - _mesa_hash_pointer, _mesa_key_pointer_equal); - msm_submit->ring_set = _mesa_set_create(NULL, - _mesa_hash_pointer, _mesa_key_pointer_equal); - // TODO tune size: - slab_create(&msm_submit->ring_pool, sizeof(struct msm_ringbuffer), 16); - - submit = &msm_submit->base; - submit->pipe = pipe; - submit->funcs = &submit_funcs; - - return submit; -} - - -static void -finalize_current_cmd(struct fd_ringbuffer *ring) -{ - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - - debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); - - if (!msm_ring->cmd) - return; - - debug_assert(msm_ring->cmd->ring_bo == msm_ring->ring_bo); - - unsigned idx = APPEND(&msm_ring->u, cmds); - - msm_ring->u.cmds[idx] = msm_ring->cmd; - msm_ring->cmd = NULL; - - msm_ring->u.cmds[idx]->size = offset_bytes(ring->cur, ring->start); -} - -static void -msm_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t size) -{ - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - struct fd_pipe *pipe = msm_ring->u.submit->pipe; - - debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE); - - finalize_current_cmd(ring); - - fd_bo_del(msm_ring->ring_bo); - msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0); - msm_ring->cmd = cmd_new(msm_ring->ring_bo); - - ring->start = fd_bo_map(msm_ring->ring_bo); - ring->end = &(ring->start[size/4]); - ring->cur = ring->start; - ring->size = size; -} - -static void 
-msm_ringbuffer_emit_reloc(struct fd_ringbuffer *ring, - const struct fd_reloc *reloc) -{ - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - struct fd_pipe *pipe; - unsigned reloc_idx; - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - unsigned idx = APPEND(&msm_ring->u, reloc_bos); - - msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo); - msm_ring->u.reloc_bos[idx].flags = reloc->flags; - - /* this gets fixed up at submit->flush() time, since this state- - * object rb can be used with many different submits - */ - reloc_idx = idx; - - pipe = msm_ring->u.pipe; - } else { - struct msm_submit *msm_submit = - to_msm_submit(msm_ring->u.submit); - - reloc_idx = append_bo(msm_submit, reloc->bo, reloc->flags); - - pipe = msm_ring->u.submit->pipe; - } - - struct drm_msm_gem_submit_reloc *r; - unsigned idx = APPEND(msm_ring->cmd, relocs); - - r = &msm_ring->cmd->relocs[idx]; - - r->reloc_idx = reloc_idx; - r->reloc_offset = reloc->offset; - r->or = reloc->or; - r->shift = reloc->shift; - r->submit_offset = offset_bytes(ring->cur, ring->start) + - msm_ring->offset; - - ring->cur++; - - if (pipe->gpu_id >= 500) { - idx = APPEND(msm_ring->cmd, relocs); - r = &msm_ring->cmd->relocs[idx]; - - r->reloc_idx = reloc_idx; - r->reloc_offset = reloc->offset; - r->or = reloc->orhi; - r->shift = reloc->shift - 32; - r->submit_offset = offset_bytes(ring->cur, ring->start) + - msm_ring->offset; - - ring->cur++; - } -} - -static void -append_stateobj_rings(struct msm_submit *submit, struct fd_ringbuffer *target) -{ - struct msm_ringbuffer *msm_target = to_msm_ringbuffer(target); - - debug_assert(target->flags & _FD_RINGBUFFER_OBJECT); - - struct set_entry *entry; - set_foreach(msm_target->u.ring_set, entry) { - struct fd_ringbuffer *ring = (void *)entry->key; - - append_ring(submit->ring_set, ring); - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - append_stateobj_rings(submit, ring); - } - } -} - -static uint32_t -msm_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring, - 
struct fd_ringbuffer *target, uint32_t cmd_idx) -{ - struct msm_ringbuffer *msm_target = to_msm_ringbuffer(target); - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - struct fd_bo *bo; - uint32_t size; - - if ((target->flags & FD_RINGBUFFER_GROWABLE) && - (cmd_idx < msm_target->u.nr_cmds)) { - bo = msm_target->u.cmds[cmd_idx]->ring_bo; - size = msm_target->u.cmds[cmd_idx]->size; - } else { - bo = msm_target->ring_bo; - size = offset_bytes(target->cur, target->start); - } - - msm_ringbuffer_emit_reloc(ring, &(struct fd_reloc){ - .bo = bo, - .flags = FD_RELOC_READ, - .offset = msm_target->offset, - }); - - if ((target->flags & _FD_RINGBUFFER_OBJECT) && - !(ring->flags & _FD_RINGBUFFER_OBJECT)) { - struct msm_submit *msm_submit = to_msm_submit(msm_ring->u.submit); - - append_stateobj_rings(msm_submit, target); - } - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - append_ring(msm_ring->u.ring_set, target); - } else { - struct msm_submit *msm_submit = to_msm_submit(msm_ring->u.submit); - append_ring(msm_submit->ring_set, target); - } - - return size; -} - -static uint32_t -msm_ringbuffer_cmd_count(struct fd_ringbuffer *ring) -{ - if (ring->flags & FD_RINGBUFFER_GROWABLE) - return to_msm_ringbuffer(ring)->u.nr_cmds + 1; - return 1; -} - -static void -msm_ringbuffer_destroy(struct fd_ringbuffer *ring) -{ - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - - fd_bo_del(msm_ring->ring_bo); - if (msm_ring->cmd) - cmd_free(msm_ring->cmd); - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) { - fd_bo_del(msm_ring->u.reloc_bos[i].bo); - } - - _mesa_set_destroy(msm_ring->u.ring_set, unref_rings); - - free(msm_ring->u.reloc_bos); - free(msm_ring); - } else { - struct fd_submit *submit = msm_ring->u.submit; - - for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) { - cmd_free(msm_ring->u.cmds[i]); - } - - free(msm_ring->u.cmds); - slab_free_st(&to_msm_submit(submit)->ring_pool, msm_ring); - } -} - -static 
const struct fd_ringbuffer_funcs ring_funcs = { - .grow = msm_ringbuffer_grow, - .emit_reloc = msm_ringbuffer_emit_reloc, - .emit_reloc_ring = msm_ringbuffer_emit_reloc_ring, - .cmd_count = msm_ringbuffer_cmd_count, - .destroy = msm_ringbuffer_destroy, -}; - -static inline struct fd_ringbuffer * -msm_ringbuffer_init(struct msm_ringbuffer *msm_ring, uint32_t size, - enum fd_ringbuffer_flags flags) -{ - struct fd_ringbuffer *ring = &msm_ring->base; - - debug_assert(msm_ring->ring_bo); - - uint8_t *base = fd_bo_map(msm_ring->ring_bo); - ring->start = (void *)(base + msm_ring->offset); - ring->end = &(ring->start[size/4]); - ring->cur = ring->start; - - ring->size = size; - ring->flags = flags; - - ring->funcs = &ring_funcs; - - msm_ring->u.cmds = NULL; - msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0; - - msm_ring->cmd = cmd_new(msm_ring->ring_bo); - - return ring; -} - -struct fd_ringbuffer * -msm_ringbuffer_new_object(struct fd_pipe *pipe, uint32_t size) -{ - struct msm_ringbuffer *msm_ring = malloc(sizeof(*msm_ring)); - - msm_ring->u.pipe = pipe; - msm_ring->offset = 0; - msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0); - msm_ring->base.refcnt = 1; - - msm_ring->u.reloc_bos = NULL; - msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0; - - msm_ring->u.ring_set = _mesa_set_create(NULL, - _mesa_hash_pointer, _mesa_key_pointer_equal); - - return msm_ringbuffer_init(msm_ring, size, _FD_RINGBUFFER_OBJECT); -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_ringbuffer_sp.c mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_ringbuffer_sp.c --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_ringbuffer_sp.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_ringbuffer_sp.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,551 +0,0 @@ -/* - * Copyright (C) 2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the 
"Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include -#include - -#include "util/hash_table.h" -#include "util/slab.h" - -#include "drm/freedreno_ringbuffer.h" -#include "msm_priv.h" - -/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead - * by avoiding the additional tracking necessary to build cmds/relocs tables - * (but still builds a bos table) - */ - - -#define INIT_SIZE 0x1000 - -static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER; - - -struct msm_submit_sp { - struct fd_submit base; - - DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos); - DECLARE_ARRAY(struct fd_bo *, bos); - - unsigned seqno; - - /* maps fd_bo to idx in bos table: */ - struct hash_table *bo_table; - - struct slab_mempool ring_pool; - - struct fd_ringbuffer *primary; - - /* Allow for sub-allocation of stateobj ring buffers (ie. sharing - * the same underlying bo).. - * - * We also rely on previous stateobj having been fully constructed - * so we can reclaim extra space at it's end. 
- */ - struct fd_ringbuffer *suballoc_ring; -}; -FD_DEFINE_CAST(fd_submit, msm_submit_sp); - -/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers - * and sizes. Ie. a finalized buffer can have no more commands appended to - * it. - */ -struct msm_cmd_sp { - struct fd_bo *ring_bo; - unsigned size; -}; - -/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to - * later copy into the submit when the stateobj rb is later referenced by - * a regular rb: - */ -struct msm_reloc_bo_sp { - struct fd_bo *bo; - unsigned flags; -}; - -struct msm_ringbuffer_sp { - struct fd_ringbuffer base; - - /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */ - unsigned offset; - -// TODO check disasm.. hopefully compilers CSE can realize that -// reloc_bos and cmds are at the same offsets and optimize some -// divergent cases into single case - union { - /* for _FD_RINGBUFFER_OBJECT case: */ - struct { - struct fd_pipe *pipe; - DECLARE_ARRAY(struct msm_reloc_bo_sp, reloc_bos); - }; - /* for other cases: */ - struct { - struct fd_submit *submit; - DECLARE_ARRAY(struct msm_cmd_sp, cmds); - }; - } u; - - struct fd_bo *ring_bo; -}; -FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer_sp); - -static void finalize_current_cmd(struct fd_ringbuffer *ring); -static struct fd_ringbuffer * msm_ringbuffer_sp_init( - struct msm_ringbuffer_sp *msm_ring, - uint32_t size, enum fd_ringbuffer_flags flags); - -/* add (if needed) bo to submit and return index: */ -static uint32_t -append_bo(struct msm_submit_sp *submit, struct fd_bo *bo, uint32_t flags) -{ - struct msm_bo *msm_bo = to_msm_bo(bo); - uint32_t idx; - pthread_mutex_lock(&idx_lock); - if (likely(msm_bo->current_submit_seqno == submit->seqno)) { - idx = msm_bo->idx; - } else { - uint32_t hash = _mesa_hash_pointer(bo); - struct hash_entry *entry; - - entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo); - if (entry) { - /* found */ - idx = (uint32_t)(uintptr_t)entry->data; - } else { - 
idx = APPEND(submit, submit_bos); - idx = APPEND(submit, bos); - - submit->submit_bos[idx].flags = 0; - submit->submit_bos[idx].handle = bo->handle; - submit->submit_bos[idx].presumed = 0; - - submit->bos[idx] = fd_bo_ref(bo); - - _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo, - (void *)(uintptr_t)idx); - } - msm_bo->current_submit_seqno = submit->seqno; - msm_bo->idx = idx; - } - pthread_mutex_unlock(&idx_lock); - if (flags & FD_RELOC_READ) - submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ; - if (flags & FD_RELOC_WRITE) - submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE; - return idx; -} - -static void -msm_submit_suballoc_ring_bo(struct fd_submit *submit, - struct msm_ringbuffer_sp *msm_ring, uint32_t size) -{ - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - unsigned suballoc_offset = 0; - struct fd_bo *suballoc_bo = NULL; - - if (msm_submit->suballoc_ring) { - struct msm_ringbuffer_sp *suballoc_ring = - to_msm_ringbuffer_sp(msm_submit->suballoc_ring); - - suballoc_bo = suballoc_ring->ring_bo; - suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) + - suballoc_ring->offset; - - suballoc_offset = align(suballoc_offset, 0x10); - - if ((size + suballoc_offset) > suballoc_bo->size) { - suballoc_bo = NULL; - } - } - - if (!suballoc_bo) { - // TODO possibly larger size for streaming bo? 
- msm_ring->ring_bo = fd_bo_new_ring( - submit->pipe->dev, 0x8000, 0); - msm_ring->offset = 0; - } else { - msm_ring->ring_bo = fd_bo_ref(suballoc_bo); - msm_ring->offset = suballoc_offset; - } - - struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring; - - msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base); - - if (old_suballoc_ring) - fd_ringbuffer_del(old_suballoc_ring); -} - -static struct fd_ringbuffer * -msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size, - enum fd_ringbuffer_flags flags) -{ - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - struct msm_ringbuffer_sp *msm_ring; - - msm_ring = slab_alloc_st(&msm_submit->ring_pool); - - msm_ring->u.submit = submit; - - /* NOTE: needs to be before _suballoc_ring_bo() since it could - * increment the refcnt of the current ring - */ - msm_ring->base.refcnt = 1; - - if (flags & FD_RINGBUFFER_STREAMING) { - msm_submit_suballoc_ring_bo(submit, msm_ring, size); - } else { - if (flags & FD_RINGBUFFER_GROWABLE) - size = INIT_SIZE; - - msm_ring->offset = 0; - msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size, 0); - } - - if (!msm_ringbuffer_sp_init(msm_ring, size, flags)) - return NULL; - - if (flags & FD_RINGBUFFER_PRIMARY) { - debug_assert(!msm_submit->primary); - msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base); - } - - return &msm_ring->base; -} - -static int -msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd, - int *out_fence_fd, uint32_t *out_fence) -{ - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe); - struct drm_msm_gem_submit req = { - .flags = msm_pipe->pipe, - .queueid = msm_pipe->queue_id, - }; - int ret; - - debug_assert(msm_submit->primary); - finalize_current_cmd(msm_submit->primary); - - struct msm_ringbuffer_sp *primary = to_msm_ringbuffer_sp(msm_submit->primary); - struct drm_msm_gem_submit_cmd cmds[primary->u.nr_cmds]; - - for (unsigned i = 0; i < 
primary->u.nr_cmds; i++) { - cmds[i].type = MSM_SUBMIT_CMD_BUF; - cmds[i].submit_idx = - append_bo(msm_submit, primary->u.cmds[i].ring_bo, FD_RELOC_READ); - cmds[i].submit_offset = primary->offset; - cmds[i].size = primary->u.cmds[i].size; - cmds[i].pad = 0; - cmds[i].nr_relocs = 0; - } - - if (in_fence_fd != -1) { - req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT; - req.fence_fd = in_fence_fd; - } - - if (out_fence_fd) { - req.flags |= MSM_SUBMIT_FENCE_FD_OUT; - } - - /* needs to be after get_cmd() as that could create bos/cmds table: */ - req.bos = VOID2U64(msm_submit->submit_bos), - req.nr_bos = msm_submit->nr_submit_bos; - req.cmds = VOID2U64(cmds), - req.nr_cmds = primary->u.nr_cmds; - - DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos); - - ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT, - &req, sizeof(req)); - if (ret) { - ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno)); - msm_dump_submit(&req); - } else if (!ret) { - if (out_fence) - *out_fence = req.fence; - - if (out_fence_fd) - *out_fence_fd = req.fence_fd; - } - - return ret; -} - -static void -msm_submit_sp_destroy(struct fd_submit *submit) -{ - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - - if (msm_submit->primary) - fd_ringbuffer_del(msm_submit->primary); - if (msm_submit->suballoc_ring) - fd_ringbuffer_del(msm_submit->suballoc_ring); - - _mesa_hash_table_destroy(msm_submit->bo_table, NULL); - - // TODO it would be nice to have a way to debug_assert() if all - // rb's haven't been free'd back to the slab, because that is - // an indication that we are leaking bo's - slab_destroy(&msm_submit->ring_pool); - - for (unsigned i = 0; i < msm_submit->nr_bos; i++) - fd_bo_del(msm_submit->bos[i]); - - free(msm_submit->submit_bos); - free(msm_submit->bos); - free(msm_submit); -} - -static const struct fd_submit_funcs submit_funcs = { - .new_ringbuffer = msm_submit_sp_new_ringbuffer, - .flush = msm_submit_sp_flush, - .destroy = 
msm_submit_sp_destroy, -}; - -struct fd_submit * -msm_submit_sp_new(struct fd_pipe *pipe) -{ - struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit)); - struct fd_submit *submit; - static unsigned submit_cnt = 0; - - msm_submit->seqno = ++submit_cnt; - msm_submit->bo_table = _mesa_hash_table_create(NULL, - _mesa_hash_pointer, _mesa_key_pointer_equal); - // TODO tune size: - slab_create(&msm_submit->ring_pool, sizeof(struct msm_ringbuffer_sp), 16); - - submit = &msm_submit->base; - submit->pipe = pipe; - submit->funcs = &submit_funcs; - - return submit; -} - - -static void -finalize_current_cmd(struct fd_ringbuffer *ring) -{ - debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); - - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - unsigned idx = APPEND(&msm_ring->u, cmds); - - msm_ring->u.cmds[idx].ring_bo = fd_bo_ref(msm_ring->ring_bo); - msm_ring->u.cmds[idx].size = offset_bytes(ring->cur, ring->start); -} - -static void -msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size) -{ - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - struct fd_pipe *pipe = msm_ring->u.submit->pipe; - - debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE); - - finalize_current_cmd(ring); - - fd_bo_del(msm_ring->ring_bo); - msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0); - - ring->start = fd_bo_map(msm_ring->ring_bo); - ring->end = &(ring->start[size/4]); - ring->cur = ring->start; - ring->size = size; -} - -static void -msm_ringbuffer_sp_emit_reloc(struct fd_ringbuffer *ring, - const struct fd_reloc *reloc) -{ - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - struct fd_pipe *pipe; - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - unsigned idx = APPEND(&msm_ring->u, reloc_bos); - - msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo); - msm_ring->u.reloc_bos[idx].flags = reloc->flags; - - pipe = msm_ring->u.pipe; - } else { - struct msm_submit_sp *msm_submit = - to_msm_submit_sp(msm_ring->u.submit); - - 
append_bo(msm_submit, reloc->bo, reloc->flags); - - pipe = msm_ring->u.submit->pipe; - } - - uint64_t iova = fd_bo_get_iova(reloc->bo) + reloc->offset; - uint32_t dword = iova; - int shift = reloc->shift; - - if (shift < 0) - dword >>= -shift; - else - dword <<= shift; - - (*ring->cur++) = dword | reloc->or; - - if (pipe->gpu_id >= 500) { - dword = iova >> 32; - shift -= 32; - - if (shift < 0) - dword >>= -shift; - else - dword <<= shift; - - (*ring->cur++) = dword | reloc->orhi; - } -} - -static uint32_t -msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer *ring, - struct fd_ringbuffer *target, uint32_t cmd_idx) -{ - struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target); - struct fd_bo *bo; - uint32_t size; - - if ((target->flags & FD_RINGBUFFER_GROWABLE) && - (cmd_idx < msm_target->u.nr_cmds)) { - bo = msm_target->u.cmds[cmd_idx].ring_bo; - size = msm_target->u.cmds[cmd_idx].size; - } else { - bo = msm_target->ring_bo; - size = offset_bytes(target->cur, target->start); - } - - msm_ringbuffer_sp_emit_reloc(ring, &(struct fd_reloc){ - .bo = bo, - .flags = FD_RELOC_READ, - .offset = msm_target->offset, - }); - - if ((target->flags & _FD_RINGBUFFER_OBJECT) && - !(ring->flags & _FD_RINGBUFFER_OBJECT)) { - // TODO it would be nice to know whether we have already - // seen this target before. 
But hopefully we hit the - // append_bo() fast path enough for this to not matter: - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit); - - for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) { - append_bo(msm_submit, msm_target->u.reloc_bos[i].bo, - msm_target->u.reloc_bos[i].flags); - } - } - - return size; -} - -static uint32_t -msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring) -{ - if (ring->flags & FD_RINGBUFFER_GROWABLE) - return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1; - return 1; -} - -static void -msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring) -{ - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - - fd_bo_del(msm_ring->ring_bo); - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) { - fd_bo_del(msm_ring->u.reloc_bos[i].bo); - } - - free(msm_ring); - } else { - struct fd_submit *submit = msm_ring->u.submit; - - for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) { - fd_bo_del(msm_ring->u.cmds[i].ring_bo); - } - - slab_free_st(&to_msm_submit_sp(submit)->ring_pool, msm_ring); - } -} - -static const struct fd_ringbuffer_funcs ring_funcs = { - .grow = msm_ringbuffer_sp_grow, - .emit_reloc = msm_ringbuffer_sp_emit_reloc, - .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring, - .cmd_count = msm_ringbuffer_sp_cmd_count, - .destroy = msm_ringbuffer_sp_destroy, -}; - -static inline struct fd_ringbuffer * -msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size, - enum fd_ringbuffer_flags flags) -{ - struct fd_ringbuffer *ring = &msm_ring->base; - - debug_assert(msm_ring->ring_bo); - - uint8_t *base = fd_bo_map(msm_ring->ring_bo); - ring->start = (void *)(base + msm_ring->offset); - ring->end = &(ring->start[size/4]); - ring->cur = ring->start; - - ring->size = size; - ring->flags = flags; - - ring->funcs = &ring_funcs; - - // TODO initializing these could probably be 
conditional on flags - // since unneed for FD_RINGBUFFER_STAGING case.. - msm_ring->u.cmds = NULL; - msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0; - - msm_ring->u.reloc_bos = NULL; - msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0; - - return ring; -} - -struct fd_ringbuffer * -msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size) -{ - struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring)); - - msm_ring->u.pipe = pipe; - msm_ring->offset = 0; - msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0); - msm_ring->base.refcnt = 1; - - return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT); -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_batch.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_batch.c --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_batch.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_batch.c 2019-03-31 23:16:37.000000000 +0000 @@ -76,23 +76,28 @@ batch->fence = fd_fence_create(batch); batch->cleared = 0; + batch->fast_cleared = 0; batch->invalidated = 0; batch->restore = batch->resolve = 0; batch->needs_flush = false; batch->flushed = false; batch->gmem_reason = 0; batch->num_draws = 0; + batch->num_vertices = 0; batch->stage = FD_STAGE_NULL; fd_reset_wfi(batch); util_dynarray_init(&batch->draw_patches, NULL); + if (is_a2xx(ctx->screen)) { + util_dynarray_init(&batch->shader_patches, NULL); + util_dynarray_init(&batch->gmem_patches, NULL); + } + if (is_a3xx(ctx->screen)) util_dynarray_init(&batch->rbrc_patches, NULL); - util_dynarray_init(&batch->gmem_patches, NULL); - assert(batch->resources->entries == 0); util_dynarray_init(&batch->samples, NULL); @@ -144,20 +149,34 @@ debug_assert(!batch->binning); debug_assert(!batch->gmem); } + if (batch->lrz_clear) { fd_ringbuffer_del(batch->lrz_clear); batch->lrz_clear = NULL; } + if (batch->tile_setup) { + fd_ringbuffer_del(batch->tile_setup); + batch->tile_setup = NULL; + } + + if 
(batch->tile_fini) { + fd_ringbuffer_del(batch->tile_fini); + batch->tile_fini = NULL; + } + fd_submit_del(batch->submit); util_dynarray_fini(&batch->draw_patches); + if (is_a2xx(batch->ctx->screen)) { + util_dynarray_fini(&batch->shader_patches); + util_dynarray_fini(&batch->gmem_patches); + } + if (is_a3xx(batch->ctx->screen)) util_dynarray_fini(&batch->rbrc_patches); - util_dynarray_fini(&batch->gmem_patches); - while (batch->samples.size > 0) { struct fd_hw_sample *samp = util_dynarray_pop(&batch->samples, struct fd_hw_sample *); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_batch_cache.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_batch_cache.c --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_batch_cache.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_batch_cache.c 2019-03-31 23:16:37.000000000 +0000 @@ -81,7 +81,8 @@ struct { struct pipe_resource *texture; union pipe_surface_desc u; - uint16_t pos, format; + uint8_t pos, samples; + uint16_t format; } surf[0]; }; @@ -401,6 +402,7 @@ key->surf[idx].texture = psurf->texture; key->surf[idx].u = psurf->u; key->surf[idx].pos = pos; + key->surf[idx].samples = MAX2(1, psurf->nr_samples); key->surf[idx].format = psurf->format; } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_batch.h mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_batch.h --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_batch.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_batch.h 2019-03-31 23:16:37.000000000 +0000 @@ -95,7 +95,7 @@ FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH, FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL, FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL, - } invalidated, cleared, restore, resolve; + } invalidated, cleared, fast_cleared, restore, resolve; /* is this a non-draw batch (ie compute/blit which has no pfb state)? 
*/ bool nondraw : 1; @@ -124,6 +124,7 @@ FD_GMEM_LOGICOP_ENABLED = 0x20, } gmem_reason; unsigned num_draws; /* number of draws in current batch */ + unsigned num_vertices; /* number of vertices in current batch */ /* Track the maximal bounds of the scissor of all the draws within a * batch. Used at the tile rendering step (fd_gmem_render_tiles(), @@ -136,11 +137,6 @@ */ struct util_dynarray draw_patches; - /* Keep track of blitter GMEM offsets that need to be patched up once we - * know the gmem layout: - */ - struct util_dynarray gmem_patches; - /* Keep track of writes to RB_RENDER_CONTROL which need to be patched * once we know whether or not to use GMEM, and GMEM tile pitch. * @@ -149,6 +145,18 @@ */ struct util_dynarray rbrc_patches; + /* Keep track of GMEM related values that need to be patched up once we + * know the gmem layout: + */ + struct util_dynarray gmem_patches; + + /* Keep track of pointer to start of MEM exports for a20x binning shaders + * + * this is so the end of the shader can be cut off at the right point + * depending on the GMEM configuration + */ + struct util_dynarray shader_patches; + struct pipe_framebuffer_state framebuffer; struct fd_submit *submit; @@ -162,6 +170,12 @@ // TODO maybe more generically split out clear and clear_binning rings? 
struct fd_ringbuffer *lrz_clear; + struct fd_ringbuffer *tile_setup; + struct fd_ringbuffer *tile_fini; + + union pipe_color_union clear_color[MAX_RENDER_TARGETS]; + double clear_depth; + unsigned clear_stencil; /** * hw query related state: diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_blitter.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_blitter.c --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_blitter.c 2017-12-19 17:21:05.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_blitter.c 2019-03-31 23:16:37.000000000 +0000 @@ -25,9 +25,11 @@ */ #include "util/u_blitter.h" +#include "util/u_surface.h" #include "freedreno_blitter.h" #include "freedreno_context.h" +#include "freedreno_resource.h" /* generic blit using u_blitter.. slightly modified version of util_blitter_blit * which also handles PIPE_BUFFER: @@ -80,7 +82,7 @@ src_templ->swizzle_a = PIPE_SWIZZLE_W; } -void +bool fd_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info) { struct pipe_resource *dst = info->dst.resource; @@ -88,6 +90,16 @@ struct pipe_context *pipe = &ctx->base; struct pipe_surface *dst_view, dst_templ; struct pipe_sampler_view src_templ, *src_view; + bool discard = false; + + if (!info->scissor_enable && !info->alpha_blend) { + discard = util_texrange_covers_whole_level(info->dst.resource, + info->dst.level, info->dst.box.x, info->dst.box.y, + info->dst.box.z, info->dst.box.width, + info->dst.box.height, info->dst.box.depth); + } + + fd_blitter_pipe_begin(ctx, info->render_condition_enable, discard, FD_STAGE_BLIT); /* Initialize the surface. 
*/ default_dst_texture(&dst_templ, dst, info->dst.level, @@ -109,4 +121,97 @@ pipe_surface_reference(&dst_view, NULL); pipe_sampler_view_reference(&src_view, NULL); + + fd_blitter_pipe_end(ctx); + + /* The fallback blitter must never fail: */ + return true; +} + +/** + * _copy_region using pipe (3d engine) + */ +static bool +fd_blitter_pipe_copy_region(struct fd_context *ctx, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) +{ + /* not until we allow rendertargets to be buffers */ + if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER) + return false; + + if (!util_blitter_is_copy_supported(ctx->blitter, dst, src)) + return false; + + /* TODO we could discard if dst box covers dst level fully.. */ + fd_blitter_pipe_begin(ctx, false, false, FD_STAGE_BLIT); + util_blitter_copy_texture(ctx->blitter, + dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); + fd_blitter_pipe_end(ctx); + + return true; +} + +/** + * Copy a block of pixels from one resource to another. + * The resource must be of the same format. 
+ */ +void +fd_resource_copy_region(struct pipe_context *pctx, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) +{ + struct fd_context *ctx = fd_context(pctx); + + if (ctx->blit) { + struct pipe_blit_info info; + + memset(&info, 0, sizeof info); + info.dst.resource = dst; + info.dst.level = dst_level; + info.dst.box.x = dstx; + info.dst.box.y = dsty; + info.dst.box.z = dstz; + info.dst.box.width = src_box->width; + info.dst.box.height = src_box->height; + assert(info.dst.box.width >= 0); + assert(info.dst.box.height >= 0); + info.dst.box.depth = 1; + info.dst.format = dst->format; + info.src.resource = src; + info.src.level = src_level; + info.src.box = *src_box; + info.src.format = src->format; + info.mask = util_format_get_mask(src->format); + info.filter = PIPE_TEX_FILTER_NEAREST; + info.scissor_enable = 0; + + if (ctx->blit(ctx, &info)) + return; + } + + /* TODO if we have 2d core, or other DMA engine that could be used + * for simple copies and reasonably easily synchronized with the 3d + * core, this is where we'd plug it in.. 
+ */ + + /* try blit on 3d pipe: */ + if (fd_blitter_pipe_copy_region(ctx, + dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box)) + return; + + /* else fallback to pure sw: */ + util_resource_copy_region(pctx, + dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_blitter.h mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_blitter.h --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_blitter.h 2017-12-19 17:21:05.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_blitter.h 2019-03-31 23:16:37.000000000 +0000 @@ -31,6 +31,14 @@ #include "freedreno_context.h" -void fd_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info); +bool fd_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info); + +void fd_resource_copy_region(struct pipe_context *pctx, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box); #endif /* FREEDRENO_BLIT_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_context.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_context.c --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -333,9 +333,6 @@ slab_create_child(&ctx->transfer_pool, &screen->transfer_pool); - if (!ctx->blit) - ctx->blit = fd_blitter_blit; - fd_draw_init(pctx); fd_resource_context_init(pctx); fd_query_context_init(pctx); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_context.h mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_context.h --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_context.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_context.h 2019-03-31 23:16:37.000000000 
+0000 @@ -56,14 +56,6 @@ struct fd_program_stateobj { void *vp, *fp; - - /* rest only used by fd2.. split out: */ - uint8_t num_exports; - /* Indexed by semantic name or TGSI_SEMANTIC_COUNT + semantic index - * for TGSI_SEMANTIC_GENERIC. Special vs exports (position and point- - * size) are not included in this - */ - uint8_t export_linkage[63]; }; struct fd_constbuf_stateobj { @@ -289,6 +281,7 @@ struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple stipple; struct pipe_viewport_state viewport; + struct pipe_scissor_state viewport_scissor; struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES]; struct fd_shaderbuf_stateobj shaderbuf[PIPE_SHADER_TYPES]; struct fd_shaderimg_stateobj shaderimg[PIPE_SHADER_TYPES]; @@ -324,11 +317,11 @@ void (*launch_grid)(struct fd_context *ctx, const struct pipe_grid_info *info); /* constant emit: (note currently not used/needed for a2xx) */ - void (*emit_const)(struct fd_ringbuffer *ring, enum shader_t type, + void (*emit_const)(struct fd_ringbuffer *ring, gl_shader_stage type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc); /* emit bo addresses as constant: */ - void (*emit_const_bo)(struct fd_ringbuffer *ring, enum shader_t type, boolean write, + void (*emit_const_bo)(struct fd_ringbuffer *ring, gl_shader_stage type, boolean write, uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets); /* indirect-branch emit: */ @@ -342,7 +335,7 @@ void (*query_set_stage)(struct fd_batch *batch, enum fd_render_stage stage); /* blitter: */ - void (*blit)(struct fd_context *ctx, const struct pipe_blit_info *info); + bool (*blit)(struct fd_context *ctx, const struct pipe_blit_info *info); /* simple gpu "memcpy": */ void (*mem_to_mem)(struct fd_ringbuffer *ring, struct pipe_resource *dst, diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_draw.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_draw.c --- 
mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_draw.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -144,9 +144,13 @@ } else { batch->invalidated |= FD_BUFFER_DEPTH; } - buffers |= FD_BUFFER_DEPTH; - resource_written(batch, pfb->zsbuf->texture); batch->gmem_reason |= FD_GMEM_DEPTH_ENABLED; + if (fd_depth_write_enabled(ctx)) { + buffers |= FD_BUFFER_DEPTH; + resource_written(batch, pfb->zsbuf->texture); + } else { + resource_read(batch, pfb->zsbuf->texture); + } } if (fd_stencil_enabled(ctx)) { @@ -155,18 +159,9 @@ } else { batch->invalidated |= FD_BUFFER_STENCIL; } + batch->gmem_reason |= FD_GMEM_STENCIL_ENABLED; buffers |= FD_BUFFER_STENCIL; resource_written(batch, pfb->zsbuf->texture); - batch->gmem_reason |= FD_GMEM_STENCIL_ENABLED; - } - } - - if (ctx->dirty & FD_DIRTY_FRAMEBUFFER) { - for (i = 0; i < pfb->nr_cbufs; i++) { - if (!pfb->cbufs[i]) - continue; - - resource_written(batch, pfb->cbufs[i]->texture); } } @@ -191,6 +186,9 @@ if (fd_blend_enabled(ctx, i)) batch->gmem_reason |= FD_GMEM_BLEND_ENABLED; + + if (ctx->dirty & FD_DIRTY_FRAMEBUFFER) + resource_written(batch, pfb->cbufs[i]->texture); } /* Mark SSBOs as being written.. 
we don't actually know which ones are @@ -291,6 +289,8 @@ if (ctx->draw_vbo(ctx, info, index_offset)) batch->needs_flush = true; + batch->num_vertices += info->count * info->instance_count; + for (i = 0; i < ctx->streamout.num_targets; i++) ctx->streamout.offsets[i] += info->count; @@ -410,7 +410,7 @@ * the depth buffer, etc) */ cleared_buffers = buffers & (FD_BUFFER_ALL & ~batch->restore); - batch->cleared |= cleared_buffers; + batch->cleared |= buffers; batch->invalidated |= cleared_buffers; batch->resolve |= buffers; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_draw.h mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_draw.h --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_draw.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_draw.h 2019-03-31 23:16:37.000000000 +0000 @@ -73,9 +73,33 @@ } if (is_a20x(batch->ctx->screen)) { - OUT_PKT3(ring, CP_DRAW_INDX, idx_buffer ? 4 : 2); + /* a20x has a different draw command for drawing with binning data + * note: if we do patching we will have to insert a NOP + * + * binning data is is 1 byte/vertex (8x8x4 bin position of vertex) + * base ptr set by the CP_SET_DRAW_INIT_FLAGS command + * + * TODO: investigate the faceness_cull_select parameter to see how + * it is used with hw binning to use "faceness" bits + */ + uint32_t size = 2; + if (vismode) + size += 2; + if (idx_buffer) + size += 2; + + BEGIN_RING(ring, size+1); + if (vismode) + util_dynarray_append(&batch->draw_patches, uint32_t*, ring->cur); + + OUT_PKT3(ring, vismode ? CP_DRAW_INDX_BIN : CP_DRAW_INDX, size); OUT_RING(ring, 0x00000000); - OUT_RING(ring, DRAW_A20X(primtype, src_sel, idx_type, vismode, count)); + OUT_RING(ring, DRAW_A20X(primtype, DI_FACE_CULL_NONE, src_sel, + idx_type, vismode, vismode, count)); + if (vismode == USE_VISIBILITY) { + OUT_RING(ring, batch->num_vertices); + OUT_RING(ring, count); + } } else { OUT_PKT3(ring, CP_DRAW_INDX, idx_buffer ? 
5 : 3); OUT_RING(ring, 0x00000000); /* viz query info. */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_gmem.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_gmem.c --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_gmem.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_gmem.c 2019-03-31 23:16:37.000000000 +0000 @@ -77,24 +77,25 @@ static uint32_t total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp[2], - uint32_t bin_w, uint32_t bin_h, struct fd_gmem_stateobj *gmem) + uint32_t bin_w, uint32_t bin_h, uint32_t gmem_align, + struct fd_gmem_stateobj *gmem) { uint32_t total = 0, i; for (i = 0; i < MAX_RENDER_TARGETS; i++) { if (cbuf_cpp[i]) { - gmem->cbuf_base[i] = align(total, 0x4000); + gmem->cbuf_base[i] = align(total, gmem_align); total = gmem->cbuf_base[i] + cbuf_cpp[i] * bin_w * bin_h; } } if (zsbuf_cpp[0]) { - gmem->zsbuf_base[0] = align(total, 0x4000); + gmem->zsbuf_base[0] = align(total, gmem_align); total = gmem->zsbuf_base[0] + zsbuf_cpp[0] * bin_w * bin_h; } if (zsbuf_cpp[1]) { - gmem->zsbuf_base[1] = align(total, 0x4000); + gmem->zsbuf_base[1] = align(total, gmem_align); total = gmem->zsbuf_base[1] + zsbuf_cpp[1] * bin_w * bin_h; } @@ -116,11 +117,13 @@ uint32_t minx, miny, width, height; uint32_t nbins_x = 1, nbins_y = 1; uint32_t bin_w, bin_h; + uint32_t gmem_align = 0x4000; uint32_t max_width = bin_width(screen); uint8_t cbuf_cpp[MAX_RENDER_TARGETS] = {0}, zsbuf_cpp[2] = {0}; uint32_t i, j, t, xoff, yoff; uint32_t tpp_x, tpp_y; - bool has_zs = !!(batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)); + bool has_zs = !!(batch->gmem_reason & (FD_GMEM_DEPTH_ENABLED | + FD_GMEM_STENCIL_ENABLED | FD_GMEM_CLEARS_DEPTH_STENCIL)); int tile_n[npipes]; if (has_zs) { @@ -128,6 +131,10 @@ zsbuf_cpp[0] = rsc->cpp; if (rsc->stencil) zsbuf_cpp[1] = rsc->stencil->cpp; + } else { + /* we might have a zsbuf, but it isn't used */ + batch->restore &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL); + batch->resolve 
&= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL); } for (i = 0; i < pfb->nr_cbufs; i++) { if (pfb->cbufs[i]) @@ -177,10 +184,18 @@ zsbuf_cpp[0], width, height); } + if (is_a20x(screen) && batch->cleared) { + /* under normal circumstances the requirement would be 4K + * but the fast clear path requires an alignment of 32K + */ + gmem_align = 0x8000; + } + /* then find a bin width/height that satisfies the memory * constraints: */ - while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem) > gmem_size) { + while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem_align, gmem) > + gmem_size) { if (bin_w > bin_h) { nbins_x++; bin_w = align(width / nbins_x, gmem_alignw); @@ -214,12 +229,21 @@ #define div_round_up(v, a) (((v) + (a) - 1) / (a)) /* figure out number of tiles per pipe: */ - tpp_x = tpp_y = 1; - while (div_round_up(nbins_y, tpp_y) > screen->num_vsc_pipes) - tpp_y += 2; - while ((div_round_up(nbins_y, tpp_y) * - div_round_up(nbins_x, tpp_x)) > screen->num_vsc_pipes) - tpp_x += 1; + if (is_a20x(ctx->screen)) { + /* for a20x we want to minimize the number of "pipes" + * binning data has 3 bits for x/y (8x8) but the edges are used to + * cull off-screen vertices with hw binning, so we have 6x6 pipes + */ + tpp_x = 6; + tpp_y = 6; + } else { + tpp_x = tpp_y = 1; + while (div_round_up(nbins_y, tpp_y) > screen->num_vsc_pipes) + tpp_y += 2; + while ((div_round_up(nbins_y, tpp_y) * + div_round_up(nbins_x, tpp_x)) > screen->num_vsc_pipes) + tpp_x += 1; + } gmem->maxpw = tpp_x; gmem->maxph = tpp_y; @@ -246,6 +270,9 @@ xoff += tpp_x; } + /* number of pipes to use for a20x */ + gmem->num_vsc_pipes = MAX2(1, i); + for (; i < npipes; i++) { struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; pipe->x = pipe->y = pipe->w = pipe->h = 0; @@ -280,11 +307,12 @@ /* pipe number: */ p = ((i / tpp_y) * div_round_up(nbins_x, tpp_x)) + (j / tpp_x); + assert(p < gmem->num_vsc_pipes); /* clip bin width: */ bw = MIN2(bin_w, minx + width - xoff); - - tile->n = tile_n[p]++; + tile->n = 
!is_a20x(ctx->screen) ? tile_n[p]++ : + ((i % tpp_y + 1) << 3 | (j % tpp_x + 1)); tile->p = p; tile->bin_w = bw; tile->bin_h = bh; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_gmem.h mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_gmem.h --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_gmem.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_gmem.h 2019-03-31 23:16:37.000000000 +0000 @@ -57,6 +57,7 @@ uint16_t minx, miny; uint16_t width, height; uint16_t maxpw, maxph; /* maximum pipe width/height */ + uint8_t num_vsc_pipes; /* number of pipes for a20x */ }; struct fd_batch; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_program.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_program.c --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_program.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_program.c 2019-03-31 23:16:37.000000000 +0000 @@ -67,7 +67,7 @@ "VERT \n" "DCL IN[0] \n" "DCL IN[1] \n" - "DCL OUT[0], TEXCOORD[0] \n" + "DCL OUT[0], GENERIC[0] \n" "DCL OUT[1], POSITION \n" " 0: MOV OUT[0], IN[0] \n" " 0: MOV OUT[1], IN[1] \n" @@ -129,15 +129,14 @@ pctx->bind_fs_state = fd_fp_state_bind; pctx->bind_vs_state = fd_vp_state_bind; - // XXX for now, let a2xx keep it's own hand-rolled shaders - // for solid and blit progs: - if (ctx->screen->gpu_id < 300) - return; - ctx->solid_prog.fp = assemble_tgsi(pctx, solid_fp, true); ctx->solid_prog.vp = assemble_tgsi(pctx, solid_vp, false); ctx->blit_prog[0].vp = assemble_tgsi(pctx, blit_vp, false); ctx->blit_prog[0].fp = fd_prog_blit(pctx, 1, false); + + if (ctx->screen->gpu_id < 300) + return; + for (i = 1; i < ctx->screen->max_rts; i++) { ctx->blit_prog[i].vp = ctx->blit_prog[0].vp; ctx->blit_prog[i].fp = fd_prog_blit(pctx, i + 1, false); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_resource.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_resource.c --- 
mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_resource.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_resource.c 2019-03-31 23:16:37.000000000 +0000 @@ -35,6 +35,7 @@ #include "freedreno_resource.h" #include "freedreno_batch_cache.h" +#include "freedreno_blitter.h" #include "freedreno_fence.h" #include "freedreno_screen.h" #include "freedreno_surface.h" @@ -42,6 +43,7 @@ #include "freedreno_query_hw.h" #include "freedreno_util.h" +#include #include /* XXX this should go away, needed for 'struct winsys_handle' */ @@ -97,9 +99,12 @@ static void realloc_bo(struct fd_resource *rsc, uint32_t size) { + struct pipe_resource *prsc = &rsc->base; struct fd_screen *screen = fd_screen(rsc->base.screen); uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE | - DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */ + DRM_FREEDRENO_GEM_TYPE_KMEM | + COND(prsc->bind & PIPE_BIND_SCANOUT, DRM_FREEDRENO_GEM_SCANOUT); + /* TODO other flags? */ /* if we start using things other than write-combine, * be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT @@ -108,7 +113,8 @@ if (rsc->bo) fd_bo_del(rsc->bo); - rsc->bo = fd_bo_new(screen->dev, size, flags); + rsc->bo = fd_bo_new(screen->dev, size, flags, "%ux%ux%u@%u:%x", + prsc->width0, prsc->height0, prsc->depth0, rsc->cpp, prsc->bind); rsc->seqno = p_atomic_inc_return(&screen->rsc_seqno); util_range_set_empty(&rsc->valid_buffer_range); fd_bc_invalidate_resource(rsc, true); @@ -117,15 +123,15 @@ static void do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool fallback) { + struct pipe_context *pctx = &ctx->base; + /* TODO size threshold too?? 
*/ if (!fallback) { /* do blit on gpu: */ - fd_blitter_pipe_begin(ctx, false, true, FD_STAGE_BLIT); - ctx->blit(ctx, blit); - fd_blitter_pipe_end(ctx); + pctx->blit(pctx, blit); } else { /* do blit on cpu: */ - util_resource_copy_region(&ctx->base, + util_resource_copy_region(pctx, blit->dst.resource, blit->dst.level, blit->dst.box.x, blit->dst.box.y, blit->dst.box.z, blit->src.resource, blit->src.level, &blit->src.box); @@ -289,8 +295,16 @@ tmpl.width0 = box->width; tmpl.height0 = box->height; - tmpl.depth0 = box->depth; - tmpl.array_size = 1; + /* for array textures, box->depth is the array_size, otherwise + * for 3d textures, it is the depth: + */ + if (tmpl.array_size > 1) { + tmpl.array_size = box->depth; + tmpl.depth0 = 1; + } else { + tmpl.array_size = 1; + tmpl.depth0 = box->depth; + } tmpl.last_level = 0; tmpl.bind |= PIPE_BIND_LINEAR; @@ -342,17 +356,6 @@ do_blit(ctx, &blit, false); } -static unsigned -fd_resource_layer_offset(struct fd_resource *rsc, - struct fd_resource_slice *slice, - unsigned layer) -{ - if (rsc->layer_first) - return layer * rsc->layer_size; - else - return layer * slice->size0; -} - static void fd_resource_transfer_flush_region(struct pipe_context *pctx, struct pipe_transfer *ptrans, const struct pipe_box *box) @@ -496,7 +499,21 @@ if (usage & PIPE_TRANSFER_READ) { fd_blit_to_staging(ctx, trans); - fd_bo_cpu_prep(rsc->bo, ctx->pipe, DRM_FREEDRENO_PREP_READ); + + struct fd_batch *batch = NULL; + fd_batch_reference(&batch, staging_rsc->write_batch); + + /* we can't fd_bo_cpu_prep() until the blit to staging + * is submitted to kernel.. 
in that case write_batch + * wouldn't be NULL yet: + */ + if (batch) { + fd_batch_sync(batch); + fd_batch_reference(&batch, NULL); + } + + fd_bo_cpu_prep(staging_rsc->bo, ctx->pipe, + DRM_FREEDRENO_PREP_READ); } buf = fd_bo_map(staging_rsc->bo); @@ -621,10 +638,10 @@ } buf = fd_bo_map(rsc->bo); - offset = slice->offset + + offset = box->y / util_format_get_blockheight(format) * ptrans->stride + box->x / util_format_get_blockwidth(format) * rsc->cpp + - fd_resource_layer_offset(rsc, slice, box->z); + fd_resource_offset(rsc, level, box->z); if (usage & PIPE_TRANSFER_WRITE) rsc->valid = true; @@ -646,10 +663,23 @@ fd_bc_invalidate_resource(rsc, true); if (rsc->bo) fd_bo_del(rsc->bo); + if (rsc->scanout) + renderonly_scanout_destroy(rsc->scanout, fd_screen(pscreen)->ro); + util_range_destroy(&rsc->valid_buffer_range); FREE(rsc); } +static uint64_t +fd_resource_modifier(struct fd_resource *rsc) +{ + if (!rsc->tile_mode) + return DRM_FORMAT_MOD_LINEAR; + + /* TODO invent a modifier for tiled but not UBWC buffers: */ + return DRM_FORMAT_MOD_INVALID; +} + static boolean fd_resource_get_handle(struct pipe_screen *pscreen, struct pipe_context *pctx, @@ -659,7 +689,9 @@ { struct fd_resource *rsc = fd_resource(prsc); - return fd_screen_bo_get_handle(pscreen, rsc->bo, + handle->modifier = fd_resource_modifier(rsc); + + return fd_screen_bo_get_handle(pscreen, rsc->bo, rsc->scanout, rsc->slices[0].pitch * rsc->cpp, handle); } @@ -794,19 +826,65 @@ } } +static bool +find_modifier(uint64_t needle, const uint64_t *haystack, int count) +{ + int i; + + for (i = 0; i < count; i++) { + if (haystack[i] == needle) + return true; + } + + return false; +} + /** * Create a new texture object, using the given template info. 
*/ static struct pipe_resource * -fd_resource_create(struct pipe_screen *pscreen, - const struct pipe_resource *tmpl) +fd_resource_create_with_modifiers(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl, + const uint64_t *modifiers, int count) { struct fd_screen *screen = fd_screen(pscreen); - struct fd_resource *rsc = CALLOC_STRUCT(fd_resource); - struct pipe_resource *prsc = &rsc->base; + struct fd_resource *rsc; + struct pipe_resource *prsc; enum pipe_format format = tmpl->format; uint32_t size; + /* when using kmsro, scanout buffers are allocated on the display device + * create_with_modifiers() doesn't give us usage flags, so we have to + * assume that all calls with modifiers are scanout-possible + */ + if (screen->ro && + ((tmpl->bind & PIPE_BIND_SCANOUT) || + !(count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID))) { + struct pipe_resource scanout_templat = *tmpl; + struct renderonly_scanout *scanout; + struct winsys_handle handle; + + scanout = renderonly_scanout_for_resource(&scanout_templat, + screen->ro, &handle); + if (!scanout) + return NULL; + + renderonly_scanout_destroy(scanout, screen->ro); + + assert(handle.type == WINSYS_HANDLE_TYPE_FD); + rsc = fd_resource(pscreen->resource_from_handle(pscreen, tmpl, + &handle, + PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE)); + close(handle.handle); + if (!rsc) + return NULL; + + return &rsc->base; + } + + rsc = CALLOC_STRUCT(fd_resource); + prsc = &rsc->base; + DBG("%p: target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, " "nr_samples=%u, usage=%u, bind=%x, flags=%x", prsc, tmpl->target, util_format_name(format), @@ -824,10 +902,26 @@ PIPE_BIND_LINEAR | \ PIPE_BIND_DISPLAY_TARGET) + bool linear = find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count); + if (tmpl->bind & LINEAR) + linear = true; + + /* Normally, for non-shared buffers, allow buffer compression if + * not shared, otherwise only allow if QCOM_COMPRESSED modifier + * is requested: + * + * TODO we should probably also limit tiled 
in a similar way, + * except we don't have a format modifier for tiled. (We probably + * should.) + */ + bool allow_ubwc = find_modifier(DRM_FORMAT_MOD_INVALID, modifiers, count); + if (tmpl->bind & PIPE_BIND_SHARED) + allow_ubwc = find_modifier(DRM_FORMAT_MOD_QCOM_COMPRESSED, modifiers, count); + if (screen->tile_mode && (tmpl->target != PIPE_BUFFER) && (tmpl->bind & PIPE_BIND_SAMPLER_VIEW) && - !(tmpl->bind & LINEAR)) { + !linear) { rsc->tile_mode = screen->tile_mode(tmpl); } @@ -839,8 +933,7 @@ rsc->internal_format = format; rsc->cpp = util_format_get_blocksize(format); - prsc->nr_samples = MAX2(1, prsc->nr_samples); - rsc->cpp *= prsc->nr_samples; + rsc->cpp *= fd_resource_nr_samples(prsc); assert(rsc->cpp); @@ -851,6 +944,15 @@ DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */ unsigned lrz_pitch = align(DIV_ROUND_UP(tmpl->width0, 8), 64); unsigned lrz_height = DIV_ROUND_UP(tmpl->height0, 8); + + /* LRZ buffer is super-sampled: */ + switch (prsc->nr_samples) { + case 4: + lrz_pitch *= 2; + case 2: + lrz_height *= 2; + } + unsigned size = lrz_pitch * lrz_height * 2; size += 0x1000; /* for GRAS_LRZ_FAST_CLEAR_BUFFER */ @@ -858,11 +960,14 @@ rsc->lrz_height = lrz_height; rsc->lrz_width = lrz_pitch; rsc->lrz_pitch = lrz_pitch; - rsc->lrz = fd_bo_new(screen->dev, size, flags); + rsc->lrz = fd_bo_new(screen->dev, size, flags, "lrz"); } size = screen->setup_slices(rsc); + if (allow_ubwc && screen->fill_ubwc_buffer_sizes && rsc->tile_mode) + size += screen->fill_ubwc_buffer_sizes(rsc); + /* special case for hw-query buffer, which we need to allocate before we * know the size: */ @@ -887,6 +992,34 @@ return NULL; } +static struct pipe_resource * +fd_resource_create(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl) +{ + const uint64_t mod = DRM_FORMAT_MOD_INVALID; + return fd_resource_create_with_modifiers(pscreen, tmpl, &mod, 1); +} + +static bool +is_supported_modifier(struct pipe_screen *pscreen, enum pipe_format pfmt, + uint64_t mod) +{ + int count; + + /* Get 
the count of supported modifiers: */ + pscreen->query_dmabuf_modifiers(pscreen, pfmt, 0, NULL, NULL, &count); + + /* Get the supported modifiers: */ + uint64_t modifiers[count]; + pscreen->query_dmabuf_modifiers(pscreen, pfmt, 0, modifiers, NULL, &count); + + for (int i = 0; i < count; i++) + if (modifiers[i] == mod) + return true; + + return false; +} + /** * Create a texture from a winsys_handle. The handle is often created in * another process by first creating a pipe texture and then calling @@ -897,6 +1030,7 @@ const struct pipe_resource *tmpl, struct winsys_handle *handle, unsigned usage) { + struct fd_screen *screen = fd_screen(pscreen); struct fd_resource *rsc = CALLOC_STRUCT(fd_resource); struct fd_resource_slice *slice = &rsc->slices[0]; struct pipe_resource *prsc = &rsc->base; @@ -924,9 +1058,9 @@ if (!rsc->bo) goto fail; - prsc->nr_samples = MAX2(1, prsc->nr_samples); rsc->internal_format = tmpl->format; - rsc->cpp = prsc->nr_samples * util_format_get_blocksize(tmpl->format); + rsc->cpp = util_format_get_blocksize(tmpl->format); + rsc->cpp *= fd_resource_nr_samples(prsc); slice->pitch = handle->stride / rsc->cpp; slice->offset = handle->offset; slice->size0 = handle->stride * prsc->height0; @@ -935,8 +1069,27 @@ (slice->pitch & (pitchalign - 1))) goto fail; + if (handle->modifier == DRM_FORMAT_MOD_QCOM_COMPRESSED) { + if (!is_supported_modifier(pscreen, tmpl->format, + DRM_FORMAT_MOD_QCOM_COMPRESSED)) { + DBG("bad modifier: %lx", handle->modifier); + goto fail; + } + debug_assert(screen->fill_ubwc_buffer_sizes); + screen->fill_ubwc_buffer_sizes(rsc); + } else if (handle->modifier && + (handle->modifier != DRM_FORMAT_MOD_INVALID)) { + goto fail; + } + assert(rsc->cpp); + if (screen->ro) { + rsc->scanout = + renderonly_create_gpu_import_for_resource(prsc, screen->ro, NULL); + /* failure is expected in some cases.. 
*/ + } + return prsc; fail: @@ -944,68 +1097,6 @@ return NULL; } -/** - * _copy_region using pipe (3d engine) - */ -static bool -fd_blitter_pipe_copy_region(struct fd_context *ctx, - struct pipe_resource *dst, - unsigned dst_level, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, - unsigned src_level, - const struct pipe_box *src_box) -{ - /* not until we allow rendertargets to be buffers */ - if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER) - return false; - - if (!util_blitter_is_copy_supported(ctx->blitter, dst, src)) - return false; - - /* TODO we could discard if dst box covers dst level fully.. */ - fd_blitter_pipe_begin(ctx, false, false, FD_STAGE_BLIT); - util_blitter_copy_texture(ctx->blitter, - dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box); - fd_blitter_pipe_end(ctx); - - return true; -} - -/** - * Copy a block of pixels from one resource to another. - * The resource must be of the same format. - * Resources with nr_samples > 1 are not allowed. - */ -static void -fd_resource_copy_region(struct pipe_context *pctx, - struct pipe_resource *dst, - unsigned dst_level, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, - unsigned src_level, - const struct pipe_box *src_box) -{ - struct fd_context *ctx = fd_context(pctx); - - /* TODO if we have 2d core, or other DMA engine that could be used - * for simple copies and reasonably easily synchronized with the 3d - * core, this is where we'd plug it in.. 
- */ - - /* try blit on 3d pipe: */ - if (fd_blitter_pipe_copy_region(ctx, - dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box)) - return; - - /* else fallback to pure sw: */ - util_resource_copy_region(pctx, - dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box); -} - bool fd_render_condition_check(struct pipe_context *pctx) { @@ -1034,22 +1125,10 @@ { struct fd_context *ctx = fd_context(pctx); struct pipe_blit_info info = *blit_info; - bool discard = false; if (info.render_condition_enable && !fd_render_condition_check(pctx)) return; - if (!info.scissor_enable && !info.alpha_blend) { - discard = util_texrange_covers_whole_level(info.dst.resource, - info.dst.level, info.dst.box.x, info.dst.box.y, - info.dst.box.z, info.dst.box.width, - info.dst.box.height, info.dst.box.depth); - } - - if (util_try_blit_via_copy_region(pctx, &info)) { - return; /* done */ - } - if (info.mask & PIPE_MASK_S) { DBG("cannot blit stencil, skipping"); info.mask &= ~PIPE_MASK_S; @@ -1062,9 +1141,8 @@ return; } - fd_blitter_pipe_begin(ctx, info.render_condition_enable, discard, FD_STAGE_BLIT); - ctx->blit(ctx, &info); - fd_blitter_pipe_end(ctx); + if (!(ctx->blit && ctx->blit(ctx, &info))) + fd_blitter_blit(ctx, &info); } void @@ -1116,24 +1194,30 @@ static void fd_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc) { + struct fd_context *ctx = fd_context(pctx); struct fd_resource *rsc = fd_resource(prsc); /* * TODO I guess we could track that the resource is invalidated and * use that as a hint to realloc rather than stall in _transfer_map(), * even in the non-DISCARD_WHOLE_RESOURCE case? 
+ * + * Note: we set dirty bits to trigger invalidate logic fd_draw_vbo */ if (rsc->write_batch) { struct fd_batch *batch = rsc->write_batch; struct pipe_framebuffer_state *pfb = &batch->framebuffer; - if (pfb->zsbuf && pfb->zsbuf->texture == prsc) + if (pfb->zsbuf && pfb->zsbuf->texture == prsc) { batch->resolve &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL); + ctx->dirty |= FD_DIRTY_ZSA; + } for (unsigned i = 0; i < pfb->nr_cbufs; i++) { if (pfb->cbufs[i] && pfb->cbufs[i]->texture == prsc) { batch->resolve &= ~(PIPE_CLEAR_COLOR0 << i); + ctx->dirty |= FD_DIRTY_FRAMEBUFFER; } } } @@ -1181,6 +1265,10 @@ bool fake_rgtc = screen->gpu_id < 400; pscreen->resource_create = u_transfer_helper_resource_create; + /* NOTE: u_transfer_helper does not yet support the _with_modifiers() + * variant: + */ + pscreen->resource_create_with_modifiers = fd_resource_create_with_modifiers; pscreen->resource_from_handle = fd_resource_from_handle; pscreen->resource_get_handle = fd_resource_get_handle; pscreen->resource_destroy = u_transfer_helper_resource_destroy; @@ -1192,6 +1280,50 @@ screen->setup_slices = fd_setup_slices; } +static void +fd_get_sample_position(struct pipe_context *context, + unsigned sample_count, unsigned sample_index, + float *pos_out) +{ + /* The following is copied from nouveau/nv50 except for position + * values, which are taken from blob driver */ + static const uint8_t pos1[1][2] = { { 0x8, 0x8 } }; + static const uint8_t pos2[2][2] = { + { 0xc, 0xc }, { 0x4, 0x4 } }; + static const uint8_t pos4[4][2] = { + { 0x6, 0x2 }, { 0xe, 0x6 }, + { 0x2, 0xa }, { 0xa, 0xe } }; + /* TODO needs to be verified on supported hw */ + static const uint8_t pos8[8][2] = { + { 0x9, 0x5 }, { 0x7, 0xb }, + { 0xd, 0x9 }, { 0x5, 0x3 }, + { 0x3, 0xd }, { 0x1, 0x7 }, + { 0xb, 0xf }, { 0xf, 0x1 } }; + + const uint8_t (*ptr)[2]; + + switch (sample_count) { + case 1: + ptr = pos1; + break; + case 2: + ptr = pos2; + break; + case 4: + ptr = pos4; + break; + case 8: + ptr = pos8; + break; + 
default: + assert(0); + return; + } + + pos_out[0] = ptr[sample_index][0] / 16.0f; + pos_out[1] = ptr[sample_index][1] / 16.0f; +} + void fd_resource_context_init(struct pipe_context *pctx) { @@ -1206,4 +1338,5 @@ pctx->blit = fd_blit; pctx->flush_resource = fd_flush_resource; pctx->invalidate_resource = fd_invalidate_resource; + pctx->get_sample_position = fd_get_sample_position; } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_resource.h mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_resource.h --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_resource.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_resource.h 2019-03-31 23:16:37.000000000 +0000 @@ -41,7 +41,7 @@ * programmed with the start address of each mipmap level, and hw * derives the layer offset within the level. * - * Texture Layout on a4xx: + * Texture Layout on a4xx+: * * For cubemap and 2d array, each layer contains all of it's mipmap * levels (layer_first layout). @@ -72,6 +72,7 @@ /* buffer range that has been initialized */ struct util_range valid_buffer_range; bool valid; + struct renderonly_scanout *scanout; /* reference to the resource holding stencil data for a z32_s8 texture */ /* TODO rename to secondary or auxiliary? 
*/ @@ -99,7 +100,6 @@ uint16_t seqno; unsigned tile_mode : 2; - unsigned preferred_tile_mode : 2; /* * LRZ @@ -178,6 +178,15 @@ return false; } +/* access # of samples, with 0 normalized to 1 (which is what we care about + * most of the time) + */ +static inline unsigned +fd_resource_nr_samples(struct pipe_resource *prsc) +{ + return MAX2(1, prsc->nr_samples); +} + void fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard, enum fd_render_stage stage); void fd_blitter_pipe_end(struct fd_context *ctx); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_screen.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_screen.c --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -39,6 +39,7 @@ #include "util/os_time.h" +#include #include #include #include @@ -58,13 +59,14 @@ #include "ir3/ir3_nir.h" +#include "a2xx/ir2.h" /* XXX this should go away */ #include "state_tracker/drm_driver.h" static const struct debug_named_value debug_options[] = { {"msgs", FD_DBG_MSGS, "Print debug messages"}, - {"disasm", FD_DBG_DISASM, "Dump TGSI and adreno shader disassembly"}, + {"disasm", FD_DBG_DISASM, "Dump TGSI and adreno shader disassembly (a2xx only, see IR3_SHADER_DEBUG)"}, {"dclear", FD_DBG_DCLEAR, "Mark all state dirty after clear"}, {"ddraw", FD_DBG_DDRAW, "Mark all state dirty after draw"}, {"noscis", FD_DBG_NOSCIS, "Disable scissor optimization"}, @@ -72,7 +74,6 @@ {"nobypass", FD_DBG_NOBYPASS, "Disable GMEM bypass"}, {"fraghalf", FD_DBG_FRAGHALF, "Use half-precision in fragment shader"}, {"nobin", FD_DBG_NOBIN, "Disable hw binning"}, - {"optmsgs", FD_DBG_OPTMSGS,"Enable optimizer debug messages"}, {"glsl120", FD_DBG_GLSL120,"Temporary flag to force GLSL 1.20 (rather than 1.30) on a3xx+"}, {"shaderdb", FD_DBG_SHADERDB, "Enable shaderdb output"}, {"flush", FD_DBG_FLUSH, "Force flush after 
every draw"}, @@ -96,17 +97,6 @@ bool fd_binning_enabled = true; static bool glsl120 = false; -static const struct debug_named_value shader_debug_options[] = { - {"vs", FD_DBG_SHADER_VS, "Print shader disasm for vertex shaders"}, - {"fs", FD_DBG_SHADER_FS, "Print shader disasm for fragment shaders"}, - {"cs", FD_DBG_SHADER_CS, "Print shader disasm for compute shaders"}, - DEBUG_NAMED_VALUE_END -}; - -DEBUG_GET_ONCE_FLAGS_OPTION(fd_shader_debug, "FD_SHADER_DEBUG", shader_debug_options, 0) - -enum fd_shader_debug fd_shader_debug = 0; - static const char * fd_screen_get_name(struct pipe_screen *pscreen) { @@ -157,6 +147,9 @@ if (screen->dev) fd_device_del(screen->dev); + if (screen->ro) + FREE(screen->ro); + fd_bc_fini(&screen->batch_cache); slab_destroy_parent(&screen->transfer_pool); @@ -237,6 +230,9 @@ case PIPE_CAP_TEXTURE_MULTISAMPLE: return is_a5xx(screen) || is_a6xx(screen); + case PIPE_CAP_SURFACE_SAMPLE_COUNT: + return is_a6xx(screen); + case PIPE_CAP_DEPTH_CLIP_DISABLE: return is_a3xx(screen) || is_a4xx(screen); @@ -321,6 +317,9 @@ case PIPE_CAP_MAX_VIEWPORTS: return 1; + case PIPE_CAP_MAX_VARYINGS: + return 16; + case PIPE_CAP_SHAREABLE_SHADERS: case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: /* manage the variants for these ourself, to avoid breaking precompile: */ @@ -505,16 +504,9 @@ case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: return 16; case PIPE_SHADER_CAP_PREFERRED_IR: - if (is_ir3(screen)) - return PIPE_SHADER_IR_NIR; - return PIPE_SHADER_IR_TGSI; + return PIPE_SHADER_IR_NIR; case PIPE_SHADER_CAP_SUPPORTED_IRS: - if (is_ir3(screen)) { - return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_TGSI); - } else { - return (1 << PIPE_SHADER_IR_TGSI); - } - return 0; + return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_TGSI); case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_SCALAR_ISA: @@ -645,12 +637,13 @@ if (is_ir3(screen)) return ir3_get_compiler_options(screen->compiler); - return NULL; + return ir2_get_compiler_options(); 
} boolean fd_screen_bo_get_handle(struct pipe_screen *pscreen, struct fd_bo *bo, + struct renderonly_scanout *scanout, unsigned stride, struct winsys_handle *whandle) { @@ -659,6 +652,8 @@ if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) { return fd_bo_get_name(bo, &whandle->handle) == 0; } else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) { + if (renderonly_get_handle(scanout, whandle)) + return TRUE; whandle->handle = fd_bo_handle(bo); return TRUE; } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) { @@ -669,6 +664,37 @@ } } +static void +fd_screen_query_dmabuf_modifiers(struct pipe_screen *pscreen, + enum pipe_format format, + int max, uint64_t *modifiers, + unsigned int *external_only, + int *count) +{ + struct fd_screen *screen = fd_screen(pscreen); + int i, num = 0; + + max = MIN2(max, screen->num_supported_modifiers); + + if (!max) { + max = screen->num_supported_modifiers; + external_only = NULL; + modifiers = NULL; + } + + for (i = 0; i < max; i++) { + if (modifiers) + modifiers[num] = screen->supported_modifiers[i]; + + if (external_only) + external_only[num] = 0; + + num++; + } + + *count = num; +} + struct fd_bo * fd_screen_bo_from_handle(struct pipe_screen *pscreen, struct winsys_handle *whandle) @@ -696,14 +722,13 @@ } struct pipe_screen * -fd_screen_create(struct fd_device *dev) +fd_screen_create(struct fd_device *dev, struct renderonly *ro) { struct fd_screen *screen = CALLOC_STRUCT(fd_screen); struct pipe_screen *pscreen; uint64_t val; fd_mesa_debug = debug_get_option_fd_mesa_debug(); - fd_shader_debug = debug_get_option_fd_shader_debug(); if (fd_mesa_debug & FD_DBG_NOBIN) fd_binning_enabled = false; @@ -718,6 +743,14 @@ screen->dev = dev; screen->refcnt = 1; + if (ro) { + screen->ro = renderonly_dup(ro); + if (!screen->ro) { + DBG("could not create renderonly object"); + goto fail; + } + } + // maybe this should be in context? 
screen->pipe = fd_pipe_new(screen->dev, FD_PIPE_3D); if (!screen->pipe) { @@ -796,6 +829,8 @@ * send a patch ;-) */ switch (screen->gpu_id) { + case 200: + case 201: case 205: case 220: fd2_screen_init(pscreen); @@ -867,6 +902,17 @@ pscreen->fence_finish = fd_fence_finish; pscreen->fence_get_fd = fd_fence_get_fd; + pscreen->query_dmabuf_modifiers = fd_screen_query_dmabuf_modifiers; + + if (!screen->supported_modifiers) { + static const uint64_t supported_modifiers[] = { + DRM_FORMAT_MOD_LINEAR, + }; + + screen->supported_modifiers = supported_modifiers; + screen->num_supported_modifiers = ARRAY_SIZE(supported_modifiers); + } + slab_create_parent(&screen->transfer_pool, sizeof(struct fd_transfer), 16); return pscreen; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_screen.h mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_screen.h --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_screen.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_screen.h 2019-03-31 23:16:37.000000000 +0000 @@ -34,6 +34,7 @@ #include "util/u_memory.h" #include "util/slab.h" #include "os/os_thread.h" +#include "renderonly/renderonly.h" #include "freedreno_batch_cache.h" #include "freedreno_perfcntr.h" @@ -87,6 +88,7 @@ */ struct fd_pipe *pipe; + uint32_t (*fill_ubwc_buffer_sizes)(struct fd_resource *rsc); uint32_t (*setup_slices)(struct fd_resource *rsc); unsigned (*tile_mode)(const struct pipe_resource *prsc); @@ -97,6 +99,11 @@ bool reorder; uint16_t rsc_seqno; + + unsigned num_supported_modifiers; + const uint64_t *supported_modifiers; + + struct renderonly *ro; }; static inline struct fd_screen * @@ -107,12 +114,14 @@ boolean fd_screen_bo_get_handle(struct pipe_screen *pscreen, struct fd_bo *bo, + struct renderonly_scanout *scanout, unsigned stride, struct winsys_handle *whandle); struct fd_bo * fd_screen_bo_from_handle(struct pipe_screen *pscreen, struct winsys_handle *whandle); -struct pipe_screen * fd_screen_create(struct 
fd_device *dev); +struct pipe_screen * +fd_screen_create(struct fd_device *dev, struct renderonly *ro); static inline boolean is_a20x(struct fd_screen *screen) @@ -120,6 +129,12 @@ return (screen->gpu_id >= 200) && (screen->gpu_id < 210); } +static inline boolean +is_a2xx(struct fd_screen *screen) +{ + return (screen->gpu_id >= 200) && (screen->gpu_id < 300); +} + /* is a3xx patch revision 0? */ /* TODO a306.0 probably doesn't need this.. be more clever?? */ static inline boolean diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_state.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_state.c --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_state.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_state.c 2019-03-31 23:16:37.000000000 +0000 @@ -288,7 +288,36 @@ const struct pipe_viewport_state *viewport) { struct fd_context *ctx = fd_context(pctx); + struct pipe_scissor_state *scissor = &ctx->viewport_scissor; + float minx, miny, maxx, maxy; + ctx->viewport = *viewport; + + /* see si_get_scissor_from_viewport(): */ + + /* Convert (-1, -1) and (1, 1) from clip space into window space. */ + minx = -viewport->scale[0] + viewport->translate[0]; + miny = -viewport->scale[1] + viewport->translate[1]; + maxx = viewport->scale[0] + viewport->translate[0]; + maxy = viewport->scale[1] + viewport->translate[1]; + + /* Handle inverted viewports. */ + if (minx > maxx) { + swap(minx, maxx); + } + if (miny > maxy) { + swap(miny, maxy); + } + + debug_assert(miny >= 0); + debug_assert(maxy >= 0); + + /* Convert to integer and round up the max bounds. 
*/ + scissor->minx = minx; + scissor->miny = miny; + scissor->maxx = ceilf(maxx); + scissor->maxy = ceilf(maxy); + ctx->dirty |= FD_DIRTY_VIEWPORT; } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_state.h mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_state.h --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_state.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_state.h 2019-03-31 23:16:37.000000000 +0000 @@ -35,6 +35,11 @@ return ctx->zsa && ctx->zsa->depth.enabled; } +static inline bool fd_depth_write_enabled(struct fd_context *ctx) +{ + return ctx->zsa && ctx->zsa->depth.writemask; +} + static inline bool fd_stencil_enabled(struct fd_context *ctx) { return ctx->zsa && ctx->zsa->stencil[0].enabled; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_surface.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_surface.c --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_surface.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_surface.c 2019-03-31 23:16:37.000000000 +0000 @@ -53,6 +53,7 @@ psurf->format = surf_tmpl->format; psurf->width = u_minify(ptex->width0, level); psurf->height = u_minify(ptex->height0, level); + psurf->nr_samples = surf_tmpl->nr_samples; if (ptex->target == PIPE_BUFFER) { psurf->u.buf.first_element = surf_tmpl->u.buf.first_element; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_surface.h mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_surface.h --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_surface.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_surface.h 2019-03-31 23:16:37.000000000 +0000 @@ -31,11 +31,6 @@ struct fd_surface { struct pipe_surface base; - uint32_t offset; - uint32_t pitch; - uint32_t width; - uint16_t height; - uint16_t depth; }; static inline struct fd_surface * diff -Nru 
mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_texture.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_texture.c --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_texture.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_texture.c 2019-03-31 23:16:37.000000000 +0000 @@ -31,6 +31,7 @@ #include "freedreno_texture.h" #include "freedreno_context.h" +#include "freedreno_resource.h" #include "freedreno_util.h" static void @@ -83,7 +84,7 @@ tex->num_textures = util_last_bit(tex->valid_textures); for (i = 0; i < tex->num_textures; i++) { - uint nr_samples = tex->textures[i]->texture->nr_samples; + uint nr_samples = fd_resource_nr_samples(tex->textures[i]->texture); samplers |= (nr_samples >> 1) << (i * 2); } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_util.h mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_util.h --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_util.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_util.h 2019-03-31 23:16:37.000000000 +0000 @@ -70,7 +70,7 @@ #define FD_DBG_NOBYPASS 0x0040 #define FD_DBG_FRAGHALF 0x0080 #define FD_DBG_NOBIN 0x0100 -#define FD_DBG_OPTMSGS 0x0200 +/* unused 0x0200 */ #define FD_DBG_GLSL120 0x0400 #define FD_DBG_SHADERDB 0x0800 #define FD_DBG_FLUSH 0x1000 @@ -114,15 +114,19 @@ } static inline uint32_t DRAW_A20X(enum pc_di_primtype prim_type, + enum pc_di_face_cull_sel faceness_cull_select, enum pc_di_src_sel source_select, enum pc_di_index_size index_size, - enum pc_di_vis_cull_mode vis_cull_mode, + bool pre_fetch_cull_enable, + bool grp_cull_enable, uint16_t count) { return (prim_type << 0) | (source_select << 6) | + (faceness_cull_select << 8) | ((index_size & 1) << 11) | ((index_size >> 1) << 13) | - (vis_cull_mode << 9) | + (pre_fetch_cull_enable << 14) | + (grp_cull_enable << 15) | (count << 16); } @@ -194,6 +198,18 @@ return true; } +/* Note sure if this is same on all gens, but seems to be 
same on the later + * gen's + */ +static inline unsigned +fd_calc_guardband(unsigned x) +{ + float l = log2(x); + if (l <= 8) + return 511; + return 511 - ((l - 8) * 65); +} + #define LOG_DWORDS 0 static inline void emit_marker(struct fd_ringbuffer *ring, int scratch_idx); @@ -228,8 +244,8 @@ */ static inline void -OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, - uint32_t offset, uint64_t or, int32_t shift) +__out_reloc(struct fd_ringbuffer *ring, struct fd_bo *bo, + uint32_t offset, uint64_t or, int32_t shift, uint32_t flags) { if (LOG_DWORDS) { DBG("ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring, @@ -238,7 +254,7 @@ debug_assert(offset < fd_bo_size(bo)); fd_ringbuffer_reloc(ring, &(struct fd_reloc){ .bo = bo, - .flags = FD_RELOC_READ, + .flags = flags, .offset = offset, .or = or, .shift = shift, @@ -247,22 +263,24 @@ } static inline void +OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, + uint32_t offset, uint64_t or, int32_t shift) +{ + __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ); +} + +static inline void OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo, uint32_t offset, uint64_t or, int32_t shift) { - if (LOG_DWORDS) { - DBG("ring[%p]: OUT_RELOCW %04x: %p+%u << %d", ring, - (uint32_t)(ring->cur - ring->start), bo, offset, shift); - } - debug_assert(offset < fd_bo_size(bo)); - fd_ringbuffer_reloc(ring, &(struct fd_reloc){ - .bo = bo, - .flags = FD_RELOC_READ | FD_RELOC_WRITE, - .offset = offset, - .or = or, - .shift = shift, - .orhi = or >> 32, - }); + __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_WRITE); +} + +static inline void +OUT_RELOCD(struct fd_ringbuffer *ring, struct fd_bo *bo, + uint32_t offset, uint64_t or, int32_t shift) +{ + __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_DUMP); } static inline void @@ -411,18 +429,6 @@ OUT_RING(ring, ++marker_cnt); } -/* helper to get numeric value from environment variable.. 
mostly - * just leaving this here because it is helpful to brute-force figure - * out unknown formats, etc, which blob driver does not support: - */ -static inline uint32_t env2u(const char *envvar) -{ - char *str = getenv(envvar); - if (str) - return strtoul(str, NULL, 0); - return 0; -} - static inline uint32_t pack_rgba(enum pipe_format format, const float *rgba) { @@ -453,9 +459,11 @@ switch (samples) { default: debug_assert(0); + case 0: case 1: return MSAA_ONE; case 2: return MSAA_TWO; case 4: return MSAA_FOUR; + case 8: return MSAA_EIGHT; } } @@ -464,14 +472,15 @@ */ static inline enum a4xx_state_block -fd4_stage2shadersb(enum shader_t type) +fd4_stage2shadersb(gl_shader_stage type) { switch (type) { - case SHADER_VERTEX: + case MESA_SHADER_VERTEX: return SB4_VS_SHADER; - case SHADER_FRAGMENT: + case MESA_SHADER_FRAGMENT: return SB4_FS_SHADER; - case SHADER_COMPUTE: + case MESA_SHADER_COMPUTE: + case MESA_SHADER_KERNEL: return SB4_CS_SHADER; default: unreachable("bad shader type"); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,1033 +0,0 @@ -/* - * Copyright (c) 2013 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial 
portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include - -#include - -#include "disasm.h" -#include "instr-a3xx.h" - -static enum debug_t debug; - -#define printf debug_printf - -static const char *levels[] = { - "", - "\t", - "\t\t", - "\t\t\t", - "\t\t\t\t", - "\t\t\t\t\t", - "\t\t\t\t\t\t", - "\t\t\t\t\t\t\t", - "\t\t\t\t\t\t\t\t", - "\t\t\t\t\t\t\t\t\t", - "x", - "x", - "x", - "x", - "x", - "x", -}; - -static const char *component = "xyzw"; - -static const char *type[] = { - [TYPE_F16] = "f16", - [TYPE_F32] = "f32", - [TYPE_U16] = "u16", - [TYPE_U32] = "u32", - [TYPE_S16] = "s16", - [TYPE_S32] = "s32", - [TYPE_U8] = "u8", - [TYPE_S8] = "s8", -}; - -struct disasm_ctx { - FILE *out; - int level; - - /* current instruction repeat flag: */ - unsigned repeat; -}; - -static void print_reg(struct disasm_ctx *ctx, reg_t reg, bool full, bool r, - bool c, bool im, bool neg, bool abs, bool addr_rel) -{ - const char type = c ? 'c' : 'r'; - - // XXX I prefer - and || for neg/abs, but preserving format used - // by libllvm-a3xx for easy diffing.. - - if (abs && neg) - fprintf(ctx->out, "(absneg)"); - else if (neg) - fprintf(ctx->out, "(neg)"); - else if (abs) - fprintf(ctx->out, "(abs)"); - - if (r) - fprintf(ctx->out, "(r)"); - - if (im) { - fprintf(ctx->out, "%d", reg.iim_val); - } else if (addr_rel) { - /* I would just use %+d but trying to make it diff'able with - * libllvm-a3xx... - */ - if (reg.iim_val < 0) - fprintf(ctx->out, "%s%c", full ? 
"" : "h", type, -reg.iim_val); - else if (reg.iim_val > 0) - fprintf(ctx->out, "%s%c", full ? "" : "h", type, reg.iim_val); - else - fprintf(ctx->out, "%s%c", full ? "" : "h", type); - } else if ((reg.num == REG_A0) && !c) { - fprintf(ctx->out, "a0.%c", component[reg.comp]); - } else if ((reg.num == REG_P0) && !c) { - fprintf(ctx->out, "p0.%c", component[reg.comp]); - } else { - fprintf(ctx->out, "%s%c%d.%c", full ? "" : "h", type, reg.num & 0x3f, component[reg.comp]); - } -} - - -static void print_reg_dst(struct disasm_ctx *ctx, reg_t reg, bool full, bool addr_rel) -{ - print_reg(ctx, reg, full, false, false, false, false, false, addr_rel); -} - -static void print_reg_src(struct disasm_ctx *ctx, reg_t reg, bool full, bool r, - bool c, bool im, bool neg, bool abs, bool addr_rel) -{ - print_reg(ctx, reg, full, r, c, im, neg, abs, addr_rel); -} - -/* TODO switch to using reginfo struct everywhere, since more readable - * than passing a bunch of bools to print_reg_src - */ - -struct reginfo { - reg_t reg; - bool full; - bool r; - bool c; - bool im; - bool neg; - bool abs; - bool addr_rel; -}; - -static void print_src(struct disasm_ctx *ctx, struct reginfo *info) -{ - print_reg_src(ctx, info->reg, info->full, info->r, info->c, info->im, - info->neg, info->abs, info->addr_rel); -} - -//static void print_dst(struct disasm_ctx *ctx, struct reginfo *info) -//{ -// print_reg_dst(ctx, info->reg, info->full, info->addr_rel); -//} - -static void print_instr_cat0(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat0_t *cat0 = &instr->cat0; - - switch (cat0->opc) { - case OPC_KILL: - fprintf(ctx->out, " %sp0.%c", cat0->inv ? "!" : "", - component[cat0->comp]); - break; - case OPC_BR: - fprintf(ctx->out, " %sp0.%c, #%d", cat0->inv ? "!" 
: "", - component[cat0->comp], cat0->a3xx.immed); - break; - case OPC_JUMP: - case OPC_CALL: - fprintf(ctx->out, " #%d", cat0->a3xx.immed); - break; - } - - if ((debug & PRINT_VERBOSE) && (cat0->dummy2|cat0->dummy3|cat0->dummy4)) - fprintf(ctx->out, "\t{0: %x,%x,%x}", cat0->dummy2, cat0->dummy3, cat0->dummy4); -} - -static void print_instr_cat1(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat1_t *cat1 = &instr->cat1; - - if (cat1->ul) - fprintf(ctx->out, "(ul)"); - - if (cat1->src_type == cat1->dst_type) { - if ((cat1->src_type == TYPE_S16) && (((reg_t)cat1->dst).num == REG_A0)) { - /* special case (nmemonic?): */ - fprintf(ctx->out, "mova"); - } else { - fprintf(ctx->out, "mov.%s%s", type[cat1->src_type], type[cat1->dst_type]); - } - } else { - fprintf(ctx->out, "cov.%s%s", type[cat1->src_type], type[cat1->dst_type]); - } - - fprintf(ctx->out, " "); - - if (cat1->even) - fprintf(ctx->out, "(even)"); - - if (cat1->pos_inf) - fprintf(ctx->out, "(pos_infinity)"); - - print_reg_dst(ctx, (reg_t)(cat1->dst), type_size(cat1->dst_type) == 32, - cat1->dst_rel); - - fprintf(ctx->out, ", "); - - /* ugg, have to special case this.. vs print_reg().. */ - if (cat1->src_im) { - if (type_float(cat1->src_type)) - fprintf(ctx->out, "(%f)", cat1->fim_val); - else if (type_uint(cat1->src_type)) - fprintf(ctx->out, "0x%08x", cat1->uim_val); - else - fprintf(ctx->out, "%d", cat1->iim_val); - } else if (cat1->src_rel && !cat1->src_c) { - /* I would just use %+d but trying to make it diff'able with - * libllvm-a3xx... - */ - char type = cat1->src_rel_c ? 
'c' : 'r'; - if (cat1->off < 0) - fprintf(ctx->out, "%c", type, -cat1->off); - else if (cat1->off > 0) - fprintf(ctx->out, "%c", type, cat1->off); - else - fprintf(ctx->out, "%c", type); - } else { - print_reg_src(ctx, (reg_t)(cat1->src), type_size(cat1->src_type) == 32, - cat1->src_r, cat1->src_c, cat1->src_im, false, false, false); - } - - if ((debug & PRINT_VERBOSE) && (cat1->must_be_0)) - fprintf(ctx->out, "\t{1: %x}", cat1->must_be_0); -} - -static void print_instr_cat2(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat2_t *cat2 = &instr->cat2; - static const char *cond[] = { - "lt", - "le", - "gt", - "ge", - "eq", - "ne", - "?6?", - }; - - switch (_OPC(2, cat2->opc)) { - case OPC_CMPS_F: - case OPC_CMPS_U: - case OPC_CMPS_S: - case OPC_CMPV_F: - case OPC_CMPV_U: - case OPC_CMPV_S: - fprintf(ctx->out, ".%s", cond[cat2->cond]); - break; - } - - fprintf(ctx->out, " "); - if (cat2->ei) - fprintf(ctx->out, "(ei)"); - print_reg_dst(ctx, (reg_t)(cat2->dst), cat2->full ^ cat2->dst_half, false); - fprintf(ctx->out, ", "); - - if (cat2->c1.src1_c) { - print_reg_src(ctx, (reg_t)(cat2->c1.src1), cat2->full, cat2->src1_r, - cat2->c1.src1_c, cat2->src1_im, cat2->src1_neg, - cat2->src1_abs, false); - } else if (cat2->rel1.src1_rel) { - print_reg_src(ctx, (reg_t)(cat2->rel1.src1), cat2->full, cat2->src1_r, - cat2->rel1.src1_c, cat2->src1_im, cat2->src1_neg, - cat2->src1_abs, cat2->rel1.src1_rel); - } else { - print_reg_src(ctx, (reg_t)(cat2->src1), cat2->full, cat2->src1_r, - false, cat2->src1_im, cat2->src1_neg, - cat2->src1_abs, false); - } - - switch (_OPC(2, cat2->opc)) { - case OPC_ABSNEG_F: - case OPC_ABSNEG_S: - case OPC_CLZ_B: - case OPC_CLZ_S: - case OPC_SIGN_F: - case OPC_FLOOR_F: - case OPC_CEIL_F: - case OPC_RNDNE_F: - case OPC_RNDAZ_F: - case OPC_TRUNC_F: - case OPC_NOT_B: - case OPC_BFREV_B: - case OPC_SETRM: - case OPC_CBITS_B: - /* these only have one src reg */ - break; - default: - fprintf(ctx->out, ", "); - if (cat2->c2.src2_c) { - print_reg_src(ctx, 
(reg_t)(cat2->c2.src2), cat2->full, cat2->src2_r, - cat2->c2.src2_c, cat2->src2_im, cat2->src2_neg, - cat2->src2_abs, false); - } else if (cat2->rel2.src2_rel) { - print_reg_src(ctx, (reg_t)(cat2->rel2.src2), cat2->full, cat2->src2_r, - cat2->rel2.src2_c, cat2->src2_im, cat2->src2_neg, - cat2->src2_abs, cat2->rel2.src2_rel); - } else { - print_reg_src(ctx, (reg_t)(cat2->src2), cat2->full, cat2->src2_r, - false, cat2->src2_im, cat2->src2_neg, - cat2->src2_abs, false); - } - break; - } -} - -static void print_instr_cat3(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat3_t *cat3 = &instr->cat3; - bool full = instr_cat3_full(cat3); - - fprintf(ctx->out, " "); - print_reg_dst(ctx, (reg_t)(cat3->dst), full ^ cat3->dst_half, false); - fprintf(ctx->out, ", "); - if (cat3->c1.src1_c) { - print_reg_src(ctx, (reg_t)(cat3->c1.src1), full, - cat3->src1_r, cat3->c1.src1_c, false, cat3->src1_neg, - false, false); - } else if (cat3->rel1.src1_rel) { - print_reg_src(ctx, (reg_t)(cat3->rel1.src1), full, - cat3->src1_r, cat3->rel1.src1_c, false, cat3->src1_neg, - false, cat3->rel1.src1_rel); - } else { - print_reg_src(ctx, (reg_t)(cat3->src1), full, - cat3->src1_r, false, false, cat3->src1_neg, - false, false); - } - fprintf(ctx->out, ", "); - print_reg_src(ctx, (reg_t)cat3->src2, full, - cat3->src2_r, cat3->src2_c, false, cat3->src2_neg, - false, false); - fprintf(ctx->out, ", "); - if (cat3->c2.src3_c) { - print_reg_src(ctx, (reg_t)(cat3->c2.src3), full, - cat3->src3_r, cat3->c2.src3_c, false, cat3->src3_neg, - false, false); - } else if (cat3->rel2.src3_rel) { - print_reg_src(ctx, (reg_t)(cat3->rel2.src3), full, - cat3->src3_r, cat3->rel2.src3_c, false, cat3->src3_neg, - false, cat3->rel2.src3_rel); - } else { - print_reg_src(ctx, (reg_t)(cat3->src3), full, - cat3->src3_r, false, false, cat3->src3_neg, - false, false); - } -} - -static void print_instr_cat4(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat4_t *cat4 = &instr->cat4; - - fprintf(ctx->out, " "); - 
print_reg_dst(ctx, (reg_t)(cat4->dst), cat4->full ^ cat4->dst_half, false); - fprintf(ctx->out, ", "); - - if (cat4->c.src_c) { - print_reg_src(ctx, (reg_t)(cat4->c.src), cat4->full, - cat4->src_r, cat4->c.src_c, cat4->src_im, - cat4->src_neg, cat4->src_abs, false); - } else if (cat4->rel.src_rel) { - print_reg_src(ctx, (reg_t)(cat4->rel.src), cat4->full, - cat4->src_r, cat4->rel.src_c, cat4->src_im, - cat4->src_neg, cat4->src_abs, cat4->rel.src_rel); - } else { - print_reg_src(ctx, (reg_t)(cat4->src), cat4->full, - cat4->src_r, false, cat4->src_im, - cat4->src_neg, cat4->src_abs, false); - } - - if ((debug & PRINT_VERBOSE) && (cat4->dummy1|cat4->dummy2)) - fprintf(ctx->out, "\t{4: %x,%x}", cat4->dummy1, cat4->dummy2); -} - -static void print_instr_cat5(struct disasm_ctx *ctx, instr_t *instr) -{ - static const struct { - bool src1, src2, samp, tex; - } info[0x1f] = { - [opc_op(OPC_ISAM)] = { true, false, true, true, }, - [opc_op(OPC_ISAML)] = { true, true, true, true, }, - [opc_op(OPC_ISAMM)] = { true, false, true, true, }, - [opc_op(OPC_SAM)] = { true, false, true, true, }, - [opc_op(OPC_SAMB)] = { true, true, true, true, }, - [opc_op(OPC_SAML)] = { true, true, true, true, }, - [opc_op(OPC_SAMGQ)] = { true, false, true, true, }, - [opc_op(OPC_GETLOD)] = { true, false, true, true, }, - [opc_op(OPC_CONV)] = { true, true, true, true, }, - [opc_op(OPC_CONVM)] = { true, true, true, true, }, - [opc_op(OPC_GETSIZE)] = { true, false, false, true, }, - [opc_op(OPC_GETBUF)] = { false, false, false, true, }, - [opc_op(OPC_GETPOS)] = { true, false, false, true, }, - [opc_op(OPC_GETINFO)] = { false, false, false, true, }, - [opc_op(OPC_DSX)] = { true, false, false, false, }, - [opc_op(OPC_DSY)] = { true, false, false, false, }, - [opc_op(OPC_GATHER4R)] = { true, false, true, true, }, - [opc_op(OPC_GATHER4G)] = { true, false, true, true, }, - [opc_op(OPC_GATHER4B)] = { true, false, true, true, }, - [opc_op(OPC_GATHER4A)] = { true, false, true, true, }, - [opc_op(OPC_SAMGP0)] = 
{ true, false, true, true, }, - [opc_op(OPC_SAMGP1)] = { true, false, true, true, }, - [opc_op(OPC_SAMGP2)] = { true, false, true, true, }, - [opc_op(OPC_SAMGP3)] = { true, false, true, true, }, - [opc_op(OPC_DSXPP_1)] = { true, false, false, false, }, - [opc_op(OPC_DSYPP_1)] = { true, false, false, false, }, - [opc_op(OPC_RGETPOS)] = { false, false, false, false, }, - [opc_op(OPC_RGETINFO)] = { false, false, false, false, }, - }; - instr_cat5_t *cat5 = &instr->cat5; - int i; - - if (cat5->is_3d) fprintf(ctx->out, ".3d"); - if (cat5->is_a) fprintf(ctx->out, ".a"); - if (cat5->is_o) fprintf(ctx->out, ".o"); - if (cat5->is_p) fprintf(ctx->out, ".p"); - if (cat5->is_s) fprintf(ctx->out, ".s"); - if (cat5->is_s2en) fprintf(ctx->out, ".s2en"); - - fprintf(ctx->out, " "); - - switch (_OPC(5, cat5->opc)) { - case OPC_DSXPP_1: - case OPC_DSYPP_1: - break; - default: - fprintf(ctx->out, "(%s)", type[cat5->type]); - break; - } - - fprintf(ctx->out, "("); - for (i = 0; i < 4; i++) - if (cat5->wrmask & (1 << i)) - fprintf(ctx->out, "%c", "xyzw"[i]); - fprintf(ctx->out, ")"); - - print_reg_dst(ctx, (reg_t)(cat5->dst), type_size(cat5->type) == 32, false); - - if (info[cat5->opc].src1) { - fprintf(ctx->out, ", "); - print_reg_src(ctx, (reg_t)(cat5->src1), cat5->full, false, false, false, - false, false, false); - } - - if (cat5->is_s2en) { - fprintf(ctx->out, ", "); - print_reg_src(ctx, (reg_t)(cat5->s2en.src2), cat5->full, false, false, false, - false, false, false); - fprintf(ctx->out, ", "); - print_reg_src(ctx, (reg_t)(cat5->s2en.src3), false, false, false, false, - false, false, false); - } else { - if (cat5->is_o || info[cat5->opc].src2) { - fprintf(ctx->out, ", "); - print_reg_src(ctx, (reg_t)(cat5->norm.src2), cat5->full, - false, false, false, false, false, false); - } - if (info[cat5->opc].samp) - fprintf(ctx->out, ", s#%d", cat5->norm.samp); - if (info[cat5->opc].tex) - fprintf(ctx->out, ", t#%d", cat5->norm.tex); - } - - if (debug & PRINT_VERBOSE) { - if 
(cat5->is_s2en) { - if ((debug & PRINT_VERBOSE) && (cat5->s2en.dummy1|cat5->s2en.dummy2|cat5->dummy2)) - fprintf(ctx->out, "\t{5: %x,%x,%x}", cat5->s2en.dummy1, cat5->s2en.dummy2, cat5->dummy2); - } else { - if ((debug & PRINT_VERBOSE) && (cat5->norm.dummy1|cat5->dummy2)) - fprintf(ctx->out, "\t{5: %x,%x}", cat5->norm.dummy1, cat5->dummy2); - } - } -} - -static void print_instr_cat6(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat6_t *cat6 = &instr->cat6; - char sd = 0, ss = 0; /* dst/src address space */ - bool nodst = false; - struct reginfo dst, src1, src2; - int src1off = 0, dstoff = 0; - - memset(&dst, 0, sizeof(dst)); - memset(&src1, 0, sizeof(src1)); - memset(&src2, 0, sizeof(src2)); - - switch (_OPC(6, cat6->opc)) { - case OPC_RESINFO: - case OPC_RESFMT: - dst.full = type_size(cat6->type) == 32; - src1.full = type_size(cat6->type) == 32; - src2.full = type_size(cat6->type) == 32; - break; - case OPC_L2G: - case OPC_G2L: - dst.full = true; - src1.full = true; - src2.full = true; - break; - case OPC_STG: - case OPC_STL: - case OPC_STP: - case OPC_STI: - case OPC_STLW: - case OPC_STIB: - dst.full = true; - src1.full = type_size(cat6->type) == 32; - src2.full = type_size(cat6->type) == 32; - break; - default: - dst.full = type_size(cat6->type) == 32; - src1.full = true; - src2.full = true; - break; - } - - switch (_OPC(6, cat6->opc)) { - case OPC_PREFETCH: - break; - case OPC_RESINFO: - fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); - break; - case OPC_LDGB: - fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped"); - fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); - fprintf(ctx->out, ".%s", type[cat6->type]); - fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1); - break; - case OPC_STGB: - case OPC_STIB: - fprintf(ctx->out, ".%s", cat6->stgb.typed ? 
"typed" : "untyped"); - fprintf(ctx->out, ".%dd", cat6->stgb.d + 1); - fprintf(ctx->out, ".%s", type[cat6->type]); - fprintf(ctx->out, ".%d", cat6->stgb.type_size + 1); - break; - case OPC_ATOMIC_ADD: - case OPC_ATOMIC_SUB: - case OPC_ATOMIC_XCHG: - case OPC_ATOMIC_INC: - case OPC_ATOMIC_DEC: - case OPC_ATOMIC_CMPXCHG: - case OPC_ATOMIC_MIN: - case OPC_ATOMIC_MAX: - case OPC_ATOMIC_AND: - case OPC_ATOMIC_OR: - case OPC_ATOMIC_XOR: - ss = cat6->g ? 'g' : 'l'; - fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped"); - fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); - fprintf(ctx->out, ".%s", type[cat6->type]); - fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1); - fprintf(ctx->out, ".%c", ss); - break; - default: - dst.im = cat6->g && !cat6->dst_off; - fprintf(ctx->out, ".%s", type[cat6->type]); - break; - } - fprintf(ctx->out, " "); - - switch (_OPC(6, cat6->opc)) { - case OPC_STG: - sd = 'g'; - break; - case OPC_STP: - sd = 'p'; - break; - case OPC_STL: - case OPC_STLW: - sd = 'l'; - break; - - case OPC_LDG: - case OPC_LDC: - ss = 'g'; - break; - case OPC_LDP: - ss = 'p'; - break; - case OPC_LDL: - case OPC_LDLW: - case OPC_LDLV: - ss = 'l'; - break; - - case OPC_L2G: - ss = 'l'; - sd = 'g'; - break; - - case OPC_G2L: - ss = 'g'; - sd = 'l'; - break; - - case OPC_PREFETCH: - ss = 'g'; - nodst = true; - break; - - case OPC_STI: - dst.full = false; // XXX or inverts?? 
- break; - } - - if ((_OPC(6, cat6->opc) == OPC_STGB) || (_OPC(6, cat6->opc) == OPC_STIB)) { - struct reginfo src3; - - memset(&src3, 0, sizeof(src3)); - - src1.reg = (reg_t)(cat6->stgb.src1); - src2.reg = (reg_t)(cat6->stgb.src2); - src2.im = cat6->stgb.src2_im; - src3.reg = (reg_t)(cat6->stgb.src3); - src3.im = cat6->stgb.src3_im; - src3.full = true; - - fprintf(ctx->out, "g[%u], ", cat6->stgb.dst_ssbo); - print_src(ctx, &src1); - fprintf(ctx->out, ", "); - print_src(ctx, &src2); - fprintf(ctx->out, ", "); - print_src(ctx, &src3); - - if (debug & PRINT_VERBOSE) - fprintf(ctx->out, " (pad0=%x, pad3=%x)", cat6->stgb.pad0, cat6->stgb.pad3); - - return; - } - - if (is_atomic(_OPC(6, cat6->opc))) { - - src1.reg = (reg_t)(cat6->ldgb.src1); - src1.im = cat6->ldgb.src1_im; - src2.reg = (reg_t)(cat6->ldgb.src2); - src2.im = cat6->ldgb.src2_im; - dst.reg = (reg_t)(cat6->ldgb.dst); - - print_src(ctx, &dst); - fprintf(ctx->out, ", "); - if (ss == 'g') { - struct reginfo src3; - memset(&src3, 0, sizeof(src3)); - - src3.reg = (reg_t)(cat6->ldgb.src3); - src3.full = true; - - /* For images, the ".typed" variant is used and src2 is - * the ivecN coordinates, ie ivec2 for 2d. - * - * For SSBOs, the ".untyped" variant is used and src2 is - * a simple dword offset.. src3 appears to be - * uvec2(offset * 4, 0). Not sure the point of that. - */ - - fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo); - print_src(ctx, &src1); /* value */ - fprintf(ctx->out, ", "); - print_src(ctx, &src2); /* offset/coords */ - fprintf(ctx->out, ", "); - print_src(ctx, &src3); /* 64b byte offset.. 
*/ - - if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0, - cat6->ldgb.pad3, cat6->ldgb.mustbe0); - } - } else { /* ss == 'l' */ - fprintf(ctx->out, "l["); - print_src(ctx, &src1); /* simple byte offset */ - fprintf(ctx->out, "], "); - print_src(ctx, &src2); /* value */ - - if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, " (src3=%x, pad0=%x, pad3=%x, mustbe0=%x)", - cat6->ldgb.src3, cat6->ldgb.pad0, - cat6->ldgb.pad3, cat6->ldgb.mustbe0); - } - } - - return; - } else if (_OPC(6, cat6->opc) == OPC_RESINFO) { - dst.reg = (reg_t)(cat6->ldgb.dst); - - print_src(ctx, &dst); - fprintf(ctx->out, ", "); - fprintf(ctx->out, "g[%u]", cat6->ldgb.src_ssbo); - - return; - } else if (_OPC(6, cat6->opc) == OPC_LDGB) { - - src1.reg = (reg_t)(cat6->ldgb.src1); - src1.im = cat6->ldgb.src1_im; - src2.reg = (reg_t)(cat6->ldgb.src2); - src2.im = cat6->ldgb.src2_im; - dst.reg = (reg_t)(cat6->ldgb.dst); - - print_src(ctx, &dst); - fprintf(ctx->out, ", "); - fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo); - print_src(ctx, &src1); - fprintf(ctx->out, ", "); - print_src(ctx, &src2); - - if (debug & PRINT_VERBOSE) - fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0, cat6->ldgb.pad3, cat6->ldgb.mustbe0); - - return; - } - if (cat6->dst_off) { - dst.reg = (reg_t)(cat6->c.dst); - dstoff = cat6->c.off; - } else { - dst.reg = (reg_t)(cat6->d.dst); - } - - if (cat6->src_off) { - src1.reg = (reg_t)(cat6->a.src1); - src1.im = cat6->a.src1_im; - src2.reg = (reg_t)(cat6->a.src2); - src2.im = cat6->a.src2_im; - src1off = cat6->a.off; - } else { - src1.reg = (reg_t)(cat6->b.src1); - src1.im = cat6->b.src1_im; - src2.reg = (reg_t)(cat6->b.src2); - src2.im = cat6->b.src2_im; - } - - if (!nodst) { - if (sd) - fprintf(ctx->out, "%c[", sd); - /* note: dst might actually be a src (ie. 
address to store to) */ - print_src(ctx, &dst); - if (dstoff) - fprintf(ctx->out, "%+d", dstoff); - if (sd) - fprintf(ctx->out, "]"); - fprintf(ctx->out, ", "); - } - - if (ss) - fprintf(ctx->out, "%c[", ss); - - /* can have a larger than normal immed, so hack: */ - if (src1.im) { - fprintf(ctx->out, "%u", src1.reg.dummy13); - } else { - print_src(ctx, &src1); - } - - if (src1off) - fprintf(ctx->out, "%+d", src1off); - if (ss) - fprintf(ctx->out, "]"); - - switch (_OPC(6, cat6->opc)) { - case OPC_RESINFO: - case OPC_RESFMT: - break; - default: - fprintf(ctx->out, ", "); - print_src(ctx, &src2); - break; - } -} - -static void print_instr_cat7(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat7_t *cat7 = &instr->cat7; - - if (cat7->g) - fprintf(ctx->out, ".g"); - if (cat7->l) - fprintf(ctx->out, ".l"); - - if (_OPC(7, cat7->opc) == OPC_FENCE) { - if (cat7->r) - fprintf(ctx->out, ".r"); - if (cat7->w) - fprintf(ctx->out, ".w"); - } -} - -/* size of largest OPC field of all the instruction categories: */ -#define NOPC_BITS 6 - -static const struct opc_info { - uint16_t cat; - uint16_t opc; - const char *name; - void (*print)(struct disasm_ctx *ctx, instr_t *instr); -} opcs[1 << (3+NOPC_BITS)] = { -#define OPC(cat, opc, name) [(opc)] = { (cat), (opc), #name, print_instr_cat##cat } - /* category 0: */ - OPC(0, OPC_NOP, nop), - OPC(0, OPC_BR, br), - OPC(0, OPC_JUMP, jump), - OPC(0, OPC_CALL, call), - OPC(0, OPC_RET, ret), - OPC(0, OPC_KILL, kill), - OPC(0, OPC_END, end), - OPC(0, OPC_EMIT, emit), - OPC(0, OPC_CUT, cut), - OPC(0, OPC_CHMASK, chmask), - OPC(0, OPC_CHSH, chsh), - OPC(0, OPC_FLOW_REV, flow_rev), - - /* category 1: */ - OPC(1, OPC_MOV, ), - - /* category 2: */ - OPC(2, OPC_ADD_F, add.f), - OPC(2, OPC_MIN_F, min.f), - OPC(2, OPC_MAX_F, max.f), - OPC(2, OPC_MUL_F, mul.f), - OPC(2, OPC_SIGN_F, sign.f), - OPC(2, OPC_CMPS_F, cmps.f), - OPC(2, OPC_ABSNEG_F, absneg.f), - OPC(2, OPC_CMPV_F, cmpv.f), - OPC(2, OPC_FLOOR_F, floor.f), - OPC(2, OPC_CEIL_F, ceil.f), - 
OPC(2, OPC_RNDNE_F, rndne.f), - OPC(2, OPC_RNDAZ_F, rndaz.f), - OPC(2, OPC_TRUNC_F, trunc.f), - OPC(2, OPC_ADD_U, add.u), - OPC(2, OPC_ADD_S, add.s), - OPC(2, OPC_SUB_U, sub.u), - OPC(2, OPC_SUB_S, sub.s), - OPC(2, OPC_CMPS_U, cmps.u), - OPC(2, OPC_CMPS_S, cmps.s), - OPC(2, OPC_MIN_U, min.u), - OPC(2, OPC_MIN_S, min.s), - OPC(2, OPC_MAX_U, max.u), - OPC(2, OPC_MAX_S, max.s), - OPC(2, OPC_ABSNEG_S, absneg.s), - OPC(2, OPC_AND_B, and.b), - OPC(2, OPC_OR_B, or.b), - OPC(2, OPC_NOT_B, not.b), - OPC(2, OPC_XOR_B, xor.b), - OPC(2, OPC_CMPV_U, cmpv.u), - OPC(2, OPC_CMPV_S, cmpv.s), - OPC(2, OPC_MUL_U, mul.u), - OPC(2, OPC_MUL_S, mul.s), - OPC(2, OPC_MULL_U, mull.u), - OPC(2, OPC_BFREV_B, bfrev.b), - OPC(2, OPC_CLZ_S, clz.s), - OPC(2, OPC_CLZ_B, clz.b), - OPC(2, OPC_SHL_B, shl.b), - OPC(2, OPC_SHR_B, shr.b), - OPC(2, OPC_ASHR_B, ashr.b), - OPC(2, OPC_BARY_F, bary.f), - OPC(2, OPC_MGEN_B, mgen.b), - OPC(2, OPC_GETBIT_B, getbit.b), - OPC(2, OPC_SETRM, setrm), - OPC(2, OPC_CBITS_B, cbits.b), - OPC(2, OPC_SHB, shb), - OPC(2, OPC_MSAD, msad), - - /* category 3: */ - OPC(3, OPC_MAD_U16, mad.u16), - OPC(3, OPC_MADSH_U16, madsh.u16), - OPC(3, OPC_MAD_S16, mad.s16), - OPC(3, OPC_MADSH_M16, madsh.m16), - OPC(3, OPC_MAD_U24, mad.u24), - OPC(3, OPC_MAD_S24, mad.s24), - OPC(3, OPC_MAD_F16, mad.f16), - OPC(3, OPC_MAD_F32, mad.f32), - OPC(3, OPC_SEL_B16, sel.b16), - OPC(3, OPC_SEL_B32, sel.b32), - OPC(3, OPC_SEL_S16, sel.s16), - OPC(3, OPC_SEL_S32, sel.s32), - OPC(3, OPC_SEL_F16, sel.f16), - OPC(3, OPC_SEL_F32, sel.f32), - OPC(3, OPC_SAD_S16, sad.s16), - OPC(3, OPC_SAD_S32, sad.s32), - - /* category 4: */ - OPC(4, OPC_RCP, rcp), - OPC(4, OPC_RSQ, rsq), - OPC(4, OPC_LOG2, log2), - OPC(4, OPC_EXP2, exp2), - OPC(4, OPC_SIN, sin), - OPC(4, OPC_COS, cos), - OPC(4, OPC_SQRT, sqrt), - - /* category 5: */ - OPC(5, OPC_ISAM, isam), - OPC(5, OPC_ISAML, isaml), - OPC(5, OPC_ISAMM, isamm), - OPC(5, OPC_SAM, sam), - OPC(5, OPC_SAMB, samb), - OPC(5, OPC_SAML, saml), - OPC(5, OPC_SAMGQ, samgq), - 
OPC(5, OPC_GETLOD, getlod), - OPC(5, OPC_CONV, conv), - OPC(5, OPC_CONVM, convm), - OPC(5, OPC_GETSIZE, getsize), - OPC(5, OPC_GETBUF, getbuf), - OPC(5, OPC_GETPOS, getpos), - OPC(5, OPC_GETINFO, getinfo), - OPC(5, OPC_DSX, dsx), - OPC(5, OPC_DSY, dsy), - OPC(5, OPC_GATHER4R, gather4r), - OPC(5, OPC_GATHER4G, gather4g), - OPC(5, OPC_GATHER4B, gather4b), - OPC(5, OPC_GATHER4A, gather4a), - OPC(5, OPC_SAMGP0, samgp0), - OPC(5, OPC_SAMGP1, samgp1), - OPC(5, OPC_SAMGP2, samgp2), - OPC(5, OPC_SAMGP3, samgp3), - OPC(5, OPC_DSXPP_1, dsxpp.1), - OPC(5, OPC_DSYPP_1, dsypp.1), - OPC(5, OPC_RGETPOS, rgetpos), - OPC(5, OPC_RGETINFO, rgetinfo), - - - /* category 6: */ - OPC(6, OPC_LDG, ldg), - OPC(6, OPC_LDL, ldl), - OPC(6, OPC_LDP, ldp), - OPC(6, OPC_STG, stg), - OPC(6, OPC_STL, stl), - OPC(6, OPC_STP, stp), - OPC(6, OPC_STI, sti), - OPC(6, OPC_G2L, g2l), - OPC(6, OPC_L2G, l2g), - OPC(6, OPC_PREFETCH, prefetch), - OPC(6, OPC_LDLW, ldlw), - OPC(6, OPC_STLW, stlw), - OPC(6, OPC_RESFMT, resfmt), - OPC(6, OPC_RESINFO, resinfo), - OPC(6, OPC_ATOMIC_ADD, atomic.add), - OPC(6, OPC_ATOMIC_SUB, atomic.sub), - OPC(6, OPC_ATOMIC_XCHG, atomic.xchg), - OPC(6, OPC_ATOMIC_INC, atomic.inc), - OPC(6, OPC_ATOMIC_DEC, atomic.dec), - OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg), - OPC(6, OPC_ATOMIC_MIN, atomic.min), - OPC(6, OPC_ATOMIC_MAX, atomic.max), - OPC(6, OPC_ATOMIC_AND, atomic.and), - OPC(6, OPC_ATOMIC_OR, atomic.or), - OPC(6, OPC_ATOMIC_XOR, atomic.xor), - OPC(6, OPC_LDGB, ldgb), - OPC(6, OPC_STGB, stgb), - OPC(6, OPC_STIB, stib), - OPC(6, OPC_LDC, ldc), - OPC(6, OPC_LDLV, ldlv), - - OPC(7, OPC_BAR, bar), - OPC(7, OPC_FENCE, fence), - -#undef OPC -}; - -#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr)])) - -// XXX hack.. 
probably should move this table somewhere common: -#include "ir3.h" -const char *ir3_instr_name(struct ir3_instruction *instr) -{ - if (opc_cat(instr->opc) == -1) return "??meta??"; - return opcs[instr->opc].name; -} - -static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n) -{ - instr_t *instr = (instr_t *)dwords; - uint32_t opc = instr_opc(instr); - const char *name; - - if (debug & PRINT_VERBOSE) - fprintf(ctx->out, "%s%04d[%08xx_%08xx] ", levels[ctx->level], n, dwords[1], dwords[0]); - - /* NOTE: order flags are printed is a bit fugly.. but for now I - * try to match the order in llvm-a3xx disassembler for easy - * diff'ing.. - */ - - ctx->repeat = instr_repeat(instr); - - if (instr->sync) - fprintf(ctx->out, "(sy)"); - if (instr->ss && ((instr->opc_cat <= 4) || (instr->opc_cat == 7))) - fprintf(ctx->out, "(ss)"); - if (instr->jmp_tgt) - fprintf(ctx->out, "(jp)"); - if (instr_sat(instr)) - fprintf(ctx->out, "(sat)"); - if (ctx->repeat) - fprintf(ctx->out, "(rpt%d)", ctx->repeat); - if (instr->ul && ((2 <= instr->opc_cat) && (instr->opc_cat <= 4))) - fprintf(ctx->out, "(ul)"); - - name = GETINFO(instr)->name; - - if (name) { - fprintf(ctx->out, "%s", name); - GETINFO(instr)->print(ctx, instr); - } else { - fprintf(ctx->out, "unknown(%d,%d)", instr->opc_cat, opc); - } - - fprintf(ctx->out, "\n"); - - return (instr->opc_cat == 0) && (opc == OPC_END); -} - -int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out) -{ - struct disasm_ctx ctx; - int i; - - assert((sizedwords % 2) == 0); - - memset(&ctx, 0, sizeof(ctx)); - ctx.out = out; - ctx.level = level; - - for (i = 0; i < sizedwords; i += 2) - print_instr(&ctx, &dwords[i], i/2); - - return 0; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/instr-a3xx.h mesa-19.0.1/src/gallium/drivers/freedreno/ir3/instr-a3xx.h --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/instr-a3xx.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/instr-a3xx.h 
1970-01-01 00:00:00.000000000 +0000 @@ -1,869 +0,0 @@ -/* - * Copyright (c) 2013 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef INSTR_A3XX_H_ -#define INSTR_A3XX_H_ - -#define PACKED __attribute__((__packed__)) - -#include -#include -#include - -/* size of largest OPC field of all the instruction categories: */ -#define NOPC_BITS 6 - -#define _OPC(cat, opc) (((cat) << NOPC_BITS) | opc) - -typedef enum { - /* category 0: */ - OPC_NOP = _OPC(0, 0), - OPC_BR = _OPC(0, 1), - OPC_JUMP = _OPC(0, 2), - OPC_CALL = _OPC(0, 3), - OPC_RET = _OPC(0, 4), - OPC_KILL = _OPC(0, 5), - OPC_END = _OPC(0, 6), - OPC_EMIT = _OPC(0, 7), - OPC_CUT = _OPC(0, 8), - OPC_CHMASK = _OPC(0, 9), - OPC_CHSH = _OPC(0, 10), - OPC_FLOW_REV = _OPC(0, 11), - - /* category 1: */ - OPC_MOV = _OPC(1, 0), - - /* category 2: */ - OPC_ADD_F = _OPC(2, 0), - OPC_MIN_F = _OPC(2, 1), - OPC_MAX_F = _OPC(2, 2), - OPC_MUL_F = _OPC(2, 3), - OPC_SIGN_F = _OPC(2, 4), - OPC_CMPS_F = _OPC(2, 5), - OPC_ABSNEG_F = _OPC(2, 6), - OPC_CMPV_F = _OPC(2, 7), - /* 8 - invalid */ - OPC_FLOOR_F = _OPC(2, 9), - OPC_CEIL_F = _OPC(2, 10), - OPC_RNDNE_F = _OPC(2, 11), - OPC_RNDAZ_F = _OPC(2, 12), - OPC_TRUNC_F = _OPC(2, 13), - /* 14-15 - invalid */ - OPC_ADD_U = _OPC(2, 16), - OPC_ADD_S = _OPC(2, 17), - OPC_SUB_U = _OPC(2, 18), - OPC_SUB_S = _OPC(2, 19), - OPC_CMPS_U = _OPC(2, 20), - OPC_CMPS_S = _OPC(2, 21), - OPC_MIN_U = _OPC(2, 22), - OPC_MIN_S = _OPC(2, 23), - OPC_MAX_U = _OPC(2, 24), - OPC_MAX_S = _OPC(2, 25), - OPC_ABSNEG_S = _OPC(2, 26), - /* 27 - invalid */ - OPC_AND_B = _OPC(2, 28), - OPC_OR_B = _OPC(2, 29), - OPC_NOT_B = _OPC(2, 30), - OPC_XOR_B = _OPC(2, 31), - /* 32 - invalid */ - OPC_CMPV_U = _OPC(2, 33), - OPC_CMPV_S = _OPC(2, 34), - /* 35-47 - invalid */ - OPC_MUL_U = _OPC(2, 48), - OPC_MUL_S = _OPC(2, 49), - OPC_MULL_U = _OPC(2, 50), - OPC_BFREV_B = _OPC(2, 51), - OPC_CLZ_S = _OPC(2, 52), - OPC_CLZ_B = _OPC(2, 53), - OPC_SHL_B = _OPC(2, 54), - OPC_SHR_B = _OPC(2, 55), - OPC_ASHR_B = _OPC(2, 56), - OPC_BARY_F = _OPC(2, 57), - OPC_MGEN_B = _OPC(2, 58), - OPC_GETBIT_B = _OPC(2, 59), - OPC_SETRM = _OPC(2, 60), - OPC_CBITS_B = 
_OPC(2, 61), - OPC_SHB = _OPC(2, 62), - OPC_MSAD = _OPC(2, 63), - - /* category 3: */ - OPC_MAD_U16 = _OPC(3, 0), - OPC_MADSH_U16 = _OPC(3, 1), - OPC_MAD_S16 = _OPC(3, 2), - OPC_MADSH_M16 = _OPC(3, 3), /* should this be .s16? */ - OPC_MAD_U24 = _OPC(3, 4), - OPC_MAD_S24 = _OPC(3, 5), - OPC_MAD_F16 = _OPC(3, 6), - OPC_MAD_F32 = _OPC(3, 7), - OPC_SEL_B16 = _OPC(3, 8), - OPC_SEL_B32 = _OPC(3, 9), - OPC_SEL_S16 = _OPC(3, 10), - OPC_SEL_S32 = _OPC(3, 11), - OPC_SEL_F16 = _OPC(3, 12), - OPC_SEL_F32 = _OPC(3, 13), - OPC_SAD_S16 = _OPC(3, 14), - OPC_SAD_S32 = _OPC(3, 15), - - /* category 4: */ - OPC_RCP = _OPC(4, 0), - OPC_RSQ = _OPC(4, 1), - OPC_LOG2 = _OPC(4, 2), - OPC_EXP2 = _OPC(4, 3), - OPC_SIN = _OPC(4, 4), - OPC_COS = _OPC(4, 5), - OPC_SQRT = _OPC(4, 6), - // 7-63 - invalid - - /* category 5: */ - OPC_ISAM = _OPC(5, 0), - OPC_ISAML = _OPC(5, 1), - OPC_ISAMM = _OPC(5, 2), - OPC_SAM = _OPC(5, 3), - OPC_SAMB = _OPC(5, 4), - OPC_SAML = _OPC(5, 5), - OPC_SAMGQ = _OPC(5, 6), - OPC_GETLOD = _OPC(5, 7), - OPC_CONV = _OPC(5, 8), - OPC_CONVM = _OPC(5, 9), - OPC_GETSIZE = _OPC(5, 10), - OPC_GETBUF = _OPC(5, 11), - OPC_GETPOS = _OPC(5, 12), - OPC_GETINFO = _OPC(5, 13), - OPC_DSX = _OPC(5, 14), - OPC_DSY = _OPC(5, 15), - OPC_GATHER4R = _OPC(5, 16), - OPC_GATHER4G = _OPC(5, 17), - OPC_GATHER4B = _OPC(5, 18), - OPC_GATHER4A = _OPC(5, 19), - OPC_SAMGP0 = _OPC(5, 20), - OPC_SAMGP1 = _OPC(5, 21), - OPC_SAMGP2 = _OPC(5, 22), - OPC_SAMGP3 = _OPC(5, 23), - OPC_DSXPP_1 = _OPC(5, 24), - OPC_DSYPP_1 = _OPC(5, 25), - OPC_RGETPOS = _OPC(5, 26), - OPC_RGETINFO = _OPC(5, 27), - - /* category 6: */ - OPC_LDG = _OPC(6, 0), /* load-global */ - OPC_LDL = _OPC(6, 1), - OPC_LDP = _OPC(6, 2), - OPC_STG = _OPC(6, 3), /* store-global */ - OPC_STL = _OPC(6, 4), - OPC_STP = _OPC(6, 5), - OPC_STI = _OPC(6, 6), - OPC_G2L = _OPC(6, 7), - OPC_L2G = _OPC(6, 8), - OPC_PREFETCH = _OPC(6, 9), - OPC_LDLW = _OPC(6, 10), - OPC_STLW = _OPC(6, 11), - OPC_RESFMT = _OPC(6, 14), - OPC_RESINFO = _OPC(6, 15), - 
OPC_ATOMIC_ADD = _OPC(6, 16), - OPC_ATOMIC_SUB = _OPC(6, 17), - OPC_ATOMIC_XCHG = _OPC(6, 18), - OPC_ATOMIC_INC = _OPC(6, 19), - OPC_ATOMIC_DEC = _OPC(6, 20), - OPC_ATOMIC_CMPXCHG = _OPC(6, 21), - OPC_ATOMIC_MIN = _OPC(6, 22), - OPC_ATOMIC_MAX = _OPC(6, 23), - OPC_ATOMIC_AND = _OPC(6, 24), - OPC_ATOMIC_OR = _OPC(6, 25), - OPC_ATOMIC_XOR = _OPC(6, 26), - OPC_LDGB = _OPC(6, 27), - OPC_STGB = _OPC(6, 28), - OPC_STIB = _OPC(6, 29), - OPC_LDC = _OPC(6, 30), - OPC_LDLV = _OPC(6, 31), - - /* category 7: */ - OPC_BAR = _OPC(7, 0), - OPC_FENCE = _OPC(7, 1), - - /* meta instructions (category -1): */ - /* placeholder instr to mark shader inputs: */ - OPC_META_INPUT = _OPC(-1, 0), - /* The "fan-in" and "fan-out" instructions are used for keeping - * track of instructions that write to multiple dst registers - * (fan-out) like texture sample instructions, or read multiple - * consecutive scalar registers (fan-in) (bary.f, texture samp) - */ - OPC_META_FO = _OPC(-1, 2), - OPC_META_FI = _OPC(-1, 3), - -} opc_t; - -#define opc_cat(opc) ((int)((opc) >> NOPC_BITS)) -#define opc_op(opc) ((unsigned)((opc) & ((1 << NOPC_BITS) - 1))) - -typedef enum { - TYPE_F16 = 0, - TYPE_F32 = 1, - TYPE_U16 = 2, - TYPE_U32 = 3, - TYPE_S16 = 4, - TYPE_S32 = 5, - TYPE_U8 = 6, - TYPE_S8 = 7, // XXX I assume? 
-} type_t; - -static inline uint32_t type_size(type_t type) -{ - switch (type) { - case TYPE_F32: - case TYPE_U32: - case TYPE_S32: - return 32; - case TYPE_F16: - case TYPE_U16: - case TYPE_S16: - return 16; - case TYPE_U8: - case TYPE_S8: - return 8; - default: - assert(0); /* invalid type */ - return 0; - } -} - -static inline int type_float(type_t type) -{ - return (type == TYPE_F32) || (type == TYPE_F16); -} - -static inline int type_uint(type_t type) -{ - return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8); -} - -static inline int type_sint(type_t type) -{ - return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8); -} - -typedef union PACKED { - /* normal gpr or const src register: */ - struct PACKED { - uint32_t comp : 2; - uint32_t num : 10; - }; - /* for immediate val: */ - int32_t iim_val : 11; - /* to make compiler happy: */ - uint32_t dummy32; - uint32_t dummy10 : 10; - int32_t idummy10 : 10; - uint32_t dummy11 : 11; - uint32_t dummy12 : 12; - uint32_t dummy13 : 13; - uint32_t dummy8 : 8; -} reg_t; - -/* special registers: */ -#define REG_A0 61 /* address register */ -#define REG_P0 62 /* predicate register */ - -static inline int reg_special(reg_t reg) -{ - return (reg.num == REG_A0) || (reg.num == REG_P0); -} - -typedef struct PACKED { - /* dword0: */ - union PACKED { - struct PACKED { - int16_t immed : 16; - uint32_t dummy1 : 16; - } a3xx; - struct PACKED { - int32_t immed : 20; - uint32_t dummy1 : 12; - } a4xx; - struct PACKED { - int32_t immed : 32; - } a5xx; - }; - - /* dword1: */ - uint32_t dummy2 : 8; - uint32_t repeat : 3; - uint32_t dummy3 : 1; - uint32_t ss : 1; - uint32_t dummy4 : 7; - uint32_t inv : 1; - uint32_t comp : 2; - uint32_t opc : 4; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat0_t; - -typedef struct PACKED { - /* dword0: */ - union PACKED { - /* for normal src register: */ - struct PACKED { - uint32_t src : 11; - /* at least low bit of pad must be zero or it will - * 
look like a address relative src - */ - uint32_t pad : 21; - }; - /* for address relative: */ - struct PACKED { - int32_t off : 10; - uint32_t src_rel_c : 1; - uint32_t src_rel : 1; - uint32_t unknown : 20; - }; - /* for immediate: */ - int32_t iim_val; - uint32_t uim_val; - float fim_val; - }; - - /* dword1: */ - uint32_t dst : 8; - uint32_t repeat : 3; - uint32_t src_r : 1; - uint32_t ss : 1; - uint32_t ul : 1; - uint32_t dst_type : 3; - uint32_t dst_rel : 1; - uint32_t src_type : 3; - uint32_t src_c : 1; - uint32_t src_im : 1; - uint32_t even : 1; - uint32_t pos_inf : 1; - uint32_t must_be_0 : 2; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat1_t; - -typedef struct PACKED { - /* dword0: */ - union PACKED { - struct PACKED { - uint32_t src1 : 11; - uint32_t must_be_zero1: 2; - uint32_t src1_im : 1; /* immediate */ - uint32_t src1_neg : 1; /* negate */ - uint32_t src1_abs : 1; /* absolute value */ - }; - struct PACKED { - uint32_t src1 : 10; - uint32_t src1_c : 1; /* relative-const */ - uint32_t src1_rel : 1; /* relative address */ - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel1; - struct PACKED { - uint32_t src1 : 12; - uint32_t src1_c : 1; /* const */ - uint32_t dummy : 3; - } c1; - }; - - union PACKED { - struct PACKED { - uint32_t src2 : 11; - uint32_t must_be_zero2: 2; - uint32_t src2_im : 1; /* immediate */ - uint32_t src2_neg : 1; /* negate */ - uint32_t src2_abs : 1; /* absolute value */ - }; - struct PACKED { - uint32_t src2 : 10; - uint32_t src2_c : 1; /* relative-const */ - uint32_t src2_rel : 1; /* relative address */ - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel2; - struct PACKED { - uint32_t src2 : 12; - uint32_t src2_c : 1; /* const */ - uint32_t dummy : 3; - } c2; - }; - - /* dword1: */ - uint32_t dst : 8; - uint32_t repeat : 2; - uint32_t sat : 1; - uint32_t src1_r : 1; - uint32_t ss : 1; - uint32_t ul : 1; /* dunno */ - uint32_t dst_half : 1; /* or widen/narrow.. ie. 
dst hrN <-> rN */ - uint32_t ei : 1; - uint32_t cond : 3; - uint32_t src2_r : 1; - uint32_t full : 1; /* not half */ - uint32_t opc : 6; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat2_t; - -typedef struct PACKED { - /* dword0: */ - union PACKED { - struct PACKED { - uint32_t src1 : 11; - uint32_t must_be_zero1: 2; - uint32_t src2_c : 1; - uint32_t src1_neg : 1; - uint32_t src2_r : 1; - }; - struct PACKED { - uint32_t src1 : 10; - uint32_t src1_c : 1; - uint32_t src1_rel : 1; - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel1; - struct PACKED { - uint32_t src1 : 12; - uint32_t src1_c : 1; - uint32_t dummy : 3; - } c1; - }; - - union PACKED { - struct PACKED { - uint32_t src3 : 11; - uint32_t must_be_zero2: 2; - uint32_t src3_r : 1; - uint32_t src2_neg : 1; - uint32_t src3_neg : 1; - }; - struct PACKED { - uint32_t src3 : 10; - uint32_t src3_c : 1; - uint32_t src3_rel : 1; - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel2; - struct PACKED { - uint32_t src3 : 12; - uint32_t src3_c : 1; - uint32_t dummy : 3; - } c2; - }; - - /* dword1: */ - uint32_t dst : 8; - uint32_t repeat : 2; - uint32_t sat : 1; - uint32_t src1_r : 1; - uint32_t ss : 1; - uint32_t ul : 1; - uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */ - uint32_t src2 : 8; - uint32_t opc : 4; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat3_t; - -static inline bool instr_cat3_full(instr_cat3_t *cat3) -{ - switch (_OPC(3, cat3->opc)) { - case OPC_MAD_F16: - case OPC_MAD_U16: - case OPC_MAD_S16: - case OPC_SEL_B16: - case OPC_SEL_S16: - case OPC_SEL_F16: - case OPC_SAD_S16: - case OPC_SAD_S32: // really?? 
- return false; - default: - return true; - } -} - -typedef struct PACKED { - /* dword0: */ - union PACKED { - struct PACKED { - uint32_t src : 11; - uint32_t must_be_zero1: 2; - uint32_t src_im : 1; /* immediate */ - uint32_t src_neg : 1; /* negate */ - uint32_t src_abs : 1; /* absolute value */ - }; - struct PACKED { - uint32_t src : 10; - uint32_t src_c : 1; /* relative-const */ - uint32_t src_rel : 1; /* relative address */ - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel; - struct PACKED { - uint32_t src : 12; - uint32_t src_c : 1; /* const */ - uint32_t dummy : 3; - } c; - }; - uint32_t dummy1 : 16; /* seem to be ignored */ - - /* dword1: */ - uint32_t dst : 8; - uint32_t repeat : 2; - uint32_t sat : 1; - uint32_t src_r : 1; - uint32_t ss : 1; - uint32_t ul : 1; - uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */ - uint32_t dummy2 : 5; /* seem to be ignored */ - uint32_t full : 1; /* not half */ - uint32_t opc : 6; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat4_t; - -typedef struct PACKED { - /* dword0: */ - union PACKED { - /* normal case: */ - struct PACKED { - uint32_t full : 1; /* not half */ - uint32_t src1 : 8; - uint32_t src2 : 8; - uint32_t dummy1 : 4; /* seem to be ignored */ - uint32_t samp : 4; - uint32_t tex : 7; - } norm; - /* s2en case: */ - struct PACKED { - uint32_t full : 1; /* not half */ - uint32_t src1 : 8; - uint32_t src2 : 11; - uint32_t dummy1 : 1; - uint32_t src3 : 8; - uint32_t dummy2 : 3; - } s2en; - /* same in either case: */ - // XXX I think, confirm this - struct PACKED { - uint32_t full : 1; /* not half */ - uint32_t src1 : 8; - uint32_t pad : 23; - }; - }; - - /* dword1: */ - uint32_t dst : 8; - uint32_t wrmask : 4; /* write-mask */ - uint32_t type : 3; - uint32_t dummy2 : 1; /* seems to be ignored */ - uint32_t is_3d : 1; - - uint32_t is_a : 1; - uint32_t is_s : 1; - uint32_t is_s2en : 1; - uint32_t is_o : 1; - uint32_t is_p : 1; - - uint32_t opc : 5; - uint32_t 
jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat5_t; - -/* dword0 encoding for src_off: [src1 + off], src2: */ -typedef struct PACKED { - /* dword0: */ - uint32_t mustbe1 : 1; - int32_t off : 13; - uint32_t src1 : 8; - uint32_t src1_im : 1; - uint32_t src2_im : 1; - uint32_t src2 : 8; - - /* dword1: */ - uint32_t dword1; -} instr_cat6a_t; - -/* dword0 encoding for !src_off: [src1], src2 */ -typedef struct PACKED { - /* dword0: */ - uint32_t mustbe0 : 1; - uint32_t src1 : 13; - uint32_t ignore0 : 8; - uint32_t src1_im : 1; - uint32_t src2_im : 1; - uint32_t src2 : 8; - - /* dword1: */ - uint32_t dword1; -} instr_cat6b_t; - -/* dword1 encoding for dst_off: */ -typedef struct PACKED { - /* dword0: */ - uint32_t dword0; - - /* note: there is some weird stuff going on where sometimes - * cat6->a.off is involved.. but that seems like a bug in - * the blob, since it is used even if !cat6->src_off - * It would make sense for there to be some more bits to - * bring us to 11 bits worth of offset, but not sure.. - */ - int32_t off : 8; - uint32_t mustbe1 : 1; - uint32_t dst : 8; - uint32_t pad1 : 15; -} instr_cat6c_t; - -/* dword1 encoding for !dst_off: */ -typedef struct PACKED { - /* dword0: */ - uint32_t dword0; - - uint32_t dst : 8; - uint32_t mustbe0 : 1; - uint32_t idx : 8; - uint32_t pad0 : 15; -} instr_cat6d_t; - -/* ldgb and atomics.. 
- * - * ldgb: pad0=0, pad3=1 - * atomic .g: pad0=1, pad3=1 - * .l: pad0=1, pad3=0 - */ -typedef struct PACKED { - /* dword0: */ - uint32_t pad0 : 1; - uint32_t src3 : 8; - uint32_t d : 2; - uint32_t typed : 1; - uint32_t type_size : 2; - uint32_t src1 : 8; - uint32_t src1_im : 1; - uint32_t src2_im : 1; - uint32_t src2 : 8; - - /* dword1: */ - uint32_t dst : 8; - uint32_t mustbe0 : 1; - uint32_t src_ssbo : 8; - uint32_t pad2 : 3; // type - uint32_t g : 1; - uint32_t pad3 : 1; - uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat -} instr_cat6ldgb_t; - -/* stgb, pad0=0, pad3=2 - */ -typedef struct PACKED { - /* dword0: */ - uint32_t mustbe1 : 1; // ??? - uint32_t src1 : 8; - uint32_t d : 2; - uint32_t typed : 1; - uint32_t type_size : 2; - uint32_t pad0 : 9; - uint32_t src2_im : 1; - uint32_t src2 : 8; - - /* dword1: */ - uint32_t src3 : 8; - uint32_t src3_im : 1; - uint32_t dst_ssbo : 8; - uint32_t pad2 : 3; // type - uint32_t pad3 : 2; - uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat -} instr_cat6stgb_t; - -typedef union PACKED { - instr_cat6a_t a; - instr_cat6b_t b; - instr_cat6c_t c; - instr_cat6d_t d; - instr_cat6ldgb_t ldgb; - instr_cat6stgb_t stgb; - struct PACKED { - /* dword0: */ - uint32_t src_off : 1; - uint32_t pad1 : 31; - - /* dword1: */ - uint32_t pad2 : 8; - uint32_t dst_off : 1; - uint32_t pad3 : 8; - uint32_t type : 3; - uint32_t g : 1; /* or in some cases it means dst immed */ - uint32_t pad4 : 1; - uint32_t opc : 5; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; - }; -} instr_cat6_t; - -typedef struct PACKED { - /* dword0: */ - uint32_t pad1 : 32; - - /* dword1: */ - uint32_t pad2 : 12; - uint32_t ss : 1; /* maybe in the encoding, but blob only uses (sy) */ - uint32_t pad3 : 6; - uint32_t w : 1; /* write */ - uint32_t r : 1; /* read */ - uint32_t l : 1; /* local */ - uint32_t g : 1; /* global */ - uint32_t opc : 4; /* presumed, but only a couple known OPCs */ - uint32_t jmp_tgt : 1; /* (jp) */ - uint32_t sync : 1; /* (sy) */ - 
uint32_t opc_cat : 3; -} instr_cat7_t; - -typedef union PACKED { - instr_cat0_t cat0; - instr_cat1_t cat1; - instr_cat2_t cat2; - instr_cat3_t cat3; - instr_cat4_t cat4; - instr_cat5_t cat5; - instr_cat6_t cat6; - instr_cat7_t cat7; - struct PACKED { - /* dword0: */ - uint32_t pad1 : 32; - - /* dword1: */ - uint32_t pad2 : 12; - uint32_t ss : 1; /* cat1-cat4 (cat0??) and cat7 (?) */ - uint32_t ul : 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) */ - uint32_t pad3 : 13; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; - - }; -} instr_t; - -static inline uint32_t instr_repeat(instr_t *instr) -{ - switch (instr->opc_cat) { - case 0: return instr->cat0.repeat; - case 1: return instr->cat1.repeat; - case 2: return instr->cat2.repeat; - case 3: return instr->cat3.repeat; - case 4: return instr->cat4.repeat; - default: return 0; - } -} - -static inline bool instr_sat(instr_t *instr) -{ - switch (instr->opc_cat) { - case 2: return instr->cat2.sat; - case 3: return instr->cat3.sat; - case 4: return instr->cat4.sat; - default: return false; - } -} - -static inline uint32_t instr_opc(instr_t *instr) -{ - switch (instr->opc_cat) { - case 0: return instr->cat0.opc; - case 1: return 0; - case 2: return instr->cat2.opc; - case 3: return instr->cat3.opc; - case 4: return instr->cat4.opc; - case 5: return instr->cat5.opc; - case 6: return instr->cat6.opc; - case 7: return instr->cat7.opc; - default: return 0; - } -} - -static inline bool is_mad(opc_t opc) -{ - switch (opc) { - case OPC_MAD_U16: - case OPC_MAD_S16: - case OPC_MAD_U24: - case OPC_MAD_S24: - case OPC_MAD_F16: - case OPC_MAD_F32: - return true; - default: - return false; - } -} - -static inline bool is_madsh(opc_t opc) -{ - switch (opc) { - case OPC_MADSH_U16: - case OPC_MADSH_M16: - return true; - default: - return false; - } -} - -static inline bool is_atomic(opc_t opc) -{ - switch (opc) { - case OPC_ATOMIC_ADD: - case OPC_ATOMIC_SUB: - case OPC_ATOMIC_XCHG: - case OPC_ATOMIC_INC: - case 
OPC_ATOMIC_DEC: - case OPC_ATOMIC_CMPXCHG: - case OPC_ATOMIC_MIN: - case OPC_ATOMIC_MAX: - case OPC_ATOMIC_AND: - case OPC_ATOMIC_OR: - case OPC_ATOMIC_XOR: - return true; - default: - return false; - } -} - -static inline bool is_ssbo(opc_t opc) -{ - switch (opc) { - case OPC_RESFMT: - case OPC_RESINFO: - case OPC_LDGB: - case OPC_STGB: - case OPC_STIB: - return true; - default: - return false; - } -} - -#endif /* INSTR_A3XX_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,940 +0,0 @@ -/* - * Copyright (c) 2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "ir3.h" - -#include -#include -#include -#include -#include -#include - -#include "util/ralloc.h" - -#include "freedreno_util.h" -#include "instr-a3xx.h" - -/* simple allocator to carve allocations out of an up-front allocated heap, - * so that we can free everything easily in one shot. - */ -void * ir3_alloc(struct ir3 *shader, int sz) -{ - return rzalloc_size(shader, sz); /* TODO: don't use rzalloc */ -} - -struct ir3 * ir3_create(struct ir3_compiler *compiler, - unsigned nin, unsigned nout) -{ - struct ir3 *shader = rzalloc(compiler, struct ir3); - - shader->compiler = compiler; - shader->ninputs = nin; - shader->inputs = ir3_alloc(shader, sizeof(shader->inputs[0]) * nin); - - shader->noutputs = nout; - shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout); - - list_inithead(&shader->block_list); - list_inithead(&shader->array_list); - - return shader; -} - -void ir3_destroy(struct ir3 *shader) -{ - ralloc_free(shader); -} - -#define iassert(cond) do { \ - if (!(cond)) { \ - debug_assert(cond); \ - return -1; \ - } } while (0) - -#define iassert_type(reg, full) do { \ - if ((full)) { \ - iassert(!((reg)->flags & IR3_REG_HALF)); \ - } else { \ - iassert((reg)->flags & IR3_REG_HALF); \ - } } while (0); - -static uint32_t reg(struct ir3_register *reg, struct ir3_info *info, - uint32_t repeat, uint32_t valid_flags) -{ - reg_t val = { .dummy32 = 0 }; - - if (reg->flags & ~valid_flags) { - debug_printf("INVALID FLAGS: %x vs %x\n", - reg->flags, valid_flags); - } - - if (!(reg->flags & IR3_REG_R)) - repeat = 0; - - if (reg->flags & IR3_REG_IMMED) { - val.iim_val = reg->iim_val; - } else { - unsigned components; - int16_t max; - - if (reg->flags & IR3_REG_RELATIV) { - components = reg->size; - val.idummy10 = reg->array.offset; - max = (reg->array.offset + repeat + components - 1) >> 2; - } else { - components = util_last_bit(reg->wrmask); - val.comp = reg->num & 0x3; - val.num = reg->num >> 2; - max = (reg->num + repeat + components - 
1) >> 2; - } - - if (reg->flags & IR3_REG_CONST) { - info->max_const = MAX2(info->max_const, max); - } else if (val.num == 63) { - /* ignore writes to dummy register r63.x */ - } else if (max < 48) { - if (reg->flags & IR3_REG_HALF) { - if (info->gpu_id >= 600) { - /* starting w/ a6xx, half regs conflict with full regs: */ - info->max_reg = MAX2(info->max_reg, (max+1)/2); - } else { - info->max_half_reg = MAX2(info->max_half_reg, max); - } - } else { - info->max_reg = MAX2(info->max_reg, max); - } - } - } - - return val.dummy32; -} - -static int emit_cat0(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - instr_cat0_t *cat0 = ptr; - - if (info->gpu_id >= 500) { - cat0->a5xx.immed = instr->cat0.immed; - } else if (info->gpu_id >= 400) { - cat0->a4xx.immed = instr->cat0.immed; - } else { - cat0->a3xx.immed = instr->cat0.immed; - } - cat0->repeat = instr->repeat; - cat0->ss = !!(instr->flags & IR3_INSTR_SS); - cat0->inv = instr->cat0.inv; - cat0->comp = instr->cat0.comp; - cat0->opc = instr->opc; - cat0->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat0->sync = !!(instr->flags & IR3_INSTR_SY); - cat0->opc_cat = 0; - - return 0; -} - -static int emit_cat1(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src = instr->regs[1]; - instr_cat1_t *cat1 = ptr; - - iassert(instr->regs_count == 2); - iassert_type(dst, type_size(instr->cat1.dst_type) == 32); - if (!(src->flags & IR3_REG_IMMED)) - iassert_type(src, type_size(instr->cat1.src_type) == 32); - - if (src->flags & IR3_REG_IMMED) { - cat1->iim_val = src->iim_val; - cat1->src_im = 1; - } else if (src->flags & IR3_REG_RELATIV) { - cat1->off = reg(src, info, instr->repeat, - IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF | IR3_REG_RELATIV); - cat1->src_rel = 1; - cat1->src_rel_c = !!(src->flags & IR3_REG_CONST); - } else { - cat1->src = reg(src, info, instr->repeat, - IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF); - 
cat1->src_c = !!(src->flags & IR3_REG_CONST); - } - - cat1->dst = reg(dst, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_EVEN | - IR3_REG_R | IR3_REG_POS_INF | IR3_REG_HALF); - cat1->repeat = instr->repeat; - cat1->src_r = !!(src->flags & IR3_REG_R); - cat1->ss = !!(instr->flags & IR3_INSTR_SS); - cat1->ul = !!(instr->flags & IR3_INSTR_UL); - cat1->dst_type = instr->cat1.dst_type; - cat1->dst_rel = !!(dst->flags & IR3_REG_RELATIV); - cat1->src_type = instr->cat1.src_type; - cat1->even = !!(dst->flags & IR3_REG_EVEN); - cat1->pos_inf = !!(dst->flags & IR3_REG_POS_INF); - cat1->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat1->sync = !!(instr->flags & IR3_INSTR_SY); - cat1->opc_cat = 1; - - return 0; -} - -static int emit_cat2(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src1 = instr->regs[1]; - struct ir3_register *src2 = instr->regs[2]; - instr_cat2_t *cat2 = ptr; - unsigned absneg = ir3_cat2_absneg(instr->opc); - - iassert((instr->regs_count == 2) || (instr->regs_count == 3)); - - if (src1->flags & IR3_REG_RELATIV) { - iassert(src1->array.offset < (1 << 10)); - cat2->rel1.src1 = reg(src1, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | - IR3_REG_HALF | absneg); - cat2->rel1.src1_c = !!(src1->flags & IR3_REG_CONST); - cat2->rel1.src1_rel = 1; - } else if (src1->flags & IR3_REG_CONST) { - iassert(src1->num < (1 << 12)); - cat2->c1.src1 = reg(src1, info, instr->repeat, - IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); - cat2->c1.src1_c = 1; - } else { - iassert(src1->num < (1 << 11)); - cat2->src1 = reg(src1, info, instr->repeat, - IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF | - absneg); - } - cat2->src1_im = !!(src1->flags & IR3_REG_IMMED); - cat2->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); - cat2->src1_abs = !!(src1->flags & (IR3_REG_FABS | IR3_REG_SABS)); - cat2->src1_r = !!(src1->flags & IR3_REG_R); - - if (src2) { - 
iassert((src2->flags & IR3_REG_IMMED) || - !((src1->flags ^ src2->flags) & IR3_REG_HALF)); - - if (src2->flags & IR3_REG_RELATIV) { - iassert(src2->array.offset < (1 << 10)); - cat2->rel2.src2 = reg(src2, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | - IR3_REG_HALF | absneg); - cat2->rel2.src2_c = !!(src2->flags & IR3_REG_CONST); - cat2->rel2.src2_rel = 1; - } else if (src2->flags & IR3_REG_CONST) { - iassert(src2->num < (1 << 12)); - cat2->c2.src2 = reg(src2, info, instr->repeat, - IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); - cat2->c2.src2_c = 1; - } else { - iassert(src2->num < (1 << 11)); - cat2->src2 = reg(src2, info, instr->repeat, - IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF | - absneg); - } - - cat2->src2_im = !!(src2->flags & IR3_REG_IMMED); - cat2->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); - cat2->src2_abs = !!(src2->flags & (IR3_REG_FABS | IR3_REG_SABS)); - cat2->src2_r = !!(src2->flags & IR3_REG_R); - } - - cat2->dst = reg(dst, info, instr->repeat, - IR3_REG_R | IR3_REG_EI | IR3_REG_HALF); - cat2->repeat = instr->repeat; - cat2->sat = !!(instr->flags & IR3_INSTR_SAT); - cat2->ss = !!(instr->flags & IR3_INSTR_SS); - cat2->ul = !!(instr->flags & IR3_INSTR_UL); - cat2->dst_half = !!((src1->flags ^ dst->flags) & IR3_REG_HALF); - cat2->ei = !!(dst->flags & IR3_REG_EI); - cat2->cond = instr->cat2.condition; - cat2->full = ! 
(src1->flags & IR3_REG_HALF); - cat2->opc = instr->opc; - cat2->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat2->sync = !!(instr->flags & IR3_INSTR_SY); - cat2->opc_cat = 2; - - return 0; -} - -static int emit_cat3(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src1 = instr->regs[1]; - struct ir3_register *src2 = instr->regs[2]; - struct ir3_register *src3 = instr->regs[3]; - unsigned absneg = ir3_cat3_absneg(instr->opc); - instr_cat3_t *cat3 = ptr; - uint32_t src_flags = 0; - - switch (instr->opc) { - case OPC_MAD_F16: - case OPC_MAD_U16: - case OPC_MAD_S16: - case OPC_SEL_B16: - case OPC_SEL_S16: - case OPC_SEL_F16: - case OPC_SAD_S16: - case OPC_SAD_S32: // really?? - src_flags |= IR3_REG_HALF; - break; - default: - break; - } - - iassert(instr->regs_count == 4); - iassert(!((src1->flags ^ src_flags) & IR3_REG_HALF)); - iassert(!((src2->flags ^ src_flags) & IR3_REG_HALF)); - iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF)); - - if (src1->flags & IR3_REG_RELATIV) { - iassert(src1->array.offset < (1 << 10)); - cat3->rel1.src1 = reg(src1, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | - IR3_REG_HALF | absneg); - cat3->rel1.src1_c = !!(src1->flags & IR3_REG_CONST); - cat3->rel1.src1_rel = 1; - } else if (src1->flags & IR3_REG_CONST) { - iassert(src1->num < (1 << 12)); - cat3->c1.src1 = reg(src1, info, instr->repeat, - IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); - cat3->c1.src1_c = 1; - } else { - iassert(src1->num < (1 << 11)); - cat3->src1 = reg(src1, info, instr->repeat, - IR3_REG_R | IR3_REG_HALF | absneg); - } - - cat3->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); - cat3->src1_r = !!(src1->flags & IR3_REG_R); - - cat3->src2 = reg(src2, info, instr->repeat, - IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF | absneg); - cat3->src2_c = !!(src2->flags & IR3_REG_CONST); - cat3->src2_neg = !!(src2->flags & (IR3_REG_FNEG | 
IR3_REG_SNEG | IR3_REG_BNOT)); - cat3->src2_r = !!(src2->flags & IR3_REG_R); - - - if (src3->flags & IR3_REG_RELATIV) { - iassert(src3->array.offset < (1 << 10)); - cat3->rel2.src3 = reg(src3, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | - IR3_REG_HALF | absneg); - cat3->rel2.src3_c = !!(src3->flags & IR3_REG_CONST); - cat3->rel2.src3_rel = 1; - } else if (src3->flags & IR3_REG_CONST) { - iassert(src3->num < (1 << 12)); - cat3->c2.src3 = reg(src3, info, instr->repeat, - IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); - cat3->c2.src3_c = 1; - } else { - iassert(src3->num < (1 << 11)); - cat3->src3 = reg(src3, info, instr->repeat, - IR3_REG_R | IR3_REG_HALF | absneg); - } - - cat3->src3_neg = !!(src3->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); - cat3->src3_r = !!(src3->flags & IR3_REG_R); - - cat3->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - cat3->repeat = instr->repeat; - cat3->sat = !!(instr->flags & IR3_INSTR_SAT); - cat3->ss = !!(instr->flags & IR3_INSTR_SS); - cat3->ul = !!(instr->flags & IR3_INSTR_UL); - cat3->dst_half = !!((src_flags ^ dst->flags) & IR3_REG_HALF); - cat3->opc = instr->opc; - cat3->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat3->sync = !!(instr->flags & IR3_INSTR_SY); - cat3->opc_cat = 3; - - return 0; -} - -static int emit_cat4(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src = instr->regs[1]; - instr_cat4_t *cat4 = ptr; - - iassert(instr->regs_count == 2); - - if (src->flags & IR3_REG_RELATIV) { - iassert(src->array.offset < (1 << 10)); - cat4->rel.src = reg(src, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_FNEG | - IR3_REG_FABS | IR3_REG_R | IR3_REG_HALF); - cat4->rel.src_c = !!(src->flags & IR3_REG_CONST); - cat4->rel.src_rel = 1; - } else if (src->flags & IR3_REG_CONST) { - iassert(src->num < (1 << 12)); - cat4->c.src = reg(src, info, instr->repeat, - IR3_REG_CONST | 
IR3_REG_FNEG | IR3_REG_FABS | - IR3_REG_R | IR3_REG_HALF); - cat4->c.src_c = 1; - } else { - iassert(src->num < (1 << 11)); - cat4->src = reg(src, info, instr->repeat, - IR3_REG_IMMED | IR3_REG_FNEG | IR3_REG_FABS | - IR3_REG_R | IR3_REG_HALF); - } - - cat4->src_im = !!(src->flags & IR3_REG_IMMED); - cat4->src_neg = !!(src->flags & IR3_REG_FNEG); - cat4->src_abs = !!(src->flags & IR3_REG_FABS); - cat4->src_r = !!(src->flags & IR3_REG_R); - - cat4->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - cat4->repeat = instr->repeat; - cat4->sat = !!(instr->flags & IR3_INSTR_SAT); - cat4->ss = !!(instr->flags & IR3_INSTR_SS); - cat4->ul = !!(instr->flags & IR3_INSTR_UL); - cat4->dst_half = !!((src->flags ^ dst->flags) & IR3_REG_HALF); - cat4->full = ! (src->flags & IR3_REG_HALF); - cat4->opc = instr->opc; - cat4->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat4->sync = !!(instr->flags & IR3_INSTR_SY); - cat4->opc_cat = 4; - - return 0; -} - -static int emit_cat5(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src1 = instr->regs[1]; - struct ir3_register *src2 = instr->regs[2]; - struct ir3_register *src3 = instr->regs[3]; - instr_cat5_t *cat5 = ptr; - - iassert_type(dst, type_size(instr->cat5.type) == 32) - - assume(src1 || !src2); - assume(src2 || !src3); - - if (src1) { - cat5->full = ! 
(src1->flags & IR3_REG_HALF); - cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF); - } - - if (instr->flags & IR3_INSTR_S2EN) { - if (src2) { - iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); - cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); - } - if (src3) { - iassert(src3->flags & IR3_REG_HALF); - cat5->s2en.src3 = reg(src3, info, instr->repeat, IR3_REG_HALF); - } - iassert(!(instr->cat5.samp | instr->cat5.tex)); - } else { - iassert(!src3); - if (src2) { - iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); - cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); - } - cat5->norm.samp = instr->cat5.samp; - cat5->norm.tex = instr->cat5.tex; - } - - cat5->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - cat5->wrmask = dst->wrmask; - cat5->type = instr->cat5.type; - cat5->is_3d = !!(instr->flags & IR3_INSTR_3D); - cat5->is_a = !!(instr->flags & IR3_INSTR_A); - cat5->is_s = !!(instr->flags & IR3_INSTR_S); - cat5->is_s2en = !!(instr->flags & IR3_INSTR_S2EN); - cat5->is_o = !!(instr->flags & IR3_INSTR_O); - cat5->is_p = !!(instr->flags & IR3_INSTR_P); - cat5->opc = instr->opc; - cat5->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat5->sync = !!(instr->flags & IR3_INSTR_SY); - cat5->opc_cat = 5; - - return 0; -} - -static int emit_cat6(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - struct ir3_register *dst, *src1, *src2; - instr_cat6_t *cat6 = ptr; - bool type_full = type_size(instr->cat6.type) == 32; - - cat6->type = instr->cat6.type; - cat6->opc = instr->opc; - cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat6->sync = !!(instr->flags & IR3_INSTR_SY); - cat6->g = !!(instr->flags & IR3_INSTR_G); - cat6->opc_cat = 6; - - switch (instr->opc) { - case OPC_RESINFO: - case OPC_RESFMT: - iassert_type(instr->regs[0], type_full); /* dst */ - iassert_type(instr->regs[1], type_full); /* src1 */ - break; - case OPC_L2G: - case OPC_G2L: - iassert_type(instr->regs[0], true); /* dst */ - 
iassert_type(instr->regs[1], true); /* src1 */ - break; - case OPC_STG: - case OPC_STL: - case OPC_STP: - case OPC_STI: - case OPC_STLW: - case OPC_STIB: - /* no dst, so regs[0] is dummy */ - iassert_type(instr->regs[1], true); /* dst */ - iassert_type(instr->regs[2], type_full); /* src1 */ - iassert_type(instr->regs[3], true); /* src2 */ - break; - default: - iassert_type(instr->regs[0], type_full); /* dst */ - iassert_type(instr->regs[1], true); /* src1 */ - if (instr->regs_count > 2) - iassert_type(instr->regs[2], true); /* src1 */ - break; - } - - /* the "dst" for a store instruction is (from the perspective - * of data flow in the shader, ie. register use/def, etc) in - * fact a register that is read by the instruction, rather - * than written: - */ - if (is_store(instr)) { - iassert(instr->regs_count >= 3); - - dst = instr->regs[1]; - src1 = instr->regs[2]; - src2 = (instr->regs_count >= 4) ? instr->regs[3] : NULL; - } else { - iassert(instr->regs_count >= 2); - - dst = instr->regs[0]; - src1 = instr->regs[1]; - src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL; - } - - /* TODO we need a more comprehensive list about which instructions - * can be encoded which way. Or possibly use IR3_INSTR_0 flag to - * indicate to use the src_off encoding even if offset is zero - * (but then what to do about dst_off?) - */ - if (is_atomic(instr->opc)) { - instr_cat6ldgb_t *ldgb = ptr; - - /* maybe these two bits both determine the instruction encoding? 
*/ - cat6->src_off = false; - - ldgb->d = instr->cat6.d - 1; - ldgb->typed = instr->cat6.typed; - ldgb->type_size = instr->cat6.iim_val - 1; - - ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - - if (ldgb->g) { - struct ir3_register *src3 = instr->regs[3]; - struct ir3_register *src4 = instr->regs[4]; - - /* first src is src_ssbo: */ - iassert(src1->flags & IR3_REG_IMMED); - ldgb->src_ssbo = src1->uim_val; - - ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED); - ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED); - ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED); - ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED); - - ldgb->src3 = reg(src4, info, instr->repeat, 0); - ldgb->pad0 = 0x1; - ldgb->pad3 = 0x1; - } else { - ldgb->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED); - ldgb->src1_im = !!(src1->flags & IR3_REG_IMMED); - ldgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); - ldgb->src2_im = !!(src2->flags & IR3_REG_IMMED); - ldgb->pad0 = 0x1; - ldgb->pad3 = 0x0; - } - - return 0; - } else if (instr->opc == OPC_LDGB) { - struct ir3_register *src3 = instr->regs[3]; - instr_cat6ldgb_t *ldgb = ptr; - - /* maybe these two bits both determine the instruction encoding? 
*/ - cat6->src_off = false; - - ldgb->d = instr->cat6.d - 1; - ldgb->typed = instr->cat6.typed; - ldgb->type_size = instr->cat6.iim_val - 1; - - ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - - /* first src is src_ssbo: */ - iassert(src1->flags & IR3_REG_IMMED); - ldgb->src_ssbo = src1->uim_val; - - /* then next two are src1/src2: */ - ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED); - ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED); - ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED); - ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED); - - ldgb->pad0 = 0x0; - ldgb->pad3 = 0x1; - - return 0; - } else if (instr->opc == OPC_RESINFO) { - instr_cat6ldgb_t *ldgb = ptr; - - ldgb->d = instr->cat6.d - 1; - - ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - - /* first src is src_ssbo: */ - iassert(src1->flags & IR3_REG_IMMED); - ldgb->src_ssbo = src1->uim_val; - - return 0; - } else if ((instr->opc == OPC_STGB) || (instr->opc == OPC_STIB)) { - struct ir3_register *src3 = instr->regs[4]; - instr_cat6stgb_t *stgb = ptr; - - /* maybe these two bits both determine the instruction encoding? 
*/ - cat6->src_off = true; - stgb->pad3 = 0x2; - - stgb->d = instr->cat6.d - 1; - stgb->typed = instr->cat6.typed; - stgb->type_size = instr->cat6.iim_val - 1; - - /* first src is dst_ssbo: */ - iassert(dst->flags & IR3_REG_IMMED); - stgb->dst_ssbo = dst->uim_val; - - /* then src1/src2/src3: */ - stgb->src1 = reg(src1, info, instr->repeat, 0); - stgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); - stgb->src2_im = !!(src2->flags & IR3_REG_IMMED); - stgb->src3 = reg(src3, info, instr->repeat, IR3_REG_IMMED); - stgb->src3_im = !!(src3->flags & IR3_REG_IMMED); - - return 0; - } else if (instr->cat6.src_offset || (instr->opc == OPC_LDG) || - (instr->opc == OPC_LDL)) { - instr_cat6a_t *cat6a = ptr; - - cat6->src_off = true; - - cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED); - cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED); - if (src2) { - cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); - cat6a->src2_im = !!(src2->flags & IR3_REG_IMMED); - } - cat6a->off = instr->cat6.src_offset; - } else { - instr_cat6b_t *cat6b = ptr; - - cat6->src_off = false; - - cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED | IR3_REG_HALF); - cat6b->src1_im = !!(src1->flags & IR3_REG_IMMED); - if (src2) { - cat6b->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); - cat6b->src2_im = !!(src2->flags & IR3_REG_IMMED); - } - } - - if (instr->cat6.dst_offset || (instr->opc == OPC_STG) || - (instr->opc == OPC_STL)) { - instr_cat6c_t *cat6c = ptr; - cat6->dst_off = true; - cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - cat6c->off = instr->cat6.dst_offset; - } else { - instr_cat6d_t *cat6d = ptr; - cat6->dst_off = false; - cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - } - - return 0; -} - -static int emit_cat7(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - instr_cat7_t *cat7 = ptr; - - cat7->ss = !!(instr->flags & IR3_INSTR_SS); - cat7->w = instr->cat7.w; - cat7->r = 
instr->cat7.r; - cat7->l = instr->cat7.l; - cat7->g = instr->cat7.g; - cat7->opc = instr->opc; - cat7->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat7->sync = !!(instr->flags & IR3_INSTR_SY); - cat7->opc_cat = 7; - - return 0; -} - -static int (*emit[])(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) = { - emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6, - emit_cat7, -}; - -void * ir3_assemble(struct ir3 *shader, struct ir3_info *info, - uint32_t gpu_id) -{ - uint32_t *ptr, *dwords; - - info->gpu_id = gpu_id; - info->max_reg = -1; - info->max_half_reg = -1; - info->max_const = -1; - info->instrs_count = 0; - info->sizedwords = 0; - info->ss = info->sy = 0; - - list_for_each_entry (struct ir3_block, block, &shader->block_list, node) { - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - info->sizedwords += 2; - } - } - - /* need an integer number of instruction "groups" (sets of 16 - * instructions on a4xx or sets of 4 instructions on a3xx), - * so pad out w/ NOPs if needed: (NOTE each instruction is 64bits) - */ - if (gpu_id >= 400) { - info->sizedwords = align(info->sizedwords, 16 * 2); - } else { - info->sizedwords = align(info->sizedwords, 4 * 2); - } - - ptr = dwords = calloc(4, info->sizedwords); - - list_for_each_entry (struct ir3_block, block, &shader->block_list, node) { - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - int ret = emit[opc_cat(instr->opc)](instr, dwords, info); - if (ret) - goto fail; - info->instrs_count += 1 + instr->repeat; - dwords += 2; - - if (instr->flags & IR3_INSTR_SS) - info->ss++; - - if (instr->flags & IR3_INSTR_SY) - info->sy++; - } - } - - return ptr; - -fail: - free(ptr); - return NULL; -} - -static struct ir3_register * reg_create(struct ir3 *shader, - int num, int flags) -{ - struct ir3_register *reg = - ir3_alloc(shader, sizeof(struct ir3_register)); - reg->wrmask = 1; - reg->flags = flags; - reg->num = num; - 
return reg; -} - -static void insert_instr(struct ir3_block *block, - struct ir3_instruction *instr) -{ - struct ir3 *shader = block->shader; -#ifdef DEBUG - instr->serialno = ++shader->instr_count; -#endif - list_addtail(&instr->node, &block->instr_list); - - if (is_input(instr)) - array_insert(shader, shader->baryfs, instr); -} - -struct ir3_block * ir3_block_create(struct ir3 *shader) -{ - struct ir3_block *block = ir3_alloc(shader, sizeof(*block)); -#ifdef DEBUG - block->serialno = ++shader->block_count; -#endif - block->shader = shader; - list_inithead(&block->node); - list_inithead(&block->instr_list); - return block; -} - -static struct ir3_instruction *instr_create(struct ir3_block *block, int nreg) -{ - struct ir3_instruction *instr; - unsigned sz = sizeof(*instr) + (nreg * sizeof(instr->regs[0])); - char *ptr = ir3_alloc(block->shader, sz); - - instr = (struct ir3_instruction *)ptr; - ptr += sizeof(*instr); - instr->regs = (struct ir3_register **)ptr; - -#ifdef DEBUG - instr->regs_max = nreg; -#endif - - return instr; -} - -struct ir3_instruction * ir3_instr_create2(struct ir3_block *block, - opc_t opc, int nreg) -{ - struct ir3_instruction *instr = instr_create(block, nreg); - instr->block = block; - instr->opc = opc; - insert_instr(block, instr); - return instr; -} - -struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc) -{ - /* NOTE: we could be slightly more clever, at least for non-meta, - * and choose # of regs based on category. 
- */ - return ir3_instr_create2(block, opc, 4); -} - -struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr) -{ - struct ir3_instruction *new_instr = instr_create(instr->block, - instr->regs_count); - struct ir3_register **regs; - unsigned i; - - regs = new_instr->regs; - *new_instr = *instr; - new_instr->regs = regs; - - insert_instr(instr->block, new_instr); - - /* clone registers: */ - new_instr->regs_count = 0; - for (i = 0; i < instr->regs_count; i++) { - struct ir3_register *reg = instr->regs[i]; - struct ir3_register *new_reg = - ir3_reg_create(new_instr, reg->num, reg->flags); - *new_reg = *reg; - } - - return new_instr; -} - -/* Add a false dependency to instruction, to ensure it is scheduled first: */ -void ir3_instr_add_dep(struct ir3_instruction *instr, struct ir3_instruction *dep) -{ - array_insert(instr, instr->deps, dep); -} - -struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, - int num, int flags) -{ - struct ir3 *shader = instr->block->shader; - struct ir3_register *reg = reg_create(shader, num, flags); -#ifdef DEBUG - debug_assert(instr->regs_count < instr->regs_max); -#endif - instr->regs[instr->regs_count++] = reg; - return reg; -} - -struct ir3_register * ir3_reg_clone(struct ir3 *shader, - struct ir3_register *reg) -{ - struct ir3_register *new_reg = reg_create(shader, 0, 0); - *new_reg = *reg; - return new_reg; -} - -void -ir3_instr_set_address(struct ir3_instruction *instr, - struct ir3_instruction *addr) -{ - if (instr->address != addr) { - struct ir3 *ir = instr->block->shader; - instr->address = addr; - array_insert(ir, ir->indirects, instr); - } -} - -void -ir3_block_clear_mark(struct ir3_block *block) -{ - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) - instr->flags &= ~IR3_INSTR_MARK; -} - -void -ir3_clear_mark(struct ir3 *ir) -{ - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - ir3_block_clear_mark(block); - } -} - -/* note: this will destroy 
instr->depth, don't do it until after sched! */ -unsigned -ir3_count_instructions(struct ir3 *ir) -{ - unsigned cnt = 0; - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - instr->ip = cnt++; - } - block->start_ip = list_first_entry(&block->instr_list, struct ir3_instruction, node)->ip; - block->end_ip = list_last_entry(&block->instr_list, struct ir3_instruction, node)->ip; - } - return cnt; -} - -struct ir3_array * -ir3_lookup_array(struct ir3 *ir, unsigned id) -{ - list_for_each_entry (struct ir3_array, arr, &ir->array_list, node) - if (arr->id == id) - return arr; - return NULL; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_cache.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_cache.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_cache.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_cache.c 2019-03-31 23:16:37.000000000 +0000 @@ -1,5 +1,3 @@ -/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ - /* * Copyright (C) 2015 Rob Clark * @@ -30,7 +28,7 @@ #include "util/hash_table.h" #include "ir3_cache.h" -#include "ir3_shader.h" +#include "ir3_gallium.h" static uint32_t diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_cache.h mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_cache.h --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_cache.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_cache.h 2019-03-31 23:16:37.000000000 +0000 @@ -27,7 +27,7 @@ #ifndef IR3_CACHE_H_ #define IR3_CACHE_H_ -#include "ir3_shader.h" +#include "ir3/ir3_shader.h" /* * An in-memory cache for mapping shader state objects plus shader key to diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c 
2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c 2019-03-31 23:16:37.000000000 +0000 @@ -37,12 +37,11 @@ #include "tgsi/tgsi_text.h" #include "tgsi/tgsi_dump.h" -#include "freedreno_util.h" - -#include "ir3_compiler.h" -#include "ir3_nir.h" -#include "instr-a3xx.h" -#include "ir3.h" +#include "ir3/ir3_compiler.h" +#include "ir3/ir3_gallium.h" +#include "ir3/ir3_nir.h" +#include "ir3/instr-a3xx.h" +#include "ir3/ir3.h" #include "compiler/glsl/standalone.h" #include "compiler/glsl/glsl_to_nir.h" @@ -103,29 +102,42 @@ load_glsl(unsigned num_files, char* const* files, gl_shader_stage stage) { static const struct standalone_options options = { - .glsl_version = 140, + .glsl_version = 460, .do_link = true, }; struct gl_shader_program *prog; + const nir_shader_compiler_options *nir_options = + ir3_get_compiler_options(compiler); prog = standalone_compile_shader(&options, num_files, files); if (!prog) errx(1, "couldn't parse `%s'", files[0]); - nir_shader *nir = glsl_to_nir(prog, stage, ir3_get_compiler_options(compiler)); + nir_shader *nir = glsl_to_nir(prog, stage, nir_options); /* required NIR passes: */ - /* TODO cmdline args for some of the conditional lowering passes? 
*/ + if (nir_options->lower_all_io_to_temps || + nir->info.stage == MESA_SHADER_VERTEX || + nir->info.stage == MESA_SHADER_GEOMETRY) { + NIR_PASS_V(nir, nir_lower_io_to_temporaries, + nir_shader_get_entrypoint(nir), + true, true); + } else if (nir->info.stage == MESA_SHADER_FRAGMENT) { + NIR_PASS_V(nir, nir_lower_io_to_temporaries, + nir_shader_get_entrypoint(nir), + true, false); + } - NIR_PASS_V(nir, nir_lower_io_to_temporaries, - nir_shader_get_entrypoint(nir), - true, true); NIR_PASS_V(nir, nir_lower_global_vars_to_local); NIR_PASS_V(nir, nir_split_var_copies); NIR_PASS_V(nir, nir_lower_var_copies); NIR_PASS_V(nir, nir_split_var_copies); NIR_PASS_V(nir, nir_lower_var_copies); + nir_print_shader(nir, stdout); + NIR_PASS_V(nir, gl_nir_lower_atomics, prog, true); + NIR_PASS_V(nir, nir_lower_atomics_to_ssbo, 8); + nir_print_shader(nir, stdout); switch (stage) { case MESA_SHADER_VERTEX: @@ -152,6 +164,9 @@ &nir->num_outputs, ir3_glsl_type_size); break; + case MESA_SHADER_COMPUTE: + case MESA_SHADER_KERNEL: + break; default: errx(1, "unhandled shader stage: %d", stage); } @@ -215,6 +230,7 @@ .variable_pointers = true, }, .lower_workgroup_access_to_offsets = true, + .lower_ubo_ssbo_access_to_offsets = true, .debug = { .func = debug_func, } @@ -282,7 +298,7 @@ while (n < argc) { if (!strcmp(argv[n], "--verbose")) { - fd_mesa_debug |= FD_DBG_MSGS | FD_DBG_OPTMSGS | FD_DBG_DISASM; + ir3_shader_debug |= IR3_DBG_OPTMSGS | IR3_DBG_DISASM; n++; continue; } @@ -337,7 +353,7 @@ } if (!strcmp(argv[n], "--stream-out")) { - struct pipe_stream_output_info *so = &s.stream_output; + struct ir3_stream_output_info *so = &s.stream_output; debug_printf(" %s", argv[n]); /* TODO more dynamic config based on number of outputs, etc * rather than just hard-code for first output: @@ -396,6 +412,12 @@ errx(1, "in SPIR-V mode, an entry point must be specified"); entry = argv[n]; n++; + } else if (strcmp(ext, ".comp") == 0) { + if (s.from_tgsi || from_spirv) + errx(1, "cannot mix 
GLSL/TGSI/SPIRV"); + if (num_files >= ARRAY_SIZE(filenames)) + errx(1, "too many GLSL files"); + stage = MESA_SHADER_COMPUTE; } else if (strcmp(ext, ".frag") == 0) { if (s.from_tgsi || from_spirv) errx(1, "cannot mix GLSL/TGSI/SPIRV"); @@ -431,16 +453,16 @@ return ret; } - if (fd_mesa_debug & FD_DBG_OPTMSGS) + if (ir3_shader_debug & IR3_DBG_OPTMSGS) debug_printf("%s\n", (char *)ptr); if (!tgsi_text_translate(ptr, toks, ARRAY_SIZE(toks))) errx(1, "could not parse `%s'", filenames[0]); - if (fd_mesa_debug & FD_DBG_OPTMSGS) + if (ir3_shader_debug & IR3_DBG_OPTMSGS) tgsi_dump(toks, 0); - nir = ir3_tgsi_to_nir(toks); + nir = ir3_tgsi_to_nir(compiler, toks); NIR_PASS_V(nir, nir_lower_global_vars_to_local); } else if (from_spirv) { nir = load_spirv(filenames[0], entry, stage); @@ -463,20 +485,7 @@ v.key = key; v.shader = &s; - - switch (nir->info.stage) { - case MESA_SHADER_FRAGMENT: - s.type = v.type = SHADER_FRAGMENT; - break; - case MESA_SHADER_VERTEX: - s.type = v.type = SHADER_VERTEX; - break; - case MESA_SHADER_COMPUTE: - s.type = v.type = SHADER_COMPUTE; - break; - default: - errx(1, "unhandled shader stage: %d", nir->info.stage); - } + s.type = v.type = nir->info.stage; info = "NIR compiler"; ret = ir3_compile_shader_nir(s.compiler, &v); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_compiler.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_compiler.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_compiler.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_compiler.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,56 +0,0 @@ -/* - * Copyright (C) 2015 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the 
Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include "util/ralloc.h" - -#include "ir3_compiler.h" - -struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id) -{ - struct ir3_compiler *compiler = rzalloc(NULL, struct ir3_compiler); - - compiler->dev = dev; - compiler->gpu_id = gpu_id; - compiler->set = ir3_ra_alloc_reg_set(compiler); - - if (compiler->gpu_id >= 400) { - /* need special handling for "flat" */ - compiler->flat_bypass = true; - compiler->levels_add_one = false; - compiler->unminify_coords = false; - compiler->txf_ms_with_isaml = false; - compiler->array_index_add_half = true; - } else { - /* no special handling for "flat" */ - compiler->flat_bypass = false; - compiler->levels_add_one = true; - compiler->unminify_coords = true; - compiler->txf_ms_with_isaml = true; - compiler->array_index_add_half = false; - } - - return compiler; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_compiler.h mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_compiler.h --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_compiler.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_compiler.h 1970-01-01 00:00:00.000000000 +0000 @@ 
-1,73 +0,0 @@ -/* - * Copyright (C) 2013 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#ifndef IR3_COMPILER_H_ -#define IR3_COMPILER_H_ - -#include "ir3_shader.h" - -struct ir3_ra_reg_set; - -struct ir3_compiler { - struct fd_device *dev; - uint32_t gpu_id; - struct ir3_ra_reg_set *set; - uint32_t shader_count; - - /* - * Configuration options for things that are handled differently on - * different generations: - */ - - /* a4xx (and later) drops SP_FS_FLAT_SHAD_MODE_REG_* for flat-interpolate - * so we need to use ldlv.u32 to load the varying directly: - */ - bool flat_bypass; - - /* on a3xx, we need to add one to # of array levels: - */ - bool levels_add_one; - - /* on a3xx, we need to scale up integer coords for isaml based - * on LoD: - */ - bool unminify_coords; - - /* on a3xx do txf_ms w/ isaml and scaled coords: */ - bool txf_ms_with_isaml; - - /* on a4xx, for array textures we need to add 0.5 to the array - * index coordinate: - */ - bool array_index_add_half; -}; - -struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id); - -int ir3_compile_shader_nir(struct ir3_compiler *compiler, - struct ir3_shader_variant *so); - -#endif /* IR3_COMPILER_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,3823 +0,0 @@ -/* - * Copyright (C) 2015 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the 
following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include - -#include "pipe/p_state.h" -#include "util/u_string.h" -#include "util/u_memory.h" -#include "util/u_inlines.h" - -#include "freedreno_util.h" - -#include "ir3_compiler.h" -#include "ir3_shader.h" -#include "ir3_nir.h" - -#include "instr-a3xx.h" -#include "ir3.h" - - -struct ir3_context { - struct ir3_compiler *compiler; - - struct nir_shader *s; - - struct nir_instr *cur_instr; /* current instruction, just for debug */ - - struct ir3 *ir; - struct ir3_shader_variant *so; - - struct ir3_block *block; /* the current block */ - struct ir3_block *in_block; /* block created for shader inputs */ - - nir_function_impl *impl; - - /* For fragment shaders, varyings are not actual shader inputs, - * instead the hw passes a varying-coord which is used with - * bary.f. - * - * But NIR doesn't know that, it still declares varyings as - * inputs. So we do all the input tracking normally and fix - * things up after compile_instructions() - * - * NOTE that frag_vcoord is the hardware position (possibly it - * is actually an index or tag or some such.. it is *not* - * values that can be directly used for gl_FragCoord..) 
- */ - struct ir3_instruction *frag_vcoord; - - /* for fragment shaders, for gl_FrontFacing and gl_FragCoord: */ - struct ir3_instruction *frag_face, *frag_coord; - - /* For vertex shaders, keep track of the system values sources */ - struct ir3_instruction *vertex_id, *basevertex, *instance_id; - - /* For fragment shaders: */ - struct ir3_instruction *samp_id, *samp_mask_in; - - /* Compute shader inputs: */ - struct ir3_instruction *local_invocation_id, *work_group_id; - - /* mapping from nir_register to defining instruction: */ - struct hash_table *def_ht; - - unsigned num_arrays; - - /* a common pattern for indirect addressing is to request the - * same address register multiple times. To avoid generating - * duplicate instruction sequences (which our backend does not - * try to clean up, since that should be done as the NIR stage) - * we cache the address value generated for a given src value: - * - * Note that we have to cache these per alignment, since same - * src used for an array of vec1 cannot be also used for an - * array of vec4. - */ - struct hash_table *addr_ht[4]; - - /* last dst array, for indirect we need to insert a var-store. - */ - struct ir3_instruction **last_dst; - unsigned last_dst_n; - - /* maps nir_block to ir3_block, mostly for the purposes of - * figuring out the blocks successors - */ - struct hash_table *block_ht; - - /* on a4xx, bitmask of samplers which need astc+srgb workaround: */ - unsigned astc_srgb; - - unsigned samples; /* bitmask of x,y sample shifts */ - - unsigned max_texture_index; - - /* set if we encounter something we can't handle yet, so we - * can bail cleanly and fallback to TGSI compiler f/e - */ - bool error; -}; - -/* gpu pointer size in units of 32bit registers/slots */ -static unsigned pointer_size(struct ir3_context *ctx) -{ - return (ctx->compiler->gpu_id >= 500) ? 
2 : 1; -} - -static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val); -static struct ir3_block * get_block(struct ir3_context *ctx, const nir_block *nblock); - - -static struct ir3_context * -compile_init(struct ir3_compiler *compiler, - struct ir3_shader_variant *so) -{ - struct ir3_context *ctx = rzalloc(NULL, struct ir3_context); - - if (compiler->gpu_id >= 400) { - if (so->type == SHADER_VERTEX) { - ctx->astc_srgb = so->key.vastc_srgb; - } else if (so->type == SHADER_FRAGMENT) { - ctx->astc_srgb = so->key.fastc_srgb; - } - - } else { - if (so->type == SHADER_VERTEX) { - ctx->samples = so->key.vsamples; - } else if (so->type == SHADER_FRAGMENT) { - ctx->samples = so->key.fsamples; - } - } - - ctx->compiler = compiler; - ctx->so = so; - ctx->def_ht = _mesa_hash_table_create(ctx, - _mesa_hash_pointer, _mesa_key_pointer_equal); - ctx->block_ht = _mesa_hash_table_create(ctx, - _mesa_hash_pointer, _mesa_key_pointer_equal); - - /* TODO: maybe generate some sort of bitmask of what key - * lowers vs what shader has (ie. no need to lower - * texture clamp lowering if no texture sample instrs).. - * although should be done further up the stack to avoid - * creating duplicate variants.. 
- */ - - if (ir3_key_lowers_nir(&so->key)) { - nir_shader *s = nir_shader_clone(ctx, so->shader->nir); - ctx->s = ir3_optimize_nir(so->shader, s, &so->key); - } else { - /* fast-path for shader key that lowers nothing in NIR: */ - ctx->s = so->shader->nir; - } - - /* this needs to be the last pass run, so do this here instead of - * in ir3_optimize_nir(): - */ - NIR_PASS_V(ctx->s, nir_lower_locals_to_regs); - NIR_PASS_V(ctx->s, nir_convert_from_ssa, true); - - if (fd_mesa_debug & FD_DBG_DISASM) { - DBG("dump nir%dv%d: type=%d, k={cts=%u,hp=%u}", - so->shader->id, so->id, so->type, - so->key.color_two_side, so->key.half_precision); - nir_print_shader(ctx->s, stdout); - } - - if (shader_debug_enabled(so->type)) { - fprintf(stderr, "NIR (final form) for %s shader:\n", - shader_stage_name(so->type)); - nir_print_shader(ctx->s, stderr); - } - - ir3_nir_scan_driver_consts(ctx->s, &so->const_layout); - - so->num_uniforms = ctx->s->num_uniforms; - so->num_ubos = ctx->s->info.num_ubos; - - /* Layout of constant registers, each section aligned to vec4. Note - * that pointer size (ubo, etc) changes depending on generation. - * - * user consts - * UBO addresses - * SSBO sizes - * if (vertex shader) { - * driver params (IR3_DP_*) - * if (stream_output.num_outputs > 0) - * stream-out addresses - * } - * immediates - * - * Immediates go last mostly because they are inserted in the CP pass - * after the nir -> ir3 frontend. 
- */ - unsigned constoff = align(ctx->s->num_uniforms, 4); - unsigned ptrsz = pointer_size(ctx); - - memset(&so->constbase, ~0, sizeof(so->constbase)); - - if (so->num_ubos > 0) { - so->constbase.ubo = constoff; - constoff += align(ctx->s->info.num_ubos * ptrsz, 4) / 4; - } - - if (so->const_layout.ssbo_size.count > 0) { - unsigned cnt = so->const_layout.ssbo_size.count; - so->constbase.ssbo_sizes = constoff; - constoff += align(cnt, 4) / 4; - } - - if (so->const_layout.image_dims.count > 0) { - unsigned cnt = so->const_layout.image_dims.count; - so->constbase.image_dims = constoff; - constoff += align(cnt, 4) / 4; - } - - unsigned num_driver_params = 0; - if (so->type == SHADER_VERTEX) { - num_driver_params = IR3_DP_VS_COUNT; - } else if (so->type == SHADER_COMPUTE) { - num_driver_params = IR3_DP_CS_COUNT; - } - - so->constbase.driver_param = constoff; - constoff += align(num_driver_params, 4) / 4; - - if ((so->type == SHADER_VERTEX) && - (compiler->gpu_id < 500) && - so->shader->stream_output.num_outputs > 0) { - so->constbase.tfbo = constoff; - constoff += align(PIPE_MAX_SO_BUFFERS * ptrsz, 4) / 4; - } - - so->constbase.immediate = constoff; - - return ctx; -} - -static void -compile_error(struct ir3_context *ctx, const char *format, ...) 
-{ - struct hash_table *errors = NULL; - va_list ap; - va_start(ap, format); - if (ctx->cur_instr) { - errors = _mesa_hash_table_create(NULL, - _mesa_hash_pointer, - _mesa_key_pointer_equal); - char *msg = ralloc_vasprintf(errors, format, ap); - _mesa_hash_table_insert(errors, ctx->cur_instr, msg); - } else { - _debug_vprintf(format, ap); - } - va_end(ap); - nir_print_shader_annotated(ctx->s, stdout, errors); - ralloc_free(errors); - ctx->error = true; - debug_assert(0); -} - -#define compile_assert(ctx, cond) do { \ - if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \ - } while (0) - -static void -compile_free(struct ir3_context *ctx) -{ - ralloc_free(ctx); -} - -static void -declare_array(struct ir3_context *ctx, nir_register *reg) -{ - struct ir3_array *arr = rzalloc(ctx, struct ir3_array); - arr->id = ++ctx->num_arrays; - /* NOTE: sometimes we get non array regs, for example for arrays of - * length 1. See fs-const-array-of-struct-of-array.shader_test. So - * treat a non-array as if it was an array of length 1. - * - * It would be nice if there was a nir pass to convert arrays of - * length 1 to ssa. 
- */ - arr->length = reg->num_components * MAX2(1, reg->num_array_elems); - compile_assert(ctx, arr->length > 0); - arr->r = reg; - list_addtail(&arr->node, &ctx->ir->array_list); -} - -static struct ir3_array * -get_array(struct ir3_context *ctx, nir_register *reg) -{ - list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) { - if (arr->r == reg) - return arr; - } - compile_error(ctx, "bogus reg: %s\n", reg->name); - return NULL; -} - -/* relative (indirect) if address!=NULL */ -static struct ir3_instruction * -create_array_load(struct ir3_context *ctx, struct ir3_array *arr, int n, - struct ir3_instruction *address) -{ - struct ir3_block *block = ctx->block; - struct ir3_instruction *mov; - struct ir3_register *src; - - mov = ir3_instr_create(block, OPC_MOV); - mov->cat1.src_type = TYPE_U32; - mov->cat1.dst_type = TYPE_U32; - mov->barrier_class = IR3_BARRIER_ARRAY_R; - mov->barrier_conflict = IR3_BARRIER_ARRAY_W; - ir3_reg_create(mov, 0, 0); - src = ir3_reg_create(mov, 0, IR3_REG_ARRAY | - COND(address, IR3_REG_RELATIV)); - src->instr = arr->last_write; - src->size = arr->length; - src->array.id = arr->id; - src->array.offset = n; - - if (address) - ir3_instr_set_address(mov, address); - - return mov; -} - -/* relative (indirect) if address!=NULL */ -static void -create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n, - struct ir3_instruction *src, struct ir3_instruction *address) -{ - struct ir3_block *block = ctx->block; - struct ir3_instruction *mov; - struct ir3_register *dst; - - /* if not relative store, don't create an extra mov, since that - * ends up being difficult for cp to remove. 
- */ - if (!address) { - dst = src->regs[0]; - - src->barrier_class |= IR3_BARRIER_ARRAY_W; - src->barrier_conflict |= IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W; - - dst->flags |= IR3_REG_ARRAY; - dst->instr = arr->last_write; - dst->size = arr->length; - dst->array.id = arr->id; - dst->array.offset = n; - - arr->last_write = src; - - array_insert(block, block->keeps, src); - - return; - } - - mov = ir3_instr_create(block, OPC_MOV); - mov->cat1.src_type = TYPE_U32; - mov->cat1.dst_type = TYPE_U32; - mov->barrier_class = IR3_BARRIER_ARRAY_W; - mov->barrier_conflict = IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W; - dst = ir3_reg_create(mov, 0, IR3_REG_ARRAY | - COND(address, IR3_REG_RELATIV)); - dst->instr = arr->last_write; - dst->size = arr->length; - dst->array.id = arr->id; - dst->array.offset = n; - ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = src; - - if (address) - ir3_instr_set_address(mov, address); - - arr->last_write = mov; - - /* the array store may only matter to something in an earlier - * block (ie. loops), but since arrays are not in SSA, depth - * pass won't know this.. 
so keep all array stores: - */ - array_insert(block, block->keeps, mov); -} - -static inline type_t utype_for_size(unsigned bit_size) -{ - switch (bit_size) { - case 32: return TYPE_U32; - case 16: return TYPE_U16; - case 8: return TYPE_U8; - default: unreachable("bad bitsize"); return ~0; - } -} - -static inline type_t utype_src(nir_src src) -{ return utype_for_size(nir_src_bit_size(src)); } - -static inline type_t utype_dst(nir_dest dst) -{ return utype_for_size(nir_dest_bit_size(dst)); } - -/* allocate a n element value array (to be populated by caller) and - * insert in def_ht - */ -static struct ir3_instruction ** -get_dst_ssa(struct ir3_context *ctx, nir_ssa_def *dst, unsigned n) -{ - struct ir3_instruction **value = - ralloc_array(ctx->def_ht, struct ir3_instruction *, n); - _mesa_hash_table_insert(ctx->def_ht, dst, value); - return value; -} - -static struct ir3_instruction ** -get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n) -{ - struct ir3_instruction **value; - - if (dst->is_ssa) { - value = get_dst_ssa(ctx, &dst->ssa, n); - } else { - value = ralloc_array(ctx, struct ir3_instruction *, n); - } - - /* NOTE: in non-ssa case, we don't really need to store last_dst - * but this helps us catch cases where put_dst() call is forgotten - */ - compile_assert(ctx, !ctx->last_dst); - ctx->last_dst = value; - ctx->last_dst_n = n; - - return value; -} - -static struct ir3_instruction * get_addr(struct ir3_context *ctx, struct ir3_instruction *src, int align); - -static struct ir3_instruction * const * -get_src(struct ir3_context *ctx, nir_src *src) -{ - if (src->is_ssa) { - struct hash_entry *entry; - entry = _mesa_hash_table_search(ctx->def_ht, src->ssa); - compile_assert(ctx, entry); - return entry->data; - } else { - nir_register *reg = src->reg.reg; - struct ir3_array *arr = get_array(ctx, reg); - unsigned num_components = arr->r->num_components; - struct ir3_instruction *addr = NULL; - struct ir3_instruction **value = - ralloc_array(ctx, struct 
ir3_instruction *, num_components); - - if (src->reg.indirect) - addr = get_addr(ctx, get_src(ctx, src->reg.indirect)[0], - reg->num_components); - - for (unsigned i = 0; i < num_components; i++) { - unsigned n = src->reg.base_offset * reg->num_components + i; - compile_assert(ctx, n < arr->length); - value[i] = create_array_load(ctx, arr, n, addr); - } - - return value; - } -} - -static void -put_dst(struct ir3_context *ctx, nir_dest *dst) -{ - unsigned bit_size = nir_dest_bit_size(*dst); - - if (bit_size < 32) { - for (unsigned i = 0; i < ctx->last_dst_n; i++) { - struct ir3_instruction *dst = ctx->last_dst[i]; - dst->regs[0]->flags |= IR3_REG_HALF; - if (ctx->last_dst[i]->opc == OPC_META_FO) - dst->regs[1]->instr->regs[0]->flags |= IR3_REG_HALF; - } - } - - if (!dst->is_ssa) { - nir_register *reg = dst->reg.reg; - struct ir3_array *arr = get_array(ctx, reg); - unsigned num_components = ctx->last_dst_n; - struct ir3_instruction *addr = NULL; - - if (dst->reg.indirect) - addr = get_addr(ctx, get_src(ctx, dst->reg.indirect)[0], - reg->num_components); - - for (unsigned i = 0; i < num_components; i++) { - unsigned n = dst->reg.base_offset * reg->num_components + i; - compile_assert(ctx, n < arr->length); - if (!ctx->last_dst[i]) - continue; - create_array_store(ctx, arr, n, ctx->last_dst[i], addr); - } - - ralloc_free(ctx->last_dst); - } - ctx->last_dst = NULL; - ctx->last_dst_n = 0; -} - -static struct ir3_instruction * -create_immed_typed(struct ir3_block *block, uint32_t val, type_t type) -{ - struct ir3_instruction *mov; - unsigned flags = (type_size(type) < 32) ? 
IR3_REG_HALF : 0; - - mov = ir3_instr_create(block, OPC_MOV); - mov->cat1.src_type = type; - mov->cat1.dst_type = type; - ir3_reg_create(mov, 0, flags); - ir3_reg_create(mov, 0, IR3_REG_IMMED)->uim_val = val; - - return mov; -} - -static struct ir3_instruction * -create_immed(struct ir3_block *block, uint32_t val) -{ - return create_immed_typed(block, val, TYPE_U32); -} - -static struct ir3_instruction * -create_addr(struct ir3_block *block, struct ir3_instruction *src, int align) -{ - struct ir3_instruction *instr, *immed; - - /* TODO in at least some cases, the backend could probably be - * made clever enough to propagate IR3_REG_HALF.. - */ - instr = ir3_COV(block, src, TYPE_U32, TYPE_S16); - instr->regs[0]->flags |= IR3_REG_HALF; - - switch(align){ - case 1: - /* src *= 1: */ - break; - case 2: - /* src *= 2 => src <<= 1: */ - immed = create_immed(block, 1); - immed->regs[0]->flags |= IR3_REG_HALF; - - instr = ir3_SHL_B(block, instr, 0, immed, 0); - instr->regs[0]->flags |= IR3_REG_HALF; - instr->regs[1]->flags |= IR3_REG_HALF; - break; - case 3: - /* src *= 3: */ - immed = create_immed(block, 3); - immed->regs[0]->flags |= IR3_REG_HALF; - - instr = ir3_MULL_U(block, instr, 0, immed, 0); - instr->regs[0]->flags |= IR3_REG_HALF; - instr->regs[1]->flags |= IR3_REG_HALF; - break; - case 4: - /* src *= 4 => src <<= 2: */ - immed = create_immed(block, 2); - immed->regs[0]->flags |= IR3_REG_HALF; - - instr = ir3_SHL_B(block, instr, 0, immed, 0); - instr->regs[0]->flags |= IR3_REG_HALF; - instr->regs[1]->flags |= IR3_REG_HALF; - break; - default: - unreachable("bad align"); - return NULL; - } - - instr = ir3_MOV(block, instr, TYPE_S16); - instr->regs[0]->num = regid(REG_A0, 0); - instr->regs[0]->flags |= IR3_REG_HALF; - instr->regs[1]->flags |= IR3_REG_HALF; - - return instr; -} - -/* caches addr values to avoid generating multiple cov/shl/mova - * sequences for each use of a given NIR level src as address - */ -static struct ir3_instruction * -get_addr(struct 
ir3_context *ctx, struct ir3_instruction *src, int align) -{ - struct ir3_instruction *addr; - unsigned idx = align - 1; - - compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr_ht)); - - if (!ctx->addr_ht[idx]) { - ctx->addr_ht[idx] = _mesa_hash_table_create(ctx, - _mesa_hash_pointer, _mesa_key_pointer_equal); - } else { - struct hash_entry *entry; - entry = _mesa_hash_table_search(ctx->addr_ht[idx], src); - if (entry) - return entry->data; - } - - addr = create_addr(ctx->block, src, align); - _mesa_hash_table_insert(ctx->addr_ht[idx], src, addr); - - return addr; -} - -static struct ir3_instruction * -get_predicate(struct ir3_context *ctx, struct ir3_instruction *src) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction *cond; - - /* NOTE: only cmps.*.* can write p0.x: */ - cond = ir3_CMPS_S(b, src, 0, create_immed(b, 0), 0); - cond->cat2.condition = IR3_COND_NE; - - /* condition always goes in predicate register: */ - cond->regs[0]->num = regid(REG_P0, 0); - - return cond; -} - -static struct ir3_instruction * -create_uniform(struct ir3_context *ctx, unsigned n) -{ - struct ir3_instruction *mov; - - mov = ir3_instr_create(ctx->block, OPC_MOV); - /* TODO get types right? 
*/ - mov->cat1.src_type = TYPE_F32; - mov->cat1.dst_type = TYPE_F32; - ir3_reg_create(mov, 0, 0); - ir3_reg_create(mov, n, IR3_REG_CONST); - - return mov; -} - -static struct ir3_instruction * -create_uniform_indirect(struct ir3_context *ctx, int n, - struct ir3_instruction *address) -{ - struct ir3_instruction *mov; - - mov = ir3_instr_create(ctx->block, OPC_MOV); - mov->cat1.src_type = TYPE_U32; - mov->cat1.dst_type = TYPE_U32; - ir3_reg_create(mov, 0, 0); - ir3_reg_create(mov, 0, IR3_REG_CONST | IR3_REG_RELATIV)->array.offset = n; - - ir3_instr_set_address(mov, address); - - return mov; -} - -static struct ir3_instruction * -create_collect(struct ir3_context *ctx, struct ir3_instruction *const *arr, - unsigned arrsz) -{ - struct ir3_block *block = ctx->block; - struct ir3_instruction *collect; - - if (arrsz == 0) - return NULL; - - unsigned flags = arr[0]->regs[0]->flags & IR3_REG_HALF; - - collect = ir3_instr_create2(block, OPC_META_FI, 1 + arrsz); - ir3_reg_create(collect, 0, flags); /* dst */ - for (unsigned i = 0; i < arrsz; i++) { - struct ir3_instruction *elem = arr[i]; - - /* Since arrays are pre-colored in RA, we can't assume that - * things will end up in the right place. (Ie. if a collect - * joins elements from two different arrays.) So insert an - * extra mov. - * - * We could possibly skip this if all the collected elements - * are contiguous elements in a single array.. not sure how - * likely that is to happen. - * - * Fixes a problem with glamor shaders, that in effect do - * something like: - * - * if (foo) - * texcoord = .. - * else - * texcoord = .. - * color = texture2D(tex, texcoord); - * - * In this case, texcoord will end up as nir registers (which - * translate to ir3 array's of length 1. And we can't assume - * the two (or more) arrays will get allocated in consecutive - * scalar registers. - * - */ - if (elem->regs[0]->flags & IR3_REG_ARRAY) { - type_t type = (flags & IR3_REG_HALF) ? 
TYPE_U16 : TYPE_U32; - elem = ir3_MOV(block, elem, type); - } - - compile_assert(ctx, (elem->regs[0]->flags & IR3_REG_HALF) == flags); - ir3_reg_create(collect, 0, IR3_REG_SSA | flags)->instr = elem; - } - - return collect; -} - -static struct ir3_instruction * -create_indirect_load(struct ir3_context *ctx, unsigned arrsz, int n, - struct ir3_instruction *address, struct ir3_instruction *collect) -{ - struct ir3_block *block = ctx->block; - struct ir3_instruction *mov; - struct ir3_register *src; - - mov = ir3_instr_create(block, OPC_MOV); - mov->cat1.src_type = TYPE_U32; - mov->cat1.dst_type = TYPE_U32; - ir3_reg_create(mov, 0, 0); - src = ir3_reg_create(mov, 0, IR3_REG_SSA | IR3_REG_RELATIV); - src->instr = collect; - src->size = arrsz; - src->array.offset = n; - - ir3_instr_set_address(mov, address); - - return mov; -} - -static struct ir3_instruction * -create_input_compmask(struct ir3_context *ctx, unsigned n, unsigned compmask) -{ - struct ir3_instruction *in; - - in = ir3_instr_create(ctx->in_block, OPC_META_INPUT); - in->inout.block = ctx->in_block; - ir3_reg_create(in, n, 0); - - in->regs[0]->wrmask = compmask; - - return in; -} - -static struct ir3_instruction * -create_input(struct ir3_context *ctx, unsigned n) -{ - return create_input_compmask(ctx, n, 0x1); -} - -static struct ir3_instruction * -create_frag_input(struct ir3_context *ctx, bool use_ldlv) -{ - struct ir3_block *block = ctx->block; - struct ir3_instruction *instr; - /* actual inloc is assigned and fixed up later: */ - struct ir3_instruction *inloc = create_immed(block, 0); - - if (use_ldlv) { - instr = ir3_LDLV(block, inloc, 0, create_immed(block, 1), 0); - instr->cat6.type = TYPE_U32; - instr->cat6.iim_val = 1; - } else { - instr = ir3_BARY_F(block, inloc, 0, ctx->frag_vcoord, 0); - instr->regs[2]->wrmask = 0x3; - } - - return instr; -} - -static struct ir3_instruction * -create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp) -{ - /* first four vec4 sysval's reserved for 
UBOs: */ - /* NOTE: dp is in scalar, but there can be >4 dp components: */ - unsigned n = ctx->so->constbase.driver_param; - unsigned r = regid(n + dp / 4, dp % 4); - return create_uniform(ctx, r); -} - -/* helper for instructions that produce multiple consecutive scalar - * outputs which need to have a split/fanout meta instruction inserted - */ -static void -split_dest(struct ir3_block *block, struct ir3_instruction **dst, - struct ir3_instruction *src, unsigned base, unsigned n) -{ - struct ir3_instruction *prev = NULL; - - if ((n == 1) && (src->regs[0]->wrmask == 0x1)) { - dst[0] = src; - return; - } - - for (int i = 0, j = 0; i < n; i++) { - struct ir3_instruction *split = ir3_instr_create(block, OPC_META_FO); - ir3_reg_create(split, 0, IR3_REG_SSA); - ir3_reg_create(split, 0, IR3_REG_SSA)->instr = src; - split->fo.off = i + base; - - if (prev) { - split->cp.left = prev; - split->cp.left_cnt++; - prev->cp.right = split; - prev->cp.right_cnt++; - } - prev = split; - - if (src->regs[0]->wrmask & (1 << (i + base))) - dst[j++] = split; - } -} - -/* - * Adreno uses uint rather than having dedicated bool type, - * which (potentially) requires some conversion, in particular - * when using output of an bool instr to int input, or visa - * versa. - * - * | Adreno | NIR | - * -------+---------+-------+- - * true | 1 | ~0 | - * false | 0 | 0 | - * - * To convert from an adreno bool (uint) to nir, use: - * - * absneg.s dst, (neg)src - * - * To convert back in the other direction: - * - * absneg.s dst, (abs)arc - * - * The CP step can clean up the absneg.s that cancel each other - * out, and with a slight bit of extra cleverness (to recognize - * the instructions which produce either a 0 or 1) can eliminate - * the absneg.s's completely when an instruction that wants - * 0/1 consumes the result. For example, when a nir 'bcsel' - * consumes the result of 'feq'. 
So we should be able to get by - * without a boolean resolve step, and without incuring any - * extra penalty in instruction count. - */ - -/* NIR bool -> native (adreno): */ -static struct ir3_instruction * -ir3_b2n(struct ir3_block *block, struct ir3_instruction *instr) -{ - return ir3_ABSNEG_S(block, instr, IR3_REG_SABS); -} - -/* native (adreno) -> NIR bool: */ -static struct ir3_instruction * -ir3_n2b(struct ir3_block *block, struct ir3_instruction *instr) -{ - return ir3_ABSNEG_S(block, instr, IR3_REG_SNEG); -} - -/* - * alu/sfu instructions: - */ - -static struct ir3_instruction * -create_cov(struct ir3_context *ctx, struct ir3_instruction *src, - unsigned src_bitsize, nir_op op) -{ - type_t src_type, dst_type; - - switch (op) { - case nir_op_f2f32: - case nir_op_f2f16_rtne: - case nir_op_f2f16_rtz: - case nir_op_f2f16: - case nir_op_f2i32: - case nir_op_f2i16: - case nir_op_f2i8: - case nir_op_f2u32: - case nir_op_f2u16: - case nir_op_f2u8: - switch (src_bitsize) { - case 32: - src_type = TYPE_F32; - break; - case 16: - src_type = TYPE_F16; - break; - default: - compile_error(ctx, "invalid src bit size: %u", src_bitsize); - } - break; - - case nir_op_i2f32: - case nir_op_i2f16: - case nir_op_i2i32: - case nir_op_i2i16: - case nir_op_i2i8: - switch (src_bitsize) { - case 32: - src_type = TYPE_S32; - break; - case 16: - src_type = TYPE_S16; - break; - case 8: - src_type = TYPE_S8; - break; - default: - compile_error(ctx, "invalid src bit size: %u", src_bitsize); - } - break; - - case nir_op_u2f32: - case nir_op_u2f16: - case nir_op_u2u32: - case nir_op_u2u16: - case nir_op_u2u8: - switch (src_bitsize) { - case 32: - src_type = TYPE_U32; - break; - case 16: - src_type = TYPE_U16; - break; - case 8: - src_type = TYPE_U8; - break; - default: - compile_error(ctx, "invalid src bit size: %u", src_bitsize); - } - break; - - default: - compile_error(ctx, "invalid conversion op: %u", op); - } - - switch (op) { - case nir_op_f2f32: - case nir_op_i2f32: - case 
nir_op_u2f32: - dst_type = TYPE_F32; - break; - - case nir_op_f2f16_rtne: - case nir_op_f2f16_rtz: - case nir_op_f2f16: - /* TODO how to handle rounding mode? */ - case nir_op_i2f16: - case nir_op_u2f16: - dst_type = TYPE_F16; - break; - - case nir_op_f2i32: - case nir_op_i2i32: - dst_type = TYPE_S32; - break; - - case nir_op_f2i16: - case nir_op_i2i16: - dst_type = TYPE_S16; - break; - - case nir_op_f2i8: - case nir_op_i2i8: - dst_type = TYPE_S8; - break; - - case nir_op_f2u32: - case nir_op_u2u32: - dst_type = TYPE_U32; - break; - - case nir_op_f2u16: - case nir_op_u2u16: - dst_type = TYPE_U16; - break; - - case nir_op_f2u8: - case nir_op_u2u8: - dst_type = TYPE_U8; - break; - - default: - compile_error(ctx, "invalid conversion op: %u", op); - } - - return ir3_COV(ctx->block, src, src_type, dst_type); -} - -static void -emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) -{ - const nir_op_info *info = &nir_op_infos[alu->op]; - struct ir3_instruction **dst, *src[info->num_inputs]; - unsigned bs[info->num_inputs]; /* bit size */ - struct ir3_block *b = ctx->block; - unsigned dst_sz, wrmask; - - if (alu->dest.dest.is_ssa) { - dst_sz = alu->dest.dest.ssa.num_components; - wrmask = (1 << dst_sz) - 1; - } else { - dst_sz = alu->dest.dest.reg.reg->num_components; - wrmask = alu->dest.write_mask; - } - - dst = get_dst(ctx, &alu->dest.dest, dst_sz); - - /* Vectors are special in that they have non-scalarized writemasks, - * and just take the first swizzle channel for each argument in - * order into each writemask channel. 
- */ - if ((alu->op == nir_op_vec2) || - (alu->op == nir_op_vec3) || - (alu->op == nir_op_vec4)) { - - for (int i = 0; i < info->num_inputs; i++) { - nir_alu_src *asrc = &alu->src[i]; - - compile_assert(ctx, !asrc->abs); - compile_assert(ctx, !asrc->negate); - - src[i] = get_src(ctx, &asrc->src)[asrc->swizzle[0]]; - if (!src[i]) - src[i] = create_immed(ctx->block, 0); - dst[i] = ir3_MOV(b, src[i], TYPE_U32); - } - - put_dst(ctx, &alu->dest.dest); - return; - } - - /* We also get mov's with more than one component for mov's so - * handle those specially: - */ - if ((alu->op == nir_op_imov) || (alu->op == nir_op_fmov)) { - type_t type = (alu->op == nir_op_imov) ? TYPE_U32 : TYPE_F32; - nir_alu_src *asrc = &alu->src[0]; - struct ir3_instruction *const *src0 = get_src(ctx, &asrc->src); - - for (unsigned i = 0; i < dst_sz; i++) { - if (wrmask & (1 << i)) { - dst[i] = ir3_MOV(b, src0[asrc->swizzle[i]], type); - } else { - dst[i] = NULL; - } - } - - put_dst(ctx, &alu->dest.dest); - return; - } - - /* General case: We can just grab the one used channel per src. 
*/ - for (int i = 0; i < info->num_inputs; i++) { - unsigned chan = ffs(alu->dest.write_mask) - 1; - nir_alu_src *asrc = &alu->src[i]; - - compile_assert(ctx, !asrc->abs); - compile_assert(ctx, !asrc->negate); - - src[i] = get_src(ctx, &asrc->src)[asrc->swizzle[chan]]; - bs[i] = nir_src_bit_size(asrc->src); - - compile_assert(ctx, src[i]); - } - - switch (alu->op) { - case nir_op_f2f32: - case nir_op_f2f16_rtne: - case nir_op_f2f16_rtz: - case nir_op_f2f16: - case nir_op_f2i32: - case nir_op_f2i16: - case nir_op_f2i8: - case nir_op_f2u32: - case nir_op_f2u16: - case nir_op_f2u8: - case nir_op_i2f32: - case nir_op_i2f16: - case nir_op_i2i32: - case nir_op_i2i16: - case nir_op_i2i8: - case nir_op_u2f32: - case nir_op_u2f16: - case nir_op_u2u32: - case nir_op_u2u16: - case nir_op_u2u8: - dst[0] = create_cov(ctx, src[0], bs[0], alu->op); - break; - case nir_op_f2b: - dst[0] = ir3_CMPS_F(b, src[0], 0, create_immed(b, fui(0.0)), 0); - dst[0]->cat2.condition = IR3_COND_NE; - dst[0] = ir3_n2b(b, dst[0]); - break; - case nir_op_b2f: - dst[0] = ir3_COV(b, ir3_b2n(b, src[0]), TYPE_U32, TYPE_F32); - break; - case nir_op_b2i: - dst[0] = ir3_b2n(b, src[0]); - break; - case nir_op_i2b: - dst[0] = ir3_CMPS_S(b, src[0], 0, create_immed(b, 0), 0); - dst[0]->cat2.condition = IR3_COND_NE; - dst[0] = ir3_n2b(b, dst[0]); - break; - - case nir_op_fneg: - dst[0] = ir3_ABSNEG_F(b, src[0], IR3_REG_FNEG); - break; - case nir_op_fabs: - dst[0] = ir3_ABSNEG_F(b, src[0], IR3_REG_FABS); - break; - case nir_op_fmax: - dst[0] = ir3_MAX_F(b, src[0], 0, src[1], 0); - break; - case nir_op_fmin: - dst[0] = ir3_MIN_F(b, src[0], 0, src[1], 0); - break; - case nir_op_fsat: - /* if there is just a single use of the src, and it supports - * (sat) bit, we can just fold the (sat) flag back to the - * src instruction and create a mov. This is easier for cp - * to eliminate. 
- * - * TODO probably opc_cat==4 is ok too - */ - if (alu->src[0].src.is_ssa && - (list_length(&alu->src[0].src.ssa->uses) == 1) && - ((opc_cat(src[0]->opc) == 2) || (opc_cat(src[0]->opc) == 3))) { - src[0]->flags |= IR3_INSTR_SAT; - dst[0] = ir3_MOV(b, src[0], TYPE_U32); - } else { - /* otherwise generate a max.f that saturates.. blob does - * similar (generating a cat2 mov using max.f) - */ - dst[0] = ir3_MAX_F(b, src[0], 0, src[0], 0); - dst[0]->flags |= IR3_INSTR_SAT; - } - break; - case nir_op_fmul: - dst[0] = ir3_MUL_F(b, src[0], 0, src[1], 0); - break; - case nir_op_fadd: - dst[0] = ir3_ADD_F(b, src[0], 0, src[1], 0); - break; - case nir_op_fsub: - dst[0] = ir3_ADD_F(b, src[0], 0, src[1], IR3_REG_FNEG); - break; - case nir_op_ffma: - dst[0] = ir3_MAD_F32(b, src[0], 0, src[1], 0, src[2], 0); - break; - case nir_op_fddx: - dst[0] = ir3_DSX(b, src[0], 0); - dst[0]->cat5.type = TYPE_F32; - break; - case nir_op_fddy: - dst[0] = ir3_DSY(b, src[0], 0); - dst[0]->cat5.type = TYPE_F32; - break; - break; - case nir_op_flt: - dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0); - dst[0]->cat2.condition = IR3_COND_LT; - dst[0] = ir3_n2b(b, dst[0]); - break; - case nir_op_fge: - dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0); - dst[0]->cat2.condition = IR3_COND_GE; - dst[0] = ir3_n2b(b, dst[0]); - break; - case nir_op_feq: - dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0); - dst[0]->cat2.condition = IR3_COND_EQ; - dst[0] = ir3_n2b(b, dst[0]); - break; - case nir_op_fne: - dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0); - dst[0]->cat2.condition = IR3_COND_NE; - dst[0] = ir3_n2b(b, dst[0]); - break; - case nir_op_fceil: - dst[0] = ir3_CEIL_F(b, src[0], 0); - break; - case nir_op_ffloor: - dst[0] = ir3_FLOOR_F(b, src[0], 0); - break; - case nir_op_ftrunc: - dst[0] = ir3_TRUNC_F(b, src[0], 0); - break; - case nir_op_fround_even: - dst[0] = ir3_RNDNE_F(b, src[0], 0); - break; - case nir_op_fsign: - dst[0] = ir3_SIGN_F(b, src[0], 0); - break; - - case nir_op_fsin: - dst[0] = ir3_SIN(b, 
src[0], 0); - break; - case nir_op_fcos: - dst[0] = ir3_COS(b, src[0], 0); - break; - case nir_op_frsq: - dst[0] = ir3_RSQ(b, src[0], 0); - break; - case nir_op_frcp: - dst[0] = ir3_RCP(b, src[0], 0); - break; - case nir_op_flog2: - dst[0] = ir3_LOG2(b, src[0], 0); - break; - case nir_op_fexp2: - dst[0] = ir3_EXP2(b, src[0], 0); - break; - case nir_op_fsqrt: - dst[0] = ir3_SQRT(b, src[0], 0); - break; - - case nir_op_iabs: - dst[0] = ir3_ABSNEG_S(b, src[0], IR3_REG_SABS); - break; - case nir_op_iadd: - dst[0] = ir3_ADD_U(b, src[0], 0, src[1], 0); - break; - case nir_op_iand: - dst[0] = ir3_AND_B(b, src[0], 0, src[1], 0); - break; - case nir_op_imax: - dst[0] = ir3_MAX_S(b, src[0], 0, src[1], 0); - break; - case nir_op_umax: - dst[0] = ir3_MAX_U(b, src[0], 0, src[1], 0); - break; - case nir_op_imin: - dst[0] = ir3_MIN_S(b, src[0], 0, src[1], 0); - break; - case nir_op_umin: - dst[0] = ir3_MIN_U(b, src[0], 0, src[1], 0); - break; - case nir_op_imul: - /* - * dst = (al * bl) + (ah * bl << 16) + (al * bh << 16) - * mull.u tmp0, a, b ; mul low, i.e. al * bl - * madsh.m16 tmp1, a, b, tmp0 ; mul-add shift high mix, i.e. ah * bl << 16 - * madsh.m16 dst, b, a, tmp1 ; i.e. al * bh << 16 - */ - dst[0] = ir3_MADSH_M16(b, src[1], 0, src[0], 0, - ir3_MADSH_M16(b, src[0], 0, src[1], 0, - ir3_MULL_U(b, src[0], 0, src[1], 0), 0), 0); - break; - case nir_op_ineg: - dst[0] = ir3_ABSNEG_S(b, src[0], IR3_REG_SNEG); - break; - case nir_op_inot: - dst[0] = ir3_NOT_B(b, src[0], 0); - break; - case nir_op_ior: - dst[0] = ir3_OR_B(b, src[0], 0, src[1], 0); - break; - case nir_op_ishl: - dst[0] = ir3_SHL_B(b, src[0], 0, src[1], 0); - break; - case nir_op_ishr: - dst[0] = ir3_ASHR_B(b, src[0], 0, src[1], 0); - break; - case nir_op_isign: { - /* maybe this would be sane to lower in nir.. 
*/ - struct ir3_instruction *neg, *pos; - - neg = ir3_CMPS_S(b, src[0], 0, create_immed(b, 0), 0); - neg->cat2.condition = IR3_COND_LT; - - pos = ir3_CMPS_S(b, src[0], 0, create_immed(b, 0), 0); - pos->cat2.condition = IR3_COND_GT; - - dst[0] = ir3_SUB_U(b, pos, 0, neg, 0); - - break; - } - case nir_op_isub: - dst[0] = ir3_SUB_U(b, src[0], 0, src[1], 0); - break; - case nir_op_ixor: - dst[0] = ir3_XOR_B(b, src[0], 0, src[1], 0); - break; - case nir_op_ushr: - dst[0] = ir3_SHR_B(b, src[0], 0, src[1], 0); - break; - case nir_op_ilt: - dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0); - dst[0]->cat2.condition = IR3_COND_LT; - dst[0] = ir3_n2b(b, dst[0]); - break; - case nir_op_ige: - dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0); - dst[0]->cat2.condition = IR3_COND_GE; - dst[0] = ir3_n2b(b, dst[0]); - break; - case nir_op_ieq: - dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0); - dst[0]->cat2.condition = IR3_COND_EQ; - dst[0] = ir3_n2b(b, dst[0]); - break; - case nir_op_ine: - dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0); - dst[0]->cat2.condition = IR3_COND_NE; - dst[0] = ir3_n2b(b, dst[0]); - break; - case nir_op_ult: - dst[0] = ir3_CMPS_U(b, src[0], 0, src[1], 0); - dst[0]->cat2.condition = IR3_COND_LT; - dst[0] = ir3_n2b(b, dst[0]); - break; - case nir_op_uge: - dst[0] = ir3_CMPS_U(b, src[0], 0, src[1], 0); - dst[0]->cat2.condition = IR3_COND_GE; - dst[0] = ir3_n2b(b, dst[0]); - break; - - case nir_op_bcsel: { - struct ir3_instruction *cond = ir3_b2n(b, src[0]); - compile_assert(ctx, bs[1] == bs[2]); - /* the boolean condition is 32b even if src[1] and src[2] are - * half-precision, but sel.b16 wants all three src's to be the - * same type. 
- */ - if (bs[1] < 32) - cond = ir3_COV(b, cond, TYPE_U32, TYPE_U16); - dst[0] = ir3_SEL_B32(b, src[1], 0, cond, 0, src[2], 0); - break; - } - case nir_op_bit_count: - dst[0] = ir3_CBITS_B(b, src[0], 0); - break; - case nir_op_ifind_msb: { - struct ir3_instruction *cmp; - dst[0] = ir3_CLZ_S(b, src[0], 0); - cmp = ir3_CMPS_S(b, dst[0], 0, create_immed(b, 0), 0); - cmp->cat2.condition = IR3_COND_GE; - dst[0] = ir3_SEL_B32(b, - ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0, - cmp, 0, dst[0], 0); - break; - } - case nir_op_ufind_msb: - dst[0] = ir3_CLZ_B(b, src[0], 0); - dst[0] = ir3_SEL_B32(b, - ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0, - src[0], 0, dst[0], 0); - break; - case nir_op_find_lsb: - dst[0] = ir3_BFREV_B(b, src[0], 0); - dst[0] = ir3_CLZ_B(b, dst[0], 0); - break; - case nir_op_bitfield_reverse: - dst[0] = ir3_BFREV_B(b, src[0], 0); - break; - - default: - compile_error(ctx, "Unhandled ALU op: %s\n", - nir_op_infos[alu->op].name); - break; - } - - put_dst(ctx, &alu->dest.dest); -} - -/* handles direct/indirect UBO reads: */ -static void -emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr, - struct ir3_instruction **dst) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction *base_lo, *base_hi, *addr, *src0, *src1; - nir_const_value *const_offset; - /* UBO addresses are the first driver params: */ - unsigned ubo = regid(ctx->so->constbase.ubo, 0); - const unsigned ptrsz = pointer_size(ctx); - - int off = 0; - - /* First src is ubo index, which could either be an immed or not: */ - src0 = get_src(ctx, &intr->src[0])[0]; - if (is_same_type_mov(src0) && - (src0->regs[1]->flags & IR3_REG_IMMED)) { - base_lo = create_uniform(ctx, ubo + (src0->regs[1]->iim_val * ptrsz)); - base_hi = create_uniform(ctx, ubo + (src0->regs[1]->iim_val * ptrsz) + 1); - } else { - base_lo = create_uniform_indirect(ctx, ubo, get_addr(ctx, src0, 4)); - base_hi = create_uniform_indirect(ctx, ubo + 1, get_addr(ctx, src0, 4)); - } - - /* note: 
on 32bit gpu's base_hi is ignored and DCE'd */ - addr = base_lo; - - const_offset = nir_src_as_const_value(intr->src[1]); - if (const_offset) { - off += const_offset->u32[0]; - } else { - /* For load_ubo_indirect, second src is indirect offset: */ - src1 = get_src(ctx, &intr->src[1])[0]; - - /* and add offset to addr: */ - addr = ir3_ADD_S(b, addr, 0, src1, 0); - } - - /* if offset is to large to encode in the ldg, split it out: */ - if ((off + (intr->num_components * 4)) > 1024) { - /* split out the minimal amount to improve the odds that - * cp can fit the immediate in the add.s instruction: - */ - unsigned off2 = off + (intr->num_components * 4) - 1024; - addr = ir3_ADD_S(b, addr, 0, create_immed(b, off2), 0); - off -= off2; - } - - if (ptrsz == 2) { - struct ir3_instruction *carry; - - /* handle 32b rollover, ie: - * if (addr < base_lo) - * base_hi++ - */ - carry = ir3_CMPS_U(b, addr, 0, base_lo, 0); - carry->cat2.condition = IR3_COND_LT; - base_hi = ir3_ADD_S(b, base_hi, 0, carry, 0); - - addr = create_collect(ctx, (struct ir3_instruction*[]){ addr, base_hi }, 2); - } - - for (int i = 0; i < intr->num_components; i++) { - struct ir3_instruction *load = - ir3_LDG(b, addr, 0, create_immed(b, 1), 0); - load->cat6.type = TYPE_U32; - load->cat6.src_offset = off + i * 4; /* byte offset */ - dst[i] = load; - } -} - -/* src[] = { buffer_index, offset }. No const_index */ -static void -emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr, - struct ir3_instruction **dst) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction *ldgb, *src0, *src1, *offset; - nir_const_value *const_offset; - - /* can this be non-const buffer_index? how do we handle that? */ - const_offset = nir_src_as_const_value(intr->src[0]); - compile_assert(ctx, const_offset); - - offset = get_src(ctx, &intr->src[1])[0]; - - /* src0 is uvec2(offset*4, 0), src1 is offset.. 
nir already *= 4: */ - src0 = create_collect(ctx, (struct ir3_instruction*[]){ - offset, - create_immed(b, 0), - }, 2); - src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0); - - ldgb = ir3_LDGB(b, create_immed(b, const_offset->u32[0]), 0, - src0, 0, src1, 0); - ldgb->regs[0]->wrmask = MASK(intr->num_components); - ldgb->cat6.iim_val = intr->num_components; - ldgb->cat6.d = 4; - ldgb->cat6.type = TYPE_U32; - ldgb->barrier_class = IR3_BARRIER_BUFFER_R; - ldgb->barrier_conflict = IR3_BARRIER_BUFFER_W; - - split_dest(b, dst, ldgb, 0, intr->num_components); -} - -/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */ -static void -emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction *stgb, *src0, *src1, *src2, *offset; - nir_const_value *const_offset; - /* TODO handle wrmask properly, see _store_shared().. but I think - * it is more a PITA than that, since blob ends up loading the - * masked components and writing them back out. - */ - unsigned wrmask = intr->const_index[0]; - unsigned ncomp = ffs(~wrmask) - 1; - - /* can this be non-const buffer_index? how do we handle that? */ - const_offset = nir_src_as_const_value(intr->src[1]); - compile_assert(ctx, const_offset); - - offset = get_src(ctx, &intr->src[2])[0]; - - /* src0 is value, src1 is offset, src2 is uvec2(offset*4, 0).. 
- * nir already *= 4: - */ - src0 = create_collect(ctx, get_src(ctx, &intr->src[0]), ncomp); - src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0); - src2 = create_collect(ctx, (struct ir3_instruction*[]){ - offset, - create_immed(b, 0), - }, 2); - - stgb = ir3_STGB(b, create_immed(b, const_offset->u32[0]), 0, - src0, 0, src1, 0, src2, 0); - stgb->cat6.iim_val = ncomp; - stgb->cat6.d = 4; - stgb->cat6.type = TYPE_U32; - stgb->barrier_class = IR3_BARRIER_BUFFER_W; - stgb->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; - - array_insert(b, b->keeps, stgb); -} - -/* src[] = { block_index } */ -static void -emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, - struct ir3_instruction **dst) -{ - /* SSBO size stored as a const starting at ssbo_sizes: */ - unsigned blk_idx = nir_src_as_const_value(intr->src[0])->u32[0]; - unsigned idx = regid(ctx->so->constbase.ssbo_sizes, 0) + - ctx->so->const_layout.ssbo_size.off[blk_idx]; - - debug_assert(ctx->so->const_layout.ssbo_size.mask & (1 << blk_idx)); - - dst[0] = create_uniform(ctx, idx); -} - -/* - * SSBO atomic intrinsics - * - * All of the SSBO atomic memory operations read a value from memory, - * compute a new value using one of the operations below, write the new - * value to memory, and return the original value read. - * - * All operations take 3 sources except CompSwap that takes 4. These - * sources represent: - * - * 0: The SSBO buffer index. - * 1: The offset into the SSBO buffer of the variable that the atomic - * operation will operate on. - * 2: The data parameter to the atomic function (i.e. the value to add - * in ssbo_atomic_add, etc). - * 3: For CompSwap only: the second data parameter. 
- */ -static struct ir3_instruction * -emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction *atomic, *ssbo, *src0, *src1, *src2, *offset; - nir_const_value *const_offset; - type_t type = TYPE_U32; - - /* can this be non-const buffer_index? how do we handle that? */ - const_offset = nir_src_as_const_value(intr->src[0]); - compile_assert(ctx, const_offset); - ssbo = create_immed(b, const_offset->u32[0]); - - offset = get_src(ctx, &intr->src[1])[0]; - - /* src0 is data (or uvec2(data, compare)) - * src1 is offset - * src2 is uvec2(offset*4, 0) (appears to be 64b byte offset) - * - * Note that nir already multiplies the offset by four - */ - src0 = get_src(ctx, &intr->src[2])[0]; - src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0); - src2 = create_collect(ctx, (struct ir3_instruction*[]){ - offset, - create_immed(b, 0), - }, 2); - - switch (intr->intrinsic) { - case nir_intrinsic_ssbo_atomic_add: - atomic = ir3_ATOMIC_ADD_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_ssbo_atomic_imin: - atomic = ir3_ATOMIC_MIN_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); - type = TYPE_S32; - break; - case nir_intrinsic_ssbo_atomic_umin: - atomic = ir3_ATOMIC_MIN_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_ssbo_atomic_imax: - atomic = ir3_ATOMIC_MAX_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); - type = TYPE_S32; - break; - case nir_intrinsic_ssbo_atomic_umax: - atomic = ir3_ATOMIC_MAX_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_ssbo_atomic_and: - atomic = ir3_ATOMIC_AND_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_ssbo_atomic_or: - atomic = ir3_ATOMIC_OR_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_ssbo_atomic_xor: - atomic = ir3_ATOMIC_XOR_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_ssbo_atomic_exchange: - atomic = ir3_ATOMIC_XCHG_G(b, 
ssbo, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_ssbo_atomic_comp_swap: - /* for cmpxchg, src0 is [ui]vec2(data, compare): */ - src0 = create_collect(ctx, (struct ir3_instruction*[]){ - get_src(ctx, &intr->src[3])[0], - src0, - }, 2); - atomic = ir3_ATOMIC_CMPXCHG_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); - break; - default: - unreachable("boo"); - } - - atomic->cat6.iim_val = 1; - atomic->cat6.d = 4; - atomic->cat6.type = type; - atomic->barrier_class = IR3_BARRIER_BUFFER_W; - atomic->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; - - /* even if nothing consume the result, we can't DCE the instruction: */ - array_insert(b, b->keeps, atomic); - - return atomic; -} - -/* src[] = { offset }. const_index[] = { base } */ -static void -emit_intrinsic_load_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr, - struct ir3_instruction **dst) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction *ldl, *offset; - unsigned base; - - offset = get_src(ctx, &intr->src[0])[0]; - base = nir_intrinsic_base(intr); - - ldl = ir3_LDL(b, offset, 0, create_immed(b, intr->num_components), 0); - ldl->cat6.src_offset = base; - ldl->cat6.type = utype_dst(intr->dest); - ldl->regs[0]->wrmask = MASK(intr->num_components); - - ldl->barrier_class = IR3_BARRIER_SHARED_R; - ldl->barrier_conflict = IR3_BARRIER_SHARED_W; - - split_dest(b, dst, ldl, 0, intr->num_components); -} - -/* src[] = { value, offset }. const_index[] = { base, write_mask } */ -static void -emit_intrinsic_store_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction *stl, *offset; - struct ir3_instruction * const *value; - unsigned base, wrmask; - - value = get_src(ctx, &intr->src[0]); - offset = get_src(ctx, &intr->src[1])[0]; - - base = nir_intrinsic_base(intr); - wrmask = nir_intrinsic_write_mask(intr); - - /* Combine groups of consecutive enabled channels in one write - * message. 
We use ffs to find the first enabled channel and then ffs on - * the bit-inverse, down-shifted writemask to determine the length of - * the block of enabled bits. - * - * (trick stolen from i965's fs_visitor::nir_emit_cs_intrinsic()) - */ - while (wrmask) { - unsigned first_component = ffs(wrmask) - 1; - unsigned length = ffs(~(wrmask >> first_component)) - 1; - - stl = ir3_STL(b, offset, 0, - create_collect(ctx, &value[first_component], length), 0, - create_immed(b, length), 0); - stl->cat6.dst_offset = first_component + base; - stl->cat6.type = utype_src(intr->src[0]); - stl->barrier_class = IR3_BARRIER_SHARED_W; - stl->barrier_conflict = IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W; - - array_insert(b, b->keeps, stl); - - /* Clear the bits in the writemask that we just wrote, then try - * again to see if more channels are left. - */ - wrmask &= (15 << (first_component + length)); - } -} - -/* - * CS shared variable atomic intrinsics - * - * All of the shared variable atomic memory operations read a value from - * memory, compute a new value using one of the operations below, write the - * new value to memory, and return the original value read. - * - * All operations take 2 sources except CompSwap that takes 3. These - * sources represent: - * - * 0: The offset into the shared variable storage region that the atomic - * operation will operate on. - * 1: The data parameter to the atomic function (i.e. the value to add - * in shared_atomic_add, etc). - * 2: For CompSwap only: the second data parameter. 
- */ -static struct ir3_instruction * -emit_intrinsic_atomic_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction *atomic, *src0, *src1; - type_t type = TYPE_U32; - - src0 = get_src(ctx, &intr->src[0])[0]; /* offset */ - src1 = get_src(ctx, &intr->src[1])[0]; /* value */ - - switch (intr->intrinsic) { - case nir_intrinsic_shared_atomic_add: - atomic = ir3_ATOMIC_ADD(b, src0, 0, src1, 0); - break; - case nir_intrinsic_shared_atomic_imin: - atomic = ir3_ATOMIC_MIN(b, src0, 0, src1, 0); - type = TYPE_S32; - break; - case nir_intrinsic_shared_atomic_umin: - atomic = ir3_ATOMIC_MIN(b, src0, 0, src1, 0); - break; - case nir_intrinsic_shared_atomic_imax: - atomic = ir3_ATOMIC_MAX(b, src0, 0, src1, 0); - type = TYPE_S32; - break; - case nir_intrinsic_shared_atomic_umax: - atomic = ir3_ATOMIC_MAX(b, src0, 0, src1, 0); - break; - case nir_intrinsic_shared_atomic_and: - atomic = ir3_ATOMIC_AND(b, src0, 0, src1, 0); - break; - case nir_intrinsic_shared_atomic_or: - atomic = ir3_ATOMIC_OR(b, src0, 0, src1, 0); - break; - case nir_intrinsic_shared_atomic_xor: - atomic = ir3_ATOMIC_XOR(b, src0, 0, src1, 0); - break; - case nir_intrinsic_shared_atomic_exchange: - atomic = ir3_ATOMIC_XCHG(b, src0, 0, src1, 0); - break; - case nir_intrinsic_shared_atomic_comp_swap: - /* for cmpxchg, src1 is [ui]vec2(data, compare): */ - src1 = create_collect(ctx, (struct ir3_instruction*[]){ - get_src(ctx, &intr->src[2])[0], - src1, - }, 2); - atomic = ir3_ATOMIC_CMPXCHG(b, src0, 0, src1, 0); - break; - default: - unreachable("boo"); - } - - atomic->cat6.iim_val = 1; - atomic->cat6.d = 1; - atomic->cat6.type = type; - atomic->barrier_class = IR3_BARRIER_SHARED_W; - atomic->barrier_conflict = IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W; - - /* even if nothing consume the result, we can't DCE the instruction: */ - array_insert(b, b->keeps, atomic); - - return atomic; -} - -/* Images get mapped into SSBO/image state (for 
store/atomic) and texture - * state block (for load). To simplify things, invert the image id and - * map it from end of state block, ie. image 0 becomes num-1, image 1 - * becomes num-2, etc. This potentially avoids needing to re-emit texture - * state when switching shaders. - * - * TODO is max # of samplers and SSBOs the same. This shouldn't be hard- - * coded. Also, since all the gl shader stages (ie. everything but CS) - * share the same SSBO/image state block, this might require some more - * logic if we supported images in anything other than FS.. - */ -static unsigned -get_image_slot(struct ir3_context *ctx, nir_deref_instr *deref) -{ - unsigned int loc = 0; - unsigned inner_size = 1; - - while (deref->deref_type != nir_deref_type_var) { - assert(deref->deref_type == nir_deref_type_array); - nir_const_value *const_index = nir_src_as_const_value(deref->arr.index); - assert(const_index); - - /* Go to the next instruction */ - deref = nir_deref_instr_parent(deref); - - assert(glsl_type_is_array(deref->type)); - const unsigned array_len = glsl_get_length(deref->type); - loc += MIN2(const_index->u32[0], array_len - 1) * inner_size; - - /* Update the inner size */ - inner_size *= array_len; - } - - loc += deref->var->data.driver_location; - - /* TODO figure out real limit per generation, and don't hardcode: */ - const unsigned max_samplers = 16; - return max_samplers - loc - 1; -} - -/* see tex_info() for equiv logic for texture instructions.. it would be - * nice if this could be better unified.. 
- */ -static unsigned -get_image_coords(const nir_variable *var, unsigned *flagsp) -{ - const struct glsl_type *type = glsl_without_array(var->type); - unsigned coords, flags = 0; - - switch (glsl_get_sampler_dim(type)) { - case GLSL_SAMPLER_DIM_1D: - case GLSL_SAMPLER_DIM_BUF: - coords = 1; - break; - case GLSL_SAMPLER_DIM_2D: - case GLSL_SAMPLER_DIM_RECT: - case GLSL_SAMPLER_DIM_EXTERNAL: - case GLSL_SAMPLER_DIM_MS: - coords = 2; - break; - case GLSL_SAMPLER_DIM_3D: - case GLSL_SAMPLER_DIM_CUBE: - flags |= IR3_INSTR_3D; - coords = 3; - break; - default: - unreachable("bad sampler dim"); - return 0; - } - - if (glsl_sampler_type_is_array(type)) { - /* note: unlike tex_info(), adjust # of coords to include array idx: */ - coords++; - flags |= IR3_INSTR_A; - } - - if (flagsp) - *flagsp = flags; - - return coords; -} - -static type_t -get_image_type(const nir_variable *var) -{ - switch (glsl_get_sampler_result_type(glsl_without_array(var->type))) { - case GLSL_TYPE_UINT: - return TYPE_U32; - case GLSL_TYPE_INT: - return TYPE_S32; - case GLSL_TYPE_FLOAT: - return TYPE_F32; - default: - unreachable("bad sampler type."); - return 0; - } -} - -static struct ir3_instruction * -get_image_offset(struct ir3_context *ctx, const nir_variable *var, - struct ir3_instruction * const *coords, bool byteoff) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction *offset; - unsigned ncoords = get_image_coords(var, NULL); - - /* to calculate the byte offset (yes, uggg) we need (up to) three - * const values to know the bytes per pixel, and y and z stride: - */ - unsigned cb = regid(ctx->so->constbase.image_dims, 0) + - ctx->so->const_layout.image_dims.off[var->data.driver_location]; - - debug_assert(ctx->so->const_layout.image_dims.mask & - (1 << var->data.driver_location)); - - /* offset = coords.x * bytes_per_pixel: */ - offset = ir3_MUL_S(b, coords[0], 0, create_uniform(ctx, cb + 0), 0); - if (ncoords > 1) { - /* offset += coords.y * y_pitch: */ - offset = ir3_MAD_S24(b, 
create_uniform(ctx, cb + 1), 0, - coords[1], 0, offset, 0); - } - if (ncoords > 2) { - /* offset += coords.z * z_pitch: */ - offset = ir3_MAD_S24(b, create_uniform(ctx, cb + 2), 0, - coords[2], 0, offset, 0); - } - - if (!byteoff) { - /* Some cases, like atomics, seem to use dword offset instead - * of byte offsets.. blob just puts an extra shr.b in there - * in those cases: - */ - offset = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0); - } - - return create_collect(ctx, (struct ir3_instruction*[]){ - offset, - create_immed(b, 0), - }, 2); -} - -/* src[] = { deref, coord, sample_index }. const_index[] = {} */ -static void -emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr, - struct ir3_instruction **dst) -{ - struct ir3_block *b = ctx->block; - const nir_variable *var = nir_intrinsic_get_var(intr, 0); - struct ir3_instruction *sam; - struct ir3_instruction * const *src0 = get_src(ctx, &intr->src[1]); - struct ir3_instruction *coords[4]; - unsigned flags, ncoords = get_image_coords(var, &flags); - unsigned tex_idx = get_image_slot(ctx, nir_src_as_deref(intr->src[0])); - type_t type = get_image_type(var); - - /* hmm, this seems a bit odd, but it is what blob does and (at least - * a5xx) just faults on bogus addresses otherwise: - */ - if (flags & IR3_INSTR_3D) { - flags &= ~IR3_INSTR_3D; - flags |= IR3_INSTR_A; - } - - for (unsigned i = 0; i < ncoords; i++) - coords[i] = src0[i]; - - if (ncoords == 1) - coords[ncoords++] = create_immed(b, 0); - - sam = ir3_SAM(b, OPC_ISAM, type, TGSI_WRITEMASK_XYZW, flags, - tex_idx, tex_idx, create_collect(ctx, coords, ncoords), NULL); - - sam->barrier_class = IR3_BARRIER_IMAGE_R; - sam->barrier_conflict = IR3_BARRIER_IMAGE_W; - - split_dest(b, dst, sam, 0, 4); -} - -/* src[] = { deref, coord, sample_index, value }. 
const_index[] = {} */ -static void -emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) -{ - struct ir3_block *b = ctx->block; - const nir_variable *var = nir_intrinsic_get_var(intr, 0); - struct ir3_instruction *stib, *offset; - struct ir3_instruction * const *value = get_src(ctx, &intr->src[3]); - struct ir3_instruction * const *coords = get_src(ctx, &intr->src[1]); - unsigned ncoords = get_image_coords(var, NULL); - unsigned tex_idx = get_image_slot(ctx, nir_src_as_deref(intr->src[0])); - - /* src0 is value - * src1 is coords - * src2 is 64b byte offset - */ - - offset = get_image_offset(ctx, var, coords, true); - - /* NOTE: stib seems to take byte offset, but stgb.typed can be used - * too and takes a dword offset.. not quite sure yet why blob uses - * one over the other in various cases. - */ - - stib = ir3_STIB(b, create_immed(b, tex_idx), 0, - create_collect(ctx, value, 4), 0, - create_collect(ctx, coords, ncoords), 0, - offset, 0); - stib->cat6.iim_val = 4; - stib->cat6.d = ncoords; - stib->cat6.type = get_image_type(var); - stib->cat6.typed = true; - stib->barrier_class = IR3_BARRIER_IMAGE_W; - stib->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W; - - array_insert(b, b->keeps, stib); -} - -static void -emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, - struct ir3_instruction **dst) -{ - struct ir3_block *b = ctx->block; - const nir_variable *var = nir_intrinsic_get_var(intr, 0); - unsigned tex_idx = get_image_slot(ctx, nir_src_as_deref(intr->src[0])); - struct ir3_instruction *sam, *lod; - unsigned flags, ncoords = get_image_coords(var, &flags); - - lod = create_immed(b, 0); - sam = ir3_SAM(b, OPC_GETSIZE, TYPE_U32, TGSI_WRITEMASK_XYZW, flags, - tex_idx, tex_idx, lod, NULL); - - /* Array size actually ends up in .w rather than .z. This doesn't - * matter for miplevel 0, but for higher mips the value in z is - * minified whereas w stays. 
Also, the value in TEX_CONST_3_DEPTH is - * returned, which means that we have to add 1 to it for arrays for - * a3xx. - * - * Note use a temporary dst and then copy, since the size of the dst - * array that is passed in is based on nir's understanding of the - * result size, not the hardware's - */ - struct ir3_instruction *tmp[4]; - - split_dest(b, tmp, sam, 0, 4); - - /* get_size instruction returns size in bytes instead of texels - * for imageBuffer, so we need to divide it by the pixel size - * of the image format. - * - * TODO: This is at least true on a5xx. Check other gens. - */ - enum glsl_sampler_dim dim = - glsl_get_sampler_dim(glsl_without_array(var->type)); - if (dim == GLSL_SAMPLER_DIM_BUF) { - /* Since all the possible values the divisor can take are - * power-of-two (4, 8, or 16), the division is implemented - * as a shift-right. - * During shader setup, the log2 of the image format's - * bytes-per-pixel should have been emitted in 2nd slot of - * image_dims. See ir3_shader::emit_image_dims(). - */ - unsigned cb = regid(ctx->so->constbase.image_dims, 0) + - ctx->so->const_layout.image_dims.off[var->data.driver_location]; - struct ir3_instruction *aux = create_uniform(ctx, cb + 1); - - tmp[0] = ir3_SHR_B(b, tmp[0], 0, aux, 0); - } - - for (unsigned i = 0; i < ncoords; i++) - dst[i] = tmp[i]; - - if (flags & IR3_INSTR_A) { - if (ctx->compiler->levels_add_one) { - dst[ncoords-1] = ir3_ADD_U(b, tmp[3], 0, create_immed(b, 1), 0); - } else { - dst[ncoords-1] = ir3_MOV(b, tmp[3], TYPE_U32); - } - } -} - -/* src[] = { deref, coord, sample_index, value, compare }. 
const_index[] = {} */ -static struct ir3_instruction * -emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) -{ - struct ir3_block *b = ctx->block; - const nir_variable *var = nir_intrinsic_get_var(intr, 0); - struct ir3_instruction *atomic, *image, *src0, *src1, *src2; - struct ir3_instruction * const *coords = get_src(ctx, &intr->src[1]); - unsigned ncoords = get_image_coords(var, NULL); - - image = create_immed(b, get_image_slot(ctx, nir_src_as_deref(intr->src[0]))); - - /* src0 is value (or uvec2(value, compare)) - * src1 is coords - * src2 is 64b byte offset - */ - src0 = get_src(ctx, &intr->src[3])[0]; - src1 = create_collect(ctx, coords, ncoords); - src2 = get_image_offset(ctx, var, coords, false); - - switch (intr->intrinsic) { - case nir_intrinsic_image_deref_atomic_add: - atomic = ir3_ATOMIC_ADD_G(b, image, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_image_deref_atomic_min: - atomic = ir3_ATOMIC_MIN_G(b, image, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_image_deref_atomic_max: - atomic = ir3_ATOMIC_MAX_G(b, image, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_image_deref_atomic_and: - atomic = ir3_ATOMIC_AND_G(b, image, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_image_deref_atomic_or: - atomic = ir3_ATOMIC_OR_G(b, image, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_image_deref_atomic_xor: - atomic = ir3_ATOMIC_XOR_G(b, image, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_image_deref_atomic_exchange: - atomic = ir3_ATOMIC_XCHG_G(b, image, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_image_deref_atomic_comp_swap: - /* for cmpxchg, src0 is [ui]vec2(data, compare): */ - src0 = create_collect(ctx, (struct ir3_instruction*[]){ - get_src(ctx, &intr->src[4])[0], - src0, - }, 2); - atomic = ir3_ATOMIC_CMPXCHG_G(b, image, 0, src0, 0, src1, 0, src2, 0); - break; - default: - unreachable("boo"); - } - - atomic->cat6.iim_val = 
1; - atomic->cat6.d = ncoords; - atomic->cat6.type = get_image_type(var); - atomic->cat6.typed = true; - atomic->barrier_class = IR3_BARRIER_IMAGE_W; - atomic->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W; - - /* even if nothing consume the result, we can't DCE the instruction: */ - array_insert(b, b->keeps, atomic); - - return atomic; -} - -static void -emit_intrinsic_barrier(struct ir3_context *ctx, nir_intrinsic_instr *intr) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction *barrier; - - switch (intr->intrinsic) { - case nir_intrinsic_barrier: - barrier = ir3_BAR(b); - barrier->cat7.g = true; - barrier->cat7.l = true; - barrier->flags = IR3_INSTR_SS | IR3_INSTR_SY; - barrier->barrier_class = IR3_BARRIER_EVERYTHING; - break; - case nir_intrinsic_memory_barrier: - barrier = ir3_FENCE(b); - barrier->cat7.g = true; - barrier->cat7.r = true; - barrier->cat7.w = true; - barrier->barrier_class = IR3_BARRIER_IMAGE_W | - IR3_BARRIER_BUFFER_W; - barrier->barrier_conflict = - IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W | - IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; - break; - case nir_intrinsic_memory_barrier_atomic_counter: - case nir_intrinsic_memory_barrier_buffer: - barrier = ir3_FENCE(b); - barrier->cat7.g = true; - barrier->cat7.r = true; - barrier->cat7.w = true; - barrier->barrier_class = IR3_BARRIER_BUFFER_W; - barrier->barrier_conflict = IR3_BARRIER_BUFFER_R | - IR3_BARRIER_BUFFER_W; - break; - case nir_intrinsic_memory_barrier_image: - // TODO double check if this should have .g set - barrier = ir3_FENCE(b); - barrier->cat7.g = true; - barrier->cat7.r = true; - barrier->cat7.w = true; - barrier->barrier_class = IR3_BARRIER_IMAGE_W; - barrier->barrier_conflict = IR3_BARRIER_IMAGE_R | - IR3_BARRIER_IMAGE_W; - break; - case nir_intrinsic_memory_barrier_shared: - barrier = ir3_FENCE(b); - barrier->cat7.g = true; - barrier->cat7.l = true; - barrier->cat7.r = true; - barrier->cat7.w = true; - barrier->barrier_class = IR3_BARRIER_SHARED_W; 
- barrier->barrier_conflict = IR3_BARRIER_SHARED_R | - IR3_BARRIER_SHARED_W; - break; - case nir_intrinsic_group_memory_barrier: - barrier = ir3_FENCE(b); - barrier->cat7.g = true; - barrier->cat7.l = true; - barrier->cat7.r = true; - barrier->cat7.w = true; - barrier->barrier_class = IR3_BARRIER_SHARED_W | - IR3_BARRIER_IMAGE_W | - IR3_BARRIER_BUFFER_W; - barrier->barrier_conflict = - IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W | - IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W | - IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; - break; - default: - unreachable("boo"); - } - - /* make sure barrier doesn't get DCE'd */ - array_insert(b, b->keeps, barrier); -} - -static void add_sysval_input_compmask(struct ir3_context *ctx, - gl_system_value slot, unsigned compmask, - struct ir3_instruction *instr) -{ - struct ir3_shader_variant *so = ctx->so; - unsigned r = regid(so->inputs_count, 0); - unsigned n = so->inputs_count++; - - so->inputs[n].sysval = true; - so->inputs[n].slot = slot; - so->inputs[n].compmask = compmask; - so->inputs[n].regid = r; - so->inputs[n].interpolate = INTERP_MODE_FLAT; - so->total_in++; - - ctx->ir->ninputs = MAX2(ctx->ir->ninputs, r + 1); - ctx->ir->inputs[r] = instr; -} - -static void add_sysval_input(struct ir3_context *ctx, gl_system_value slot, - struct ir3_instruction *instr) -{ - add_sysval_input_compmask(ctx, slot, 0x1, instr); -} - -static void -emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) -{ - const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic]; - struct ir3_instruction **dst; - struct ir3_instruction * const *src; - struct ir3_block *b = ctx->block; - nir_const_value *const_offset; - int idx, comp; - - if (info->has_dest) { - unsigned n = nir_intrinsic_dest_components(intr); - dst = get_dst(ctx, &intr->dest, n); - } else { - dst = NULL; - } - - switch (intr->intrinsic) { - case nir_intrinsic_load_uniform: - idx = nir_intrinsic_base(intr); - const_offset = nir_src_as_const_value(intr->src[0]); - if 
(const_offset) { - idx += const_offset->u32[0]; - for (int i = 0; i < intr->num_components; i++) { - unsigned n = idx * 4 + i; - dst[i] = create_uniform(ctx, n); - } - } else { - src = get_src(ctx, &intr->src[0]); - for (int i = 0; i < intr->num_components; i++) { - int n = idx * 4 + i; - dst[i] = create_uniform_indirect(ctx, n, - get_addr(ctx, src[0], 4)); - } - /* NOTE: if relative addressing is used, we set - * constlen in the compiler (to worst-case value) - * since we don't know in the assembler what the max - * addr reg value can be: - */ - ctx->so->constlen = ctx->s->num_uniforms; - } - break; - case nir_intrinsic_load_ubo: - emit_intrinsic_load_ubo(ctx, intr, dst); - break; - case nir_intrinsic_load_input: - idx = nir_intrinsic_base(intr); - comp = nir_intrinsic_component(intr); - const_offset = nir_src_as_const_value(intr->src[0]); - if (const_offset) { - idx += const_offset->u32[0]; - for (int i = 0; i < intr->num_components; i++) { - unsigned n = idx * 4 + i + comp; - dst[i] = ctx->ir->inputs[n]; - } - } else { - src = get_src(ctx, &intr->src[0]); - struct ir3_instruction *collect = - create_collect(ctx, ctx->ir->inputs, ctx->ir->ninputs); - struct ir3_instruction *addr = get_addr(ctx, src[0], 4); - for (int i = 0; i < intr->num_components; i++) { - unsigned n = idx * 4 + i + comp; - dst[i] = create_indirect_load(ctx, ctx->ir->ninputs, - n, addr, collect); - } - } - break; - case nir_intrinsic_load_ssbo: - emit_intrinsic_load_ssbo(ctx, intr, dst); - break; - case nir_intrinsic_store_ssbo: - emit_intrinsic_store_ssbo(ctx, intr); - break; - case nir_intrinsic_get_buffer_size: - emit_intrinsic_ssbo_size(ctx, intr, dst); - break; - case nir_intrinsic_ssbo_atomic_add: - case nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_ssbo_atomic_umin: - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_ssbo_atomic_umax: - case nir_intrinsic_ssbo_atomic_and: - case nir_intrinsic_ssbo_atomic_or: - case nir_intrinsic_ssbo_atomic_xor: - case 
nir_intrinsic_ssbo_atomic_exchange: - case nir_intrinsic_ssbo_atomic_comp_swap: - dst[0] = emit_intrinsic_atomic_ssbo(ctx, intr); - break; - case nir_intrinsic_load_shared: - emit_intrinsic_load_shared(ctx, intr, dst); - break; - case nir_intrinsic_store_shared: - emit_intrinsic_store_shared(ctx, intr); - break; - case nir_intrinsic_shared_atomic_add: - case nir_intrinsic_shared_atomic_imin: - case nir_intrinsic_shared_atomic_umin: - case nir_intrinsic_shared_atomic_imax: - case nir_intrinsic_shared_atomic_umax: - case nir_intrinsic_shared_atomic_and: - case nir_intrinsic_shared_atomic_or: - case nir_intrinsic_shared_atomic_xor: - case nir_intrinsic_shared_atomic_exchange: - case nir_intrinsic_shared_atomic_comp_swap: - dst[0] = emit_intrinsic_atomic_shared(ctx, intr); - break; - case nir_intrinsic_image_deref_load: - emit_intrinsic_load_image(ctx, intr, dst); - break; - case nir_intrinsic_image_deref_store: - emit_intrinsic_store_image(ctx, intr); - break; - case nir_intrinsic_image_deref_size: - emit_intrinsic_image_size(ctx, intr, dst); - break; - case nir_intrinsic_image_deref_atomic_add: - case nir_intrinsic_image_deref_atomic_min: - case nir_intrinsic_image_deref_atomic_max: - case nir_intrinsic_image_deref_atomic_and: - case nir_intrinsic_image_deref_atomic_or: - case nir_intrinsic_image_deref_atomic_xor: - case nir_intrinsic_image_deref_atomic_exchange: - case nir_intrinsic_image_deref_atomic_comp_swap: - dst[0] = emit_intrinsic_atomic_image(ctx, intr); - break; - case nir_intrinsic_barrier: - case nir_intrinsic_memory_barrier: - case nir_intrinsic_group_memory_barrier: - case nir_intrinsic_memory_barrier_atomic_counter: - case nir_intrinsic_memory_barrier_buffer: - case nir_intrinsic_memory_barrier_image: - case nir_intrinsic_memory_barrier_shared: - emit_intrinsic_barrier(ctx, intr); - /* note that blk ptr no longer valid, make that obvious: */ - b = NULL; - break; - case nir_intrinsic_store_output: - idx = nir_intrinsic_base(intr); - comp = 
nir_intrinsic_component(intr); - const_offset = nir_src_as_const_value(intr->src[1]); - compile_assert(ctx, const_offset != NULL); - idx += const_offset->u32[0]; - - src = get_src(ctx, &intr->src[0]); - for (int i = 0; i < intr->num_components; i++) { - unsigned n = idx * 4 + i + comp; - ctx->ir->outputs[n] = src[i]; - } - break; - case nir_intrinsic_load_base_vertex: - case nir_intrinsic_load_first_vertex: - if (!ctx->basevertex) { - ctx->basevertex = create_driver_param(ctx, IR3_DP_VTXID_BASE); - add_sysval_input(ctx, SYSTEM_VALUE_FIRST_VERTEX, ctx->basevertex); - } - dst[0] = ctx->basevertex; - break; - case nir_intrinsic_load_vertex_id_zero_base: - case nir_intrinsic_load_vertex_id: - if (!ctx->vertex_id) { - gl_system_value sv = (intr->intrinsic == nir_intrinsic_load_vertex_id) ? - SYSTEM_VALUE_VERTEX_ID : SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; - ctx->vertex_id = create_input(ctx, 0); - add_sysval_input(ctx, sv, ctx->vertex_id); - } - dst[0] = ctx->vertex_id; - break; - case nir_intrinsic_load_instance_id: - if (!ctx->instance_id) { - ctx->instance_id = create_input(ctx, 0); - add_sysval_input(ctx, SYSTEM_VALUE_INSTANCE_ID, - ctx->instance_id); - } - dst[0] = ctx->instance_id; - break; - case nir_intrinsic_load_sample_id: - case nir_intrinsic_load_sample_id_no_per_sample: - if (!ctx->samp_id) { - ctx->samp_id = create_input(ctx, 0); - ctx->samp_id->regs[0]->flags |= IR3_REG_HALF; - add_sysval_input(ctx, SYSTEM_VALUE_SAMPLE_ID, - ctx->samp_id); - } - dst[0] = ir3_COV(b, ctx->samp_id, TYPE_U16, TYPE_U32); - break; - case nir_intrinsic_load_sample_mask_in: - if (!ctx->samp_mask_in) { - ctx->samp_mask_in = create_input(ctx, 0); - add_sysval_input(ctx, SYSTEM_VALUE_SAMPLE_MASK_IN, - ctx->samp_mask_in); - } - dst[0] = ctx->samp_mask_in; - break; - case nir_intrinsic_load_user_clip_plane: - idx = nir_intrinsic_ucp_id(intr); - for (int i = 0; i < intr->num_components; i++) { - unsigned n = idx * 4 + i; - dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n); - } - break; - 
case nir_intrinsic_load_front_face: - if (!ctx->frag_face) { - ctx->so->frag_face = true; - ctx->frag_face = create_input(ctx, 0); - add_sysval_input(ctx, SYSTEM_VALUE_FRONT_FACE, ctx->frag_face); - ctx->frag_face->regs[0]->flags |= IR3_REG_HALF; - } - /* for fragface, we get -1 for back and 0 for front. However this is - * the inverse of what nir expects (where ~0 is true). - */ - dst[0] = ir3_COV(b, ctx->frag_face, TYPE_S16, TYPE_S32); - dst[0] = ir3_NOT_B(b, dst[0], 0); - break; - case nir_intrinsic_load_local_invocation_id: - if (!ctx->local_invocation_id) { - ctx->local_invocation_id = create_input_compmask(ctx, 0, 0x7); - add_sysval_input_compmask(ctx, SYSTEM_VALUE_LOCAL_INVOCATION_ID, - 0x7, ctx->local_invocation_id); - } - split_dest(b, dst, ctx->local_invocation_id, 0, 3); - break; - case nir_intrinsic_load_work_group_id: - if (!ctx->work_group_id) { - ctx->work_group_id = create_input_compmask(ctx, 0, 0x7); - add_sysval_input_compmask(ctx, SYSTEM_VALUE_WORK_GROUP_ID, - 0x7, ctx->work_group_id); - ctx->work_group_id->regs[0]->flags |= IR3_REG_HIGH; - } - split_dest(b, dst, ctx->work_group_id, 0, 3); - break; - case nir_intrinsic_load_num_work_groups: - for (int i = 0; i < intr->num_components; i++) { - dst[i] = create_driver_param(ctx, IR3_DP_NUM_WORK_GROUPS_X + i); - } - break; - case nir_intrinsic_load_local_group_size: - for (int i = 0; i < intr->num_components; i++) { - dst[i] = create_driver_param(ctx, IR3_DP_LOCAL_GROUP_SIZE_X + i); - } - break; - case nir_intrinsic_discard_if: - case nir_intrinsic_discard: { - struct ir3_instruction *cond, *kill; - - if (intr->intrinsic == nir_intrinsic_discard_if) { - /* conditional discard: */ - src = get_src(ctx, &intr->src[0]); - cond = ir3_b2n(b, src[0]); - } else { - /* unconditional discard: */ - cond = create_immed(b, 1); - } - - /* NOTE: only cmps.*.* can write p0.x: */ - cond = ir3_CMPS_S(b, cond, 0, create_immed(b, 0), 0); - cond->cat2.condition = IR3_COND_NE; - - /* condition always goes in predicate 
register: */ - cond->regs[0]->num = regid(REG_P0, 0); - - kill = ir3_KILL(b, cond, 0); - array_insert(ctx->ir, ctx->ir->predicates, kill); - - array_insert(b, b->keeps, kill); - ctx->so->has_kill = true; - - break; - } - default: - compile_error(ctx, "Unhandled intrinsic type: %s\n", - nir_intrinsic_infos[intr->intrinsic].name); - break; - } - - if (info->has_dest) - put_dst(ctx, &intr->dest); -} - -static void -emit_load_const(struct ir3_context *ctx, nir_load_const_instr *instr) -{ - struct ir3_instruction **dst = get_dst_ssa(ctx, &instr->def, - instr->def.num_components); - type_t type = (instr->def.bit_size < 32) ? TYPE_U16 : TYPE_U32; - - for (int i = 0; i < instr->def.num_components; i++) - dst[i] = create_immed_typed(ctx->block, instr->value.u32[i], type); -} - -static void -emit_undef(struct ir3_context *ctx, nir_ssa_undef_instr *undef) -{ - struct ir3_instruction **dst = get_dst_ssa(ctx, &undef->def, - undef->def.num_components); - type_t type = (undef->def.bit_size < 32) ? TYPE_U16 : TYPE_U32; - - /* backend doesn't want undefined instructions, so just plug - * in 0.0.. - */ - for (int i = 0; i < undef->def.num_components; i++) - dst[i] = create_immed_typed(ctx->block, fui(0.0), type); -} - -/* - * texture fetch/sample instructions: - */ - -static void -tex_info(nir_tex_instr *tex, unsigned *flagsp, unsigned *coordsp) -{ - unsigned coords, flags = 0; - - /* note: would use tex->coord_components.. except txs.. 
also, - * since array index goes after shadow ref, we don't want to - * count it: - */ - switch (tex->sampler_dim) { - case GLSL_SAMPLER_DIM_1D: - case GLSL_SAMPLER_DIM_BUF: - coords = 1; - break; - case GLSL_SAMPLER_DIM_2D: - case GLSL_SAMPLER_DIM_RECT: - case GLSL_SAMPLER_DIM_EXTERNAL: - case GLSL_SAMPLER_DIM_MS: - coords = 2; - break; - case GLSL_SAMPLER_DIM_3D: - case GLSL_SAMPLER_DIM_CUBE: - coords = 3; - flags |= IR3_INSTR_3D; - break; - default: - unreachable("bad sampler_dim"); - } - - if (tex->is_shadow && tex->op != nir_texop_lod) - flags |= IR3_INSTR_S; - - if (tex->is_array && tex->op != nir_texop_lod) - flags |= IR3_INSTR_A; - - *flagsp = flags; - *coordsp = coords; -} - -static void -emit_tex(struct ir3_context *ctx, nir_tex_instr *tex) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction **dst, *sam, *src0[12], *src1[4]; - struct ir3_instruction * const *coord, * const *off, * const *ddx, * const *ddy; - struct ir3_instruction *lod, *compare, *proj, *sample_index; - bool has_bias = false, has_lod = false, has_proj = false, has_off = false; - unsigned i, coords, flags; - unsigned nsrc0 = 0, nsrc1 = 0; - type_t type; - opc_t opc = 0; - - coord = off = ddx = ddy = NULL; - lod = proj = compare = sample_index = NULL; - - /* TODO: might just be one component for gathers? 
*/ - dst = get_dst(ctx, &tex->dest, 4); - - for (unsigned i = 0; i < tex->num_srcs; i++) { - switch (tex->src[i].src_type) { - case nir_tex_src_coord: - coord = get_src(ctx, &tex->src[i].src); - break; - case nir_tex_src_bias: - lod = get_src(ctx, &tex->src[i].src)[0]; - has_bias = true; - break; - case nir_tex_src_lod: - lod = get_src(ctx, &tex->src[i].src)[0]; - has_lod = true; - break; - case nir_tex_src_comparator: /* shadow comparator */ - compare = get_src(ctx, &tex->src[i].src)[0]; - break; - case nir_tex_src_projector: - proj = get_src(ctx, &tex->src[i].src)[0]; - has_proj = true; - break; - case nir_tex_src_offset: - off = get_src(ctx, &tex->src[i].src); - has_off = true; - break; - case nir_tex_src_ddx: - ddx = get_src(ctx, &tex->src[i].src); - break; - case nir_tex_src_ddy: - ddy = get_src(ctx, &tex->src[i].src); - break; - case nir_tex_src_ms_index: - sample_index = get_src(ctx, &tex->src[i].src)[0]; - break; - default: - compile_error(ctx, "Unhandled NIR tex src type: %d\n", - tex->src[i].src_type); - return; - } - } - - switch (tex->op) { - case nir_texop_tex: opc = has_lod ? OPC_SAML : OPC_SAM; break; - case nir_texop_txb: opc = OPC_SAMB; break; - case nir_texop_txl: opc = OPC_SAML; break; - case nir_texop_txd: opc = OPC_SAMGQ; break; - case nir_texop_txf: opc = OPC_ISAML; break; - case nir_texop_lod: opc = OPC_GETLOD; break; - case nir_texop_tg4: - /* NOTE: a4xx might need to emulate gather w/ txf (this is - * what blob does, seems gather is broken?), and a3xx did - * not support it (but probably could also emulate). 
- */ - switch (tex->component) { - case 0: opc = OPC_GATHER4R; break; - case 1: opc = OPC_GATHER4G; break; - case 2: opc = OPC_GATHER4B; break; - case 3: opc = OPC_GATHER4A; break; - } - break; - case nir_texop_txf_ms: opc = OPC_ISAMM; break; - case nir_texop_txs: - case nir_texop_query_levels: - case nir_texop_texture_samples: - case nir_texop_samples_identical: - case nir_texop_txf_ms_mcs: - compile_error(ctx, "Unhandled NIR tex type: %d\n", tex->op); - return; - } - - tex_info(tex, &flags, &coords); - - /* - * lay out the first argument in the proper order: - * - actual coordinates first - * - shadow reference - * - array index - * - projection w - * - starting at offset 4, dpdx.xy, dpdy.xy - * - * bias/lod go into the second arg - */ - - /* insert tex coords: */ - for (i = 0; i < coords; i++) - src0[i] = coord[i]; - - nsrc0 = i; - - /* NOTE a3xx (and possibly a4xx?) might be different, using isaml - * with scaled x coord according to requested sample: - */ - if (tex->op == nir_texop_txf_ms) { - if (ctx->compiler->txf_ms_with_isaml) { - /* the samples are laid out in x dimension as - * 0 1 2 3 - * x_ms = (x << ms) + sample_index; - */ - struct ir3_instruction *ms; - ms = create_immed(b, (ctx->samples >> (2 * tex->texture_index)) & 3); - - src0[0] = ir3_SHL_B(b, src0[0], 0, ms, 0); - src0[0] = ir3_ADD_U(b, src0[0], 0, sample_index, 0); - - opc = OPC_ISAML; - } else { - src0[nsrc0++] = sample_index; - } - } - - /* scale up integer coords for TXF based on the LOD */ - if (ctx->compiler->unminify_coords && (opc == OPC_ISAML)) { - assert(has_lod); - for (i = 0; i < coords; i++) - src0[i] = ir3_SHL_B(b, src0[i], 0, lod, 0); - } - - if (coords == 1) { - /* hw doesn't do 1d, so we treat it as 2d with - * height of 1, and patch up the y coord. - * TODO: y coord should be (int)0 in some cases.. 
- */ - src0[nsrc0++] = create_immed(b, fui(0.5)); - } - - if (tex->is_shadow && tex->op != nir_texop_lod) - src0[nsrc0++] = compare; - - if (tex->is_array && tex->op != nir_texop_lod) { - struct ir3_instruction *idx = coord[coords]; - - /* the array coord for cube arrays needs 0.5 added to it */ - if (ctx->compiler->array_index_add_half && (opc != OPC_ISAML)) - idx = ir3_ADD_F(b, idx, 0, create_immed(b, fui(0.5)), 0); - - src0[nsrc0++] = idx; - } - - if (has_proj) { - src0[nsrc0++] = proj; - flags |= IR3_INSTR_P; - } - - /* pad to 4, then ddx/ddy: */ - if (tex->op == nir_texop_txd) { - while (nsrc0 < 4) - src0[nsrc0++] = create_immed(b, fui(0.0)); - for (i = 0; i < coords; i++) - src0[nsrc0++] = ddx[i]; - if (coords < 2) - src0[nsrc0++] = create_immed(b, fui(0.0)); - for (i = 0; i < coords; i++) - src0[nsrc0++] = ddy[i]; - if (coords < 2) - src0[nsrc0++] = create_immed(b, fui(0.0)); - } - - /* - * second argument (if applicable): - * - offsets - * - lod - * - bias - */ - if (has_off | has_lod | has_bias) { - if (has_off) { - unsigned off_coords = coords; - if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) - off_coords--; - for (i = 0; i < off_coords; i++) - src1[nsrc1++] = off[i]; - if (off_coords < 2) - src1[nsrc1++] = create_immed(b, fui(0.0)); - flags |= IR3_INSTR_O; - } - - if (has_lod | has_bias) - src1[nsrc1++] = lod; - } - - switch (tex->dest_type) { - case nir_type_invalid: - case nir_type_float: - type = TYPE_F32; - break; - case nir_type_int: - type = TYPE_S32; - break; - case nir_type_uint: - case nir_type_bool: - type = TYPE_U32; - break; - default: - unreachable("bad dest_type"); - } - - if (opc == OPC_GETLOD) - type = TYPE_U32; - - unsigned tex_idx = tex->texture_index; - - ctx->max_texture_index = MAX2(ctx->max_texture_index, tex_idx); - - struct ir3_instruction *col0 = create_collect(ctx, src0, nsrc0); - struct ir3_instruction *col1 = create_collect(ctx, src1, nsrc1); - - sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_XYZW, flags, - tex_idx, tex_idx, col0, 
col1); - - if ((ctx->astc_srgb & (1 << tex_idx)) && !nir_tex_instr_is_query(tex)) { - /* only need first 3 components: */ - sam->regs[0]->wrmask = 0x7; - split_dest(b, dst, sam, 0, 3); - - /* we need to sample the alpha separately with a non-ASTC - * texture state: - */ - sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_W, flags, - tex_idx, tex_idx, col0, col1); - - array_insert(ctx->ir, ctx->ir->astc_srgb, sam); - - /* fixup .w component: */ - split_dest(b, &dst[3], sam, 3, 1); - } else { - /* normal (non-workaround) case: */ - split_dest(b, dst, sam, 0, 4); - } - - /* GETLOD returns results in 4.8 fixed point */ - if (opc == OPC_GETLOD) { - struct ir3_instruction *factor = create_immed(b, fui(1.0 / 256)); - - compile_assert(ctx, tex->dest_type == nir_type_float); - for (i = 0; i < 2; i++) { - dst[i] = ir3_MUL_F(b, ir3_COV(b, dst[i], TYPE_U32, TYPE_F32), 0, - factor, 0); - } - } - - put_dst(ctx, &tex->dest); -} - -static void -emit_tex_query_levels(struct ir3_context *ctx, nir_tex_instr *tex) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction **dst, *sam; - - dst = get_dst(ctx, &tex->dest, 1); - - sam = ir3_SAM(b, OPC_GETINFO, TYPE_U32, TGSI_WRITEMASK_Z, 0, - tex->texture_index, tex->texture_index, NULL, NULL); - - /* even though there is only one component, since it ends - * up in .z rather than .x, we need a split_dest() - */ - split_dest(b, dst, sam, 0, 3); - - /* The # of levels comes from getinfo.z. We need to add 1 to it, since - * the value in TEX_CONST_0 is zero-based. - */ - if (ctx->compiler->levels_add_one) - dst[0] = ir3_ADD_U(b, dst[0], 0, create_immed(b, 1), 0); - - put_dst(ctx, &tex->dest); -} - -static void -emit_tex_txs(struct ir3_context *ctx, nir_tex_instr *tex) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction **dst, *sam; - struct ir3_instruction *lod; - unsigned flags, coords; - - tex_info(tex, &flags, &coords); - - /* Actually we want the number of dimensions, not coordinates. This - * distinction only matters for cubes. 
- */ - if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) - coords = 2; - - dst = get_dst(ctx, &tex->dest, 4); - - compile_assert(ctx, tex->num_srcs == 1); - compile_assert(ctx, tex->src[0].src_type == nir_tex_src_lod); - - lod = get_src(ctx, &tex->src[0].src)[0]; - - sam = ir3_SAM(b, OPC_GETSIZE, TYPE_U32, TGSI_WRITEMASK_XYZW, flags, - tex->texture_index, tex->texture_index, lod, NULL); - - split_dest(b, dst, sam, 0, 4); - - /* Array size actually ends up in .w rather than .z. This doesn't - * matter for miplevel 0, but for higher mips the value in z is - * minified whereas w stays. Also, the value in TEX_CONST_3_DEPTH is - * returned, which means that we have to add 1 to it for arrays. - */ - if (tex->is_array) { - if (ctx->compiler->levels_add_one) { - dst[coords] = ir3_ADD_U(b, dst[3], 0, create_immed(b, 1), 0); - } else { - dst[coords] = ir3_MOV(b, dst[3], TYPE_U32); - } - } - - put_dst(ctx, &tex->dest); -} - -static void -emit_jump(struct ir3_context *ctx, nir_jump_instr *jump) -{ - switch (jump->type) { - case nir_jump_break: - case nir_jump_continue: - case nir_jump_return: - /* I *think* we can simply just ignore this, and use the - * successor block link to figure out where we need to - * jump to for break/continue - */ - break; - default: - compile_error(ctx, "Unhandled NIR jump type: %d\n", jump->type); - break; - } -} - -static void -emit_instr(struct ir3_context *ctx, nir_instr *instr) -{ - switch (instr->type) { - case nir_instr_type_alu: - emit_alu(ctx, nir_instr_as_alu(instr)); - break; - case nir_instr_type_deref: - /* ignored, handled as part of the intrinsic they are src to */ - break; - case nir_instr_type_intrinsic: - emit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); - break; - case nir_instr_type_load_const: - emit_load_const(ctx, nir_instr_as_load_const(instr)); - break; - case nir_instr_type_ssa_undef: - emit_undef(ctx, nir_instr_as_ssa_undef(instr)); - break; - case nir_instr_type_tex: { - nir_tex_instr *tex = nir_instr_as_tex(instr); - /* 
couple tex instructions get special-cased: - */ - switch (tex->op) { - case nir_texop_txs: - emit_tex_txs(ctx, tex); - break; - case nir_texop_query_levels: - emit_tex_query_levels(ctx, tex); - break; - default: - emit_tex(ctx, tex); - break; - } - break; - } - case nir_instr_type_jump: - emit_jump(ctx, nir_instr_as_jump(instr)); - break; - case nir_instr_type_phi: - /* we have converted phi webs to regs in NIR by now */ - compile_error(ctx, "Unexpected NIR instruction type: %d\n", instr->type); - break; - case nir_instr_type_call: - case nir_instr_type_parallel_copy: - compile_error(ctx, "Unhandled NIR instruction type: %d\n", instr->type); - break; - } -} - -static struct ir3_block * -get_block(struct ir3_context *ctx, const nir_block *nblock) -{ - struct ir3_block *block; - struct hash_entry *hentry; - unsigned i; - - hentry = _mesa_hash_table_search(ctx->block_ht, nblock); - if (hentry) - return hentry->data; - - block = ir3_block_create(ctx->ir); - block->nblock = nblock; - _mesa_hash_table_insert(ctx->block_ht, nblock, block); - - block->predecessors_count = nblock->predecessors->entries; - block->predecessors = ralloc_array_size(block, - sizeof(block->predecessors[0]), block->predecessors_count); - i = 0; - set_foreach(nblock->predecessors, sentry) { - block->predecessors[i++] = get_block(ctx, sentry->key); - } - - return block; -} - -static void -emit_block(struct ir3_context *ctx, nir_block *nblock) -{ - struct ir3_block *block = get_block(ctx, nblock); - - for (int i = 0; i < ARRAY_SIZE(block->successors); i++) { - if (nblock->successors[i]) { - block->successors[i] = - get_block(ctx, nblock->successors[i]); - } - } - - ctx->block = block; - list_addtail(&block->node, &ctx->ir->block_list); - - /* re-emit addr register in each block if needed: */ - for (int i = 0; i < ARRAY_SIZE(ctx->addr_ht); i++) { - _mesa_hash_table_destroy(ctx->addr_ht[i], NULL); - ctx->addr_ht[i] = NULL; - } - - nir_foreach_instr(instr, nblock) { - ctx->cur_instr = instr; - 
emit_instr(ctx, instr); - ctx->cur_instr = NULL; - if (ctx->error) - return; - } -} - -static void emit_cf_list(struct ir3_context *ctx, struct exec_list *list); - -static void -emit_if(struct ir3_context *ctx, nir_if *nif) -{ - struct ir3_instruction *condition = get_src(ctx, &nif->condition)[0]; - - ctx->block->condition = - get_predicate(ctx, ir3_b2n(condition->block, condition)); - - emit_cf_list(ctx, &nif->then_list); - emit_cf_list(ctx, &nif->else_list); -} - -static void -emit_loop(struct ir3_context *ctx, nir_loop *nloop) -{ - emit_cf_list(ctx, &nloop->body); -} - -static void -emit_cf_list(struct ir3_context *ctx, struct exec_list *list) -{ - foreach_list_typed(nir_cf_node, node, node, list) { - switch (node->type) { - case nir_cf_node_block: - emit_block(ctx, nir_cf_node_as_block(node)); - break; - case nir_cf_node_if: - emit_if(ctx, nir_cf_node_as_if(node)); - break; - case nir_cf_node_loop: - emit_loop(ctx, nir_cf_node_as_loop(node)); - break; - case nir_cf_node_function: - compile_error(ctx, "TODO\n"); - break; - } - } -} - -/* emit stream-out code. At this point, the current block is the original - * (nir) end block, and nir ensures that all flow control paths terminate - * into the end block. We re-purpose the original end block to generate - * the 'if (vtxcnt < maxvtxcnt)' condition, then append the conditional - * block holding stream-out write instructions, followed by the new end - * block: - * - * blockOrigEnd { - * p0.x = (vtxcnt < maxvtxcnt) - * // succs: blockStreamOut, blockNewEnd - * } - * blockStreamOut { - * ... stream-out instructions ... 
- * // succs: blockNewEnd - * } - * blockNewEnd { - * } - */ -static void -emit_stream_out(struct ir3_context *ctx) -{ - struct ir3_shader_variant *v = ctx->so; - struct ir3 *ir = ctx->ir; - struct pipe_stream_output_info *strmout = - &ctx->so->shader->stream_output; - struct ir3_block *orig_end_block, *stream_out_block, *new_end_block; - struct ir3_instruction *vtxcnt, *maxvtxcnt, *cond; - struct ir3_instruction *bases[PIPE_MAX_SO_BUFFERS]; - - /* create vtxcnt input in input block at top of shader, - * so that it is seen as live over the entire duration - * of the shader: - */ - vtxcnt = create_input(ctx, 0); - add_sysval_input(ctx, SYSTEM_VALUE_VERTEX_CNT, vtxcnt); - - maxvtxcnt = create_driver_param(ctx, IR3_DP_VTXCNT_MAX); - - /* at this point, we are at the original 'end' block, - * re-purpose this block to stream-out condition, then - * append stream-out block and new-end block - */ - orig_end_block = ctx->block; - -// TODO these blocks need to update predecessors.. -// maybe w/ store_global intrinsic, we could do this -// stuff in nir->nir pass - - stream_out_block = ir3_block_create(ir); - list_addtail(&stream_out_block->node, &ir->block_list); - - new_end_block = ir3_block_create(ir); - list_addtail(&new_end_block->node, &ir->block_list); - - orig_end_block->successors[0] = stream_out_block; - orig_end_block->successors[1] = new_end_block; - stream_out_block->successors[0] = new_end_block; - - /* setup 'if (vtxcnt < maxvtxcnt)' condition: */ - cond = ir3_CMPS_S(ctx->block, vtxcnt, 0, maxvtxcnt, 0); - cond->regs[0]->num = regid(REG_P0, 0); - cond->cat2.condition = IR3_COND_LT; - - /* condition goes on previous block to the conditional, - * since it is used to pick which of the two successor - * paths to take: - */ - orig_end_block->condition = cond; - - /* switch to stream_out_block to generate the stream-out - * instructions: - */ - ctx->block = stream_out_block; - - /* Calculate base addresses based on vtxcnt. 
Instructions - * generated for bases not used in following loop will be - * stripped out in the backend. - */ - for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { - unsigned stride = strmout->stride[i]; - struct ir3_instruction *base, *off; - - base = create_uniform(ctx, regid(v->constbase.tfbo, i)); - - /* 24-bit should be enough: */ - off = ir3_MUL_U(ctx->block, vtxcnt, 0, - create_immed(ctx->block, stride * 4), 0); - - bases[i] = ir3_ADD_S(ctx->block, off, 0, base, 0); - } - - /* Generate the per-output store instructions: */ - for (unsigned i = 0; i < strmout->num_outputs; i++) { - for (unsigned j = 0; j < strmout->output[i].num_components; j++) { - unsigned c = j + strmout->output[i].start_component; - struct ir3_instruction *base, *out, *stg; - - base = bases[strmout->output[i].output_buffer]; - out = ctx->ir->outputs[regid(strmout->output[i].register_index, c)]; - - stg = ir3_STG(ctx->block, base, 0, out, 0, - create_immed(ctx->block, 1), 0); - stg->cat6.type = TYPE_U32; - stg->cat6.dst_offset = (strmout->output[i].dst_offset + j) * 4; - - array_insert(ctx->block, ctx->block->keeps, stg); - } - } - - /* and finally switch to the new_end_block: */ - ctx->block = new_end_block; -} - -static void -emit_function(struct ir3_context *ctx, nir_function_impl *impl) -{ - nir_metadata_require(impl, nir_metadata_block_index); - - emit_cf_list(ctx, &impl->body); - emit_block(ctx, impl->end_block); - - /* at this point, we should have a single empty block, - * into which we emit the 'end' instruction. - */ - compile_assert(ctx, list_empty(&ctx->block->instr_list)); - - /* If stream-out (aka transform-feedback) enabled, emit the - * stream-out instructions, followed by a new empty block (into - * which the 'end' instruction lands). - * - * NOTE: it is done in this order, rather than inserting before - * we emit end_block, because NIR guarantees that all blocks - * flow into end_block, and that end_block has no successors. 
- * So by re-purposing end_block as the first block of stream- - * out, we guarantee that all exit paths flow into the stream- - * out instructions. - */ - if ((ctx->compiler->gpu_id < 500) && - (ctx->so->shader->stream_output.num_outputs > 0) && - !ctx->so->binning_pass) { - debug_assert(ctx->so->type == SHADER_VERTEX); - emit_stream_out(ctx); - } - - ir3_END(ctx->block); -} - -static struct ir3_instruction * -create_frag_coord(struct ir3_context *ctx, unsigned comp) -{ - struct ir3_block *block = ctx->block; - struct ir3_instruction *instr; - - if (!ctx->frag_coord) { - ctx->frag_coord = create_input_compmask(ctx, 0, 0xf); - /* defer add_sysval_input() until after all inputs created */ - } - - split_dest(block, &instr, ctx->frag_coord, comp, 1); - - switch (comp) { - case 0: /* .x */ - case 1: /* .y */ - /* for frag_coord, we get unsigned values.. we need - * to subtract (integer) 8 and divide by 16 (right- - * shift by 4) then convert to float: - * - * sub.s tmp, src, 8 - * shr.b tmp, tmp, 4 - * mov.u32f32 dst, tmp - * - */ - instr = ir3_SUB_S(block, instr, 0, - create_immed(block, 8), 0); - instr = ir3_SHR_B(block, instr, 0, - create_immed(block, 4), 0); - instr = ir3_COV(block, instr, TYPE_U32, TYPE_F32); - - return instr; - case 2: /* .z */ - case 3: /* .w */ - default: - /* seems that we can use these as-is: */ - return instr; - } -} - -static void -setup_input(struct ir3_context *ctx, nir_variable *in) -{ - struct ir3_shader_variant *so = ctx->so; - unsigned array_len = MAX2(glsl_get_length(in->type), 1); - unsigned ncomp = glsl_get_components(in->type); - unsigned n = in->data.driver_location; - unsigned slot = in->data.location; - - DBG("; in: slot=%u, len=%ux%u, drvloc=%u", - slot, array_len, ncomp, n); - - /* let's pretend things other than vec4 don't exist: */ - ncomp = MAX2(ncomp, 4); - - /* skip unread inputs, we could end up with (for example), unsplit - * matrix/etc inputs in the case they are not read, so just silently - * skip these. 
- */ - if (ncomp > 4) - return; - - compile_assert(ctx, ncomp == 4); - - so->inputs[n].slot = slot; - so->inputs[n].compmask = (1 << ncomp) - 1; - so->inputs_count = MAX2(so->inputs_count, n + 1); - so->inputs[n].interpolate = in->data.interpolation; - - if (ctx->so->type == SHADER_FRAGMENT) { - for (int i = 0; i < ncomp; i++) { - struct ir3_instruction *instr = NULL; - unsigned idx = (n * 4) + i; - - if (slot == VARYING_SLOT_POS) { - so->inputs[n].bary = false; - so->frag_coord = true; - instr = create_frag_coord(ctx, i); - } else if (slot == VARYING_SLOT_PNTC) { - /* see for example st_nir_fixup_varying_slots().. this is - * maybe a bit mesa/st specific. But we need things to line - * up for this in fdN_program: - * unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0); - * if (emit->sprite_coord_enable & texmask) { - * ... - * } - */ - so->inputs[n].slot = VARYING_SLOT_VAR8; - so->inputs[n].bary = true; - instr = create_frag_input(ctx, false); - } else { - bool use_ldlv = false; - - /* detect the special case for front/back colors where - * we need to do flat vs smooth shading depending on - * rast state: - */ - if (in->data.interpolation == INTERP_MODE_NONE) { - switch (slot) { - case VARYING_SLOT_COL0: - case VARYING_SLOT_COL1: - case VARYING_SLOT_BFC0: - case VARYING_SLOT_BFC1: - so->inputs[n].rasterflat = true; - break; - default: - break; - } - } - - if (ctx->compiler->flat_bypass) { - if ((so->inputs[n].interpolate == INTERP_MODE_FLAT) || - (so->inputs[n].rasterflat && ctx->so->key.rasterflat)) - use_ldlv = true; - } - - so->inputs[n].bary = true; - - instr = create_frag_input(ctx, use_ldlv); - } - - compile_assert(ctx, idx < ctx->ir->ninputs); - - ctx->ir->inputs[idx] = instr; - } - } else if (ctx->so->type == SHADER_VERTEX) { - for (int i = 0; i < ncomp; i++) { - unsigned idx = (n * 4) + i; - compile_assert(ctx, idx < ctx->ir->ninputs); - ctx->ir->inputs[idx] = create_input(ctx, idx); - } - } else { - compile_error(ctx, "unknown shader type: %d\n", 
ctx->so->type); - } - - if (so->inputs[n].bary || (ctx->so->type == SHADER_VERTEX)) { - so->total_in += ncomp; - } -} - -static void -setup_output(struct ir3_context *ctx, nir_variable *out) -{ - struct ir3_shader_variant *so = ctx->so; - unsigned array_len = MAX2(glsl_get_length(out->type), 1); - unsigned ncomp = glsl_get_components(out->type); - unsigned n = out->data.driver_location; - unsigned slot = out->data.location; - unsigned comp = 0; - - DBG("; out: slot=%u, len=%ux%u, drvloc=%u", - slot, array_len, ncomp, n); - - /* let's pretend things other than vec4 don't exist: */ - ncomp = MAX2(ncomp, 4); - compile_assert(ctx, ncomp == 4); - - if (ctx->so->type == SHADER_FRAGMENT) { - switch (slot) { - case FRAG_RESULT_DEPTH: - comp = 2; /* tgsi will write to .z component */ - so->writes_pos = true; - break; - case FRAG_RESULT_COLOR: - so->color0_mrt = 1; - break; - default: - if (slot >= FRAG_RESULT_DATA0) - break; - compile_error(ctx, "unknown FS output name: %s\n", - gl_frag_result_name(slot)); - } - } else if (ctx->so->type == SHADER_VERTEX) { - switch (slot) { - case VARYING_SLOT_POS: - so->writes_pos = true; - break; - case VARYING_SLOT_PSIZ: - so->writes_psize = true; - break; - case VARYING_SLOT_COL0: - case VARYING_SLOT_COL1: - case VARYING_SLOT_BFC0: - case VARYING_SLOT_BFC1: - case VARYING_SLOT_FOGC: - case VARYING_SLOT_CLIP_DIST0: - case VARYING_SLOT_CLIP_DIST1: - case VARYING_SLOT_CLIP_VERTEX: - break; - default: - if (slot >= VARYING_SLOT_VAR0) - break; - if ((VARYING_SLOT_TEX0 <= slot) && (slot <= VARYING_SLOT_TEX7)) - break; - compile_error(ctx, "unknown VS output name: %s\n", - gl_varying_slot_name(slot)); - } - } else { - compile_error(ctx, "unknown shader type: %d\n", ctx->so->type); - } - - compile_assert(ctx, n < ARRAY_SIZE(so->outputs)); - - so->outputs[n].slot = slot; - so->outputs[n].regid = regid(n, comp); - so->outputs_count = MAX2(so->outputs_count, n + 1); - - for (int i = 0; i < ncomp; i++) { - unsigned idx = (n * 4) + i; - 
compile_assert(ctx, idx < ctx->ir->noutputs); - ctx->ir->outputs[idx] = create_immed(ctx->block, fui(0.0)); - } -} - -static int -max_drvloc(struct exec_list *vars) -{ - int drvloc = -1; - nir_foreach_variable(var, vars) { - drvloc = MAX2(drvloc, (int)var->data.driver_location); - } - return drvloc; -} - -static const unsigned max_sysvals[SHADER_MAX] = { - [SHADER_FRAGMENT] = 24, // TODO - [SHADER_VERTEX] = 16, - [SHADER_COMPUTE] = 16, // TODO how many do we actually need? -}; - -static void -emit_instructions(struct ir3_context *ctx) -{ - unsigned ninputs, noutputs; - nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->s); - - ninputs = (max_drvloc(&ctx->s->inputs) + 1) * 4; - noutputs = (max_drvloc(&ctx->s->outputs) + 1) * 4; - - /* we need to leave room for sysvals: - */ - ninputs += max_sysvals[ctx->so->type]; - - ctx->ir = ir3_create(ctx->compiler, ninputs, noutputs); - - /* Create inputs in first block: */ - ctx->block = get_block(ctx, nir_start_block(fxn)); - ctx->in_block = ctx->block; - list_addtail(&ctx->block->node, &ctx->ir->block_list); - - ninputs -= max_sysvals[ctx->so->type]; - - /* for fragment shader, the vcoord input register is used as the - * base for bary.f varying fetch instrs: - */ - struct ir3_instruction *vcoord = NULL; - if (ctx->so->type == SHADER_FRAGMENT) { - struct ir3_instruction *xy[2]; - - vcoord = create_input_compmask(ctx, 0, 0x3); - split_dest(ctx->block, xy, vcoord, 0, 2); - - ctx->frag_vcoord = create_collect(ctx, xy, 2); - } - - /* Setup inputs: */ - nir_foreach_variable(var, &ctx->s->inputs) { - setup_input(ctx, var); - } - - /* Defer add_sysval_input() stuff until after setup_inputs(), - * because sysvals need to be appended after varyings: - */ - if (vcoord) { - add_sysval_input_compmask(ctx, SYSTEM_VALUE_VARYING_COORD, - 0x3, vcoord); - } - - if (ctx->frag_coord) { - add_sysval_input_compmask(ctx, SYSTEM_VALUE_FRAG_COORD, - 0xf, ctx->frag_coord); - } - - /* Setup outputs: */ - nir_foreach_variable(var, 
&ctx->s->outputs) { - setup_output(ctx, var); - } - - /* Setup registers (which should only be arrays): */ - nir_foreach_register(reg, &ctx->s->registers) { - declare_array(ctx, reg); - } - - /* NOTE: need to do something more clever when we support >1 fxn */ - nir_foreach_register(reg, &fxn->registers) { - declare_array(ctx, reg); - } - /* And emit the body: */ - ctx->impl = fxn; - emit_function(ctx, fxn); -} - -/* from NIR perspective, we actually have varying inputs. But the varying - * inputs, from an IR standpoint, are just bary.f/ldlv instructions. The - * only actual inputs are the sysvals. - */ -static void -fixup_frag_inputs(struct ir3_context *ctx) -{ - struct ir3_shader_variant *so = ctx->so; - struct ir3 *ir = ctx->ir; - unsigned i = 0; - - /* sysvals should appear at the end of the inputs, drop everything else: */ - while ((i < so->inputs_count) && !so->inputs[i].sysval) - i++; - - /* at IR level, inputs are always blocks of 4 scalars: */ - i *= 4; - - ir->inputs = &ir->inputs[i]; - ir->ninputs -= i; -} - -/* Fixup tex sampler state for astc/srgb workaround instructions. We - * need to assign the tex state indexes for these after we know the - * max tex index. - */ -static void -fixup_astc_srgb(struct ir3_context *ctx) -{ - struct ir3_shader_variant *so = ctx->so; - /* indexed by original tex idx, value is newly assigned alpha sampler - * state tex idx. Zero is invalid since there is at least one sampler - * if we get here. 
- */ - unsigned alt_tex_state[16] = {0}; - unsigned tex_idx = ctx->max_texture_index + 1; - unsigned idx = 0; - - so->astc_srgb.base = tex_idx; - - for (unsigned i = 0; i < ctx->ir->astc_srgb_count; i++) { - struct ir3_instruction *sam = ctx->ir->astc_srgb[i]; - - compile_assert(ctx, sam->cat5.tex < ARRAY_SIZE(alt_tex_state)); - - if (alt_tex_state[sam->cat5.tex] == 0) { - /* assign new alternate/alpha tex state slot: */ - alt_tex_state[sam->cat5.tex] = tex_idx++; - so->astc_srgb.orig_idx[idx++] = sam->cat5.tex; - so->astc_srgb.count++; - } - - sam->cat5.tex = alt_tex_state[sam->cat5.tex]; - } -} - -static void -fixup_binning_pass(struct ir3_context *ctx) -{ - struct ir3_shader_variant *so = ctx->so; - struct ir3 *ir = ctx->ir; - unsigned i, j; - - for (i = 0, j = 0; i < so->outputs_count; i++) { - unsigned slot = so->outputs[i].slot; - - /* throw away everything but first position/psize */ - if ((slot == VARYING_SLOT_POS) || (slot == VARYING_SLOT_PSIZ)) { - if (i != j) { - so->outputs[j] = so->outputs[i]; - ir->outputs[(j*4)+0] = ir->outputs[(i*4)+0]; - ir->outputs[(j*4)+1] = ir->outputs[(i*4)+1]; - ir->outputs[(j*4)+2] = ir->outputs[(i*4)+2]; - ir->outputs[(j*4)+3] = ir->outputs[(i*4)+3]; - } - j++; - } - } - so->outputs_count = j; - ir->noutputs = j * 4; -} - -int -ir3_compile_shader_nir(struct ir3_compiler *compiler, - struct ir3_shader_variant *so) -{ - struct ir3_context *ctx; - struct ir3 *ir; - struct ir3_instruction **inputs; - unsigned i, actual_in, inloc; - int ret = 0, max_bary; - - assert(!so->ir); - - ctx = compile_init(compiler, so); - if (!ctx) { - DBG("INIT failed!"); - ret = -1; - goto out; - } - - emit_instructions(ctx); - - if (ctx->error) { - DBG("EMIT failed!"); - ret = -1; - goto out; - } - - ir = so->ir = ctx->ir; - - /* keep track of the inputs from TGSI perspective.. 
*/ - inputs = ir->inputs; - - /* but fixup actual inputs for frag shader: */ - if (so->type == SHADER_FRAGMENT) - fixup_frag_inputs(ctx); - - /* at this point, for binning pass, throw away unneeded outputs: */ - if (so->binning_pass && (ctx->compiler->gpu_id < 600)) - fixup_binning_pass(ctx); - - /* if we want half-precision outputs, mark the output registers - * as half: - */ - if (so->key.half_precision) { - for (i = 0; i < ir->noutputs; i++) { - struct ir3_instruction *out = ir->outputs[i]; - - if (!out) - continue; - - /* if frag shader writes z, that needs to be full precision: */ - if (so->outputs[i/4].slot == FRAG_RESULT_DEPTH) - continue; - - out->regs[0]->flags |= IR3_REG_HALF; - /* output could be a fanout (ie. texture fetch output) - * in which case we need to propagate the half-reg flag - * up to the definer so that RA sees it: - */ - if (out->opc == OPC_META_FO) { - out = out->regs[1]->instr; - out->regs[0]->flags |= IR3_REG_HALF; - } - - if (out->opc == OPC_MOV) { - out->cat1.dst_type = half_type(out->cat1.dst_type); - } - } - } - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("BEFORE CP:\n"); - ir3_print(ir); - } - - ir3_cp(ir, so); - - /* at this point, for binning pass, throw away unneeded outputs: - * Note that for a6xx and later, we do this after ir3_cp to ensure - * that the uniform/constant layout for BS and VS matches, so that - * we can re-use same VS_CONST state group. - */ - if (so->binning_pass && (ctx->compiler->gpu_id >= 600)) - fixup_binning_pass(ctx); - - /* Insert mov if there's same instruction for each output. - * eg. 
dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_expression.vertex.sampler2dshadow - */ - for (int i = ir->noutputs - 1; i >= 0; i--) { - if (!ir->outputs[i]) - continue; - for (unsigned j = 0; j < i; j++) { - if (ir->outputs[i] == ir->outputs[j]) { - ir->outputs[i] = - ir3_MOV(ir->outputs[i]->block, ir->outputs[i], TYPE_F32); - } - } - } - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("BEFORE GROUPING:\n"); - ir3_print(ir); - } - - ir3_sched_add_deps(ir); - - /* Group left/right neighbors, inserting mov's where needed to - * solve conflicts: - */ - ir3_group(ir); - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("AFTER GROUPING:\n"); - ir3_print(ir); - } - - ir3_depth(ir); - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("AFTER DEPTH:\n"); - ir3_print(ir); - } - - ret = ir3_sched(ir); - if (ret) { - DBG("SCHED failed!"); - goto out; - } - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("AFTER SCHED:\n"); - ir3_print(ir); - } - - ret = ir3_ra(ir, so->type, so->frag_coord, so->frag_face); - if (ret) { - DBG("RA failed!"); - goto out; - } - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("AFTER RA:\n"); - ir3_print(ir); - } - - /* fixup input/outputs: */ - for (i = 0; i < so->outputs_count; i++) { - so->outputs[i].regid = ir->outputs[i*4]->regs[0]->num; - } - - /* Note that some or all channels of an input may be unused: */ - actual_in = 0; - inloc = 0; - for (i = 0; i < so->inputs_count; i++) { - unsigned j, reg = regid(63,0), compmask = 0, maxcomp = 0; - so->inputs[i].ncomp = 0; - so->inputs[i].inloc = inloc; - for (j = 0; j < 4; j++) { - struct ir3_instruction *in = inputs[(i*4) + j]; - if (in && !(in->flags & IR3_INSTR_UNUSED)) { - compmask |= (1 << j); - reg = in->regs[0]->num - j; - actual_in++; - so->inputs[i].ncomp++; - if ((so->type == SHADER_FRAGMENT) && so->inputs[i].bary) { - /* assign inloc: */ - assert(in->regs[1]->flags & IR3_REG_IMMED); - in->regs[1]->iim_val = inloc + j; - maxcomp = j + 1; - } - } - } - if ((so->type == 
SHADER_FRAGMENT) && compmask && so->inputs[i].bary) { - so->varying_in++; - so->inputs[i].compmask = (1 << maxcomp) - 1; - inloc += maxcomp; - } else if (!so->inputs[i].sysval) { - so->inputs[i].compmask = compmask; - } - so->inputs[i].regid = reg; - } - - if (ctx->astc_srgb) - fixup_astc_srgb(ctx); - - /* We need to do legalize after (for frag shader's) the "bary.f" - * offsets (inloc) have been assigned. - */ - ir3_legalize(ir, &so->num_samp, &so->has_ssbo, &max_bary); - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("AFTER LEGALIZE:\n"); - ir3_print(ir); - } - - /* Note that actual_in counts inputs that are not bary.f'd for FS: */ - if (so->type == SHADER_VERTEX) - so->total_in = actual_in; - else - so->total_in = max_bary + 1; - -out: - if (ret) { - if (so->ir) - ir3_destroy(so->ir); - so->ir = NULL; - } - compile_free(ctx); - - return ret; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_cp.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_cp.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_cp.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_cp.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,653 +0,0 @@ -/* - * Copyright (C) 2014 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include "freedreno_util.h" - -#include "ir3.h" -#include "ir3_shader.h" - -/* - * Copy Propagate: - */ - -struct ir3_cp_ctx { - struct ir3 *shader; - struct ir3_shader_variant *so; - unsigned immediate_idx; -}; - -/* is it a type preserving mov, with ok flags? */ -static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags) -{ - if (is_same_type_mov(instr)) { - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src = instr->regs[1]; - struct ir3_instruction *src_instr = ssa(src); - - /* only if mov src is SSA (not const/immed): */ - if (!src_instr) - return false; - - /* no indirect: */ - if (dst->flags & IR3_REG_RELATIV) - return false; - if (src->flags & IR3_REG_RELATIV) - return false; - - if (src->flags & IR3_REG_ARRAY) - return false; - - if (!allow_flags) - if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG | - IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT)) - return false; - - /* TODO: remove this hack: */ - if (src_instr->opc == OPC_META_FO) - return false; - - return true; - } - return false; -} - -static unsigned cp_flags(unsigned flags) -{ - /* only considering these flags (at least for now): */ - flags &= (IR3_REG_CONST | IR3_REG_IMMED | - IR3_REG_FNEG | IR3_REG_FABS | - IR3_REG_SNEG | IR3_REG_SABS | - IR3_REG_BNOT | IR3_REG_RELATIV); - return flags; -} - -static bool valid_flags(struct ir3_instruction *instr, unsigned n, - unsigned flags) -{ - unsigned valid_flags; - flags = cp_flags(flags); - - /* If 
destination is indirect, then source cannot be.. at least - * I don't think so.. - */ - if ((instr->regs[0]->flags & IR3_REG_RELATIV) && - (flags & IR3_REG_RELATIV)) - return false; - - /* TODO it seems to *mostly* work to cp RELATIV, except we get some - * intermittent piglit variable-indexing fails. Newer blob driver - * doesn't seem to cp these. Possibly this is hw workaround? Not - * sure, but until that is understood better, lets just switch off - * cp for indirect src's: - */ - if (flags & IR3_REG_RELATIV) - return false; - - switch (opc_cat(instr->opc)) { - case 1: - valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV; - if (flags & ~valid_flags) - return false; - break; - case 2: - valid_flags = ir3_cat2_absneg(instr->opc) | - IR3_REG_CONST | IR3_REG_RELATIV; - - if (ir3_cat2_int(instr->opc)) - valid_flags |= IR3_REG_IMMED; - - if (flags & ~valid_flags) - return false; - - if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) { - unsigned m = (n ^ 1) + 1; - /* cannot deal w/ const in both srcs: - * (note that some cat2 actually only have a single src) - */ - if (m < instr->regs_count) { - struct ir3_register *reg = instr->regs[m]; - if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST)) - return false; - if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED)) - return false; - } - /* cannot be const + ABS|NEG: */ - if (flags & (IR3_REG_FABS | IR3_REG_FNEG | - IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT)) - return false; - } - break; - case 3: - valid_flags = ir3_cat3_absneg(instr->opc) | - IR3_REG_CONST | IR3_REG_RELATIV; - - if (flags & ~valid_flags) - return false; - - if (flags & (IR3_REG_CONST | IR3_REG_RELATIV)) { - /* cannot deal w/ const/relativ in 2nd src: */ - if (n == 1) - return false; - } - - if (flags & IR3_REG_CONST) { - /* cannot be const + ABS|NEG: */ - if (flags & (IR3_REG_FABS | IR3_REG_FNEG | - IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT)) - return false; - } - break; - case 4: - /* seems like blob compiler avoids const as src.. 
*/ - /* TODO double check if this is still the case on a4xx */ - if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) - return false; - if (flags & (IR3_REG_SABS | IR3_REG_SNEG)) - return false; - break; - case 5: - /* no flags allowed */ - if (flags) - return false; - break; - case 6: - valid_flags = IR3_REG_IMMED; - if (flags & ~valid_flags) - return false; - - if (flags & IR3_REG_IMMED) { - /* doesn't seem like we can have immediate src for store - * instructions: - * - * TODO this restriction could also apply to load instructions, - * but for load instructions this arg is the address (and not - * really sure any good way to test a hard-coded immed addr src) - */ - if (is_store(instr) && (n == 1)) - return false; - - if ((instr->opc == OPC_LDL) && (n != 1)) - return false; - - if ((instr->opc == OPC_STL) && (n != 2)) - return false; - - /* disallow CP into anything but the SSBO slot argument for - * atomics: - */ - if (is_atomic(instr->opc) && (n != 0)) - return false; - - if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G)) - return false; - } - - break; - } - - return true; -} - -/* propagate register flags from src to dst.. negates need special - * handling to cancel each other out. 
- */ -static void combine_flags(unsigned *dstflags, struct ir3_instruction *src) -{ - unsigned srcflags = src->regs[1]->flags; - - /* if what we are combining into already has (abs) flags, - * we can drop (neg) from src: - */ - if (*dstflags & IR3_REG_FABS) - srcflags &= ~IR3_REG_FNEG; - if (*dstflags & IR3_REG_SABS) - srcflags &= ~IR3_REG_SNEG; - - if (srcflags & IR3_REG_FABS) - *dstflags |= IR3_REG_FABS; - if (srcflags & IR3_REG_SABS) - *dstflags |= IR3_REG_SABS; - if (srcflags & IR3_REG_FNEG) - *dstflags ^= IR3_REG_FNEG; - if (srcflags & IR3_REG_SNEG) - *dstflags ^= IR3_REG_SNEG; - if (srcflags & IR3_REG_BNOT) - *dstflags ^= IR3_REG_BNOT; - - *dstflags &= ~IR3_REG_SSA; - *dstflags |= srcflags & IR3_REG_SSA; - *dstflags |= srcflags & IR3_REG_CONST; - *dstflags |= srcflags & IR3_REG_IMMED; - *dstflags |= srcflags & IR3_REG_RELATIV; - *dstflags |= srcflags & IR3_REG_ARRAY; - - /* if src of the src is boolean we can drop the (abs) since we know - * the source value is already a postitive integer. This cleans - * up the absnegs that get inserted when converting between nir and - * native boolean (see ir3_b2n/n2b) - */ - struct ir3_instruction *srcsrc = ssa(src->regs[1]); - if (srcsrc && is_bool(srcsrc)) - *dstflags &= ~IR3_REG_SABS; -} - -static struct ir3_register * -lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags) -{ - unsigned swiz, idx, i; - - reg = ir3_reg_clone(ctx->shader, reg); - - /* in some cases, there are restrictions on (abs)/(neg) plus const.. 
- * so just evaluate those and clear the flags: - */ - if (new_flags & IR3_REG_SABS) { - reg->iim_val = abs(reg->iim_val); - new_flags &= ~IR3_REG_SABS; - } - - if (new_flags & IR3_REG_FABS) { - reg->fim_val = fabs(reg->fim_val); - new_flags &= ~IR3_REG_FABS; - } - - if (new_flags & IR3_REG_SNEG) { - reg->iim_val = -reg->iim_val; - new_flags &= ~IR3_REG_SNEG; - } - - if (new_flags & IR3_REG_FNEG) { - reg->fim_val = -reg->fim_val; - new_flags &= ~IR3_REG_FNEG; - } - - /* Reallocate for 4 more elements whenever it's necessary */ - if (ctx->immediate_idx == ctx->so->immediates_size * 4) { - ctx->so->immediates_size += 4; - ctx->so->immediates = realloc (ctx->so->immediates, - ctx->so->immediates_size * sizeof (ctx->so->immediates[0])); - } - - for (i = 0; i < ctx->immediate_idx; i++) { - swiz = i % 4; - idx = i / 4; - - if (ctx->so->immediates[idx].val[swiz] == reg->uim_val) { - break; - } - } - - if (i == ctx->immediate_idx) { - /* need to generate a new immediate: */ - swiz = i % 4; - idx = i / 4; - ctx->so->immediates[idx].val[swiz] = reg->uim_val; - ctx->so->immediates_count = idx + 1; - ctx->immediate_idx++; - } - - new_flags &= ~IR3_REG_IMMED; - new_flags |= IR3_REG_CONST; - reg->flags = new_flags; - reg->num = i + (4 * ctx->so->constbase.immediate); - - return reg; -} - -static void -unuse(struct ir3_instruction *instr) -{ - debug_assert(instr->use_count > 0); - - if (--instr->use_count == 0) { - struct ir3_block *block = instr->block; - - instr->barrier_class = 0; - instr->barrier_conflict = 0; - - /* we don't want to remove anything in keeps (which could - * be things like array store's) - */ - for (unsigned i = 0; i < block->keeps_count; i++) { - debug_assert(block->keeps[i] != instr); - } - } -} - -/** - * Handle cp for a given src register. 
This additionally handles - * the cases of collapsing immedate/const (which replace the src - * register with a non-ssa src) or collapsing mov's from relative - * src (which needs to also fixup the address src reference by the - * instruction). - */ -static void -reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, - struct ir3_register *reg, unsigned n) -{ - struct ir3_instruction *src = ssa(reg); - - if (is_eligible_mov(src, true)) { - /* simple case, no immed/const/relativ, only mov's w/ ssa src: */ - struct ir3_register *src_reg = src->regs[1]; - unsigned new_flags = reg->flags; - - combine_flags(&new_flags, src); - - if (valid_flags(instr, n, new_flags)) { - if (new_flags & IR3_REG_ARRAY) { - debug_assert(!(reg->flags & IR3_REG_ARRAY)); - reg->array = src_reg->array; - } - reg->flags = new_flags; - reg->instr = ssa(src_reg); - - instr->barrier_class |= src->barrier_class; - instr->barrier_conflict |= src->barrier_conflict; - - unuse(src); - reg->instr->use_count++; - } - - } else if (is_same_type_mov(src) && - /* cannot collapse const/immed/etc into meta instrs: */ - !is_meta(instr)) { - /* immed/const/etc cases, which require some special handling: */ - struct ir3_register *src_reg = src->regs[1]; - unsigned new_flags = reg->flags; - - combine_flags(&new_flags, src); - - if (!valid_flags(instr, n, new_flags)) { - /* See if lowering an immediate to const would help. */ - if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) { - debug_assert(new_flags & IR3_REG_IMMED); - instr->regs[n + 1] = lower_immed(ctx, src_reg, new_flags); - return; - } - - /* special case for "normal" mad instructions, we can - * try swapping the first two args if that fits better. - * - * the "plain" MAD's (ie. 
the ones that don't shift first - * src prior to multiply) can swap their first two srcs if - * src[0] is !CONST and src[1] is CONST: - */ - if ((n == 1) && is_mad(instr->opc) && - !(instr->regs[0 + 1]->flags & (IR3_REG_CONST | IR3_REG_RELATIV)) && - valid_flags(instr, 0, new_flags & ~IR3_REG_IMMED)) { - /* swap src[0] and src[1]: */ - struct ir3_register *tmp; - tmp = instr->regs[0 + 1]; - instr->regs[0 + 1] = instr->regs[1 + 1]; - instr->regs[1 + 1] = tmp; - - n = 0; - } else { - return; - } - } - - /* Here we handle the special case of mov from - * CONST and/or RELATIV. These need to be handled - * specially, because in the case of move from CONST - * there is no src ir3_instruction so we need to - * replace the ir3_register. And in the case of - * RELATIV we need to handle the address register - * dependency. - */ - if (src_reg->flags & IR3_REG_CONST) { - /* an instruction cannot reference two different - * address registers: - */ - if ((src_reg->flags & IR3_REG_RELATIV) && - conflicts(instr->address, reg->instr->address)) - return; - - /* This seems to be a hw bug, or something where the timings - * just somehow don't work out. This restriction may only - * apply if the first src is also CONST. - */ - if ((opc_cat(instr->opc) == 3) && (n == 2) && - (src_reg->flags & IR3_REG_RELATIV) && - (src_reg->array.offset == 0)) - return; - - src_reg = ir3_reg_clone(instr->block->shader, src_reg); - src_reg->flags = new_flags; - instr->regs[n+1] = src_reg; - - if (src_reg->flags & IR3_REG_RELATIV) - ir3_instr_set_address(instr, reg->instr->address); - - return; - } - - if ((src_reg->flags & IR3_REG_RELATIV) && - !conflicts(instr->address, reg->instr->address)) { - src_reg = ir3_reg_clone(instr->block->shader, src_reg); - src_reg->flags = new_flags; - instr->regs[n+1] = src_reg; - ir3_instr_set_address(instr, reg->instr->address); - - return; - } - - /* NOTE: seems we can only do immed integers, so don't - * need to care about float. 
But we do need to handle - * abs/neg *before* checking that the immediate requires - * few enough bits to encode: - * - * TODO: do we need to do something to avoid accidentally - * catching a float immed? - */ - if (src_reg->flags & IR3_REG_IMMED) { - int32_t iim_val = src_reg->iim_val; - - debug_assert((opc_cat(instr->opc) == 1) || - (opc_cat(instr->opc) == 6) || - ir3_cat2_int(instr->opc) || - (is_mad(instr->opc) && (n == 0))); - - if (new_flags & IR3_REG_SABS) - iim_val = abs(iim_val); - - if (new_flags & IR3_REG_SNEG) - iim_val = -iim_val; - - if (new_flags & IR3_REG_BNOT) - iim_val = ~iim_val; - - /* other than category 1 (mov) we can only encode up to 10 bits: */ - if ((instr->opc == OPC_MOV) || - !((iim_val & ~0x3ff) && (-iim_val & ~0x3ff))) { - new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT); - src_reg = ir3_reg_clone(instr->block->shader, src_reg); - src_reg->flags = new_flags; - src_reg->iim_val = iim_val; - instr->regs[n+1] = src_reg; - } else if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) { - /* See if lowering an immediate to const would help. */ - instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags); - } - - return; - } - } -} - -/* Handle special case of eliminating output mov, and similar cases where - * there isn't a normal "consuming" instruction. In this case we cannot - * collapse flags (ie. 
output mov from const, or w/ abs/neg flags, cannot - * be eliminated) - */ -static struct ir3_instruction * -eliminate_output_mov(struct ir3_instruction *instr) -{ - if (is_eligible_mov(instr, false)) { - struct ir3_register *reg = instr->regs[1]; - if (!(reg->flags & IR3_REG_ARRAY)) { - struct ir3_instruction *src_instr = ssa(reg); - debug_assert(src_instr); - return src_instr; - } - } - return instr; -} - -/** - * Find instruction src's which are mov's that can be collapsed, replacing - * the mov dst with the mov src - */ -static void -instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr) -{ - struct ir3_register *reg; - - if (instr->regs_count == 0) - return; - - if (ir3_instr_check_mark(instr)) - return; - - /* walk down the graph from each src: */ - foreach_src_n(reg, n, instr) { - struct ir3_instruction *src = ssa(reg); - - if (!src) - continue; - - instr_cp(ctx, src); - - /* TODO non-indirect access we could figure out which register - * we actually want and allow cp.. - */ - if (reg->flags & IR3_REG_ARRAY) - continue; - - /* Don't CP absneg into meta instructions, that won't end well: */ - if (is_meta(instr) && (src->opc != OPC_MOV)) - continue; - - reg_cp(ctx, instr, reg, n); - } - - if (instr->regs[0]->flags & IR3_REG_ARRAY) { - struct ir3_instruction *src = ssa(instr->regs[0]); - if (src) - instr_cp(ctx, src); - } - - if (instr->address) { - instr_cp(ctx, instr->address); - ir3_instr_set_address(instr, eliminate_output_mov(instr->address)); - } - - /* we can end up with extra cmps.s from frontend, which uses a - * - * cmps.s p0.x, cond, 0 - * - * as a way to mov into the predicate register. But frequently 'cond' - * is itself a cmps.s/cmps.f/cmps.u. So detect this special case and - * just re-write the instruction writing predicate register to get rid - * of the double cmps. 
- */ - if ((instr->opc == OPC_CMPS_S) && - (instr->regs[0]->num == regid(REG_P0, 0)) && - ssa(instr->regs[1]) && - (instr->regs[2]->flags & IR3_REG_IMMED) && - (instr->regs[2]->iim_val == 0)) { - struct ir3_instruction *cond = ssa(instr->regs[1]); - switch (cond->opc) { - case OPC_CMPS_S: - case OPC_CMPS_F: - case OPC_CMPS_U: - instr->opc = cond->opc; - instr->flags = cond->flags; - instr->cat2 = cond->cat2; - instr->address = cond->address; - instr->regs[1] = cond->regs[1]; - instr->regs[2] = cond->regs[2]; - instr->barrier_class |= cond->barrier_class; - instr->barrier_conflict |= cond->barrier_conflict; - unuse(cond); - break; - default: - break; - } - } -} - -void -ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so) -{ - struct ir3_cp_ctx ctx = { - .shader = ir, - .so = so, - }; - - /* This is a bit annoying, and probably wouldn't be necessary if we - * tracked a reverse link from producing instruction to consumer. - * But we need to know when we've eliminated the last consumer of - * a mov, so we need to do a pass to first count consumers of a - * mov. 
- */ - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - struct ir3_instruction *src; - - /* by the way, we don't account for false-dep's, so the CP - * pass should always happen before false-dep's are inserted - */ - debug_assert(instr->deps_count == 0); - - foreach_ssa_src(src, instr) { - src->use_count++; - } - } - } - - ir3_clear_mark(ir); - - for (unsigned i = 0; i < ir->noutputs; i++) { - if (ir->outputs[i]) { - instr_cp(&ctx, ir->outputs[i]); - ir->outputs[i] = eliminate_output_mov(ir->outputs[i]); - } - } - - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - if (block->condition) { - instr_cp(&ctx, block->condition); - block->condition = eliminate_output_mov(block->condition); - } - - for (unsigned i = 0; i < block->keeps_count; i++) { - instr_cp(&ctx, block->keeps[i]); - block->keeps[i] = eliminate_output_mov(block->keeps[i]); - } - } -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_depth.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_depth.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_depth.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_depth.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,245 +0,0 @@ -/* - * Copyright (C) 2014 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include "util/u_math.h" - -#include "ir3.h" - -/* - * Instruction Depth: - * - * Calculates weighted instruction depth, ie. the sum of # of needed - * instructions plus delay slots back to original input (ie INPUT or - * CONST). That is to say, an instructions depth is: - * - * depth(instr) { - * d = 0; - * // for each src register: - * foreach (src in instr->regs[1..n]) - * d = max(d, delayslots(src->instr, n) + depth(src->instr)); - * return d + 1; - * } - * - * After an instruction's depth is calculated, it is inserted into the - * blocks depth sorted list, which is used by the scheduling pass. - */ - -/* generally don't count false dependencies, since this can just be - * something like a barrier, or SSBO store. The exception is array - * dependencies if the assigner is an array write and the consumer - * reads the same array. 
- */ -static bool -ignore_dep(struct ir3_instruction *assigner, - struct ir3_instruction *consumer, unsigned n) -{ - if (!__is_false_dep(consumer, n)) - return false; - - if (assigner->barrier_class & IR3_BARRIER_ARRAY_W) { - struct ir3_register *dst = assigner->regs[0]; - struct ir3_register *src; - - debug_assert(dst->flags & IR3_REG_ARRAY); - - foreach_src(src, consumer) { - if ((src->flags & IR3_REG_ARRAY) && - (dst->array.id == src->array.id)) { - return false; - } - } - } - - return true; -} - -/* calculate required # of delay slots between the instruction that - * assigns a value and the one that consumes - */ -int ir3_delayslots(struct ir3_instruction *assigner, - struct ir3_instruction *consumer, unsigned n) -{ - if (ignore_dep(assigner, consumer, n)) - return 0; - - /* worst case is cat1-3 (alu) -> cat4/5 needing 6 cycles, normal - * alu -> alu needs 3 cycles, cat4 -> alu and texture fetch - * handled with sync bits - */ - - if (is_meta(assigner)) - return 0; - - if (writes_addr(assigner)) - return 6; - - /* handled via sync flags: */ - if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner)) - return 0; - - /* assigner must be alu: */ - if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) || - is_mem(consumer)) { - return 6; - } else if ((is_mad(consumer->opc) || is_madsh(consumer->opc)) && - (n == 3)) { - /* special case, 3rd src to cat3 not required on first cycle */ - return 1; - } else { - return 3; - } -} - -void -ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list) -{ - /* remove from existing spot in list: */ - list_delinit(&instr->node); - - /* find where to re-insert instruction: */ - list_for_each_entry (struct ir3_instruction, pos, list, node) { - if (pos->depth > instr->depth) { - list_add(&instr->node, &pos->node); - return; - } - } - /* if we get here, we didn't find an insertion spot: */ - list_addtail(&instr->node, list); -} - -static void -ir3_instr_depth(struct ir3_instruction *instr, unsigned boost, 
bool falsedep) -{ - struct ir3_instruction *src; - - /* don't mark falsedep's as used, but otherwise process them normally: */ - if (!falsedep) - instr->flags &= ~IR3_INSTR_UNUSED; - - if (ir3_instr_check_mark(instr)) - return; - - instr->depth = 0; - - foreach_ssa_src_n(src, i, instr) { - unsigned sd; - - /* visit child to compute it's depth: */ - ir3_instr_depth(src, boost, __is_false_dep(instr, i)); - - /* for array writes, no need to delay on previous write: */ - if (i == 0) - continue; - - sd = ir3_delayslots(src, instr, i) + src->depth; - sd += boost; - - instr->depth = MAX2(instr->depth, sd); - } - - if (!is_meta(instr)) - instr->depth++; - - ir3_insert_by_depth(instr, &instr->block->instr_list); -} - -static bool -remove_unused_by_block(struct ir3_block *block) -{ - bool progress = false; - list_for_each_entry_safe (struct ir3_instruction, instr, &block->instr_list, node) { - if (instr->opc == OPC_END) - continue; - if (instr->flags & IR3_INSTR_UNUSED) { - list_delinit(&instr->node); - progress = true; - } - } - return progress; -} - -static bool -compute_depth_and_remove_unused(struct ir3 *ir) -{ - unsigned i; - bool progress = false; - - ir3_clear_mark(ir); - - /* initially mark everything as unused, we'll clear the flag as we - * visit the instructions: - */ - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - instr->flags |= IR3_INSTR_UNUSED; - } - } - - for (i = 0; i < ir->noutputs; i++) - if (ir->outputs[i]) - ir3_instr_depth(ir->outputs[i], 0, false); - - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - for (i = 0; i < block->keeps_count; i++) - ir3_instr_depth(block->keeps[i], 0, false); - - /* We also need to account for if-condition: */ - if (block->condition) - ir3_instr_depth(block->condition, 6, false); - } - - /* mark un-used instructions: */ - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - 
progress |= remove_unused_by_block(block); - } - - /* note that we can end up with unused indirects, but we should - * not end up with unused predicates. - */ - for (i = 0; i < ir->indirects_count; i++) { - struct ir3_instruction *instr = ir->indirects[i]; - if (instr && (instr->flags & IR3_INSTR_UNUSED)) - ir->indirects[i] = NULL; - } - - /* cleanup unused inputs: */ - for (i = 0; i < ir->ninputs; i++) { - struct ir3_instruction *in = ir->inputs[i]; - if (in && (in->flags & IR3_INSTR_UNUSED)) - ir->inputs[i] = NULL; - } - - return progress; -} - -void -ir3_depth(struct ir3 *ir) -{ - bool progress; - do { - progress = compute_depth_and_remove_unused(ir); - } while (progress); -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_gallium.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_gallium.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_gallium.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_gallium.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,643 @@ +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" + +#include "nir/tgsi_to_nir.h" + +#include "freedreno_context.h" +#include "freedreno_util.h" + +#include "ir3/ir3_shader.h" +#include "ir3/ir3_gallium.h" +#include "ir3/ir3_compiler.h" +#include "ir3/ir3_nir.h" + +static void +dump_shader_info(struct ir3_shader_variant *v, struct pipe_debug_callback *debug) +{ + if (!unlikely(fd_mesa_debug & FD_DBG_SHADERDB)) + return; + + pipe_debug_message(debug, SHADER_INFO, "\n" + "SHADER-DB: %s prog %d/%d: %u instructions, %u dwords\n" + "SHADER-DB: %s prog %d/%d: %u half, %u full\n" + "SHADER-DB: %s prog %d/%d: %u const, %u constlen\n" + "SHADER-DB: %s prog %d/%d: %u (ss), %u (sy)\n", + ir3_shader_stage(v->shader), + v->shader->id, v->id, + v->info.instrs_count, + v->info.sizedwords, + ir3_shader_stage(v->shader), + v->shader->id, v->id, + v->info.max_half_reg + 1, + v->info.max_reg + 1, + ir3_shader_stage(v->shader), + v->shader->id, v->id, + v->info.max_const + 1, + v->constlen, + ir3_shader_stage(v->shader), + v->shader->id, v->id, + v->info.ss, v->info.sy); +} + +struct ir3_shader_variant * +ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key, + bool binning_pass, struct pipe_debug_callback *debug) +{ + struct ir3_shader_variant *v; + bool created = false; + + /* some shader key values only apply to vertex or frag shader, + * so normalize the key to avoid constructing multiple identical + * variants: + */ + ir3_normalize_key(&key, shader->type); + + v = ir3_shader_get_variant(shader, &key, 
binning_pass, &created); + + if (created) { + dump_shader_info(v, debug); + } + + return v; +} + +static void +copy_stream_out(struct ir3_stream_output_info *i, + const struct pipe_stream_output_info *p) +{ + STATIC_ASSERT(ARRAY_SIZE(i->stride) == ARRAY_SIZE(p->stride)); + STATIC_ASSERT(ARRAY_SIZE(i->output) == ARRAY_SIZE(p->output)); + + i->num_outputs = p->num_outputs; + for (int n = 0; n < ARRAY_SIZE(i->stride); n++) + i->stride[n] = p->stride[n]; + + for (int n = 0; n < ARRAY_SIZE(i->output); n++) { + i->output[n].register_index = p->output[n].register_index; + i->output[n].start_component = p->output[n].start_component; + i->output[n].num_components = p->output[n].num_components; + i->output[n].output_buffer = p->output[n].output_buffer; + i->output[n].dst_offset = p->output[n].dst_offset; + i->output[n].stream = p->output[n].stream; + } +} + +struct ir3_shader * +ir3_shader_create(struct ir3_compiler *compiler, + const struct pipe_shader_state *cso, gl_shader_stage type, + struct pipe_debug_callback *debug) +{ + nir_shader *nir; + if (cso->type == PIPE_SHADER_IR_NIR) { + /* we take ownership of the reference: */ + nir = cso->ir.nir; + } else { + debug_assert(cso->type == PIPE_SHADER_IR_TGSI); + if (ir3_shader_debug & IR3_DBG_DISASM) { + tgsi_dump(cso->tokens, 0); + } + nir = ir3_tgsi_to_nir(compiler, cso->tokens); + } + + struct ir3_shader *shader = ir3_shader_from_nir(compiler, nir); + + copy_stream_out(&shader->stream_output, &cso->stream_output); + + if (fd_mesa_debug & FD_DBG_SHADERDB) { + /* if shader-db run, create a standard variant immediately + * (as otherwise nothing will trigger the shader to be + * actually compiled) + */ + static struct ir3_shader_key key; + memset(&key, 0, sizeof(key)); + ir3_shader_variant(shader, key, false, debug); + } + return shader; +} + +/* a bit annoying that compute-shader and normal shader state objects + * aren't a bit more aligned. 
+ */ +struct ir3_shader * +ir3_shader_create_compute(struct ir3_compiler *compiler, + const struct pipe_compute_state *cso, + struct pipe_debug_callback *debug) +{ + nir_shader *nir; + if (cso->ir_type == PIPE_SHADER_IR_NIR) { + /* we take ownership of the reference: */ + nir = (nir_shader *)cso->prog; + } else { + debug_assert(cso->ir_type == PIPE_SHADER_IR_TGSI); + if (ir3_shader_debug & IR3_DBG_DISASM) { + tgsi_dump(cso->prog, 0); + } + nir = ir3_tgsi_to_nir(compiler, cso->prog); + } + + struct ir3_shader *shader = ir3_shader_from_nir(compiler, nir); + + return shader; +} + +struct nir_shader * +ir3_tgsi_to_nir(struct ir3_compiler *compiler, const struct tgsi_token *tokens) +{ + return tgsi_to_nir(tokens, ir3_get_compiler_options(compiler)); +} + +/* This has to reach into the fd_context a bit more than the rest of + * ir3, but it needs to be aligned with the compiler, so both agree + * on which const regs hold what. And the logic is identical between + * a3xx/a4xx, the only difference is small details in the actual + * CP_LOAD_STATE packets (which is handled inside the generation + * specific ctx->emit_const(_bo)() fxns) + */ + +#include "freedreno_resource.h" + +static inline bool +is_stateobj(struct fd_ringbuffer *ring) +{ + /* XXX this is an ugly way to differentiate.. 
*/ + return !!(ring->flags & FD_RINGBUFFER_STREAMING); +} + +static inline void +ring_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) +{ + /* when we emit const state via ring (IB2) we need a WFI, but when + * it is emit'd via stateobj, we don't + */ + if (is_stateobj(ring)) + return; + + fd_wfi(batch, ring); +} + +static void +emit_user_consts(struct fd_context *ctx, const struct ir3_shader_variant *v, + struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf) +{ + const unsigned index = 0; /* user consts are index 0 */ + + if (constbuf->enabled_mask & (1 << index)) { + struct pipe_constant_buffer *cb = &constbuf->cb[index]; + unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */ + + /* in particular, with binning shader we may end up with + * unused consts, ie. we could end up w/ constlen that is + * smaller than first_driver_param. In that case truncate + * the user consts early to avoid HLSQ lockup caused by + * writing too many consts + */ + uint32_t max_const = MIN2(v->num_uniforms, v->constlen); + + // I expect that size should be a multiple of vec4's: + assert(size == align(size, 4)); + + /* and even if the start of the const buffer is before + * first_immediate, the end may not be: + */ + size = MIN2(size, 4 * max_const); + + if (size > 0) { + ring_wfi(ctx->batch, ring); + ctx->emit_const(ring, v->type, 0, + cb->buffer_offset, size, + cb->user_buffer, cb->buffer); + } + } +} + +static void +emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v, + struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf) +{ + uint32_t offset = v->constbase.ubo; + if (v->constlen > offset) { + uint32_t params = v->num_ubos; + uint32_t offsets[params]; + struct pipe_resource *prscs[params]; + + for (uint32_t i = 0; i < params; i++) { + const uint32_t index = i + 1; /* UBOs start at index 1 */ + struct pipe_constant_buffer *cb = &constbuf->cb[index]; + assert(!cb->user_buffer); + + if ((constbuf->enabled_mask & (1 << 
index)) && cb->buffer) { + offsets[i] = cb->buffer_offset; + prscs[i] = cb->buffer; + } else { + offsets[i] = 0; + prscs[i] = NULL; + } + } + + ring_wfi(ctx->batch, ring); + ctx->emit_const_bo(ring, v->type, false, offset * 4, params, prscs, offsets); + } +} + +static void +emit_ssbo_sizes(struct fd_context *ctx, const struct ir3_shader_variant *v, + struct fd_ringbuffer *ring, struct fd_shaderbuf_stateobj *sb) +{ + uint32_t offset = v->constbase.ssbo_sizes; + if (v->constlen > offset) { + uint32_t sizes[align(v->const_layout.ssbo_size.count, 4)]; + unsigned mask = v->const_layout.ssbo_size.mask; + + while (mask) { + unsigned index = u_bit_scan(&mask); + unsigned off = v->const_layout.ssbo_size.off[index]; + sizes[off] = sb->sb[index].buffer_size; + } + + ring_wfi(ctx->batch, ring); + ctx->emit_const(ring, v->type, offset * 4, + 0, ARRAY_SIZE(sizes), sizes, NULL); + } +} + +static void +emit_image_dims(struct fd_context *ctx, const struct ir3_shader_variant *v, + struct fd_ringbuffer *ring, struct fd_shaderimg_stateobj *si) +{ + uint32_t offset = v->constbase.image_dims; + if (v->constlen > offset) { + uint32_t dims[align(v->const_layout.image_dims.count, 4)]; + unsigned mask = v->const_layout.image_dims.mask; + + while (mask) { + struct pipe_image_view *img; + struct fd_resource *rsc; + unsigned index = u_bit_scan(&mask); + unsigned off = v->const_layout.image_dims.off[index]; + + img = &si->si[index]; + rsc = fd_resource(img->resource); + + dims[off + 0] = util_format_get_blocksize(img->format); + if (img->resource->target != PIPE_BUFFER) { + unsigned lvl = img->u.tex.level; + /* note for 2d/cube/etc images, even if re-interpreted + * as a different color format, the pixel size should + * be the same, so use original dimensions for y and z + * stride: + */ + dims[off + 1] = rsc->slices[lvl].pitch * rsc->cpp; + /* see corresponding logic in fd_resource_offset(): */ + if (rsc->layer_first) { + dims[off + 2] = rsc->layer_size; + } else { + dims[off + 2] = 
rsc->slices[lvl].size0; + } + } else { + /* For buffer-backed images, the log2 of the format's + * bytes-per-pixel is placed on the 2nd slot. This is useful + * when emitting image_size instructions, for which we need + * to divide by bpp for image buffers. Since the bpp + * can only be power-of-two, the division is implemented + * as a SHR, and for that it is handy to have the log2 of + * bpp as a constant. (log2 = first-set-bit - 1) + */ + dims[off + 1] = ffs(dims[off + 0]) - 1; + } + } + + ring_wfi(ctx->batch, ring); + ctx->emit_const(ring, v->type, offset * 4, + 0, ARRAY_SIZE(dims), dims, NULL); + } +} + +static void +emit_immediates(struct fd_context *ctx, const struct ir3_shader_variant *v, + struct fd_ringbuffer *ring) +{ + int size = v->immediates_count; + uint32_t base = v->constbase.immediate; + + /* truncate size to avoid writing constants that shader + * does not use: + */ + size = MIN2(size + base, v->constlen) - base; + + /* convert out of vec4: */ + base *= 4; + size *= 4; + + if (size > 0) { + ring_wfi(ctx->batch, ring); + ctx->emit_const(ring, v->type, base, + 0, size, v->immediates[0].val, NULL); + } +} + +/* emit stream-out buffers: */ +static void +emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v, + struct fd_ringbuffer *ring) +{ + /* streamout addresses after driver-params: */ + uint32_t offset = v->constbase.tfbo; + if (v->constlen > offset) { + struct fd_streamout_stateobj *so = &ctx->streamout; + struct ir3_stream_output_info *info = &v->shader->stream_output; + uint32_t params = 4; + uint32_t offsets[params]; + struct pipe_resource *prscs[params]; + + for (uint32_t i = 0; i < params; i++) { + struct pipe_stream_output_target *target = so->targets[i]; + + if (target) { + offsets[i] = (so->offsets[i] * info->stride[i] * 4) + + target->buffer_offset; + prscs[i] = target->buffer; + } else { + offsets[i] = 0; + prscs[i] = NULL; + } + } + + ring_wfi(ctx->batch, ring); + ctx->emit_const_bo(ring, v->type, true, offset * 4, 
params, prscs, offsets); + } +} + +static uint32_t +max_tf_vtx(struct fd_context *ctx, const struct ir3_shader_variant *v) +{ + struct fd_streamout_stateobj *so = &ctx->streamout; + struct ir3_stream_output_info *info = &v->shader->stream_output; + uint32_t maxvtxcnt = 0x7fffffff; + + if (ctx->screen->gpu_id >= 500) + return 0; + if (v->binning_pass) + return 0; + if (v->shader->stream_output.num_outputs == 0) + return 0; + if (so->num_targets == 0) + return 0; + + /* offset to write to is: + * + * total_vtxcnt = vtxcnt + offsets[i] + * offset = total_vtxcnt * stride[i] + * + * offset = vtxcnt * stride[i] ; calculated in shader + * + offsets[i] * stride[i] ; calculated at emit_tfbos() + * + * assuming for each vtx, each target buffer will have data written + * up to 'offset + stride[i]', that leaves maxvtxcnt as: + * + * buffer_size = (maxvtxcnt * stride[i]) + stride[i] + * maxvtxcnt = (buffer_size - stride[i]) / stride[i] + * + * but shader is actually doing a less-than (rather than less-than- + * equal) check, so we can drop the -stride[i]. + * + * TODO is assumption about `offset + stride[i]` legit? + */ + for (unsigned i = 0; i < so->num_targets; i++) { + struct pipe_stream_output_target *target = so->targets[i]; + unsigned stride = info->stride[i] * 4; /* convert dwords->bytes */ + if (target) { + uint32_t max = target->buffer_size / stride; + maxvtxcnt = MIN2(maxvtxcnt, max); + } + } + + return maxvtxcnt; +} + +static void +emit_common_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + struct fd_context *ctx, enum pipe_shader_type t) +{ + enum fd_dirty_shader_state dirty = ctx->dirty_shader[t]; + + /* When we use CP_SET_DRAW_STATE objects to emit constant state, + * if we emit any of it we need to emit all. 
This is because + * we are using the same state-group-id each time for uniform + * state, and if previous update is never evaluated (due to no + * visible primitives in the current tile) then the new stateobj + * completely replaces the old one. + * + * Possibly if we split up different parts of the const state to + * different state-objects we could avoid this. + */ + if (dirty && is_stateobj(ring)) + dirty = ~0; + + if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST)) { + struct fd_constbuf_stateobj *constbuf; + bool shader_dirty; + + constbuf = &ctx->constbuf[t]; + shader_dirty = !!(dirty & FD_DIRTY_SHADER_PROG); + + emit_user_consts(ctx, v, ring, constbuf); + emit_ubos(ctx, v, ring, constbuf); + if (shader_dirty) + emit_immediates(ctx, v, ring); + } + + if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_SSBO)) { + struct fd_shaderbuf_stateobj *sb = &ctx->shaderbuf[t]; + emit_ssbo_sizes(ctx, v, ring, sb); + } + + if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE)) { + struct fd_shaderimg_stateobj *si = &ctx->shaderimg[t]; + emit_image_dims(ctx, v, ring, si); + } +} + +void +ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + struct fd_context *ctx, const struct pipe_draw_info *info) +{ + debug_assert(v->type == MESA_SHADER_VERTEX); + + emit_common_consts(v, ring, ctx, PIPE_SHADER_VERTEX); + + /* emit driver params every time: */ + /* TODO skip emit if shader doesn't use driver params to avoid WFI.. */ + if (info) { + uint32_t offset = v->constbase.driver_param; + if (v->constlen > offset) { + uint32_t vertex_params[IR3_DP_VS_COUNT] = { + [IR3_DP_VTXID_BASE] = info->index_size ? 
+ info->index_bias : info->start, + [IR3_DP_VTXCNT_MAX] = max_tf_vtx(ctx, v), + }; + /* if no user-clip-planes, we don't need to emit the + * entire thing: + */ + uint32_t vertex_params_size = 4; + + if (v->key.ucp_enables) { + struct pipe_clip_state *ucp = &ctx->ucp; + unsigned pos = IR3_DP_UCP0_X; + for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++) { + for (unsigned j = 0; j < 4; j++) { + vertex_params[pos] = fui(ucp->ucp[i][j]); + pos++; + } + } + vertex_params_size = ARRAY_SIZE(vertex_params); + } + + ring_wfi(ctx->batch, ring); + + bool needs_vtxid_base = + ir3_find_sysval_regid(v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) != regid(63, 0); + + /* for indirect draw, we need to copy VTXID_BASE from + * indirect-draw parameters buffer.. which is annoying + * and means we can't easily emit these consts in cmd + * stream so need to copy them to bo. + */ + if (info->indirect && needs_vtxid_base) { + struct pipe_draw_indirect_info *indirect = info->indirect; + struct pipe_resource *vertex_params_rsc = + pipe_buffer_create(&ctx->screen->base, + PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STREAM, + vertex_params_size * 4); + unsigned src_off = info->indirect->offset;; + void *ptr; + + ptr = fd_bo_map(fd_resource(vertex_params_rsc)->bo); + memcpy(ptr, vertex_params, vertex_params_size * 4); + + if (info->index_size) { + /* indexed draw, index_bias is 4th field: */ + src_off += 3 * 4; + } else { + /* non-indexed draw, start is 3rd field: */ + src_off += 2 * 4; + } + + /* copy index_bias or start from draw params: */ + ctx->mem_to_mem(ring, vertex_params_rsc, 0, + indirect->buffer, src_off, 1); + + ctx->emit_const(ring, MESA_SHADER_VERTEX, offset * 4, 0, + vertex_params_size, NULL, vertex_params_rsc); + + pipe_resource_reference(&vertex_params_rsc, NULL); + } else { + ctx->emit_const(ring, MESA_SHADER_VERTEX, offset * 4, 0, + vertex_params_size, vertex_params, NULL); + } + + /* if needed, emit stream-out buffer addresses: */ + if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) { + emit_tfbos(ctx, 
v, ring); + } + } + } +} + +void +ir3_emit_fs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + struct fd_context *ctx) +{ + debug_assert(v->type == MESA_SHADER_FRAGMENT); + + emit_common_consts(v, ring, ctx, PIPE_SHADER_FRAGMENT); +} + +/* emit compute-shader consts: */ +void +ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + struct fd_context *ctx, const struct pipe_grid_info *info) +{ + debug_assert(gl_shader_stage_is_compute(v->type)); + + emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE); + + /* emit compute-shader driver-params: */ + uint32_t offset = v->constbase.driver_param; + if (v->constlen > offset) { + ring_wfi(ctx->batch, ring); + + if (info->indirect) { + struct pipe_resource *indirect = NULL; + unsigned indirect_offset; + + /* This is a bit awkward, but CP_LOAD_STATE.EXT_SRC_ADDR needs + * to be aligned more strongly than 4 bytes. So in this case + * we need a temporary buffer to copy NumWorkGroups.xyz to. + * + * TODO if previous compute job is writing to info->indirect, + * we might need a WFI.. but since we currently flush for each + * compute job, we are probably ok for now. 
+ */ + if (info->indirect_offset & 0xf) { + indirect = pipe_buffer_create(&ctx->screen->base, + PIPE_BIND_COMMAND_ARGS_BUFFER, PIPE_USAGE_STREAM, + 0x1000); + indirect_offset = 0; + + ctx->mem_to_mem(ring, indirect, 0, info->indirect, + info->indirect_offset, 3); + } else { + pipe_resource_reference(&indirect, info->indirect); + indirect_offset = info->indirect_offset; + } + + ctx->emit_const(ring, MESA_SHADER_COMPUTE, offset * 4, + indirect_offset, 4, NULL, indirect); + + pipe_resource_reference(&indirect, NULL); + } else { + uint32_t compute_params[IR3_DP_CS_COUNT] = { + [IR3_DP_NUM_WORK_GROUPS_X] = info->grid[0], + [IR3_DP_NUM_WORK_GROUPS_Y] = info->grid[1], + [IR3_DP_NUM_WORK_GROUPS_Z] = info->grid[2], + [IR3_DP_LOCAL_GROUP_SIZE_X] = info->block[0], + [IR3_DP_LOCAL_GROUP_SIZE_Y] = info->block[1], + [IR3_DP_LOCAL_GROUP_SIZE_Z] = info->block[2], + }; + + ctx->emit_const(ring, MESA_SHADER_COMPUTE, offset * 4, 0, + ARRAY_SIZE(compute_params), compute_params, NULL); + } + } +} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_gallium.h mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_gallium.h --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_gallium.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_gallium.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * 
Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef IR3_GALLIUM_H_ +#define IR3_GALLIUM_H_ + +#include "pipe/p_state.h" +#include "ir3/ir3_shader.h" + +struct ir3_shader * ir3_shader_create(struct ir3_compiler *compiler, + const struct pipe_shader_state *cso, gl_shader_stage type, + struct pipe_debug_callback *debug); +struct ir3_shader * +ir3_shader_create_compute(struct ir3_compiler *compiler, + const struct pipe_compute_state *cso, + struct pipe_debug_callback *debug); +struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader, + struct ir3_shader_key key, bool binning_pass, + struct pipe_debug_callback *debug); +struct nir_shader * ir3_tgsi_to_nir(struct ir3_compiler *compiler, + const struct tgsi_token *tokens); + +struct fd_ringbuffer; +struct fd_context; +void ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + struct fd_context *ctx, const struct pipe_draw_info *info); +void ir3_emit_fs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + struct fd_context *ctx); +void ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + struct fd_context *ctx, const struct pipe_grid_info *info); + +#endif /* IR3_GALLIUM_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_group.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_group.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_group.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_group.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,276 +0,0 @@ -/* - * Copyright (C) 2014 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include "freedreno_util.h" - -#include "ir3.h" - -/* - * Find/group instruction neighbors: - */ - -/* bleh.. we need to do the same group_n() thing for both inputs/outputs - * (where we have a simple instr[] array), and fanin nodes (where we have - * an extra indirection via reg->instr). 
- */ -struct group_ops { - struct ir3_instruction *(*get)(void *arr, int idx); - void (*insert_mov)(void *arr, int idx, struct ir3_instruction *instr); -}; - -static struct ir3_instruction *arr_get(void *arr, int idx) -{ - return ((struct ir3_instruction **)arr)[idx]; -} -static void arr_insert_mov_out(void *arr, int idx, struct ir3_instruction *instr) -{ - ((struct ir3_instruction **)arr)[idx] = - ir3_MOV(instr->block, instr, TYPE_F32); -} -static void arr_insert_mov_in(void *arr, int idx, struct ir3_instruction *instr) -{ - /* so, we can't insert a mov in front of a meta:in.. and the downstream - * instruction already has a pointer to 'instr'. So we cheat a bit and - * morph the meta:in instruction into a mov and insert a new meta:in - * in front. - */ - struct ir3_instruction *in; - - debug_assert(instr->regs_count == 1); - - in = ir3_instr_create(instr->block, OPC_META_INPUT); - in->inout.block = instr->block; - ir3_reg_create(in, instr->regs[0]->num, 0); - - /* create src reg for meta:in and fixup to now be a mov: */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = in; - instr->opc = OPC_MOV; - instr->cat1.src_type = TYPE_F32; - instr->cat1.dst_type = TYPE_F32; - - ((struct ir3_instruction **)arr)[idx] = in; -} -static struct group_ops arr_ops_out = { arr_get, arr_insert_mov_out }; -static struct group_ops arr_ops_in = { arr_get, arr_insert_mov_in }; - -static struct ir3_instruction *instr_get(void *arr, int idx) -{ - return ssa(((struct ir3_instruction *)arr)->regs[idx+1]); -} -static void -instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr) -{ - ((struct ir3_instruction *)arr)->regs[idx+1]->instr = - ir3_MOV(instr->block, instr, TYPE_F32); -} -static struct group_ops instr_ops = { instr_get, instr_insert_mov }; - -/* verify that cur != instr, but cur is also not in instr's neighbor-list: */ -static bool -in_neighbor_list(struct ir3_instruction *instr, struct ir3_instruction *cur, int pos) -{ - int idx = 0; - - if (!instr) - return false; - - 
if (instr == cur) - return true; - - for (instr = ir3_neighbor_first(instr); instr; instr = instr->cp.right) - if ((idx++ != pos) && (instr == cur)) - return true; - - return false; -} - -static void -group_n(struct group_ops *ops, void *arr, unsigned n) -{ - unsigned i, j; - - /* first pass, figure out what has conflicts and needs a mov - * inserted. Do this up front, before starting to setup - * left/right neighbor pointers. Trying to do it in a single - * pass could result in a situation where we can't even setup - * the mov's right neighbor ptr if the next instr also needs - * a mov. - */ -restart: - for (i = 0; i < n; i++) { - struct ir3_instruction *instr = ops->get(arr, i); - if (instr) { - struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL; - struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL; - bool conflict; - - /* check for left/right neighbor conflicts: */ - conflict = conflicts(instr->cp.left, left) || - conflicts(instr->cp.right, right); - - /* Mixing array elements and higher register classes - * (ie. groups) doesn't really work out in RA. See: - * - * https://trello.com/c/DqeDkeVf/156-bug-with-stk-70frag - */ - if (instr->regs[0]->flags & IR3_REG_ARRAY) - conflict = true; - - /* we also can't have an instr twice in the group: */ - for (j = i + 1; (j < n) && !conflict; j++) - if (in_neighbor_list(ops->get(arr, j), instr, i)) - conflict = true; - - if (conflict) { - ops->insert_mov(arr, i, instr); - /* inserting the mov may have caused a conflict - * against the previous: - */ - goto restart; - } - } - } - - /* second pass, now that we've inserted mov's, fixup left/right - * neighbors. This is guaranteed to succeed, since by definition - * the newly inserted mov's cannot conflict with anything. - */ - for (i = 0; i < n; i++) { - struct ir3_instruction *instr = ops->get(arr, i); - if (instr) { - struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL; - struct ir3_instruction *right = (i < (n-1)) ? 
ops->get(arr, i + 1) : NULL; - - debug_assert(!conflicts(instr->cp.left, left)); - if (left) { - instr->cp.left_cnt++; - instr->cp.left = left; - } - - debug_assert(!conflicts(instr->cp.right, right)); - if (right) { - instr->cp.right_cnt++; - instr->cp.right = right; - } - } - } -} - -static void -instr_find_neighbors(struct ir3_instruction *instr) -{ - struct ir3_instruction *src; - - if (ir3_instr_check_mark(instr)) - return; - - if (instr->opc == OPC_META_FI) - group_n(&instr_ops, instr, instr->regs_count - 1); - - foreach_ssa_src(src, instr) - instr_find_neighbors(src); -} - -/* a bit of sadness.. we can't have "holes" in inputs from PoV of - * register assignment, they still need to be grouped together. So - * we need to insert dummy/padding instruction for grouping, and - * then take it back out again before anyone notices. - */ -static void -pad_and_group_input(struct ir3_instruction **input, unsigned n) -{ - int i, mask = 0; - struct ir3_block *block = NULL; - - for (i = n - 1; i >= 0; i--) { - struct ir3_instruction *instr = input[i]; - if (instr) { - block = instr->block; - } else if (block) { - instr = ir3_NOP(block); - ir3_reg_create(instr, 0, IR3_REG_SSA); /* dummy dst */ - input[i] = instr; - mask |= (1 << i); - } - } - - group_n(&arr_ops_in, input, n); - - for (i = 0; i < n; i++) { - if (mask & (1 << i)) - input[i] = NULL; - } -} - -static void -find_neighbors(struct ir3 *ir) -{ - unsigned i; - - /* shader inputs/outputs themselves must be contiguous as well: - * - * NOTE: group inputs first, since we only insert mov's - * *before* the conflicted instr (and that would go badly - * for inputs). By doing inputs first, we should never - * have a conflict on inputs.. pushing any conflict to - * resolve to the outputs, for stuff like: - * - * MOV OUT[n], IN[m].wzyx - * - * NOTE: we assume here inputs/outputs are grouped in vec4. 
- * This logic won't quite cut it if we don't align smaller - * on vec4 boundaries - */ - for (i = 0; i < ir->ninputs; i += 4) - pad_and_group_input(&ir->inputs[i], 4); - for (i = 0; i < ir->noutputs; i += 4) - group_n(&arr_ops_out, &ir->outputs[i], 4); - - for (i = 0; i < ir->noutputs; i++) { - if (ir->outputs[i]) { - struct ir3_instruction *instr = ir->outputs[i]; - instr_find_neighbors(instr); - } - } - - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - for (i = 0; i < block->keeps_count; i++) { - struct ir3_instruction *instr = block->keeps[i]; - instr_find_neighbors(instr); - } - - /* We also need to account for if-condition: */ - if (block->condition) - instr_find_neighbors(block->condition); - } -} - -void -ir3_group(struct ir3 *ir) -{ - ir3_clear_mark(ir); - find_neighbors(ir); -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3.h mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3.h --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,1393 +0,0 @@ -/* - * Copyright (c) 2013 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef IR3_H_ -#define IR3_H_ - -#include -#include - -#include "util/u_debug.h" -#include "util/list.h" - -#include "instr-a3xx.h" -#include "disasm.h" /* TODO move 'enum shader_t' somewhere else.. */ - -/* low level intermediate representation of an adreno shader program */ - -struct ir3_compiler; -struct ir3; -struct ir3_instruction; -struct ir3_block; - -struct ir3_info { - uint32_t gpu_id; - uint16_t sizedwords; - uint16_t instrs_count; /* expanded to account for rpt's */ - /* NOTE: max_reg, etc, does not include registers not touched - * by the shader (ie. vertex fetched via VFD_DECODE but not - * touched by shader) - */ - int8_t max_reg; /* highest GPR # used by shader */ - int8_t max_half_reg; - int16_t max_const; - - /* number of sync bits: */ - uint16_t ss, sy; -}; - -struct ir3_register { - enum { - IR3_REG_CONST = 0x001, - IR3_REG_IMMED = 0x002, - IR3_REG_HALF = 0x004, - /* high registers are used for some things in compute shaders, - * for example. Seems to be for things that are global to all - * threads in a wave, so possibly these are global/shared by - * all the threads in the wave? - */ - IR3_REG_HIGH = 0x008, - IR3_REG_RELATIV= 0x010, - IR3_REG_R = 0x020, - /* Most instructions, it seems, can do float abs/neg but not - * integer. The CP pass needs to know what is intended (int or - * float) in order to do the right thing. For this reason the - * abs/neg flags are split out into float and int variants. 
In - * addition, .b (bitwise) operations, the negate is actually a - * bitwise not, so split that out into a new flag to make it - * more clear. - */ - IR3_REG_FNEG = 0x040, - IR3_REG_FABS = 0x080, - IR3_REG_SNEG = 0x100, - IR3_REG_SABS = 0x200, - IR3_REG_BNOT = 0x400, - IR3_REG_EVEN = 0x800, - IR3_REG_POS_INF= 0x1000, - /* (ei) flag, end-input? Set on last bary, presumably to signal - * that the shader needs no more input: - */ - IR3_REG_EI = 0x2000, - /* meta-flags, for intermediate stages of IR, ie. - * before register assignment is done: - */ - IR3_REG_SSA = 0x4000, /* 'instr' is ptr to assigning instr */ - IR3_REG_ARRAY = 0x8000, - - } flags; - - /* normal registers: - * the component is in the low two bits of the reg #, so - * rN.x becomes: (N << 2) | x - */ - int num; - union { - /* immediate: */ - int32_t iim_val; - uint32_t uim_val; - float fim_val; - /* relative: */ - struct { - uint16_t id; - int16_t offset; - } array; - }; - - /* For IR3_REG_SSA, src registers contain ptr back to assigning - * instruction. - * - * For IR3_REG_ARRAY, the pointer is back to the last dependent - * array access (although the net effect is the same, it points - * back to a previous instruction that we depend on). - */ - struct ir3_instruction *instr; - - union { - /* used for cat5 instructions, but also for internal/IR level - * tracking of what registers are read/written by an instruction. - * wrmask may be a bad name since it is used to represent both - * src and dst that touch multiple adjacent registers. - */ - unsigned wrmask; - /* for relative addressing, 32bits for array size is too small, - * but otoh we don't need to deal with disjoint sets, so instead - * use a simple size field (number of scalar components). 
- */ - unsigned size; - }; -}; - -/* - * Stupid/simple growable array implementation: - */ -#define DECLARE_ARRAY(type, name) \ - unsigned name ## _count, name ## _sz; \ - type * name; - -#define array_insert(ctx, arr, val) do { \ - if (arr ## _count == arr ## _sz) { \ - arr ## _sz = MAX2(2 * arr ## _sz, 16); \ - arr = reralloc_size(ctx, arr, arr ## _sz * sizeof(arr[0])); \ - } \ - arr[arr ##_count++] = val; \ - } while (0) - -struct ir3_instruction { - struct ir3_block *block; - opc_t opc; - enum { - /* (sy) flag is set on first instruction, and after sample - * instructions (probably just on RAW hazard). - */ - IR3_INSTR_SY = 0x001, - /* (ss) flag is set on first instruction, and first instruction - * to depend on the result of "long" instructions (RAW hazard): - * - * rcp, rsq, log2, exp2, sin, cos, sqrt - * - * It seems to synchronize until all in-flight instructions are - * completed, for example: - * - * rsq hr1.w, hr1.w - * add.f hr2.z, (neg)hr2.z, hc0.y - * mul.f hr2.w, (neg)hr2.y, (neg)hr2.y - * rsq hr2.x, hr2.x - * (rpt1)nop - * mad.f16 hr2.w, hr2.z, hr2.z, hr2.w - * nop - * mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w - * (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w - * (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x - * - * The last mul.f does not have (ss) set, presumably because the - * (ss) on the previous instruction does the job. - * - * The blob driver also seems to set it on WAR hazards, although - * not really clear if this is needed or just blob compiler being - * sloppy. 
So far I haven't found a case where removing the (ss) - * causes problems for WAR hazard, but I could just be getting - * lucky: - * - * rcp r1.y, r3.y - * (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z - * - */ - IR3_INSTR_SS = 0x002, - /* (jp) flag is set on jump targets: - */ - IR3_INSTR_JP = 0x004, - IR3_INSTR_UL = 0x008, - IR3_INSTR_3D = 0x010, - IR3_INSTR_A = 0x020, - IR3_INSTR_O = 0x040, - IR3_INSTR_P = 0x080, - IR3_INSTR_S = 0x100, - IR3_INSTR_S2EN = 0x200, - IR3_INSTR_G = 0x400, - IR3_INSTR_SAT = 0x800, - /* meta-flags, for intermediate stages of IR, ie. - * before register assignment is done: - */ - IR3_INSTR_MARK = 0x1000, - IR3_INSTR_UNUSED= 0x2000, - } flags; - int repeat; -#ifdef DEBUG - unsigned regs_max; -#endif - unsigned regs_count; - struct ir3_register **regs; - union { - struct { - char inv; - char comp; - int immed; - struct ir3_block *target; - } cat0; - struct { - type_t src_type, dst_type; - } cat1; - struct { - enum { - IR3_COND_LT = 0, - IR3_COND_LE = 1, - IR3_COND_GT = 2, - IR3_COND_GE = 3, - IR3_COND_EQ = 4, - IR3_COND_NE = 5, - } condition; - } cat2; - struct { - unsigned samp, tex; - type_t type; - } cat5; - struct { - type_t type; - int src_offset; - int dst_offset; - int iim_val : 3; /* for ldgb/stgb, # of components */ - int d : 3; - bool typed : 1; - } cat6; - struct { - unsigned w : 1; /* write */ - unsigned r : 1; /* read */ - unsigned l : 1; /* local */ - unsigned g : 1; /* global */ - } cat7; - /* for meta-instructions, just used to hold extra data - * before instruction scheduling, etc - */ - struct { - int off; /* component/offset */ - } fo; - struct { - struct ir3_block *block; - } inout; - }; - - /* transient values used during various algorithms: */ - union { - /* The instruction depth is the max dependency distance to output. - * - * You can also think of it as the "cost", if we did any sort of - * optimization for register footprint. Ie. 
a value that is just - * result of moving a const to a reg would have a low cost, so to - * it could make sense to duplicate the instruction at various - * points where the result is needed to reduce register footprint. - */ - unsigned depth; - /* When we get to the RA stage, we no longer need depth, but - * we do need instruction's position/name: - */ - struct { - uint16_t ip; - uint16_t name; - }; - }; - - /* used for per-pass extra instruction data. - */ - void *data; - - /* Used during CP and RA stages. For fanin and shader inputs/ - * outputs where we need a sequence of consecutive registers, - * keep track of each src instructions left (ie 'n-1') and right - * (ie 'n+1') neighbor. The front-end must insert enough mov's - * to ensure that each instruction has at most one left and at - * most one right neighbor. During the copy-propagation pass, - * we only remove mov's when we can preserve this constraint. - * And during the RA stage, we use the neighbor information to - * allocate a block of registers in one shot. - * - * TODO: maybe just add something like: - * struct ir3_instruction_ref { - * struct ir3_instruction *instr; - * unsigned cnt; - * } - * - * Or can we get away without the refcnt stuff? It seems like - * it should be overkill.. the problem is if, potentially after - * already eliminating some mov's, if you have a single mov that - * needs to be grouped with it's neighbors in two different - * places (ex. shader output and a fanin). - */ - struct { - struct ir3_instruction *left, *right; - uint16_t left_cnt, right_cnt; - } cp; - - /* an instruction can reference at most one address register amongst - * it's src/dst registers. Beyond that, you need to insert mov's. - * - * NOTE: do not write this directly, use ir3_instr_set_address() - */ - struct ir3_instruction *address; - - /* Tracking for additional dependent instructions. Used to handle - * barriers, WAR hazards for arrays/SSBOs/etc. 
- */ - DECLARE_ARRAY(struct ir3_instruction *, deps); - - /* - * From PoV of instruction scheduling, not execution (ie. ignores global/ - * local distinction): - * shared image atomic SSBO everything - * barrier()/ - R/W R/W R/W R/W X - * groupMemoryBarrier() - * memoryBarrier() - R/W R/W - * (but only images declared coherent?) - * memoryBarrierAtomic() - R/W - * memoryBarrierBuffer() - R/W - * memoryBarrierImage() - R/W - * memoryBarrierShared() - R/W - * - * TODO I think for SSBO/image/shared, in cases where we can determine - * which variable is accessed, we don't need to care about accesses to - * different variables (unless declared coherent??) - */ - enum { - IR3_BARRIER_EVERYTHING = 1 << 0, - IR3_BARRIER_SHARED_R = 1 << 1, - IR3_BARRIER_SHARED_W = 1 << 2, - IR3_BARRIER_IMAGE_R = 1 << 3, - IR3_BARRIER_IMAGE_W = 1 << 4, - IR3_BARRIER_BUFFER_R = 1 << 5, - IR3_BARRIER_BUFFER_W = 1 << 6, - IR3_BARRIER_ARRAY_R = 1 << 7, - IR3_BARRIER_ARRAY_W = 1 << 8, - } barrier_class, barrier_conflict; - - /* Entry in ir3_block's instruction list: */ - struct list_head node; - - int use_count; /* currently just updated/used by cp */ - -#ifdef DEBUG - uint32_t serialno; -#endif -}; - -static inline struct ir3_instruction * -ir3_neighbor_first(struct ir3_instruction *instr) -{ - int cnt = 0; - while (instr->cp.left) { - instr = instr->cp.left; - if (++cnt > 0xffff) { - debug_assert(0); - break; - } - } - return instr; -} - -static inline int ir3_neighbor_count(struct ir3_instruction *instr) -{ - int num = 1; - - debug_assert(!instr->cp.left); - - while (instr->cp.right) { - num++; - instr = instr->cp.right; - if (num > 0xffff) { - debug_assert(0); - break; - } - } - - return num; -} - -struct ir3 { - struct ir3_compiler *compiler; - - unsigned ninputs, noutputs; - struct ir3_instruction **inputs; - struct ir3_instruction **outputs; - - /* Track bary.f (and ldlv) instructions.. 
this is needed in - * scheduling to ensure that all varying fetches happen before - * any potential kill instructions. The hw gets grumpy if all - * threads in a group are killed before the last bary.f gets - * a chance to signal end of input (ei). - */ - DECLARE_ARRAY(struct ir3_instruction *, baryfs); - - /* Track all indirect instructions (read and write). To avoid - * deadlock scenario where an address register gets scheduled, - * but other dependent src instructions cannot be scheduled due - * to dependency on a *different* address register value, the - * scheduler needs to ensure that all dependencies other than - * the instruction other than the address register are scheduled - * before the one that writes the address register. Having a - * convenient list of instructions that reference some address - * register simplifies this. - */ - DECLARE_ARRAY(struct ir3_instruction *, indirects); - - /* and same for instructions that consume predicate register: */ - DECLARE_ARRAY(struct ir3_instruction *, predicates); - - /* Track texture sample instructions which need texture state - * patched in (for astc-srgb workaround): - */ - DECLARE_ARRAY(struct ir3_instruction *, astc_srgb); - - /* List of blocks: */ - struct list_head block_list; - - /* List of ir3_array's: */ - struct list_head array_list; - -#ifdef DEBUG - unsigned block_count, instr_count; -#endif -}; - -struct ir3_array { - struct list_head node; - unsigned length; - unsigned id; - - struct nir_register *r; - - /* To avoid array write's from getting DCE'd, keep track of the - * most recent write. Any array access depends on the most - * recent write. This way, nothing depends on writes after the - * last read. 
But all the writes that happen before that have - * something depending on them - */ - struct ir3_instruction *last_write; - - /* extra stuff used in RA pass: */ - unsigned base; /* base vreg name */ - unsigned reg; /* base physical reg */ - uint16_t start_ip, end_ip; -}; - -struct ir3_array * ir3_lookup_array(struct ir3 *ir, unsigned id); - -struct ir3_block { - struct list_head node; - struct ir3 *shader; - - const struct nir_block *nblock; - - struct list_head instr_list; /* list of ir3_instruction */ - - /* each block has either one or two successors.. in case of - * two successors, 'condition' decides which one to follow. - * A block preceding an if/else has two successors. - */ - struct ir3_instruction *condition; - struct ir3_block *successors[2]; - - unsigned predecessors_count; - struct ir3_block **predecessors; - - uint16_t start_ip, end_ip; - - /* Track instructions which do not write a register but other- - * wise must not be discarded (such as kill, stg, etc) - */ - DECLARE_ARRAY(struct ir3_instruction *, keeps); - - /* used for per-pass extra block data. Mainly used right - * now in RA step to track livein/liveout. 
- */ - void *data; - -#ifdef DEBUG - uint32_t serialno; -#endif -}; - -static inline uint32_t -block_id(struct ir3_block *block) -{ -#ifdef DEBUG - return block->serialno; -#else - return (uint32_t)(unsigned long)block; -#endif -} - -struct ir3 * ir3_create(struct ir3_compiler *compiler, - unsigned nin, unsigned nout); -void ir3_destroy(struct ir3 *shader); -void * ir3_assemble(struct ir3 *shader, - struct ir3_info *info, uint32_t gpu_id); -void * ir3_alloc(struct ir3 *shader, int sz); - -struct ir3_block * ir3_block_create(struct ir3 *shader); - -struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc); -struct ir3_instruction * ir3_instr_create2(struct ir3_block *block, - opc_t opc, int nreg); -struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr); -void ir3_instr_add_dep(struct ir3_instruction *instr, struct ir3_instruction *dep); -const char *ir3_instr_name(struct ir3_instruction *instr); - -struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, - int num, int flags); -struct ir3_register * ir3_reg_clone(struct ir3 *shader, - struct ir3_register *reg); - -void ir3_instr_set_address(struct ir3_instruction *instr, - struct ir3_instruction *addr); - -static inline bool ir3_instr_check_mark(struct ir3_instruction *instr) -{ - if (instr->flags & IR3_INSTR_MARK) - return true; /* already visited */ - instr->flags |= IR3_INSTR_MARK; - return false; -} - -void ir3_block_clear_mark(struct ir3_block *block); -void ir3_clear_mark(struct ir3 *shader); - -unsigned ir3_count_instructions(struct ir3 *ir); - -static inline int ir3_instr_regno(struct ir3_instruction *instr, - struct ir3_register *reg) -{ - unsigned i; - for (i = 0; i < instr->regs_count; i++) - if (reg == instr->regs[i]) - return i; - return -1; -} - - -#define MAX_ARRAYS 16 - -/* comp: - * 0 - x - * 1 - y - * 2 - z - * 3 - w - */ -static inline uint32_t regid(int num, int comp) -{ - return (num << 2) | (comp & 0x3); -} - -static inline uint32_t reg_num(struct 
ir3_register *reg) -{ - return reg->num >> 2; -} - -static inline uint32_t reg_comp(struct ir3_register *reg) -{ - return reg->num & 0x3; -} - -static inline bool is_flow(struct ir3_instruction *instr) -{ - return (opc_cat(instr->opc) == 0); -} - -static inline bool is_kill(struct ir3_instruction *instr) -{ - return instr->opc == OPC_KILL; -} - -static inline bool is_nop(struct ir3_instruction *instr) -{ - return instr->opc == OPC_NOP; -} - -/* Is it a non-transformative (ie. not type changing) mov? This can - * also include absneg.s/absneg.f, which for the most part can be - * treated as a mov (single src argument). - */ -static inline bool is_same_type_mov(struct ir3_instruction *instr) -{ - struct ir3_register *dst; - - switch (instr->opc) { - case OPC_MOV: - if (instr->cat1.src_type != instr->cat1.dst_type) - return false; - break; - case OPC_ABSNEG_F: - case OPC_ABSNEG_S: - if (instr->flags & IR3_INSTR_SAT) - return false; - break; - default: - return false; - } - - dst = instr->regs[0]; - - /* mov's that write to a0.x or p0.x are special: */ - if (dst->num == regid(REG_P0, 0)) - return false; - if (dst->num == regid(REG_A0, 0)) - return false; - - if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY)) - return false; - - return true; -} - -static inline bool is_alu(struct ir3_instruction *instr) -{ - return (1 <= opc_cat(instr->opc)) && (opc_cat(instr->opc) <= 3); -} - -static inline bool is_sfu(struct ir3_instruction *instr) -{ - return (opc_cat(instr->opc) == 4); -} - -static inline bool is_tex(struct ir3_instruction *instr) -{ - return (opc_cat(instr->opc) == 5); -} - -static inline bool is_mem(struct ir3_instruction *instr) -{ - return (opc_cat(instr->opc) == 6); -} - -static inline bool is_barrier(struct ir3_instruction *instr) -{ - return (opc_cat(instr->opc) == 7); -} - -static inline bool -is_store(struct ir3_instruction *instr) -{ - /* these instructions, the "destination" register is - * actually a source, the address to store to. 
- */ - switch (instr->opc) { - case OPC_STG: - case OPC_STGB: - case OPC_STIB: - case OPC_STP: - case OPC_STL: - case OPC_STLW: - case OPC_L2G: - case OPC_G2L: - return true; - default: - return false; - } -} - -static inline bool is_load(struct ir3_instruction *instr) -{ - switch (instr->opc) { - case OPC_LDG: - case OPC_LDGB: - case OPC_LDL: - case OPC_LDP: - case OPC_L2G: - case OPC_LDLW: - case OPC_LDC: - case OPC_LDLV: - /* probably some others too.. */ - return true; - default: - return false; - } -} - -static inline bool is_input(struct ir3_instruction *instr) -{ - /* in some cases, ldlv is used to fetch varying without - * interpolation.. fortunately inloc is the first src - * register in either case - */ - switch (instr->opc) { - case OPC_LDLV: - case OPC_BARY_F: - return true; - default: - return false; - } -} - -static inline bool is_bool(struct ir3_instruction *instr) -{ - switch (instr->opc) { - case OPC_CMPS_F: - case OPC_CMPS_S: - case OPC_CMPS_U: - return true; - default: - return false; - } -} - -static inline bool is_meta(struct ir3_instruction *instr) -{ - /* TODO how should we count PHI (and maybe fan-in/out) which - * might actually contribute some instructions to the final - * result? 
- */ - return (opc_cat(instr->opc) == -1); -} - -static inline bool writes_addr(struct ir3_instruction *instr) -{ - if (instr->regs_count > 0) { - struct ir3_register *dst = instr->regs[0]; - return reg_num(dst) == REG_A0; - } - return false; -} - -static inline bool writes_pred(struct ir3_instruction *instr) -{ - if (instr->regs_count > 0) { - struct ir3_register *dst = instr->regs[0]; - return reg_num(dst) == REG_P0; - } - return false; -} - -/* returns defining instruction for reg */ -/* TODO better name */ -static inline struct ir3_instruction *ssa(struct ir3_register *reg) -{ - if (reg->flags & (IR3_REG_SSA | IR3_REG_ARRAY)) { - return reg->instr; - } - return NULL; -} - -static inline bool conflicts(struct ir3_instruction *a, - struct ir3_instruction *b) -{ - return (a && b) && (a != b); -} - -static inline bool reg_gpr(struct ir3_register *r) -{ - if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED)) - return false; - if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0)) - return false; - return true; -} - -static inline type_t half_type(type_t type) -{ - switch (type) { - case TYPE_F32: return TYPE_F16; - case TYPE_U32: return TYPE_U16; - case TYPE_S32: return TYPE_S16; - case TYPE_F16: - case TYPE_U16: - case TYPE_S16: - return type; - default: - assert(0); - return ~0; - } -} - -/* some cat2 instructions (ie. 
those which are not float) can embed an - * immediate: - */ -static inline bool ir3_cat2_int(opc_t opc) -{ - switch (opc) { - case OPC_ADD_U: - case OPC_ADD_S: - case OPC_SUB_U: - case OPC_SUB_S: - case OPC_CMPS_U: - case OPC_CMPS_S: - case OPC_MIN_U: - case OPC_MIN_S: - case OPC_MAX_U: - case OPC_MAX_S: - case OPC_CMPV_U: - case OPC_CMPV_S: - case OPC_MUL_U: - case OPC_MUL_S: - case OPC_MULL_U: - case OPC_CLZ_S: - case OPC_ABSNEG_S: - case OPC_AND_B: - case OPC_OR_B: - case OPC_NOT_B: - case OPC_XOR_B: - case OPC_BFREV_B: - case OPC_CLZ_B: - case OPC_SHL_B: - case OPC_SHR_B: - case OPC_ASHR_B: - case OPC_MGEN_B: - case OPC_GETBIT_B: - case OPC_CBITS_B: - case OPC_BARY_F: - return true; - - default: - return false; - } -} - - -/* map cat2 instruction to valid abs/neg flags: */ -static inline unsigned ir3_cat2_absneg(opc_t opc) -{ - switch (opc) { - case OPC_ADD_F: - case OPC_MIN_F: - case OPC_MAX_F: - case OPC_MUL_F: - case OPC_SIGN_F: - case OPC_CMPS_F: - case OPC_ABSNEG_F: - case OPC_CMPV_F: - case OPC_FLOOR_F: - case OPC_CEIL_F: - case OPC_RNDNE_F: - case OPC_RNDAZ_F: - case OPC_TRUNC_F: - case OPC_BARY_F: - return IR3_REG_FABS | IR3_REG_FNEG; - - case OPC_ADD_U: - case OPC_ADD_S: - case OPC_SUB_U: - case OPC_SUB_S: - case OPC_CMPS_U: - case OPC_CMPS_S: - case OPC_MIN_U: - case OPC_MIN_S: - case OPC_MAX_U: - case OPC_MAX_S: - case OPC_CMPV_U: - case OPC_CMPV_S: - case OPC_MUL_U: - case OPC_MUL_S: - case OPC_MULL_U: - case OPC_CLZ_S: - return 0; - - case OPC_ABSNEG_S: - return IR3_REG_SABS | IR3_REG_SNEG; - - case OPC_AND_B: - case OPC_OR_B: - case OPC_NOT_B: - case OPC_XOR_B: - case OPC_BFREV_B: - case OPC_CLZ_B: - case OPC_SHL_B: - case OPC_SHR_B: - case OPC_ASHR_B: - case OPC_MGEN_B: - case OPC_GETBIT_B: - case OPC_CBITS_B: - return IR3_REG_BNOT; - - default: - return 0; - } -} - -/* map cat3 instructions to valid abs/neg flags: */ -static inline unsigned ir3_cat3_absneg(opc_t opc) -{ - switch (opc) { - case OPC_MAD_F16: - case OPC_MAD_F32: - case OPC_SEL_F16: 
- case OPC_SEL_F32: - return IR3_REG_FNEG; - - case OPC_MAD_U16: - case OPC_MADSH_U16: - case OPC_MAD_S16: - case OPC_MADSH_M16: - case OPC_MAD_U24: - case OPC_MAD_S24: - case OPC_SEL_S16: - case OPC_SEL_S32: - case OPC_SAD_S16: - case OPC_SAD_S32: - /* neg *may* work on 3rd src.. */ - - case OPC_SEL_B16: - case OPC_SEL_B32: - - default: - return 0; - } -} - -#define MASK(n) ((1 << (n)) - 1) - -/* iterator for an instructions's sources (reg), also returns src #: */ -#define foreach_src_n(__srcreg, __n, __instr) \ - if ((__instr)->regs_count) \ - for (unsigned __cnt = (__instr)->regs_count - 1, __n = 0; __n < __cnt; __n++) \ - if ((__srcreg = (__instr)->regs[__n + 1])) - -/* iterator for an instructions's sources (reg): */ -#define foreach_src(__srcreg, __instr) \ - foreach_src_n(__srcreg, __i, __instr) - -static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr) -{ - unsigned cnt = instr->regs_count + instr->deps_count; - if (instr->address) - cnt++; - return cnt; -} - -static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr, unsigned n) -{ - if (n == (instr->regs_count + instr->deps_count)) - return instr->address; - if (n >= instr->regs_count) - return instr->deps[n - instr->regs_count]; - return ssa(instr->regs[n]); -} - -static inline bool __is_false_dep(struct ir3_instruction *instr, unsigned n) -{ - if (n == (instr->regs_count + instr->deps_count)) - return false; - if (n >= instr->regs_count) - return true; - return false; -} - -#define __src_cnt(__instr) ((__instr)->address ? 
(__instr)->regs_count : (__instr)->regs_count - 1) - -/* iterator for an instruction's SSA sources (instr), also returns src #: */ -#define foreach_ssa_src_n(__srcinst, __n, __instr) \ - for (unsigned __cnt = __ssa_src_cnt(__instr), __n = 0; __n < __cnt; __n++) \ - if ((__srcinst = __ssa_src_n(__instr, __n))) - -/* iterator for an instruction's SSA sources (instr): */ -#define foreach_ssa_src(__srcinst, __instr) \ - foreach_ssa_src_n(__srcinst, __i, __instr) - - -/* dump: */ -void ir3_print(struct ir3 *ir); -void ir3_print_instr(struct ir3_instruction *instr); - -/* depth calculation: */ -int ir3_delayslots(struct ir3_instruction *assigner, - struct ir3_instruction *consumer, unsigned n); -void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list); -void ir3_depth(struct ir3 *ir); - -/* copy-propagate: */ -struct ir3_shader_variant; -void ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so); - -/* group neighbors and insert mov's to resolve conflicts: */ -void ir3_group(struct ir3 *ir); - -/* scheduling: */ -void ir3_sched_add_deps(struct ir3 *ir); -int ir3_sched(struct ir3 *ir); - -/* register assignment: */ -struct ir3_ra_reg_set * ir3_ra_alloc_reg_set(struct ir3_compiler *compiler); -int ir3_ra(struct ir3 *ir3, enum shader_t type, - bool frag_coord, bool frag_face); - -/* legalize: */ -void ir3_legalize(struct ir3 *ir, int *num_samp, bool *has_ssbo, int *max_bary); - -/* ************************************************************************* */ -/* instruction helpers */ - -/* creates SSA src of correct type (ie. 
half vs full precision) */ -static inline struct ir3_register * __ssa_src(struct ir3_instruction *instr, - struct ir3_instruction *src, unsigned flags) -{ - struct ir3_register *reg; - if (src->regs[0]->flags & IR3_REG_HALF) - flags |= IR3_REG_HALF; - reg = ir3_reg_create(instr, 0, IR3_REG_SSA | flags); - reg->instr = src; - return reg; -} - -static inline struct ir3_instruction * -ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type) -{ - struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV); - ir3_reg_create(instr, 0, 0); /* dst */ - if (src->regs[0]->flags & IR3_REG_ARRAY) { - struct ir3_register *src_reg = __ssa_src(instr, src, IR3_REG_ARRAY); - src_reg->array = src->regs[0]->array; - } else { - __ssa_src(instr, src, 0); - } - debug_assert(!(src->regs[0]->flags & IR3_REG_RELATIV)); - instr->cat1.src_type = type; - instr->cat1.dst_type = type; - return instr; -} - -static inline struct ir3_instruction * -ir3_COV(struct ir3_block *block, struct ir3_instruction *src, - type_t src_type, type_t dst_type) -{ - struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV); - unsigned dst_flags = (type_size(dst_type) < 32) ? IR3_REG_HALF : 0; - unsigned src_flags = (type_size(src_type) < 32) ? 
IR3_REG_HALF : 0; - - debug_assert((src->regs[0]->flags & IR3_REG_HALF) == src_flags); - - ir3_reg_create(instr, 0, dst_flags); /* dst */ - __ssa_src(instr, src, 0); - instr->cat1.src_type = src_type; - instr->cat1.dst_type = dst_type; - debug_assert(!(src->regs[0]->flags & IR3_REG_ARRAY)); - return instr; -} - -static inline struct ir3_instruction * -ir3_NOP(struct ir3_block *block) -{ - return ir3_instr_create(block, OPC_NOP); -} - -#define INSTR0(name) \ -static inline struct ir3_instruction * \ -ir3_##name(struct ir3_block *block) \ -{ \ - struct ir3_instruction *instr = \ - ir3_instr_create(block, OPC_##name); \ - return instr; \ -} - -#define INSTR1(name) \ -static inline struct ir3_instruction * \ -ir3_##name(struct ir3_block *block, \ - struct ir3_instruction *a, unsigned aflags) \ -{ \ - struct ir3_instruction *instr = \ - ir3_instr_create(block, OPC_##name); \ - ir3_reg_create(instr, 0, 0); /* dst */ \ - __ssa_src(instr, a, aflags); \ - return instr; \ -} - -#define INSTR2(name) \ -static inline struct ir3_instruction * \ -ir3_##name(struct ir3_block *block, \ - struct ir3_instruction *a, unsigned aflags, \ - struct ir3_instruction *b, unsigned bflags) \ -{ \ - struct ir3_instruction *instr = \ - ir3_instr_create(block, OPC_##name); \ - ir3_reg_create(instr, 0, 0); /* dst */ \ - __ssa_src(instr, a, aflags); \ - __ssa_src(instr, b, bflags); \ - return instr; \ -} - -#define INSTR3(name) \ -static inline struct ir3_instruction * \ -ir3_##name(struct ir3_block *block, \ - struct ir3_instruction *a, unsigned aflags, \ - struct ir3_instruction *b, unsigned bflags, \ - struct ir3_instruction *c, unsigned cflags) \ -{ \ - struct ir3_instruction *instr = \ - ir3_instr_create(block, OPC_##name); \ - ir3_reg_create(instr, 0, 0); /* dst */ \ - __ssa_src(instr, a, aflags); \ - __ssa_src(instr, b, bflags); \ - __ssa_src(instr, c, cflags); \ - return instr; \ -} - -#define INSTR4(name) \ -static inline struct ir3_instruction * \ -ir3_##name(struct ir3_block *block, \ - 
struct ir3_instruction *a, unsigned aflags, \ - struct ir3_instruction *b, unsigned bflags, \ - struct ir3_instruction *c, unsigned cflags, \ - struct ir3_instruction *d, unsigned dflags) \ -{ \ - struct ir3_instruction *instr = \ - ir3_instr_create2(block, OPC_##name, 5); \ - ir3_reg_create(instr, 0, 0); /* dst */ \ - __ssa_src(instr, a, aflags); \ - __ssa_src(instr, b, bflags); \ - __ssa_src(instr, c, cflags); \ - __ssa_src(instr, d, dflags); \ - return instr; \ -} - -#define INSTR4F(f, name) \ -static inline struct ir3_instruction * \ -ir3_##name##_##f(struct ir3_block *block, \ - struct ir3_instruction *a, unsigned aflags, \ - struct ir3_instruction *b, unsigned bflags, \ - struct ir3_instruction *c, unsigned cflags, \ - struct ir3_instruction *d, unsigned dflags) \ -{ \ - struct ir3_instruction *instr = \ - ir3_instr_create2(block, OPC_##name, 5); \ - ir3_reg_create(instr, 0, 0); /* dst */ \ - __ssa_src(instr, a, aflags); \ - __ssa_src(instr, b, bflags); \ - __ssa_src(instr, c, cflags); \ - __ssa_src(instr, d, dflags); \ - instr->flags |= IR3_INSTR_##f; \ - return instr; \ -} - -/* cat0 instructions: */ -INSTR0(BR) -INSTR0(JUMP) -INSTR1(KILL) -INSTR0(END) - -/* cat2 instructions, most 2 src but some 1 src: */ -INSTR2(ADD_F) -INSTR2(MIN_F) -INSTR2(MAX_F) -INSTR2(MUL_F) -INSTR1(SIGN_F) -INSTR2(CMPS_F) -INSTR1(ABSNEG_F) -INSTR2(CMPV_F) -INSTR1(FLOOR_F) -INSTR1(CEIL_F) -INSTR1(RNDNE_F) -INSTR1(RNDAZ_F) -INSTR1(TRUNC_F) -INSTR2(ADD_U) -INSTR2(ADD_S) -INSTR2(SUB_U) -INSTR2(SUB_S) -INSTR2(CMPS_U) -INSTR2(CMPS_S) -INSTR2(MIN_U) -INSTR2(MIN_S) -INSTR2(MAX_U) -INSTR2(MAX_S) -INSTR1(ABSNEG_S) -INSTR2(AND_B) -INSTR2(OR_B) -INSTR1(NOT_B) -INSTR2(XOR_B) -INSTR2(CMPV_U) -INSTR2(CMPV_S) -INSTR2(MUL_U) -INSTR2(MUL_S) -INSTR2(MULL_U) -INSTR1(BFREV_B) -INSTR1(CLZ_S) -INSTR1(CLZ_B) -INSTR2(SHL_B) -INSTR2(SHR_B) -INSTR2(ASHR_B) -INSTR2(BARY_F) -INSTR2(MGEN_B) -INSTR2(GETBIT_B) -INSTR1(SETRM) -INSTR1(CBITS_B) -INSTR2(SHB) -INSTR2(MSAD) - -/* cat3 instructions: */ -INSTR3(MAD_U16) 
-INSTR3(MADSH_U16) -INSTR3(MAD_S16) -INSTR3(MADSH_M16) -INSTR3(MAD_U24) -INSTR3(MAD_S24) -INSTR3(MAD_F16) -INSTR3(MAD_F32) -INSTR3(SEL_B16) -INSTR3(SEL_B32) -INSTR3(SEL_S16) -INSTR3(SEL_S32) -INSTR3(SEL_F16) -INSTR3(SEL_F32) -INSTR3(SAD_S16) -INSTR3(SAD_S32) - -/* cat4 instructions: */ -INSTR1(RCP) -INSTR1(RSQ) -INSTR1(LOG2) -INSTR1(EXP2) -INSTR1(SIN) -INSTR1(COS) -INSTR1(SQRT) - -/* cat5 instructions: */ -INSTR1(DSX) -INSTR1(DSY) - -static inline struct ir3_instruction * -ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, - unsigned wrmask, unsigned flags, unsigned samp, unsigned tex, - struct ir3_instruction *src0, struct ir3_instruction *src1) -{ - struct ir3_instruction *sam; - struct ir3_register *reg; - - sam = ir3_instr_create(block, opc); - sam->flags |= flags; - ir3_reg_create(sam, 0, 0)->wrmask = wrmask; - if (src0) { - reg = ir3_reg_create(sam, 0, IR3_REG_SSA); - reg->wrmask = (1 << (src0->regs_count - 1)) - 1; - reg->instr = src0; - } - if (src1) { - reg = ir3_reg_create(sam, 0, IR3_REG_SSA); - reg->instr = src1; - reg->wrmask = (1 << (src1->regs_count - 1)) - 1; - } - sam->cat5.samp = samp; - sam->cat5.tex = tex; - sam->cat5.type = type; - - return sam; -} - -/* cat6 instructions: */ -INSTR2(LDLV) -INSTR2(LDG) -INSTR2(LDL) -INSTR3(STG) -INSTR3(STL) -INSTR3(LDGB) -INSTR4(STGB) -INSTR4(STIB) -INSTR1(RESINFO) -INSTR1(RESFMT) -INSTR2(ATOMIC_ADD) -INSTR2(ATOMIC_SUB) -INSTR2(ATOMIC_XCHG) -INSTR2(ATOMIC_INC) -INSTR2(ATOMIC_DEC) -INSTR2(ATOMIC_CMPXCHG) -INSTR2(ATOMIC_MIN) -INSTR2(ATOMIC_MAX) -INSTR2(ATOMIC_AND) -INSTR2(ATOMIC_OR) -INSTR2(ATOMIC_XOR) -INSTR4F(G, ATOMIC_ADD) -INSTR4F(G, ATOMIC_SUB) -INSTR4F(G, ATOMIC_XCHG) -INSTR4F(G, ATOMIC_INC) -INSTR4F(G, ATOMIC_DEC) -INSTR4F(G, ATOMIC_CMPXCHG) -INSTR4F(G, ATOMIC_MIN) -INSTR4F(G, ATOMIC_MAX) -INSTR4F(G, ATOMIC_AND) -INSTR4F(G, ATOMIC_OR) -INSTR4F(G, ATOMIC_XOR) - -/* cat7 instructions: */ -INSTR0(BAR) -INSTR0(FENCE) - -/* ************************************************************************* */ -/* 
split this out or find some helper to use.. like main/bitset.h.. */ - -#include - -#define MAX_REG 256 - -typedef uint8_t regmask_t[2 * MAX_REG / 8]; - -static inline unsigned regmask_idx(struct ir3_register *reg) -{ - unsigned num = (reg->flags & IR3_REG_RELATIV) ? reg->array.offset : reg->num; - debug_assert(num < MAX_REG); - if (reg->flags & IR3_REG_HALF) - num += MAX_REG; - return num; -} - -static inline void regmask_init(regmask_t *regmask) -{ - memset(regmask, 0, sizeof(*regmask)); -} - -static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg) -{ - unsigned idx = regmask_idx(reg); - if (reg->flags & IR3_REG_RELATIV) { - unsigned i; - for (i = 0; i < reg->size; i++, idx++) - (*regmask)[idx / 8] |= 1 << (idx % 8); - } else { - unsigned mask; - for (mask = reg->wrmask; mask; mask >>= 1, idx++) - if (mask & 1) - (*regmask)[idx / 8] |= 1 << (idx % 8); - } -} - -static inline void regmask_or(regmask_t *dst, regmask_t *a, regmask_t *b) -{ - unsigned i; - for (i = 0; i < ARRAY_SIZE(*dst); i++) - (*dst)[i] = (*a)[i] | (*b)[i]; -} - -/* set bits in a if not set in b, conceptually: - * a |= (reg & ~b) - */ -static inline void regmask_set_if_not(regmask_t *a, - struct ir3_register *reg, regmask_t *b) -{ - unsigned idx = regmask_idx(reg); - if (reg->flags & IR3_REG_RELATIV) { - unsigned i; - for (i = 0; i < reg->size; i++, idx++) - if (!((*b)[idx / 8] & (1 << (idx % 8)))) - (*a)[idx / 8] |= 1 << (idx % 8); - } else { - unsigned mask; - for (mask = reg->wrmask; mask; mask >>= 1, idx++) - if (mask & 1) - if (!((*b)[idx / 8] & (1 << (idx % 8)))) - (*a)[idx / 8] |= 1 << (idx % 8); - } -} - -static inline bool regmask_get(regmask_t *regmask, - struct ir3_register *reg) -{ - unsigned idx = regmask_idx(reg); - if (reg->flags & IR3_REG_RELATIV) { - unsigned i; - for (i = 0; i < reg->size; i++, idx++) - if ((*regmask)[idx / 8] & (1 << (idx % 8))) - return true; - } else { - unsigned mask; - for (mask = reg->wrmask; mask; mask >>= 1, idx++) - if (mask & 1) - if 
((*regmask)[idx / 8] & (1 << (idx % 8))) - return true; - } - return false; -} - -/* ************************************************************************* */ - -#endif /* IR3_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_legalize.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_legalize.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_legalize.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_legalize.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,497 +0,0 @@ -/* - * Copyright (C) 2014 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include "util/u_math.h" - -#include "freedreno_util.h" - -#include "ir3.h" - -/* - * Legalize: - * - * We currently require that scheduling ensures that we have enough nop's - * in all the right places. 
The legalize step mostly handles fixing up - * instruction flags ((ss)/(sy)/(ei)), and collapses sequences of nop's - * into fewer nop's w/ rpt flag. - */ - -struct ir3_legalize_ctx { - int num_samp; - bool has_ssbo; - int max_bary; -}; - -struct ir3_legalize_state { - regmask_t needs_ss; - regmask_t needs_ss_war; /* write after read */ - regmask_t needs_sy; -}; - -struct ir3_legalize_block_data { - bool valid; - struct ir3_legalize_state state; -}; - -/* We want to evaluate each block from the position of any other - * predecessor block, in order that the flags set are the union of - * all possible program paths. - * - * To do this, we need to know the output state (needs_ss/ss_war/sy) - * of all predecessor blocks. The tricky thing is loops, which mean - * that we can't simply recursively process each predecessor block - * before legalizing the current block. - * - * How we handle that is by looping over all the blocks until the - * results converge. If the output state of a given block changes - * in a given pass, this means that all successor blocks are not - * yet fully legalized. 
- */ - -static bool -legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) -{ - struct ir3_legalize_block_data *bd = block->data; - - if (bd->valid) - return false; - - struct ir3_instruction *last_input = NULL; - struct ir3_instruction *last_rel = NULL; - struct ir3_instruction *last_n = NULL; - struct list_head instr_list; - struct ir3_legalize_state prev_state = bd->state; - struct ir3_legalize_state *state = &bd->state; - - /* our input state is the OR of all predecessor blocks' state: */ - for (unsigned i = 0; i < block->predecessors_count; i++) { - struct ir3_legalize_block_data *pbd = block->predecessors[i]->data; - struct ir3_legalize_state *pstate = &pbd->state; - - /* Our input (ss)/(sy) state is based on OR'ing the output - * state of all our predecessor blocks - */ - regmask_or(&state->needs_ss, - &state->needs_ss, &pstate->needs_ss); - regmask_or(&state->needs_ss_war, - &state->needs_ss_war, &pstate->needs_ss_war); - regmask_or(&state->needs_sy, - &state->needs_sy, &pstate->needs_sy); - } - - /* remove all the instructions from the list, we'll be adding - * them back in as we go - */ - list_replace(&block->instr_list, &instr_list); - list_inithead(&block->instr_list); - - list_for_each_entry_safe (struct ir3_instruction, n, &instr_list, node) { - struct ir3_register *reg; - unsigned i; - - n->flags &= ~(IR3_INSTR_SS | IR3_INSTR_SY); - - if (is_meta(n)) - continue; - - if (is_input(n)) { - struct ir3_register *inloc = n->regs[1]; - assert(inloc->flags & IR3_REG_IMMED); - ctx->max_bary = MAX2(ctx->max_bary, inloc->iim_val); - } - - if (last_n && is_barrier(last_n)) - n->flags |= IR3_INSTR_SS | IR3_INSTR_SY; - - /* NOTE: consider dst register too.. it could happen that - * texture sample instruction (for example) writes some - * components which are unused. 
A subsequent instruction - * that writes the same register can race w/ the sam instr - * resulting in undefined results: - */ - for (i = 0; i < n->regs_count; i++) { - reg = n->regs[i]; - - if (reg_gpr(reg)) { - - /* TODO: we probably only need (ss) for alu - * instr consuming sfu result.. need to make - * some tests for both this and (sy).. - */ - if (regmask_get(&state->needs_ss, reg)) { - n->flags |= IR3_INSTR_SS; - regmask_init(&state->needs_ss_war); - regmask_init(&state->needs_ss); - } - - if (regmask_get(&state->needs_sy, reg)) { - n->flags |= IR3_INSTR_SY; - regmask_init(&state->needs_sy); - } - } - - /* TODO: is it valid to have address reg loaded from a - * relative src (ie. mova a0, c)? If so, the - * last_rel check below should be moved ahead of this: - */ - if (reg->flags & IR3_REG_RELATIV) - last_rel = n; - } - - if (n->regs_count > 0) { - reg = n->regs[0]; - if (regmask_get(&state->needs_ss_war, reg)) { - n->flags |= IR3_INSTR_SS; - regmask_init(&state->needs_ss_war); - regmask_init(&state->needs_ss); - } - - if (last_rel && (reg->num == regid(REG_A0, 0))) { - last_rel->flags |= IR3_INSTR_UL; - last_rel = NULL; - } - } - - /* cat5+ does not have an (ss) bit, if needed we need to - * insert a nop to carry the sync flag. 
Would be kinda - * clever if we were aware of this during scheduling, but - * this should be a pretty rare case: - */ - if ((n->flags & IR3_INSTR_SS) && (opc_cat(n->opc) >= 5)) { - struct ir3_instruction *nop; - nop = ir3_NOP(block); - nop->flags |= IR3_INSTR_SS; - n->flags &= ~IR3_INSTR_SS; - } - - /* need to be able to set (ss) on first instruction: */ - if (list_empty(&block->instr_list) && (opc_cat(n->opc) >= 5)) - ir3_NOP(block); - - if (is_nop(n) && !list_empty(&block->instr_list)) { - struct ir3_instruction *last = list_last_entry(&block->instr_list, - struct ir3_instruction, node); - if (is_nop(last) && (last->repeat < 5)) { - last->repeat++; - last->flags |= n->flags; - continue; - } - } - - list_addtail(&n->node, &block->instr_list); - - if (is_sfu(n)) - regmask_set(&state->needs_ss, n->regs[0]); - - if (is_tex(n)) { - /* this ends up being the # of samp instructions.. but that - * is ok, everything else only cares whether it is zero or - * not. We do this here, rather than when we encounter a - * SAMP decl, because (especially in binning pass shader) - * the samp instruction(s) could get eliminated if the - * result is not used. - */ - ctx->num_samp = MAX2(ctx->num_samp, n->cat5.samp + 1); - regmask_set(&state->needs_sy, n->regs[0]); - } else if (n->opc == OPC_RESINFO) { - regmask_set(&state->needs_ss, n->regs[0]); - ir3_NOP(block)->flags |= IR3_INSTR_SS; - } else if (is_load(n)) { - /* seems like ldlv needs (ss) bit instead?? which is odd but - * makes a bunch of flat-varying tests start working on a4xx. 
- */ - if ((n->opc == OPC_LDLV) || (n->opc == OPC_LDL)) - regmask_set(&state->needs_ss, n->regs[0]); - else - regmask_set(&state->needs_sy, n->regs[0]); - } else if (is_atomic(n->opc)) { - if (n->flags & IR3_INSTR_G) - regmask_set(&state->needs_sy, n->regs[0]); - else - regmask_set(&state->needs_ss, n->regs[0]); - } - - if (is_ssbo(n->opc) || (is_atomic(n->opc) && (n->flags & IR3_INSTR_G))) - ctx->has_ssbo = true; - - /* both tex/sfu appear to not always immediately consume - * their src register(s): - */ - if (is_tex(n) || is_sfu(n) || is_mem(n)) { - foreach_src(reg, n) { - if (reg_gpr(reg)) - regmask_set(&state->needs_ss_war, reg); - } - } - - if (is_input(n)) - last_input = n; - - last_n = n; - } - - if (last_input) { - /* special hack.. if using ldlv to bypass interpolation, - * we need to insert a dummy bary.f on which we can set - * the (ei) flag: - */ - if (is_mem(last_input) && (last_input->opc == OPC_LDLV)) { - struct ir3_instruction *baryf; - - /* (ss)bary.f (ei)r63.x, 0, r0.x */ - baryf = ir3_instr_create(block, OPC_BARY_F); - baryf->flags |= IR3_INSTR_SS; - ir3_reg_create(baryf, regid(63, 0), 0); - ir3_reg_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0; - ir3_reg_create(baryf, regid(0, 0), 0); - - /* insert the dummy bary.f after last_input: */ - list_delinit(&baryf->node); - list_add(&baryf->node, &last_input->node); - - last_input = baryf; - } - last_input->regs[0]->flags |= IR3_REG_EI; - } - - if (last_rel) - last_rel->flags |= IR3_INSTR_UL; - - bd->valid = true; - - if (memcmp(&prev_state, state, sizeof(*state))) { - /* our output state changed, this invalidates all of our - * successors: - */ - for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) { - if (!block->successors[i]) - break; - struct ir3_legalize_block_data *pbd = block->successors[i]->data; - pbd->valid = false; - } - } - - return true; -} - -/* NOTE: branch instructions are always the last instruction(s) - * in the block. 
We take advantage of this as we resolve the - * branches, since "if (foo) break;" constructs turn into - * something like: - * - * block3 { - * ... - * 0029:021: mov.s32s32 r62.x, r1.y - * 0082:022: br !p0.x, target=block5 - * 0083:023: br p0.x, target=block4 - * // succs: if _[0029:021: mov.s32s32] block4; else block5; - * } - * block4 { - * 0084:024: jump, target=block6 - * // succs: block6; - * } - * block5 { - * 0085:025: jump, target=block7 - * // succs: block7; - * } - * - * ie. only instruction in block4/block5 is a jump, so when - * resolving branches we can easily detect this by checking - * that the first instruction in the target block is itself - * a jump, and setup the br directly to the jump's target - * (and strip back out the now unreached jump) - * - * TODO sometimes we end up with things like: - * - * br !p0.x, #2 - * br p0.x, #12 - * add.u r0.y, r0.y, 1 - * - * If we swapped the order of the branches, we could drop one. - */ -static struct ir3_block * -resolve_dest_block(struct ir3_block *block) -{ - /* special case for last block: */ - if (!block->successors[0]) - return block; - - /* NOTE that we may or may not have inserted the jump - * in the target block yet, so conditions to resolve - * the dest to the dest block's successor are: - * - * (1) successor[1] == NULL && - * (2) (block-is-empty || only-instr-is-jump) - */ - if (block->successors[1] == NULL) { - if (list_empty(&block->instr_list)) { - return block->successors[0]; - } else if (list_length(&block->instr_list) == 1) { - struct ir3_instruction *instr = list_first_entry( - &block->instr_list, struct ir3_instruction, node); - if (instr->opc == OPC_JUMP) - return block->successors[0]; - } - } - return block; -} - -static bool -resolve_jump(struct ir3_instruction *instr) -{ - struct ir3_block *tblock = - resolve_dest_block(instr->cat0.target); - struct ir3_instruction *target; - - if (tblock != instr->cat0.target) { - list_delinit(&instr->cat0.target->node); - instr->cat0.target = tblock; 
- return true; - } - - target = list_first_entry(&tblock->instr_list, - struct ir3_instruction, node); - - /* TODO maybe a less fragile way to do this. But we are expecting - * a pattern from sched_block() that looks like: - * - * br !p0.x, #else-block - * br p0.x, #if-block - * - * if the first branch target is +2, or if 2nd branch target is +1 - * then we can just drop the jump. - */ - unsigned next_block; - if (instr->cat0.inv == true) - next_block = 2; - else - next_block = 1; - - if ((!target) || (target->ip == (instr->ip + next_block))) { - list_delinit(&instr->node); - return true; - } else { - instr->cat0.immed = - (int)target->ip - (int)instr->ip; - } - return false; -} - -/* resolve jumps, removing jumps/branches to immediately following - * instruction which we end up with from earlier stages. Since - * removing an instruction can invalidate earlier instruction's - * branch offsets, we need to do this iteratively until no more - * branches are removed. - */ -static bool -resolve_jumps(struct ir3 *ir) -{ - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) - if (is_flow(instr) && instr->cat0.target) - if (resolve_jump(instr)) - return true; - - return false; -} - -/* we want to mark points where divergent flow control re-converges - * with (jp) flags. For now, since we don't do any optimization for - * things that start out as a 'do {} while()', re-convergence points - * will always be a branch or jump target. Note that this is overly - * conservative, since unconditional jump targets are not convergence - * points, we are just assuming that the other path to reach the jump - * target was divergent. If we were clever enough to optimize the - * jump at end of a loop back to a conditional branch into a single - * conditional branch, ie. 
like: - * - * add.f r1.w, r0.x, (neg)(r)c2.x <= loop start - * mul.f r1.z, r1.z, r0.x - * mul.f r1.y, r1.y, r0.x - * mul.f r0.z, r1.x, r0.x - * mul.f r0.w, r0.y, r0.x - * cmps.f.ge r0.x, (r)c2.y, (r)r1.w - * add.s r0.x, (r)r0.x, (r)-1 - * sel.f32 r0.x, (r)c3.y, (r)r0.x, c3.x - * cmps.f.eq p0.x, r0.x, c3.y - * mov.f32f32 r0.x, r1.w - * mov.f32f32 r0.y, r0.w - * mov.f32f32 r1.x, r0.z - * (rpt2)nop - * br !p0.x, #-13 - * (jp)mul.f r0.x, c263.y, r1.y - * - * Then we'd have to be more clever, as the convergence point is no - * longer a branch or jump target. - */ -static void -mark_convergence_points(struct ir3 *ir) -{ - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - if (is_flow(instr) && instr->cat0.target) { - struct ir3_instruction *target = - list_first_entry(&instr->cat0.target->instr_list, - struct ir3_instruction, node); - target->flags |= IR3_INSTR_JP; - } - } - } -} - -void -ir3_legalize(struct ir3 *ir, int *num_samp, bool *has_ssbo, int *max_bary) -{ - struct ir3_legalize_ctx *ctx = rzalloc(ir, struct ir3_legalize_ctx); - bool progress; - - ctx->max_bary = -1; - - /* allocate per-block data: */ - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - block->data = rzalloc(ctx, struct ir3_legalize_block_data); - } - - /* process each block: */ - do { - progress = false; - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - progress |= legalize_block(ctx, block); - } - } while (progress); - - *num_samp = ctx->num_samp; - *has_ssbo = ctx->has_ssbo; - *max_bary = ctx->max_bary; - - do { - ir3_count_instructions(ir); - } while(resolve_jumps(ir)); - - mark_convergence_points(ir); - - ralloc_free(ctx); -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_nir.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_nir.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_nir.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_nir.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,272 +0,0 @@ -/* - * Copyright (C) 2015 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - - -#include "freedreno_util.h" - -#include "ir3_nir.h" -#include "ir3_compiler.h" -#include "ir3_shader.h" - -#include "nir/tgsi_to_nir.h" - - -static const nir_shader_compiler_options options = { - .lower_fpow = true, - .lower_scmp = true, - .lower_flrp32 = true, - .lower_flrp64 = true, - .lower_ffract = true, - .lower_fmod32 = true, - .lower_fmod64 = true, - .lower_fdiv = true, - .lower_ldexp = true, - .fuse_ffma = true, - .native_integers = true, - .vertex_id_zero_based = true, - .lower_extract_byte = true, - .lower_extract_word = true, - .lower_all_io_to_temps = true, - .lower_helper_invocation = true, -}; - -struct nir_shader * -ir3_tgsi_to_nir(const struct tgsi_token *tokens) -{ - return tgsi_to_nir(tokens, &options); -} - -const nir_shader_compiler_options * -ir3_get_compiler_options(struct ir3_compiler *compiler) -{ - return &options; -} - -/* for given shader key, are any steps handled in nir? */ -bool -ir3_key_lowers_nir(const struct ir3_shader_key *key) -{ - return key->fsaturate_s | key->fsaturate_t | key->fsaturate_r | - key->vsaturate_s | key->vsaturate_t | key->vsaturate_r | - key->ucp_enables | key->color_two_side | - key->fclamp_color | key->vclamp_color; -} - -#define OPT(nir, pass, ...) ({ \ - bool this_progress = false; \ - NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ - this_progress; \ -}) - -#define OPT_V(nir, pass, ...) 
NIR_PASS_V(nir, pass, ##__VA_ARGS__) - -static void -ir3_optimize_loop(nir_shader *s) -{ - bool progress; - do { - progress = false; - - OPT_V(s, nir_lower_vars_to_ssa); - progress |= OPT(s, nir_opt_copy_prop_vars); - progress |= OPT(s, nir_opt_dead_write_vars); - progress |= OPT(s, nir_lower_alu_to_scalar); - progress |= OPT(s, nir_lower_phis_to_scalar); - - progress |= OPT(s, nir_copy_prop); - progress |= OPT(s, nir_opt_dce); - progress |= OPT(s, nir_opt_cse); - static int gcm = -1; - if (gcm == -1) - gcm = env2u("GCM"); - if (gcm == 1) - progress |= OPT(s, nir_opt_gcm, true); - else if (gcm == 2) - progress |= OPT(s, nir_opt_gcm, false); - progress |= OPT(s, nir_opt_peephole_select, 16); - progress |= OPT(s, nir_opt_intrinsics); - progress |= OPT(s, nir_opt_algebraic); - progress |= OPT(s, nir_opt_constant_folding); - progress |= OPT(s, nir_opt_dead_cf); - if (OPT(s, nir_opt_trivial_continues)) { - progress |= true; - /* If nir_opt_trivial_continues makes progress, then we need to clean - * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll - * to make progress. 
- */ - OPT(s, nir_copy_prop); - OPT(s, nir_opt_dce); - } - progress |= OPT(s, nir_opt_if); - progress |= OPT(s, nir_opt_remove_phis); - progress |= OPT(s, nir_opt_undef); - - } while (progress); -} - -struct nir_shader * -ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s, - const struct ir3_shader_key *key) -{ - struct nir_lower_tex_options tex_options = { - .lower_rect = 0, - }; - - if (key) { - switch (shader->type) { - case SHADER_FRAGMENT: - tex_options.saturate_s = key->fsaturate_s; - tex_options.saturate_t = key->fsaturate_t; - tex_options.saturate_r = key->fsaturate_r; - break; - case SHADER_VERTEX: - tex_options.saturate_s = key->vsaturate_s; - tex_options.saturate_t = key->vsaturate_t; - tex_options.saturate_r = key->vsaturate_r; - break; - default: - /* TODO */ - break; - } - } - - if (shader->compiler->gpu_id >= 400) { - /* a4xx seems to have *no* sam.p */ - tex_options.lower_txp = ~0; /* lower all txp */ - } else { - /* a3xx just needs to avoid sam.p for 3d tex */ - tex_options.lower_txp = (1 << GLSL_SAMPLER_DIM_3D); - } - - if (fd_mesa_debug & FD_DBG_DISASM) { - debug_printf("----------------------\n"); - nir_print_shader(s, stdout); - debug_printf("----------------------\n"); - } - - OPT_V(s, nir_opt_global_to_local); - OPT_V(s, nir_lower_regs_to_ssa); - - if (key) { - if (s->info.stage == MESA_SHADER_VERTEX) { - OPT_V(s, nir_lower_clip_vs, key->ucp_enables); - if (key->vclamp_color) - OPT_V(s, nir_lower_clamp_color_outputs); - } else if (s->info.stage == MESA_SHADER_FRAGMENT) { - OPT_V(s, nir_lower_clip_fs, key->ucp_enables); - if (key->fclamp_color) - OPT_V(s, nir_lower_clamp_color_outputs); - } - if (key->color_two_side) { - OPT_V(s, nir_lower_two_sided_color); - } - } else { - /* only want to do this the first time (when key is null) - * and not again on any potential 2nd variant lowering pass: - */ - OPT_V(s, ir3_nir_apply_trig_workarounds); - } - - OPT_V(s, nir_lower_tex, &tex_options); - OPT_V(s, nir_lower_load_const_to_scalar); - if 
(shader->compiler->gpu_id < 500) - OPT_V(s, ir3_nir_lower_tg4_to_tex); - - ir3_optimize_loop(s); - - /* do idiv lowering after first opt loop to give a chance for - * divide by immed power-of-two to be caught first: - */ - if (OPT(s, nir_lower_idiv)) - ir3_optimize_loop(s); - - OPT_V(s, nir_remove_dead_variables, nir_var_local); - - OPT_V(s, nir_move_load_const); - - if (fd_mesa_debug & FD_DBG_DISASM) { - debug_printf("----------------------\n"); - nir_print_shader(s, stdout); - debug_printf("----------------------\n"); - } - - nir_sweep(s); - - return s; -} - -void -ir3_nir_scan_driver_consts(nir_shader *shader, - struct ir3_driver_const_layout *layout) -{ - nir_foreach_function(function, shader) { - if (!function->impl) - continue; - - nir_foreach_block(block, function->impl) { - nir_foreach_instr(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intr = - nir_instr_as_intrinsic(instr); - unsigned idx; - - switch (intr->intrinsic) { - case nir_intrinsic_get_buffer_size: - idx = nir_src_as_const_value(intr->src[0])->u32[0]; - if (layout->ssbo_size.mask & (1 << idx)) - break; - layout->ssbo_size.mask |= (1 << idx); - layout->ssbo_size.off[idx] = - layout->ssbo_size.count; - layout->ssbo_size.count += 1; /* one const per */ - break; - case nir_intrinsic_image_deref_atomic_add: - case nir_intrinsic_image_deref_atomic_min: - case nir_intrinsic_image_deref_atomic_max: - case nir_intrinsic_image_deref_atomic_and: - case nir_intrinsic_image_deref_atomic_or: - case nir_intrinsic_image_deref_atomic_xor: - case nir_intrinsic_image_deref_atomic_exchange: - case nir_intrinsic_image_deref_atomic_comp_swap: - case nir_intrinsic_image_deref_store: - case nir_intrinsic_image_deref_size: - idx = nir_intrinsic_get_var(intr, 0)->data.driver_location; - if (layout->image_dims.mask & (1 << idx)) - break; - layout->image_dims.mask |= (1 << idx); - layout->image_dims.off[idx] = - layout->image_dims.count; - layout->image_dims.count += 3; 
/* three const per */ - break; - default: - break; - } - } - } - } -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_nir.h mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_nir.h --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_nir.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_nir.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2015 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#ifndef IR3_NIR_H_ -#define IR3_NIR_H_ - -#include "compiler/nir/nir.h" -#include "compiler/shader_enums.h" - -#include "ir3_shader.h" - -void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_driver_const_layout *layout); - -bool ir3_nir_apply_trig_workarounds(nir_shader *shader); -bool ir3_nir_lower_tg4_to_tex(nir_shader *shader); - -struct nir_shader * ir3_tgsi_to_nir(const struct tgsi_token *tokens); -const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler); -bool ir3_key_lowers_nir(const struct ir3_shader_key *key); -struct nir_shader * ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s, - const struct ir3_shader_key *key); - -#endif /* IR3_NIR_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,138 +0,0 @@ -/* - * Copyright © 2017 Ilia Mirkin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "ir3_nir.h" -#include "compiler/nir/nir_builder.h" - -/* A4XX has a broken GATHER4 operation. It performs the texture swizzle on the - * gather results, rather than before. As a result, it must be emulated with - * direct texture calls. - */ - -static bool -lower_tg4(nir_block *block, nir_builder *b, void *mem_ctx) -{ - bool progress = false; - - static const int offsets[3][2] = { {0, 1}, {1, 1}, {1, 0} }; - - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_tex) - continue; - - nir_tex_instr *tg4 = (nir_tex_instr *)instr; - - if (tg4->op != nir_texop_tg4) - continue; - - b->cursor = nir_before_instr(&tg4->instr); - - nir_ssa_def *results[4]; - int offset_index = nir_tex_instr_src_index(tg4, nir_tex_src_offset); - for (int i = 0; i < 4; i++) { - int num_srcs = tg4->num_srcs + 1 /* lod */; - if (offset_index < 0 && i < 3) - num_srcs++; - - nir_tex_instr *tex = nir_tex_instr_create(b->shader, num_srcs); - tex->op = nir_texop_txl; - tex->sampler_dim = tg4->sampler_dim; - tex->coord_components = tg4->coord_components; - tex->is_array = tg4->is_array; - tex->is_shadow = tg4->is_shadow; - tex->is_new_style_shadow = tg4->is_new_style_shadow; - tex->texture_index = tg4->texture_index; - tex->sampler_index = tg4->sampler_index; - tex->dest_type = tg4->dest_type; - - for (int j = 0; j < tg4->num_srcs; j++) { - nir_src_copy(&tex->src[j].src, &tg4->src[j].src, tex); - tex->src[j].src_type = tg4->src[j].src_type; - } - if (i != 3) { - nir_ssa_def 
*offset = - nir_vec2(b, nir_imm_int(b, offsets[i][0]), - nir_imm_int(b, offsets[i][1])); - if (offset_index < 0) { - tex->src[tg4->num_srcs].src = nir_src_for_ssa(offset); - tex->src[tg4->num_srcs].src_type = nir_tex_src_offset; - } else { - assert(nir_tex_instr_src_size(tex, offset_index) == 2); - nir_ssa_def *orig = nir_ssa_for_src( - b, tex->src[offset_index].src, 2); - tex->src[offset_index].src = - nir_src_for_ssa(nir_iadd(b, orig, offset)); - } - } - tex->src[num_srcs - 1].src = nir_src_for_ssa(nir_imm_float(b, 0)); - tex->src[num_srcs - 1].src_type = nir_tex_src_lod; - - nir_ssa_dest_init(&tex->instr, &tex->dest, - nir_tex_instr_dest_size(tex), 32, NULL); - nir_builder_instr_insert(b, &tex->instr); - - results[i] = nir_channel(b, &tex->dest.ssa, tg4->component); - } - - nir_ssa_def *result = nir_vec4(b, results[0], results[1], results[2], results[3]); - nir_ssa_def_rewrite_uses(&tg4->dest.ssa, nir_src_for_ssa(result)); - - nir_instr_remove(&tg4->instr); - - progress = true; - } - - return progress; -} - -static bool -lower_tg4_func(nir_function_impl *impl) -{ - void *mem_ctx = ralloc_parent(impl); - nir_builder b; - nir_builder_init(&b, impl); - - bool progress = false; - nir_foreach_block_safe(block, impl) { - progress |= lower_tg4(block, &b, mem_ctx); - } - - if (progress) - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance); - - return progress; -} - -bool -ir3_nir_lower_tg4_to_tex(nir_shader *shader) -{ - bool progress = false; - - nir_foreach_function(function, shader) { - if (function->impl) - progress |= lower_tg4_func(function->impl); - } - - return progress; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_nir_trig.py mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_nir_trig.py --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_nir_trig.py 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_nir_trig.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,51 +0,0 @@ -# -# 
Copyright (C) 2016 Intel Corporation -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. 
- -from __future__ import print_function - -import argparse -import sys - -trig_workarounds = [ - (('fsin', 'x'), ('fsin', ('fsub', ('fmul', 6.283185, ('ffract', ('fadd', ('fmul', 0.159155, 'x'), 0.5))), 3.141593))), - (('fcos', 'x'), ('fcos', ('fsub', ('fmul', 6.283185, ('ffract', ('fadd', ('fmul', 0.159155, 'x'), 0.5))), 3.141593))), -] - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('-p', '--import-path', required=True) - args = parser.parse_args() - sys.path.insert(0, args.import_path) - run() - - -def run(): - import nir_algebraic # pylint: disable=import-error - - print('#include "ir3_nir.h"') - print(nir_algebraic.AlgebraicPass("ir3_nir_apply_trig_workarounds", - trig_workarounds).render()) - - -if __name__ == '__main__': - main() diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_print.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_print.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_print.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_print.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,264 +0,0 @@ -/* - * Copyright (C) 2014 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include -#include - -#include "ir3.h" - -#define PTRID(x) ((unsigned long)(x)) - -static void print_instr_name(struct ir3_instruction *instr) -{ - if (!instr) - return; -#ifdef DEBUG - printf("%04u:", instr->serialno); -#endif - printf("%04u:", instr->name); - printf("%04u:", instr->ip); - printf("%03u: ", instr->depth); - - if (instr->flags & IR3_INSTR_SY) - printf("(sy)"); - if (instr->flags & IR3_INSTR_SS) - printf("(ss)"); - - if (is_meta(instr)) { - switch (instr->opc) { - case OPC_META_INPUT: printf("_meta:in"); break; - case OPC_META_FO: printf("_meta:fo"); break; - case OPC_META_FI: printf("_meta:fi"); break; - - /* shouldn't hit here.. 
just for debugging: */ - default: printf("_meta:%d", instr->opc); break; - } - } else if (instr->opc == OPC_MOV) { - static const char *type[] = { - [TYPE_F16] = "f16", - [TYPE_F32] = "f32", - [TYPE_U16] = "u16", - [TYPE_U32] = "u32", - [TYPE_S16] = "s16", - [TYPE_S32] = "s32", - [TYPE_U8] = "u8", - [TYPE_S8] = "s8", - }; - if (instr->cat1.src_type == instr->cat1.dst_type) - printf("mov"); - else - printf("cov"); - printf(".%s%s", type[instr->cat1.src_type], type[instr->cat1.dst_type]); - } else { - printf("%s", ir3_instr_name(instr)); - if (instr->flags & IR3_INSTR_3D) - printf(".3d"); - if (instr->flags & IR3_INSTR_A) - printf(".a"); - if (instr->flags & IR3_INSTR_O) - printf(".o"); - if (instr->flags & IR3_INSTR_P) - printf(".p"); - if (instr->flags & IR3_INSTR_S) - printf(".s"); - if (instr->flags & IR3_INSTR_S2EN) - printf(".s2en"); - } -} - -static void print_reg_name(struct ir3_register *reg) -{ - if ((reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) && - (reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT))) - printf("(absneg)"); - else if (reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)) - printf("(neg)"); - else if (reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) - printf("(abs)"); - - if (reg->flags & IR3_REG_IMMED) { - printf("imm[%f,%d,0x%x]", reg->fim_val, reg->iim_val, reg->iim_val); - } else if (reg->flags & IR3_REG_ARRAY) { - printf("arr[id=%u, offset=%d, size=%u", reg->array.id, - reg->array.offset, reg->size); - /* for ARRAY we could have null src, for example first write - * instruction.. 
- */ - if (reg->instr) { - printf(", _["); - print_instr_name(reg->instr); - printf("]"); - } - printf("]"); - } else if (reg->flags & IR3_REG_SSA) { - printf("_["); - print_instr_name(reg->instr); - printf("]"); - } else if (reg->flags & IR3_REG_RELATIV) { - if (reg->flags & IR3_REG_HALF) - printf("h"); - if (reg->flags & IR3_REG_CONST) - printf("c", reg->array.offset); - else - printf("\x1b[0;31mr\x1b[0m (%u)", reg->array.offset, reg->size); - } else { - if (reg->flags & IR3_REG_HALF) - printf("h"); - if (reg->flags & IR3_REG_CONST) - printf("c%u.%c", reg_num(reg), "xyzw"[reg_comp(reg)]); - else - printf("\x1b[0;31mr%u.%c\x1b[0m", reg_num(reg), "xyzw"[reg_comp(reg)]); - } -} - -static void -tab(int lvl) -{ - for (int i = 0; i < lvl; i++) - printf("\t"); -} - -static void -print_instr(struct ir3_instruction *instr, int lvl) -{ - unsigned i; - - tab(lvl); - - print_instr_name(instr); - for (i = 0; i < instr->regs_count; i++) { - struct ir3_register *reg = instr->regs[i]; - printf(i ? ", " : " "); - print_reg_name(reg); - } - - if (instr->address) { - printf(", address=_"); - printf("["); - print_instr_name(instr->address); - printf("]"); - } - - if (instr->cp.left) { - printf(", left=_"); - printf("["); - print_instr_name(instr->cp.left); - printf("]"); - } - - if (instr->cp.right) { - printf(", right=_"); - printf("["); - print_instr_name(instr->cp.right); - printf("]"); - } - - if (instr->opc == OPC_META_FO) { - printf(", off=%d", instr->fo.off); - } - - if (is_flow(instr) && instr->cat0.target) { - /* the predicate register src is implied: */ - if (instr->opc == OPC_BR) { - printf(" %sp0.x", instr->cat0.inv ? "!" 
: ""); - } - printf(", target=block%u", block_id(instr->cat0.target)); - } - - if (instr->deps_count) { - printf(", false-deps:"); - for (unsigned i = 0; i < instr->deps_count; i++) { - if (i > 0) - printf(", "); - printf("_["); - print_instr_name(instr->deps[i]); - printf("]"); - } - } - - printf("\n"); -} - -void ir3_print_instr(struct ir3_instruction *instr) -{ - print_instr(instr, 0); -} - -static void -print_block(struct ir3_block *block, int lvl) -{ - tab(lvl); printf("block%u {\n", block_id(block)); - - if (block->predecessors_count > 0) { - tab(lvl+1); - printf("pred: "); - for (unsigned i = 0; i < block->predecessors_count; i++) { - if (i) - printf(", "); - printf("block%u", block_id(block->predecessors[i])); - } - printf("\n"); - } - - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - print_instr(instr, lvl+1); - } - - tab(lvl+1); printf("/* keeps:\n"); - for (unsigned i = 0; i < block->keeps_count; i++) { - print_instr(block->keeps[i], lvl+2); - } - tab(lvl+1); printf(" */\n"); - - if (block->successors[1]) { - /* leading into if/else: */ - tab(lvl+1); - printf("/* succs: if _["); - print_instr_name(block->condition); - printf("] block%u; else block%u; */\n", - block_id(block->successors[0]), - block_id(block->successors[1])); - } else if (block->successors[0]) { - tab(lvl+1); - printf("/* succs: block%u; */\n", - block_id(block->successors[0])); - } - tab(lvl); printf("}\n"); -} - -void -ir3_print(struct ir3 *ir) -{ - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) - print_block(block, 0); - - for (unsigned i = 0; i < ir->noutputs; i++) { - if (!ir->outputs[i]) - continue; - printf("out%d: ", i); - print_instr(ir->outputs[i], 0); - } -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_ra.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_ra.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_ra.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_ra.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,1126 +0,0 @@ -/* - * Copyright (C) 2014 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include "util/u_math.h" -#include "util/register_allocate.h" -#include "util/ralloc.h" -#include "util/bitset.h" - -#include "freedreno_util.h" - -#include "ir3.h" -#include "ir3_compiler.h" - -/* - * Register Assignment: - * - * Uses the register_allocate util, which implements graph coloring - * algo with interference classes. To handle the cases where we need - * consecutive registers (for example, texture sample instructions), - * we model these as larger (double/quad/etc) registers which conflict - * with the corresponding registers in other classes. - * - * Additionally we create additional classes for half-regs, which - * do not conflict with the full-reg classes. 
We do need at least - * sizes 1-4 (to deal w/ texture sample instructions output to half- - * reg). At the moment we don't create the higher order half-reg - * classes as half-reg frequently does not have enough precision - * for texture coords at higher resolutions. - * - * There are some additional cases that we need to handle specially, - * as the graph coloring algo doesn't understand "partial writes". - * For example, a sequence like: - * - * add r0.z, ... - * sam (f32)(xy)r0.x, ... - * ... - * sam (f32)(xyzw)r0.w, r0.x, ... ; 3d texture, so r0.xyz are coord - * - * In this scenario, we treat r0.xyz as class size 3, which is written - * (from a use/def perspective) at the 'add' instruction and ignore the - * subsequent partial writes to r0.xy. So the 'add r0.z, ...' is the - * defining instruction, as it is the first to partially write r0.xyz. - * - * Note i965 has a similar scenario, which they solve with a virtual - * LOAD_PAYLOAD instruction which gets turned into multiple MOV's after - * register assignment. But for us that is horrible from a scheduling - * standpoint. Instead what we do is use idea of 'definer' instruction. - * Ie. the first instruction (lowest ip) to write to the variable is the - * one we consider from use/def perspective when building interference - * graph. (Other instructions which write other variable components - * just define the variable some more.) - * - * Arrays of arbitrary size are handled via pre-coloring a consecutive - * sequence of registers. Additional scalar (single component) reg - * names are allocated starting at ctx->class_base[total_class_count] - * (see arr->base), which are pre-colored. In the use/def graph direct - * access is treated as a single element use/def, and indirect access - * is treated as use or def of all array elements. (Only the first - * def is tracked, in case of multiple indirect writes, etc.) 
- * - * TODO arrays that fit in one of the pre-defined class sizes should - * not need to be pre-colored, but instead could be given a normal - * vreg name. (Ignoring this for now since it is a good way to work - * out the kinks with arbitrary sized arrays.) - * - * TODO might be easier for debugging to split this into two passes, - * the first assigning vreg names in a way that we could ir3_print() - * the result. - */ - -static const unsigned class_sizes[] = { - 1, 2, 3, 4, - 4 + 4, /* txd + 1d/2d */ - 4 + 6, /* txd + 3d */ -}; -#define class_count ARRAY_SIZE(class_sizes) - -static const unsigned half_class_sizes[] = { - 1, 2, 3, 4, -}; -#define half_class_count ARRAY_SIZE(half_class_sizes) - -/* seems to just be used for compute shaders? Seems like vec1 and vec3 - * are sufficient (for now?) - */ -static const unsigned high_class_sizes[] = { - 1, 3, -}; -#define high_class_count ARRAY_SIZE(high_class_sizes) - -#define total_class_count (class_count + half_class_count + high_class_count) - -/* Below a0.x are normal regs. RA doesn't need to assign a0.x/p0.x. 
*/ -#define NUM_REGS (4 * 48) /* r0 to r47 */ -#define NUM_HIGH_REGS (4 * 8) /* r48 to r55 */ -#define FIRST_HIGH_REG (4 * 48) -/* Number of virtual regs in a given class: */ -#define CLASS_REGS(i) (NUM_REGS - (class_sizes[i] - 1)) -#define HALF_CLASS_REGS(i) (NUM_REGS - (half_class_sizes[i] - 1)) -#define HIGH_CLASS_REGS(i) (NUM_HIGH_REGS - (high_class_sizes[i] - 1)) - -#define HALF_OFFSET (class_count) -#define HIGH_OFFSET (class_count + half_class_count) - -/* register-set, created one time, used for all shaders: */ -struct ir3_ra_reg_set { - struct ra_regs *regs; - unsigned int classes[class_count]; - unsigned int half_classes[half_class_count]; - unsigned int high_classes[high_class_count]; - /* maps flat virtual register space to base gpr: */ - uint16_t *ra_reg_to_gpr; - /* maps cls,gpr to flat virtual register space: */ - uint16_t **gpr_to_ra_reg; -}; - -static void -build_q_values(unsigned int **q_values, unsigned off, - const unsigned *sizes, unsigned count) -{ - for (unsigned i = 0; i < count; i++) { - q_values[i + off] = rzalloc_array(q_values, unsigned, total_class_count); - - /* From register_allocate.c: - * - * q(B,C) (indexed by C, B is this register class) in - * Runeson/Nyström paper. This is "how many registers of B could - * the worst choice register from C conflict with". - * - * If we just let the register allocation algorithm compute these - * values, is extremely expensive. However, since all of our - * registers are laid out, we can very easily compute them - * ourselves. View the register from C as fixed starting at GRF n - * somewhere in the middle, and the register from B as sliding back - * and forth. Then the first register to conflict from B is the - * one starting at n - class_size[B] + 1 and the last register to - * conflict will start at n + class_size[B] - 1. Therefore, the - * number of conflicts from B is class_size[B] + class_size[C] - 1. 
- * - * +-+-+-+-+-+-+ +-+-+-+-+-+-+ - * B | | | | | |n| --> | | | | | | | - * +-+-+-+-+-+-+ +-+-+-+-+-+-+ - * +-+-+-+-+-+ - * C |n| | | | | - * +-+-+-+-+-+ - * - * (Idea copied from brw_fs_reg_allocate.cpp) - */ - for (unsigned j = 0; j < count; j++) - q_values[i + off][j + off] = sizes[i] + sizes[j] - 1; - } -} - -/* One-time setup of RA register-set, which describes all the possible - * "virtual" registers and their interferences. Ie. double register - * occupies (and conflicts with) two single registers, and so forth. - * Since registers do not need to be aligned to their class size, they - * can conflict with other registers in the same class too. Ie: - * - * Single (base) | Double - * --------------+--------------- - * R0 | D0 - * R1 | D0 D1 - * R2 | D1 D2 - * R3 | D2 - * .. and so on.. - * - * (NOTE the disassembler uses notation like r0.x/y/z/w but those are - * really just four scalar registers. Don't let that confuse you.) - */ -struct ir3_ra_reg_set * -ir3_ra_alloc_reg_set(struct ir3_compiler *compiler) -{ - struct ir3_ra_reg_set *set = rzalloc(compiler, struct ir3_ra_reg_set); - unsigned ra_reg_count, reg, first_half_reg, first_high_reg, base; - unsigned int **q_values; - - /* calculate # of regs across all classes: */ - ra_reg_count = 0; - for (unsigned i = 0; i < class_count; i++) - ra_reg_count += CLASS_REGS(i); - for (unsigned i = 0; i < half_class_count; i++) - ra_reg_count += HALF_CLASS_REGS(i); - for (unsigned i = 0; i < high_class_count; i++) - ra_reg_count += HIGH_CLASS_REGS(i); - - /* allocate and populate q_values: */ - q_values = ralloc_array(set, unsigned *, total_class_count); - - build_q_values(q_values, 0, class_sizes, class_count); - build_q_values(q_values, HALF_OFFSET, half_class_sizes, half_class_count); - build_q_values(q_values, HIGH_OFFSET, high_class_sizes, high_class_count); - - /* allocate the reg-set.. 
*/ - set->regs = ra_alloc_reg_set(set, ra_reg_count, true); - set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count); - set->gpr_to_ra_reg = ralloc_array(set, uint16_t *, total_class_count); - - /* .. and classes */ - reg = 0; - for (unsigned i = 0; i < class_count; i++) { - set->classes[i] = ra_alloc_reg_class(set->regs); - - set->gpr_to_ra_reg[i] = ralloc_array(set, uint16_t, CLASS_REGS(i)); - - for (unsigned j = 0; j < CLASS_REGS(i); j++) { - ra_class_add_reg(set->regs, set->classes[i], reg); - - set->ra_reg_to_gpr[reg] = j; - set->gpr_to_ra_reg[i][j] = reg; - - for (unsigned br = j; br < j + class_sizes[i]; br++) - ra_add_transitive_reg_conflict(set->regs, br, reg); - - reg++; - } - } - - first_half_reg = reg; - base = HALF_OFFSET; - - for (unsigned i = 0; i < half_class_count; i++) { - set->half_classes[i] = ra_alloc_reg_class(set->regs); - - set->gpr_to_ra_reg[base + i] = - ralloc_array(set, uint16_t, HALF_CLASS_REGS(i)); - - for (unsigned j = 0; j < HALF_CLASS_REGS(i); j++) { - ra_class_add_reg(set->regs, set->half_classes[i], reg); - - set->ra_reg_to_gpr[reg] = j; - set->gpr_to_ra_reg[base + i][j] = reg; - - for (unsigned br = j; br < j + half_class_sizes[i]; br++) - ra_add_transitive_reg_conflict(set->regs, br + first_half_reg, reg); - - reg++; - } - } - - first_high_reg = reg; - base = HIGH_OFFSET; - - for (unsigned i = 0; i < high_class_count; i++) { - set->high_classes[i] = ra_alloc_reg_class(set->regs); - - set->gpr_to_ra_reg[base + i] = - ralloc_array(set, uint16_t, HIGH_CLASS_REGS(i)); - - for (unsigned j = 0; j < HIGH_CLASS_REGS(i); j++) { - ra_class_add_reg(set->regs, set->high_classes[i], reg); - - set->ra_reg_to_gpr[reg] = j; - set->gpr_to_ra_reg[base + i][j] = reg; - - for (unsigned br = j; br < j + high_class_sizes[i]; br++) - ra_add_transitive_reg_conflict(set->regs, br + first_high_reg, reg); - - reg++; - } - } - - /* starting a6xx, half precision regs conflict w/ full precision regs: */ - if (compiler->gpu_id >= 600) { - /* because 
of transitivity, we can get away with just setting up - * conflicts between the first class of full and half regs: - */ - for (unsigned j = 0; j < CLASS_REGS(0) / 2; j++) { - unsigned freg = set->gpr_to_ra_reg[0][j]; - unsigned hreg0 = set->gpr_to_ra_reg[HALF_OFFSET][(j * 2) + 0]; - unsigned hreg1 = set->gpr_to_ra_reg[HALF_OFFSET][(j * 2) + 1]; - - ra_add_transitive_reg_conflict(set->regs, freg, hreg0); - ra_add_transitive_reg_conflict(set->regs, freg, hreg1); - } - - // TODO also need to update q_values, but for now: - ra_set_finalize(set->regs, NULL); - } else { - ra_set_finalize(set->regs, q_values); - } - - ralloc_free(q_values); - - return set; -} - -/* additional block-data (per-block) */ -struct ir3_ra_block_data { - BITSET_WORD *def; /* variables defined before used in block */ - BITSET_WORD *use; /* variables used before defined in block */ - BITSET_WORD *livein; /* which defs reach entry point of block */ - BITSET_WORD *liveout; /* which defs reach exit point of block */ -}; - -/* additional instruction-data (per-instruction) */ -struct ir3_ra_instr_data { - /* cached instruction 'definer' info: */ - struct ir3_instruction *defn; - int off, sz, cls; -}; - -/* register-assign context, per-shader */ -struct ir3_ra_ctx { - struct ir3 *ir; - enum shader_t type; - bool frag_face; - - struct ir3_ra_reg_set *set; - struct ra_graph *g; - unsigned alloc_count; - /* one per class, plus one slot for arrays: */ - unsigned class_alloc_count[total_class_count + 1]; - unsigned class_base[total_class_count + 1]; - unsigned instr_cnt; - unsigned *def, *use; /* def/use table */ - struct ir3_ra_instr_data *instrd; -}; - -/* does it conflict? 
*/ -static inline bool -intersects(unsigned a_start, unsigned a_end, unsigned b_start, unsigned b_end) -{ - return !((a_start >= b_end) || (b_start >= a_end)); -} - -static bool -is_half(struct ir3_instruction *instr) -{ - return !!(instr->regs[0]->flags & IR3_REG_HALF); -} - -static bool -is_high(struct ir3_instruction *instr) -{ - return !!(instr->regs[0]->flags & IR3_REG_HIGH); -} - -static int -size_to_class(unsigned sz, bool half, bool high) -{ - if (high) { - for (unsigned i = 0; i < high_class_count; i++) - if (high_class_sizes[i] >= sz) - return i + HIGH_OFFSET; - } else if (half) { - for (unsigned i = 0; i < half_class_count; i++) - if (half_class_sizes[i] >= sz) - return i + HALF_OFFSET; - } else { - for (unsigned i = 0; i < class_count; i++) - if (class_sizes[i] >= sz) - return i; - } - debug_assert(0); - return -1; -} - -static bool -writes_gpr(struct ir3_instruction *instr) -{ - if (is_store(instr)) - return false; - /* is dest a normal temp register: */ - struct ir3_register *reg = instr->regs[0]; - if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED)) - return false; - if ((reg->num == regid(REG_A0, 0)) || - (reg->num == regid(REG_P0, 0))) - return false; - return true; -} - -static bool -instr_before(struct ir3_instruction *a, struct ir3_instruction *b) -{ - if (a->flags & IR3_INSTR_UNUSED) - return false; - return (a->ip < b->ip); -} - -static struct ir3_instruction * -get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, - int *sz, int *off) -{ - struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; - struct ir3_instruction *d = NULL; - - if (id->defn) { - *sz = id->sz; - *off = id->off; - return id->defn; - } - - if (instr->opc == OPC_META_FI) { - /* What about the case where collect is subset of array, we - * need to find the distance between where actual array starts - * and fanin.. that probably doesn't happen currently. 
- */ - struct ir3_register *src; - int dsz, doff; - - /* note: don't use foreach_ssa_src as this gets called once - * while assigning regs (which clears SSA flag) - */ - foreach_src_n(src, n, instr) { - struct ir3_instruction *dd; - if (!src->instr) - continue; - - dd = get_definer(ctx, src->instr, &dsz, &doff); - - if ((!d) || instr_before(dd, d)) { - d = dd; - *sz = dsz; - *off = doff - n; - } - } - - } else if (instr->cp.right || instr->cp.left) { - /* covers also the meta:fo case, which ends up w/ single - * scalar instructions for each component: - */ - struct ir3_instruction *f = ir3_neighbor_first(instr); - - /* by definition, the entire sequence forms one linked list - * of single scalar register nodes (even if some of them may - * be fanouts from a texture sample (for example) instr. We - * just need to walk the list finding the first element of - * the group defined (lowest ip) - */ - int cnt = 0; - - /* need to skip over unused in the group: */ - while (f && (f->flags & IR3_INSTR_UNUSED)) { - f = f->cp.right; - cnt++; - } - - while (f) { - if ((!d) || instr_before(f, d)) - d = f; - if (f == instr) - *off = cnt; - f = f->cp.right; - cnt++; - } - - *sz = cnt; - - } else { - /* second case is looking directly at the instruction which - * produces multiple values (eg, texture sample), rather - * than the fanout nodes that point back to that instruction. - * This isn't quite right, because it may be part of a larger - * group, such as: - * - * sam (f32)(xyzw)r0.x, ... - * add r1.x, ... - * add r1.y, ... - * sam (f32)(xyzw)r2.x, r0.w <-- (r0.w, r1.x, r1.y) - * - * need to come up with a better way to handle that case. 
- */ - if (instr->address) { - *sz = instr->regs[0]->size; - } else { - *sz = util_last_bit(instr->regs[0]->wrmask); - } - *off = 0; - d = instr; - } - - if (d->opc == OPC_META_FO) { - struct ir3_instruction *dd; - int dsz, doff; - - dd = get_definer(ctx, d->regs[1]->instr, &dsz, &doff); - - /* by definition, should come before: */ - debug_assert(instr_before(dd, d)); - - *sz = MAX2(*sz, dsz); - - debug_assert(instr->opc == OPC_META_FO); - *off = MAX2(*off, instr->fo.off); - - d = dd; - } - - id->defn = d; - id->sz = *sz; - id->off = *off; - - return d; -} - -static void -ra_block_find_definers(struct ir3_ra_ctx *ctx, struct ir3_block *block) -{ - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; - if (instr->regs_count == 0) - continue; - /* couple special cases: */ - if (writes_addr(instr) || writes_pred(instr)) { - id->cls = -1; - } else if (instr->regs[0]->flags & IR3_REG_ARRAY) { - id->cls = total_class_count; - } else { - id->defn = get_definer(ctx, instr, &id->sz, &id->off); - id->cls = size_to_class(id->sz, is_half(id->defn), is_high(id->defn)); - } - } -} - -/* give each instruction a name (and ip), and count up the # of names - * of each class - */ -static void -ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block) -{ - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; - -#ifdef DEBUG - instr->name = ~0; -#endif - - ctx->instr_cnt++; - - if (instr->regs_count == 0) - continue; - - if (!writes_gpr(instr)) - continue; - - if (id->defn != instr) - continue; - - /* arrays which don't fit in one of the pre-defined class - * sizes are pre-colored: - */ - if ((id->cls >= 0) && (id->cls < total_class_count)) { - instr->name = ctx->class_alloc_count[id->cls]++; - ctx->alloc_count++; - } - } -} - -static void -ra_init(struct ir3_ra_ctx *ctx) -{ - unsigned n, base; - - 
ir3_clear_mark(ctx->ir); - n = ir3_count_instructions(ctx->ir); - - ctx->instrd = rzalloc_array(NULL, struct ir3_ra_instr_data, n); - - list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { - ra_block_find_definers(ctx, block); - } - - list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { - ra_block_name_instructions(ctx, block); - } - - /* figure out the base register name for each class. The - * actual ra name is class_base[cls] + instr->name; - */ - ctx->class_base[0] = 0; - for (unsigned i = 1; i <= total_class_count; i++) { - ctx->class_base[i] = ctx->class_base[i-1] + - ctx->class_alloc_count[i-1]; - } - - /* and vreg names for array elements: */ - base = ctx->class_base[total_class_count]; - list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) { - arr->base = base; - ctx->class_alloc_count[total_class_count] += arr->length; - base += arr->length; - } - ctx->alloc_count += ctx->class_alloc_count[total_class_count]; - - ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count); - ralloc_steal(ctx->g, ctx->instrd); - ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count); - ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count); -} - -static unsigned -__ra_name(struct ir3_ra_ctx *ctx, int cls, struct ir3_instruction *defn) -{ - unsigned name; - debug_assert(cls >= 0); - debug_assert(cls < total_class_count); /* we shouldn't get arrays here.. 
*/ - name = ctx->class_base[cls] + defn->name; - debug_assert(name < ctx->alloc_count); - return name; -} - -static int -ra_name(struct ir3_ra_ctx *ctx, struct ir3_ra_instr_data *id) -{ - /* TODO handle name mapping for arrays */ - return __ra_name(ctx, id->cls, id->defn); -} - -static void -ra_destroy(struct ir3_ra_ctx *ctx) -{ - ralloc_free(ctx->g); -} - -static void -ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) -{ - struct ir3_ra_block_data *bd; - unsigned bitset_words = BITSET_WORDS(ctx->alloc_count); - -#define def(name, instr) \ - do { \ - /* defined on first write: */ \ - if (!ctx->def[name]) \ - ctx->def[name] = instr->ip; \ - ctx->use[name] = instr->ip; \ - BITSET_SET(bd->def, name); \ - } while(0); - -#define use(name, instr) \ - do { \ - ctx->use[name] = MAX2(ctx->use[name], instr->ip); \ - if (!BITSET_TEST(bd->def, name)) \ - BITSET_SET(bd->use, name); \ - } while(0); - - bd = rzalloc(ctx->g, struct ir3_ra_block_data); - - bd->def = rzalloc_array(bd, BITSET_WORD, bitset_words); - bd->use = rzalloc_array(bd, BITSET_WORD, bitset_words); - bd->livein = rzalloc_array(bd, BITSET_WORD, bitset_words); - bd->liveout = rzalloc_array(bd, BITSET_WORD, bitset_words); - - block->data = bd; - - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - struct ir3_instruction *src; - struct ir3_register *reg; - - if (instr->regs_count == 0) - continue; - - /* There are a couple special cases to deal with here: - * - * fanout: used to split values from a higher class to a lower - * class, for example split the results of a texture fetch - * into individual scalar values; We skip over these from - * a 'def' perspective, and for a 'use' we walk the chain - * up to the defining instruction. - * - * fanin: used to collect values from lower class and assemble - * them together into a higher class, for example arguments - * to texture sample instructions; We consider these to be - * defined at the earliest fanin source. 
- * - * Most of this is handled in the get_definer() helper. - * - * In either case, we trace the instruction back to the original - * definer and consider that as the def/use ip. - */ - - if (writes_gpr(instr)) { - struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; - struct ir3_register *dst = instr->regs[0]; - - if (dst->flags & IR3_REG_ARRAY) { - struct ir3_array *arr = - ir3_lookup_array(ctx->ir, dst->array.id); - unsigned i; - - arr->start_ip = MIN2(arr->start_ip, instr->ip); - arr->end_ip = MAX2(arr->end_ip, instr->ip); - - /* set the node class now.. in case we don't encounter - * this array dst again. From register_alloc algo's - * perspective, these are all single/scalar regs: - */ - for (i = 0; i < arr->length; i++) { - unsigned name = arr->base + i; - ra_set_node_class(ctx->g, name, ctx->set->classes[0]); - } - - /* indirect write is treated like a write to all array - * elements, since we don't know which one is actually - * written: - */ - if (dst->flags & IR3_REG_RELATIV) { - for (i = 0; i < arr->length; i++) { - unsigned name = arr->base + i; - def(name, instr); - } - } else { - unsigned name = arr->base + dst->array.offset; - def(name, instr); - } - - } else if (id->defn == instr) { - unsigned name = ra_name(ctx, id); - - /* since we are in SSA at this point: */ - debug_assert(!BITSET_TEST(bd->use, name)); - - def(name, id->defn); - - if (is_high(id->defn)) { - ra_set_node_class(ctx->g, name, - ctx->set->high_classes[id->cls - HIGH_OFFSET]); - } else if (is_half(id->defn)) { - ra_set_node_class(ctx->g, name, - ctx->set->half_classes[id->cls - HALF_OFFSET]); - } else { - ra_set_node_class(ctx->g, name, - ctx->set->classes[id->cls]); - } - } - } - - foreach_src(reg, instr) { - if (reg->flags & IR3_REG_ARRAY) { - struct ir3_array *arr = - ir3_lookup_array(ctx->ir, reg->array.id); - arr->start_ip = MIN2(arr->start_ip, instr->ip); - arr->end_ip = MAX2(arr->end_ip, instr->ip); - - /* indirect read is treated like a read fromall array - * elements, 
since we don't know which one is actually - * read: - */ - if (reg->flags & IR3_REG_RELATIV) { - unsigned i; - for (i = 0; i < arr->length; i++) { - unsigned name = arr->base + i; - use(name, instr); - } - } else { - unsigned name = arr->base + reg->array.offset; - use(name, instr); - /* NOTE: arrays are not SSA so unconditionally - * set use bit: - */ - BITSET_SET(bd->use, name); - debug_assert(reg->array.offset < arr->length); - } - } else if ((src = ssa(reg)) && writes_gpr(src)) { - unsigned name = ra_name(ctx, &ctx->instrd[src->ip]); - use(name, instr); - } - } - } -} - -static bool -ra_compute_livein_liveout(struct ir3_ra_ctx *ctx) -{ - unsigned bitset_words = BITSET_WORDS(ctx->alloc_count); - bool progress = false; - - list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { - struct ir3_ra_block_data *bd = block->data; - - /* update livein: */ - for (unsigned i = 0; i < bitset_words; i++) { - BITSET_WORD new_livein = - (bd->use[i] | (bd->liveout[i] & ~bd->def[i])); - - if (new_livein & ~bd->livein[i]) { - bd->livein[i] |= new_livein; - progress = true; - } - } - - /* update liveout: */ - for (unsigned j = 0; j < ARRAY_SIZE(block->successors); j++) { - struct ir3_block *succ = block->successors[j]; - struct ir3_ra_block_data *succ_bd; - - if (!succ) - continue; - - succ_bd = succ->data; - - for (unsigned i = 0; i < bitset_words; i++) { - BITSET_WORD new_liveout = - (succ_bd->livein[i] & ~bd->liveout[i]); - - if (new_liveout) { - bd->liveout[i] |= new_liveout; - progress = true; - } - } - } - } - - return progress; -} - -static void -print_bitset(const char *name, BITSET_WORD *bs, unsigned cnt) -{ - bool first = true; - debug_printf(" %s:", name); - for (unsigned i = 0; i < cnt; i++) { - if (BITSET_TEST(bs, i)) { - if (!first) - debug_printf(","); - debug_printf(" %04u", i); - first = false; - } - } - debug_printf("\n"); -} - -static void -ra_add_interference(struct ir3_ra_ctx *ctx) -{ - struct ir3 *ir = ctx->ir; - - /* initialize array live 
ranges: */ - list_for_each_entry (struct ir3_array, arr, &ir->array_list, node) { - arr->start_ip = ~0; - arr->end_ip = 0; - } - - /* compute live ranges (use/def) on a block level, also updating - * block's def/use bitmasks (used below to calculate per-block - * livein/liveout): - */ - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - ra_block_compute_live_ranges(ctx, block); - } - - /* update per-block livein/liveout: */ - while (ra_compute_livein_liveout(ctx)) {} - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - debug_printf("AFTER LIVEIN/OUT:\n"); - ir3_print(ir); - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - struct ir3_ra_block_data *bd = block->data; - debug_printf("block%u:\n", block_id(block)); - print_bitset(" def", bd->def, ctx->alloc_count); - print_bitset(" use", bd->use, ctx->alloc_count); - print_bitset(" l/i", bd->livein, ctx->alloc_count); - print_bitset(" l/o", bd->liveout, ctx->alloc_count); - } - list_for_each_entry (struct ir3_array, arr, &ir->array_list, node) { - debug_printf("array%u:\n", arr->id); - debug_printf(" length: %u\n", arr->length); - debug_printf(" start_ip: %u\n", arr->start_ip); - debug_printf(" end_ip: %u\n", arr->end_ip); - } - } - - /* extend start/end ranges based on livein/liveout info from cfg: */ - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - struct ir3_ra_block_data *bd = block->data; - - for (unsigned i = 0; i < ctx->alloc_count; i++) { - if (BITSET_TEST(bd->livein, i)) { - ctx->def[i] = MIN2(ctx->def[i], block->start_ip); - ctx->use[i] = MAX2(ctx->use[i], block->start_ip); - } - - if (BITSET_TEST(bd->liveout, i)) { - ctx->def[i] = MIN2(ctx->def[i], block->end_ip); - ctx->use[i] = MAX2(ctx->use[i], block->end_ip); - } - } - - list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) { - for (unsigned i = 0; i < arr->length; i++) { - if (BITSET_TEST(bd->livein, i + arr->base)) { - arr->start_ip = MIN2(arr->start_ip, block->start_ip); 
- } - if (BITSET_TEST(bd->livein, i + arr->base)) { - arr->end_ip = MAX2(arr->end_ip, block->end_ip); - } - } - } - } - - /* need to fix things up to keep outputs live: */ - for (unsigned i = 0; i < ir->noutputs; i++) { - struct ir3_instruction *instr = ir->outputs[i]; - unsigned name = ra_name(ctx, &ctx->instrd[instr->ip]); - ctx->use[name] = ctx->instr_cnt; - } - - for (unsigned i = 0; i < ctx->alloc_count; i++) { - for (unsigned j = 0; j < ctx->alloc_count; j++) { - if (intersects(ctx->def[i], ctx->use[i], - ctx->def[j], ctx->use[j])) { - ra_add_node_interference(ctx->g, i, j); - } - } - } -} - -/* some instructions need fix-up if dst register is half precision: */ -static void fixup_half_instr_dst(struct ir3_instruction *instr) -{ - switch (opc_cat(instr->opc)) { - case 1: /* move instructions */ - instr->cat1.dst_type = half_type(instr->cat1.dst_type); - break; - case 3: - switch (instr->opc) { - case OPC_MAD_F32: - instr->opc = OPC_MAD_F16; - break; - case OPC_SEL_B32: - instr->opc = OPC_SEL_B16; - break; - case OPC_SEL_S32: - instr->opc = OPC_SEL_S16; - break; - case OPC_SEL_F32: - instr->opc = OPC_SEL_F16; - break; - case OPC_SAD_S32: - instr->opc = OPC_SAD_S16; - break; - /* instructions may already be fixed up: */ - case OPC_MAD_F16: - case OPC_SEL_B16: - case OPC_SEL_S16: - case OPC_SEL_F16: - case OPC_SAD_S16: - break; - default: - assert(0); - break; - } - break; - case 5: - instr->cat5.type = half_type(instr->cat5.type); - break; - } -} -/* some instructions need fix-up if src register is half precision: */ -static void fixup_half_instr_src(struct ir3_instruction *instr) -{ - switch (instr->opc) { - case OPC_MOV: - instr->cat1.src_type = half_type(instr->cat1.src_type); - break; - default: - break; - } -} - -/* NOTE: instr could be NULL for IR3_REG_ARRAY case, for the first - * array access(es) which do not have any previous access to depend - * on from scheduling point of view - */ -static void -reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register 
*reg, - struct ir3_instruction *instr) -{ - struct ir3_ra_instr_data *id; - - if (reg->flags & IR3_REG_ARRAY) { - struct ir3_array *arr = - ir3_lookup_array(ctx->ir, reg->array.id); - unsigned name = arr->base + reg->array.offset; - unsigned r = ra_get_node_reg(ctx->g, name); - unsigned num = ctx->set->ra_reg_to_gpr[r]; - - if (reg->flags & IR3_REG_RELATIV) { - reg->array.offset = num; - } else { - reg->num = num; - reg->flags &= ~IR3_REG_SSA; - } - - reg->flags &= ~IR3_REG_ARRAY; - } else if ((id = &ctx->instrd[instr->ip]) && id->defn) { - unsigned name = ra_name(ctx, id); - unsigned r = ra_get_node_reg(ctx->g, name); - unsigned num = ctx->set->ra_reg_to_gpr[r] + id->off; - - debug_assert(!(reg->flags & IR3_REG_RELATIV)); - - if (is_high(id->defn)) - num += FIRST_HIGH_REG; - - reg->num = num; - reg->flags &= ~IR3_REG_SSA; - - if (is_half(id->defn)) - reg->flags |= IR3_REG_HALF; - } -} - -static void -ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block) -{ - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - struct ir3_register *reg; - - if (instr->regs_count == 0) - continue; - - if (writes_gpr(instr)) { - reg_assign(ctx, instr->regs[0], instr); - if (instr->regs[0]->flags & IR3_REG_HALF) - fixup_half_instr_dst(instr); - } - - foreach_src_n(reg, n, instr) { - struct ir3_instruction *src = reg->instr; - /* Note: reg->instr could be null for IR3_REG_ARRAY */ - if (!(src || (reg->flags & IR3_REG_ARRAY))) - continue; - reg_assign(ctx, instr->regs[n+1], src); - if (instr->regs[n+1]->flags & IR3_REG_HALF) - fixup_half_instr_src(instr); - } - } -} - -static int -ra_alloc(struct ir3_ra_ctx *ctx) -{ - /* pre-assign array elements: - */ - list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) { - unsigned base = 0; - - if (arr->end_ip == 0) - continue; - - /* figure out what else we conflict with which has already - * been assigned: - */ -retry: - list_for_each_entry (struct ir3_array, arr2, &ctx->ir->array_list, 
node) { - if (arr2 == arr) - break; - if (arr2->end_ip == 0) - continue; - /* if it intersects with liverange AND register range.. */ - if (intersects(arr->start_ip, arr->end_ip, - arr2->start_ip, arr2->end_ip) && - intersects(base, base + arr->length, - arr2->reg, arr2->reg + arr2->length)) { - base = MAX2(base, arr2->reg + arr2->length); - goto retry; - } - } - - arr->reg = base; - - for (unsigned i = 0; i < arr->length; i++) { - unsigned name, reg; - - name = arr->base + i; - reg = ctx->set->gpr_to_ra_reg[0][base++]; - - ra_set_node_reg(ctx->g, name, reg); - } - } - - if (!ra_allocate(ctx->g)) - return -1; - - list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { - ra_block_alloc(ctx, block); - } - - return 0; -} - -int ir3_ra(struct ir3 *ir, enum shader_t type, - bool frag_coord, bool frag_face) -{ - struct ir3_ra_ctx ctx = { - .ir = ir, - .type = type, - .frag_face = frag_face, - .set = ir->compiler->set, - }; - int ret; - - ra_init(&ctx); - ra_add_interference(&ctx); - ret = ra_alloc(&ctx); - ra_destroy(&ctx); - - return ret; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_sched.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_sched.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_sched.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_sched.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,818 +0,0 @@ -/* - * Copyright (C) 2014 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * 
paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - - -#include "util/u_math.h" - -#include "ir3.h" - -/* - * Instruction Scheduling: - * - * A recursive depth based scheduling algo. Recursively find an eligible - * instruction to schedule from the deepest instruction (recursing through - * it's unscheduled src instructions). Normally this would result in a - * lot of re-traversal of the same instructions, so we cache results in - * instr->data (and clear cached results that would be no longer valid - * after scheduling an instruction). - * - * There are a few special cases that need to be handled, since sched - * is currently independent of register allocation. Usages of address - * register (a0.x) or predicate register (p0.x) must be serialized. Ie. - * if you have two pairs of instructions that write the same special - * register and then read it, then those pairs cannot be interleaved. - * To solve this, when we are in such a scheduling "critical section", - * and we encounter a conflicting write to a special register, we try - * to schedule any remaining instructions that use that value first. 
- */ - -struct ir3_sched_ctx { - struct ir3_block *block; /* the current block */ - struct list_head depth_list; /* depth sorted unscheduled instrs */ - struct ir3_instruction *scheduled; /* last scheduled instr XXX remove*/ - struct ir3_instruction *addr; /* current a0.x user, if any */ - struct ir3_instruction *pred; /* current p0.x user, if any */ - bool error; -}; - -static bool is_sfu_or_mem(struct ir3_instruction *instr) -{ - return is_sfu(instr) || is_mem(instr); -} - -#define NULL_INSTR ((void *)~0) - -static void -clear_cache(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) -{ - list_for_each_entry (struct ir3_instruction, instr2, &ctx->depth_list, node) { - if ((instr2->data == instr) || (instr2->data == NULL_INSTR) || !instr) - instr2->data = NULL; - } -} - -static void -schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) -{ - debug_assert(ctx->block == instr->block); - - /* maybe there is a better way to handle this than just stuffing - * a nop.. ideally we'd know about this constraint in the - * scheduling and depth calculation.. - */ - if (ctx->scheduled && is_sfu_or_mem(ctx->scheduled) && is_sfu_or_mem(instr)) - ir3_NOP(ctx->block); - - /* remove from depth list: - */ - list_delinit(&instr->node); - - if (writes_addr(instr)) { - debug_assert(ctx->addr == NULL); - ctx->addr = instr; - } - - if (writes_pred(instr)) { - debug_assert(ctx->pred == NULL); - ctx->pred = instr; - } - - instr->flags |= IR3_INSTR_MARK; - - list_addtail(&instr->node, &instr->block->instr_list); - ctx->scheduled = instr; - - if (writes_addr(instr) || writes_pred(instr) || is_input(instr)) { - clear_cache(ctx, NULL); - } else { - /* invalidate only the necessary entries.. 
*/ - clear_cache(ctx, instr); - } -} - -static struct ir3_instruction * -deepest(struct ir3_instruction **srcs, unsigned nsrcs) -{ - struct ir3_instruction *d = NULL; - unsigned i = 0, id = 0; - - while ((i < nsrcs) && !(d = srcs[id = i])) - i++; - - if (!d) - return NULL; - - for (; i < nsrcs; i++) - if (srcs[i] && (srcs[i]->depth > d->depth)) - d = srcs[id = i]; - - srcs[id] = NULL; - - return d; -} - -/** - * @block: the block to search in, starting from end; in first pass, - * this will be the block the instruction would be inserted into - * (but has not yet, ie. it only contains already scheduled - * instructions). For intra-block scheduling (second pass), this - * would be one of the predecessor blocks. - * @instr: the instruction to search for - * @maxd: max distance, bail after searching this # of instruction - * slots, since it means the instruction we are looking for is - * far enough away - * @pred: if true, recursively search into predecessor blocks to - * find the worst case (shortest) distance (only possible after - * individual blocks are all scheduled - */ -static unsigned -distance(struct ir3_block *block, struct ir3_instruction *instr, - unsigned maxd, bool pred) -{ - unsigned d = 0; - - list_for_each_entry_rev (struct ir3_instruction, n, &block->instr_list, node) { - if ((n == instr) || (d >= maxd)) - return d; - /* NOTE: don't count branch/jump since we don't know yet if they will - * be eliminated later in resolve_jumps().. really should do that - * earlier so we don't have this constraint. - */ - if (is_alu(n) || (is_flow(n) && (n->opc != OPC_JUMP) && (n->opc != OPC_BR))) - d++; - } - - /* if coming from a predecessor block, assume it is assigned far - * enough away.. we'll fix up later. 
- */ - if (!pred) - return maxd; - - if (pred && (block->data != block)) { - /* Search into predecessor blocks, finding the one with the - * shortest distance, since that will be the worst case - */ - unsigned min = maxd - d; - - /* (ab)use block->data to prevent recursion: */ - block->data = block; - - for (unsigned i = 0; i < block->predecessors_count; i++) { - unsigned n; - - n = distance(block->predecessors[i], instr, min, pred); - - min = MIN2(min, n); - } - - block->data = NULL; - d += min; - } - - return d; -} - -/* calculate delay for specified src: */ -static unsigned -delay_calc_srcn(struct ir3_block *block, - struct ir3_instruction *assigner, - struct ir3_instruction *consumer, - unsigned srcn, bool soft, bool pred) -{ - unsigned delay = 0; - - if (is_meta(assigner)) { - struct ir3_instruction *src; - foreach_ssa_src(src, assigner) { - unsigned d; - d = delay_calc_srcn(block, src, consumer, srcn, soft, pred); - delay = MAX2(delay, d); - } - } else { - if (soft) { - if (is_sfu(assigner)) { - delay = 4; - } else { - delay = ir3_delayslots(assigner, consumer, srcn); - } - } else { - delay = ir3_delayslots(assigner, consumer, srcn); - } - delay -= distance(block, assigner, delay, pred); - } - - return delay; -} - -/* calculate delay for instruction (maximum of delay for all srcs): */ -static unsigned -delay_calc(struct ir3_block *block, struct ir3_instruction *instr, - bool soft, bool pred) -{ - unsigned delay = 0; - struct ir3_instruction *src; - - foreach_ssa_src_n(src, i, instr) { - unsigned d; - d = delay_calc_srcn(block, src, instr, i, soft, pred); - delay = MAX2(delay, d); - } - - return delay; -} - -struct ir3_sched_notes { - /* there is at least one kill which could be scheduled, except - * for unscheduled bary.f's: - */ - bool blocked_kill; - /* there is at least one instruction that could be scheduled, - * except for conflicting address/predicate register usage: - */ - bool addr_conflict, pred_conflict; -}; - -static bool is_scheduled(struct 
ir3_instruction *instr) -{ - return !!(instr->flags & IR3_INSTR_MARK); -} - -/* could an instruction be scheduled if specified ssa src was scheduled? */ -static bool -could_sched(struct ir3_instruction *instr, struct ir3_instruction *src) -{ - struct ir3_instruction *other_src; - foreach_ssa_src(other_src, instr) { - /* if dependency not scheduled, we aren't ready yet: */ - if ((src != other_src) && !is_scheduled(other_src)) { - return false; - } - } - return true; -} - -/* Check if instruction is ok to schedule. Make sure it is not blocked - * by use of addr/predicate register, etc. - */ -static bool -check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, - struct ir3_instruction *instr) -{ - /* For instructions that write address register we need to - * make sure there is at least one instruction that uses the - * addr value which is otherwise ready. - * - * TODO if any instructions use pred register and have other - * src args, we would need to do the same for writes_pred().. - */ - if (writes_addr(instr)) { - struct ir3 *ir = instr->block->shader; - bool ready = false; - for (unsigned i = 0; (i < ir->indirects_count) && !ready; i++) { - struct ir3_instruction *indirect = ir->indirects[i]; - if (!indirect) - continue; - if (indirect->address != instr) - continue; - ready = could_sched(indirect, instr); - } - - /* nothing could be scheduled, so keep looking: */ - if (!ready) - return false; - } - - /* if this is a write to address/predicate register, and that - * register is currently in use, we need to defer until it is - * free: - */ - if (writes_addr(instr) && ctx->addr) { - debug_assert(ctx->addr != instr); - notes->addr_conflict = true; - return false; - } - - if (writes_pred(instr) && ctx->pred) { - debug_assert(ctx->pred != instr); - notes->pred_conflict = true; - return false; - } - - /* if the instruction is a kill, we need to ensure *every* - * bary.f is scheduled. 
The hw seems unhappy if the thread - * gets killed before the end-input (ei) flag is hit. - * - * We could do this by adding each bary.f instruction as - * virtual ssa src for the kill instruction. But we have - * fixed length instr->regs[]. - * - * TODO this wouldn't be quite right if we had multiple - * basic blocks, if any block was conditional. We'd need - * to schedule the bary.f's outside of any block which - * was conditional that contained a kill.. I think.. - */ - if (is_kill(instr)) { - struct ir3 *ir = instr->block->shader; - - for (unsigned i = 0; i < ir->baryfs_count; i++) { - struct ir3_instruction *baryf = ir->baryfs[i]; - if (baryf->flags & IR3_INSTR_UNUSED) - continue; - if (!is_scheduled(baryf)) { - notes->blocked_kill = true; - return false; - } - } - } - - return true; -} - -/* Find the best instruction to schedule from specified instruction or - * recursively it's ssa sources. - */ -static struct ir3_instruction * -find_instr_recursive(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, - struct ir3_instruction *instr) -{ - struct ir3_instruction *srcs[__ssa_src_cnt(instr)]; - struct ir3_instruction *src; - unsigned nsrcs = 0; - - if (is_scheduled(instr)) - return NULL; - - /* use instr->data to cache the results of recursing up the - * instr src's. Otherwise the recursive algo can scale quite - * badly w/ shader size. But this takes some care to clear - * the cache appropriately when instructions are scheduled. 
- */ - if (instr->data) { - if (instr->data == NULL_INSTR) - return NULL; - return instr->data; - } - - /* find unscheduled srcs: */ - foreach_ssa_src(src, instr) { - if (!is_scheduled(src)) { - debug_assert(nsrcs < ARRAY_SIZE(srcs)); - srcs[nsrcs++] = src; - } - } - - /* if all our src's are already scheduled: */ - if (nsrcs == 0) { - if (check_instr(ctx, notes, instr)) { - instr->data = instr; - return instr; - } - return NULL; - } - - while ((src = deepest(srcs, nsrcs))) { - struct ir3_instruction *candidate; - - candidate = find_instr_recursive(ctx, notes, src); - if (!candidate) - continue; - - if (check_instr(ctx, notes, candidate)) { - instr->data = candidate; - return candidate; - } - } - - instr->data = NULL_INSTR; - return NULL; -} - -/* find instruction to schedule: */ -static struct ir3_instruction * -find_eligible_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, - bool soft) -{ - struct ir3_instruction *best_instr = NULL; - unsigned min_delay = ~0; - - /* TODO we'd really rather use the list/array of block outputs. But we - * don't have such a thing. Recursing *every* instruction in the list - * will result in a lot of repeated traversal, since instructions will - * get traversed both when they appear as ssa src to a later instruction - * as well as where they appear in the depth_list. - */ - list_for_each_entry_rev (struct ir3_instruction, instr, &ctx->depth_list, node) { - struct ir3_instruction *candidate; - unsigned delay; - - candidate = find_instr_recursive(ctx, notes, instr); - if (!candidate) - continue; - - delay = delay_calc(ctx->block, candidate, soft, false); - if (delay < min_delay) { - best_instr = candidate; - min_delay = delay; - } - - if (min_delay == 0) - break; - } - - return best_instr; -} - -/* "spill" the address register by remapping any unscheduled - * instructions which depend on the current address register - * to a clone of the instruction which wrote the address reg. 
- */ -static struct ir3_instruction * -split_addr(struct ir3_sched_ctx *ctx) -{ - struct ir3 *ir; - struct ir3_instruction *new_addr = NULL; - unsigned i; - - debug_assert(ctx->addr); - - ir = ctx->addr->block->shader; - - for (i = 0; i < ir->indirects_count; i++) { - struct ir3_instruction *indirect = ir->indirects[i]; - - if (!indirect) - continue; - - /* skip instructions already scheduled: */ - if (is_scheduled(indirect)) - continue; - - /* remap remaining instructions using current addr - * to new addr: - */ - if (indirect->address == ctx->addr) { - if (!new_addr) { - new_addr = ir3_instr_clone(ctx->addr); - /* original addr is scheduled, but new one isn't: */ - new_addr->flags &= ~IR3_INSTR_MARK; - } - ir3_instr_set_address(indirect, new_addr); - } - } - - /* all remaining indirects remapped to new addr: */ - ctx->addr = NULL; - - return new_addr; -} - -/* "spill" the predicate register by remapping any unscheduled - * instructions which depend on the current predicate register - * to a clone of the instruction which wrote the address reg. - */ -static struct ir3_instruction * -split_pred(struct ir3_sched_ctx *ctx) -{ - struct ir3 *ir; - struct ir3_instruction *new_pred = NULL; - unsigned i; - - debug_assert(ctx->pred); - - ir = ctx->pred->block->shader; - - for (i = 0; i < ir->predicates_count; i++) { - struct ir3_instruction *predicated = ir->predicates[i]; - - /* skip instructions already scheduled: */ - if (is_scheduled(predicated)) - continue; - - /* remap remaining instructions using current pred - * to new pred: - * - * TODO is there ever a case when pred isn't first - * (and only) src? 
- */ - if (ssa(predicated->regs[1]) == ctx->pred) { - if (!new_pred) { - new_pred = ir3_instr_clone(ctx->pred); - /* original pred is scheduled, but new one isn't: */ - new_pred->flags &= ~IR3_INSTR_MARK; - } - predicated->regs[1]->instr = new_pred; - } - } - - /* all remaining predicated remapped to new pred: */ - ctx->pred = NULL; - - return new_pred; -} - -static void -sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) -{ - struct list_head unscheduled_list; - - ctx->block = block; - - /* addr/pred writes are per-block: */ - ctx->addr = NULL; - ctx->pred = NULL; - - /* move all instructions to the unscheduled list, and - * empty the block's instruction list (to which we will - * be inserting). - */ - list_replace(&block->instr_list, &unscheduled_list); - list_inithead(&block->instr_list); - list_inithead(&ctx->depth_list); - - /* first a pre-pass to schedule all meta:input instructions - * (which need to appear first so that RA knows the register is - * occupied), and move remaining to depth sorted list: - */ - list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) { - if (instr->opc == OPC_META_INPUT) { - schedule(ctx, instr); - } else { - ir3_insert_by_depth(instr, &ctx->depth_list); - } - } - - while (!list_empty(&ctx->depth_list)) { - struct ir3_sched_notes notes = {0}; - struct ir3_instruction *instr; - - instr = find_eligible_instr(ctx, ¬es, true); - if (!instr) - instr = find_eligible_instr(ctx, ¬es, false); - - if (instr) { - unsigned delay = delay_calc(ctx->block, instr, false, false); - - /* and if we run out of instructions that can be scheduled, - * then it is time for nop's: - */ - debug_assert(delay <= 6); - while (delay > 0) { - ir3_NOP(block); - delay--; - } - - schedule(ctx, instr); - } else { - struct ir3_instruction *new_instr = NULL; - - /* nothing available to schedule.. 
if we are blocked on - * address/predicate register conflict, then break the - * deadlock by cloning the instruction that wrote that - * reg: - */ - if (notes.addr_conflict) { - new_instr = split_addr(ctx); - } else if (notes.pred_conflict) { - new_instr = split_pred(ctx); - } else { - debug_assert(0); - ctx->error = true; - return; - } - - if (new_instr) { - /* clearing current addr/pred can change what is - * available to schedule, so clear cache.. - */ - clear_cache(ctx, NULL); - - ir3_insert_by_depth(new_instr, &ctx->depth_list); - /* the original instr that wrote addr/pred may have - * originated from a different block: - */ - new_instr->block = block; - } - } - } - - /* And lastly, insert branch/jump instructions to take us to - * the next block. Later we'll strip back out the branches - * that simply jump to next instruction. - */ - if (block->successors[1]) { - /* if/else, conditional branches to "then" or "else": */ - struct ir3_instruction *br; - unsigned delay = 6; - - debug_assert(ctx->pred); - debug_assert(block->condition); - - delay -= distance(ctx->block, ctx->pred, delay, false); - - while (delay > 0) { - ir3_NOP(block); - delay--; - } - - /* create "else" branch first (since "then" block should - * frequently/always end up being a fall-thru): - */ - br = ir3_BR(block); - br->cat0.inv = true; - br->cat0.target = block->successors[1]; - - /* NOTE: we have to hard code delay of 6 above, since - * we want to insert the nop's before constructing the - * branch. Throw in an assert so we notice if this - * ever breaks on future generation: - */ - debug_assert(ir3_delayslots(ctx->pred, br, 0) == 6); - - br = ir3_BR(block); - br->cat0.target = block->successors[0]; - - } else if (block->successors[0]) { - /* otherwise unconditional jump to next block: */ - struct ir3_instruction *jmp; - - jmp = ir3_JUMP(block); - jmp->cat0.target = block->successors[0]; - } - - /* NOTE: if we kept track of the predecessors, we could do a better - * job w/ (jp) flags.. 
every node w/ > predecessor is a join point. - * Note that as we eliminate blocks which contain only an unconditional - * jump we probably need to propagate (jp) flag.. - */ -} - -/* After scheduling individual blocks, we still could have cases where - * one (or more) paths into a block, a value produced by a previous - * has too few delay slots to be legal. We can't deal with this in the - * first pass, because loops (ie. we can't ensure all predecessor blocks - * are already scheduled in the first pass). All we can really do at - * this point is stuff in extra nop's until things are legal. - */ -static void -sched_intra_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) -{ - unsigned n = 0; - - ctx->block = block; - - list_for_each_entry_safe (struct ir3_instruction, instr, &block->instr_list, node) { - unsigned delay = 0; - - for (unsigned i = 0; i < block->predecessors_count; i++) { - unsigned d = delay_calc(block->predecessors[i], instr, false, true); - delay = MAX2(d, delay); - } - - while (delay > n) { - struct ir3_instruction *nop = ir3_NOP(block); - - /* move to before instr: */ - list_delinit(&nop->node); - list_addtail(&nop->node, &instr->node); - - n++; - } - - /* we can bail once we hit worst case delay: */ - if (++n > 6) - break; - } -} - -int ir3_sched(struct ir3 *ir) -{ - struct ir3_sched_ctx ctx = {0}; - - ir3_clear_mark(ir); - - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - sched_block(&ctx, block); - } - - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - sched_intra_block(&ctx, block); - } - - if (ctx.error) - return -1; - return 0; -} - -/* does instruction 'prior' need to be scheduled before 'instr'? 
*/ -static bool -depends_on(struct ir3_instruction *instr, struct ir3_instruction *prior) -{ - /* TODO for dependencies that are related to a specific object, ie - * a specific SSBO/image/array, we could relax this constraint to - * make accesses to unrelated objects not depend on each other (at - * least as long as not declared coherent) - */ - if (((instr->barrier_class & IR3_BARRIER_EVERYTHING) && prior->barrier_class) || - ((prior->barrier_class & IR3_BARRIER_EVERYTHING) && instr->barrier_class)) - return true; - return !!(instr->barrier_class & prior->barrier_conflict); -} - -static void -add_barrier_deps(struct ir3_block *block, struct ir3_instruction *instr) -{ - struct list_head *prev = instr->node.prev; - struct list_head *next = instr->node.next; - - /* add dependencies on previous instructions that must be scheduled - * prior to the current instruction - */ - while (prev != &block->instr_list) { - struct ir3_instruction *pi = - LIST_ENTRY(struct ir3_instruction, prev, node); - - prev = prev->prev; - - if (is_meta(pi)) - continue; - - if (instr->barrier_class == pi->barrier_class) { - ir3_instr_add_dep(instr, pi); - break; - } - - if (depends_on(instr, pi)) - ir3_instr_add_dep(instr, pi); - } - - /* add dependencies on this instruction to following instructions - * that must be scheduled after the current instruction: - */ - while (next != &block->instr_list) { - struct ir3_instruction *ni = - LIST_ENTRY(struct ir3_instruction, next, node); - - next = next->next; - - if (is_meta(ni)) - continue; - - if (instr->barrier_class == ni->barrier_class) { - ir3_instr_add_dep(ni, instr); - break; - } - - if (depends_on(ni, instr)) - ir3_instr_add_dep(ni, instr); - } -} - -/* before scheduling a block, we need to add any necessary false-dependencies - * to ensure that: - * - * (1) barriers are scheduled in the right order wrt instructions related - * to the barrier - * - * (2) reads that come before a write actually get scheduled before the - * write - */ -static 
void -calculate_deps(struct ir3_block *block) -{ - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - if (instr->barrier_class) { - add_barrier_deps(block, instr); - } - } -} - -void -ir3_sched_add_deps(struct ir3 *ir) -{ - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - calculate_deps(block); - } -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_shader.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_shader.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_shader.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_shader.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,1025 +0,0 @@ -/* - * Copyright (C) 2014 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#include "pipe/p_state.h" -#include "util/u_string.h" -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "util/u_format.h" -#include "tgsi/tgsi_dump.h" -#include "tgsi/tgsi_parse.h" - -#include "freedreno_context.h" -#include "freedreno_util.h" - -#include "ir3_shader.h" -#include "ir3_compiler.h" -#include "ir3_nir.h" - -int -ir3_glsl_type_size(const struct glsl_type *type) -{ - return glsl_count_attribute_slots(type, false); -} - -static void -delete_variant(struct ir3_shader_variant *v) -{ - if (v->ir) - ir3_destroy(v->ir); - if (v->bo) - fd_bo_del(v->bo); - if (v->immediates) - free(v->immediates); - free(v); -} - -/* for vertex shader, the inputs are loaded into registers before the shader - * is executed, so max_regs from the shader instructions might not properly - * reflect the # of registers actually used, especially in case passthrough - * varyings. - * - * Likewise, for fragment shader, we can have some regs which are passed - * input values but never touched by the resulting shader (ie. as result - * of dead code elimination or simply because we don't know how to turn - * the reg off. 
- */ -static void -fixup_regfootprint(struct ir3_shader_variant *v) -{ - unsigned i; - - for (i = 0; i < v->inputs_count; i++) { - /* skip frag inputs fetch via bary.f since their reg's are - * not written by gpu before shader starts (and in fact the - * regid's might not even be valid) - */ - if (v->inputs[i].bary) - continue; - - /* ignore high regs that are global to all threads in a warp - * (they exist by default) (a5xx+) - */ - if (v->inputs[i].regid >= regid(48,0)) - continue; - - if (v->inputs[i].compmask) { - unsigned n = util_last_bit(v->inputs[i].compmask) - 1; - int32_t regid = (v->inputs[i].regid + n) >> 2; - v->info.max_reg = MAX2(v->info.max_reg, regid); - } - } - - for (i = 0; i < v->outputs_count; i++) { - int32_t regid = (v->outputs[i].regid + 3) >> 2; - v->info.max_reg = MAX2(v->info.max_reg, regid); - } -} - -/* wrapper for ir3_assemble() which does some info fixup based on - * shader state. Non-static since used by ir3_cmdline too. - */ -void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id) -{ - void *bin; - - bin = ir3_assemble(v->ir, &v->info, gpu_id); - if (!bin) - return NULL; - - if (gpu_id >= 400) { - v->instrlen = v->info.sizedwords / (2 * 16); - } else { - v->instrlen = v->info.sizedwords / (2 * 4); - } - - /* NOTE: if relative addressing is used, we set constlen in - * the compiler (to worst-case value) since we don't know in - * the assembler what the max addr reg value can be: - */ - v->constlen = MIN2(255, MAX2(v->constlen, v->info.max_const + 1)); - - fixup_regfootprint(v); - - return bin; -} - -static void -assemble_variant(struct ir3_shader_variant *v) -{ - struct ir3_compiler *compiler = v->shader->compiler; - uint32_t gpu_id = compiler->gpu_id; - uint32_t sz, *bin; - - bin = ir3_shader_assemble(v, gpu_id); - sz = v->info.sizedwords * 4; - - v->bo = fd_bo_new(compiler->dev, sz, - DRM_FREEDRENO_GEM_CACHE_WCOMBINE | - DRM_FREEDRENO_GEM_TYPE_KMEM); - - memcpy(fd_bo_map(v->bo), bin, sz); - - if (fd_mesa_debug & 
FD_DBG_DISASM) { - struct ir3_shader_key key = v->key; - printf("disassemble: type=%d, k={bp=%u,cts=%u,hp=%u}", v->type, - v->binning_pass, key.color_two_side, key.half_precision); - ir3_shader_disasm(v, bin, stdout); - } - - if (shader_debug_enabled(v->shader->type)) { - fprintf(stderr, "Native code for unnamed %s shader %s:\n", - shader_stage_name(v->shader->type), v->shader->nir->info.name); - if (v->shader->type == SHADER_FRAGMENT) - fprintf(stderr, "SIMD0\n"); - ir3_shader_disasm(v, bin, stderr); - } - - free(bin); - - /* no need to keep the ir around beyond this point: */ - ir3_destroy(v->ir); - v->ir = NULL; -} - -static void -dump_shader_info(struct ir3_shader_variant *v, struct pipe_debug_callback *debug) -{ - if (!unlikely(fd_mesa_debug & FD_DBG_SHADERDB)) - return; - - pipe_debug_message(debug, SHADER_INFO, "\n" - "SHADER-DB: %s prog %d/%d: %u instructions, %u dwords\n" - "SHADER-DB: %s prog %d/%d: %u half, %u full\n" - "SHADER-DB: %s prog %d/%d: %u const, %u constlen\n" - "SHADER-DB: %s prog %d/%d: %u (ss), %u (sy)\n", - ir3_shader_stage(v->shader), - v->shader->id, v->id, - v->info.instrs_count, - v->info.sizedwords, - ir3_shader_stage(v->shader), - v->shader->id, v->id, - v->info.max_half_reg + 1, - v->info.max_reg + 1, - ir3_shader_stage(v->shader), - v->shader->id, v->id, - v->info.max_const + 1, - v->constlen, - ir3_shader_stage(v->shader), - v->shader->id, v->id, - v->info.ss, v->info.sy); -} - -static struct ir3_shader_variant * -create_variant(struct ir3_shader *shader, struct ir3_shader_key key, - bool binning_pass) -{ - struct ir3_shader_variant *v = CALLOC_STRUCT(ir3_shader_variant); - int ret; - - if (!v) - return NULL; - - v->id = ++shader->variant_count; - v->shader = shader; - v->binning_pass = binning_pass; - v->key = key; - v->type = shader->type; - - ret = ir3_compile_shader_nir(shader->compiler, v); - if (ret) { - debug_error("compile failed!"); - goto fail; - } - - assemble_variant(v); - if (!v->bo) { - debug_error("assemble 
failed!"); - goto fail; - } - - return v; - -fail: - delete_variant(v); - return NULL; -} - -static inline struct ir3_shader_variant * -shader_variant(struct ir3_shader *shader, struct ir3_shader_key key, - struct pipe_debug_callback *debug) -{ - struct ir3_shader_variant *v; - - /* some shader key values only apply to vertex or frag shader, - * so normalize the key to avoid constructing multiple identical - * variants: - */ - switch (shader->type) { - case SHADER_FRAGMENT: - if (key.has_per_samp) { - key.vsaturate_s = 0; - key.vsaturate_t = 0; - key.vsaturate_r = 0; - key.vastc_srgb = 0; - key.vsamples = 0; - } - break; - case SHADER_VERTEX: - key.color_two_side = false; - key.half_precision = false; - key.rasterflat = false; - if (key.has_per_samp) { - key.fsaturate_s = 0; - key.fsaturate_t = 0; - key.fsaturate_r = 0; - key.fastc_srgb = 0; - key.fsamples = 0; - } - break; - default: - /* TODO */ - break; - } - - for (v = shader->variants; v; v = v->next) - if (ir3_shader_key_equal(&key, &v->key)) - return v; - - /* compile new variant if it doesn't exist already: */ - v = create_variant(shader, key, false); - if (v) { - v->next = shader->variants; - shader->variants = v; - dump_shader_info(v, debug); - } - - return v; -} - - -struct ir3_shader_variant * -ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key, - bool binning_pass, struct pipe_debug_callback *debug) -{ - struct ir3_shader_variant *v = - shader_variant(shader, key, debug); - - if (binning_pass) { - if (!v->binning) - v->binning = create_variant(shader, key, true); - return v->binning; - } - - return v; -} - -void -ir3_shader_destroy(struct ir3_shader *shader) -{ - struct ir3_shader_variant *v, *t; - for (v = shader->variants; v; ) { - t = v; - v = v->next; - delete_variant(t); - } - ralloc_free(shader->nir); - free(shader); -} - -struct ir3_shader * -ir3_shader_create(struct ir3_compiler *compiler, - const struct pipe_shader_state *cso, enum shader_t type, - struct 
pipe_debug_callback *debug) -{ - struct ir3_shader *shader = CALLOC_STRUCT(ir3_shader); - shader->compiler = compiler; - shader->id = ++shader->compiler->shader_count; - shader->type = type; - - nir_shader *nir; - if (cso->type == PIPE_SHADER_IR_NIR) { - /* we take ownership of the reference: */ - nir = cso->ir.nir; - } else { - debug_assert(cso->type == PIPE_SHADER_IR_TGSI); - if (fd_mesa_debug & FD_DBG_DISASM) { - DBG("dump tgsi: type=%d", shader->type); - tgsi_dump(cso->tokens, 0); - } - nir = ir3_tgsi_to_nir(cso->tokens); - } - NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size, - (nir_lower_io_options)0); - /* do first pass optimization, ignoring the key: */ - shader->nir = ir3_optimize_nir(shader, nir, NULL); - if (fd_mesa_debug & FD_DBG_DISASM) { - DBG("dump nir%d: type=%d", shader->id, shader->type); - nir_print_shader(shader->nir, stdout); - } - - shader->stream_output = cso->stream_output; - if (fd_mesa_debug & FD_DBG_SHADERDB) { - /* if shader-db run, create a standard variant immediately - * (as otherwise nothing will trigger the shader to be - * actually compiled) - */ - static struct ir3_shader_key key; - memset(&key, 0, sizeof(key)); - ir3_shader_variant(shader, key, false, debug); - } - return shader; -} - -/* a bit annoying that compute-shader and normal shader state objects - * aren't a bit more aligned. 
- */ -struct ir3_shader * -ir3_shader_create_compute(struct ir3_compiler *compiler, - const struct pipe_compute_state *cso, - struct pipe_debug_callback *debug) -{ - struct ir3_shader *shader = CALLOC_STRUCT(ir3_shader); - - shader->compiler = compiler; - shader->id = ++shader->compiler->shader_count; - shader->type = SHADER_COMPUTE; - - nir_shader *nir; - if (cso->ir_type == PIPE_SHADER_IR_NIR) { - /* we take ownership of the reference: */ - nir = (nir_shader *)cso->prog; - - NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size, - (nir_lower_io_options)0); - } else { - debug_assert(cso->ir_type == PIPE_SHADER_IR_TGSI); - if (fd_mesa_debug & FD_DBG_DISASM) { - DBG("dump tgsi: type=%d", shader->type); - tgsi_dump(cso->prog, 0); - } - nir = ir3_tgsi_to_nir(cso->prog); - } - - /* do first pass optimization, ignoring the key: */ - shader->nir = ir3_optimize_nir(shader, nir, NULL); - if (fd_mesa_debug & FD_DBG_DISASM) { - printf("dump nir%d: type=%d\n", shader->id, shader->type); - nir_print_shader(shader->nir, stdout); - } - - return shader; -} - -static void dump_reg(FILE *out, const char *name, uint32_t r) -{ - if (r != regid(63,0)) - fprintf(out, "; %s: r%d.%c\n", name, r >> 2, "xyzw"[r & 0x3]); -} - -static void dump_output(FILE *out, struct ir3_shader_variant *so, - unsigned slot, const char *name) -{ - uint32_t regid; - regid = ir3_find_output_regid(so, slot); - dump_reg(out, name, regid); -} - -void -ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out) -{ - struct ir3 *ir = so->ir; - struct ir3_register *reg; - const char *type = ir3_shader_stage(so->shader); - uint8_t regid; - unsigned i; - - for (i = 0; i < ir->ninputs; i++) { - if (!ir->inputs[i]) { - fprintf(out, "; in%d unused\n", i); - continue; - } - reg = ir->inputs[i]->regs[0]; - regid = reg->num; - fprintf(out, "@in(%sr%d.%c)\tin%d\n", - (reg->flags & IR3_REG_HALF) ? 
"h" : "", - (regid >> 2), "xyzw"[regid & 0x3], i); - } - - for (i = 0; i < ir->noutputs; i++) { - if (!ir->outputs[i]) { - fprintf(out, "; out%d unused\n", i); - continue; - } - /* kill shows up as a virtual output.. skip it! */ - if (is_kill(ir->outputs[i])) - continue; - reg = ir->outputs[i]->regs[0]; - regid = reg->num; - fprintf(out, "@out(%sr%d.%c)\tout%d\n", - (reg->flags & IR3_REG_HALF) ? "h" : "", - (regid >> 2), "xyzw"[regid & 0x3], i); - } - - for (i = 0; i < so->immediates_count; i++) { - fprintf(out, "@const(c%d.x)\t", so->constbase.immediate + i); - fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n", - so->immediates[i].val[0], - so->immediates[i].val[1], - so->immediates[i].val[2], - so->immediates[i].val[3]); - } - - disasm_a3xx(bin, so->info.sizedwords, 0, out); - - switch (so->type) { - case SHADER_VERTEX: - fprintf(out, "; %s: outputs:", type); - for (i = 0; i < so->outputs_count; i++) { - uint8_t regid = so->outputs[i].regid; - fprintf(out, " r%d.%c (%s)", - (regid >> 2), "xyzw"[regid & 0x3], - gl_varying_slot_name(so->outputs[i].slot)); - } - fprintf(out, "\n"); - fprintf(out, "; %s: inputs:", type); - for (i = 0; i < so->inputs_count; i++) { - uint8_t regid = so->inputs[i].regid; - fprintf(out, " r%d.%c (cm=%x,il=%u,b=%u)", - (regid >> 2), "xyzw"[regid & 0x3], - so->inputs[i].compmask, - so->inputs[i].inloc, - so->inputs[i].bary); - } - fprintf(out, "\n"); - break; - case SHADER_FRAGMENT: - fprintf(out, "; %s: outputs:", type); - for (i = 0; i < so->outputs_count; i++) { - uint8_t regid = so->outputs[i].regid; - fprintf(out, " r%d.%c (%s)", - (regid >> 2), "xyzw"[regid & 0x3], - gl_frag_result_name(so->outputs[i].slot)); - } - fprintf(out, "\n"); - fprintf(out, "; %s: inputs:", type); - for (i = 0; i < so->inputs_count; i++) { - uint8_t regid = so->inputs[i].regid; - fprintf(out, " r%d.%c (%s,cm=%x,il=%u,b=%u)", - (regid >> 2), "xyzw"[regid & 0x3], - gl_varying_slot_name(so->inputs[i].slot), - so->inputs[i].compmask, - so->inputs[i].inloc, - 
so->inputs[i].bary); - } - fprintf(out, "\n"); - break; - default: - /* TODO */ - break; - } - - /* print generic shader info: */ - fprintf(out, "; %s prog %d/%d: %u instructions, %d half, %d full\n", - type, so->shader->id, so->id, - so->info.instrs_count, - so->info.max_half_reg + 1, - so->info.max_reg + 1); - - fprintf(out, "; %d const, %u constlen\n", - so->info.max_const + 1, - so->constlen); - - fprintf(out, "; %u (ss), %u (sy)\n", so->info.ss, so->info.sy); - - /* print shader type specific info: */ - switch (so->type) { - case SHADER_VERTEX: - dump_output(out, so, VARYING_SLOT_POS, "pos"); - dump_output(out, so, VARYING_SLOT_PSIZ, "psize"); - break; - case SHADER_FRAGMENT: - dump_reg(out, "pos (bary)", - ir3_find_sysval_regid(so, SYSTEM_VALUE_VARYING_COORD)); - dump_output(out, so, FRAG_RESULT_DEPTH, "posz"); - if (so->color0_mrt) { - dump_output(out, so, FRAG_RESULT_COLOR, "color"); - } else { - dump_output(out, so, FRAG_RESULT_DATA0, "data0"); - dump_output(out, so, FRAG_RESULT_DATA1, "data1"); - dump_output(out, so, FRAG_RESULT_DATA2, "data2"); - dump_output(out, so, FRAG_RESULT_DATA3, "data3"); - dump_output(out, so, FRAG_RESULT_DATA4, "data4"); - dump_output(out, so, FRAG_RESULT_DATA5, "data5"); - dump_output(out, so, FRAG_RESULT_DATA6, "data6"); - dump_output(out, so, FRAG_RESULT_DATA7, "data7"); - } - /* these two are hard-coded since we don't know how to - * program them to anything but all 0's... - */ - if (so->frag_coord) - fprintf(out, "; fragcoord: r0.x\n"); - if (so->frag_face) - fprintf(out, "; fragface: hr0.x\n"); - break; - default: - /* TODO */ - break; - } - - fprintf(out, "\n"); -} - -uint64_t -ir3_shader_outputs(const struct ir3_shader *so) -{ - return so->nir->info.outputs_written; -} - -/* This has to reach into the fd_context a bit more than the rest of - * ir3, but it needs to be aligned with the compiler, so both agree - * on which const regs hold what. 
And the logic is identical between - * a3xx/a4xx, the only difference is small details in the actual - * CP_LOAD_STATE packets (which is handled inside the generation - * specific ctx->emit_const(_bo)() fxns) - */ - -#include "freedreno_resource.h" - -static inline bool -is_stateobj(struct fd_ringbuffer *ring) -{ - /* XXX this is an ugly way to differentiate.. */ - return !!(ring->flags & FD_RINGBUFFER_STREAMING); -} - -static inline void -ring_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) -{ - /* when we emit const state via ring (IB2) we need a WFI, but when - * it is emit'd via stateobj, we don't - */ - if (is_stateobj(ring)) - return; - - fd_wfi(batch, ring); -} - -static void -emit_user_consts(struct fd_context *ctx, const struct ir3_shader_variant *v, - struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf) -{ - const unsigned index = 0; /* user consts are index 0 */ - - if (constbuf->enabled_mask & (1 << index)) { - struct pipe_constant_buffer *cb = &constbuf->cb[index]; - unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */ - - /* in particular, with binning shader we may end up with - * unused consts, ie. we could end up w/ constlen that is - * smaller than first_driver_param. 
In that case truncate - * the user consts early to avoid HLSQ lockup caused by - * writing too many consts - */ - uint32_t max_const = MIN2(v->num_uniforms, v->constlen); - - // I expect that size should be a multiple of vec4's: - assert(size == align(size, 4)); - - /* and even if the start of the const buffer is before - * first_immediate, the end may not be: - */ - size = MIN2(size, 4 * max_const); - - if (size > 0) { - ring_wfi(ctx->batch, ring); - ctx->emit_const(ring, v->type, 0, - cb->buffer_offset, size, - cb->user_buffer, cb->buffer); - } - } -} - -static void -emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v, - struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf) -{ - uint32_t offset = v->constbase.ubo; - if (v->constlen > offset) { - uint32_t params = v->num_ubos; - uint32_t offsets[params]; - struct pipe_resource *prscs[params]; - - for (uint32_t i = 0; i < params; i++) { - const uint32_t index = i + 1; /* UBOs start at index 1 */ - struct pipe_constant_buffer *cb = &constbuf->cb[index]; - assert(!cb->user_buffer); - - if ((constbuf->enabled_mask & (1 << index)) && cb->buffer) { - offsets[i] = cb->buffer_offset; - prscs[i] = cb->buffer; - } else { - offsets[i] = 0; - prscs[i] = NULL; - } - } - - ring_wfi(ctx->batch, ring); - ctx->emit_const_bo(ring, v->type, false, offset * 4, params, prscs, offsets); - } -} - -static void -emit_ssbo_sizes(struct fd_context *ctx, const struct ir3_shader_variant *v, - struct fd_ringbuffer *ring, struct fd_shaderbuf_stateobj *sb) -{ - uint32_t offset = v->constbase.ssbo_sizes; - if (v->constlen > offset) { - uint32_t sizes[align(v->const_layout.ssbo_size.count, 4)]; - unsigned mask = v->const_layout.ssbo_size.mask; - - while (mask) { - unsigned index = u_bit_scan(&mask); - unsigned off = v->const_layout.ssbo_size.off[index]; - sizes[off] = sb->sb[index].buffer_size; - } - - ring_wfi(ctx->batch, ring); - ctx->emit_const(ring, v->type, offset * 4, - 0, ARRAY_SIZE(sizes), sizes, NULL); - } -} 
- -static void -emit_image_dims(struct fd_context *ctx, const struct ir3_shader_variant *v, - struct fd_ringbuffer *ring, struct fd_shaderimg_stateobj *si) -{ - uint32_t offset = v->constbase.image_dims; - if (v->constlen > offset) { - uint32_t dims[align(v->const_layout.image_dims.count, 4)]; - unsigned mask = v->const_layout.image_dims.mask; - - while (mask) { - struct pipe_image_view *img; - struct fd_resource *rsc; - unsigned index = u_bit_scan(&mask); - unsigned off = v->const_layout.image_dims.off[index]; - - img = &si->si[index]; - rsc = fd_resource(img->resource); - - dims[off + 0] = util_format_get_blocksize(img->format); - if (img->resource->target != PIPE_BUFFER) { - unsigned lvl = img->u.tex.level; - /* note for 2d/cube/etc images, even if re-interpreted - * as a different color format, the pixel size should - * be the same, so use original dimensions for y and z - * stride: - */ - dims[off + 1] = rsc->slices[lvl].pitch * rsc->cpp; - /* see corresponding logic in fd_resource_offset(): */ - if (rsc->layer_first) { - dims[off + 2] = rsc->layer_size; - } else { - dims[off + 2] = rsc->slices[lvl].size0; - } - } else { - /* For buffer-backed images, the log2 of the format's - * bytes-per-pixel is placed on the 2nd slot. This is useful - * when emitting image_size instructions, for which we need - * to divide by bpp for image buffers. Since the bpp - * can only be power-of-two, the division is implemented - * as a SHR, and for that it is handy to have the log2 of - * bpp as a constant. 
(log2 = first-set-bit - 1) - */ - dims[off + 1] = ffs(dims[off + 0]) - 1; - } - } - - ring_wfi(ctx->batch, ring); - ctx->emit_const(ring, v->type, offset * 4, - 0, ARRAY_SIZE(dims), dims, NULL); - } -} - -static void -emit_immediates(struct fd_context *ctx, const struct ir3_shader_variant *v, - struct fd_ringbuffer *ring) -{ - int size = v->immediates_count; - uint32_t base = v->constbase.immediate; - - /* truncate size to avoid writing constants that shader - * does not use: - */ - size = MIN2(size + base, v->constlen) - base; - - /* convert out of vec4: */ - base *= 4; - size *= 4; - - if (size > 0) { - ring_wfi(ctx->batch, ring); - ctx->emit_const(ring, v->type, base, - 0, size, v->immediates[0].val, NULL); - } -} - -/* emit stream-out buffers: */ -static void -emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v, - struct fd_ringbuffer *ring) -{ - /* streamout addresses after driver-params: */ - uint32_t offset = v->constbase.tfbo; - if (v->constlen > offset) { - struct fd_streamout_stateobj *so = &ctx->streamout; - struct pipe_stream_output_info *info = &v->shader->stream_output; - uint32_t params = 4; - uint32_t offsets[params]; - struct pipe_resource *prscs[params]; - - for (uint32_t i = 0; i < params; i++) { - struct pipe_stream_output_target *target = so->targets[i]; - - if (target) { - offsets[i] = (so->offsets[i] * info->stride[i] * 4) + - target->buffer_offset; - prscs[i] = target->buffer; - } else { - offsets[i] = 0; - prscs[i] = NULL; - } - } - - ring_wfi(ctx->batch, ring); - ctx->emit_const_bo(ring, v->type, true, offset * 4, params, prscs, offsets); - } -} - -static uint32_t -max_tf_vtx(struct fd_context *ctx, const struct ir3_shader_variant *v) -{ - struct fd_streamout_stateobj *so = &ctx->streamout; - struct pipe_stream_output_info *info = &v->shader->stream_output; - uint32_t maxvtxcnt = 0x7fffffff; - - if (ctx->screen->gpu_id >= 500) - return 0; - if (v->binning_pass) - return 0; - if (v->shader->stream_output.num_outputs == 0) 
- return 0; - if (so->num_targets == 0) - return 0; - - /* offset to write to is: - * - * total_vtxcnt = vtxcnt + offsets[i] - * offset = total_vtxcnt * stride[i] - * - * offset = vtxcnt * stride[i] ; calculated in shader - * + offsets[i] * stride[i] ; calculated at emit_tfbos() - * - * assuming for each vtx, each target buffer will have data written - * up to 'offset + stride[i]', that leaves maxvtxcnt as: - * - * buffer_size = (maxvtxcnt * stride[i]) + stride[i] - * maxvtxcnt = (buffer_size - stride[i]) / stride[i] - * - * but shader is actually doing a less-than (rather than less-than- - * equal) check, so we can drop the -stride[i]. - * - * TODO is assumption about `offset + stride[i]` legit? - */ - for (unsigned i = 0; i < so->num_targets; i++) { - struct pipe_stream_output_target *target = so->targets[i]; - unsigned stride = info->stride[i] * 4; /* convert dwords->bytes */ - if (target) { - uint32_t max = target->buffer_size / stride; - maxvtxcnt = MIN2(maxvtxcnt, max); - } - } - - return maxvtxcnt; -} - -static void -emit_common_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, - struct fd_context *ctx, enum pipe_shader_type t) -{ - enum fd_dirty_shader_state dirty = ctx->dirty_shader[t]; - - /* When we use CP_SET_DRAW_STATE objects to emit constant state, - * if we emit any of it we need to emit all. This is because - * we are using the same state-group-id each time for uniform - * state, and if previous update is never evaluated (due to no - * visible primitives in the current tile) then the new stateobj - * completely replaces the old one. - * - * Possibly if we split up different parts of the const state to - * different state-objects we could avoid this. 
- */ - if (dirty && is_stateobj(ring)) - dirty = ~0; - - if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST)) { - struct fd_constbuf_stateobj *constbuf; - bool shader_dirty; - - constbuf = &ctx->constbuf[t]; - shader_dirty = !!(dirty & FD_DIRTY_SHADER_PROG); - - emit_user_consts(ctx, v, ring, constbuf); - emit_ubos(ctx, v, ring, constbuf); - if (shader_dirty) - emit_immediates(ctx, v, ring); - } - - if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_SSBO)) { - struct fd_shaderbuf_stateobj *sb = &ctx->shaderbuf[t]; - emit_ssbo_sizes(ctx, v, ring, sb); - } - - if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE)) { - struct fd_shaderimg_stateobj *si = &ctx->shaderimg[t]; - emit_image_dims(ctx, v, ring, si); - } -} - -void -ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, - struct fd_context *ctx, const struct pipe_draw_info *info) -{ - debug_assert(v->type == SHADER_VERTEX); - - emit_common_consts(v, ring, ctx, PIPE_SHADER_VERTEX); - - /* emit driver params every time: */ - /* TODO skip emit if shader doesn't use driver params to avoid WFI.. */ - if (info) { - uint32_t offset = v->constbase.driver_param; - if (v->constlen > offset) { - uint32_t vertex_params[IR3_DP_VS_COUNT] = { - [IR3_DP_VTXID_BASE] = info->index_size ? 
- info->index_bias : info->start, - [IR3_DP_VTXCNT_MAX] = max_tf_vtx(ctx, v), - }; - /* if no user-clip-planes, we don't need to emit the - * entire thing: - */ - uint32_t vertex_params_size = 4; - - if (v->key.ucp_enables) { - struct pipe_clip_state *ucp = &ctx->ucp; - unsigned pos = IR3_DP_UCP0_X; - for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++) { - for (unsigned j = 0; j < 4; j++) { - vertex_params[pos] = fui(ucp->ucp[i][j]); - pos++; - } - } - vertex_params_size = ARRAY_SIZE(vertex_params); - } - - ring_wfi(ctx->batch, ring); - - bool needs_vtxid_base = - ir3_find_sysval_regid(v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) != regid(63, 0); - - /* for indirect draw, we need to copy VTXID_BASE from - * indirect-draw parameters buffer.. which is annoying - * and means we can't easily emit these consts in cmd - * stream so need to copy them to bo. - */ - if (info->indirect && needs_vtxid_base) { - struct pipe_draw_indirect_info *indirect = info->indirect; - struct pipe_resource *vertex_params_rsc = - pipe_buffer_create(&ctx->screen->base, - PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STREAM, - vertex_params_size * 4); - unsigned src_off = info->indirect->offset;; - void *ptr; - - ptr = fd_bo_map(fd_resource(vertex_params_rsc)->bo); - memcpy(ptr, vertex_params, vertex_params_size * 4); - - if (info->index_size) { - /* indexed draw, index_bias is 4th field: */ - src_off += 3 * 4; - } else { - /* non-indexed draw, start is 3rd field: */ - src_off += 2 * 4; - } - - /* copy index_bias or start from draw params: */ - ctx->mem_to_mem(ring, vertex_params_rsc, 0, - indirect->buffer, src_off, 1); - - ctx->emit_const(ring, SHADER_VERTEX, offset * 4, 0, - vertex_params_size, NULL, vertex_params_rsc); - - pipe_resource_reference(&vertex_params_rsc, NULL); - } else { - ctx->emit_const(ring, SHADER_VERTEX, offset * 4, 0, - vertex_params_size, vertex_params, NULL); - } - - /* if needed, emit stream-out buffer addresses: */ - if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) { - emit_tfbos(ctx, v, ring); 
- } - } - } -} - -void -ir3_emit_fs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, - struct fd_context *ctx) -{ - debug_assert(v->type == SHADER_FRAGMENT); - - emit_common_consts(v, ring, ctx, PIPE_SHADER_FRAGMENT); -} - -/* emit compute-shader consts: */ -void -ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, - struct fd_context *ctx, const struct pipe_grid_info *info) -{ - debug_assert(v->type == SHADER_COMPUTE); - - emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE); - - /* emit compute-shader driver-params: */ - uint32_t offset = v->constbase.driver_param; - if (v->constlen > offset) { - ring_wfi(ctx->batch, ring); - - if (info->indirect) { - struct pipe_resource *indirect = NULL; - unsigned indirect_offset; - - /* This is a bit awkward, but CP_LOAD_STATE.EXT_SRC_ADDR needs - * to be aligned more strongly than 4 bytes. So in this case - * we need a temporary buffer to copy NumWorkGroups.xyz to. - * - * TODO if previous compute job is writing to info->indirect, - * we might need a WFI.. but since we currently flush for each - * compute job, we are probably ok for now. 
- */ - if (info->indirect_offset & 0xf) { - indirect = pipe_buffer_create(&ctx->screen->base, - PIPE_BIND_COMMAND_ARGS_BUFFER, PIPE_USAGE_STREAM, - 0x1000); - indirect_offset = 0; - - ctx->mem_to_mem(ring, indirect, 0, info->indirect, - info->indirect_offset, 3); - } else { - pipe_resource_reference(&indirect, info->indirect); - indirect_offset = info->indirect_offset; - } - - ctx->emit_const(ring, SHADER_COMPUTE, offset * 4, - indirect_offset, 4, NULL, indirect); - - pipe_resource_reference(&indirect, NULL); - } else { - uint32_t compute_params[IR3_DP_CS_COUNT] = { - [IR3_DP_NUM_WORK_GROUPS_X] = info->grid[0], - [IR3_DP_NUM_WORK_GROUPS_Y] = info->grid[1], - [IR3_DP_NUM_WORK_GROUPS_Z] = info->grid[2], - [IR3_DP_LOCAL_GROUP_SIZE_X] = info->block[0], - [IR3_DP_LOCAL_GROUP_SIZE_Y] = info->block[1], - [IR3_DP_LOCAL_GROUP_SIZE_Z] = info->block[2], - }; - - ctx->emit_const(ring, SHADER_COMPUTE, offset * 4, 0, - ARRAY_SIZE(compute_params), compute_params, NULL); - } - } -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_shader.h mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_shader.h --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_shader.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_shader.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,538 +0,0 @@ -/* - * Copyright (C) 2014 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#ifndef IR3_SHADER_H_ -#define IR3_SHADER_H_ - -#include "pipe/p_state.h" -#include "compiler/shader_enums.h" -#include "util/bitscan.h" - -#include "ir3.h" -#include "disasm.h" - -struct glsl_type; - -/* driver param indices: */ -enum ir3_driver_param { - /* compute shader driver params: */ - IR3_DP_NUM_WORK_GROUPS_X = 0, - IR3_DP_NUM_WORK_GROUPS_Y = 1, - IR3_DP_NUM_WORK_GROUPS_Z = 2, - IR3_DP_LOCAL_GROUP_SIZE_X = 4, - IR3_DP_LOCAL_GROUP_SIZE_Y = 5, - IR3_DP_LOCAL_GROUP_SIZE_Z = 6, - /* NOTE: gl_NumWorkGroups should be vec4 aligned because - * glDispatchComputeIndirect() needs to load these from - * the info->indirect buffer. Keep that in mind when/if - * adding any addition CS driver params. - */ - IR3_DP_CS_COUNT = 8, /* must be aligned to vec4 */ - - /* vertex shader driver params: */ - IR3_DP_VTXID_BASE = 0, - IR3_DP_VTXCNT_MAX = 1, - /* user-clip-plane components, up to 8x vec4's: */ - IR3_DP_UCP0_X = 4, - /* .... 
*/ - IR3_DP_UCP7_W = 35, - IR3_DP_VS_COUNT = 36 /* must be aligned to vec4 */ -}; - -/** - * For consts needed to pass internal values to shader which may or may not - * be required, rather than allocating worst-case const space, we scan the - * shader and allocate consts as-needed: - * - * + SSBO sizes: only needed if shader has a get_buffer_size intrinsic - * for a given SSBO - * - * + Image dimensions: needed to calculate pixel offset, but only for - * images that have a image_store intrinsic - */ -struct ir3_driver_const_layout { - struct { - uint32_t mask; /* bitmask of SSBOs that have get_buffer_size */ - uint32_t count; /* number of consts allocated */ - /* one const allocated per SSBO which has get_buffer_size, - * ssbo_sizes.off[ssbo_id] is offset from start of ssbo_sizes - * consts: - */ - uint32_t off[PIPE_MAX_SHADER_BUFFERS]; - } ssbo_size; - - struct { - uint32_t mask; /* bitmask of images that have image_store */ - uint32_t count; /* number of consts allocated */ - /* three const allocated per image which has image_store: - * + cpp (bytes per pixel) - * + pitch (y pitch) - * + array_pitch (z pitch) - */ - uint32_t off[PIPE_MAX_SHADER_IMAGES]; - } image_dims; -}; - -/* Configuration key used to identify a shader variant.. different - * shader variants can be used to implement features not supported - * in hw (two sided color), binning-pass vertex shader, etc. - */ -struct ir3_shader_key { - union { - struct { - /* - * Combined Vertex/Fragment shader parameters: - */ - unsigned ucp_enables : 8; - - /* do we need to check {v,f}saturate_{s,t,r}? 
*/ - unsigned has_per_samp : 1; - - /* - * Vertex shader variant parameters: - */ - unsigned vclamp_color : 1; - - /* - * Fragment shader variant parameters: - */ - unsigned color_two_side : 1; - unsigned half_precision : 1; - /* used when shader needs to handle flat varyings (a4xx) - * for front/back color inputs to frag shader: - */ - unsigned rasterflat : 1; - unsigned fclamp_color : 1; - }; - uint32_t global; - }; - - /* bitmask of sampler which needs coords clamped for vertex - * shader: - */ - uint16_t vsaturate_s, vsaturate_t, vsaturate_r; - - /* bitmask of sampler which needs coords clamped for frag - * shader: - */ - uint16_t fsaturate_s, fsaturate_t, fsaturate_r; - - /* bitmask of ms shifts */ - uint32_t vsamples, fsamples; - - /* bitmask of samplers which need astc srgb workaround: */ - uint16_t vastc_srgb, fastc_srgb; -}; - -static inline bool -ir3_shader_key_equal(struct ir3_shader_key *a, struct ir3_shader_key *b) -{ - /* slow-path if we need to check {v,f}saturate_{s,t,r} */ - if (a->has_per_samp || b->has_per_samp) - return memcmp(a, b, sizeof(struct ir3_shader_key)) == 0; - return a->global == b->global; -} - -/* will the two keys produce different lowering for a fragment shader? 
*/ -static inline bool -ir3_shader_key_changes_fs(struct ir3_shader_key *key, struct ir3_shader_key *last_key) -{ - if (last_key->has_per_samp || key->has_per_samp) { - if ((last_key->fsaturate_s != key->fsaturate_s) || - (last_key->fsaturate_t != key->fsaturate_t) || - (last_key->fsaturate_r != key->fsaturate_r) || - (last_key->fsamples != key->fsamples) || - (last_key->fastc_srgb != key->fastc_srgb)) - return true; - } - - if (last_key->fclamp_color != key->fclamp_color) - return true; - - if (last_key->color_two_side != key->color_two_side) - return true; - - if (last_key->half_precision != key->half_precision) - return true; - - if (last_key->rasterflat != key->rasterflat) - return true; - - if (last_key->ucp_enables != key->ucp_enables) - return true; - - return false; -} - -/* will the two keys produce different lowering for a vertex shader? */ -static inline bool -ir3_shader_key_changes_vs(struct ir3_shader_key *key, struct ir3_shader_key *last_key) -{ - if (last_key->has_per_samp || key->has_per_samp) { - if ((last_key->vsaturate_s != key->vsaturate_s) || - (last_key->vsaturate_t != key->vsaturate_t) || - (last_key->vsaturate_r != key->vsaturate_r) || - (last_key->vsamples != key->vsamples) || - (last_key->vastc_srgb != key->vastc_srgb)) - return true; - } - - if (last_key->vclamp_color != key->vclamp_color) - return true; - - if (last_key->ucp_enables != key->ucp_enables) - return true; - - return false; -} - -struct ir3_shader_variant { - struct fd_bo *bo; - - /* variant id (for debug) */ - uint32_t id; - - struct ir3_shader_key key; - - /* vertex shaders can have an extra version for hwbinning pass, - * which is pointed to by so->binning: - */ - bool binning_pass; - struct ir3_shader_variant *binning; - - struct ir3_driver_const_layout const_layout; - struct ir3_info info; - struct ir3 *ir; - - /* the instructions length is in units of instruction groups - * (4 instructions for a3xx, 16 instructions for a4xx.. 
each - * instruction is 2 dwords): - */ - unsigned instrlen; - - /* the constants length is in units of vec4's, and is the sum of - * the uniforms and the built-in compiler constants - */ - unsigned constlen; - - /* number of uniforms (in vec4), not including built-in compiler - * constants, etc. - */ - unsigned num_uniforms; - - unsigned num_ubos; - - /* About Linkage: - * + Let the frag shader determine the position/compmask for the - * varyings, since it is the place where we know if the varying - * is actually used, and if so, which components are used. So - * what the hw calls "outloc" is taken from the "inloc" of the - * frag shader. - * + From the vert shader, we only need the output regid - */ - - bool frag_coord, frag_face, color0_mrt; - - /* NOTE: for input/outputs, slot is: - * gl_vert_attrib - for VS inputs - * gl_varying_slot - for VS output / FS input - * gl_frag_result - for FS output - */ - - /* varyings/outputs: */ - unsigned outputs_count; - struct { - uint8_t slot; - uint8_t regid; - } outputs[16 + 2]; /* +POSITION +PSIZE */ - bool writes_pos, writes_psize; - - /* attributes (VS) / varyings (FS): - * Note that sysval's should come *after* normal inputs. - */ - unsigned inputs_count; - struct { - uint8_t slot; - uint8_t regid; - uint8_t compmask; - uint8_t ncomp; - /* location of input (ie. offset passed to bary.f, etc). This - * matches the SP_VS_VPC_DST_REG.OUTLOCn value (a3xx and a4xx - * have the OUTLOCn value offset by 8, presumably to account - * for gl_Position/gl_PointSize) - */ - uint8_t inloc; - /* vertex shader specific: */ - bool sysval : 1; /* slot is a gl_system_value */ - /* fragment shader specific: */ - bool bary : 1; /* fetched varying (vs one loaded into reg) */ - bool rasterflat : 1; /* special handling for emit->rasterflat */ - enum glsl_interp_mode interpolate; - } inputs[16 + 2]; /* +POSITION +FACE */ - - /* sum of input components (scalar). 
For frag shaders, it only counts - * the varying inputs: - */ - unsigned total_in; - - /* For frag shaders, the total number of inputs (not scalar, - * ie. SP_VS_PARAM_REG.TOTALVSOUTVAR) - */ - unsigned varying_in; - - /* number of samplers/textures (which are currently 1:1): */ - int num_samp; - - /* do we have one or more SSBO instructions: */ - bool has_ssbo; - - /* do we have kill instructions: */ - bool has_kill; - - /* Layout of constant registers, each section (in vec4). Pointer size - * is 32b (a3xx, a4xx), or 64b (a5xx+), which effects the size of the - * UBO and stream-out consts. - */ - struct { - /* user const start at zero */ - unsigned ubo; - /* NOTE that a3xx might need a section for SSBO addresses too */ - unsigned ssbo_sizes; - unsigned image_dims; - unsigned driver_param; - unsigned tfbo; - unsigned immediate; - } constbase; - - unsigned immediates_count; - unsigned immediates_size; - struct { - uint32_t val[4]; - } *immediates; - - /* for astc srgb workaround, the number/base of additional - * alpha tex states we need, and index of original tex states - */ - struct { - unsigned base, count; - unsigned orig_idx[16]; - } astc_srgb; - - /* shader variants form a linked list: */ - struct ir3_shader_variant *next; - - /* replicated here to avoid passing extra ptrs everywhere: */ - enum shader_t type; - struct ir3_shader *shader; -}; - -struct ir3_shader { - enum shader_t type; - - /* shader id (for debug): */ - uint32_t id; - uint32_t variant_count; - - /* so we know when we can disable TGSI related hacks: */ - bool from_tgsi; - - struct ir3_compiler *compiler; - - struct nir_shader *nir; - struct pipe_stream_output_info stream_output; - - struct ir3_shader_variant *variants; -}; - -void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id); - -struct ir3_shader * ir3_shader_create(struct ir3_compiler *compiler, - const struct pipe_shader_state *cso, enum shader_t type, - struct pipe_debug_callback *debug); -struct ir3_shader * 
-ir3_shader_create_compute(struct ir3_compiler *compiler, - const struct pipe_compute_state *cso, - struct pipe_debug_callback *debug); -void ir3_shader_destroy(struct ir3_shader *shader); -struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader, - struct ir3_shader_key key, bool binning_pass, - struct pipe_debug_callback *debug); -void ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out); -uint64_t ir3_shader_outputs(const struct ir3_shader *so); - -struct fd_ringbuffer; -struct fd_context; -void ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, - struct fd_context *ctx, const struct pipe_draw_info *info); -void ir3_emit_fs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, - struct fd_context *ctx); -void ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, - struct fd_context *ctx, const struct pipe_grid_info *info); - -int -ir3_glsl_type_size(const struct glsl_type *type); - -static inline const char * -ir3_shader_stage(struct ir3_shader *shader) -{ - switch (shader->type) { - case SHADER_VERTEX: return "VERT"; - case SHADER_FRAGMENT: return "FRAG"; - case SHADER_COMPUTE: return "CL"; - default: - unreachable("invalid type"); - return NULL; - } -} - -/* - * Helper/util: - */ - -#include "pipe/p_shader_tokens.h" - -static inline int -ir3_find_output(const struct ir3_shader_variant *so, gl_varying_slot slot) -{ - int j; - - for (j = 0; j < so->outputs_count; j++) - if (so->outputs[j].slot == slot) - return j; - - /* it seems optional to have a OUT.BCOLOR[n] for each OUT.COLOR[n] - * in the vertex shader.. but the fragment shader doesn't know this - * so it will always have both IN.COLOR[n] and IN.BCOLOR[n]. So - * at link time if there is no matching OUT.BCOLOR[n], we must map - * OUT.COLOR[n] to IN.BCOLOR[n]. 
And visa versa if there is only - * a OUT.BCOLOR[n] but no matching OUT.COLOR[n] - */ - if (slot == VARYING_SLOT_BFC0) { - slot = VARYING_SLOT_COL0; - } else if (slot == VARYING_SLOT_BFC1) { - slot = VARYING_SLOT_COL1; - } else if (slot == VARYING_SLOT_COL0) { - slot = VARYING_SLOT_BFC0; - } else if (slot == VARYING_SLOT_COL1) { - slot = VARYING_SLOT_BFC1; - } else { - return 0; - } - - for (j = 0; j < so->outputs_count; j++) - if (so->outputs[j].slot == slot) - return j; - - debug_assert(0); - - return 0; -} - -static inline int -ir3_next_varying(const struct ir3_shader_variant *so, int i) -{ - while (++i < so->inputs_count) - if (so->inputs[i].compmask && so->inputs[i].bary) - break; - return i; -} - -struct ir3_shader_linkage { - uint8_t max_loc; - uint8_t cnt; - struct { - uint8_t regid; - uint8_t compmask; - uint8_t loc; - } var[32]; -}; - -static inline void -ir3_link_add(struct ir3_shader_linkage *l, uint8_t regid, uint8_t compmask, uint8_t loc) -{ - int i = l->cnt++; - - debug_assert(i < ARRAY_SIZE(l->var)); - - l->var[i].regid = regid; - l->var[i].compmask = compmask; - l->var[i].loc = loc; - l->max_loc = MAX2(l->max_loc, loc + util_last_bit(compmask)); -} - -static inline void -ir3_link_shaders(struct ir3_shader_linkage *l, - const struct ir3_shader_variant *vs, - const struct ir3_shader_variant *fs) -{ - int j = -1, k; - - while (l->cnt < ARRAY_SIZE(l->var)) { - j = ir3_next_varying(fs, j); - - if (j >= fs->inputs_count) - break; - - if (fs->inputs[j].inloc >= fs->total_in) - continue; - - k = ir3_find_output(vs, fs->inputs[j].slot); - - ir3_link_add(l, vs->outputs[k].regid, - fs->inputs[j].compmask, fs->inputs[j].inloc); - } -} - -static inline uint32_t -ir3_find_output_regid(const struct ir3_shader_variant *so, unsigned slot) -{ - int j; - for (j = 0; j < so->outputs_count; j++) - if (so->outputs[j].slot == slot) - return so->outputs[j].regid; - return regid(63, 0); -} - -static inline uint32_t -ir3_find_sysval_regid(const struct ir3_shader_variant 
*so, unsigned slot) -{ - int j; - for (j = 0; j < so->inputs_count; j++) - if (so->inputs[j].sysval && (so->inputs[j].slot == slot)) - return so->inputs[j].regid; - return regid(63, 0); -} - -/* calculate register footprint in terms of half-regs (ie. one full - * reg counts as two half-regs). - */ -static inline uint32_t -ir3_shader_halfregs(const struct ir3_shader_variant *v) -{ - return (2 * (v->info.max_reg + 1)) + (v->info.max_half_reg + 1); -} - -#endif /* IR3_SHADER_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/Makefile.am mesa-19.0.1/src/gallium/drivers/freedreno/Makefile.am --- mesa-18.3.3/src/gallium/drivers/freedreno/Makefile.am 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -3,50 +3,26 @@ AM_CFLAGS = \ -Wno-packed-bitfield-compat \ + -I$(top_srcdir)/include/drm-uapi \ -I$(top_srcdir)/src/gallium/drivers/freedreno/ir3 \ + -I$(top_srcdir)/src/freedreno \ + -I$(top_srcdir)/src/freedreno/registers \ -I$(top_builddir)/src/compiler/nir \ -I$(top_srcdir)/src/compiler/nir \ - $(GALLIUM_DRIVER_CFLAGS) \ $(LIBDRM_CFLAGS) \ - $(VALGRIND_CFLAGS) - -MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D) -ir3/ir3_nir_trig.c: ir3/ir3_nir_trig.py $(top_srcdir)/src/compiler/nir/nir_algebraic.py - $(MKDIR_GEN) - $(AM_V_GEN) $(PYTHON) $(PYTHON_FLAGS) $(srcdir)/ir3/ir3_nir_trig.py -p $(top_srcdir)/src/compiler/nir > $@ || ($(RM) $@; false) + $(GALLIUM_DRIVER_CFLAGS) noinst_LTLIBRARIES = libfreedreno.la libfreedreno_la_SOURCES = \ $(C_SOURCES) \ - $(drm_SOURCES) \ $(a2xx_SOURCES) \ $(a3xx_SOURCES) \ $(a4xx_SOURCES) \ $(a5xx_SOURCES) \ $(a6xx_SOURCES) \ - $(ir3_SOURCES) \ - $(ir3_GENERATED_FILES) - -BUILT_SOURCES := $(ir3_GENERATED_FILES) -CLEANFILES := $(BUILT_SOURCES) -EXTRA_DIST = ir3/ir3_nir_trig.py - -noinst_PROGRAMS = ir3_compiler - -# XXX: Required due to the C++ sources in libnir -nodist_EXTRA_ir3_compiler_SOURCES = dummy.cpp -ir3_compiler_SOURCES = \ - ir3/ir3_cmdline.c - 
-ir3_compiler_LDADD = \ - libfreedreno.la \ - $(top_builddir)/src/gallium/auxiliary/libgallium.la \ - $(top_builddir)/src/compiler/nir/libnir.la \ - $(top_builddir)/src/compiler/glsl/libstandalone.la \ - $(top_builddir)/src/util/libmesautil.la \ - $(top_builddir)/src/mesa/libmesagallium.la \ - $(GALLIUM_COMMON_LIB_DEPS) \ - $(LIBDRM_LIBS) + $(ir3_SOURCES) -EXTRA_DIST += meson.build +EXTRA_DIST = \ + ir3/ir3_cmdline.c \ + meson.build diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/Makefile.sources mesa-19.0.1/src/gallium/drivers/freedreno/Makefile.sources --- mesa-18.3.3/src/gallium/drivers/freedreno/Makefile.sources 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/Makefile.sources 2019-03-31 23:16:37.000000000 +0000 @@ -1,6 +1,4 @@ C_SOURCES := \ - adreno_common.xml.h \ - adreno_pm4.xml.h \ disasm.h \ freedreno_batch.c \ freedreno_batch.h \ @@ -40,30 +38,10 @@ freedreno_util.c \ freedreno_util.h -drm_SOURCES := \ - drm/freedreno_bo.c \ - drm/freedreno_bo_cache.c \ - drm/freedreno_device.c \ - drm/freedreno_drmif.h \ - drm/freedreno_pipe.c \ - drm/freedreno_priv.h \ - drm/freedreno_ringbuffer.c \ - drm/freedreno_ringbuffer.h \ - drm/msm_bo.c \ - drm/msm_device.c \ - drm/msm_drm.h \ - drm/msm_pipe.c \ - drm/msm_priv.h \ - drm/msm_ringbuffer.c \ - drm/msm_ringbuffer_sp.c - a2xx_SOURCES := \ - a2xx/a2xx.xml.h \ a2xx/disasm-a2xx.c \ a2xx/fd2_blend.c \ a2xx/fd2_blend.h \ - a2xx/fd2_compiler.c \ - a2xx/fd2_compiler.h \ a2xx/fd2_context.c \ a2xx/fd2_context.h \ a2xx/fd2_draw.c \ @@ -72,10 +50,15 @@ a2xx/fd2_emit.h \ a2xx/fd2_gmem.c \ a2xx/fd2_gmem.h \ + a2xx/fd2_perfcntr.c \ a2xx/fd2_program.c \ a2xx/fd2_program.h \ + a2xx/fd2_query.c \ + a2xx/fd2_query.h \ a2xx/fd2_rasterizer.c \ a2xx/fd2_rasterizer.h \ + a2xx/fd2_resource.c \ + a2xx/fd2_resource.h \ a2xx/fd2_screen.c \ a2xx/fd2_screen.h \ a2xx/fd2_texture.c \ @@ -85,11 +68,16 @@ a2xx/fd2_zsa.c \ a2xx/fd2_zsa.h \ a2xx/instr-a2xx.h \ - a2xx/ir-a2xx.c \ - a2xx/ir-a2xx.h + a2xx/ir2.c \ + 
a2xx/ir2.h \ + a2xx/ir2_assemble.c \ + a2xx/ir2_cp.c \ + a2xx/ir2_nir.c \ + a2xx/ir2_nir_lower_scalar.c \ + a2xx/ir2_private.h \ + a2xx/ir2_ra.c a3xx_SOURCES := \ - a3xx/a3xx.xml.h \ a3xx/fd3_blend.c \ a3xx/fd3_blend.h \ a3xx/fd3_context.c \ @@ -116,7 +104,6 @@ a3xx/fd3_zsa.h a4xx_SOURCES := \ - a4xx/a4xx.xml.h \ a4xx/fd4_blend.c \ a4xx/fd4_blend.h \ a4xx/fd4_context.c \ @@ -143,7 +130,6 @@ a4xx/fd4_zsa.h a5xx_SOURCES := \ - a5xx/a5xx.xml.h \ a5xx/fd5_blend.c \ a5xx/fd5_blend.h \ a5xx/fd5_blitter.c \ @@ -179,7 +165,6 @@ a5xx/fd5_zsa.h a6xx_SOURCES := \ - a6xx/a6xx.xml.h \ a6xx/fd6_blend.c \ a6xx/fd6_blend.h \ a6xx/fd6_blitter.c \ @@ -212,27 +197,8 @@ a6xx/fd6_zsa.h ir3_SOURCES := \ - ir3/disasm-a3xx.c \ - ir3/instr-a3xx.h \ - ir3/ir3.c \ ir3/ir3_cache.c \ ir3/ir3_cache.h \ - ir3/ir3_compiler_nir.c \ - ir3/ir3_compiler.c \ - ir3/ir3_compiler.h \ - ir3/ir3_cp.c \ - ir3/ir3_depth.c \ - ir3/ir3_group.c \ - ir3/ir3.h \ - ir3/ir3_legalize.c \ - ir3/ir3_nir.c \ - ir3/ir3_nir.h \ - ir3/ir3_nir_lower_tg4_to_tex.c \ - ir3/ir3_print.c \ - ir3/ir3_ra.c \ - ir3/ir3_sched.c \ - ir3/ir3_shader.c \ - ir3/ir3_shader.h + ir3/ir3_gallium.c \ + ir3/ir3_gallium.h -ir3_GENERATED_FILES := \ - ir3/ir3_nir_trig.c diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/meson.build mesa-19.0.1/src/gallium/drivers/freedreno/meson.build --- mesa-18.3.3/src/gallium/drivers/freedreno/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -18,21 +18,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-ir3_nir_trig_c = custom_target( - 'ir3_nir_trig.c', - input : 'ir3/ir3_nir_trig.py', - output : 'ir3_nir_trig.c', - command : [ - prog_python, '@INPUT@', - '-p', join_paths(meson.source_root(), 'src/compiler/nir/'), - ], - capture : true, - depend_files : nir_algebraic_py, -) - files_libfreedreno = files( - 'adreno_common.xml.h', - 'adreno_pm4.xml.h', 'disasm.h', 'freedreno_batch.c', 'freedreno_batch.h', @@ -71,27 +57,9 @@ 'freedreno_texture.h', 'freedreno_util.c', 'freedreno_util.h', - 'drm/freedreno_bo.c', - 'drm/freedreno_bo_cache.c', - 'drm/freedreno_device.c', - 'drm/freedreno_drmif.h', - 'drm/freedreno_pipe.c', - 'drm/freedreno_priv.h', - 'drm/freedreno_ringbuffer.c', - 'drm/freedreno_ringbuffer.h', - 'drm/msm_bo.c', - 'drm/msm_device.c', - 'drm/msm_drm.h', - 'drm/msm_pipe.c', - 'drm/msm_priv.h', - 'drm/msm_ringbuffer.c', - 'drm/msm_ringbuffer_sp.c', - 'a2xx/a2xx.xml.h', 'a2xx/disasm-a2xx.c', 'a2xx/fd2_blend.c', 'a2xx/fd2_blend.h', - 'a2xx/fd2_compiler.c', - 'a2xx/fd2_compiler.h', 'a2xx/fd2_context.c', 'a2xx/fd2_context.h', 'a2xx/fd2_draw.c', @@ -100,10 +68,15 @@ 'a2xx/fd2_emit.h', 'a2xx/fd2_gmem.c', 'a2xx/fd2_gmem.h', + 'a2xx/fd2_perfcntr.c', 'a2xx/fd2_program.c', 'a2xx/fd2_program.h', + 'a2xx/fd2_query.c', + 'a2xx/fd2_query.h', 'a2xx/fd2_rasterizer.c', 'a2xx/fd2_rasterizer.h', + 'a2xx/fd2_resource.c', + 'a2xx/fd2_resource.h', 'a2xx/fd2_screen.c', 'a2xx/fd2_screen.h', 'a2xx/fd2_texture.c', @@ -113,9 +86,14 @@ 'a2xx/fd2_zsa.c', 'a2xx/fd2_zsa.h', 'a2xx/instr-a2xx.h', - 'a2xx/ir-a2xx.c', - 'a2xx/ir-a2xx.h', - 'a3xx/a3xx.xml.h', + 'a2xx/ir2.c', + 'a2xx/ir2.h', + 'a2xx/ir2_assemble.c', + 'a2xx/ir2_cp.c', + 'a2xx/ir2_nir.c', + 'a2xx/ir2_nir_lower_scalar.c', + 'a2xx/ir2_private.h', + 'a2xx/ir2_ra.c', 'a3xx/fd3_blend.c', 'a3xx/fd3_blend.h', 'a3xx/fd3_context.c', @@ -140,7 +118,6 @@ 'a3xx/fd3_texture.h', 'a3xx/fd3_zsa.c', 'a3xx/fd3_zsa.h', - 'a4xx/a4xx.xml.h', 'a4xx/fd4_blend.c', 'a4xx/fd4_blend.h', 'a4xx/fd4_context.c', @@ -165,7 +142,6 @@ 'a4xx/fd4_texture.h', 
'a4xx/fd4_zsa.c', 'a4xx/fd4_zsa.h', - 'a5xx/a5xx.xml.h', 'a5xx/fd5_blend.c', 'a5xx/fd5_blend.h', 'a5xx/fd5_blitter.c', @@ -199,7 +175,6 @@ 'a5xx/fd5_texture.h', 'a5xx/fd5_zsa.c', 'a5xx/fd5_zsa.h', - 'a6xx/a6xx.xml.h', 'a6xx/fd6_blend.c', 'a6xx/fd6_blend.h', 'a6xx/fd6_blitter.c', @@ -230,32 +205,15 @@ 'a6xx/fd6_texture.h', 'a6xx/fd6_zsa.c', 'a6xx/fd6_zsa.h', - 'ir3/disasm-a3xx.c', - 'ir3/instr-a3xx.h', - 'ir3/ir3.c', 'ir3/ir3_cache.c', 'ir3/ir3_cache.h', - 'ir3/ir3_compiler_nir.c', - 'ir3/ir3_compiler.c', - 'ir3/ir3_compiler.h', - 'ir3/ir3_cp.c', - 'ir3/ir3_depth.c', - 'ir3/ir3_group.c', - 'ir3/ir3.h', - 'ir3/ir3_legalize.c', - 'ir3/ir3_nir.c', - 'ir3/ir3_nir.h', - 'ir3/ir3_nir_lower_tg4_to_tex.c', - 'ir3/ir3_print.c', - 'ir3/ir3_ra.c', - 'ir3/ir3_sched.c', - 'ir3/ir3_shader.c', - 'ir3/ir3_shader.h', + 'ir3/ir3_gallium.c', + 'ir3/ir3_gallium.h', ) freedreno_includes = [ inc_src, inc_include, inc_gallium, inc_gallium_aux, - include_directories('ir3') + inc_freedreno, include_directories('ir3'), ] freedreno_c_args = [] @@ -270,20 +228,21 @@ libfreedreno = static_library( 'freedreno', - [files_libfreedreno, ir3_nir_trig_c], + [files_libfreedreno], include_directories : freedreno_includes, c_args : [freedreno_c_args, c_vis_args], cpp_args : [freedreno_cpp_args, cpp_vis_args], - dependencies : [ - dep_libdrm, - dep_valgrind, - idep_nir_headers - ], + dependencies : [dep_libdrm, idep_nir_headers], ) driver_freedreno = declare_dependency( compile_args : '-DGALLIUM_FREEDRENO', - link_with : [libfreedrenowinsys, libfreedreno], + link_with : [ + libfreedrenowinsys, + libfreedreno, + libfreedreno_drm, + libfreedreno_ir3, + ], dependencies : idep_nir, ) @@ -292,13 +251,13 @@ 'ir3/ir3_cmdline.c', include_directories : freedreno_includes, dependencies : [ - dep_libdrm, - dep_valgrind, dep_thread, idep_nir, ], link_with : [ libfreedreno, + libfreedreno_drm, + libfreedreno_ir3, libgallium, libglsl_standalone, libmesa_util, diff -Nru 
mesa-18.3.3/src/gallium/drivers/i915/i915_screen.c mesa-19.0.1/src/gallium/drivers/i915/i915_screen.c --- mesa-18.3.3/src/gallium/drivers/i915/i915_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/i915/i915_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -402,6 +402,8 @@ return 0; case PIPE_CAP_ENDIANNESS: return PIPE_ENDIAN_LITTLE; + case PIPE_CAP_MAX_VARYINGS: + return 10; case PIPE_CAP_VENDOR_ID: return 0x8086; diff -Nru mesa-18.3.3/src/gallium/drivers/imx/Automake.inc mesa-19.0.1/src/gallium/drivers/imx/Automake.inc --- mesa-18.3.3/src/gallium/drivers/imx/Automake.inc 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/imx/Automake.inc 1970-01-01 00:00:00.000000000 +0000 @@ -1,9 +0,0 @@ -if HAVE_GALLIUM_IMX - -TARGET_DRIVERS += imx-drm -TARGET_CPPFLAGS += -DGALLIUM_IMX -TARGET_LIB_DEPS += \ - $(top_builddir)/src/gallium/winsys/imx/drm/libimxdrm.la \ - $(LIBDRM_LIBS) - -endif diff -Nru mesa-18.3.3/src/gallium/drivers/imx/Makefile.am mesa-19.0.1/src/gallium/drivers/imx/Makefile.am --- mesa-18.3.3/src/gallium/drivers/imx/Makefile.am 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/imx/Makefile.am 1970-01-01 00:00:00.000000000 +0000 @@ -1,8 +0,0 @@ -include $(top_srcdir)/src/gallium/Automake.inc - -AM_CPPFLAGS = \ - $(GALLIUM_CFLAGS) - -noinst_LTLIBRARIES = libimx.la - -libimx_la_SOURCES = diff -Nru mesa-18.3.3/src/gallium/drivers/kmsro/Android.mk mesa-19.0.1/src/gallium/drivers/kmsro/Android.mk --- mesa-18.3.3/src/gallium/drivers/kmsro/Android.mk 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/kmsro/Android.mk 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,41 @@ +# Copyright (C) 2014 Emil Velikov +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, 
publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +# get C_SOURCES +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(C_SOURCES) + +LOCAL_MODULE := libmesa_pipe_kmsro + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) + +ifneq ($(HAVE_GALLIUM_KMSRO),) +GALLIUM_TARGET_DRIVERS += pl111 +GALLIUM_TARGET_DRIVERS += hx8357d +GALLIUM_TARGET_DRIVERS += imx +$(eval GALLIUM_LIBS += $(LOCAL_MODULE) libmesa_winsys_kmsro) +endif diff -Nru mesa-18.3.3/src/gallium/drivers/kmsro/Automake.inc mesa-19.0.1/src/gallium/drivers/kmsro/Automake.inc --- mesa-18.3.3/src/gallium/drivers/kmsro/Automake.inc 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/kmsro/Automake.inc 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,10 @@ +if HAVE_GALLIUM_KMSRO + +TARGET_DRIVERS += pl111 +TARGET_DRIVERS += hx8357d +TARGET_CPPFLAGS += -DGALLIUM_KMSRO +TARGET_LIB_DEPS += \ + $(top_builddir)/src/gallium/winsys/kmsro/drm/libkmsrodrm.la \ + $(LIBDRM_LIBS) + +endif diff -Nru mesa-18.3.3/src/gallium/drivers/kmsro/Makefile.am mesa-19.0.1/src/gallium/drivers/kmsro/Makefile.am --- mesa-18.3.3/src/gallium/drivers/kmsro/Makefile.am 1970-01-01 00:00:00.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/kmsro/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,8 @@ +include $(top_srcdir)/src/gallium/Automake.inc + +AM_CPPFLAGS = \ + $(GALLIUM_CFLAGS) + +noinst_LTLIBRARIES = libkmsro.la + +libkmsro_la_SOURCES = $(C_SOURCES) diff -Nru mesa-18.3.3/src/gallium/drivers/kmsro/Makefile.sources mesa-19.0.1/src/gallium/drivers/kmsro/Makefile.sources --- mesa-18.3.3/src/gallium/drivers/kmsro/Makefile.sources 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/kmsro/Makefile.sources 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,2 @@ +C_SOURCES := + diff -Nru mesa-18.3.3/src/gallium/drivers/llvmpipe/lp_screen.c mesa-19.0.1/src/gallium/drivers/llvmpipe/lp_screen.c --- mesa-18.3.3/src/gallium/drivers/llvmpipe/lp_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/llvmpipe/lp_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -310,6 +310,8 @@ return 1; case PIPE_CAP_CLEAR_TEXTURE: return 1; + case PIPE_CAP_MAX_VARYINGS: + return 32; case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: diff -Nru mesa-18.3.3/src/gallium/drivers/llvmpipe/lp_test_arit.c mesa-19.0.1/src/gallium/drivers/llvmpipe/lp_test_arit.c --- mesa-18.3.3/src/gallium/drivers/llvmpipe/lp_test_arit.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/llvmpipe/lp_test_arit.c 2019-03-31 23:16:37.000000000 +0000 @@ -458,7 +458,8 @@ continue; } - if (test->ref == &nearbyintf && length == 2 && + if (!util_cpu_caps.has_neon && + test->ref == &nearbyintf && length == 2 && ref != roundf(testval)) { /* FIXME: The generic (non SSE) path in lp_build_iround, which is * always taken for length==2 regardless of native round support, diff -Nru mesa-18.3.3/src/gallium/drivers/llvmpipe/lp_test_format.c mesa-19.0.1/src/gallium/drivers/llvmpipe/lp_test_format.c --- mesa-18.3.3/src/gallium/drivers/llvmpipe/lp_test_format.c 2018-09-27 19:13:54.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/llvmpipe/lp_test_format.c 2019-03-31 23:16:37.000000000 +0000 @@ -44,8 +44,6 @@ #include "lp_test.h" -#define USE_TEXTURE_CACHE 1 - static struct lp_build_format_cache *cache_ptr; void @@ -80,7 +78,8 @@ static LLVMValueRef add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose, const struct util_format_description *desc, - struct lp_type type) + struct lp_type type, + unsigned use_cache) { char name[256]; LLVMContextRef context = gallivm->context; @@ -114,7 +113,7 @@ i = LLVMGetParam(func, 2); j = LLVMGetParam(func, 3); - if (cache_ptr) { + if (use_cache) { cache = LLVMGetParam(func, 4); } @@ -137,7 +136,8 @@ PIPE_ALIGN_STACK static boolean test_format_float(unsigned verbose, FILE *fp, - const struct util_format_description *desc) + const struct util_format_description *desc, + unsigned use_cache) { LLVMContextRef context; struct gallivm_state *gallivm; @@ -152,7 +152,8 @@ context = LLVMContextCreate(); gallivm = gallivm_create("test_module_float", context); - fetch = add_fetch_rgba_test(gallivm, verbose, desc, lp_float32_vec4_type()); + fetch = add_fetch_rgba_test(gallivm, verbose, desc, + lp_float32_vec4_type(), use_cache); gallivm_compile_module(gallivm); @@ -181,7 +182,7 @@ memset(unpacked, 0, sizeof unpacked); - fetch_ptr(unpacked, packed, j, i, cache_ptr); + fetch_ptr(unpacked, packed, j, i, use_cache ? 
cache_ptr : NULL); for(k = 0; k < 4; ++k) { if (util_double_inf_sign(test->unpacked[i][j][k]) != util_inf_sign(unpacked[k])) { @@ -236,7 +237,8 @@ PIPE_ALIGN_STACK static boolean test_format_unorm8(unsigned verbose, FILE *fp, - const struct util_format_description *desc) + const struct util_format_description *desc, + unsigned use_cache) { LLVMContextRef context; struct gallivm_state *gallivm; @@ -251,7 +253,8 @@ context = LLVMContextCreate(); gallivm = gallivm_create("test_module_unorm8", context); - fetch = add_fetch_rgba_test(gallivm, verbose, desc, lp_unorm8_vec4_type()); + fetch = add_fetch_rgba_test(gallivm, verbose, desc, + lp_unorm8_vec4_type(), use_cache); gallivm_compile_module(gallivm); @@ -280,7 +283,7 @@ memset(unpacked, 0, sizeof unpacked); - fetch_ptr(unpacked, packed, j, i, cache_ptr); + fetch_ptr(unpacked, packed, j, i, use_cache ? cache_ptr : NULL); match = TRUE; for(k = 0; k < 4; ++k) { @@ -335,15 +338,16 @@ static boolean test_one(unsigned verbose, FILE *fp, - const struct util_format_description *format_desc) + const struct util_format_description *format_desc, + unsigned use_cache) { boolean success = TRUE; - if (!test_format_float(verbose, fp, format_desc)) { + if (!test_format_float(verbose, fp, format_desc, use_cache)) { success = FALSE; } - if (!test_format_unorm8(verbose, fp, format_desc)) { + if (!test_format_unorm8(verbose, fp, format_desc, use_cache)) { success = FALSE; } @@ -356,49 +360,52 @@ { enum pipe_format format; boolean success = TRUE; + unsigned use_cache; -#if USE_TEXTURE_CACHE cache_ptr = align_malloc(sizeof(struct lp_build_format_cache), 16); -#endif - - for (format = 1; format < PIPE_FORMAT_COUNT; ++format) { - const struct util_format_description *format_desc; - format_desc = util_format_description(format); - if (!format_desc) { - continue; - } + for (use_cache = 0; use_cache < 2; use_cache++) { + for (format = 1; format < PIPE_FORMAT_COUNT; ++format) { + const struct util_format_description *format_desc; + + format_desc 
= util_format_description(format); + if (!format_desc) { + continue; + } + /* + * TODO: test more + */ - /* - * TODO: test more - */ + if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + continue; + } - if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - continue; - } + if (util_format_is_pure_integer(format)) + continue; - if (util_format_is_pure_integer(format)) - continue; + /* only have util fetch func for etc1 */ + if (format_desc->layout == UTIL_FORMAT_LAYOUT_ETC && + format != PIPE_FORMAT_ETC1_RGB8) { + continue; + } - /* only have util fetch func for etc1 */ - if (format_desc->layout == UTIL_FORMAT_LAYOUT_ETC && - format != PIPE_FORMAT_ETC1_RGB8) { - continue; - } + /* missing fetch funcs */ + if (format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) { + continue; + } - /* missing fetch funcs */ - if (format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) { - continue; - } + /* only test twice with formats which can use cache */ + if (format_desc->layout != UTIL_FORMAT_LAYOUT_S3TC && use_cache) { + continue; + } - if (!test_one(verbose, fp, format_desc)) { - success = FALSE; + if (!test_one(verbose, fp, format_desc, use_cache)) { + success = FALSE; + } } } -#if USE_TEXTURE_CACHE align_free(cache_ptr); -#endif return success; } diff -Nru mesa-18.3.3/src/gallium/drivers/llvmpipe/meson.build mesa-19.0.1/src/gallium/drivers/llvmpipe/meson.build --- mesa-18.3.3/src/gallium/drivers/llvmpipe/meson.build 2018-01-12 19:24:23.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/llvmpipe/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -119,7 +119,8 @@ dependencies : [dep_llvm, dep_dl, dep_thread, dep_clock], include_directories : [inc_gallium, inc_gallium_aux, inc_include, inc_src], link_with : [libllvmpipe, libgallium, libmesa_util], - ) + ), + suite : ['llvmpipe'], ) endforeach endif diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gk104.asm mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gk104.asm --- 
mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gk104.asm 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gk104.asm 2019-03-31 23:16:37.000000000 +0000 @@ -543,6 +543,8 @@ $p1 suldgb b32 $r3 cv zero u8 g[$r4d] $r2 $p0 long mov b32 $r3 0x3f800000 long nop +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +long nop long ret @@ -554,7 +556,144 @@ // SIZE: 9 * 8 bytes // gk104_rcp_f64: - long nop + // Step 1: classify input according to exponent and value, and calculate + // result for 0/inf/nan. $r2 holds the exponent value, which starts at + // bit 52 (bit 20 of the upper half) and is 11 bits in length + ext u32 $r2 $r1 0xb14 + add b32 $r3 $r2 0xffffffff + joinat #rcp_rejoin + // We want to check whether the exponent is 0 or 0x7ff (i.e. NaN, inf, + // denorm, or 0). Do this by substracting 1 from the exponent, which will + // mean that it's > 0x7fd in those cases when doing unsigned comparison + set $p0 0x1 gt u32 $r3 0x7fd + // $r3: 0 for norms, 0x36 for denorms, -1 for others + long mov b32 $r3 0x0 + sched 0x2f 0x04 0x2d 0x2b 0x2f 0x28 0x28 + join (not $p0) nop + // Process all special values: NaN, inf, denorm, 0 + mov b32 $r3 0xffffffff + // A number is NaN if its abs value is greater than or unordered with inf + set $p0 0x1 gtu f64 abs $r0d 0x7ff0000000000000 + (not $p0) bra #rcp_inf_or_denorm_or_zero + // NaN -> NaN, the next line sets the "quiet" bit of the result. 
This + // behavior is both seen on the CPU and the blob + join or b32 $r1 $r1 0x80000 +rcp_inf_or_denorm_or_zero: + and b32 $r4 $r1 0x7ff00000 + // Other values with nonzero in exponent field should be inf + set $p0 0x1 eq s32 $r4 0x0 + sched 0x2b 0x04 0x2f 0x2d 0x2b 0x2f 0x20 + $p0 bra #rcp_denorm_or_zero + // +/-Inf -> +/-0 + xor b32 $r1 $r1 0x7ff00000 + join mov b32 $r0 0x0 +rcp_denorm_or_zero: + set $p0 0x1 gtu f64 abs $r0d 0x0 + $p0 bra #rcp_denorm + // +/-0 -> +/-Inf + join or b32 $r1 $r1 0x7ff00000 +rcp_denorm: + // non-0 denorms: multiply with 2^54 (the 0x36 in $r3), join with norms + mul rn f64 $r0d $r0d 0x4350000000000000 + sched 0x2f 0x28 0x2b 0x28 0x28 0x04 0x28 + join mov b32 $r3 0x36 +rcp_rejoin: + // All numbers with -1 in $r3 have their result ready in $r0d, return them + // others need further calculation + set $p0 0x1 lt s32 $r3 0x0 + $p0 bra #rcp_end + // Step 2: Before the real calculation goes on, renormalize the values to + // range [1, 2) by setting exponent field to 0x3ff (the exponent of 1) + // result in $r6d. The exponent will be recovered later. 
+ ext u32 $r2 $r1 0xb14 + and b32 $r7 $r1 0x800fffff + add b32 $r7 $r7 0x3ff00000 + long mov b32 $r6 $r0 + sched 0x2b 0x04 0x28 0x28 0x2a 0x2b 0x2e + // Step 3: Convert new value to float (no overflow will occur due to step + // 2), calculate rcp and do newton-raphson step once + cvt rz f32 $r5 f64 $r6d + long rcp f32 $r4 $r5 + mov b32 $r0 0xbf800000 + fma rn f32 $r5 $r4 $r5 $r0 + fma rn f32 $r0 neg $r4 $r5 $r4 + // Step 4: convert result $r0 back to double, do newton-raphson steps + cvt f64 $r0d f32 $r0 + cvt f64 $r6d neg f64 $r6d + sched 0x2e 0x29 0x29 0x29 0x29 0x29 0x29 + cvt f64 $r8d f32 0x3f800000 + // 4 Newton-Raphson Steps, tmp in $r4d, result in $r0d + // The formula used here (and above) is: + // RCP_{n + 1} = 2 * RCP_{n} - x * RCP_{n} * RCP_{n} + // The following code uses 2 FMAs for each step, and it will basically + // looks like: + // tmp = -src * RCP_{n} + 1 + // RCP_{n + 1} = RCP_{n} * tmp + RCP_{n} + fma rn f64 $r4d $r6d $r0d $r8d + fma rn f64 $r0d $r0d $r4d $r0d + fma rn f64 $r4d $r6d $r0d $r8d + fma rn f64 $r0d $r0d $r4d $r0d + fma rn f64 $r4d $r6d $r0d $r8d + fma rn f64 $r0d $r0d $r4d $r0d + sched 0x29 0x20 0x28 0x28 0x28 0x28 0x28 + fma rn f64 $r4d $r6d $r0d $r8d + fma rn f64 $r0d $r0d $r4d $r0d + // Step 5: Exponent recovery and final processing + // The exponent is recovered by adding what we added to the exponent. + // Suppose we want to calculate rcp(x), but we have rcp(cx), then + // rcp(x) = c * rcp(cx) + // The delta in exponent comes from two sources: + // 1) The renormalization in step 2. The delta is: + // 0x3ff - $r2 + // 2) (For the denorm input) The 2^54 we multiplied at rcp_denorm, stored + // in $r3 + // These 2 sources are calculated in the first two lines below, and then + // added to the exponent extracted from the result above. + // Note that after processing, the new exponent may >= 0x7ff (inf) + // or <= 0 (denorm). 
Those cases will be handled respectively below + subr b32 $r2 $r2 0x3ff + long add b32 $r4 $r2 $r3 + ext u32 $r3 $r1 0xb14 + // New exponent in $r3 + long add b32 $r3 $r3 $r4 + add b32 $r2 $r3 0xffffffff + sched 0x28 0x2b 0x28 0x2b 0x28 0x28 0x2b + // (exponent-1) < 0x7fe (unsigned) means the result is in norm range + // (same logic as in step 1) + set $p0 0x1 lt u32 $r2 0x7fe + (not $p0) bra #rcp_result_inf_or_denorm + // Norms: convert exponents back and return + shl b32 $r4 $r4 clamp 0x14 + long add b32 $r1 $r4 $r1 + bra #rcp_end +rcp_result_inf_or_denorm: + // New exponent >= 0x7ff means that result is inf + set $p0 0x1 ge s32 $r3 0x7ff + (not $p0) bra #rcp_result_denorm + sched 0x20 0x25 0x28 0x2b 0x23 0x25 0x2f + // Infinity + and b32 $r1 $r1 0x80000000 + long mov b32 $r0 0x0 + add b32 $r1 $r1 0x7ff00000 + bra #rcp_end +rcp_result_denorm: + // Denorm result comes from huge input. The greatest possible fp64, i.e. + // 0x7fefffffffffffff's rcp is 0x0004000000000000, 1/4 of the smallest + // normal value. Other rcp result should be greater than that. If we + // set the exponent field to 1, we can recover the result by multiplying + // it with 1/2 or 1/4. 1/2 is used if the "exponent" $r3 is 0, otherwise + // 1/4 ($r3 should be -1 then). This is quite tricky but greatly simplifies + // the logic here. + set $p0 0x1 ne u32 $r3 0x0 + and b32 $r1 $r1 0x800fffff + // 0x3e800000: 1/4 + $p0 cvt f64 $r6d f32 0x3e800000 + sched 0x2f 0x28 0x2c 0x2e 0x2a 0x20 0x27 + // 0x3f000000: 1/2 + (not $p0) cvt f64 $r6d f32 0x3f000000 + add b32 $r1 $r1 0x00100000 + mul rn f64 $r0d $r0d $r6d +rcp_end: long ret // RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i) @@ -565,7 +704,67 @@ // SIZE: 14 * 8 bytes // gk104_rsq_f64: - long nop + // Before getting initial result rsqrt64h, two special cases should be + // handled first. + // 1. 
NaN: set the highest bit in mantissa so it'll be surely recognized + // as NaN in rsqrt64h + set $p0 0x1 gtu f64 abs $r0d 0x7ff0000000000000 + $p0 or b32 $r1 $r1 0x00080000 + and b32 $r2 $r1 0x7fffffff + sched 0x27 0x20 0x28 0x2c 0x25 0x28 0x28 + // 2. denorms and small normal values: using their original value will + // lose precision either at rsqrt64h or the first step in newton-raphson + // steps below. Take 2 as a threshold in exponent field, and multiply + // with 2^54 if the exponent is smaller or equal. (will multiply 2^27 + // to recover in the end) + ext u32 $r3 $r1 0xb14 + set $p1 0x1 le u32 $r3 0x2 + long or b32 $r2 $r0 $r2 + $p1 mul rn f64 $r0d $r0d 0x4350000000000000 + rsqrt64h $r5 $r1 + // rsqrt64h will give correct result for 0/inf/nan, the following logic + // checks whether the input is one of those (exponent is 0x7ff or all 0 + // except for the sign bit) + set b32 $r6 ne u32 $r3 0x7ff + long and b32 $r2 $r2 $r6 + sched 0x28 0x2b 0x20 0x27 0x28 0x2e 0x28 + set $p0 0x1 ne u32 $r2 0x0 + $p0 bra #rsq_norm + // For 0/inf/nan, make sure the sign bit agrees with input and return + and b32 $r1 $r1 0x80000000 + long mov b32 $r0 0x0 + long or b32 $r1 $r1 $r5 + long ret +rsq_norm: + // For others, do 4 Newton-Raphson steps with the formula: + // RSQ_{n + 1} = RSQ_{n} * (1.5 - 0.5 * x * RSQ_{n} * RSQ_{n}) + // In the code below, each step is written as: + // tmp1 = 0.5 * x * RSQ_{n} + // tmp2 = -RSQ_{n} * tmp1 + 0.5 + // RSQ_{n + 1} = RSQ_{n} * tmp2 + RSQ_{n} + long mov b32 $r4 0x0 + sched 0x2f 0x29 0x29 0x29 0x29 0x29 0x29 + // 0x3f000000: 1/2 + cvt f64 $r8d f32 0x3f000000 + mul rn f64 $r2d $r0d $r8d + mul rn f64 $r0d $r2d $r4d + fma rn f64 $r6d neg $r4d $r0d $r8d + fma rn f64 $r4d $r4d $r6d $r4d + mul rn f64 $r0d $r2d $r4d + fma rn f64 $r6d neg $r4d $r0d $r8d + sched 0x29 0x29 0x29 0x29 0x29 0x29 0x29 + fma rn f64 $r4d $r4d $r6d $r4d + mul rn f64 $r0d $r2d $r4d + fma rn f64 $r6d neg $r4d $r0d $r8d + fma rn f64 $r4d $r4d $r6d $r4d + mul rn f64 $r0d $r2d 
$r4d + fma rn f64 $r6d neg $r4d $r0d $r8d + fma rn f64 $r4d $r4d $r6d $r4d + sched 0x29 0x20 0x28 0x2e 0x00 0x00 0x00 + // Multiply 2^27 to result for small inputs to recover + $p1 mul rn f64 $r4d $r4d 0x41a0000000000000 + long mov b32 $r1 $r5 + long mov b32 $r0 $r4 long ret // diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gk104.asm.h mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gk104.asm.h --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gk104.asm.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gk104.asm.h 2019-03-31 23:16:37.000000000 +0000 @@ -481,12 +481,132 @@ 0xd40040000840c785, 0x18fe00000000dde2, 0x4000000000001de4, - 0x9000000000001de7, -/* 0x0f08: gk104_rcp_f64 */ + 0x2000000000000007, 0x4000000000001de4, 0x9000000000001de7, -/* 0x0f18: gk104_rsq_f64 */ - 0x4000000000001de4, +/* 0x0f18: gk104_rcp_f64 */ + 0x7000c02c50109c03, + 0x0bfffffffc20dc02, + 0x6000000280000007, + 0x1a0ec01ff431dc03, + 0x180000000000dde2, + 0x228282f2b2d042f7, + 0x40000000000021f4, + 0x1bfffffffc00dde2, + 0x1e0edffc0001dc81, + 0x40000000200021e7, + 0x3800200000105c52, +/* 0x0f70: rcp_inf_or_denorm_or_zero */ + 0x39ffc00000111c02, + 0x190e0000fc41dc23, + 0x2202f2b2d2f042b7, + 0x40000000400001e7, + 0x39ffc00000105c82, + 0x1800000000001df2, +/* 0x0fa0: rcp_denorm_or_zero */ + 0x1e0ec0000001dc81, + 0x40000000200001e7, + 0x39ffc00000105c52, +/* 0x0fb8: rcp_denorm */ + 0x5000d0d400001c01, + 0x2280428282b282f7, + 0x18000000d800ddf2, +/* 0x0fd0: rcp_rejoin */ + 0x188e0000fc31dc23, + 0x40000006000001e7, + 0x7000c02c50109c03, + 0x3a003ffffc11dc02, + 0x08ffc0000071dc02, + 0x2800000000019de4, + 0x22e2b2a2828042b7, + 0x1006000019a15c04, + 0xc800000010511c00, + 0x1afe000000001de2, + 0x3000000014415c00, + 0x3008000014401e00, + 0x1000000001301c04, + 0x1000000019b19d04, + 0x22929292929292e7, + 0x1000cfe001321c04, + 0x2010000000611c01, + 0x2000000010001c01, + 0x2010000000611c01, + 0x2000000010001c01, + 0x2010000000611c01, + 
0x2000000010001c01, + 0x2282828282820297, + 0x2010000000611c01, + 0x2000000010001c01, + 0x0800000ffc209e02, + 0x480000000c211c03, + 0x7000c02c5010dc03, + 0x480000001030dc03, + 0x0bfffffffc309c02, + 0x22b28282b282b287, + 0x188ec01ff821dc03, + 0x40000000600021e7, + 0x6000c00050411c03, + 0x4800000004405c03, + 0x40000001c0001de7, +/* 0x10f0: rcp_result_inf_or_denorm */ + 0x1b0ec01ffc31dc23, + 0x40000000a00021e7, + 0x22f25232b2825207, + 0x3a00000000105c02, + 0x1800000000001de2, + 0x09ffc00000105c02, + 0x40000000e0001de7, +/* 0x1128: rcp_result_denorm */ + 0x1a8e0000fc31dc03, + 0x3a003ffffc105c02, + 0x1000cfa001318004, + 0x227202a2e2c282f7, + 0x1000cfc00131a004, + 0x0800400000105c02, + 0x5000000018001c01, +/* 0x1160: rcp_end */ + 0x9000000000001de7, +/* 0x1168: gk104_rsq_f64 */ + 0x1e0edffc0001dc81, + 0x3800200000104042, + 0x39fffffffc109c02, + 0x22828252c2820277, + 0x7000c02c5010dc03, + 0x198ec0000833dc03, + 0x6800000008009c43, + 0x5000d0d400000401, + 0xc80000001c115c00, + 0x128ec01ffc319c03, + 0x6800000018209c03, + 0x2282e2827202b287, + 0x1a8e0000fc21dc03, + 0x40000000800001e7, + 0x3a00000000105c02, + 0x1800000000001de2, + 0x6800000014105c43, + 0x9000000000001de7, +/* 0x11f8: rsq_norm */ + 0x1800000000011de2, + 0x22929292929292f7, + 0x1000cfc001321c04, + 0x5000000020009c01, + 0x5000000010201c01, + 0x2010000000419e01, + 0x2008000018411c01, + 0x5000000010201c01, + 0x2010000000419e01, + 0x2292929292929297, + 0x2008000018411c01, + 0x5000000010201c01, + 0x2010000000419e01, + 0x2008000018411c01, + 0x5000000010201c01, + 0x2010000000419e01, + 0x2008000018411c01, + 0x20000002e2820297, + 0x5000d06800410401, + 0x2800000014005de4, + 0x2800000010001de4, 0x9000000000001de7, 0xc800000003f01cc5, 0x2c00000100005c04, @@ -495,7 +615,7 @@ 0x680100000c1fdc03, 0x4000000a60001c47, 0x180000004000dde2, -/* 0x0f60: spill_cfstack */ +/* 0x12e0: spill_cfstack */ 0x78000009c0000007, 0x0c0000000430dd02, 0x4003ffffa0001ca7, @@ -543,14 +663,14 @@ 0x4000000100001ea7, 0x480100000c001c03, 
0x0800000000105c42, -/* 0x10d8: shared_loop */ +/* 0x1458: shared_loop */ 0xc100000000309c85, 0x9400000500009c85, 0x0c00000010001d02, 0x0800000000105d42, 0x0c0000001030dd02, 0x4003ffff40001ca7, -/* 0x1108: shared_done */ +/* 0x1488: shared_done */ 0x2800406420001de4, 0x2800406430005de4, 0xe000000000001c45, @@ -564,7 +684,7 @@ 0x480000000c209c03, 0x4801000008001c03, 0x0800000000105c42, -/* 0x1170: search_cstack */ +/* 0x14f0: search_cstack */ 0x280040646000dde4, 0x8400000020009f05, 0x190ec0002821dc03, @@ -573,17 +693,17 @@ 0x0800000000105c42, 0x0c0000004030dd02, 0x00029dff0ffc5cbf, -/* 0x11b0: entry_found */ +/* 0x1530: entry_found */ 0x8400000000009f85, 0x2800406400001de4, 0x2800406410005de4, 0x9400000010009c85, 0x4000000000001df4, -/* 0x11d8: end_exit */ +/* 0x1558: end_exit */ 0x9800000003ffdcc5, 0xd000000000008007, 0xa000000000004007, -/* 0x11f0: end_cont */ +/* 0x1570: end_cont */ 0xd000000000008007, 0x3400c3fffc201c04, 0xc000000003f01ec5, @@ -593,6 +713,6 @@ uint64_t gk104_builtin_offsets[] = { 0x0000000000000000, 0x00000000000000f0, - 0x0000000000000f08, 0x0000000000000f18, + 0x0000000000001168, }; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gk110.asm mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gk110.asm --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gk110.asm 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gk110.asm 2019-03-31 23:16:37.000000000 +0000 @@ -83,12 +83,229 @@ $p0 sub b32 $r1 $r1 $r2 $p0 add b32 $r0 $r0 0x1 $p3 cvt s32 $r0 neg s32 $r0 - sched 0x04 0x2e 0x04 0x28 0x04 0x20 0x2c + sched 0x04 0x2e 0x28 0x04 0x28 0x28 0x28 $p2 cvt s32 $r1 neg s32 $r1 ret +// RCP F64 +// +// INPUT: $r0d +// OUTPUT: $r0d +// CLOBBER: $r2 - $r9, $p0 +// +// The core of RCP and RSQ implementation is Newton-Raphson step, which is +// used to find successively better approximation from an imprecise initial +// value (single precision rcp in RCP and rsqrt64h in RSQ). 
+// gk110_rcp_f64: + // Step 1: classify input according to exponent and value, and calculate + // result for 0/inf/nan. $r2 holds the exponent value, which starts at + // bit 52 (bit 20 of the upper half) and is 11 bits in length + ext u32 $r2 $r1 0xb14 + add b32 $r3 $r2 0xffffffff + joinat #rcp_rejoin + // We want to check whether the exponent is 0 or 0x7ff (i.e. NaN, inf, + // denorm, or 0). Do this by substracting 1 from the exponent, which will + // mean that it's > 0x7fd in those cases when doing unsigned comparison + set b32 $p0 0x1 gt u32 $r3 0x7fd + // $r3: 0 for norms, 0x36 for denorms, -1 for others + mov b32 $r3 0x0 + sched 0x2f 0x04 0x2d 0x2b 0x2f 0x28 0x28 + join (not $p0) nop + // Process all special values: NaN, inf, denorm, 0 + mov b32 $r3 0xffffffff + // A number is NaN if its abs value is greater than or unordered with inf + set $p0 0x1 gtu f64 abs $r0d 0x7ff0000000000000 + (not $p0) bra #rcp_inf_or_denorm_or_zero + // NaN -> NaN, the next line sets the "quiet" bit of the result. 
This + // behavior is both seen on the CPU and the blob + join or b32 $r1 $r1 0x80000 +rcp_inf_or_denorm_or_zero: + and b32 $r4 $r1 0x7ff00000 + // Other values with nonzero in exponent field should be inf + set b32 $p0 0x1 eq s32 $r4 0x0 + sched 0x2b 0x04 0x2f 0x2d 0x2b 0x2f 0x20 + $p0 bra #rcp_denorm_or_zero + // +/-Inf -> +/-0 + xor b32 $r1 $r1 0x7ff00000 + join mov b32 $r0 0x0 +rcp_denorm_or_zero: + set $p0 0x1 gtu f64 abs $r0d 0x0 + $p0 bra #rcp_denorm + // +/-0 -> +/-Inf + join or b32 $r1 $r1 0x7ff00000 +rcp_denorm: + // non-0 denorms: multiply with 2^54 (the 0x36 in $r3), join with norms + mul rn f64 $r0d $r0d 0x4350000000000000 + sched 0x2f 0x28 0x2b 0x28 0x28 0x04 0x28 + join mov b32 $r3 0x36 +rcp_rejoin: + // All numbers with -1 in $r3 have their result ready in $r0d, return them + // others need further calculation + set b32 $p0 0x1 lt s32 $r3 0x0 + $p0 bra #rcp_end + // Step 2: Before the real calculation goes on, renormalize the values to + // range [1, 2) by setting exponent field to 0x3ff (the exponent of 1) + // result in $r6d. The exponent will be recovered later. 
+ ext u32 $r2 $r1 0xb14 + and b32 $r7 $r1 0x800fffff + add b32 $r7 $r7 0x3ff00000 + mov b32 $r6 $r0 + sched 0x2b 0x04 0x28 0x28 0x2a 0x2b 0x2e + // Step 3: Convert new value to float (no overflow will occur due to step + // 2), calculate rcp and do newton-raphson step once + cvt rz f32 $r5 f64 $r6d + rcp f32 $r4 $r5 + mov b32 $r0 0xbf800000 + fma rn f32 $r5 $r4 $r5 $r0 + fma rn f32 $r0 neg $r4 $r5 $r4 + // Step 4: convert result $r0 back to double, do newton-raphson steps + cvt f64 $r0d f32 $r0 + cvt f64 $r6d f64 neg $r6d + sched 0x2e 0x29 0x29 0x29 0x29 0x29 0x29 + cvt f64 $r8d f32 0x3f800000 + // 4 Newton-Raphson Steps, tmp in $r4d, result in $r0d + // The formula used here (and above) is: + // RCP_{n + 1} = 2 * RCP_{n} - x * RCP_{n} * RCP_{n} + // The following code uses 2 FMAs for each step, and it will basically + // looks like: + // tmp = -src * RCP_{n} + 1 + // RCP_{n + 1} = RCP_{n} * tmp + RCP_{n} + fma rn f64 $r4d $r6d $r0d $r8d + fma rn f64 $r0d $r0d $r4d $r0d + fma rn f64 $r4d $r6d $r0d $r8d + fma rn f64 $r0d $r0d $r4d $r0d + fma rn f64 $r4d $r6d $r0d $r8d + fma rn f64 $r0d $r0d $r4d $r0d + sched 0x29 0x20 0x28 0x28 0x28 0x28 0x28 + fma rn f64 $r4d $r6d $r0d $r8d + fma rn f64 $r0d $r0d $r4d $r0d + // Step 5: Exponent recovery and final processing + // The exponent is recovered by adding what we added to the exponent. + // Suppose we want to calculate rcp(x), but we have rcp(cx), then + // rcp(x) = c * rcp(cx) + // The delta in exponent comes from two sources: + // 1) The renormalization in step 2. The delta is: + // 0x3ff - $r2 + // 2) (For the denorm input) The 2^54 we multiplied at rcp_denorm, stored + // in $r3 + // These 2 sources are calculated in the first two lines below, and then + // added to the exponent extracted from the result above. + // Note that after processing, the new exponent may >= 0x7ff (inf) + // or <= 0 (denorm). 
Those cases will be handled respectively below + subr b32 $r2 $r2 0x3ff + add b32 $r4 $r2 $r3 + ext u32 $r3 $r1 0xb14 + // New exponent in $r3 + add b32 $r3 $r3 $r4 + add b32 $r2 $r3 0xffffffff + sched 0x28 0x2b 0x28 0x2b 0x28 0x28 0x2b + // (exponent-1) < 0x7fe (unsigned) means the result is in norm range + // (same logic as in step 1) + set b32 $p0 0x1 lt u32 $r2 0x7fe + (not $p0) bra #rcp_result_inf_or_denorm + // Norms: convert exponents back and return + shl b32 $r4 $r4 clamp 0x14 + add b32 $r1 $r4 $r1 + bra #rcp_end +rcp_result_inf_or_denorm: + // New exponent >= 0x7ff means that result is inf + set b32 $p0 0x1 ge s32 $r3 0x7ff + (not $p0) bra #rcp_result_denorm + sched 0x20 0x25 0x28 0x2b 0x23 0x25 0x2f + // Infinity + and b32 $r1 $r1 0x80000000 + mov b32 $r0 0x0 + add b32 $r1 $r1 0x7ff00000 + bra #rcp_end +rcp_result_denorm: + // Denorm result comes from huge input. The greatest possible fp64, i.e. + // 0x7fefffffffffffff's rcp is 0x0004000000000000, 1/4 of the smallest + // normal value. Other rcp result should be greater than that. If we + // set the exponent field to 1, we can recover the result by multiplying + // it with 1/2 or 1/4. 1/2 is used if the "exponent" $r3 is 0, otherwise + // 1/4 ($r3 should be -1 then). This is quite tricky but greatly simplifies + // the logic here. + set b32 $p0 0x1 ne u32 $r3 0x0 + and b32 $r1 $r1 0x800fffff + // 0x3e800000: 1/4 + $p0 cvt f64 $r6d f32 0x3e800000 + sched 0x2f 0x28 0x2c 0x2e 0x2a 0x20 0x27 + // 0x3f000000: 1/2 + (not $p0) cvt f64 $r6d f32 0x3f000000 + add b32 $r1 $r1 0x00100000 + mul rn f64 $r0d $r0d $r6d +rcp_end: + ret + +// RSQ F64 +// +// INPUT: $r0d +// OUTPUT: $r0d +// CLOBBER: $r2 - $r9, $p0 - $p1 +// gk110_rsq_f64: + // Before getting initial result rsqrt64h, two special cases should be + // handled first. + // 1. 
NaN: set the highest bit in mantissa so it'll be surely recognized + // as NaN in rsqrt64h + set $p0 0x1 gtu f64 abs $r0d 0x7ff0000000000000 + $p0 or b32 $r1 $r1 0x00080000 + and b32 $r2 $r1 0x7fffffff + sched 0x27 0x20 0x28 0x2c 0x25 0x28 0x28 + // 2. denorms and small normal values: using their original value will + // lose precision either at rsqrt64h or the first step in newton-raphson + // steps below. Take 2 as a threshold in exponent field, and multiply + // with 2^54 if the exponent is smaller or equal. (will multiply 2^27 + // to recover in the end) + ext u32 $r3 $r1 0xb14 + set b32 $p1 0x1 le u32 $r3 0x2 + or b32 $r2 $r0 $r2 + $p1 mul rn f64 $r0d $r0d 0x4350000000000000 + rsqrt64h f32 $r5 $r1 + // rsqrt64h will give correct result for 0/inf/nan, the following logic + // checks whether the input is one of those (exponent is 0x7ff or all 0 + // except for the sign bit) + set b32 $r6 ne u32 $r3 0x7ff + and b32 $r2 $r2 $r6 + sched 0x28 0x2b 0x20 0x27 0x28 0x2e 0x28 + set b32 $p0 0x1 ne u32 $r2 0x0 + $p0 bra #rsq_norm + // For 0/inf/nan, make sure the sign bit agrees with input and return + and b32 $r1 $r1 0x80000000 + mov b32 $r0 0x0 + or b32 $r1 $r1 $r5 + ret +rsq_norm: + // For others, do 4 Newton-Raphson steps with the formula: + // RSQ_{n + 1} = RSQ_{n} * (1.5 - 0.5 * x * RSQ_{n} * RSQ_{n}) + // In the code below, each step is written as: + // tmp1 = 0.5 * x * RSQ_{n} + // tmp2 = -RSQ_{n} * tmp1 + 0.5 + // RSQ_{n + 1} = RSQ_{n} * tmp2 + RSQ_{n} + mov b32 $r4 0x0 + sched 0x2f 0x29 0x29 0x29 0x29 0x29 0x29 + // 0x3f000000: 1/2 + cvt f64 $r8d f32 0x3f000000 + mul rn f64 $r2d $r0d $r8d + mul rn f64 $r0d $r2d $r4d + fma rn f64 $r6d neg $r4d $r0d $r8d + fma rn f64 $r4d $r4d $r6d $r4d + mul rn f64 $r0d $r2d $r4d + fma rn f64 $r6d neg $r4d $r0d $r8d + sched 0x29 0x29 0x29 0x29 0x29 0x29 0x29 + fma rn f64 $r4d $r4d $r6d $r4d + mul rn f64 $r0d $r2d $r4d + fma rn f64 $r6d neg $r4d $r0d $r8d + fma rn f64 $r4d $r4d $r6d $r4d + mul rn f64 $r0d $r2d $r4d + fma rn f64 
$r6d neg $r4d $r0d $r8d + fma rn f64 $r4d $r4d $r6d $r4d + sched 0x29 0x20 0x28 0x2e 0x00 0x00 0x00 + // Multiply 2^27 to result for small inputs to recover + $p1 mul rn f64 $r4d $r4d 0x41a0000000000000 + mov b32 $r1 $r5 + mov b32 $r0 $r4 ret .section #gk110_builtin_offsets diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gk110.asm.h mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gk110.asm.h --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gk110.asm.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gk110.asm.h 2019-03-31 23:16:37.000000000 +0000 @@ -65,11 +65,132 @@ 0xe088000001000406, 0x4000000000800001, 0xe6010000000ce802, - 0x08b08010a010b810, + 0x08a0a0a010a0b810, 0xe60100000088e806, 0x19000000001c003c, /* 0x0218: gk110_rcp_f64 */ -/* 0x0218: gk110_rsq_f64 */ + 0xc00000058a1c0409, + 0x407fffffff9c080d, + 0x1480000050000000, + 0xb3401c03fe9c0c1d, + 0xe4c03c007f9c000e, + 0x08a0a0bcacb410bc, + 0x8580000000603c02, + 0x747fffffff9fc00e, + 0xb4601fff801c021d, + 0x120000000420003c, + 0x21000400005c0404, +/* 0x0270: rcp_inf_or_denorm_or_zero */ + 0x203ff800001c0410, + 0xb3281c00001c101d, + 0x0880bcacb4bc10ac, + 0x120000000800003c, + 0x223ff800001c0404, + 0xe4c03c007fdc0002, +/* 0x02a0: rcp_denorm_or_zero */ + 0xb4601c00001c021d, + 0x120000000400003c, + 0x213ff800005c0404, +/* 0x02b8: rcp_denorm */ + 0xc400021a801c0001, + 0x08a010a0a0aca0bc, + 0x740000001b5fc00e, +/* 0x02d0: rcp_rejoin */ + 0xb3181c00001c0c1d, + 0x12000000c000003c, + 0xc00000058a1c0409, + 0x204007ffff9c041c, + 0x401ff800001c1c1d, + 0xe4c03c00001c001a, + 0x08b8aca8a0a010ac, + 0xe5400c00031c3816, + 0x84000000021c1412, + 0x745fc000001fc002, + 0xcc000000029c1016, + 0xcc081000029c1002, + 0xe5400000001c2c02, + 0xe5410000031c3c1a, + 0x08a4a4a4a4a4a4b8, + 0xc54001fc001c2c21, + 0xdb802000001c1812, + 0xdb800000021c0002, + 0xdb802000001c1812, + 0xdb800000021c0002, + 0xdb802000001c1812, + 0xdb800000021c0002, + 0x08a0a0a0a0a080a4, + 
0xdb802000001c1812, + 0xdb800000021c0002, + 0x48000001ff9c0809, + 0xe0800000019c0812, + 0xc00000058a1c040d, + 0xe0800000021c0c0e, + 0x407fffffff9c0c09, + 0x08aca0a0aca0aca0, + 0xb3101c03ff1c081d, + 0x120000000c20003c, + 0xc24000000a1c1011, + 0xe0800000009c1006, + 0x12000000381c003c, +/* 0x03f0: rcp_result_inf_or_denorm */ + 0xb3681c03ff9c0c1d, + 0x120000001420003c, + 0x08bc948caca09480, + 0x20400000001c0404, + 0xe4c03c007f9c0002, + 0x403ff800001c0405, + 0x120000001c1c003c, +/* 0x0428: rcp_result_denorm */ + 0xb3501c00001c0c1d, + 0x204007ffff9c0404, + 0xc54001f400002c19, + 0x089c80a8b8b0a0bc, + 0xc54001f800202c19, + 0x40000800001c0405, + 0xe4000000031c0002, +/* 0x0460: rcp_end */ + 0x19000000001c003c, +/* 0x0468: gk110_rsq_f64 */ + 0xb4601fff801c021d, + 0x2100040000000404, + 0x203fffffff9c0408, + 0x08a0a094b0a0809c, + 0xc00000058a1c040d, + 0xb3301c00011c0c3d, + 0xe2001000011c000a, + 0xc400021a80040001, + 0x84000000039c0416, + 0xb2d01c03ff9c0c19, + 0xe2000000031c080a, + 0x08a0b8a09c80aca0, + 0xb3501c00001c081d, + 0x120000001000003c, + 0x20400000001c0404, + 0xe4c03c007f9c0002, + 0xe2001000029c0406, + 0x19000000001c003c, +/* 0x04f8: rsq_norm */ + 0xe4c03c007f9c0012, + 0x08a4a4a4a4a4a4bc, + 0xc54001f8001c2c21, + 0xe4000000041c000a, + 0xe4000000021c0802, + 0xdb882000001c101a, + 0xdb801000031c1012, + 0xe4000000021c0802, + 0xdb882000001c101a, + 0x08a4a4a4a4a4a4a4, + 0xdb801000031c1012, + 0xe4000000021c0802, + 0xdb882000001c101a, + 0xdb801000031c1012, + 0xe4000000021c0802, + 0xdb882000001c101a, + 0xdb801000031c1012, + 0x08000000b8a080a4, + 0xc400020d00041011, + 0xe4c03c00029c0006, + 0xe4c03c00021c0002, 0x19000000001c003c, }; @@ -77,5 +198,5 @@ 0x0000000000000000, 0x00000000000000f0, 0x0000000000000218, - 0x0000000000000218, + 0x0000000000000468, }; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gm107.asm mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gm107.asm --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gm107.asm 2018-09-27 19:13:54.000000000 
+0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gm107.asm 2019-03-31 23:16:37.000000000 +0000 @@ -100,10 +100,253 @@ ret nop 0 -// STUB +// RCP F64 +// +// INPUT: $r0d +// OUTPUT: $r0d +// CLOBBER: $r2 - $r9, $p0 +// +// The core of RCP and RSQ implementation is Newton-Raphson step, which is +// used to find successively better approximation from an imprecise initial +// value (single precision rcp in RCP and rsqrt64h in RSQ). +// gm107_rcp_f64: -gm107_rsq_f64: + // Step 1: classify input according to exponent and value, and calculate + // result for 0/inf/nan. $r2 holds the exponent value, which starts at + // bit 52 (bit 20 of the upper half) and is 11 bits in length + sched (st 0x0) (st 0x0) (st 0x0) + bfe u32 $r2 $r1 0xb14 + iadd32i $r3 $r2 -1 + ssy #rcp_rejoin + // We want to check whether the exponent is 0 or 0x7ff (i.e. NaN, inf, + // denorm, or 0). Do this by substracting 1 from the exponent, which will + // mean that it's > 0x7fd in those cases when doing unsigned comparison + sched (st 0x0) (st 0x0) (st 0x0) + isetp gt u32 and $p0 1 $r3 0x7fd 1 + // $r3: 0 for norms, 0x36 for denorms, -1 for others + mov $r3 0x0 0xf + not $p0 sync + // Process all special values: NaN, inf, denorm, 0 + sched (st 0x0) (st 0x0) (st 0x0) + mov32i $r3 0xffffffff 0xf + // A number is NaN if its abs value is greater than or unordered with inf + dsetp gtu and $p0 1 abs $r0 0x7ff0000000000000 1 + not $p0 bra #rcp_inf_or_denorm_or_zero + // NaN -> NaN, the next line sets the "quiet" bit of the result. 
This + // behavior is both seen on the CPU and the blob + sched (st 0x0) (st 0x0) (st 0x0) + lop32i or $r1 $r1 0x80000 + sync +rcp_inf_or_denorm_or_zero: + lop32i and $r4 $r1 0x7ff00000 + sched (st 0x0) (st 0x0) (st 0x0) + // Other values with nonzero in exponent field should be inf + isetp eq and $p0 1 $r4 0x0 1 + $p0 bra #rcp_denorm_or_zero + // +/-Inf -> +/-0 + lop32i xor $r1 $r1 0x7ff00000 + sched (st 0x0) (st 0x0) (st 0x0) + mov $r0 0x0 0xf + sync +rcp_denorm_or_zero: + dsetp gtu and $p0 1 abs $r0 0x0 1 + sched (st 0x0) (st 0x0) (st 0x0) + $p0 bra #rcp_denorm + // +/-0 -> +/-Inf + lop32i or $r1 $r1 0x7ff00000 + sync +rcp_denorm: + // non-0 denorms: multiply with 2^54 (the 0x36 in $r3), join with norms + sched (st 0x0) (st 0x0) (st 0x0) + dmul $r0 $r0 0x4350000000000000 + mov $r3 0x36 0xf + sync +rcp_rejoin: + // All numbers with -1 in $r3 have their result ready in $r0d, return them + // others need further calculation + sched (st 0x0) (st 0x0) (st 0x0) + isetp lt and $p0 1 $r3 0x0 1 + $p0 bra #rcp_end + // Step 2: Before the real calculation goes on, renormalize the values to + // range [1, 2) by setting exponent field to 0x3ff (the exponent of 1) + // result in $r6d. The exponent will be recovered later. 
+ bfe u32 $r2 $r1 0xb14 + sched (st 0x0) (st 0x0) (st 0x0) + lop32i and $r7 $r1 0x800fffff + iadd32i $r7 $r7 0x3ff00000 + mov $r6 $r0 0xf + // Step 3: Convert new value to float (no overflow will occur due to step + // 2), calculate rcp and do newton-raphson step once + sched (st 0x0) (st 0x0) (st 0x0) + f2f ftz f64 f32 $r5 $r6 + mufu rcp $r4 $r5 + mov32i $r0 0xbf800000 0xf + sched (st 0x0) (st 0x0) (st 0x0) + ffma $r5 $r4 $r5 $r0 + ffma $r0 $r5 neg $r4 $r4 + // Step 4: convert result $r0 back to double, do newton-raphson steps + f2f f32 f64 $r0 $r0 + sched (st 0x0) (st 0x0) (st 0x0) + f2f f64 f64 $r6 neg $r6 + f2f f32 f64 $r8 0x3f800000 + // 4 Newton-Raphson Steps, tmp in $r4d, result in $r0d + // The formula used here (and above) is: + // RCP_{n + 1} = 2 * RCP_{n} - x * RCP_{n} * RCP_{n} + // The following code uses 2 FMAs for each step, and it will basically + // looks like: + // tmp = -src * RCP_{n} + 1 + // RCP_{n + 1} = RCP_{n} * tmp + RCP_{n} + dfma $r4 $r6 $r0 $r8 + sched (st 0x0) (st 0x0) (st 0x0) + dfma $r0 $r0 $r4 $r0 + dfma $r4 $r6 $r0 $r8 + dfma $r0 $r0 $r4 $r0 sched (st 0x0) (st 0x0) (st 0x0) + dfma $r4 $r6 $r0 $r8 + dfma $r0 $r0 $r4 $r0 + dfma $r4 $r6 $r0 $r8 + sched (st 0x0) (st 0x0) (st 0x0) + dfma $r0 $r0 $r4 $r0 + // Step 5: Exponent recovery and final processing + // The exponent is recovered by adding what we added to the exponent. + // Suppose we want to calculate rcp(x), but we have rcp(cx), then + // rcp(x) = c * rcp(cx) + // The delta in exponent comes from two sources: + // 1) The renormalization in step 2. The delta is: + // 0x3ff - $r2 + // 2) (For the denorm input) The 2^54 we multiplied at rcp_denorm, stored + // in $r3 + // These 2 sources are calculated in the first two lines below, and then + // added to the exponent extracted from the result above. + // Note that after processing, the new exponent may >= 0x7ff (inf) + // or <= 0 (denorm). 
Those cases will be handled respectively below + iadd $r2 neg $r2 0x3ff + iadd $r4 $r2 $r3 + sched (st 0x0) (st 0x0) (st 0x0) + bfe u32 $r3 $r1 0xb14 + // New exponent in $r3 + iadd $r3 $r3 $r4 + iadd32i $r2 $r3 -1 + // (exponent-1) < 0x7fe (unsigned) means the result is in norm range + // (same logic as in step 1) + sched (st 0x0) (st 0x0) (st 0x0) + isetp lt u32 and $p0 1 $r2 0x7fe 1 + not $p0 bra #rcp_result_inf_or_denorm + // Norms: convert exponents back and return + shl $r4 $r4 0x14 + sched (st 0x0) (st 0x0) (st 0x0) + iadd $r1 $r4 $r1 + bra #rcp_end +rcp_result_inf_or_denorm: + // New exponent >= 0x7ff means that result is inf + isetp ge and $p0 1 $r3 0x7ff 1 + sched (st 0x0) (st 0x0) (st 0x0) + not $p0 bra #rcp_result_denorm + // Infinity + lop32i and $r1 $r1 0x80000000 + mov $r0 0x0 0xf + sched (st 0x0) (st 0x0) (st 0x0) + iadd32i $r1 $r1 0x7ff00000 + bra #rcp_end +rcp_result_denorm: + // Denorm result comes from huge input. The greatest possible fp64, i.e. + // 0x7fefffffffffffff's rcp is 0x0004000000000000, 1/4 of the smallest + // normal value. Other rcp result should be greater than that. If we + // set the exponent field to 1, we can recover the result by multiplying + // it with 1/2 or 1/4. 1/2 is used if the "exponent" $r3 is 0, otherwise + // 1/4 ($r3 should be -1 then). This is quite tricky but greatly simplifies + // the logic here. + isetp ne u32 and $p0 1 $r3 0x0 1 + sched (st 0x0) (st 0x0) (st 0x0) + lop32i and $r1 $r1 0x800fffff + // 0x3e800000: 1/4 + $p0 f2f f32 f64 $r6 0x3e800000 + // 0x3f000000: 1/2 + not $p0 f2f f32 f64 $r6 0x3f000000 + sched (st 0x0) (st 0x0) (st 0x0) + iadd32i $r1 $r1 0x00100000 + dmul $r0 $r0 $r6 +rcp_end: + ret + +// RSQ F64 +// +// INPUT: $r0d +// OUTPUT: $r0d +// CLOBBER: $r2 - $r9, $p0 - $p1 +// +gm107_rsq_f64: + // Before getting initial result rsqrt64h, two special cases should be + // handled first. + // 1. 
NaN: set the highest bit in mantissa so it'll be surely recognized + // as NaN in rsqrt64h + sched (st 0xd wr 0x0 wt 0x3f) (st 0xd wt 0x1) (st 0xd) + dsetp gtu and $p0 1 abs $r0 0x7ff0000000000000 1 + $p0 lop32i or $r1 $r1 0x00080000 + lop32i and $r2 $r1 0x7fffffff + // 2. denorms and small normal values: using their original value will + // lose precision either at rsqrt64h or the first step in newton-raphson + // steps below. Take 2 as a threshold in exponent field, and multiply + // with 2^54 if the exponent is smaller or equal. (will multiply 2^27 + // to recover in the end) + sched (st 0xd) (st 0xd) (st 0xd) + bfe u32 $r3 $r1 0xb14 + isetp le u32 and $p1 1 $r3 0x2 1 + lop or 1 $r2 $r0 $r2 + sched (st 0xd wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xd) + $p1 dmul $r0 $r0 0x4350000000000000 + mufu rsq64h $r5 $r1 + // rsqrt64h will give correct result for 0/inf/nan, the following logic + // checks whether the input is one of those (exponent is 0x7ff or all 0 + // except for the sign bit) + iset ne u32 and $r6 $r3 0x7ff 1 + sched (st 0xd) (st 0xd) (st 0xd) + lop and 1 $r2 $r2 $r6 + isetp ne u32 and $p0 1 $r2 0x0 1 + $p0 bra #rsq_norm + // For 0/inf/nan, make sure the sign bit agrees with input and return + sched (st 0xd) (st 0xd) (st 0xd wt 0x1) + lop32i and $r1 $r1 0x80000000 + mov $r0 0x0 0xf + lop or 1 $r1 $r1 $r5 + sched (st 0xd) (st 0xf) (st 0xf) + ret + nop 0 + nop 0 +rsq_norm: + // For others, do 4 Newton-Raphson steps with the formula: + // RSQ_{n + 1} = RSQ_{n} * (1.5 - 0.5 * x * RSQ_{n} * RSQ_{n}) + // In the code below, each step is written as: + // tmp1 = 0.5 * x * RSQ_{n} + // tmp2 = -RSQ_{n} * tmp1 + 0.5 + // RSQ_{n + 1} = RSQ_{n} * tmp2 + RSQ_{n} + sched (st 0xd) (st 0xd wr 0x1) (st 0xd wr 0x1 rd 0x0 wt 0x3) + mov $r4 0x0 0xf + // 0x3f000000: 1/2 + f2f f32 f64 $r8 0x3f000000 + dmul $r2 $r0 $r8 + sched (st 0xd wr 0x0 wt 0x3) (st 0xd wr 0x0 wt 0x1) (st 0xd wr 0x0 wt 0x1) + dmul $r0 $r2 $r4 + dfma $r6 $r0 neg $r4 $r8 + dfma $r4 $r4 $r6 $r4 + sched (st 0xd wr 
0x0 wt 0x1) (st 0xd wr 0x0 wt 0x1) (st 0xd wr 0x0 wt 0x1) + dmul $r0 $r2 $r4 + dfma $r6 $r0 neg $r4 $r8 + dfma $r4 $r4 $r6 $r4 + sched (st 0xd wr 0x0 wt 0x1) (st 0xd wr 0x0 wt 0x1) (st 0xd wr 0x0 wt 0x1) + dmul $r0 $r2 $r4 + dfma $r6 $r0 neg $r4 $r8 + dfma $r4 $r4 $r6 $r4 + sched (st 0xd wr 0x0 wt 0x1) (st 0xd wr 0x0 wt 0x1) (st 0xd wr 0x0 wt 0x1) + dmul $r0 $r2 $r4 + dfma $r6 $r0 neg $r4 $r8 + dfma $r4 $r4 $r6 $r4 + // Multiply 2^27 to result for small inputs to recover + sched (st 0xd wr 0x0 wt 0x1) (st 0xd wt 0x1) (st 0xd) + $p1 dmul $r4 $r4 0x41a0000000000000 + mov $r1 $r5 0xf + mov $r0 $r4 0xf + sched (st 0xd) (st 0xf) (st 0xf) ret nop 0 nop 0 diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h 2019-03-31 23:16:37.000000000 +0000 @@ -82,8 +82,156 @@ 0xe32000000007000f, 0x50b0000000070f00, /* 0x0280: gm107_rcp_f64 */ -/* 0x0280: gm107_rsq_f64 */ 0x001f8000fc0007e0, + 0x38000000b1470102, + 0x1c0ffffffff70203, + 0xe29000000e000000, + 0x001f8000fc0007e0, + 0x366803807fd70307, + 0x5c9807800ff70003, + 0xf0f800000008000f, + 0x001f8000fc0007e0, + 0x010ffffffff7f003, + 0x368c03fff0070087, + 0xe24000000188000f, + 0x001f8000fc0007e0, + 0x0420008000070101, + 0xf0f800000007000f, +/* 0x02f8: rcp_inf_or_denorm_or_zero */ + 0x0407ff0000070104, + 0x001f8000fc0007e0, + 0x5b6503800ff70407, + 0xe24000000200000f, + 0x0447ff0000070101, + 0x001f8000fc0007e0, + 0x5c9807800ff70000, + 0xf0f800000007000f, +/* 0x0338: rcp_denorm_or_zero */ + 0x5b8c03800ff70087, + 0x001f8000fc0007e0, + 0xe24000000100000f, + 0x0427ff0000070101, + 0xf0f800000007000f, +/* 0x0360: rcp_denorm */ + 0x001f8000fc0007e0, + 0x3880004350070000, + 0x3898078003670003, + 0xf0f800000007000f, +/* 0x0380: rcp_rejoin */ + 0x001f8000fc0007e0, + 0x5b6303800ff70307, + 
0xe24000001c00000f, + 0x38000000b1470102, + 0x001f8000fc0007e0, + 0x040800fffff70107, + 0x1c03ff0000070707, + 0x5c98078000070006, + 0x001f8000fc0007e0, + 0x5ca8100000670e05, + 0x5080000000470504, + 0x010bf8000007f000, + 0x001f8000fc0007e0, + 0x5980000000570405, + 0x5981020000470500, + 0x5ca8000000070b00, + 0x001f8000fc0007e0, + 0x5ca8200000670f06, + 0x38a8003f80070b08, + 0x5b70040000070604, + 0x001f8000fc0007e0, + 0x5b70000000470000, + 0x5b70040000070604, + 0x5b70000000470000, + 0x001f8000fc0007e0, + 0x5b70040000070604, + 0x5b70000000470000, + 0x5b70040000070604, + 0x001f8000fc0007e0, + 0x5b70000000470000, + 0x381200003ff70202, + 0x5c10000000370204, + 0x001f8000fc0007e0, + 0x38000000b1470103, + 0x5c10000000470303, + 0x1c0ffffffff70302, + 0x001f8000fc0007e0, + 0x366203807fe70207, + 0xe24000000208000f, + 0x3848000001470404, + 0x001f8000fc0007e0, + 0x5c10000000170401, + 0xe24000000807000f, +/* 0x04d8: rcp_result_inf_or_denorm */ + 0x366d03807ff70307, + 0x001f8000fc0007e0, + 0xe24000000288000f, + 0x0408000000070101, + 0x5c9807800ff70000, + 0x001f8000fc0007e0, + 0x1c07ff0000070101, + 0xe24000000407000f, +/* 0x0518: rcp_result_denorm */ + 0x5b6a03800ff70307, + 0x001f8000fc0007e0, + 0x040800fffff70101, + 0x38a8003e80000b06, + 0x38a8003f00080b06, + 0x001f8000fc0007e0, + 0x1c00010000070101, + 0x5c80000000670000, +/* 0x0558: rcp_end */ + 0xe32000000007000f, +/* 0x0560: gm107_rsq_f64 */ + 0x001fb401fda1ff0d, + 0x368c03fff0070087, + 0x0420008000000101, + 0x0407fffffff70102, + 0x001fb400fda007ed, + 0x38000000b1470103, + 0x366603800027030f, + 0x5c47020000270002, + 0x001fb401e1a0070d, + 0x3880004350010000, + 0x5080000000770105, + 0x365a03807ff70306, + 0x001fb400fda007ed, + 0x5c47000000670202, + 0x5b6a03800ff70207, + 0xe24000000400000f, + 0x003fb400fda007ed, + 0x0408000000070101, + 0x5c9807800ff70000, + 0x5c47020000570101, + 0x001fbc00fde007ed, + 0xe32000000007000f, + 0x50b0000000070f00, + 0x50b0000000070f00, +/* 0x0620: rsq_norm */ + 0x0060b400e5a007ed, + 0x5c9807800ff70004, + 
0x38a8003f00070b08, + 0x5c80000000870002, + 0x003c3401e1a01f0d, + 0x5c80000000470200, + 0x5b71040000470006, + 0x5b70020000670404, + 0x003c3401e1a00f0d, + 0x5c80000000470200, + 0x5b71040000470006, + 0x5b70020000670404, + 0x003c3401e1a00f0d, + 0x5c80000000470200, + 0x5b71040000470006, + 0x5b70020000670404, + 0x003c3401e1a00f0d, + 0x5c80000000470200, + 0x5b71040000470006, + 0x5b70020000670404, + 0x001fb401fda00f0d, + 0x38800041a0010404, + 0x5c98078000570001, + 0x5c98078000470000, + 0x001fbc00fde007ed, 0xe32000000007000f, 0x50b0000000070f00, 0x50b0000000070f00, @@ -93,5 +241,5 @@ 0x0000000000000000, 0x0000000000000120, 0x0000000000000280, - 0x0000000000000280, + 0x0000000000000560, }; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -1119,6 +1119,7 @@ binSize = 0; maxGPR = -1; + fp64 = false; main = new Function(this, "MAIN", ~0); calls.insert(&main->call); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -192,6 +192,7 @@ void emitTEXs(int); void emitTEX(); + void emitTEXS(); void emitTLD(); void emitTLD4(); void emitTXD(); @@ -2718,6 +2719,104 @@ emitGPR(pos); } +static uint8_t +getTEXSMask(uint8_t mask) +{ + switch (mask) { + case 0x1: return 0x0; + case 0x2: return 0x1; + case 0x3: return 0x4; + case 0x4: return 0x2; + case 0x7: return 0x0; + case 0x8: return 0x3; + case 0x9: return 0x5; + case 0xa: return 0x6; + case 0xb: return 0x1; + case 0xc: return 0x7; + case 
0xd: return 0x2; + case 0xe: return 0x3; + case 0xf: return 0x4; + default: + assert(!"invalid mask"); + return 0; + } +} + +static uint8_t +getTEXSTarget(const TexInstruction *tex) +{ + assert(tex->op == OP_TEX || tex->op == OP_TXL); + + switch (tex->tex.target.getEnum()) { + case TEX_TARGET_1D: + assert(tex->tex.levelZero); + return 0x0; + case TEX_TARGET_2D: + case TEX_TARGET_RECT: + if (tex->tex.levelZero) + return 0x2; + if (tex->op == OP_TXL) + return 0x3; + return 0x1; + case TEX_TARGET_2D_SHADOW: + case TEX_TARGET_RECT_SHADOW: + if (tex->tex.levelZero) + return 0x6; + if (tex->op == OP_TXL) + return 0x5; + return 0x4; + case TEX_TARGET_2D_ARRAY: + if (tex->tex.levelZero) + return 0x8; + return 0x7; + case TEX_TARGET_2D_ARRAY_SHADOW: + assert(tex->tex.levelZero); + return 0x9; + case TEX_TARGET_3D: + if (tex->tex.levelZero) + return 0xb; + assert(tex->op != OP_TXL); + return 0xa; + case TEX_TARGET_CUBE: + assert(!tex->tex.levelZero); + if (tex->op == OP_TXL) + return 0xd; + return 0xc; + default: + assert(false); + return 0x0; + } +} + +static uint8_t +getTLDSTarget(const TexInstruction *tex) +{ + switch (tex->tex.target.getEnum()) { + case TEX_TARGET_1D: + if (tex->tex.levelZero) + return 0x0; + return 0x1; + case TEX_TARGET_2D: + case TEX_TARGET_RECT: + if (tex->tex.levelZero) + return tex->tex.useOffsets ? 0x4 : 0x2; + return tex->tex.useOffsets ? 
0xc : 0x5; + case TEX_TARGET_2D_MS: + assert(tex->tex.levelZero); + return 0x6; + case TEX_TARGET_3D: + assert(tex->tex.levelZero); + return 0x7; + case TEX_TARGET_2D_ARRAY: + assert(tex->tex.levelZero); + return 0x8; + + default: + assert(false); + return 0x0; + } +} + void CodeEmitterGM107::emitTEX() { @@ -2761,6 +2860,50 @@ } void +CodeEmitterGM107::emitTEXS() +{ + const TexInstruction *insn = this->insn->asTex(); + assert(!insn->tex.derivAll); + + switch (insn->op) { + case OP_TEX: + case OP_TXL: + emitInsn (0xd8000000); + emitField(0x35, 4, getTEXSTarget(insn)); + emitField(0x32, 3, getTEXSMask(insn->tex.mask)); + break; + case OP_TXF: + emitInsn (0xda000000); + emitField(0x35, 4, getTLDSTarget(insn)); + emitField(0x32, 3, getTEXSMask(insn->tex.mask)); + break; + case OP_TXG: + assert(insn->tex.useOffsets != 4); + emitInsn (0xdf000000); + emitField(0x34, 2, insn->tex.gatherComp); + emitField(0x33, 1, insn->tex.useOffsets == 1); + emitField(0x32, 1, insn->tex.target.isShadow()); + break; + default: + unreachable("unknown op in emitTEXS()"); + break; + } + + emitField(0x31, 1, insn->tex.liveOnly); + emitField(0x24, 13, insn->tex.r); + if (insn->defExists(1)) + emitGPR(0x1c, insn->def(1)); + else + emitGPR(0x1c); + if (insn->srcExists(1)) + emitGPR(0x14, insn->getSrc(1)); + else + emitGPR(0x14); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void CodeEmitterGM107::emitTLD() { const TexInstruction *insn = this->insn->asTex(); @@ -3474,15 +3617,26 @@ emitPIXLD(); break; case OP_TEX: - case OP_TXB: case OP_TXL: + if (insn->asTex()->tex.scalar) + emitTEXS(); + else + emitTEX(); + break; + case OP_TXB: emitTEX(); break; case OP_TXF: - emitTLD(); + if (insn->asTex()->tex.scalar) + emitTEXS(); + else + emitTLD(); break; case OP_TXG: - emitTLD4(); + if (insn->asTex()->tex.scalar) + emitTEXS(); + else + emitTLD4(); break; case OP_TXD: emitTXD(); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -968,6 +968,7 @@ NV50_IR_OPCODE_CASE(ATOMUMAX, ATOM); NV50_IR_OPCODE_CASE(ATOMIMIN, ATOM); NV50_IR_OPCODE_CASE(ATOMIMAX, ATOM); + NV50_IR_OPCODE_CASE(ATOMFADD, ATOM); NV50_IR_OPCODE_CASE(TEX2, TEX); NV50_IR_OPCODE_CASE(TXB2, TXB); @@ -1010,6 +1011,7 @@ case TGSI_OPCODE_ATOMIMIN: return NV50_IR_SUBOP_ATOM_MIN; case TGSI_OPCODE_ATOMUMAX: return NV50_IR_SUBOP_ATOM_MAX; case TGSI_OPCODE_ATOMIMAX: return NV50_IR_SUBOP_ATOM_MAX; + case TGSI_OPCODE_ATOMFADD: return NV50_IR_SUBOP_ATOM_ADD; case TGSI_OPCODE_IMUL_HI: case TGSI_OPCODE_UMUL_HI: return NV50_IR_SUBOP_MUL_HIGH; @@ -1085,6 +1087,8 @@ }; std::vector memoryFiles; + std::vector bufferAtomics; + private: int inferSysValDirection(unsigned sn) const; bool scanDeclaration(const struct tgsi_full_declaration *); @@ -1135,6 +1139,7 @@ //resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1); tempArrayId.resize(scan.file_max[TGSI_FILE_TEMPORARY] + 1); memoryFiles.resize(scan.file_max[TGSI_FILE_MEMORY] + 1); + bufferAtomics.resize(scan.file_max[TGSI_FILE_BUFFER] + 1); info->immd.bufSize = 0; @@ -1481,11 +1486,14 @@ tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair( first, last - first + 1))); break; + case TGSI_FILE_BUFFER: + for (i = first; i <= last; ++i) + bufferAtomics[i] = decl->Declaration.Atomic; + break; case TGSI_FILE_ADDRESS: case TGSI_FILE_CONSTANT: case TGSI_FILE_IMMEDIATE: case TGSI_FILE_SAMPLER: - case TGSI_FILE_BUFFER: case TGSI_FILE_IMAGE: break; default: @@ -1619,6 +1627,7 @@ case TGSI_OPCODE_ATOMIMIN: case TGSI_OPCODE_ATOMUMAX: case TGSI_OPCODE_ATOMIMAX: + case TGSI_OPCODE_ATOMFADD: case TGSI_OPCODE_LOAD: info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? 
0x1 : 0x2; @@ -2717,7 +2726,11 @@ } Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off); - ld->cache = tgsi.getCacheMode(); + if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER && + code->bufferAtomics[r]) + ld->cache = nv50_ir::CACHE_CG; + else + ld->cache = tgsi.getCacheMode(); if (ind) ld->setIndirect(0, 1, ind); } @@ -3834,6 +3847,7 @@ case TGSI_OPCODE_ATOMIMIN: case TGSI_OPCODE_ATOMUMAX: case TGSI_OPCODE_ATOMIMAX: + case TGSI_OPCODE_ATOMFADD: handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode())); break; case TGSI_OPCODE_RESQ: diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir.h mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir.h --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir.h 2019-03-31 23:16:37.000000000 +0000 @@ -1058,6 +1058,8 @@ enum TexQuery query; const struct ImgFormatDesc *format; + + bool scalar; // for GM107s TEXS, TLDS, TLD4S } tex; ValueRef dPdx[3]; @@ -1309,6 +1311,7 @@ uint32_t tlsSize; // size required for FILE_MEMORY_LOCAL int maxGPR; + bool fp64; MemoryPool mem_Instruction; MemoryPool mem_CmpInstruction; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -84,6 +84,38 @@ } void +NVC0LegalizeSSA::handleRCPRSQLib(Instruction *i, Value *src[]) +{ + FlowInstruction *call; + Value *def[2]; + int builtin; + + def[0] = bld.mkMovToReg(0, src[0])->getDef(0); + def[1] = bld.mkMovToReg(1, src[1])->getDef(0); + + if (i->op == OP_RCP) + builtin = NVC0_BUILTIN_RCP_F64; + else + builtin = NVC0_BUILTIN_RSQ_F64; + + call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL); + def[0] = bld.getSSA(); 
+ def[1] = bld.getSSA(); + bld.mkMovFromReg(def[0], 0); + bld.mkMovFromReg(def[1], 1); + bld.mkClobber(FILE_GPR, 0x3fc, 2); + bld.mkClobber(FILE_PREDICATE, i->op == OP_RSQ ? 0x3 : 0x1, 0); + bld.mkOp2(OP_MERGE, TYPE_U64, i->getDef(0), def[0], def[1]); + + call->fixed = 1; + call->absolute = call->builtin = 1; + call->target.builtin = builtin; + delete_Instruction(prog, i); + + prog->fp64 = true; +} + +void NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) { assert(i->dType == TYPE_F64); @@ -96,6 +128,12 @@ Value *src[2], *dst[2], *def = i->getDef(0); bld.mkSplit(src, 4, i->getSrc(0)); + int chip = prog->getTarget()->getChipset(); + if (chip >= NVISA_GK104_CHIPSET) { + handleRCPRSQLib(i, src); + return; + } + // 2. We don't care about the low 32 bits of the destination. Stick a 0 in. dst[0] = bld.loadImm(NULL, 0); dst[1] = bld.getSSA(); @@ -1063,22 +1101,6 @@ } } - if (chipset >= NVISA_GK104_CHIPSET) { - // - // If TEX requires more than 4 sources, the 2nd register tuple must be - // aligned to 4, even if it consists of just a single 4-byte register. - // - // XXX HACK: We insert 0 sources to avoid the 5 or 6 regs case. 
- // - int s = i->srcCount(0xff, true); - if (s > 4 && s < 7) { - if (i->srcExists(s)) // move potential predicate out of the way - i->moveSources(s, 7 - s); - while (s < 7) - i->setSrc(s++, bld.loadImm(NULL, 0)); - } - } - return true; } @@ -1887,7 +1909,8 @@ su->op == OP_SULDB || su->op == OP_SUSTB || su->op == OP_SUREDB; const int slot = su->tex.r; const int dim = su->tex.target.getDim(); - const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube()); + const bool array = su->tex.target.isArray() || su->tex.target.isCube(); + const int arg = dim + array; int c; Value *zero = bld.mkImm(0); Value *p1 = NULL; @@ -1896,6 +1919,7 @@ Value *bf, *eau, *off; Value *addr, *pred; Value *ind = su->getIndirectR(); + Value *y, *z; off = bld.getScratch(4); bf = bld.getScratch(4); @@ -1926,34 +1950,42 @@ for (; c < 3; ++c) src[c] = zero; + if (dim == 2 && !array) { + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless); + src[2] = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(), + v, bld.loadImm(NULL, 16)); + + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM(2), su->tex.bindless); + bld.mkOp3(OP_SUCLAMP, TYPE_S32, src[2], src[2], v, zero) + ->subOp = NV50_IR_SUBOP_SUCLAMP_SD(0, 2); + } + // set predicate output if (su->tex.target == TEX_TARGET_BUFFER) { src[0]->getInsn()->setFlagsDef(1, pred); } else - if (su->tex.target.isArray() || su->tex.target.isCube()) { + if (array) { p1 = bld.getSSA(1, FILE_PREDICATE); src[dim]->getInsn()->setFlagsDef(1, p1); } // calculate pixel offset if (dim == 1) { + y = z = zero; if (su->tex.target != TEX_TARGET_BUFFER) bld.mkOp2(OP_AND, TYPE_U32, off, src[0], bld.loadImm(NULL, 0xffff)); - } else - if (dim == 3) { + } else { + y = src[1]; + z = src[2]; + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless); bld.mkOp3(OP_MADSP, TYPE_U32, off, src[2], v, src[1]) - ->subOp = NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l + ->subOp = NV50_IR_SUBOP_MADSP(4,4,8); // u16l u16l u16l v = loadSuInfo32(ind, slot, 
NVC0_SU_INFO_PITCH, su->tex.bindless); bld.mkOp3(OP_MADSP, TYPE_U32, off, off, v, src[0]) - ->subOp = NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l - } else { - assert(dim == 2); - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH, su->tex.bindless); - bld.mkOp3(OP_MADSP, TYPE_U32, off, src[1], v, src[0]) - ->subOp = (su->tex.target.isArray() || su->tex.target.isCube()) ? - NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l + ->subOp = array ? + NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l } // calculate effective address part 1 @@ -1966,19 +1998,15 @@ ->subOp = NV50_IR_SUBOP_V1(7,6,8|2); } } else { - Value *y = src[1]; - Value *z = src[2]; uint16_t subOp = 0; switch (dim) { case 1: - y = zero; - z = zero; break; case 2: - z = off; - if (!su->tex.target.isArray() && !su->tex.target.isCube()) { - z = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless); + if (array) { + z = off; + } else { subOp = NV50_IR_SUBOP_SUBFM_3D; } break; @@ -2001,7 +2029,7 @@ eau = bld.mkOp3v(OP_SUEAU, TYPE_U32, bld.getScratch(4), off, bf, v); } // add array layer offset - if (su->tex.target.isArray() || su->tex.target.isCube()) { + if (array) { v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY, su->tex.bindless); if (dim == 1) bld.mkOp3(OP_MADSP, TYPE_U32, eau, src[1], v, eau) diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h 2019-03-31 23:16:37.000000000 +0000 @@ -62,6 +62,7 @@ // we want to insert calls to the builtin library only after optimization void handleDIV(Instruction *); // integer division, modulus + void handleRCPRSQLib(Instruction *, Value *[]); void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt void handleFTZ(Instruction 
*); void handleSET(CmpInstruction *); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -744,6 +744,7 @@ // restrictions, so move it into a separate LValue. bld.setPosition(i, false); i->op = OP_ADD; + i->dnz = 0; i->setSrc(1, bld.mkMov(bld.getSSA(type), i->getSrc(0), type)->getDef(0)); i->setSrc(0, i->getSrc(2)); i->src(0).mod = i->src(2).mod; @@ -1100,6 +1101,7 @@ if (imm0.isNegative()) i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG); i->op = OP_ADD; + i->dnz = 0; i->setSrc(s, i->getSrc(t)); i->src(s).mod = i->src(t).mod; } else @@ -1140,6 +1142,7 @@ i->setSrc(1, i->getSrc(2)); i->src(1).mod = i->src(2).mod; i->setSrc(2, NULL); + i->dnz = 0; i->op = OP_ADD; } else if (!isFloatType(i->dType) && !i->subOp && !i->src(t).mod && !i->src(2).mod) { @@ -1914,7 +1917,7 @@ if (minmax->src(0).mod == minmax->src(1).mod) { if (minmax->def(0).mayReplace(minmax->src(0))) { minmax->def(0).replace(minmax->src(0), false); - minmax->bb->remove(minmax); + delete_Instruction(prog, minmax); } else { minmax->op = OP_CVT; minmax->setSrc(1, NULL); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -359,6 +359,31 @@ "samp sc" }; +static const char *texMaskStr[16] = +{ + "____", + "r___", + "_g__", + "rg__", + "__b_", + "r_b_", + "_gb_", + "rgb_", + "___a", + "r__a", + "_g_a", + "rg_a", + "__ba", + "r_ba", + "_gba", + "rgba", +}; + +static const char 
*gatherCompStr[4] = +{ + "r", "g", "b", "a", +}; + #define PRINT(args...) \ do { \ pos += snprintf(&buf[pos], size - pos, args); \ @@ -587,7 +612,10 @@ if (asFlow()->target.bb) PRINT(" %sBB:%i", colour[TXT_BRA], asFlow()->target.bb->getId()); } else { - PRINT("%s ", operationStr[op]); + if (asTex()) + PRINT("%s%s ", operationStr[op], asTex()->tex.scalar ? "s" : ""); + else + PRINT("%s ", operationStr[op]); if (op == OP_LINTERP || op == OP_PINTERP) PRINT("%s ", interpStr[ipa]); switch (op) { @@ -651,10 +679,14 @@ } if (perPatch) PRINT("patch "); - if (asTex()) - PRINT("%s %s$r%u $s%u %s", asTex()->tex.target.getName(), - colour[TXT_MEM], asTex()->tex.r, asTex()->tex.s, - colour[TXT_INSN]); + if (asTex()) { + PRINT("%s %s$r%u $s%u ", asTex()->tex.target.getName(), + colour[TXT_MEM], asTex()->tex.r, asTex()->tex.s); + if (op == OP_TXG) + PRINT("%s ", gatherCompStr[asTex()->tex.gatherComp]); + PRINT("%s %s", texMaskStr[asTex()->tex.mask], colour[TXT_INSN]); + } + if (postFactor) PRINT("x2^%i ", postFactor); PRINT("%s%s", dnz ? "dnz " : (ftz ? 
"ftz " : ""), DataTypeStr[dType]); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -55,7 +55,7 @@ void periodicMask(DataFile f, uint32_t lock, uint32_t unlock); void intersect(DataFile f, const RegisterSet *); - bool assign(int32_t& reg, DataFile f, unsigned int size); + bool assign(int32_t& reg, DataFile f, unsigned int size, unsigned int maxReg); void release(DataFile f, int32_t reg, unsigned int size); void occupy(DataFile f, int32_t reg, unsigned int size); void occupy(const Value *); @@ -66,10 +66,8 @@ inline int getMaxAssigned(DataFile f) const { return fill[f]; } - inline unsigned int getFileSize(DataFile f, uint8_t regSize) const + inline unsigned int getFileSize(DataFile f) const { - if (restrictedGPR16Range && f == FILE_GPR && regSize == 2) - return (last[f] + 1) / 2; return last[f] + 1; } @@ -162,9 +160,9 @@ } bool -RegisterSet::assign(int32_t& reg, DataFile f, unsigned int size) +RegisterSet::assign(int32_t& reg, DataFile f, unsigned int size, unsigned int maxReg) { - reg = bits[f].findFreeRange(size); + reg = bits[f].findFreeRange(size, maxReg); if (reg < 0) return false; fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1)); @@ -261,6 +259,7 @@ bool insertConstraintMoves(); void condenseDefs(Instruction *); + void condenseDefs(Instruction *, const int first, const int last); void condenseSrcs(Instruction *, const int first, const int last); void addHazard(Instruction *i, const ValueRef *src); @@ -274,6 +273,9 @@ void texConstraintNVE0(TexInstruction *); void texConstraintGM107(TexInstruction *); + bool isScalarTexGM107(TexInstruction *); + void handleScalarTexGM107(TexInstruction *); + std::list constrList; const Target *targ; @@ -745,6 +747,7 @@ public: uint32_t 
degree; uint16_t degreeLimit; // if deg < degLimit, node is trivially colourable + uint16_t maxReg; uint16_t colors; DataFile f; @@ -800,7 +803,21 @@ Function *func; Program *prog; - static uint8_t relDegree[17][17]; + struct RelDegree { + uint8_t data[17][17]; + + RelDegree() { + for (int i = 1; i <= 16; ++i) + for (int j = 1; j <= 16; ++j) + data[i][j] = j * ((i + j - 1) / j); + } + + const uint8_t* operator[](std::size_t i) const { + return data[i]; + } + }; + + static const RelDegree relDegree; RegisterSet regs; @@ -812,7 +829,7 @@ std::list mustSpill; }; -uint8_t GCRA::relDegree[17][17]; +const GCRA::RelDegree GCRA::relDegree; GCRA::RIG_Node::RIG_Node() : Node(NULL), next(this), prev(this) { @@ -842,9 +859,11 @@ static bool isShortRegOp(Instruction *insn) { - // Immediates are always in src1. Every other situation can be resolved by + // Immediates are always in src1 (except zeroes, which end up getting + // replaced with a zero reg). Every other situation can be resolved by // using a long encoding. - return insn->srcExists(1) && insn->src(1).getFile() == FILE_IMMEDIATE; + return insn->srcExists(1) && insn->src(1).getFile() == FILE_IMMEDIATE && + insn->getSrc(1)->reg.data.u64; } // Check if this LValue is ever used in an instruction that can't be encoded @@ -880,12 +899,12 @@ weight = std::numeric_limits::infinity(); degree = 0; - int size = regs.getFileSize(f, lval->reg.size); + maxReg = regs.getFileSize(f); // On nv50, we lose a bit of gpr encoding when there's an embedded // immediate. 
- if (regs.restrictedGPR16Range && f == FILE_GPR && isShortRegVal(lval)) - size /= 2; - degreeLimit = size; + if (regs.restrictedGPR16Range && f == FILE_GPR && (lval->reg.size == 2 || isShortRegVal(lval))) + maxReg /= 2; + degreeLimit = maxReg; degreeLimit -= relDegree[1][colors] - 1; livei.insert(lval->livei); @@ -945,6 +964,8 @@ // add val's definitions to rep and extend the live interval of its RIG node rep->defs.insert(rep->defs.end(), val->defs.begin(), val->defs.end()); nRep->livei.unify(nVal->livei); + nRep->degreeLimit = MIN2(nRep->degreeLimit, nVal->degreeLimit); + nRep->maxReg = MIN2(nRep->maxReg, nVal->maxReg); return true; } @@ -1148,11 +1169,6 @@ spill(spill) { prog = func->getProgram(); - - // initialize relative degrees array - i takes away from j - for (int i = 1; i <= 16; ++i) - for (int j = 1; j <= 16; ++j) - relDegree[i][j] = j * ((i + j - 1) / j); } GCRA::~GCRA() @@ -1318,13 +1334,17 @@ } else if (!DLLIST_EMPTY(&hi)) { RIG_Node *best = hi.next; + unsigned bestMaxReg = best->maxReg; float bestScore = best->weight / (float)best->degree; - // spill candidate + // Spill candidate. First go through the ones with the highest max + // register, then the ones with lower. That way the ones with the + // lowest requirement will be allocated first, since it's a stack. 
for (RIG_Node *it = best->next; it != &hi; it = it->next) { float score = it->weight / (float)it->degree; - if (score < bestScore) { + if (score < bestScore || it->maxReg > bestMaxReg) { best = it; bestScore = score; + bestMaxReg = it->maxReg; } } if (isinf(bestScore)) { @@ -1425,7 +1445,7 @@ LValue *lval = node->getValue(); if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) regs.print(node->f); - bool ret = regs.assign(node->reg, node->f, node->colors); + bool ret = regs.assign(node->reg, node->f, node->colors, node->maxReg); if (ret) { INFO_DBG(prog->dbgFlags, REG_ALLOC, "assigned reg %i\n", node->reg); lval->compMask = node->getCompMask(); @@ -2048,24 +2068,35 @@ void RegAlloc::InsertConstraintsPass::condenseDefs(Instruction *insn) { - uint8_t size = 0; int n; - for (n = 0; insn->defExists(n) && insn->def(n).getFile() == FILE_GPR; ++n) - size += insn->getDef(n)->reg.size; - if (n < 2) + for (n = 0; insn->defExists(n) && insn->def(n).getFile() == FILE_GPR; ++n); + condenseDefs(insn, 0, n - 1); +} + +void +RegAlloc::InsertConstraintsPass::condenseDefs(Instruction *insn, + const int a, const int b) +{ + uint8_t size = 0; + if (a >= b) return; + for (int s = a; s <= b; ++s) + size += insn->getDef(s)->reg.size; + if (!size) + return; + LValue *lval = new_LValue(func, FILE_GPR); lval->reg.size = size; Instruction *split = new_Instruction(func, OP_SPLIT, typeOfSize(size)); split->setSrc(0, lval); - for (int d = 0; d < n; ++d) { - split->setDef(d, insn->getDef(d)); + for (int d = a; d <= b; ++d) { + split->setDef(d - a, insn->getDef(d)); insn->setDef(d, NULL); } - insn->setDef(0, lval); + insn->setDef(a, lval); - for (int k = 1, d = n; insn->defExists(d); ++d, ++k) { + for (int k = a + 1, d = b + 1; insn->defExists(d); ++d, ++k) { insn->setDef(k, insn->getDef(d)); insn->setDef(d, NULL); } @@ -2075,6 +2106,7 @@ insn->bb->insertAfter(insn, split); constrList.push_back(split); } + void RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn, const int a, const int b) @@ 
-2106,6 +2138,159 @@ constrList.push_back(merge); } +bool +RegAlloc::InsertConstraintsPass::isScalarTexGM107(TexInstruction *tex) +{ + if (tex->tex.sIndirectSrc >= 0 || + tex->tex.rIndirectSrc >= 0 || + tex->tex.derivAll) + return false; + + if (tex->tex.mask == 5 || tex->tex.mask == 6) + return false; + + switch (tex->op) { + case OP_TEX: + case OP_TXF: + case OP_TXG: + case OP_TXL: + break; + default: + return false; + } + + // legal variants: + // TEXS.1D.LZ + // TEXS.2D + // TEXS.2D.LZ + // TEXS.2D.LL + // TEXS.2D.DC + // TEXS.2D.LL.DC + // TEXS.2D.LZ.DC + // TEXS.A2D + // TEXS.A2D.LZ + // TEXS.A2D.LZ.DC + // TEXS.3D + // TEXS.3D.LZ + // TEXS.CUBE + // TEXS.CUBE.LL + + // TLDS.1D.LZ + // TLDS.1D.LL + // TLDS.2D.LZ + // TLSD.2D.LZ.AOFFI + // TLDS.2D.LZ.MZ + // TLDS.2D.LL + // TLDS.2D.LL.AOFFI + // TLDS.A2D.LZ + // TLDS.3D.LZ + + // TLD4S: all 2D/RECT variants and only offset + + switch (tex->op) { + case OP_TEX: + if (tex->tex.useOffsets) + return false; + + switch (tex->tex.target.getEnum()) { + case TEX_TARGET_1D: + case TEX_TARGET_2D_ARRAY_SHADOW: + return tex->tex.levelZero; + case TEX_TARGET_CUBE: + return !tex->tex.levelZero; + case TEX_TARGET_2D: + case TEX_TARGET_2D_ARRAY: + case TEX_TARGET_2D_SHADOW: + case TEX_TARGET_3D: + case TEX_TARGET_RECT: + case TEX_TARGET_RECT_SHADOW: + return true; + default: + return false; + } + + case OP_TXL: + if (tex->tex.useOffsets) + return false; + + switch (tex->tex.target.getEnum()) { + case TEX_TARGET_2D: + case TEX_TARGET_2D_SHADOW: + case TEX_TARGET_RECT: + case TEX_TARGET_RECT_SHADOW: + case TEX_TARGET_CUBE: + return true; + default: + return false; + } + + case OP_TXF: + switch (tex->tex.target.getEnum()) { + case TEX_TARGET_1D: + return !tex->tex.useOffsets; + case TEX_TARGET_2D: + case TEX_TARGET_RECT: + return true; + case TEX_TARGET_2D_ARRAY: + case TEX_TARGET_2D_MS: + case TEX_TARGET_3D: + return !tex->tex.useOffsets && tex->tex.levelZero; + default: + return false; + } + + case OP_TXG: + if 
(tex->tex.useOffsets > 1) + return false; + if (tex->tex.mask != 0x3 && tex->tex.mask != 0xf) + return false; + + switch (tex->tex.target.getEnum()) { + case TEX_TARGET_2D: + case TEX_TARGET_2D_MS: + case TEX_TARGET_2D_SHADOW: + case TEX_TARGET_RECT: + case TEX_TARGET_RECT_SHADOW: + return true; + default: + return false; + } + + default: + return false; + } +} + +void +RegAlloc::InsertConstraintsPass::handleScalarTexGM107(TexInstruction *tex) +{ + int defCount = tex->defCount(0xff); + int srcCount = tex->srcCount(0xff); + + tex->tex.scalar = true; + + // 1. handle defs + if (defCount > 3) + condenseDefs(tex, 2, 3); + if (defCount > 1) + condenseDefs(tex, 0, 1); + + // 2. handle srcs + // special case for TXF.A2D + if (tex->op == OP_TXF && tex->tex.target == TEX_TARGET_2D_ARRAY) { + assert(srcCount >= 3); + condenseSrcs(tex, 1, 2); + } else { + if (srcCount > 3) + condenseSrcs(tex, 2, 3); + // only if we have more than 2 sources + if (srcCount > 2) + condenseSrcs(tex, 0, 1); + } + + assert(!tex->defExists(2) && !tex->srcExists(2)); +} + void RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex) { @@ -2113,6 +2298,13 @@ if (isTextureOp(tex->op)) textureMask(tex); + + if (isScalarTexGM107(tex)) { + handleScalarTexGM107(tex); + return; + } + + assert(!tex->tex.scalar); condenseDefs(tex); if (isSurfaceOp(tex->op)) { @@ -2149,9 +2341,19 @@ if (!tex->tex.target.isArray() && tex->tex.useOffsets) s++; } - n = tex->srcCount(0xff) - s; + n = tex->srcCount(0xff, true) - s; + // TODO: Is this necessary? Perhaps just has to be aligned to the + // level that the first arg is, not necessarily to 4. This + // requirement has not been rigorously verified, as it has been on + // Kepler. 
+ if (n > 0 && n < 3) { + if (tex->srcExists(n + s)) // move potential predicate out of the way + tex->moveSources(n + s, 3 - n); + while (n < 3) + tex->setSrc(s + n++, new_LValue(func, FILE_GPR)); + } } else { - s = tex->srcCount(0xff); + s = tex->srcCount(0xff, true); n = 0; } @@ -2174,14 +2376,18 @@ } else if (isTextureOp(tex->op)) { int n = tex->srcCount(0xff, true); - if (n > 4) { - condenseSrcs(tex, 0, 3); - if (n > 5) // NOTE: first call modified positions already - condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1)); - } else - if (n > 1) { - condenseSrcs(tex, 0, n - 1); + int s = n > 4 ? 4 : n; + if (n > 4 && n < 7) { + if (tex->srcExists(n)) // move potential predicate out of the way + tex->moveSources(n, 7 - n); + + while (n < 7) + tex->setSrc(n++, new_LValue(func, FILE_GPR)); } + if (s > 1) + condenseSrcs(tex, 0, s - 1); + if (n > 4) + condenseSrcs(tex, 1, n - s); } } @@ -2318,6 +2524,7 @@ assert(cst->getSrc(s)->defs.size() == 1); // still SSA Instruction *defi = cst->getSrc(s)->defs.front()->getInsn(); + bool imm = defi->op == OP_MOV && defi->src(0).getFile() == FILE_IMMEDIATE; bool load = defi->op == OP_LOAD && diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -399,6 +399,7 @@ } } } + info->io.fp64 |= fp64; info->bin.relocData = emit->getRelocInfo(); info->bin.fixupData = emit->getFixupInfo(); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp 2019-03-31 
23:16:37.000000000 +0000 @@ -203,7 +203,7 @@ { switch (file) { case FILE_NULL: return 0; - case FILE_GPR: return 256; // in 16-bit units ** + case FILE_GPR: return 254; // in 16-bit units ** case FILE_PREDICATE: return 0; case FILE_FLAGS: return 4; case FILE_ADDRESS: return 4; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -311,12 +311,12 @@ } } -int BitSet::findFreeRange(unsigned int count) const +int BitSet::findFreeRange(unsigned int count, unsigned int max) const { const uint32_t m = (1 << count) - 1; - int pos = size; + int pos = max; unsigned int i; - const unsigned int end = (size + 31) / 32; + const unsigned int end = (max + 31) / 32; if (count == 1) { for (i = 0; i < end; ++i) { @@ -373,7 +373,7 @@ pos += i * 32; - return ((pos + count) <= size) ? pos : -1; + return ((pos + count) <= max) ? pos : -1; } void BitSet::print() const diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h 2019-03-31 23:16:37.000000000 +0000 @@ -539,8 +539,11 @@ return data[i / 32] & (((1 << n) - 1) << (i % 32)); } - // Find a range of size (<= 32) clear bits aligned to roundup_pow2(size). - int findFreeRange(unsigned int size) const; + // Find a range of count (<= 32) clear bits aligned to roundup_pow2(count). 
+ int findFreeRange(unsigned int count, unsigned int max) const; + inline int findFreeRange(unsigned int count) const { + return findFreeRange(count, size); + } BitSet& operator|=(const BitSet&); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nouveau_screen.h mesa-19.0.1/src/gallium/drivers/nouveau/nouveau_screen.h --- mesa-18.3.3/src/gallium/drivers/nouveau/nouveau_screen.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nouveau_screen.h 2019-03-31 23:16:37.000000000 +0000 @@ -3,6 +3,7 @@ #include "pipe/p_screen.h" #include "util/disk_cache.h" +#include "util/u_atomic.h" #include "util/u_memory.h" #ifdef DEBUG @@ -106,10 +107,10 @@ #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS # define NOUVEAU_DRV_STAT(s, n, v) do { \ - (s)->stats.named.n += (v); \ + p_atomic_add(&(s)->stats.named.n, (v)); \ } while(0) -# define NOUVEAU_DRV_STAT_RES(r, n, v) do { \ - nouveau_screen((r)->base.screen)->stats.named.n += (v); \ +# define NOUVEAU_DRV_STAT_RES(r, n, v) do { \ + p_atomic_add(&nouveau_screen((r)->base.screen)->stats.named.n, v); \ } while(0) # define NOUVEAU_DRV_STAT_IFD(x) x #else diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv30/nv30_context.c mesa-19.0.1/src/gallium/drivers/nouveau/nv30/nv30_context.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nv30/nv30_context.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv30/nv30_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -221,9 +221,7 @@ /*XXX: *cough* per-context pushbufs */ push = screen->base.pushbuf; nv30->base.pushbuf = push; - nv30->base.pushbuf->user_priv = &nv30->bufctx; /* hack at validate time */ - nv30->base.pushbuf->rsvd_kick = 16; /* hack in screen before first space */ - nv30->base.pushbuf->kick_notify = nv30_context_kick_notify; + push->kick_notify = nv30_context_kick_notify; nv30->base.invalidate_resource_storage = nv30_invalidate_resource_storage; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c 
mesa-19.0.1/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c 2019-03-31 23:16:37.000000000 +0000 @@ -171,7 +171,7 @@ * code */ if (fp != nv30->state.fragprog) - PUSH_RESET(nv30->base.pushbuf, BUFCTX_FRAGPROG); + nouveau_bufctx_reset(nv30->bufctx, BUFCTX_FRAGPROG); nv30->fragprog.program = fp; nv30->dirty |= NV30_NEW_FRAGPROG; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv30/nv30_miptree.c mesa-19.0.1/src/gallium/drivers/nouveau/nv30/nv30_miptree.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nv30/nv30_miptree.c 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv30/nv30_miptree.c 2019-03-31 23:16:37.000000000 +0000 @@ -116,8 +116,22 @@ rect->x0 = util_format_get_nblocksx(pt->format, x) << mt->ms_x; rect->y0 = util_format_get_nblocksy(pt->format, y) << mt->ms_y; - rect->x1 = rect->x0 + (w << mt->ms_x); - rect->y1 = rect->y0 + (h << mt->ms_y); + rect->x1 = rect->x0 + (util_format_get_nblocksx(pt->format, w) << mt->ms_x); + rect->y1 = rect->y0 + (util_format_get_nblocksy(pt->format, h) << mt->ms_y); + + /* XXX There's some indication that swizzled formats > 4 bytes are treated + * differently. However that only applies to RGBA16_FLOAT, RGBA32_FLOAT, + * and the DXT* formats. The former aren't properly supported yet, and the + * latter avoid swizzled layouts. 
+ + if (mt->swizzled && rect->cpp > 4) { + unsigned scale = rect->cpp / 4; + rect->w *= scale; + rect->x0 *= scale; + rect->x1 *= scale; + rect->cpp = 4; + } + */ } void @@ -265,6 +279,7 @@ { struct nv30_context *nv30 = nv30_context(pipe); struct nouveau_device *dev = nv30->screen->base.device; + struct nv30_miptree *mt = nv30_miptree(pt); struct nv30_transfer *tx; unsigned access = 0; int ret; @@ -285,10 +300,11 @@ tx->nblocksy = util_format_get_nblocksy(pt->format, box->height); define_rect(pt, level, box->z, box->x, box->y, - tx->nblocksx, tx->nblocksy, &tx->img); + box->width, box->height, &tx->img); ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, - tx->base.layer_stride, NULL, &tx->tmp.bo); + tx->base.layer_stride * tx->base.box.depth, NULL, + &tx->tmp.bo); if (ret) { pipe_resource_reference(&tx->base.resource, NULL); FREE(tx); @@ -308,8 +324,25 @@ tx->tmp.y1 = tx->tmp.h; tx->tmp.z = 0; - if (usage & PIPE_TRANSFER_READ) - nv30_transfer_rect(nv30, NEAREST, &tx->img, &tx->tmp); + if (usage & PIPE_TRANSFER_READ) { + bool is_3d = mt->base.base.target == PIPE_TEXTURE_3D; + unsigned offset = tx->img.offset; + unsigned z = tx->img.z; + unsigned i; + for (i = 0; i < box->depth; ++i) { + nv30_transfer_rect(nv30, NEAREST, &tx->img, &tx->tmp); + if (is_3d && mt->swizzled) + tx->img.z++; + else if (is_3d) + tx->img.offset += mt->level[level].zslice_size; + else + tx->img.offset += mt->layer_size; + tx->tmp.offset += tx->base.layer_stride; + } + tx->img.z = z; + tx->img.offset = offset; + tx->tmp.offset = 0; + } if (tx->tmp.bo->map) { *ptransfer = &tx->base; @@ -338,9 +371,21 @@ { struct nv30_context *nv30 = nv30_context(pipe); struct nv30_transfer *tx = nv30_transfer(ptx); + struct nv30_miptree *mt = nv30_miptree(tx->base.resource); + unsigned i; if (ptx->usage & PIPE_TRANSFER_WRITE) { - nv30_transfer_rect(nv30, NEAREST, &tx->tmp, &tx->img); + bool is_3d = mt->base.base.target == PIPE_TEXTURE_3D; + for (i = 0; i < tx->base.box.depth; ++i) { + 
nv30_transfer_rect(nv30, NEAREST, &tx->tmp, &tx->img); + if (is_3d && mt->swizzled) + tx->img.z++; + else if (is_3d) + tx->img.offset += mt->level[tx->base.level].zslice_size; + else + tx->img.offset += mt->layer_size; + tx->tmp.offset += tx->base.layer_stride; + } /* Allow the copies above to finish executing before freeing the source */ nouveau_fence_work(nv30->screen->base.fence.current, @@ -404,8 +449,7 @@ !util_is_power_of_two_or_zero(pt->width0) || !util_is_power_of_two_or_zero(pt->height0) || !util_is_power_of_two_or_zero(pt->depth0) || - util_format_is_compressed(pt->format) || - util_format_is_float(pt->format) || mt->ms_mode) { + mt->ms_mode) { mt->uniform_pitch = util_format_get_nblocksx(pt->format, w) * blocksz; mt->uniform_pitch = align(mt->uniform_pitch, 64); if (pt->bind & PIPE_BIND_SCANOUT) { @@ -418,14 +462,20 @@ } } - if (!mt->uniform_pitch) + if (util_format_is_compressed(pt->format)) { + // Compressed (DXT) formats are packed tightly. We don't mark them as + // swizzled, since their layout is largely linear. However we do end up + // omitting the LINEAR flag when texturing them, as the levels are not + // uniformly sized (for POT sizes). 
+ } else if (!mt->uniform_pitch) { mt->swizzled = true; + } size = 0; for (l = 0; l <= pt->last_level; l++) { struct nv30_miptree_level *lvl = &mt->level[l]; unsigned nbx = util_format_get_nblocksx(pt->format, w); - unsigned nby = util_format_get_nblocksx(pt->format, h); + unsigned nby = util_format_get_nblocksy(pt->format, h); lvl->offset = size; lvl->pitch = mt->uniform_pitch; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv30/nv30_screen.c mesa-19.0.1/src/gallium/drivers/nouveau/nv30/nv30_screen.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nv30/nv30_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv30/nv30_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -77,6 +77,11 @@ return 1; case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: return 2048; + case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET: + return 8 * 1024 * 1024; + case PIPE_CAP_MAX_VARYINGS: + return 8; + /* supported capabilities */ case PIPE_CAP_ANISOTROPIC_FILTER: case PIPE_CAP_POINT_SPRITE: @@ -241,7 +246,6 @@ case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE: case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS: case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS: - case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET: return 0; case PIPE_CAP_MAX_GS_INVOCATIONS: @@ -434,6 +438,12 @@ if (MAX2(1, sample_count) != MAX2(1, storage_sample_count)) return false; + /* No way to render to a swizzled 3d texture. We don't necessarily know if + * it's swizzled or not here, but we have to assume anyways. 
+ */ + if (target == PIPE_TEXTURE_3D && (bindings & PIPE_BIND_RENDER_TARGET)) + return false; + /* shared is always supported */ bindings &= ~PIPE_BIND_SHARED; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv30/nv30_texture.c mesa-19.0.1/src/gallium/drivers/nouveau/nv30/nv30_texture.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nv30/nv30_texture.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv30/nv30_texture.c 2019-03-31 23:16:37.000000000 +0000 @@ -287,7 +287,7 @@ so->npot_size0 = (pt->width0 << 16) | pt->height0; if (eng3d->oclass >= NV40_3D_CLASS) { so->npot_size1 = (pt->depth0 << 20) | mt->uniform_pitch; - if (!mt->swizzled) + if (mt->uniform_pitch) so->fmt |= NV40_3D_TEX_FORMAT_LINEAR; so->fmt |= 0x00008000; so->fmt |= (pt->last_level + 1) << NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_context.c mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_context.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_context.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -379,6 +379,15 @@ util_dynarray_init(&nv50->global_residents, NULL); + // Make sure that the first TSC entry has SRGB conversion bit set, since we + // use it as a fallback. + if (!screen->tsc.entries[0]) + nv50_upload_tsc0(nv50); + + // And mark samplers as dirty so that the first slot would get bound to the + // zero entry if it's not otherwise set. 
+ nv50->dirty_3d |= NV50_NEW_3D_SAMPLERS; + return pipe; out_err: diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_context.h mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_context.h --- mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_context.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -256,6 +256,7 @@ void nv50_validate_textures(struct nv50_context *); void nv50_validate_samplers(struct nv50_context *); void nv50_upload_ms_info(struct nouveau_pushbuf *); +void nv50_upload_tsc0(struct nv50_context *); struct pipe_sampler_view * nv50_create_texture_view(struct pipe_context *, diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_query.c mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_query.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_query.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_query.c 2019-03-31 23:16:37.000000000 +0000 @@ -98,12 +98,10 @@ case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: + if (hq->state == NV50_HW_QUERY_STATE_READY) + wait = true; if (likely(!condition)) { - if (unlikely(hq->nesting)) - cond = wait ? NV50_3D_COND_MODE_NOT_EQUAL : - NV50_3D_COND_MODE_ALWAYS; - else - cond = NV50_3D_COND_MODE_RES_NON_ZERO; + cond = wait ? NV50_3D_COND_MODE_NOT_EQUAL : NV50_3D_COND_MODE_ALWAYS; } else { cond = wait ? 
NV50_3D_COND_MODE_EQUAL : NV50_3D_COND_MODE_ALWAYS; } @@ -129,7 +127,7 @@ PUSH_SPACE(push, 9); - if (wait) { + if (wait && hq->state != NV50_HW_QUERY_STATE_READY) { BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1); PUSH_DATA (push, 0); } diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c 2019-03-31 23:16:37.000000000 +0000 @@ -29,11 +29,6 @@ #include "nv50/nv50_query_hw_sm.h" #include "nv_object.xml.h" -#define NV50_HW_QUERY_STATE_READY 0 -#define NV50_HW_QUERY_STATE_ACTIVE 1 -#define NV50_HW_QUERY_STATE_ENDED 2 -#define NV50_HW_QUERY_STATE_FLUSHED 3 - /* XXX: Nested queries, and simultaneous queries on multiple gallium contexts * (since we use only a single GPU channel per screen) will not work properly. * @@ -158,8 +153,7 @@ case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: - hq->nesting = nv50->screen->num_occlusion_queries_active++; - if (hq->nesting) { + if (nv50->screen->num_occlusion_queries_active++) { nv50_hw_query_get(push, q, 0x10, 0x0100f002); } else { PUSH_SPACE(push, 4); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h --- mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,6 +6,11 @@ #include "nv50_query.h" +#define NV50_HW_QUERY_STATE_READY 0 +#define NV50_HW_QUERY_STATE_ACTIVE 1 +#define NV50_HW_QUERY_STATE_ENDED 2 +#define NV50_HW_QUERY_STATE_FLUSHED 3 + #define NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) struct nv50_hw_query; @@ -29,7 +34,6 @@ uint8_t state; bool is64bit; uint8_t 
rotate; - int nesting; /* only used for occlusion queries */ struct nouveau_mm_allocation *mm; struct nouveau_fence *fence; }; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_screen.c mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_screen.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -136,6 +136,8 @@ return 0; case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: return 2048; + case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET: + return 2047; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 256; case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: @@ -152,6 +154,10 @@ return (class_3d >= NVA3_3D_CLASS) ? 4 : 0; case PIPE_CAP_MAX_WINDOW_RECTANGLES: return NV50_MAX_WINDOW_RECTANGLES; + case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET: + return 16 * 1024 * 1024; + case PIPE_CAP_MAX_VARYINGS: + return 15; /* supported caps */ case PIPE_CAP_TEXTURE_MIRROR_CLAMP: @@ -211,6 +217,7 @@ case PIPE_CAP_TGSI_CLOCK: case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: + case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP: return 1; /* class_3d >= NVA0_3D_CLASS; */ @@ -301,10 +308,14 @@ case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE: case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS: case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS: - case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET: case PIPE_CAP_MAX_COMBINED_SHADER_BUFFERS: case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTERS: case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS: + case PIPE_CAP_SURFACE_SAMPLE_COUNT: + case PIPE_CAP_TGSI_ATOMFADD: + case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE: + case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND: + case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS: return 0; case PIPE_CAP_VENDOR_ID: diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_state.c 
mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_state.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_state.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_state.c 2019-03-31 23:16:37.000000000 +0000 @@ -967,7 +967,7 @@ util_copy_framebuffer_state(&nv50->framebuffer, fb); - nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER; + nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_TEXTURES; } static void diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_tex.c mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_tex.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_tex.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_tex.c 2019-03-31 23:16:37.000000000 +0000 @@ -380,6 +380,16 @@ } nv50->state.num_samplers[s] = nv50->num_samplers[s]; + // TXF, in unlinked tsc mode, will always use sampler 0. So we have to + // ensure that it remains bound. Its contents don't matter, all samplers we + // ever create have the SRGB_CONVERSION bit set, so as long as the first + // entry is initialized, we're good to go. This is the only bit that has + // any effect on what TXF does. 
+ if (!nv50->samplers[s][0]) { + BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1); + PUSH_DATA (push, 1); + } + return need_flush; } @@ -451,3 +461,14 @@ BEGIN_NI04(push, NV50_3D(CB_DATA(0)), ARRAY_SIZE(msaa_sample_xy_offsets)); PUSH_DATAp(push, msaa_sample_xy_offsets, ARRAY_SIZE(msaa_sample_xy_offsets)); } + +void nv50_upload_tsc0(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + u32 data[8] = { G80_TSC_0_SRGB_CONVERSION }; + nv50_sifc_linear_u8(&nv50->base, nv50->screen->txc, + 65536 /* + tsc->id * 32 */, + NOUVEAU_BO_VRAM, 32, data); + BEGIN_NV04(push, NV50_3D(TSC_FLUSH), 1); + PUSH_DATA (push, 0); +} diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c 2019-03-31 23:16:37.000000000 +0000 @@ -423,6 +423,7 @@ nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) { struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nvc0_screen *screen = nvc0->screen; struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_program *cp = nvc0->compprog; int ret; @@ -463,12 +464,14 @@ PUSH_DATA (push, (info->block[1] << 16) | info->block[0]); PUSH_DATA (push, info->block[2]); + nouveau_pushbuf_space(push, 32, 2, 1); + PUSH_REFN(push, screen->text, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD); + if (unlikely(info->indirect)) { struct nv04_resource *res = nv04_resource(info->indirect); uint32_t offset = res->offset + info->indirect_offset; unsigned macro = NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT; - nouveau_pushbuf_space(push, 16, 0, 1); PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain); PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3)); nouveau_pushbuf_data(push, res->bo, offset, diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_context.c 
mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_context.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_context.c 2018-01-12 19:24:23.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -449,11 +449,9 @@ flags = NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD; - BCTX_REFN_bo(nvc0->bufctx_3d, 3D_TEXT, flags, screen->text); BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->uniform_bo); BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->txc); if (screen->compute) { - BCTX_REFN_bo(nvc0->bufctx_cp, CP_TEXT, flags, screen->text); BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->uniform_bo); BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->txc); } @@ -478,6 +476,24 @@ util_dynarray_init(&nvc0->global_residents, NULL); + // Make sure that the first TSC entry has SRGB conversion bit set, since we + // use it as a fallback on Fermi for TXF, and on Kepler+ generations for + // FBFETCH handling (which also uses TXF). + // + // NOTE: Preliminary testing suggests that this isn't necessary at all at + // least on GM20x (untested on Kepler). However this is ~free, so no reason + // not to do it. 
+ if (!screen->tsc.entries[0]) + nvc0_upload_tsc0(nvc0); + + // On Fermi, mark samplers dirty so that the proper binding can happen + if (screen->base.class_3d < NVE4_3D_CLASS) { + for (int s = 0; s < 6; s++) + nvc0->samplers_dirty[s] = 1; + nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS; + nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS; + } + return pipe; out_err: diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_context.h mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_context.h --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -365,6 +365,7 @@ void nvc0_validate_suf(struct nvc0_context *nvc0, int s); void nvc0_validate_textures(struct nvc0_context *); void nvc0_validate_samplers(struct nvc0_context *); +void nvc0_upload_tsc0(struct nvc0_context *); void nve4_set_tex_handles(struct nvc0_context *); void nvc0_validate_surfaces(struct nvc0_context *); void nve4_set_surface_info(struct nouveau_pushbuf *, @@ -433,6 +434,7 @@ /* nvc0_push.c */ void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *); +void nvc0_push_vbo_indirect(struct nvc0_context *, const struct pipe_draw_info *); /* nve4_compute.c */ void nve4_launch_grid(struct pipe_context *, const struct pipe_grid_info *); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_program.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_program.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 2019-03-31 23:16:37.000000000 +0000 @@ -831,16 +831,6 @@ NOUVEAU_ERR("Error allocating TEXT area: %d\n", ret); return false; } - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEXT); - BCTX_REFN_bo(nvc0->bufctx_3d, 3D_TEXT, - NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD, - screen->text); - if (screen->compute) { - 
nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_TEXT); - BCTX_REFN_bo(nvc0->bufctx_cp, CP_TEXT, - NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD, - screen->text); - } /* Re-upload the builtin function into the new code segment. */ nvc0_program_library_upload(nvc0); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_query.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_query.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 2018-04-11 19:02:35.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 2019-03-31 23:16:37.000000000 +0000 @@ -121,12 +121,10 @@ case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: + if (hq->state == NVC0_HW_QUERY_STATE_READY) + wait = true; if (likely(!condition)) { - if (unlikely(hq->nesting)) - cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL : - NVC0_3D_COND_MODE_ALWAYS; - else - cond = NVC0_3D_COND_MODE_RES_NON_ZERO; + cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL : NVC0_3D_COND_MODE_ALWAYS; } else { cond = wait ? 
NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS; } @@ -151,7 +149,7 @@ return; } - if (wait) + if (wait && hq->state != NVC0_HW_QUERY_STATE_READY) nvc0_hw_query_fifo_wait(nvc0, q); PUSH_SPACE(push, 10); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c 2018-04-11 19:02:35.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c 2019-03-31 23:16:37.000000000 +0000 @@ -28,11 +28,6 @@ #include "nvc0/nvc0_query_hw_metric.h" #include "nvc0/nvc0_query_hw_sm.h" -#define NVC0_HW_QUERY_STATE_READY 0 -#define NVC0_HW_QUERY_STATE_ACTIVE 1 -#define NVC0_HW_QUERY_STATE_ENDED 2 -#define NVC0_HW_QUERY_STATE_FLUSHED 3 - #define NVC0_HW_QUERY_ALLOC_SPACE 256 bool @@ -158,14 +153,18 @@ case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: - hq->nesting = nvc0->screen->num_occlusion_queries_active++; - if (hq->nesting) { + if (nvc0->screen->num_occlusion_queries_active++) { nvc0_hw_query_get(push, q, 0x10, 0x0100f002); } else { PUSH_SPACE(push, 3); BEGIN_NVC0(push, NVC0_3D(COUNTER_RESET), 1); PUSH_DATA (push, NVC0_3D_COUNTER_RESET_SAMPLECNT); IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1); + /* Given that the counter is reset, the contents at 0x10 are + * equivalent to doing the query -- we would get hq->sequence as the + * payload and 0 as the reported value. This is already set up above + * as in the hq->rotate case. 
+ */ } break; case PIPE_QUERY_PRIMITIVES_GENERATED: @@ -199,6 +198,7 @@ nvc0_hw_query_get(push, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */ nvc0_hw_query_get(push, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */ nvc0_hw_query_get(push, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */ + ((uint64_t *)hq->data)[(12 + 10) * 2] = 0; break; default: break; @@ -271,6 +271,7 @@ nvc0_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */ nvc0_hw_query_get(push, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */ nvc0_hw_query_get(push, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */ + ((uint64_t *)hq->data)[10 * 2] = 0; break; case PIPE_QUERY_TIMESTAMP_DISJOINT: /* This query is not issued on GPU because disjoint is forced to false */ diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,6 +6,11 @@ #include "nvc0_query.h" +#define NVC0_HW_QUERY_STATE_READY 0 +#define NVC0_HW_QUERY_STATE_ACTIVE 1 +#define NVC0_HW_QUERY_STATE_ENDED 2 +#define NVC0_HW_QUERY_STATE_FLUSHED 3 + #define NVC0_HW_QUERY_TFB_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) struct nvc0_hw_query; @@ -29,7 +34,6 @@ uint8_t state; boolean is64bit; uint8_t rotate; - int nesting; /* only used for occlusion queries */ struct nouveau_mm_allocation *mm; struct nouveau_fence *fence; }; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -154,6 +154,8 @@ return 1 << 27; case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: return 2048; + case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET: + return 
2047; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 256; case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: @@ -178,6 +180,15 @@ return NVC0_MAX_WINDOW_RECTANGLES; case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS: return class_3d >= GM200_3D_CLASS ? 8 : 0; + case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET: + return 64 * 1024 * 1024; + case PIPE_CAP_MAX_VARYINGS: + /* NOTE: These only count our slots for GENERIC varyings. + * The address space may be larger, but the actual hard limit seems to be + * less than what the address space layout permits, so don't add TEXCOORD, + * COLOR, etc. here. + */ + return 0x1f0 / 16; /* supported caps */ case PIPE_CAP_TEXTURE_MIRROR_CLAMP: @@ -262,6 +273,7 @@ case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: case PIPE_CAP_QUERY_SO_OVERFLOW: + case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL: return 1; case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 
1 : 0; @@ -271,6 +283,8 @@ case PIPE_CAP_TGSI_BALLOT: case PIPE_CAP_BINDLESS_TEXTURE: return class_3d >= NVE4_3D_CLASS; + case PIPE_CAP_TGSI_ATOMFADD: + return class_3d < GM107_3D_CLASS; /* needs additional lowering */ case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE: case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: @@ -324,10 +338,13 @@ case PIPE_CAP_CONSTBUF0_FLAGS: case PIPE_CAP_PACKED_UNIFORMS: case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES: - case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET: case PIPE_CAP_MAX_COMBINED_SHADER_BUFFERS: case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTERS: case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS: + case PIPE_CAP_SURFACE_SAMPLE_COUNT: + case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE: + case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND: + case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS: return 0; case PIPE_CAP_VENDOR_ID: @@ -384,18 +401,6 @@ case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: return 16; case PIPE_SHADER_CAP_MAX_INPUTS: - if (shader == PIPE_SHADER_VERTEX) - return 32; - /* NOTE: These only count our slots for GENERIC varyings. - * The address space may be larger, but the actual hard limit seems to be - * less than what the address space layout permits, so don't add TEXCOORD, - * COLOR, etc. here. - */ - if (shader == PIPE_SHADER_FRAGMENT) - return 0x1f0 / 16; - /* Actually this counts CLIPVERTEX, which occupies the last generic slot, - * and excludes 0x60 per-patch inputs. 
- */ return 0x200 / 16; case PIPE_SHADER_CAP_MAX_OUTPUTS: return 32; @@ -625,7 +630,6 @@ nouveau_heap_destroy(&screen->lib_code); nouveau_heap_destroy(&screen->text_heap); - FREE(screen->default_tsc); FREE(screen->tic.entries); nouveau_object_del(&screen->eng3d); @@ -1279,8 +1283,8 @@ for (i = 0; i < NVC0_MAX_VIEWPORTS; i++) { BEGIN_NVC0(push, NVC0_3D(SCISSOR_ENABLE(i)), 3); PUSH_DATA (push, 1); - PUSH_DATA (push, 8192 << 16); - PUSH_DATA (push, 8192 << 16); + PUSH_DATA (push, 16384 << 16); + PUSH_DATA (push, 16384 << 16); } #define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n); @@ -1384,9 +1388,6 @@ if (!nvc0_blitter_create(screen)) goto fail; - screen->default_tsc = CALLOC_STRUCT(nv50_tsc_entry); - screen->default_tsc->tsc[0] = G80_TSC_0_SRGB_CONVERSION; - nouveau_fence_new(&screen->base, &screen->base.fence.current); return &screen->base; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h 2019-03-31 23:16:37.000000000 +0000 @@ -89,8 +89,6 @@ struct nvc0_blitter *blitter; - struct nv50_tsc_entry *default_tsc; - struct { void **entries; int next; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_state.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_state.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_state.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_state.c 2019-03-31 23:16:37.000000000 +0000 @@ -852,7 +852,9 @@ util_copy_framebuffer_state(&nvc0->framebuffer, fb); - nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER | NVC0_NEW_3D_SAMPLE_LOCATIONS; + nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER | NVC0_NEW_3D_SAMPLE_LOCATIONS | + NVC0_NEW_3D_TEXTURES; + nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES; } static void diff -Nru 
mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c 2019-03-31 23:16:37.000000000 +0000 @@ -1,4 +1,4 @@ - +#include "util/u_format.h" #include "util/u_framebuffer.h" #include "util/u_math.h" #include "util/u_viewport.h" @@ -831,20 +831,6 @@ pipe_sampler_view_reference(&nvc0->fbtexture, NULL); nvc0->fbtexture = new_view; - if (screen->default_tsc->id < 0) { - struct nv50_tsc_entry *tsc = nv50_tsc_entry(screen->default_tsc); - tsc->id = nvc0_screen_tsc_alloc(screen, tsc); - nvc0->base.push_data(&nvc0->base, screen->txc, 65536 + tsc->id * 32, - NV_VRAM_DOMAIN(&screen->base), 32, tsc->tsc); - screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32); - - IMMED_NVC0(push, NVC0_3D(TSC_FLUSH), 0); - if (screen->base.class_3d < NVE4_3D_CLASS) { - BEGIN_NVC0(push, NVC0_3D(BIND_TSC2(0)), 1); - PUSH_DATA (push, (tsc->id << 12) | 1); - } - } - if (new_view) { struct nv50_tic_entry *tic = nv50_tic_entry(new_view); assert(tic->id < 0); @@ -860,7 +846,7 @@ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 1); PUSH_DATA (push, NVC0_CB_AUX_FB_TEX_INFO); - PUSH_DATA (push, (screen->default_tsc->id << 20) | tic->id); + PUSH_DATA (push, (0 << 20) | tic->id); } else { BEGIN_NVC0(push, NVC0_3D(BIND_TIC2(0)), 1); PUSH_DATA (push, (tic->id << 9) | 1); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c 2018-10-21 19:21:32.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c 2019-03-31 23:16:37.000000000 +0000 @@ -1178,6 +1178,7 @@ nvc0->cond_cond, nvc0->cond_mode); nouveau_bufctx_reset(nvc0->bufctx_3d, 
NVC0_BIND_3D_VTX_TMP); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEXT); nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB); nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 0)); nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 1)); @@ -1200,6 +1201,7 @@ static void nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) { + struct nvc0_screen *screen = nvc0->screen; struct nvc0_blitctx *blit = nvc0->blit; struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct pipe_resource *src = info->src.resource; @@ -1301,6 +1303,8 @@ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD, vtxbuf_bo); + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_TEXT, + NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD, screen->text); nouveau_pushbuf_validate(push); BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(0)), 4); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c 2019-03-31 23:16:37.000000000 +0000 @@ -657,6 +657,19 @@ nvc0->state.num_samplers[s] = nvc0->num_samplers[s]; + // TXF, in unlinked tsc mode, will always use sampler 0. So we have to + // ensure that it remains bound. Its contents don't matter, all samplers we + // ever create have the SRGB_CONVERSION bit set, so as long as the first + // entry is initialized, we're good to go. This is the only bit that has + // any effect on what TXF does. + if ((nvc0->samplers_dirty[s] & 1) && !nvc0->samplers[s][0]) { + if (n == 0) + n = 1; + // We're guaranteed that the first command refers to the first slot, so + // we're not overwriting a valid entry. 
+ commands[0] = (0 << 12) | (0 << 4) | 1; + } + if (n) { if (unlikely(s == 5)) BEGIN_NIC0(push, NVC0_CP(BIND_TSC), n); @@ -728,6 +741,18 @@ nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS; } +void +nvc0_upload_tsc0(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + u32 data[8] = { G80_TSC_0_SRGB_CONVERSION }; + nvc0->base.push_data(&nvc0->base, nvc0->screen->txc, + 65536 /*+ tsc->id * 32*/, + NV_VRAM_DOMAIN(&nvc0->screen->base), 32, data); + BEGIN_NVC0(push, NVC0_3D(TSC_FLUSH), 1); + PUSH_DATA (push, 0); +} + /* Upload the "diagonal" entries for the possible texture sources ($t == $s). * At some point we might want to get a list of the combinations used by a * shader and fill in those entries instead of having it extract the handles. @@ -1026,21 +1051,13 @@ } else { struct nv50_miptree *mt = nv50_miptree(&res->base); struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level]; - const unsigned z = view->u.tex.first_layer; + unsigned z = view->u.tex.first_layer; - if (z) { - if (mt->layout_3d) { - address += nvc0_mt_zslice_offset(mt, view->u.tex.level, z); - /* doesn't work if z passes z-tile boundary */ - if (depth > 1) { - pipe_debug_message(&nvc0->base.debug, CONFORMANCE, - "3D images are not really supported!"); - debug_printf("3D images are not really supported!\n"); - } - } else { - address += mt->layer_stride * z; - } + if (!mt->layout_3d) { + address += mt->layer_stride * z; + z = 0; } + address += lvl->offset; info[0] = address >> 8; @@ -1055,7 +1072,8 @@ info[6] = depth - 1; info[6] |= (lvl->tile_mode & 0xf00) << 21; info[6] |= NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 22; - info[7] = 0; + info[7] = mt->layout_3d ? 
1 : 0; + info[7] |= z << 16; info[14] = mt->ms_x; info[15] = mt->ms_y; } diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c 2018-01-12 19:24:23.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c 2019-03-31 23:16:37.000000000 +0000 @@ -919,6 +919,7 @@ struct nvc0_context *nvc0 = nvc0_context(pipe); struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_screen *screen = nvc0->screen; + unsigned vram_domain = NV_VRAM_DOMAIN(&screen->base); int s; /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */ @@ -982,6 +983,9 @@ resident->flags); } + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_TEXT, vram_domain | NOUVEAU_BO_RD, + screen->text); + nvc0_state_validate_3d(nvc0, ~0); if (nvc0->vertprog->vp.need_draw_parameters && !info->indirect) { @@ -1036,7 +1040,10 @@ } if (nvc0->state.vbo_mode) { - nvc0_push_vbo(nvc0, info); + if (info->indirect) + nvc0_push_vbo_indirect(nvc0, info); + else + nvc0_push_vbo(nvc0, info); goto cleanup; } @@ -1092,6 +1099,7 @@ nouveau_pushbuf_bufctx(push, NULL); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEXT); nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_IDX); nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_BINDLESS); } diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c 2019-03-31 23:16:37.000000000 +0000 @@ -466,6 +466,83 @@ } } +typedef struct { + uint32_t count; + uint32_t primCount; + uint32_t first; + uint32_t baseInstance; +} DrawArraysIndirectCommand; + +typedef struct { + uint32_t count; + uint32_t primCount; + uint32_t firstIndex; + int32_t baseVertex; + uint32_t baseInstance; +} 
DrawElementsIndirectCommand; + +void +nvc0_push_vbo_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info) +{ + /* The strategy here is to just read the commands from the indirect buffer + * and do the draws. This is suboptimal, but will only happen in the case + * that conversion is required for FIXED or DOUBLE inputs. + */ + struct nvc0_screen *screen = nvc0->screen; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nv04_resource *buf = nv04_resource(info->indirect->buffer); + struct nv04_resource *buf_count = nv04_resource(info->indirect->indirect_draw_count); + unsigned i; + + unsigned draw_count = info->indirect->draw_count; + if (buf_count) { + uint32_t *count = nouveau_resource_map_offset( + &nvc0->base, buf_count, info->indirect->indirect_draw_count_offset, + NOUVEAU_BO_RD); + draw_count = *count; + } + + uint8_t *buf_data = nouveau_resource_map_offset( + &nvc0->base, buf, info->indirect->offset, NOUVEAU_BO_RD); + struct pipe_draw_info single = *info; + single.indirect = NULL; + for (i = 0; i < draw_count; i++, buf_data += info->indirect->stride) { + if (info->index_size) { + DrawElementsIndirectCommand *cmd = (void *)buf_data; + single.start = info->start + cmd->firstIndex; + single.count = cmd->count; + single.start_instance = cmd->baseInstance; + single.instance_count = cmd->primCount; + single.index_bias = cmd->baseVertex; + } else { + DrawArraysIndirectCommand *cmd = (void *)buf_data; + single.start = cmd->first; + single.count = cmd->count; + single.start_instance = cmd->baseInstance; + single.instance_count = cmd->primCount; + } + + if (nvc0->vertprog->vp.need_draw_parameters) { + PUSH_SPACE(push, 9); + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, NVC0_CB_AUX_SIZE); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0)); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0)); + BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 3); + PUSH_DATA (push, NVC0_CB_AUX_DRAW_INFO); + PUSH_DATA (push, 
single.index_bias); + PUSH_DATA (push, single.start_instance); + PUSH_DATA (push, single.drawid + i); + } + + nvc0_push_vbo(nvc0, &single); + } + + nouveau_resource_unmap(buf); + if (buf_count) + nouveau_resource_unmap(buf_count); +} + void nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) { diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nve4_compute.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nve4_compute.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nve4_compute.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nve4_compute.c 2019-03-31 23:16:37.000000000 +0000 @@ -696,6 +696,7 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) { struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nvc0_screen *screen = nvc0->screen; struct nouveau_pushbuf *push = nvc0->base.pushbuf; void *desc; uint64_t desc_gpuaddr; @@ -769,6 +770,8 @@ } /* upload descriptor and flush */ + nouveau_pushbuf_space(push, 32, 1, 0); + PUSH_REFN(push, screen->text, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD); BEGIN_NVC0(push, NVE4_CP(LAUNCH_DESC_ADDRESS), 1); PUSH_DATA (push, desc_gpuaddr >> 8); BEGIN_NVC0(push, NVE4_CP(LAUNCH), 1); diff -Nru mesa-18.3.3/src/gallium/drivers/pl111/Android.mk mesa-19.0.1/src/gallium/drivers/pl111/Android.mk --- mesa-18.3.3/src/gallium/drivers/pl111/Android.mk 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/pl111/Android.mk 1970-01-01 00:00:00.000000000 +0000 @@ -1,39 +0,0 @@ -# Copyright (C) 2014 Emil Velikov -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to 
the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -LOCAL_PATH := $(call my-dir) - -# get C_SOURCES -include $(LOCAL_PATH)/Makefile.sources - -include $(CLEAR_VARS) - -LOCAL_SRC_FILES := \ - $(C_SOURCES) - -LOCAL_MODULE := libmesa_pipe_pl111 - -include $(GALLIUM_COMMON_MK) -include $(BUILD_STATIC_LIBRARY) - -ifneq ($(HAVE_GALLIUM_PL111),) -GALLIUM_TARGET_DRIVERS += pl111 -$(eval GALLIUM_LIBS += $(LOCAL_MODULE) libmesa_winsys_pl111) -endif diff -Nru mesa-18.3.3/src/gallium/drivers/pl111/Automake.inc mesa-19.0.1/src/gallium/drivers/pl111/Automake.inc --- mesa-18.3.3/src/gallium/drivers/pl111/Automake.inc 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/pl111/Automake.inc 1970-01-01 00:00:00.000000000 +0000 @@ -1,9 +0,0 @@ -if HAVE_GALLIUM_PL111 - -TARGET_DRIVERS += pl111 -TARGET_CPPFLAGS += -DGALLIUM_PL111 -TARGET_LIB_DEPS += \ - $(top_builddir)/src/gallium/winsys/pl111/drm/libpl111drm.la \ - $(LIBDRM_LIBS) - -endif diff -Nru mesa-18.3.3/src/gallium/drivers/pl111/Makefile.am mesa-19.0.1/src/gallium/drivers/pl111/Makefile.am --- mesa-18.3.3/src/gallium/drivers/pl111/Makefile.am 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/pl111/Makefile.am 1970-01-01 00:00:00.000000000 +0000 @@ -1,8 +0,0 @@ -include $(top_srcdir)/src/gallium/Automake.inc - -AM_CPPFLAGS = \ - $(GALLIUM_CFLAGS) - -noinst_LTLIBRARIES = libpl111.la - 
-libpl111_la_SOURCES = $(C_SOURCES) diff -Nru mesa-18.3.3/src/gallium/drivers/pl111/Makefile.sources mesa-19.0.1/src/gallium/drivers/pl111/Makefile.sources --- mesa-18.3.3/src/gallium/drivers/pl111/Makefile.sources 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/pl111/Makefile.sources 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -C_SOURCES := - diff -Nru mesa-18.3.3/src/gallium/drivers/r300/meson.build mesa-19.0.1/src/gallium/drivers/r300/meson.build --- mesa-18.3.3/src/gallium/drivers/r300/meson.build 2018-01-12 19:24:23.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r300/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -158,6 +158,7 @@ ], link_with : [libr300, libgallium, libmesa_util], dependencies : [dep_m, dep_clock, dep_dl, dep_thread, dep_unwind], - ) + ), + suite : ['r300'], ) endif diff -Nru mesa-18.3.3/src/gallium/drivers/r300/r300_context.c mesa-19.0.1/src/gallium/drivers/r300/r300_context.c --- mesa-18.3.3/src/gallium/drivers/r300/r300_context.c 2018-07-14 15:13:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r300/r300_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -393,7 +393,7 @@ if (!r300->ctx) goto fail; - r300->cs = rws->cs_create(r300->ctx, RING_GFX, r300_flush_callback, r300); + r300->cs = rws->cs_create(r300->ctx, RING_GFX, r300_flush_callback, r300, false); if (r300->cs == NULL) goto fail; diff -Nru mesa-18.3.3/src/gallium/drivers/r300/r300_screen.c mesa-19.0.1/src/gallium/drivers/r300/r300_screen.c --- mesa-18.3.3/src/gallium/drivers/r300/r300_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r300/r300_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -304,6 +304,9 @@ case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: return 2048; + case PIPE_CAP_MAX_VARYINGS: + return 10; + case PIPE_CAP_VENDOR_ID: return 0x1002; case PIPE_CAP_DEVICE_ID: diff -Nru mesa-18.3.3/src/gallium/drivers/r300/r300_texture.c mesa-19.0.1/src/gallium/drivers/r300/r300_texture.c --- 
mesa-18.3.3/src/gallium/drivers/r300/r300_texture.c 2018-01-17 14:10:45.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r300/r300_texture.c 2019-03-31 23:16:37.000000000 +0000 @@ -1182,7 +1182,7 @@ return NULL; } - buffer = rws->buffer_from_handle(rws, whandle, &stride, NULL); + buffer = rws->buffer_from_handle(rws, whandle, 0, &stride, NULL); if (!buffer) return NULL; diff -Nru mesa-18.3.3/src/gallium/drivers/r600/evergreen_compute.c mesa-19.0.1/src/gallium/drivers/r600/evergreen_compute.c --- mesa-18.3.3/src/gallium/drivers/r600/evergreen_compute.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r600/evergreen_compute.c 2019-03-31 23:16:37.000000000 +0000 @@ -438,7 +438,9 @@ /* Upload code + ROdata */ shader->code_bo = r600_compute_buffer_alloc_vram(rctx->screen, shader->bc.ndw * 4); - p = r600_buffer_map_sync_with_rings(&rctx->b, shader->code_bo, PIPE_TRANSFER_WRITE); + p = r600_buffer_map_sync_with_rings( + &rctx->b, shader->code_bo, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); //TODO: use util_memcpy_cpu_to_le32 ? 
memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4); rctx->b.ws->buffer_unmap(shader->code_bo->buf); diff -Nru mesa-18.3.3/src/gallium/drivers/r600/r600_asm.c mesa-19.0.1/src/gallium/drivers/r600/r600_asm.c --- mesa-18.3.3/src/gallium/drivers/r600/r600_asm.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r600/r600_asm.c 2019-03-31 23:16:37.000000000 +0000 @@ -2772,7 +2772,9 @@ return NULL; } - bytecode = r600_buffer_map_sync_with_rings(&rctx->b, shader->buffer, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED); + bytecode = r600_buffer_map_sync_with_rings + (&rctx->b, shader->buffer, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED | RADEON_TRANSFER_TEMPORARY); bytecode += shader->offset / 4; if (R600_BIG_ENDIAN) { diff -Nru mesa-18.3.3/src/gallium/drivers/r600/r600_pipe.c mesa-19.0.1/src/gallium/drivers/r600/r600_pipe.c --- mesa-18.3.3/src/gallium/drivers/r600/r600_pipe.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r600/r600_pipe.c 2019-03-31 23:16:37.000000000 +0000 @@ -212,7 +212,7 @@ } rctx->b.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX, - r600_context_gfx_flush, rctx); + r600_context_gfx_flush, rctx, false); rctx->b.gfx.flush = r600_context_gfx_flush; rctx->allocator_fetch_shader = @@ -536,6 +536,9 @@ case PIPE_CAP_MAX_TEXEL_OFFSET: return 7; + case PIPE_CAP_MAX_VARYINGS: + return 32; + case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600; case PIPE_CAP_ENDIANNESS: diff -Nru mesa-18.3.3/src/gallium/drivers/r600/r600_pipe_common.c mesa-19.0.1/src/gallium/drivers/r600/r600_pipe_common.c --- mesa-18.3.3/src/gallium/drivers/r600/r600_pipe_common.c 2018-11-05 12:21:01.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r600/r600_pipe_common.c 2019-03-31 23:16:37.000000000 +0000 @@ -715,7 +715,7 @@ if (rscreen->info.num_sdma_rings && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) { rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA, r600_flush_dma_ring, - rctx); + 
rctx, false); rctx->dma.flush = r600_flush_dma_ring; } diff -Nru mesa-18.3.3/src/gallium/drivers/r600/r600_shader.c mesa-19.0.1/src/gallium/drivers/r600/r600_shader.c --- mesa-18.3.3/src/gallium/drivers/r600/r600_shader.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r600/r600_shader.c 2019-03-31 23:16:37.000000000 +0000 @@ -141,7 +141,9 @@ if (shader->bo == NULL) { return -ENOMEM; } - ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE); + ptr = r600_buffer_map_sync_with_rings( + &rctx->b, shader->bo, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); if (R600_BIG_ENDIAN) { for (i = 0; i < shader->shader.bc.ndw; ++i) { ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]); diff -Nru mesa-18.3.3/src/gallium/drivers/r600/r600_state_common.c mesa-19.0.1/src/gallium/drivers/r600/r600_state_common.c --- mesa-18.3.3/src/gallium/drivers/r600/r600_state_common.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r600/r600_state_common.c 2019-03-31 23:16:37.000000000 +0000 @@ -1020,7 +1020,9 @@ rctx->vs_shader = (struct r600_pipe_shader_selector *)state; r600_update_vs_writes_viewport_index(&rctx->b, r600_get_vs_info(rctx)); - rctx->b.streamout.stride_in_dw = rctx->vs_shader->so.stride; + + if (rctx->vs_shader->so.num_outputs) + rctx->b.streamout.stride_in_dw = rctx->vs_shader->so.stride; } static void r600_bind_gs_state(struct pipe_context *ctx, void *state) @@ -1035,7 +1037,9 @@ if (!state) return; - rctx->b.streamout.stride_in_dw = rctx->gs_shader->so.stride; + + if (rctx->gs_shader->so.num_outputs) + rctx->b.streamout.stride_in_dw = rctx->gs_shader->so.stride; } static void r600_bind_tcs_state(struct pipe_context *ctx, void *state) @@ -1057,7 +1061,9 @@ if (!state) return; - rctx->b.streamout.stride_in_dw = rctx->tes_shader->so.stride; + + if (rctx->tes_shader->so.num_outputs) + rctx->b.streamout.stride_in_dw = rctx->tes_shader->so.stride; } void r600_delete_shader_selector(struct 
pipe_context *ctx, @@ -2917,6 +2923,7 @@ switch (desc->nr_channels) { case 1: result = FMT_8; + is_srgb_valid = TRUE; goto out_word4; case 2: result = FMT_8_8; diff -Nru mesa-18.3.3/src/gallium/drivers/r600/r600_texture.c mesa-19.0.1/src/gallium/drivers/r600/r600_texture.c --- mesa-18.3.3/src/gallium/drivers/r600/r600_texture.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r600/r600_texture.c 2019-03-31 23:16:37.000000000 +0000 @@ -1108,7 +1108,9 @@ templ->depth0 != 1 || templ->last_level != 0) return NULL; - buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, &stride, &offset); + buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, + rscreen->info.max_alignment, + &stride, &offset); if (!buf) return NULL; @@ -1852,6 +1854,7 @@ return NULL; buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, + rscreen->info.max_alignment, &stride, &offset); if (!buf) { free(memobj); diff -Nru mesa-18.3.3/src/gallium/drivers/r600/radeon_uvd.c mesa-19.0.1/src/gallium/drivers/r600/radeon_uvd.c --- mesa-18.3.3/src/gallium/drivers/r600/radeon_uvd.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r600/radeon_uvd.c 2019-03-31 23:16:37.000000000 +0000 @@ -152,7 +152,8 @@ buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; /* and map it for CPU access */ - ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE); + ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); /* calc buffer offsets */ dec->msg = (struct ruvd_msg *)ptr; @@ -1068,7 +1069,7 @@ dec->bs_size = 0; dec->bs_ptr = dec->ws->buffer_map( dec->bs_buffers[dec->cur_buffer].res->buf, - dec->cs, PIPE_TRANSFER_WRITE); + dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); } /** @@ -1121,7 +1122,8 @@ } dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, - PIPE_TRANSFER_WRITE); + PIPE_TRANSFER_WRITE | + RADEON_TRANSFER_TEMPORARY); if (!dec->bs_ptr) return; @@ -1332,7 +1334,7 @@ 
dec->stream_handle = rvid_alloc_stream_handle(); dec->screen = context->screen; dec->ws = ws; - dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL); + dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL, false); if (!dec->cs) { RVID_ERR("Can't get command submission context.\n"); goto error; diff -Nru mesa-18.3.3/src/gallium/drivers/r600/radeon_vce.c mesa-19.0.1/src/gallium/drivers/r600/radeon_vce.c --- mesa-18.3.3/src/gallium/drivers/r600/radeon_vce.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r600/radeon_vce.c 2019-03-31 23:16:37.000000000 +0000 @@ -353,7 +353,9 @@ struct rvid_buffer *fb = feedback; if (size) { - uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE); + uint32_t *ptr = enc->ws->buffer_map( + fb->res->buf, enc->cs, + PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY); if (ptr[1]) { *size = ptr[4] - ptr[9]; @@ -428,7 +430,7 @@ enc->screen = context->screen; enc->ws = ws; - enc->cs = ws->cs_create(rctx->ctx, RING_VCE, rvce_cs_flush, enc); + enc->cs = ws->cs_create(rctx->ctx, RING_VCE, rvce_cs_flush, enc, false); if (!enc->cs) { RVID_ERR("Can't get command submission context.\n"); goto error; diff -Nru mesa-18.3.3/src/gallium/drivers/r600/radeon_video.c mesa-19.0.1/src/gallium/drivers/r600/radeon_video.c --- mesa-18.3.3/src/gallium/drivers/r600/radeon_video.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r600/radeon_video.c 2019-03-31 23:16:37.000000000 +0000 @@ -97,11 +97,13 @@ if (!rvid_create_buffer(screen, new_buf, new_size, new_buf->usage)) goto error; - src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ); + src = ws->buffer_map(old_buf.res->buf, cs, + PIPE_TRANSFER_READ | RADEON_TRANSFER_TEMPORARY); if (!src) goto error; - dst = ws->buffer_map(new_buf->res->buf, cs, PIPE_TRANSFER_WRITE); + dst = ws->buffer_map(new_buf->res->buf, cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); if (!dst) goto error; diff -Nru 
mesa-18.3.3/src/gallium/drivers/r600/sb/sb_ir.h mesa-19.0.1/src/gallium/drivers/r600/sb/sb_ir.h --- mesa-18.3.3/src/gallium/drivers/r600/sb/sb_ir.h 2018-02-16 12:24:09.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r600/sb/sb_ir.h 2019-03-31 23:16:37.000000000 +0000 @@ -1012,7 +1012,7 @@ class alu_node : public node { protected: - alu_node() : node(NT_OP, NST_ALU_INST) { memset(&bc, 0, sizeof(bc_alu)); }; + alu_node() : node(NT_OP, NST_ALU_INST) { memset(&bc, 0, sizeof(bc_alu)); } public: bc_alu bc; diff -Nru mesa-18.3.3/src/gallium/drivers/radeon/r600_perfcounter.c mesa-19.0.1/src/gallium/drivers/radeon/r600_perfcounter.c --- mesa-18.3.3/src/gallium/drivers/radeon/r600_perfcounter.c 2018-04-11 19:02:35.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeon/r600_perfcounter.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,639 +0,0 @@ -/* - * Copyright 2015 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "util/u_memory.h" -#include "radeonsi/si_query.h" -#include "radeonsi/si_pipe.h" -#include "amd/common/sid.h" - -/* Max counters per HW block */ -#define SI_QUERY_MAX_COUNTERS 16 - -static struct si_perfcounter_block * -lookup_counter(struct si_perfcounters *pc, unsigned index, - unsigned *base_gid, unsigned *sub_index) -{ - struct si_perfcounter_block *block = pc->blocks; - unsigned bid; - - *base_gid = 0; - for (bid = 0; bid < pc->num_blocks; ++bid, ++block) { - unsigned total = block->num_groups * block->num_selectors; - - if (index < total) { - *sub_index = index; - return block; - } - - index -= total; - *base_gid += block->num_groups; - } - - return NULL; -} - -static struct si_perfcounter_block * -lookup_group(struct si_perfcounters *pc, unsigned *index) -{ - unsigned bid; - struct si_perfcounter_block *block = pc->blocks; - - for (bid = 0; bid < pc->num_blocks; ++bid, ++block) { - if (*index < block->num_groups) - return block; - *index -= block->num_groups; - } - - return NULL; -} - -struct si_pc_group { - struct si_pc_group *next; - struct si_perfcounter_block *block; - unsigned sub_gid; /* only used during init */ - unsigned result_base; /* only used during init */ - int se; - int instance; - unsigned num_counters; - unsigned selectors[SI_QUERY_MAX_COUNTERS]; -}; - -struct si_pc_counter { - unsigned base; - unsigned qwords; - unsigned stride; /* in uint64s */ -}; - -#define SI_PC_SHADERS_WINDOWING (1 << 31) - -struct si_query_pc { - struct si_query_hw b; - - unsigned shaders; - unsigned num_counters; - struct si_pc_counter *counters; - struct si_pc_group *groups; -}; - -static void si_pc_query_destroy(struct si_screen *sscreen, - struct si_query *rquery) -{ - struct 
si_query_pc *query = (struct si_query_pc *)rquery; - - while (query->groups) { - struct si_pc_group *group = query->groups; - query->groups = group->next; - FREE(group); - } - - FREE(query->counters); - - si_query_hw_destroy(sscreen, rquery); -} - -static bool si_pc_query_prepare_buffer(struct si_screen *screen, - struct si_query_hw *hwquery, - struct r600_resource *buffer) -{ - /* no-op */ - return true; -} - -static void si_pc_query_emit_start(struct si_context *sctx, - struct si_query_hw *hwquery, - struct r600_resource *buffer, uint64_t va) -{ - struct si_perfcounters *pc = sctx->screen->perfcounters; - struct si_query_pc *query = (struct si_query_pc *)hwquery; - struct si_pc_group *group; - int current_se = -1; - int current_instance = -1; - - if (query->shaders) - pc->emit_shaders(sctx, query->shaders); - - for (group = query->groups; group; group = group->next) { - struct si_perfcounter_block *block = group->block; - - if (group->se != current_se || group->instance != current_instance) { - current_se = group->se; - current_instance = group->instance; - pc->emit_instance(sctx, group->se, group->instance); - } - - pc->emit_select(sctx, block, group->num_counters, group->selectors); - } - - if (current_se != -1 || current_instance != -1) - pc->emit_instance(sctx, -1, -1); - - pc->emit_start(sctx, buffer, va); -} - -static void si_pc_query_emit_stop(struct si_context *sctx, - struct si_query_hw *hwquery, - struct r600_resource *buffer, uint64_t va) -{ - struct si_perfcounters *pc = sctx->screen->perfcounters; - struct si_query_pc *query = (struct si_query_pc *)hwquery; - struct si_pc_group *group; - - pc->emit_stop(sctx, buffer, va); - - for (group = query->groups; group; group = group->next) { - struct si_perfcounter_block *block = group->block; - unsigned se = group->se >= 0 ? 
group->se : 0; - unsigned se_end = se + 1; - - if ((block->flags & SI_PC_BLOCK_SE) && (group->se < 0)) - se_end = sctx->screen->info.max_se; - - do { - unsigned instance = group->instance >= 0 ? group->instance : 0; - - do { - pc->emit_instance(sctx, se, instance); - pc->emit_read(sctx, block, - group->num_counters, group->selectors, - buffer, va); - va += sizeof(uint64_t) * group->num_counters; - } while (group->instance < 0 && ++instance < block->num_instances); - } while (++se < se_end); - } - - pc->emit_instance(sctx, -1, -1); -} - -static void si_pc_query_clear_result(struct si_query_hw *hwquery, - union pipe_query_result *result) -{ - struct si_query_pc *query = (struct si_query_pc *)hwquery; - - memset(result, 0, sizeof(result->batch[0]) * query->num_counters); -} - -static void si_pc_query_add_result(struct si_screen *sscreen, - struct si_query_hw *hwquery, - void *buffer, - union pipe_query_result *result) -{ - struct si_query_pc *query = (struct si_query_pc *)hwquery; - uint64_t *results = buffer; - unsigned i, j; - - for (i = 0; i < query->num_counters; ++i) { - struct si_pc_counter *counter = &query->counters[i]; - - for (j = 0; j < counter->qwords; ++j) { - uint32_t value = results[counter->base + j * counter->stride]; - result->batch[i].u64 += value; - } - } -} - -static struct si_query_ops batch_query_ops = { - .destroy = si_pc_query_destroy, - .begin = si_query_hw_begin, - .end = si_query_hw_end, - .get_result = si_query_hw_get_result -}; - -static struct si_query_hw_ops batch_query_hw_ops = { - .prepare_buffer = si_pc_query_prepare_buffer, - .emit_start = si_pc_query_emit_start, - .emit_stop = si_pc_query_emit_stop, - .clear_result = si_pc_query_clear_result, - .add_result = si_pc_query_add_result, -}; - -static struct si_pc_group *get_group_state(struct si_screen *screen, - struct si_query_pc *query, - struct si_perfcounter_block *block, - unsigned sub_gid) -{ - struct si_pc_group *group = query->groups; - - while (group) { - if (group->block == 
block && group->sub_gid == sub_gid) - return group; - group = group->next; - } - - group = CALLOC_STRUCT(si_pc_group); - if (!group) - return NULL; - - group->block = block; - group->sub_gid = sub_gid; - - if (block->flags & SI_PC_BLOCK_SHADER) { - unsigned sub_gids = block->num_instances; - unsigned shader_id; - unsigned shaders; - unsigned query_shaders; - - if (block->flags & SI_PC_BLOCK_SE_GROUPS) - sub_gids = sub_gids * screen->info.max_se; - shader_id = sub_gid / sub_gids; - sub_gid = sub_gid % sub_gids; - - shaders = screen->perfcounters->shader_type_bits[shader_id]; - - query_shaders = query->shaders & ~SI_PC_SHADERS_WINDOWING; - if (query_shaders && query_shaders != shaders) { - fprintf(stderr, "si_perfcounter: incompatible shader groups\n"); - FREE(group); - return NULL; - } - query->shaders = shaders; - } - - if (block->flags & SI_PC_BLOCK_SHADER_WINDOWED && !query->shaders) { - // A non-zero value in query->shaders ensures that the shader - // masking is reset unless the user explicitly requests one. 
- query->shaders = SI_PC_SHADERS_WINDOWING; - } - - if (block->flags & SI_PC_BLOCK_SE_GROUPS) { - group->se = sub_gid / block->num_instances; - sub_gid = sub_gid % block->num_instances; - } else { - group->se = -1; - } - - if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) { - group->instance = sub_gid; - } else { - group->instance = -1; - } - - group->next = query->groups; - query->groups = group; - - return group; -} - -struct pipe_query *si_create_batch_query(struct pipe_context *ctx, - unsigned num_queries, - unsigned *query_types) -{ - struct si_screen *screen = - (struct si_screen *)ctx->screen; - struct si_perfcounters *pc = screen->perfcounters; - struct si_perfcounter_block *block; - struct si_pc_group *group; - struct si_query_pc *query; - unsigned base_gid, sub_gid, sub_index; - unsigned i, j; - - if (!pc) - return NULL; - - query = CALLOC_STRUCT(si_query_pc); - if (!query) - return NULL; - - query->b.b.ops = &batch_query_ops; - query->b.ops = &batch_query_hw_ops; - - query->num_counters = num_queries; - - /* Collect selectors per group */ - for (i = 0; i < num_queries; ++i) { - unsigned sub_gid; - - if (query_types[i] < SI_QUERY_FIRST_PERFCOUNTER) - goto error; - - block = lookup_counter(pc, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER, - &base_gid, &sub_index); - if (!block) - goto error; - - sub_gid = sub_index / block->num_selectors; - sub_index = sub_index % block->num_selectors; - - group = get_group_state(screen, query, block, sub_gid); - if (!group) - goto error; - - if (group->num_counters >= block->num_counters) { - fprintf(stderr, - "perfcounter group %s: too many selected\n", - block->basename); - goto error; - } - group->selectors[group->num_counters] = sub_index; - ++group->num_counters; - } - - /* Compute result bases and CS size per group */ - query->b.num_cs_dw_end = pc->num_stop_cs_dwords; - query->b.num_cs_dw_end += pc->num_instance_cs_dwords; - - i = 0; - for (group = query->groups; group; group = group->next) { - struct 
si_perfcounter_block *block = group->block; - unsigned read_dw; - unsigned instances = 1; - - if ((block->flags & SI_PC_BLOCK_SE) && group->se < 0) - instances = screen->info.max_se; - if (group->instance < 0) - instances *= block->num_instances; - - group->result_base = i; - query->b.result_size += sizeof(uint64_t) * instances * group->num_counters; - i += instances * group->num_counters; - - read_dw = 6 * group->num_counters; - query->b.num_cs_dw_end += instances * read_dw; - query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords; - } - - if (query->shaders) { - if (query->shaders == SI_PC_SHADERS_WINDOWING) - query->shaders = 0xffffffff; - } - - /* Map user-supplied query array to result indices */ - query->counters = CALLOC(num_queries, sizeof(*query->counters)); - for (i = 0; i < num_queries; ++i) { - struct si_pc_counter *counter = &query->counters[i]; - struct si_perfcounter_block *block; - - block = lookup_counter(pc, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER, - &base_gid, &sub_index); - - sub_gid = sub_index / block->num_selectors; - sub_index = sub_index % block->num_selectors; - - group = get_group_state(screen, query, block, sub_gid); - assert(group != NULL); - - for (j = 0; j < group->num_counters; ++j) { - if (group->selectors[j] == sub_index) - break; - } - - counter->base = group->result_base + j; - counter->stride = group->num_counters; - - counter->qwords = 1; - if ((block->flags & SI_PC_BLOCK_SE) && group->se < 0) - counter->qwords = screen->info.max_se; - if (group->instance < 0) - counter->qwords *= block->num_instances; - } - - if (!si_query_hw_init(screen, &query->b)) - goto error; - - return (struct pipe_query *)query; - -error: - si_pc_query_destroy(screen, &query->b.b); - return NULL; -} - -static bool si_init_block_names(struct si_screen *screen, - struct si_perfcounter_block *block) -{ - unsigned i, j, k; - unsigned groups_shader = 1, groups_se = 1, groups_instance = 1; - unsigned namelen; - char *groupname; - char *p; - - if 
(block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) - groups_instance = block->num_instances; - if (block->flags & SI_PC_BLOCK_SE_GROUPS) - groups_se = screen->info.max_se; - if (block->flags & SI_PC_BLOCK_SHADER) - groups_shader = screen->perfcounters->num_shader_types; - - namelen = strlen(block->basename); - block->group_name_stride = namelen + 1; - if (block->flags & SI_PC_BLOCK_SHADER) - block->group_name_stride += 3; - if (block->flags & SI_PC_BLOCK_SE_GROUPS) { - assert(groups_se <= 10); - block->group_name_stride += 1; - - if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) - block->group_name_stride += 1; - } - if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) { - assert(groups_instance <= 100); - block->group_name_stride += 2; - } - - block->group_names = MALLOC(block->num_groups * block->group_name_stride); - if (!block->group_names) - return false; - - groupname = block->group_names; - for (i = 0; i < groups_shader; ++i) { - const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i]; - unsigned shaderlen = strlen(shader_suffix); - for (j = 0; j < groups_se; ++j) { - for (k = 0; k < groups_instance; ++k) { - strcpy(groupname, block->basename); - p = groupname + namelen; - - if (block->flags & SI_PC_BLOCK_SHADER) { - strcpy(p, shader_suffix); - p += shaderlen; - } - - if (block->flags & SI_PC_BLOCK_SE_GROUPS) { - p += sprintf(p, "%d", j); - if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) - *p++ = '_'; - } - - if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) - p += sprintf(p, "%d", k); - - groupname += block->group_name_stride; - } - } - } - - assert(block->num_selectors <= 1000); - block->selector_name_stride = block->group_name_stride + 4; - block->selector_names = MALLOC(block->num_groups * block->num_selectors * - block->selector_name_stride); - if (!block->selector_names) - return false; - - groupname = block->group_names; - p = block->selector_names; - for (i = 0; i < block->num_groups; ++i) { - for (j = 0; j < block->num_selectors; ++j) { - 
sprintf(p, "%s_%03d", groupname, j); - p += block->selector_name_stride; - } - groupname += block->group_name_stride; - } - - return true; -} - -int si_get_perfcounter_info(struct si_screen *screen, - unsigned index, - struct pipe_driver_query_info *info) -{ - struct si_perfcounters *pc = screen->perfcounters; - struct si_perfcounter_block *block; - unsigned base_gid, sub; - - if (!pc) - return 0; - - if (!info) { - unsigned bid, num_queries = 0; - - for (bid = 0; bid < pc->num_blocks; ++bid) { - num_queries += pc->blocks[bid].num_selectors * - pc->blocks[bid].num_groups; - } - - return num_queries; - } - - block = lookup_counter(pc, index, &base_gid, &sub); - if (!block) - return 0; - - if (!block->selector_names) { - if (!si_init_block_names(screen, block)) - return 0; - } - info->name = block->selector_names + sub * block->selector_name_stride; - info->query_type = SI_QUERY_FIRST_PERFCOUNTER + index; - info->max_value.u64 = 0; - info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; - info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE; - info->group_id = base_gid + sub / block->num_selectors; - info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH; - if (sub > 0 && sub + 1 < block->num_selectors * block->num_groups) - info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST; - return 1; -} - -int si_get_perfcounter_group_info(struct si_screen *screen, - unsigned index, - struct pipe_driver_query_group_info *info) -{ - struct si_perfcounters *pc = screen->perfcounters; - struct si_perfcounter_block *block; - - if (!pc) - return 0; - - if (!info) - return pc->num_groups; - - block = lookup_group(pc, &index); - if (!block) - return 0; - - if (!block->group_names) { - if (!si_init_block_names(screen, block)) - return 0; - } - info->name = block->group_names + index * block->group_name_stride; - info->num_queries = block->num_selectors; - info->max_active_queries = block->num_counters; - return 1; -} - -void si_perfcounters_destroy(struct si_screen *sscreen) -{ - if (sscreen->perfcounters) 
- sscreen->perfcounters->cleanup(sscreen); -} - -bool si_perfcounters_init(struct si_perfcounters *pc, - unsigned num_blocks) -{ - pc->blocks = CALLOC(num_blocks, sizeof(struct si_perfcounter_block)); - if (!pc->blocks) - return false; - - pc->separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", false); - pc->separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false); - - return true; -} - -void si_perfcounters_add_block(struct si_screen *sscreen, - struct si_perfcounters *pc, - const char *name, unsigned flags, - unsigned counters, unsigned selectors, - unsigned instances, void *data) -{ - struct si_perfcounter_block *block = &pc->blocks[pc->num_blocks]; - - assert(counters <= SI_QUERY_MAX_COUNTERS); - - block->basename = name; - block->flags = flags; - block->num_counters = counters; - block->num_selectors = selectors; - block->num_instances = MAX2(instances, 1); - block->data = data; - - if (pc->separate_se && (block->flags & SI_PC_BLOCK_SE)) - block->flags |= SI_PC_BLOCK_SE_GROUPS; - if (pc->separate_instance && block->num_instances > 1) - block->flags |= SI_PC_BLOCK_INSTANCE_GROUPS; - - if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) { - block->num_groups = block->num_instances; - } else { - block->num_groups = 1; - } - - if (block->flags & SI_PC_BLOCK_SE_GROUPS) - block->num_groups *= sscreen->info.max_se; - if (block->flags & SI_PC_BLOCK_SHADER) - block->num_groups *= pc->num_shader_types; - - ++pc->num_blocks; - pc->num_groups += block->num_groups; -} - -void si_perfcounters_do_destroy(struct si_perfcounters *pc) -{ - unsigned i; - - for (i = 0; i < pc->num_blocks; ++i) { - FREE(pc->blocks[i].group_names); - FREE(pc->blocks[i].selector_names); - } - FREE(pc->blocks); - FREE(pc); -} diff -Nru mesa-18.3.3/src/gallium/drivers/radeon/radeon_uvd.c mesa-19.0.1/src/gallium/drivers/radeon/radeon_uvd.c --- mesa-18.3.3/src/gallium/drivers/radeon/radeon_uvd.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/radeon/radeon_uvd.c 2019-03-31 23:16:37.000000000 +0000 @@ -148,7 +148,8 @@ buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; /* and map it for CPU access */ - ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE); + ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); /* calc buffer offsets */ dec->msg = (struct ruvd_msg *)ptr; @@ -1015,7 +1016,7 @@ dec->bs_size = 0; dec->bs_ptr = dec->ws->buffer_map( dec->bs_buffers[dec->cur_buffer].res->buf, - dec->cs, PIPE_TRANSFER_WRITE); + dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); } /** @@ -1060,8 +1061,9 @@ return; } - dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, - PIPE_TRANSFER_WRITE); + dec->bs_ptr = dec->ws->buffer_map( + buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); if (!dec->bs_ptr) return; @@ -1268,7 +1270,7 @@ dec->stream_handle = si_vid_alloc_stream_handle(); dec->screen = context->screen; dec->ws = ws; - dec->cs = ws->cs_create(sctx->ctx, RING_UVD, NULL, NULL); + dec->cs = ws->cs_create(sctx->ctx, RING_UVD, NULL, NULL, false); if (!dec->cs) { RVID_ERR("Can't get command submission context.\n"); goto error; diff -Nru mesa-18.3.3/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c mesa-19.0.1/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c --- mesa-18.3.3/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c 2019-03-31 23:16:37.000000000 +0000 @@ -835,10 +835,10 @@ static void radeon_uvd_enc_ctx(struct radeon_uvd_encoder *enc) { - struct si_screen *rscreen = (struct si_screen *) enc->screen; + struct si_screen *sscreen = (struct si_screen *) enc->screen; enc->enc_pic.ctx_buf.swizzle_mode = 0; - if (rscreen->info.chip_class < GFX9) { + if (sscreen->info.chip_class < GFX9) { enc->enc_pic.ctx_buf.rec_luma_pitch = (enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); 
enc->enc_pic.ctx_buf.rec_chroma_pitch = @@ -950,7 +950,7 @@ static void radeon_uvd_enc_encode_params_hevc(struct radeon_uvd_encoder *enc) { - struct si_screen *rscreen = (struct si_screen *) enc->screen; + struct si_screen *sscreen = (struct si_screen *) enc->screen; switch (enc->enc_pic.picture_type) { case PIPE_H265_ENC_PICTURE_TYPE_I: case PIPE_H265_ENC_PICTURE_TYPE_IDR: @@ -970,7 +970,7 @@ } enc->enc_pic.enc_params.allowed_max_bitstream_size = enc->bs_size; - if (rscreen->info.chip_class < GFX9) { + if (sscreen->info.chip_class < GFX9) { enc->enc_pic.enc_params.input_pic_luma_pitch = (enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); enc->enc_pic.enc_params.input_pic_chroma_pitch = @@ -998,7 +998,7 @@ RADEON_ENC_CS(enc->enc_pic.enc_params.pic_type); RADEON_ENC_CS(enc->enc_pic.enc_params.allowed_max_bitstream_size); - if (rscreen->info.chip_class < GFX9) { + if (sscreen->info.chip_class < GFX9) { RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM, enc->luma->u.legacy.level[0].offset); RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM, diff -Nru mesa-18.3.3/src/gallium/drivers/radeon/radeon_uvd_enc.c mesa-19.0.1/src/gallium/drivers/radeon/radeon_uvd_enc.c --- mesa-18.3.3/src/gallium/drivers/radeon/radeon_uvd_enc.c 2018-04-11 19:02:35.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeon/radeon_uvd_enc.c 2019-03-31 23:16:37.000000000 +0000 @@ -263,9 +263,9 @@ if (NULL != size) { radeon_uvd_enc_feedback_t *fb_data = - (radeon_uvd_enc_feedback_t *) enc->ws->buffer_map(fb->res->buf, - enc->cs, - PIPE_TRANSFER_READ_WRITE); + (radeon_uvd_enc_feedback_t *) enc->ws->buffer_map( + fb->res->buf, enc->cs, + PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY); if (!fb_data->status) *size = fb_data->bitstream_size; @@ -314,7 +314,7 @@ enc->screen = context->screen; enc->ws = ws; enc->cs = - ws->cs_create(sctx->ctx, RING_UVD_ENC, radeon_uvd_enc_cs_flush, enc); + ws->cs_create(sctx->ctx, RING_UVD_ENC, radeon_uvd_enc_cs_flush, enc, false); if (!enc->cs) { RVID_ERR("Can't 
get command submission context.\n"); diff -Nru mesa-18.3.3/src/gallium/drivers/radeon/radeon_uvd_enc.h mesa-19.0.1/src/gallium/drivers/radeon/radeon_uvd_enc.h --- mesa-18.3.3/src/gallium/drivers/radeon/radeon_uvd_enc.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeon/radeon_uvd_enc.h 2019-03-31 23:16:37.000000000 +0000 @@ -464,6 +464,6 @@ }; void radeon_uvd_enc_1_1_init(struct radeon_uvd_encoder *enc); -bool si_radeon_uvd_enc_supported(struct si_screen *rscreen); +bool si_radeon_uvd_enc_supported(struct si_screen *sscreen); #endif // _RADEON_UVD_ENC_H diff -Nru mesa-18.3.3/src/gallium/drivers/radeon/radeon_vce.c mesa-19.0.1/src/gallium/drivers/radeon/radeon_vce.c --- mesa-18.3.3/src/gallium/drivers/radeon/radeon_vce.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeon/radeon_vce.c 2019-03-31 23:16:37.000000000 +0000 @@ -352,7 +352,9 @@ struct rvid_buffer *fb = feedback; if (size) { - uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE); + uint32_t *ptr = enc->ws->buffer_map( + fb->res->buf, enc->cs, + PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY); if (ptr[1]) { *size = ptr[4] - ptr[9]; @@ -438,7 +440,7 @@ enc->screen = context->screen; enc->ws = ws; - enc->cs = ws->cs_create(sctx->ctx, RING_VCE, rvce_cs_flush, enc); + enc->cs = ws->cs_create(sctx->ctx, RING_VCE, rvce_cs_flush, enc, false); if (!enc->cs) { RVID_ERR("Can't get command submission context.\n"); goto error; diff -Nru mesa-18.3.3/src/gallium/drivers/radeon/radeon_vcn_dec.c mesa-19.0.1/src/gallium/drivers/radeon/radeon_vcn_dec.c --- mesa-18.3.3/src/gallium/drivers/radeon/radeon_vcn_dec.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeon/radeon_vcn_dec.c 2019-03-31 23:16:37.000000000 +0000 @@ -822,8 +822,8 @@ decode->bsd_size = align(dec->bs_size, 128); decode->dpb_size = dec->dpb.res->buf->size; decode->dt_size = - r600_resource(((struct vl_video_buffer 
*)target)->resources[0])->buf->size + - r600_resource(((struct vl_video_buffer *)target)->resources[1])->buf->size; + si_resource(((struct vl_video_buffer *)target)->resources[0])->buf->size + + si_resource(((struct vl_video_buffer *)target)->resources[1])->buf->size; decode->sct_size = 0; decode->sc_coeff_size = 0; @@ -941,7 +941,9 @@ si_vid_clear_buffer(dec->base.context, &dec->ctx); /* ctx needs probs table */ - ptr = dec->ws->buffer_map(dec->ctx.res->buf, dec->cs, PIPE_TRANSFER_WRITE); + ptr = dec->ws->buffer_map( + dec->ctx.res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); fill_probs_table(ptr); dec->ws->buffer_unmap(dec->ctx.res->buf); } @@ -1034,7 +1036,8 @@ buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer]; /* and map it for CPU access */ - ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE); + ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); /* calc buffer offsets */ dec->msg = ptr; @@ -1312,7 +1315,7 @@ dec->bs_size = 0; dec->bs_ptr = dec->ws->buffer_map( dec->bs_buffers[dec->cur_buffer].res->buf, - dec->cs, PIPE_TRANSFER_WRITE); + dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); } /** @@ -1357,8 +1360,9 @@ return; } - dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, - PIPE_TRANSFER_WRITE); + dec->bs_ptr = dec->ws->buffer_map( + buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); if (!dec->bs_ptr) return; @@ -1507,7 +1511,7 @@ dec->stream_handle = si_vid_alloc_stream_handle(); dec->screen = context->screen; dec->ws = ws; - dec->cs = ws->cs_create(sctx->ctx, ring, NULL, NULL); + dec->cs = ws->cs_create(sctx->ctx, ring, NULL, NULL, false); if (!dec->cs) { RVID_ERR("Can't get command submission context.\n"); goto error; @@ -1543,7 +1547,9 @@ void *ptr; buf = &dec->msg_fb_it_probs_buffers[i]; - ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE); + ptr = dec->ws->buffer_map( + buf->res->buf, dec->cs, + 
PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); ptr += FB_BUFFER_OFFSET + FB_BUFFER_SIZE; fill_probs_table(ptr); dec->ws->buffer_unmap(buf->res->buf); diff -Nru mesa-18.3.3/src/gallium/drivers/radeon/radeon_vcn_enc.c mesa-19.0.1/src/gallium/drivers/radeon/radeon_vcn_enc.c --- mesa-18.3.3/src/gallium/drivers/radeon/radeon_vcn_enc.c 2018-04-11 19:02:35.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeon/radeon_vcn_enc.c 2019-03-31 23:16:37.000000000 +0000 @@ -244,7 +244,9 @@ struct rvid_buffer *fb = feedback; if (size) { - uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE); + uint32_t *ptr = enc->ws->buffer_map( + fb->res->buf, enc->cs, + PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY); if (ptr[1]) *size = ptr[6]; else @@ -286,7 +288,8 @@ enc->bits_in_shifter = 0; enc->screen = context->screen; enc->ws = ws; - enc->cs = ws->cs_create(sctx->ctx, RING_VCN_ENC, radeon_enc_cs_flush, enc); + enc->cs = ws->cs_create(sctx->ctx, RING_VCN_ENC, radeon_enc_cs_flush, + enc, false); if (!enc->cs) { RVID_ERR("Can't get command submission context.\n"); diff -Nru mesa-18.3.3/src/gallium/drivers/radeon/radeon_video.c mesa-19.0.1/src/gallium/drivers/radeon/radeon_video.c --- mesa-18.3.3/src/gallium/drivers/radeon/radeon_video.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeon/radeon_video.c 2019-03-31 23:16:37.000000000 +0000 @@ -63,8 +63,8 @@ * able to move buffers around individually, so request a * non-sub-allocated buffer. 
*/ - buffer->res = r600_resource(pipe_buffer_create(screen, PIPE_BIND_SHARED, - usage, size)); + buffer->res = si_resource(pipe_buffer_create(screen, PIPE_BIND_SHARED, + usage, size)); return buffer->res != NULL; } @@ -72,7 +72,7 @@ /* destroy a buffer */ void si_vid_destroy_buffer(struct rvid_buffer *buffer) { - r600_resource_reference(&buffer->res, NULL); + si_resource_reference(&buffer->res, NULL); } /* reallocate a buffer, preserving its content */ @@ -88,11 +88,13 @@ if (!si_vid_create_buffer(screen, new_buf, new_size, new_buf->usage)) goto error; - src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ); + src = ws->buffer_map(old_buf.res->buf, cs, + PIPE_TRANSFER_READ | RADEON_TRANSFER_TEMPORARY); if (!src) goto error; - dst = ws->buffer_map(new_buf->res->buf, cs, PIPE_TRANSFER_WRITE); + dst = ws->buffer_map(new_buf->res->buf, cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); if (!dst) goto error; diff -Nru mesa-18.3.3/src/gallium/drivers/radeon/radeon_video.h mesa-19.0.1/src/gallium/drivers/radeon/radeon_video.h --- mesa-18.3.3/src/gallium/drivers/radeon/radeon_video.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeon/radeon_video.h 2019-03-31 23:16:37.000000000 +0000 @@ -40,7 +40,7 @@ struct rvid_buffer { unsigned usage; - struct r600_resource *res; + struct si_resource *res; }; /* generate an stream handle */ diff -Nru mesa-18.3.3/src/gallium/drivers/radeon/radeon_winsys.h mesa-19.0.1/src/gallium/drivers/radeon/radeon_winsys.h --- mesa-18.3.3/src/gallium/drivers/radeon/radeon_winsys.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeon/radeon_winsys.h 2019-03-31 23:16:37.000000000 +0000 @@ -52,7 +52,9 @@ enum radeon_bo_domain { /* bitfield */ RADEON_DOMAIN_GTT = 2, RADEON_DOMAIN_VRAM = 4, - RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT + RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT, + RADEON_DOMAIN_GDS = 8, + RADEON_DOMAIN_OA = 16, }; enum 
radeon_bo_flag { /* bitfield */ @@ -76,6 +78,15 @@ RADEON_USAGE_SYNCHRONIZED = 8 }; +enum radeon_transfer_flags { + /* Indicates that the caller will unmap the buffer. + * + * Not unmapping buffers is an important performance optimization for + * OpenGL (avoids kernel overhead for frequently mapped buffers). + */ + RADEON_TRANSFER_TEMPORARY = (PIPE_TRANSFER_DRV_PRV << 0), +}; + #define RADEON_SPARSE_PAGE_SIZE (64 * 1024) enum ring_type { @@ -294,9 +305,12 @@ * Map the entire data store of a buffer object into the client's address * space. * + * Callers are expected to unmap buffers again if and only if the + * RADEON_TRANSFER_TEMPORARY flag is set in \p usage. + * * \param buf A winsys buffer object to map. * \param cs A command stream to flush if the buffer is referenced by it. - * \param usage A bitmask of the PIPE_TRANSFER_* flags. + * \param usage A bitmask of the PIPE_TRANSFER_* and RADEON_TRANSFER_* flags. * \return The pointer at the beginning of the buffer. */ void *(*buffer_map)(struct pb_buffer *buf, @@ -352,6 +366,7 @@ */ struct pb_buffer *(*buffer_from_handle)(struct radeon_winsys *ws, struct winsys_handle *whandle, + unsigned vm_alignment, unsigned *stride, unsigned *offset); /** @@ -464,10 +479,11 @@ * \param user User pointer that will be passed to the flush callback. */ struct radeon_cmdbuf *(*cs_create)(struct radeon_winsys_ctx *ctx, - enum ring_type ring_type, - void (*flush)(void *ctx, unsigned flags, - struct pipe_fence_handle **fence), - void *flush_ctx); + enum ring_type ring_type, + void (*flush)(void *ctx, unsigned flags, + struct pipe_fence_handle **fence), + void *flush_ctx, + bool stop_exec_on_failure); /** * Destroy a command stream. 
diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/cik_sdma.c mesa-19.0.1/src/gallium/drivers/radeonsi/cik_sdma.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/cik_sdma.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/cik_sdma.c 2019-03-31 23:16:37.000000000 +0000 @@ -35,20 +35,20 @@ { struct radeon_cmdbuf *cs = ctx->dma_cs; unsigned i, ncopy, csize; - struct r600_resource *rdst = r600_resource(dst); - struct r600_resource *rsrc = r600_resource(src); + struct si_resource *sdst = si_resource(dst); + struct si_resource *ssrc = si_resource(src); /* Mark the buffer range of destination as valid (initialized), * so that transfer_map knows it should wait for the GPU when mapping * that range. */ - util_range_add(&rdst->valid_buffer_range, dst_offset, + util_range_add(&sdst->valid_buffer_range, dst_offset, dst_offset + size); - dst_offset += rdst->gpu_address; - src_offset += rsrc->gpu_address; + dst_offset += sdst->gpu_address; + src_offset += ssrc->gpu_address; ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE); - si_need_dma_space(ctx, ncopy * 7, rdst, rsrc); + si_need_dma_space(ctx, ncopy * 7, sdst, ssrc); for (i = 0; i < ncopy; i++) { csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE); diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/driinfo_radeonsi.h mesa-19.0.1/src/gallium/drivers/radeonsi/driinfo_radeonsi.h --- mesa-18.3.3/src/gallium/drivers/radeonsi/driinfo_radeonsi.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/driinfo_radeonsi.h 2019-03-31 23:16:37.000000000 +0000 @@ -1,4 +1,8 @@ // DriConf options specific to radeonsi +DRI_CONF_SECTION_QUALITY + DRI_CONF_ADAPTIVE_SYNC("true") +DRI_CONF_SECTION_END + DRI_CONF_SECTION_PERFORMANCE DRI_CONF_RADEONSI_ENABLE_SISCHED("false") DRI_CONF_RADEONSI_ASSUME_NO_Z_FIGHTS("false") @@ -8,4 +12,5 @@ DRI_CONF_SECTION_DEBUG DRI_CONF_RADEONSI_CLEAR_DB_CACHE_BEFORE_CLEAR("false") + DRI_CONF_RADEONSI_ENABLE_NIR("false") DRI_CONF_SECTION_END diff -Nru 
mesa-18.3.3/src/gallium/drivers/radeonsi/Makefile.sources mesa-19.0.1/src/gallium/drivers/radeonsi/Makefile.sources --- mesa-18.3.3/src/gallium/drivers/radeonsi/Makefile.sources 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/Makefile.sources 2019-03-31 23:16:37.000000000 +0000 @@ -49,7 +49,6 @@ si_test_dma_perf.c \ si_texture.c \ si_uvd.c \ - ../radeon/r600_perfcounter.c \ ../radeon/radeon_uvd.c \ ../radeon/radeon_uvd.h \ ../radeon/radeon_vcn_dec_jpeg.c \ diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/meson.build mesa-19.0.1/src/gallium/drivers/radeonsi/meson.build --- mesa-18.3.3/src/gallium/drivers/radeonsi/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -65,7 +65,6 @@ 'si_test_dma_perf.c', 'si_texture.c', 'si_uvd.c', - '../radeon/r600_perfcounter.c', '../radeon/radeon_uvd.c', '../radeon/radeon_uvd.h', '../radeon/radeon_vcn_enc_1_2.c', diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_blit.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_blit.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_blit.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_blit.c 2019-03-31 23:16:37.000000000 +0000 @@ -902,6 +902,7 @@ { struct si_context *sctx = (struct si_context *)ctx; struct si_texture *ssrc = (struct si_texture*)src; + struct si_texture *sdst = (struct si_texture*)dst; struct pipe_surface *dst_view, dst_templ; struct pipe_sampler_view src_templ, *src_view; unsigned dst_width, dst_height, src_width0, src_height0; @@ -914,6 +915,17 @@ return; } + if (!util_format_is_compressed(src->format) && + !util_format_is_compressed(dst->format) && + !util_format_is_depth_or_stencil(src->format) && + src->nr_samples <= 1 && + !sdst->dcc_offset && + !(dst->target != src->target && + (src->target == PIPE_TEXTURE_1D_ARRAY || dst->target == PIPE_TEXTURE_1D_ARRAY))) { + si_compute_copy_image(sctx, dst, dst_level, src, 
src_level, dstx, dsty, dstz, src_box); + return; + } + assert(u_max_sample(dst) == u_max_sample(src)); /* The driver doesn't decompress resources automatically while @@ -1012,36 +1024,8 @@ * Note that some chips avoid this issue by using SDMA. */ if (util_format_is_snorm8(dst_templ.format)) { - switch (dst_templ.format) { - case PIPE_FORMAT_R8_SNORM: - dst_templ.format = src_templ.format = PIPE_FORMAT_R8_SINT; - break; - case PIPE_FORMAT_R8G8_SNORM: - dst_templ.format = src_templ.format = PIPE_FORMAT_R8G8_SINT; - break; - case PIPE_FORMAT_R8G8B8X8_SNORM: - dst_templ.format = src_templ.format = PIPE_FORMAT_R8G8B8X8_SINT; - break; - case PIPE_FORMAT_R8G8B8A8_SNORM: - /* There are no SINT variants for ABGR and XBGR, so we have to use RGBA. */ - case PIPE_FORMAT_A8B8G8R8_SNORM: - case PIPE_FORMAT_X8B8G8R8_SNORM: - dst_templ.format = src_templ.format = PIPE_FORMAT_R8G8B8A8_SINT; - break; - case PIPE_FORMAT_A8_SNORM: - dst_templ.format = src_templ.format = PIPE_FORMAT_A8_SINT; - break; - case PIPE_FORMAT_L8_SNORM: - dst_templ.format = src_templ.format = PIPE_FORMAT_L8_SINT; - break; - case PIPE_FORMAT_L8A8_SNORM: - dst_templ.format = src_templ.format = PIPE_FORMAT_L8A8_SINT; - break; - case PIPE_FORMAT_I8_SNORM: - dst_templ.format = src_templ.format = PIPE_FORMAT_I8_SINT; - break; - default:; /* fall through */ - } + dst_templ.format = src_templ.format = + util_format_snorm8_to_sint8(dst_templ.format); } vi_disable_dcc_if_incompatible_format(sctx, dst, dst_level, @@ -1193,7 +1177,7 @@ templ.depth0 = 1; templ.array_size = 1; templ.usage = PIPE_USAGE_DEFAULT; - templ.flags = SI_RESOURCE_FLAG_FORCE_TILING | + templ.flags = SI_RESOURCE_FLAG_FORCE_MSAA_TILING | SI_RESOURCE_FLAG_DISABLE_DCC; /* The src and dst microtile modes must be the same. 
*/ diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_buffer.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_buffer.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_buffer.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_buffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -44,7 +44,7 @@ } void *si_buffer_map_sync_with_rings(struct si_context *sctx, - struct r600_resource *resource, + struct si_resource *resource, unsigned usage) { enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE; @@ -101,7 +101,7 @@ } void si_init_resource_fields(struct si_screen *sscreen, - struct r600_resource *res, + struct si_resource *res, uint64_t size, unsigned alignment) { struct si_texture *tex = (struct si_texture*)res; @@ -201,7 +201,7 @@ } bool si_alloc_resource(struct si_screen *sscreen, - struct r600_resource *res) + struct si_resource *res) { struct pb_buffer *old_buf, *new_buf; @@ -248,12 +248,12 @@ static void si_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *buf) { - struct r600_resource *rbuffer = r600_resource(buf); + struct si_resource *buffer = si_resource(buf); threaded_resource_deinit(buf); - util_range_destroy(&rbuffer->valid_buffer_range); - pb_reference(&rbuffer->buf, NULL); - FREE(rbuffer); + util_range_destroy(&buffer->valid_buffer_range); + pb_reference(&buffer->buf, NULL); + FREE(buffer); } /* Reallocate the buffer a update all resource bindings where the buffer is @@ -264,32 +264,32 @@ */ static bool si_invalidate_buffer(struct si_context *sctx, - struct r600_resource *rbuffer) + struct si_resource *buf) { /* Shared buffers can't be reallocated. */ - if (rbuffer->b.is_shared) + if (buf->b.is_shared) return false; /* Sparse buffers can't be reallocated. */ - if (rbuffer->flags & RADEON_FLAG_SPARSE) + if (buf->flags & RADEON_FLAG_SPARSE) return false; /* In AMD_pinned_memory, the user pointer association only gets * broken when the buffer is explicitly re-allocated. 
*/ - if (rbuffer->b.is_user_ptr) + if (buf->b.is_user_ptr) return false; /* Check if mapping this buffer would cause waiting for the GPU. */ - if (si_rings_is_buffer_referenced(sctx, rbuffer->buf, RADEON_USAGE_READWRITE) || - !sctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) { - uint64_t old_va = rbuffer->gpu_address; + if (si_rings_is_buffer_referenced(sctx, buf->buf, RADEON_USAGE_READWRITE) || + !sctx->ws->buffer_wait(buf->buf, 0, RADEON_USAGE_READWRITE)) { + uint64_t old_va = buf->gpu_address; /* Reallocate the buffer in the same pipe_resource. */ - si_alloc_resource(sctx->screen, rbuffer); - si_rebind_buffer(sctx, &rbuffer->b.b, old_va); + si_alloc_resource(sctx->screen, buf); + si_rebind_buffer(sctx, &buf->b.b, old_va); } else { - util_range_set_empty(&rbuffer->valid_buffer_range); + util_range_set_empty(&buf->valid_buffer_range); } return true; @@ -301,22 +301,22 @@ struct pipe_resource *src) { struct si_context *sctx = (struct si_context*)ctx; - struct r600_resource *rdst = r600_resource(dst); - struct r600_resource *rsrc = r600_resource(src); - uint64_t old_gpu_address = rdst->gpu_address; - - pb_reference(&rdst->buf, rsrc->buf); - rdst->gpu_address = rsrc->gpu_address; - rdst->b.b.bind = rsrc->b.b.bind; - rdst->b.max_forced_staging_uploads = rsrc->b.max_forced_staging_uploads; - rdst->max_forced_staging_uploads = rsrc->max_forced_staging_uploads; - rdst->flags = rsrc->flags; - - assert(rdst->vram_usage == rsrc->vram_usage); - assert(rdst->gart_usage == rsrc->gart_usage); - assert(rdst->bo_size == rsrc->bo_size); - assert(rdst->bo_alignment == rsrc->bo_alignment); - assert(rdst->domains == rsrc->domains); + struct si_resource *sdst = si_resource(dst); + struct si_resource *ssrc = si_resource(src); + uint64_t old_gpu_address = sdst->gpu_address; + + pb_reference(&sdst->buf, ssrc->buf); + sdst->gpu_address = ssrc->gpu_address; + sdst->b.b.bind = ssrc->b.b.bind; + sdst->b.max_forced_staging_uploads = ssrc->b.max_forced_staging_uploads; + 
sdst->max_forced_staging_uploads = ssrc->max_forced_staging_uploads; + sdst->flags = ssrc->flags; + + assert(sdst->vram_usage == ssrc->vram_usage); + assert(sdst->gart_usage == ssrc->gart_usage); + assert(sdst->bo_size == ssrc->bo_size); + assert(sdst->bo_alignment == ssrc->bo_alignment); + assert(sdst->domains == ssrc->domains); si_rebind_buffer(sctx, dst, old_gpu_address); } @@ -325,11 +325,11 @@ struct pipe_resource *resource) { struct si_context *sctx = (struct si_context*)ctx; - struct r600_resource *rbuffer = r600_resource(resource); + struct si_resource *buf = si_resource(resource); /* We currently only do anyting here for buffers */ if (resource->target == PIPE_BUFFER) - (void)si_invalidate_buffer(sctx, rbuffer); + (void)si_invalidate_buffer(sctx, buf); } static void *si_buffer_get_transfer(struct pipe_context *ctx, @@ -337,7 +337,7 @@ unsigned usage, const struct pipe_box *box, struct pipe_transfer **ptransfer, - void *data, struct r600_resource *staging, + void *data, struct si_resource *staging, unsigned offset) { struct si_context *sctx = (struct si_context*)ctx; @@ -370,7 +370,7 @@ struct pipe_transfer **ptransfer) { struct si_context *sctx = (struct si_context*)ctx; - struct r600_resource *rbuffer = r600_resource(resource); + struct si_resource *buf = si_resource(resource); uint8_t *data; assert(box->x + box->width <= resource->width0); @@ -386,7 +386,7 @@ * * So don't ever use staging buffers. 
*/ - if (rbuffer->b.is_user_ptr) + if (buf->b.is_user_ptr) usage |= PIPE_TRANSFER_PERSISTENT; /* See if the buffer range being mapped has never been initialized, @@ -394,8 +394,8 @@ if (!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED | TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED)) && usage & PIPE_TRANSFER_WRITE && - !rbuffer->b.is_shared && - !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) { + !buf->b.is_shared && + !util_ranges_intersect(&buf->valid_buffer_range, box->x, box->x + box->width)) { usage |= PIPE_TRANSFER_UNSYNCHRONIZED; } @@ -414,8 +414,8 @@ !(usage & PIPE_TRANSFER_PERSISTENT) && /* Try not to decrement the counter if it's not positive. Still racy, * but it makes it harder to wrap the counter from INT_MIN to INT_MAX. */ - rbuffer->max_forced_staging_uploads > 0 && - p_atomic_dec_return(&rbuffer->max_forced_staging_uploads) >= 0) { + buf->max_forced_staging_uploads > 0 && + p_atomic_dec_return(&buf->max_forced_staging_uploads) >= 0) { usage &= ~(PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE | PIPE_TRANSFER_UNSYNCHRONIZED); usage |= PIPE_TRANSFER_DISCARD_RANGE; @@ -427,7 +427,7 @@ TC_TRANSFER_MAP_NO_INVALIDATE))) { assert(usage & PIPE_TRANSFER_WRITE); - if (si_invalidate_buffer(sctx, rbuffer)) { + if (si_invalidate_buffer(sctx, buf)) { /* At this point, the buffer is always idle. */ usage |= PIPE_TRANSFER_UNSYNCHRONIZED; } else { @@ -439,18 +439,18 @@ if ((usage & PIPE_TRANSFER_DISCARD_RANGE) && ((!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_PERSISTENT))) || - (rbuffer->flags & RADEON_FLAG_SPARSE))) { + (buf->flags & RADEON_FLAG_SPARSE))) { assert(usage & PIPE_TRANSFER_WRITE); /* Check if mapping this buffer would cause waiting for the GPU. 
*/ - if (rbuffer->flags & RADEON_FLAG_SPARSE || + if (buf->flags & RADEON_FLAG_SPARSE || force_discard_range || - si_rings_is_buffer_referenced(sctx, rbuffer->buf, RADEON_USAGE_READWRITE) || - !sctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) { + si_rings_is_buffer_referenced(sctx, buf->buf, RADEON_USAGE_READWRITE) || + !sctx->ws->buffer_wait(buf->buf, 0, RADEON_USAGE_READWRITE)) { /* Do a wait-free write-only transfer using a temporary buffer. */ unsigned offset; - struct r600_resource *staging = NULL; + struct si_resource *staging = NULL; u_upload_alloc(ctx->stream_uploader, 0, box->width + (box->x % SI_MAP_BUFFER_ALIGNMENT), @@ -462,7 +462,7 @@ data += box->x % SI_MAP_BUFFER_ALIGNMENT; return si_buffer_get_transfer(ctx, resource, usage, box, ptransfer, data, staging, offset); - } else if (rbuffer->flags & RADEON_FLAG_SPARSE) { + } else if (buf->flags & RADEON_FLAG_SPARSE) { return NULL; } } else { @@ -473,13 +473,13 @@ /* Use a staging buffer in cached GTT for reads. */ else if (((usage & PIPE_TRANSFER_READ) && !(usage & PIPE_TRANSFER_PERSISTENT) && - (rbuffer->domains & RADEON_DOMAIN_VRAM || - rbuffer->flags & RADEON_FLAG_GTT_WC)) || - (rbuffer->flags & RADEON_FLAG_SPARSE)) { - struct r600_resource *staging; + (buf->domains & RADEON_DOMAIN_VRAM || + buf->flags & RADEON_FLAG_GTT_WC)) || + (buf->flags & RADEON_FLAG_SPARSE)) { + struct si_resource *staging; assert(!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)); - staging = r600_resource(pipe_buffer_create( + staging = si_resource(pipe_buffer_create( ctx->screen, 0, PIPE_USAGE_STAGING, box->width + (box->x % SI_MAP_BUFFER_ALIGNMENT))); if (staging) { @@ -491,19 +491,19 @@ data = si_buffer_map_sync_with_rings(sctx, staging, usage & ~PIPE_TRANSFER_UNSYNCHRONIZED); if (!data) { - r600_resource_reference(&staging, NULL); + si_resource_reference(&staging, NULL); return NULL; } data += box->x % SI_MAP_BUFFER_ALIGNMENT; return si_buffer_get_transfer(ctx, resource, usage, box, ptransfer, data, staging, 0); - } 
else if (rbuffer->flags & RADEON_FLAG_SPARSE) { + } else if (buf->flags & RADEON_FLAG_SPARSE) { return NULL; } } - data = si_buffer_map_sync_with_rings(sctx, rbuffer, usage); + data = si_buffer_map_sync_with_rings(sctx, buf, usage); if (!data) { return NULL; } @@ -518,17 +518,20 @@ const struct pipe_box *box) { struct si_transfer *stransfer = (struct si_transfer*)transfer; - struct r600_resource *rbuffer = r600_resource(transfer->resource); + struct si_resource *buf = si_resource(transfer->resource); if (stransfer->staging) { + unsigned src_offset = stransfer->offset + + transfer->box.x % SI_MAP_BUFFER_ALIGNMENT + + (box->x - transfer->box.x); + /* Copy the staging buffer into the original one. */ si_copy_buffer((struct si_context*)ctx, transfer->resource, - &stransfer->staging->b.b, box->x, - stransfer->offset + box->x % SI_MAP_BUFFER_ALIGNMENT, + &stransfer->staging->b.b, box->x, src_offset, box->width); } - util_range_add(&rbuffer->valid_buffer_range, box->x, + util_range_add(&buf->valid_buffer_range, box->x, box->x + box->width); } @@ -557,7 +560,7 @@ !(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) si_buffer_do_flush_region(ctx, transfer, &transfer->box); - r600_resource_reference(&stransfer->staging, NULL); + si_resource_reference(&stransfer->staging, NULL); assert(stransfer->b.staging == NULL); /* for threaded context only */ pipe_resource_reference(&transfer->resource, NULL); @@ -597,27 +600,27 @@ si_buffer_transfer_unmap, /* transfer_unmap */ }; -static struct r600_resource * +static struct si_resource * si_alloc_buffer_struct(struct pipe_screen *screen, const struct pipe_resource *templ) { - struct r600_resource *rbuffer; + struct si_resource *buf; - rbuffer = MALLOC_STRUCT(r600_resource); + buf = MALLOC_STRUCT(si_resource); - rbuffer->b.b = *templ; - rbuffer->b.b.next = NULL; - pipe_reference_init(&rbuffer->b.b.reference, 1); - rbuffer->b.b.screen = screen; - - rbuffer->b.vtbl = &si_buffer_vtbl; - threaded_resource_init(&rbuffer->b.b); - - rbuffer->buf 
= NULL; - rbuffer->bind_history = 0; - rbuffer->TC_L2_dirty = false; - util_range_init(&rbuffer->valid_buffer_range); - return rbuffer; + buf->b.b = *templ; + buf->b.b.next = NULL; + pipe_reference_init(&buf->b.b.reference, 1); + buf->b.b.screen = screen; + + buf->b.vtbl = &si_buffer_vtbl; + threaded_resource_init(&buf->b.b); + + buf->buf = NULL; + buf->bind_history = 0; + buf->TC_L2_dirty = false; + util_range_init(&buf->valid_buffer_range); + return buf; } static struct pipe_resource *si_buffer_create(struct pipe_screen *screen, @@ -625,21 +628,21 @@ unsigned alignment) { struct si_screen *sscreen = (struct si_screen*)screen; - struct r600_resource *rbuffer = si_alloc_buffer_struct(screen, templ); + struct si_resource *buf = si_alloc_buffer_struct(screen, templ); if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE) - rbuffer->b.b.flags |= SI_RESOURCE_FLAG_UNMAPPABLE; + buf->b.b.flags |= SI_RESOURCE_FLAG_UNMAPPABLE; - si_init_resource_fields(sscreen, rbuffer, templ->width0, alignment); + si_init_resource_fields(sscreen, buf, templ->width0, alignment); if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE) - rbuffer->flags |= RADEON_FLAG_SPARSE; + buf->flags |= RADEON_FLAG_SPARSE; - if (!si_alloc_resource(sscreen, rbuffer)) { - FREE(rbuffer); + if (!si_alloc_resource(sscreen, buf)) { + FREE(buf); return NULL; } - return &rbuffer->b.b; + return &buf->b.b; } struct pipe_resource *pipe_aligned_buffer_create(struct pipe_screen *screen, @@ -661,11 +664,11 @@ return si_buffer_create(screen, &buffer, alignment); } -struct r600_resource *si_aligned_buffer_create(struct pipe_screen *screen, +struct si_resource *si_aligned_buffer_create(struct pipe_screen *screen, unsigned flags, unsigned usage, unsigned size, unsigned alignment) { - return r600_resource(pipe_aligned_buffer_create(screen, flags, usage, + return si_resource(pipe_aligned_buffer_create(screen, flags, usage, size, alignment)); } @@ -676,26 +679,26 @@ { struct si_screen *sscreen = (struct si_screen*)screen; struct radeon_winsys 
*ws = sscreen->ws; - struct r600_resource *rbuffer = si_alloc_buffer_struct(screen, templ); + struct si_resource *buf = si_alloc_buffer_struct(screen, templ); - rbuffer->domains = RADEON_DOMAIN_GTT; - rbuffer->flags = 0; - rbuffer->b.is_user_ptr = true; - util_range_add(&rbuffer->valid_buffer_range, 0, templ->width0); - util_range_add(&rbuffer->b.valid_buffer_range, 0, templ->width0); + buf->domains = RADEON_DOMAIN_GTT; + buf->flags = 0; + buf->b.is_user_ptr = true; + util_range_add(&buf->valid_buffer_range, 0, templ->width0); + util_range_add(&buf->b.valid_buffer_range, 0, templ->width0); /* Convert a user pointer to a buffer. */ - rbuffer->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0); - if (!rbuffer->buf) { - FREE(rbuffer); + buf->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0); + if (!buf->buf) { + FREE(buf); return NULL; } - rbuffer->gpu_address = ws->buffer_get_virtual_address(rbuffer->buf); - rbuffer->vram_usage = 0; - rbuffer->gart_usage = templ->width0; + buf->gpu_address = ws->buffer_get_virtual_address(buf->buf); + buf->vram_usage = 0; + buf->gart_usage = templ->width0; - return &rbuffer->b.b; + return &buf->b.b; } static struct pipe_resource *si_resource_create(struct pipe_screen *screen, @@ -714,7 +717,7 @@ bool commit) { struct si_context *ctx = (struct si_context *)pctx; - struct r600_resource *res = r600_resource(resource); + struct si_resource *res = si_resource(resource); /* * Since buffer commitment changes cannot be pipelined, we need to diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_build_pm4.h mesa-19.0.1/src/gallium/drivers/radeonsi/si_build_pm4.h --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_build_pm4.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_build_pm4.h 2019-03-31 23:16:37.000000000 +0000 @@ -100,12 +100,18 @@ } static inline void radeon_set_uconfig_reg_idx(struct radeon_cmdbuf *cs, + struct si_screen *screen, unsigned reg, unsigned idx, unsigned value) { assert(reg 
>= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END); assert(cs->current.cdw + 3 <= cs->current.max_dw); - radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, 1, 0)); + assert(idx != 0); + unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX; + if (screen->info.chip_class < GFX9 || + (screen->info.chip_class == GFX9 && screen->info.me_fw_version < 26)) + opcode = PKT3_SET_UCONFIG_REG; + radeon_emit(cs, PKT3(opcode, 1, 0)); radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2 | (idx << 28)); radeon_emit(cs, value); } diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_clear.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_clear.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_clear.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_clear.c 2019-03-31 23:16:37.000000000 +0000 @@ -34,6 +34,15 @@ SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE, }; +enum si_dcc_clear_code +{ + DCC_CLEAR_COLOR_0000 = 0x00000000, + DCC_CLEAR_COLOR_0001 = 0x40404040, + DCC_CLEAR_COLOR_1110 = 0x80808080, + DCC_CLEAR_COLOR_1111 = 0xC0C0C0C0, + DCC_CLEAR_COLOR_REG = 0x20202020, +}; + static void si_alloc_separate_cmask(struct si_screen *sscreen, struct si_texture *tex) { @@ -133,7 +142,7 @@ return false; *eliminate_needed = true; - *clear_value = 0x20202020U; /* use CB clear color registers */ + *clear_value = DCC_CLEAR_COLOR_REG; if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) return true; /* need ELIMINATE_FAST_CLEAR */ @@ -203,15 +212,22 @@ } /* This doesn't need ELIMINATE_FAST_CLEAR. - * CB uses both the DCC clear codes and the CB clear color registers, - * so they must match. + * On chips predating Raven2, the DCC clear codes and the CB clear + * color registers must match. 
*/ *eliminate_needed = false; - if (color_value) - *clear_value |= 0x80808080U; - if (alpha_value) - *clear_value |= 0x40404040U; + if (color_value) { + if (alpha_value) + *clear_value = DCC_CLEAR_COLOR_1111; + else + *clear_value = DCC_CLEAR_COLOR_1110; + } else { + if (alpha_value) + *clear_value = DCC_CLEAR_COLOR_0001; + else + *clear_value = DCC_CLEAR_COLOR_0000; + } return true; } @@ -532,6 +548,12 @@ *buffers &= ~clear_bit; + /* Chips with DCC constant encoding don't need to set the clear + * color registers for DCC clear values 0 and 1. + */ + if (sctx->screen->has_dcc_constant_encode && !eliminate_needed) + continue; + if (si_set_clear_color(tex, fb->cbufs[i]->format, color)) { sctx->framebuffer.dirty_cbufs |= 1 << i; si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer); diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_compute_blit.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_compute_blit.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_compute_blit.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_compute_blit.c 2019-03-31 23:16:37.000000000 +0000 @@ -24,6 +24,7 @@ */ #include "si_pipe.h" +#include "util/u_format.h" /* Note: Compute shaders always use SI_COMPUTE_DST_CACHE_POLICY for dst * and L2_STREAM for src. 
@@ -57,6 +58,20 @@ } } +static void si_compute_internal_begin(struct si_context *sctx) +{ + sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS; + sctx->flags |= SI_CONTEXT_STOP_PIPELINE_STATS; + sctx->render_cond_force_off = true; +} + +static void si_compute_internal_end(struct si_context *sctx) +{ + sctx->flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS; + sctx->flags |= SI_CONTEXT_START_PIPELINE_STATS; + sctx->render_cond_force_off = false; +} + static void si_compute_do_clear_or_copy(struct si_context *sctx, struct pipe_resource *dst, unsigned dst_offset, @@ -76,10 +91,10 @@ assert(dst->target != PIPE_BUFFER || dst_offset + size <= dst->width0); assert(!src || src_offset + size <= src->width0); + si_compute_internal_begin(sctx); sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH | si_get_flush_flags(sctx, coher, SI_COMPUTE_DST_CACHE_POLICY); - si_emit_cache_flush(sctx); /* Save states. */ void *saved_cs = sctx->cs_shader_state.program; @@ -112,12 +127,20 @@ sb[0].buffer_offset = dst_offset; sb[0].buffer_size = size; + bool shader_dst_stream_policy = SI_COMPUTE_DST_CACHE_POLICY != L2_LRU; + if (src) { sb[1].buffer = src; sb[1].buffer_offset = src_offset; sb[1].buffer_size = size; ctx->set_shader_buffers(ctx, PIPE_SHADER_COMPUTE, 0, 2, sb); + + if (!sctx->cs_copy_buffer) { + sctx->cs_copy_buffer = si_create_dma_compute_shader(&sctx->b, + SI_COMPUTE_COPY_DW_PER_THREAD, + shader_dst_stream_policy, true); + } ctx->bind_compute_state(ctx, sctx->cs_copy_buffer); } else { assert(clear_value_size >= 4 && @@ -128,6 +151,12 @@ sctx->cs_user_data[i] = clear_value[i % (clear_value_size / 4)]; ctx->set_shader_buffers(ctx, PIPE_SHADER_COMPUTE, 0, 1, sb); + + if (!sctx->cs_clear_buffer) { + sctx->cs_clear_buffer = si_create_dma_compute_shader(&sctx->b, + SI_COMPUTE_CLEAR_DW_PER_THREAD, + shader_dst_stream_policy, false); + } ctx->bind_compute_state(ctx, sctx->cs_clear_buffer); } @@ -138,11 +167,12 @@ (cache_policy == L2_BYPASS ? 
SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0); if (cache_policy != L2_BYPASS) - r600_resource(dst)->TC_L2_dirty = true; + si_resource(dst)->TC_L2_dirty = true; /* Restore states. */ ctx->bind_compute_state(ctx, saved_cs); ctx->set_shader_buffers(ctx, PIPE_SHADER_COMPUTE, 0, src ? 2 : 1, saved_sb); + si_compute_internal_end(sctx); } void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, @@ -219,8 +249,8 @@ clear_value_size, coher); } else { assert(clear_value_size == 4); - si_cp_dma_clear_buffer(sctx, dst, offset, - aligned_size, *clear_value, coher, + si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, dst, offset, + aligned_size, *clear_value, 0, coher, get_cache_policy(sctx, coher, size)); } @@ -267,8 +297,8 @@ /* Only use compute for VRAM copies on dGPUs. */ if (sctx->screen->info.has_dedicated_vram && - r600_resource(dst)->domains & RADEON_DOMAIN_VRAM && - r600_resource(src)->domains & RADEON_DOMAIN_VRAM && + si_resource(dst)->domains & RADEON_DOMAIN_VRAM && + si_resource(src)->domains & RADEON_DOMAIN_VRAM && size > 32 * 1024 && dst_offset % 4 == 0 && src_offset % 4 == 0 && size % 4 == 0) { si_compute_do_clear_or_copy(sctx, dst, dst_offset, src, src_offset, @@ -279,6 +309,118 @@ } } +void si_compute_copy_image(struct si_context *sctx, + struct pipe_resource *dst, + unsigned dst_level, + struct pipe_resource *src, + unsigned src_level, + unsigned dstx, unsigned dsty, unsigned dstz, + const struct pipe_box *src_box) +{ + struct pipe_context *ctx = &sctx->b; + unsigned width = src_box->width; + unsigned height = src_box->height; + unsigned depth = src_box->depth; + + unsigned data[] = {src_box->x, src_box->y, src_box->z, 0, dstx, dsty, dstz, 0}; + + if (width == 0 || height == 0) + return; + + si_compute_internal_begin(sctx); + sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | + si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM); + si_make_CB_shader_coherent(sctx, dst->nr_samples, true); + + struct pipe_constant_buffer saved_cb = {}; + 
si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &saved_cb); + + struct si_images *images = &sctx->images[PIPE_SHADER_COMPUTE]; + struct pipe_image_view saved_image[2] = {0}; + util_copy_image_view(&saved_image[0], &images->views[0]); + util_copy_image_view(&saved_image[1], &images->views[1]); + + void *saved_cs = sctx->cs_shader_state.program; + + struct pipe_constant_buffer cb = {}; + cb.buffer_size = sizeof(data); + cb.user_buffer = data; + ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &cb); + + struct pipe_image_view image[2] = {0}; + image[0].resource = src; + image[0].shader_access = image[0].access = PIPE_IMAGE_ACCESS_READ; + image[0].format = util_format_linear(src->format); + image[0].u.tex.level = src_level; + image[0].u.tex.first_layer = 0; + image[0].u.tex.last_layer = + src->target == PIPE_TEXTURE_3D ? u_minify(src->depth0, src_level) - 1 + : (unsigned)(src->array_size - 1); + image[1].resource = dst; + image[1].shader_access = image[1].access = PIPE_IMAGE_ACCESS_WRITE; + image[1].format = util_format_linear(dst->format); + image[1].u.tex.level = dst_level; + image[1].u.tex.first_layer = 0; + image[1].u.tex.last_layer = + dst->target == PIPE_TEXTURE_3D ? u_minify(dst->depth0, dst_level) - 1 + : (unsigned)(dst->array_size - 1); + + if (src->format == PIPE_FORMAT_R9G9B9E5_FLOAT) + image[0].format = image[1].format = PIPE_FORMAT_R32_UINT; + + /* SNORM8 blitting has precision issues on some chips. Use the SINT + * equivalent instead, which doesn't force DCC decompression. + * Note that some chips avoid this issue by using SDMA. 
+ */ + if (util_format_is_snorm8(dst->format)) { + image[0].format = image[1].format = + util_format_snorm8_to_sint8(dst->format); + } + + ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, image); + + struct pipe_grid_info info = {0}; + + if (dst->target == PIPE_TEXTURE_1D_ARRAY && src->target == PIPE_TEXTURE_1D_ARRAY) { + if (!sctx->cs_copy_image_1d_array) + sctx->cs_copy_image_1d_array = + si_create_copy_image_compute_shader_1d_array(ctx); + ctx->bind_compute_state(ctx, sctx->cs_copy_image_1d_array); + info.block[0] = 64; + sctx->compute_last_block[0] = width % 64; + info.block[1] = 1; + info.block[2] = 1; + info.grid[0] = DIV_ROUND_UP(width, 64); + info.grid[1] = depth; + info.grid[2] = 1; + } else { + if (!sctx->cs_copy_image) + sctx->cs_copy_image = si_create_copy_image_compute_shader(ctx); + ctx->bind_compute_state(ctx, sctx->cs_copy_image); + info.block[0] = 8; + sctx->compute_last_block[0] = width % 8; + info.block[1] = 8; + sctx->compute_last_block[1] = height % 8; + info.block[2] = 1; + info.grid[0] = DIV_ROUND_UP(width, 8); + info.grid[1] = DIV_ROUND_UP(height, 8); + info.grid[2] = depth; + } + + ctx->launch_grid(ctx, &info); + + sctx->compute_last_block[0] = 0; + sctx->compute_last_block[1] = 0; + + sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | + (sctx->chip_class <= VI ? 
SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) | + si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM); + ctx->bind_compute_state(ctx, saved_cs); + ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, saved_image); + ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &saved_cb); + si_compute_internal_end(sctx); +} + void si_init_compute_blit_functions(struct si_context *sctx) { sctx->b.clear_buffer = si_pipe_clear_buffer; diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_compute.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_compute.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_compute.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_compute.c 2019-03-31 23:16:37.000000000 +0000 @@ -32,9 +32,9 @@ #include "si_build_pm4.h" #include "si_compute.h" -#define COMPUTE_DBG(rscreen, fmt, args...) \ +#define COMPUTE_DBG(sscreen, fmt, args...) \ do { \ - if ((rscreen->debug_flags & DBG(COMPUTE))) fprintf(stderr, fmt, ##args); \ + if ((sscreen->debug_flags & DBG(COMPUTE))) fprintf(stderr, fmt, ##args); \ } while (0); struct dispatch_packet { @@ -308,7 +308,7 @@ uint64_t va; uint32_t offset; pipe_resource_reference(&program->global_buffers[first + i], resources[i]); - va = r600_resource(resources[i])->gpu_address; + va = si_resource(resources[i])->gpu_address; offset = util_le32_to_cpu(*handles[i]); va += offset; va = util_cpu_to_le64(va); @@ -378,7 +378,7 @@ scratch_bo_size = sctx->compute_scratch_buffer->b.b.width0; if (scratch_bo_size < scratch_needed) { - r600_resource_reference(&sctx->compute_scratch_buffer, NULL); + si_resource_reference(&sctx->compute_scratch_buffer, NULL); sctx->compute_scratch_buffer = si_aligned_buffer_create(&sctx->screen->b, @@ -398,7 +398,7 @@ if (si_shader_binary_upload(sctx->screen, shader)) return false; - r600_resource_reference(&shader->scratch_bo, + si_resource_reference(&shader->scratch_bo, sctx->compute_scratch_buffer); } @@ -582,7 +582,7 @@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR)) { struct 
dispatch_packet dispatch; unsigned dispatch_offset; - struct r600_resource *dispatch_buf = NULL; + struct si_resource *dispatch_buf = NULL; uint64_t dispatch_va; /* Upload dispatch ptr */ @@ -620,7 +620,7 @@ radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(dispatch_va >> 32) | S_008F04_STRIDE(0)); - r600_resource_reference(&dispatch_buf, NULL); + si_resource_reference(&dispatch_buf, NULL); user_sgpr += 2; } @@ -651,7 +651,7 @@ { struct radeon_cmdbuf *cs = sctx->gfx_cs; struct si_compute *program = sctx->cs_shader_state.program; - struct r600_resource *input_buffer = NULL; + struct si_resource *input_buffer = NULL; unsigned kernel_args_size; unsigned num_work_size_bytes = program->use_code_object_v2 ? 0 : 36; uint32_t kernel_args_offset = 0; @@ -704,7 +704,7 @@ S_008F04_STRIDE(0)); } - r600_resource_reference(&input_buffer, NULL); + si_resource_reference(&input_buffer, NULL); return true; } @@ -724,12 +724,12 @@ if (info->indirect) { if (program->uses_grid_size) { - uint64_t base_va = r600_resource(info->indirect)->gpu_address; + uint64_t base_va = si_resource(info->indirect)->gpu_address; uint64_t va = base_va + info->indirect_offset; int i; radeon_add_to_buffer_list(sctx, sctx->gfx_cs, - r600_resource(info->indirect), + si_resource(info->indirect), RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT); for (i = 0; i < 3; ++i) { @@ -797,11 +797,6 @@ radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, compute_resource_limits); - radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); - radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(info->block[0])); - radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1])); - radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2])); - unsigned dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1) | S_00B800_FORCE_START_AT_000(1) | @@ -809,11 +804,38 @@ * allow launching waves out-of-order. 
(same as Vulkan) */ S_00B800_ORDER_MODE(sctx->chip_class >= CIK); + uint *last_block = sctx->compute_last_block; + bool partial_block_en = last_block[0] || last_block[1] || last_block[2]; + + radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); + + if (partial_block_en) { + unsigned partial[3]; + + /* If no partial_block, these should be an entire block size, not 0. */ + partial[0] = last_block[0] ? last_block[0] : info->block[0]; + partial[1] = last_block[1] ? last_block[1] : info->block[1]; + partial[2] = last_block[2] ? last_block[2] : info->block[2]; + + radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(info->block[0]) | + S_00B81C_NUM_THREAD_PARTIAL(partial[0])); + radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1]) | + S_00B820_NUM_THREAD_PARTIAL(partial[1])); + radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2]) | + S_00B824_NUM_THREAD_PARTIAL(partial[2])); + + dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1); + } else { + radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(info->block[0])); + radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1])); + radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2])); + } + if (info->indirect) { - uint64_t base_va = r600_resource(info->indirect)->gpu_address; + uint64_t base_va = si_resource(info->indirect)->gpu_address; radeon_add_to_buffer_list(sctx, sctx->gfx_cs, - r600_resource(info->indirect), + si_resource(info->indirect), RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT); radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) | @@ -881,9 +903,9 @@ /* Indirect buffers use TC L2 on GFX9, but not older hw. 
*/ if (sctx->chip_class <= VI && - r600_resource(info->indirect)->TC_L2_dirty) { + si_resource(info->indirect)->TC_L2_dirty) { sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; - r600_resource(info->indirect)->TC_L2_dirty = false; + si_resource(info->indirect)->TC_L2_dirty = false; } } @@ -915,8 +937,8 @@ /* Global buffers */ for (i = 0; i < MAX_GLOBAL_BUFFERS; i++) { - struct r600_resource *buffer = - r600_resource(program->global_buffers[i]); + struct si_resource *buffer = + si_resource(program->global_buffers[i]); if (!buffer) { continue; } diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_cp_dma.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_cp_dma.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_cp_dma.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_cp_dma.c 2019-03-31 23:16:37.000000000 +0000 @@ -54,11 +54,10 @@ * a buffer. The size must fit in bits [20:0]. If CP_DMA_CLEAR is set, src_va is a 32-bit * clear value. */ -static void si_emit_cp_dma(struct si_context *sctx, uint64_t dst_va, - uint64_t src_va, unsigned size, unsigned flags, - enum si_cache_policy cache_policy) +static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, + uint64_t dst_va, uint64_t src_va, unsigned size, + unsigned flags, enum si_cache_policy cache_policy) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; uint32_t header = 0, command = 0; assert(size <= cp_dma_max_byte_count(sctx)); @@ -146,7 +145,7 @@ * DMA request, however, the CP will see the sync flag and still wait * for all DMAs to complete. 
*/ - si_emit_cp_dma(sctx, 0, 0, 0, CP_DMA_SYNC, L2_BYPASS); + si_emit_cp_dma(sctx, sctx->gfx_cs, 0, 0, 0, CP_DMA_SYNC, L2_BYPASS); } static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst, @@ -176,11 +175,11 @@ if (!(user_flags & SI_CPDMA_SKIP_BO_LIST_UPDATE)) { if (dst) radeon_add_to_buffer_list(sctx, sctx->gfx_cs, - r600_resource(dst), + si_resource(dst), RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA); if (src) radeon_add_to_buffer_list(sctx, sctx->gfx_cs, - r600_resource(src), + si_resource(src), RADEON_USAGE_READ, RADEON_PRIO_CP_DMA); } @@ -190,7 +189,8 @@ if (!(user_flags & SI_CPDMA_SKIP_GFX_SYNC) && sctx->flags) si_emit_cache_flush(sctx); - if (!(user_flags & SI_CPDMA_SKIP_SYNC_BEFORE) && *is_first) + if (!(user_flags & SI_CPDMA_SKIP_SYNC_BEFORE) && *is_first && + !(*packet_flags & CP_DMA_CLEAR)) *packet_flags |= CP_DMA_RAW_WAIT; *is_first = false; @@ -207,13 +207,13 @@ } } -void si_cp_dma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, - uint64_t offset, uint64_t size, unsigned value, - enum si_coherency coher, - enum si_cache_policy cache_policy) +void si_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_cmdbuf *cs, + struct pipe_resource *dst, uint64_t offset, + uint64_t size, unsigned value, unsigned user_flags, + enum si_coherency coher, enum si_cache_policy cache_policy) { - struct r600_resource *rdst = r600_resource(dst); - uint64_t va = (rdst ? rdst->gpu_address : 0) + offset; + struct si_resource *sdst = si_resource(dst); + uint64_t va = (sdst ? sdst->gpu_address : 0) + offset; bool is_first = true; assert(size && size % 4 == 0); @@ -221,30 +221,32 @@ /* Mark the buffer range of destination as valid (initialized), * so that transfer_map knows it should wait for the GPU when mapping * that range. */ - if (rdst) - util_range_add(&rdst->valid_buffer_range, offset, offset + size); + if (sdst) + util_range_add(&sdst->valid_buffer_range, offset, offset + size); /* Flush the caches. 
*/ - sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | - SI_CONTEXT_CS_PARTIAL_FLUSH | - si_get_flush_flags(sctx, coher, cache_policy); + if (sdst && !(user_flags & SI_CPDMA_SKIP_GFX_SYNC)) { + sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | + SI_CONTEXT_CS_PARTIAL_FLUSH | + si_get_flush_flags(sctx, coher, cache_policy); + } while (size) { unsigned byte_count = MIN2(size, cp_dma_max_byte_count(sctx)); - unsigned dma_flags = CP_DMA_CLEAR | (rdst ? 0 : CP_DMA_DST_IS_GDS); + unsigned dma_flags = CP_DMA_CLEAR | (sdst ? 0 : CP_DMA_DST_IS_GDS); - si_cp_dma_prepare(sctx, dst, NULL, byte_count, size, 0, coher, - &is_first, &dma_flags); + si_cp_dma_prepare(sctx, dst, NULL, byte_count, size, user_flags, + coher, &is_first, &dma_flags); /* Emit the clear packet. */ - si_emit_cp_dma(sctx, va, value, byte_count, dma_flags, cache_policy); + si_emit_cp_dma(sctx, cs, va, value, byte_count, dma_flags, cache_policy); size -= byte_count; va += byte_count; } - if (rdst && cache_policy != L2_BYPASS) - rdst->TC_L2_dirty = true; + if (sdst && cache_policy != L2_BYPASS) + sdst->TC_L2_dirty = true; /* If it's not a framebuffer fast clear... */ if (coher == SI_COHERENCY_SHADER) @@ -273,7 +275,7 @@ */ if (!sctx->scratch_buffer || sctx->scratch_buffer->b.b.width0 < scratch_size) { - r600_resource_reference(&sctx->scratch_buffer, NULL); + si_resource_reference(&sctx->scratch_buffer, NULL); sctx->scratch_buffer = si_aligned_buffer_create(&sctx->screen->b, SI_RESOURCE_FLAG_UNMAPPABLE, @@ -290,7 +292,7 @@ coher, is_first, &dma_flags); va = sctx->scratch_buffer->gpu_address; - si_emit_cp_dma(sctx, va, va + SI_CPDMA_ALIGNMENT, size, dma_flags, + si_emit_cp_dma(sctx, sctx->gfx_cs, va, va + SI_CPDMA_ALIGNMENT, size, dma_flags, cache_policy); } @@ -321,14 +323,14 @@ /* Mark the buffer range of destination as valid (initialized), * so that transfer_map knows it should wait for the GPU when mapping * that range. 
*/ - util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset, + util_range_add(&si_resource(dst)->valid_buffer_range, dst_offset, dst_offset + size); } - dst_offset += r600_resource(dst)->gpu_address; + dst_offset += si_resource(dst)->gpu_address; } if (src) - src_offset += r600_resource(src)->gpu_address; + src_offset += si_resource(src)->gpu_address; /* The workarounds aren't needed on Fiji and beyond. */ if (sctx->family <= CHIP_CARRIZO || @@ -373,7 +375,7 @@ size + skipped_size + realign_size, user_flags, coher, &is_first, &dma_flags); - si_emit_cp_dma(sctx, main_dst_offset, main_src_offset, + si_emit_cp_dma(sctx, sctx->gfx_cs, main_dst_offset, main_src_offset, byte_count, dma_flags, cache_policy); size -= byte_count; @@ -389,7 +391,7 @@ skipped_size + realign_size, user_flags, coher, &is_first, &dma_flags); - si_emit_cp_dma(sctx, dst_offset, src_offset, skipped_size, + si_emit_cp_dma(sctx, sctx->gfx_cs, dst_offset, src_offset, skipped_size, dma_flags, cache_policy); } @@ -400,7 +402,7 @@ } if (dst && cache_policy != L2_BYPASS) - r600_resource(dst)->TC_L2_dirty = true; + si_resource(dst)->TC_L2_dirty = true; /* If it's not a prefetch or GDS copy... 
*/ if (dst && src && (dst != src || dst_offset != src_offset)) @@ -553,11 +555,11 @@ src = pipe_buffer_create(ctx->screen, 0, PIPE_USAGE_DEFAULT, 16); dst = pipe_buffer_create(ctx->screen, 0, PIPE_USAGE_DEFAULT, 16); - si_cp_dma_clear_buffer(sctx, src, 0, 4, 0xabcdef01, SI_COHERENCY_SHADER, L2_BYPASS); - si_cp_dma_clear_buffer(sctx, src, 4, 4, 0x23456789, SI_COHERENCY_SHADER, L2_BYPASS); - si_cp_dma_clear_buffer(sctx, src, 8, 4, 0x87654321, SI_COHERENCY_SHADER, L2_BYPASS); - si_cp_dma_clear_buffer(sctx, src, 12, 4, 0xfedcba98, SI_COHERENCY_SHADER, L2_BYPASS); - si_cp_dma_clear_buffer(sctx, dst, 0, 16, 0xdeadbeef, SI_COHERENCY_SHADER, L2_BYPASS); + si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, src, 0, 4, 0xabcdef01, 0, SI_COHERENCY_SHADER, L2_BYPASS); + si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, src, 4, 4, 0x23456789, 0, SI_COHERENCY_SHADER, L2_BYPASS); + si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, src, 8, 4, 0x87654321, 0, SI_COHERENCY_SHADER, L2_BYPASS); + si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, src, 12, 4, 0xfedcba98, 0, SI_COHERENCY_SHADER, L2_BYPASS); + si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, dst, 0, 16, 0xdeadbeef, 0, SI_COHERENCY_SHADER, L2_BYPASS); si_cp_dma_copy_buffer(sctx, NULL, src, offset, 0, 16, 0, SI_COHERENCY_NONE, L2_BYPASS); si_cp_dma_copy_buffer(sctx, dst, NULL, 0, offset, 16, 0, SI_COHERENCY_NONE, L2_BYPASS); @@ -567,7 +569,7 @@ r[0] == 0xabcdef01 && r[1] == 0x23456789 && r[2] == 0x87654321 && r[3] == 0xfedcba98 ? 
"pass" : "fail"); - si_cp_dma_clear_buffer(sctx, NULL, offset, 16, 0xc1ea4146, SI_COHERENCY_NONE, L2_BYPASS); + si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, NULL, offset, 16, 0xc1ea4146, 0, SI_COHERENCY_NONE, L2_BYPASS); si_cp_dma_copy_buffer(sctx, dst, NULL, 0, offset, 16, 0, SI_COHERENCY_NONE, L2_BYPASS); pipe_buffer_read(ctx, dst, 0, sizeof(r), r); @@ -579,3 +581,28 @@ pipe_resource_reference(&dst, NULL); exit(0); } + +void si_cp_write_data(struct si_context *sctx, struct si_resource *buf, + unsigned offset, unsigned size, unsigned dst_sel, + unsigned engine, const void *data) +{ + struct radeon_cmdbuf *cs = sctx->gfx_cs; + + assert(offset % 4 == 0); + assert(size % 4 == 0); + + if (sctx->chip_class == SI && dst_sel == V_370_MEM) + dst_sel = V_370_MEM_GRBM; + + radeon_add_to_buffer_list(sctx, cs, buf, + RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA); + uint64_t va = buf->gpu_address + offset; + + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + size/4, 0)); + radeon_emit(cs, S_370_DST_SEL(dst_sel) | + S_370_WR_CONFIRM(1) | + S_370_ENGINE_SEL(engine)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit_array(cs, (const uint32_t*)data, size/4); +} diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_debug.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_debug.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_debug.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_debug.c 2019-03-31 23:16:37.000000000 +0000 @@ -93,7 +93,7 @@ void si_destroy_saved_cs(struct si_saved_cs *scs) { si_clear_saved_cs(&scs->gfx); - r600_resource_reference(&scs->trace_buf, NULL); + si_resource_reference(&scs->trace_buf, NULL); free(scs); } @@ -612,7 +612,7 @@ uint32_t *gpu_list; /** Reference of buffer where the list is uploaded, so that gpu_list * is kept live. 
*/ - struct r600_resource *buf; + struct si_resource *buf; const char *shader_name; const char *elem_name; @@ -628,7 +628,7 @@ si_log_chunk_desc_list_destroy(void *data) { struct si_log_chunk_desc_list *chunk = data; - r600_resource_reference(&chunk->buf, NULL); + si_resource_reference(&chunk->buf, NULL); FREE(chunk); } @@ -747,7 +747,7 @@ chunk->slot_remap = slot_remap; chunk->chip_class = screen->info.chip_class; - r600_resource_reference(&chunk->buf, desc->buffer); + si_resource_reference(&chunk->buf, desc->buffer); chunk->gpu_list = desc->gpu_list; for (unsigned i = 0; i < num_elements; ++i) { @@ -1052,23 +1052,30 @@ void si_log_draw_state(struct si_context *sctx, struct u_log_context *log) { + struct si_shader_ctx_state *tcs_shader; + if (!log) return; + tcs_shader = &sctx->tcs_shader; + if (sctx->tes_shader.cso && !sctx->tcs_shader.cso) + tcs_shader = &sctx->fixed_func_tcs_shader; + si_dump_framebuffer(sctx, log); si_dump_gfx_shader(sctx, &sctx->vs_shader, log); - si_dump_gfx_shader(sctx, &sctx->tcs_shader, log); + si_dump_gfx_shader(sctx, tcs_shader, log); si_dump_gfx_shader(sctx, &sctx->tes_shader, log); si_dump_gfx_shader(sctx, &sctx->gs_shader, log); si_dump_gfx_shader(sctx, &sctx->ps_shader, log); si_dump_descriptor_list(sctx->screen, &sctx->descriptors[SI_DESCS_RW_BUFFERS], - "", "RW buffers", 4, SI_NUM_RW_BUFFERS, + "", "RW buffers", 4, + sctx->descriptors[SI_DESCS_RW_BUFFERS].num_active_slots, si_identity, log); si_dump_gfx_descriptors(sctx, &sctx->vs_shader, log); - si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, log); + si_dump_gfx_descriptors(sctx, tcs_shader, log); si_dump_gfx_descriptors(sctx, &sctx->tes_shader, log); si_dump_gfx_descriptors(sctx, &sctx->gs_shader, log); si_dump_gfx_descriptors(sctx, &sctx->ps_shader, log); diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_descriptors.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_descriptors.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_descriptors.c 2018-09-27 19:13:54.000000000 +0000 
+++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_descriptors.c 2019-03-31 23:16:37.000000000 +0000 @@ -134,7 +134,7 @@ static void si_release_descriptors(struct si_descriptors *desc) { - r600_resource_reference(&desc->buffer, NULL); + si_resource_reference(&desc->buffer, NULL); FREE(desc->list); } @@ -159,7 +159,7 @@ desc->element_dw_size]; /* The buffer is already in the buffer list. */ - r600_resource_reference(&desc->buffer, NULL); + si_resource_reference(&desc->buffer, NULL); desc->gpu_list = NULL; desc->gpu_address = si_desc_extract_buffer_address(descriptor); si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers); @@ -209,7 +209,7 @@ /* SAMPLER VIEWS */ static inline enum radeon_bo_priority -si_get_sampler_view_priority(struct r600_resource *res) +si_get_sampler_view_priority(struct si_resource *res) { if (res->b.b.target == PIPE_BUFFER) return RADEON_PRIO_SAMPLER_BUFFER; @@ -290,7 +290,7 @@ } /* Set buffer descriptor fields that can be changed by reallocations. */ -static void si_set_buf_desc_address(struct r600_resource *buf, +static void si_set_buf_desc_address(struct si_resource *buf, uint64_t offset, uint32_t *state) { uint64_t va = buf->gpu_address + offset; @@ -497,7 +497,7 @@ bool disallow_early_out) { struct si_samplers *samplers = &sctx->samplers[shader]; - struct si_sampler_view *rview = (struct si_sampler_view*)view; + struct si_sampler_view *sview = (struct si_sampler_view*)view; struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader); unsigned desc_slot = si_get_sampler_slot(slot); uint32_t *desc = descs->list + desc_slot * 16; @@ -508,7 +508,7 @@ if (view) { struct si_texture *tex = (struct si_texture *)view->texture; - si_set_sampler_view_desc(sctx, rview, + si_set_sampler_view_desc(sctx, sview, samplers->sampler_states[slot], desc); if (tex->buffer.b.b.target == PIPE_BUFFER) { @@ -539,7 +539,7 @@ * updated. 
*/ si_sampler_view_add_buffer(sctx, view->texture, RADEON_USAGE_READ, - rview->is_stencil_sampler, true); + sview->is_stencil_sampler, true); } else { pipe_sampler_view_reference(&samplers->views[slot], NULL); memcpy(desc, null_texture_descriptor, 8*4); @@ -667,7 +667,7 @@ static void si_mark_image_range_valid(const struct pipe_image_view *view) { - struct r600_resource *res = r600_resource(view->resource); + struct si_resource *res = si_resource(view->resource); assert(res && res->b.b.target == PIPE_BUFFER); @@ -682,9 +682,9 @@ uint32_t *desc, uint32_t *fmask_desc) { struct si_screen *screen = ctx->screen; - struct r600_resource *res; + struct si_resource *res; - res = r600_resource(view->resource); + res = si_resource(view->resource); if (res->b.b.target == PIPE_BUFFER) { if (view->access & PIPE_IMAGE_ACCESS_WRITE) @@ -771,7 +771,7 @@ { struct si_images *images = &ctx->images[shader]; struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader); - struct r600_resource *res; + struct si_resource *res; unsigned desc_slot = si_get_image_slot(slot); uint32_t *desc = descs->list + desc_slot * 8; @@ -780,7 +780,7 @@ return; } - res = r600_resource(view->resource); + res = si_resource(view->resource); if (&images->views[slot] != view) util_copy_image_view(&images->views[slot], view); @@ -1026,7 +1026,7 @@ int i = u_bit_scan(&mask); radeon_add_to_buffer_list(sctx, sctx->gfx_cs, - r600_resource(buffers->buffers[i]), + si_resource(buffers->buffers[i]), i < SI_NUM_SHADER_BUFFERS ? buffers->shader_usage : buffers->shader_usage_constbuf, i < SI_NUM_SHADER_BUFFERS ? 
buffers->priority : @@ -1041,7 +1041,7 @@ { pipe_resource_reference(buf, buffers->buffers[idx]); if (*buf) { - struct r600_resource *res = r600_resource(*buf); + struct si_resource *res = si_resource(*buf); const uint32_t *desc = descs->list + idx * 4; uint64_t va; @@ -1071,7 +1071,7 @@ continue; radeon_add_to_buffer_list(sctx, sctx->gfx_cs, - r600_resource(sctx->vertex_buffer[vb].buffer.resource), + si_resource(sctx->vertex_buffer[vb].buffer.resource), RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER); } @@ -1126,22 +1126,22 @@ for (i = 0; i < count; i++) { struct pipe_vertex_buffer *vb; - struct r600_resource *rbuffer; + struct si_resource *buf; unsigned vbo_index = velems->vertex_buffer_index[i]; uint32_t *desc = &ptr[i*4]; vb = &sctx->vertex_buffer[vbo_index]; - rbuffer = r600_resource(vb->buffer.resource); - if (!rbuffer) { + buf = si_resource(vb->buffer.resource); + if (!buf) { memset(desc, 0, 16); continue; } int64_t offset = (int64_t)((int)vb->buffer_offset) + velems->src_offset[i]; - uint64_t va = rbuffer->gpu_address + offset; + uint64_t va = buf->gpu_address + offset; - int64_t num_records = (int64_t)rbuffer->b.b.width0 - offset; + int64_t num_records = (int64_t)buf->b.b.width0 - offset; if (sctx->chip_class != VI && vb->stride) { /* Round up by rounding down and adding 1 */ num_records = (num_records - velems->format_size[i]) / @@ -1157,7 +1157,7 @@ if (first_vb_use_mask & (1 << i)) { radeon_add_to_buffer_list(sctx, sctx->gfx_cs, - r600_resource(vb->buffer.resource), + si_resource(vb->buffer.resource), RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER); } } @@ -1189,7 +1189,7 @@ return &sctx->descriptors[si_const_and_shader_buffer_descriptors_idx(shader)]; } -void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer, +void si_upload_const_buffer(struct si_context *sctx, struct si_resource **buf, const uint8_t *ptr, unsigned size, uint32_t *const_offset) { void *tmp; @@ -1197,8 +1197,8 @@ u_upload_alloc(sctx->b.const_uploader, 0, size, 
si_optimal_tcc_alignment(sctx, size), const_offset, - (struct pipe_resource**)rbuffer, &tmp); - if (*rbuffer) + (struct pipe_resource**)buf, &tmp); + if (*buf) util_memcpy_cpu_to_le32(tmp, ptr, size); } @@ -1226,19 +1226,17 @@ unsigned buffer_offset; si_upload_const_buffer(sctx, - (struct r600_resource**)&buffer, input->user_buffer, + (struct si_resource**)&buffer, input->user_buffer, input->buffer_size, &buffer_offset); if (!buffer) { /* Just unbind on failure. */ si_set_constant_buffer(sctx, buffers, descriptors_idx, slot, NULL); return; } - va = r600_resource(buffer)->gpu_address + buffer_offset; + va = si_resource(buffer)->gpu_address + buffer_offset; } else { pipe_resource_reference(&buffer, input->buffer); - va = r600_resource(buffer)->gpu_address + input->buffer_offset; - /* Only track usage for non-user buffers. */ - r600_resource(buffer)->bind_history |= PIPE_BIND_CONSTANT_BUFFER; + va = si_resource(buffer)->gpu_address + input->buffer_offset; } /* Set the descriptor. */ @@ -1256,7 +1254,7 @@ buffers->buffers[slot] = buffer; radeon_add_to_gfx_buffer_list_check_mem(sctx, - r600_resource(buffer), + si_resource(buffer), buffers->shader_usage_constbuf, buffers->priority_constbuf, true); buffers->enabled_mask |= 1u << slot; @@ -1269,13 +1267,6 @@ sctx->descriptors_dirty |= 1u << descriptors_idx; } -void si_set_rw_buffer(struct si_context *sctx, - uint slot, const struct pipe_constant_buffer *input) -{ - si_set_constant_buffer(sctx, &sctx->rw_buffers, - SI_DESCS_RW_BUFFERS, slot, input); -} - static void si_pipe_set_constant_buffer(struct pipe_context *ctx, enum pipe_shader_type shader, uint slot, const struct pipe_constant_buffer *input) @@ -1286,11 +1277,14 @@ return; if (slot == 0 && input && input->buffer && - !(r600_resource(input->buffer)->flags & RADEON_FLAG_32BIT)) { + !(si_resource(input->buffer)->flags & RADEON_FLAG_32BIT)) { assert(!"constant buffer 0 must have a 32-bit VM address, use const_uploader"); return; } + if (input && input->buffer) + 
si_resource(input->buffer)->bind_history |= PIPE_BIND_CONSTANT_BUFFER; + slot = si_get_constbuf_slot(slot); si_set_constant_buffer(sctx, &sctx->const_and_shader_buffers[shader], si_const_and_shader_buffer_descriptors_idx(shader), @@ -1310,6 +1304,49 @@ /* SHADER BUFFERS */ +static void si_set_shader_buffer(struct si_context *sctx, + struct si_buffer_resources *buffers, + unsigned descriptors_idx, + uint slot, const struct pipe_shader_buffer *sbuffer, + enum radeon_bo_priority priority) +{ + struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; + uint32_t *desc = descs->list + slot * 4; + + if (!sbuffer || !sbuffer->buffer) { + pipe_resource_reference(&buffers->buffers[slot], NULL); + memset(desc, 0, sizeof(uint32_t) * 4); + buffers->enabled_mask &= ~(1u << slot); + sctx->descriptors_dirty |= 1u << descriptors_idx; + return; + } + + struct si_resource *buf = si_resource(sbuffer->buffer); + uint64_t va = buf->gpu_address + sbuffer->buffer_offset; + + desc[0] = va; + desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | + S_008F04_STRIDE(0); + desc[2] = sbuffer->buffer_size; + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + + pipe_resource_reference(&buffers->buffers[slot], &buf->b.b); + radeon_add_to_gfx_buffer_list_check_mem(sctx, buf, + buffers->shader_usage, + priority, true); + + buffers->enabled_mask |= 1u << slot; + sctx->descriptors_dirty |= 1u << descriptors_idx; + + util_range_add(&buf->valid_buffer_range, sbuffer->buffer_offset, + sbuffer->buffer_offset + sbuffer->buffer_size); +} + static void si_set_shader_buffers(struct pipe_context *ctx, enum pipe_shader_type shader, unsigned start_slot, unsigned count, @@ -1317,53 +1354,20 @@ { struct si_context *sctx = (struct si_context *)ctx; struct si_buffer_resources 
*buffers = &sctx->const_and_shader_buffers[shader]; - struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader); + unsigned descriptors_idx = si_const_and_shader_buffer_descriptors_idx(shader); unsigned i; assert(start_slot + count <= SI_NUM_SHADER_BUFFERS); for (i = 0; i < count; ++i) { const struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL; - struct r600_resource *buf; unsigned slot = si_get_shaderbuf_slot(start_slot + i); - uint32_t *desc = descs->list + slot * 4; - uint64_t va; - if (!sbuffer || !sbuffer->buffer) { - pipe_resource_reference(&buffers->buffers[slot], NULL); - memset(desc, 0, sizeof(uint32_t) * 4); - buffers->enabled_mask &= ~(1u << slot); - sctx->descriptors_dirty |= - 1u << si_const_and_shader_buffer_descriptors_idx(shader); - continue; - } - - buf = r600_resource(sbuffer->buffer); - va = buf->gpu_address + sbuffer->buffer_offset; - - desc[0] = va; - desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | - S_008F04_STRIDE(0); - desc[2] = sbuffer->buffer_size; - desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + if (sbuffer && sbuffer->buffer) + si_resource(sbuffer->buffer)->bind_history |= PIPE_BIND_SHADER_BUFFER; - pipe_resource_reference(&buffers->buffers[slot], &buf->b.b); - radeon_add_to_gfx_buffer_list_check_mem(sctx, buf, - buffers->shader_usage, - buffers->priority, true); - buf->bind_history |= PIPE_BIND_SHADER_BUFFER; - - buffers->enabled_mask |= 1u << slot; - sctx->descriptors_dirty |= - 1u << si_const_and_shader_buffer_descriptors_idx(shader); - - util_range_add(&buf->valid_buffer_range, sbuffer->buffer_offset, - sbuffer->buffer_offset + sbuffer->buffer_size); + si_set_shader_buffer(sctx, buffers, descriptors_idx, slot, sbuffer, + buffers->priority); } } @@ -1386,6 
+1390,20 @@ /* RING BUFFERS */ +void si_set_rw_buffer(struct si_context *sctx, + uint slot, const struct pipe_constant_buffer *input) +{ + si_set_constant_buffer(sctx, &sctx->rw_buffers, SI_DESCS_RW_BUFFERS, + slot, input); +} + +void si_set_rw_shader_buffer(struct si_context *sctx, uint slot, + const struct pipe_shader_buffer *sbuffer) +{ + si_set_shader_buffer(sctx, &sctx->rw_buffers, SI_DESCS_RW_BUFFERS, + slot, sbuffer, RADEON_PRIO_SHADER_RW_BUFFER); +} + void si_set_ring_buffer(struct si_context *sctx, uint slot, struct pipe_resource *buffer, unsigned stride, unsigned num_records, @@ -1404,7 +1422,7 @@ if (buffer) { uint64_t va; - va = r600_resource(buffer)->gpu_address + offset; + va = si_resource(buffer)->gpu_address + offset; switch (element_size) { default: @@ -1468,7 +1486,7 @@ pipe_resource_reference(&buffers->buffers[slot], buffer); radeon_add_to_buffer_list(sctx, sctx->gfx_cs, - r600_resource(buffer), + si_resource(buffer), buffers->shader_usage, buffers->priority); buffers->enabled_mask |= 1u << slot; } else { @@ -1490,7 +1508,7 @@ uint64_t offset_within_buffer = old_desc_va - old_buf_va; /* Update the descriptor. */ - si_set_buf_desc_address(r600_resource(new_buf), offset_within_buffer, + si_set_buf_desc_address(si_resource(new_buf), offset_within_buffer, desc); } @@ -1593,7 +1611,7 @@ sctx->descriptors_dirty |= 1u << descriptors_idx; radeon_add_to_gfx_buffer_list_check_mem(sctx, - r600_resource(buf), + si_resource(buf), usage, priority, true); } } @@ -1605,7 +1623,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, uint64_t old_va) { - struct r600_resource *rbuffer = r600_resource(buf); + struct si_resource *buffer = si_resource(buf); unsigned i, shader; unsigned num_elems = sctx->vertex_elements ? sctx->vertex_elements->count : 0; @@ -1617,7 +1635,7 @@ */ /* Vertex buffers. 
*/ - if (rbuffer->bind_history & PIPE_BIND_VERTEX_BUFFER) { + if (buffer->bind_history & PIPE_BIND_VERTEX_BUFFER) { for (i = 0; i < num_elems; i++) { int vb = sctx->vertex_elements->vertex_buffer_index[i]; @@ -1634,7 +1652,7 @@ } /* Streamout buffers. (other internal buffers can't be invalidated) */ - if (rbuffer->bind_history & PIPE_BIND_STREAM_OUTPUT) { + if (buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) { for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) { struct si_buffer_resources *buffers = &sctx->rw_buffers; struct si_descriptors *descs = @@ -1648,7 +1666,7 @@ sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS; radeon_add_to_gfx_buffer_list_check_mem(sctx, - rbuffer, buffers->shader_usage, + buffer, buffers->shader_usage, RADEON_PRIO_SHADER_RW_BUFFER, true); @@ -1662,7 +1680,7 @@ } /* Constant and shader buffers. */ - if (rbuffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) { + if (buffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) { for (shader = 0; shader < SI_NUM_SHADERS; shader++) si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], si_const_and_shader_buffer_descriptors_idx(shader), @@ -1672,7 +1690,7 @@ sctx->const_and_shader_buffers[shader].priority_constbuf); } - if (rbuffer->bind_history & PIPE_BIND_SHADER_BUFFER) { + if (buffer->bind_history & PIPE_BIND_SHADER_BUFFER) { for (shader = 0; shader < SI_NUM_SHADERS; shader++) si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], si_const_and_shader_buffer_descriptors_idx(shader), @@ -1682,7 +1700,7 @@ sctx->const_and_shader_buffers[shader].priority); } - if (rbuffer->bind_history & PIPE_BIND_SAMPLER_VIEW) { + if (buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) { /* Texture buffers - update bindings. 
*/ for (shader = 0; shader < SI_NUM_SHADERS; shader++) { struct si_samplers *samplers = &sctx->samplers[shader]; @@ -1702,7 +1720,7 @@ 1u << si_sampler_and_image_descriptors_idx(shader); radeon_add_to_gfx_buffer_list_check_mem(sctx, - rbuffer, RADEON_USAGE_READ, + buffer, RADEON_USAGE_READ, RADEON_PRIO_SAMPLER_BUFFER, true); } @@ -1711,7 +1729,7 @@ } /* Shader images */ - if (rbuffer->bind_history & PIPE_BIND_SHADER_IMAGE) { + if (buffer->bind_history & PIPE_BIND_SHADER_IMAGE) { for (shader = 0; shader < SI_NUM_SHADERS; ++shader) { struct si_images *images = &sctx->images[shader]; struct si_descriptors *descs = @@ -1734,7 +1752,7 @@ 1u << si_sampler_and_image_descriptors_idx(shader); radeon_add_to_gfx_buffer_list_check_mem( - sctx, rbuffer, + sctx, buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_SAMPLER_BUFFER, true); } @@ -1743,7 +1761,7 @@ } /* Bindless texture handles */ - if (rbuffer->texture_handle_allocated) { + if (buffer->texture_handle_allocated) { struct si_descriptors *descs = &sctx->bindless_descriptors; util_dynarray_foreach(&sctx->resident_tex_handles, @@ -1752,7 +1770,7 @@ unsigned desc_slot = (*tex_handle)->desc_slot; if (view->texture == buf) { - si_set_buf_desc_address(rbuffer, + si_set_buf_desc_address(buffer, view->u.buf.offset, descs->list + desc_slot * 16 + 4); @@ -1761,7 +1779,7 @@ sctx->bindless_descriptors_dirty = true; radeon_add_to_gfx_buffer_list_check_mem( - sctx, rbuffer, + sctx, buffer, RADEON_USAGE_READ, RADEON_PRIO_SAMPLER_BUFFER, true); } @@ -1769,7 +1787,7 @@ } /* Bindless image handles */ - if (rbuffer->image_handle_allocated) { + if (buffer->image_handle_allocated) { struct si_descriptors *descs = &sctx->bindless_descriptors; util_dynarray_foreach(&sctx->resident_img_handles, @@ -1781,7 +1799,7 @@ if (view->access & PIPE_IMAGE_ACCESS_WRITE) si_mark_image_range_valid(view); - si_set_buf_desc_address(rbuffer, + si_set_buf_desc_address(buffer, view->u.buf.offset, descs->list + desc_slot * 16 + 4); @@ -1790,7 +1808,7 @@ 
sctx->bindless_descriptors_dirty = true; radeon_add_to_gfx_buffer_list_check_mem( - sctx, rbuffer, + sctx, buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_SAMPLER_BUFFER, true); } @@ -1803,7 +1821,6 @@ unsigned num_dwords) { struct si_descriptors *desc = &sctx->bindless_descriptors; - struct radeon_cmdbuf *cs = sctx->gfx_cs; unsigned desc_slot_offset = desc_slot * 16; uint32_t *data; uint64_t va; @@ -1811,13 +1828,8 @@ data = desc->list + desc_slot_offset; va = desc->gpu_address + desc_slot_offset * 4; - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + num_dwords, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_TC_L2) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_ME)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit_array(cs, data, num_dwords); + si_cp_write_data(sctx, desc->buffer, va - desc->buffer->gpu_address, + num_dwords * 4, V_370_TC_L2, V_370_ME, data); } static void si_upload_bindless_descriptors(struct si_context *sctx) @@ -2055,7 +2067,7 @@ unsigned sh_offset, unsigned pointer_count) { - radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (HAVE_32BIT_POINTERS ? 
1 : 2), 0)); + radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count, 0)); radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2); } @@ -2065,10 +2077,7 @@ { radeon_emit(cs, va); - if (HAVE_32BIT_POINTERS) - assert(va == 0 || (va >> 32) == sscreen->info.address32_hi); - else - radeon_emit(cs, va >> 32); + assert(va == 0 || (va >> 32) == sscreen->info.address32_hi); } static void si_emit_shader_pointer(struct si_context *sctx, @@ -2106,25 +2115,6 @@ } } -static void si_emit_disjoint_shader_pointers(struct si_context *sctx, - unsigned pointer_mask, - unsigned sh_base) -{ - if (!sh_base) - return; - - struct radeon_cmdbuf *cs = sctx->gfx_cs; - unsigned mask = sctx->shader_pointers_dirty & pointer_mask; - - while (mask) { - struct si_descriptors *descs = &sctx->descriptors[u_bit_scan(&mask)]; - unsigned sh_offset = sh_base + descs->shader_userdata_offset; - - si_emit_shader_pointer_head(cs, sh_offset, 1); - si_emit_shader_pointer_body(sctx->screen, cs, descs->gpu_address); - } -} - static void si_emit_global_shader_pointers(struct si_context *sctx, struct si_descriptors *descs) { @@ -2164,17 +2154,10 @@ sh_base[PIPE_SHADER_TESS_EVAL]); si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(FRAGMENT), sh_base[PIPE_SHADER_FRAGMENT]); - if (HAVE_32BIT_POINTERS || sctx->chip_class <= VI) { - si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL), - sh_base[PIPE_SHADER_TESS_CTRL]); - si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY), - sh_base[PIPE_SHADER_GEOMETRY]); - } else { - si_emit_disjoint_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL), - sh_base[PIPE_SHADER_TESS_CTRL]); - si_emit_disjoint_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY), - sh_base[PIPE_SHADER_GEOMETRY]); - } + si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL), + sh_base[PIPE_SHADER_TESS_CTRL]); + si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY), + sh_base[PIPE_SHADER_GEOMETRY]); 
sctx->shader_pointers_dirty &= ~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE); @@ -2313,7 +2296,7 @@ bool *desc_dirty) { struct si_descriptors *desc = &sctx->bindless_descriptors; - struct r600_resource *buf = r600_resource(resource); + struct si_resource *buf = si_resource(resource); unsigned desc_slot_offset = desc_slot * 16; uint32_t *desc_list = desc->list + desc_slot_offset + 4; uint64_t old_desc_va; @@ -2379,7 +2362,7 @@ pipe_sampler_view_reference(&tex_handle->view, view); - r600_resource(sview->base.texture)->texture_handle_allocated = true; + si_resource(sview->base.texture)->texture_handle_allocated = true; return handle; } @@ -2525,7 +2508,7 @@ util_copy_image_view(&img_handle->view, view); - r600_resource(view->resource)->image_handle_allocated = true; + si_resource(view->resource)->image_handle_allocated = true; return handle; } @@ -2555,7 +2538,7 @@ struct si_context *sctx = (struct si_context *)ctx; struct si_image_handle *img_handle; struct pipe_image_view *view; - struct r600_resource *res; + struct si_resource *res; struct hash_entry *entry; entry = _mesa_hash_table_search(sctx->img_handles, @@ -2565,7 +2548,7 @@ img_handle = (struct si_image_handle *)entry->data; view = &img_handle->view; - res = r600_resource(view->resource); + res = si_resource(view->resource); if (resident) { if (res->b.b.target != PIPE_BUFFER) { @@ -2665,10 +2648,6 @@ { int i; -#if !HAVE_32BIT_POINTERS - STATIC_ASSERT(GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES % 2 == 0); -#endif - for (i = 0; i < SI_NUM_SHADERS; i++) { bool is_2nd = sctx->chip_class >= GFX9 && (i == PIPE_SHADER_TESS_CTRL || @@ -2699,7 +2678,6 @@ desc->slot_index_to_bind_directly = si_get_constbuf_slot(0); if (is_2nd) { -#if HAVE_32BIT_POINTERS if (i == PIPE_SHADER_TESS_CTRL) { rel_dw_offset = (R_00B40C_SPI_SHADER_USER_DATA_ADDR_HI_HS - R_00B430_SPI_SHADER_USER_DATA_LS_0) / 4; @@ -2707,9 +2685,6 @@ rel_dw_offset = (R_00B20C_SPI_SHADER_USER_DATA_ADDR_HI_GS - R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4; } 
-#else - rel_dw_offset = GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES; -#endif } else { rel_dw_offset = SI_SGPR_SAMPLERS_AND_IMAGES; } @@ -2831,7 +2806,7 @@ for (i = 0; i < SI_NUM_DESCS; ++i) si_release_descriptors(&sctx->descriptors[i]); - r600_resource_reference(&sctx->vb_descriptors_buffer, NULL); + si_resource_reference(&sctx->vb_descriptors_buffer, NULL); sctx->vb_descriptors_gpu_list = NULL; /* points into a mapped buffer */ si_release_bindless_descriptors(sctx); diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_dma.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_dma.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_dma.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_dma.c 2019-03-31 23:16:37.000000000 +0000 @@ -37,17 +37,17 @@ { struct radeon_cmdbuf *cs = ctx->dma_cs; unsigned i, ncopy, count, max_size, sub_cmd, shift; - struct r600_resource *rdst = r600_resource(dst); - struct r600_resource *rsrc = r600_resource(src); + struct si_resource *sdst = si_resource(dst); + struct si_resource *ssrc = si_resource(src); /* Mark the buffer range of destination as valid (initialized), * so that transfer_map knows it should wait for the GPU when mapping * that range. 
*/ - util_range_add(&rdst->valid_buffer_range, dst_offset, + util_range_add(&sdst->valid_buffer_range, dst_offset, dst_offset + size); - dst_offset += rdst->gpu_address; - src_offset += rsrc->gpu_address; + dst_offset += sdst->gpu_address; + src_offset += ssrc->gpu_address; /* see whether we should use the dword-aligned or byte-aligned copy */ if (!(dst_offset % 4) && !(src_offset % 4) && !(size % 4)) { @@ -61,7 +61,7 @@ } ncopy = DIV_ROUND_UP(size, max_size); - si_need_dma_space(ctx, ncopy * 5, rdst, rsrc); + si_need_dma_space(ctx, ncopy * 5, sdst, ssrc); for (i = 0; i < ncopy; i++) { count = MIN2(size, max_size); diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_dma_cs.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_dma_cs.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_dma_cs.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_dma_cs.c 2019-03-31 23:16:37.000000000 +0000 @@ -36,7 +36,7 @@ radeon_emit(cs, 0xf0000000); /* NOP */ } -void si_dma_emit_timestamp(struct si_context *sctx, struct r600_resource *dst, +void si_dma_emit_timestamp(struct si_context *sctx, struct si_resource *dst, uint64_t offset) { struct radeon_cmdbuf *cs = sctx->dma_cs; @@ -69,7 +69,7 @@ { struct radeon_cmdbuf *cs = sctx->dma_cs; unsigned i, ncopy, csize; - struct r600_resource *rdst = r600_resource(dst); + struct si_resource *sdst = si_resource(dst); assert(offset % 4 == 0); assert(size); @@ -83,14 +83,14 @@ /* Mark the buffer range of destination as valid (initialized), * so that transfer_map knows it should wait for the GPU when mapping * that range. 
*/ - util_range_add(&rdst->valid_buffer_range, offset, offset + size); + util_range_add(&sdst->valid_buffer_range, offset, offset + size); - offset += rdst->gpu_address; + offset += sdst->gpu_address; if (sctx->chip_class == SI) { /* the same maximum size as for copying */ ncopy = DIV_ROUND_UP(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE); - si_need_dma_space(sctx, ncopy * 4, rdst, NULL); + si_need_dma_space(sctx, ncopy * 4, sdst, NULL); for (i = 0; i < ncopy; i++) { csize = MIN2(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE); @@ -108,7 +108,7 @@ /* The following code is for CI, VI, Vega/Raven, etc. */ /* the same maximum size as for copying */ ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE); - si_need_dma_space(sctx, ncopy * 5, rdst, NULL); + si_need_dma_space(sctx, ncopy * 5, sdst, NULL); for (i = 0; i < ncopy; i++) { csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE); @@ -124,7 +124,7 @@ } void si_need_dma_space(struct si_context *ctx, unsigned num_dw, - struct r600_resource *dst, struct r600_resource *src) + struct si_resource *dst, struct si_resource *src) { uint64_t vram = ctx->dma_cs->used_vram; uint64_t gtt = ctx->dma_cs->used_gart; diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_fence.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_fence.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_fence.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_fence.c 2019-03-31 23:16:37.000000000 +0000 @@ -33,7 +33,7 @@ #include "si_build_pm4.h" struct si_fine_fence { - struct r600_resource *buf; + struct si_resource *buf; unsigned offset; }; @@ -69,7 +69,7 @@ void si_cp_release_mem(struct si_context *ctx, unsigned event, unsigned event_flags, unsigned dst_sel, unsigned int_sel, unsigned data_sel, - struct r600_resource *buf, uint64_t va, + struct si_resource *buf, uint64_t va, uint32_t new_fence, unsigned query_type) { struct radeon_cmdbuf *cs = ctx->gfx_cs; @@ -93,7 +93,7 @@ query_type != PIPE_QUERY_OCCLUSION_COUNTER && query_type != 
PIPE_QUERY_OCCLUSION_PREDICATE && query_type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) { - struct r600_resource *scratch = ctx->eop_bug_scratch; + struct si_resource *scratch = ctx->eop_bug_scratch; assert(16 * ctx->screen->info.num_render_backends <= scratch->b.b.width0); @@ -117,7 +117,7 @@ } else { if (ctx->chip_class == CIK || ctx->chip_class == VI) { - struct r600_resource *scratch = ctx->eop_bug_scratch; + struct si_resource *scratch = ctx->eop_bug_scratch; uint64_t va = scratch->gpu_address; /* Two EOP events are required to make all engines go idle @@ -160,13 +160,11 @@ return dwords; } -void si_cp_wait_mem(struct si_context *ctx, +void si_cp_wait_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, uint64_t va, uint32_t ref, uint32_t mask, unsigned flags) { - struct radeon_cmdbuf *cs = ctx->gfx_cs; - radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1) | flags); + radeon_emit(cs, WAIT_REG_MEM_MEM_SPACE(1) | flags); radeon_emit(cs, va); radeon_emit(cs, va >> 32); radeon_emit(cs, ref); /* reference value */ @@ -195,17 +193,17 @@ struct pipe_fence_handle *src) { struct radeon_winsys *ws = ((struct si_screen*)screen)->ws; - struct si_multi_fence **rdst = (struct si_multi_fence **)dst; - struct si_multi_fence *rsrc = (struct si_multi_fence *)src; + struct si_multi_fence **sdst = (struct si_multi_fence **)dst; + struct si_multi_fence *ssrc = (struct si_multi_fence *)src; - if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) { - ws->fence_reference(&(*rdst)->gfx, NULL); - ws->fence_reference(&(*rdst)->sdma, NULL); - tc_unflushed_batch_token_reference(&(*rdst)->tc_token, NULL); - r600_resource_reference(&(*rdst)->fine.buf, NULL); - FREE(*rdst); + if (pipe_reference(&(*sdst)->reference, &ssrc->reference)) { + ws->fence_reference(&(*sdst)->gfx, NULL); + ws->fence_reference(&(*sdst)->sdma, NULL); + tc_unflushed_batch_token_reference(&(*sdst)->tc_token, NULL); + 
si_resource_reference(&(*sdst)->fine.buf, NULL); + FREE(*sdst); } - *rdst = rsrc; + *sdst = ssrc; } static struct si_multi_fence *si_create_multi_fence() @@ -261,24 +259,19 @@ *fence_ptr = 0; - uint64_t fence_va = fine->buf->gpu_address + fine->offset; - - radeon_add_to_buffer_list(ctx, ctx->gfx_cs, fine->buf, - RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); if (flags & PIPE_FLUSH_TOP_OF_PIPE) { - struct radeon_cmdbuf *cs = ctx->gfx_cs; - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_PFP)); - radeon_emit(cs, fence_va); - radeon_emit(cs, fence_va >> 32); - radeon_emit(cs, 0x80000000); + uint32_t value = 0x80000000; + + si_cp_write_data(ctx, fine->buf, fine->offset, 4, + V_370_MEM, V_370_PFP, &value); } else if (flags & PIPE_FLUSH_BOTTOM_OF_PIPE) { + uint64_t fence_va = fine->buf->gpu_address + fine->offset; + + radeon_add_to_buffer_list(ctx, ctx->gfx_cs, fine->buf, + RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); si_cp_release_mem(ctx, V_028A90_BOTTOM_OF_PIPE_TS, 0, - EOP_DST_SEL_MEM, - EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, + EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, EOP_DATA_SEL_VALUE_32BIT, NULL, fence_va, 0x80000000, PIPE_QUERY_GPU_FINISHED); @@ -293,15 +286,15 @@ uint64_t timeout) { struct radeon_winsys *rws = ((struct si_screen*)screen)->ws; - struct si_multi_fence *rfence = (struct si_multi_fence *)fence; + struct si_multi_fence *sfence = (struct si_multi_fence *)fence; struct si_context *sctx; int64_t abs_timeout = os_time_get_absolute_timeout(timeout); ctx = threaded_context_unwrap_sync(ctx); sctx = (struct si_context*)(ctx ? ctx : NULL); - if (!util_queue_fence_is_signalled(&rfence->ready)) { - if (rfence->tc_token) { + if (!util_queue_fence_is_signalled(&sfence->ready)) { + if (sfence->tc_token) { /* Ensure that si_flush_from_st will be called for * this fence, but only if we're in the API thread * where the context is current. 
@@ -310,7 +303,7 @@ * be in flight in the driver thread, so the fence * may not be ready yet when this call returns. */ - threaded_context_flush(ctx, rfence->tc_token, + threaded_context_flush(ctx, sfence->tc_token, timeout == 0); } @@ -318,9 +311,9 @@ return false; if (timeout == PIPE_TIMEOUT_INFINITE) { - util_queue_fence_wait(&rfence->ready); + util_queue_fence_wait(&sfence->ready); } else { - if (!util_queue_fence_wait_timeout(&rfence->ready, abs_timeout)) + if (!util_queue_fence_wait_timeout(&sfence->ready, abs_timeout)) return false; } @@ -330,8 +323,8 @@ } } - if (rfence->sdma) { - if (!rws->fence_wait(rws, rfence->sdma, timeout)) + if (sfence->sdma) { + if (!rws->fence_wait(rws, sfence->sdma, timeout)) return false; /* Recompute the timeout after waiting. */ @@ -341,19 +334,19 @@ } } - if (!rfence->gfx) + if (!sfence->gfx) return true; - if (rfence->fine.buf && - si_fine_fence_signaled(rws, &rfence->fine)) { - rws->fence_reference(&rfence->gfx, NULL); - r600_resource_reference(&rfence->fine.buf, NULL); + if (sfence->fine.buf && + si_fine_fence_signaled(rws, &sfence->fine)) { + rws->fence_reference(&sfence->gfx, NULL); + si_resource_reference(&sfence->fine.buf, NULL); return true; } /* Flush the gfx IB if it hasn't been flushed yet. */ - if (sctx && rfence->gfx_unflushed.ctx == sctx && - rfence->gfx_unflushed.ib_index == sctx->num_gfx_cs_flushes) { + if (sctx && sfence->gfx_unflushed.ctx == sctx && + sfence->gfx_unflushed.ib_index == sctx->num_gfx_cs_flushes) { /* Section 4.1.2 (Signaling) of the OpenGL 4.6 (Core profile) * spec says: * @@ -380,7 +373,7 @@ (timeout ? 
0 : PIPE_FLUSH_ASYNC) | RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL); - rfence->gfx_unflushed.ctx = NULL; + sfence->gfx_unflushed.ctx = NULL; if (!timeout) return false; @@ -392,13 +385,13 @@ } } - if (rws->fence_wait(rws, rfence->gfx, timeout)) + if (rws->fence_wait(rws, sfence->gfx, timeout)) return true; /* Re-check in case the GPU is slow or hangs, but the commands before * the fine-grained fence have completed. */ - if (rfence->fine.buf && - si_fine_fence_signaled(rws, &rfence->fine)) + if (sfence->fine.buf && + si_fine_fence_signaled(rws, &sfence->fine)) return true; return false; @@ -410,12 +403,12 @@ { struct si_screen *sscreen = (struct si_screen*)ctx->screen; struct radeon_winsys *ws = sscreen->ws; - struct si_multi_fence *rfence; + struct si_multi_fence *sfence; *pfence = NULL; - rfence = si_create_multi_fence(); - if (!rfence) + sfence = si_create_multi_fence(); + if (!sfence) return; switch (type) { @@ -423,14 +416,14 @@ if (!sscreen->info.has_fence_to_handle) goto finish; - rfence->gfx = ws->fence_import_sync_file(ws, fd); + sfence->gfx = ws->fence_import_sync_file(ws, fd); break; case PIPE_FD_TYPE_SYNCOBJ: if (!sscreen->info.has_syncobj) goto finish; - rfence->gfx = ws->fence_import_syncobj(ws, fd); + sfence->gfx = ws->fence_import_syncobj(ws, fd); break; default: @@ -438,12 +431,12 @@ } finish: - if (!rfence->gfx) { - FREE(rfence); + if (!sfence->gfx) { + FREE(sfence); return; } - *pfence = (struct pipe_fence_handle*)rfence; + *pfence = (struct pipe_fence_handle*)sfence; } static int si_fence_get_fd(struct pipe_screen *screen, @@ -451,26 +444,26 @@ { struct si_screen *sscreen = (struct si_screen*)screen; struct radeon_winsys *ws = sscreen->ws; - struct si_multi_fence *rfence = (struct si_multi_fence *)fence; + struct si_multi_fence *sfence = (struct si_multi_fence *)fence; int gfx_fd = -1, sdma_fd = -1; if (!sscreen->info.has_fence_to_handle) return -1; - util_queue_fence_wait(&rfence->ready); + util_queue_fence_wait(&sfence->ready); /* Deferred fences 
aren't supported. */ - assert(!rfence->gfx_unflushed.ctx); - if (rfence->gfx_unflushed.ctx) + assert(!sfence->gfx_unflushed.ctx); + if (sfence->gfx_unflushed.ctx) return -1; - if (rfence->sdma) { - sdma_fd = ws->fence_export_sync_file(ws, rfence->sdma); + if (sfence->sdma) { + sdma_fd = ws->fence_export_sync_file(ws, sfence->sdma); if (sdma_fd == -1) return -1; } - if (rfence->gfx) { - gfx_fd = ws->fence_export_sync_file(ws, rfence->gfx); + if (sfence->gfx) { + gfx_fd = ws->fence_export_sync_file(ws, sfence->gfx); if (gfx_fd == -1) { if (sdma_fd != -1) close(sdma_fd); @@ -591,15 +584,15 @@ struct pipe_fence_handle *fence) { struct si_context *sctx = (struct si_context *)ctx; - struct si_multi_fence *rfence = (struct si_multi_fence *)fence; + struct si_multi_fence *sfence = (struct si_multi_fence *)fence; /* We should have at least one syncobj to signal */ - assert(rfence->sdma || rfence->gfx); + assert(sfence->sdma || sfence->gfx); - if (rfence->sdma) - si_add_syncobj_signal(sctx, rfence->sdma); - if (rfence->gfx) - si_add_syncobj_signal(sctx, rfence->gfx); + if (sfence->sdma) + si_add_syncobj_signal(sctx, sfence->sdma); + if (sfence->gfx) + si_add_syncobj_signal(sctx, sfence->gfx); /** * The spec does not require a flush here. We insert a flush @@ -618,13 +611,13 @@ struct pipe_fence_handle *fence) { struct si_context *sctx = (struct si_context *)ctx; - struct si_multi_fence *rfence = (struct si_multi_fence *)fence; + struct si_multi_fence *sfence = (struct si_multi_fence *)fence; - util_queue_fence_wait(&rfence->ready); + util_queue_fence_wait(&sfence->ready); /* Unflushed fences from the same context are no-ops. 
*/ - if (rfence->gfx_unflushed.ctx && - rfence->gfx_unflushed.ctx == sctx) + if (sfence->gfx_unflushed.ctx && + sfence->gfx_unflushed.ctx == sctx) return; /* All unflushed commands will not start execution before @@ -634,10 +627,10 @@ */ si_flush_from_st(ctx, NULL, PIPE_FLUSH_ASYNC); - if (rfence->sdma) - si_add_fence_dependency(sctx, rfence->sdma); - if (rfence->gfx) - si_add_fence_dependency(sctx, rfence->gfx); + if (sfence->sdma) + si_add_fence_dependency(sctx, sfence->sdma); + if (sfence->gfx) + si_add_fence_dependency(sctx, sfence->gfx); } void si_init_fence_functions(struct si_context *ctx) diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_get.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_get.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_get.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_get.c 2019-03-31 23:16:37.000000000 +0000 @@ -254,6 +254,9 @@ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: return 30; + case PIPE_CAP_MAX_VARYINGS: + return 32; + case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: return sscreen->info.chip_class <= VI ? PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600 : 0; @@ -455,15 +458,6 @@ !sscreen->llvm_has_working_vgpr_indexing) return 0; - /* Doing indirect indexing on GFX9 with LLVM 6.0 hangs. - * This means we don't support INTERP instructions with - * indirect indexing on inputs. - */ - if (shader == PIPE_SHADER_FRAGMENT && - !sscreen->llvm_has_working_vgpr_indexing && - HAVE_LLVM < 0x0700) - return 0; - /* TCS and TES load inputs directly from LDS or offchip * memory, so indirect indexing is always supported. 
* PS has to support indirect indexing, because we can't diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_gfx_cs.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_gfx_cs.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_gfx_cs.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_gfx_cs.c 2019-03-31 23:16:37.000000000 +0000 @@ -33,7 +33,7 @@ struct radeon_cmdbuf *cs = ctx->gfx_cs; /* There is no need to flush the DMA IB here, because - * r600_need_dma_space always flushes the GFX IB if there is + * si_need_dma_space always flushes the GFX IB if there is * a conflict, which means any unflushed DMA commands automatically * precede the GFX IB (= they had no dependency on the GFX IB when * they were submitted). @@ -177,7 +177,7 @@ pipe_reference_init(&ctx->current_saved_cs->reference, 1); - ctx->current_saved_cs->trace_buf = r600_resource( + ctx->current_saved_cs->trace_buf = si_resource( pipe_buffer_create(ctx->b.screen, 0, PIPE_USAGE_STAGING, 8)); if (!ctx->current_saved_cs->trace_buf) { free(ctx->current_saved_cs); diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_gpu_load.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_gpu_load.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_gpu_load.c 2018-04-11 19:02:35.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_gpu_load.c 2019-03-31 23:16:37.000000000 +0000 @@ -213,8 +213,8 @@ } } -#define BUSY_INDEX(rscreen, field) (&rscreen->mmio_counters.named.field.busy - \ - rscreen->mmio_counters.array) +#define BUSY_INDEX(sscreen, field) (&sscreen->mmio_counters.named.field.busy - \ + sscreen->mmio_counters.array) static unsigned busy_index_from_type(struct si_screen *sscreen, unsigned type) diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_perfcounter.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_perfcounter.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_perfcounter.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_perfcounter.c 2019-03-31 
23:16:37.000000000 +0000 @@ -27,6 +27,24 @@ #include "util/u_memory.h" +enum si_pc_block_flags { + /* This block is part of the shader engine */ + SI_PC_BLOCK_SE = (1 << 0), + + /* Expose per-instance groups instead of summing all instances (within + * an SE). */ + SI_PC_BLOCK_INSTANCE_GROUPS = (1 << 1), + + /* Expose per-SE groups instead of summing instances across SEs. */ + SI_PC_BLOCK_SE_GROUPS = (1 << 2), + + /* Shader block */ + SI_PC_BLOCK_SHADER = (1 << 3), + + /* Non-shader block with perfcounters windowed by shaders. */ + SI_PC_BLOCK_SHADER_WINDOWED = (1 << 4), +}; + enum si_pc_reg_layout { /* All secondary selector dwords follow as one block after the primary * selector dwords for the counters that have secondary selectors. @@ -69,12 +87,24 @@ unsigned layout; }; -struct si_pc_block { +struct si_pc_block_gfxdescr { struct si_pc_block_base *b; unsigned selectors; unsigned instances; }; +struct si_pc_block { + const struct si_pc_block_gfxdescr *b; + unsigned num_instances; + + unsigned num_groups; + char *group_names; + unsigned group_name_stride; + + char *selector_names; + unsigned selector_name_stride; +}; + /* The order is chosen to be compatible with GPUPerfStudio's hardcoding of * performance counter group IDs. */ @@ -93,6 +123,42 @@ S_036780_CS_EN(1), }; +/* Max counters per HW block */ +#define SI_QUERY_MAX_COUNTERS 16 + +#define SI_PC_SHADERS_WINDOWING (1 << 31) + +struct si_query_group { + struct si_query_group *next; + struct si_pc_block *block; + unsigned sub_gid; /* only used during init */ + unsigned result_base; /* only used during init */ + int se; + int instance; + unsigned num_counters; + unsigned selectors[SI_QUERY_MAX_COUNTERS]; +}; + +struct si_query_counter { + unsigned base; + unsigned qwords; + unsigned stride; /* in uint64s */ +}; + +struct si_query_pc { + struct si_query b; + struct si_query_buffer buffer; + + /* Size of the results in memory, in bytes. 
*/ + unsigned result_size; + + unsigned shaders; + unsigned num_counters; + struct si_query_counter *counters; + struct si_query_group *groups; +}; + + static struct si_pc_block_base cik_CB = { .name = "CB", .num_counters = 4, @@ -344,7 +410,7 @@ * blindly once it believes it has identified the hardware, so the order of * blocks here matters. */ -static struct si_pc_block groups_CIK[] = { +static struct si_pc_block_gfxdescr groups_CIK[] = { { &cik_CB, 226}, { &cik_CPF, 17 }, { &cik_DB, 257}, @@ -371,7 +437,7 @@ }; -static struct si_pc_block groups_VI[] = { +static struct si_pc_block_gfxdescr groups_VI[] = { { &cik_CB, 405}, { &cik_CPF, 19 }, { &cik_DB, 257}, @@ -398,7 +464,7 @@ }; -static struct si_pc_block groups_gfx9[] = { +static struct si_pc_block_gfxdescr groups_gfx9[] = { { &cik_CB, 438}, { &cik_CPF, 32 }, { &cik_DB, 328}, @@ -422,6 +488,58 @@ { &cik_CPC, 35 }, }; +static bool si_pc_block_has_per_se_groups(const struct si_perfcounters *pc, + const struct si_pc_block *block) +{ + return block->b->b->flags & SI_PC_BLOCK_SE_GROUPS || + (block->b->b->flags & SI_PC_BLOCK_SE && pc->separate_se); +} + +static bool si_pc_block_has_per_instance_groups(const struct si_perfcounters *pc, + const struct si_pc_block *block) +{ + return block->b->b->flags & SI_PC_BLOCK_INSTANCE_GROUPS || + (block->num_instances > 1 && pc->separate_instance); +} + +static struct si_pc_block * +lookup_counter(struct si_perfcounters *pc, unsigned index, + unsigned *base_gid, unsigned *sub_index) +{ + struct si_pc_block *block = pc->blocks; + unsigned bid; + + *base_gid = 0; + for (bid = 0; bid < pc->num_blocks; ++bid, ++block) { + unsigned total = block->num_groups * block->b->selectors; + + if (index < total) { + *sub_index = index; + return block; + } + + index -= total; + *base_gid += block->num_groups; + } + + return NULL; +} + +static struct si_pc_block * +lookup_group(struct si_perfcounters *pc, unsigned *index) +{ + unsigned bid; + struct si_pc_block *block = pc->blocks; + + for (bid = 
0; bid < pc->num_blocks; ++bid, ++block) { + if (*index < block->num_groups) + return block; + *index -= block->num_groups; + } + + return NULL; +} + static void si_pc_emit_instance(struct si_context *sctx, int se, int instance) { @@ -454,11 +572,10 @@ } static void si_pc_emit_select(struct si_context *sctx, - struct si_perfcounter_block *group, + struct si_pc_block *block, unsigned count, unsigned *selectors) { - struct si_pc_block *sigroup = (struct si_pc_block *)group->data; - struct si_pc_block_base *regs = sigroup->b; + struct si_pc_block_base *regs = block->b->b; struct radeon_cmdbuf *cs = sctx->gfx_cs; unsigned idx; unsigned layout_multi = regs->layout & SI_PC_MULTI_MASK; @@ -550,7 +667,7 @@ } static void si_pc_emit_start(struct si_context *sctx, - struct r600_resource *buffer, uint64_t va) + struct si_resource *buffer, uint64_t va) { struct radeon_cmdbuf *cs = sctx->gfx_cs; @@ -576,16 +693,15 @@ /* Note: The buffer was already added in si_pc_emit_start, so we don't have to * do it again in here. 
*/ static void si_pc_emit_stop(struct si_context *sctx, - struct r600_resource *buffer, uint64_t va) + struct si_resource *buffer, uint64_t va) { struct radeon_cmdbuf *cs = sctx->gfx_cs; si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0, - EOP_DST_SEL_MEM, - EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, + EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, EOP_DATA_SEL_VALUE_32BIT, buffer, va, 0, SI_NOT_QUERY); - si_cp_wait_mem(sctx, va, 0, 0xffffffff, 0); + si_cp_wait_mem(sctx, cs, va, 0, 0xffffffff, WAIT_REG_MEM_EQUAL); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0)); @@ -597,12 +713,10 @@ } static void si_pc_emit_read(struct si_context *sctx, - struct si_perfcounter_block *group, - unsigned count, unsigned *selectors, - struct r600_resource *buffer, uint64_t va) + struct si_pc_block *block, + unsigned count, uint64_t va) { - struct si_pc_block *sigroup = (struct si_pc_block *)group->data; - struct si_pc_block_base *regs = sigroup->b; + struct si_pc_block_base *regs = block->b->b; struct radeon_cmdbuf *cs = sctx->gfx_cs; unsigned idx; unsigned reg = regs->counter0_lo; @@ -642,16 +756,537 @@ } } -static void si_pc_cleanup(struct si_screen *sscreen) +static void si_pc_query_destroy(struct si_screen *sscreen, + struct si_query *squery) +{ + struct si_query_pc *query = (struct si_query_pc *)squery; + + while (query->groups) { + struct si_query_group *group = query->groups; + query->groups = group->next; + FREE(group); + } + + FREE(query->counters); + + si_query_buffer_destroy(sscreen, &query->buffer); + FREE(query); +} + +static void si_pc_query_resume(struct si_context *sctx, struct si_query *squery) +/* + struct si_query_hw *hwquery, + struct si_resource *buffer, uint64_t va)*/ +{ + struct si_query_pc *query = (struct si_query_pc *)squery; + int current_se = -1; + int current_instance = -1; + + if (!si_query_buffer_alloc(sctx, &query->buffer, NULL, query->result_size)) + return; + si_need_gfx_cs_space(sctx); + + 
if (query->shaders) + si_pc_emit_shaders(sctx, query->shaders); + + for (struct si_query_group *group = query->groups; group; group = group->next) { + struct si_pc_block *block = group->block; + + if (group->se != current_se || group->instance != current_instance) { + current_se = group->se; + current_instance = group->instance; + si_pc_emit_instance(sctx, group->se, group->instance); + } + + si_pc_emit_select(sctx, block, group->num_counters, group->selectors); + } + + if (current_se != -1 || current_instance != -1) + si_pc_emit_instance(sctx, -1, -1); + + uint64_t va = query->buffer.buf->gpu_address + query->buffer.results_end; + si_pc_emit_start(sctx, query->buffer.buf, va); +} + +static void si_pc_query_suspend(struct si_context *sctx, struct si_query *squery) +{ + struct si_query_pc *query = (struct si_query_pc *)squery; + + if (!query->buffer.buf) + return; + + uint64_t va = query->buffer.buf->gpu_address + query->buffer.results_end; + query->buffer.results_end += query->result_size; + + si_pc_emit_stop(sctx, query->buffer.buf, va); + + for (struct si_query_group *group = query->groups; group; group = group->next) { + struct si_pc_block *block = group->block; + unsigned se = group->se >= 0 ? group->se : 0; + unsigned se_end = se + 1; + + if ((block->b->b->flags & SI_PC_BLOCK_SE) && (group->se < 0)) + se_end = sctx->screen->info.max_se; + + do { + unsigned instance = group->instance >= 0 ? 
group->instance : 0; + + do { + si_pc_emit_instance(sctx, se, instance); + si_pc_emit_read(sctx, block, group->num_counters, va); + va += sizeof(uint64_t) * group->num_counters; + } while (group->instance < 0 && ++instance < block->num_instances); + } while (++se < se_end); + } + + si_pc_emit_instance(sctx, -1, -1); +} + +static bool si_pc_query_begin(struct si_context *ctx, struct si_query *squery) +{ + struct si_query_pc *query = (struct si_query_pc *)squery; + + si_query_buffer_reset(ctx, &query->buffer); + + LIST_ADDTAIL(&query->b.active_list, &ctx->active_queries); + ctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend; + + si_pc_query_resume(ctx, squery); + + return true; +} + +static bool si_pc_query_end(struct si_context *ctx, struct si_query *squery) +{ + struct si_query_pc *query = (struct si_query_pc *)squery; + + si_pc_query_suspend(ctx, squery); + + LIST_DEL(&squery->active_list); + ctx->num_cs_dw_queries_suspend -= squery->num_cs_dw_suspend; + + return query->buffer.buf != NULL; +} + +static void si_pc_query_add_result(struct si_query_pc *query, + void *buffer, + union pipe_query_result *result) +{ + uint64_t *results = buffer; + unsigned i, j; + + for (i = 0; i < query->num_counters; ++i) { + struct si_query_counter *counter = &query->counters[i]; + + for (j = 0; j < counter->qwords; ++j) { + uint32_t value = results[counter->base + j * counter->stride]; + result->batch[i].u64 += value; + } + } +} + +static bool si_pc_query_get_result(struct si_context *sctx, struct si_query *squery, + bool wait, union pipe_query_result *result) +{ + struct si_query_pc *query = (struct si_query_pc *)squery; + + memset(result, 0, sizeof(result->batch[0]) * query->num_counters); + + for (struct si_query_buffer *qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) { + unsigned usage = PIPE_TRANSFER_READ | + (wait ? 
0 : PIPE_TRANSFER_DONTBLOCK); + unsigned results_base = 0; + void *map; + + if (squery->b.flushed) + map = sctx->ws->buffer_map(qbuf->buf->buf, NULL, usage); + else + map = si_buffer_map_sync_with_rings(sctx, qbuf->buf, usage); + + if (!map) + return false; + + while (results_base != qbuf->results_end) { + si_pc_query_add_result(query, map + results_base, result); + results_base += query->result_size; + } + } + + return true; +} + +static const struct si_query_ops batch_query_ops = { + .destroy = si_pc_query_destroy, + .begin = si_pc_query_begin, + .end = si_pc_query_end, + .get_result = si_pc_query_get_result, + + .suspend = si_pc_query_suspend, + .resume = si_pc_query_resume, +}; + +static struct si_query_group *get_group_state(struct si_screen *screen, + struct si_query_pc *query, + struct si_pc_block *block, + unsigned sub_gid) +{ + struct si_query_group *group = query->groups; + + while (group) { + if (group->block == block && group->sub_gid == sub_gid) + return group; + group = group->next; + } + + group = CALLOC_STRUCT(si_query_group); + if (!group) + return NULL; + + group->block = block; + group->sub_gid = sub_gid; + + if (block->b->b->flags & SI_PC_BLOCK_SHADER) { + unsigned sub_gids = block->num_instances; + unsigned shader_id; + unsigned shaders; + unsigned query_shaders; + + if (si_pc_block_has_per_se_groups(screen->perfcounters, block)) + sub_gids = sub_gids * screen->info.max_se; + shader_id = sub_gid / sub_gids; + sub_gid = sub_gid % sub_gids; + + shaders = si_pc_shader_type_bits[shader_id]; + + query_shaders = query->shaders & ~SI_PC_SHADERS_WINDOWING; + if (query_shaders && query_shaders != shaders) { + fprintf(stderr, "si_perfcounter: incompatible shader groups\n"); + FREE(group); + return NULL; + } + query->shaders = shaders; + } + + if (block->b->b->flags & SI_PC_BLOCK_SHADER_WINDOWED && !query->shaders) { + // A non-zero value in query->shaders ensures that the shader + // masking is reset unless the user explicitly requests one. 
+ query->shaders = SI_PC_SHADERS_WINDOWING; + } + + if (si_pc_block_has_per_se_groups(screen->perfcounters, block)) { + group->se = sub_gid / block->num_instances; + sub_gid = sub_gid % block->num_instances; + } else { + group->se = -1; + } + + if (si_pc_block_has_per_instance_groups(screen->perfcounters, block)) { + group->instance = sub_gid; + } else { + group->instance = -1; + } + + group->next = query->groups; + query->groups = group; + + return group; +} + +struct pipe_query *si_create_batch_query(struct pipe_context *ctx, + unsigned num_queries, + unsigned *query_types) +{ + struct si_screen *screen = + (struct si_screen *)ctx->screen; + struct si_perfcounters *pc = screen->perfcounters; + struct si_pc_block *block; + struct si_query_group *group; + struct si_query_pc *query; + unsigned base_gid, sub_gid, sub_index; + unsigned i, j; + + if (!pc) + return NULL; + + query = CALLOC_STRUCT(si_query_pc); + if (!query) + return NULL; + + query->b.ops = &batch_query_ops; + + query->num_counters = num_queries; + + /* Collect selectors per group */ + for (i = 0; i < num_queries; ++i) { + unsigned sub_gid; + + if (query_types[i] < SI_QUERY_FIRST_PERFCOUNTER) + goto error; + + block = lookup_counter(pc, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER, + &base_gid, &sub_index); + if (!block) + goto error; + + sub_gid = sub_index / block->b->selectors; + sub_index = sub_index % block->b->selectors; + + group = get_group_state(screen, query, block, sub_gid); + if (!group) + goto error; + + if (group->num_counters >= block->b->b->num_counters) { + fprintf(stderr, + "perfcounter group %s: too many selected\n", + block->b->b->name); + goto error; + } + group->selectors[group->num_counters] = sub_index; + ++group->num_counters; + } + + /* Compute result bases and CS size per group */ + query->b.num_cs_dw_suspend = pc->num_stop_cs_dwords; + query->b.num_cs_dw_suspend += pc->num_instance_cs_dwords; + + i = 0; + for (group = query->groups; group; group = group->next) { + struct 
si_pc_block *block = group->block; + unsigned read_dw; + unsigned instances = 1; + + if ((block->b->b->flags & SI_PC_BLOCK_SE) && group->se < 0) + instances = screen->info.max_se; + if (group->instance < 0) + instances *= block->num_instances; + + group->result_base = i; + query->result_size += sizeof(uint64_t) * instances * group->num_counters; + i += instances * group->num_counters; + + read_dw = 6 * group->num_counters; + query->b.num_cs_dw_suspend += instances * read_dw; + query->b.num_cs_dw_suspend += instances * pc->num_instance_cs_dwords; + } + + if (query->shaders) { + if (query->shaders == SI_PC_SHADERS_WINDOWING) + query->shaders = 0xffffffff; + } + + /* Map user-supplied query array to result indices */ + query->counters = CALLOC(num_queries, sizeof(*query->counters)); + for (i = 0; i < num_queries; ++i) { + struct si_query_counter *counter = &query->counters[i]; + struct si_pc_block *block; + + block = lookup_counter(pc, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER, + &base_gid, &sub_index); + + sub_gid = sub_index / block->b->selectors; + sub_index = sub_index % block->b->selectors; + + group = get_group_state(screen, query, block, sub_gid); + assert(group != NULL); + + for (j = 0; j < group->num_counters; ++j) { + if (group->selectors[j] == sub_index) + break; + } + + counter->base = group->result_base + j; + counter->stride = group->num_counters; + + counter->qwords = 1; + if ((block->b->b->flags & SI_PC_BLOCK_SE) && group->se < 0) + counter->qwords = screen->info.max_se; + if (group->instance < 0) + counter->qwords *= block->num_instances; + } + + return (struct pipe_query *)query; + +error: + si_pc_query_destroy(screen, &query->b); + return NULL; +} + +static bool si_init_block_names(struct si_screen *screen, + struct si_pc_block *block) +{ + bool per_instance_groups = si_pc_block_has_per_instance_groups(screen->perfcounters, block); + bool per_se_groups = si_pc_block_has_per_se_groups(screen->perfcounters, block); + unsigned i, j, k; + unsigned 
groups_shader = 1, groups_se = 1, groups_instance = 1; + unsigned namelen; + char *groupname; + char *p; + + if (per_instance_groups) + groups_instance = block->num_instances; + if (per_se_groups) + groups_se = screen->info.max_se; + if (block->b->b->flags & SI_PC_BLOCK_SHADER) + groups_shader = ARRAY_SIZE(si_pc_shader_type_bits); + + namelen = strlen(block->b->b->name); + block->group_name_stride = namelen + 1; + if (block->b->b->flags & SI_PC_BLOCK_SHADER) + block->group_name_stride += 3; + if (per_se_groups) { + assert(groups_se <= 10); + block->group_name_stride += 1; + + if (per_instance_groups) + block->group_name_stride += 1; + } + if (per_instance_groups) { + assert(groups_instance <= 100); + block->group_name_stride += 2; + } + + block->group_names = MALLOC(block->num_groups * block->group_name_stride); + if (!block->group_names) + return false; + + groupname = block->group_names; + for (i = 0; i < groups_shader; ++i) { + const char *shader_suffix = si_pc_shader_type_suffixes[i]; + unsigned shaderlen = strlen(shader_suffix); + for (j = 0; j < groups_se; ++j) { + for (k = 0; k < groups_instance; ++k) { + strcpy(groupname, block->b->b->name); + p = groupname + namelen; + + if (block->b->b->flags & SI_PC_BLOCK_SHADER) { + strcpy(p, shader_suffix); + p += shaderlen; + } + + if (per_se_groups) { + p += sprintf(p, "%d", j); + if (per_instance_groups) + *p++ = '_'; + } + + if (per_instance_groups) + p += sprintf(p, "%d", k); + + groupname += block->group_name_stride; + } + } + } + + assert(block->b->selectors <= 1000); + block->selector_name_stride = block->group_name_stride + 4; + block->selector_names = MALLOC(block->num_groups * block->b->selectors * + block->selector_name_stride); + if (!block->selector_names) + return false; + + groupname = block->group_names; + p = block->selector_names; + for (i = 0; i < block->num_groups; ++i) { + for (j = 0; j < block->b->selectors; ++j) { + sprintf(p, "%s_%03d", groupname, j); + p += block->selector_name_stride; + } + 
groupname += block->group_name_stride; + } + + return true; +} + +int si_get_perfcounter_info(struct si_screen *screen, + unsigned index, + struct pipe_driver_query_info *info) +{ + struct si_perfcounters *pc = screen->perfcounters; + struct si_pc_block *block; + unsigned base_gid, sub; + + if (!pc) + return 0; + + if (!info) { + unsigned bid, num_queries = 0; + + for (bid = 0; bid < pc->num_blocks; ++bid) { + num_queries += pc->blocks[bid].b->selectors * + pc->blocks[bid].num_groups; + } + + return num_queries; + } + + block = lookup_counter(pc, index, &base_gid, &sub); + if (!block) + return 0; + + if (!block->selector_names) { + if (!si_init_block_names(screen, block)) + return 0; + } + info->name = block->selector_names + sub * block->selector_name_stride; + info->query_type = SI_QUERY_FIRST_PERFCOUNTER + index; + info->max_value.u64 = 0; + info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; + info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE; + info->group_id = base_gid + sub / block->b->selectors; + info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH; + if (sub > 0 && sub + 1 < block->b->selectors * block->num_groups) + info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST; + return 1; +} + +int si_get_perfcounter_group_info(struct si_screen *screen, + unsigned index, + struct pipe_driver_query_group_info *info) { - si_perfcounters_do_destroy(sscreen->perfcounters); - sscreen->perfcounters = NULL; + struct si_perfcounters *pc = screen->perfcounters; + struct si_pc_block *block; + + if (!pc) + return 0; + + if (!info) + return pc->num_groups; + + block = lookup_group(pc, &index); + if (!block) + return 0; + + if (!block->group_names) { + if (!si_init_block_names(screen, block)) + return 0; + } + info->name = block->group_names + index * block->group_name_stride; + info->num_queries = block->b->selectors; + info->max_active_queries = block->b->b->num_counters; + return 1; +} + +void si_destroy_perfcounters(struct si_screen *screen) +{ + struct si_perfcounters *pc = 
screen->perfcounters; + unsigned i; + + if (!pc) + return; + + for (i = 0; i < pc->num_blocks; ++i) { + FREE(pc->blocks[i].group_names); + FREE(pc->blocks[i].selector_names); + } + FREE(pc->blocks); + FREE(pc); + screen->perfcounters = NULL; } void si_init_perfcounters(struct si_screen *screen) { struct si_perfcounters *pc; - struct si_pc_block *blocks; + const struct si_pc_block_gfxdescr *blocks; unsigned num_blocks; unsigned i; @@ -680,52 +1315,50 @@ screen->info.max_sh_per_se); } - pc = CALLOC_STRUCT(si_perfcounters); + screen->perfcounters = pc = CALLOC_STRUCT(si_perfcounters); if (!pc) return; pc->num_stop_cs_dwords = 14 + si_cp_write_fence_dwords(screen); pc->num_instance_cs_dwords = 3; - pc->num_shader_types = ARRAY_SIZE(si_pc_shader_type_bits); - pc->shader_type_suffixes = si_pc_shader_type_suffixes; - pc->shader_type_bits = si_pc_shader_type_bits; - - pc->emit_instance = si_pc_emit_instance; - pc->emit_shaders = si_pc_emit_shaders; - pc->emit_select = si_pc_emit_select; - pc->emit_start = si_pc_emit_start; - pc->emit_stop = si_pc_emit_stop; - pc->emit_read = si_pc_emit_read; - pc->cleanup = si_pc_cleanup; + pc->separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", false); + pc->separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false); - if (!si_perfcounters_init(pc, num_blocks)) + pc->blocks = CALLOC(num_blocks, sizeof(struct si_pc_block)); + if (!pc->blocks) goto error; + pc->num_blocks = num_blocks; for (i = 0; i < num_blocks; ++i) { - struct si_pc_block *block = &blocks[i]; - unsigned instances = block->instances; + struct si_pc_block *block = &pc->blocks[i]; + block->b = &blocks[i]; + block->num_instances = MAX2(1, block->b->instances); + + if (!strcmp(block->b->b->name, "CB") || + !strcmp(block->b->b->name, "DB")) + block->num_instances = screen->info.max_se; + else if (!strcmp(block->b->b->name, "TCC")) + block->num_instances = screen->info.num_tcc_blocks; + else if (!strcmp(block->b->b->name, "IA")) + 
block->num_instances = MAX2(1, screen->info.max_se / 2); - if (!strcmp(block->b->name, "CB") || - !strcmp(block->b->name, "DB")) - instances = screen->info.max_se; - else if (!strcmp(block->b->name, "TCC")) - instances = screen->info.num_tcc_blocks; - else if (!strcmp(block->b->name, "IA")) - instances = MAX2(1, screen->info.max_se / 2); - - si_perfcounters_add_block(screen, pc, - block->b->name, - block->b->flags, - block->b->num_counters, - block->selectors, - instances, - block); + if (si_pc_block_has_per_instance_groups(pc, block)) { + block->num_groups = block->num_instances; + } else { + block->num_groups = 1; + } + + if (si_pc_block_has_per_se_groups(pc, block)) + block->num_groups *= screen->info.max_se; + if (block->b->b->flags & SI_PC_BLOCK_SHADER) + block->num_groups *= ARRAY_SIZE(si_pc_shader_type_bits); + + pc->num_groups += block->num_groups; } - screen->perfcounters = pc; return; error: - si_perfcounters_do_destroy(pc); + si_destroy_perfcounters(screen); } diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_pipe.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_pipe.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_pipe.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_pipe.c 2019-03-31 23:16:37.000000000 +0000 @@ -103,6 +103,8 @@ { "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit." }, { "testdmaperf", DBG(TEST_DMA_PERF), "Test DMA performance" }, { "testgds", DBG(TEST_GDS), "Test GDS." }, + { "testgdsmm", DBG(TEST_GDS_MM), "Test GDS memory management." }, + { "testgdsoamm", DBG(TEST_GDS_OA_MM), "Test GDS OA memory management." }, DEBUG_NAMED_VALUE_END /* must be last */ }; @@ -125,7 +127,7 @@ (create_low_opt_compiler ? 
AC_TM_CREATE_LOW_OPT : 0); ac_init_llvm_once(); - ac_init_llvm_compiler(compiler, true, sscreen->info.family, tm_options); + ac_init_llvm_compiler(compiler, sscreen->info.family, tm_options); compiler->passes = ac_create_llvm_passes(compiler->tm); if (compiler->low_opt_tm) @@ -161,11 +163,11 @@ pipe_resource_reference(&sctx->tess_rings, NULL); pipe_resource_reference(&sctx->null_const_buf.buffer, NULL); pipe_resource_reference(&sctx->sample_pos_buffer, NULL); - r600_resource_reference(&sctx->border_color_buffer, NULL); + si_resource_reference(&sctx->border_color_buffer, NULL); free(sctx->border_color_table); - r600_resource_reference(&sctx->scratch_buffer, NULL); - r600_resource_reference(&sctx->compute_scratch_buffer, NULL); - r600_resource_reference(&sctx->wait_mem_scratch, NULL); + si_resource_reference(&sctx->scratch_buffer, NULL); + si_resource_reference(&sctx->compute_scratch_buffer, NULL); + si_resource_reference(&sctx->wait_mem_scratch, NULL); si_pm4_free_state(sctx, sctx->init_config, ~0); if (sctx->init_config_gs_rings) @@ -199,6 +201,10 @@ sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_buffer); if (sctx->cs_copy_buffer) sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_buffer); + if (sctx->cs_copy_image) + sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image); + if (sctx->cs_copy_image_1d_array) + sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image_1d_array); if (sctx->blitter) util_blitter_destroy(sctx->blitter); @@ -240,7 +246,7 @@ sctx->ws->fence_reference(&sctx->last_gfx_fence, NULL); sctx->ws->fence_reference(&sctx->last_sdma_fence, NULL); - r600_resource_reference(&sctx->eop_bug_scratch, NULL); + si_resource_reference(&sctx->eop_bug_scratch, NULL); si_destroy_compiler(&sctx->compiler); @@ -373,6 +379,7 @@ struct si_screen* sscreen = (struct si_screen *)screen; struct radeon_winsys *ws = sscreen->ws; int shader, i; + bool stop_exec_on_failure = (flags & PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET) != 0; if (!sctx) return NULL; @@ 
-411,7 +418,7 @@ if (sctx->chip_class == CIK || sctx->chip_class == VI || sctx->chip_class == GFX9) { - sctx->eop_bug_scratch = r600_resource( + sctx->eop_bug_scratch = si_resource( pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT, 16 * sscreen->info.num_render_backends)); if (!sctx->eop_bug_scratch) @@ -450,8 +457,8 @@ if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags & DBG(NO_ASYNC_DMA))) { sctx->dma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA, - (void*)si_flush_dma_cs, - sctx); + (void*)si_flush_dma_cs, + sctx, stop_exec_on_failure); } si_init_buffer_functions(sctx); @@ -472,7 +479,7 @@ } sctx->gfx_cs = ws->cs_create(sctx->ctx, RING_GFX, - (void*)si_flush_gfx_cs, sctx); + (void*)si_flush_gfx_cs, sctx, stop_exec_on_failure); /* Border colors. */ sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS * @@ -480,7 +487,7 @@ if (!sctx->border_color_table) goto fail; - sctx->border_color_buffer = r600_resource( + sctx->border_color_buffer = si_resource( pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, SI_MAX_BORDER_COLORS * sizeof(*sctx->border_color_table))); @@ -498,7 +505,6 @@ si_init_state_functions(sctx); si_init_shader_functions(sctx); si_init_viewport_functions(sctx); - si_init_ia_multi_vgt_param_table(sctx); if (sctx->chip_class >= CIK) cik_init_sdma_functions(sctx); @@ -508,39 +514,24 @@ if (sscreen->debug_flags & DBG(FORCE_DMA)) sctx->b.resource_copy_region = sctx->dma_copy; - bool dst_stream_policy = SI_COMPUTE_DST_CACHE_POLICY != L2_LRU; - sctx->cs_clear_buffer = si_create_dma_compute_shader(&sctx->b, - SI_COMPUTE_CLEAR_DW_PER_THREAD, - dst_stream_policy, false); - sctx->cs_copy_buffer = si_create_dma_compute_shader(&sctx->b, - SI_COMPUTE_COPY_DW_PER_THREAD, - dst_stream_policy, true); - sctx->blitter = util_blitter_create(&sctx->b); if (sctx->blitter == NULL) goto fail; - sctx->blitter->draw_rectangle = si_draw_rectangle; sctx->blitter->skip_viewport_restore = true; + si_init_draw_functions(sctx); + sctx->sample_mask = 0xffff; if 
(sctx->chip_class >= GFX9) { - sctx->wait_mem_scratch = r600_resource( + sctx->wait_mem_scratch = si_resource( pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4)); if (!sctx->wait_mem_scratch) goto fail; /* Initialize the memory. */ - struct radeon_cmdbuf *cs = sctx->gfx_cs; - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_ME)); - radeon_emit(cs, sctx->wait_mem_scratch->gpu_address); - radeon_emit(cs, sctx->wait_mem_scratch->gpu_address >> 32); - radeon_emit(cs, sctx->wait_mem_number); - radeon_add_to_buffer_list(sctx, cs, sctx->wait_mem_scratch, - RADEON_USAGE_WRITE, RADEON_PRIO_FENCE); + si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4, + V_370_MEM, V_370_ME, &sctx->wait_mem_number); } /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads @@ -702,7 +693,7 @@ mtx_destroy(&sscreen->shader_parts_mutex); si_destroy_shader_cache(sscreen); - si_perfcounters_destroy(sscreen); + si_destroy_perfcounters(sscreen); si_gpu_load_kill_thread(sscreen); mtx_destroy(&sscreen->gpu_load_mutex); @@ -722,39 +713,6 @@ sscreen->info.family); } -static void si_handle_env_var_force_family(struct si_screen *sscreen) -{ - const char *family = debug_get_option("SI_FORCE_FAMILY", NULL); - unsigned i; - - if (!family) - return; - - for (i = CHIP_TAHITI; i < CHIP_LAST; i++) { - if (!strcmp(family, ac_get_llvm_processor_name(i))) { - /* Override family and chip_class. */ - sscreen->info.family = i; - sscreen->info.name = "GCN-NOOP"; - - if (i >= CHIP_VEGA10) - sscreen->info.chip_class = GFX9; - else if (i >= CHIP_TONGA) - sscreen->info.chip_class = VI; - else if (i >= CHIP_BONAIRE) - sscreen->info.chip_class = CIK; - else - sscreen->info.chip_class = SI; - - /* Don't submit any IBs. 
*/ - setenv("RADEON_NOOP", "1", 1); - return; - } - } - - fprintf(stderr, "radeonsi: Unknown family: %s\n", family); - exit(1); -} - static void si_test_vmfault(struct si_screen *sscreen) { struct pipe_context *ctx = sscreen->aux_context; @@ -767,7 +725,7 @@ exit(1); } - r600_resource(buf)->gpu_address = 0; /* cause a VM fault */ + si_resource(buf)->gpu_address = 0; /* cause a VM fault */ if (sscreen->debug_flags & DBG(TEST_VMFAULT_CP)) { si_cp_dma_copy_buffer(sctx, buf, buf, 0, 4, 4, 0, @@ -787,6 +745,41 @@ exit(0); } +static void si_test_gds_memory_management(struct si_context *sctx, + unsigned alloc_size, unsigned alignment, + enum radeon_bo_domain domain) +{ + struct radeon_winsys *ws = sctx->ws; + struct radeon_cmdbuf *cs[8]; + struct pb_buffer *gds_bo[ARRAY_SIZE(cs)]; + + for (unsigned i = 0; i < ARRAY_SIZE(cs); i++) { + cs[i] = ws->cs_create(sctx->ctx, RING_COMPUTE, + NULL, NULL, false); + gds_bo[i] = ws->buffer_create(ws, alloc_size, alignment, domain, 0); + assert(gds_bo[i]); + } + + for (unsigned iterations = 0; iterations < 20000; iterations++) { + for (unsigned i = 0; i < ARRAY_SIZE(cs); i++) { + /* This clears GDS with CP DMA. + * + * We don't care if GDS is present. Just add some packet + * to make the GPU busy for a moment. + */ + si_cp_dma_clear_buffer(sctx, cs[i], NULL, 0, alloc_size, 0, + SI_CPDMA_SKIP_BO_LIST_UPDATE | + SI_CPDMA_SKIP_CHECK_CS_SPACE | + SI_CPDMA_SKIP_GFX_SYNC, 0, 0); + + ws->cs_add_buffer(cs[i], gds_bo[i], domain, + RADEON_USAGE_READWRITE, 0); + ws->cs_flush(cs[i], PIPE_FLUSH_ASYNC, NULL); + } + } + exit(0); +} + static void si_disk_cache_create(struct si_screen *sscreen) { /* Don't use the cache if shader dumping is enabled. 
*/ @@ -840,7 +833,6 @@ sscreen->ws = ws; ws->query_info(ws, &sscreen->info); - si_handle_env_var_force_family(sscreen); if (sscreen->info.chip_class >= GFX9) { sscreen->se_tile_repeat = 32 * sscreen->info.max_se; @@ -873,7 +865,8 @@ sscreen->debug_flags |= DBG(FS_CORRECT_DERIVS_AFTER_KILL); if (driQueryOptionb(config->options, "radeonsi_enable_sisched")) sscreen->debug_flags |= DBG(SI_SCHED); - + if (driQueryOptionb(config->options, "radeonsi_enable_nir")) + sscreen->debug_flags |= DBG(NIR); if (sscreen->debug_flags & DBG(INFO)) ac_print_gpu_info(&sscreen->info); @@ -1029,22 +1022,28 @@ sscreen->info.family == CHIP_RAVEN; sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 || sscreen->info.family == CHIP_RAVEN; + sscreen->has_dcc_constant_encode = sscreen->info.family == CHIP_RAVEN2; + /* Only enable primitive binning on APUs by default. */ + sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN || + sscreen->info.family == CHIP_RAVEN2; + + sscreen->dfsm_allowed = sscreen->info.family == CHIP_RAVEN || + sscreen->info.family == CHIP_RAVEN2; + + /* Process DPBB enable flags. */ if (sscreen->debug_flags & DBG(DPBB)) { sscreen->dpbb_allowed = true; - } else { - /* Only enable primitive binning on APUs by default. */ - /* TODO: Investigate if binning is profitable on Vega12. */ - sscreen->dpbb_allowed = !(sscreen->debug_flags & DBG(NO_DPBB)) && - (sscreen->info.family == CHIP_RAVEN || - sscreen->info.family == CHIP_RAVEN2); + if (sscreen->debug_flags & DBG(DFSM)) + sscreen->dfsm_allowed = true; } - if (sscreen->debug_flags & DBG(DFSM)) { - sscreen->dfsm_allowed = sscreen->dpbb_allowed; - } else { - sscreen->dfsm_allowed = sscreen->dpbb_allowed && - !(sscreen->debug_flags & DBG(NO_DFSM)); + /* Process DPBB disable flags. 
*/ + if (sscreen->debug_flags & DBG(NO_DPBB)) { + sscreen->dpbb_allowed = false; + sscreen->dfsm_allowed = false; + } else if (sscreen->debug_flags & DBG(NO_DFSM)) { + sscreen->dfsm_allowed = false; } /* While it would be nice not to have this flag, we are constrained @@ -1135,5 +1134,14 @@ if (sscreen->debug_flags & DBG(TEST_GDS)) si_test_gds((struct si_context*)sscreen->aux_context); + if (sscreen->debug_flags & DBG(TEST_GDS_MM)) { + si_test_gds_memory_management((struct si_context*)sscreen->aux_context, + 32 * 1024, 4, RADEON_DOMAIN_GDS); + } + if (sscreen->debug_flags & DBG(TEST_GDS_OA_MM)) { + si_test_gds_memory_management((struct si_context*)sscreen->aux_context, + 4, 1, RADEON_DOMAIN_OA); + } + return &sscreen->b; } diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_pipe.h mesa-19.0.1/src/gallium/drivers/radeonsi/si_pipe.h --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_pipe.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_pipe.h 2019-03-31 23:16:37.000000000 +0000 @@ -47,6 +47,7 @@ * the number shouldn't be a commonly-used one. 
*/ #define SI_BASE_VERTEX_UNKNOWN INT_MIN #define SI_RESTART_INDEX_UNKNOWN INT_MIN +#define SI_INSTANCE_COUNT_UNKNOWN INT_MIN #define SI_NUM_SMOOTH_AA_SAMPLES 8 #define SI_MAX_POINT_SIZE 2048 #define SI_GS_PER_ES 128 @@ -103,7 +104,7 @@ #define SI_RESOURCE_FLAG_TRANSFER (PIPE_RESOURCE_FLAG_DRV_PRIV << 0) #define SI_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1) -#define SI_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2) +#define SI_RESOURCE_FLAG_FORCE_MSAA_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2) #define SI_RESOURCE_FLAG_DISABLE_DCC (PIPE_RESOURCE_FLAG_DRV_PRIV << 3) #define SI_RESOURCE_FLAG_UNMAPPABLE (PIPE_RESOURCE_FLAG_DRV_PRIV << 4) #define SI_RESOURCE_FLAG_READ_ONLY (PIPE_RESOURCE_FLAG_DRV_PRIV << 5) @@ -174,6 +175,8 @@ DBG_TEST_VMFAULT_SHADER, DBG_TEST_DMA_PERF, DBG_TEST_GDS, + DBG_TEST_GDS_MM, + DBG_TEST_GDS_OA_MM, }; #define DBG_ALL_SHADERS (((1 << (DBG_CS + 1)) - 1)) @@ -199,7 +202,7 @@ /* Only 32-bit buffer allocations are supported, gallium doesn't support more * at the moment. */ -struct r600_resource { +struct si_resource { struct threaded_resource b; /* Winsys objects. */ @@ -249,12 +252,12 @@ struct si_transfer { struct threaded_transfer b; - struct r600_resource *staging; + struct si_resource *staging; unsigned offset; }; struct si_texture { - struct r600_resource buffer; + struct si_resource buffer; struct radeon_surf surface; uint64_t size; @@ -264,7 +267,7 @@ uint64_t fmask_offset; uint64_t cmask_offset; uint64_t cmask_base_address_reg; - struct r600_resource *cmask_buffer; + struct si_resource *cmask_buffer; uint64_t dcc_offset; /* 0 = disabled */ unsigned cb_color_info; /* fast clear enable bit */ unsigned color_clear_value[2]; @@ -307,9 +310,9 @@ * target == 2D and last_level == 0. If enabled, dcc_offset contains * the absolute GPUVM address, not the relative one. 
*/ - struct r600_resource *dcc_separate_buffer; + struct si_resource *dcc_separate_buffer; /* When DCC is temporarily disabled, the separate buffer is here. */ - struct r600_resource *last_dcc_separate_buffer; + struct si_resource *last_dcc_separate_buffer; /* Estimate of how much this color buffer is written to in units of * full-screen draws: ps_invocations / (width * height) * Shader kills, late Z, and blending with trivial discards make it @@ -445,6 +448,7 @@ bool clear_db_cache_before_clear; bool has_msaa_sample_loc_bug; bool has_ls_vgpr_init_bug; + bool has_dcc_constant_encode; bool dpbb_allowed; bool dfsm_allowed; bool llvm_has_working_vgpr_indexing; @@ -660,7 +664,7 @@ struct pipe_stream_output_target b; /* The buffer where BUFFER_FILLED_SIZE is stored. */ - struct r600_resource *buf_filled_size; + struct si_resource *buf_filled_size; unsigned buf_filled_size_offset; bool buf_filled_size_valid; @@ -754,7 +758,7 @@ struct pipe_reference reference; struct si_context *ctx; struct radeon_saved_cs gfx; - struct r600_resource *trace_buf; + struct si_resource *trace_buf; unsigned trace_id; unsigned gfx_last_dw; @@ -774,7 +778,7 @@ struct radeon_cmdbuf *dma_cs; struct pipe_fence_handle *last_gfx_fence; struct pipe_fence_handle *last_sdma_fence; - struct r600_resource *eop_bug_scratch; + struct si_resource *eop_bug_scratch; struct u_upload_mgr *cached_gtt_allocator; struct threaded_context *tc; struct u_suballocator *allocator_zeroed_memory; @@ -796,11 +800,13 @@ void *vs_blit_texcoord; void *cs_clear_buffer; void *cs_copy_buffer; + void *cs_copy_image; + void *cs_copy_image_1d_array; struct si_screen *screen; struct pipe_debug_callback debug; struct ac_llvm_compiler compiler; /* only non-threaded compilation */ struct si_shader_ctx_state fixed_func_tcs_shader; - struct r600_resource *wait_mem_scratch; + struct si_resource *wait_mem_scratch; unsigned wait_mem_number; uint16_t prefetch_L2_mask; @@ -866,7 +872,7 @@ /* vertex buffer descriptors */ uint32_t 
*vb_descriptors_gpu_list; - struct r600_resource *vb_descriptors_buffer; + struct si_resource *vb_descriptors_buffer; unsigned vb_descriptors_offset; /* shader descriptors */ @@ -885,13 +891,35 @@ struct pipe_resource *gsvs_ring; struct pipe_resource *tess_rings; union pipe_color_union *border_color_table; /* in CPU memory, any endian */ - struct r600_resource *border_color_buffer; + struct si_resource *border_color_buffer; union pipe_color_union *border_color_map; /* in VRAM (slow access), little endian */ unsigned border_color_count; unsigned num_vs_blit_sgprs; uint32_t vs_blit_sh_data[SI_VS_BLIT_SGPRS_POS_TEXCOORD]; uint32_t cs_user_data[4]; + /** + * last_block allows disabling threads at the farthermost grid boundary. + * Full blocks as specified by "block" are launched, but the threads + * outside of "last_block" dimensions are disabled. + * + * If a block touches the grid boundary in the i-th axis, threads with + * THREAD_ID[i] >= last_block[i] are disabled. + * + * If last_block[i] is 0, it has the same behavior as last_block[i] = block[i], + * meaning no effect. + * + * It's equivalent to doing this at the beginning of the compute shader: + * + * for (i = 0; i < 3; i++) { + * if (block_id[i] == grid[i] - 1 && + * last_block[i] && last_block[i] >= thread_id[i]) + * return; + * } + * (this could be moved into pipe_grid_info) + */ + uint compute_last_block[3]; + /* Vertex and index buffers. 
*/ bool vertex_buffers_dirty; bool vertex_buffer_pointer_dirty; @@ -922,6 +950,7 @@ int last_index_size; int last_base_vertex; int last_start_instance; + int last_instance_count; int last_drawid; int last_sh_base_reg; int last_primitive_restart_en; @@ -935,11 +964,11 @@ enum pipe_prim_type current_rast_prim; /* primitive type after TES, GS */ /* Scratch buffer */ - struct r600_resource *scratch_buffer; + struct si_resource *scratch_buffer; unsigned scratch_waves; unsigned spi_tmpring_size; - struct r600_resource *compute_scratch_buffer; + struct si_resource *compute_scratch_buffer; /* Emitted derived tessellation state. */ /* Local shader (VS), or HS if LS-HS are merged. */ @@ -1107,17 +1136,17 @@ struct pb_buffer *buf, enum radeon_bo_usage usage); void *si_buffer_map_sync_with_rings(struct si_context *sctx, - struct r600_resource *resource, + struct si_resource *resource, unsigned usage); void si_init_resource_fields(struct si_screen *sscreen, - struct r600_resource *res, + struct si_resource *res, uint64_t size, unsigned alignment); bool si_alloc_resource(struct si_screen *sscreen, - struct r600_resource *res); + struct si_resource *res); struct pipe_resource *pipe_aligned_buffer_create(struct pipe_screen *screen, unsigned flags, unsigned usage, unsigned size, unsigned alignment); -struct r600_resource *si_aligned_buffer_create(struct pipe_screen *screen, +struct si_resource *si_aligned_buffer_create(struct pipe_screen *screen, unsigned flags, unsigned usage, unsigned size, unsigned alignment); void si_replace_buffer_storage(struct pipe_context *ctx, @@ -1143,6 +1172,13 @@ void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src, uint64_t dst_offset, uint64_t src_offset, unsigned size); +void si_compute_copy_image(struct si_context *sctx, + struct pipe_resource *dst, + unsigned dst_level, + struct pipe_resource *src, + unsigned src_level, + unsigned dstx, unsigned dsty, unsigned dstz, + const struct pipe_box *src_box); 
void si_init_compute_blit_functions(struct si_context *sctx); /* si_cp_dma.c */ @@ -1158,10 +1194,10 @@ SI_CPDMA_SKIP_BO_LIST_UPDATE) void si_cp_dma_wait_for_idle(struct si_context *sctx); -void si_cp_dma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, - uint64_t offset, uint64_t size, unsigned value, - enum si_coherency coher, - enum si_cache_policy cache_policy); +void si_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_cmdbuf *cs, + struct pipe_resource *dst, uint64_t offset, + uint64_t size, unsigned value, unsigned user_flags, + enum si_coherency coher, enum si_cache_policy cache_policy); void si_cp_dma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src, uint64_t dst_offset, uint64_t src_offset, unsigned size, @@ -1171,6 +1207,9 @@ uint64_t offset, unsigned size); void cik_emit_prefetch_L2(struct si_context *sctx, bool vertex_stage_only); void si_test_gds(struct si_context *sctx); +void si_cp_write_data(struct si_context *sctx, struct si_resource *buf, + unsigned offset, unsigned size, unsigned dst_sel, + unsigned engine, const void *data); /* si_debug.c */ void si_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, @@ -1190,12 +1229,12 @@ void si_init_dma_functions(struct si_context *sctx); /* si_dma_cs.c */ -void si_dma_emit_timestamp(struct si_context *sctx, struct r600_resource *dst, +void si_dma_emit_timestamp(struct si_context *sctx, struct si_resource *dst, uint64_t offset); void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, uint64_t offset, uint64_t size, unsigned clear_value); void si_need_dma_space(struct si_context *ctx, unsigned num_dw, - struct r600_resource *dst, struct r600_resource *src); + struct si_resource *dst, struct si_resource *src); void si_flush_dma_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_handle **fence); void si_screen_clear_buffer(struct si_screen *sscreen, struct pipe_resource *dst, @@ -1205,10 +1244,10 @@ void 
si_cp_release_mem(struct si_context *ctx, unsigned event, unsigned event_flags, unsigned dst_sel, unsigned int_sel, unsigned data_sel, - struct r600_resource *buf, uint64_t va, + struct si_resource *buf, uint64_t va, uint32_t new_fence, unsigned query_type); unsigned si_cp_write_fence_dwords(struct si_screen *screen); -void si_cp_wait_mem(struct si_context *ctx, +void si_cp_wait_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, uint64_t va, uint32_t ref, uint32_t mask, unsigned flags); void si_init_fence_functions(struct si_context *ctx); void si_init_screen_fence_functions(struct si_screen *screen); @@ -1224,7 +1263,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx); void si_need_gfx_cs_space(struct si_context *ctx); -/* r600_gpu_load.c */ +/* si_gpu_load.c */ void si_gpu_load_kill_thread(struct si_screen *sscreen); uint64_t si_begin_counter(struct si_screen *sscreen, unsigned type); unsigned si_end_counter(struct si_screen *sscreen, unsigned type, @@ -1233,11 +1272,9 @@ /* si_compute.c */ void si_init_compute_functions(struct si_context *sctx); -/* r600_perfcounters.c */ -void si_perfcounters_destroy(struct si_screen *sscreen); - /* si_perfcounters.c */ void si_init_perfcounters(struct si_screen *screen); +void si_destroy_perfcounters(struct si_screen *screen); /* si_pipe.c */ bool si_check_device_reset(struct si_context *sctx); @@ -1255,6 +1292,8 @@ void *si_create_dma_compute_shader(struct pipe_context *ctx, unsigned num_dwords_per_thread, bool dst_stream_cache_policy, bool is_copy); +void *si_create_copy_image_compute_shader(struct pipe_context *ctx); +void *si_create_copy_image_compute_shader_1d_array(struct pipe_context *ctx); void *si_create_query_result_cs(struct si_context *sctx); /* si_test_dma.c */ @@ -1326,13 +1365,13 @@ * common helpers */ -static inline struct r600_resource *r600_resource(struct pipe_resource *r) +static inline struct si_resource *si_resource(struct pipe_resource *r) { - return (struct r600_resource*)r; + return (struct 
si_resource*)r; } static inline void -r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res) +si_resource_reference(struct si_resource **ptr, struct si_resource *res) { pipe_resource_reference((struct pipe_resource **)ptr, (struct pipe_resource *)res); @@ -1364,8 +1403,8 @@ { if (r) { /* Add memory usage for need_gfx_cs_space */ - sctx->vram += r600_resource(r)->vram_usage; - sctx->gtt += r600_resource(r)->gart_usage; + sctx->vram += si_resource(r)->vram_usage; + sctx->gtt += si_resource(r)->gart_usage; } } @@ -1373,6 +1412,7 @@ si_invalidate_draw_sh_constants(struct si_context *sctx) { sctx->last_base_vertex = SI_BASE_VERTEX_UNKNOWN; + sctx->last_instance_count = SI_INSTANCE_COUNT_UNKNOWN; } static inline unsigned @@ -1610,15 +1650,15 @@ */ static inline void radeon_add_to_buffer_list(struct si_context *sctx, struct radeon_cmdbuf *cs, - struct r600_resource *rbo, + struct si_resource *bo, enum radeon_bo_usage usage, enum radeon_bo_priority priority) { assert(usage); sctx->ws->cs_add_buffer( - cs, rbo->buf, + cs, bo->buf, (enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED), - rbo->domains, priority); + bo->domains, priority); } /** @@ -1640,18 +1680,18 @@ */ static inline void radeon_add_to_gfx_buffer_list_check_mem(struct si_context *sctx, - struct r600_resource *rbo, + struct si_resource *bo, enum radeon_bo_usage usage, enum radeon_bo_priority priority, bool check_mem) { if (check_mem && !radeon_cs_memory_below_limit(sctx->screen, sctx->gfx_cs, - sctx->vram + rbo->vram_usage, - sctx->gtt + rbo->gart_usage)) + sctx->vram + bo->vram_usage, + sctx->gtt + bo->gart_usage)) si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, rbo, usage, priority); + radeon_add_to_buffer_list(sctx, sctx->gfx_cs, bo, usage, priority); } #define PRINT_ERR(fmt, args...) 
\ diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_pm4.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_pm4.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_pm4.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_pm4.c 2019-03-31 23:16:37.000000000 +0000 @@ -85,14 +85,14 @@ } void si_pm4_add_bo(struct si_pm4_state *state, - struct r600_resource *bo, + struct si_resource *bo, enum radeon_bo_usage usage, enum radeon_bo_priority priority) { unsigned idx = state->nbo++; assert(idx < SI_PM4_MAX_BO); - r600_resource_reference(&state->bo[idx], bo); + si_resource_reference(&state->bo[idx], bo); state->bo_usage[idx] = usage; state->bo_priority[idx] = priority; } @@ -100,8 +100,8 @@ void si_pm4_clear_state(struct si_pm4_state *state) { for (int i = 0; i < state->nbo; ++i) - r600_resource_reference(&state->bo[i], NULL); - r600_resource_reference(&state->indirect_buffer, NULL); + si_resource_reference(&state->bo[i], NULL); + si_resource_reference(&state->indirect_buffer, NULL); state->nbo = 0; state->ndw = 0; } @@ -133,7 +133,7 @@ if (!state->indirect_buffer) { radeon_emit_array(cs, state->pm4, state->ndw); } else { - struct r600_resource *ib = state->indirect_buffer; + struct si_resource *ib = state->indirect_buffer; radeon_add_to_buffer_list(sctx, sctx->gfx_cs, ib, RADEON_USAGE_READ, @@ -168,7 +168,7 @@ assert(state->ndw); assert(aligned_ndw <= SI_PM4_MAX_DW); - r600_resource_reference(&state->indirect_buffer, NULL); + si_resource_reference(&state->indirect_buffer, NULL); /* TODO: this hangs with 1024 or higher alignment on GFX9. 
*/ state->indirect_buffer = si_aligned_buffer_create(screen, 0, diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_pm4.h mesa-19.0.1/src/gallium/drivers/radeonsi/si_pm4.h --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_pm4.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_pm4.h 2019-03-31 23:16:37.000000000 +0000 @@ -43,7 +43,7 @@ struct si_pm4_state { /* optional indirect buffer */ - struct r600_resource *indirect_buffer; + struct si_resource *indirect_buffer; /* PKT3_SET_*_REG handling */ unsigned last_opcode; @@ -56,7 +56,7 @@ /* BO's referenced by this state */ unsigned nbo; - struct r600_resource *bo[SI_PM4_MAX_BO]; + struct si_resource *bo[SI_PM4_MAX_BO]; enum radeon_bo_usage bo_usage[SI_PM4_MAX_BO]; enum radeon_bo_priority bo_priority[SI_PM4_MAX_BO]; @@ -71,7 +71,7 @@ void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val); void si_pm4_add_bo(struct si_pm4_state *state, - struct r600_resource *bo, + struct si_resource *bo, enum radeon_bo_usage usage, enum radeon_bo_priority priority); void si_pm4_upload_indirect_buffer(struct si_context *sctx, diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_query.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_query.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_query.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_query.c 2019-03-31 23:16:37.000000000 +0000 @@ -34,6 +34,8 @@ #define SI_MAX_STREAMS 4 +static const struct si_query_ops query_hw_ops; + struct si_hw_query_params { unsigned start_offset; unsigned end_offset; @@ -57,9 +59,9 @@ }; static void si_query_sw_destroy(struct si_screen *sscreen, - struct si_query *rquery) + struct si_query *squery) { - struct si_query_sw *query = (struct si_query_sw *)rquery; + struct si_query_sw *query = (struct si_query_sw *)squery; sscreen->b.fence_reference(&sscreen->b, &query->fence, NULL); FREE(query); @@ -106,9 +108,9 @@ } static bool si_query_sw_begin(struct si_context *sctx, - 
struct si_query *rquery) + struct si_query *squery) { - struct si_query_sw *query = (struct si_query_sw *)rquery; + struct si_query_sw *query = (struct si_query_sw *)squery; enum radeon_value_id ws_id; switch(query->b.type) { @@ -267,9 +269,9 @@ } static bool si_query_sw_end(struct si_context *sctx, - struct si_query *rquery) + struct si_query *squery) { - struct si_query_sw *query = (struct si_query_sw *)rquery; + struct si_query_sw *query = (struct si_query_sw *)squery; enum radeon_value_id ws_id; switch(query->b.type) { @@ -432,11 +434,11 @@ } static bool si_query_sw_get_result(struct si_context *sctx, - struct si_query *rquery, + struct si_query *squery, bool wait, union pipe_query_result *result) { - struct si_query_sw *query = (struct si_query_sw *)rquery; + struct si_query_sw *query = (struct si_query_sw *)squery; switch (query->b.type) { case PIPE_QUERY_TIMESTAMP_DISJOINT: @@ -447,7 +449,7 @@ return true; case PIPE_QUERY_GPU_FINISHED: { struct pipe_screen *screen = sctx->b.screen; - struct pipe_context *ctx = rquery->b.flushed ? NULL : &sctx->b; + struct pipe_context *ctx = squery->b.flushed ? NULL : &sctx->b; result->b = screen->fence_finish(screen, ctx, query->fence, wait ? PIPE_TIMEOUT_INFINITE : 0); @@ -497,7 +499,7 @@ } -static struct si_query_ops sw_query_ops = { +static const struct si_query_ops sw_query_ops = { .destroy = si_query_sw_destroy, .begin = si_query_sw_begin, .end = si_query_sw_end, @@ -519,72 +521,121 @@ return (struct pipe_query *)query; } -void si_query_hw_destroy(struct si_screen *sscreen, - struct si_query *rquery) +void si_query_buffer_destroy(struct si_screen *sscreen, struct si_query_buffer *buffer) { - struct si_query_hw *query = (struct si_query_hw *)rquery; - struct si_query_buffer *prev = query->buffer.previous; + struct si_query_buffer *prev = buffer->previous; /* Release all query buffers. 
*/ while (prev) { struct si_query_buffer *qbuf = prev; prev = prev->previous; - r600_resource_reference(&qbuf->buf, NULL); + si_resource_reference(&qbuf->buf, NULL); FREE(qbuf); } - r600_resource_reference(&query->buffer.buf, NULL); - r600_resource_reference(&query->workaround_buf, NULL); - FREE(rquery); + si_resource_reference(&buffer->buf, NULL); } -static struct r600_resource *si_new_query_buffer(struct si_screen *sscreen, - struct si_query_hw *query) +void si_query_buffer_reset(struct si_context *sctx, struct si_query_buffer *buffer) { - unsigned buf_size = MAX2(query->result_size, - sscreen->info.min_alloc_size); + /* Discard all query buffers except for the oldest. */ + while (buffer->previous) { + struct si_query_buffer *qbuf = buffer->previous; + buffer->previous = qbuf->previous; - /* Queries are normally read by the CPU after - * being written by the gpu, hence staging is probably a good - * usage pattern. - */ - struct r600_resource *buf = r600_resource( - pipe_buffer_create(&sscreen->b, 0, - PIPE_USAGE_STAGING, buf_size)); - if (!buf) - return NULL; + si_resource_reference(&buffer->buf, NULL); + buffer->buf = qbuf->buf; /* move ownership */ + FREE(qbuf); + } + buffer->results_end = 0; - if (!query->ops->prepare_buffer(sscreen, query, buf)) { - r600_resource_reference(&buf, NULL); - return NULL; + if (!buffer->buf) + return; + + /* Discard even the oldest buffer if it can't be mapped without a stall. 
*/ + if (si_rings_is_buffer_referenced(sctx, buffer->buf->buf, RADEON_USAGE_READWRITE) || + !sctx->ws->buffer_wait(buffer->buf->buf, 0, RADEON_USAGE_READWRITE)) { + si_resource_reference(&buffer->buf, NULL); + } else { + buffer->unprepared = true; + } +} + +bool si_query_buffer_alloc(struct si_context *sctx, struct si_query_buffer *buffer, + bool (*prepare_buffer)(struct si_context *, struct si_query_buffer*), + unsigned size) +{ + bool unprepared = buffer->unprepared; + buffer->unprepared = false; + + if (!buffer->buf || buffer->results_end + size > buffer->buf->b.b.width0) { + if (buffer->buf) { + struct si_query_buffer *qbuf = MALLOC_STRUCT(si_query_buffer); + memcpy(qbuf, buffer, sizeof(*qbuf)); + buffer->previous = qbuf; + } + buffer->results_end = 0; + + /* Queries are normally read by the CPU after + * being written by the gpu, hence staging is probably a good + * usage pattern. + */ + struct si_screen *screen = sctx->screen; + unsigned buf_size = MAX2(size, screen->info.min_alloc_size); + buffer->buf = si_resource( + pipe_buffer_create(&screen->b, 0, PIPE_USAGE_STAGING, buf_size)); + if (unlikely(!buffer->buf)) + return false; + unprepared = true; } - return buf; + if (unprepared && prepare_buffer) { + if (unlikely(!prepare_buffer(sctx, buffer))) { + si_resource_reference(&buffer->buf, NULL); + return false; + } + } + + return true; } -static bool si_query_hw_prepare_buffer(struct si_screen *sscreen, - struct si_query_hw *query, - struct r600_resource *buffer) + +void si_query_hw_destroy(struct si_screen *sscreen, + struct si_query *squery) { - /* Callers ensure that the buffer is currently unused by the GPU. 
*/ - uint32_t *results = sscreen->ws->buffer_map(buffer->buf, NULL, + struct si_query_hw *query = (struct si_query_hw *)squery; + + si_query_buffer_destroy(sscreen, &query->buffer); + si_resource_reference(&query->workaround_buf, NULL); + FREE(squery); +} + +static bool si_query_hw_prepare_buffer(struct si_context *sctx, + struct si_query_buffer *qbuf) +{ + static const struct si_query_hw si_query_hw_s; + struct si_query_hw *query = container_of(qbuf, &si_query_hw_s, buffer); + struct si_screen *screen = sctx->screen; + + /* The caller ensures that the buffer is currently unused by the GPU. */ + uint32_t *results = screen->ws->buffer_map(qbuf->buf->buf, NULL, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED); if (!results) return false; - memset(results, 0, buffer->b.b.width0); + memset(results, 0, qbuf->buf->b.b.width0); if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER || query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE || query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) { - unsigned max_rbs = sscreen->info.num_render_backends; - unsigned enabled_rb_mask = sscreen->info.enabled_rb_mask; + unsigned max_rbs = screen->info.num_render_backends; + unsigned enabled_rb_mask = screen->info.enabled_rb_mask; unsigned num_results; unsigned i, j; /* Set top bits for unused backends. 
*/ - num_results = buffer->b.b.width0 / query->result_size; + num_results = qbuf->buf->b.b.width0 / query->result_size; for (j = 0; j < num_results; j++) { for (i = 0; i < max_rbs; i++) { if (!(enabled_rb_mask & (1<buffer.buf = si_new_query_buffer(sscreen, query); - if (!query->buffer.buf) - return false; - - return true; -} - static struct pipe_query *si_query_hw_create(struct si_screen *sscreen, unsigned query_type, unsigned index) @@ -665,20 +698,19 @@ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: query->result_size = 16 * sscreen->info.num_render_backends; query->result_size += 16; /* for the fence + alignment */ - query->num_cs_dw_end = 6 + si_cp_write_fence_dwords(sscreen); + query->b.num_cs_dw_suspend = 6 + si_cp_write_fence_dwords(sscreen); break; case SI_QUERY_TIME_ELAPSED_SDMA: /* GET_GLOBAL_TIMESTAMP only works if the offset is a multiple of 32. */ query->result_size = 64; - query->num_cs_dw_end = 0; break; case PIPE_QUERY_TIME_ELAPSED: query->result_size = 24; - query->num_cs_dw_end = 8 + si_cp_write_fence_dwords(sscreen); + query->b.num_cs_dw_suspend = 8 + si_cp_write_fence_dwords(sscreen); break; case PIPE_QUERY_TIMESTAMP: query->result_size = 16; - query->num_cs_dw_end = 8 + si_cp_write_fence_dwords(sscreen); + query->b.num_cs_dw_suspend = 8 + si_cp_write_fence_dwords(sscreen); query->flags = SI_QUERY_HW_FLAG_NO_START; break; case PIPE_QUERY_PRIMITIVES_EMITTED: @@ -687,19 +719,19 @@ case PIPE_QUERY_SO_OVERFLOW_PREDICATE: /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ query->result_size = 32; - query->num_cs_dw_end = 6; + query->b.num_cs_dw_suspend = 6; query->stream = index; break; case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ query->result_size = 32 * SI_MAX_STREAMS; - query->num_cs_dw_end = 6 * SI_MAX_STREAMS; + query->b.num_cs_dw_suspend = 6 * SI_MAX_STREAMS; break; case PIPE_QUERY_PIPELINE_STATISTICS: /* 11 values on GCN. 
*/ query->result_size = 11 * 16; query->result_size += 8; /* for the fence + alignment */ - query->num_cs_dw_end = 6 + si_cp_write_fence_dwords(sscreen); + query->b.num_cs_dw_suspend = 6 + si_cp_write_fence_dwords(sscreen); break; default: assert(0); @@ -707,11 +739,6 @@ return NULL; } - if (!si_query_hw_init(sscreen, query)) { - FREE(query); - return NULL; - } - return (struct pipe_query *)query; } @@ -765,7 +792,7 @@ static void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_hw *query, - struct r600_resource *buffer, + struct si_resource *buffer, uint64_t va) { struct radeon_cmdbuf *cs = sctx->gfx_cs; @@ -816,8 +843,9 @@ { uint64_t va; - if (!query->buffer.buf) - return; // previous buffer allocation failure + if (!si_query_buffer_alloc(sctx, &query->buffer, query->ops->prepare_buffer, + query->result_size)) + return; si_update_occlusion_query_state(sctx, query->b.type, 1); si_update_prims_generated_query_state(sctx, query->b.type, 1); @@ -825,28 +853,13 @@ if (query->b.type != SI_QUERY_TIME_ELAPSED_SDMA) si_need_gfx_cs_space(sctx); - /* Get a new query buffer if needed. 
*/ - if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) { - struct si_query_buffer *qbuf = MALLOC_STRUCT(si_query_buffer); - *qbuf = query->buffer; - query->buffer.results_end = 0; - query->buffer.previous = qbuf; - query->buffer.buf = si_new_query_buffer(sctx->screen, query); - if (!query->buffer.buf) - return; - } - - /* emit begin query */ va = query->buffer.buf->gpu_address + query->buffer.results_end; - query->ops->emit_start(sctx, query, query->buffer.buf, va); - - sctx->num_cs_dw_queries_suspend += query->num_cs_dw_end; } static void si_query_hw_do_emit_stop(struct si_context *sctx, struct si_query_hw *query, - struct r600_resource *buffer, + struct si_resource *buffer, uint64_t va) { struct radeon_cmdbuf *cs = sctx->gfx_cs; @@ -883,9 +896,8 @@ va += 8; /* fall through */ case PIPE_QUERY_TIMESTAMP: - si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, - 0, EOP_DST_SEL_MEM, - EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, + si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0, + EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, EOP_DATA_SEL_TIMESTAMP, NULL, va, 0, query->b.type); fence_va = va + 8; @@ -910,8 +922,7 @@ if (fence_va) { si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0, - EOP_DST_SEL_MEM, - EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, + EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, EOP_DATA_SEL_VALUE_32BIT, query->buffer.buf, fence_va, 0x80000000, query->b.type); @@ -923,12 +934,16 @@ { uint64_t va; - if (!query->buffer.buf) - return; // previous buffer allocation failure - /* The queries which need begin already called this in begin_query. 
*/ - if (query->flags & SI_QUERY_HW_FLAG_NO_START) + if (query->flags & SI_QUERY_HW_FLAG_NO_START) { si_need_gfx_cs_space(sctx); + if (!si_query_buffer_alloc(sctx, &query->buffer, query->ops->prepare_buffer, + query->result_size)) + return; + } + + if (!query->buffer.buf) + return; // previous buffer allocation failure /* emit end query */ va = query->buffer.buf->gpu_address + query->buffer.results_end; @@ -937,15 +952,12 @@ query->buffer.results_end += query->result_size; - if (!(query->flags & SI_QUERY_HW_FLAG_NO_START)) - sctx->num_cs_dw_queries_suspend -= query->num_cs_dw_end; - si_update_occlusion_query_state(sctx, query->b.type, -1); si_update_prims_generated_query_state(sctx, query->b.type, -1); } static void emit_set_predicate(struct si_context *ctx, - struct r600_resource *buf, uint64_t va, + struct si_resource *buf, uint64_t va, uint32_t op) { struct radeon_cmdbuf *cs = ctx->gfx_cs; @@ -1061,51 +1073,24 @@ static void si_destroy_query(struct pipe_context *ctx, struct pipe_query *query) { struct si_context *sctx = (struct si_context *)ctx; - struct si_query *rquery = (struct si_query *)query; + struct si_query *squery = (struct si_query *)query; - rquery->ops->destroy(sctx->screen, rquery); + squery->ops->destroy(sctx->screen, squery); } static boolean si_begin_query(struct pipe_context *ctx, struct pipe_query *query) { struct si_context *sctx = (struct si_context *)ctx; - struct si_query *rquery = (struct si_query *)query; + struct si_query *squery = (struct si_query *)query; - return rquery->ops->begin(sctx, rquery); -} - -void si_query_hw_reset_buffers(struct si_context *sctx, - struct si_query_hw *query) -{ - struct si_query_buffer *prev = query->buffer.previous; - - /* Discard the old query buffers. 
*/ - while (prev) { - struct si_query_buffer *qbuf = prev; - prev = prev->previous; - r600_resource_reference(&qbuf->buf, NULL); - FREE(qbuf); - } - - query->buffer.results_end = 0; - query->buffer.previous = NULL; - - /* Obtain a new buffer if the current one can't be mapped without a stall. */ - if (si_rings_is_buffer_referenced(sctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) || - !sctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) { - r600_resource_reference(&query->buffer.buf, NULL); - query->buffer.buf = si_new_query_buffer(sctx->screen, query); - } else { - if (!query->ops->prepare_buffer(sctx->screen, query, query->buffer.buf)) - r600_resource_reference(&query->buffer.buf, NULL); - } + return squery->ops->begin(sctx, squery); } bool si_query_hw_begin(struct si_context *sctx, - struct si_query *rquery) + struct si_query *squery) { - struct si_query_hw *query = (struct si_query_hw *)rquery; + struct si_query_hw *query = (struct si_query_hw *)squery; if (query->flags & SI_QUERY_HW_FLAG_NO_START) { assert(0); @@ -1113,38 +1098,41 @@ } if (!(query->flags & SI_QUERY_HW_FLAG_BEGIN_RESUMES)) - si_query_hw_reset_buffers(sctx, query); + si_query_buffer_reset(sctx, &query->buffer); - r600_resource_reference(&query->workaround_buf, NULL); + si_resource_reference(&query->workaround_buf, NULL); si_query_hw_emit_start(sctx, query); if (!query->buffer.buf) return false; - LIST_ADDTAIL(&query->list, &sctx->active_queries); + LIST_ADDTAIL(&query->b.active_list, &sctx->active_queries); + sctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend; return true; } static bool si_end_query(struct pipe_context *ctx, struct pipe_query *query) { struct si_context *sctx = (struct si_context *)ctx; - struct si_query *rquery = (struct si_query *)query; + struct si_query *squery = (struct si_query *)query; - return rquery->ops->end(sctx, rquery); + return squery->ops->end(sctx, squery); } bool si_query_hw_end(struct si_context *sctx, - struct si_query 
*rquery) + struct si_query *squery) { - struct si_query_hw *query = (struct si_query_hw *)rquery; + struct si_query_hw *query = (struct si_query_hw *)squery; if (query->flags & SI_QUERY_HW_FLAG_NO_START) - si_query_hw_reset_buffers(sctx, query); + si_query_buffer_reset(sctx, &query->buffer); si_query_hw_emit_stop(sctx, query); - if (!(query->flags & SI_QUERY_HW_FLAG_NO_START)) - LIST_DELINIT(&query->list); + if (!(query->flags & SI_QUERY_HW_FLAG_NO_START)) { + LIST_DELINIT(&query->b.active_list); + sctx->num_cs_dw_queries_suspend -= query->b.num_cs_dw_suspend; + } if (!query->buffer.buf) return false; @@ -1153,7 +1141,7 @@ } static void si_get_hw_query_params(struct si_context *sctx, - struct si_query_hw *rquery, int index, + struct si_query_hw *squery, int index, struct si_hw_query_params *params) { unsigned max_rbs = sctx->screen->info.num_render_backends; @@ -1161,7 +1149,7 @@ params->pair_stride = 0; params->pair_count = 1; - switch (rquery->b.type) { + switch (squery->b.type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: @@ -1207,7 +1195,7 @@ * fence: it is initialized as 0, and the high bit is set by * the write of the streamout stats event. 
*/ - params->fence_offset = rquery->result_size - 4; + params->fence_offset = squery->result_size - 4; break; case PIPE_QUERY_PIPELINE_STATISTICS: { @@ -1351,14 +1339,35 @@ } } +void si_query_hw_suspend(struct si_context *sctx, struct si_query *query) +{ + si_query_hw_emit_stop(sctx, (struct si_query_hw *)query); +} + +void si_query_hw_resume(struct si_context *sctx, struct si_query *query) +{ + si_query_hw_emit_start(sctx, (struct si_query_hw *)query); +} + +static const struct si_query_ops query_hw_ops = { + .destroy = si_query_hw_destroy, + .begin = si_query_hw_begin, + .end = si_query_hw_end, + .get_result = si_query_hw_get_result, + .get_result_resource = si_query_hw_get_result_resource, + + .suspend = si_query_hw_suspend, + .resume = si_query_hw_resume, +}; + static boolean si_get_query_result(struct pipe_context *ctx, struct pipe_query *query, boolean wait, union pipe_query_result *result) { struct si_context *sctx = (struct si_context *)ctx; - struct si_query *rquery = (struct si_query *)query; + struct si_query *squery = (struct si_query *)query; - return rquery->ops->get_result(sctx, rquery, wait, result); + return squery->ops->get_result(sctx, squery, wait, result); } static void si_get_query_result_resource(struct pipe_context *ctx, @@ -1370,9 +1379,9 @@ unsigned offset) { struct si_context *sctx = (struct si_context *)ctx; - struct si_query *rquery = (struct si_query *)query; + struct si_query *squery = (struct si_query *)query; - rquery->ops->get_result_resource(sctx, rquery, wait, result_type, index, + squery->ops->get_result_resource(sctx, squery, wait, result_type, index, resource, offset); } @@ -1383,11 +1392,11 @@ } bool si_query_hw_get_result(struct si_context *sctx, - struct si_query *rquery, + struct si_query *squery, bool wait, union pipe_query_result *result) { struct si_screen *sscreen = sctx->screen; - struct si_query_hw *query = (struct si_query_hw *)rquery; + struct si_query_hw *query = (struct si_query_hw *)squery; struct 
si_query_buffer *qbuf; query->ops->clear_result(query, result); @@ -1398,7 +1407,7 @@ unsigned results_base = 0; void *map; - if (rquery->b.flushed) + if (squery->b.flushed) map = sctx->ws->buffer_map(qbuf->buf->buf, NULL, usage); else map = si_buffer_map_sync_with_rings(sctx, qbuf->buf, usage); @@ -1414,9 +1423,9 @@ } /* Convert the time to expected units. */ - if (rquery->type == PIPE_QUERY_TIME_ELAPSED || - rquery->type == SI_QUERY_TIME_ELAPSED_SDMA || - rquery->type == PIPE_QUERY_TIMESTAMP) { + if (squery->type == PIPE_QUERY_TIME_ELAPSED || + squery->type == SI_QUERY_TIME_ELAPSED_SDMA || + squery->type == PIPE_QUERY_TIMESTAMP) { result->u64 = (1000000 * result->u64) / sscreen->info.clock_crystal_freq; } return true; @@ -1436,14 +1445,14 @@ } static void si_query_hw_get_result_resource(struct si_context *sctx, - struct si_query *rquery, + struct si_query *squery, bool wait, enum pipe_query_value_type result_type, int index, struct pipe_resource *resource, unsigned offset) { - struct si_query_hw *query = (struct si_query_hw *)rquery; + struct si_query_hw *query = (struct si_query_hw *)squery; struct si_query_buffer *qbuf; struct si_query_buffer *qbuf_prev; struct pipe_resource *tmp_buffer = NULL; @@ -1558,7 +1567,7 @@ ssbo[2].buffer_offset = offset; ssbo[2].buffer_size = 8; - r600_resource(resource)->TC_L2_dirty = true; + si_resource(resource)->TC_L2_dirty = true; } sctx->b.set_shader_buffers(&sctx->b, PIPE_SHADER_COMPUTE, 0, 3, ssbo); @@ -1573,7 +1582,8 @@ va = qbuf->buf->gpu_address + qbuf->results_end - query->result_size; va += params.fence_offset; - si_cp_wait_mem(sctx, va, 0x80000000, 0x80000000, 0); + si_cp_wait_mem(sctx, sctx->gfx_cs, va, 0x80000000, + 0x80000000, WAIT_REG_MEM_EQUAL); } sctx->b.launch_grid(&sctx->b, &grid); @@ -1590,7 +1600,7 @@ enum pipe_render_cond_flag mode) { struct si_context *sctx = (struct si_context *)ctx; - struct si_query_hw *rquery = (struct si_query_hw *)query; + struct si_query_hw *squery = (struct si_query_hw *)query; struct 
si_atom *atom = &sctx->atoms.s.render_cond; if (query) { @@ -1603,21 +1613,21 @@ if (((sctx->chip_class == VI && sctx->screen->info.pfp_fw_feature < 49) || (sctx->chip_class == GFX9 && sctx->screen->info.pfp_fw_feature < 38)) && !condition && - (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE || - (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE && - (rquery->buffer.previous || - rquery->buffer.results_end > rquery->result_size)))) { + (squery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE || + (squery->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE && + (squery->buffer.previous || + squery->buffer.results_end > squery->result_size)))) { needs_workaround = true; } - if (needs_workaround && !rquery->workaround_buf) { + if (needs_workaround && !squery->workaround_buf) { bool old_force_off = sctx->render_cond_force_off; sctx->render_cond_force_off = true; u_suballocator_alloc( sctx->allocator_zeroed_memory, 8, 8, - &rquery->workaround_offset, - (struct pipe_resource **)&rquery->workaround_buf); + &squery->workaround_offset, + (struct pipe_resource **)&squery->workaround_buf); /* Reset to NULL to avoid a redundant SET_PREDICATION * from launching the compute grid. @@ -1626,7 +1636,7 @@ ctx->get_query_result_resource( ctx, query, true, PIPE_QUERY_TYPE_U64, 0, - &rquery->workaround_buf->b.b, rquery->workaround_offset); + &squery->workaround_buf->b.b, squery->workaround_offset); /* Settings this in the render cond atom is too late, * so set it here. 
*/ @@ -1646,26 +1656,21 @@ void si_suspend_queries(struct si_context *sctx) { - struct si_query_hw *query; + struct si_query *query; - LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, list) { - si_query_hw_emit_stop(sctx, query); - } - assert(sctx->num_cs_dw_queries_suspend == 0); + LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, active_list) + query->ops->suspend(sctx, query); } void si_resume_queries(struct si_context *sctx) { - struct si_query_hw *query; - - assert(sctx->num_cs_dw_queries_suspend == 0); + struct si_query *query; /* Check CS space here. Resuming must not be interrupted by flushes. */ si_need_gfx_cs_space(sctx); - LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, list) { - si_query_hw_emit_start(sctx, query); - } + LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, active_list) + query->ops->resume(sctx, query); } #define XFULL(name_, query_type_, type_, result_type_, group_id_) \ diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_query.h mesa-19.0.1/src/gallium/drivers/radeonsi/si_query.h --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_query.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_query.h 2019-03-31 23:16:37.000000000 +0000 @@ -34,8 +34,9 @@ struct si_screen; struct si_context; struct si_query; +struct si_query_buffer; struct si_query_hw; -struct r600_resource; +struct si_resource; enum { SI_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC, @@ -133,14 +134,23 @@ int index, struct pipe_resource *resource, unsigned offset); + + void (*suspend)(struct si_context *, struct si_query *); + void (*resume)(struct si_context *, struct si_query *); }; struct si_query { struct threaded_query b; - struct si_query_ops *ops; + const struct si_query_ops *ops; - /* The type of query */ + /* The PIPE_QUERY_xxx type of query */ unsigned type; + + /* The number of dwords for suspend. */ + unsigned num_cs_dw_suspend; + + /* Linked list of queries that must be suspended at end of CS. 
*/ + struct list_head active_list; }; enum { @@ -151,15 +161,13 @@ }; struct si_query_hw_ops { - bool (*prepare_buffer)(struct si_screen *, - struct si_query_hw *, - struct r600_resource *); + bool (*prepare_buffer)(struct si_context *, struct si_query_buffer *); void (*emit_start)(struct si_context *, struct si_query_hw *, - struct r600_resource *buffer, uint64_t va); + struct si_resource *buffer, uint64_t va); void (*emit_stop)(struct si_context *, struct si_query_hw *, - struct r600_resource *buffer, uint64_t va); + struct si_resource *buffer, uint64_t va); void (*clear_result)(struct si_query_hw *, union pipe_query_result *); void (*add_result)(struct si_screen *screen, struct si_query_hw *, void *buffer, @@ -168,15 +176,23 @@ struct si_query_buffer { /* The buffer where query results are stored. */ - struct r600_resource *buf; - /* Offset of the next free result after current query data */ - unsigned results_end; + struct si_resource *buf; /* If a query buffer is full, a new buffer is created and the old one * is put in here. When we calculate the result, we sum up the samples * from all buffers. */ struct si_query_buffer *previous; + /* Offset of the next free result after current query data */ + unsigned results_end; + bool unprepared; }; +void si_query_buffer_destroy(struct si_screen *sctx, struct si_query_buffer *buffer); +void si_query_buffer_reset(struct si_context *sctx, struct si_query_buffer *buffer); +bool si_query_buffer_alloc(struct si_context *sctx, struct si_query_buffer *buffer, + bool (*prepare_buffer)(struct si_context *, struct si_query_buffer*), + unsigned size); + + struct si_query_hw { struct si_query b; struct si_query_hw_ops *ops; @@ -187,105 +203,37 @@ /* Size of the result in memory for both begin_query and end_query, * this can be one or two numbers, or it could even be a size of a structure. */ unsigned result_size; - /* The number of dwords for end_query. 
*/ - unsigned num_cs_dw_end; - /* Linked list of queries */ - struct list_head list; /* For transform feedback: which stream the query is for */ unsigned stream; /* Workaround via compute shader */ - struct r600_resource *workaround_buf; + struct si_resource *workaround_buf; unsigned workaround_offset; }; -bool si_query_hw_init(struct si_screen *sscreen, - struct si_query_hw *query); void si_query_hw_destroy(struct si_screen *sscreen, - struct si_query *rquery); + struct si_query *squery); bool si_query_hw_begin(struct si_context *sctx, - struct si_query *rquery); + struct si_query *squery); bool si_query_hw_end(struct si_context *sctx, - struct si_query *rquery); + struct si_query *squery); bool si_query_hw_get_result(struct si_context *sctx, - struct si_query *rquery, + struct si_query *squery, bool wait, union pipe_query_result *result); +void si_query_hw_suspend(struct si_context *sctx, struct si_query *query); +void si_query_hw_resume(struct si_context *sctx, struct si_query *query); -/* Performance counters */ -enum { - /* This block is part of the shader engine */ - SI_PC_BLOCK_SE = (1 << 0), - - /* Expose per-instance groups instead of summing all instances (within - * an SE). */ - SI_PC_BLOCK_INSTANCE_GROUPS = (1 << 1), - - /* Expose per-SE groups instead of summing instances across SEs. */ - SI_PC_BLOCK_SE_GROUPS = (1 << 2), - - /* Shader block */ - SI_PC_BLOCK_SHADER = (1 << 3), - - /* Non-shader block with perfcounters windowed by shaders. */ - SI_PC_BLOCK_SHADER_WINDOWED = (1 << 4), -}; - -/* Describes a hardware block with performance counters. Multiple instances of - * each block, possibly per-SE, may exist on the chip. Depending on the block - * and on the user's configuration, we either - * (a) expose every instance as a performance counter group, - * (b) expose a single performance counter group that reports the sum over all - * instances, or - * (c) expose one performance counter group per instance, but summed over all - * shader engines. 
- */ -struct si_perfcounter_block { - const char *basename; - unsigned flags; - unsigned num_counters; - unsigned num_selectors; - unsigned num_instances; - - unsigned num_groups; - char *group_names; - unsigned group_name_stride; - - char *selector_names; - unsigned selector_name_stride; - - void *data; -}; +/* Performance counters */ struct si_perfcounters { unsigned num_groups; unsigned num_blocks; - struct si_perfcounter_block *blocks; + struct si_pc_block *blocks; unsigned num_stop_cs_dwords; unsigned num_instance_cs_dwords; - unsigned num_shader_types; - const char * const *shader_type_suffixes; - const unsigned *shader_type_bits; - - void (*emit_instance)(struct si_context *, - int se, int instance); - void (*emit_shaders)(struct si_context *, unsigned shaders); - void (*emit_select)(struct si_context *, - struct si_perfcounter_block *, - unsigned count, unsigned *selectors); - void (*emit_start)(struct si_context *, - struct r600_resource *buffer, uint64_t va); - void (*emit_stop)(struct si_context *, - struct r600_resource *buffer, uint64_t va); - void (*emit_read)(struct si_context *, - struct si_perfcounter_block *, - unsigned count, unsigned *selectors, - struct r600_resource *buffer, uint64_t va); - - void (*cleanup)(struct si_screen *); - bool separate_se; bool separate_instance; }; @@ -301,16 +249,6 @@ unsigned index, struct pipe_driver_query_group_info *info); -bool si_perfcounters_init(struct si_perfcounters *, unsigned num_blocks); -void si_perfcounters_add_block(struct si_screen *, - struct si_perfcounters *, - const char *name, unsigned flags, - unsigned counters, unsigned selectors, - unsigned instances, void *data); -void si_perfcounters_do_destroy(struct si_perfcounters *); -void si_query_hw_reset_buffers(struct si_context *sctx, - struct si_query_hw *query); - struct si_qbo_state { void *saved_compute; struct pipe_constant_buffer saved_const0; diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_shader.c 
mesa-19.0.1/src/gallium/drivers/radeonsi/si_shader.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_shader.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_shader.c 2019-03-31 23:16:37.000000000 +0000 @@ -86,6 +86,8 @@ union si_shader_part_key *key); static void si_build_ps_epilog_function(struct si_shader_context *ctx, union si_shader_part_key *key); +static void si_fix_resource_usage(struct si_screen *sscreen, + struct si_shader *shader); /* Ideally pass the sample mask input to the PS epilog as v14, which * is its usual location, so that the shader doesn't have to add v_mov. @@ -2318,18 +2320,9 @@ ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, ""); LLVMValueRef desc0, desc1; - if (HAVE_32BIT_POINTERS) { - desc0 = ptr; - desc1 = LLVMConstInt(ctx->i32, - S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0); - } else { - ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->v2i32, ""); - desc0 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_0, ""); - desc1 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_1, ""); - /* Mask out all bits except BASE_ADDRESS_HI. 
*/ - desc1 = LLVMBuildAnd(ctx->ac.builder, desc1, - LLVMConstInt(ctx->i32, ~C_008F04_BASE_ADDRESS_HI, 0), ""); - } + desc0 = ptr; + desc1 = LLVMConstInt(ctx->i32, + S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0); LLVMValueRef desc_elems[] = { desc0, @@ -3271,21 +3264,9 @@ unsigned param, unsigned return_index) { LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef ptr, lo, hi; - - if (HAVE_32BIT_POINTERS) { - ptr = LLVMGetParam(ctx->main_fn, param); - ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i32, ""); - return LLVMBuildInsertValue(builder, ret, ptr, return_index, ""); - } - - ptr = LLVMGetParam(ctx->main_fn, param); - ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i64, ""); - ptr = LLVMBuildBitCast(builder, ptr, ctx->v2i32, ""); - lo = LLVMBuildExtractElement(builder, ptr, ctx->i32_0, ""); - hi = LLVMBuildExtractElement(builder, ptr, ctx->i32_1, ""); - ret = LLVMBuildInsertValue(builder, ret, lo, return_index, ""); - return LLVMBuildInsertValue(builder, ret, hi, return_index + 1, ""); + LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, param); + ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i32, ""); + return LLVMBuildInsertValue(builder, ret, ptr, return_index, ""); } /* This only writes the tessellation factor levels. 
*/ @@ -3386,8 +3367,7 @@ LLVMValueRef ret = ctx->return_value; ret = si_insert_input_ptr(ctx, ret, 0, 0); - if (HAVE_32BIT_POINTERS) - ret = si_insert_input_ptr(ctx, ret, 1, 1); + ret = si_insert_input_ptr(ctx, ret, 1, 1); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, 2); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, 4); @@ -3402,11 +3382,6 @@ ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS); -#if !HAVE_32BIT_POINTERS - ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 4, - 8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES); -#endif - ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_offsets, @@ -3430,8 +3405,7 @@ LLVMValueRef ret = ctx->return_value; ret = si_insert_input_ptr(ctx, ret, 0, 0); - if (HAVE_32BIT_POINTERS) - ret = si_insert_input_ptr(ctx, ret, 1, 1); + ret = si_insert_input_ptr(ctx, ret, 1, 1); ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5); @@ -3442,11 +3416,6 @@ ctx->param_bindless_samplers_and_images, 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES); -#if !HAVE_32BIT_POINTERS - ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 4, - 8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES); -#endif - unsigned vgpr; if (ctx->type == PIPE_SHADER_VERTEX) vgpr = 8 + GFX9_VSGS_NUM_USER_SGPR; @@ -4592,6 +4561,30 @@ } } +static void declare_vs_blit_inputs(struct si_shader_context *ctx, + struct si_function_info *fninfo, + unsigned vs_blit_property) +{ + ctx->param_vs_blit_inputs = fninfo->num_params; + add_arg(fninfo, ARG_SGPR, ctx->i32); /* i16 x1, y1 */ + add_arg(fninfo, ARG_SGPR, ctx->i32); /* i16 x2, y2 */ + add_arg(fninfo, ARG_SGPR, ctx->f32); /* 
depth */ + + if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) { + add_arg(fninfo, ARG_SGPR, ctx->f32); /* color0 */ + add_arg(fninfo, ARG_SGPR, ctx->f32); /* color1 */ + add_arg(fninfo, ARG_SGPR, ctx->f32); /* color2 */ + add_arg(fninfo, ARG_SGPR, ctx->f32); /* color3 */ + } else if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD) { + add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.x1 */ + add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.y1 */ + add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.x2 */ + add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.y2 */ + add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.z */ + add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.w */ + } +} + static void declare_tes_input_vgprs(struct si_shader_context *ctx, struct si_function_info *fninfo) { @@ -4636,24 +4629,7 @@ declare_global_desc_pointers(ctx, &fninfo); if (vs_blit_property) { - ctx->param_vs_blit_inputs = fninfo.num_params; - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* i16 x1, y1 */ - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* i16 x2, y2 */ - add_arg(&fninfo, ARG_SGPR, ctx->f32); /* depth */ - - if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) { - add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color0 */ - add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color1 */ - add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color2 */ - add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color3 */ - } else if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD) { - add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.x1 */ - add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.y1 */ - add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.x2 */ - add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.y2 */ - add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.z */ - add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.w */ - } + declare_vs_blit_inputs(ctx, &fninfo, vs_blit_property); /* VGPRs */ declare_vs_input_vgprs(ctx, &fninfo, &num_prolog_vgprs); @@ -4710,13 +4686,8 @@ case SI_SHADER_MERGED_VERTEX_TESSCTRL: /* Merged stages have 8 
system SGPRs at the beginning. */ /* SPI_SHADER_USER_DATA_ADDR_LO/HI_HS */ - if (HAVE_32BIT_POINTERS) { - declare_per_stage_desc_pointers(ctx, &fninfo, - ctx->type == PIPE_SHADER_TESS_CTRL); - } else { - declare_const_and_shader_buffers(ctx, &fninfo, - ctx->type == PIPE_SHADER_TESS_CTRL); - } + declare_per_stage_desc_pointers(ctx, &fninfo, + ctx->type == PIPE_SHADER_TESS_CTRL); ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); @@ -4729,15 +4700,9 @@ ctx->type == PIPE_SHADER_VERTEX); declare_vs_specific_input_sgprs(ctx, &fninfo); - if (!HAVE_32BIT_POINTERS) { - declare_samplers_and_images(ctx, &fninfo, - ctx->type == PIPE_SHADER_TESS_CTRL); - } ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); - if (!HAVE_32BIT_POINTERS) /* Align to 2 dwords. */ - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR, ac_array_in_const32_addr_space(ctx->v4i32)); @@ -4771,13 +4736,8 @@ case SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY: /* Merged stages have 8 system SGPRs at the beginning. 
*/ /* SPI_SHADER_USER_DATA_ADDR_LO/HI_GS */ - if (HAVE_32BIT_POINTERS) { - declare_per_stage_desc_pointers(ctx, &fninfo, - ctx->type == PIPE_SHADER_GEOMETRY); - } else { - declare_const_and_shader_buffers(ctx, &fninfo, - ctx->type == PIPE_SHADER_GEOMETRY); - } + declare_per_stage_desc_pointers(ctx, &fninfo, + ctx->type == PIPE_SHADER_GEOMETRY); ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); @@ -4796,14 +4756,8 @@ ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* Declare as many input SGPRs as the VS has. */ - if (!HAVE_32BIT_POINTERS) - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ } - if (!HAVE_32BIT_POINTERS) { - declare_samplers_and_images(ctx, &fninfo, - ctx->type == PIPE_SHADER_GEOMETRY); - } if (ctx->type == PIPE_SHADER_VERTEX) { ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR, ac_array_in_const32_addr_space(ctx->v4i32)); @@ -5288,7 +5242,7 @@ !mainb->rodata_size); assert(!epilog || !epilog->rodata_size); - r600_resource_reference(&shader->bo, NULL); + si_resource_reference(&shader->bo, NULL); shader->bo = si_aligned_buffer_create(&sscreen->b, sscreen->cpdma_prefetch_writes_memory ? 0 : SI_RESOURCE_FLAG_READ_ONLY, @@ -5301,7 +5255,8 @@ /* Upload. */ ptr = sscreen->ws->buffer_map(shader->bo->buf, NULL, PIPE_TRANSFER_READ_WRITE | - PIPE_TRANSFER_UNSYNCHRONIZED); + PIPE_TRANSFER_UNSYNCHRONIZED | + RADEON_TRANSFER_TEMPORARY); /* Don't use util_memcpy_cpu_to_le32. LLVM binaries are * endian-independent. 
*/ @@ -5836,6 +5791,8 @@ if (r != 0) { FREE(shader); shader = NULL; + } else { + si_fix_resource_usage(sscreen, shader); } return shader; } @@ -7164,20 +7121,9 @@ LLVMValueRef ptr[2], list; bool merged_shader = is_merged_shader(ctx); - if (HAVE_32BIT_POINTERS) { - ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS); - list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0], - ac_array_in_const32_addr_space(ctx->v4i32), ""); - return list; - } - - /* Get the pointer to rw buffers. */ ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS); - ptr[1] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS + 1); - list = ac_build_gather_values(&ctx->ac, ptr, 2); - list = LLVMBuildBitCast(ctx->ac.builder, list, ctx->i64, ""); - list = LLVMBuildIntToPtr(ctx->ac.builder, list, - ac_array_in_const_addr_space(ctx->v4i32), ""); + list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0], + ac_array_in_const32_addr_space(ctx->v4i32), ""); return list; } @@ -7405,8 +7351,6 @@ add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); - if (!HAVE_32BIT_POINTERS) - add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); @@ -8174,9 +8118,9 @@ void si_shader_destroy(struct si_shader *shader) { if (shader->scratch_bo) - r600_resource_reference(&shader->scratch_bo, NULL); + si_resource_reference(&shader->scratch_bo, NULL); - r600_resource_reference(&shader->bo, NULL); + si_resource_reference(&shader->bo, NULL); if (!shader->is_binary_shared) ac_shader_binary_clean(&shader->binary); diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_shader.h mesa-19.0.1/src/gallium/drivers/radeonsi/si_shader.h --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_shader.h 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/radeonsi/si_shader.h 2019-03-31 23:16:37.000000000 +0000 @@ -158,21 +158,9 @@ /* SGPR user data indices */ enum { SI_SGPR_RW_BUFFERS, /* rings (& stream-out, VS only) */ -#if !HAVE_32BIT_POINTERS - SI_SGPR_RW_BUFFERS_HI, -#endif SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES, -#if !HAVE_32BIT_POINTERS - SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES_HI, -#endif SI_SGPR_CONST_AND_SHADER_BUFFERS, /* or just a constant buffer 0 pointer */ -#if !HAVE_32BIT_POINTERS - SI_SGPR_CONST_AND_SHADER_BUFFERS_HI, -#endif SI_SGPR_SAMPLERS_AND_IMAGES, -#if !HAVE_32BIT_POINTERS - SI_SGPR_SAMPLERS_AND_IMAGES_HI, -#endif SI_NUM_RESOURCE_SGPRS, /* API VS, TES without GS, GS copy shader */ @@ -200,35 +188,20 @@ GFX6_TCS_NUM_USER_SGPR, /* GFX9: Merged shaders. */ -#if HAVE_32BIT_POINTERS /* 2ND_CONST_AND_SHADER_BUFFERS is set in USER_DATA_ADDR_LO (SGPR0). */ /* 2ND_SAMPLERS_AND_IMAGES is set in USER_DATA_ADDR_HI (SGPR1). */ GFX9_MERGED_NUM_USER_SGPR = SI_VS_NUM_USER_SGPR, -#else - /* 2ND_CONST_AND_SHADER_BUFFERS is set in USER_DATA_ADDR_LO/HI (SGPR[0:1]). */ - GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES = SI_VS_NUM_USER_SGPR, - GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES_HI, - GFX9_MERGED_NUM_USER_SGPR, -#endif /* GFX9: Merged LS-HS (VS-TCS) only. 
*/ GFX9_SGPR_TCS_OFFCHIP_LAYOUT = GFX9_MERGED_NUM_USER_SGPR, GFX9_SGPR_TCS_OUT_OFFSETS, GFX9_SGPR_TCS_OUT_LAYOUT, -#if !HAVE_32BIT_POINTERS - GFX9_SGPR_align_for_vb_pointer, -#endif GFX9_TCS_NUM_USER_SGPR, /* GS limits */ GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS, -#if HAVE_32BIT_POINTERS GFX9_VSGS_NUM_USER_SGPR = SI_VS_NUM_USER_SGPR, GFX9_TESGS_NUM_USER_SGPR = SI_TES_NUM_USER_SGPR, -#else - GFX9_VSGS_NUM_USER_SGPR = GFX9_MERGED_NUM_USER_SGPR, - GFX9_TESGS_NUM_USER_SGPR = GFX9_MERGED_NUM_USER_SGPR, -#endif SI_GSCOPY_NUM_USER_SGPR = SI_NUM_VS_STATE_RESOURCE_SGPRS, /* PS only */ @@ -627,8 +600,8 @@ struct si_shader_part *epilog; struct si_pm4_state *pm4; - struct r600_resource *bo; - struct r600_resource *scratch_bo; + struct si_resource *bo; + struct si_resource *scratch_bo; struct si_shader_key key; struct util_queue_fence ready; bool compilation_failed; @@ -735,7 +708,6 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct tgsi_shader_info *info); void si_nir_scan_tess_ctrl(const struct nir_shader *nir, - const struct tgsi_shader_info *info, struct tgsi_tessctrl_info *out); void si_lower_nir(struct si_shader_selector *sel); diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c 2019-03-31 23:16:37.000000000 +0000 @@ -439,3 +439,80 @@ return sctx->b.create_compute_state(&sctx->b, &state); } + +/* Create a compute shader implementing copy_image. + * Luckily, this works with all texture targets except 1D_ARRAY. 
+ */ +void *si_create_copy_image_compute_shader(struct pipe_context *ctx) +{ + static const char text[] = + "COMP\n" + "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n" + "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n" + "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n" + "DCL SV[0], THREAD_ID\n" + "DCL SV[1], BLOCK_ID\n" + "DCL IMAGE[0], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n" + "DCL IMAGE[1], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n" + "DCL CONST[0][0..1]\n" // 0:xyzw 1:xyzw + "DCL TEMP[0..4], LOCAL\n" + "IMM[0] UINT32 {8, 1, 0, 0}\n" + "MOV TEMP[0].xyz, CONST[0][0].xyzw\n" + "UMAD TEMP[1].xyz, SV[1].xyzz, IMM[0].xxyy, SV[0].xyzz\n" + "UADD TEMP[2].xyz, TEMP[1].xyzx, TEMP[0].xyzx\n" + "LOAD TEMP[3], IMAGE[0], TEMP[2].xyzx, 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n" + "MOV TEMP[4].xyz, CONST[0][1].xyzw\n" + "UADD TEMP[2].xyz, TEMP[1].xyzx, TEMP[4].xyzx\n" + "STORE IMAGE[1], TEMP[2].xyzz, TEMP[3], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n" + "END\n"; + + struct tgsi_token tokens[1024]; + struct pipe_compute_state state = {0}; + + if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) { + assert(false); + return NULL; + } + + state.ir_type = PIPE_SHADER_IR_TGSI; + state.prog = tokens; + + return ctx->create_compute_state(ctx, &state); +} + +void *si_create_copy_image_compute_shader_1d_array(struct pipe_context *ctx) +{ + static const char text[] = + "COMP\n" + "PROPERTY CS_FIXED_BLOCK_WIDTH 64\n" + "PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n" + "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n" + "DCL SV[0], THREAD_ID\n" + "DCL SV[1], BLOCK_ID\n" + "DCL IMAGE[0], 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n" + "DCL IMAGE[1], 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n" + "DCL CONST[0][0..1]\n" // 0:xyzw 1:xyzw + "DCL TEMP[0..4], LOCAL\n" + "IMM[0] UINT32 {64, 1, 0, 0}\n" + "MOV TEMP[0].xy, CONST[0][0].xzzw\n" + "UMAD TEMP[1].xy, SV[1].xyzz, IMM[0].xyyy, SV[0].xyzz\n" + "UADD TEMP[2].xy, TEMP[1].xyzx, TEMP[0].xyzx\n" + "LOAD TEMP[3], IMAGE[0], TEMP[2].xyzx, 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n" + 
"MOV TEMP[4].xy, CONST[0][1].xzzw\n" + "UADD TEMP[2].xy, TEMP[1].xyzx, TEMP[4].xyzx\n" + "STORE IMAGE[1], TEMP[2].xyzz, TEMP[3], 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n" + "END\n"; + + struct tgsi_token tokens[1024]; + struct pipe_compute_state state = {0}; + + if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) { + assert(false); + return NULL; + } + + state.ir_type = PIPE_SHADER_IR_TGSI; + state.prog = tokens; + + return ctx->create_compute_state(ctx, &state); +} diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_shader_nir.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_shader_nir.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_shader_nir.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_shader_nir.c 2019-03-31 23:16:37.000000000 +0000 @@ -143,16 +143,39 @@ case nir_intrinsic_load_tess_level_outer: info->reads_tess_factors = true; break; - case nir_intrinsic_image_deref_load: + case nir_intrinsic_image_deref_load: { + nir_variable *var = intrinsic_get_var(intr); + if (var->data.bindless) { + info->uses_bindless_images = true; + + if (glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_BUF) + info->uses_bindless_buffer_load = true; + else + info->uses_bindless_image_load = true; + } + break; + } case nir_intrinsic_image_deref_size: case nir_intrinsic_image_deref_samples: { nir_variable *var = intrinsic_get_var(intr); if (var->data.bindless) info->uses_bindless_images = true; + break; + } + case nir_intrinsic_image_deref_store: { + const nir_deref_instr *image_deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr); + nir_variable *var = intrinsic_get_var(intr); + if (var->data.bindless) { + info->uses_bindless_images = true; + if (glsl_get_sampler_dim(image_deref->type) == GLSL_SAMPLER_DIM_BUF) + info->uses_bindless_buffer_store = true; + else + info->uses_bindless_image_store = true; + } + info->writes_memory = true; break; } - case nir_intrinsic_image_deref_store: case nir_intrinsic_image_deref_atomic_add: 
case nir_intrinsic_image_deref_atomic_min: case nir_intrinsic_image_deref_atomic_max: @@ -162,10 +185,16 @@ case nir_intrinsic_image_deref_atomic_exchange: case nir_intrinsic_image_deref_atomic_comp_swap: { nir_variable *var = intrinsic_get_var(intr); - if (var->data.bindless) + if (var->data.bindless) { info->uses_bindless_images = true; - /* fall-through */ + if (glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_BUF) + info->uses_bindless_buffer_atomic = true; + else + info->uses_bindless_image_atomic = true; + } + info->writes_memory = true; + break; } case nir_intrinsic_store_ssbo: case nir_intrinsic_ssbo_atomic_add: @@ -250,7 +279,6 @@ } void si_nir_scan_tess_ctrl(const struct nir_shader *nir, - const struct tgsi_shader_info *info, struct tgsi_tessctrl_info *out) { memset(out, 0, sizeof(*out)); @@ -258,14 +286,8 @@ if (nir->info.stage != MESA_SHADER_TESS_CTRL) return; - /* Initial value = true. Here the pass will accumulate results from - * multiple segments surrounded by barriers. If tess factors aren't - * written at all, it's a shader bug and we don't care if this will be - * true. - */ - out->tessfactors_are_def_in_all_invocs = true; - - /* TODO: Implement scanning of tess factors, see tgsi backend. */ + out->tessfactors_are_def_in_all_invocs = + ac_are_tessfactors_def_in_all_invocs(nir); } void si_nir_scan_shader(const struct nir_shader *nir, @@ -340,7 +362,7 @@ } } - if (nir->info.stage == MESA_SHADER_COMPUTE) { + if (gl_shader_stage_is_compute(nir->info.stage)) { info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] = nir->info.cs.local_size[0]; info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] = nir->info.cs.local_size[1]; info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH] = nir->info.cs.local_size[2]; @@ -654,7 +676,8 @@ * so we don't need to worry about the ordering. 
*/ if (variable->interface_type != NULL) { - if (variable->data.mode == nir_var_uniform) { + if (variable->data.mode == nir_var_uniform || + variable->data.mode == nir_var_mem_ubo) { unsigned block_count; if (base_type != GLSL_TYPE_INTERFACE) { @@ -678,7 +701,7 @@ _mesa_set_add(ubo_set, variable->interface_type); } - if (variable->data.mode == nir_var_shader_storage) { + if (variable->data.mode == nir_var_mem_ssbo) { /* TODO: make this more accurate */ info->shader_buffers_declared = u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS); @@ -795,8 +818,6 @@ ac_lower_indirect_derefs(sel->nir, sel->screen->info.chip_class); - NIR_PASS_V(sel->nir, nir_lower_load_const_to_scalar); - bool progress; do { progress = false; @@ -813,7 +834,7 @@ NIR_PASS(progress, sel->nir, nir_opt_if); NIR_PASS(progress, sel->nir, nir_opt_dead_cf); NIR_PASS(progress, sel->nir, nir_opt_cse); - NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8); + NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8, true); /* Needed for algebraic lowering */ NIR_PASS(progress, sel->nir, nir_opt_algebraic); @@ -825,6 +846,8 @@ NIR_PASS(progress, sel->nir, nir_opt_loop_unroll, 0); } } while (progress); + + NIR_PASS_V(sel->nir, nir_lower_bool_to_int32); } static void declare_nir_input_vs(struct si_shader_context *ctx, @@ -916,6 +939,12 @@ /* dynamic_index is the bindless handle */ if (image) { + /* For simplicity, bindless image descriptors use fixed + * 16-dword slots for now. 
+ */ + dynamic_index = LLVMBuildMul(ctx->ac.builder, dynamic_index, + LLVMConstInt(ctx->i32, 2, 0), ""); + return si_load_image_desc(ctx, list, dynamic_index, desc_type, dcc_off, true); } @@ -1028,7 +1057,7 @@ ctx->num_images = util_last_bit(info->images_declared); if (ctx->shader->selector->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE]) { - assert(nir->info.stage == MESA_SHADER_COMPUTE); + assert(gl_shader_stage_is_compute(nir->info.stage)); si_declare_compute_memory(ctx); } ac_nir_translate(&ctx->ac, &ctx->abi, nir); diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c 2019-03-31 23:16:37.000000000 +0000 @@ -496,36 +496,23 @@ { struct si_shader_context *ctx = si_shader_context(bld_base); - if (HAVE_LLVM < 0x0700) { - LLVMValueRef bfe_sm5 = - ac_build_bfe(&ctx->ac, emit_data->args[0], - emit_data->args[1], emit_data->args[2], - emit_data->info->opcode == TGSI_OPCODE_IBFE); + /* FIXME: LLVM 7 returns incorrect result when count is 0. + * https://bugs.freedesktop.org/show_bug.cgi?id=107276 + */ + LLVMValueRef zero = ctx->i32_0; + LLVMValueRef bfe_sm5 = + ac_build_bfe(&ctx->ac, emit_data->args[0], + emit_data->args[1], emit_data->args[2], + emit_data->info->opcode == TGSI_OPCODE_IBFE); - /* Correct for GLSL semantics. */ - LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2], - LLVMConstInt(ctx->i32, 32, 0), ""); - emit_data->output[emit_data->chan] = - LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, ""); - } else { - /* FIXME: LLVM 7 returns incorrect result when count is 0. 
- * https://bugs.freedesktop.org/show_bug.cgi?id=107276 - */ - LLVMValueRef zero = ctx->i32_0; - LLVMValueRef bfe_sm5 = - ac_build_bfe(&ctx->ac, emit_data->args[0], - emit_data->args[1], emit_data->args[2], - emit_data->info->opcode == TGSI_OPCODE_IBFE); - - /* Correct for GLSL semantics. */ - LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2], - LLVMConstInt(ctx->i32, 32, 0), ""); - LLVMValueRef cond2 = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, emit_data->args[2], - zero, ""); - bfe_sm5 = LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, ""); - emit_data->output[emit_data->chan] = - LLVMBuildSelect(ctx->ac.builder, cond2, zero, bfe_sm5, ""); - } + /* Correct for GLSL semantics. */ + LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2], + LLVMConstInt(ctx->i32, 32, 0), ""); + LLVMValueRef cond2 = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, emit_data->args[2], + zero, ""); + bfe_sm5 = LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, ""); + emit_data->output[emit_data->chan] = + LLVMBuildSelect(ctx->ac.builder, cond2, zero, bfe_sm5, ""); } /* this is ffs in C */ diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c 2019-03-31 23:16:37.000000000 +0000 @@ -396,20 +396,53 @@ * For LOAD, set this to (store | atomic) slot usage in the shader. * For STORE, set this to (load | atomic) slot usage in the shader. * \param images_reverse_access_mask Same as above, but for images. + * \param bindless_buffer_reverse_access_mask Same as above, but for bindless image buffers. + * \param bindless_image_reverse_access_mask Same as above, but for bindless images. 
*/ static bool is_oneway_access_only(const struct tgsi_full_instruction *inst, const struct tgsi_shader_info *info, unsigned shader_buffers_reverse_access_mask, - unsigned images_reverse_access_mask) + unsigned images_reverse_access_mask, + bool bindless_buffer_reverse_access_mask, + bool bindless_image_reverse_access_mask) { + enum tgsi_file_type resource_file; + unsigned resource_index; + bool resource_indirect; + + if (inst->Instruction.Opcode == TGSI_OPCODE_STORE) { + resource_file = inst->Dst[0].Register.File; + resource_index = inst->Dst[0].Register.Index; + resource_indirect = inst->Dst[0].Register.Indirect; + } else { + resource_file = inst->Src[0].Register.File; + resource_index = inst->Src[0].Register.Index; + resource_indirect = inst->Src[0].Register.Indirect; + } + + assert(resource_file == TGSI_FILE_BUFFER || + resource_file == TGSI_FILE_IMAGE || + /* bindless image */ + resource_file == TGSI_FILE_INPUT || + resource_file == TGSI_FILE_OUTPUT || + resource_file == TGSI_FILE_CONSTANT || + resource_file == TGSI_FILE_TEMPORARY || + resource_file == TGSI_FILE_IMMEDIATE); + + assert(resource_file != TGSI_FILE_BUFFER || + inst->Memory.Texture == TGSI_TEXTURE_BUFFER); + + bool bindless = resource_file != TGSI_FILE_BUFFER && + resource_file != TGSI_FILE_IMAGE; + /* RESTRICT means NOALIAS. * If there are no writes, we can assume the accessed memory is read-only. * If there are no reads, we can assume the accessed memory is write-only. 
*/ - if (inst->Memory.Qualifier & TGSI_MEMORY_RESTRICT) { + if (inst->Memory.Qualifier & TGSI_MEMORY_RESTRICT && !bindless) { unsigned reverse_access_mask; - if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { + if (resource_file == TGSI_FILE_BUFFER) { reverse_access_mask = shader_buffers_reverse_access_mask; } else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { reverse_access_mask = info->images_buffers & @@ -419,12 +452,12 @@ images_reverse_access_mask; } - if (inst->Src[0].Register.Indirect) { + if (resource_indirect) { if (!reverse_access_mask) return true; } else { if (!(reverse_access_mask & - (1u << inst->Src[0].Register.Index))) + (1u << resource_index))) return true; } } @@ -437,15 +470,15 @@ * Same for the case when there are no writes/reads for non-buffer * images. */ - if (inst->Src[0].Register.File == TGSI_FILE_BUFFER || - (inst->Memory.Texture == TGSI_TEXTURE_BUFFER && - (inst->Src[0].Register.File == TGSI_FILE_IMAGE || - tgsi_is_bindless_image_file(inst->Src[0].Register.File)))) { + if (resource_file == TGSI_FILE_BUFFER || + inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { if (!shader_buffers_reverse_access_mask && - !(info->images_buffers & images_reverse_access_mask)) + !(info->images_buffers & images_reverse_access_mask) && + !bindless_buffer_reverse_access_mask) return true; } else { - if (!(~info->images_buffers & images_reverse_access_mask)) + if (!(~info->images_buffers & images_reverse_access_mask) && + !bindless_image_reverse_access_mask) return true; } return false; @@ -474,8 +507,7 @@ bool ubo = inst->Src[0].Register.File == TGSI_FILE_CONSTBUF; args.resource = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], ubo); voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 1, 0)); - } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE || - tgsi_is_bindless_image_file(inst->Src[0].Register.File)) { + } else { unsigned target = inst->Memory.Texture; image_fetch_rsrc(bld_base, &inst->Src[0], false, target, &args.resource); @@ 
-499,7 +531,11 @@ info->shader_buffers_store | info->shader_buffers_atomic, info->images_store | - info->images_atomic); + info->images_atomic, + info->uses_bindless_buffer_store | + info->uses_bindless_buffer_atomic, + info->uses_bindless_image_store | + info->uses_bindless_image_atomic); args.cache_policy = get_cache_policy(ctx, inst, false, false, false); if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { @@ -650,42 +686,42 @@ struct tgsi_full_src_register resource_reg = tgsi_full_src_register_from_dst(&inst->Dst[0]); unsigned target = inst->Memory.Texture; + + if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) { + store_emit_memory(ctx, emit_data); + return; + } + bool writeonly_memory = is_oneway_access_only(inst, info, info->shader_buffers_load | info->shader_buffers_atomic, info->images_load | - info->images_atomic); - bool is_image = inst->Dst[0].Register.File == TGSI_FILE_IMAGE || - tgsi_is_bindless_image_file(inst->Dst[0].Register.File); - LLVMValueRef chans[4], value; + info->images_atomic, + info->uses_bindless_buffer_load | + info->uses_bindless_buffer_atomic, + info->uses_bindless_image_load | + info->uses_bindless_image_atomic); + LLVMValueRef chans[4]; LLVMValueRef vindex = ctx->i32_0; LLVMValueRef voffset = ctx->i32_0; struct ac_image_args args = {}; - if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) { - store_emit_memory(ctx, emit_data); - return; - } - for (unsigned chan = 0; chan < 4; ++chan) chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan); - value = ac_build_gather_values(&ctx->ac, chans, 4); - if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) { args.resource = shader_buffer_fetch_rsrc(ctx, &resource_reg, false); voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 0, 0)); - } else if (is_image) { + } else { image_fetch_rsrc(bld_base, &resource_reg, true, target, &args.resource); image_fetch_coords(bld_base, inst, 0, args.resource, args.coords); vindex = args.coords[0]; /* for buffers only */ - } else { - 
unreachable("unexpected register file"); } if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) ac_build_waitcnt(&ctx->ac, VM_CNT); + bool is_image = inst->Dst[0].Register.File != TGSI_FILE_BUFFER; args.cache_policy = get_cache_policy(ctx, inst, false, /* atomic */ is_image, /* may_store_unaligned */ @@ -693,27 +729,46 @@ if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) { store_emit_buffer(ctx, args.resource, inst->Dst[0].Register.WriteMask, - value, voffset, args.cache_policy, writeonly_memory); + ac_build_gather_values(&ctx->ac, chans, 4), + voffset, args.cache_policy, writeonly_memory); return; } if (target == TGSI_TEXTURE_BUFFER) { - LLVMValueRef buf_args[] = { - value, + unsigned num_channels = util_last_bit(inst->Dst[0].Register.WriteMask); + num_channels = util_next_power_of_two(num_channels); + + LLVMValueRef buf_args[6] = { + ac_build_gather_values(&ctx->ac, chans, 4), args.resource, vindex, ctx->i32_0, /* voffset */ - LLVMConstInt(ctx->i1, !!(args.cache_policy & ac_glc), 0), - LLVMConstInt(ctx->i1, !!(args.cache_policy & ac_slc), 0), }; + if (HAVE_LLVM >= 0x0800) { + buf_args[4] = ctx->i32_0; /* soffset */ + buf_args[5] = LLVMConstInt(ctx->i1, args.cache_policy, 0); + } else { + buf_args[4] = LLVMConstInt(ctx->i1, !!(args.cache_policy & ac_glc), 0); + buf_args[5] = LLVMConstInt(ctx->i1, !!(args.cache_policy & ac_slc), 0); + } + + const char *types[] = { "f32", "v2f32", "v4f32" }; + char name[128]; + + snprintf(name, sizeof(name), "%s.%s", + HAVE_LLVM >= 0x0800 ? 
"llvm.amdgcn.struct.buffer.store.format" : + "llvm.amdgcn.buffer.store.format", + types[CLAMP(num_channels, 1, 3) - 1]); + emit_data->output[emit_data->chan] = ac_build_intrinsic( - &ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", + &ctx->ac, + name, ctx->voidt, buf_args, 6, ac_get_store_intr_attribs(writeonly_memory)); } else { args.opcode = ac_image_store; - args.data[0] = value; + args.data[0] = ac_build_gather_values(&ctx->ac, chans, 4); args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture); args.attributes = ac_get_store_intr_attribs(writeonly_memory); args.dmask = 0xf; @@ -822,19 +877,45 @@ if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { args.resource = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false); voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 1, 0)); - } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE || - tgsi_is_bindless_image_file(inst->Src[0].Register.File)) { + } else { image_fetch_rsrc(bld_base, &inst->Src[0], true, inst->Memory.Texture, &args.resource); image_fetch_coords(bld_base, inst, 1, args.resource, args.coords); vindex = args.coords[0]; /* for buffers only */ } - if (inst->Src[0].Register.File == TGSI_FILE_BUFFER || + if (HAVE_LLVM >= 0x0800 && + inst->Src[0].Register.File != TGSI_FILE_BUFFER && inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { LLVMValueRef buf_args[7]; unsigned num_args = 0; + buf_args[num_args++] = args.data[0]; + if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) + buf_args[num_args++] = args.data[1]; + + buf_args[num_args++] = args.resource; + buf_args[num_args++] = vindex; + buf_args[num_args++] = voffset; + buf_args[num_args++] = ctx->i32_0; /* soffset */ + buf_args[num_args++] = LLVMConstInt(ctx->i32, args.cache_policy & ac_slc, 0); + + char intrinsic_name[64]; + snprintf(intrinsic_name, sizeof(intrinsic_name), + "llvm.amdgcn.struct.buffer.atomic.%s", action->intr_name); + emit_data->output[emit_data->chan] = + ac_to_float(&ctx->ac, + 
ac_build_intrinsic(&ctx->ac, intrinsic_name, + ctx->i32, buf_args, num_args, 0)); + return; + } + + if (inst->Src[0].Register.File == TGSI_FILE_BUFFER || + (HAVE_LLVM < 0x0800 && + inst->Memory.Texture == TGSI_TEXTURE_BUFFER)) { + LLVMValueRef buf_args[7]; + unsigned num_args = 0; + buf_args[num_args++] = args.data[0]; if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) buf_args[num_args++] = args.data[1]; diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_state.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_state.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_state.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_state.c 2019-03-31 23:16:37.000000000 +0000 @@ -121,7 +121,7 @@ S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) | S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) | S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable) | - S_028424_DISABLE_CONSTANT_ENCODE_REG(sctx->family == CHIP_RAVEN2)); + S_028424_DISABLE_CONSTANT_ENCODE_REG(sctx->screen->has_dcc_constant_encode)); } /* RB+ register settings. */ @@ -2151,7 +2151,7 @@ unsigned retval = 0; if (target >= PIPE_MAX_TEXTURE_TYPES) { - PRINT_ERR("r600: unsupported texture type %d\n", target); + PRINT_ERR("radeonsi: unsupported texture type %d\n", target); return false; } @@ -3570,7 +3570,7 @@ * @param state 256-bit descriptor; only the high 128 bits are filled in */ void -si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf, +si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf, enum pipe_format format, unsigned offset, unsigned size, uint32_t *state) @@ -3613,14 +3613,11 @@ * - For VMEM and inst.IDXEN == 0 or STRIDE == 0, it's in byte units. * - For VMEM and inst.IDXEN == 1 and STRIDE != 0, it's in units of STRIDE. 
*/ - if (screen->info.chip_class >= GFX9) - /* When vindex == 0, LLVM sets IDXEN = 0, thus changing units + if (screen->info.chip_class >= GFX9 && HAVE_LLVM < 0x0800) + /* When vindex == 0, LLVM < 8.0 sets IDXEN = 0, thus changing units * from STRIDE to bytes. This works around it by setting * NUM_RECORDS to at least the size of one element, so that * the first element is readable when IDXEN == 0. - * - * TODO: Fix this in LLVM, but do we need a new intrinsic where - * IDXEN is enforced? */ num_records = num_records ? MAX2(num_records, stride) : 0; else if (screen->info.chip_class == VI) @@ -4064,7 +4061,7 @@ /* Buffer resource. */ if (texture->target == PIPE_BUFFER) { si_make_buffer_descriptor(sctx->screen, - r600_resource(texture), + si_resource(texture), state->format, state->u.buf.offset, state->u.buf.size, @@ -4584,7 +4581,7 @@ unsigned num_divisors = util_last_bit(v->instance_divisor_is_fetched); v->instance_divisor_factor_buffer = - (struct r600_resource*) + (struct si_resource*) pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT, num_divisors * sizeof(divisor_factors[0])); if (!v->instance_divisor_factor_buffer) { @@ -4633,7 +4630,7 @@ if (sctx->vertex_elements == state) sctx->vertex_elements = NULL; - r600_resource_reference(&v->instance_divisor_factor_buffer, NULL); + si_resource_reference(&v->instance_divisor_factor_buffer, NULL); FREE(state); } @@ -4658,7 +4655,7 @@ dsti->stride = src->stride; si_context_add_resource_size(sctx, buf); if (buf) - r600_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER; + si_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER; } } else { for (i = 0; i < count; i++) { @@ -4687,7 +4684,7 @@ cb.user_buffer = NULL; cb.buffer_size = sizeof(array); - si_upload_const_buffer(sctx, (struct r600_resource**)&cb.buffer, + si_upload_const_buffer(sctx, (struct si_resource**)&cb.buffer, (void*)array, sizeof(array), &cb.buffer_offset); @@ -4828,8 +4825,6 @@ sctx->b.set_active_query_state = si_set_active_query_state; - 
sctx->b.draw_vbo = si_draw_vbo; - si_init_config(sctx); } diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_state_draw.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_state_draw.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_state_draw.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_state_draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -221,7 +221,7 @@ assert(num_tcs_input_cp <= 32); assert(num_tcs_output_cp <= 32); - uint64_t ring_va = r600_resource(sctx->tess_rings)->gpu_address; + uint64_t ring_va = si_resource(sctx->tess_rings)->gpu_address; assert((ring_va & u_bit_consecutive(0, 19)) == 0); tcs_in_layout = S_VS_STATE_LS_OUT_PATCH_SIZE(input_patch_size / 4) | @@ -315,10 +315,12 @@ switch (info->mode) { case PIPE_PRIM_PATCHES: return info->count / info->vertices_per_patch; + case PIPE_PRIM_POLYGON: + return info->count >= 3; case SI_PRIM_RECTANGLE_LIST: return info->count / 3; default: - return u_prims_for_vertices(info->mode, info->count); + return u_decomposed_prims_for_vertices(info->mode, info->count); } } @@ -458,7 +460,7 @@ S_030960_EN_INST_OPT_ADV(sscreen->info.chip_class >= GFX9); } -void si_init_ia_multi_vgt_param_table(struct si_context *sctx) +static void si_init_ia_multi_vgt_param_table(struct si_context *sctx) { for (int prim = 0; prim <= SI_PRIM_RECTANGLE_LIST; prim++) for (int uses_instancing = 0; uses_instancing < 2; uses_instancing++) @@ -621,7 +623,9 @@ /* Draw state. 
*/ if (ia_multi_vgt_param != sctx->last_multi_vgt_param) { if (sctx->chip_class >= GFX9) - radeon_set_uconfig_reg_idx(cs, R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param); + radeon_set_uconfig_reg_idx(cs, sctx->screen, + R_030960_IA_MULTI_VGT_PARAM, 4, + ia_multi_vgt_param); else if (sctx->chip_class >= CIK) radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param); else @@ -631,7 +635,8 @@ } if (prim != sctx->last_prim) { if (sctx->chip_class >= CIK) - radeon_set_uconfig_reg_idx(cs, R_030908_VGT_PRIMITIVE_TYPE, 1, prim); + radeon_set_uconfig_reg_idx(cs, sctx->screen, + R_030908_VGT_PRIMITIVE_TYPE, 1, prim); else radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim); @@ -719,8 +724,9 @@ } if (sctx->chip_class >= GFX9) { - radeon_set_uconfig_reg_idx(cs, R_03090C_VGT_INDEX_TYPE, - 2, index_type); + radeon_set_uconfig_reg_idx(cs, sctx->screen, + R_03090C_VGT_INDEX_TYPE, 2, + index_type); } else { radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0)); radeon_emit(cs, index_type); @@ -731,10 +737,10 @@ index_max_size = (indexbuf->width0 - index_offset) / index_size; - index_va = r600_resource(indexbuf)->gpu_address + index_offset; + index_va = si_resource(indexbuf)->gpu_address + index_offset; radeon_add_to_buffer_list(sctx, sctx->gfx_cs, - r600_resource(indexbuf), + si_resource(indexbuf), RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER); } else { /* On CI and later, non-indexed draws overwrite VGT_INDEX_TYPE, @@ -745,7 +751,7 @@ } if (indirect) { - uint64_t indirect_va = r600_resource(indirect->buffer)->gpu_address; + uint64_t indirect_va = si_resource(indirect->buffer)->gpu_address; assert(indirect_va % 8 == 0); @@ -757,7 +763,7 @@ radeon_emit(cs, indirect_va >> 32); radeon_add_to_buffer_list(sctx, sctx->gfx_cs, - r600_resource(indirect->buffer), + si_resource(indirect->buffer), RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT); unsigned di_src_sel = index_size ? 
V_0287F0_DI_SRC_SEL_DMA @@ -786,8 +792,8 @@ uint64_t count_va = 0; if (indirect->indirect_draw_count) { - struct r600_resource *params_buf = - r600_resource(indirect->indirect_draw_count); + struct si_resource *params_buf = + si_resource(indirect->indirect_draw_count); radeon_add_to_buffer_list( sctx, sctx->gfx_cs, params_buf, @@ -812,10 +818,15 @@ radeon_emit(cs, di_src_sel); } } else { + unsigned instance_count = info->instance_count; int base_vertex; - radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0)); - radeon_emit(cs, info->instance_count); + if (sctx->last_instance_count == SI_INSTANCE_COUNT_UNKNOWN || + sctx->last_instance_count != instance_count) { + radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0)); + radeon_emit(cs, instance_count); + sctx->last_instance_count = instance_count; + } /* Base vertex and start instance. */ base_vertex = index_size ? info->index_bias : info->start; @@ -1050,7 +1061,8 @@ EOP_DATA_SEL_VALUE_32BIT, sctx->wait_mem_scratch, va, sctx->wait_mem_number, SI_NOT_QUERY); - si_cp_wait_mem(sctx, va, sctx->wait_mem_number, 0xffffffff, 0); + si_cp_wait_mem(sctx, cs, va, sctx->wait_mem_number, 0xffffffff, + WAIT_REG_MEM_EQUAL); } /* Make sure ME is idle (it executes most packets) before continuing. @@ -1251,7 +1263,7 @@ si_emit_draw_registers(sctx, info, num_patches); } -void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) +static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct si_context *sctx = (struct si_context *)ctx; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; @@ -1406,11 +1418,11 @@ /* info->start will be added by the drawing code */ index_offset -= start_offset; } else if (sctx->chip_class <= CIK && - r600_resource(indexbuf)->TC_L2_dirty) { + si_resource(indexbuf)->TC_L2_dirty) { /* VI reads index buffers through TC L2, so it doesn't * need this. 
*/ sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; - r600_resource(indexbuf)->TC_L2_dirty = false; + si_resource(indexbuf)->TC_L2_dirty = false; } } @@ -1422,15 +1434,15 @@ /* Indirect buffers use TC L2 on GFX9, but not older hw. */ if (sctx->chip_class <= VI) { - if (r600_resource(indirect->buffer)->TC_L2_dirty) { + if (si_resource(indirect->buffer)->TC_L2_dirty) { sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; - r600_resource(indirect->buffer)->TC_L2_dirty = false; + si_resource(indirect->buffer)->TC_L2_dirty = false; } if (indirect->indirect_draw_count && - r600_resource(indirect->indirect_draw_count)->TC_L2_dirty) { + si_resource(indirect->indirect_draw_count)->TC_L2_dirty) { sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; - r600_resource(indirect->indirect_draw_count)->TC_L2_dirty = false; + si_resource(indirect->indirect_draw_count)->TC_L2_dirty = false; } } } @@ -1531,13 +1543,14 @@ pipe_resource_reference(&indexbuf, NULL); } -void si_draw_rectangle(struct blitter_context *blitter, - void *vertex_elements_cso, - blitter_get_vs_func get_vs, - int x1, int y1, int x2, int y2, - float depth, unsigned num_instances, - enum blitter_attrib_type type, - const union blitter_attrib *attrib) +static void +si_draw_rectangle(struct blitter_context *blitter, + void *vertex_elements_cso, + blitter_get_vs_func get_vs, + int x1, int y1, int x2, int y2, + float depth, unsigned num_instances, + enum blitter_attrib_type type, + const union blitter_attrib *attrib) { struct pipe_context *pipe = util_blitter_get_pipe(blitter); struct si_context *sctx = (struct si_context*)pipe; @@ -1579,19 +1592,23 @@ void si_trace_emit(struct si_context *sctx) { struct radeon_cmdbuf *cs = sctx->gfx_cs; - uint64_t va = sctx->current_saved_cs->trace_buf->gpu_address; uint32_t trace_id = ++sctx->current_saved_cs->trace_id; - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_ME)); - radeon_emit(cs, va); - 
radeon_emit(cs, va >> 32); - radeon_emit(cs, trace_id); + si_cp_write_data(sctx, sctx->current_saved_cs->trace_buf, + 0, 4, V_370_MEM, V_370_ME, &trace_id); + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, AC_ENCODE_TRACE_POINT(trace_id)); if (sctx->log) u_log_flush(sctx->log); } + +void si_init_draw_functions(struct si_context *sctx) +{ + sctx->b.draw_vbo = si_draw_vbo; + + sctx->blitter->draw_rectangle = si_draw_rectangle; + + si_init_ia_multi_vgt_param_table(sctx); +} diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_state.h mesa-19.0.1/src/gallium/drivers/radeonsi/si_state.h --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_state.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_state.h 2019-03-31 23:16:37.000000000 +0000 @@ -132,7 +132,7 @@ struct si_vertex_elements { - struct r600_resource *instance_divisor_factor_buffer; + struct si_resource *instance_divisor_factor_buffer; uint32_t rsrc_word3[SI_MAX_ATTRIBS]; uint16_t src_offset[SI_MAX_ATTRIBS]; uint8_t fix_fetch[SI_MAX_ATTRIBS]; @@ -384,7 +384,7 @@ uint32_t *gpu_list; /* The buffer where the descriptors have been uploaded. */ - struct r600_resource *buffer; + struct si_resource *buffer; uint64_t gpu_address; /* The maximum number of descriptors. 
*/ @@ -465,7 +465,7 @@ void si_release_all_descriptors(struct si_context *sctx); void si_all_descriptors_begin_new_cs(struct si_context *sctx); void si_all_resident_buffers_begin_new_cs(struct si_context *sctx); -void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer, +void si_upload_const_buffer(struct si_context *sctx, struct si_resource **buf, const uint8_t *ptr, unsigned size, uint32_t *const_offset); void si_update_all_texture_descriptors(struct si_context *sctx); void si_shader_change_notify(struct si_context *sctx); @@ -474,6 +474,8 @@ void si_emit_compute_shader_pointers(struct si_context *sctx); void si_set_rw_buffer(struct si_context *sctx, uint slot, const struct pipe_constant_buffer *input); +void si_set_rw_shader_buffer(struct si_context *sctx, uint slot, + const struct pipe_shader_buffer *sbuffer); void si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx, uint64_t new_active_mask); void si_set_active_descriptors_for_shader(struct si_context *sctx, @@ -490,7 +492,7 @@ void si_init_state_functions(struct si_context *sctx); void si_init_screen_state_functions(struct si_screen *sscreen); void -si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf, +si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf, enum pipe_format format, unsigned offset, unsigned size, uint32_t *state); @@ -541,17 +543,9 @@ uint64_t *samplers_and_images); /* si_state_draw.c */ -void si_init_ia_multi_vgt_param_table(struct si_context *sctx); void si_emit_cache_flush(struct si_context *sctx); -void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo); -void si_draw_rectangle(struct blitter_context *blitter, - void *vertex_elements_cso, - blitter_get_vs_func get_vs, - int x1, int y1, int x2, int y2, - float depth, unsigned num_instances, - enum blitter_attrib_type type, - const union blitter_attrib *attrib); void si_trace_emit(struct si_context *sctx); +void 
si_init_draw_functions(struct si_context *sctx); /* si_state_msaa.c */ void si_init_msaa_functions(struct si_context *sctx); diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_state_shaders.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_state_shaders.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_state_shaders.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_state_shaders.c 2019-03-31 23:16:37.000000000 +0000 @@ -337,10 +337,10 @@ /* SHADER STATES */ static void si_set_tesseval_regs(struct si_screen *sscreen, - struct si_shader_selector *tes, + const struct si_shader_selector *tes, struct si_pm4_state *pm4) { - struct tgsi_shader_info *info = &tes->info; + const struct tgsi_shader_info *info = &tes->info; unsigned tes_prim_mode = info->properties[TGSI_PROPERTY_TES_PRIM_MODE]; unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING]; bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW]; @@ -464,12 +464,7 @@ static unsigned si_get_num_vs_user_sgprs(unsigned num_always_on_user_sgprs) { /* Add the pointer to VBO descriptors. */ - if (HAVE_32BIT_POINTERS) { - return num_always_on_user_sgprs + 1; - } else { - assert(num_always_on_user_sgprs % 2 == 0); - return num_always_on_user_sgprs + 2; - } + return num_always_on_user_sgprs + 1; } static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader) @@ -2243,7 +2238,7 @@ sel->nir = state->ir.nir; si_nir_scan_shader(sel->nir, &sel->info); - si_nir_scan_tess_ctrl(sel->nir, &sel->info, &sel->tcs_info); + si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info); si_lower_nir(sel); } @@ -3094,7 +3089,7 @@ /* Update the shader state to use the new shader bo. 
*/ si_shader_init_pm4_state(sctx->screen, shader); - r600_resource_reference(&shader->scratch_bo, sctx->scratch_buffer); + si_resource_reference(&shader->scratch_bo, sctx->scratch_buffer); si_shader_unlock(shader); return 1; @@ -3204,7 +3199,7 @@ if (scratch_needed_size > 0) { if (scratch_needed_size > current_scratch_buffer_size) { /* Create a bigger scratch buffer */ - r600_resource_reference(&sctx->scratch_buffer, NULL); + si_resource_reference(&sctx->scratch_buffer, NULL); sctx->scratch_buffer = si_aligned_buffer_create(&sctx->screen->b, @@ -3254,10 +3249,10 @@ si_init_config_add_vgt_flush(sctx); - si_pm4_add_bo(sctx->init_config, r600_resource(sctx->tess_rings), + si_pm4_add_bo(sctx->init_config, si_resource(sctx->tess_rings), RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS); - uint64_t factor_va = r600_resource(sctx->tess_rings)->gpu_address + + uint64_t factor_va = si_resource(sctx->tess_rings)->gpu_address + sctx->screen->tess_offchip_ring_size; /* Append these registers to the init config state. 
*/ diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_state_streamout.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_state_streamout.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_state_streamout.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_state_streamout.c 2019-03-31 23:16:37.000000000 +0000 @@ -43,7 +43,7 @@ { struct si_context *sctx = (struct si_context *)ctx; struct si_streamout_target *t; - struct r600_resource *rbuffer = r600_resource(buffer); + struct si_resource *buf = si_resource(buffer); t = CALLOC_STRUCT(si_streamout_target); if (!t) { @@ -64,7 +64,7 @@ t->b.buffer_offset = buffer_offset; t->b.buffer_size = buffer_size; - util_range_add(&rbuffer->valid_buffer_range, buffer_offset, + util_range_add(&buf->valid_buffer_range, buffer_offset, buffer_offset + buffer_size); return &t->b; } @@ -74,7 +74,7 @@ { struct si_streamout_target *t = (struct si_streamout_target*)target; pipe_resource_reference(&t->b.buffer, NULL); - r600_resource_reference(&t->buf_filled_size, NULL); + si_resource_reference(&t->buf_filled_size, NULL); FREE(t); } @@ -93,10 +93,8 @@ const unsigned *offsets) { struct si_context *sctx = (struct si_context *)ctx; - struct si_buffer_resources *buffers = &sctx->rw_buffers; - struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS]; unsigned old_num_targets = sctx->streamout.num_targets; - unsigned i, bufidx; + unsigned i; /* We are going to unbind the buffers. Mark which caches need to be flushed. */ if (sctx->streamout.num_targets && sctx->streamout.begin_emitted) { @@ -111,7 +109,7 @@ */ for (i = 0; i < sctx->streamout.num_targets; i++) if (sctx->streamout.targets[i]) - r600_resource(sctx->streamout.targets[i]->b.buffer)->TC_L2_dirty = true; + si_resource(sctx->streamout.targets[i]->b.buffer)->TC_L2_dirty = true; /* Invalidate the scalar cache in case a streamout buffer is * going to be used as a constant buffer. 
@@ -175,57 +173,20 @@ /* Set the shader resources.*/ for (i = 0; i < num_targets; i++) { - bufidx = SI_VS_STREAMOUT_BUF0 + i; - if (targets[i]) { - struct pipe_resource *buffer = targets[i]->buffer; - uint64_t va = r600_resource(buffer)->gpu_address; - - /* Set the descriptor. - * - * On VI, the format must be non-INVALID, otherwise - * the buffer will be considered not bound and store - * instructions will be no-ops. - */ - uint32_t *desc = descs->list + bufidx*4; - desc[0] = va; - desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); - desc[2] = 0xffffffff; - desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); - - /* Set the resource. */ - pipe_resource_reference(&buffers->buffers[bufidx], - buffer); - radeon_add_to_gfx_buffer_list_check_mem(sctx, - r600_resource(buffer), - buffers->shader_usage, - RADEON_PRIO_SHADER_RW_BUFFER, - true); - r600_resource(buffer)->bind_history |= PIPE_BIND_STREAM_OUTPUT; - - buffers->enabled_mask |= 1u << bufidx; + struct pipe_shader_buffer sbuf; + sbuf.buffer = targets[i]->buffer; + sbuf.buffer_offset = 0; + sbuf.buffer_size = targets[i]->buffer_offset + + targets[i]->buffer_size; + si_set_rw_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, &sbuf); + si_resource(targets[i]->buffer)->bind_history |= PIPE_BIND_STREAM_OUTPUT; } else { - /* Clear the descriptor and unset the resource. */ - memset(descs->list + bufidx*4, 0, - sizeof(uint32_t) * 4); - pipe_resource_reference(&buffers->buffers[bufidx], - NULL); - buffers->enabled_mask &= ~(1u << bufidx); + si_set_rw_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, NULL); } } - for (; i < old_num_targets; i++) { - bufidx = SI_VS_STREAMOUT_BUF0 + i; - /* Clear the descriptor and unset the resource. 
*/ - memset(descs->list + bufidx*4, 0, sizeof(uint32_t) * 4); - pipe_resource_reference(&buffers->buffers[bufidx], NULL); - buffers->enabled_mask &= ~(1u << bufidx); - } - - sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS; + for (; i < old_num_targets; i++) + si_set_rw_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, NULL); } static void si_flush_vgt_streamout(struct si_context *sctx) diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_state_viewport.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_state_viewport.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_state_viewport.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_state_viewport.c 2019-03-31 23:16:37.000000000 +0000 @@ -185,6 +185,16 @@ const unsigned hw_screen_offset_alignment = ctx->chip_class >= VI ? 16 : MAX2(ctx->screen->se_tile_repeat, 16); + /* Indexed by quantization modes */ + static unsigned max_viewport_size[] = {65535, 16383, 4095}; + + /* Ensure that the whole viewport stays representable in + * absolute coordinates. + * See comment in si_set_viewport_states. + */ + assert(vp_as_scissor.maxx <= max_viewport_size[vp_as_scissor.quant_mode] && + vp_as_scissor.maxy <= max_viewport_size[vp_as_scissor.quant_mode]); + hw_screen_offset_x = CLAMP(hw_screen_offset_x, 0, MAX_PA_SU_HARDWARE_SCREEN_OFFSET); hw_screen_offset_y = CLAMP(hw_screen_offset_y, 0, MAX_PA_SU_HARDWARE_SCREEN_OFFSET); @@ -219,7 +229,6 @@ * * The viewport range is [-max_viewport_size/2, max_viewport_size/2]. 
*/ - static unsigned max_viewport_size[] = {65535, 16383, 4095}; assert(vp_as_scissor.quant_mode < ARRAY_SIZE(max_viewport_size)); max_range = max_viewport_size[vp_as_scissor.quant_mode] / 2; left = (-max_range - vp.translate[0]) / vp.scale[0]; @@ -333,6 +342,8 @@ unsigned h = scissor->maxy - scissor->miny; unsigned max_extent = MAX2(w, h); + int max_corner = MAX2(scissor->maxx, scissor->maxy); + unsigned center_x = (scissor->maxx + scissor->minx) / 2; unsigned center_y = (scissor->maxy + scissor->miny) / 2; unsigned max_center = MAX2(center_x, center_y); @@ -358,7 +369,22 @@ if (ctx->family == CHIP_RAVEN) max_extent = 16384; /* Use QUANT_MODE == 16_8. */ - if (max_extent <= 1024) /* 4K scanline area for guardband */ + /* Another constraint is that all coordinates in the viewport + * are representable in fixed point with respect to the + * surface origin. + * + * It means that PA_SU_HARDWARE_SCREEN_OFFSET can't be given + * an offset that would make the upper corner of the viewport + * greater than the maximum representable number post + * quantization, ie 2^quant_bits. + * + * This does not matter for 14.10 and 16.8 formats since the + * offset is already limited at 8k, but it means we can't use + * 12.12 if we are drawing to some pixels outside the lower + * 4k x 4k of the render target. 
+ */ + + if (max_extent <= 1024 && max_corner < 4096) /* 4K scanline area for guardband */ scissor->quant_mode = SI_QUANT_MODE_12_12_FIXED_POINT_1_4096TH; else if (max_extent <= 4096) /* 16K scanline area for guardband */ scissor->quant_mode = SI_QUANT_MODE_14_10_FIXED_POINT_1_1024TH; diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_test_dma_perf.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_test_dma_perf.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_test_dma_perf.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_test_dma_perf.c 2019-03-31 23:16:37.000000000 +0000 @@ -181,7 +181,8 @@ si_cp_dma_copy_buffer(sctx, dst, src, 0, 0, size, 0, SI_COHERENCY_NONE, cache_policy); } else { - si_cp_dma_clear_buffer(sctx, dst, 0, size, clear_value, + si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, dst, 0, size, + clear_value, 0, SI_COHERENCY_NONE, cache_policy); } } else if (test_sdma) { diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_texture.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_texture.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_texture.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_texture.c 2019-03-31 23:16:37.000000000 +0000 @@ -303,7 +303,7 @@ flags |= RADEON_SURF_SHAREABLE; if (is_imported) flags |= RADEON_SURF_IMPORTED | RADEON_SURF_SHAREABLE; - if (!(ptex->flags & SI_RESOURCE_FLAG_FORCE_TILING)) + if (!(ptex->flags & SI_RESOURCE_FLAG_FORCE_MSAA_TILING)) flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE; r = sscreen->ws->surface_init(sscreen->ws, ptex, flags, bpe, @@ -431,7 +431,7 @@ tex->cb_color_info &= ~S_028C70_FAST_CLEAR(1); if (tex->cmask_buffer != &tex->buffer) - r600_resource_reference(&tex->cmask_buffer, NULL); + si_resource_reference(&tex->cmask_buffer, NULL); tex->cmask_buffer = NULL; @@ -482,7 +482,7 @@ * context 1 & 2 read garbage, because DCC is disabled, yet there are * compressed tiled * - * \param sctx the current context if you have one, or rscreen->aux_context + * 
\param sctx the current context if you have one, or sscreen->aux_context * if you don't. */ bool si_texture_disable_dcc(struct si_context *sctx, @@ -577,12 +577,12 @@ if (tex->cmask_buffer == &tex->buffer) tex->cmask_buffer = NULL; else - r600_resource_reference(&tex->cmask_buffer, NULL); + si_resource_reference(&tex->cmask_buffer, NULL); if (new_tex->cmask_buffer == &new_tex->buffer) tex->cmask_buffer = &tex->buffer; else - r600_resource_reference(&tex->cmask_buffer, new_tex->cmask_buffer); + si_resource_reference(&tex->cmask_buffer, new_tex->cmask_buffer); tex->dcc_offset = new_tex->dcc_offset; tex->cb_color_info = new_tex->cb_color_info; @@ -606,9 +606,9 @@ tex->separate_dcc_dirty = new_tex->separate_dcc_dirty; tex->dcc_gather_statistics = new_tex->dcc_gather_statistics; - r600_resource_reference(&tex->dcc_separate_buffer, + si_resource_reference(&tex->dcc_separate_buffer, new_tex->dcc_separate_buffer); - r600_resource_reference(&tex->last_dcc_separate_buffer, + si_resource_reference(&tex->last_dcc_separate_buffer, new_tex->last_dcc_separate_buffer); if (new_bind_flag == PIPE_BIND_LINEAR) { @@ -726,7 +726,7 @@ { struct si_screen *sscreen = (struct si_screen*)screen; struct si_context *sctx; - struct r600_resource *res = r600_resource(resource); + struct si_resource *res = si_resource(resource); struct si_texture *tex = (struct si_texture*)resource; struct radeon_bo_metadata metadata; bool update_metadata = false; @@ -865,16 +865,16 @@ struct pipe_resource *ptex) { struct si_texture *tex = (struct si_texture*)ptex; - struct r600_resource *resource = &tex->buffer; + struct si_resource *resource = &tex->buffer; si_texture_reference(&tex->flushed_depth_texture, NULL); if (tex->cmask_buffer != &tex->buffer) { - r600_resource_reference(&tex->cmask_buffer, NULL); + si_resource_reference(&tex->cmask_buffer, NULL); } pb_reference(&resource->buf, NULL); - r600_resource_reference(&tex->dcc_separate_buffer, NULL); - r600_resource_reference(&tex->last_dcc_separate_buffer, 
NULL); + si_resource_reference(&tex->dcc_separate_buffer, NULL); + si_resource_reference(&tex->last_dcc_separate_buffer, NULL); FREE(tex); } @@ -1117,7 +1117,7 @@ struct radeon_surf *surface) { struct si_texture *tex; - struct r600_resource *resource; + struct si_resource *resource; struct si_screen *sscreen = (struct si_screen*)screen; tex = CALLOC_STRUCT(si_texture); @@ -1293,7 +1293,7 @@ const struct pipe_resource *templ, bool tc_compatible_htile) { const struct util_format_description *desc = util_format_description(templ->format); - bool force_tiling = templ->flags & SI_RESOURCE_FLAG_FORCE_TILING; + bool force_tiling = templ->flags & SI_RESOURCE_FLAG_FORCE_MSAA_TILING; bool is_depth_stencil = util_format_is_depth_or_stencil(templ->format) && !(templ->flags & SI_RESOURCE_FLAG_FLUSHED_DEPTH); @@ -1487,7 +1487,9 @@ templ->depth0 != 1 || templ->last_level != 0) return NULL; - buf = sscreen->ws->buffer_from_handle(sscreen->ws, whandle, &stride, &offset); + buf = sscreen->ws->buffer_from_handle(sscreen->ws, whandle, + sscreen->info.max_alignment, + &stride, &offset); if (!buf) return NULL; @@ -1635,7 +1637,7 @@ struct si_context *sctx = (struct si_context*)ctx; struct si_texture *tex = (struct si_texture*)texture; struct si_transfer *trans; - struct r600_resource *buf; + struct si_resource *buf; unsigned offset = 0; char *map; bool use_staging_texture = false; @@ -1796,6 +1798,12 @@ buf = &tex->buffer; } + /* Always unmap texture CPU mappings on 32-bit architectures, so that + * we don't run out of the CPU address space. 
+ */ + if (sizeof(void*) == 4) + usage |= RADEON_TRANSFER_TEMPORARY; + if (!(map = si_buffer_map_sync_with_rings(sctx, buf, usage))) goto fail_trans; @@ -1803,7 +1811,7 @@ return map + offset; fail_trans: - r600_resource_reference(&trans->staging, NULL); + si_resource_reference(&trans->staging, NULL); pipe_resource_reference(&trans->b.b.resource, NULL); FREE(trans); return NULL; @@ -1817,6 +1825,16 @@ struct pipe_resource *texture = transfer->resource; struct si_texture *tex = (struct si_texture*)texture; + /* Always unmap texture CPU mappings on 32-bit architectures, so that + * we don't run out of the CPU address space. + */ + if (sizeof(void*) == 4) { + struct si_resource *buf = + stransfer->staging ? stransfer->staging : &tex->buffer; + + sctx->ws->buffer_unmap(buf->buf); + } + if ((transfer->usage & PIPE_TRANSFER_WRITE) && stransfer->staging) { if (tex->is_depth && tex->buffer.b.b.nr_samples <= 1) { ctx->resource_copy_region(ctx, texture, transfer->level, @@ -1830,7 +1848,7 @@ if (stransfer->staging) { sctx->num_alloc_tex_transfer_bytes += stransfer->staging->buf->size; - r600_resource_reference(&stransfer->staging, NULL); + si_resource_reference(&stransfer->staging, NULL); } /* Heuristic for {upload, draw, upload, draw, ..}: @@ -2281,11 +2299,10 @@ union pipe_query_result result; /* Read the results. */ - ctx->get_query_result(ctx, sctx->dcc_stats[i].ps_stats[2], + struct pipe_query *query = sctx->dcc_stats[i].ps_stats[2]; + ctx->get_query_result(ctx, query, true, &result); - si_query_hw_reset_buffers(sctx, - (struct si_query_hw*) - sctx->dcc_stats[i].ps_stats[2]); + si_query_buffer_reset(sctx, &((struct si_query_hw*)query)->buffer); /* Compute the approximate number of fullscreen draws. 
*/ tex->ps_draw_ratio = @@ -2338,6 +2355,7 @@ return NULL; buf = sscreen->ws->buffer_from_handle(sscreen->ws, whandle, + sscreen->info.max_alignment, &stride, &offset); if (!buf) { free(memobj); diff -Nru mesa-18.3.3/src/gallium/drivers/softpipe/sp_screen.c mesa-19.0.1/src/gallium/drivers/softpipe/sp_screen.c --- mesa-18.3.3/src/gallium/drivers/softpipe/sp_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/softpipe/sp_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -265,6 +265,8 @@ return 1; case PIPE_CAP_CLEAR_TEXTURE: return 1; + case PIPE_CAP_MAX_VARYINGS: + return TGSI_EXEC_MAX_INPUT_ATTRIBS; case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: diff -Nru mesa-18.3.3/src/gallium/drivers/softpipe/sp_tile_cache.c mesa-19.0.1/src/gallium/drivers/softpipe/sp_tile_cache.c --- mesa-18.3.3/src/gallium/drivers/softpipe/sp_tile_cache.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/softpipe/sp_tile_cache.c 2019-03-31 23:16:37.000000000 +0000 @@ -373,17 +373,18 @@ if (util_format_is_pure_uint(tc->surface->format)) { pipe_put_tile_ui_format(pt, tc->transfer_map[layer], x, y, TILE_SIZE, TILE_SIZE, - pt->resource->format, + tc->surface->format, (unsigned *) tc->tile->data.colorui128); } else if (util_format_is_pure_sint(tc->surface->format)) { pipe_put_tile_i_format(pt, tc->transfer_map[layer], x, y, TILE_SIZE, TILE_SIZE, - pt->resource->format, + tc->surface->format, (int *) tc->tile->data.colori128); } else { - pipe_put_tile_rgba(pt, tc->transfer_map[layer], - x, y, TILE_SIZE, TILE_SIZE, - (float *) tc->tile->data.color); + pipe_put_tile_rgba_format(pt, tc->transfer_map[layer], + x, y, TILE_SIZE, TILE_SIZE, + tc->surface->format, + (float *) tc->tile->data.color); } } numCleared++; diff -Nru mesa-18.3.3/src/gallium/drivers/svga/svga_cmd.c mesa-19.0.1/src/gallium/drivers/svga/svga_cmd.c --- mesa-18.3.3/src/gallium/drivers/svga/svga_cmd.c 2018-12-07 
18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/svga/svga_cmd.c 2019-03-31 23:16:37.000000000 +0000 @@ -1693,7 +1693,7 @@ return PIPE_ERROR_OUT_OF_MEMORY; swc->surface_relocation(swc, &cmd->sid, &cmd->mobid, surface, - SVGA_RELOC_READ | SVGA_RELOC_INTERNAL); + SVGA_RELOC_READ); swc->commit(swc); diff -Nru mesa-18.3.3/src/gallium/drivers/svga/svga_format.c mesa-19.0.1/src/gallium/drivers/svga/svga_format.c --- mesa-18.3.3/src/gallium/drivers/svga/svga_format.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/svga/svga_format.c 2019-03-31 23:16:37.000000000 +0000 @@ -370,6 +370,11 @@ { PIPE_FORMAT_A1B5G5R5_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_X1B5G5R5_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_A4B4G4R4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8L8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_G8R8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8B8G8R8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_X8B8G8R8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, }; diff -Nru mesa-18.3.3/src/gallium/drivers/svga/svga_screen.c mesa-19.0.1/src/gallium/drivers/svga/svga_screen.c --- mesa-18.3.3/src/gallium/drivers/svga/svga_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/svga/svga_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -350,6 +350,8 @@ case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: return sws->have_sm4_1 ? 1 : 0; /* only single-channel textures */ + case PIPE_CAP_MAX_VARYINGS: + return sws->have_vgpu10 ? 
VGPU10_MAX_FS_INPUTS : 10; /* Unsupported features */ case PIPE_CAP_TEXTURE_MIRROR_CLAMP: diff -Nru mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp --- mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -52,7 +52,7 @@ void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage) { - SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT_INTERNAL), + SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL), "Internal memory should not be gfxptr_t."); } diff -Nru mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h --- mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h 2019-03-31 23:16:37.000000000 +0000 @@ -51,21 +51,21 @@ virtual LoadInst* LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* Ptr, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* Ptr, bool isVolatile, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* BasePtr, const std::initializer_list& offset, const llvm::Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = 
JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual CallInst* MASKED_LOAD(Value* Ptr, @@ -74,36 +74,36 @@ Value* PassThru = nullptr, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual Value* GATHERPS(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual Value* GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); Value* TranslateGfxAddressForRead(Value* xpGfxAddress, Type* PtrTy = nullptr, const Twine& Name = "", - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); Value* TranslateGfxAddressForWrite(Value* xpGfxAddress, Type* PtrTy = nullptr, const Twine& Name = "", - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); protected: diff -Nru mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder.h mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder.h --- mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder.h 2019-03-31 23:16:37.000000000 +0000 @@ -161,7 +161,6 @@ #include "builder_math.h" #include "builder_mem.h" - protected: void SetPrivateContext(Value* pPrivateContext) { mpPrivateContext = pPrivateContext; diff -Nru mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp --- 
mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -29,7 +29,6 @@ ******************************************************************************/ #include "jit_pch.hpp" #include "builder.h" -#include "common/rdtsc_buckets.h" #include diff -Nru mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h --- mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h 2019-03-31 23:16:37.000000000 +0000 @@ -30,13 +30,13 @@ #pragma once public: -typedef enum _JIT_MEM_CLIENT +enum class JIT_MEM_CLIENT { MEM_CLIENT_INTERNAL, GFX_MEM_CLIENT_FETCH, GFX_MEM_CLIENT_SAMPLER, GFX_MEM_CLIENT_SHADER, -} JIT_MEM_CLIENT; +}; protected: virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset); @@ -56,23 +56,23 @@ Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list& indexList); virtual LoadInst* - LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* Ptr, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* - LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* Ptr, bool isVolatile, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = 
JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* BasePtr, const std::initializer_list& offset, const llvm::Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual CallInst* MASKED_LOAD(Value* Ptr, unsigned Align, @@ -80,7 +80,7 @@ Value* PassThru = nullptr, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL) + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL) { return IRB()->CreateMaskedLoad(Ptr, Align, Mask, PassThru, Name); } @@ -101,14 +101,14 @@ Value* mask, Value* vGatherComponents[], bool bPackedOutput, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual Value* GATHERPS(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); void GATHER4PS(const SWR_FORMAT_INFO& info, Value* pSrcBase, @@ -116,14 +116,14 @@ Value* mask, Value* vGatherComponents[], bool bPackedOutput, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual Value* GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); void GATHER4DD(const SWR_FORMAT_INFO& info, Value* pSrcBase, @@ -131,7 +131,7 @@ Value* mask, Value* vGatherComponents[], bool bPackedOutput, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); Value* GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1); @@ -141,7 +141,7 @@ Value* vSrc, Value* vOffsets, Value* vMask, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); void 
Shuffle8bpcGather4(const SWR_FORMAT_INFO& info, Value* vGatherInput, diff -Nru mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp --- mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp 2018-11-05 12:21:01.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -765,6 +765,119 @@ Value* Builder::VPOPCNT(Value* a) { return POPCNT(VMOVMSK(a)); } ////////////////////////////////////////////////////////////////////////// + /// @brief Float / Fixed-point conversions + ////////////////////////////////////////////////////////////////////////// + Value* Builder::VCVT_F32_FIXED_SI(Value* vFloat, + uint32_t numIntBits, + uint32_t numFracBits, + const llvm::Twine& name) + { + SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values"); + Value* fixed = nullptr; + { + // Do round to nearest int on fractional bits first + // Not entirely perfect for negative numbers, but close enough + vFloat = VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))), + C(_MM_FROUND_TO_NEAREST_INT)); + vFloat = FMUL(vFloat, VIMMED1(1.0f / float(1 << numFracBits))); + + // TODO: Handle INF, NAN, overflow / underflow, etc. 
+ + Value* vSgn = FCMP_OLT(vFloat, VIMMED1(0.0f)); + Value* vFloatInt = BITCAST(vFloat, mSimdInt32Ty); + Value* vFixed = AND(vFloatInt, VIMMED1((1 << 23) - 1)); + vFixed = OR(vFixed, VIMMED1(1 << 23)); + vFixed = SELECT(vSgn, NEG(vFixed), vFixed); + + Value* vExp = LSHR(SHL(vFloatInt, VIMMED1(1)), VIMMED1(24)); + vExp = SUB(vExp, VIMMED1(127)); + + Value* vExtraBits = SUB(VIMMED1(23 - numFracBits), vExp); + + fixed = ASHR(vFixed, vExtraBits, name); + } + + return fixed; + } + + Value* Builder::VCVT_FIXED_SI_F32(Value* vFixed, + uint32_t numIntBits, + uint32_t numFracBits, + const llvm::Twine& name) + { + SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values"); + uint32_t extraBits = 32 - numIntBits - numFracBits; + if (numIntBits && extraBits) + { + // Sign extend + Value* shftAmt = VIMMED1(extraBits); + vFixed = ASHR(SHL(vFixed, shftAmt), shftAmt); + } + + Value* fVal = VIMMED1(0.0f); + Value* fFrac = VIMMED1(0.0f); + if (numIntBits) + { + fVal = SI_TO_FP(ASHR(vFixed, VIMMED1(numFracBits)), mSimdFP32Ty, name); + } + + if (numFracBits) + { + fFrac = UI_TO_FP(AND(vFixed, VIMMED1((1 << numFracBits) - 1)), mSimdFP32Ty); + fFrac = FDIV(fFrac, VIMMED1(float(1 << numFracBits)), name); + } + + return FADD(fVal, fFrac, name); + } + + Value* Builder::VCVT_F32_FIXED_UI(Value* vFloat, + uint32_t numIntBits, + uint32_t numFracBits, + const llvm::Twine& name) + { + SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values"); + Value* fixed = nullptr; + // KNOB_SIM_FAST_MATH? Below works correctly from a precision + // standpoint... 
+ { + fixed = FP_TO_UI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))), + C(_MM_FROUND_TO_NEAREST_INT)), + mSimdInt32Ty); + } + return fixed; + } + + Value* Builder::VCVT_FIXED_UI_F32(Value* vFixed, + uint32_t numIntBits, + uint32_t numFracBits, + const llvm::Twine& name) + { + SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values"); + uint32_t extraBits = 32 - numIntBits - numFracBits; + if (numIntBits && extraBits) + { + // Sign extend + Value* shftAmt = VIMMED1(extraBits); + vFixed = ASHR(SHL(vFixed, shftAmt), shftAmt); + } + + Value* fVal = VIMMED1(0.0f); + Value* fFrac = VIMMED1(0.0f); + if (numIntBits) + { + fVal = UI_TO_FP(LSHR(vFixed, VIMMED1(numFracBits)), mSimdFP32Ty, name); + } + + if (numFracBits) + { + fFrac = UI_TO_FP(AND(vFixed, VIMMED1((1 << numFracBits) - 1)), mSimdFP32Ty); + fFrac = FDIV(fFrac, VIMMED1(float(1 << numFracBits)), name); + } + + return FADD(fVal, fFrac, name); + } + + ////////////////////////////////////////////////////////////////////////// /// @brief C functions called by LLVM IR ////////////////////////////////////////////////////////////////////////// diff -Nru mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h --- mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h 2019-03-31 23:16:37.000000000 +0000 @@ -123,6 +123,28 @@ Value* VMOVMSK(Value* mask); ////////////////////////////////////////////////////////////////////////// +/// @brief Float / Fixed-point conversions +////////////////////////////////////////////////////////////////////////// +// Signed +Value* VCVT_F32_FIXED_SI(Value* vFloat, + uint32_t numIntBits, + uint32_t numFracBits, + const llvm::Twine& name = ""); +Value* VCVT_FIXED_SI_F32(Value* vFixed, + uint32_t numIntBits, + uint32_t numFracBits, + const llvm::Twine& 
name = ""); +// Unsigned +Value* VCVT_F32_FIXED_UI(Value* vFloat, + uint32_t numIntBits, + uint32_t numFracBits, + const llvm::Twine& name = ""); +Value* VCVT_FIXED_UI_F32(Value* vFixed, + uint32_t numIntBits, + uint32_t numFracBits, + const llvm::Twine& name = ""); + +////////////////////////////////////////////////////////////////////////// /// @brief functions that build IR to call x86 intrinsics directly, or /// emulate them with other instructions if not available on the host ////////////////////////////////////////////////////////////////////////// diff -Nru mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp --- mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp 2018-11-05 12:21:01.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -202,7 +202,7 @@ break; case R32_UINT: (fetchState.bDisableIndexOOBCheck) - ? vIndices = LOAD(indices, "", PointerType::get(mSimdInt32Ty, 0), GFX_MEM_CLIENT_FETCH) + ? 
vIndices = LOAD(indices, "", PointerType::get(mSimdInt32Ty, 0), JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH) : vIndices = GetSimdValid32bitIndices(indices, pLastIndex); break; // incoming type is already 32bit int default: @@ -368,7 +368,7 @@ // gather SIMD full pixels per lane then shift/mask to move each component to their // own vector void FetchJit::CreateGatherOddFormats( - SWR_FORMAT format, Value* pMask, Value* pBase, Value* pOffsets, Value* pResult[4]) + SWR_FORMAT format, Value* pMask, Value* xpBase, Value* pOffsets, Value* pResult[4]) { const SWR_FORMAT_INFO& info = GetFormatInfo(format); @@ -378,7 +378,7 @@ Value* pGather; if (info.bpp == 32) { - pGather = GATHERDD(VIMMED1(0), pBase, pOffsets, pMask); + pGather = GATHERDD(VIMMED1(0), xpBase, pOffsets, pMask, 1, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); } else { @@ -386,29 +386,40 @@ Value* pMem = ALLOCA(mSimdInt32Ty); STORE(VIMMED1(0u), pMem); - pBase = BITCAST(pBase, PointerType::get(mInt8Ty, 0)); - Value* pDstMem = BITCAST(pMem, mInt32PtrTy); + Value* pDstMem = POINTER_CAST(pMem, mInt32PtrTy); for (uint32_t lane = 0; lane < mVWidth; ++lane) { // Get index Value* index = VEXTRACT(pOffsets, C(lane)); Value* mask = VEXTRACT(pMask, C(lane)); + + // use branch around load based on mask + // Needed to avoid page-faults on unmasked lanes + BasicBlock* pCurrentBB = IRB()->GetInsertBlock(); + BasicBlock* pMaskedLoadBlock = + BasicBlock::Create(JM()->mContext, "MaskedLaneLoad", pCurrentBB->getParent()); + BasicBlock* pEndLoadBB = BasicBlock::Create(JM()->mContext, "AfterMaskedLoad", pCurrentBB->getParent()); + + COND_BR(mask, pMaskedLoadBlock, pEndLoadBB); + + JM()->mBuilder.SetInsertPoint(pMaskedLoadBlock); + switch (info.bpp) { case 8: { Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt8Ty, 0)); - Value* pSrc = BITCAST(GEP(pBase, index), PointerType::get(mInt8Ty, 0)); - STORE(LOAD(SELECT(mask, pSrc, pDst)), pDst); + Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType())); + STORE(LOAD(xpSrc, "", 
mInt8PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst); break; } case 16: { Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt16Ty, 0)); - Value* pSrc = BITCAST(GEP(pBase, index), PointerType::get(mInt16Ty, 0)); - STORE(LOAD(SELECT(mask, pSrc, pDst)), pDst); + Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType())); + STORE(LOAD(xpSrc, "", mInt16PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst); break; } break; @@ -417,13 +428,13 @@ { // First 16-bits of data Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt16Ty, 0)); - Value* pSrc = BITCAST(GEP(pBase, index), PointerType::get(mInt16Ty, 0)); - STORE(LOAD(SELECT(mask, pSrc, pDst)), pDst); + Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType())); + STORE(LOAD(xpSrc, "", mInt16PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst); // Last 8-bits of data pDst = BITCAST(GEP(pDst, C(1)), PointerType::get(mInt8Ty, 0)); - pSrc = BITCAST(GEP(pSrc, C(1)), PointerType::get(mInt8Ty, 0)); - STORE(LOAD(SELECT(mask, pSrc, pDst)), pDst); + xpSrc = ADD(xpSrc, C(2)); + STORE(LOAD(xpSrc, "", mInt8PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst); break; } @@ -431,6 +442,9 @@ SWR_INVALID("Shouldn't have BPP = %d now", info.bpp); break; } + + BR(pEndLoadBB); + JM()->mBuilder.SetInsertPoint(pEndLoadBB); } pGather = LOAD(pMem); @@ -550,9 +564,6 @@ Value* stream = LOAD(streams, {ied.StreamIndex, SWR_VERTEX_BUFFER_STATE_xpData}); - // VGATHER* takes an *i8 src pointer - Value* pStreamBase = INT_TO_PTR(stream, PointerType::get(mInt8Ty, 0)); - Value* stride = LOAD(streams, {ied.StreamIndex, SWR_VERTEX_BUFFER_STATE_pitch}); Value* vStride = VBROADCAST(stride); @@ -619,8 +630,9 @@ // do 64bit address offset calculations. 
// calculate byte offset to the start of the VB - Value* baseOffset = MUL(Z_EXT(startOffset, mInt64Ty), Z_EXT(stride, mInt64Ty)); - pStreamBase = GEP(pStreamBase, baseOffset); + Value* baseOffset = MUL(Z_EXT(startOffset, mInt64Ty), Z_EXT(stride, mInt64Ty)); + + // VGATHER* takes an *i8 src pointer so that's what stream is Value* pStreamBaseGFX = ADD(stream, baseOffset); // if we have a start offset, subtract from max vertex. Used for OOB check @@ -698,7 +710,7 @@ { Value* pResults[4]; CreateGatherOddFormats( - (SWR_FORMAT)ied.Format, vGatherMask, pStreamBase, vOffsets, pResults); + (SWR_FORMAT)ied.Format, vGatherMask, pStreamBaseGFX, vOffsets, pResults); ConvertFormat((SWR_FORMAT)ied.Format, pResults); for (uint32_t c = 0; c < 4; c += 1) @@ -733,7 +745,7 @@ // if we have at least one component out of x or y to fetch if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1)) { - vGatherResult[0] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vGatherMask); + vGatherResult[0] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask); // e.g. result of first 8x32bit integer gather for 16bit components // 256i - 0 1 2 3 4 5 6 7 // xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy @@ -744,9 +756,9 @@ if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3)) { // offset base to the next components(zw) in the vertex to gather - pStreamBase = GEP(pStreamBase, C((char)4)); + pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)4)); - vGatherResult[1] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vGatherMask); + vGatherResult[1] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask); // e.g. result of second 8x32bit integer gather for 16bit components // 256i - 0 1 2 3 4 5 6 7 // zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw @@ -783,18 +795,18 @@ { // Gather a SIMD of vertices // APIs allow a 4GB range for offsets - // However, GATHERPS uses signed 32-bit offsets, so only a 2GB range :( - // But, we know that elements must be aligned for FETCH. 
:) - // Right shift the offset by a bit and then scale by 2 to remove the - // sign extension. - Value* vShiftedOffsets = LSHR(vOffsets, 1); + // However, GATHERPS uses signed 32-bit offsets, so +/- 2GB range :( + // Add 2GB to the base pointer and 2GB to the offsets. This makes + // "negative" (large) offsets into positive offsets and small offsets + // into negative offsets. + Value* vNewOffsets = ADD(vOffsets, VIMMED1(0x80000000)); vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, - pStreamBaseGFX, - vShiftedOffsets, + ADD(pStreamBaseGFX, C((uintptr_t)0x80000000U)), + vNewOffsets, vGatherMask, - 2, - GFX_MEM_CLIENT_FETCH); + 1, + JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); } else { @@ -811,7 +823,6 @@ } // offset base to the next component in the vertex to gather - pStreamBase = GEP(pStreamBase, C((char)4)); pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)4)); } } @@ -854,9 +865,9 @@ mVWidth / 2, ConstantFP::get(IRB()->getDoubleTy(), 0.0f)); Value* pGatherLo = - GATHERPD(vZeroDouble, pStreamBase, vOffsetsLo, vMaskLo); + GATHERPD(vZeroDouble, pStreamBaseGFX, vOffsetsLo, vMaskLo); Value* pGatherHi = - GATHERPD(vZeroDouble, pStreamBase, vOffsetsHi, vMaskHi); + GATHERPD(vZeroDouble, pStreamBaseGFX, vOffsetsHi, vMaskHi); pGatherLo = VCVTPD2PS(pGatherLo); pGatherHi = VCVTPD2PS(pGatherHi); @@ -880,7 +891,7 @@ } // offset base to the next component in the vertex to gather - pStreamBase = GEP(pStreamBase, C((char)8)); + pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)8)); } } break; @@ -936,7 +947,8 @@ // if we have at least one component to fetch if (compMask) { - Value* vGatherResult = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask); + Value* vGatherResult = GATHERDD( + gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask, 1, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); // e.g. 
result of an 8x32bit integer gather for 8bit components // 256i - 0 1 2 3 4 5 6 7 // xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw @@ -965,7 +977,7 @@ // if we have at least one component out of x or y to fetch if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1)) { - vGatherResult[0] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask); + vGatherResult[0] = GATHERDD(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask, 1, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); // e.g. result of first 8x32bit integer gather for 16bit components // 256i - 0 1 2 3 4 5 6 7 // xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy @@ -976,9 +988,9 @@ if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3)) { // offset base to the next components(zw) in the vertex to gather - pStreamBase = GEP(pStreamBase, C((char)4)); + pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)4)); - vGatherResult[1] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask); + vGatherResult[1] = GATHERDD(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask, 1, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); // e.g. result of second 8x32bit integer gather for 16bit components // 256i - 0 1 2 3 4 5 6 7 // zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw @@ -1015,7 +1027,7 @@ if (compCtrl[i] == StoreSrc) { Value* pGather = - GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask); + GATHERDD(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask, 1, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); if (conversionType == CONVERT_USCALED) { @@ -1053,7 +1065,7 @@ } // offset base to the next component in the vertex to gather - pStreamBase = GEP(pStreamBase, C((char)4)); + pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)4)); } } break; @@ -1112,7 +1124,7 @@ // if valid, load the index. 
if not, load 0 from the stack Value* pValid = SELECT(mask, pIndex, pZeroIndex); - Value* index = LOAD(pValid, "valid index", Ty, GFX_MEM_CLIENT_FETCH); + Value* index = LOAD(pValid, "valid index", Ty, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); // zero extended index to 32 bits and insert into the correct simd lane index = Z_EXT(index, mInt32Ty); @@ -1187,7 +1199,7 @@ VIMMED1(0), "vIndices", PointerType::get(mSimdInt32Ty, 0), - GFX_MEM_CLIENT_FETCH); + JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); } ////////////////////////////////////////////////////////////////////////// diff -Nru mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp --- mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -63,39 +63,29 @@ mContext(), mBuilder(mContext), mIsModuleFinalized(true), mJitNumber(0), mVWidth(simdWidth), mArch(arch) { + mpCurrentModule = nullptr; + mpExec = nullptr; + InitializeNativeTarget(); InitializeNativeTargetAsmPrinter(); InitializeNativeTargetDisassembler(); - TargetOptions tOpts; - tOpts.AllowFPOpFusion = FPOpFusion::Fast; - tOpts.NoInfsFPMath = false; - tOpts.NoNaNsFPMath = false; - tOpts.UnsafeFPMath = false; - - // tOpts.PrintMachineCode = true; - - std::unique_ptr newModule(new Module("", mContext)); - mpCurrentModule = newModule.get(); - - StringRef hostCPUName; - // force JIT to use the same CPU arch as the rest of swr if (mArch.AVX512F()) { #if USE_SIMD16_SHADERS if (mArch.AVX512ER()) { - hostCPUName = StringRef("knl"); + mHostCpuName = StringRef("knl"); } else { - hostCPUName = StringRef("skylake-avx512"); + mHostCpuName = StringRef("skylake-avx512"); } mUsingAVX512 = true; #else - hostCPUName = StringRef("core-avx2"); + mHostCpuName = StringRef("core-avx2"); #endif if (mVWidth == 0) { @@ -104,7 +94,7 @@ } else if 
(mArch.AVX2()) { - hostCPUName = StringRef("core-avx2"); + mHostCpuName = StringRef("core-avx2"); if (mVWidth == 0) { mVWidth = 8; @@ -114,11 +104,11 @@ { if (mArch.F16C()) { - hostCPUName = StringRef("core-avx-i"); + mHostCpuName = StringRef("core-avx-i"); } else { - hostCPUName = StringRef("corei7-avx"); + mHostCpuName = StringRef("corei7-avx"); } if (mVWidth == 0) { @@ -131,31 +121,21 @@ } - auto optLevel = CodeGenOpt::Aggressive; + mOptLevel = CodeGenOpt::Aggressive; if (KNOB_JIT_OPTIMIZATION_LEVEL >= CodeGenOpt::None && KNOB_JIT_OPTIMIZATION_LEVEL <= CodeGenOpt::Aggressive) { - optLevel = CodeGenOpt::Level(KNOB_JIT_OPTIMIZATION_LEVEL); + mOptLevel = CodeGenOpt::Level(KNOB_JIT_OPTIMIZATION_LEVEL); } - mpCurrentModule->setTargetTriple(sys::getProcessTriple()); - mpExec = EngineBuilder(std::move(newModule)) - .setTargetOptions(tOpts) - .setOptLevel(optLevel) - .setMCPU(hostCPUName) - .create(); - if (KNOB_JIT_ENABLE_CACHE) { - mCache.Init(this, hostCPUName, optLevel); - mpExec->setObjectCache(&mCache); + mCache.Init(this, mHostCpuName, mOptLevel); } -#if LLVM_USE_INTEL_JITEVENTS - JITEventListener* vTune = JITEventListener::createIntelJITEventListener(); - mpExec->RegisterJITEventListener(vTune); -#endif + SetupNewModule(); + mIsModuleFinalized = true; // fetch function signature #if USE_SIMD16_SHADERS @@ -198,6 +178,35 @@ #endif } +void JitManager::CreateExecEngine(std::unique_ptr pModule) +{ + TargetOptions tOpts; + tOpts.AllowFPOpFusion = FPOpFusion::Fast; + tOpts.NoInfsFPMath = false; + tOpts.NoNaNsFPMath = false; + tOpts.UnsafeFPMath = false; + + // tOpts.PrintMachineCode = true; + + mpExec = EngineBuilder(std::move(pModule)) + .setTargetOptions(tOpts) + .setOptLevel(mOptLevel) + .setMCPU(mHostCpuName) + .create(); + + if (KNOB_JIT_ENABLE_CACHE) + { + mpExec->setObjectCache(&mCache); + } + +#if LLVM_USE_INTEL_JITEVENTS + JITEventListener* vTune = JITEventListener::createIntelJITEventListener(); + mpExec->RegisterJITEventListener(vTune); +#endif + + 
mvExecEngines.push_back(mpExec); +} + ////////////////////////////////////////////////////////////////////////// /// @brief Create new LLVM module. void JitManager::SetupNewModule() @@ -207,7 +216,7 @@ std::unique_ptr newModule(new Module("", mContext)); mpCurrentModule = newModule.get(); mpCurrentModule->setTargetTriple(sys::getProcessTriple()); - mpExec->addModule(std::move(newModule)); + CreateExecEngine(std::move(newModule)); mIsModuleFinalized = false; } @@ -443,7 +452,7 @@ ////////////////////////////////////////////////////////////////////////// /// @brief Dump function to file. -void JitManager::DumpToFile(Module* M, const char* fileName) +void JitManager::DumpToFile(Module* M, const char* fileName, llvm::AssemblyAnnotationWriter* annotater) { if (KNOB_DUMP_SHADER_IR) { @@ -458,7 +467,7 @@ sprintf(fName, "%s.%s.ll", funcName, fileName); #endif raw_fd_ostream fd(fName, EC, llvm::sys::fs::F_None); - M->print(fd, nullptr); + M->print(fd, annotater); fd.flush(); } } @@ -573,7 +582,7 @@ uint64_t GetObjectCRC() const { return m_objCRC; } private: - static const uint64_t JC_MAGIC_NUMBER = 0xfedcba9876543211ULL + 4; + static const uint64_t JC_MAGIC_NUMBER = 0xfedcba9876543210ULL + 6; static const size_t JC_STR_MAX_LEN = 32; static const uint32_t JC_PLATFORM_KEY = (LLVM_VERSION_MAJOR << 24) | (LLVM_VERSION_MINOR << 16) | (LLVM_VERSION_PATCH << 8) | @@ -625,6 +634,15 @@ { mCacheDir = KNOB_JIT_CACHE_DIR; } + + // Create cache dir at startup to allow jitter to write debug.ll files + // to that directory. + if (!llvm::sys::fs::exists(mCacheDir.str()) && + llvm::sys::fs::create_directories(mCacheDir.str())) + { + SWR_INVALID("Unable to create directory: %s", mCacheDir.c_str()); + } + } int ExecUnhookedProcess(const std::string& CmdLine, std::string* pStdOut, std::string* pStdErr) @@ -632,6 +650,26 @@ return ExecCmd(CmdLine, "", pStdOut, pStdErr); } +/// Calculate actual directory where module will be cached. +/// This is always a subdirectory of mCacheDir. 
Full absolute +/// path name will be stored in mCurrentModuleCacheDir +void JitCache::CalcModuleCacheDir() +{ + mModuleCacheDir.clear(); + + llvm::SmallString moduleDir = mCacheDir; + + // Create 4 levels of directory hierarchy based on CRC, 256 entries each + uint8_t* pCRC = (uint8_t*)&mCurrentModuleCRC; + for (uint32_t i = 0; i < 4; ++i) + { + llvm::sys::path::append(moduleDir, std::to_string((int)pCRC[i])); + } + + mModuleCacheDir = moduleDir; +} + + /// notifyObjectCompiled - Provides a pointer to compiled code for Module M. void JitCache::notifyObjectCompiled(const llvm::Module* M, llvm::MemoryBufferRef Obj) { @@ -641,16 +679,22 @@ return; } - if (!llvm::sys::fs::exists(mCacheDir.str()) && - llvm::sys::fs::create_directories(mCacheDir.str())) + if (!mModuleCacheDir.size()) { - SWR_INVALID("Unable to create directory: %s", mCacheDir.c_str()); + SWR_INVALID("Unset module cache directory"); + return; + } + + if (!llvm::sys::fs::exists(mModuleCacheDir.str()) && + llvm::sys::fs::create_directories(mModuleCacheDir.str())) + { + SWR_INVALID("Unable to create directory: %s", mModuleCacheDir.c_str()); return; } JitCacheFileHeader header; - llvm::SmallString filePath = mCacheDir; + llvm::SmallString filePath = mModuleCacheDir; llvm::sys::path::append(filePath, moduleID); llvm::SmallString objPath = filePath; @@ -690,12 +734,14 @@ return nullptr; } - if (!llvm::sys::fs::exists(mCacheDir)) + CalcModuleCacheDir(); + + if (!llvm::sys::fs::exists(mModuleCacheDir)) { return nullptr; } - llvm::SmallString filePath = mCacheDir; + llvm::SmallString filePath = mModuleCacheDir; llvm::sys::path::append(filePath, moduleID); llvm::SmallString objFilePath = filePath; @@ -758,3 +804,26 @@ return pBuf; } + +void InterleaveAssemblyAnnotater::emitInstructionAnnot(const llvm::Instruction *pInst, llvm::formatted_raw_ostream &OS) +{ + auto dbgLoc = pInst->getDebugLoc(); + if(dbgLoc) + { + unsigned int line = dbgLoc.getLine(); + if(line != mCurrentLineNo) + { + if(line > 0 && line <= 
mAssembly.size()) + { + // HACK: here we assume that OS is a formatted_raw_ostream(ods()) + // and modify the color accordingly. We can't do the color + // modification on OS because formatted_raw_ostream strips + // the color information. The only way to fix this behavior + // is to patch LLVM. + OS << "\n; " << line << ": " << mAssembly[line-1] << "\n"; + } + mCurrentLineNo = line; + } + } +} + diff -Nru mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h --- mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h 2019-03-31 23:16:37.000000000 +0000 @@ -31,6 +31,7 @@ #include "jit_pch.hpp" #include "common/isa.hpp" +#include ////////////////////////////////////////////////////////////////////////// @@ -112,9 +113,15 @@ private: std::string mCpu; llvm::SmallString mCacheDir; + llvm::SmallString mModuleCacheDir; uint32_t mCurrentModuleCRC = 0; JitManager* mpJitMgr = nullptr; llvm::CodeGenOpt::Level mOptLevel = llvm::CodeGenOpt::None; + + /// Calculate actual directory where module will be cached. + /// This is always a subdirectory of mCacheDir. 
Full absolute + /// path name will be stored in mCurrentModuleCacheDir + void CalcModuleCacheDir(); }; ////////////////////////////////////////////////////////////////////////// @@ -123,12 +130,21 @@ struct JitManager { JitManager(uint32_t w, const char* arch, const char* core); - ~JitManager(){}; + ~JitManager() + { + for (auto* pExec : mvExecEngines) + { + delete pExec; + } + } - JitLLVMContext mContext; ///< LLVM compiler - llvm::IRBuilder<> mBuilder; ///< LLVM IR Builder - llvm::ExecutionEngine* mpExec; - JitCache mCache; + JitLLVMContext mContext; ///< LLVM compiler + llvm::IRBuilder<> mBuilder; ///< LLVM IR Builder + llvm::ExecutionEngine* mpExec; + std::vector mvExecEngines; + JitCache mCache; + llvm::StringRef mHostCpuName; + llvm::CodeGenOpt::Level mOptLevel; // Need to be rebuilt after a JIT and before building new IR llvm::Module* mpCurrentModule; @@ -147,11 +163,14 @@ // Debugging support std::unordered_map mDebugStructMap; + void CreateExecEngine(std::unique_ptr M); void SetupNewModule(); void DumpAsm(llvm::Function* pFunction, const char* fileName); static void DumpToFile(llvm::Function* f, const char* fileName); - static void DumpToFile(llvm::Module* M, const char* fileName); + static void DumpToFile(llvm::Module* M, + const char* fileName, + llvm::AssemblyAnnotationWriter* annotater = nullptr); static std::string GetOutputDir(); // Debugging support methods @@ -178,3 +197,14 @@ uint32_t lineNum, const std::vector>& members); }; + +class InterleaveAssemblyAnnotater : public llvm::AssemblyAnnotationWriter +{ +public: + void emitInstructionAnnot(const llvm::Instruction* pInst, + llvm::formatted_raw_ostream& OS) override; + std::vector mAssembly; + +private: + uint32_t mCurrentLineNo = 0; +}; diff -Nru mesa-18.3.3/src/gallium/drivers/swr/swr_screen.cpp mesa-19.0.1/src/gallium/drivers/swr/swr_screen.cpp --- mesa-18.3.3/src/gallium/drivers/swr/swr_screen.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/swr/swr_screen.cpp 
2019-03-31 23:16:37.000000000 +0000 @@ -369,6 +369,8 @@ return 32; case PIPE_CAP_MAX_SHADER_BUFFER_SIZE: return 1 << 27; + case PIPE_CAP_MAX_VARYINGS: + return 32; case PIPE_CAP_VENDOR_ID: return 0xFFFFFFFF; @@ -844,7 +846,9 @@ size_t total_size = (uint64_t)res->swr.depth * res->swr.qpitch * res->swr.pitch * res->swr.numSamples; - if (total_size > SWR_MAX_TEXTURE_SIZE) + + // Let non-sampled textures (e.g. buffer objects) bypass the size limit + if (swr_resource_is_texture(&res->base) && total_size > SWR_MAX_TEXTURE_SIZE) return false; if (allocate) { diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_blit.c mesa-19.0.1/src/gallium/drivers/v3d/v3d_blit.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_blit.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_blit.c 2019-03-31 23:16:37.000000000 +0000 @@ -25,6 +25,7 @@ #include "util/u_surface.h" #include "util/u_blitter.h" #include "v3d_context.h" +#include "v3d_tiling.h" #if 0 static struct pipe_surface * @@ -183,10 +184,11 @@ util_blitter_save_sample_mask(v3d->blitter, v3d->sample_mask); util_blitter_save_framebuffer(v3d->blitter, &v3d->framebuffer); util_blitter_save_fragment_sampler_states(v3d->blitter, - v3d->fragtex.num_samplers, - (void **)v3d->fragtex.samplers); + v3d->tex[PIPE_SHADER_FRAGMENT].num_samplers, + (void **)v3d->tex[PIPE_SHADER_FRAGMENT].samplers); util_blitter_save_fragment_sampler_views(v3d->blitter, - v3d->fragtex.num_textures, v3d->fragtex.textures); + v3d->tex[PIPE_SHADER_FRAGMENT].num_textures, + v3d->tex[PIPE_SHADER_FRAGMENT].textures); util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets, v3d->streamout.targets); } @@ -316,12 +318,206 @@ pipe_sampler_view_reference(&src_view, NULL); } +/* Disable level 0 write, just write following mipmaps */ +#define V3D_TFU_IOA_DIMTW (1 << 0) +#define V3D_TFU_IOA_FORMAT_SHIFT 3 +#define V3D_TFU_IOA_FORMAT_LINEARTILE 3 +#define V3D_TFU_IOA_FORMAT_UBLINEAR_1_COLUMN 4 +#define V3D_TFU_IOA_FORMAT_UBLINEAR_2_COLUMN 5 
+#define V3D_TFU_IOA_FORMAT_UIF_NO_XOR 6 +#define V3D_TFU_IOA_FORMAT_UIF_XOR 7 + +#define V3D_TFU_ICFG_NUMMM_SHIFT 5 +#define V3D_TFU_ICFG_TTYPE_SHIFT 9 + +#define V3D_TFU_ICFG_OPAD_SHIFT 22 + +#define V3D_TFU_ICFG_FORMAT_SHIFT 18 +#define V3D_TFU_ICFG_FORMAT_RASTER 0 +#define V3D_TFU_ICFG_FORMAT_SAND_128 1 +#define V3D_TFU_ICFG_FORMAT_SAND_256 2 +#define V3D_TFU_ICFG_FORMAT_LINEARTILE 11 +#define V3D_TFU_ICFG_FORMAT_UBLINEAR_1_COLUMN 12 +#define V3D_TFU_ICFG_FORMAT_UBLINEAR_2_COLUMN 13 +#define V3D_TFU_ICFG_FORMAT_UIF_NO_XOR 14 +#define V3D_TFU_ICFG_FORMAT_UIF_XOR 15 + +static bool +v3d_tfu(struct pipe_context *pctx, + struct pipe_resource *pdst, + struct pipe_resource *psrc, + unsigned int src_level, + unsigned int base_level, + unsigned int last_level, + unsigned int src_layer, + unsigned int dst_layer) +{ + struct v3d_context *v3d = v3d_context(pctx); + struct v3d_screen *screen = v3d->screen; + struct v3d_resource *src = v3d_resource(psrc); + struct v3d_resource *dst = v3d_resource(pdst); + struct v3d_resource_slice *src_base_slice = &src->slices[src_level]; + struct v3d_resource_slice *dst_base_slice = &dst->slices[base_level]; + int msaa_scale = pdst->nr_samples > 1 ? 2 : 1; + int width = u_minify(pdst->width0, base_level) * msaa_scale; + int height = u_minify(pdst->height0, base_level) * msaa_scale; + + if (psrc->format != pdst->format) + return false; + if (psrc->nr_samples != pdst->nr_samples) + return false; + + uint32_t tex_format = v3d_get_tex_format(&screen->devinfo, + pdst->format); + + if (!v3d_tfu_supports_tex_format(&screen->devinfo, tex_format)) + return false; + + if (pdst->target != PIPE_TEXTURE_2D || psrc->target != PIPE_TEXTURE_2D) + return false; + + /* Can't write to raster. 
*/ + if (dst_base_slice->tiling == VC5_TILING_RASTER) + return false; + + v3d_flush_jobs_writing_resource(v3d, psrc); + v3d_flush_jobs_reading_resource(v3d, pdst); + + struct drm_v3d_submit_tfu tfu = { + .ios = (height << 16) | width, + .bo_handles = { + dst->bo->handle, + src != dst ? src->bo->handle : 0 + }, + .in_sync = v3d->out_sync, + .out_sync = v3d->out_sync, + }; + uint32_t src_offset = (src->bo->offset + + v3d_layer_offset(psrc, src_level, src_layer)); + tfu.iia |= src_offset; + if (src_base_slice->tiling == VC5_TILING_RASTER) { + tfu.icfg |= (V3D_TFU_ICFG_FORMAT_RASTER << + V3D_TFU_ICFG_FORMAT_SHIFT); + } else { + tfu.icfg |= ((V3D_TFU_ICFG_FORMAT_LINEARTILE + + (src_base_slice->tiling - VC5_TILING_LINEARTILE)) << + V3D_TFU_ICFG_FORMAT_SHIFT); + } + + uint32_t dst_offset = (dst->bo->offset + + v3d_layer_offset(pdst, src_level, dst_layer)); + tfu.ioa |= dst_offset; + if (last_level != base_level) + tfu.ioa |= V3D_TFU_IOA_DIMTW; + tfu.ioa |= ((V3D_TFU_IOA_FORMAT_LINEARTILE + + (dst_base_slice->tiling - VC5_TILING_LINEARTILE)) << + V3D_TFU_IOA_FORMAT_SHIFT); + + tfu.icfg |= tex_format << V3D_TFU_ICFG_TTYPE_SHIFT; + tfu.icfg |= (last_level - base_level) << V3D_TFU_ICFG_NUMMM_SHIFT; + + switch (src_base_slice->tiling) { + case VC5_TILING_UIF_NO_XOR: + case VC5_TILING_UIF_XOR: + tfu.iis |= (src_base_slice->padded_height / + (2 * v3d_utile_height(src->cpp))); + break; + case VC5_TILING_RASTER: + tfu.iis |= src_base_slice->stride / src->cpp; + break; + case VC5_TILING_LINEARTILE: + case VC5_TILING_UBLINEAR_1_COLUMN: + case VC5_TILING_UBLINEAR_2_COLUMN: + break; + } + + /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the + * OPAD field for the destination (how many extra UIF blocks beyond + * those necessary to cover the height). When filling mipmaps, the + * miplevel 1+ tiling state is inferred. 
+ */ + if (dst_base_slice->tiling == VC5_TILING_UIF_NO_XOR || + dst_base_slice->tiling == VC5_TILING_UIF_XOR) { + int uif_block_h = 2 * v3d_utile_height(dst->cpp); + int implicit_padded_height = align(height, uif_block_h); + + tfu.icfg |= (((dst_base_slice->padded_height - + implicit_padded_height) / uif_block_h) << + V3D_TFU_ICFG_OPAD_SHIFT); + } + + int ret = v3d_ioctl(screen->fd, DRM_IOCTL_V3D_SUBMIT_TFU, &tfu); + if (ret != 0) { + fprintf(stderr, "Failed to submit TFU job: %d\n", ret); + return false; + } + + dst->writes++; + + return true; +} + +boolean +v3d_generate_mipmap(struct pipe_context *pctx, + struct pipe_resource *prsc, + enum pipe_format format, + unsigned int base_level, + unsigned int last_level, + unsigned int first_layer, + unsigned int last_layer) +{ + if (format != prsc->format) + return false; + + /* We could maybe support looping over layers for array textures, but + * we definitely don't support 3D. + */ + if (first_layer != last_layer) + return false; + + return v3d_tfu(pctx, + prsc, prsc, + base_level, + base_level, last_level, + first_layer, first_layer); +} + +static bool +v3d_tfu_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) +{ + int dst_width = u_minify(info->dst.resource->width0, info->dst.level); + int dst_height = u_minify(info->dst.resource->height0, info->dst.level); + + if ((info->mask & PIPE_MASK_RGBA) == 0) + return false; + + if (info->dst.box.x != 0 || + info->dst.box.y != 0 || + info->dst.box.width != dst_width || + info->dst.box.height != dst_height || + info->src.box.x != 0 || + info->src.box.y != 0 || + info->src.box.width != info->dst.box.width || + info->src.box.height != info->dst.box.height) { + return false; + } + + if (info->dst.format != info->src.format) + return false; + + return v3d_tfu(pctx, info->dst.resource, info->src.resource, + info->src.level, + info->dst.level, info->dst.level, + info->src.box.z, info->dst.box.z); +} + /* Optimal hardware path for blitting pixels. 
* Scaling, format conversion, up- and downsampling (resolve) are allowed. */ void v3d_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) { + struct v3d_context *v3d = v3d_context(pctx); struct pipe_blit_info info = *blit_info; if (info.mask & PIPE_MASK_S) { @@ -329,10 +525,16 @@ info.mask &= ~PIPE_MASK_S; } -#if 0 - if (v3d_tile_blit(pctx, blit_info)) - return; -#endif + if (v3d_tfu_blit(pctx, blit_info)) + info.mask &= ~PIPE_MASK_RGBA; + + if (info.mask) + v3d_render_blit(pctx, &info); - v3d_render_blit(pctx, &info); + /* Flush our blit jobs immediately. They're unlikely to get reused by + * normal drawing or other blits, and without flushing we can easily + * run into unexpected OOMs when blits are used for a large series of + * texture uploads before using the textures. + */ + v3d_flush_jobs_writing_resource(v3d, info.dst.resource); } diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_bufmgr.c mesa-19.0.1/src/gallium/drivers/v3d/v3d_bufmgr.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_bufmgr.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_bufmgr.c 2019-03-31 23:16:37.000000000 +0000 @@ -331,7 +331,6 @@ static struct v3d_bo * v3d_bo_open_handle(struct v3d_screen *screen, - uint32_t winsys_stride, uint32_t handle, uint32_t size) { struct v3d_bo *bo; @@ -355,8 +354,7 @@ bo->private = false; #ifdef USE_V3D_SIMULATOR - v3d_simulator_open_from_handle(screen->fd, winsys_stride, - bo->handle, bo->size); + v3d_simulator_open_from_handle(screen->fd, bo->handle, bo->size); bo->map = malloc(bo->size); #endif @@ -376,14 +374,16 @@ util_hash_table_set(screen->bo_handles, (void *)(uintptr_t)handle, bo); + screen->bo_count++; + screen->bo_size += bo->size; + done: mtx_unlock(&screen->bo_handles_mutex); return bo; } struct v3d_bo * -v3d_bo_open_name(struct v3d_screen *screen, uint32_t name, - uint32_t winsys_stride) +v3d_bo_open_name(struct v3d_screen *screen, uint32_t name) { struct drm_gem_open o = { .name = name @@ -395,11 
+395,11 @@ return NULL; } - return v3d_bo_open_handle(screen, winsys_stride, o.handle, o.size); + return v3d_bo_open_handle(screen, o.handle, o.size); } struct v3d_bo * -v3d_bo_open_dmabuf(struct v3d_screen *screen, int fd, uint32_t winsys_stride) +v3d_bo_open_dmabuf(struct v3d_screen *screen, int fd) { uint32_t handle; int ret = drmPrimeFDToHandle(screen->fd, fd, &handle); @@ -416,7 +416,7 @@ return NULL; } - return v3d_bo_open_handle(screen, winsys_stride, handle, size); + return v3d_bo_open_handle(screen, handle, size); } int diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_bufmgr.h mesa-19.0.1/src/gallium/drivers/v3d/v3d_bufmgr.h --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_bufmgr.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_bufmgr.h 2019-03-31 23:16:37.000000000 +0000 @@ -60,10 +60,8 @@ const char *name); void v3d_bo_last_unreference(struct v3d_bo *bo); void v3d_bo_last_unreference_locked_timed(struct v3d_bo *bo, time_t time); -struct v3d_bo *v3d_bo_open_name(struct v3d_screen *screen, uint32_t name, - uint32_t winsys_stride); -struct v3d_bo *v3d_bo_open_dmabuf(struct v3d_screen *screen, int fd, - uint32_t winsys_stride); +struct v3d_bo *v3d_bo_open_name(struct v3d_screen *screen, uint32_t name); +struct v3d_bo *v3d_bo_open_dmabuf(struct v3d_screen *screen, int fd); bool v3d_bo_flink(struct v3d_bo *bo, uint32_t *name); int v3d_bo_get_dmabuf(struct v3d_bo *bo); diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_context.c mesa-19.0.1/src/gallium/drivers/v3d/v3d_context.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -37,6 +37,7 @@ #include "v3d_screen.h" #include "v3d_context.h" #include "v3d_resource.h" +#include "broadcom/compiler/v3d_compiler.h" void v3d_flush(struct pipe_context *pctx) @@ -66,6 +67,28 @@ } static void +v3d_memory_barrier(struct pipe_context *pctx, unsigned int flags) +{ + 
struct v3d_context *v3d = v3d_context(pctx); + + /* We only need to flush jobs writing to SSBOs/images. */ + perf_debug("Flushing all jobs for glMemoryBarrier(), could do better"); + v3d_flush(pctx); +} + +static void +v3d_set_debug_callback(struct pipe_context *pctx, + const struct pipe_debug_callback *cb) +{ + struct v3d_context *v3d = v3d_context(pctx); + + if (cb) + v3d->debug = *cb; + else + memset(&v3d->debug, 0, sizeof(v3d->debug)); +} + +static void v3d_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc) { struct v3d_context *v3d = v3d_context(pctx); @@ -98,6 +121,8 @@ if (v3d->uploader) u_upload_destroy(v3d->uploader); + if (v3d->state_uploader) + u_upload_destroy(v3d->state_uploader); slab_destroy_child(&v3d->transfer_pool); @@ -109,6 +134,27 @@ ralloc_free(v3d); } +static void +v3d_get_sample_position(struct pipe_context *pctx, + unsigned sample_count, unsigned sample_index, + float *xy) +{ + struct v3d_context *v3d = v3d_context(pctx); + + if (sample_count <= 1) { + xy[0] = 0.5; + xy[1] = 0.5; + } else { + static const int xoffsets_v33[] = { 1, -3, 3, -1 }; + static const int xoffsets_v42[] = { -1, 3, -3, 1 }; + const int *xoffsets = (v3d->screen->devinfo.ver >= 42 ? 
+ xoffsets_v42 : xoffsets_v33); + + xy[0] = 0.5 + xoffsets[sample_index] * .125; + xy[1] = .125 + sample_index * .25; + } +} + struct pipe_context * v3d_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) { @@ -137,7 +183,10 @@ pctx->priv = priv; pctx->destroy = v3d_context_destroy; pctx->flush = v3d_pipe_flush; + pctx->memory_barrier = v3d_memory_barrier; + pctx->set_debug_callback = v3d_set_debug_callback; pctx->invalidate_resource = v3d_invalidate_resource; + pctx->get_sample_position = v3d_get_sample_position; if (screen->devinfo.ver >= 41) { v3d41_draw_init(pctx); @@ -159,6 +208,10 @@ v3d->uploader = u_upload_create_default(&v3d->base); v3d->base.stream_uploader = v3d->uploader; v3d->base.const_uploader = v3d->uploader; + v3d->state_uploader = u_upload_create(&v3d->base, + 4096, + PIPE_BIND_CONSTANT_BUFFER, + PIPE_USAGE_STREAM, 0); v3d->blitter = util_blitter_create(pctx); if (!v3d->blitter) @@ -172,7 +225,7 @@ V3D_DEBUG |= saved_shaderdb_flag; - v3d->sample_mask = (1 << VC5_MAX_SAMPLES) - 1; + v3d->sample_mask = (1 << V3D_MAX_SAMPLES) - 1; v3d->active_queries = true; return &v3d->base; diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_context.h mesa-19.0.1/src/gallium/drivers/v3d/v3d_context.h --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_context.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -38,6 +38,7 @@ #include "xf86drm.h" #include "v3d_drm.h" #include "v3d_screen.h" +#include "broadcom/common/v3d_limits.h" struct v3d_job; struct v3d_bo; @@ -58,6 +59,7 @@ #define VC5_DIRTY_ZSA (1 << 2) #define VC5_DIRTY_FRAGTEX (1 << 3) #define VC5_DIRTY_VERTTEX (1 << 4) +#define VC5_DIRTY_SHADER_IMAGE (1 << 5) #define VC5_DIRTY_BLEND_COLOR (1 << 7) #define VC5_DIRTY_STENCIL_REF (1 << 8) @@ -82,9 +84,39 @@ #define VC5_DIRTY_OQ (1 << 28) #define VC5_DIRTY_CENTROID_FLAGS (1 << 29) #define VC5_DIRTY_NOPERSPECTIVE_FLAGS (1 << 30) +#define VC5_DIRTY_SSBO (1 << 31) #define 
VC5_MAX_FS_INPUTS 64 +enum v3d_sampler_state_variant { + V3D_SAMPLER_STATE_BORDER_0, + V3D_SAMPLER_STATE_F16, + V3D_SAMPLER_STATE_F16_UNORM, + V3D_SAMPLER_STATE_F16_SNORM, + V3D_SAMPLER_STATE_F16_BGRA, + V3D_SAMPLER_STATE_F16_BGRA_UNORM, + V3D_SAMPLER_STATE_F16_BGRA_SNORM, + V3D_SAMPLER_STATE_F16_A, + V3D_SAMPLER_STATE_F16_A_SNORM, + V3D_SAMPLER_STATE_F16_A_UNORM, + V3D_SAMPLER_STATE_F16_LA, + V3D_SAMPLER_STATE_F16_LA_UNORM, + V3D_SAMPLER_STATE_F16_LA_SNORM, + V3D_SAMPLER_STATE_32, + V3D_SAMPLER_STATE_32_UNORM, + V3D_SAMPLER_STATE_32_SNORM, + V3D_SAMPLER_STATE_32_A, + V3D_SAMPLER_STATE_32_A_UNORM, + V3D_SAMPLER_STATE_32_A_SNORM, + V3D_SAMPLER_STATE_1010102U, + V3D_SAMPLER_STATE_16U, + V3D_SAMPLER_STATE_16I, + V3D_SAMPLER_STATE_8I, + V3D_SAMPLER_STATE_8U, + + V3D_SAMPLER_STATE_VARIANT_COUNT, +}; + struct v3d_sampler_view { struct pipe_sampler_view base; uint32_t p0; @@ -95,6 +127,14 @@ uint8_t texture_shader_state[32]; /* V3D 4.x: Texture state struct. */ struct v3d_bo *bo; + + enum v3d_sampler_state_variant sampler_variant; + + /* Actual texture to be read by this sampler view. May be different + * from base.texture in the case of having a shadow tiled copy of a + * raster texture. + */ + struct pipe_resource *texture; }; struct v3d_sampler_state { @@ -105,15 +145,18 @@ /* V3D 3.x: Packed texture state. */ uint8_t texture_shader_state[32]; /* V3D 4.x: Sampler state struct. 
*/ - struct v3d_bo *bo; + struct pipe_resource *sampler_state; + uint32_t sampler_state_offset[V3D_SAMPLER_STATE_VARIANT_COUNT]; + + bool border_color_variants; }; struct v3d_texture_stateobj { - struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_view *textures[V3D_MAX_TEXTURE_SAMPLERS]; unsigned num_textures; - struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_state *samplers[V3D_MAX_TEXTURE_SAMPLERS]; unsigned num_samplers; - struct v3d_cl_reloc texture_state[PIPE_MAX_SAMPLERS]; + struct v3d_cl_reloc texture_state[V3D_MAX_TEXTURE_SAMPLERS]; }; struct v3d_shader_uniform_info { @@ -143,7 +186,8 @@ }; struct v3d_compiled_shader { - struct v3d_bo *bo; + struct pipe_resource *resource; + uint32_t offset; union { struct v3d_prog_data *base; @@ -181,11 +225,12 @@ }; struct v3d_vertex_stateobj { - struct pipe_vertex_element pipe[VC5_MAX_ATTRIBUTES]; + struct pipe_vertex_element pipe[V3D_MAX_VS_INPUTS / 4]; unsigned num_elements; - uint8_t attrs[16 * VC5_MAX_ATTRIBUTES]; - struct v3d_bo *default_attribute_values; + uint8_t attrs[16 * (V3D_MAX_VS_INPUTS / 4)]; + struct pipe_resource *defaults; + uint32_t defaults_offset; }; struct v3d_streamout_stateobj { @@ -195,6 +240,11 @@ unsigned num_targets; }; +struct v3d_ssbo_stateobj { + struct pipe_shader_buffer sb[PIPE_MAX_SHADER_BUFFERS]; + uint32_t enabled_mask; +}; + /* Hash table key for v3d->jobs */ struct v3d_job_key { struct pipe_surface *cbufs[4]; @@ -208,6 +258,18 @@ VC5_EZ_DISABLED, }; +struct v3d_image_view { + struct pipe_image_view base; + /* V3D 4.x texture shader state struct */ + struct pipe_resource *tex_state; + uint32_t tex_state_offset; +}; + +struct v3d_shaderimg_stateobj { + struct v3d_image_view si[PIPE_MAX_SHADER_IMAGES]; + uint32_t enabled_mask; +}; + /** * A complete bin/render job. * @@ -300,6 +362,11 @@ */ bool needs_flush; + /* Set if any shader has dirtied cachelines in the TMU that need to be + * flushed before job end. 
+ */ + bool tmu_dirty_rcl; + /** * Set if a packet enabling TF has been emitted in the job (V3D 4.x). */ @@ -365,10 +432,19 @@ /** Maximum index buffer valid for the current shader_rec. */ uint32_t max_index; - /** Sync object that our RCL will update as its out_sync. */ + /** Sync object that our RCL or TFU job will update as its out_sync. */ uint32_t out_sync; + /* Stream uploader used by gallium internals. This could also be used + * by driver internals, but we tend to use the v3d_cl.h interfaces + * instead. + */ struct u_upload_mgr *uploader; + /* State uploader used inside the driver. This is for packing bits of + * long-term state inside buffers, since the kernel interfaces + * allocate a page at a time. + */ + struct u_upload_mgr *state_uploader; /** @{ Current pipeline state objects */ struct pipe_scissor_state scissor; @@ -376,8 +452,6 @@ struct v3d_rasterizer_state *rasterizer; struct v3d_depth_stencil_alpha_state *zsa; - struct v3d_texture_stateobj verttex, fragtex; - struct v3d_program_stateobj prog; struct v3d_vertex_stateobj *vtx; @@ -413,10 +487,14 @@ struct pipe_poly_stipple stipple; struct pipe_clip_state clip; struct pipe_viewport_state viewport; + struct v3d_ssbo_stateobj ssbo[PIPE_SHADER_TYPES]; + struct v3d_shaderimg_stateobj shaderimg[PIPE_SHADER_TYPES]; struct v3d_constbuf_stateobj constbuf[PIPE_SHADER_TYPES]; + struct v3d_texture_stateobj tex[PIPE_SHADER_TYPES]; struct v3d_vertexbuf_stateobj vertexbuf; struct v3d_streamout_stateobj streamout; struct v3d_bo *current_oq; + struct pipe_debug_callback debug; /** @} */ }; @@ -448,8 +526,13 @@ #define perf_debug(...) 
do { \ if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF)) \ fprintf(stderr, __VA_ARGS__); \ + if (unlikely(v3d->debug.debug_message)) \ + pipe_debug_message(&v3d->debug, PERF_INFO, __VA_ARGS__); \ } while (0) +#define foreach_bit(b, mask) \ + for (uint32_t _m = (mask), b; _m && ({(b) = u_bit_scan(&_m); 1;});) + static inline struct v3d_context * v3d_context(struct pipe_context *pcontext) { @@ -476,12 +559,8 @@ void v3d_simulator_init(struct v3d_screen *screen); void v3d_simulator_destroy(struct v3d_screen *screen); -int v3d_simulator_flush(struct v3d_context *v3d, - struct drm_v3d_submit_cl *args, - struct v3d_job *job); int v3d_simulator_ioctl(int fd, unsigned long request, void *arg); -void v3d_simulator_open_from_handle(int fd, uint32_t winsys_stride, - int handle, uint32_t size); +void v3d_simulator_open_from_handle(int fd, int handle, uint32_t size); static inline int v3d_ioctl(int fd, unsigned long request, void *arg) @@ -495,8 +574,7 @@ void v3d_set_shader_uniform_dirty_flags(struct v3d_compiled_shader *shader); struct v3d_cl_reloc v3d_write_uniforms(struct v3d_context *v3d, struct v3d_compiled_shader *shader, - struct v3d_constbuf_stateobj *cb, - struct v3d_texture_stateobj *texstate); + enum pipe_shader_type stage); void v3d_flush(struct pipe_context *pctx); void v3d_job_init(struct v3d_context *v3d); @@ -530,10 +608,19 @@ uint32_t format, uint32_t *type, uint32_t *bpp); +bool v3d_tfu_supports_tex_format(const struct v3d_device_info *devinfo, + uint32_t tex_format); void v3d_init_query_functions(struct v3d_context *v3d); void v3d_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info); void v3d_blitter_save(struct v3d_context *v3d); +boolean v3d_generate_mipmap(struct pipe_context *pctx, + struct pipe_resource *prsc, + enum pipe_format format, + unsigned int base_level, + unsigned int last_level, + unsigned int first_layer, + unsigned int last_layer); struct v3d_fence *v3d_fence_create(struct v3d_context *v3d); diff -Nru 
mesa-18.3.3/src/gallium/drivers/v3d/v3d_formats.c mesa-19.0.1/src/gallium/drivers/v3d/v3d_formats.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_formats.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_formats.c 2019-03-31 23:16:37.000000000 +0000 @@ -142,3 +142,14 @@ type, bpp); } } + +bool +v3d_tfu_supports_tex_format(const struct v3d_device_info *devinfo, + uint32_t tex_format) +{ + if (devinfo->ver >= 41) { + return v3d41_tfu_supports_tex_format(tex_format); + } else { + return v3d33_tfu_supports_tex_format(tex_format); + } +} diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_job.c mesa-19.0.1/src/gallium/drivers/v3d/v3d_job.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_job.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_job.c 2019-03-31 23:16:37.000000000 +0000 @@ -62,7 +62,7 @@ } } - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { if (job->cbufs[i]) { remove_from_ht(v3d->write_jobs, job->cbufs[i]->texture); pipe_surface_reference(&job->cbufs[i], NULL); @@ -204,7 +204,7 @@ tile_size_index++; int max_bpp = RENDER_TARGET_MAXIMUM_32BPP; - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { if (job->cbufs[i]) { struct v3d_surface *surf = v3d_surface(job->cbufs[i]); max_bpp = MAX2(max_bpp, surf->internal_bpp); @@ -222,7 +222,7 @@ /** * Returns a v3d_job struture for tracking V3D rendering to a particular FBO. * - * If we've already started rendering to this FBO, then return old same job, + * If we've already started rendering to this FBO, then return the same job, * otherwise make a new one. If we're beginning rendering to an FBO, make * sure that any previous reads of the FBO (or writes to its color/Z surfaces) * have been flushed. 
@@ -251,7 +251,7 @@ */ struct v3d_job *job = v3d_job_create(v3d); - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { if (cbufs[i]) { v3d_flush_jobs_reading_resource(v3d, cbufs[i]->texture); pipe_surface_reference(&job->cbufs[i], cbufs[i]); @@ -267,9 +267,7 @@ job->msaa = true; } - v3d_job_set_tile_buffer_size(job); - - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { if (cbufs[i]) _mesa_hash_table_insert(v3d->write_jobs, cbufs[i]->texture, job); @@ -303,6 +301,11 @@ struct pipe_surface *zsbuf = v3d->framebuffer.zsbuf; struct v3d_job *job = v3d_get_job(v3d, cbufs, zsbuf); + if (v3d->framebuffer.samples >= 1) + job->msaa = true; + + v3d_job_set_tile_buffer_size(job); + /* The dirty flags are tracking what's been updated while v3d->job has * been bound, so set them all to ~0 when switching between jobs. We * also need to reset all state at the start of rendering. @@ -385,7 +388,15 @@ v3d33_bcl_epilogue(v3d, job); } + /* While the RCL will implicitly depend on the last RCL to have + * finished, we also need to block on any previous TFU job we may have + * dispatched. + */ + job->submit.in_sync_rcl = v3d->out_sync; + + /* Update the sync object for the last rendering by our context. */ job->submit.out_sync = v3d->out_sync; + job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl); job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl); @@ -406,11 +417,7 @@ if (!(V3D_DEBUG & V3D_DEBUG_NORAST)) { int ret; -#ifndef USE_V3D_SIMULATOR - ret = drmIoctl(v3d->fd, DRM_IOCTL_V3D_SUBMIT_CL, &job->submit); -#else - ret = v3d_simulator_flush(v3d, &job->submit, job); -#endif + ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_SUBMIT_CL, &job->submit); static bool warned = false; if (ret && !warned) { fprintf(stderr, "Draw call returned %s. 
" diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_program.c mesa-19.0.1/src/gallium/drivers/v3d/v3d_program.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_program.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_program.c 2019-03-31 23:16:37.000000000 +0000 @@ -27,6 +27,7 @@ #include "util/u_memory.h" #include "util/ralloc.h" #include "util/hash_table.h" +#include "util/u_upload_mgr.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_parse.h" #include "compiler/nir/nir.h" @@ -37,6 +38,12 @@ #include "broadcom/cle/v3d_packet_v33_pack.h" #include "mesa/state_tracker/st_glsl_types.h" +static struct v3d_compiled_shader * +v3d_get_compiled_shader(struct v3d_context *v3d, struct v3d_key *key); +static void +v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled, + struct v3d_key *key); + static gl_varying_slot v3d_get_slot_for_driver_location(nir_shader *s, uint32_t driver_location) { @@ -174,6 +181,70 @@ return st_glsl_storage_type_size(type, false); } +/** + * Precompiles a shader variant at shader state creation time if + * V3D_DEBUG=precompile is set. 
Used for shader-db + * (https://gitlab.freedesktop.org/mesa/shader-db) + */ +static void +v3d_shader_precompile(struct v3d_context *v3d, + struct v3d_uncompiled_shader *so) +{ + nir_shader *s = so->base.ir.nir; + + if (s->info.stage == MESA_SHADER_FRAGMENT) { + struct v3d_fs_key key = { + .base.shader_state = so, + }; + + nir_foreach_variable(var, &s->outputs) { + if (var->data.location == FRAG_RESULT_COLOR) { + key.nr_cbufs = 1; + } else if (var->data.location == FRAG_RESULT_DATA0) { + key.nr_cbufs = MAX2(key.nr_cbufs, + var->data.location - + FRAG_RESULT_DATA0 + 1); + } + } + + v3d_setup_shared_precompile_key(so, &key.base); + v3d_get_compiled_shader(v3d, &key.base); + } else { + struct v3d_vs_key key = { + .base.shader_state = so, + }; + + v3d_setup_shared_precompile_key(so, &key.base); + + /* Compile VS: All outputs */ + nir_foreach_variable(var, &s->outputs) { + unsigned array_len = MAX2(glsl_get_length(var->type), 1); + assert(array_len == 1); + (void)array_len; + + int slot = var->data.location; + for (int i = 0; i < glsl_get_components(var->type); i++) { + int swiz = var->data.location_frac + i; + key.fs_inputs[key.num_fs_inputs++] = + v3d_slot_from_slot_and_component(slot, + swiz); + } + } + + v3d_get_compiled_shader(v3d, &key.base); + + /* Compile VS bin shader: only position (XXX: include TF) */ + key.is_coord = true; + key.num_fs_inputs = 0; + for (int i = 0; i < 4; i++) { + key.fs_inputs[key.num_fs_inputs++] = + v3d_slot_from_slot_and_component(VARYING_SLOT_POS, + i); + } + v3d_get_compiled_shader(v3d, &key.base); + } +} + static void * v3d_shader_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) @@ -225,7 +296,7 @@ v3d_optimize_nir(s); - NIR_PASS_V(s, nir_remove_dead_variables, nir_var_local); + NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp); /* Garbage collect dead instructions */ nir_sweep(s); @@ -244,9 +315,20 @@ fprintf(stderr, "\n"); } + if (V3D_DEBUG & V3D_DEBUG_PRECOMPILE) + v3d_shader_precompile(v3d, 
so); + return so; } +static void +v3d_shader_debug_output(const char *message, void *data) +{ + struct v3d_context *v3d = data; + + pipe_debug_message(&v3d->debug, SHADER_INFO, "%s", message); +} + static struct v3d_compiled_shader * v3d_get_compiled_shader(struct v3d_context *v3d, struct v3d_key *key) { @@ -276,34 +358,19 @@ uint64_t *qpu_insts; uint32_t shader_size; - switch (s->info.stage) { - case MESA_SHADER_VERTEX: - shader->prog_data.vs = rzalloc(shader, struct v3d_vs_prog_data); - - qpu_insts = v3d_compile_vs(v3d->screen->compiler, - (struct v3d_vs_key *)key, - shader->prog_data.vs, s, - program_id, variant_id, - &shader_size); - break; - case MESA_SHADER_FRAGMENT: - shader->prog_data.fs = rzalloc(shader, struct v3d_fs_prog_data); - - qpu_insts = v3d_compile_fs(v3d->screen->compiler, - (struct v3d_fs_key *)key, - shader->prog_data.fs, s, - program_id, variant_id, - &shader_size); - break; - default: - unreachable("bad stage"); - } + qpu_insts = v3d_compile(v3d->screen->compiler, key, + &shader->prog_data.base, s, + v3d_shader_debug_output, + v3d, + program_id, variant_id, &shader_size); + ralloc_steal(shader, shader->prog_data.base); v3d_set_shader_uniform_dirty_flags(shader); - shader->bo = v3d_bo_alloc(v3d->screen, shader_size, "shader"); - v3d_bo_map(shader->bo); - memcpy(shader->bo->map, qpu_insts, shader_size); + if (shader_size) { + u_upload_data(v3d->state_uploader, 0, shader_size, 8, + qpu_insts, &shader->offset, &shader->resource); + } free(qpu_insts); @@ -331,6 +398,13 @@ } static void +v3d_free_compiled_shader(struct v3d_compiled_shader *shader) +{ + pipe_resource_reference(&shader->resource, NULL); + ralloc_free(shader); +} + +static void v3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key, struct v3d_texture_stateobj *texstate) { @@ -379,8 +453,6 @@ } if (sampler) { - key->tex[i].compare_mode = sampler_state->compare_mode; - key->tex[i].compare_func = sampler_state->compare_func; key->tex[i].clamp_s = sampler_state->wrap_s == 
PIPE_TEX_WRAP_CLAMP; key->tex[i].clamp_t = @@ -394,6 +466,23 @@ } static void +v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled, + struct v3d_key *key) +{ + nir_shader *s = uncompiled->base.ir.nir; + + for (int i = 0; i < s->info.num_textures; i++) { + key->tex[i].return_size = 16; + key->tex[i].return_channels = 2; + + key->tex[i].swizzle[0] = PIPE_SWIZZLE_X; + key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y; + key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z; + key->tex[i].swizzle[3] = PIPE_SWIZZLE_W; + } +} + +static void v3d_update_compiled_fs(struct v3d_context *v3d, uint8_t prim_mode) { struct v3d_job *job = v3d->job; @@ -412,7 +501,7 @@ } memset(key, 0, sizeof(*key)); - v3d_setup_shared_key(v3d, &key->base, &v3d->fragtex); + v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_FRAGMENT]); key->base.shader_state = v3d->prog.bind_fs; key->is_points = (prim_mode == PIPE_PRIM_POINTS); key->is_lines = (prim_mode >= PIPE_PRIM_LINES && @@ -426,7 +515,7 @@ if (job->msaa) { key->msaa = v3d->rasterizer->base.multisample; key->sample_coverage = (v3d->rasterizer->base.multisample && - v3d->sample_mask != (1 << VC5_MAX_SAMPLES) - 1); + v3d->sample_mask != (1 << V3D_MAX_SAMPLES) - 1); key->sample_alpha_to_coverage = v3d->blend->base.alpha_to_coverage; key->sample_alpha_to_one = v3d->blend->base.alpha_to_one; } @@ -523,7 +612,7 @@ } memset(key, 0, sizeof(*key)); - v3d_setup_shared_key(v3d, &key->base, &v3d->verttex); + v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_VERTEX]); key->base.shader_state = v3d->prog.bind_vs; key->num_fs_inputs = v3d->prog.fs->prog_data.fs->base.num_inputs; STATIC_ASSERT(sizeof(key->fs_inputs) == @@ -606,12 +695,11 @@ if (key->shader_state == so) { struct v3d_compiled_shader *shader = entry->data; _mesa_hash_table_remove(ht, entry); - v3d_bo_unreference(&shader->bo); if (shader == *last_compile) *last_compile = NULL; - ralloc_free(shader); + v3d_free_compiled_shader(shader); } } @@ -677,15 +765,13 @@ 
hash_table_foreach(v3d->fs_cache, entry) { struct v3d_compiled_shader *shader = entry->data; - v3d_bo_unreference(&shader->bo); - ralloc_free(shader); + v3d_free_compiled_shader(shader); _mesa_hash_table_remove(v3d->fs_cache, entry); } hash_table_foreach(v3d->vs_cache, entry) { struct v3d_compiled_shader *shader = entry->data; - v3d_bo_unreference(&shader->bo); - ralloc_free(shader); + v3d_free_compiled_shader(shader); _mesa_hash_table_remove(v3d->vs_cache, entry); } diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_resource.c mesa-19.0.1/src/gallium/drivers/v3d/v3d_resource.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_resource.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_resource.c 2019-03-31 23:16:37.000000000 +0000 @@ -146,37 +146,13 @@ slab_free(&v3d->transfer_pool, ptrans); } -static void * -v3d_resource_transfer_map(struct pipe_context *pctx, - struct pipe_resource *prsc, - unsigned level, unsigned usage, - const struct pipe_box *box, - struct pipe_transfer **pptrans) +static void +v3d_map_usage_prep(struct pipe_context *pctx, + struct pipe_resource *prsc, + unsigned usage) { struct v3d_context *v3d = v3d_context(pctx); struct v3d_resource *rsc = v3d_resource(prsc); - struct v3d_transfer *trans; - struct pipe_transfer *ptrans; - enum pipe_format format = prsc->format; - char *buf; - - /* MSAA maps should have been handled by u_transfer_helper. */ - assert(prsc->nr_samples <= 1); - - /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is - * being mapped. 
- */ - if ((usage & PIPE_TRANSFER_DISCARD_RANGE) && - !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && - !(prsc->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) && - prsc->last_level == 0 && - prsc->width0 == box->width && - prsc->height0 == box->height && - prsc->depth0 == box->depth && - prsc->array_size == 1 && - rsc->bo->private) { - usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; - } if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { if (v3d_resource_bo_alloc(rsc)) { @@ -209,6 +185,41 @@ rsc->writes++; rsc->initialized_buffers = ~0; } +} + +static void * +v3d_resource_transfer_map(struct pipe_context *pctx, + struct pipe_resource *prsc, + unsigned level, unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **pptrans) +{ + struct v3d_context *v3d = v3d_context(pctx); + struct v3d_resource *rsc = v3d_resource(prsc); + struct v3d_transfer *trans; + struct pipe_transfer *ptrans; + enum pipe_format format = prsc->format; + char *buf; + + /* MSAA maps should have been handled by u_transfer_helper. */ + assert(prsc->nr_samples <= 1); + + /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is + * being mapped. 
+ */ + if ((usage & PIPE_TRANSFER_DISCARD_RANGE) && + !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && + !(prsc->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) && + prsc->last_level == 0 && + prsc->width0 == box->width && + prsc->height0 == box->height && + prsc->depth0 == box->depth && + prsc->array_size == 1 && + rsc->bo->private) { + usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; + } + + v3d_map_usage_prep(pctx, prsc, usage); trans = slab_alloc(&v3d->transfer_pool); if (!trans) @@ -296,11 +307,60 @@ } static void +v3d_texture_subdata(struct pipe_context *pctx, + struct pipe_resource *prsc, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride) +{ + struct v3d_resource *rsc = v3d_resource(prsc); + struct v3d_resource_slice *slice = &rsc->slices[level]; + + /* For a direct mapping, we can just take the u_transfer path. */ + if (!rsc->tiled) { + return u_default_texture_subdata(pctx, prsc, level, usage, box, + data, stride, layer_stride); + } + + /* Otherwise, map and store the texture data directly into the tiled + * texture. Note that gallium's texture_subdata may be called with + * obvious usage flags missing! 
+ */ + v3d_map_usage_prep(pctx, prsc, usage | (PIPE_TRANSFER_WRITE | + PIPE_TRANSFER_DISCARD_RANGE)); + + void *buf; + if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) + buf = v3d_bo_map_unsynchronized(rsc->bo); + else + buf = v3d_bo_map(rsc->bo); + + for (int i = 0; i < box->depth; i++) { + v3d_store_tiled_image(buf + + v3d_layer_offset(&rsc->base, + level, + box->z + i), + slice->stride, + (void *)data + layer_stride * i, + stride, + slice->tiling, rsc->cpp, slice->padded_height, + box); + } +} + +static void v3d_resource_destroy(struct pipe_screen *pscreen, struct pipe_resource *prsc) { + struct v3d_screen *screen = v3d_screen(pscreen); struct v3d_resource *rsc = v3d_resource(prsc); + if (rsc->scanout) + renderonly_scanout_destroy(rsc->scanout, screen->ro); + v3d_bo_unreference(&rsc->bo); free(rsc); } @@ -312,6 +372,7 @@ struct winsys_handle *whandle, unsigned usage) { + struct v3d_screen *screen = v3d_screen(pscreen); struct v3d_resource *rsc = v3d_resource(prsc); struct v3d_bo *bo = rsc->bo; @@ -339,6 +400,10 @@ case WINSYS_HANDLE_TYPE_SHARED: return v3d_bo_flink(bo, &whandle->handle); case WINSYS_HANDLE_TYPE_KMS: + if (screen->ro) { + assert(rsc->scanout); + return renderonly_get_handle(rsc->scanout, whandle); + } whandle->handle = bo->handle; return TRUE; case WINSYS_HANDLE_TYPE_FD: @@ -396,7 +461,7 @@ } static void -v3d_setup_slices(struct v3d_resource *rsc) +v3d_setup_slices(struct v3d_resource *rsc, uint32_t winsys_stride) { struct pipe_resource *prsc = &rsc->base; uint32_t width = prsc->width0; @@ -423,6 +488,12 @@ */ bool uif_top = msaa; + /* Check some easy mistakes to make in a resource_create() call that + * will break our setup. 
+ */ + assert(prsc->array_size != 0); + assert(prsc->depth0 != 0); + for (int i = prsc->last_level; i >= 0; i--) { struct v3d_resource_slice *slice = &rsc->slices[i]; @@ -498,7 +569,10 @@ } slice->offset = offset; - slice->stride = level_width * rsc->cpp; + if (winsys_stride) + slice->stride = winsys_stride; + else + slice->stride = level_width * rsc->cpp; slice->padded_height = level_height; slice->size = level_height * slice->stride; @@ -630,6 +704,43 @@ const uint64_t *modifiers, int count) { + struct v3d_screen *screen = v3d_screen(pscreen); + + /* If we're in a renderonly setup, use the other device to perform our + * (linear) allocation and just import it to v3d. The other device + * may be using CMA, and V3D can import from CMA but doesn't do CMA + * allocations on its own. + * + * We always allocate this way for SHARED, because get_handle will + * need a resource on the display fd. + */ + if (screen->ro && (tmpl->bind & (PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED))) { + struct winsys_handle handle; + struct pipe_resource scanout_tmpl = *tmpl; + struct renderonly_scanout *scanout = + renderonly_scanout_for_resource(&scanout_tmpl, + screen->ro, + &handle); + if (!scanout) { + fprintf(stderr, "Failed to create scanout resource\n"); + return NULL; + } + assert(handle.type == WINSYS_HANDLE_TYPE_FD); + /* The fd is all we need. Destroy the old scanout (and its + * GEM handle on kms_fd) before resource_from_handle()'s + * renderonly_create_gpu_import_for_resource() call which will + * also get a kms_fd GEM handle for the fd. 
+ */ + renderonly_scanout_destroy(scanout, screen->ro); + struct pipe_resource *prsc = + pscreen->resource_from_handle(pscreen, tmpl, + &handle, + PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE); + close(handle.handle); + return prsc; + } + bool linear_ok = find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count); struct v3d_resource *rsc = v3d_resource_setup(pscreen, tmpl); struct pipe_resource *prsc = &rsc->base; @@ -669,14 +780,15 @@ rsc->tiled = false; } else { fprintf(stderr, "Unsupported modifier requested\n"); - return NULL; + goto fail; } rsc->internal_format = prsc->format; - v3d_setup_slices(rsc); + v3d_setup_slices(rsc, 0); + if (!v3d_resource_bo_alloc(rsc)) - goto fail; + goto fail; return prsc; fail: @@ -711,9 +823,11 @@ rsc->tiled = false; break; case DRM_FORMAT_MOD_BROADCOM_UIF: - case DRM_FORMAT_MOD_INVALID: rsc->tiled = true; break; + case DRM_FORMAT_MOD_INVALID: + rsc->tiled = screen->ro == NULL; + break; default: fprintf(stderr, "Attempt to import unsupported modifier 0x%llx\n", @@ -730,12 +844,10 @@ switch (whandle->type) { case WINSYS_HANDLE_TYPE_SHARED: - rsc->bo = v3d_bo_open_name(screen, - whandle->handle, whandle->stride); + rsc->bo = v3d_bo_open_name(screen, whandle->handle); break; case WINSYS_HANDLE_TYPE_FD: - rsc->bo = v3d_bo_open_dmabuf(screen, - whandle->handle, whandle->stride); + rsc->bo = v3d_bo_open_dmabuf(screen, whandle->handle); break; default: fprintf(stderr, @@ -749,9 +861,24 @@ rsc->internal_format = prsc->format; - v3d_setup_slices(rsc); + v3d_setup_slices(rsc, whandle->stride); v3d_debug_resource_layout(rsc, "import"); + if (screen->ro) { + /* Make sure that renderonly has a handle to our buffer in the + * display's fd, so that a later renderonly_get_handle() + * returns correct handles or GEM names. 
+ */ + rsc->scanout = + renderonly_create_gpu_import_for_resource(prsc, + screen->ro, + NULL); + if (!rsc->scanout) { + fprintf(stderr, "Failed to create scanout resource.\n"); + goto fail; + } + } + if (whandle->stride != slice->stride) { static bool warned = false; if (!warned) { @@ -774,6 +901,62 @@ return NULL; } +void +v3d_update_shadow_texture(struct pipe_context *pctx, + struct pipe_sampler_view *pview) +{ + struct v3d_context *v3d = v3d_context(pctx); + struct v3d_sampler_view *view = v3d_sampler_view(pview); + struct v3d_resource *shadow = v3d_resource(view->texture); + struct v3d_resource *orig = v3d_resource(pview->texture); + + assert(view->texture != pview->texture); + + if (shadow->writes == orig->writes && orig->bo->private) + return; + + perf_debug("Updating %dx%d@%d shadow for linear texture\n", + orig->base.width0, orig->base.height0, + pview->u.tex.first_level); + + for (int i = 0; i <= shadow->base.last_level; i++) { + unsigned width = u_minify(shadow->base.width0, i); + unsigned height = u_minify(shadow->base.height0, i); + struct pipe_blit_info info = { + .dst = { + .resource = &shadow->base, + .level = i, + .box = { + .x = 0, + .y = 0, + .z = 0, + .width = width, + .height = height, + .depth = 1, + }, + .format = shadow->base.format, + }, + .src = { + .resource = &orig->base, + .level = pview->u.tex.first_level + i, + .box = { + .x = 0, + .y = 0, + .z = 0, + .width = width, + .height = height, + .depth = 1, + }, + .format = orig->base.format, + }, + .mask = util_format_get_mask(orig->base.format), + }; + pctx->blit(pctx, &info); + } + + shadow->writes = orig->writes; +} + static struct pipe_surface * v3d_create_surface(struct pipe_context *pctx, struct pipe_resource *ptex, @@ -810,6 +993,12 @@ surface->format = v3d_get_rt_format(&screen->devinfo, psurf->format); + const struct util_format_description *desc = + util_format_description(psurf->format); + + surface->swap_rb = (desc->swizzle[0] == PIPE_SWIZZLE_Z && + psurf->format != 
PIPE_FORMAT_B5G6R5_UNORM); + if (util_format_is_depth_or_stencil(psurf->format)) { switch (psurf->format) { case PIPE_FORMAT_Z16_UNORM: @@ -920,10 +1109,11 @@ pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region; pctx->transfer_unmap = u_transfer_helper_transfer_unmap; pctx->buffer_subdata = u_default_buffer_subdata; - pctx->texture_subdata = u_default_texture_subdata; + pctx->texture_subdata = v3d_texture_subdata; pctx->create_surface = v3d_create_surface; pctx->surface_destroy = v3d_surface_destroy; pctx->resource_copy_region = util_resource_copy_region; pctx->blit = v3d_blit; + pctx->generate_mipmap = v3d_generate_mipmap; pctx->flush_resource = v3d_flush_resource; } diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_resource.h mesa-19.0.1/src/gallium/drivers/v3d/v3d_resource.h --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_resource.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_resource.h 2019-03-31 23:16:37.000000000 +0000 @@ -104,6 +104,13 @@ */ uint8_t internal_bpp; + /** + * If the R and B channels should be swapped. On V3D 3.x, we do it in + * the shader and the blend equation. On V3D 4.1+, we can use the new + * TLB load/store flags instead of recompiling. 
+ */ + bool swap_rb; + uint32_t padded_height_of_output_image_in_uif_blocks; /* If the resource being referenced is separate stencil, then this is @@ -115,7 +122,8 @@ struct v3d_resource { struct pipe_resource base; struct v3d_bo *bo; - struct v3d_resource_slice slices[VC5_MAX_MIP_LEVELS]; + struct renderonly_scanout *scanout; + struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS]; uint32_t cube_map_stride; uint32_t size; int cpp; @@ -168,6 +176,8 @@ void v3d_resource_context_init(struct pipe_context *pctx); struct pipe_resource *v3d_resource_create(struct pipe_screen *pscreen, const struct pipe_resource *tmpl); +void v3d_update_shadow_texture(struct pipe_context *pctx, + struct pipe_sampler_view *view); uint32_t v3d_layer_offset(struct pipe_resource *prsc, uint32_t level, uint32_t layer); diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_screen.c mesa-19.0.1/src/gallium/drivers/v3d/v3d_screen.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_screen.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -70,6 +70,7 @@ util_hash_table_destroy(screen->bo_handles); v3d_bufmgr_destroy(pscreen); slab_destroy_parent(&screen->transfer_pool); + free(screen->ro); if (using_v3d_simulator) v3d_simulator_destroy(screen); @@ -81,6 +82,20 @@ ralloc_free(pscreen); } +static bool +v3d_has_feature(struct v3d_screen *screen, enum drm_v3d_param feature) +{ + struct drm_v3d_get_param p = { + .param = feature, + }; + int ret = v3d_ioctl(screen->fd, DRM_IOCTL_V3D_GET_PARAM, &p); + + if (ret != 0) + return false; + + return p.value; +} + static int v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) { @@ -108,23 +123,34 @@ case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: case PIPE_CAP_COMPUTE: case PIPE_CAP_DRAW_INDIRECT: + case PIPE_CAP_MULTI_DRAW_INDIRECT: case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET: case PIPE_CAP_TGSI_CAN_READ_OUTPUTS: case 
PIPE_CAP_TGSI_PACK_HALF_FLOAT: + case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 1; + case PIPE_CAP_GENERATE_MIPMAP: + return v3d_has_feature(screen, DRM_V3D_PARAM_SUPPORTS_TFU); + case PIPE_CAP_INDEP_BLEND_ENABLE: return screen->devinfo.ver >= 40; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 256; + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: + if (screen->devinfo.ver < 40) + return 0; + return 4; + case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: return 4; case PIPE_CAP_GLSL_FEATURE_LEVEL: - return 400; + return 330; case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: return 140; @@ -152,11 +178,14 @@ case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: return 4; + case PIPE_CAP_MAX_VARYINGS: + return V3D_MAX_FS_INPUTS / 4; + /* Texturing. */ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return VC5_MAX_MIP_LEVELS; + return V3D_MAX_MIP_LEVELS; case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: return 2048; @@ -215,6 +244,8 @@ v3d_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, enum pipe_shader_cap param) { + struct v3d_screen *screen = v3d_screen(pscreen); + if (shader != PIPE_SHADER_VERTEX && shader != PIPE_SHADER_FRAGMENT) { return 0; @@ -233,14 +264,14 @@ case PIPE_SHADER_CAP_MAX_INPUTS: if (shader == PIPE_SHADER_FRAGMENT) - return VC5_MAX_FS_INPUTS / 4; + return V3D_MAX_FS_INPUTS / 4; else - return VC5_MAX_ATTRIBUTES; + return V3D_MAX_VS_INPUTS / 4; case PIPE_SHADER_CAP_MAX_OUTPUTS: if (shader == PIPE_SHADER_FRAGMENT) return 4; else - return VC5_MAX_FS_INPUTS / 4; + return V3D_MAX_FS_INPUTS / 4; case PIPE_SHADER_CAP_MAX_TEMPS: return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */ case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: @@ -273,9 +304,17 @@ return 1; case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: - case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: + return V3D_MAX_TEXTURE_SAMPLERS; + case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: 
- return VC5_MAX_TEXTURE_SAMPLERS; + return PIPE_MAX_SHADER_BUFFERS; + + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: + if (screen->devinfo.ver < 41) + return 0; + else + return PIPE_MAX_SHADER_IMAGES; + case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_NIR; case PIPE_SHADER_CAP_SUPPORTED_IRS: @@ -305,7 +344,7 @@ if (MAX2(1, sample_count) != MAX2(1, storage_sample_count)) return false; - if (sample_count > 1 && sample_count != VC5_MAX_SAMPLES) + if (sample_count > 1 && sample_count != V3D_MAX_SAMPLES) return FALSE; if (target >= PIPE_MAX_TEXTURE_TYPES) { @@ -372,7 +411,11 @@ } } + /* FORMAT_NONE gets allowed for ARB_framebuffer_no_attachments's probe + * of FRAMEBUFFER_MAX_SAMPLES + */ if ((usage & PIPE_BIND_RENDER_TARGET) && + format != PIPE_FORMAT_NONE && !v3d_rt_format_supported(&screen->devinfo, format)) { return FALSE; } @@ -467,7 +510,7 @@ } struct pipe_screen * -v3d_screen_create(int fd) +v3d_screen_create(int fd, struct renderonly *ro) { struct v3d_screen *screen = rzalloc(NULL, struct v3d_screen); struct pipe_screen *pscreen; @@ -482,6 +525,14 @@ pscreen->is_format_supported = v3d_screen_is_format_supported; screen->fd = fd; + if (ro) { + screen->ro = renderonly_dup(ro); + if (!screen->ro) { + fprintf(stderr, "Failed to dup renderonly object\n"); + ralloc_free(screen); + return NULL; + } + } list_inithead(&screen->bo_cache.time_list); (void)mtx_init(&screen->bo_handles_mutex, mtx_plain); screen->bo_handles = util_hash_table_create(handle_hash, handle_compare); diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_screen.h mesa-19.0.1/src/gallium/drivers/v3d/v3d_screen.h --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_screen.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_screen.h 2019-03-31 23:16:37.000000000 +0000 @@ -25,6 +25,7 @@ #define VC5_SCREEN_H #include "pipe/p_screen.h" +#include "renderonly/renderonly.h" #include "os/os_thread.h" #include "state_tracker/drm_driver.h" #include "util/list.h" @@ -34,12 +35,6 @@ struct 
v3d_bo; -#define VC5_MAX_MIP_LEVELS 12 -#define VC5_MAX_TEXTURE_SAMPLERS 32 -#define VC5_MAX_SAMPLES 4 -#define VC5_MAX_DRAW_BUFFERS 4 -#define VC5_MAX_ATTRIBUTES 16 - /* These are tunable parameters in the HW design, but all the V3D * implementations agree. */ @@ -55,6 +50,7 @@ struct v3d_screen { struct pipe_screen base; + struct renderonly *ro; int fd; struct v3d_device_info devinfo; @@ -90,7 +86,7 @@ return (struct v3d_screen *)screen; } -struct pipe_screen *v3d_screen_create(int fd); +struct pipe_screen *v3d_screen_create(int fd, struct renderonly *ro); void v3d_fence_init(struct v3d_screen *screen); diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_simulator.c mesa-19.0.1/src/gallium/drivers/v3d/v3d_simulator.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_simulator.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_simulator.c 2019-03-31 23:16:37.000000000 +0000 @@ -54,6 +54,7 @@ #include "util/set.h" #include "util/u_memory.h" #include "util/u_mm.h" +#include "drm-uapi/i915_drm.h" #include "v3d_simulator_wrapper.h" #include "v3d_screen.h" @@ -76,7 +77,7 @@ struct mem_block *heap; struct mem_block *overflow; - /** Mapping from GEM handle to struct v3d_simulator_bo * */ + /** Mapping from GEM fd to struct v3d_simulator_file * */ struct hash_table *fd_map; int refcount; @@ -93,6 +94,9 @@ struct mem_block *gmp; void *gmp_vaddr; + + /** Actual GEM fd is i915, so we should use their create ioctl. */ + bool is_i915; }; /** Wrapper for drm_v3d_bo tracking the simulator-specific state. 
*/ @@ -102,10 +106,9 @@ /** Area for this BO within sim_state->mem */ struct mem_block *block; uint32_t size; - void *vaddr; - - void *winsys_map; - uint32_t winsys_stride; + uint64_t mmap_offset; + void *sim_vaddr; + void *gem_vaddr; int handle; }; @@ -177,10 +180,50 @@ set_gmp_flags(file, sim_bo->block->ofs, size, 0x3); sim_bo->size = size; - sim_bo->vaddr = sim_state.mem + sim_bo->block->ofs - sim_state.mem_base; - memset(sim_bo->vaddr, 0xd0, size); - *(uint32_t *)(sim_bo->vaddr + sim_bo->size) = BO_SENTINEL; + /* Allocate space for the buffer in simulator memory. */ + sim_bo->sim_vaddr = sim_state.mem + sim_bo->block->ofs - sim_state.mem_base; + memset(sim_bo->sim_vaddr, 0xd0, size); + + *(uint32_t *)(sim_bo->sim_vaddr + sim_bo->size) = BO_SENTINEL; + + /* Map the GEM buffer for copy in/out to the simulator. i915 blocks + * dumb mmap on render nodes, so use their ioctl directly if we're on + * one. + */ + int ret; + if (file->is_i915) { + struct drm_i915_gem_mmap_gtt map = { + .handle = handle, + }; + + /* We could potentially use non-gtt (cached) for LLC systems, + * but the copy-in/out won't be the limiting factor on + * simulation anyway. + */ + ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &map); + sim_bo->mmap_offset = map.offset; + } else { + struct drm_mode_map_dumb map = { + .handle = handle, + }; + + ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map); + sim_bo->mmap_offset = map.offset; + } + if (ret) { + fprintf(stderr, "Failed to get MMAP offset: %d\n", ret); + abort(); + } + + sim_bo->gem_vaddr = mmap(NULL, sim_bo->size, + PROT_READ | PROT_WRITE, MAP_SHARED, + fd, sim_bo->mmap_offset); + if (sim_bo->gem_vaddr == MAP_FAILED) { + fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n", + handle, (long long)sim_bo->mmap_offset, sim_bo->size); + abort(); + } /* A handle of 0 is used for v3d_gem.c internal allocations that * don't need to go in the lookup table. 
@@ -200,18 +243,16 @@ { struct v3d_simulator_file *sim_file = sim_bo->file; - if (sim_bo->winsys_map) - munmap(sim_bo->winsys_map, sim_bo->size); - set_gmp_flags(sim_file, sim_bo->block->ofs, sim_bo->size, 0x0); + if (sim_bo->gem_vaddr) + munmap(sim_bo->gem_vaddr, sim_bo->size); + mtx_lock(&sim_state.mutex); u_mmFreeMem(sim_bo->block); if (sim_bo->handle) { - struct hash_entry *entry = - _mesa_hash_table_search(sim_file->bo_map, - int_to_key(sim_bo->handle)); - _mesa_hash_table_remove(sim_file->bo_map, entry); + _mesa_hash_table_remove_key(sim_file->bo_map, + int_to_key(sim_bo->handle)); } mtx_unlock(&sim_state.mutex); ralloc_free(sim_bo); @@ -228,237 +269,90 @@ return entry ? entry->data : NULL; } -static int -v3d_simulator_pin_bos(int fd, struct v3d_job *job) -{ - struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd); - - set_foreach(job->bos, entry) { - struct v3d_bo *bo = (struct v3d_bo *)entry->key; - struct v3d_simulator_bo *sim_bo = - v3d_get_simulator_bo(file, bo->handle); - - v3d_bo_map(bo); - memcpy(sim_bo->vaddr, bo->map, bo->size); - } - - return 0; -} - -static int -v3d_simulator_unpin_bos(int fd, struct v3d_job *job) +static void +v3d_simulator_copy_in_handle(struct v3d_simulator_file *file, int handle) { - struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd); + struct v3d_simulator_bo *sim_bo = v3d_get_simulator_bo(file, handle); - set_foreach(job->bos, entry) { - struct v3d_bo *bo = (struct v3d_bo *)entry->key; - struct v3d_simulator_bo *sim_bo = - v3d_get_simulator_bo(file, bo->handle); - - if (*(uint32_t *)(sim_bo->vaddr + - sim_bo->size) != BO_SENTINEL) { - fprintf(stderr, "Buffer overflow in %s\n", bo->name); - } - - v3d_bo_map(bo); - memcpy(bo->map, sim_bo->vaddr, bo->size); - } + if (!sim_bo) + return; - return 0; + memcpy(sim_bo->sim_vaddr, sim_bo->gem_vaddr, sim_bo->size); } -#if 0 static void -v3d_dump_to_file(struct v3d_exec_info *exec) +v3d_simulator_copy_out_handle(struct v3d_simulator_file *file, int handle) 
{ - static int dumpno = 0; - struct drm_v3d_get_hang_state *state; - struct drm_v3d_get_hang_state_bo *bo_state; - unsigned int dump_version = 0; + struct v3d_simulator_bo *sim_bo = v3d_get_simulator_bo(file, handle); - if (!(v3d_debug & VC5_DEBUG_DUMP)) + if (!sim_bo) return; - state = calloc(1, sizeof(*state)); + memcpy(sim_bo->gem_vaddr, sim_bo->sim_vaddr, sim_bo->size); - int unref_count = 0; - list_for_each_entry_safe(struct drm_v3d_bo, bo, &exec->unref_list, - unref_head) { - unref_count++; - } - - /* Add one more for the overflow area that isn't wrapped in a BO. */ - state->bo_count = exec->bo_count + unref_count + 1; - bo_state = calloc(state->bo_count, sizeof(*bo_state)); - - char *filename = NULL; - asprintf(&filename, "v3d-dri-%d.dump", dumpno++); - FILE *f = fopen(filename, "w+"); - if (!f) { - fprintf(stderr, "Couldn't open %s: %s", filename, - strerror(errno)); - return; - } - - fwrite(&dump_version, sizeof(dump_version), 1, f); - - state->ct0ca = exec->ct0ca; - state->ct0ea = exec->ct0ea; - state->ct1ca = exec->ct1ca; - state->ct1ea = exec->ct1ea; - state->start_bin = exec->ct0ca; - state->start_render = exec->ct1ca; - fwrite(state, sizeof(*state), 1, f); - - int i; - for (i = 0; i < exec->bo_count; i++) { - struct drm_gem_cma_object *cma_bo = exec->bo[i]; - bo_state[i].handle = i; /* Not used by the parser. */ - bo_state[i].paddr = cma_bo->paddr; - bo_state[i].size = cma_bo->base.size; - } - - list_for_each_entry_safe(struct drm_v3d_bo, bo, &exec->unref_list, - unref_head) { - struct drm_gem_cma_object *cma_bo = &bo->base; - bo_state[i].handle = 0; - bo_state[i].paddr = cma_bo->paddr; - bo_state[i].size = cma_bo->base.size; - i++; + if (*(uint32_t *)(sim_bo->sim_vaddr + + sim_bo->size) != BO_SENTINEL) { + fprintf(stderr, "Buffer overflow in handle %d\n", + handle); } +} - /* Add the static overflow memory area. 
*/ - bo_state[i].handle = exec->bo_count; - bo_state[i].paddr = sim_state.overflow->ofs; - bo_state[i].size = sim_state.overflow->size; - i++; +static int +v3d_simulator_pin_bos(struct v3d_simulator_file *file, + struct drm_v3d_submit_cl *submit) +{ + uint32_t *bo_handles = (uint32_t *)(uintptr_t)submit->bo_handles; - fwrite(bo_state, sizeof(*bo_state), state->bo_count, f); + for (int i = 0; i < submit->bo_handle_count; i++) + v3d_simulator_copy_in_handle(file, bo_handles[i]); - for (int i = 0; i < exec->bo_count; i++) { - struct drm_gem_cma_object *cma_bo = exec->bo[i]; - fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f); - } + return 0; +} - list_for_each_entry_safe(struct drm_v3d_bo, bo, &exec->unref_list, - unref_head) { - struct drm_gem_cma_object *cma_bo = &bo->base; - fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f); - } +static int +v3d_simulator_unpin_bos(struct v3d_simulator_file *file, + struct drm_v3d_submit_cl *submit) +{ + uint32_t *bo_handles = (uint32_t *)(uintptr_t)submit->bo_handles; - void *overflow = calloc(1, sim_state.overflow->size); - fwrite(overflow, 1, sim_state.overflow->size, f); - free(overflow); + for (int i = 0; i < submit->bo_handle_count; i++) + v3d_simulator_copy_out_handle(file, bo_handles[i]); - free(state); - free(bo_state); - fclose(f); + return 0; } -#endif -int -v3d_simulator_flush(struct v3d_context *v3d, - struct drm_v3d_submit_cl *submit, struct v3d_job *job) +static int +v3d_simulator_submit_cl_ioctl(int fd, struct drm_v3d_submit_cl *submit) { - struct v3d_screen *screen = v3d->screen; - int fd = screen->fd; struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd); - struct v3d_surface *csurf = v3d_surface(v3d->framebuffer.cbufs[0]); - struct v3d_resource *ctex = csurf ? v3d_resource(csurf->base.texture) : NULL; - struct v3d_simulator_bo *csim_bo = ctex ? v3d_get_simulator_bo(file, ctex->bo->handle) : NULL; - uint32_t winsys_stride = ctex ? csim_bo->winsys_stride : 0; - uint32_t sim_stride = ctex ? 
ctex->slices[0].stride : 0; - uint32_t row_len = MIN2(sim_stride, winsys_stride); int ret; - if (ctex && csim_bo->winsys_map) { -#if 0 - fprintf(stderr, "%dx%d %d %d %d\n", - ctex->base.b.width0, ctex->base.b.height0, - winsys_stride, - sim_stride, - ctex->bo->size); -#endif - - for (int y = 0; y < ctex->base.height0; y++) { - memcpy(ctex->bo->map + y * sim_stride, - csim_bo->winsys_map + y * winsys_stride, - row_len); - } - } - - ret = v3d_simulator_pin_bos(fd, job); + ret = v3d_simulator_pin_bos(file, submit); if (ret) return ret; - //v3d_dump_to_file(&exec); - if (sim_state.ver >= 41) - v3d41_simulator_flush(sim_state.v3d, submit, file->gmp->ofs); + v3d41_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs); else - v3d33_simulator_flush(sim_state.v3d, submit, file->gmp->ofs); + v3d33_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs); - ret = v3d_simulator_unpin_bos(fd, job); + ret = v3d_simulator_unpin_bos(file, submit); if (ret) return ret; - if (ctex && csim_bo->winsys_map) { - for (int y = 0; y < ctex->base.height0; y++) { - memcpy(csim_bo->winsys_map + y * winsys_stride, - ctex->bo->map + y * sim_stride, - row_len); - } - } - return 0; } /** - * Map the underlying GEM object from the real hardware GEM handle. - */ -static void * -v3d_simulator_map_winsys_bo(int fd, struct v3d_simulator_bo *sim_bo) -{ - int ret; - void *map; - - struct drm_mode_map_dumb map_dumb = { - .handle = sim_bo->handle, - }; - ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map_dumb); - if (ret != 0) { - fprintf(stderr, "map ioctl failure\n"); - abort(); - } - - map = mmap(NULL, sim_bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, - fd, map_dumb.offset); - if (map == MAP_FAILED) { - fprintf(stderr, - "mmap of bo %d (offset 0x%016llx, size %d) failed\n", - sim_bo->handle, (long long)map_dumb.offset, - (int)sim_bo->size); - abort(); - } - - return map; -} - -/** * Do fixups after a BO has been opened from a handle. 
* * This could be done at DRM_IOCTL_GEM_OPEN/DRM_IOCTL_GEM_PRIME_FD_TO_HANDLE * time, but we're still using drmPrimeFDToHandle() so we have this helper to * be called afterward instead. */ -void v3d_simulator_open_from_handle(int fd, uint32_t winsys_stride, - int handle, uint32_t size) +void v3d_simulator_open_from_handle(int fd, int handle, uint32_t size) { - struct v3d_simulator_bo *sim_bo = - v3d_create_simulator_bo(fd, handle, size); - - sim_bo->winsys_stride = winsys_stride; - sim_bo->winsys_map = v3d_simulator_map_winsys_bo(fd, sim_bo); + v3d_create_simulator_bo(fd, handle, size); } /** @@ -469,22 +363,38 @@ static int v3d_simulator_create_bo_ioctl(int fd, struct drm_v3d_create_bo *args) { + struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd); + + /* i915 bans dumb create on render nodes, so we have to use their + * native ioctl in case we're on a render node. + */ int ret; - struct drm_mode_create_dumb create = { - .width = 128, - .bpp = 8, - .height = (args->size + 127) / 128, - }; + if (file->is_i915) { + struct drm_i915_gem_create create = { + .size = args->size, + }; + ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create); + + args->handle = create.handle; + } else { + struct drm_mode_create_dumb create = { + .width = 128, + .bpp = 8, + .height = (args->size + 127) / 128, + }; - ret = drmIoctl(fd, DRM_IOCTL_MODE_CREATE_DUMB, &create); - assert(create.size >= args->size); + ret = drmIoctl(fd, DRM_IOCTL_MODE_CREATE_DUMB, &create); + assert(ret != 0 || create.size >= args->size); - args->handle = create.handle; + args->handle = create.handle; + } - struct v3d_simulator_bo *sim_bo = - v3d_create_simulator_bo(fd, create.handle, args->size); + if (ret == 0) { + struct v3d_simulator_bo *sim_bo = + v3d_create_simulator_bo(fd, args->handle, args->size); - args->offset = sim_bo->block->ofs; + args->offset = sim_bo->block->ofs; + } return ret; } @@ -492,20 +402,19 @@ /** * Simulated ioctl(fd, DRM_VC5_MMAP_BO) implementation. 
* - * We just pass this straight through to dumb mmap. + * We've already grabbed the mmap offset when we created the sim bo, so just + * return it. */ static int v3d_simulator_mmap_bo_ioctl(int fd, struct drm_v3d_mmap_bo *args) { - int ret; - struct drm_mode_map_dumb map = { - .handle = args->handle, - }; + struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd); + struct v3d_simulator_bo *sim_bo = v3d_get_simulator_bo(file, + args->handle); - ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map); - args->offset = map.offset; + args->offset = sim_bo->mmap_offset; - return ret; + return 0; } static int @@ -543,10 +452,33 @@ return v3d33_simulator_get_param_ioctl(sim_state.v3d, args); } +static int +v3d_simulator_submit_tfu_ioctl(int fd, struct drm_v3d_submit_tfu *args) +{ + struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd); + int ret; + + v3d_simulator_copy_in_handle(file, args->bo_handles[0]); + v3d_simulator_copy_in_handle(file, args->bo_handles[1]); + v3d_simulator_copy_in_handle(file, args->bo_handles[2]); + v3d_simulator_copy_in_handle(file, args->bo_handles[3]); + + if (sim_state.ver >= 41) + ret = v3d41_simulator_submit_tfu_ioctl(sim_state.v3d, args); + else + ret = v3d33_simulator_submit_tfu_ioctl(sim_state.v3d, args); + + v3d_simulator_copy_out_handle(file, args->bo_handles[0]); + + return ret; +} + int v3d_simulator_ioctl(int fd, unsigned long request, void *args) { switch (request) { + case DRM_IOCTL_V3D_SUBMIT_CL: + return v3d_simulator_submit_cl_ioctl(fd, args); case DRM_IOCTL_V3D_CREATE_BO: return v3d_simulator_create_bo_ioctl(fd, args); case DRM_IOCTL_V3D_MMAP_BO: @@ -568,6 +500,9 @@ case DRM_IOCTL_GEM_CLOSE: return v3d_simulator_gem_close_ioctl(fd, args); + case DRM_IOCTL_V3D_SUBMIT_TFU: + return v3d_simulator_submit_tfu_ioctl(fd, args); + case DRM_IOCTL_GEM_OPEN: case DRM_IOCTL_GEM_FLINK: return drmIoctl(fd, request, args); @@ -627,6 +562,11 @@ screen->sim_file = rzalloc(screen, struct v3d_simulator_file); struct 
v3d_simulator_file *sim_file = screen->sim_file; + drmVersionPtr version = drmGetVersion(screen->fd); + if (version && strncmp(version->name, "i915", version->name_len) == 0) + sim_file->is_i915 = true; + drmFreeVersion(version); + screen->sim_file->bo_map = _mesa_hash_table_create(screen->sim_file, _mesa_hash_pointer, diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_simulator_wrapper.cpp mesa-19.0.1/src/gallium/drivers/v3d/v3d_simulator_wrapper.cpp --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_simulator_wrapper.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_simulator_wrapper.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -83,6 +83,11 @@ return ident->tech_version * 10 + ident->revision; } +void +v3d_hw_set_isr(struct v3d_hw *hw, void (*isr)(uint32_t status)) +{ + hw->set_isr(isr); } +} #endif /* USE_V3D_SIMULATOR */ diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_simulator_wrapper.h mesa-19.0.1/src/gallium/drivers/v3d/v3d_simulator_wrapper.h --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_simulator_wrapper.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_simulator_wrapper.h 2019-03-31 23:16:37.000000000 +0000 @@ -38,6 +38,7 @@ void v3d_hw_write_reg(struct v3d_hw *hw, uint32_t reg, uint32_t val); void v3d_hw_tick(struct v3d_hw *hw); int v3d_hw_get_version(struct v3d_hw *hw); +void v3d_hw_set_isr(struct v3d_hw *hw, void (*isr)(uint32_t status)); #ifdef __cplusplus } diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_tiling.c mesa-19.0.1/src/gallium/drivers/v3d/v3d_tiling.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_tiling.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_tiling.c 2019-03-31 23:16:37.000000000 +0000 @@ -31,6 +31,7 @@ #include "v3d_screen.h" #include "v3d_context.h" #include "v3d_tiling.h" +#include "broadcom/common/v3d_cpu_tiling.h" /** Return the width in pixels of a 64-byte microtile. 
*/ uint32_t @@ -78,9 +79,8 @@ v3d_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y) { uint32_t utile_w = v3d_utile_width(cpp); - uint32_t utile_h = v3d_utile_height(cpp); - assert(x < utile_w && y < utile_h); + assert(x < utile_w && y < v3d_utile_height(cpp)); return x * cpp + y * utile_w * cpp; } @@ -211,15 +211,19 @@ return v3d_get_uif_pixel_offset(cpp, image_h, x, y, false); } +/* Loads/stores non-utile-aligned boxes by walking over the destination + * rectangle, computing the address on the GPU, and storing/loading a pixel at + * a time. + */ static inline void -v3d_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride, - void *cpu, uint32_t cpu_stride, - int cpp, uint32_t image_h, - const struct pipe_box *box, - uint32_t (*get_pixel_offset)(uint32_t cpp, - uint32_t image_h, - uint32_t x, uint32_t y), - bool is_load) +v3d_move_pixels_unaligned(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + int cpp, uint32_t image_h, + const struct pipe_box *box, + uint32_t (*get_pixel_offset)(uint32_t cpp, + uint32_t image_h, + uint32_t x, uint32_t y), + bool is_load) { for (uint32_t y = 0; y < box->height; y++) { void *cpu_row = cpu + y * cpu_stride; @@ -248,6 +252,107 @@ } } +/* Breaks the image down into utiles and calls either the fast whole-utile + * load/store functions, or the unaligned fallback case. 
+ */ +static inline void +v3d_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + int cpp, uint32_t image_h, + const struct pipe_box *box, + uint32_t (*get_pixel_offset)(uint32_t cpp, + uint32_t image_h, + uint32_t x, uint32_t y), + bool is_load) +{ + uint32_t utile_w = v3d_utile_width(cpp); + uint32_t utile_h = v3d_utile_height(cpp); + uint32_t utile_gpu_stride = utile_w * cpp; + uint32_t x1 = box->x; + uint32_t y1 = box->y; + uint32_t x2 = box->x + box->width; + uint32_t y2 = box->y + box->height; + uint32_t align_x1 = align(x1, utile_w); + uint32_t align_y1 = align(y1, utile_h); + uint32_t align_x2 = x2 & ~(utile_w - 1); + uint32_t align_y2 = y2 & ~(utile_h - 1); + + /* Load/store all the whole utiles first. */ + for (uint32_t y = align_y1; y < align_y2; y += utile_h) { + void *cpu_row = cpu + (y - box->y) * cpu_stride; + + for (uint32_t x = align_x1; x < align_x2; x += utile_w) { + void *utile_gpu = (gpu + + get_pixel_offset(cpp, image_h, x, y)); + void *utile_cpu = cpu_row + (x - box->x) * cpp; + + if (is_load) { + v3d_load_utile(utile_cpu, cpu_stride, + utile_gpu, utile_gpu_stride); + } else { + v3d_store_utile(utile_gpu, utile_gpu_stride, + utile_cpu, cpu_stride); + } + } + } + + /* If there were no aligned utiles in the middle, load/store the whole + * thing unaligned. + */ + if (align_y2 <= align_y1 || + align_x2 <= align_x1) { + v3d_move_pixels_unaligned(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, + box, + get_pixel_offset, is_load); + return; + } + + /* Load/store the partial utiles. 
*/ + struct pipe_box partial_boxes[4] = { + /* Top */ + { + .x = x1, + .width = x2 - x1, + .y = y1, + .height = align_y1 - y1, + }, + /* Bottom */ + { + .x = x1, + .width = x2 - x1, + .y = align_y2, + .height = y2 - align_y2, + }, + /* Left */ + { + .x = x1, + .width = align_x1 - x1, + .y = align_y1, + .height = align_y2 - align_y1, + }, + /* Right */ + { + .x = align_x2, + .width = x2 - align_x2, + .y = align_y1, + .height = align_y2 - align_y1, + }, + }; + for (int i = 0; i < ARRAY_SIZE(partial_boxes); i++) { + void *partial_cpu = (cpu + + (partial_boxes[i].y - y1) * cpu_stride + + (partial_boxes[i].x - x1) * cpp); + + v3d_move_pixels_unaligned(gpu, gpu_stride, + partial_cpu, cpu_stride, + cpp, image_h, + &partial_boxes[i], + get_pixel_offset, is_load); + } +} + static inline void v3d_move_pixels_general(void *gpu, uint32_t gpu_stride, void *cpu, uint32_t cpu_stride, diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_tiling.h mesa-19.0.1/src/gallium/drivers/v3d/v3d_tiling.h --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_tiling.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_tiling.h 2019-03-31 23:16:37.000000000 +0000 @@ -27,8 +27,6 @@ uint32_t v3d_utile_width(int cpp) ATTRIBUTE_CONST; uint32_t v3d_utile_height(int cpp) ATTRIBUTE_CONST; bool v3d_size_is_lt(uint32_t width, uint32_t height, int cpp) ATTRIBUTE_CONST; -void v3d_load_utile(void *dst, void *src, uint32_t dst_stride, uint32_t cpp); -void v3d_store_utile(void *dst, void *src, uint32_t src_stride, uint32_t cpp); void v3d_load_tiled_image(void *dst, uint32_t dst_stride, void *src, uint32_t src_stride, enum v3d_tiling_mode tiling_format, int cpp, diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_uniforms.c mesa-19.0.1/src/gallium/drivers/v3d/v3d_uniforms.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_uniforms.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_uniforms.c 2019-03-31 23:16:37.000000000 +0000 @@ -28,82 +28,6 @@ #include 
"compiler/v3d_compiler.h" #include "broadcom/cle/v3d_packet_v33_pack.h" -#if 0 - -#define SWIZ(x,y,z,w) { \ - PIPE_SWIZZLE_##x, \ - PIPE_SWIZZLE_##y, \ - PIPE_SWIZZLE_##z, \ - PIPE_SWIZZLE_##w \ -} - -static void -write_texture_border_color(struct v3d_job *job, - struct v3d_cl_out **uniforms, - struct v3d_texture_stateobj *texstate, - uint32_t unit) -{ - struct pipe_sampler_state *sampler = texstate->samplers[unit]; - struct pipe_sampler_view *texture = texstate->textures[unit]; - struct v3d_resource *rsc = v3d_resource(texture->texture); - union util_color uc; - - const struct util_format_description *tex_format_desc = - util_format_description(texture->format); - - float border_color[4]; - for (int i = 0; i < 4; i++) - border_color[i] = sampler->border_color.f[i]; - if (util_format_is_srgb(texture->format)) { - for (int i = 0; i < 3; i++) - border_color[i] = - util_format_linear_to_srgb_float(border_color[i]); - } - - /* Turn the border color into the layout of channels that it would - * have when stored as texture contents. - */ - float storage_color[4]; - util_format_unswizzle_4f(storage_color, - border_color, - tex_format_desc->swizzle); - - /* Now, pack so that when the v3d_format-sampled texture contents are - * replaced with our border color, the v3d_get_format_swizzle() - * swizzling will get the right channels. 
- */ - if (util_format_is_depth_or_stencil(texture->format)) { - uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM, - sampler->border_color.f[0]) << 8; - } else { - switch (rsc->v3d_format) { - default: - case VC5_TEXTURE_TYPE_RGBA8888: - util_pack_color(storage_color, - PIPE_FORMAT_R8G8B8A8_UNORM, &uc); - break; - case VC5_TEXTURE_TYPE_RGBA4444: - util_pack_color(storage_color, - PIPE_FORMAT_A8B8G8R8_UNORM, &uc); - break; - case VC5_TEXTURE_TYPE_RGB565: - util_pack_color(storage_color, - PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - break; - case VC5_TEXTURE_TYPE_ALPHA: - uc.ui[0] = float_to_ubyte(storage_color[0]) << 24; - break; - case VC5_TEXTURE_TYPE_LUMALPHA: - uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) | - (float_to_ubyte(storage_color[0]) << 0)); - break; - } - } - - cl_aligned_u32(uniforms, uc.ui[0]); -} -#endif - static uint32_t get_texrect_scale(struct v3d_texture_stateobj *texstate, enum quniform_contents contents, @@ -147,6 +71,30 @@ } } +static uint32_t +get_image_size(struct v3d_shaderimg_stateobj *shaderimg, + enum quniform_contents contents, + uint32_t data) +{ + struct v3d_image_view *image = &shaderimg->si[data]; + + switch (contents) { + case QUNIFORM_IMAGE_WIDTH: + return u_minify(image->base.resource->width0, + image->base.u.tex.level); + case QUNIFORM_IMAGE_HEIGHT: + return u_minify(image->base.resource->height0, + image->base.u.tex.level); + case QUNIFORM_IMAGE_DEPTH: + return u_minify(image->base.resource->depth0, + image->base.u.tex.level); + case QUNIFORM_IMAGE_ARRAY_SIZE: + return image->base.resource->array_size; + default: + unreachable("Bad texture size field"); + } +} + static struct v3d_bo * v3d_upload_ubo(struct v3d_context *v3d, struct v3d_compiled_shader *shader, @@ -224,17 +172,34 @@ struct v3d_texture_stateobj *texstate, uint32_t data) { - /* Extract the texture unit from the top bits, and the compiler's + int unit = v3d_tmu_config_data_get_unit(data); + struct pipe_sampler_view *psview = texstate->textures[unit]; + struct 
v3d_sampler_view *sview = v3d_sampler_view(psview); + struct v3d_resource *rsc = v3d_resource(sview->texture); + + cl_aligned_reloc(&job->indirect, uniforms, sview->bo, + v3d_tmu_config_data_get_value(data)); + v3d_job_add_bo(job, rsc->bo); +} + +static void +write_image_tmu_p0(struct v3d_job *job, + struct v3d_cl_out **uniforms, + struct v3d_shaderimg_stateobj *img, + uint32_t data) +{ + /* Extract the image unit from the top bits, and the compiler's * packed p0 from the bottom. */ uint32_t unit = data >> 24; uint32_t p0 = data & 0x00ffffff; - struct pipe_sampler_view *psview = texstate->textures[unit]; - struct v3d_sampler_view *sview = v3d_sampler_view(psview); - struct v3d_resource *rsc = v3d_resource(psview->texture); + struct v3d_image_view *iview = &img->si[unit]; + struct v3d_resource *rsc = v3d_resource(iview->base.resource); - cl_aligned_reloc(&job->indirect, uniforms, sview->bo, p0); + cl_aligned_reloc(&job->indirect, uniforms, + v3d_resource(iview->tex_state)->bo, + iview->tex_state_offset | p0); v3d_job_add_bo(job, rsc->bo); } @@ -245,23 +210,28 @@ struct v3d_texture_stateobj *texstate, uint32_t data) { - /* Extract the texture unit from the top bits, and the compiler's - * packed p1 from the bottom. 
- */ - uint32_t unit = data >> 24; - uint32_t p0 = data & 0x00ffffff; - + uint32_t unit = v3d_tmu_config_data_get_unit(data); struct pipe_sampler_state *psampler = texstate->samplers[unit]; struct v3d_sampler_state *sampler = v3d_sampler_state(psampler); + struct pipe_sampler_view *psview = texstate->textures[unit]; + struct v3d_sampler_view *sview = v3d_sampler_view(psview); + int variant = 0; + + if (sampler->border_color_variants) + variant = sview->sampler_variant; - cl_aligned_reloc(&job->indirect, uniforms, sampler->bo, p0); + cl_aligned_reloc(&job->indirect, uniforms, + v3d_resource(sampler->sampler_state)->bo, + sampler->sampler_state_offset[variant] | + v3d_tmu_config_data_get_value(data)); } struct v3d_cl_reloc v3d_write_uniforms(struct v3d_context *v3d, struct v3d_compiled_shader *shader, - struct v3d_constbuf_stateobj *cb, - struct v3d_texture_stateobj *texstate) + enum pipe_shader_type stage) { + struct v3d_constbuf_stateobj *cb = &v3d->constbuf[stage]; + struct v3d_texture_stateobj *texstate = &v3d->tex[stage]; struct v3d_uniform_list *uinfo = &shader->prog_data.base->uniforms; struct v3d_job *job = v3d->job; const uint32_t *gallium_uniforms = cb->cb[0].user_buffer; @@ -279,14 +249,14 @@ cl_start(&job->indirect); for (int i = 0; i < uinfo->count; i++) { + uint32_t data = uinfo->data[i]; switch (uinfo->contents[i]) { case QUNIFORM_CONSTANT: - cl_aligned_u32(&uniforms, uinfo->data[i]); + cl_aligned_u32(&uniforms, data); break; case QUNIFORM_UNIFORM: - cl_aligned_u32(&uniforms, - gallium_uniforms[uinfo->data[i]]); + cl_aligned_u32(&uniforms, gallium_uniforms[data]); break; case QUNIFORM_VIEWPORT_X_SCALE: cl_aligned_f(&uniforms, v3d->viewport.scale[0] * 256.0f); @@ -304,37 +274,33 @@ case QUNIFORM_USER_CLIP_PLANE: cl_aligned_f(&uniforms, - v3d->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]); + v3d->clip.ucp[data / 4][data % 4]); break; case QUNIFORM_TMU_CONFIG_P0: - write_tmu_p0(job, &uniforms, texstate, - uinfo->data[i]); + write_tmu_p0(job, 
&uniforms, texstate, data); break; case QUNIFORM_TMU_CONFIG_P1: - write_tmu_p1(job, &uniforms, texstate, - uinfo->data[i]); + write_tmu_p1(job, &uniforms, texstate, data); break; - case QUNIFORM_TEXTURE_CONFIG_P1: - write_texture_p1(job, &uniforms, texstate, - uinfo->data[i]); + case QUNIFORM_IMAGE_TMU_CONFIG_P0: + write_image_tmu_p0(job, &uniforms, + &v3d->shaderimg[stage], data); break; -#if 0 - case QUNIFORM_TEXTURE_FIRST_LEVEL: - write_texture_first_level(job, &uniforms, texstate, - uinfo->data[i]); + case QUNIFORM_TEXTURE_CONFIG_P1: + write_texture_p1(job, &uniforms, texstate, + data); break; -#endif case QUNIFORM_TEXRECT_SCALE_X: case QUNIFORM_TEXRECT_SCALE_Y: cl_aligned_u32(&uniforms, get_texrect_scale(texstate, uinfo->contents[i], - uinfo->data[i])); + data)); break; case QUNIFORM_TEXTURE_WIDTH: @@ -345,7 +311,17 @@ cl_aligned_u32(&uniforms, get_texture_size(texstate, uinfo->contents[i], - uinfo->data[i])); + data)); + break; + + case QUNIFORM_IMAGE_WIDTH: + case QUNIFORM_IMAGE_HEIGHT: + case QUNIFORM_IMAGE_DEPTH: + case QUNIFORM_IMAGE_ARRAY_SIZE: + cl_aligned_u32(&uniforms, + get_image_size(&v3d->shaderimg[stage], + uinfo->contents[i], + data)); break; case QUNIFORM_ALPHA_REF: @@ -353,16 +329,12 @@ v3d->zsa->base.alpha.ref_value); break; - case QUNIFORM_SAMPLE_MASK: - cl_aligned_u32(&uniforms, v3d->sample_mask); - break; - case QUNIFORM_UBO_ADDR: - if (uinfo->data[i] == 0) { + if (data == 0) { cl_aligned_reloc(&job->indirect, &uniforms, ubo, 0); } else { - int ubo_index = uinfo->data[i]; + int ubo_index = data; struct v3d_resource *rsc = v3d_resource(cb->cb[ubo_index].buffer); @@ -372,13 +344,24 @@ } break; - case QUNIFORM_TEXTURE_FIRST_LEVEL: - cl_aligned_f(&uniforms, - texstate->textures[uinfo->data[i]]->u.tex.first_level); + case QUNIFORM_SSBO_OFFSET: { + struct pipe_shader_buffer *sb = + &v3d->ssbo[stage].sb[data]; + + cl_aligned_reloc(&job->indirect, &uniforms, + v3d_resource(sb->buffer)->bo, + sb->buffer_offset); + break; + } + + case 
QUNIFORM_GET_BUFFER_SIZE: + cl_aligned_u32(&uniforms, + v3d->ssbo[stage].sb[data].buffer_size); break; - case QUNIFORM_TEXTURE_BORDER_COLOR: - /* XXX */ + case QUNIFORM_TEXTURE_FIRST_LEVEL: + cl_aligned_f(&uniforms, + texstate->textures[data]->u.tex.first_level); break; case QUNIFORM_SPILL_OFFSET: @@ -397,15 +380,17 @@ write_texture_p0(job, &uniforms, texstate, uinfo->contents[i] - QUNIFORM_TEXTURE_CONFIG_P0_0, - uinfo->data[i]); + data); break; } #if 0 uint32_t written_val = *((uint32_t *)uniforms - 1); - fprintf(stderr, "shader %p[%d]: 0x%08x / 0x%08x (%f)\n", + fprintf(stderr, "shader %p[%d]: 0x%08x / 0x%08x (%f) ", shader, i, __gen_address_offset(&uniform_stream) + i * 4, written_val, uif(written_val)); + vir_dump_uniform(uinfo->contents[i], data); + fprintf(stderr, "\n"); #endif } @@ -444,7 +429,6 @@ case QUNIFORM_TMU_CONFIG_P0: case QUNIFORM_TMU_CONFIG_P1: case QUNIFORM_TEXTURE_CONFIG_P1: - case QUNIFORM_TEXTURE_BORDER_COLOR: case QUNIFORM_TEXTURE_FIRST_LEVEL: case QUNIFORM_TEXRECT_SCALE_X: case QUNIFORM_TEXRECT_SCALE_Y: @@ -461,12 +445,21 @@ dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX; break; - case QUNIFORM_ALPHA_REF: - dirty |= VC5_DIRTY_ZSA; + case QUNIFORM_SSBO_OFFSET: + case QUNIFORM_GET_BUFFER_SIZE: + dirty |= VC5_DIRTY_SSBO; break; - case QUNIFORM_SAMPLE_MASK: - dirty |= VC5_DIRTY_SAMPLE_STATE; + case QUNIFORM_IMAGE_TMU_CONFIG_P0: + case QUNIFORM_IMAGE_WIDTH: + case QUNIFORM_IMAGE_HEIGHT: + case QUNIFORM_IMAGE_DEPTH: + case QUNIFORM_IMAGE_ARRAY_SIZE: + dirty |= VC5_DIRTY_SHADER_IMAGE; + break; + + case QUNIFORM_ALPHA_REF: + dirty |= VC5_DIRTY_ZSA; break; default: diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3dx_context.h mesa-19.0.1/src/gallium/drivers/v3d/v3dx_context.h --- mesa-18.3.3/src/gallium/drivers/v3d/v3dx_context.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3dx_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -39,9 +39,13 @@ void v3dX(simulator_init_regs)(struct v3d_hw *v3d); int 
v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d, struct drm_v3d_get_param *args); -void v3dX(simulator_flush)(struct v3d_hw *v3d, struct drm_v3d_submit_cl *submit, - uint32_t gmp_ofs); +void v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d, + struct drm_v3d_submit_cl *args, + uint32_t gmp_offset); +int v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d, + struct drm_v3d_submit_tfu *args); const struct v3d_format *v3dX(get_format_desc)(enum pipe_format f); void v3dX(get_internal_type_bpp_for_output_format)(uint32_t format, uint32_t *type, uint32_t *bpp); +bool v3dX(tfu_supports_tex_format)(uint32_t tex_format); diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3dx_draw.c mesa-19.0.1/src/gallium/drivers/v3d/v3dx_draw.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3dx_draw.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3dx_draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -119,18 +119,42 @@ } static void -v3d_predraw_check_textures(struct pipe_context *pctx, - struct v3d_texture_stateobj *stage_tex) +v3d_predraw_check_stage_inputs(struct pipe_context *pctx, + enum pipe_shader_type s) { struct v3d_context *v3d = v3d_context(pctx); - for (int i = 0; i < stage_tex->num_textures; i++) { - struct pipe_sampler_view *view = stage_tex->textures[i]; - if (!view) + /* XXX perf: If we're reading from the output of TF in this job, we + * should instead be using the wait for transform feedback + * functionality. + */ + + /* Flush writes to textures we're sampling. */ + for (int i = 0; i < v3d->tex[s].num_textures; i++) { + struct pipe_sampler_view *pview = v3d->tex[s].textures[i]; + if (!pview) continue; + struct v3d_sampler_view *view = v3d_sampler_view(pview); + + if (view->texture != view->base.texture) + v3d_update_shadow_texture(pctx, &view->base); v3d_flush_jobs_writing_resource(v3d, view->texture); } + + /* Flush writes to UBOs. 
*/ + foreach_bit(i, v3d->constbuf[s].enabled_mask) { + struct pipe_constant_buffer *cb = &v3d->constbuf[s].cb[i]; + if (cb->buffer) + v3d_flush_jobs_writing_resource(v3d, cb->buffer); + } + + /* Flush writes to our image views */ + foreach_bit(i, v3d->shaderimg[s].enabled_mask) { + struct v3d_image_view *view = &v3d->shaderimg[s].si[i]; + + v3d_flush_jobs_writing_resource(v3d, view->base.resource); + } } static void @@ -146,16 +170,13 @@ /* Upload the uniforms to the indirect CL first */ struct v3d_cl_reloc fs_uniforms = v3d_write_uniforms(v3d, v3d->prog.fs, - &v3d->constbuf[PIPE_SHADER_FRAGMENT], - &v3d->fragtex); + PIPE_SHADER_FRAGMENT); struct v3d_cl_reloc vs_uniforms = v3d_write_uniforms(v3d, v3d->prog.vs, - &v3d->constbuf[PIPE_SHADER_VERTEX], - &v3d->verttex); + PIPE_SHADER_VERTEX); struct v3d_cl_reloc cs_uniforms = v3d_write_uniforms(v3d, v3d->prog.cs, - &v3d->constbuf[PIPE_SHADER_VERTEX], - &v3d->verttex); + PIPE_SHADER_VERTEX); /* See GFXH-930 workaround below */ uint32_t num_elements_to_emit = MAX2(vtx->num_elements, 1); @@ -166,6 +187,10 @@ cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD), 32); + /* XXX perf: We should move most of the SHADER_STATE_RECORD setup to + * compile time, so that we mostly just have to OR the VS and FS + * records together at draw time. + */ cl_emit(&job->indirect, GL_SHADER_STATE_RECORD, shader) { shader.enable_clipping = true; /* VC5_DIRTY_PRIM_MODE | VC5_DIRTY_RASTERIZER */ @@ -178,8 +203,13 @@ * shader needs to write the Z value (even just discards). */ shader.fragment_shader_does_z_writes = - (v3d->prog.fs->prog_data.fs->writes_z || - v3d->prog.fs->prog_data.fs->discard); + v3d->prog.fs->prog_data.fs->writes_z; + /* Set if the EZ test must be disabled (due to shader side + * effects and the early_z flag not being present in the + * shader). 
+ */ + shader.turn_off_early_z_test = + v3d->prog.fs->prog_data.fs->disable_ez; shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 = v3d->prog.fs->prog_data.fs->uses_center_w; @@ -192,21 +222,27 @@ shader.fragment_shader_propagate_nans = true; shader.coordinate_shader_code_address = - cl_address(v3d->prog.cs->bo, 0); + cl_address(v3d_resource(v3d->prog.cs->resource)->bo, + v3d->prog.cs->offset); shader.vertex_shader_code_address = - cl_address(v3d->prog.vs->bo, 0); + cl_address(v3d_resource(v3d->prog.vs->resource)->bo, + v3d->prog.vs->offset); shader.fragment_shader_code_address = - cl_address(v3d->prog.fs->bo, 0); + cl_address(v3d_resource(v3d->prog.fs->resource)->bo, + v3d->prog.fs->offset); /* XXX: Use combined input/output size flag in the common * case. */ - shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = true; - shader.vertex_shader_has_separate_input_and_output_vpm_blocks = true; + shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = + v3d->prog.cs->prog_data.vs->separate_segments; + shader.vertex_shader_has_separate_input_and_output_vpm_blocks = + v3d->prog.vs->prog_data.vs->separate_segments; + shader.coordinate_shader_input_vpm_segment_size = - MAX2(v3d->prog.cs->prog_data.vs->vpm_input_size, 1); + v3d->prog.cs->prog_data.vs->vpm_input_size; shader.vertex_shader_input_vpm_segment_size = - MAX2(v3d->prog.vs->prog_data.vs->vpm_input_size, 1); + v3d->prog.vs->prog_data.vs->vpm_input_size; shader.coordinate_shader_output_vpm_segment_size = v3d->prog.cs->prog_data.vs->vpm_output_size; @@ -259,7 +295,8 @@ v3d->prog.vs->prog_data.vs->uses_iid; shader.address_of_default_attribute_values = - cl_address(vtx->default_attribute_values, 0); + cl_address(v3d_resource(vtx->defaults)->bo, + vtx->defaults_offset); } for (int i = 0; i < vtx->num_elements; i++) { @@ -285,7 +322,7 @@ attr.maximum_index = 0xffffff; #endif } - STATIC_ASSERT(sizeof(vtx->attrs) >= VC5_MAX_ATTRIBUTES * size); + 
STATIC_ASSERT(sizeof(vtx->attrs) >= V3D_MAX_VS_INPUTS / 4 * size); } if (vtx->num_elements == 0) { @@ -431,8 +468,11 @@ /* Before setting up the draw, flush anything writing to the textures * that we read from. */ - v3d_predraw_check_textures(pctx, &v3d->verttex); - v3d_predraw_check_textures(pctx, &v3d->fragtex); + for (int s = 0; s < PIPE_SHADER_TYPES; s++) + v3d_predraw_check_stage_inputs(pctx, s); + + if (info->indirect) + v3d_flush_jobs_writing_resource(v3d, info->indirect->buffer); struct v3d_job *job = v3d_get_job_for_fbo(v3d); @@ -444,12 +484,29 @@ * on the last submitted render, rather than tracking the last * rendering to each texture's BO. */ - if (v3d->verttex.num_textures) { + if (v3d->tex[PIPE_SHADER_VERTEX].num_textures) { perf_debug("Blocking binner on last render " "due to vertex texturing.\n"); job->submit.in_sync_bcl = v3d->out_sync; } + /* Mark SSBOs as being written. We don't actually know which ones are + * read vs written, so just assume the worst + */ + for (int s = 0; s < PIPE_SHADER_TYPES; s++) { + foreach_bit(i, v3d->ssbo[s].enabled_mask) { + v3d_job_add_write_resource(job, + v3d->ssbo[s].sb[i].buffer); + job->tmu_dirty_rcl = true; + } + + foreach_bit(i, v3d->shaderimg[s].enabled_mask) { + v3d_job_add_write_resource(job, + v3d->shaderimg[s].si[i].base.resource); + job->tmu_dirty_rcl = true; + } + } + /* Get space to emit our draw call into the BCL, using a branch to * jump to a new BO if necessary. 
*/ @@ -531,7 +588,23 @@ } #endif - if (info->instance_count > 1) { + if (info->indirect) { + cl_emit(&job->bcl, INDIRECT_INDEXED_INSTANCED_PRIM_LIST, prim) { + prim.index_type = ffs(info->index_size) - 1; +#if V3D_VERSION < 40 + prim.address_of_indices_list = + cl_address(rsc->bo, offset); +#endif /* V3D_VERSION < 40 */ + prim.mode = info->mode | prim_tf_enable; + prim.enable_primitive_restarts = info->primitive_restart; + + prim.number_of_draw_indirect_indexed_records = info->indirect->draw_count; + + prim.stride_in_multiples_of_4_bytes = info->indirect->stride >> 2; + prim.address = cl_address(v3d_resource(info->indirect->buffer)->bo, + info->indirect->offset); + } + } else if (info->instance_count > 1) { cl_emit(&job->bcl, INDEXED_INSTANCED_PRIM_LIST, prim) { prim.index_type = ffs(info->index_size) - 1; #if V3D_VERSION >= 40 @@ -568,7 +641,16 @@ if (info->has_user_indices) pipe_resource_reference(&prsc, NULL); } else { - if (info->instance_count > 1) { + if (info->indirect) { + cl_emit(&job->bcl, INDIRECT_VERTEX_ARRAY_INSTANCED_PRIMS, prim) { + prim.mode = info->mode | prim_tf_enable; + prim.number_of_draw_indirect_array_records = info->indirect->draw_count; + + prim.stride_in_multiples_of_4_bytes = info->indirect->stride >> 2; + prim.address = cl_address(v3d_resource(info->indirect->buffer)->bo, + info->indirect->offset); + } + } else if (info->instance_count > 1) { cl_emit(&job->bcl, VERTEX_ARRAY_INSTANCED_PRIMS, prim) { prim.mode = info->mode | prim_tf_enable; prim.index_of_first_vertex = info->start; @@ -623,7 +705,7 @@ rsc->initialized_buffers |= PIPE_CLEAR_STENCIL; } - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { uint32_t bit = PIPE_CLEAR_COLOR0 << i; int blend_rt = v3d->blend->base.independent_blend_enable ? 
i : 0; @@ -703,7 +785,7 @@ buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; } - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { uint32_t bit = PIPE_CLEAR_COLOR0 << i; if (!(buffers & bit)) continue; diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3dx_emit.c mesa-19.0.1/src/gallium/drivers/v3d/v3dx_emit.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3dx_emit.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3dx_emit.c 2019-03-31 23:16:37.000000000 +0000 @@ -291,7 +291,7 @@ if (blend->independent_blend_enable) config.render_target_mask = 1 << rt; else - config.render_target_mask = (1 << VC5_MAX_DRAW_BUFFERS) - 1; + config.render_target_mask = (1 << V3D_MAX_DRAW_BUFFERS) - 1; #else assert(rt == 0); #endif @@ -588,7 +588,7 @@ #endif if (blend->base.independent_blend_enable) { - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) + for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) emit_rt_blend(v3d, job, &blend->base, i); } else { emit_rt_blend(v3d, job, &blend->base, 0); @@ -653,10 +653,10 @@ * the view, so we merge them together at draw time. 
*/ if (v3d->dirty & VC5_DIRTY_FRAGTEX) - emit_textures(v3d, &v3d->fragtex); + emit_textures(v3d, &v3d->tex[PIPE_SHADER_FRAGMENT]); if (v3d->dirty & VC5_DIRTY_VERTTEX) - emit_textures(v3d, &v3d->verttex); + emit_textures(v3d, &v3d->tex[PIPE_SHADER_VERTEX]); #endif if (v3d->dirty & VC5_DIRTY_FLAT_SHADE_FLAGS) { diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3dx_format_table.c mesa-19.0.1/src/gallium/drivers/v3d/v3dx_format_table.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3dx_format_table.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3dx_format_table.c 2019-03-31 23:16:37.000000000 +0000 @@ -65,6 +65,8 @@ FORMAT(B8G8R8X8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_ZYX1, 16, 0), FORMAT(R8G8B8A8_UNORM, RGBA8, RGBA8, SWIZ_XYZW, 16, 0), FORMAT(R8G8B8X8_UNORM, RGBA8, RGBA8, SWIZ_XYZ1, 16, 0), + FORMAT(R8G8B8A8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_XYZW, 16, 0), + FORMAT(R8G8B8X8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_XYZ1, 16, 0), FORMAT(R8G8B8A8_SNORM, NO, RGBA8_SNORM, SWIZ_XYZW, 16, 0), FORMAT(R8G8B8X8_SNORM, NO, RGBA8_SNORM, SWIZ_XYZ1, 16, 0), FORMAT(R10G10B10A2_UNORM, RGB10_A2, RGB10_A2, SWIZ_XYZW, 16, 0), @@ -145,12 +147,13 @@ #if V3D_VERSION >= 40 FORMAT(S8_UINT_Z24_UNORM, D24S8, DEPTH24_X8, SWIZ_XXXX, 32, 1), FORMAT(X8Z24_UNORM, D24S8, DEPTH24_X8, SWIZ_XXXX, 32, 1), - FORMAT(S8X24_UINT, S8, DEPTH_COMP32F, SWIZ_XXXX, 32, 1), + FORMAT(S8X24_UINT, S8, RGBA8UI, SWIZ_XXXX, 16, 1), FORMAT(Z32_FLOAT, D32F, DEPTH_COMP32F, SWIZ_XXXX, 32, 1), FORMAT(Z16_UNORM, D16, DEPTH_COMP16,SWIZ_XXXX, 32, 1), /* Pretend we support this, but it'll be separate Z32F depth and S8. 
*/ FORMAT(Z32_FLOAT_S8X24_UINT, D32F, DEPTH_COMP32F, SWIZ_XXXX, 32, 1), + FORMAT(X32_S8X24_UINT, S8, R8UI, SWIZ_XXXX, 16, 1), #else FORMAT(S8_UINT_Z24_UNORM, ZS_DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_XXXX, 32, 1), FORMAT(X8Z24_UNORM, ZS_DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_XXXX, 32, 1), @@ -316,3 +319,34 @@ break; } } + +bool +v3dX(tfu_supports_tex_format)(enum V3DX(Texture_Data_Formats) format) +{ + switch (format) { + case TEXTURE_DATA_FORMAT_R8: + case TEXTURE_DATA_FORMAT_R8_SNORM: + case TEXTURE_DATA_FORMAT_RG8: + case TEXTURE_DATA_FORMAT_RG8_SNORM: + case TEXTURE_DATA_FORMAT_RGBA8: + case TEXTURE_DATA_FORMAT_RGBA8_SNORM: + case TEXTURE_DATA_FORMAT_RGB565: + case TEXTURE_DATA_FORMAT_RGBA4: + case TEXTURE_DATA_FORMAT_RGB5_A1: + case TEXTURE_DATA_FORMAT_RGB10_A2: + case TEXTURE_DATA_FORMAT_R16: + case TEXTURE_DATA_FORMAT_R16_SNORM: + case TEXTURE_DATA_FORMAT_RG16: + case TEXTURE_DATA_FORMAT_RG16_SNORM: + case TEXTURE_DATA_FORMAT_RGBA16: + case TEXTURE_DATA_FORMAT_RGBA16_SNORM: + case TEXTURE_DATA_FORMAT_R16F: + case TEXTURE_DATA_FORMAT_RG16F: + case TEXTURE_DATA_FORMAT_RGBA16F: + case TEXTURE_DATA_FORMAT_R11F_G11F_B10F: + case TEXTURE_DATA_FORMAT_R4: + return true; + default: + return false; + } +} diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3dx_rcl.c mesa-19.0.1/src/gallium/drivers/v3d/v3dx_rcl.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3dx_rcl.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3dx_rcl.c 2019-03-31 23:16:37.000000000 +0000 @@ -74,6 +74,7 @@ load.input_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8; else load.input_image_format = surf->format; + load.r_b_swap = surf->swap_rb; if (surf->tiling == VC5_TILING_UIF_NO_XOR || surf->tiling == VC5_TILING_UIF_XOR) { @@ -137,6 +138,7 @@ else store.output_image_format = surf->format; + store.r_b_swap = surf->swap_rb; store.memory_format = surf->tiling; if (surf->tiling == VC5_TILING_UIF_NO_XOR || @@ -205,7 +207,7 @@ { uint32_t loads_pending = job->load; - for (int i = 0; i < 
VC5_MAX_DRAW_BUFFERS; i++) { + for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { uint32_t bit = PIPE_CLEAR_COLOR0 << i; if (!(loads_pending & bit)) continue; @@ -303,7 +305,7 @@ * perspective. Non-MSAA surfaces will use * STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED. */ - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { uint32_t bit = PIPE_CLEAR_COLOR0 << i; if (!(job->store & bit)) continue; @@ -372,6 +374,15 @@ } } #else /* V3D_VERSION >= 40 */ + /* If we're emitting an RCL with GL_ARB_framebuffer_no_attachments, + * we still need to emit some sort of store. + */ + if (!job->store) { + cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = NONE; + } + } + assert(!stores_pending); /* GFXH-1461/GFXH-1689: The per-buffer store command's clear @@ -496,7 +507,7 @@ v3d_job_add_bo(job, job->rcl.bo); int nr_cbufs = 0; - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { if (job->cbufs[i]) nr_cbufs = i + 1; } @@ -759,7 +770,10 @@ v3d_rcl_emit_generic_per_tile_list(job, nr_cbufs - 1); - /* XXX: Use Morton order */ + /* XXX perf: We should expose GL_MESA_tile_raster_order to improve X11 + * performance, but we should use Morton order otherwise to improve + * cache locality. 
+ */ uint32_t supertile_w_in_pixels = job->tile_width * supertile_w; uint32_t supertile_h_in_pixels = job->tile_height * supertile_h; uint32_t min_x_supertile = job->draw_min_x / supertile_w_in_pixels; @@ -781,5 +795,20 @@ } } + if (job->tmu_dirty_rcl) { + cl_emit(&job->rcl, L1_CACHE_FLUSH_CONTROL, flush) { + flush.tmu_config_cache_clear = 0xf; + flush.tmu_data_cache_clear = 0xf; + flush.uniforms_cache_clear = 0xf; + flush.instruction_cache_clear = 0xf; + } + + cl_emit(&job->rcl, L2T_CACHE_FLUSH_CONTROL, flush) { + flush.l2t_flush_mode = L2T_FLUSH_MODE_CLEAN; + flush.l2t_flush_start = cl_address(NULL, 0); + flush.l2t_flush_end = cl_address(NULL, ~0); + } + } + cl_emit(&job->rcl, END_OF_RENDERING, end); } diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3dx_simulator.c mesa-19.0.1/src/gallium/drivers/v3d/v3dx_simulator.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3dx_simulator.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3dx_simulator.c 2019-03-31 23:16:37.000000000 +0000 @@ -49,7 +49,7 @@ #define V3D_READ(reg) v3d_hw_read_reg(v3d, reg) static void -v3d_flush_l3(struct v3d_hw *v3d) +v3d_invalidate_l3(struct v3d_hw *v3d) { if (!v3d_hw_has_gca(v3d)) return; @@ -62,10 +62,13 @@ #endif } -/* Invalidates the L2 cache. This is a read-only cache. */ +/* Invalidates the L2C cache. This is a read-only cache for uniforms and instructions. */ static void -v3d_flush_l2(struct v3d_hw *v3d) +v3d_invalidate_l2c(struct v3d_hw *v3d) { + if (V3D_VERSION >= 33) + return; + V3D_WRITE(V3D_CTL_0_L2CACTL, V3D_CTL_0_L2CACTL_L2CCLR_SET | V3D_CTL_0_L2CACTL_L2CENA_SET); @@ -73,7 +76,7 @@ /* Invalidates texture L2 cachelines */ static void -v3d_flush_l2t(struct v3d_hw *v3d) +v3d_invalidate_l2t(struct v3d_hw *v3d) { V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0); V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0); @@ -84,18 +87,44 @@ /* Invalidates the slice caches. These are read-only caches. 
*/ static void -v3d_flush_slices(struct v3d_hw *v3d) +v3d_invalidate_slices(struct v3d_hw *v3d) { V3D_WRITE(V3D_CTL_0_SLCACTL, ~0); } static void -v3d_flush_caches(struct v3d_hw *v3d) +v3d_invalidate_caches(struct v3d_hw *v3d) { - v3d_flush_l3(v3d); - v3d_flush_l2(v3d); - v3d_flush_l2t(v3d); - v3d_flush_slices(v3d); + v3d_invalidate_l3(v3d); + v3d_invalidate_l2c(v3d); + v3d_invalidate_l2t(v3d); + v3d_invalidate_slices(v3d); +} + +int +v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d, + struct drm_v3d_submit_tfu *args) +{ + int last_vtct = V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET; + + V3D_WRITE(V3D_TFU_IIA, args->iia); + V3D_WRITE(V3D_TFU_IIS, args->iis); + V3D_WRITE(V3D_TFU_ICA, args->ica); + V3D_WRITE(V3D_TFU_IUA, args->iua); + V3D_WRITE(V3D_TFU_IOA, args->ioa); + V3D_WRITE(V3D_TFU_IOS, args->ios); + V3D_WRITE(V3D_TFU_COEF0, args->coef[0]); + V3D_WRITE(V3D_TFU_COEF1, args->coef[1]); + V3D_WRITE(V3D_TFU_COEF2, args->coef[2]); + V3D_WRITE(V3D_TFU_COEF3, args->coef[3]); + + V3D_WRITE(V3D_TFU_ICFG, args->icfg); + + while ((V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET) == last_vtct) { + v3d_hw_tick(v3d); + } + + return 0; } int @@ -112,6 +141,12 @@ [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2, }; + switch (args->param) { + case DRM_V3D_PARAM_SUPPORTS_TFU: + args->value = 1; + return 0; + } + if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) { args->value = V3D_READ(reg_map[args->param]); return 0; @@ -122,6 +157,32 @@ abort(); } +static struct v3d_hw *v3d_isr_hw; + +static void +v3d_isr(uint32_t hub_status) +{ + struct v3d_hw *v3d = v3d_isr_hw; + + /* Check the per-core bits */ + if (hub_status & (1 << 0)) { + uint32_t core_status = V3D_READ(V3D_CTL_0_INT_STS); + + if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) { + fprintf(stderr, "GMP violation at 0x%08x\n", + V3D_READ(V3D_GMP_0_VIO_ADDR)); + abort(); + } else { + fprintf(stderr, + "Unexpected ISR with core status 0x%08x\n", + core_status); + } + abort(); + } + + return; +} + void 
v3dX(simulator_init_regs)(struct v3d_hw *v3d) { @@ -136,11 +197,19 @@ */ V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET); #endif + + uint32_t core_interrupts = V3D_CTL_0_INT_STS_INT_GMPV_SET; + V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts); + V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts); + + v3d_isr_hw = v3d; + v3d_hw_set_isr(v3d, v3d_isr); } void -v3dX(simulator_flush)(struct v3d_hw *v3d, struct drm_v3d_submit_cl *submit, - uint32_t gmp_ofs) +v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d, + struct drm_v3d_submit_cl *submit, + uint32_t gmp_ofs) { /* Completely reset the GMP. */ V3D_WRITE(V3D_GMP_0_CFG, @@ -152,7 +221,7 @@ ; } - v3d_flush_caches(v3d); + v3d_invalidate_caches(v3d); if (submit->qma) { V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma); @@ -168,14 +237,17 @@ V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start); V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end); - /* Wait for bin to complete before firing render, as it seems the - * simulator doesn't implement the semaphores. + /* Wait for bin to complete before firing render. The kernel's + * scheduler implements this using the GPU scheduler blocking on the + * bin fence completing. (We don't use HW semaphores). 
*/ while (V3D_READ(V3D_CLE_0_CT0CA) != V3D_READ(V3D_CLE_0_CT0EA)) { v3d_hw_tick(v3d); } + v3d_invalidate_caches(v3d); + V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start); V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end); diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3dx_state.c mesa-19.0.1/src/gallium/drivers/v3d/v3dx_state.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3dx_state.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3dx_state.c 2019-03-31 23:16:37.000000000 +0000 @@ -30,10 +30,12 @@ #include "util/u_memory.h" #include "util/u_half.h" #include "util/u_helpers.h" +#include "util/u_upload_mgr.h" #include "v3d_context.h" #include "v3d_tiling.h" #include "broadcom/common/v3d_macros.h" +#include "broadcom/compiler/v3d_compiler.h" #include "broadcom/cle/v3dx_pack.h" static void @@ -77,7 +79,7 @@ v3d_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask) { struct v3d_context *v3d = v3d_context(pctx); - v3d->sample_mask = sample_mask & ((1 << VC5_MAX_SAMPLES) - 1); + v3d->sample_mask = sample_mask & ((1 << V3D_MAX_SAMPLES) - 1); v3d->dirty |= VC5_DIRTY_SAMPLE_STATE; } @@ -130,7 +132,7 @@ so->base = *cso; if (cso->independent_blend_enable) { - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { so->blend_enables |= cso->rt[i].blend_enable << i; /* V3D 4.x is when we got independent blend enables. */ @@ -139,7 +141,7 @@ } } else { if (cso->rt[0].blend_enable) - so->blend_enables = (1 << VC5_MAX_DRAW_BUFFERS) - 1; + so->blend_enables = (1 << V3D_MAX_DRAW_BUFFERS) - 1; } return so; @@ -404,12 +406,12 @@ /* Set up the default attribute values in case any of the vertex * elements use them. 
*/ - so->default_attribute_values = v3d_bo_alloc(v3d->screen, - VC5_MAX_ATTRIBUTES * - 4 * sizeof(float), - "default_attributes"); - uint32_t *attrs = v3d_bo_map(so->default_attribute_values); - for (int i = 0; i < VC5_MAX_ATTRIBUTES; i++) { + uint32_t *attrs; + u_upload_alloc(v3d->state_uploader, 0, + V3D_MAX_VS_INPUTS * sizeof(float), 16, + &so->defaults_offset, &so->defaults, (void **)&attrs); + + for (int i = 0; i < V3D_MAX_VS_INPUTS / 4; i++) { attrs[i * 4 + 0] = 0; attrs[i * 4 + 1] = 0; attrs[i * 4 + 2] = 0; @@ -421,6 +423,7 @@ } } + u_upload_unmap(v3d->state_uploader); return so; } @@ -429,7 +432,7 @@ { struct v3d_vertex_stateobj *so = hwcso; - v3d_bo_unreference(&so->default_attribute_values); + pipe_resource_reference(&so->defaults, NULL); free(so); } @@ -481,17 +484,17 @@ struct pipe_surface *cbuf = v3d->framebuffer.cbufs[i]; if (!cbuf) continue; + struct v3d_surface *v3d_cbuf = v3d_surface(cbuf); const struct util_format_description *desc = util_format_description(cbuf->format); /* For BGRA8 formats (DRI window system default format), we - * need to swap R and B, since the HW's format is RGBA8. + * need to swap R and B, since the HW's format is RGBA8. On + * V3D 4.1+, the RCL can swap R and B on load/store. 
*/ - if (desc->swizzle[0] == PIPE_SWIZZLE_Z && - cbuf->format != PIPE_FORMAT_B5G6R5_UNORM) { + if (v3d->screen->devinfo.ver < 41 && v3d_cbuf->swap_rb) v3d->swap_color_rb |= 1 << i; - } if (desc->swizzle[3] == PIPE_SWIZZLE_1) v3d->blend_dst_alpha_one |= 1 << i; @@ -500,64 +503,34 @@ v3d->dirty |= VC5_DIRTY_FRAMEBUFFER; } -static struct v3d_texture_stateobj * -v3d_get_stage_tex(struct v3d_context *v3d, enum pipe_shader_type shader) -{ - switch (shader) { - case PIPE_SHADER_FRAGMENT: - v3d->dirty |= VC5_DIRTY_FRAGTEX; - return &v3d->fragtex; - break; - case PIPE_SHADER_VERTEX: - v3d->dirty |= VC5_DIRTY_VERTTEX; - return &v3d->verttex; - break; - default: - fprintf(stderr, "Unknown shader target %d\n", shader); - abort(); - } -} - -static uint32_t translate_wrap(uint32_t pipe_wrap, bool using_nearest) +static enum V3DX(Wrap_Mode) +translate_wrap(uint32_t pipe_wrap, bool using_nearest) { switch (pipe_wrap) { case PIPE_TEX_WRAP_REPEAT: - return 0; + return V3D_WRAP_MODE_REPEAT; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - return 1; + return V3D_WRAP_MODE_CLAMP; case PIPE_TEX_WRAP_MIRROR_REPEAT: - return 2; + return V3D_WRAP_MODE_MIRROR; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - return 3; + return V3D_WRAP_MODE_BORDER; case PIPE_TEX_WRAP_CLAMP: - return (using_nearest ? 1 : 3); + return (using_nearest ? 
+ V3D_WRAP_MODE_CLAMP : + V3D_WRAP_MODE_BORDER); default: unreachable("Unknown wrap mode"); } } - -static void * -v3d_create_sampler_state(struct pipe_context *pctx, - const struct pipe_sampler_state *cso) -{ - MAYBE_UNUSED struct v3d_context *v3d = v3d_context(pctx); - struct v3d_sampler_state *so = CALLOC_STRUCT(v3d_sampler_state); - - if (!so) - return NULL; - - memcpy(so, cso, sizeof(*cso)); - - bool either_nearest = - (cso->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST || - cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST); - #if V3D_VERSION >= 40 - so->bo = v3d_bo_alloc(v3d->screen, cl_packet_length(SAMPLER_STATE), - "sampler"); - void *map = v3d_bo_map(so->bo); - +static void +v3d_upload_sampler_state_variant(void *map, + const struct pipe_sampler_state *cso, + enum v3d_sampler_state_variant variant, + bool either_nearest) +{ v3dx_pack(map, SAMPLER_STATE, sampler) { sampler.wrap_i_border = false; @@ -603,29 +576,190 @@ sampler.maximum_anisotropy = 1; } - sampler.border_color_mode = V3D_BORDER_COLOR_FOLLOWS; - /* XXX: The border color field is in the TMU blending format - * (32, f16, or i16), and we need to customize it based on - * that. - * - * XXX: for compat alpha formats, we need the alpha field to - * be in the red channel. 
- */ - sampler.border_color_red = - util_float_to_half(cso->border_color.f[0]); - sampler.border_color_green = - util_float_to_half(cso->border_color.f[1]); - sampler.border_color_blue = - util_float_to_half(cso->border_color.f[2]); - sampler.border_color_alpha = - util_float_to_half(cso->border_color.f[3]); + if (variant == V3D_SAMPLER_STATE_BORDER_0) { + sampler.border_color_mode = V3D_BORDER_COLOR_0000; + } else { + sampler.border_color_mode = V3D_BORDER_COLOR_FOLLOWS; + + union pipe_color_union border; + + /* First, reswizzle the border color for any + * mismatching we're doing between the texture's + * channel order in hardware (R) versus what it is at + * the GL level (ALPHA) + */ + switch (variant) { + case V3D_SAMPLER_STATE_F16_BGRA: + case V3D_SAMPLER_STATE_F16_BGRA_UNORM: + case V3D_SAMPLER_STATE_F16_BGRA_SNORM: + border.i[0] = cso->border_color.i[2]; + border.i[1] = cso->border_color.i[1]; + border.i[2] = cso->border_color.i[0]; + border.i[3] = cso->border_color.i[3]; + break; + + case V3D_SAMPLER_STATE_F16_A: + case V3D_SAMPLER_STATE_F16_A_UNORM: + case V3D_SAMPLER_STATE_F16_A_SNORM: + case V3D_SAMPLER_STATE_32_A: + case V3D_SAMPLER_STATE_32_A_UNORM: + case V3D_SAMPLER_STATE_32_A_SNORM: + border.i[0] = cso->border_color.i[3]; + border.i[1] = 0; + border.i[2] = 0; + border.i[3] = 0; + break; + + case V3D_SAMPLER_STATE_F16_LA: + case V3D_SAMPLER_STATE_F16_LA_UNORM: + case V3D_SAMPLER_STATE_F16_LA_SNORM: + border.i[0] = cso->border_color.i[0]; + border.i[1] = cso->border_color.i[3]; + border.i[2] = 0; + border.i[3] = 0; + break; + + default: + border = cso->border_color; + } + + /* Perform any clamping. 
*/ + switch (variant) { + case V3D_SAMPLER_STATE_F16_UNORM: + case V3D_SAMPLER_STATE_F16_BGRA_UNORM: + case V3D_SAMPLER_STATE_F16_A_UNORM: + case V3D_SAMPLER_STATE_F16_LA_UNORM: + case V3D_SAMPLER_STATE_32_UNORM: + case V3D_SAMPLER_STATE_32_A_UNORM: + for (int i = 0; i < 4; i++) + border.f[i] = CLAMP(border.f[i], 0, 1); + break; + + case V3D_SAMPLER_STATE_F16_SNORM: + case V3D_SAMPLER_STATE_F16_BGRA_SNORM: + case V3D_SAMPLER_STATE_F16_A_SNORM: + case V3D_SAMPLER_STATE_F16_LA_SNORM: + case V3D_SAMPLER_STATE_32_SNORM: + case V3D_SAMPLER_STATE_32_A_SNORM: + for (int i = 0; i < 4; i++) + border.f[i] = CLAMP(border.f[i], -1, 1); + break; + + case V3D_SAMPLER_STATE_1010102U: + border.ui[0] = CLAMP(border.ui[0], + 0, (1 << 10) - 1); + border.ui[1] = CLAMP(border.ui[1], + 0, (1 << 10) - 1); + border.ui[2] = CLAMP(border.ui[2], + 0, (1 << 10) - 1); + border.ui[3] = CLAMP(border.ui[3], + 0, 3); + break; + + case V3D_SAMPLER_STATE_16U: + for (int i = 0; i < 4; i++) + border.ui[i] = CLAMP(border.ui[i], + 0, 0xffff); + break; + + case V3D_SAMPLER_STATE_16I: + for (int i = 0; i < 4; i++) + border.i[i] = CLAMP(border.i[i], + -32768, 32767); + break; + + case V3D_SAMPLER_STATE_8U: + for (int i = 0; i < 4; i++) + border.ui[i] = CLAMP(border.ui[i], + 0, 0xff); + break; + + case V3D_SAMPLER_STATE_8I: + for (int i = 0; i < 4; i++) + border.i[i] = CLAMP(border.i[i], + -128, 127); + break; + + default: + break; + } + + if (variant >= V3D_SAMPLER_STATE_32) { + sampler.border_color_word_0 = border.ui[0]; + sampler.border_color_word_1 = border.ui[1]; + sampler.border_color_word_2 = border.ui[2]; + sampler.border_color_word_3 = border.ui[3]; + } else { + sampler.border_color_word_0 = + util_float_to_half(border.f[0]); + sampler.border_color_word_1 = + util_float_to_half(border.f[1]); + sampler.border_color_word_2 = + util_float_to_half(border.f[2]); + sampler.border_color_word_3 = + util_float_to_half(border.f[3]); + } + } + } +} +#endif + +static void * +v3d_create_sampler_state(struct 
pipe_context *pctx, + const struct pipe_sampler_state *cso) +{ + MAYBE_UNUSED struct v3d_context *v3d = v3d_context(pctx); + struct v3d_sampler_state *so = CALLOC_STRUCT(v3d_sampler_state); + + if (!so) + return NULL; + + memcpy(so, cso, sizeof(*cso)); + + bool either_nearest = + (cso->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST || + cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST); + + enum V3DX(Wrap_Mode) wrap_s = translate_wrap(cso->wrap_s, + either_nearest); + enum V3DX(Wrap_Mode) wrap_t = translate_wrap(cso->wrap_t, + either_nearest); + enum V3DX(Wrap_Mode) wrap_r = translate_wrap(cso->wrap_r, + either_nearest); + + bool uses_border_color = (wrap_s == V3D_WRAP_MODE_BORDER || + wrap_t == V3D_WRAP_MODE_BORDER || + wrap_r == V3D_WRAP_MODE_BORDER); + so->border_color_variants = (uses_border_color && + (cso->border_color.ui[0] != 0 || + cso->border_color.ui[1] != 0 || + cso->border_color.ui[2] != 0 || + cso->border_color.ui[3] != 0)); + +#if V3D_VERSION >= 40 + void *map; + int sampler_align = so->border_color_variants ? 32 : 8; + int sampler_size = align(cl_packet_length(SAMPLER_STATE), sampler_align); + int num_variants = (so->border_color_variants ? 
ARRAY_SIZE(so->sampler_state_offset) : 1); + u_upload_alloc(v3d->state_uploader, 0, + sampler_size * num_variants, + sampler_align, + &so->sampler_state_offset[0], + &so->sampler_state, + &map); + + for (int i = 0; i < num_variants; i++) { + so->sampler_state_offset[i] = + so->sampler_state_offset[0] + i * sampler_size; + v3d_upload_sampler_state_variant(map + i * sampler_size, + cso, i, either_nearest); } #else /* V3D_VERSION < 40 */ v3dx_pack(&so->p0, TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1, p0) { - p0.s_wrap_mode = translate_wrap(cso->wrap_s, either_nearest); - p0.t_wrap_mode = translate_wrap(cso->wrap_t, either_nearest); - p0.r_wrap_mode = translate_wrap(cso->wrap_r, either_nearest); + p0.s_wrap_mode = wrap_s; + p0.t_wrap_mode = wrap_t; + p0.r_wrap_mode = wrap_r; } v3dx_pack(&so->texture_shader_state, TEXTURE_SHADER_STATE, tex) { @@ -642,7 +776,7 @@ unsigned nr, void **hwcso) { struct v3d_context *v3d = v3d_context(pctx); - struct v3d_texture_stateobj *stage_tex = v3d_get_stage_tex(v3d, shader); + struct v3d_texture_stateobj *stage_tex = &v3d->tex[shader]; assert(start == 0); unsigned i; @@ -668,7 +802,7 @@ struct pipe_sampler_state *psampler = hwcso; struct v3d_sampler_state *sampler = v3d_sampler_state(psampler); - v3d_bo_unreference(&sampler->bo); + pipe_resource_reference(&sampler->sampler_state, NULL); free(psampler); } @@ -692,6 +826,69 @@ } #endif +static void +v3d_setup_texture_shader_state(struct V3DX(TEXTURE_SHADER_STATE) *tex, + struct pipe_resource *prsc, + int base_level, int last_level, + int first_layer, int last_layer) +{ + struct v3d_resource *rsc = v3d_resource(prsc); + int msaa_scale = prsc->nr_samples > 1 ? 2 : 1; + + tex->image_width = prsc->width0 * msaa_scale; + tex->image_height = prsc->height0 * msaa_scale; + +#if V3D_VERSION >= 40 + /* On 4.x, the height of a 1D texture is redefined to be the + * upper 14 bits of the width (which is only usable with txf). 
+ */ + if (prsc->target == PIPE_TEXTURE_1D || + prsc->target == PIPE_TEXTURE_1D_ARRAY) { + tex->image_height = tex->image_width >> 14; + } +#endif + + if (prsc->target == PIPE_TEXTURE_3D) { + tex->image_depth = prsc->depth0; + } else { + tex->image_depth = (last_layer - first_layer) + 1; + } + + tex->base_level = base_level; +#if V3D_VERSION >= 40 + tex->max_level = last_level; + /* Note that we don't have a job to reference the texture's sBO + * at state create time, so any time this sampler view is used + * we need to add the texture to the job. + */ + tex->texture_base_pointer = + cl_address(NULL, + rsc->bo->offset + + v3d_layer_offset(prsc, 0, first_layer)); +#endif + tex->array_stride_64_byte_aligned = rsc->cube_map_stride / 64; + + /* Since other platform devices may produce UIF images even + * when they're not big enough for V3D to assume they're UIF, + * we force images with level 0 as UIF to be always treated + * that way. + */ + tex->level_0_is_strictly_uif = + (rsc->slices[0].tiling == VC5_TILING_UIF_XOR || + rsc->slices[0].tiling == VC5_TILING_UIF_NO_XOR); + tex->level_0_xor_enable = (rsc->slices[0].tiling == VC5_TILING_UIF_XOR); + + if (tex->level_0_is_strictly_uif) + tex->level_0_ub_pad = rsc->slices[0].ub_pad; + +#if V3D_VERSION >= 40 + if (tex->uif_xor_disable || + tex->level_0_is_strictly_uif) { + tex->extended = true; + } +#endif /* V3D_VERSION >= 40 */ +} + static struct pipe_sampler_view * v3d_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, const struct pipe_sampler_view *cso) @@ -726,61 +923,149 @@ so->base.reference.count = 1; so->base.context = pctx; - int msaa_scale = prsc->nr_samples > 1 ? 2 : 1; + if (rsc->separate_stencil && + cso->format == PIPE_FORMAT_X32_S8X24_UINT) { + rsc = rsc->separate_stencil; + prsc = &rsc->base; + } + + /* If we're sampling depth from depth/stencil, demote the format to + * just depth. u_format will end up giving the answers for the + * stencil channel, otherwise. 
+ */ + enum pipe_format sample_format = cso->format; + if (sample_format == PIPE_FORMAT_S8_UINT_Z24_UNORM) + sample_format = PIPE_FORMAT_X8Z24_UNORM; #if V3D_VERSION >= 40 - so->bo = v3d_bo_alloc(v3d->screen, - cl_packet_length(TEXTURE_SHADER_STATE), "sampler"); - void *map = v3d_bo_map(so->bo); + const struct util_format_description *desc = + util_format_description(sample_format); - v3dx_pack(map, TEXTURE_SHADER_STATE, tex) { -#else /* V3D_VERSION < 40 */ - STATIC_ASSERT(sizeof(so->texture_shader_state) >= - cl_packet_length(TEXTURE_SHADER_STATE)); - v3dx_pack(&so->texture_shader_state, TEXTURE_SHADER_STATE, tex) { + if (util_format_is_pure_integer(sample_format) && + !util_format_has_depth(desc)) { + int chan = util_format_get_first_non_void_channel(sample_format); + if (util_format_is_pure_uint(sample_format)) { + switch (desc->channel[chan].size) { + case 32: + so->sampler_variant = V3D_SAMPLER_STATE_32; + break; + case 16: + so->sampler_variant = V3D_SAMPLER_STATE_16U; + break; + case 10: + so->sampler_variant = V3D_SAMPLER_STATE_1010102U; + break; + case 8: + so->sampler_variant = V3D_SAMPLER_STATE_8U; + break; + } + } else { + switch (desc->channel[chan].size) { + case 32: + so->sampler_variant = V3D_SAMPLER_STATE_32; + break; + case 16: + so->sampler_variant = V3D_SAMPLER_STATE_16I; + break; + case 8: + so->sampler_variant = V3D_SAMPLER_STATE_8I; + break; + } + } + } else { + if (v3d_get_tex_return_size(&screen->devinfo, sample_format, + PIPE_TEX_COMPARE_NONE) == 32) { + if (util_format_is_alpha(sample_format)) + so->sampler_variant = V3D_SAMPLER_STATE_32_A; + else + so->sampler_variant = V3D_SAMPLER_STATE_32; + } else { + if (util_format_is_luminance_alpha(sample_format)) + so->sampler_variant = V3D_SAMPLER_STATE_F16_LA; + else if (util_format_is_alpha(sample_format)) + so->sampler_variant = V3D_SAMPLER_STATE_F16_A; + else if (fmt_swizzle[0] == PIPE_SWIZZLE_Z) + so->sampler_variant = V3D_SAMPLER_STATE_F16_BGRA; + else + so->sampler_variant = 
V3D_SAMPLER_STATE_F16; + + } + + if (util_format_is_unorm(sample_format)) { + so->sampler_variant += (V3D_SAMPLER_STATE_F16_UNORM - + V3D_SAMPLER_STATE_F16); + } else if (util_format_is_snorm(sample_format)){ + so->sampler_variant += (V3D_SAMPLER_STATE_F16_SNORM - + V3D_SAMPLER_STATE_F16); + } + } #endif - tex.image_width = prsc->width0 * msaa_scale; - tex.image_height = prsc->height0 * msaa_scale; + /* V3D still doesn't support sampling from raster textures, so we will + * have to copy to a temporary tiled texture. + */ + if (!rsc->tiled && !(prsc->target == PIPE_TEXTURE_1D || + prsc->target == PIPE_TEXTURE_1D_ARRAY)) { + struct v3d_resource *shadow_parent = rsc; + struct pipe_resource tmpl = { + .target = prsc->target, + .format = prsc->format, + .width0 = u_minify(prsc->width0, + cso->u.tex.first_level), + .height0 = u_minify(prsc->height0, + cso->u.tex.first_level), + .depth0 = 1, + .array_size = 1, + .bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET, + .last_level = cso->u.tex.last_level - cso->u.tex.first_level, + .nr_samples = prsc->nr_samples, + }; -#if V3D_VERSION >= 40 - /* On 4.x, the height of a 1D texture is redefined to be the - * upper 14 bits of the width (which is only usable with txf). + /* Create the shadow texture. The rest of the sampler view + * setup will use the shadow. */ - if (prsc->target == PIPE_TEXTURE_1D || - prsc->target == PIPE_TEXTURE_1D_ARRAY) { - tex.image_height = tex.image_width >> 14; + prsc = v3d_resource_create(pctx->screen, &tmpl); + if (!prsc) { + free(so); + return NULL; } + rsc = v3d_resource(prsc); + + /* Flag it as needing update of the contents from the parent. 
*/ + rsc->writes = shadow_parent->writes - 1; + assert(rsc->tiled); + + so->texture = prsc; + } else { + pipe_resource_reference(&so->texture, prsc); + } + + void *map; +#if V3D_VERSION >= 40 + so->bo = v3d_bo_alloc(v3d->screen, + cl_packet_length(TEXTURE_SHADER_STATE), "sampler"); + map = v3d_bo_map(so->bo); +#else /* V3D_VERSION < 40 */ + STATIC_ASSERT(sizeof(so->texture_shader_state) >= + cl_packet_length(TEXTURE_SHADER_STATE)); + map = &so->texture_shader_state; #endif - if (prsc->target == PIPE_TEXTURE_3D) { - tex.image_depth = prsc->depth0; - } else { - tex.image_depth = (cso->u.tex.last_layer - - cso->u.tex.first_layer) + 1; - } + v3dx_pack(map, TEXTURE_SHADER_STATE, tex) { + v3d_setup_texture_shader_state(&tex, prsc, + cso->u.tex.first_level, + cso->u.tex.last_level, + cso->u.tex.first_layer, + cso->u.tex.last_layer); tex.srgb = util_format_is_srgb(cso->format); - tex.base_level = cso->u.tex.first_level; #if V3D_VERSION >= 40 - tex.max_level = cso->u.tex.last_level; - /* Note that we don't have a job to reference the texture's sBO - * at state create time, so any time this sampler view is used - * we need to add the texture to the job. - */ - tex.texture_base_pointer = cl_address(NULL, - rsc->bo->offset + - rsc->slices[0].offset + - cso->u.tex.first_layer * - rsc->cube_map_stride), - tex.swizzle_r = translate_swizzle(so->swizzle[0]); tex.swizzle_g = translate_swizzle(so->swizzle[1]); tex.swizzle_b = translate_swizzle(so->swizzle[2]); tex.swizzle_a = translate_swizzle(so->swizzle[3]); #endif - tex.array_stride_64_byte_aligned = rsc->cube_map_stride / 64; if (prsc->nr_samples > 1 && V3D_VERSION < 40) { /* Using texture views to reinterpret formats on our @@ -829,28 +1114,6 @@ tex.texture_type = v3d_get_tex_format(&screen->devinfo, cso->format); } - - /* Since other platform devices may produce UIF images even - * when they're not big enough for V3D to assume they're UIF, - * we force images with level 0 as UIF to be always treated - * that way. 
- */ - tex.level_0_is_strictly_uif = (rsc->slices[0].tiling == - VC5_TILING_UIF_XOR || - rsc->slices[0].tiling == - VC5_TILING_UIF_NO_XOR); - tex.level_0_xor_enable = (rsc->slices[0].tiling == - VC5_TILING_UIF_XOR); - - if (tex.level_0_is_strictly_uif) - tex.level_0_ub_pad = rsc->slices[0].ub_pad; - -#if V3D_VERSION >= 40 - if (tex.uif_xor_disable || - tex.level_0_is_strictly_uif) { - tex.extended = true; - } -#endif /* V3D_VERSION >= 40 */ }; return &so->base; @@ -864,6 +1127,7 @@ v3d_bo_unreference(&sview->bo); pipe_resource_reference(&psview->texture, NULL); + pipe_resource_reference(&sview->texture, NULL); free(psview); } @@ -874,7 +1138,7 @@ struct pipe_sampler_view **views) { struct v3d_context *v3d = v3d_context(pctx); - struct v3d_texture_stateobj *stage_tex = v3d_get_stage_tex(v3d, shader); + struct v3d_texture_stateobj *stage_tex = &v3d->tex[shader]; unsigned i; unsigned new_nr = 0; @@ -950,6 +1214,144 @@ ctx->dirty |= VC5_DIRTY_STREAMOUT; } +static void +v3d_set_shader_buffers(struct pipe_context *pctx, + enum pipe_shader_type shader, + unsigned start, unsigned count, + const struct pipe_shader_buffer *buffers) +{ + struct v3d_context *v3d = v3d_context(pctx); + struct v3d_ssbo_stateobj *so = &v3d->ssbo[shader]; + unsigned mask = 0; + + if (buffers) { + for (unsigned i = 0; i < count; i++) { + unsigned n = i + start; + struct pipe_shader_buffer *buf = &so->sb[n]; + + if ((buf->buffer == buffers[i].buffer) && + (buf->buffer_offset == buffers[i].buffer_offset) && + (buf->buffer_size == buffers[i].buffer_size)) + continue; + + mask |= 1 << n; + + buf->buffer_offset = buffers[i].buffer_offset; + buf->buffer_size = buffers[i].buffer_size; + pipe_resource_reference(&buf->buffer, buffers[i].buffer); + + if (buf->buffer) + so->enabled_mask |= 1 << n; + else + so->enabled_mask &= ~(1 << n); + } + } else { + mask = ((1 << count) - 1) << start; + + for (unsigned i = 0; i < count; i++) { + unsigned n = i + start; + struct pipe_shader_buffer *buf = &so->sb[n]; + + 
pipe_resource_reference(&buf->buffer, NULL); + } + + so->enabled_mask &= ~mask; + } + + v3d->dirty |= VC5_DIRTY_SSBO; +} + +static void +v3d_create_image_view_texture_shader_state(struct v3d_context *v3d, + struct v3d_shaderimg_stateobj *so, + int img) +{ +#if V3D_VERSION >= 40 + struct v3d_image_view *iview = &so->si[img]; + + void *map; + u_upload_alloc(v3d->uploader, 0, cl_packet_length(TEXTURE_SHADER_STATE), + 32, + &iview->tex_state_offset, + &iview->tex_state, + &map); + + struct pipe_resource *prsc = iview->base.resource; + + v3dx_pack(map, TEXTURE_SHADER_STATE, tex) { + v3d_setup_texture_shader_state(&tex, prsc, + iview->base.u.tex.level, + iview->base.u.tex.level, + iview->base.u.tex.first_layer, + iview->base.u.tex.last_layer); + + tex.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X); + tex.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y); + tex.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z); + tex.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W); + + tex.texture_type = v3d_get_tex_format(&v3d->screen->devinfo, + iview->base.format); + }; +#else /* V3D_VERSION < 40 */ + /* V3D 3.x doesn't use support shader image load/store operations on + * textures, so it would get lowered in the shader to general memory + * acceses. 
+ */ +#endif +} + +static void +v3d_set_shader_images(struct pipe_context *pctx, + enum pipe_shader_type shader, + unsigned start, unsigned count, + const struct pipe_image_view *images) +{ + struct v3d_context *v3d = v3d_context(pctx); + struct v3d_shaderimg_stateobj *so = &v3d->shaderimg[shader]; + + if (images) { + for (unsigned i = 0; i < count; i++) { + unsigned n = i + start; + struct v3d_image_view *iview = &so->si[n]; + + if ((iview->base.resource == images[i].resource) && + (iview->base.format == images[i].format) && + (iview->base.access == images[i].access) && + !memcmp(&iview->base.u, &images[i].u, + sizeof(iview->base.u))) + continue; + + util_copy_image_view(&iview->base, &images[i]); + + if (iview->base.resource) { + so->enabled_mask |= 1 << n; + v3d_create_image_view_texture_shader_state(v3d, + so, + n); + } else { + so->enabled_mask &= ~(1 << n); + pipe_resource_reference(&iview->tex_state, NULL); + } + } + } else { + for (unsigned i = 0; i < count; i++) { + unsigned n = i + start; + struct v3d_image_view *iview = &so->si[n]; + + pipe_resource_reference(&iview->base.resource, NULL); + pipe_resource_reference(&iview->tex_state, NULL); + } + + if (count == 32) + so->enabled_mask = 0; + else + so->enabled_mask &= ~(((1 << count) - 1) << start); + } + + v3d->dirty |= VC5_DIRTY_SHADER_IMAGE; +} + void v3dX(state_init)(struct pipe_context *pctx) { @@ -989,6 +1391,9 @@ pctx->sampler_view_destroy = v3d_sampler_view_destroy; pctx->set_sampler_views = v3d_set_sampler_views; + pctx->set_shader_buffers = v3d_set_shader_buffers; + pctx->set_shader_images = v3d_set_shader_images; + pctx->create_stream_output_target = v3d_create_stream_output_target; pctx->stream_output_target_destroy = v3d_stream_output_target_destroy; pctx->set_stream_output_targets = v3d_set_stream_output_targets; diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_bufmgr.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_bufmgr.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_bufmgr.c 2018-02-27 
16:44:19.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_bufmgr.c 2019-03-31 23:16:37.000000000 +0000 @@ -386,7 +386,6 @@ static struct vc4_bo * vc4_bo_open_handle(struct vc4_screen *screen, - uint32_t winsys_stride, uint32_t handle, uint32_t size) { struct vc4_bo *bo; @@ -410,8 +409,7 @@ bo->private = false; #ifdef USE_VC4_SIMULATOR - vc4_simulator_open_from_handle(screen->fd, winsys_stride, - bo->handle, bo->size); + vc4_simulator_open_from_handle(screen->fd, bo->handle, bo->size); bo->map = malloc(bo->size); #endif @@ -423,8 +421,7 @@ } struct vc4_bo * -vc4_bo_open_name(struct vc4_screen *screen, uint32_t name, - uint32_t winsys_stride) +vc4_bo_open_name(struct vc4_screen *screen, uint32_t name) { struct drm_gem_open o = { .name = name @@ -436,11 +433,11 @@ return NULL; } - return vc4_bo_open_handle(screen, winsys_stride, o.handle, o.size); + return vc4_bo_open_handle(screen, o.handle, o.size); } struct vc4_bo * -vc4_bo_open_dmabuf(struct vc4_screen *screen, int fd, uint32_t winsys_stride) +vc4_bo_open_dmabuf(struct vc4_screen *screen, int fd) { uint32_t handle; int ret = drmPrimeFDToHandle(screen->fd, fd, &handle); @@ -457,7 +454,7 @@ return NULL; } - return vc4_bo_open_handle(screen, winsys_stride, handle, size); + return vc4_bo_open_handle(screen, handle, size); } int diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_bufmgr.h mesa-19.0.1/src/gallium/drivers/vc4/vc4_bufmgr.h --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_bufmgr.h 2018-02-27 16:44:19.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_bufmgr.h 2019-03-31 23:16:37.000000000 +0000 @@ -66,10 +66,8 @@ uint32_t size); void vc4_bo_last_unreference(struct vc4_bo *bo); void vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time); -struct vc4_bo *vc4_bo_open_name(struct vc4_screen *screen, uint32_t name, - uint32_t winsys_stride); -struct vc4_bo *vc4_bo_open_dmabuf(struct vc4_screen *screen, int fd, - uint32_t winsys_stride); +struct vc4_bo *vc4_bo_open_name(struct 
vc4_screen *screen, uint32_t name); +struct vc4_bo *vc4_bo_open_dmabuf(struct vc4_screen *screen, int fd); bool vc4_bo_flink(struct vc4_bo *bo, uint32_t *name); int vc4_bo_get_dmabuf(struct vc4_bo *bo); diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_context.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_context.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -85,6 +85,18 @@ } static void +vc4_set_debug_callback(struct pipe_context *pctx, + const struct pipe_debug_callback *cb) +{ + struct vc4_context *vc4 = vc4_context(pctx); + + if (cb) + vc4->debug = *cb; + else + memset(&vc4->debug, 0, sizeof(vc4->debug)); +} + +static void vc4_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc) { struct vc4_context *vc4 = vc4_context(pctx); @@ -164,6 +176,7 @@ pctx->priv = priv; pctx->destroy = vc4_context_destroy; pctx->flush = vc4_pipe_flush; + pctx->set_debug_callback = vc4_set_debug_callback; pctx->invalidate_resource = vc4_invalidate_resource; pctx->texture_barrier = vc4_texture_barrier; diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_context.h mesa-19.0.1/src/gallium/drivers/vc4/vc4_context.h --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_context.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -405,6 +405,7 @@ struct pipe_viewport_state viewport; struct vc4_constbuf_stateobj constbuf[PIPE_SHADER_TYPES]; struct vc4_vertexbuf_stateobj vertexbuf; + struct pipe_debug_callback debug; struct vc4_hwperfmon *perfmon; /** @} */ @@ -451,6 +452,8 @@ #define perf_debug(...) 
do { \ if (unlikely(vc4_debug & VC4_DEBUG_PERF)) \ fprintf(stderr, __VA_ARGS__); \ + if (unlikely(vc4->debug.debug_message)) \ + pipe_debug_message(&vc4->debug, PERF_INFO, __VA_ARGS__); \ } while (0) static inline struct vc4_context * @@ -486,12 +489,8 @@ void vc4_query_init(struct pipe_context *pctx); void vc4_simulator_init(struct vc4_screen *screen); void vc4_simulator_destroy(struct vc4_screen *screen); -int vc4_simulator_flush(struct vc4_context *vc4, - struct drm_vc4_submit_cl *args, - struct vc4_job *job); int vc4_simulator_ioctl(int fd, unsigned long request, void *arg); -void vc4_simulator_open_from_handle(int fd, uint32_t winsys_stride, - int handle, uint32_t size); +void vc4_simulator_open_from_handle(int fd, int handle, uint32_t size); static inline int vc4_ioctl(int fd, unsigned long request, void *arg) diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_job.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_job.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_job.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_job.c 2019-03-31 23:16:37.000000000 +0000 @@ -492,11 +492,7 @@ if (!(vc4_debug & VC4_DEBUG_NORAST)) { int ret; -#ifndef USE_VC4_SIMULATOR - ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit); -#else - ret = vc4_simulator_flush(vc4, &submit, job); -#endif + ret = vc4_ioctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit); static bool warned = false; if (ret && !warned) { fprintf(stderr, "Draw call returned %s. 
" diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_nir_lower_blend.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_nir_lower_blend.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_nir_lower_blend.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_nir_lower_blend.c 2019-03-31 23:16:37.000000000 +0000 @@ -42,6 +42,7 @@ #include "util/u_format.h" #include "vc4_qir.h" #include "compiler/nir/nir_builder.h" +#include "compiler/nir/nir_format_convert.h" #include "vc4_context.h" static bool @@ -67,37 +68,6 @@ return &load->dest.ssa; } -static nir_ssa_def * -vc4_nir_srgb_decode(nir_builder *b, nir_ssa_def *srgb) -{ - nir_ssa_def *is_low = nir_flt(b, srgb, nir_imm_float(b, 0.04045)); - nir_ssa_def *low = nir_fmul(b, srgb, nir_imm_float(b, 1.0 / 12.92)); - nir_ssa_def *high = nir_fpow(b, - nir_fmul(b, - nir_fadd(b, srgb, - nir_imm_float(b, 0.055)), - nir_imm_float(b, 1.0 / 1.055)), - nir_imm_float(b, 2.4)); - - return nir_bcsel(b, is_low, low, high); -} - -static nir_ssa_def * -vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear) -{ - nir_ssa_def *is_low = nir_flt(b, linear, nir_imm_float(b, 0.0031308)); - nir_ssa_def *low = nir_fmul(b, linear, nir_imm_float(b, 12.92)); - nir_ssa_def *high = nir_fsub(b, - nir_fmul(b, - nir_imm_float(b, 1.055), - nir_fpow(b, - linear, - nir_imm_float(b, 0.41666))), - nir_imm_float(b, 0.055)); - - return nir_bcsel(b, is_low, low, high); -} - static nir_ssa_def * vc4_blend_channel_f(nir_builder *b, nir_ssa_def **src, @@ -130,7 +100,7 @@ return nir_load_system_value(b, nir_intrinsic_load_blend_const_color_r_float + channel, - 0); + 0, 32); case PIPE_BLENDFACTOR_CONST_ALPHA: return nir_load_blend_const_color_a_float(b); case PIPE_BLENDFACTOR_ZERO: @@ -148,7 +118,7 @@ nir_load_system_value(b, nir_intrinsic_load_blend_const_color_r_float + channel, - 0)); + 0, 32)); case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return nir_fsub(b, nir_imm_float(b, 1.0), nir_load_blend_const_color_a_float(b)); @@ -501,14 +471,14 @@ /* Turn dst 
color to linear. */ for (int i = 0; i < 3; i++) - dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]); + dst_color[i] = nir_format_srgb_to_linear(b, dst_color[i]); nir_ssa_def *blend_color[4]; vc4_do_blending_f(c, b, blend_color, src_color, dst_color); /* sRGB encode the output color */ for (int i = 0; i < 3; i++) - blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]); + blend_color[i] = nir_format_linear_to_srgb(b, blend_color[i]); packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color); } else { diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_nir_lower_io.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_nir_lower_io.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_nir_lower_io.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_nir_lower_io.c 2019-03-31 23:16:37.000000000 +0000 @@ -330,7 +330,8 @@ nir_intrinsic_instr *intr_comp = nir_intrinsic_instr_create(c->s, intr->intrinsic); intr_comp->num_components = 1; - nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL); + nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, + intr->dest.ssa.bit_size, NULL); /* Convert the uniform offset to bytes. 
If it happens * to be a constant, constant-folding will clean up diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_program.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_program.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_program.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_program.c 2019-03-31 23:16:37.000000000 +0000 @@ -1004,24 +1004,24 @@ enum qpu_cond cond; switch (compare_instr->op) { - case nir_op_feq: - case nir_op_ieq: + case nir_op_feq32: + case nir_op_ieq32: case nir_op_seq: cond = QPU_COND_ZS; break; - case nir_op_fne: - case nir_op_ine: + case nir_op_fne32: + case nir_op_ine32: case nir_op_sne: cond = QPU_COND_ZC; break; - case nir_op_fge: - case nir_op_ige: - case nir_op_uge: + case nir_op_fge32: + case nir_op_ige32: + case nir_op_uge32: case nir_op_sge: cond = QPU_COND_NC; break; - case nir_op_flt: - case nir_op_ilt: + case nir_op_flt32: + case nir_op_ilt32: case nir_op_slt: cond = QPU_COND_NS; break; @@ -1048,7 +1048,7 @@ qir_uniform_f(c, 1.0), qir_uniform_f(c, 0.0)); break; - case nir_op_bcsel: + case nir_op_b32csel: *dest = qir_SEL(c, cond, ntq_get_alu_src(c, sel_instr, 1), ntq_get_alu_src(c, sel_instr, 2)); @@ -1208,14 +1208,14 @@ case nir_op_u2f32: result = qir_ITOF(c, src[0]); break; - case nir_op_b2f: + case nir_op_b2f32: result = qir_AND(c, src[0], qir_uniform_f(c, 1.0)); break; - case nir_op_b2i: + case nir_op_b2i32: result = qir_AND(c, src[0], qir_uniform_ui(c, 1)); break; - case nir_op_i2b: - case nir_op_f2b: + case nir_op_i2b32: + case nir_op_f2b32: qir_SF(c, src[0]); result = qir_MOV(c, qir_SEL(c, QPU_COND_ZC, qir_uniform_ui(c, ~0), @@ -1264,21 +1264,21 @@ case nir_op_sne: case nir_op_sge: case nir_op_slt: - case nir_op_feq: - case nir_op_fne: - case nir_op_fge: - case nir_op_flt: - case nir_op_ieq: - case nir_op_ine: - case nir_op_ige: - case nir_op_uge: - case nir_op_ilt: + case nir_op_feq32: + case nir_op_fne32: + case nir_op_fge32: + case nir_op_flt32: + case nir_op_ieq32: + case nir_op_ine32: + case 
nir_op_ige32: + case nir_op_uge32: + case nir_op_ilt32: if (!ntq_emit_comparison(c, &result, instr, instr)) { fprintf(stderr, "Bad comparison instruction\n"); } break; - case nir_op_bcsel: + case nir_op_b32csel: result = ntq_emit_bcsel(c, instr, src); break; case nir_op_fcsel: @@ -1591,14 +1591,14 @@ NIR_PASS(progress, s, nir_opt_dce); NIR_PASS(progress, s, nir_opt_dead_cf); NIR_PASS(progress, s, nir_opt_cse); - NIR_PASS(progress, s, nir_opt_peephole_select, 8); + NIR_PASS(progress, s, nir_opt_peephole_select, 8, true); NIR_PASS(progress, s, nir_opt_algebraic); NIR_PASS(progress, s, nir_opt_constant_folding); NIR_PASS(progress, s, nir_opt_undef); NIR_PASS(progress, s, nir_opt_loop_unroll, nir_var_shader_in | nir_var_shader_out | - nir_var_local); + nir_var_function_temp); } while (progress); } @@ -2363,7 +2363,8 @@ if (stage == QSTAGE_FRAG) { NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables); } else { - NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables); + NIR_PASS_V(c->s, nir_lower_clip_vs, + c->key->ucp_enables, false); NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out); } @@ -2384,6 +2385,8 @@ vc4_optimize_nir(c->s); + NIR_PASS_V(c->s, nir_lower_bool_to_int32); + NIR_PASS_V(c->s, nir_convert_from_ssa, true); if (vc4_debug & VC4_DEBUG_SHADERDB) { @@ -2514,7 +2517,7 @@ vc4_optimize_nir(s); - NIR_PASS_V(s, nir_remove_dead_variables, nir_var_local); + NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp); /* Garbage collect dead instructions */ nir_sweep(s); diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_query.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_query.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_query.c 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_query.c 2019-03-31 23:16:37.000000000 +0000 @@ -132,7 +132,7 @@ /* We can't mix HW and non-HW queries. 
*/ if (nhwqueries && nhwqueries != num_queries) - return NULL; + goto err_free_query; if (!nhwqueries) return (struct pipe_query *)query; diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_resource.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_resource.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_resource.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_resource.c 2019-03-31 23:16:37.000000000 +0000 @@ -319,8 +319,10 @@ return vc4_bo_flink(rsc->bo, &whandle->handle); case WINSYS_HANDLE_TYPE_KMS: - if (screen->ro && renderonly_get_handle(rsc->scanout, whandle)) - return TRUE; + if (screen->ro) { + assert(rsc->scanout); + return renderonly_get_handle(rsc->scanout, whandle); + } whandle->handle = rsc->bo->handle; return TRUE; case WINSYS_HANDLE_TYPE_FD: @@ -622,12 +624,10 @@ switch (whandle->type) { case WINSYS_HANDLE_TYPE_SHARED: - rsc->bo = vc4_bo_open_name(screen, - whandle->handle, whandle->stride); + rsc->bo = vc4_bo_open_name(screen, whandle->handle); break; case WINSYS_HANDLE_TYPE_FD: - rsc->bo = vc4_bo_open_dmabuf(screen, - whandle->handle, whandle->stride); + rsc->bo = vc4_bo_open_dmabuf(screen, whandle->handle); break; default: fprintf(stderr, @@ -1013,6 +1013,7 @@ vc4_update_shadow_baselevel_texture(struct pipe_context *pctx, struct pipe_sampler_view *pview) { + struct vc4_context *vc4 = vc4_context(pctx); struct vc4_sampler_view *view = vc4_sampler_view(pview); struct vc4_resource *shadow = vc4_resource(view->texture); struct vc4_resource *orig = vc4_resource(pview->texture); diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_screen.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_screen.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_screen.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -178,6 +178,9 @@ /* Note: Not supported in hardware, just faking it. 
*/ return 5; + case PIPE_CAP_MAX_VARYINGS: + return 8; + case PIPE_CAP_VENDOR_ID: return 0x14E4; case PIPE_CAP_ACCELERATED: diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_simulator.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_simulator.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_simulator.c 2018-03-13 20:41:43.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_simulator.c 2019-03-31 23:16:37.000000000 +0000 @@ -99,10 +99,13 @@ /** Area for this BO within sim_state->mem */ struct mem_block *block; - void *winsys_map; - uint32_t winsys_stride; int handle; + + /* Mapping of the underlying GEM object that we copy in/out of + * simulator memory. + */ + void *gem_vaddr; }; static void * @@ -143,6 +146,7 @@ sim_bo->file = file; sim_bo->handle = handle; + /* Allocate space for the buffer in simulator memory. */ mtx_lock(&sim_state.mutex); sim_bo->block = u_mmAllocMem(sim_state.heap, size + 4, PAGE_ALIGN2, 0); mtx_unlock(&sim_state.mutex); @@ -162,6 +166,25 @@ mtx_lock(&sim_state.mutex); _mesa_hash_table_insert(file->bo_map, int_to_key(handle), bo); mtx_unlock(&sim_state.mutex); + + /* Map the GEM buffer for copy in/out to the simulator. 
*/ + struct drm_mode_map_dumb map = { + .handle = handle, + }; + int ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map); + if (ret) { + fprintf(stderr, "Failed to get MMAP offset: %d\n", + errno); + abort(); + } + sim_bo->gem_vaddr = mmap(NULL, obj->base.size, + PROT_READ | PROT_WRITE, MAP_SHARED, + fd, map.offset); + if (sim_bo->gem_vaddr == MAP_FAILED) { + fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n", + handle, (long long)map.offset, (int)obj->base.size); + abort(); + } } return sim_bo; @@ -174,16 +197,19 @@ struct drm_vc4_bo *bo = &sim_bo->base; struct drm_gem_cma_object *obj = &bo->base; - if (sim_bo->winsys_map) - munmap(sim_bo->winsys_map, obj->base.size); + if (bo->validated_shader) { + free(bo->validated_shader->texture_samples); + free(bo->validated_shader); + } + + if (sim_bo->gem_vaddr) + munmap(sim_bo->gem_vaddr, obj->base.size); mtx_lock(&sim_state.mutex); u_mmFreeMem(sim_bo->block); if (sim_bo->handle) { - struct hash_entry *entry = - _mesa_hash_table_search(sim_file->bo_map, - int_to_key(sim_bo->handle)); - _mesa_hash_table_remove(sim_file->bo_map, entry); + _mesa_hash_table_remove_key(sim_file->bo_map, + int_to_key(sim_bo->handle)); } mtx_unlock(&sim_state.mutex); ralloc_free(sim_bo); @@ -210,41 +236,23 @@ } static int -vc4_simulator_pin_bos(struct drm_device *dev, struct vc4_job *job, +vc4_simulator_pin_bos(struct vc4_simulator_file *file, struct vc4_exec_info *exec) { - int fd = dev->screen->fd; - struct vc4_simulator_file *file = vc4_get_simulator_file_for_fd(fd); struct drm_vc4_submit_cl *args = exec->args; - struct vc4_bo **bos = job->bo_pointers.base; + uint32_t *bo_handles = (uint32_t *)(uintptr_t)args->bo_handles; exec->bo_count = args->bo_handle_count; exec->bo = calloc(exec->bo_count, sizeof(void *)); for (int i = 0; i < exec->bo_count; i++) { - struct vc4_bo *bo = bos[i]; struct vc4_simulator_bo *sim_bo = - vc4_get_simulator_bo(file, bo->handle); + vc4_get_simulator_bo(file, bo_handles[i]); struct drm_vc4_bo *drm_bo 
= &sim_bo->base; struct drm_gem_cma_object *obj = &drm_bo->base; - drm_bo->bo = bo; -#if 0 - fprintf(stderr, "bo hindex %d: %s\n", i, bo->name); -#endif - - vc4_bo_map(bo); - memcpy(obj->vaddr, bo->map, bo->size); + memcpy(obj->vaddr, sim_bo->gem_vaddr, obj->base.size); exec->bo[i] = obj; - - /* The kernel does this validation at shader create ioctl - * time. - */ - if (strcmp(bo->name, "code") == 0) { - drm_bo->validated_shader = vc4_validate_shader(obj); - if (!drm_bo->validated_shader) - abort(); - } } return 0; } @@ -255,16 +263,13 @@ for (int i = 0; i < exec->bo_count; i++) { struct drm_gem_cma_object *obj = exec->bo[i]; struct drm_vc4_bo *drm_bo = to_vc4_bo(&obj->base); - struct vc4_bo *bo = drm_bo->bo; + struct vc4_simulator_bo *sim_bo = + (struct vc4_simulator_bo *)drm_bo; assert(*(uint32_t *)(obj->vaddr + obj->base.size) == BO_SENTINEL); - memcpy(bo->map, obj->vaddr, bo->size); - - if (drm_bo->validated_shader) { - free(drm_bo->validated_shader->texture_samples); - free(drm_bo->validated_shader); - } + if (sim_bo->gem_vaddr) + memcpy(sim_bo->gem_vaddr, obj->vaddr, obj->base.size); } free(exec->bo); @@ -359,19 +364,10 @@ fclose(f); } -int -vc4_simulator_flush(struct vc4_context *vc4, - struct drm_vc4_submit_cl *args, struct vc4_job *job) +static int +vc4_simulator_submit_cl_ioctl(int fd, struct drm_vc4_submit_cl *args) { - struct vc4_screen *screen = vc4->screen; - int fd = screen->fd; struct vc4_simulator_file *file = vc4_get_simulator_file_for_fd(fd); - struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]); - struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL; - struct vc4_simulator_bo *csim_bo = ctex ? vc4_get_simulator_bo(file, ctex->bo->handle) : NULL; - uint32_t winsys_stride = ctex ? csim_bo->winsys_stride : 0; - uint32_t sim_stride = ctex ? 
ctex->slices[0].stride : 0; - uint32_t row_len = MIN2(sim_stride, winsys_stride); struct vc4_exec_info exec; struct drm_device *dev = &file->dev; int ret; @@ -379,25 +375,9 @@ memset(&exec, 0, sizeof(exec)); list_inithead(&exec.unref_list); - if (ctex && csim_bo->winsys_map) { -#if 0 - fprintf(stderr, "%dx%d %d %d %d\n", - ctex->base.b.width0, ctex->base.b.height0, - winsys_stride, - sim_stride, - ctex->bo->size); -#endif - - for (int y = 0; y < ctex->base.height0; y++) { - memcpy(ctex->bo->map + y * sim_stride, - csim_bo->winsys_map + y * winsys_stride, - row_len); - } - } - exec.args = args; - ret = vc4_simulator_pin_bos(dev, job, &exec); + ret = vc4_simulator_pin_bos(file, &exec); if (ret) return ret; @@ -448,65 +428,19 @@ vc4_free_simulator_bo(sim_bo); } - if (ctex && csim_bo->winsys_map) { - for (int y = 0; y < ctex->base.height0; y++) { - memcpy(csim_bo->winsys_map + y * winsys_stride, - ctex->bo->map + y * sim_stride, - row_len); - } - } - return 0; } /** - * Map the underlying GEM object from the real hardware GEM handle. - */ -static void * -vc4_simulator_map_winsys_bo(int fd, struct vc4_simulator_bo *sim_bo) -{ - struct drm_vc4_bo *bo = &sim_bo->base; - struct drm_gem_cma_object *obj = &bo->base; - int ret; - void *map; - - struct drm_mode_map_dumb map_dumb = { - .handle = sim_bo->handle, - }; - ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map_dumb); - if (ret != 0) { - fprintf(stderr, "map ioctl failure\n"); - abort(); - } - - map = mmap(NULL, obj->base.size, PROT_READ | PROT_WRITE, MAP_SHARED, - fd, map_dumb.offset); - if (map == MAP_FAILED) { - fprintf(stderr, - "mmap of bo %d (offset 0x%016llx, size %d) failed\n", - sim_bo->handle, (long long)map_dumb.offset, - (int)obj->base.size); - abort(); - } - - return map; -} - -/** * Do fixups after a BO has been opened from a handle. 
* * This could be done at DRM_IOCTL_GEM_OPEN/DRM_IOCTL_GEM_PRIME_FD_TO_HANDLE * time, but we're still using drmPrimeFDToHandle() so we have this helper to * be called afterward instead. */ -void vc4_simulator_open_from_handle(int fd, uint32_t winsys_stride, - int handle, uint32_t size) +void vc4_simulator_open_from_handle(int fd, int handle, uint32_t size) { - struct vc4_simulator_bo *sim_bo = - vc4_create_simulator_bo(fd, handle, size); - - sim_bo->winsys_stride = winsys_stride; - sim_bo->winsys_map = vc4_simulator_map_winsys_bo(fd, sim_bo); + vc4_create_simulator_bo(fd, handle, size); } /** @@ -558,19 +492,22 @@ args->handle = create.handle; - vc4_create_simulator_bo(fd, create.handle, args->size); + struct vc4_simulator_bo *sim_bo = + vc4_create_simulator_bo(fd, create.handle, args->size); + struct drm_vc4_bo *drm_bo = &sim_bo->base; + struct drm_gem_cma_object *obj = &drm_bo->base; - struct drm_mode_map_dumb map = { - .handle = create.handle - }; - ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map); - if (ret) - return ret; + /* Copy into the simulator's BO for validation. */ + memcpy(obj->vaddr, (void *)(uintptr_t)args->data, args->size); + + /* Copy into the GEM BO to prevent the simulator_pin_bos() from + * smashing it. 
+ */ + memcpy(sim_bo->gem_vaddr, (void *)(uintptr_t)args->data, args->size); - void *shader = mmap(NULL, args->size, PROT_READ | PROT_WRITE, MAP_SHARED, - fd, map.offset); - memcpy(shader, (void *)(uintptr_t)args->data, args->size); - munmap(shader, args->size); + drm_bo->validated_shader = vc4_validate_shader(obj); + if (!drm_bo->validated_shader) + return -EINVAL; return 0; } @@ -643,6 +580,8 @@ vc4_simulator_ioctl(int fd, unsigned long request, void *args) { switch (request) { + case DRM_IOCTL_VC4_SUBMIT_CL: + return vc4_simulator_submit_cl_ioctl(fd, args); case DRM_IOCTL_VC4_CREATE_BO: return vc4_simulator_create_bo_ioctl(fd, args); case DRM_IOCTL_VC4_CREATE_SHADER_BO: diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_simulator_validate.h mesa-19.0.1/src/gallium/drivers/vc4/vc4_simulator_validate.h --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_simulator_validate.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_simulator_validate.h 2019-03-31 23:16:37.000000000 +0000 @@ -94,7 +94,6 @@ struct drm_vc4_bo { struct drm_gem_cma_object base; - struct vc4_bo *bo; struct vc4_validated_shader_info *validated_shader; struct list_head unref_head; }; diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_tiling_lt.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_tiling_lt.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_tiling_lt.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_tiling_lt.c 2019-03-31 23:16:37.000000000 +0000 @@ -26,7 +26,7 @@ * Helper functions from vc4_tiling.c that will be compiled for using NEON * assembly or not. * - * If VC4_BUILD_NEON is set, then the functions will be suffixed with _neon. + * If V3D_BUILD_NEON is set, then the functions will be suffixed with _neon. * They will only use NEON assembly if __ARM_ARCH is also set, to keep the x86 * sim build working. 
*/ @@ -34,8 +34,9 @@ #include #include "pipe/p_state.h" #include "vc4_tiling.h" +#include "broadcom/common/v3d_cpu_tiling.h" -#ifdef VC4_BUILD_NEON +#ifdef V3D_BUILD_NEON #define NEON_TAG(x) x ## _neon #else #define NEON_TAG(x) x ## _base @@ -63,217 +64,6 @@ } } -static void -vc4_load_utile(void *cpu, void *gpu, uint32_t cpu_stride, uint32_t cpp) -{ - uint32_t gpu_stride = vc4_utile_stride(cpp); -#if defined(VC4_BUILD_NEON) && defined(PIPE_ARCH_ARM) - if (gpu_stride == 8) { - __asm__ volatile ( - /* Load from the GPU in one shot, no interleave, to - * d0-d7. - */ - "vldm %[gpu], {q0, q1, q2, q3}\n" - /* Store each 8-byte line to cpu-side destination, - * incrementing it by the stride each time. - */ - "vst1.8 d0, [%[cpu]], %[cpu_stride]\n" - "vst1.8 d1, [%[cpu]], %[cpu_stride]\n" - "vst1.8 d2, [%[cpu]], %[cpu_stride]\n" - "vst1.8 d3, [%[cpu]], %[cpu_stride]\n" - "vst1.8 d4, [%[cpu]], %[cpu_stride]\n" - "vst1.8 d5, [%[cpu]], %[cpu_stride]\n" - "vst1.8 d6, [%[cpu]], %[cpu_stride]\n" - "vst1.8 d7, [%[cpu]]\n" - : [cpu] "+r"(cpu) - : [gpu] "r"(gpu), - [cpu_stride] "r"(cpu_stride) - : "q0", "q1", "q2", "q3"); - } else { - assert(gpu_stride == 16); - void *cpu2 = cpu + 8; - __asm__ volatile ( - /* Load from the GPU in one shot, no interleave, to - * d0-d7. - */ - "vldm %[gpu], {q0, q1, q2, q3};\n" - /* Store each 16-byte line in 2 parts to the cpu-side - * destination. (vld1 can only store one d-register - * at a time). 
- */ - "vst1.8 d0, [%[cpu]], %[cpu_stride]\n" - "vst1.8 d1, [%[cpu2]],%[cpu_stride]\n" - "vst1.8 d2, [%[cpu]], %[cpu_stride]\n" - "vst1.8 d3, [%[cpu2]],%[cpu_stride]\n" - "vst1.8 d4, [%[cpu]], %[cpu_stride]\n" - "vst1.8 d5, [%[cpu2]],%[cpu_stride]\n" - "vst1.8 d6, [%[cpu]]\n" - "vst1.8 d7, [%[cpu2]]\n" - : [cpu] "+r"(cpu), - [cpu2] "+r"(cpu2) - : [gpu] "r"(gpu), - [cpu_stride] "r"(cpu_stride) - : "q0", "q1", "q2", "q3"); - } -#elif defined (PIPE_ARCH_AARCH64) - if (gpu_stride == 8) { - __asm__ volatile ( - /* Load from the GPU in one shot, no interleave, to - * d0-d7. - */ - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%[gpu]]\n" - /* Store each 8-byte line to cpu-side destination, - * incrementing it by the stride each time. - */ - "st1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n" - "st1 {v0.D}[1], [%[cpu]], %[cpu_stride]\n" - "st1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n" - "st1 {v1.D}[1], [%[cpu]], %[cpu_stride]\n" - "st1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n" - "st1 {v2.D}[1], [%[cpu]], %[cpu_stride]\n" - "st1 {v3.D}[0], [%[cpu]], %[cpu_stride]\n" - "st1 {v3.D}[1], [%[cpu]]\n" - : [cpu] "+r"(cpu) - : [gpu] "r"(gpu), - [cpu_stride] "r"(cpu_stride) - : "v0", "v1", "v2", "v3"); - } else { - assert(gpu_stride == 16); - void *cpu2 = cpu + 8; - __asm__ volatile ( - /* Load from the GPU in one shot, no interleave, to - * d0-d7. - */ - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%[gpu]]\n" - /* Store each 16-byte line in 2 parts to the cpu-side - * destination. (vld1 can only store one d-register - * at a time). 
- */ - "st1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n" - "st1 {v0.D}[1], [%[cpu2]],%[cpu_stride]\n" - "st1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n" - "st1 {v1.D}[1], [%[cpu2]],%[cpu_stride]\n" - "st1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n" - "st1 {v2.D}[1], [%[cpu2]],%[cpu_stride]\n" - "st1 {v3.D}[0], [%[cpu]]\n" - "st1 {v3.D}[1], [%[cpu2]]\n" - : [cpu] "+r"(cpu), - [cpu2] "+r"(cpu2) - : [gpu] "r"(gpu), - [cpu_stride] "r"(cpu_stride) - : "v0", "v1", "v2", "v3"); - } -#else - for (uint32_t gpu_offset = 0; gpu_offset < 64; gpu_offset += gpu_stride) { - memcpy(cpu, gpu + gpu_offset, gpu_stride); - cpu += cpu_stride; - } -#endif -} - -static void -vc4_store_utile(void *gpu, void *cpu, uint32_t cpu_stride, uint32_t cpp) -{ - uint32_t gpu_stride = vc4_utile_stride(cpp); - -#if defined(VC4_BUILD_NEON) && defined(PIPE_ARCH_ARM) - if (gpu_stride == 8) { - __asm__ volatile ( - /* Load each 8-byte line from cpu-side source, - * incrementing it by the stride each time. - */ - "vld1.8 d0, [%[cpu]], %[cpu_stride]\n" - "vld1.8 d1, [%[cpu]], %[cpu_stride]\n" - "vld1.8 d2, [%[cpu]], %[cpu_stride]\n" - "vld1.8 d3, [%[cpu]], %[cpu_stride]\n" - "vld1.8 d4, [%[cpu]], %[cpu_stride]\n" - "vld1.8 d5, [%[cpu]], %[cpu_stride]\n" - "vld1.8 d6, [%[cpu]], %[cpu_stride]\n" - "vld1.8 d7, [%[cpu]]\n" - /* Load from the GPU in one shot, no interleave, to - * d0-d7. - */ - "vstm %[gpu], {q0, q1, q2, q3}\n" - : [cpu] "r"(cpu) - : [gpu] "r"(gpu), - [cpu_stride] "r"(cpu_stride) - : "q0", "q1", "q2", "q3"); - } else { - assert(gpu_stride == 16); - void *cpu2 = cpu + 8; - __asm__ volatile ( - /* Load each 16-byte line in 2 parts from the cpu-side - * destination. (vld1 can only store one d-register - * at a time). 
- */ - "vld1.8 d0, [%[cpu]], %[cpu_stride]\n" - "vld1.8 d1, [%[cpu2]],%[cpu_stride]\n" - "vld1.8 d2, [%[cpu]], %[cpu_stride]\n" - "vld1.8 d3, [%[cpu2]],%[cpu_stride]\n" - "vld1.8 d4, [%[cpu]], %[cpu_stride]\n" - "vld1.8 d5, [%[cpu2]],%[cpu_stride]\n" - "vld1.8 d6, [%[cpu]]\n" - "vld1.8 d7, [%[cpu2]]\n" - /* Store to the GPU in one shot, no interleave. */ - "vstm %[gpu], {q0, q1, q2, q3}\n" - : [cpu] "+r"(cpu), - [cpu2] "+r"(cpu2) - : [gpu] "r"(gpu), - [cpu_stride] "r"(cpu_stride) - : "q0", "q1", "q2", "q3"); - } -#elif defined (PIPE_ARCH_AARCH64) - if (gpu_stride == 8) { - __asm__ volatile ( - /* Load each 8-byte line from cpu-side source, - * incrementing it by the stride each time. - */ - "ld1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n" - "ld1 {v0.D}[1], [%[cpu]], %[cpu_stride]\n" - "ld1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n" - "ld1 {v1.D}[1], [%[cpu]], %[cpu_stride]\n" - "ld1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n" - "ld1 {v2.D}[1], [%[cpu]], %[cpu_stride]\n" - "ld1 {v3.D}[0], [%[cpu]], %[cpu_stride]\n" - "ld1 {v3.D}[1], [%[cpu]]\n" - /* Store to the GPU in one shot, no interleave. */ - "st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%[gpu]]\n" - : [cpu] "+r"(cpu) - : [gpu] "r"(gpu), - [cpu_stride] "r"(cpu_stride) - : "v0", "v1", "v2", "v3"); - } else { - assert(gpu_stride == 16); - void *cpu2 = cpu + 8; - __asm__ volatile ( - /* Load each 16-byte line in 2 parts from the cpu-side - * destination. (vld1 can only store one d-register - * at a time). - */ - "ld1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n" - "ld1 {v0.D}[1], [%[cpu2]],%[cpu_stride]\n" - "ld1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n" - "ld1 {v1.D}[1], [%[cpu2]],%[cpu_stride]\n" - "ld1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n" - "ld1 {v2.D}[1], [%[cpu2]],%[cpu_stride]\n" - "ld1 {v3.D}[0], [%[cpu]]\n" - "ld1 {v3.D}[1], [%[cpu2]]\n" - /* Store to the GPU in one shot, no interleave. 
*/ - "st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%[gpu]]\n" - : [cpu] "+r"(cpu), - [cpu2] "+r"(cpu2) - : [gpu] "r"(gpu), - [cpu_stride] "r"(cpu_stride) - : "v0", "v1", "v2", "v3"); - } -#else - for (uint32_t gpu_offset = 0; gpu_offset < 64; gpu_offset += gpu_stride) { - memcpy(gpu + gpu_offset, cpu, gpu_stride); - cpu += cpu_stride; - } -#endif - -} /** * Returns the X value into the address bits for LT tiling. * @@ -349,6 +139,7 @@ { uint32_t utile_w = vc4_utile_width(cpp); uint32_t utile_h = vc4_utile_height(cpp); + uint32_t utile_stride = vc4_utile_stride(cpp); uint32_t xstart = box->x; uint32_t ystart = box->y; @@ -357,15 +148,17 @@ void *gpu_tile = gpu + ((ystart + y) * gpu_stride + (xstart + x) * 64 / utile_w); if (to_cpu) { - vc4_load_utile(cpu + (cpu_stride * y + + v3d_load_utile(cpu + (cpu_stride * y + x * cpp), + cpu_stride, gpu_tile, - cpu_stride, cpp); + utile_stride); } else { - vc4_store_utile(gpu_tile, + v3d_store_utile(gpu_tile, + utile_stride, cpu + (cpu_stride * y + x * cpp), - cpu_stride, cpp); + cpu_stride); } } } diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_tiling_lt_neon.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_tiling_lt_neon.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_tiling_lt_neon.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_tiling_lt_neon.c 2019-03-31 23:16:37.000000000 +0000 @@ -26,5 +26,5 @@ * single file. 
*/ -#define VC4_BUILD_NEON +#define V3D_BUILD_NEON #include "vc4_tiling_lt.c" diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_buffer.c mesa-19.0.1/src/gallium/drivers/virgl/virgl_buffer.c --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_buffer.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_buffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -27,17 +27,6 @@ #include "virgl_resource.h" #include "virgl_screen.h" -static void virgl_buffer_destroy(struct pipe_screen *screen, - struct pipe_resource *buf) -{ - struct virgl_screen *vs = virgl_screen(screen); - struct virgl_buffer *vbuf = virgl_buffer(buf); - - util_range_destroy(&vbuf->valid_buffer_range); - vs->vws->resource_unref(vs->vws, vbuf->base.hw_res); - FREE(vbuf); -} - static void *virgl_buffer_transfer_map(struct pipe_context *ctx, struct pipe_resource *resource, unsigned level, @@ -47,52 +36,40 @@ { struct virgl_context *vctx = virgl_context(ctx); struct virgl_screen *vs = virgl_screen(ctx->screen); - struct virgl_buffer *vbuf = virgl_buffer(resource); + struct virgl_resource *vbuf = virgl_resource(resource); struct virgl_transfer *trans; void *ptr; bool readback; - uint32_t offset; bool doflushwait = false; - if ((usage & PIPE_TRANSFER_READ) && (vbuf->on_list == TRUE)) + if (usage & PIPE_TRANSFER_READ) doflushwait = true; else - doflushwait = virgl_res_needs_flush_wait(vctx, &vbuf->base, usage); + doflushwait = virgl_res_needs_flush_wait(vctx, vbuf, usage); if (doflushwait) ctx->flush(ctx, NULL, 0); - trans = slab_alloc(&vctx->texture_transfer_pool); - if (!trans) - return NULL; + trans = virgl_resource_create_transfer(ctx, resource, &vbuf->metadata, level, + usage, box); - trans->base.resource = resource; - trans->base.level = level; - trans->base.usage = usage; - trans->base.box = *box; - trans->base.stride = 0; - trans->base.layer_stride = 0; - - offset = box->x; - - readback = virgl_res_needs_readback(vctx, &vbuf->base, usage); + readback = 
virgl_res_needs_readback(vctx, vbuf, usage); if (readback) - vs->vws->transfer_get(vs->vws, vbuf->base.hw_res, box, trans->base.stride, trans->base.layer_stride, offset, level); + vs->vws->transfer_get(vs->vws, vbuf->hw_res, box, trans->base.stride, + trans->l_stride, trans->offset, level); if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) doflushwait = true; if (doflushwait || readback) - vs->vws->resource_wait(vs->vws, vbuf->base.hw_res); + vs->vws->resource_wait(vs->vws, vbuf->hw_res); - ptr = vs->vws->resource_map(vs->vws, vbuf->base.hw_res); + ptr = vs->vws->resource_map(vs->vws, vbuf->hw_res); if (!ptr) { return NULL; } - trans->offset = offset; *transfer = &trans->base; - return ptr + trans->offset; } @@ -101,73 +78,61 @@ { struct virgl_context *vctx = virgl_context(ctx); struct virgl_transfer *trans = virgl_transfer(transfer); - struct virgl_buffer *vbuf = virgl_buffer(transfer->resource); + struct virgl_resource *vbuf = virgl_resource(transfer->resource); if (trans->base.usage & PIPE_TRANSFER_WRITE) { - if (!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) { - struct virgl_screen *vs = virgl_screen(ctx->screen); - vctx->num_transfers++; - vs->vws->transfer_put(vs->vws, vbuf->base.hw_res, - &transfer->box, trans->base.stride, trans->base.layer_stride, trans->offset, transfer->level); - + struct virgl_screen *vs = virgl_screen(ctx->screen); + if (transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT) { + if (trans->range.end <= trans->range.start) + goto out; + + transfer->box.x += trans->range.start; + transfer->box.width = trans->range.end - trans->range.start; + trans->offset = transfer->box.x; } + + vctx->num_transfers++; + vs->vws->transfer_put(vs->vws, vbuf->hw_res, + &transfer->box, trans->base.stride, + trans->l_stride, trans->offset, + transfer->level); + } - slab_free(&vctx->texture_transfer_pool, trans); +out: + virgl_resource_destroy_transfer(vctx, trans); } static void virgl_buffer_transfer_flush_region(struct pipe_context *ctx, struct pipe_transfer 
*transfer, const struct pipe_box *box) { - struct virgl_context *vctx = virgl_context(ctx); - struct virgl_buffer *vbuf = virgl_buffer(transfer->resource); - - if (!vbuf->on_list) { - struct pipe_resource *res = NULL; - - list_addtail(&vbuf->flush_list, &vctx->to_flush_bufs); - vbuf->on_list = TRUE; - pipe_resource_reference(&res, &vbuf->base.u.b); - } - - util_range_add(&vbuf->valid_buffer_range, transfer->box.x + box->x, - transfer->box.x + box->x + box->width); + struct virgl_resource *vbuf = virgl_resource(transfer->resource); + struct virgl_transfer *trans = virgl_transfer(transfer); - vbuf->base.clean = FALSE; + /* + * FIXME: This is not optimal. For example, + * + * glMapBufferRange(.., 0, 100, GL_MAP_FLUSH_EXPLICIT_BIT) + * glFlushMappedBufferRange(.., 25, 30) + * glFlushMappedBufferRange(.., 65, 70) + * + * We'll end up flushing 25 --> 70. + */ + util_range_add(&trans->range, box->x, box->x + box->width); + vbuf->clean = FALSE; } static const struct u_resource_vtbl virgl_buffer_vtbl = { u_default_resource_get_handle, /* get_handle */ - virgl_buffer_destroy, /* resource_destroy */ + virgl_resource_destroy, /* resource_destroy */ virgl_buffer_transfer_map, /* transfer_map */ virgl_buffer_transfer_flush_region, /* transfer_flush_region */ virgl_buffer_transfer_unmap, /* transfer_unmap */ }; -struct pipe_resource *virgl_buffer_create(struct virgl_screen *vs, - const struct pipe_resource *template) +void virgl_buffer_init(struct virgl_resource *res) { - struct virgl_buffer *buf; - uint32_t size; - uint32_t vbind; - buf = CALLOC_STRUCT(virgl_buffer); - buf->base.clean = TRUE; - buf->base.u.b = *template; - buf->base.u.b.screen = &vs->base; - buf->base.u.vtbl = &virgl_buffer_vtbl; - pipe_reference_init(&buf->base.u.b.reference, 1); - util_range_init(&buf->valid_buffer_range); - - vbind = pipe_to_virgl_bind(template->bind); - size = template->width0; - - /* SSBOs and texture buffers can written to by host compute shaders. 
*/ - if (vbind == VIRGL_BIND_SHADER_BUFFER || vbind == VIRGL_BIND_SAMPLER_VIEW) - buf->base.clean = FALSE; - buf->base.hw_res = vs->vws->resource_create(vs->vws, template->target, template->format, vbind, template->width0, 1, 1, 1, 0, 0, size); - - util_range_set_empty(&buf->valid_buffer_range); - return &buf->base.u.b; + res->u.vtbl = &virgl_buffer_vtbl; } diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_context.c mesa-19.0.1/src/gallium/drivers/virgl/virgl_context.c --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_context.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -21,6 +21,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include #include "pipe/p_shader_tokens.h" #include "pipe/p_context.h" @@ -59,29 +60,6 @@ return ++next_handle; } -static void virgl_buffer_flush(struct virgl_context *vctx, - struct virgl_buffer *vbuf) -{ - struct virgl_screen *rs = virgl_screen(vctx->base.screen); - struct pipe_box box; - - assert(vbuf->on_list); - - box.height = 1; - box.depth = 1; - box.y = 0; - box.z = 0; - - box.x = vbuf->valid_buffer_range.start; - box.width = MIN2(vbuf->valid_buffer_range.end - vbuf->valid_buffer_range.start, vbuf->base.u.b.width0); - - vctx->num_transfers++; - rs->vws->transfer_put(rs->vws, vbuf->base.hw_res, - &box, 0, 0, box.x, 0); - - util_range_set_empty(&vbuf->valid_buffer_range); -} - static void virgl_attach_res_framebuffer(struct virgl_context *vctx) { struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws; @@ -251,6 +229,11 @@ if (!surf) return NULL; + assert(ctx->screen->get_param(ctx->screen, + PIPE_CAP_DEST_SURFACE_SRGB_CONTROL) || + (util_format_is_srgb(templ->format) == + util_format_is_srgb(resource->format))); + res->clean = FALSE; handle = virgl_object_assign_handle(); pipe_reference_init(&surf->base.reference, 1); @@ -344,19 +327,27 @@ const struct pipe_rasterizer_state *rs_state) { struct virgl_context *vctx = virgl_context(ctx); 
- uint32_t handle; - handle = virgl_object_assign_handle(); + struct virgl_rasterizer_state *vrs = CALLOC_STRUCT(virgl_rasterizer_state); - virgl_encode_rasterizer_state(vctx, handle, rs_state); - return (void *)(unsigned long)handle; + if (!vrs) + return NULL; + vrs->rs = *rs_state; + vrs->handle = virgl_object_assign_handle(); + + virgl_encode_rasterizer_state(vctx, vrs->handle, rs_state); + return (void *)vrs; } static void virgl_bind_rasterizer_state(struct pipe_context *ctx, void *rs_state) { struct virgl_context *vctx = virgl_context(ctx); - uint32_t handle = (unsigned long)rs_state; - + uint32_t handle = 0; + if (rs_state) { + struct virgl_rasterizer_state *vrs = rs_state; + vctx->rs_state = *vrs; + handle = vrs->handle; + } virgl_encode_bind_object(vctx, handle, VIRGL_OBJECT_RASTERIZER); } @@ -364,8 +355,9 @@ void *rs_state) { struct virgl_context *vctx = virgl_context(ctx); - uint32_t handle = (unsigned long)rs_state; - virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_RASTERIZER); + struct virgl_rasterizer_state *vrs = rs_state; + virgl_encode_delete_object(vctx, vrs->handle, VIRGL_OBJECT_RASTERIZER); + FREE(vrs); } static void virgl_set_framebuffer_state(struct pipe_context *ctx, @@ -455,10 +447,8 @@ vctx->vertex_array_dirty = TRUE; } -static void virgl_hw_set_vertex_buffers(struct pipe_context *ctx) +static void virgl_hw_set_vertex_buffers(struct virgl_context *vctx) { - struct virgl_context *vctx = virgl_context(ctx); - if (vctx->vertex_array_dirty) { struct virgl_vertex_elements_state *ve = vctx->vertex_elements; @@ -489,10 +479,9 @@ virgl_encoder_set_blend_color(vctx, color); } -static void virgl_hw_set_index_buffer(struct pipe_context *ctx, +static void virgl_hw_set_index_buffer(struct virgl_context *vctx, struct virgl_indexbuf *ib) { - struct virgl_context *vctx = virgl_context(ctx); virgl_encoder_set_index_buffer(vctx, ib); virgl_attach_res_index_buffer(vctx, ib); } @@ -531,14 +520,13 @@ struct virgl_context *vctx = virgl_context(ctx); struct 
virgl_screen *vs = virgl_screen(ctx->screen); struct virgl_resource *grres = virgl_resource(res); - struct virgl_buffer *vbuf = virgl_buffer(res); grres->clean = FALSE; - if (virgl_res_needs_flush_wait(vctx, &vbuf->base, usage)) { + if (virgl_res_needs_flush_wait(vctx, grres, usage)) { ctx->flush(ctx, NULL, 0); - vs->vws->resource_wait(vs->vws, vbuf->base.hw_res); + vs->vws->resource_wait(vs->vws, grres->hw_res); } virgl_encoder_inline_write(vctx, grres, level, usage, @@ -721,6 +709,7 @@ return; if (!(rs->caps.caps.v1.prim_mask & (1 << dinfo->mode))) { + util_primconvert_save_rasterizer_state(vctx->primconvert, &vctx->rs_state.rs); util_primconvert_draw_vbo(vctx->primconvert, dinfo); return; } @@ -740,9 +729,9 @@ u_upload_unmap(vctx->uploader); vctx->num_draws++; - virgl_hw_set_vertex_buffers(ctx); + virgl_hw_set_vertex_buffers(vctx); if (info.index_size) - virgl_hw_set_index_buffer(ctx, &ib); + virgl_hw_set_index_buffer(vctx, &ib); virgl_encoder_draw_vbo(vctx, &info); @@ -750,13 +739,20 @@ } -static void virgl_flush_eq(struct virgl_context *ctx, void *closure) +static void virgl_flush_eq(struct virgl_context *ctx, void *closure, + struct pipe_fence_handle **fence) { struct virgl_screen *rs = virgl_screen(ctx->base.screen); + int out_fence_fd = -1; /* send the buffer to the remote side for decoding */ ctx->num_transfers = ctx->num_draws = 0; - rs->vws->submit_cmd(rs->vws, ctx->cbuf); + + rs->vws->submit_cmd(rs->vws, ctx->cbuf, ctx->cbuf->in_fence_fd, + ctx->cbuf->needs_out_fence_fd ? 
&out_fence_fd : NULL); + + if (fence) + *fence = rs->vws->cs_create_fence(rs->vws, out_fence_fd); virgl_encoder_set_sub_ctx(ctx, ctx->hw_sub_ctx_id); @@ -769,21 +765,17 @@ enum pipe_flush_flags flags) { struct virgl_context *vctx = virgl_context(ctx); - struct virgl_screen *rs = virgl_screen(ctx->screen); - struct virgl_buffer *buf, *tmp; - if (fence) - *fence = rs->vws->cs_create_fence(rs->vws); + if (flags & PIPE_FLUSH_FENCE_FD) + vctx->cbuf->needs_out_fence_fd = true; - LIST_FOR_EACH_ENTRY_SAFE(buf, tmp, &vctx->to_flush_bufs, flush_list) { - struct pipe_resource *res = &buf->base.u.b; - virgl_buffer_flush(vctx, buf); - list_del(&buf->flush_list); - buf->on_list = FALSE; - pipe_resource_reference(&res, NULL); + virgl_flush_eq(vctx, vctx, fence); + if (vctx->cbuf->in_fence_fd != -1) { + close(vctx->cbuf->in_fence_fd); + vctx->cbuf->in_fence_fd = -1; } - virgl_flush_eq(vctx, vctx); + vctx->cbuf->needs_out_fence_fd = false; } static struct pipe_sampler_view *virgl_create_sampler_view(struct pipe_context *ctx, @@ -1002,6 +994,11 @@ struct virgl_resource *dres = virgl_resource(blit->dst.resource); struct virgl_resource *sres = virgl_resource(blit->src.resource); + assert(ctx->screen->get_param(ctx->screen, + PIPE_CAP_DEST_SURFACE_SRGB_CONTROL) || + (util_format_is_srgb(blit->dst.resource->format) == + util_format_is_srgb(blit->dst.format))); + dres->clean = FALSE; virgl_encode_blit(vctx, dres, sres, blit); @@ -1057,6 +1054,28 @@ virgl_encode_set_shader_buffers(vctx, shader, start_slot, count, buffers); } +static void virgl_create_fence_fd(struct pipe_context *ctx, + struct pipe_fence_handle **fence, + int fd, + enum pipe_fd_type type) +{ + assert(type == PIPE_FD_TYPE_NATIVE_SYNC); + struct virgl_screen *rs = virgl_screen(ctx->screen); + + if (rs->vws->cs_create_fence) + *fence = rs->vws->cs_create_fence(rs->vws, fd); +} + +static void virgl_fence_server_sync(struct pipe_context *ctx, + struct pipe_fence_handle *fence) +{ + struct virgl_context *vctx = 
virgl_context(ctx); + struct virgl_screen *rs = virgl_screen(ctx->screen); + + if (rs->vws->fence_server_sync) + rs->vws->fence_server_sync(rs->vws, vctx->cbuf, fence); +} + static void virgl_set_shader_images(struct pipe_context *ctx, enum pipe_shader_type shader, unsigned start_slot, unsigned count, @@ -1149,14 +1168,14 @@ vctx->framebuffer.zsbuf = NULL; vctx->framebuffer.nr_cbufs = 0; virgl_encoder_destroy_sub_ctx(vctx, vctx->hw_sub_ctx_id); - virgl_flush_eq(vctx, vctx); + virgl_flush_eq(vctx, vctx, NULL); rs->vws->cmd_buf_destroy(vctx->cbuf); if (vctx->uploader) u_upload_destroy(vctx->uploader); util_primconvert_destroy(vctx->primconvert); - slab_destroy_child(&vctx->texture_transfer_pool); + slab_destroy_child(&vctx->transfer_pool); FREE(vctx); } @@ -1205,6 +1224,7 @@ struct virgl_context *vctx; struct virgl_screen *rs = virgl_screen(pscreen); vctx = CALLOC_STRUCT(virgl_context); + const char *host_debug_flagstring; vctx->cbuf = rs->vws->cmd_buf_create(rs->vws); if (!vctx->cbuf) { @@ -1284,6 +1304,8 @@ vctx->base.resource_copy_region = virgl_resource_copy_region; vctx->base.flush_resource = virgl_flush_resource; vctx->base.blit = virgl_blit; + vctx->base.create_fence_fd = virgl_create_fence_fd; + vctx->base.fence_server_sync = virgl_fence_server_sync; vctx->base.set_shader_buffers = virgl_set_shader_buffers; vctx->base.set_hw_atomic_buffers = virgl_set_hw_atomic_buffers; @@ -1294,8 +1316,7 @@ virgl_init_query_functions(vctx); virgl_init_so_functions(vctx); - list_inithead(&vctx->to_flush_bufs); - slab_create_child(&vctx->texture_transfer_pool, &rs->texture_transfer_pool); + slab_create_child(&vctx->transfer_pool, &rs->transfer_pool); vctx->primconvert = util_primconvert_create(&vctx->base, rs->caps.caps.v1.prim_mask); vctx->uploader = u_upload_create(&vctx->base, 1024 * 1024, @@ -1309,6 +1330,13 @@ virgl_encoder_create_sub_ctx(vctx, vctx->hw_sub_ctx_id); virgl_encoder_set_sub_ctx(vctx, vctx->hw_sub_ctx_id); + + if (rs->caps.caps.v2.capability_bits & 
VIRGL_CAP_GUEST_MAY_INIT_LOG) { + host_debug_flagstring = getenv("VIRGL_HOST_DEBUG"); + if (host_debug_flagstring) + virgl_encode_host_debug_flagstring(vctx, host_debug_flagstring); + } + return &vctx->base; fail: return NULL; diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_context.h mesa-19.0.1/src/gallium/drivers/virgl/virgl_context.h --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_context.h 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -49,6 +49,11 @@ uint32_t enabled_mask; }; +struct virgl_rasterizer_state { + struct pipe_rasterizer_state rs; + uint32_t handle; +}; + struct virgl_context { struct pipe_context base; struct virgl_cmd_buf *cbuf; @@ -58,7 +63,7 @@ struct pipe_framebuffer_state framebuffer; - struct slab_child_pool texture_transfer_pool; + struct slab_child_pool transfer_pool; struct u_upload_mgr *uploader; @@ -66,6 +71,7 @@ unsigned num_vertex_buffers; boolean vertex_array_dirty; + struct virgl_rasterizer_state rs_state; struct virgl_so_target so_targets[PIPE_MAX_SO_BUFFERS]; unsigned num_so_targets; @@ -75,7 +81,6 @@ struct pipe_resource *images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS]; int num_transfers; int num_draws; - struct list_head to_flush_bufs; struct pipe_resource *atomic_buffers[PIPE_MAX_HW_ATOMIC_BUFFERS]; diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_encode.c mesa-19.0.1/src/gallium/drivers/virgl/virgl_encode.c --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_encode.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_encode.c 2019-03-31 23:16:37.000000000 +0000 @@ -1054,3 +1054,27 @@ virgl_encoder_write_dword(ctx->cbuf, flags); return 0; } + +int virgl_encode_host_debug_flagstring(struct virgl_context *ctx, + const char *flagstring) +{ + unsigned long slen = strlen(flagstring) + 1; + uint32_t sslen; + uint32_t string_length; + + if (!slen) + return 0; + + if (slen > 4 * 0xffff) { + 
debug_printf("VIRGL: host debug flag string too long, will be truncated\n"); + slen = 4 * 0xffff; + } + + sslen = (uint32_t )(slen + 3) / 4; + string_length = (uint32_t)MIN2(sslen * 4, slen); + + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_DEBUG_FLAGS, 0, sslen)); + virgl_encoder_write_block(ctx->cbuf, (const uint8_t *)flagstring, string_length); + + return 0; +} diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_encode.h mesa-19.0.1/src/gallium/drivers/virgl/virgl_encode.h --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_encode.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_encode.h 2019-03-31 23:16:37.000000000 +0000 @@ -276,4 +276,7 @@ const struct pipe_grid_info *grid_info); int virgl_encode_texture_barrier(struct virgl_context *ctx, unsigned flags); + +int virgl_encode_host_debug_flagstring(struct virgl_context *ctx, + const char *envname); #endif diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_hw.h mesa-19.0.1/src/gallium/drivers/virgl/virgl_hw.h --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_hw.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_hw.h 2019-03-31 23:16:37.000000000 +0000 @@ -231,6 +231,8 @@ #define VIRGL_CAP_SHADER_CLOCK (1 << 11) #define VIRGL_CAP_TEXTURE_BARRIER (1 << 12) #define VIRGL_CAP_TGSI_COMPONENTS (1 << 13) +#define VIRGL_CAP_GUEST_MAY_INIT_LOG (1 << 14) +#define VIRGL_CAP_SRGB_WRITE_CONTROL (1 << 15) /* virgl bind flags - these are compatible with mesa 10.5 gallium. * but are fixed, no other should be passed to virgl either. 
diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_protocol.h mesa-19.0.1/src/gallium/drivers/virgl/virgl_protocol.h --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_protocol.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_protocol.h 2019-03-31 23:16:37.000000000 +0000 @@ -92,6 +92,7 @@ VIRGL_CCMD_SET_FRAMEBUFFER_STATE_NO_ATTACH, VIRGL_CCMD_TEXTURE_BARRIER, VIRGL_CCMD_SET_ATOMIC_BUFFERS, + VIRGL_CCMD_SET_DEBUG_FLAGS, }; /* @@ -222,7 +223,7 @@ #define VIRGL_OBJ_SHADER_OFFSET_VAL(x) (((x) & 0x7fffffff) << 0) /* start contains full length in VAL - also implies continuations */ /* continuation contains offset in VAL */ -#define VIRGL_OBJ_SHADER_OFFSET_CONT (0x1 << 31) +#define VIRGL_OBJ_SHADER_OFFSET_CONT (0x1u << 31) #define VIRGL_OBJ_SHADER_NUM_TOKENS 4 #define VIRGL_OBJ_SHADER_SO_NUM_OUTPUTS 5 #define VIRGL_OBJ_SHADER_SO_STRIDE(x) (6 + (x)) diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_resource.c mesa-19.0.1/src/gallium/drivers/virgl/virgl_resource.c --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_resource.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_resource.c 2019-03-31 23:16:37.000000000 +0000 @@ -20,7 +20,9 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +#include "util/u_format.h" #include "util/u_inlines.h" +#include "util/u_memory.h" #include "virgl_context.h" #include "virgl_resource.h" #include "virgl_screen.h" @@ -55,11 +57,37 @@ static struct pipe_resource *virgl_resource_create(struct pipe_screen *screen, const struct pipe_resource *templ) { - struct virgl_screen *vs = virgl_screen(screen); - if (templ->target == PIPE_BUFFER) - return virgl_buffer_create(vs, templ); - else - return virgl_texture_create(vs, templ); + unsigned vbind; + struct virgl_screen *vs = virgl_screen(screen); + struct virgl_resource *res = CALLOC_STRUCT(virgl_resource); + + res->clean = TRUE; + res->u.b = *templ; + res->u.b.screen = &vs->base; + pipe_reference_init(&res->u.b.reference, 1); + vbind = pipe_to_virgl_bind(templ->bind); + virgl_resource_layout(&res->u.b, &res->metadata); + res->hw_res = vs->vws->resource_create(vs->vws, templ->target, + templ->format, vbind, + templ->width0, + templ->height0, + templ->depth0, + templ->array_size, + templ->last_level, + templ->nr_samples, + res->metadata.total_size); + if (!res->hw_res) { + FREE(res); + return NULL; + } + + if (templ->target == PIPE_BUFFER) + virgl_buffer_init(res); + else + virgl_texture_init(res); + + return &res->u.b; + } static struct pipe_resource *virgl_resource_from_handle(struct pipe_screen *screen, @@ -67,11 +95,24 @@ struct winsys_handle *whandle, unsigned usage) { - struct virgl_screen *vs = virgl_screen(screen); - if (templ->target == PIPE_BUFFER) - return NULL; - else - return virgl_texture_from_handle(vs, templ, whandle); + struct virgl_screen *vs = virgl_screen(screen); + if (templ->target == PIPE_BUFFER) + return NULL; + + struct virgl_resource *res = CALLOC_STRUCT(virgl_resource); + res->u.b = *templ; + res->u.b.screen = &vs->base; + pipe_reference_init(&res->u.b.reference, 1); + + res->hw_res = vs->vws->resource_create_from_handle(vs->vws, whandle); + if (!res->hw_res) { + FREE(res); + return NULL; + } + + virgl_texture_init(res); + + return &res->u.b; } 
void virgl_init_screen_resource_functions(struct pipe_screen *screen) @@ -110,3 +151,128 @@ ctx->buffer_subdata = virgl_buffer_subdata; ctx->texture_subdata = u_default_texture_subdata; } + +void virgl_resource_layout(struct pipe_resource *pt, + struct virgl_resource_metadata *metadata) +{ + unsigned level, nblocksy; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned depth = pt->depth0; + unsigned buffer_size = 0; + + for (level = 0; level <= pt->last_level; level++) { + unsigned slices; + + if (pt->target == PIPE_TEXTURE_CUBE) + slices = 6; + else if (pt->target == PIPE_TEXTURE_3D) + slices = depth; + else + slices = pt->array_size; + + nblocksy = util_format_get_nblocksy(pt->format, height); + metadata->stride[level] = util_format_get_stride(pt->format, width); + metadata->layer_stride[level] = nblocksy * metadata->stride[level]; + metadata->level_offset[level] = buffer_size; + + buffer_size += slices * metadata->layer_stride[level]; + + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); + } + + if (pt->nr_samples <= 1) + metadata->total_size = buffer_size; + else /* don't create guest backing store for MSAA */ + metadata->total_size = 0; +} + +struct virgl_transfer * +virgl_resource_create_transfer(struct pipe_context *ctx, + struct pipe_resource *pres, + const struct virgl_resource_metadata *metadata, + unsigned level, unsigned usage, + const struct pipe_box *box) +{ + struct virgl_transfer *trans; + enum pipe_format format = pres->format; + struct virgl_context *vctx = virgl_context(ctx); + const unsigned blocksy = box->y / util_format_get_blockheight(format); + const unsigned blocksx = box->x / util_format_get_blockwidth(format); + + unsigned offset = metadata->level_offset[level]; + if (pres->target == PIPE_TEXTURE_CUBE || + pres->target == PIPE_TEXTURE_CUBE_ARRAY || + pres->target == PIPE_TEXTURE_3D || + pres->target == PIPE_TEXTURE_2D_ARRAY) { + offset += box->z * metadata->layer_stride[level]; 
+ } + else if (pres->target == PIPE_TEXTURE_1D_ARRAY) { + offset += box->z * metadata->stride[level]; + assert(box->y == 0); + } else if (pres->target == PIPE_BUFFER) { + assert(box->y == 0 && box->z == 0); + } else { + assert(box->z == 0); + } + + offset += blocksy * metadata->stride[level]; + offset += blocksx * util_format_get_blocksize(format); + + trans = slab_alloc(&vctx->transfer_pool); + if (!trans) + return NULL; + + trans->base.resource = pres; + trans->base.level = level; + trans->base.usage = usage; + trans->base.box = *box; + trans->base.stride = metadata->stride[level]; + trans->base.layer_stride = metadata->layer_stride[level]; + trans->offset = offset; + util_range_init(&trans->range); + + if (trans->base.resource->target != PIPE_TEXTURE_3D && + trans->base.resource->target != PIPE_TEXTURE_CUBE && + trans->base.resource->target != PIPE_TEXTURE_1D_ARRAY && + trans->base.resource->target != PIPE_TEXTURE_2D_ARRAY && + trans->base.resource->target != PIPE_TEXTURE_CUBE_ARRAY) + trans->l_stride = 0; + else + trans->l_stride = trans->base.layer_stride; + + return trans; +} + +void virgl_resource_destroy_transfer(struct virgl_context *vctx, + struct virgl_transfer *trans) +{ + util_range_destroy(&trans->range); + slab_free(&vctx->transfer_pool, trans); +} + +void virgl_resource_destroy(struct pipe_screen *screen, + struct pipe_resource *resource) +{ + struct virgl_screen *vs = virgl_screen(screen); + struct virgl_resource *res = virgl_resource(resource); + vs->vws->resource_unref(vs->vws, res->hw_res); + FREE(res); +} + +boolean virgl_resource_get_handle(struct pipe_screen *screen, + struct pipe_resource *resource, + struct winsys_handle *whandle) +{ + struct virgl_screen *vs = virgl_screen(screen); + struct virgl_resource *res = virgl_resource(resource); + + if (res->u.b.target == PIPE_BUFFER) + return FALSE; + + return vs->vws->resource_get_handle(vs->vws, res->hw_res, + res->metadata.stride[0], + whandle); +} diff -Nru 
mesa-18.3.3/src/gallium/drivers/virgl/virgl_resource.h mesa-19.0.1/src/gallium/drivers/virgl/virgl_resource.h --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_resource.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_resource.h 2019-03-31 23:16:37.000000000 +0000 @@ -36,39 +36,25 @@ struct virgl_screen; struct virgl_context; +struct virgl_resource_metadata +{ + unsigned long level_offset[VR_MAX_TEXTURE_2D_LEVELS]; + unsigned stride[VR_MAX_TEXTURE_2D_LEVELS]; + unsigned layer_stride[VR_MAX_TEXTURE_2D_LEVELS]; + uint32_t total_size; +}; + struct virgl_resource { struct u_resource u; - struct virgl_hw_res *hw_res; boolean clean; -}; - -struct virgl_buffer { - struct virgl_resource base; - - struct list_head flush_list; - boolean on_list; - - /* The buffer range which is initialized (with a write transfer, - * streamout, DMA, or as a random access target). The rest of - * the buffer is considered invalid and can be mapped unsynchronized. - * - * This allows unsychronized mapping of a buffer range which hasn't - * been used yet. It's for applications which forget to use - * the unsynchronized map flag and expect the driver to figure it out. 
- */ - struct util_range valid_buffer_range; -}; - -struct virgl_texture { - struct virgl_resource base; - - unsigned long level_offset[VR_MAX_TEXTURE_2D_LEVELS]; - unsigned stride[VR_MAX_TEXTURE_2D_LEVELS]; + struct virgl_hw_res *hw_res; + struct virgl_resource_metadata metadata; }; struct virgl_transfer { struct pipe_transfer base; - uint32_t offset; + uint32_t offset, l_stride; + struct util_range range; struct virgl_resource *resolve_tmp; }; @@ -79,35 +65,19 @@ void virgl_init_context_resource_functions(struct pipe_context *ctx); -struct pipe_resource *virgl_texture_create(struct virgl_screen *vs, - const struct pipe_resource *templ); - -struct pipe_resource *virgl_texture_from_handle(struct virgl_screen *vs, - const struct pipe_resource *templ, - struct winsys_handle *whandle); +void virgl_texture_init(struct virgl_resource *res); static inline struct virgl_resource *virgl_resource(struct pipe_resource *r) { return (struct virgl_resource *)r; } -static inline struct virgl_buffer *virgl_buffer(struct pipe_resource *r) -{ - return (struct virgl_buffer *)r; -} - -static inline struct virgl_texture *virgl_texture(struct pipe_resource *r) -{ - return (struct virgl_texture *)r; -} - static inline struct virgl_transfer *virgl_transfer(struct pipe_transfer *trans) { return (struct virgl_transfer *)trans; } -struct pipe_resource *virgl_buffer_create(struct virgl_screen *vs, - const struct pipe_resource *templ); +void virgl_buffer_init(struct virgl_resource *res); static inline unsigned pipe_to_virgl_bind(unsigned pbind) { @@ -145,4 +115,24 @@ bool virgl_res_needs_readback(struct virgl_context *vctx, struct virgl_resource *res, unsigned usage); + +void virgl_resource_layout(struct pipe_resource *pt, + struct virgl_resource_metadata *metadata); + +struct virgl_transfer * +virgl_resource_create_transfer(struct pipe_context *ctx, + struct pipe_resource *pres, + const struct virgl_resource_metadata *metadata, + unsigned level, unsigned usage, + const struct pipe_box *box); 
+ +void virgl_resource_destroy_transfer(struct virgl_context *vctx, + struct virgl_transfer *trans); + +void virgl_resource_destroy(struct pipe_screen *screen, + struct pipe_resource *resource); + +boolean virgl_resource_get_handle(struct pipe_screen *screen, + struct pipe_resource *resource, + struct winsys_handle *whandle); #endif diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_screen.c mesa-19.0.1/src/gallium/drivers/virgl/virgl_screen.c --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -255,6 +255,13 @@ return vscreen->caps.caps.v2.max_combined_atomic_counters; case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS: return vscreen->caps.caps.v2.max_combined_atomic_counter_buffers; + case PIPE_CAP_TEXTURE_FLOAT_LINEAR: + case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: + return 1; /* TODO: need to introduce a hw-cap for this */ + case PIPE_CAP_MAX_VARYINGS: + if (vscreen->caps.caps.v1.glsl_level < 150) + return vscreen->caps.caps.v2.max_vertex_attribs; + return 32; case PIPE_CAP_TEXTURE_GATHER_SM5: case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: case PIPE_CAP_FAKE_SW_MSAA: @@ -267,8 +274,6 @@ case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: - case PIPE_CAP_TEXTURE_FLOAT_LINEAR: - case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_SHAREABLE_SHADERS: case PIPE_CAP_CLEAR_TEXTURE: @@ -340,7 +345,9 @@ case PIPE_CAP_VIDEO_MEMORY: return 0; case PIPE_CAP_NATIVE_FENCE_FD: - return 0; + return vscreen->vws->supports_fences; + case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL: + return vscreen->caps.caps.v2.capability_bits & VIRGL_CAP_SRGB_WRITE_CONTROL; default: return u_pipe_screen_get_param_defaults(screen, param); } @@ -721,6 +728,15 @@ return vws->fence_wait(vws, fence, timeout); } +static int virgl_fence_get_fd(struct pipe_screen *screen, + 
struct pipe_fence_handle *fence) +{ + struct virgl_screen *vscreen = virgl_screen(screen); + struct virgl_winsys *vws = vscreen->vws; + + return vws->fence_get_fd(vws, fence); +} + static uint64_t virgl_get_timestamp(struct pipe_screen *_screen) { @@ -733,7 +749,7 @@ struct virgl_screen *vscreen = virgl_screen(screen); struct virgl_winsys *vws = vscreen->vws; - slab_destroy_parent(&vscreen->texture_transfer_pool); + slab_destroy_parent(&vscreen->transfer_pool); if (vws) vws->destroy(vws); @@ -765,6 +781,7 @@ screen->base.fence_reference = virgl_fence_reference; //screen->base.fence_signalled = virgl_fence_signalled; screen->base.fence_finish = virgl_fence_finish; + screen->base.fence_get_fd = virgl_fence_get_fd; virgl_init_screen_resource_functions(&screen->base); @@ -772,7 +789,7 @@ screen->refcnt = 1; - slab_create_parent(&screen->texture_transfer_pool, sizeof(struct virgl_transfer), 16); + slab_create_parent(&screen->transfer_pool, sizeof(struct virgl_transfer), 16); return &screen->base; } diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_screen.h mesa-19.0.1/src/gallium/drivers/virgl/virgl_screen.h --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_screen.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_screen.h 2019-03-31 23:16:37.000000000 +0000 @@ -43,7 +43,7 @@ struct virgl_drm_caps caps; - struct slab_parent_pool texture_transfer_pool; + struct slab_parent_pool transfer_pool; uint32_t sub_ctx_id; }; diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_texture.c mesa-19.0.1/src/gallium/drivers/virgl/virgl_texture.c --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_texture.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_texture.c 2019-03-31 23:16:37.000000000 +0000 @@ -94,31 +94,6 @@ } } -static unsigned -vrend_get_tex_image_offset(const struct virgl_texture *res, - unsigned level, unsigned layer) -{ - const struct pipe_resource *pres = &res->base.u.b; - const unsigned hgt = 
u_minify(pres->height0, level); - const unsigned nblocksy = util_format_get_nblocksy(pres->format, hgt); - unsigned offset = res->level_offset[level]; - - if (pres->target == PIPE_TEXTURE_CUBE || - pres->target == PIPE_TEXTURE_CUBE_ARRAY || - pres->target == PIPE_TEXTURE_3D || - pres->target == PIPE_TEXTURE_2D_ARRAY) { - offset += layer * nblocksy * res->stride[level]; - } - else if (pres->target == PIPE_TEXTURE_1D_ARRAY) { - offset += layer * res->stride[level]; - } - else { - assert(layer == 0); - } - - return offset; -} - static void *virgl_texture_transfer_map(struct pipe_context *ctx, struct pipe_resource *resource, unsigned level, @@ -128,41 +103,19 @@ { struct virgl_context *vctx = virgl_context(ctx); struct virgl_screen *vs = virgl_screen(ctx->screen); - struct virgl_texture *vtex = virgl_texture(resource); - enum pipe_format format = resource->format; + struct virgl_resource *vtex = virgl_resource(resource); struct virgl_transfer *trans; void *ptr; boolean readback = TRUE; - uint32_t offset; struct virgl_hw_res *hw_res; - const unsigned h = u_minify(vtex->base.u.b.height0, level); - const unsigned nblocksy = util_format_get_nblocksy(format, h); - uint32_t l_stride; bool doflushwait; - doflushwait = virgl_res_needs_flush_wait(vctx, &vtex->base, usage); + doflushwait = virgl_res_needs_flush_wait(vctx, vtex, usage); if (doflushwait) ctx->flush(ctx, NULL, 0); - trans = slab_alloc(&vctx->texture_transfer_pool); - if (!trans) - return NULL; - - trans->base.resource = resource; - trans->base.level = level; - trans->base.usage = usage; - trans->base.box = *box; - trans->base.stride = vtex->stride[level]; - trans->base.layer_stride = trans->base.stride * nblocksy; - - if (resource->target != PIPE_TEXTURE_3D && - resource->target != PIPE_TEXTURE_CUBE && - resource->target != PIPE_TEXTURE_1D_ARRAY && - resource->target != PIPE_TEXTURE_2D_ARRAY && - resource->target != PIPE_TEXTURE_CUBE_ARRAY) - l_stride = 0; - else - l_stride = trans->base.layer_stride; + trans = 
virgl_resource_create_transfer(ctx, resource, &vtex->metadata, + level, usage, box); if (resource->nr_samples > 1) { struct pipe_resource tmp_resource; @@ -175,34 +128,30 @@ ctx->flush(ctx, NULL, 0); /* we want to do a resolve blit into the temporary */ hw_res = trans->resolve_tmp->hw_res; - offset = 0; - trans->base.stride = ((struct virgl_texture*)trans->resolve_tmp)->stride[level]; - trans->base.layer_stride = trans->base.stride * nblocksy; + struct virgl_resource_metadata *data = &trans->resolve_tmp->metadata; + trans->base.stride = data->stride[level]; + trans->base.layer_stride = data->layer_stride[level]; + trans->offset = 0; } else { - offset = vrend_get_tex_image_offset(vtex, level, box->z); - - offset += box->y / util_format_get_blockheight(format) * trans->base.stride + - box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); - hw_res = vtex->base.hw_res; + hw_res = vtex->hw_res; trans->resolve_tmp = NULL; } - readback = virgl_res_needs_readback(vctx, &vtex->base, usage); + readback = virgl_res_needs_readback(vctx, vtex, usage); if (readback) - vs->vws->transfer_get(vs->vws, hw_res, box, trans->base.stride, l_stride, offset, level); + vs->vws->transfer_get(vs->vws, hw_res, box, trans->base.stride, + trans->l_stride, trans->offset, level); if (doflushwait || readback) - vs->vws->resource_wait(vs->vws, vtex->base.hw_res); + vs->vws->resource_wait(vs->vws, vtex->hw_res); ptr = vs->vws->resource_map(vs->vws, hw_res); if (!ptr) { - slab_free(&vctx->texture_transfer_pool, trans); + slab_free(&vctx->transfer_pool, trans); return NULL; } - trans->offset = offset; *transfer = &trans->base; - return ptr + trans->offset; } @@ -211,25 +160,17 @@ { struct virgl_context *vctx = virgl_context(ctx); struct virgl_transfer *trans = virgl_transfer(transfer); - struct virgl_texture *vtex = virgl_texture(transfer->resource); - uint32_t l_stride; - - if (transfer->resource->target != PIPE_TEXTURE_3D && - transfer->resource->target != 
PIPE_TEXTURE_CUBE && - transfer->resource->target != PIPE_TEXTURE_1D_ARRAY && - transfer->resource->target != PIPE_TEXTURE_2D_ARRAY && - transfer->resource->target != PIPE_TEXTURE_CUBE_ARRAY) - l_stride = 0; - else - l_stride = trans->base.layer_stride; + struct virgl_resource *vtex = virgl_resource(transfer->resource); if (trans->base.usage & PIPE_TRANSFER_WRITE) { if (!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) { struct virgl_screen *vs = virgl_screen(ctx->screen); - vtex->base.clean = FALSE; + vtex->clean = FALSE; vctx->num_transfers++; - vs->vws->transfer_put(vs->vws, vtex->base.hw_res, - &transfer->box, trans->base.stride, l_stride, trans->offset, transfer->level); + vs->vws->transfer_put(vs->vws, vtex->hw_res, + &transfer->box, trans->base.stride, + trans->l_stride, trans->offset, + transfer->level); } } @@ -237,111 +178,19 @@ if (trans->resolve_tmp) pipe_resource_reference((struct pipe_resource **)&trans->resolve_tmp, NULL); - slab_free(&vctx->texture_transfer_pool, trans); -} - - -static void -vrend_resource_layout(struct virgl_texture *res, - uint32_t *total_size) -{ - struct pipe_resource *pt = &res->base.u.b; - unsigned level; - unsigned width = pt->width0; - unsigned height = pt->height0; - unsigned depth = pt->depth0; - unsigned buffer_size = 0; - - for (level = 0; level <= pt->last_level; level++) { - unsigned slices; - - if (pt->target == PIPE_TEXTURE_CUBE) - slices = 6; - else if (pt->target == PIPE_TEXTURE_3D) - slices = depth; - else - slices = pt->array_size; - - res->stride[level] = util_format_get_stride(pt->format, width); - res->level_offset[level] = buffer_size; - - buffer_size += (util_format_get_nblocksy(pt->format, height) * - slices * res->stride[level]); - - width = u_minify(width, 1); - height = u_minify(height, 1); - depth = u_minify(depth, 1); - } - - if (pt->nr_samples <= 1) - *total_size = buffer_size; - else /* don't create guest backing store for MSAA */ - *total_size = 0; -} - -static boolean 
virgl_texture_get_handle(struct pipe_screen *screen, - struct pipe_resource *ptex, - struct winsys_handle *whandle) -{ - struct virgl_screen *vs = virgl_screen(screen); - struct virgl_texture *vtex = virgl_texture(ptex); - - return vs->vws->resource_get_handle(vs->vws, vtex->base.hw_res, vtex->stride[0], whandle); -} - -static void virgl_texture_destroy(struct pipe_screen *screen, - struct pipe_resource *res) -{ - struct virgl_screen *vs = virgl_screen(screen); - struct virgl_texture *vtex = virgl_texture(res); - vs->vws->resource_unref(vs->vws, vtex->base.hw_res); - FREE(vtex); + virgl_resource_destroy_transfer(vctx, trans); } static const struct u_resource_vtbl virgl_texture_vtbl = { - virgl_texture_get_handle, /* get_handle */ - virgl_texture_destroy, /* resource_destroy */ + virgl_resource_get_handle, /* get_handle */ + virgl_resource_destroy, /* resource_destroy */ virgl_texture_transfer_map, /* transfer_map */ NULL, /* transfer_flush_region */ virgl_texture_transfer_unmap, /* transfer_unmap */ }; -struct pipe_resource * -virgl_texture_from_handle(struct virgl_screen *vs, - const struct pipe_resource *template, - struct winsys_handle *whandle) +void virgl_texture_init(struct virgl_resource *res) { - struct virgl_texture *tex = CALLOC_STRUCT(virgl_texture); - tex->base.u.b = *template; - tex->base.u.b.screen = &vs->base; - pipe_reference_init(&tex->base.u.b.reference, 1); - tex->base.u.vtbl = &virgl_texture_vtbl; - - tex->base.hw_res = vs->vws->resource_create_from_handle(vs->vws, whandle); - return &tex->base.u.b; -} - -struct pipe_resource *virgl_texture_create(struct virgl_screen *vs, - const struct pipe_resource *template) -{ - struct virgl_texture *tex; - uint32_t size; - unsigned vbind; - - tex = CALLOC_STRUCT(virgl_texture); - tex->base.clean = TRUE; - tex->base.u.b = *template; - tex->base.u.b.screen = &vs->base; - pipe_reference_init(&tex->base.u.b.reference, 1); - tex->base.u.vtbl = &virgl_texture_vtbl; - vrend_resource_layout(tex, &size); - - vbind = 
pipe_to_virgl_bind(template->bind); - tex->base.hw_res = vs->vws->resource_create(vs->vws, template->target, template->format, vbind, template->width0, template->height0, template->depth0, template->array_size, template->last_level, template->nr_samples, size); - if (!tex->base.hw_res) { - FREE(tex); - return NULL; - } - return &tex->base.u.b; + res->u.vtbl = &virgl_texture_vtbl; } diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_winsys.h mesa-19.0.1/src/gallium/drivers/virgl/virgl_winsys.h --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_winsys.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_winsys.h 2019-03-31 23:16:37.000000000 +0000 @@ -40,10 +40,13 @@ struct virgl_cmd_buf { unsigned cdw; uint32_t *buf; + int in_fence_fd; + bool needs_out_fence_fd; }; struct virgl_winsys { unsigned pci_id; + int supports_fences; /* In/Out fences are supported */ void (*destroy)(struct virgl_winsys *vws); @@ -83,7 +86,8 @@ void (*cmd_buf_destroy)(struct virgl_cmd_buf *buf); void (*emit_res)(struct virgl_winsys *vws, struct virgl_cmd_buf *buf, struct virgl_hw_res *res, boolean write_buffer); - int (*submit_cmd)(struct virgl_winsys *vws, struct virgl_cmd_buf *buf); + int (*submit_cmd)(struct virgl_winsys *vws, struct virgl_cmd_buf *buf, + int32_t in_fence_fd, int32_t *out_fence_fd); boolean (*res_is_referenced)(struct virgl_winsys *vws, struct virgl_cmd_buf *buf, @@ -92,7 +96,7 @@ int (*get_caps)(struct virgl_winsys *vws, struct virgl_drm_caps *caps); /* fence */ - struct pipe_fence_handle *(*cs_create_fence)(struct virgl_winsys *vws); + struct pipe_fence_handle *(*cs_create_fence)(struct virgl_winsys *vws, int fd); bool (*fence_wait)(struct virgl_winsys *vws, struct pipe_fence_handle *fence, uint64_t timeout); @@ -107,6 +111,12 @@ unsigned level, unsigned layer, void *winsys_drawable_handle, struct pipe_box *sub_box); + void (*fence_server_sync)(struct virgl_winsys *vws, + struct virgl_cmd_buf *cbuf, + struct pipe_fence_handle *fence); + 
+ int (*fence_get_fd)(struct virgl_winsys *vws, + struct pipe_fence_handle *fence); }; /* this defaults all newer caps, diff -Nru mesa-18.3.3/src/gallium/include/pipe/p_context.h mesa-19.0.1/src/gallium/include/pipe/p_context.h --- mesa-18.3.3/src/gallium/include/pipe/p_context.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/include/pipe/p_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -793,7 +793,7 @@ * Invalidate the contents of the resource. This is used to * * (1) implement EGL's semantic of undefined depth/stencil - * contenst after a swapbuffers. This allows a tiled renderer (for + * contents after a swapbuffers. This allows a tiled renderer (for * example) to not store the depth buffer. * * (2) implement GL's InvalidateBufferData. For backwards compatibility, diff -Nru mesa-18.3.3/src/gallium/include/pipe/p_defines.h mesa-19.0.1/src/gallium/include/pipe/p_defines.h --- mesa-18.3.3/src/gallium/include/pipe/p_defines.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/include/pipe/p_defines.h 2019-03-31 23:16:37.000000000 +0000 @@ -341,7 +341,13 @@ * PIPE_RESOURCE_FLAG_MAP_COHERENT must be set when creating * the resource. */ - PIPE_TRANSFER_COHERENT = (1 << 14) + PIPE_TRANSFER_COHERENT = (1 << 14), + + /** + * This and higher bits are reserved for private use by drivers. Drivers + * should use this as (PIPE_TRANSFER_DRV_PRV << i). + */ + PIPE_TRANSFER_DRV_PRV = (1 << 24) }; /** @@ -401,6 +407,9 @@ */ #define PIPE_CONTEXT_LOW_PRIORITY (1 << 5) +/** Stop execution if the device is reset. */ +#define PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET (1 << 6) + /** * Flags for pipe_context::memory_barrier. 
*/ @@ -554,12 +563,30 @@ PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE, PIPE_QUERY_GPU_FINISHED, PIPE_QUERY_PIPELINE_STATISTICS, + PIPE_QUERY_PIPELINE_STATISTICS_SINGLE, PIPE_QUERY_TYPES, /* start of driver queries, see pipe_screen::get_driver_query_info */ PIPE_QUERY_DRIVER_SPECIFIC = 256, }; /** + * Index for PIPE_QUERY_PIPELINE_STATISTICS subqueries. + */ +enum pipe_statistics_query_index { + PIPE_STAT_QUERY_IA_VERTICES, + PIPE_STAT_QUERY_IA_PRIMITIVES, + PIPE_STAT_QUERY_VS_INVOCATIONS, + PIPE_STAT_QUERY_GS_INVOCATIONS, + PIPE_STAT_QUERY_GS_PRIMITIVES, + PIPE_STAT_QUERY_C_INVOCATIONS, + PIPE_STAT_QUERY_C_PRIMITIVES, + PIPE_STAT_QUERY_PS_INVOCATIONS, + PIPE_STAT_QUERY_HS_INVOCATIONS, + PIPE_STAT_QUERY_DS_INVOCATIONS, + PIPE_STAT_QUERY_CS_INVOCATIONS, +}; + +/** * Conditional rendering modes */ enum pipe_render_cond_flag { @@ -780,6 +807,7 @@ PIPE_CAP_TGSI_CAN_READ_OUTPUTS, PIPE_CAP_NATIVE_FENCE_FD, PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY, + PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS, PIPE_CAP_TGSI_FS_FBFETCH, PIPE_CAP_TGSI_MUL_ZERO_WINS, PIPE_CAP_DOUBLES, @@ -823,6 +851,12 @@ PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS, PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET, PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET, + PIPE_CAP_SURFACE_SAMPLE_COUNT, + PIPE_CAP_TGSI_ATOMFADD, + PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE, + PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND, + PIPE_CAP_DEST_SURFACE_SRGB_CONTROL, + PIPE_CAP_MAX_VARYINGS, }; /** diff -Nru mesa-18.3.3/src/gallium/include/pipe/p_format.h mesa-19.0.1/src/gallium/include/pipe/p_format.h --- mesa-18.3.3/src/gallium/include/pipe/p_format.h 2018-02-08 14:40:56.000000000 +0000 +++ mesa-19.0.1/src/gallium/include/pipe/p_format.h 2019-03-31 23:16:37.000000000 +0000 @@ -396,6 +396,13 @@ PIPE_FORMAT_X1B5G5R5_UNORM = 310, PIPE_FORMAT_A4B4G4R4_UNORM = 311, + PIPE_FORMAT_R8_SRGB = 312, + + PIPE_FORMAT_A8L8_SINT = 313, + PIPE_FORMAT_G8R8_SINT = 314, + PIPE_FORMAT_A8B8G8R8_SINT = 315, + PIPE_FORMAT_X8B8G8R8_SINT = 316, + PIPE_FORMAT_COUNT }; diff -Nru 
mesa-18.3.3/src/gallium/include/pipe/p_shader_tokens.h mesa-19.0.1/src/gallium/include/pipe/p_shader_tokens.h --- mesa-18.3.3/src/gallium/include/pipe/p_shader_tokens.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/include/pipe/p_shader_tokens.h 2019-03-31 23:16:37.000000000 +0000 @@ -442,7 +442,7 @@ TGSI_OPCODE_BGNSUB = 100, TGSI_OPCODE_ENDLOOP = 101, TGSI_OPCODE_ENDSUB = 102, - /* gap */ + TGSI_OPCODE_ATOMFADD = 103, TGSI_OPCODE_TXQS = 104, TGSI_OPCODE_RESQ = 105, TGSI_OPCODE_READ_FIRST = 106, diff -Nru mesa-18.3.3/src/gallium/include/pipe/p_state.h mesa-19.0.1/src/gallium/include/pipe/p_state.h --- mesa-18.3.3/src/gallium/include/pipe/p_state.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/include/pipe/p_state.h 2019-03-31 23:16:37.000000000 +0000 @@ -443,6 +443,13 @@ uint16_t width; /**< logical width in pixels */ uint16_t height; /**< logical height in pixels */ + /** + * Number of samples for the surface. This will be 0 if rendering + * should use the resource's nr_samples, or another value if the resource + * is bound using FramebufferTexture2DMultisampleEXT. 
+ */ + unsigned nr_samples:8; + union pipe_surface_desc u; }; diff -Nru mesa-18.3.3/src/gallium/include/pipe/p_video_enums.h mesa-19.0.1/src/gallium/include/pipe/p_video_enums.h --- mesa-18.3.3/src/gallium/include/pipe/p_video_enums.h 2018-04-16 21:31:06.000000000 +0000 +++ mesa-19.0.1/src/gallium/include/pipe/p_video_enums.h 2019-03-31 23:16:37.000000000 +0000 @@ -70,7 +70,8 @@ PIPE_VIDEO_PROFILE_HEVC_MAIN_444, PIPE_VIDEO_PROFILE_JPEG_BASELINE, PIPE_VIDEO_PROFILE_VP9_PROFILE0, - PIPE_VIDEO_PROFILE_VP9_PROFILE2 + PIPE_VIDEO_PROFILE_VP9_PROFILE2, + PIPE_VIDEO_PROFILE_MAX }; /* Video caps, can be different for each codec/profile */ diff -Nru mesa-18.3.3/src/gallium/Makefile.am mesa-19.0.1/src/gallium/Makefile.am --- mesa-18.3.3/src/gallium/Makefile.am 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -56,12 +56,8 @@ SUBDIRS += drivers/etnaviv winsys/etnaviv/drm endif -if HAVE_GALLIUM_IMX -SUBDIRS += drivers/imx winsys/imx/drm -endif - -if HAVE_GALLIUM_PL111 -SUBDIRS += drivers/pl111 winsys/pl111/drm +if HAVE_GALLIUM_KMSRO +SUBDIRS += drivers/kmsro winsys/kmsro/drm endif ## swrast/softpipe diff -Nru mesa-18.3.3/src/gallium/meson.build mesa-19.0.1/src/gallium/meson.build --- mesa-18.3.3/src/gallium/meson.build 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -89,27 +89,22 @@ else driver_vc4 = declare_dependency() endif -if with_gallium_pl111 - subdir('winsys/pl111/drm') -else - driver_pl111 = declare_dependency() -endif -if with_gallium_v3d - subdir('winsys/v3d/drm') - subdir('drivers/v3d') -else - driver_v3d = declare_dependency() -endif if with_gallium_etnaviv subdir('winsys/etnaviv/drm') subdir('drivers/etnaviv') else driver_etnaviv = declare_dependency() endif -if with_gallium_imx - subdir('winsys/imx/drm') +if with_gallium_kmsro + subdir('winsys/kmsro/drm') else - driver_imx = declare_dependency() + driver_kmsro = declare_dependency() 
+endif +if with_gallium_v3d + subdir('winsys/v3d/drm') + subdir('drivers/v3d') +else + driver_v3d = declare_dependency() endif if with_gallium_tegra subdir('winsys/tegra/drm') diff -Nru mesa-18.3.3/src/gallium/state_trackers/clover/meson.build mesa-19.0.1/src/gallium/state_trackers/clover/meson.build --- mesa-18.3.3/src/gallium/state_trackers/clover/meson.build 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/clover/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -53,7 +53,7 @@ '-DLIBCLC_LIBEXECDIR="@0@/"'.format(dep_clc.get_pkgconfig_variable('libexecdir')), '-DCLANG_RESOURCE_DIR="@0@"'.format(join_paths( dep_llvm.get_configtool_variable('libdir'), 'clang', - dep_llvm.get_configtool_variable('version'), 'include', + dep_llvm.version(), 'include', )), ], dependencies : [dep_llvm, dep_elf], diff -Nru mesa-18.3.3/src/gallium/state_trackers/dri/dri2.c mesa-19.0.1/src/gallium/state_trackers/dri/dri2.c --- mesa-18.3.3/src/gallium/state_trackers/dri/dri2.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/dri/dri2.c 2019-03-31 23:16:37.000000000 +0000 @@ -66,305 +66,72 @@ return (struct dri2_buffer *) driBufferPriv; } -static const int fourcc_formats[] = { - __DRI_IMAGE_FOURCC_ARGB2101010, - __DRI_IMAGE_FOURCC_XRGB2101010, - __DRI_IMAGE_FOURCC_ABGR2101010, - __DRI_IMAGE_FOURCC_XBGR2101010, - __DRI_IMAGE_FOURCC_ARGB8888, - __DRI_IMAGE_FOURCC_ABGR8888, - __DRI_IMAGE_FOURCC_SARGB8888, - __DRI_IMAGE_FOURCC_XRGB8888, - __DRI_IMAGE_FOURCC_XBGR8888, - __DRI_IMAGE_FOURCC_ARGB1555, - __DRI_IMAGE_FOURCC_RGB565, - __DRI_IMAGE_FOURCC_R8, - __DRI_IMAGE_FOURCC_R16, - __DRI_IMAGE_FOURCC_GR88, - __DRI_IMAGE_FOURCC_GR1616, - __DRI_IMAGE_FOURCC_YUV410, - __DRI_IMAGE_FOURCC_YUV411, - __DRI_IMAGE_FOURCC_YUV420, - __DRI_IMAGE_FOURCC_YUV422, - __DRI_IMAGE_FOURCC_YUV444, - __DRI_IMAGE_FOURCC_YVU410, - __DRI_IMAGE_FOURCC_YVU411, - __DRI_IMAGE_FOURCC_YVU420, - __DRI_IMAGE_FOURCC_YVU422, - __DRI_IMAGE_FOURCC_YVU444, - 
__DRI_IMAGE_FOURCC_NV12, - __DRI_IMAGE_FOURCC_NV16, - __DRI_IMAGE_FOURCC_YUYV -}; - -static int convert_fourcc(int format, int *dri_components_p) -{ +struct dri2_format_mapping { + int dri_fourcc; + int dri_format; int dri_components; - switch(format) { - case __DRI_IMAGE_FOURCC_ARGB1555: - format = __DRI_IMAGE_FORMAT_ARGB1555; - dri_components = __DRI_IMAGE_COMPONENTS_RGBA; - break; - case __DRI_IMAGE_FOURCC_RGB565: - format = __DRI_IMAGE_FORMAT_RGB565; - dri_components = __DRI_IMAGE_COMPONENTS_RGB; - break; - case __DRI_IMAGE_FOURCC_ARGB8888: - format = __DRI_IMAGE_FORMAT_ARGB8888; - dri_components = __DRI_IMAGE_COMPONENTS_RGBA; - break; - case __DRI_IMAGE_FOURCC_XRGB8888: - format = __DRI_IMAGE_FORMAT_XRGB8888; - dri_components = __DRI_IMAGE_COMPONENTS_RGB; - break; - case __DRI_IMAGE_FOURCC_ABGR8888: - format = __DRI_IMAGE_FORMAT_ABGR8888; - dri_components = __DRI_IMAGE_COMPONENTS_RGBA; - break; - case __DRI_IMAGE_FOURCC_XBGR8888: - format = __DRI_IMAGE_FORMAT_XBGR8888; - dri_components = __DRI_IMAGE_COMPONENTS_RGB; - break; - case __DRI_IMAGE_FOURCC_ARGB2101010: - format = __DRI_IMAGE_FORMAT_ARGB2101010; - dri_components = __DRI_IMAGE_COMPONENTS_RGBA; - break; - case __DRI_IMAGE_FOURCC_XRGB2101010: - format = __DRI_IMAGE_FORMAT_XRGB2101010; - dri_components = __DRI_IMAGE_COMPONENTS_RGB; - break; - case __DRI_IMAGE_FOURCC_ABGR2101010: - format = __DRI_IMAGE_FORMAT_ABGR2101010; - dri_components = __DRI_IMAGE_COMPONENTS_RGBA; - break; - case __DRI_IMAGE_FOURCC_XBGR2101010: - format = __DRI_IMAGE_FORMAT_XBGR2101010; - dri_components = __DRI_IMAGE_COMPONENTS_RGB; - break; - case __DRI_IMAGE_FOURCC_R8: - format = __DRI_IMAGE_FORMAT_R8; - dri_components = __DRI_IMAGE_COMPONENTS_R; - break; - case __DRI_IMAGE_FOURCC_GR88: - format = __DRI_IMAGE_FORMAT_GR88; - dri_components = __DRI_IMAGE_COMPONENTS_RG; - break; - case __DRI_IMAGE_FOURCC_R16: - format = __DRI_IMAGE_FORMAT_R16; - dri_components = __DRI_IMAGE_COMPONENTS_R; - break; - case __DRI_IMAGE_FOURCC_GR1616: - 
format = __DRI_IMAGE_FORMAT_GR1616; - dri_components = __DRI_IMAGE_COMPONENTS_RG; - break; - case __DRI_IMAGE_FOURCC_YUYV: - format = __DRI_IMAGE_FORMAT_YUYV; - dri_components = __DRI_IMAGE_COMPONENTS_Y_XUXV; - break; - /* - * For multi-planar YUV formats, we return the format of the first - * plane only. Since there is only one caller which supports multi- - * planar YUV it gets to figure out the remaining planes on it's - * own. - */ - case __DRI_IMAGE_FOURCC_YUV420: - case __DRI_IMAGE_FOURCC_YVU420: - format = __DRI_IMAGE_FORMAT_R8; - dri_components = __DRI_IMAGE_COMPONENTS_Y_U_V; - break; - case __DRI_IMAGE_FOURCC_NV12: - format = __DRI_IMAGE_FORMAT_R8; - dri_components = __DRI_IMAGE_COMPONENTS_Y_UV; - break; - default: - return -1; - } - *dri_components_p = dri_components; - return format; -} - -/* NOTE this probably isn't going to do the right thing for YUV images - * (but I think the same can be said for intel_query_image()). I think - * only needed for exporting dmabuf's, so I think I won't loose much - * sleep over it. 
- */ -static int convert_to_fourcc(int format) -{ - switch(format) { - case __DRI_IMAGE_FORMAT_ARGB1555: - format = __DRI_IMAGE_FOURCC_ARGB1555; - break; - case __DRI_IMAGE_FORMAT_RGB565: - format = __DRI_IMAGE_FOURCC_RGB565; - break; - case __DRI_IMAGE_FORMAT_ARGB8888: - format = __DRI_IMAGE_FOURCC_ARGB8888; - break; - case __DRI_IMAGE_FORMAT_XRGB8888: - format = __DRI_IMAGE_FOURCC_XRGB8888; - break; - case __DRI_IMAGE_FORMAT_ABGR8888: - format = __DRI_IMAGE_FOURCC_ABGR8888; - break; - case __DRI_IMAGE_FORMAT_XBGR8888: - format = __DRI_IMAGE_FOURCC_XBGR8888; - break; - case __DRI_IMAGE_FORMAT_ARGB2101010: - format = __DRI_IMAGE_FOURCC_ARGB2101010; - break; - case __DRI_IMAGE_FORMAT_XRGB2101010: - format = __DRI_IMAGE_FOURCC_XRGB2101010; - break; - case __DRI_IMAGE_FORMAT_ABGR2101010: - format = __DRI_IMAGE_FOURCC_ABGR2101010; - break; - case __DRI_IMAGE_FORMAT_XBGR2101010: - format = __DRI_IMAGE_FOURCC_XBGR2101010; - break; - case __DRI_IMAGE_FORMAT_R8: - format = __DRI_IMAGE_FOURCC_R8; - break; - case __DRI_IMAGE_FORMAT_GR88: - format = __DRI_IMAGE_FOURCC_GR88; - break; - default: - return -1; - } - return format; -} + enum pipe_format pipe_format; +}; -static enum pipe_format dri2_format_to_pipe_format (int format) -{ - enum pipe_format pf; +static const struct dri2_format_mapping dri2_format_table[] = { + { __DRI_IMAGE_FOURCC_ARGB2101010, __DRI_IMAGE_FORMAT_ARGB2101010, + __DRI_IMAGE_COMPONENTS_RGBA, PIPE_FORMAT_B10G10R10A2_UNORM }, + { __DRI_IMAGE_FOURCC_XRGB2101010, __DRI_IMAGE_FORMAT_XRGB2101010, + __DRI_IMAGE_COMPONENTS_RGB, PIPE_FORMAT_B10G10R10X2_UNORM }, + { __DRI_IMAGE_FOURCC_ABGR2101010, __DRI_IMAGE_FORMAT_ABGR2101010, + __DRI_IMAGE_COMPONENTS_RGBA, PIPE_FORMAT_R10G10B10A2_UNORM }, + { __DRI_IMAGE_FOURCC_XBGR2101010, __DRI_IMAGE_FORMAT_XBGR2101010, + __DRI_IMAGE_COMPONENTS_RGB, PIPE_FORMAT_R10G10B10X2_UNORM }, + { __DRI_IMAGE_FOURCC_ARGB8888, __DRI_IMAGE_FORMAT_ARGB8888, + __DRI_IMAGE_COMPONENTS_RGBA, PIPE_FORMAT_BGRA8888_UNORM }, + { 
__DRI_IMAGE_FOURCC_ABGR8888, __DRI_IMAGE_FORMAT_ABGR8888, + __DRI_IMAGE_COMPONENTS_RGBA, PIPE_FORMAT_RGBA8888_UNORM }, + { __DRI_IMAGE_FOURCC_SARGB8888, __DRI_IMAGE_FORMAT_SARGB8, + __DRI_IMAGE_COMPONENTS_RGBA, PIPE_FORMAT_BGRA8888_SRGB }, + { __DRI_IMAGE_FOURCC_XRGB8888, __DRI_IMAGE_FORMAT_XRGB8888, + __DRI_IMAGE_COMPONENTS_RGB, PIPE_FORMAT_BGRX8888_UNORM }, + { __DRI_IMAGE_FOURCC_XBGR8888, __DRI_IMAGE_FORMAT_XBGR8888, + __DRI_IMAGE_COMPONENTS_RGB, PIPE_FORMAT_RGBX8888_UNORM }, + { __DRI_IMAGE_FOURCC_ARGB1555, __DRI_IMAGE_FORMAT_ARGB1555, + __DRI_IMAGE_COMPONENTS_RGBA, PIPE_FORMAT_B5G5R5A1_UNORM }, + { __DRI_IMAGE_FOURCC_RGB565, __DRI_IMAGE_FORMAT_RGB565, + __DRI_IMAGE_COMPONENTS_RGB, PIPE_FORMAT_B5G6R5_UNORM }, + { __DRI_IMAGE_FOURCC_R8, __DRI_IMAGE_FORMAT_R8, + __DRI_IMAGE_COMPONENTS_R, PIPE_FORMAT_R8_UNORM }, + { __DRI_IMAGE_FOURCC_R16, __DRI_IMAGE_FORMAT_R16, + __DRI_IMAGE_COMPONENTS_R, PIPE_FORMAT_R16_UNORM }, + { __DRI_IMAGE_FOURCC_GR88, __DRI_IMAGE_FORMAT_GR88, + __DRI_IMAGE_COMPONENTS_RG, PIPE_FORMAT_RG88_UNORM }, + { __DRI_IMAGE_FOURCC_GR1616, __DRI_IMAGE_FORMAT_GR88, + __DRI_IMAGE_COMPONENTS_RG, PIPE_FORMAT_RG1616_UNORM }, + { __DRI_IMAGE_FOURCC_YUV420, __DRI_IMAGE_FORMAT_NONE, + __DRI_IMAGE_COMPONENTS_Y_U_V, PIPE_FORMAT_IYUV }, + { __DRI_IMAGE_FOURCC_YVU420, __DRI_IMAGE_FORMAT_NONE, + __DRI_IMAGE_COMPONENTS_Y_U_V, PIPE_FORMAT_YV12 }, + { __DRI_IMAGE_FOURCC_NV12, __DRI_IMAGE_FORMAT_NONE, + __DRI_IMAGE_COMPONENTS_Y_UV, PIPE_FORMAT_NV12 }, + { __DRI_IMAGE_FOURCC_YUYV, __DRI_IMAGE_FORMAT_YUYV, + __DRI_IMAGE_COMPONENTS_Y_XUXV, PIPE_FORMAT_YUYV }, +}; - switch (format) { - case __DRI_IMAGE_FORMAT_ARGB1555: - pf = PIPE_FORMAT_B5G5R5A1_UNORM; - break; - case __DRI_IMAGE_FORMAT_RGB565: - pf = PIPE_FORMAT_B5G6R5_UNORM; - break; - case __DRI_IMAGE_FORMAT_XRGB8888: - pf = PIPE_FORMAT_BGRX8888_UNORM; - break; - case __DRI_IMAGE_FORMAT_ARGB8888: - pf = PIPE_FORMAT_BGRA8888_UNORM; - break; - case __DRI_IMAGE_FORMAT_XBGR8888: - pf = PIPE_FORMAT_RGBX8888_UNORM; - break; 
- case __DRI_IMAGE_FORMAT_ABGR8888: - pf = PIPE_FORMAT_RGBA8888_UNORM; - break; - case __DRI_IMAGE_FORMAT_XRGB2101010: - pf = PIPE_FORMAT_B10G10R10X2_UNORM; - break; - case __DRI_IMAGE_FORMAT_ARGB2101010: - pf = PIPE_FORMAT_B10G10R10A2_UNORM; - break; - case __DRI_IMAGE_FORMAT_XBGR2101010: - pf = PIPE_FORMAT_R10G10B10X2_UNORM; - break; - case __DRI_IMAGE_FORMAT_ABGR2101010: - pf = PIPE_FORMAT_R10G10B10A2_UNORM; - break; - case __DRI_IMAGE_FORMAT_R8: - pf = PIPE_FORMAT_R8_UNORM; - break; - case __DRI_IMAGE_FORMAT_GR88: - pf = PIPE_FORMAT_RG88_UNORM; - break; - case __DRI_IMAGE_FORMAT_R16: - pf = PIPE_FORMAT_R16_UNORM; - break; - case __DRI_IMAGE_FORMAT_GR1616: - pf = PIPE_FORMAT_R16G16_UNORM; - break; - case __DRI_IMAGE_FORMAT_YUYV: - pf = PIPE_FORMAT_YUYV; - break; - default: - pf = PIPE_FORMAT_NONE; - break; +static const struct dri2_format_mapping * +dri2_get_mapping_by_fourcc(int fourcc) { + for (unsigned i = 0; i < ARRAY_SIZE(dri2_format_table); i++) { + if (dri2_format_table[i].dri_fourcc == fourcc) + return &dri2_format_table[i]; } - return pf; + return NULL; } -static enum pipe_format fourcc_to_pipe_format(int fourcc) -{ - enum pipe_format pf; - - switch (fourcc) { - case __DRI_IMAGE_FOURCC_R8: - pf = PIPE_FORMAT_R8_UNORM; - break; - case __DRI_IMAGE_FOURCC_GR88: - pf = PIPE_FORMAT_RG88_UNORM; - break; - case __DRI_IMAGE_FOURCC_ARGB1555: - pf = PIPE_FORMAT_B5G5R5A1_UNORM; - break; - case __DRI_IMAGE_FOURCC_R16: - pf = PIPE_FORMAT_R16_UNORM; - break; - case __DRI_IMAGE_FOURCC_GR1616: - pf = PIPE_FORMAT_RG1616_UNORM; - break; - case __DRI_IMAGE_FOURCC_RGB565: - pf = PIPE_FORMAT_B5G6R5_UNORM; - break; - case __DRI_IMAGE_FOURCC_ARGB8888: - pf = PIPE_FORMAT_BGRA8888_UNORM; - break; - case __DRI_IMAGE_FOURCC_XRGB8888: - pf = PIPE_FORMAT_BGRX8888_UNORM; - break; - case __DRI_IMAGE_FOURCC_ABGR8888: - pf = PIPE_FORMAT_RGBA8888_UNORM; - break; - case __DRI_IMAGE_FOURCC_XBGR8888: - pf = PIPE_FORMAT_RGBX8888_UNORM; - break; - case __DRI_IMAGE_FOURCC_ARGB2101010: - pf = 
PIPE_FORMAT_B10G10R10A2_UNORM; - break; - case __DRI_IMAGE_FOURCC_XRGB2101010: - pf = PIPE_FORMAT_B10G10R10X2_UNORM; - break; - case __DRI_IMAGE_FOURCC_ABGR2101010: - pf = PIPE_FORMAT_R10G10B10A2_UNORM; - break; - case __DRI_IMAGE_FOURCC_XBGR2101010: - pf = PIPE_FORMAT_R10G10B10X2_UNORM; - break; - - case __DRI_IMAGE_FOURCC_NV12: - pf = PIPE_FORMAT_NV12; - break; - case __DRI_IMAGE_FOURCC_YUYV: - pf = PIPE_FORMAT_YUYV; - break; - case __DRI_IMAGE_FOURCC_YUV420: - case __DRI_IMAGE_FOURCC_YVU420: - pf = PIPE_FORMAT_YV12; - break; - - case __DRI_IMAGE_FOURCC_SARGB8888: - case __DRI_IMAGE_FOURCC_YUV410: - case __DRI_IMAGE_FOURCC_YUV411: - case __DRI_IMAGE_FOURCC_YUV422: - case __DRI_IMAGE_FOURCC_YUV444: - case __DRI_IMAGE_FOURCC_NV16: - case __DRI_IMAGE_FOURCC_YVU410: - case __DRI_IMAGE_FOURCC_YVU411: - case __DRI_IMAGE_FOURCC_YVU422: - case __DRI_IMAGE_FOURCC_YVU444: - default: - pf = PIPE_FORMAT_NONE; +static const struct dri2_format_mapping * +dri2_get_mapping_by_format(int format) { + for (unsigned i = 0; i < ARRAY_SIZE(dri2_format_table); i++) { + if (dri2_format_table[i].dri_format == format) + return &dri2_format_table[i]; } - return pf; + return NULL; } /** @@ -1011,7 +778,7 @@ static __DRIimage * dri2_create_image_from_winsys(__DRIscreen *_screen, - int width, int height, int format, + int width, int height, enum pipe_format pf, int num_handles, struct winsys_handle *whandle, void *loaderPrivate) { @@ -1019,14 +786,28 @@ struct pipe_screen *pscreen = screen->base.screen; __DRIimage *img; struct pipe_resource templ; - unsigned tex_usage; - enum pipe_format pf; + unsigned tex_usage = 0; int i; - tex_usage = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; + if (pscreen->is_format_supported(pscreen, pf, screen->target, 0, 0, + PIPE_BIND_RENDER_TARGET)) + tex_usage |= PIPE_BIND_RENDER_TARGET; + if (pscreen->is_format_supported(pscreen, pf, screen->target, 0, 0, + PIPE_BIND_SAMPLER_VIEW)) + tex_usage |= PIPE_BIND_SAMPLER_VIEW; + + if (!tex_usage && 
util_format_is_yuv(pf)) { + /* YUV format sampling can be emulated by the Mesa state tracker by + * using multiple R8/RG88 samplers. So try to rewrite the pipe format. + */ + pf = PIPE_FORMAT_R8_UNORM; + + if (pscreen->is_format_supported(pscreen, pf, screen->target, 0, 0, + PIPE_BIND_SAMPLER_VIEW)) + tex_usage |= PIPE_BIND_SAMPLER_VIEW; + } - pf = dri2_format_to_pipe_format (format); - if (pf == PIPE_FORMAT_NONE) + if (!tex_usage) return NULL; img = CALLOC_STRUCT(__DRIimageRec); @@ -1080,7 +861,6 @@ img->level = 0; img->layer = 0; - img->dri_format = format; img->use = 0; img->loader_private = loaderPrivate; @@ -1092,22 +872,31 @@ int width, int height, int format, int name, int pitch, void *loaderPrivate) { + const struct dri2_format_mapping *map = dri2_get_mapping_by_format(format); struct winsys_handle whandle; - enum pipe_format pf; + __DRIimage *img; + + if (!map) + return NULL; memset(&whandle, 0, sizeof(whandle)); whandle.type = WINSYS_HANDLE_TYPE_SHARED; whandle.handle = name; whandle.modifier = DRM_FORMAT_MOD_INVALID; - pf = dri2_format_to_pipe_format (format); - if (pf == PIPE_FORMAT_NONE) + whandle.stride = pitch * util_format_get_blocksize(map->pipe_format); + + img = dri2_create_image_from_winsys(_screen, width, height, map->pipe_format, + 1, &whandle, loaderPrivate); + + if (!img) return NULL; - whandle.stride = pitch * util_format_get_blocksize(pf); + img->dri_components = map->dri_components; + img->dri_fourcc = map->dri_fourcc; + img->dri_format = map->dri_format; - return dri2_create_image_from_winsys(_screen, width, height, format, - 1, &whandle, loaderPrivate); + return img; } static __DRIimage * @@ -1115,14 +904,19 @@ int width, int height, int fourcc, uint64_t modifier, int *fds, int num_fds, int *strides, int *offsets, unsigned *error, - int *dri_components, void *loaderPrivate) + void *loaderPrivate) { struct winsys_handle whandles[3]; - int format; + const struct dri2_format_mapping *map = dri2_get_mapping_by_fourcc(fourcc); __DRIimage 
*img = NULL; unsigned err = __DRI_IMAGE_ERROR_SUCCESS; int expected_num_fds, i; + if (!map) { + err = __DRI_IMAGE_ERROR_BAD_MATCH; + goto exit; + } + switch (fourcc) { case __DRI_IMAGE_FOURCC_YUV420: case __DRI_IMAGE_FOURCC_YVU420: @@ -1141,12 +935,6 @@ goto exit; } - format = convert_fourcc(fourcc, dri_components); - if (format == -1) { - err = __DRI_IMAGE_ERROR_BAD_MATCH; - goto exit; - } - memset(whandles, 0, sizeof(whandles)); for (i = 0; i < num_fds; i++) { @@ -1168,12 +956,19 @@ whandles[1] = whandles[2]; whandles[2] = tmp; fourcc = __DRI_IMAGE_FOURCC_YUV420; + map = dri2_get_mapping_by_fourcc(fourcc); } - img = dri2_create_image_from_winsys(_screen, width, height, format, + img = dri2_create_image_from_winsys(_screen, width, height, map->pipe_format, num_fds, whandles, loaderPrivate); - if(img == NULL) + if(img == NULL) { err = __DRI_IMAGE_ERROR_BAD_ALLOC; + goto exit; + } + + img->dri_components = map->dri_components; + img->dri_fourcc = fourcc; + img->dri_format = map->dri_format; exit: if (error) @@ -1190,16 +985,14 @@ const unsigned count, void *loaderPrivate) { + const struct dri2_format_mapping *map = dri2_get_mapping_by_format(format); struct dri_screen *screen = dri_screen(_screen); __DRIimage *img; struct pipe_resource templ; unsigned tex_usage; - enum pipe_format pf; - /* createImageWithModifiers doesn't supply usage, and we should not get - * here with both modifiers and a usage flag. 
- */ - assert(!(use && (modifiers != NULL))); + if (!map) + return NULL; tex_usage = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; @@ -1215,17 +1008,13 @@ tex_usage |= PIPE_BIND_CURSOR; } - pf = dri2_format_to_pipe_format (format); - if (pf == PIPE_FORMAT_NONE) - return NULL; - img = CALLOC_STRUCT(__DRIimageRec); if (!img) return NULL; memset(&templ, 0, sizeof(templ)); templ.bind = tex_usage; - templ.format = pf; + templ.format = map->pipe_format; templ.target = PIPE_TEXTURE_2D; templ.last_level = 0; templ.width0 = width; @@ -1251,6 +1040,7 @@ img->level = 0; img->layer = 0; img->dri_format = format; + img->dri_fourcc = map->dri_fourcc; img->dri_components = 0; img->use = use; @@ -1276,7 +1066,7 @@ void *loaderPrivate) { return dri2_create_image_common(dri_screen, width, height, format, - 0 /* use */, modifiers, count, + __DRI_IMAGE_USE_SHARE, modifiers, count, loaderPrivate); } @@ -1345,8 +1135,18 @@ *value = image->dri_components; return GL_TRUE; case __DRI_IMAGE_ATTRIB_FOURCC: - *value = convert_to_fourcc(image->dri_format); - return *value != -1; + if (image->dri_fourcc) { + *value = image->dri_fourcc; + } else { + const struct dri2_format_mapping *map; + + map = dri2_get_mapping_by_format(image->dri_format); + if (!map) + return GL_FALSE; + + *value = map->dri_fourcc; + } + return GL_TRUE; case __DRI_IMAGE_ATTRIB_NUM_PLANES: *value = 1; return GL_TRUE; @@ -1429,15 +1229,14 @@ int *names, int num_names, int *strides, int *offsets, void *loaderPrivate) { + const struct dri2_format_mapping *map = dri2_get_mapping_by_format(format); __DRIimage *img; - int dri_components; struct winsys_handle whandle; - if (num_names != 1) + if (!map) return NULL; - format = convert_fourcc(format, &dri_components); - if (format == -1) + if (num_names != 1) return NULL; memset(&whandle, 0, sizeof(whandle)); @@ -1447,12 +1246,15 @@ whandle.offset = offsets[0]; whandle.modifier = DRM_FORMAT_MOD_INVALID; - img = dri2_create_image_from_winsys(screen, width, height, format, + img = 
dri2_create_image_from_winsys(screen, width, height, map->pipe_format, 1, &whandle, loaderPrivate); if (img == NULL) return NULL; - img->dri_components = dri_components; + img->dri_components = map->dri_components; + img->dri_fourcc = map->dri_fourcc; + img->dri_format = map->pipe_format; + return img; } @@ -1485,18 +1287,9 @@ int *fds, int num_fds, int *strides, int *offsets, void *loaderPrivate) { - __DRIimage *img; - int dri_components; - - img = dri2_create_image_from_fd(screen, width, height, fourcc, + return dri2_create_image_from_fd(screen, width, height, fourcc, DRM_FORMAT_MOD_INVALID, fds, num_fds, - strides, offsets, NULL, - &dri_components, loaderPrivate); - if (img == NULL) - return NULL; - - img->dri_components = dri_components; - return img; + strides, offsets, NULL, loaderPrivate); } static boolean @@ -1505,24 +1298,26 @@ { struct dri_screen *screen = dri_screen(_screen); struct pipe_screen *pscreen = screen->base.screen; - const unsigned bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; int i, j; - for (i = 0, j = 0; (i < ARRAY_SIZE(fourcc_formats)) && + for (i = 0, j = 0; (i < ARRAY_SIZE(dri2_format_table)) && (j < max || max == 0); i++) { + const struct dri2_format_mapping *map = &dri2_format_table[i]; + /* The sRGB format is not a real FourCC as defined by drm_fourcc.h, so we * must not leak it out to clients. 
*/ - if (fourcc_formats[i] == __DRI_IMAGE_FOURCC_SARGB8888) + if (dri2_format_table[i].dri_fourcc == __DRI_IMAGE_FOURCC_SARGB8888) continue; - if (pscreen->is_format_supported(pscreen, - fourcc_to_pipe_format( - fourcc_formats[i]), - screen->target, - 0, 0, bind)) { + if (pscreen->is_format_supported(pscreen, map->pipe_format, + screen->target, 0, 0, + PIPE_BIND_RENDER_TARGET) || + pscreen->is_format_supported(pscreen, map->pipe_format, + screen->target, 0, 0, + PIPE_BIND_SAMPLER_VIEW)) { if (j < max) - formats[j] = fourcc_formats[i]; + formats[j] = map->dri_fourcc; j++; } } @@ -1537,12 +1332,19 @@ { struct dri_screen *screen = dri_screen(_screen); struct pipe_screen *pscreen = screen->base.screen; - enum pipe_format format = fourcc_to_pipe_format(fourcc); - const unsigned usage = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; + const struct dri2_format_mapping *map = dri2_get_mapping_by_fourcc(fourcc); + enum pipe_format format; + + if (!map) + return false; + + format = map->pipe_format; if (pscreen->query_dmabuf_modifiers != NULL && - pscreen->is_format_supported(pscreen, format, screen->target, 0, 0, - usage)) { + (pscreen->is_format_supported(pscreen, format, screen->target, 0, 0, + PIPE_BIND_RENDER_TARGET) || + pscreen->is_format_supported(pscreen, format, screen->target, 0, 0, + PIPE_BIND_SAMPLER_VIEW))) { pscreen->query_dmabuf_modifiers(pscreen, format, max, modifiers, external_only, count); return true; @@ -1563,12 +1365,10 @@ void *loaderPrivate) { __DRIimage *img; - int dri_components; img = dri2_create_image_from_fd(screen, width, height, fourcc, DRM_FORMAT_MOD_INVALID, fds, num_fds, - strides, offsets, error, - &dri_components, loaderPrivate); + strides, offsets, error, loaderPrivate); if (img == NULL) return NULL; @@ -1576,7 +1376,6 @@ img->sample_range = sample_range; img->horizontal_siting = horizontal_siting; img->vertical_siting = vertical_siting; - img->dri_components = dri_components; *error = __DRI_IMAGE_ERROR_SUCCESS; return img; @@ 
-1595,11 +1394,10 @@ void *loaderPrivate) { __DRIimage *img; - int dri_components; img = dri2_create_image_from_fd(screen, width, height, fourcc, modifier, fds, num_fds, strides, offsets, - error, &dri_components, loaderPrivate); + error, loaderPrivate); if (img == NULL) return NULL; @@ -1607,7 +1405,6 @@ img->sample_range = sample_range; img->horizontal_siting = horizontal_siting; img->vertical_siting = vertical_siting; - img->dri_components = dri_components; *error = __DRI_IMAGE_ERROR_SUCCESS; return img; diff -Nru mesa-18.3.3/src/gallium/state_trackers/dri/dri_drawable.c mesa-19.0.1/src/gallium/state_trackers/dri/dri_drawable.c --- mesa-18.3.3/src/gallium/state_trackers/dri/dri_drawable.c 2018-02-23 13:07:51.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/dri/dri_drawable.c 2019-03-31 23:16:37.000000000 +0000 @@ -524,13 +524,6 @@ dri_postprocessing(ctx, drawable, ST_ATTACHMENT_BACK_LEFT); - if (ctx->hud) { - hud_run(ctx->hud, ctx->st->cso_context, - drawable->textures[ST_ATTACHMENT_BACK_LEFT]); - } - - pipe->flush_resource(pipe, drawable->textures[ST_ATTACHMENT_BACK_LEFT]); - if (pipe->invalidate_resource && (flags & __DRI2_FLUSH_INVALIDATE_ANCILLARY)) { if (drawable->textures[ST_ATTACHMENT_DEPTH_STENCIL]) @@ -538,6 +531,13 @@ if (drawable->msaa_textures[ST_ATTACHMENT_DEPTH_STENCIL]) pipe->invalidate_resource(pipe, drawable->msaa_textures[ST_ATTACHMENT_DEPTH_STENCIL]); } + + if (ctx->hud) { + hud_run(ctx->hud, ctx->st->cso_context, + drawable->textures[ST_ATTACHMENT_BACK_LEFT]); + } + + pipe->flush_resource(pipe, drawable->textures[ST_ATTACHMENT_BACK_LEFT]); } flush_flags = 0; diff -Nru mesa-18.3.3/src/gallium/state_trackers/dri/dri_screen.h mesa-19.0.1/src/gallium/state_trackers/dri/dri_screen.h --- mesa-18.3.3/src/gallium/state_trackers/dri/dri_screen.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/dri/dri_screen.h 2019-03-31 23:16:37.000000000 +0000 @@ -103,6 +103,7 @@ unsigned level; unsigned layer; uint32_t 
dri_format; + uint32_t dri_fourcc; uint32_t dri_components; unsigned use; diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/basetexture9.c mesa-19.0.1/src/gallium/state_trackers/nine/basetexture9.c --- mesa-18.3.3/src/gallium/state_trackers/nine/basetexture9.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/basetexture9.c 2019-03-31 23:16:37.000000000 +0000 @@ -28,7 +28,7 @@ #include "cubetexture9.h" #include "volumetexture9.h" -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) #include "nine_pipe.h" #include "nine_dump.h" #endif @@ -605,7 +605,7 @@ BASETEX_REGISTER_UPDATE(This); } -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) void NineBaseTexture9_Dump( struct NineBaseTexture9 *This ) { @@ -620,4 +620,4 @@ This->base.info.array_size, This->base.info.last_level, This->managed.lod, This->managed.lod_resident); } -#endif /* DEBUG */ +#endif /* DEBUG || !NDEBUG */ diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/basetexture9.h mesa-19.0.1/src/gallium/state_trackers/nine/basetexture9.h --- mesa-18.3.3/src/gallium/state_trackers/nine/basetexture9.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/basetexture9.h 2019-03-31 23:16:37.000000000 +0000 @@ -150,7 +150,7 @@ nine_bind(slot, tex); } -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) void NineBaseTexture9_Dump( struct NineBaseTexture9 *This ); #else diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/nine_debug.c mesa-19.0.1/src/gallium/state_trackers/nine/nine_debug.c --- mesa-18.3.3/src/gallium/state_trackers/nine/nine_debug.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/nine_debug.c 2019-03-31 23:16:37.000000000 +0000 @@ -93,18 +93,18 @@ for (func += 4; func != f; ++func) { *ptr++ = tolower(*func); } *ptr = '\0'; if (tid) - debug_printf("nine:0x%08lx:%s:%s: ", tid, klass, ++f); + _debug_printf("nine:0x%08lx:%s:%s: ", tid, klass, ++f); else - debug_printf("nine:%s:%s: ", klass, 
++f); + _debug_printf("nine:%s:%s: ", klass, ++f); } else if (func) { if (tid) - debug_printf("nine:0x%08lx:%s ", tid, func); + _debug_printf("nine:0x%08lx:%s ", tid, func); else - debug_printf("nine:%s ", func); + _debug_printf("nine:%s ", func); } va_start(ap, fmt); - debug_vprintf(fmt, ap); + _debug_vprintf(fmt, ap); va_end(ap); } } @@ -116,5 +116,5 @@ { const char *r = strrchr(file, '/'); if (r == NULL) { r = strrchr(file, '\\'); } - debug_printf("nine:%s:%d: %s STUB!\n", r ? ++r : file, line, func); + _debug_printf("nine:%s:%d: %s STUB!\n", r ? ++r : file, line, func); } diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/nine_debug.h mesa-19.0.1/src/gallium/state_trackers/nine/nine_debug.h --- mesa-18.3.3/src/gallium/state_trackers/nine/nine_debug.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/nine_debug.h 2019-03-31 23:16:37.000000000 +0000 @@ -33,7 +33,7 @@ #define ERR(fmt, ...) _nine_debug_printf(DBG_ERROR, __FUNCTION__, fmt, ## __VA_ARGS__) -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) #define WARN(fmt, ...) _nine_debug_printf(DBG_WARN, __FUNCTION__, fmt, ## __VA_ARGS__) #define WARN_ONCE(fmt, ...) \ do { \ @@ -48,7 +48,7 @@ #define WARN_ONCE(fmt, ...) #endif -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) #define DBG_FLAG(flag, fmt, ...) \ _nine_debug_printf(flag, __FUNCTION__, fmt, ## __VA_ARGS__) #else @@ -90,7 +90,7 @@ const char *func, unsigned line ); -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) #define STUB(ret) \ do { \ _nine_stub(__FILE__, __FUNCTION__, __LINE__); \ @@ -104,7 +104,7 @@ * macro is designed to be used in conditionals ala * if (user_error(required condition)) { assertion failed } * It also prints debug message if the assertion fails. */ -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) #define user_error(x) \ (!(x) ? (DBG_FLAG(DBG_USER, "User assertion failed: `%s'\n", #x), TRUE) \ : FALSE) @@ -112,7 +112,7 @@ #define user_error(x) (!(x) ? 
TRUE : FALSE) #endif -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) #define user_warn(x) \ if ((x)) { DBG_FLAG(DBG_USER, "User warning: `%s'\n", #x); } #else diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/nine_dump.c mesa-19.0.1/src/gallium/state_trackers/nine/nine_dump.c --- mesa-18.3.3/src/gallium/state_trackers/nine/nine_dump.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/nine_dump.c 2019-03-31 23:16:37.000000000 +0000 @@ -8,7 +8,7 @@ #include "nine_dump.h" -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) static char __thread tls[128]; @@ -810,4 +810,4 @@ FREE(s); } -#endif /* DEBUG */ +#endif /* DEBUG || !NDEBUG */ diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/nine_dump.h mesa-19.0.1/src/gallium/state_trackers/nine/nine_dump.h --- mesa-18.3.3/src/gallium/state_trackers/nine/nine_dump.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/nine_dump.h 2019-03-31 23:16:37.000000000 +0000 @@ -16,7 +16,7 @@ const char *nine_D3DLOCK_to_str(DWORD); const char *nine_D3DSAMP_to_str(DWORD); -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) void nine_dump_D3DADAPTER_IDENTIFIER9(unsigned, const D3DADAPTER_IDENTIFIER9 *); @@ -29,7 +29,7 @@ void nine_dump_D3DTSS_value(unsigned, D3DTEXTURESTAGESTATETYPE, DWORD); -#else /* !DEBUG */ +#else /* !DEBUG && NDEBUG */ static inline void nine_dump_D3DADAPTER_IDENTIFIER9(unsigned ch, const D3DADAPTER_IDENTIFIER9 *id) @@ -47,6 +47,6 @@ nine_dump_D3DTSS_value(unsigned ch, D3DTEXTURESTAGESTATETYPE tss, DWORD value) { } -#endif /* DEBUG */ +#endif /* DEBUG || !NDEBUG */ #endif /* _NINE_DUMP_H_H_ */ diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/nine_ff.c mesa-19.0.1/src/gallium/state_trackers/nine/nine_ff.c --- mesa-18.3.3/src/gallium/state_trackers/nine/nine_ff.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/nine_ff.c 2019-03-31 23:16:37.000000000 +0000 @@ -2138,7 +2138,7 @@ { struct 
nine_context *context = &device->context; - if (device->ff.num_vs > 100) { + if (device->ff.num_vs > 1024) { /* could destroy the bound one here, so unbind */ context->pipe->bind_vs_state(context->pipe, NULL); util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL); @@ -2152,7 +2152,7 @@ { struct nine_context *context = &device->context; - if (device->ff.num_ps > 100) { + if (device->ff.num_ps > 1024) { /* could destroy the bound one here, so unbind */ context->pipe->bind_fs_state(context->pipe, NULL); util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL); @@ -2491,7 +2491,7 @@ for (k = 0; k < 4; k++) D->m[i][k] *= det; -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) { D3DMATRIX I; diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/nine_pipe.h mesa-19.0.1/src/gallium/state_trackers/nine/nine_pipe.h --- mesa-18.3.3/src/gallium/state_trackers/nine/nine_pipe.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/nine_pipe.h 2019-03-31 23:16:37.000000000 +0000 @@ -377,6 +377,10 @@ if (levels) *levels = 1; + /* Ignores multisamplequality */ + if (*multisample == D3DMULTISAMPLE_NONE) + return D3D_OK; + if (*multisample == D3DMULTISAMPLE_NONMASKABLE) { if (depth_stencil_format(format)) bind = d3d9_get_pipe_depth_format_bindings(format); diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/nine_queue.c mesa-19.0.1/src/gallium/state_trackers/nine/nine_queue.c --- mesa-18.3.3/src/gallium/state_trackers/nine/nine_queue.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/nine_queue.c 2019-03-31 23:16:37.000000000 +0000 @@ -265,8 +265,12 @@ nine_queue_delete(struct nine_queue_pool *ctx) { unsigned i; + mtx_destroy(&ctx->mutex_pop); + cnd_destroy(&ctx->event_pop); + mtx_destroy(&ctx->mutex_push); + cnd_destroy(&ctx->event_push); for (i = 0; i < NINE_CMD_BUFS; i++) FREE(ctx->pool[i].mem_pool); diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/nine_state.c 
mesa-19.0.1/src/gallium/state_trackers/nine/nine_state.c --- mesa-18.3.3/src/gallium/state_trackers/nine/nine_state.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/nine_state.c 2019-03-31 23:16:37.000000000 +0000 @@ -157,7 +157,7 @@ (void) mtx_init(&ctx->thread_running, mtx_plain); (void) mtx_init(&ctx->thread_resume, mtx_plain); -#if DEBUG +#if defined(DEBUG) || !defined(NDEBUG) u_thread_setname("Main thread"); #endif @@ -234,7 +234,12 @@ nine_csmt_wait_processed(ctx); nine_queue_delete(ctx->pool); + + mtx_destroy(&ctx->thread_resume); + mtx_destroy(&ctx->thread_running); + mtx_destroy(&ctx->mutex_processed); + cnd_destroy(&ctx->event_processed); FREE(ctx); diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/surface9.c mesa-19.0.1/src/gallium/state_trackers/nine/surface9.c --- mesa-18.3.3/src/gallium/state_trackers/nine/surface9.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/surface9.c 2019-03-31 23:16:37.000000000 +0000 @@ -272,7 +272,7 @@ assert(This->surface[1]); } -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) void NineSurface9_Dump( struct NineSurface9 *This ) { @@ -300,7 +300,7 @@ NineUnknown_Release(NineUnknown(tex)); } } -#endif /* DEBUG */ +#endif /* DEBUG || !NDEBUG */ HRESULT NINE_WINAPI NineSurface9_GetContainer( struct NineSurface9 *This, diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/surface9.h mesa-19.0.1/src/gallium/state_trackers/nine/surface9.h --- mesa-18.3.3/src/gallium/state_trackers/nine/surface9.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/surface9.h 2019-03-31 23:16:37.000000000 +0000 @@ -139,7 +139,7 @@ return This->base.usage == 0 && !This->texture; } -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) void NineSurface9_Dump( struct NineSurface9 *This ); #else diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/swapchain9.c mesa-19.0.1/src/gallium/state_trackers/nine/swapchain9.c --- 
mesa-18.3.3/src/gallium/state_trackers/nine/swapchain9.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/swapchain9.c 2019-03-31 23:16:37.000000000 +0000 @@ -28,6 +28,7 @@ #include "nine_pipe.h" #include "nine_dump.h" +#include "util/u_atomic.h" #include "util/u_inlines.h" #include "util/u_surface.h" #include "hud/hud_context.h" @@ -50,6 +51,7 @@ D3DDISPLAYMODEEX *mode ) { HRESULT hr; + int i; DBG("This=%p pDevice=%p pPresent=%p pCTX=%p hFocusWindow=%p\n", This, pParams->device, pPresent, pCTX, hFocusWindow); @@ -65,8 +67,7 @@ This->mode = NULL; ID3DPresent_AddRef(pPresent); - if (!This->actx->thread_submit && - This->base.device->minor_version_num > 2) { + if (This->base.device->minor_version_num > 2) { D3DPRESENT_PARAMETERS2 params2; memset(¶ms2, 0, sizeof(D3DPRESENT_PARAMETERS2)); @@ -80,6 +81,11 @@ This->rendering_done = FALSE; This->pool = NULL; + for (i = 0; i < D3DPRESENT_BACK_BUFFERS_MAX_EX + 1; i++) { + This->pending_presentation[i] = calloc(1, sizeof(BOOL)); + if (!This->pending_presentation[i]) + return E_OUTOFMEMORY; + } return NineSwapChain9_Resize(This, pPresentationParameters, mode); } @@ -122,6 +128,40 @@ return ret; } +static void +D3DWindowBuffer_release(struct NineSwapChain9 *This, + D3DWindowBuffer *present_handle) +{ + int i; + /* Add it to the 'pending release' list */ + for (i = 0; i < D3DPRESENT_BACK_BUFFERS_MAX_EX + 1; i++) { + if (!This->present_handles_pending_release[i]) { + This->present_handles_pending_release[i] = present_handle; + break; + } + } + if (i == (D3DPRESENT_BACK_BUFFERS_MAX_EX + 1)) { + ERR("Server not releasing buffers...\n"); + assert(false); + } + + /* Destroy elements of the list released by the server */ + for (i = 0; i < D3DPRESENT_BACK_BUFFERS_MAX_EX + 1; i++) { + if (This->present_handles_pending_release[i] && + ID3DPresent_IsBufferReleased(This->present, This->present_handles_pending_release[i])) { + /* WaitBufferReleased also waits the presentation feedback + * (which should 
arrive at about the same time), + * while IsBufferReleased doesn't. DestroyD3DWindowBuffer unfortunately + * checks it to release immediately all data, else the release + * is postponed for This->present release. To avoid leaks (we may handle + * a lot of resize), call WaitBufferReleased. */ + ID3DPresent_WaitBufferReleased(This->present, This->present_handles_pending_release[i]); + ID3DPresent_DestroyD3DWindowBuffer(This->present, This->present_handles_pending_release[i]); + This->present_handles_pending_release[i] = NULL; + } + } +} + static int NineSwapChain9_GetBackBufferCountForParams( struct NineSwapChain9 *This, D3DPRESENT_PARAMETERS *pParams ); @@ -285,7 +325,7 @@ This->enable_threadpool = FALSE; for (i = 0; i < oldBufferCount; i++) { - ID3DPresent_DestroyD3DWindowBuffer(This->present, This->present_handles[i]); + D3DWindowBuffer_release(This, This->present_handles[i]); This->present_handles[i] = NULL; if (This->present_buffers[i]) pipe_resource_reference(&(This->present_buffers[i]), NULL); @@ -508,6 +548,16 @@ if (This->pool) _mesa_threadpool_destroy(This, This->pool); + for (i = 0; i < D3DPRESENT_BACK_BUFFERS_MAX_EX + 1; i++) { + if (This->pending_presentation[i]) + FREE(This->pending_presentation[i]); + } + + for (i = 0; i < D3DPRESENT_BACK_BUFFERS_MAX_EX + 1; i++) { + if (This->present_handles_pending_release[i]) + ID3DPresent_DestroyD3DWindowBuffer(This->present, This->present_handles_pending_release[i]); + } + for (i = 0; i < This->num_back_buffers; i++) { if (This->buffers[i]) NineUnknown_Detach(NineUnknown(This->buffers[i])); @@ -619,6 +669,7 @@ struct pipe_fence_handle *fence_to_wait; ID3DPresent *present; D3DWindowBuffer *present_handle; + BOOL *pending_presentation; HWND hDestWindowOverride; }; @@ -630,6 +681,7 @@ work->screen->fence_reference(work->screen, &(work->fence_to_wait), NULL); } ID3DPresent_PresentBuffer(work->present, work->present_handle, work->hDestWindowOverride, NULL, NULL, NULL, 0); + p_atomic_set(work->pending_presentation, 
FALSE); free(work); } @@ -643,6 +695,8 @@ work->present = This->present; work->present_handle = This->present_handles[0]; work->hDestWindowOverride = hDestWindowOverride; + work->pending_presentation = This->pending_presentation[0]; + p_atomic_set(work->pending_presentation, TRUE); This->tasks[0] = _mesa_threadpool_queue_task(This->pool, work_present, work); return; @@ -661,7 +715,7 @@ struct pipe_fence_handle *fence; HRESULT hr; struct pipe_blit_info blit; - int target_width, target_height, target_depth; + int target_width, target_height, target_depth, i; DBG("present: This=%p pSourceRect=%p pDestRect=%p " "pDirtyRegion=%p hDestWindowOverride=%p" @@ -696,9 +750,51 @@ if (This->params.SwapEffect == D3DSWAPEFFECT_DISCARD) handle_draw_cursor_and_hud(This, resource); - ID3DPresent_GetWindowInfo(This->present, hDestWindowOverride, &target_width, &target_height, &target_depth); + hr = ID3DPresent_GetWindowInfo(This->present, hDestWindowOverride, &target_width, &target_height, &target_depth); (void)target_depth; + /* Can happen with old Wine (presentation can still succeed), + * or at window destruction. */ + if (FAILED(hr) || target_width == 0 || target_height == 0) { + target_width = resource->width0; + target_height = resource->height0; + } + + /* Switch to using presentation buffers on window resize. + * Note: Most apps should resize the d3d back buffers when + * a window resize is detected, which will result in a call to + * NineSwapChain9_Resize. Thus everything will get released, + * and it will switch back to not using separate presentation + * buffers. 
*/ + if (!This->present_buffers[0] && + (target_width != resource->width0 || target_height != resource->height0)) { + BOOL failure = false; + struct pipe_resource *new_resource[This->num_back_buffers]; + D3DWindowBuffer *new_handles[This->num_back_buffers]; + for (i = 0; i < This->num_back_buffers; i++) { + /* Note: if (!new_handles[i]), new_resource[i] + * gets released and contains NULL */ + create_present_buffer(This, target_width, target_height, &new_resource[i], &new_handles[i]); + if (!new_handles[i]) + failure = true; + } + if (failure) { + for (i = 0; i < This->num_back_buffers; i++) { + if (new_resource[i]) + pipe_resource_reference(&new_resource[i], NULL); + if (new_handles[i]) + D3DWindowBuffer_release(This, new_handles[i]); + } + } else { + for (i = 0; i < This->num_back_buffers; i++) { + D3DWindowBuffer_release(This, This->present_handles[i]); + This->present_handles[i] = new_handles[i]; + pipe_resource_reference(&This->present_buffers[i], new_resource[i]); + pipe_resource_reference(&new_resource[i], NULL); + } + } + } + pipe = NineDevice9_GetPipe(This->base.device); if (This->present_buffers[0]) { @@ -723,13 +819,7 @@ create_present_buffer(This, target_width, target_height, &new_resource, &new_handle); /* Switch to the new buffer */ if (new_handle) { - /* WaitBufferReleased also waits the presentation feedback, - * while IsBufferReleased doesn't. DestroyD3DWindowBuffer unfortunately - * checks it to release immediately all data, else the release - * is postponed for This->present release. To avoid leaks (we may handle - * a lot of resize), call WaitBufferReleased. 
*/ - ID3DPresent_WaitBufferReleased(This->present, This->present_handles[0]); - ID3DPresent_DestroyD3DWindowBuffer(This->present, This->present_handles[0]); + D3DWindowBuffer_release(This, This->present_handles[0]); This->present_handles[0] = new_handle; pipe_resource_reference(&This->present_buffers[0], new_resource); pipe_resource_reference(&new_resource, NULL); @@ -817,6 +907,7 @@ struct pipe_resource *res = NULL; D3DWindowBuffer *handle_temp; struct threadpool_task *task_temp; + BOOL *pending_presentation_temp; int i; HRESULT hr; @@ -850,14 +941,14 @@ if (This->base.device->minor_version_num > 2 && This->params.SwapEffect == D3DSWAPEFFECT_DISCARD && - This->params.PresentationInterval == D3DPRESENT_INTERVAL_IMMEDIATE && - !This->actx->thread_submit) { + This->params.PresentationInterval == D3DPRESENT_INTERVAL_IMMEDIATE) { int next_buffer = -1; while (next_buffer == -1) { /* Find a free backbuffer */ for (i = 1; i < This->num_back_buffers; i++) { - if (ID3DPresent_IsBufferReleased(This->present, This->present_handles[i])) { + if (!p_atomic_read(This->pending_presentation[i]) && + ID3DPresent_IsBufferReleased(This->present, This->present_handles[i])) { DBG("Found buffer released: %d\n", i); next_buffer = i; break; @@ -868,6 +959,17 @@ ID3DPresent_WaitBufferReleaseEvent(This->present); } } + + /* Free the task (we already checked it is finished) */ + if (This->tasks[next_buffer]) + _mesa_threadpool_wait_for_task(This->pool, &(This->tasks[next_buffer])); + assert(!*This->pending_presentation[next_buffer] && !This->tasks[next_buffer]); + This->tasks[next_buffer] = This->tasks[0]; + This->tasks[0] = NULL; + pending_presentation_temp = This->pending_presentation[next_buffer]; + This->pending_presentation[next_buffer] = This->pending_presentation[0]; + This->pending_presentation[0] = pending_presentation_temp; + /* Switch with the released buffer */ pipe_resource_reference(&res, This->buffers[0]->base.resource); NineSurface9_SetResourceResize( @@ -886,9 +988,6 @@ 
handle_temp = This->present_handles[0]; This->present_handles[0] = This->present_handles[next_buffer]; This->present_handles[next_buffer] = handle_temp; - - /* Path not yet compatible with thread_submit */ - assert(!This->tasks[0] && !This->tasks[next_buffer]); } else { switch (This->params.SwapEffect) { case D3DSWAPEFFECT_OVERLAY: /* Not implemented, fallback to FLIP */ @@ -923,6 +1022,11 @@ This->tasks[i-1] = This->tasks[i]; } This->tasks[This->num_back_buffers - 1] = task_temp; + pending_presentation_temp = This->pending_presentation[0]; + for (i = 1; i < This->num_back_buffers; i++) { + This->pending_presentation[i-1] = This->pending_presentation[i]; + } + This->pending_presentation[This->num_back_buffers - 1] = pending_presentation_temp; break; case D3DSWAPEFFECT_COPY: @@ -932,6 +1036,7 @@ if (This->tasks[0]) _mesa_threadpool_wait_for_task(This->pool, &(This->tasks[0])); + assert(!*This->pending_presentation[0]); ID3DPresent_WaitBufferReleased(This->present, This->present_handles[0]); } @@ -1159,15 +1264,17 @@ * without releasing them: * . Buffer on screen. * . Buffer scheduled kernel side to be next on screen. - * . Last buffer sent. - * For some reasons, 5 buffers are actually needed, because in - * case a pageflip is missed because rendering wasn't finished, - * the Xserver will hold 4 buffers. */ - if (!This->actx->thread_submit && - This->base.device->minor_version_num > 2 && - pParams->PresentationInterval == D3DPRESENT_INTERVAL_IMMEDIATE && - count < 5) - count = 5; + * . Last buffer sent. */ + if (This->base.device->minor_version_num > 2 && + pParams->PresentationInterval == D3DPRESENT_INTERVAL_IMMEDIATE) { + if (This->actx->thread_submit && count < 4) + count = 4; + /* When thread_submit is not used, 5 buffers are actually needed, + * because in case a pageflip is missed because rendering wasn't finished, + * the Xserver will hold 4 buffers. 
*/ + else if (!This->actx->thread_submit && count < 5) + count = 5; + } } return count; diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/swapchain9.h mesa-19.0.1/src/gallium/state_trackers/nine/swapchain9.h --- mesa-18.3.3/src/gallium/state_trackers/nine/swapchain9.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/swapchain9.h 2019-03-31 23:16:37.000000000 +0000 @@ -57,6 +57,7 @@ struct NineSurface9 *buffers[D3DPRESENT_BACK_BUFFERS_MAX_EX + 1]; /* 0 to BackBufferCount-1 : the back buffers. BackBufferCount : additional buffer */ struct pipe_resource *present_buffers[D3DPRESENT_BACK_BUFFERS_MAX_EX + 1]; D3DWindowBuffer *present_handles[D3DPRESENT_BACK_BUFFERS_MAX_EX + 1]; + D3DWindowBuffer *present_handles_pending_release[D3DPRESENT_BACK_BUFFERS_MAX_EX + 1]; struct pipe_fence_handle *swap_fences[DRI_SWAP_FENCES_MAX]; unsigned int cur_fences; @@ -72,6 +73,7 @@ struct threadpool *pool; struct threadpool_task *tasks[D3DPRESENT_BACK_BUFFERS_MAX_EX + 1]; + BOOL *pending_presentation[D3DPRESENT_BACK_BUFFERS_MAX_EX + 1]; BOOL enable_threadpool; }; diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/threadpool.c mesa-19.0.1/src/gallium/state_trackers/nine/threadpool.c --- mesa-18.3.3/src/gallium/state_trackers/nine/threadpool.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/threadpool.c 2019-03-31 23:16:37.000000000 +0000 @@ -52,10 +52,8 @@ while (!pool->workqueue && !pool->shutdown) pthread_cond_wait(&pool->new_work, &pool->m); - if (pool->shutdown) { - pthread_mutex_unlock(&pool->m); - return NULL; - } + if (pool->shutdown) + break; /* Pull the first task from the list. 
We don't free it -- it now lacks * a reference other than the worker creator's, whose responsibility it diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/volume9.c mesa-19.0.1/src/gallium/state_trackers/nine/volume9.c --- mesa-18.3.3/src/gallium/state_trackers/nine/volume9.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/volume9.c 2019-03-31 23:16:37.000000000 +0000 @@ -186,7 +186,7 @@ NineVolume9_MarkContainerDirty( struct NineVolume9 *This ) { struct NineBaseTexture9 *tex; -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) /* This is always contained by a NineVolumeTexture9. */ GUID id = IID_IDirect3DVolumeTexture9; REFIID ref = &id; diff -Nru mesa-18.3.3/src/gallium/state_trackers/va/context.c mesa-19.0.1/src/gallium/state_trackers/va/context.c --- mesa-18.3.3/src/gallium/state_trackers/va/context.c 2018-04-16 21:31:06.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/va/context.c 2019-03-31 23:16:37.000000000 +0000 @@ -175,7 +175,7 @@ ctx->version_minor = 1; *ctx->vtable = vtable; *ctx->vtable_vpp = vtable_vpp; - ctx->max_profiles = PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH - PIPE_VIDEO_PROFILE_UNKNOWN; + ctx->max_profiles = PIPE_VIDEO_PROFILE_MAX - PIPE_VIDEO_PROFILE_UNKNOWN - 1; ctx->max_entrypoints = 2; ctx->max_attributes = 1; ctx->max_image_formats = VL_VA_MAX_IMAGE_FORMATS; diff -Nru mesa-18.3.3/src/gallium/state_trackers/va/picture_vp9.c mesa-19.0.1/src/gallium/state_trackers/va/picture_vp9.c --- mesa-18.3.3/src/gallium/state_trackers/va/picture_vp9.c 2018-04-16 21:31:06.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/va/picture_vp9.c 2019-03-31 23:16:37.000000000 +0000 @@ -28,6 +28,8 @@ #include "vl/vl_vlc.h" #include "va_private.h" +#define NUM_VP9_REFS 8 + void vlVaHandlePictureParameterBufferVP9(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf) { VADecPictureParameterBufferVP9 *vp9 = buf->data; @@ -79,8 +81,11 @@ context->desc.vp9.picture_parameter.bit_depth = vp9->bit_depth; - for (i = 0 
; i < 8 ; i++) + for (i = 0 ; i < NUM_VP9_REFS ; i++) vlVaGetReferenceFrame(drv, vp9->reference_frames[i], &context->desc.vp9.ref[i]); + + if (!context->decoder && !context->templat.max_references) + context->templat.max_references = NUM_VP9_REFS; } void vlVaHandleSliceParameterBufferVP9(vlVaContext *context, vlVaBuffer *buf) diff -Nru mesa-18.3.3/src/gallium/state_trackers/va/surface.c mesa-19.0.1/src/gallium/state_trackers/va/surface.c --- mesa-18.3.3/src/gallium/state_trackers/va/surface.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/va/surface.c 2019-03-31 23:16:37.000000000 +0000 @@ -146,9 +146,40 @@ VAStatus vlVaQuerySurfaceStatus(VADriverContextP ctx, VASurfaceID render_target, VASurfaceStatus *status) { + vlVaDriver *drv; + vlVaSurface *surf; + vlVaContext *context; + if (!ctx) return VA_STATUS_ERROR_INVALID_CONTEXT; + drv = VL_VA_DRIVER(ctx); + if (!drv) + return VA_STATUS_ERROR_INVALID_CONTEXT; + + mtx_lock(&drv->mutex); + + surf = handle_table_get(drv->htab, render_target); + if (!surf || !surf->buffer) { + mtx_unlock(&drv->mutex); + return VA_STATUS_ERROR_INVALID_SURFACE; + } + + context = handle_table_get(drv->htab, surf->ctx); + if (!context) { + mtx_unlock(&drv->mutex); + return VA_STATUS_ERROR_INVALID_CONTEXT; + } + + if (context->decoder->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) { + if(surf->feedback == NULL) + *status=VASurfaceReady; + else + *status=VASurfaceRendering; + } + + mtx_unlock(&drv->mutex); + return VA_STATUS_SUCCESS; } diff -Nru mesa-18.3.3/src/gallium/state_trackers/xa/meson.build mesa-19.0.1/src/gallium/state_trackers/xa/meson.build --- mesa-18.3.3/src/gallium/state_trackers/xa/meson.build 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/xa/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -18,7 +18,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-xa_version = ['2', '4', '0'] +xa_version = ['2', '5', '0'] xa_conf = configuration_data() xa_conf.set('XA_MAJOR', xa_version[0]) diff -Nru mesa-18.3.3/src/gallium/state_trackers/xa/xa_composite.c mesa-19.0.1/src/gallium/state_trackers/xa/xa_composite.c --- mesa-18.3.3/src/gallium/state_trackers/xa/xa_composite.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/xa/xa_composite.c 2019-03-31 23:16:37.000000000 +0000 @@ -112,12 +112,6 @@ boolean supported = FALSE; /* - * Temporarily disable component alpha since it appears buggy. - */ - if (mask_pic && mask_pic->component_alpha) - return FALSE; - - /* * our default in case something goes wrong */ *blend = xa_blends[XA_BLEND_OP_OVER]; @@ -126,9 +120,16 @@ if (xa_blends[i].op == op) { *blend = xa_blends[i]; supported = TRUE; + break; } } + /* + * No component alpha yet. + */ + if (mask_pic && mask_pic->component_alpha && blend->alpha_src) + return FALSE; + if (!dst_pic->srf) return supported; @@ -150,21 +151,6 @@ blend->rgb_src = PIPE_BLENDFACTOR_ZERO; } - /* - * If the source alpha is being used, then we should only be in a case where - * the source blend factor is 0, and the source blend value is the mask - * channels multiplied by the source picture's alpha. - */ - if (mask_pic && mask_pic->component_alpha && - xa_format_rgb(mask_pic->pict_format) && - blend->alpha_src) { - if (blend->rgb_dst == PIPE_BLENDFACTOR_SRC_ALPHA) { - blend->rgb_dst = PIPE_BLENDFACTOR_SRC_COLOR; - } else if (blend->rgb_dst == PIPE_BLENDFACTOR_INV_SRC_ALPHA) { - blend->rgb_dst = PIPE_BLENDFACTOR_INV_SRC_COLOR; - } - } - return supported; } @@ -214,43 +200,53 @@ return 1; } +/** + * xa_src_pict_is_accelerated - Check whether we support acceleration + * of the given src_pict type + * + * \param src_pic[in]: Pointer to a union xa_source_pict to check. + * + * \returns TRUE if accelerated, FALSE otherwise. 
+ */ +static boolean +xa_src_pict_is_accelerated(const union xa_source_pict *src_pic) +{ + if (!src_pic) + return TRUE; + + if (src_pic->type == xa_src_pict_solid_fill || + src_pic->type == xa_src_pict_float_solid_fill) + return TRUE; + + return FALSE; +} + XA_EXPORT int xa_composite_check_accelerated(const struct xa_composite *comp) { - struct xa_composite_blend blend; struct xa_picture *src_pic = comp->src; + struct xa_picture *mask_pic = comp->mask; + struct xa_composite_blend blend; if (!xa_is_filter_accelerated(src_pic) || !xa_is_filter_accelerated(comp->mask)) { return -XA_ERR_INVAL; } + if (!xa_src_pict_is_accelerated(src_pic->src_pict) || + (mask_pic && !xa_src_pict_is_accelerated(mask_pic->src_pict))) + return -XA_ERR_INVAL; - if (src_pic->src_pict) { - if (src_pic->src_pict->type != xa_src_pict_solid_fill) - return -XA_ERR_INVAL; - - /* - * Currently we don't support solid fill with a mask. - * We can easily do that, but that would require shader, - * sampler view setup and vertex setup modification. - */ - if (comp->mask) - return -XA_ERR_INVAL; - } - - if (blend_for_op(&blend, comp->op, comp->src, comp->mask, comp->dst)) { - struct xa_picture *mask = comp->mask; - if (mask && mask->component_alpha && - xa_format_rgb(mask->pict_format)) { - if (blend.alpha_src && blend.rgb_src != PIPE_BLENDFACTOR_ZERO) { - return -XA_ERR_INVAL; - } - } + if (!blend_for_op(&blend, comp->op, comp->src, comp->mask, comp->dst)) + return -XA_ERR_INVAL; - return XA_ERR_NONE; - } - return -XA_ERR_INVAL; + /* + * No component alpha yet. 
+ */ + if (mask_pic && mask_pic->component_alpha && blend.alpha_src) + return -XA_ERR_INVAL; + + return XA_ERR_NONE; } static int @@ -293,7 +289,7 @@ src_hw_format = xa_surface_format(src); src_pic_format = src_pic->pict_format; - set_alpha = (xa_format_type_is_color(src_pic_format) && + set_alpha = (xa_format_type_is_color(src_hw_format) && xa_format_a(src_pic_format) == 0); if (set_alpha) @@ -324,6 +320,61 @@ return ret; } +static void +xa_src_in_mask(float src[4], const float mask[4]) +{ + src[0] *= mask[3]; + src[1] *= mask[3]; + src[2] *= mask[3]; + src[3] *= mask[3]; +} + +/** + * xa_handle_src_pict - Set up xa_context state and fragment shader + * input based on scr_pict type + * + * \param ctx[in, out]: Pointer to the xa context. + * \param src_pict[in]: Pointer to the union xa_source_pict to consider. + * \param is_mask[in]: Whether we're considering a mask picture. + * + * \returns TRUE if succesful, FALSE otherwise. + * + * This function computes some xa_context state used to determine whether + * to upload the solid color and also the solid color itself used as an input + * to the fragment shader. 
+ */ +static boolean +xa_handle_src_pict(struct xa_context *ctx, + const union xa_source_pict *src_pict, + boolean is_mask) +{ + float solid_color[4]; + + switch(src_pict->type) { + case xa_src_pict_solid_fill: + xa_pixel_to_float4(src_pict->solid_fill.color, solid_color); + break; + case xa_src_pict_float_solid_fill: + memcpy(solid_color, src_pict->float_solid_fill.color, + sizeof(solid_color)); + break; + default: + return FALSE; + } + + if (is_mask && ctx->has_solid_src) + xa_src_in_mask(ctx->solid_color, solid_color); + else + memcpy(ctx->solid_color, solid_color, sizeof(solid_color)); + + if (is_mask) + ctx->has_solid_mask = TRUE; + else + ctx->has_solid_src = TRUE; + + return TRUE; +} + static int bind_shaders(struct xa_context *ctx, const struct xa_composite *comp) { @@ -331,48 +382,54 @@ struct xa_shader shader; struct xa_picture *src_pic = comp->src; struct xa_picture *mask_pic = comp->mask; + struct xa_picture *dst_pic = comp->dst; + + ctx->has_solid_src = FALSE; + ctx->has_solid_mask = FALSE; - ctx->has_solid_color = FALSE; + if (dst_pic && xa_format_type(dst_pic->pict_format) != + xa_format_type(xa_surface_format(dst_pic->srf))) + return -XA_ERR_INVAL; if (src_pic) { if (src_pic->wrap == xa_wrap_clamp_to_border && src_pic->has_transform) fs_traits |= FS_SRC_REPEAT_NONE; - if (src_pic->src_pict) { - if (src_pic->src_pict->type == xa_src_pict_solid_fill) { - fs_traits |= FS_SOLID_FILL | FS_FILL; - vs_traits |= VS_SOLID_FILL; - xa_pixel_to_float4(src_pic->src_pict->solid_fill.color, - ctx->solid_color); - ctx->has_solid_color = TRUE; - } - } else { - fs_traits |= FS_COMPOSITE; - vs_traits |= VS_COMPOSITE; - } + fs_traits |= FS_COMPOSITE; + vs_traits |= VS_COMPOSITE; - fs_traits |= picture_format_fixups(src_pic, 0); + if (src_pic->src_pict) { + if (!xa_handle_src_pict(ctx, src_pic->src_pict, false)) + return -XA_ERR_INVAL; + fs_traits |= FS_SRC_SRC; + vs_traits |= VS_SRC_SRC; + } else + fs_traits |= picture_format_fixups(src_pic, 0); } if (mask_pic) { 
vs_traits |= VS_MASK; fs_traits |= FS_MASK; - if (mask_pic->wrap == xa_wrap_clamp_to_border && - mask_pic->has_transform) - fs_traits |= FS_MASK_REPEAT_NONE; - - if (mask_pic->component_alpha) { - struct xa_composite_blend blend; - if (!blend_for_op(&blend, comp->op, src_pic, mask_pic, NULL)) - return -XA_ERR_INVAL; - - if (blend.alpha_src) { - fs_traits |= FS_CA_SRCALPHA; - } else - fs_traits |= FS_CA_FULL; - } + if (mask_pic->component_alpha) + fs_traits |= FS_CA; + if (mask_pic->src_pict) { + if (!xa_handle_src_pict(ctx, mask_pic->src_pict, true)) + return -XA_ERR_INVAL; + + if (ctx->has_solid_src) { + vs_traits &= ~VS_MASK; + fs_traits &= ~FS_MASK; + } else { + vs_traits |= VS_MASK_SRC; + fs_traits |= FS_MASK_SRC; + } + } else { + if (mask_pic->wrap == xa_wrap_clamp_to_border && + mask_pic->has_transform) + fs_traits |= FS_MASK_REPEAT_NONE; - fs_traits |= picture_format_fixups(mask_pic, 1); + fs_traits |= picture_format_fixups(mask_pic, 1); + } } if (ctx->srf->format == PIPE_FORMAT_L8_UNORM || @@ -396,42 +453,35 @@ struct pipe_context *pipe = ctx->pipe; struct xa_picture *src_pic = comp->src; struct xa_picture *mask_pic = comp->mask; + int num_samplers = 0; - ctx->num_bound_samplers = 0; - + xa_ctx_sampler_views_destroy(ctx); memset(&src_sampler, 0, sizeof(struct pipe_sampler_state)); memset(&mask_sampler, 0, sizeof(struct pipe_sampler_state)); - if (src_pic) { - if (ctx->has_solid_color) { - samplers[0] = NULL; - pipe_sampler_view_reference(&ctx->bound_sampler_views[0], NULL); - } else { - unsigned src_wrap = xa_repeat_to_gallium(src_pic->wrap); - int filter; - - (void) xa_filter_to_gallium(src_pic->filter, &filter); - - src_sampler.wrap_s = src_wrap; - src_sampler.wrap_t = src_wrap; - src_sampler.min_img_filter = filter; - src_sampler.mag_img_filter = filter; - src_sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; - src_sampler.normalized_coords = 1; - samplers[0] = &src_sampler; - ctx->num_bound_samplers = 1; - u_sampler_view_default_template(&view_templ, 
- src_pic->srf->tex, - src_pic->srf->tex->format); - src_view = pipe->create_sampler_view(pipe, src_pic->srf->tex, - &view_templ); - pipe_sampler_view_reference(&ctx->bound_sampler_views[0], NULL); - ctx->bound_sampler_views[0] = src_view; - } + if (src_pic && !ctx->has_solid_src) { + unsigned src_wrap = xa_repeat_to_gallium(src_pic->wrap); + int filter; + + (void) xa_filter_to_gallium(src_pic->filter, &filter); + + src_sampler.wrap_s = src_wrap; + src_sampler.wrap_t = src_wrap; + src_sampler.min_img_filter = filter; + src_sampler.mag_img_filter = filter; + src_sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; + src_sampler.normalized_coords = 1; + samplers[0] = &src_sampler; + u_sampler_view_default_template(&view_templ, + src_pic->srf->tex,+ src_pic->srf->tex->format); + src_view = pipe->create_sampler_view(pipe, src_pic->srf->tex, + &view_templ); + ctx->bound_sampler_views[0] = src_view; + num_samplers++; } - if (mask_pic) { - unsigned mask_wrap = xa_repeat_to_gallium(mask_pic->wrap); + if (mask_pic && !ctx->has_solid_mask) { + unsigned mask_wrap = xa_repeat_to_gallium(mask_pic->wrap); int filter; (void) xa_filter_to_gallium(mask_pic->filter, &filter); @@ -442,31 +492,21 @@ mask_sampler.mag_img_filter = filter; src_sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; mask_sampler.normalized_coords = 1; - samplers[1] = &mask_sampler; - ctx->num_bound_samplers = 2; + samplers[num_samplers] = &mask_sampler; u_sampler_view_default_template(&view_templ, mask_pic->srf->tex, mask_pic->srf->tex->format); src_view = pipe->create_sampler_view(pipe, mask_pic->srf->tex, &view_templ); - pipe_sampler_view_reference(&ctx->bound_sampler_views[1], NULL); - ctx->bound_sampler_views[1] = src_view; - - - /* - * If src is a solid color, we have no src view, so set up a - * dummy one that will not be used anyway. 
- */ - if (ctx->bound_sampler_views[0] == NULL) - pipe_sampler_view_reference(&ctx->bound_sampler_views[0], - src_view); - + ctx->bound_sampler_views[num_samplers] = src_view; + num_samplers++; } - cso_set_samplers(ctx->cso, PIPE_SHADER_FRAGMENT, ctx->num_bound_samplers, + cso_set_samplers(ctx->cso, PIPE_SHADER_FRAGMENT, num_samplers, (const struct pipe_sampler_state **)samplers); - cso_set_sampler_views(ctx->cso, PIPE_SHADER_FRAGMENT, ctx->num_bound_samplers, + cso_set_sampler_views(ctx->cso, PIPE_SHADER_FRAGMENT, num_samplers, ctx->bound_sampler_views); + ctx->num_bound_samplers = num_samplers; } XA_EXPORT int @@ -476,9 +516,6 @@ struct xa_surface *dst_srf = comp->dst->srf; int ret; - if (comp->mask && !comp->mask->srf) - return -XA_ERR_INVAL; - ret = xa_ctx_srf_create(ctx, dst_srf); if (ret != XA_ERR_NONE) return ret; @@ -511,8 +548,8 @@ int dstX, int dstY, int width, int height) { if (ctx->num_bound_samplers == 0 ) { /* solid fill */ - renderer_solid(ctx, dstX, dstY, dstX + width, dstY + height, - ctx->solid_color); + xa_scissor_update(ctx, dstX, dstY, dstX + width, dstY + height); + renderer_solid(ctx, dstX, dstY, dstX + width, dstY + height); } else { const struct xa_composite *comp = ctx->comp; int pos[6] = {srcX, srcY, maskX, maskY, dstX, dstY}; @@ -537,7 +574,8 @@ renderer_draw_flush(ctx); ctx->comp = NULL; - ctx->has_solid_color = FALSE; + ctx->has_solid_src = FALSE; + ctx->has_solid_mask = FALSE; xa_ctx_sampler_views_destroy(ctx); } diff -Nru mesa-18.3.3/src/gallium/state_trackers/xa/xa_composite.h mesa-19.0.1/src/gallium/state_trackers/xa/xa_composite.h --- mesa-18.3.3/src/gallium/state_trackers/xa/xa_composite.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/xa/xa_composite.h 2019-03-31 23:16:37.000000000 +0000 @@ -74,18 +74,34 @@ * Src picture types. 
*/ enum xa_composite_src_pict_type { - xa_src_pict_solid_fill + xa_src_pict_solid_fill, + xa_src_pict_float_solid_fill }; + +/* + * struct xa_pict_solid_fill - Description of a solid_fill picture + * Deprecated. Use struct xa_pict_float_solid_fill instead. + */ struct xa_pict_solid_fill { enum xa_composite_src_pict_type type; unsigned int class; uint32_t color; }; +/* + * struct xa_pict_solid_fill - Description of a solid_fill picture + * with color channels represented by floats. + */ +struct xa_pict_float_solid_fill { + enum xa_composite_src_pict_type type; + float color[4]; /* R, G, B, A */ +}; + union xa_source_pict { - unsigned int type; + enum xa_composite_src_pict_type type; struct xa_pict_solid_fill solid_fill; + struct xa_pict_float_solid_fill float_solid_fill; }; struct xa_picture { diff -Nru mesa-18.3.3/src/gallium/state_trackers/xa/xa_context.c mesa-19.0.1/src/gallium/state_trackers/xa/xa_context.c --- mesa-18.3.3/src/gallium/state_trackers/xa/xa_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/xa/xa_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -309,7 +309,7 @@ xa_pixel_to_float4_a8(fg, ctx->solid_color); else xa_pixel_to_float4(fg, ctx->solid_color); - ctx->has_solid_color = 1; + ctx->has_solid_src = 1; ctx->dst = dst; @@ -321,8 +321,8 @@ exa->solid_color[2], exa->solid_color[3]); #endif - vs_traits = VS_SOLID_FILL; - fs_traits = FS_SOLID_FILL; + vs_traits = VS_SRC_SRC | VS_COMPOSITE; + fs_traits = FS_SRC_SRC | VS_COMPOSITE; renderer_bind_destination(ctx, ctx->srf); bind_solid_blend_state(ctx); @@ -343,7 +343,7 @@ xa_solid(struct xa_context *ctx, int x, int y, int width, int height) { xa_scissor_update(ctx, x, y, x + width, y + height); - renderer_solid(ctx, x, y, x + width, y + height, ctx->solid_color); + renderer_solid(ctx, x, y, x + width, y + height); } XA_EXPORT void @@ -351,7 +351,7 @@ { renderer_draw_flush(ctx); ctx->comp = NULL; - ctx->has_solid_color = FALSE; + ctx->has_solid_src = FALSE; 
ctx->num_bound_samplers = 0; } diff -Nru mesa-18.3.3/src/gallium/state_trackers/xa/xa_priv.h mesa-19.0.1/src/gallium/state_trackers/xa/xa_priv.h --- mesa-18.3.3/src/gallium/state_trackers/xa/xa_priv.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/xa/xa_priv.h 2019-03-31 23:16:37.000000000 +0000 @@ -113,7 +113,8 @@ int simple_copy; - int has_solid_color; + int has_solid_src; + int has_solid_mask; float solid_color[4]; unsigned int num_bound_samplers; @@ -145,35 +146,27 @@ enum xa_vs_traits { VS_COMPOSITE = 1 << 0, VS_MASK = 1 << 1, - VS_SOLID_FILL = 1 << 2, - VS_LINGRAD_FILL = 1 << 3, - VS_RADGRAD_FILL = 1 << 4, - VS_YUV = 1 << 5, - - VS_FILL = (VS_SOLID_FILL | VS_LINGRAD_FILL | VS_RADGRAD_FILL) + VS_SRC_SRC = 1 << 2, + VS_MASK_SRC = 1 << 3, + VS_YUV = 1 << 4, }; enum xa_fs_traits { FS_COMPOSITE = 1 << 0, FS_MASK = 1 << 1, - FS_SOLID_FILL = 1 << 2, - FS_LINGRAD_FILL = 1 << 3, - FS_RADGRAD_FILL = 1 << 4, - FS_CA_FULL = 1 << 5, /* src.rgba * mask.rgba */ - FS_CA_SRCALPHA = 1 << 6, /* src.aaaa * mask.rgba */ - FS_YUV = 1 << 7, - FS_SRC_REPEAT_NONE = 1 << 8, - FS_MASK_REPEAT_NONE = 1 << 9, - FS_SRC_SWIZZLE_RGB = 1 << 10, - FS_MASK_SWIZZLE_RGB = 1 << 11, - FS_SRC_SET_ALPHA = 1 << 12, - FS_MASK_SET_ALPHA = 1 << 13, - FS_SRC_LUMINANCE = 1 << 14, - FS_MASK_LUMINANCE = 1 << 15, - FS_DST_LUMINANCE = 1 << 16, - - FS_FILL = (FS_SOLID_FILL | FS_LINGRAD_FILL | FS_RADGRAD_FILL), - FS_COMPONENT_ALPHA = (FS_CA_FULL | FS_CA_SRCALPHA) + FS_SRC_SRC = 1 << 2, + FS_MASK_SRC = 1 << 3, + FS_YUV = 1 << 4, + FS_SRC_REPEAT_NONE = 1 << 5, + FS_MASK_REPEAT_NONE = 1 << 6, + FS_SRC_SWIZZLE_RGB = 1 << 7, + FS_MASK_SWIZZLE_RGB = 1 << 8, + FS_SRC_SET_ALPHA = 1 << 9, + FS_MASK_SET_ALPHA = 1 << 10, + FS_SRC_LUMINANCE = 1 << 11, + FS_MASK_LUMINANCE = 1 << 12, + FS_DST_LUMINANCE = 1 << 13, + FS_CA = 1 << 14, }; struct xa_shader { @@ -282,7 +275,7 @@ void renderer_begin_solid(struct xa_context *r); void renderer_solid(struct xa_context *r, - int x0, int y0, int x1, int 
y1, float *color); + int x0, int y0, int x1, int y1); void renderer_begin_textures(struct xa_context *r); diff -Nru mesa-18.3.3/src/gallium/state_trackers/xa/xa_renderer.c mesa-19.0.1/src/gallium/state_trackers/xa/xa_renderer.c --- mesa-18.3.3/src/gallium/state_trackers/xa/xa_renderer.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/xa/xa_renderer.c 2019-03-31 23:16:37.000000000 +0000 @@ -46,14 +46,14 @@ int shader_type, const float *params, int param_bytes); static inline boolean -is_affine(float *matrix) +is_affine(const float *matrix) { return floatIsZero(matrix[2]) && floatIsZero(matrix[5]) && floatsEqual(matrix[8], 1); } static inline void -map_point(float *mat, float x, float y, float *out_x, float *out_y) +map_point(const float *mat, float x, float y, float *out_x, float *out_y) { if (!mat) { *out_x = x; @@ -137,7 +137,7 @@ } static inline void -add_vertex_color(struct xa_context *r, float x, float y, float color[4]) +add_vertex_none(struct xa_context *r, float x, float y) { float *vertex = r->buffer + r->buffer_size; @@ -146,12 +146,7 @@ vertex[2] = 0.f; /*z */ vertex[3] = 1.f; /*w */ - vertex[4] = color[0]; /*r */ - vertex[5] = color[1]; /*g */ - vertex[6] = color[2]; /*b */ - vertex[7] = color[3]; /*a */ - - r->buffer_size += 8; + r->buffer_size += 4; } static inline void @@ -197,47 +192,55 @@ } static void -add_vertex_data1(struct xa_context *r, - float srcX, float srcY, float dstX, float dstY, - float width, float height, - struct pipe_resource *src, const float *src_matrix) -{ - float s0, t0, s1, t1, s2, t2, s3, t3; - float pt0[2], pt1[2], pt2[2], pt3[2]; - - pt0[0] = srcX; - pt0[1] = srcY; - pt1[0] = (srcX + width); - pt1[1] = srcY; - pt2[0] = (srcX + width); - pt2[1] = (srcY + height); - pt3[0] = srcX; - pt3[1] = (srcY + height); +compute_src_coords(float sx, float sy, const struct pipe_resource *src, + const float *src_matrix, + float width, float height, + float tc0[2], float tc1[2], float tc2[2], float tc3[2]) +{ + 
tc0[0] = sx; + tc0[1] = sy; + tc1[0] = sx + width; + tc1[1] = sy; + tc2[0] = sx + width; + tc2[1] = sy + height; + tc3[0] = sx; + tc3[1] = sy + height; if (src_matrix) { - map_point((float *)src_matrix, pt0[0], pt0[1], &pt0[0], &pt0[1]); - map_point((float *)src_matrix, pt1[0], pt1[1], &pt1[0], &pt1[1]); - map_point((float *)src_matrix, pt2[0], pt2[1], &pt2[0], &pt2[1]); - map_point((float *)src_matrix, pt3[0], pt3[1], &pt3[0], &pt3[1]); + map_point(src_matrix, tc0[0], tc0[1], &tc0[0], &tc0[1]); + map_point(src_matrix, tc1[0], tc1[1], &tc1[0], &tc1[1]); + map_point(src_matrix, tc2[0], tc2[1], &tc2[0], &tc2[1]); + map_point(src_matrix, tc3[0], tc3[1], &tc3[0], &tc3[1]); } - s0 = pt0[0] / src->width0; - s1 = pt1[0] / src->width0; - s2 = pt2[0] / src->width0; - s3 = pt3[0] / src->width0; - t0 = pt0[1] / src->height0; - t1 = pt1[1] / src->height0; - t2 = pt2[1] / src->height0; - t3 = pt3[1] / src->height0; + tc0[0] /= src->width0; + tc1[0] /= src->width0; + tc2[0] /= src->width0; + tc3[0] /= src->width0; + tc0[1] /= src->height0; + tc1[1] /= src->height0; + tc2[1] /= src->height0; + tc3[1] /= src->height0; +} + +static void +add_vertex_data1(struct xa_context *r, + float srcX, float srcY, float dstX, float dstY, + float width, float height, + const struct pipe_resource *src, const float *src_matrix) +{ + float tc0[2], tc1[2], tc2[2], tc3[2]; + compute_src_coords(srcX, srcY, src, src_matrix, width, height, + tc0, tc1, tc2, tc3); /* 1st vertex */ - add_vertex_1tex(r, dstX, dstY, s0, t0); + add_vertex_1tex(r, dstX, dstY, tc0[0], tc0[1]); /* 2nd vertex */ - add_vertex_1tex(r, dstX + width, dstY, s1, t1); + add_vertex_1tex(r, dstX + width, dstY, tc1[0], tc1[1]); /* 3rd vertex */ - add_vertex_1tex(r, dstX + width, dstY + height, s2, t2); + add_vertex_1tex(r, dstX + width, dstY + height, tc2[0], tc2[1]); /* 4th vertex */ - add_vertex_1tex(r, dstX, dstY + height, s3, t3); + add_vertex_1tex(r, dstX, dstY + height, tc3[0], tc3[1]); } static void @@ -248,53 +251,26 @@ struct 
pipe_resource *mask, const float *src_matrix, const float *mask_matrix) { - float src_s0, src_t0, src_s1, src_t1; - float mask_s0, mask_t0, mask_s1, mask_t1; - float spt0[2], spt1[2]; - float mpt0[2], mpt1[2]; - - spt0[0] = srcX; - spt0[1] = srcY; - spt1[0] = srcX + width; - spt1[1] = srcY + height; - - mpt0[0] = maskX; - mpt0[1] = maskY; - mpt1[0] = maskX + width; - mpt1[1] = maskY + height; - - if (src_matrix) { - map_point((float *)src_matrix, spt0[0], spt0[1], &spt0[0], &spt0[1]); - map_point((float *)src_matrix, spt1[0], spt1[1], &spt1[0], &spt1[1]); - } + float spt0[2], spt1[2], spt2[2], spt3[2]; + float mpt0[2], mpt1[2], mpt2[2], mpt3[2]; - if (mask_matrix) { - map_point((float *)mask_matrix, mpt0[0], mpt0[1], &mpt0[0], &mpt0[1]); - map_point((float *)mask_matrix, mpt1[0], mpt1[1], &mpt1[0], &mpt1[1]); - } - - src_s0 = spt0[0] / src->width0; - src_t0 = spt0[1] / src->height0; - src_s1 = spt1[0] / src->width0; - src_t1 = spt1[1] / src->height0; - - mask_s0 = mpt0[0] / mask->width0; - mask_t0 = mpt0[1] / mask->height0; - mask_s1 = mpt1[0] / mask->width0; - mask_t1 = mpt1[1] / mask->height0; + compute_src_coords(srcX, srcY, src, src_matrix, width, height, + spt0, spt1, spt2, spt3); + compute_src_coords(maskX, maskY, mask, mask_matrix, width, height, + mpt0, mpt1, mpt2, mpt3); /* 1st vertex */ add_vertex_2tex(r, dstX, dstY, - src_s0, src_t0, mask_s0, mask_t0); + spt0[0], spt0[1], mpt0[0], mpt0[1]); /* 2nd vertex */ add_vertex_2tex(r, dstX + width, dstY, - src_s1, src_t0, mask_s1, mask_t0); + spt1[0], spt1[1], mpt1[0], mpt1[1]); /* 3rd vertex */ add_vertex_2tex(r, dstX + width, dstY + height, - src_s1, src_t1, mask_s1, mask_t1); + spt2[0], spt2[1], mpt2[0], mpt2[1]); /* 4th vertex */ add_vertex_2tex(r, dstX, dstY + height, - src_s0, src_t1, mask_s0, mask_t1); + spt3[0], spt3[1], mpt3[0], mpt3[1]); } static void @@ -554,27 +530,29 @@ renderer_begin_solid(struct xa_context *r) { r->buffer_size = 0; - r->attrs_per_vertex = 2; + r->attrs_per_vertex = 1; + 
renderer_set_constants(r, PIPE_SHADER_FRAGMENT, r->solid_color, + 4 * sizeof(float)); } void renderer_solid(struct xa_context *r, - int x0, int y0, int x1, int y1, float *color) + int x0, int y0, int x1, int y1) { /* * debug_printf("solid rect[(%d, %d), (%d, %d)], rgba[%f, %f, %f, %f]\n", * x0, y0, x1, y1, color[0], color[1], color[2], color[3]); */ - renderer_draw_conditional(r, 4 * 8); + renderer_draw_conditional(r, 4 * 4); /* 1st vertex */ - add_vertex_color(r, x0, y0, color); + add_vertex_none(r, x0, y0); /* 2nd vertex */ - add_vertex_color(r, x1, y0, color); + add_vertex_none(r, x1, y0); /* 3rd vertex */ - add_vertex_color(r, x1, y1, color); + add_vertex_none(r, x1, y1); /* 4th vertex */ - add_vertex_color(r, x0, y1, color); + add_vertex_none(r, x0, y1); } void @@ -588,6 +566,9 @@ { r->attrs_per_vertex = 1 + r->num_bound_samplers; r->buffer_size = 0; + if (r->has_solid_src || r->has_solid_mask) + renderer_set_constants(r, PIPE_SHADER_FRAGMENT, r->solid_color, + 4 * sizeof(float)); } void @@ -617,11 +598,19 @@ switch(r->attrs_per_vertex) { case 2: renderer_draw_conditional(r, 4 * 8); - add_vertex_data1(r, - pos[0], pos[1], /* src */ - pos[4], pos[5], /* dst */ - width, height, - sampler_view[0]->texture, src_matrix); + if (!r->has_solid_src) { + add_vertex_data1(r, + pos[0], pos[1], /* src */ + pos[4], pos[5], /* dst */ + width, height, + sampler_view[0]->texture, src_matrix); + } else { + add_vertex_data1(r, + pos[2], pos[3], /* mask */ + pos[4], pos[5], /* dst */ + width, height, + sampler_view[0]->texture, mask_matrix); + } break; case 3: renderer_draw_conditional(r, 4 * 12); diff -Nru mesa-18.3.3/src/gallium/state_trackers/xa/xa_tgsi.c mesa-19.0.1/src/gallium/state_trackers/xa/xa_tgsi.c --- mesa-18.3.3/src/gallium/state_trackers/xa/xa_tgsi.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/xa/xa_tgsi.c 2019-03-31 23:16:37.000000000 +0000 @@ -48,19 +48,18 @@ * CONST[1] = (-1, -1, 0, 0) * * OUT[0] = vertex pos - * OUT[1] = src 
tex coord | solid fill color + * OUT[1] = src tex coord * OUT[2] = mask tex coord * OUT[3] = dst tex coord */ -/* Fragment shader: - * SAMP[0] = src - * SAMP[1] = mask - * SAMP[2] = dst - * IN[0] = pos src | solid fill color - * IN[1] = pos mask - * IN[2] = pos dst - * CONST[0] = (0, 0, 0, 1) +/* Fragment shader. Samplers are allocated when needed. + * SAMP[0] = sampler for first texture (src or mask if src is solid) + * SAMP[1] = sampler for second texture (mask or none) + * IN[0] = first texture coordinates if present + * IN[1] = second texture coordinates if present + * CONST[0] = Solid color (src if src solid or mask if mask solid + * or src in mask if both solid). * * OUT[0] = color */ @@ -71,21 +70,19 @@ const char *strings[] = { "FS_COMPOSITE", /* = 1 << 0, */ "FS_MASK", /* = 1 << 1, */ - "FS_SOLID_FILL", /* = 1 << 2, */ - "FS_LINGRAD_FILL", /* = 1 << 3, */ - "FS_RADGRAD_FILL", /* = 1 << 4, */ - "FS_CA_FULL", /* = 1 << 5, *//* src.rgba * mask.rgba */ - "FS_CA_SRCALPHA", /* = 1 << 6, *//* src.aaaa * mask.rgba */ - "FS_YUV", /* = 1 << 7, */ - "FS_SRC_REPEAT_NONE", /* = 1 << 8, */ - "FS_MASK_REPEAT_NONE", /* = 1 << 9, */ - "FS_SRC_SWIZZLE_RGB", /* = 1 << 10, */ - "FS_MASK_SWIZZLE_RGB", /* = 1 << 11, */ - "FS_SRC_SET_ALPHA", /* = 1 << 12, */ - "FS_MASK_SET_ALPHA", /* = 1 << 13, */ - "FS_SRC_LUMINANCE", /* = 1 << 14, */ - "FS_MASK_LUMINANCE", /* = 1 << 15, */ - "FS_DST_LUMINANCE", /* = 1 << 15, */ + "FS_SRC_SRC", /* = 1 << 2, */ + "FS_MASK_SRC", /* = 1 << 3, */ + "FS_YUV", /* = 1 << 4, */ + "FS_SRC_REPEAT_NONE", /* = 1 << 5, */ + "FS_MASK_REPEAT_NONE", /* = 1 << 6, */ + "FS_SRC_SWIZZLE_RGB", /* = 1 << 7, */ + "FS_MASK_SWIZZLE_RGB", /* = 1 << 8, */ + "FS_SRC_SET_ALPHA", /* = 1 << 9, */ + "FS_MASK_SET_ALPHA", /* = 1 << 10, */ + "FS_SRC_LUMINANCE", /* = 1 << 11, */ + "FS_MASK_LUMINANCE", /* = 1 << 12, */ + "FS_DST_LUMINANCE", /* = 1 << 13, */ + "FS_CA", /* = 1 << 14, */ }; int i, k; @@ -111,18 +108,20 @@ struct ureg_dst dst, struct ureg_src src, struct ureg_src 
mask, - unsigned component_alpha, unsigned mask_luminance) + unsigned mask_luminance, boolean component_alpha) { - if (component_alpha == FS_CA_FULL) { - ureg_MUL(ureg, dst, src, mask); - } else if (component_alpha == FS_CA_SRCALPHA) { - ureg_MUL(ureg, dst, ureg_scalar(src, TGSI_SWIZZLE_W), mask); - } else { - if (mask_luminance) - ureg_MUL(ureg, dst, src, ureg_scalar(mask, TGSI_SWIZZLE_X)); - else - ureg_MUL(ureg, dst, src, ureg_scalar(mask, TGSI_SWIZZLE_W)); - } + if (mask_luminance) + if (component_alpha) { + ureg_MOV(ureg, dst, src); + ureg_MUL(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), + src, ureg_scalar(mask, TGSI_SWIZZLE_X)); + } else { + ureg_MUL(ureg, dst, src, ureg_scalar(mask, TGSI_SWIZZLE_X)); + } + else if (!component_alpha) + ureg_MUL(ureg, dst, src, ureg_scalar(mask, TGSI_SWIZZLE_W)); + else + ureg_MUL(ureg, dst, src, mask); } static struct ureg_src @@ -139,125 +138,6 @@ return ret; } -static void -linear_gradient(struct ureg_program *ureg, - struct ureg_dst out, - struct ureg_src pos, - struct ureg_src sampler, - struct ureg_src coords, - struct ureg_src const0124, - struct ureg_src matrow0, - struct ureg_src matrow1, struct ureg_src matrow2) -{ - struct ureg_dst temp0 = ureg_DECL_temporary(ureg); - struct ureg_dst temp1 = ureg_DECL_temporary(ureg); - struct ureg_dst temp2 = ureg_DECL_temporary(ureg); - struct ureg_dst temp3 = ureg_DECL_temporary(ureg); - struct ureg_dst temp4 = ureg_DECL_temporary(ureg); - struct ureg_dst temp5 = ureg_DECL_temporary(ureg); - - ureg_MOV(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY), pos); - ureg_MOV(ureg, - ureg_writemask(temp0, TGSI_WRITEMASK_Z), - ureg_scalar(const0124, TGSI_SWIZZLE_Y)); - - ureg_DP3(ureg, temp1, matrow0, ureg_src(temp0)); - ureg_DP3(ureg, temp2, matrow1, ureg_src(temp0)); - ureg_DP3(ureg, temp3, matrow2, ureg_src(temp0)); - ureg_RCP(ureg, temp3, ureg_src(temp3)); - ureg_MUL(ureg, temp1, ureg_src(temp1), ureg_src(temp3)); - ureg_MUL(ureg, temp2, ureg_src(temp2), ureg_src(temp3)); - - 
ureg_MOV(ureg, ureg_writemask(temp4, TGSI_WRITEMASK_X), ureg_src(temp1)); - ureg_MOV(ureg, ureg_writemask(temp4, TGSI_WRITEMASK_Y), ureg_src(temp2)); - - ureg_MUL(ureg, temp0, - ureg_scalar(coords, TGSI_SWIZZLE_Y), - ureg_scalar(ureg_src(temp4), TGSI_SWIZZLE_Y)); - ureg_MAD(ureg, temp1, - ureg_scalar(coords, TGSI_SWIZZLE_X), - ureg_scalar(ureg_src(temp4), TGSI_SWIZZLE_X), ureg_src(temp0)); - - ureg_MUL(ureg, temp2, ureg_src(temp1), ureg_scalar(coords, TGSI_SWIZZLE_Z)); - - ureg_TEX(ureg, out, TGSI_TEXTURE_1D, ureg_src(temp2), sampler); - - ureg_release_temporary(ureg, temp0); - ureg_release_temporary(ureg, temp1); - ureg_release_temporary(ureg, temp2); - ureg_release_temporary(ureg, temp3); - ureg_release_temporary(ureg, temp4); - ureg_release_temporary(ureg, temp5); -} - -static void -radial_gradient(struct ureg_program *ureg, - struct ureg_dst out, - struct ureg_src pos, - struct ureg_src sampler, - struct ureg_src coords, - struct ureg_src const0124, - struct ureg_src matrow0, - struct ureg_src matrow1, struct ureg_src matrow2) -{ - struct ureg_dst temp0 = ureg_DECL_temporary(ureg); - struct ureg_dst temp1 = ureg_DECL_temporary(ureg); - struct ureg_dst temp2 = ureg_DECL_temporary(ureg); - struct ureg_dst temp3 = ureg_DECL_temporary(ureg); - struct ureg_dst temp4 = ureg_DECL_temporary(ureg); - struct ureg_dst temp5 = ureg_DECL_temporary(ureg); - - ureg_MOV(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY), pos); - ureg_MOV(ureg, - ureg_writemask(temp0, TGSI_WRITEMASK_Z), - ureg_scalar(const0124, TGSI_SWIZZLE_Y)); - - ureg_DP3(ureg, temp1, matrow0, ureg_src(temp0)); - ureg_DP3(ureg, temp2, matrow1, ureg_src(temp0)); - ureg_DP3(ureg, temp3, matrow2, ureg_src(temp0)); - ureg_RCP(ureg, temp3, ureg_src(temp3)); - ureg_MUL(ureg, temp1, ureg_src(temp1), ureg_src(temp3)); - ureg_MUL(ureg, temp2, ureg_src(temp2), ureg_src(temp3)); - - ureg_MOV(ureg, ureg_writemask(temp5, TGSI_WRITEMASK_X), ureg_src(temp1)); - ureg_MOV(ureg, ureg_writemask(temp5, TGSI_WRITEMASK_Y), 
ureg_src(temp2)); - - ureg_MUL(ureg, temp0, ureg_scalar(coords, TGSI_SWIZZLE_Y), - ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y)); - ureg_MAD(ureg, temp1, - ureg_scalar(coords, TGSI_SWIZZLE_X), - ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X), ureg_src(temp0)); - ureg_ADD(ureg, temp1, ureg_src(temp1), ureg_src(temp1)); - ureg_MUL(ureg, temp3, - ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y), - ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y)); - ureg_MAD(ureg, temp4, - ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X), - ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X), ureg_src(temp3)); - ureg_MOV(ureg, temp4, ureg_negate(ureg_src(temp4))); - ureg_MUL(ureg, temp2, ureg_scalar(coords, TGSI_SWIZZLE_Z), ureg_src(temp4)); - ureg_MUL(ureg, temp0, - ureg_scalar(const0124, TGSI_SWIZZLE_W), ureg_src(temp2)); - ureg_MUL(ureg, temp3, ureg_src(temp1), ureg_src(temp1)); - ureg_ADD(ureg, temp2, ureg_src(temp3), ureg_negate(ureg_src(temp0))); - ureg_RSQ(ureg, temp2, ureg_abs(ureg_src(temp2))); - ureg_RCP(ureg, temp2, ureg_src(temp2)); - ureg_ADD(ureg, temp1, ureg_src(temp2), ureg_negate(ureg_src(temp1))); - ureg_ADD(ureg, temp0, - ureg_scalar(coords, TGSI_SWIZZLE_Z), - ureg_scalar(coords, TGSI_SWIZZLE_Z)); - ureg_RCP(ureg, temp0, ureg_src(temp0)); - ureg_MUL(ureg, temp2, ureg_src(temp1), ureg_src(temp0)); - ureg_TEX(ureg, out, TGSI_TEXTURE_1D, ureg_src(temp2), sampler); - - ureg_release_temporary(ureg, temp0); - ureg_release_temporary(ureg, temp1); - ureg_release_temporary(ureg, temp2); - ureg_release_temporary(ureg, temp3); - ureg_release_temporary(ureg, temp4); - ureg_release_temporary(ureg, temp5); -} - static void * create_vs(struct pipe_context *pipe, unsigned vs_traits) { @@ -265,10 +145,11 @@ struct ureg_src src; struct ureg_dst dst; struct ureg_src const0, const1; - boolean is_fill = (vs_traits & VS_FILL) != 0; boolean is_composite = (vs_traits & VS_COMPOSITE) != 0; boolean has_mask = (vs_traits & VS_MASK) != 0; boolean is_yuv = (vs_traits & VS_YUV) != 0; + boolean is_src_src = 
(vs_traits & VS_SRC_SRC) != 0; + boolean is_mask_src = (vs_traits & VS_MASK_SRC) != 0; unsigned input_slot = 0; ureg = ureg_create(PIPE_SHADER_VERTEX); @@ -279,8 +160,6 @@ const1 = ureg_DECL_constant(ureg, 1); /* it has to be either a fill or a composite op */ - debug_assert((is_fill ^ is_composite) ^ is_yuv); - src = ureg_DECL_vs_input(ureg, input_slot++); dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); src = vs_normalize_coords(ureg, src, const0, const1); @@ -293,21 +172,17 @@ } if (is_composite) { - src = ureg_DECL_vs_input(ureg, input_slot++); - dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0); - ureg_MOV(ureg, dst, src); - } - - if (is_fill) { - src = ureg_DECL_vs_input(ureg, input_slot++); - dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); - ureg_MOV(ureg, dst, src); - } - - if (has_mask) { - src = ureg_DECL_vs_input(ureg, input_slot++); - dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 1); - ureg_MOV(ureg, dst, src); + if (!is_src_src || (has_mask && !is_mask_src)) { + src = ureg_DECL_vs_input(ureg, input_slot++); + dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0); + ureg_MOV(ureg, dst, src); + } + + if (!is_src_src && (has_mask && !is_mask_src)) { + src = ureg_DECL_vs_input(ureg, input_slot++); + dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 1); + ureg_MOV(ureg, dst, src); + } } ureg_END(ureg); @@ -383,7 +258,7 @@ struct ureg_dst dst, struct ureg_src coords, struct ureg_src sampler, - struct ureg_src imm0, + const struct ureg_src *imm0, boolean repeat_none, boolean swizzle, boolean set_alpha) { if (repeat_none) { @@ -394,11 +269,11 @@ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, - TGSI_SWIZZLE_Y), ureg_scalar(imm0, + TGSI_SWIZZLE_Y), ureg_scalar(*imm0, TGSI_SWIZZLE_X)); ureg_SLT(ureg, tmp0, ureg_swizzle(coords, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, - TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y), ureg_scalar(imm0, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y), ureg_scalar(*imm0, TGSI_SWIZZLE_W)); ureg_MIN(ureg, tmp0, ureg_src(tmp0), ureg_src(tmp1)); 
ureg_MIN(ureg, tmp0, ureg_scalar(ureg_src(tmp0), TGSI_SWIZZLE_X), @@ -412,7 +287,7 @@ if (set_alpha) ureg_MOV(ureg, ureg_writemask(tmp1, TGSI_WRITEMASK_W), - ureg_scalar(imm0, TGSI_SWIZZLE_W)); + ureg_scalar(*imm0, TGSI_SWIZZLE_W)); ureg_MUL(ureg, dst, ureg_src(tmp1), ureg_src(tmp0)); ureg_release_temporary(ureg, tmp0); ureg_release_temporary(ureg, tmp1); @@ -432,7 +307,32 @@ if (set_alpha) ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), - ureg_scalar(imm0, TGSI_SWIZZLE_W)); + ureg_scalar(*imm0, TGSI_SWIZZLE_W)); + } +} + +static void +read_input(struct ureg_program *ureg, + struct ureg_dst dst, + const struct ureg_src *imm0, + boolean repeat_none, boolean swizzle, boolean set_alpha, + boolean is_src, unsigned *cur_constant, unsigned *cur_sampler) +{ + struct ureg_src input, sampler; + + if (is_src) { + input = ureg_DECL_constant(ureg, (*cur_constant)++); + ureg_MOV(ureg, dst, input); + } else { + sampler = ureg_DECL_sampler(ureg, *cur_sampler); + ureg_DECL_sampler_view(ureg, *cur_sampler, TGSI_TEXTURE_2D, + TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT, + TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT); + input = ureg_DECL_fs_input(ureg, + TGSI_SEMANTIC_GENERIC, (*cur_sampler)++, + TGSI_INTERPOLATE_PERSPECTIVE); + xrender_tex(ureg, dst, input, sampler, imm0, + repeat_none, swizzle, set_alpha); } } @@ -440,18 +340,10 @@ create_fs(struct pipe_context *pipe, unsigned fs_traits) { struct ureg_program *ureg; - struct ureg_src /*dst_sampler, */ src_sampler, mask_sampler; - struct ureg_src /*dst_pos, */ src_input, mask_pos; struct ureg_dst src, mask; struct ureg_dst out; struct ureg_src imm0 = { 0 }; unsigned has_mask = (fs_traits & FS_MASK) != 0; - unsigned is_fill = (fs_traits & FS_FILL) != 0; - unsigned is_composite = (fs_traits & FS_COMPOSITE) != 0; - unsigned is_solid = (fs_traits & FS_SOLID_FILL) != 0; - unsigned is_lingrad = (fs_traits & FS_LINGRAD_FILL) != 0; - unsigned is_radgrad = (fs_traits & FS_RADGRAD_FILL) != 0; - unsigned comp_alpha_mask = fs_traits 
& FS_COMPONENT_ALPHA; unsigned is_yuv = (fs_traits & FS_YUV) != 0; unsigned src_repeat_none = (fs_traits & FS_SRC_REPEAT_NONE) != 0; unsigned mask_repeat_none = (fs_traits & FS_MASK_REPEAT_NONE) != 0; @@ -462,6 +354,11 @@ unsigned src_luminance = (fs_traits & FS_SRC_LUMINANCE) != 0; unsigned mask_luminance = (fs_traits & FS_MASK_LUMINANCE) != 0; unsigned dst_luminance = (fs_traits & FS_DST_LUMINANCE) != 0; + unsigned is_src_src = (fs_traits & FS_SRC_SRC) != 0; + unsigned is_mask_src = (fs_traits & FS_MASK_SRC) != 0; + boolean component_alpha = (fs_traits & FS_CA) != 0; + unsigned cur_sampler = 0; + unsigned cur_constant = 0; #if 0 print_fs_traits(fs_traits); @@ -473,9 +370,8 @@ if (ureg == NULL) return 0; - /* it has to be either a fill, a composite op or a yuv conversion */ - debug_assert((is_fill ^ is_composite) ^ is_yuv); - (void)is_yuv; + if (is_yuv) + return create_yuv_shader(pipe, ureg); out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); @@ -483,86 +379,13 @@ src_set_alpha || mask_set_alpha || src_luminance) { imm0 = ureg_imm4f(ureg, 0, 0, 0, 1); } - if (is_composite) { - src_sampler = ureg_DECL_sampler(ureg, 0); - ureg_DECL_sampler_view(ureg, 0, TGSI_TEXTURE_2D, - TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT, - TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT); - src_input = ureg_DECL_fs_input(ureg, - TGSI_SEMANTIC_GENERIC, 0, - TGSI_INTERPOLATE_PERSPECTIVE); - } else if (is_fill) { - if (is_solid) - src_input = ureg_DECL_fs_input(ureg, - TGSI_SEMANTIC_COLOR, 0, - TGSI_INTERPOLATE_PERSPECTIVE); - else - src_input = ureg_DECL_fs_input(ureg, - TGSI_SEMANTIC_POSITION, 0, - TGSI_INTERPOLATE_PERSPECTIVE); - } else { - debug_assert(is_yuv); - return create_yuv_shader(pipe, ureg); - } - if (has_mask) { - mask_sampler = ureg_DECL_sampler(ureg, 1); - ureg_DECL_sampler_view(ureg, 1, TGSI_TEXTURE_2D, - TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT, - TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT); - mask_pos = ureg_DECL_fs_input(ureg, - TGSI_SEMANTIC_GENERIC, 1, - 
TGSI_INTERPOLATE_PERSPECTIVE); - } -#if 0 /* unused right now */ - dst_sampler = ureg_DECL_sampler(ureg, 2); - ureg_DECL_sampler_view(ureg, 2, TGSI_TEXTURE_2D, - TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT, - TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT); - dst_pos = ureg_DECL_fs_input(ureg, - TGSI_SEMANTIC_POSITION, 2, - TGSI_INTERPOLATE_PERSPECTIVE); -#endif + src = (has_mask || src_luminance || dst_luminance) ? + ureg_DECL_temporary(ureg) : out; + + read_input(ureg, src, &imm0, src_repeat_none, src_swizzle, + src_set_alpha, is_src_src, &cur_constant, &cur_sampler); - if (is_composite) { - if (has_mask || src_luminance || dst_luminance) - src = ureg_DECL_temporary(ureg); - else - src = out; - xrender_tex(ureg, src, src_input, src_sampler, imm0, - src_repeat_none, src_swizzle, src_set_alpha); - } else if (is_fill) { - if (is_solid) { - if (has_mask || src_luminance || dst_luminance) - src = ureg_dst(src_input); - else - ureg_MOV(ureg, out, src_input); - } else if (is_lingrad || is_radgrad) { - struct ureg_src coords, const0124, matrow0, matrow1, matrow2; - - if (has_mask || src_luminance || dst_luminance) - src = ureg_DECL_temporary(ureg); - else - src = out; - - coords = ureg_DECL_constant(ureg, 0); - const0124 = ureg_DECL_constant(ureg, 1); - matrow0 = ureg_DECL_constant(ureg, 2); - matrow1 = ureg_DECL_constant(ureg, 3); - matrow2 = ureg_DECL_constant(ureg, 4); - - if (is_lingrad) { - linear_gradient(ureg, src, - src_input, src_sampler, - coords, const0124, matrow0, matrow1, matrow2); - } else if (is_radgrad) { - radial_gradient(ureg, src, - src_input, src_sampler, - coords, const0124, matrow0, matrow1, matrow2); - } - } else - debug_assert(!"Unknown fill type!"); - } if (src_luminance) { ureg_MOV(ureg, src, ureg_scalar(ureg_src(src), TGSI_SWIZZLE_X)); ureg_MOV(ureg, ureg_writemask(src, TGSI_WRITEMASK_XYZ), @@ -573,13 +396,12 @@ if (has_mask) { mask = ureg_DECL_temporary(ureg); - xrender_tex(ureg, mask, mask_pos, mask_sampler, imm0, - mask_repeat_none, 
mask_swizzle, mask_set_alpha); - /* src IN mask */ + read_input(ureg, mask, &imm0, mask_repeat_none, + mask_swizzle, mask_set_alpha, is_mask_src, &cur_constant, + &cur_sampler); src_in_mask(ureg, (dst_luminance) ? src : out, ureg_src(src), - ureg_src(mask), - comp_alpha_mask, mask_luminance); + ureg_src(mask), mask_luminance, component_alpha); ureg_release_temporary(ureg, mask); } diff -Nru mesa-18.3.3/src/gallium/state_trackers/xa/xa_tracker.c mesa-19.0.1/src/gallium/state_trackers/xa/xa_tracker.c --- mesa-18.3.3/src/gallium/state_trackers/xa/xa_tracker.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/xa/xa_tracker.c 2019-03-31 23:16:37.000000000 +0000 @@ -89,6 +89,15 @@ fdesc.xa_format = xa_format; switch (xa_format) { + case xa_format_a8: + if (xa->screen->is_format_supported(xa->screen, PIPE_FORMAT_R8_UNORM, + PIPE_TEXTURE_2D, 0, 0, + stype_bind[xa_type_a] | + PIPE_BIND_RENDER_TARGET)) + fdesc.format = PIPE_FORMAT_R8_UNORM; + else + fdesc.format = PIPE_FORMAT_L8_UNORM; + break; case xa_format_a8r8g8b8: fdesc.format = PIPE_FORMAT_B8G8R8A8_UNORM; break; @@ -101,15 +110,21 @@ case xa_format_x1r5g5b5: fdesc.format = PIPE_FORMAT_B5G5R5A1_UNORM; break; - case xa_format_a8: - if (xa->screen->is_format_supported(xa->screen, PIPE_FORMAT_R8_UNORM, - PIPE_TEXTURE_2D, 0, 0, - stype_bind[xa_type_a] | - PIPE_BIND_RENDER_TARGET)) - fdesc.format = PIPE_FORMAT_R8_UNORM; - else - fdesc.format = PIPE_FORMAT_L8_UNORM; - break; + case xa_format_a4r4g4b4: + fdesc.format = PIPE_FORMAT_B4G4R4A4_UNORM; + break; + case xa_format_a2b10g10r10: + fdesc.format = PIPE_FORMAT_R10G10B10A2_UNORM; + break; + case xa_format_x2b10g10r10: + fdesc.format = PIPE_FORMAT_R10G10B10X2_UNORM; + break; + case xa_format_b8g8r8a8: + fdesc.format = PIPE_FORMAT_A8R8G8B8_UNORM; + break; + case xa_format_b8g8r8x8: + fdesc.format = PIPE_FORMAT_X8R8G8B8_UNORM; + break; case xa_format_z24: fdesc.format = PIPE_FORMAT_Z24X8_UNORM; break; diff -Nru 
mesa-18.3.3/src/gallium/state_trackers/xa/xa_tracker.h.in mesa-19.0.1/src/gallium/state_trackers/xa/xa_tracker.h.in --- mesa-18.3.3/src/gallium/state_trackers/xa/xa_tracker.h.in 2017-11-23 00:32:52.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/xa/xa_tracker.h.in 2019-03-31 23:16:37.000000000 +0000 @@ -126,6 +126,11 @@ xa_format_x8r8g8b8 = xa_format(32, xa_type_argb, 0, 8, 8, 8), xa_format_r5g6b5 = xa_format(16, xa_type_argb, 0, 5, 6, 5), xa_format_x1r5g5b5 = xa_format(16, xa_type_argb, 0, 5, 5, 5), + xa_format_a4r4g4b4 = xa_format(16, xa_type_argb, 4, 4, 4, 4), + xa_format_a2b10g10r10 = xa_format(32, xa_type_abgr, 2, 10, 10, 10), + xa_format_x2b10g10r10 = xa_format(32, xa_type_abgr, 0, 10, 10, 10), + xa_format_b8g8r8a8 = xa_format(32, xa_type_bgra, 8, 8, 8, 8), + xa_format_b8g8r8x8 = xa_format(32, xa_type_bgra, 0, 8, 8, 8), xa_format_z16 = xa_format_c(16, xa_type_z, 16, 0), xa_format_z32 = xa_format_c(32, xa_type_z, 32, 0), diff -Nru mesa-18.3.3/src/gallium/state_trackers/xvmc/attributes.c mesa-19.0.1/src/gallium/state_trackers/xvmc/attributes.c --- mesa-18.3.3/src/gallium/state_trackers/xvmc/attributes.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/xvmc/attributes.c 2019-03-31 23:16:37.000000000 +0000 @@ -90,15 +90,15 @@ if (!attr) return XvMCBadContext; - if (strcmp(attr, XV_BRIGHTNESS)) + if (strcmp(attr, XV_BRIGHTNESS) == 0) context_priv->procamp.brightness = value / 1000.0f; - else if (strcmp(attr, XV_CONTRAST)) + else if (strcmp(attr, XV_CONTRAST) == 0) context_priv->procamp.contrast = value / 1000.0f + 1.0f; - else if (strcmp(attr, XV_SATURATION)) + else if (strcmp(attr, XV_SATURATION) == 0) context_priv->procamp.saturation = value / 1000.0f + 1.0f; - else if (strcmp(attr, XV_HUE)) + else if (strcmp(attr, XV_HUE) == 0) context_priv->procamp.hue = value / 1000.0f; - else if (strcmp(attr, XV_COLORSPACE)) + else if (strcmp(attr, XV_COLORSPACE) == 0) context_priv->color_standard = value ? 
VL_CSC_COLOR_STANDARD_BT_601 : VL_CSC_COLOR_STANDARD_BT_709; @@ -134,15 +134,15 @@ if (!attr) return XvMCBadContext; - if (strcmp(attr, XV_BRIGHTNESS)) + if (strcmp(attr, XV_BRIGHTNESS) == 0) *value = context_priv->procamp.brightness * 1000; - else if (strcmp(attr, XV_CONTRAST)) + else if (strcmp(attr, XV_CONTRAST) == 0) *value = context_priv->procamp.contrast * 1000 - 1000; - else if (strcmp(attr, XV_SATURATION)) + else if (strcmp(attr, XV_SATURATION) == 0) *value = context_priv->procamp.saturation * 1000 + 1000; - else if (strcmp(attr, XV_HUE)) + else if (strcmp(attr, XV_HUE) == 0) *value = context_priv->procamp.hue * 1000; - else if (strcmp(attr, XV_COLORSPACE)) + else if (strcmp(attr, XV_COLORSPACE) == 0) *value = context_priv->color_standard == VL_CSC_COLOR_STANDARD_BT_709; else return BadName; diff -Nru mesa-18.3.3/src/gallium/state_trackers/xvmc/tests/xvmc_bench.c mesa-19.0.1/src/gallium/state_trackers/xvmc/tests/xvmc_bench.c --- mesa-18.3.3/src/gallium/state_trackers/xvmc/tests/xvmc_bench.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/xvmc/tests/xvmc_bench.c 2019-03-31 23:16:37.000000000 +0000 @@ -123,11 +123,11 @@ while (token && !fail) { - if (strcmp(token, "i")) + if (strcmp(token, "i") == 0) config->mb_types |= MB_TYPE_I; - else if (strcmp(token, "p")) + else if (strcmp(token, "p") == 0) config->mb_types |= MB_TYPE_P; - else if (strcmp(token, "b")) + else if (strcmp(token, "b") == 0) config->mb_types |= MB_TYPE_B; else fail = 1; diff -Nru mesa-18.3.3/src/gallium/targets/d3dadapter9/description.c mesa-19.0.1/src/gallium/targets/d3dadapter9/description.c --- mesa-18.3.3/src/gallium/targets/d3dadapter9/description.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/d3dadapter9/description.c 2019-03-31 23:16:37.000000000 +0000 @@ -20,6 +20,7 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +#include #include #include "adapter9.h" @@ -239,7 +240,7 @@ DBG("unknown vendor 0x4%x, emulating 0x4%x\n", drvid->VendorId, fallback_ven); drvid->VendorId = fallback_ven; drvid->DeviceId = fallback_dev; - strncpy(drvid->Description, fallback_name, sizeof(drvid->Description)); + snprintf(drvid->Description, sizeof(drvid->Description), "%s", fallback_name); } /* fill in driver name and version */ @@ -277,46 +278,54 @@ case HW_VENDOR_INTEL: for (i = 0; i < sizeof(cards_intel) / sizeof(cards_intel[0]); i++) { if (strstr(drvid->Description, cards_intel[i].mesaname)) { - strncpy(drvid->Description, cards_intel[i].d3d9name, sizeof(drvid->Description)); + snprintf(drvid->Description, sizeof(drvid->Description), + "%s", cards_intel[i].d3d9name); return; } } /* use a fall-back if nothing matches */ DBG("Unknown card name %s!\n", drvid->DeviceName); - strncpy(drvid->Description, cards_intel[0].d3d9name, sizeof(drvid->Description)); + snprintf(drvid->Description, sizeof(drvid->Description), + "%s", cards_intel[0].d3d9name); break; case HW_VENDOR_VMWARE: for (i = 0; i < sizeof(cards_vmware) / sizeof(cards_vmware[0]); i++) { if (strstr(drvid->Description, cards_vmware[i].mesaname)) { - strncpy(drvid->Description, cards_vmware[i].d3d9name, sizeof(drvid->Description)); + snprintf(drvid->Description, sizeof(drvid->Description), + "%s", cards_vmware[i].d3d9name); return; } } /* use a fall-back if nothing matches */ DBG("Unknown card name %s!\n", drvid->DeviceName); - strncpy(drvid->Description, cards_vmware[0].d3d9name, sizeof(drvid->Description)); + snprintf(drvid->Description, sizeof(drvid->Description), + "%s", cards_vmware[0].d3d9name); break; case HW_VENDOR_AMD: for (i = 0; i < sizeof(cards_amd) / sizeof(cards_amd[0]); i++) { if (strstr(drvid->Description, cards_amd[i].mesaname)) { - strncpy(drvid->Description, cards_amd[i].d3d9name, sizeof(drvid->Description)); + snprintf(drvid->Description, sizeof(drvid->Description), + "%s", cards_amd[i].d3d9name); return; } } /* use a 
fall-back if nothing matches */ DBG("Unknown card name %s!\n", drvid->DeviceName); - strncpy(drvid->Description, cards_amd[0].d3d9name, sizeof(drvid->Description)); + snprintf(drvid->Description, sizeof(drvid->Description), + "%s", cards_amd[0].d3d9name); break; case HW_VENDOR_NVIDIA: for (i = 0; i < sizeof(cards_nvidia) / sizeof(cards_nvidia[0]); i++) { if (strstr(drvid->Description, cards_nvidia[i].mesaname)) { - strncpy(drvid->Description, cards_nvidia[i].d3d9name, sizeof(drvid->Description)); + snprintf(drvid->Description, sizeof(drvid->Description), + "%s", cards_nvidia[i].d3d9name); return; } } /* use a fall-back if nothing matches */ DBG("Unknown card name %s!\n", drvid->DeviceName); - strncpy(drvid->Description, cards_nvidia[0].d3d9name, sizeof(drvid->Description)); + snprintf(drvid->Description, sizeof(drvid->Description), + "%s", cards_nvidia[0].d3d9name); break; default: break; diff -Nru mesa-18.3.3/src/gallium/targets/d3dadapter9/drm.c mesa-19.0.1/src/gallium/targets/d3dadapter9/drm.c --- mesa-18.3.3/src/gallium/targets/d3dadapter9/drm.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/d3dadapter9/drm.c 2019-03-31 23:16:37.000000000 +0000 @@ -149,8 +149,8 @@ &drvid->SubSysId, &drvid->Revision); snprintf(drvid->DeviceName, sizeof(drvid->DeviceName), "Gallium 0.4 with %s", ctx->hal->get_vendor(ctx->hal)); - strncpy(drvid->Description, ctx->hal->get_name(ctx->hal), - sizeof(drvid->Description)); + snprintf(drvid->Description, sizeof(drvid->Description), + "%s", ctx->hal->get_name(ctx->hal)); if (override_vendorid > 0) { found = FALSE; @@ -163,8 +163,8 @@ fallback_cards[i].device_id); drvid->VendorId = fallback_cards[i].vendor_id; drvid->DeviceId = fallback_cards[i].device_id; - strncpy(drvid->Description, fallback_cards[i].name, - sizeof(drvid->Description)); + snprintf(drvid->Description, sizeof(drvid->Description), + "%s", fallback_cards[i].name); found = TRUE; break; } @@ -279,9 +279,6 @@ DBG("You have set a non standard 
throttling value in combination with thread_submit." "We advise to use a throttling value of -2/0"); } - if (ctx->base.thread_submit && !different_device) - DBG("You have set thread_submit but do not use a different device than the server." - "You should not expect any benefit."); if (driCheckOption(&userInitOptions, "override_vendorid", DRI_INT)) { override_vendorid = driQueryOptioni(&userInitOptions, "override_vendorid"); diff -Nru mesa-18.3.3/src/gallium/targets/d3dadapter9/meson.build mesa-19.0.1/src/gallium/targets/d3dadapter9/meson.build --- mesa-18.3.3/src/gallium/targets/d3dadapter9/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/d3dadapter9/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -68,5 +68,5 @@ description : 'Native D3D driver modules', version : '.'.join(nine_version), requires_private : 'libdrm >= ' + dep_libdrm.version(), - variables : ['moduledir=${prefix}/@0@'.format(d3d_drivers_path)], + variables : ['moduledir=@0@'.format(d3d_drivers_path)], ) diff -Nru mesa-18.3.3/src/gallium/targets/dri/Makefile.am mesa-19.0.1/src/gallium/targets/dri/Makefile.am --- mesa-18.3.3/src/gallium/targets/dri/Makefile.am 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/dri/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -78,12 +78,11 @@ include $(top_srcdir)/src/gallium/drivers/v3d/Automake.inc include $(top_srcdir)/src/gallium/drivers/vc4/Automake.inc -include $(top_srcdir)/src/gallium/drivers/pl111/Automake.inc +include $(top_srcdir)/src/gallium/drivers/kmsro/Automake.inc include $(top_srcdir)/src/gallium/drivers/virgl/Automake.inc include $(top_srcdir)/src/gallium/drivers/etnaviv/Automake.inc -include $(top_srcdir)/src/gallium/drivers/imx/Automake.inc include $(top_srcdir)/src/gallium/drivers/softpipe/Automake.inc include $(top_srcdir)/src/gallium/drivers/llvmpipe/Automake.inc diff -Nru mesa-18.3.3/src/gallium/targets/dri/meson.build mesa-19.0.1/src/gallium/targets/dri/meson.build --- 
mesa-18.3.3/src/gallium/targets/dri/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/dri/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -56,13 +56,15 @@ dependencies : [ dep_selinux, dep_expat, dep_libdrm, dep_llvm, dep_thread, driver_swrast, driver_r300, driver_r600, driver_radeonsi, driver_nouveau, - driver_pl111, driver_v3d, driver_vc4, driver_freedreno, driver_etnaviv, - driver_imx, driver_tegra, driver_i915, driver_svga, driver_virgl, + driver_kmsro, driver_v3d, driver_vc4, driver_freedreno, driver_etnaviv, + driver_tegra, driver_i915, driver_svga, driver_virgl, driver_swr, ], ) -foreach d : [[with_gallium_pl111, 'pl111_dri.so'], +foreach d : [[with_gallium_kmsro, 'pl111_dri.so'], + [with_gallium_kmsro, 'hx8357d_dri.so'], + [with_gallium_kmsro, 'imx-drm_dri.so'], [with_gallium_radeonsi, 'radeonsi_dri.so'], [with_gallium_nouveau, 'nouveau_dri.so'], [with_gallium_freedreno, ['msm_dri.so', 'kgsl_dri.so']], @@ -71,7 +73,6 @@ [with_gallium_v3d, 'v3d_dri.so'], [with_gallium_vc4, 'vc4_dri.so'], [with_gallium_etnaviv, 'etnaviv_dri.so'], - [with_gallium_imx, 'imx-drm_dri.so'], [with_gallium_tegra, 'tegra_dri.so'], [with_gallium_i915, 'i915_dri.so'], [with_gallium_r300, 'r300_dri.so'], diff -Nru mesa-18.3.3/src/gallium/targets/dri/target.c mesa-19.0.1/src/gallium/targets/dri/target.c --- mesa-18.3.3/src/gallium/targets/dri/target.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/dri/target.c 2019-03-31 23:16:37.000000000 +0000 @@ -77,7 +77,8 @@ #if defined(GALLIUM_VC4) DEFINE_LOADER_DRM_ENTRYPOINT(vc4) -#if defined(GALLIUM_PL111) +#if defined(GALLIUM_KMSRO) +DEFINE_LOADER_DRM_ENTRYPOINT(hx8357d) DEFINE_LOADER_DRM_ENTRYPOINT(pl111) #endif #endif diff -Nru mesa-18.3.3/src/gallium/targets/omx/meson.build mesa-19.0.1/src/gallium/targets/omx/meson.build --- mesa-18.3.3/src/gallium/targets/omx/meson.build 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/omx/meson.build 2019-03-31 
23:16:37.000000000 +0000 @@ -32,7 +32,7 @@ libomx_gallium = shared_library( 'omx_mesa', - 'target.c', + ['target.c', xmlpool_options_h], c_args : c_vis_args, cpp_args : cpp_vis_args, link_args : [omx_link_args, ld_args_gc_sections], diff -Nru mesa-18.3.3/src/gallium/targets/pipe-loader/Makefile.am mesa-19.0.1/src/gallium/targets/pipe-loader/Makefile.am --- mesa-18.3.3/src/gallium/targets/pipe-loader/Makefile.am 2018-04-16 21:31:06.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/pipe-loader/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -157,6 +157,8 @@ $(PIPE_LIBS) \ $(top_builddir)/src/gallium/winsys/freedreno/drm/libfreedrenodrm.la \ $(top_builddir)/src/gallium/drivers/freedreno/libfreedreno.la \ + $(top_builddir)/src/freedreno/libfreedreno_drm.la \ + $(top_builddir)/src/freedreno/libfreedreno_ir3.la \ $(LIBDRM_LIBS) \ $(FREEDRENO_LIBS) diff -Nru mesa-18.3.3/src/gallium/targets/pipe-loader/pipe_msm.c mesa-19.0.1/src/gallium/targets/pipe-loader/pipe_msm.c --- mesa-18.3.3/src/gallium/targets/pipe-loader/pipe_msm.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/pipe-loader/pipe_msm.c 2019-03-31 23:16:37.000000000 +0000 @@ -8,7 +8,7 @@ { struct pipe_screen *screen; - screen = fd_drm_screen_create(fd); + screen = fd_drm_screen_create(fd, NULL); if (!screen) return NULL; diff -Nru mesa-18.3.3/src/gallium/targets/va/meson.build mesa-19.0.1/src/gallium/targets/va/meson.build --- mesa-18.3.3/src/gallium/targets/va/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/va/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -33,7 +33,7 @@ libva_gallium = shared_library( 'gallium_drv_video', - 'target.c', + ['target.c', xmlpool_options_h], c_args : c_vis_args, cpp_args : cpp_vis_args, link_args : [va_link_args, ld_args_gc_sections], diff -Nru mesa-18.3.3/src/gallium/targets/vdpau/meson.build mesa-19.0.1/src/gallium/targets/vdpau/meson.build --- mesa-18.3.3/src/gallium/targets/vdpau/meson.build 2019-02-01 
12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/vdpau/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -38,7 +38,7 @@ libvdpau_gallium = shared_library( 'vdpau_gallium', - 'target.c', + ['target.c', xmlpool_options_h], c_args : c_vis_args, cpp_args : cpp_vis_args, link_args : [vdpau_link_args, ld_args_gc_sections], diff -Nru mesa-18.3.3/src/gallium/targets/xa/meson.build mesa-19.0.1/src/gallium/targets/xa/meson.build --- mesa-18.3.3/src/gallium/targets/xa/meson.build 2018-04-11 19:02:35.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/xa/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -34,7 +34,7 @@ libxatracker = shared_library( 'xatracker', - 'target.c', + ['target.c', xmlpool_options_h], c_args : c_vis_args, cpp_args : cpp_vis_args, link_args : [xa_link_args, ld_args_gc_sections], diff -Nru mesa-18.3.3/src/gallium/targets/xvmc/meson.build mesa-19.0.1/src/gallium/targets/xvmc/meson.build --- mesa-18.3.3/src/gallium/targets/xvmc/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/xvmc/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -33,7 +33,7 @@ libxvmc_gallium = shared_library( 'XvMCgallium', - 'target.c', + ['target.c', xmlpool_options_h], c_args : c_vis_args, cpp_args : cpp_vis_args, link_args : [xvmc_link_args, ld_args_gc_sections], diff -Nru mesa-18.3.3/src/gallium/tests/meson.build mesa-19.0.1/src/gallium/tests/meson.build --- mesa-18.3.3/src/gallium/tests/meson.build 2018-04-19 04:33:31.000000000 +0000 +++ mesa-19.0.1/src/gallium/tests/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -19,7 +19,5 @@ # SOFTWARE. 
subdir('trivial') -if with_gallium_softpipe - subdir('unit') -endif +subdir('unit') subdir('graw') diff -Nru mesa-18.3.3/src/gallium/tests/trivial/compute.c mesa-19.0.1/src/gallium/tests/trivial/compute.c --- mesa-18.3.3/src/gallium/tests/trivial/compute.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/tests/trivial/compute.c 2019-03-31 23:16:37.000000000 +0000 @@ -240,7 +240,7 @@ util_format_get_nblocksy(tex->format, tex->height0)); struct pipe_transfer *xfer; char *map; - int x, y, i; + int x = 0, y, i; int err = 0; if (!check) diff -Nru mesa-18.3.3/src/gallium/tests/unit/meson.build mesa-19.0.1/src/gallium/tests/unit/meson.build --- mesa-18.3.3/src/gallium/tests/unit/meson.build 2018-04-19 04:33:31.000000000 +0000 +++ mesa-19.0.1/src/gallium/tests/unit/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -20,12 +20,16 @@ foreach t : ['pipe_barrier_test', 'u_cache_test', 'u_half_test', 'u_format_test', 'u_format_compatible_test', 'translate_test'] - executable( + exe = executable( t, '@0@.c'.format(t), - include_directories : [inc_common, inc_gallium_drivers, inc_gallium_winsys], - link_with : [libgallium, libmesa_util, libws_null], - dependencies : [driver_swrast, dep_thread], + include_directories : inc_common, + link_with : [libgallium, libmesa_util], + dependencies : [dep_thread], install : false, ) + # u_cache_test is slow, and translate_test fails. + if not ['u_cache_test', 'translate_test'].contains(t) + test(t, exe, suite: 'gallium') + endif endforeach diff -Nru mesa-18.3.3/src/gallium/tests/unit/u_format_test.c mesa-19.0.1/src/gallium/tests/unit/u_format_test.c --- mesa-18.3.3/src/gallium/tests/unit/u_format_test.c 2018-07-29 21:31:02.000000000 +0000 +++ mesa-19.0.1/src/gallium/tests/unit/u_format_test.c 2019-03-31 23:16:37.000000000 +0000 @@ -668,6 +668,47 @@ } +/* Touch-test that the unorm/snorm flags are set up right by codegen. 
*/ +static boolean +test_format_norm_flags(const struct util_format_description *format_desc) +{ + boolean success = TRUE; + +#define FORMAT_CASE(format, unorm, snorm) \ + case format: \ + success = (format_desc->is_unorm == unorm && \ + format_desc->is_snorm == snorm); \ + break + + switch (format_desc->format) { + FORMAT_CASE(PIPE_FORMAT_R8G8B8A8_UNORM, TRUE, FALSE); + FORMAT_CASE(PIPE_FORMAT_R8G8B8A8_SRGB, TRUE, FALSE); + FORMAT_CASE(PIPE_FORMAT_R8G8B8A8_SNORM, FALSE, TRUE); + FORMAT_CASE(PIPE_FORMAT_R32_FLOAT, FALSE, FALSE); + FORMAT_CASE(PIPE_FORMAT_X8Z24_UNORM, TRUE, FALSE); + FORMAT_CASE(PIPE_FORMAT_S8X24_UINT, FALSE, FALSE); + FORMAT_CASE(PIPE_FORMAT_DXT1_RGB, TRUE, FALSE); + FORMAT_CASE(PIPE_FORMAT_ETC2_RGB8, TRUE, FALSE); + FORMAT_CASE(PIPE_FORMAT_ETC2_R11_SNORM, FALSE, TRUE); + FORMAT_CASE(PIPE_FORMAT_ASTC_4x4, TRUE, FALSE); + FORMAT_CASE(PIPE_FORMAT_BPTC_RGBA_UNORM, TRUE, FALSE); + FORMAT_CASE(PIPE_FORMAT_BPTC_RGB_FLOAT, FALSE, FALSE); + default: + success = !(format_desc->is_unorm && format_desc->is_snorm); + break; + } +#undef FORMAT_CASE + + if (!success) { + printf("FAILED: %s (unorm %s, snorm %s)\n", + format_desc->short_name, + format_desc->is_unorm ? "yes" : "no", + format_desc->is_snorm ? 
"yes" : "no"); + } + + return success; +} + typedef boolean (*test_func_t)(const struct util_format_description *format_desc, const struct util_format_test_case *test); @@ -698,6 +739,22 @@ return success; } +static boolean +test_format_metadata(const struct util_format_description *format_desc, + boolean (*func)(const struct util_format_description *format_desc), + const char *suffix) +{ + boolean success = TRUE; + + printf("Testing util_format_%s_%s ...\n", format_desc->short_name, suffix); + fflush(stdout); + + if (!func(format_desc)) { + success = FALSE; + } + + return success; +} static boolean test_all(void) @@ -724,6 +781,11 @@ } \ } +# define TEST_FORMAT_METADATA(name) \ + if (!test_format_metadata(format_desc, &test_format_##name, #name)) { \ + success = FALSE; \ + } \ + TEST_ONE_FUNC(fetch_rgba_float); TEST_ONE_FUNC(pack_rgba_float); TEST_ONE_FUNC(unpack_rgba_float); @@ -737,7 +799,10 @@ TEST_ONE_FUNC(unpack_s_8uint); TEST_ONE_FUNC(pack_s_8uint); + TEST_FORMAT_METADATA(norm_flags); + # undef TEST_ONE_FUNC +# undef TEST_ONE_FORMAT } return success; diff -Nru mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c --- mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c 2019-03-31 23:16:37.000000000 +0000 @@ -56,6 +56,7 @@ unsigned alignment, enum radeon_bo_domain domain, enum radeon_bo_flag flags); +static void amdgpu_bo_unmap(struct pb_buffer *buf); static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout, enum radeon_bo_usage usage) @@ -173,6 +174,12 @@ assert(bo->bo && "must not be called for slab entries"); + if (!bo->is_user_ptr && bo->cpu_ptr) { + bo->cpu_ptr = NULL; + amdgpu_bo_unmap(&bo->base); + } + assert(bo->is_user_ptr || bo->u.real.map_count == 0); + if (ws->debug_all_bos) { simple_mtx_lock(&ws->global_bo_list_lock); LIST_DEL(&bo->u.real.global_list_item); @@ -184,8 +191,10 @@ 
util_hash_table_remove(ws->bo_export_table, bo->bo); simple_mtx_unlock(&ws->bo_export_table_lock); - amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP); - amdgpu_va_range_free(bo->u.real.va_handle); + if (bo->initial_domain & RADEON_DOMAIN_VRAM_GTT) { + amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP); + amdgpu_va_range_free(bo->u.real.va_handle); + } amdgpu_bo_free(bo->bo); amdgpu_bo_remove_fences(bo); @@ -195,14 +204,7 @@ else if (bo->initial_domain & RADEON_DOMAIN_GTT) ws->allocated_gtt -= align64(bo->base.size, ws->info.gart_page_size); - if (bo->u.real.map_count >= 1) { - if (bo->initial_domain & RADEON_DOMAIN_VRAM) - ws->mapped_vram -= bo->base.size; - else if (bo->initial_domain & RADEON_DOMAIN_GTT) - ws->mapped_gtt -= bo->base.size; - ws->num_mapped_buffers--; - } - + simple_mtx_destroy(&bo->lock); FREE(bo); } @@ -218,6 +220,37 @@ amdgpu_bo_destroy(_buf); } +static void amdgpu_clean_up_buffer_managers(struct amdgpu_winsys *ws) +{ + for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) + pb_slabs_reclaim(&ws->bo_slabs[i]); + + pb_cache_release_all_buffers(&ws->bo_cache); +} + +static bool amdgpu_bo_do_map(struct amdgpu_winsys_bo *bo, void **cpu) +{ + assert(!bo->sparse && bo->bo && !bo->is_user_ptr); + int r = amdgpu_bo_cpu_map(bo->bo, cpu); + if (r) { + /* Clean up buffer managers and try again. 
*/ + amdgpu_clean_up_buffer_managers(bo->ws); + r = amdgpu_bo_cpu_map(bo->bo, cpu); + if (r) + return false; + } + + if (p_atomic_inc_return(&bo->u.real.map_count) == 1) { + if (bo->initial_domain & RADEON_DOMAIN_VRAM) + bo->ws->mapped_vram += bo->base.size; + else if (bo->initial_domain & RADEON_DOMAIN_GTT) + bo->ws->mapped_gtt += bo->base.size; + bo->ws->num_mapped_buffers++; + } + + return true; +} + static void *amdgpu_bo_map(struct pb_buffer *buf, struct radeon_cmdbuf *rcs, enum pipe_transfer_usage usage) @@ -225,9 +258,6 @@ struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf; struct amdgpu_winsys_bo *real; struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs; - int r; - void *cpu = NULL; - uint64_t offset = 0; assert(!bo->sparse); @@ -312,9 +342,9 @@ } } - /* If the buffer is created from user memory, return the user pointer. */ - if (bo->user_ptr) - return bo->user_ptr; + /* Buffer synchronization has been checked, now actually map the buffer. */ + void *cpu = NULL; + uint64_t offset = 0; if (bo->bo) { real = bo; @@ -323,22 +353,31 @@ offset = bo->va - real->va; } - r = amdgpu_bo_cpu_map(real->bo, &cpu); - if (r) { - /* Clear the cache and try again. */ - pb_cache_release_all_buffers(&real->ws->bo_cache); - r = amdgpu_bo_cpu_map(real->bo, &cpu); - if (r) - return NULL; + if (usage & RADEON_TRANSFER_TEMPORARY) { + if (real->is_user_ptr) { + cpu = real->cpu_ptr; + } else { + if (!amdgpu_bo_do_map(real, &cpu)) + return NULL; + } + } else { + cpu = p_atomic_read(&real->cpu_ptr); + if (!cpu) { + simple_mtx_lock(&real->lock); + /* Must re-check due to the possibility of a race. Re-check need not + * be atomic thanks to the lock. 
*/ + cpu = real->cpu_ptr; + if (!cpu) { + if (!amdgpu_bo_do_map(real, &cpu)) { + simple_mtx_unlock(&real->lock); + return NULL; + } + p_atomic_set(&real->cpu_ptr, cpu); + } + simple_mtx_unlock(&real->lock); + } } - if (p_atomic_inc_return(&real->u.real.map_count) == 1) { - if (real->initial_domain & RADEON_DOMAIN_VRAM) - real->ws->mapped_vram += real->base.size; - else if (real->initial_domain & RADEON_DOMAIN_GTT) - real->ws->mapped_gtt += real->base.size; - real->ws->num_mapped_buffers++; - } return (uint8_t*)cpu + offset; } @@ -349,12 +388,15 @@ assert(!bo->sparse); - if (bo->user_ptr) + if (bo->is_user_ptr) return; real = bo->bo ? bo : bo->u.slab.real; - + assert(real->u.real.map_count != 0 && "too many unmaps"); if (p_atomic_dec_zero(&real->u.real.map_count)) { + assert(!real->cpu_ptr && + "too many unmaps or forgot RADEON_TRANSFER_TEMPORARY flag"); + if (real->initial_domain & RADEON_DOMAIN_VRAM) real->ws->mapped_vram -= real->base.size; else if (real->initial_domain & RADEON_DOMAIN_GTT) @@ -384,6 +426,27 @@ } } +static uint64_t amdgpu_get_optimal_vm_alignment(struct amdgpu_winsys *ws, + uint64_t size, unsigned alignment) +{ + uint64_t vm_alignment = alignment; + + /* Increase the VM alignment for faster address translation. */ + if (size >= ws->info.pte_fragment_size) + vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size); + + /* Gfx9: Increase the VM alignment to the most significant bit set + * in the size for faster address translation. + */ + if (ws->info.chip_class >= GFX9) { + unsigned msb = util_last_bit64(size); /* 0 = no bit is set */ + uint64_t msb_alignment = msb ? 
1ull << (msb - 1) : 0; + + vm_alignment = MAX2(vm_alignment, msb_alignment); + } + return vm_alignment; +} + static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, uint64_t size, unsigned alignment, @@ -396,11 +459,12 @@ uint64_t va = 0; struct amdgpu_winsys_bo *bo; amdgpu_va_handle va_handle; - unsigned va_gap_size; int r; /* VRAM or GTT must be specified, but not both at the same time. */ - assert(util_bitcount(initial_domain & RADEON_DOMAIN_VRAM_GTT) == 1); + assert(util_bitcount(initial_domain & (RADEON_DOMAIN_VRAM_GTT | + RADEON_DOMAIN_GDS | + RADEON_DOMAIN_OA)) == 1); bo = CALLOC_STRUCT(amdgpu_winsys_bo); if (!bo) { @@ -418,6 +482,10 @@ request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM; if (initial_domain & RADEON_DOMAIN_GTT) request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT; + if (initial_domain & RADEON_DOMAIN_GDS) + request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS; + if (initial_domain & RADEON_DOMAIN_OA) + request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA; /* Since VRAM and GTT have almost the same performance on APUs, we could * just set GTT. However, in order to decrease GTT(RAM) usage, which is @@ -447,27 +515,31 @@ goto error_bo_alloc; } - va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0; - if (size > ws->info.pte_fragment_size) - alignment = MAX2(alignment, ws->info.pte_fragment_size); - r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, - size + va_gap_size, alignment, 0, &va, &va_handle, - (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) | - AMDGPU_VA_RANGE_HIGH); - if (r) - goto error_va_alloc; + if (initial_domain & RADEON_DOMAIN_VRAM_GTT) { + unsigned va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0; - unsigned vm_flags = AMDGPU_VM_PAGE_READABLE | - AMDGPU_VM_PAGE_EXECUTABLE; + r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, + size + va_gap_size, + amdgpu_get_optimal_vm_alignment(ws, size, alignment), + 0, &va, &va_handle, + (flags & RADEON_FLAG_32BIT ? 
AMDGPU_VA_RANGE_32_BIT : 0) | + AMDGPU_VA_RANGE_HIGH); + if (r) + goto error_va_alloc; - if (!(flags & RADEON_FLAG_READ_ONLY)) - vm_flags |= AMDGPU_VM_PAGE_WRITEABLE; + unsigned vm_flags = AMDGPU_VM_PAGE_READABLE | + AMDGPU_VM_PAGE_EXECUTABLE; - r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags, + if (!(flags & RADEON_FLAG_READ_ONLY)) + vm_flags |= AMDGPU_VM_PAGE_WRITEABLE; + + r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags, AMDGPU_VA_OP_MAP); - if (r) - goto error_va_map; + if (r) + goto error_va_map; + } + simple_mtx_init(&bo->lock, mtx_plain); pipe_reference_init(&bo->base.reference, 1); bo->base.alignment = alignment; bo->base.usage = 0; @@ -486,7 +558,7 @@ else if (initial_domain & RADEON_DOMAIN_GTT) ws->allocated_gtt += align64(size, ws->info.gart_page_size); - amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms_noimport, &bo->u.real.kms_handle); + amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms, &bo->u.real.kms_handle); amdgpu_add_buffer_to_global_list(bo); @@ -522,13 +594,27 @@ return amdgpu_bo_can_reclaim(&bo->base); } +static struct pb_slabs *get_slabs(struct amdgpu_winsys *ws, uint64_t size) +{ + /* Find the correct slab allocator for the given size. 
*/ + for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { + struct pb_slabs *slabs = &ws->bo_slabs[i]; + + if (size <= 1 << (slabs->min_order + slabs->num_orders - 1)) + return slabs; + } + + assert(0); + return NULL; +} + static void amdgpu_bo_slab_destroy(struct pb_buffer *_buf) { struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf); assert(!bo->bo); - pb_slab_free(&bo->ws->bo_slabs, &bo->u.slab.entry); + pb_slab_free(get_slabs(bo->ws, bo->base.size), &bo->u.slab.entry); } static const struct pb_vtbl amdgpu_winsys_bo_slab_vtbl = { @@ -545,19 +631,37 @@ enum radeon_bo_domain domains = radeon_domain_from_heap(heap); enum radeon_bo_flag flags = radeon_flags_from_heap(heap); uint32_t base_id; + unsigned slab_size = 0; if (!slab) return NULL; - unsigned slab_size = 1 << AMDGPU_SLAB_BO_SIZE_LOG2; + /* Determine the slab buffer size. */ + for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { + struct pb_slabs *slabs = &ws->bo_slabs[i]; + unsigned max_entry_size = 1 << (slabs->min_order + slabs->num_orders - 1); + + if (entry_size <= max_entry_size) { + /* The slab size is twice the size of the largest possible entry. */ + slab_size = max_entry_size * 2; + + /* The largest slab should have the same size as the PTE fragment + * size to get faster address translation. 
+ */ + if (i == NUM_SLAB_ALLOCATORS - 1 && + slab_size < ws->info.pte_fragment_size) + slab_size = ws->info.pte_fragment_size; + break; + } + } + assert(slab_size != 0); + slab->buffer = amdgpu_winsys_bo(amdgpu_bo_create(&ws->base, slab_size, slab_size, domains, flags)); if (!slab->buffer) goto fail; - assert(slab->buffer->bo); - slab->base.num_entries = slab->buffer->base.size / entry_size; slab->base.num_free = slab->base.num_entries; slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries)); @@ -571,6 +675,7 @@ for (unsigned i = 0; i < slab->base.num_entries; ++i) { struct amdgpu_winsys_bo *bo = &slab->entries[i]; + simple_mtx_init(&bo->lock, mtx_plain); bo->base.alignment = entry_size; bo->base.usage = slab->buffer->base.usage; bo->base.size = entry_size; @@ -581,7 +686,15 @@ bo->unique_id = base_id + i; bo->u.slab.entry.slab = &slab->base; bo->u.slab.entry.group_index = group_index; - bo->u.slab.real = slab->buffer; + + if (slab->buffer->bo) { + /* The slab is not suballocated. */ + bo->u.slab.real = slab->buffer; + } else { + /* The slab is allocated out of a bigger slab. 
*/ + bo->u.slab.real = slab->buffer->u.slab.real; + assert(bo->u.slab.real->bo); + } LIST_ADDTAIL(&bo->u.slab.entry.head, &slab->base.free); } @@ -599,8 +712,10 @@ { struct amdgpu_slab *slab = amdgpu_slab(pslab); - for (unsigned i = 0; i < slab->base.num_entries; ++i) + for (unsigned i = 0; i < slab->base.num_entries; ++i) { amdgpu_bo_remove_fences(&slab->entries[i]); + simple_mtx_destroy(&slab->entries[i].lock); + } FREE(slab->entries); amdgpu_winsys_bo_reference(&slab->buffer, NULL); @@ -858,8 +973,8 @@ } amdgpu_va_range_free(bo->u.sparse.va_handle); - simple_mtx_destroy(&bo->u.sparse.commit_lock); FREE(bo->u.sparse.commitments); + simple_mtx_destroy(&bo->lock); FREE(bo); } @@ -889,6 +1004,7 @@ if (!bo) return NULL; + simple_mtx_init(&bo->lock, mtx_plain); pipe_reference_init(&bo->base.reference, 1); bo->base.alignment = RADEON_SPARSE_PAGE_SIZE; bo->base.size = size; @@ -905,7 +1021,6 @@ if (!bo->u.sparse.commitments) goto error_alloc_commitments; - simple_mtx_init(&bo->u.sparse.commit_lock, mtx_plain); LIST_INITHEAD(&bo->u.sparse.backing); /* For simplicity, we always map a multiple of the page size. */ @@ -928,9 +1043,9 @@ error_va_map: amdgpu_va_range_free(bo->u.sparse.va_handle); error_va_alloc: - simple_mtx_destroy(&bo->u.sparse.commit_lock); FREE(bo->u.sparse.commitments); error_alloc_commitments: + simple_mtx_destroy(&bo->lock); FREE(bo); return NULL; } @@ -955,7 +1070,7 @@ va_page = offset / RADEON_SPARSE_PAGE_SIZE; end_va_page = va_page + DIV_ROUND_UP(size, RADEON_SPARSE_PAGE_SIZE); - simple_mtx_lock(&bo->u.sparse.commit_lock); + simple_mtx_lock(&bo->lock); #if DEBUG_SPARSE_COMMITS sparse_dump(bo, __func__); @@ -1059,7 +1174,7 @@ } out: - simple_mtx_unlock(&bo->u.sparse.commit_lock); + simple_mtx_unlock(&bo->lock); return ok; } @@ -1193,22 +1308,28 @@ /* Sparse buffers must have NO_CPU_ACCESS set. 
*/ assert(!(flags & RADEON_FLAG_SPARSE) || flags & RADEON_FLAG_NO_CPU_ACCESS); + struct pb_slabs *last_slab = &ws->bo_slabs[NUM_SLAB_ALLOCATORS - 1]; + unsigned max_slab_entry_size = 1 << (last_slab->min_order + last_slab->num_orders - 1); + /* Sub-allocate small buffers from slabs. */ if (!(flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE)) && - size <= (1 << AMDGPU_SLAB_MAX_SIZE_LOG2) && - alignment <= MAX2(1 << AMDGPU_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) { + size <= max_slab_entry_size && + /* The alignment must be at most the size of the smallest slab entry or + * the next power of two. */ + alignment <= MAX2(1 << ws->bo_slabs[0].min_order, util_next_power_of_two(size))) { struct pb_slab_entry *entry; int heap = radeon_get_heap_index(domain, flags); if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS) goto no_slab; - entry = pb_slab_alloc(&ws->bo_slabs, size, heap); + struct pb_slabs *slabs = get_slabs(ws, size); + entry = pb_slab_alloc(slabs, size, heap); if (!entry) { - /* Clear the cache and try again. */ - pb_cache_release_all_buffers(&ws->bo_cache); + /* Clean up buffer managers and try again. */ + amdgpu_clean_up_buffer_managers(ws); - entry = pb_slab_alloc(&ws->bo_slabs, size, heap); + entry = pb_slab_alloc(slabs, size, heap); } if (!entry) return NULL; @@ -1235,8 +1356,10 @@ * BOs. Aligning this here helps the cached bufmgr. Especially small BOs, * like constant/uniform buffers, can benefit from better and more reuse. */ - size = align64(size, ws->info.gart_page_size); - alignment = align(alignment, ws->info.gart_page_size); + if (domain & RADEON_DOMAIN_VRAM_GTT) { + size = align64(size, ws->info.gart_page_size); + alignment = align(alignment, ws->info.gart_page_size); + } bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING; @@ -1254,9 +1377,9 @@ /* Create a new one. */ bo = amdgpu_create_bo(ws, size, alignment, domain, flags, heap); if (!bo) { - /* Clear the cache and try again. 
*/ - pb_slabs_reclaim(&ws->bo_slabs); - pb_cache_release_all_buffers(&ws->bo_cache); + /* Clean up buffer managers and try again. */ + amdgpu_clean_up_buffer_managers(ws); + bo = amdgpu_create_bo(ws, size, alignment, domain, flags, heap); if (!bo) return NULL; @@ -1268,6 +1391,7 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws, struct winsys_handle *whandle, + unsigned vm_alignment, unsigned *stride, unsigned *offset) { @@ -1325,8 +1449,10 @@ goto error; r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, - result.alloc_size, 1 << 20, 0, &va, &va_handle, - AMDGPU_VA_RANGE_HIGH); + result.alloc_size, + amdgpu_get_optimal_vm_alignment(ws, result.alloc_size, + vm_alignment), + 0, &va, &va_handle, AMDGPU_VA_RANGE_HIGH); if (r) goto error; @@ -1344,6 +1470,7 @@ initial |= RADEON_DOMAIN_GTT; /* Initialize the structure. */ + simple_mtx_init(&bo->lock, mtx_plain); pipe_reference_init(&bo->base.reference, 1); bo->base.alignment = info.phys_alignment; bo->bo = result.buf_handle; @@ -1361,7 +1488,7 @@ else if (bo->initial_domain & RADEON_DOMAIN_GTT) ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size); - amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms_noimport, &bo->u.real.kms_handle); + amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms, &bo->u.real.kms_handle); amdgpu_add_buffer_to_global_list(bo); @@ -1445,21 +1572,25 @@ goto error; if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, - aligned_size, 1 << 12, 0, &va, &va_handle, - AMDGPU_VA_RANGE_HIGH)) + aligned_size, + amdgpu_get_optimal_vm_alignment(ws, aligned_size, + ws->info.gart_page_size), + 0, &va, &va_handle, AMDGPU_VA_RANGE_HIGH)) goto error_va_alloc; if (amdgpu_bo_va_op(buf_handle, 0, aligned_size, va, 0, AMDGPU_VA_OP_MAP)) goto error_va_map; /* Initialize it. 
*/ + bo->is_user_ptr = true; pipe_reference_init(&bo->base.reference, 1); + simple_mtx_init(&bo->lock, mtx_plain); bo->bo = buf_handle; bo->base.alignment = 0; bo->base.size = size; bo->base.vtbl = &amdgpu_winsys_bo_vtbl; bo->ws = ws; - bo->user_ptr = pointer; + bo->cpu_ptr = pointer; bo->va = va; bo->u.real.va_handle = va_handle; bo->initial_domain = RADEON_DOMAIN_GTT; @@ -1469,7 +1600,7 @@ amdgpu_add_buffer_to_global_list(bo); - amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms_noimport, &bo->u.real.kms_handle); + amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms, &bo->u.real.kms_handle); return (struct pb_buffer*)bo; @@ -1486,7 +1617,7 @@ static bool amdgpu_bo_is_user_ptr(struct pb_buffer *buf) { - return ((struct amdgpu_winsys_bo*)buf)->user_ptr != NULL; + return ((struct amdgpu_winsys_bo*)buf)->is_user_ptr; } static bool amdgpu_bo_is_suballocated(struct pb_buffer *buf) diff -Nru mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h --- mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h 2019-03-31 23:16:37.000000000 +0000 @@ -74,7 +74,6 @@ struct amdgpu_winsys_bo *real; } slab; struct { - simple_mtx_t commit_lock; amdgpu_va_handle va_handle; enum radeon_bo_flag flags; @@ -89,10 +88,12 @@ } u; struct amdgpu_winsys *ws; - void *user_ptr; /* from buffer_from_ptr */ + void *cpu_ptr; /* for user_ptr and permanent maps */ amdgpu_bo_handle bo; /* NULL for slab entries and sparse buffers */ bool sparse; + bool is_user_ptr; + bool is_local; uint32_t unique_id; uint64_t va; enum radeon_bo_domain initial_domain; @@ -114,7 +115,7 @@ unsigned max_fences; struct pipe_fence_handle **fences; - bool is_local; + simple_mtx_t lock; }; struct amdgpu_slab { diff -Nru mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c --- mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 2019-03-31 23:16:37.000000000 +0000 @@ -172,45 +172,45 @@ uint64_t seq_no, uint64_t *user_fence_cpu_address) { - struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence; + struct amdgpu_fence *afence = (struct amdgpu_fence*)fence; - rfence->fence.fence = seq_no; - rfence->user_fence_cpu_address = user_fence_cpu_address; - util_queue_fence_signal(&rfence->submitted); + afence->fence.fence = seq_no; + afence->user_fence_cpu_address = user_fence_cpu_address; + util_queue_fence_signal(&afence->submitted); } static void amdgpu_fence_signalled(struct pipe_fence_handle *fence) { - struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence; + struct amdgpu_fence *afence = (struct amdgpu_fence*)fence; - rfence->signalled = true; - util_queue_fence_signal(&rfence->submitted); + afence->signalled = true; + util_queue_fence_signal(&afence->submitted); } bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout, bool absolute) { - struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence; + struct amdgpu_fence *afence = (struct amdgpu_fence*)fence; uint32_t expired; int64_t abs_timeout; uint64_t *user_fence_cpu; int r; - if (rfence->signalled) + if (afence->signalled) return true; /* Handle syncobjs. */ - if (amdgpu_fence_is_syncobj(rfence)) { + if (amdgpu_fence_is_syncobj(afence)) { /* Absolute timeouts are only be used by BO fences, which aren't * backed by syncobjs. */ assert(!absolute); - if (amdgpu_cs_syncobj_wait(rfence->ws->dev, &rfence->syncobj, 1, + if (amdgpu_cs_syncobj_wait(afence->ws->dev, &afence->syncobj, 1, timeout, 0, NULL)) return false; - rfence->signalled = true; + afence->signalled = true; return true; } @@ -222,13 +222,13 @@ /* The fence might not have a number assigned if its IB is being * submitted in the other thread right now. Wait until the submission * is done. 
*/ - if (!util_queue_fence_wait_timeout(&rfence->submitted, abs_timeout)) + if (!util_queue_fence_wait_timeout(&afence->submitted, abs_timeout)) return false; - user_fence_cpu = rfence->user_fence_cpu_address; + user_fence_cpu = afence->user_fence_cpu_address; if (user_fence_cpu) { - if (*user_fence_cpu >= rfence->fence.fence) { - rfence->signalled = true; + if (*user_fence_cpu >= afence->fence.fence) { + afence->signalled = true; return true; } @@ -238,7 +238,7 @@ } /* Now use the libdrm query. */ - r = amdgpu_cs_query_fence_status(&rfence->fence, + r = amdgpu_cs_query_fence_status(&afence->fence, abs_timeout, AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE, &expired); @@ -250,7 +250,7 @@ if (expired) { /* This variable can only transition from false to true, so it doesn't * matter if threads race for it. */ - rfence->signalled = true; + afence->signalled = true; return true; } return false; @@ -598,7 +598,7 @@ /* We delay adding the backing buffers until we really have to. However, * we cannot delay accounting for memory use. 
*/ - simple_mtx_lock(&bo->u.sparse.commit_lock); + simple_mtx_lock(&bo->lock); list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) { if (bo->initial_domain & RADEON_DOMAIN_VRAM) @@ -607,7 +607,7 @@ acs->main.base.used_gart += backing->bo->base.size; } - simple_mtx_unlock(&bo->u.sparse.commit_lock); + simple_mtx_unlock(&bo->lock); return idx; } @@ -923,7 +923,8 @@ enum ring_type ring_type, void (*flush)(void *ctx, unsigned flags, struct pipe_fence_handle **fence), - void *flush_ctx) + void *flush_ctx, + bool stop_exec_on_failure) { struct amdgpu_ctx *ctx = (struct amdgpu_ctx*)rwctx; struct amdgpu_cs *cs; @@ -939,6 +940,7 @@ cs->flush_cs = flush; cs->flush_data = flush_ctx; cs->ring_type = ring_type; + cs->stop_exec_on_failure = stop_exec_on_failure; struct amdgpu_cs_fence_info fence_info; fence_info.handle = cs->ctx->user_fence_bo; @@ -1217,8 +1219,6 @@ { struct amdgpu_cs_context *cs = acs->csc; - cs->num_fence_dependencies = 0; - amdgpu_add_fence_dependencies_bo_list(acs, cs->fence, cs->num_real_buffers, cs->real_buffers); amdgpu_add_fence_dependencies_bo_list(acs, cs->fence, cs->num_slab_buffers, cs->slab_buffers); amdgpu_add_fence_dependencies_bo_list(acs, cs->fence, cs->num_sparse_buffers, cs->sparse_buffers); @@ -1265,7 +1265,7 @@ struct amdgpu_cs_buffer *buffer = &cs->sparse_buffers[i]; struct amdgpu_winsys_bo *bo = buffer->bo; - simple_mtx_lock(&bo->u.sparse.commit_lock); + simple_mtx_lock(&bo->lock); list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) { /* We can directly add the buffer here, because we know that each @@ -1274,7 +1274,7 @@ int idx = amdgpu_do_add_real_buffer(cs, backing->bo); if (idx < 0) { fprintf(stderr, "%s: failed to add buffer\n", __FUNCTION__); - simple_mtx_unlock(&bo->u.sparse.commit_lock); + simple_mtx_unlock(&bo->lock); return false; } @@ -1283,7 +1283,7 @@ p_atomic_inc(&backing->bo->num_active_ioctls); } - simple_mtx_unlock(&bo->u.sparse.commit_lock); + 
simple_mtx_unlock(&bo->lock); } return true; @@ -1295,7 +1295,7 @@ struct amdgpu_winsys *ws = acs->ctx->ws; struct amdgpu_cs_context *cs = acs->cst; int i, r; - amdgpu_bo_list_handle bo_list = NULL; + uint32_t bo_list = 0; uint64_t seq_no = 0; bool has_user_fence = amdgpu_cs_has_user_fence(cs); bool use_bo_list_create = ws->info.drm_minor < 27; @@ -1306,27 +1306,28 @@ /* The buffer list contains all buffers. This is a slow path that * ensures that no buffer is missing in the BO list. */ + unsigned num_handles = 0; + struct drm_amdgpu_bo_list_entry *list = + alloca(ws->num_buffers * sizeof(struct drm_amdgpu_bo_list_entry)); struct amdgpu_winsys_bo *bo; - amdgpu_bo_handle *handles; - unsigned num = 0; simple_mtx_lock(&ws->global_bo_list_lock); - handles = alloca(sizeof(handles[0]) * ws->num_buffers); - LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, u.real.global_list_item) { - assert(num < ws->num_buffers); - handles[num++] = bo->bo; + if (bo->is_local) + continue; + + list[num_handles].bo_handle = bo->u.real.kms_handle; + list[num_handles].bo_priority = 0; + ++num_handles; } - r = amdgpu_bo_list_create(ws->dev, ws->num_buffers, - handles, NULL, &bo_list); + r = amdgpu_bo_list_create_raw(ws->dev, ws->num_buffers, list, &bo_list); simple_mtx_unlock(&ws->global_bo_list_lock); if (r) { fprintf(stderr, "amdgpu: buffer list creation failed (%d)\n", r); goto cleanup; } - } else if (!use_bo_list_create) { - /* Standard path passing the buffer list via the CS ioctl. */ + } else { if (!amdgpu_add_sparse_backing_buffers(cs)) { fprintf(stderr, "amdgpu: amdgpu_add_sparse_backing_buffers failed\n"); r = -ENOMEM; @@ -1350,52 +1351,27 @@ ++num_handles; } - bo_list_in.operation = ~0; - bo_list_in.list_handle = ~0; - bo_list_in.bo_number = num_handles; - bo_list_in.bo_info_size = sizeof(struct drm_amdgpu_bo_list_entry); - bo_list_in.bo_info_ptr = (uint64_t)(uintptr_t)list; - } else { - /* Legacy path creating the buffer list handle and passing it to the CS ioctl. 
*/ - unsigned num_handles; - - if (!amdgpu_add_sparse_backing_buffers(cs)) { - fprintf(stderr, "amdgpu: amdgpu_add_sparse_backing_buffers failed\n"); - r = -ENOMEM; - goto cleanup; - } - - amdgpu_bo_handle *handles = alloca(sizeof(*handles) * cs->num_real_buffers); - uint8_t *flags = alloca(sizeof(*flags) * cs->num_real_buffers); - - num_handles = 0; - for (i = 0; i < cs->num_real_buffers; ++i) { - struct amdgpu_cs_buffer *buffer = &cs->real_buffers[i]; - - if (buffer->bo->is_local) - continue; - - assert(buffer->u.real.priority_usage != 0); - - handles[num_handles] = buffer->bo->bo; - flags[num_handles] = (util_last_bit(buffer->u.real.priority_usage) - 1) / 2; - ++num_handles; - } - - if (num_handles) { - r = amdgpu_bo_list_create(ws->dev, num_handles, - handles, flags, &bo_list); + if (use_bo_list_create) { + /* Legacy path creating the buffer list handle and passing it to the CS ioctl. */ + r = amdgpu_bo_list_create_raw(ws->dev, num_handles, list, &bo_list); if (r) { fprintf(stderr, "amdgpu: buffer list creation failed (%d)\n", r); goto cleanup; } + } else { + /* Standard path passing the buffer list via the CS ioctl. */ + bo_list_in.operation = ~0; + bo_list_in.list_handle = ~0; + bo_list_in.bo_number = num_handles; + bo_list_in.bo_info_size = sizeof(struct drm_amdgpu_bo_list_entry); + bo_list_in.bo_info_ptr = (uint64_t)(uintptr_t)list; } } if (acs->ring_type == RING_GFX) ws->gfx_bo_list_counter += cs->num_real_buffers; - if (acs->ctx->num_rejected_cs) { + if (acs->stop_exec_on_failure && acs->ctx->num_rejected_cs) { r = -ECANCELED; } else { struct drm_amdgpu_cs_chunk chunks[6]; @@ -1499,8 +1475,8 @@ assert(num_chunks <= ARRAY_SIZE(chunks)); - r = amdgpu_cs_submit_raw(ws->dev, acs->ctx->ctx, bo_list, - num_chunks, chunks, &seq_no); + r = amdgpu_cs_submit_raw2(ws->dev, acs->ctx->ctx, bo_list, + num_chunks, chunks, &seq_no); } if (r) { @@ -1525,7 +1501,7 @@ /* Cleanup. 
*/ if (bo_list) - amdgpu_bo_list_destroy(bo_list); + amdgpu_bo_list_destroy_raw(ws->dev, bo_list); cleanup: /* If there was an error, signal the fence, because it won't be signalled diff -Nru mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h --- mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h 2019-03-31 23:16:37.000000000 +0000 @@ -129,6 +129,7 @@ /* Flush CS. */ void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence); void *flush_data; + bool stop_exec_on_failure; struct util_queue_fence flush_completed; struct pipe_fence_handle *next_fence; @@ -169,11 +170,11 @@ static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst, struct pipe_fence_handle *src) { - struct amdgpu_fence **rdst = (struct amdgpu_fence **)dst; - struct amdgpu_fence *rsrc = (struct amdgpu_fence *)src; + struct amdgpu_fence **adst = (struct amdgpu_fence **)dst; + struct amdgpu_fence *asrc = (struct amdgpu_fence *)src; - if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) { - struct amdgpu_fence *fence = *rdst; + if (pipe_reference(&(*adst)->reference, &asrc->reference)) { + struct amdgpu_fence *fence = *adst; if (amdgpu_fence_is_syncobj(fence)) amdgpu_cs_destroy_syncobj(fence->ws->dev, fence->syncobj); @@ -183,7 +184,7 @@ util_queue_fence_destroy(&fence->submitted); FREE(fence); } - *rdst = rsrc; + *adst = asrc; } int amdgpu_lookup_buffer(struct amdgpu_cs_context *cs, struct amdgpu_winsys_bo *bo); diff -Nru mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c --- mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c 2019-03-31 23:16:37.000000000 +0000 @@ -38,6 +38,7 @@ #include #include #include +#include "amd/common/ac_llvm_util.h" 
#include "amd/common/sid.h" #include "amd/common/gfx9d.h" @@ -50,6 +51,39 @@ DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", false) +static void handle_env_var_force_family(struct amdgpu_winsys *ws) +{ + const char *family = debug_get_option("SI_FORCE_FAMILY", NULL); + unsigned i; + + if (!family) + return; + + for (i = CHIP_TAHITI; i < CHIP_LAST; i++) { + if (!strcmp(family, ac_get_llvm_processor_name(i))) { + /* Override family and chip_class. */ + ws->info.family = i; + ws->info.name = "GCN-NOOP"; + + if (i >= CHIP_VEGA10) + ws->info.chip_class = GFX9; + else if (i >= CHIP_TONGA) + ws->info.chip_class = VI; + else if (i >= CHIP_BONAIRE) + ws->info.chip_class = CIK; + else + ws->info.chip_class = SI; + + /* Don't submit any IBs. */ + setenv("RADEON_NOOP", "1", 1); + return; + } + } + + fprintf(stderr, "radeonsi: Unknown family: %s\n", family); + exit(1); +} + /* Helper function to do the ioctls needed for setup and init. */ static bool do_winsys_init(struct amdgpu_winsys *ws, const struct pipe_screen_config *config, @@ -58,6 +92,8 @@ if (!ac_query_gpu_info(fd, ws->dev, &ws->info, &ws->amdinfo)) goto fail; + handle_env_var_force_family(ws); + ws->addrlib = amdgpu_addr_create(&ws->info, &ws->amdinfo, &ws->info.max_alignment); if (!ws->addrlib) { fprintf(stderr, "amdgpu: Cannot create addrlib.\n"); @@ -95,7 +131,10 @@ util_queue_destroy(&ws->cs_queue); simple_mtx_destroy(&ws->bo_fence_lock); - pb_slabs_deinit(&ws->bo_slabs); + for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { + if (ws->bo_slabs[i].groups) + pb_slabs_deinit(&ws->bo_slabs[i]); + } pb_cache_deinit(&ws->bo_cache); util_hash_table_destroy(ws->bo_export_table); simple_mtx_destroy(&ws->global_bo_list_lock); @@ -307,16 +346,33 @@ (ws->info.vram_size + ws->info.gart_size) / 8, amdgpu_bo_destroy, amdgpu_bo_can_reclaim); - if (!pb_slabs_init(&ws->bo_slabs, - AMDGPU_SLAB_MIN_SIZE_LOG2, AMDGPU_SLAB_MAX_SIZE_LOG2, - RADEON_MAX_SLAB_HEAPS, - ws, - amdgpu_bo_can_reclaim_slab, - amdgpu_bo_slab_alloc, - 
amdgpu_bo_slab_free)) - goto fail_cache; + unsigned min_slab_order = 9; /* 512 bytes */ + unsigned max_slab_order = 18; /* 256 KB - higher numbers increase memory usage */ + unsigned num_slab_orders_per_allocator = (max_slab_order - min_slab_order) / + NUM_SLAB_ALLOCATORS; + + /* Divide the size order range among slab managers. */ + for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { + unsigned min_order = min_slab_order; + unsigned max_order = MIN2(min_order + num_slab_orders_per_allocator, + max_slab_order); + + if (!pb_slabs_init(&ws->bo_slabs[i], + min_order, max_order, + RADEON_MAX_SLAB_HEAPS, + ws, + amdgpu_bo_can_reclaim_slab, + amdgpu_bo_slab_alloc, + amdgpu_bo_slab_free)) { + amdgpu_winsys_destroy(&ws->base); + simple_mtx_unlock(&dev_tab_mutex); + return NULL; + } + + min_slab_order = max_order + 1; + } - ws->info.min_alloc_size = 1 << AMDGPU_SLAB_MIN_SIZE_LOG2; + ws->info.min_alloc_size = 1 << ws->bo_slabs[0].min_order; /* init reference */ pipe_reference_init(&ws->reference, 1); diff -Nru mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h --- mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h 2019-03-31 23:16:37.000000000 +0000 @@ -31,22 +31,24 @@ #include "pipebuffer/pb_cache.h" #include "pipebuffer/pb_slab.h" #include "gallium/drivers/radeon/radeon_winsys.h" -#include "addrlib/addrinterface.h" +#include "addrlib/inc/addrinterface.h" #include "util/simple_mtx.h" #include "util/u_queue.h" #include struct amdgpu_cs; -#define AMDGPU_SLAB_MIN_SIZE_LOG2 9 /* 512 bytes */ -#define AMDGPU_SLAB_MAX_SIZE_LOG2 16 /* 64 KB */ -#define AMDGPU_SLAB_BO_SIZE_LOG2 17 /* 128 KB */ +#define NUM_SLAB_ALLOCATORS 3 struct amdgpu_winsys { struct radeon_winsys base; struct pipe_reference reference; struct pb_cache bo_cache; - struct pb_slabs bo_slabs; + + /* Each slab buffer can only contain suballocations 
of equal sizes, so we + * need to layer the allocators, so that we don't waste too much memory. + */ + struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS]; amdgpu_device_handle dev; diff -Nru mesa-18.3.3/src/gallium/winsys/amdgpu/drm/Makefile.am mesa-19.0.1/src/gallium/winsys/amdgpu/drm/Makefile.am --- mesa-18.3.3/src/gallium/winsys/amdgpu/drm/Makefile.am 2018-01-24 16:24:53.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/amdgpu/drm/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -4,6 +4,7 @@ AM_CFLAGS = \ $(GALLIUM_WINSYS_CFLAGS) \ $(AMDGPU_CFLAGS) \ + $(LLVM_CFLAGS) \ -I$(top_srcdir)/src/amd/ AM_CXXFLAGS = $(AM_CFLAGS) diff -Nru mesa-18.3.3/src/gallium/winsys/amdgpu/drm/meson.build mesa-19.0.1/src/gallium/winsys/amdgpu/drm/meson.build --- mesa-18.3.3/src/gallium/winsys/amdgpu/drm/meson.build 2017-11-14 18:46:21.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/amdgpu/drm/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -31,5 +31,5 @@ c_args : [c_vis_args], cpp_args : [cpp_vis_args], link_with : libamdgpu_addrlib, - dependencies : dep_libdrm_amdgpu, + dependencies : [dep_llvm, dep_libdrm_amdgpu], ) diff -Nru mesa-18.3.3/src/gallium/winsys/freedreno/drm/freedreno_drm_public.h mesa-19.0.1/src/gallium/winsys/freedreno/drm/freedreno_drm_public.h --- mesa-18.3.3/src/gallium/winsys/freedreno/drm/freedreno_drm_public.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/freedreno/drm/freedreno_drm_public.h 2019-03-31 23:16:37.000000000 +0000 @@ -3,7 +3,8 @@ #define __FREEDRENO_DRM_PUBLIC_H__ struct pipe_screen; +struct renderonly; -struct pipe_screen *fd_drm_screen_create(int drmFD); +struct pipe_screen *fd_drm_screen_create(int drmFD, struct renderonly *ro); #endif diff -Nru mesa-18.3.3/src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c mesa-19.0.1/src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c --- mesa-18.3.3/src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c 2019-03-31 23:16:37.000000000 +0000 @@ -85,7 +85,7 @@ } struct pipe_screen * -fd_drm_screen_create(int fd) +fd_drm_screen_create(int fd, struct renderonly *ro) { struct pipe_screen *pscreen = NULL; @@ -104,7 +104,7 @@ if (!dev) goto unlock; - pscreen = fd_screen_create(dev); + pscreen = fd_screen_create(dev, ro); if (pscreen) { int fd = fd_device_fd(dev); diff -Nru mesa-18.3.3/src/gallium/winsys/freedreno/drm/Makefile.am mesa-19.0.1/src/gallium/winsys/freedreno/drm/Makefile.am --- mesa-18.3.3/src/gallium/winsys/freedreno/drm/Makefile.am 2018-01-24 16:24:53.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/freedreno/drm/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -25,6 +25,8 @@ AM_CFLAGS = \ -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/freedreno \ + -I$(top_srcdir)/src/freedreno/registers \ $(GALLIUM_WINSYS_CFLAGS) \ $(FREEDRENO_CFLAGS) diff -Nru mesa-18.3.3/src/gallium/winsys/freedreno/drm/meson.build mesa-19.0.1/src/gallium/winsys/freedreno/drm/meson.build --- mesa-18.3.3/src/gallium/winsys/freedreno/drm/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/freedreno/drm/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -23,6 +23,7 @@ files('freedreno_drm_public.h', 'freedreno_drm_winsys.c'), include_directories : [ inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_gallium_drivers, + inc_freedreno, ], c_args : [c_vis_args], dependencies : [dep_libdrm], diff -Nru mesa-18.3.3/src/gallium/winsys/imx/drm/Android.mk mesa-19.0.1/src/gallium/winsys/imx/drm/Android.mk --- mesa-18.3.3/src/gallium/winsys/imx/drm/Android.mk 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/imx/drm/Android.mk 1970-01-01 00:00:00.000000000 +0000 @@ -1,40 +0,0 @@ -# Copyright (C) 2016 Linaro, Ltd, Rob Herring -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the 
"Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -LOCAL_PATH := $(call my-dir) - -include $(LOCAL_PATH)/Makefile.sources - -include $(CLEAR_VARS) - -LOCAL_SRC_FILES := $(C_SOURCES) - -LOCAL_SHARED_LIBRARIES := libdrm_etnaviv - -LOCAL_MODULE := libmesa_winsys_imx - -include $(GALLIUM_COMMON_MK) -include $(BUILD_STATIC_LIBRARY) - -ifneq ($(HAVE_GALLIUM_IMX),) -GALLIUM_TARGET_DRIVERS += imx-drm -$(eval GALLIUM_LIBS += $(LOCAL_MODULE) libmesa_winsys_etnaviv) -$(eval GALLIUM_SHARED_LIBS += $(LOCAL_SHARED_LIBRARIES)) -endif diff -Nru mesa-18.3.3/src/gallium/winsys/imx/drm/imx_drm_public.h mesa-19.0.1/src/gallium/winsys/imx/drm/imx_drm_public.h --- mesa-18.3.3/src/gallium/winsys/imx/drm/imx_drm_public.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/imx/drm/imx_drm_public.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,34 +0,0 @@ -/* - * Copyright (C) 2016 Christian Gmeiner - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, 
including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Christian Gmeiner - */ - -#ifndef __IMX_DRM_PUBLIC_H__ -#define __IMX_DRM_PUBLIC_H__ - -struct pipe_screen; - -struct pipe_screen *imx_drm_screen_create(int fd); - -#endif /* __IMX_DRM_PUBLIC_H__ */ diff -Nru mesa-18.3.3/src/gallium/winsys/imx/drm/imx_drm_winsys.c mesa-19.0.1/src/gallium/winsys/imx/drm/imx_drm_winsys.c --- mesa-18.3.3/src/gallium/winsys/imx/drm/imx_drm_winsys.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/imx/drm/imx_drm_winsys.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,50 +0,0 @@ -/* - * Copyright (C) 2016 Christian Gmeiner - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above 
copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Christian Gmeiner - */ - -#include "imx_drm_public.h" -#include "etnaviv/drm/etnaviv_drm_public.h" -#include "renderonly/renderonly.h" - -#include -#include - -struct pipe_screen *imx_drm_screen_create(int fd) -{ - struct renderonly ro = { - .create_for_resource = renderonly_create_kms_dumb_buffer_for_resource, - .kms_fd = fd, - .gpu_fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC) - }; - - if (ro.gpu_fd < 0) - return NULL; - - struct pipe_screen *screen = etna_drm_screen_create_renderonly(&ro); - if (!screen) - close(ro.gpu_fd); - - return screen; -} diff -Nru mesa-18.3.3/src/gallium/winsys/imx/drm/Makefile.am mesa-19.0.1/src/gallium/winsys/imx/drm/Makefile.am --- mesa-18.3.3/src/gallium/winsys/imx/drm/Makefile.am 2018-01-24 16:24:53.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/imx/drm/Makefile.am 1970-01-01 00:00:00.000000000 +0000 @@ -1,35 +0,0 @@ -# Copyright © 2012 Intel Corporation -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, 
subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -include Makefile.sources -include $(top_srcdir)/src/gallium/Automake.inc - -AM_CFLAGS = \ - -I$(top_srcdir)/src/gallium/drivers \ - -I$(top_srcdir)/src/gallium/winsys \ - $(GALLIUM_WINSYS_CFLAGS) - -noinst_LTLIBRARIES = libimxdrm.la - -libimxdrm_la_SOURCES = $(C_SOURCES) - -EXTRA_DIST = meson.build diff -Nru mesa-18.3.3/src/gallium/winsys/imx/drm/Makefile.sources mesa-19.0.1/src/gallium/winsys/imx/drm/Makefile.sources --- mesa-18.3.3/src/gallium/winsys/imx/drm/Makefile.sources 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/imx/drm/Makefile.sources 1970-01-01 00:00:00.000000000 +0000 @@ -1,3 +0,0 @@ -C_SOURCES := \ - imx_drm_public.h \ - imx_drm_winsys.c diff -Nru mesa-18.3.3/src/gallium/winsys/imx/drm/meson.build mesa-19.0.1/src/gallium/winsys/imx/drm/meson.build --- mesa-18.3.3/src/gallium/winsys/imx/drm/meson.build 2017-12-13 19:58:47.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/imx/drm/meson.build 1970-01-01 00:00:00.000000000 +0000 @@ -1,33 +0,0 @@ -# Copyright © 2017 Intel Corporation - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, 
modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -libimxdrm = static_library( - 'imxdrm', - 'imx_drm_winsys.c', - include_directories : [ - inc_include, inc_src, inc_gallium, inc_gallium_aux, - include_directories('../..'), - ], -) - -driver_imx = declare_dependency( - compile_args : '-DGALLIUM_IMX', - link_with : libimxdrm, -) diff -Nru mesa-18.3.3/src/gallium/winsys/kmsro/drm/Android.mk mesa-19.0.1/src/gallium/winsys/kmsro/drm/Android.mk --- mesa-18.3.3/src/gallium/winsys/kmsro/drm/Android.mk 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/kmsro/drm/Android.mk 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,33 @@ +# Copyright (C) 2014 Emil Velikov +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or 
substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +# get C_SOURCES +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(C_SOURCES) + +LOCAL_MODULE := libmesa_winsys_kmsro + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff -Nru mesa-18.3.3/src/gallium/winsys/kmsro/drm/kmsro_drm_public.h mesa-19.0.1/src/gallium/winsys/kmsro/drm/kmsro_drm_public.h --- mesa-18.3.3/src/gallium/winsys/kmsro/drm/kmsro_drm_public.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/kmsro/drm/kmsro_drm_public.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2016 Christian Gmeiner + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Christian Gmeiner + */ + +#ifndef __KMSRO_DRM_PUBLIC_H__ +#define __KMSRO_DRM_PUBLIC_H__ + +struct pipe_screen; + +struct pipe_screen *kmsro_drm_screen_create(int fd); + +#endif /* __KMSRO_DRM_PUBLIC_H__ */ diff -Nru mesa-18.3.3/src/gallium/winsys/kmsro/drm/kmsro_drm_winsys.c mesa-19.0.1/src/gallium/winsys/kmsro/drm/kmsro_drm_winsys.c --- mesa-18.3.3/src/gallium/winsys/kmsro/drm/kmsro_drm_winsys.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/kmsro/drm/kmsro_drm_winsys.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2016 Christian Gmeiner + * Copyright (C) 2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "kmsro_drm_public.h" +#include "vc4/drm/vc4_drm_public.h" +#include "etnaviv/drm/etnaviv_drm_public.h" +#include "freedreno/drm/freedreno_drm_public.h" +#include "xf86drm.h" + +#include "pipe/p_screen.h" +#include "renderonly/renderonly.h" + +struct pipe_screen *kmsro_drm_screen_create(int fd) +{ + struct pipe_screen *screen = NULL; + struct renderonly ro = { + .kms_fd = fd, + .gpu_fd = -1, + }; + +#if defined(GALLIUM_VC4) + ro.gpu_fd = drmOpenWithType("vc4", NULL, DRM_NODE_RENDER); + if (ro.gpu_fd >= 0) { + /* Passes the vc4-allocated BO through to the KMS-only DRM device using + * PRIME buffer sharing. The VC4 BO must be linear, which the SCANOUT + * flag on allocation will have ensured. 
+ */ + ro.create_for_resource = renderonly_create_gpu_import_for_resource, + screen = vc4_drm_screen_create_renderonly(&ro); + if (!screen) + close(ro.gpu_fd); + + return screen; + } +#endif + +#if defined(GALLIUM_ETNAVIV) + ro.gpu_fd = drmOpenWithType("etnaviv", NULL, DRM_NODE_RENDER); + if (ro.gpu_fd >= 0) { + ro.create_for_resource = renderonly_create_kms_dumb_buffer_for_resource, + screen = etna_drm_screen_create_renderonly(&ro); + if (!screen) + close(ro.gpu_fd); + + return screen; + } +#endif + +#if defined(GALLIUM_FREEDRENO) + ro.gpu_fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER); + if (ro.gpu_fd >= 0) { + ro.create_for_resource = renderonly_create_kms_dumb_buffer_for_resource, + screen = fd_drm_screen_create(ro.gpu_fd, &ro); + if (!screen) + close(ro.gpu_fd); + + return screen; + } +#endif + + return screen; +} diff -Nru mesa-18.3.3/src/gallium/winsys/kmsro/drm/Makefile.am mesa-19.0.1/src/gallium/winsys/kmsro/drm/Makefile.am --- mesa-18.3.3/src/gallium/winsys/kmsro/drm/Makefile.am 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/kmsro/drm/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,48 @@ +# Copyright © 2012 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +include Makefile.sources +include $(top_srcdir)/src/gallium/Automake.inc + +AM_CFLAGS = \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/gallium/winsys \ + $(GALLIUM_WINSYS_CFLAGS) \ + $(LIBDRM_CFLAGS) + +if HAVE_GALLIUM_ETNAVIV +AM_CFLAGS += -DGALLIUM_ETNAVIV +endif + +if HAVE_GALLIUM_VC4 +AM_CFLAGS += -DGALLIUM_VC4 +endif + +if HAVE_GALLIUM_FREEDRENO +AM_CFLAGS += -DGALLIUM_FREEDRENO +endif + +noinst_LTLIBRARIES = libkmsrodrm.la + +libkmsrodrm_la_SOURCES = $(C_SOURCES) + +EXTRA_DIST = meson.build diff -Nru mesa-18.3.3/src/gallium/winsys/kmsro/drm/Makefile.sources mesa-19.0.1/src/gallium/winsys/kmsro/drm/Makefile.sources --- mesa-18.3.3/src/gallium/winsys/kmsro/drm/Makefile.sources 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/kmsro/drm/Makefile.sources 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,3 @@ +C_SOURCES := \ + kmsro_drm_public.h \ + kmsro_drm_winsys.c diff -Nru mesa-18.3.3/src/gallium/winsys/kmsro/drm/meson.build mesa-19.0.1/src/gallium/winsys/kmsro/drm/meson.build --- mesa-18.3.3/src/gallium/winsys/kmsro/drm/meson.build 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/kmsro/drm/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,46 @@ +# Copyright © 2017 Broadcom +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights 
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +kmsro_c_args = [] +if with_gallium_etnaviv + kmsro_c_args += '-DGALLIUM_ETNAVIV' +endif +if with_gallium_vc4 + kmsro_c_args += '-DGALLIUM_VC4' +endif +if with_gallium_freedreno + kmsro_c_args += '-DGALLIUM_FREEDRENO' +endif + +libkmsrowinsys = static_library( + 'kmsrowinsys', + files('kmsro_drm_winsys.c'), + include_directories : [ + inc_src, inc_include, + inc_gallium, inc_gallium_aux, inc_gallium_winsys, + ], + c_args : [c_vis_args, kmsro_c_args], + dependencies: dep_libdrm, +) + +driver_kmsro = declare_dependency( + compile_args : '-DGALLIUM_KMSRO', + link_with : libkmsrowinsys, +) diff -Nru mesa-18.3.3/src/gallium/winsys/pl111/drm/Android.mk mesa-19.0.1/src/gallium/winsys/pl111/drm/Android.mk --- mesa-18.3.3/src/gallium/winsys/pl111/drm/Android.mk 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/pl111/drm/Android.mk 1970-01-01 00:00:00.000000000 +0000 @@ -1,33 +0,0 @@ -# Copyright (C) 2014 Emil Velikov -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without 
limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -LOCAL_PATH := $(call my-dir) - -# get C_SOURCES -include $(LOCAL_PATH)/Makefile.sources - -include $(CLEAR_VARS) - -LOCAL_SRC_FILES := $(C_SOURCES) - -LOCAL_MODULE := libmesa_winsys_pl111 - -include $(GALLIUM_COMMON_MK) -include $(BUILD_STATIC_LIBRARY) diff -Nru mesa-18.3.3/src/gallium/winsys/pl111/drm/Makefile.am mesa-19.0.1/src/gallium/winsys/pl111/drm/Makefile.am --- mesa-18.3.3/src/gallium/winsys/pl111/drm/Makefile.am 2018-01-24 16:24:53.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/pl111/drm/Makefile.am 1970-01-01 00:00:00.000000000 +0000 @@ -1,36 +0,0 @@ -# Copyright © 2012 Intel Corporation -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next 
-# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -include Makefile.sources -include $(top_srcdir)/src/gallium/Automake.inc - -AM_CFLAGS = \ - -I$(top_srcdir)/src/gallium/drivers \ - -I$(top_srcdir)/src/gallium/winsys \ - $(GALLIUM_WINSYS_CFLAGS) \ - $(LIBDRM_CFLAGS) - -noinst_LTLIBRARIES = libpl111drm.la - -libpl111drm_la_SOURCES = $(C_SOURCES) - -EXTRA_DIST = meson.build diff -Nru mesa-18.3.3/src/gallium/winsys/pl111/drm/Makefile.sources mesa-19.0.1/src/gallium/winsys/pl111/drm/Makefile.sources --- mesa-18.3.3/src/gallium/winsys/pl111/drm/Makefile.sources 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/pl111/drm/Makefile.sources 1970-01-01 00:00:00.000000000 +0000 @@ -1,3 +0,0 @@ -C_SOURCES := \ - pl111_drm_public.h \ - pl111_drm_winsys.c diff -Nru mesa-18.3.3/src/gallium/winsys/pl111/drm/meson.build mesa-19.0.1/src/gallium/winsys/pl111/drm/meson.build --- mesa-18.3.3/src/gallium/winsys/pl111/drm/meson.build 2017-12-13 19:58:47.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/pl111/drm/meson.build 1970-01-01 00:00:00.000000000 +0000 @@ -1,36 +0,0 @@ -# Copyright © 2017 Broadcom -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, 
and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -libpl111winsys = static_library( - 'pl111winsys', - files('pl111_drm_winsys.c'), - include_directories : [ - inc_src, inc_include, - inc_gallium, inc_gallium_aux, inc_gallium_winsys, - ], - c_args : [c_vis_args], - dependencies: dep_libdrm, - link_with : libvc4winsys, -) - -driver_pl111 = declare_dependency( - compile_args : '-DGALLIUM_PL111', - link_with : libpl111winsys, -) diff -Nru mesa-18.3.3/src/gallium/winsys/pl111/drm/pl111_drm_public.h mesa-19.0.1/src/gallium/winsys/pl111/drm/pl111_drm_public.h --- mesa-18.3.3/src/gallium/winsys/pl111/drm/pl111_drm_public.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/pl111/drm/pl111_drm_public.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,34 +0,0 @@ -/* - * Copyright (C) 2016 Christian Gmeiner - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice 
and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Christian Gmeiner - */ - -#ifndef __PL111_DRM_PUBLIC_H__ -#define __PL111_DRM_PUBLIC_H__ - -struct pipe_screen; - -struct pipe_screen *pl111_drm_screen_create(int fd); - -#endif /* __PL111_DRM_PUBLIC_H__ */ diff -Nru mesa-18.3.3/src/gallium/winsys/pl111/drm/pl111_drm_winsys.c mesa-19.0.1/src/gallium/winsys/pl111/drm/pl111_drm_winsys.c --- mesa-18.3.3/src/gallium/winsys/pl111/drm/pl111_drm_winsys.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/pl111/drm/pl111_drm_winsys.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,55 +0,0 @@ -/* - * Copyright (C) 2016 Christian Gmeiner - * Copyright (C) 2017 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include - -#include "pl111_drm_public.h" -#include "vc4/drm/vc4_drm_public.h" -#include "xf86drm.h" - -#include "pipe/p_screen.h" -#include "renderonly/renderonly.h" - -struct pipe_screen *pl111_drm_screen_create(int fd) -{ - struct renderonly ro = { - /* Passes the vc4-allocated BO through to the pl111 DRM device using - * PRIME buffer sharing. The VC4 BO must be linear, which the SCANOUT - * flag on allocation will have ensured. - */ - .create_for_resource = renderonly_create_gpu_import_for_resource, - .kms_fd = fd, - .gpu_fd = drmOpenWithType("vc4", NULL, DRM_NODE_RENDER), - }; - - if (ro.gpu_fd < 0) - return NULL; - - struct pipe_screen *screen = vc4_drm_screen_create_renderonly(&ro); - if (!screen) - close(ro.gpu_fd); - - return screen; -} diff -Nru mesa-18.3.3/src/gallium/winsys/radeon/drm/radeon_drm_bo.c mesa-19.0.1/src/gallium/winsys/radeon/drm/radeon_drm_bo.c --- mesa-18.3.3/src/gallium/winsys/radeon/drm/radeon_drm_bo.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/radeon/drm/radeon_drm_bo.c 2019-03-31 23:16:37.000000000 +0000 @@ -1134,6 +1134,7 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws, struct winsys_handle *whandle, + unsigned vm_alignment, unsigned *stride, unsigned *offset) { @@ -1239,7 +1240,7 @@ if (ws->info.r600_has_virtual_memory && !bo->va) { struct drm_radeon_gem_va va; - bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20); + bo->va = radeon_bomgr_find_va64(ws, 
bo->base.size, vm_alignment); va.handle = bo->handle; va.operation = RADEON_VA_MAP; diff -Nru mesa-18.3.3/src/gallium/winsys/radeon/drm/radeon_drm_cs.c mesa-19.0.1/src/gallium/winsys/radeon/drm/radeon_drm_cs.c --- mesa-18.3.3/src/gallium/winsys/radeon/drm/radeon_drm_cs.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/radeon/drm/radeon_drm_cs.c 2019-03-31 23:16:37.000000000 +0000 @@ -150,7 +150,8 @@ enum ring_type ring_type, void (*flush)(void *ctx, unsigned flags, struct pipe_fence_handle **fence), - void *flush_ctx) + void *flush_ctx, + bool stop_exec_on_failure) { struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)ctx; struct radeon_drm_cs *cs; diff -Nru mesa-18.3.3/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c mesa-19.0.1/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c --- mesa-18.3.3/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 2019-03-31 23:16:37.000000000 +0000 @@ -589,6 +589,7 @@ /* 2D tiling on CIK is supported since DRM 2.35.0 */ ws->info.has_2d_tiling = ws->info.chip_class <= SI || ws->info.drm_minor >= 35; ws->info.has_read_registers_query = ws->info.drm_minor >= 42; + ws->info.max_alignment = 1024*1024; ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL; diff -Nru mesa-18.3.3/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c mesa-19.0.1/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c --- mesa-18.3.3/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c 2019-03-31 23:16:37.000000000 +0000 @@ -396,6 +396,7 @@ { struct xlib_displaytarget *xlib_dt; unsigned nblocksy, size; + int ignore; xlib_dt = CALLOC_STRUCT(xlib_displaytarget); if (!xlib_dt) @@ -410,7 +411,8 @@ xlib_dt->stride = align(util_format_get_stride(format, width), alignment); size = xlib_dt->stride * nblocksy; - if (!debug_get_option_xlib_no_shm()) { + 
if (!debug_get_option_xlib_no_shm() && + XQueryExtension(xlib_dt->display, "MIT-SHM", &ignore, &ignore, &ignore)) { xlib_dt->data = alloc_shm(xlib_dt, size); if (xlib_dt->data) { xlib_dt->shm = True; diff -Nru mesa-18.3.3/src/gallium/winsys/v3d/drm/v3d_drm_public.h mesa-19.0.1/src/gallium/winsys/v3d/drm/v3d_drm_public.h --- mesa-18.3.3/src/gallium/winsys/v3d/drm/v3d_drm_public.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/v3d/drm/v3d_drm_public.h 2019-03-31 23:16:37.000000000 +0000 @@ -25,7 +25,9 @@ #define __VC5_DRM_PUBLIC_H__ struct pipe_screen; +struct renderonly; struct pipe_screen *v3d_drm_screen_create(int drmFD); +struct pipe_screen *v3d_drm_screen_create_renderonly(struct renderonly *ro); #endif /* __VC5_DRM_PUBLIC_H__ */ diff -Nru mesa-18.3.3/src/gallium/winsys/v3d/drm/v3d_drm_winsys.c mesa-19.0.1/src/gallium/winsys/v3d/drm/v3d_drm_winsys.c --- mesa-18.3.3/src/gallium/winsys/v3d/drm/v3d_drm_winsys.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/v3d/drm/v3d_drm_winsys.c 2019-03-31 23:16:37.000000000 +0000 @@ -31,5 +31,11 @@ struct pipe_screen * v3d_drm_screen_create(int fd) { - return v3d_screen_create(fcntl(fd, F_DUPFD_CLOEXEC, 3)); + return v3d_screen_create(fcntl(fd, F_DUPFD_CLOEXEC, 3), NULL); +} + +struct pipe_screen * +v3d_drm_screen_create_renderonly(struct renderonly *ro) +{ + return v3d_screen_create(ro->gpu_fd, ro); } diff -Nru mesa-18.3.3/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c mesa-19.0.1/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c --- mesa-18.3.3/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c 2019-03-31 23:16:37.000000000 +0000 @@ -38,11 +38,17 @@ #include "virgl/virgl_public.h" #include +#include #include "virtgpu_drm.h" #include "virgl_drm_winsys.h" #include "virgl_drm_public.h" + +#define VIRGL_DRM_VERSION(major, minor) ((major) << 16 | (minor)) +#define 
VIRGL_DRM_VERSION_FENCE_FD VIRGL_DRM_VERSION(1, 0) + + static inline boolean can_cache_resource(struct virgl_hw_res *res) { return res->cacheable == TRUE; @@ -70,6 +76,9 @@ if (res->ptr) os_munmap(res->ptr, res->size); + if (res->fence_fd != -1) + close(res->fence_fd); + memset(&args, 0, sizeof(args)); args.handle = res->bo_handle; drmIoctl(qdws->fd, DRM_IOCTL_GEM_CLOSE, &args); @@ -222,6 +231,7 @@ res->stride = stride; pipe_reference_init(&res->reference, 1); res->num_cs_references = 0; + res->fence_fd = -1; return res; } @@ -457,6 +467,7 @@ res->stride = info_arg.stride; pipe_reference_init(&res->reference, 1); res->num_cs_references = 0; + res->fence_fd = -1; util_hash_table_set(qdws->bo_handles, (void *)(uintptr_t)handle, res); @@ -577,6 +588,7 @@ } cbuf->base.buf = cbuf->buf; + cbuf->base.in_fence_fd = -1; return &cbuf->base; } @@ -687,7 +699,8 @@ } static int virgl_drm_winsys_submit_cmd(struct virgl_winsys *qws, - struct virgl_cmd_buf *_cbuf) + struct virgl_cmd_buf *_cbuf, + int in_fence_fd, int *out_fence_fd) { struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws); struct virgl_drm_cmd_buf *cbuf = virgl_drm_cmd_buf(_cbuf); @@ -702,12 +715,24 @@ eb.size = cbuf->base.cdw * 4; eb.num_bo_handles = cbuf->cres; eb.bo_handles = (unsigned long)(void *)cbuf->res_hlist; + eb.fence_fd = -1; + + if (in_fence_fd != -1) { + eb.flags |= VIRTGPU_EXECBUF_FENCE_FD_IN; + eb.fence_fd = in_fence_fd; + } + + if (out_fence_fd != NULL) + eb.flags |= VIRTGPU_EXECBUF_FENCE_FD_OUT; ret = drmIoctl(qdws->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &eb); if (ret == -1) fprintf(stderr,"got error from kernel - expect bad rendering %d\n", errno); cbuf->base.cdw = 0; + if (out_fence_fd != NULL) + *out_fence_fd = eb.fence_fd; + virgl_drm_release_all_res(qdws, cbuf); memset(cbuf->is_handle_added, 0, sizeof(cbuf->is_handle_added)); @@ -759,7 +784,7 @@ } static struct pipe_fence_handle * -virgl_cs_create_fence(struct virgl_winsys *vws) +virgl_cs_create_fence(struct virgl_winsys *vws, int fd) { struct 
virgl_hw_res *res; @@ -769,6 +794,7 @@ VIRGL_BIND_CUSTOM, 8, 1, 1, 0, 0, 0, 8); + res->fence_fd = fd; return (struct pipe_fence_handle *)res; } @@ -793,6 +819,12 @@ return TRUE; } virgl_drm_resource_wait(vws, res); + + if (res->fence_fd != -1) { + int ret = sync_wait(res->fence_fd, timeout / 1000000); + return ret == 0; + } + return TRUE; } @@ -805,11 +837,51 @@ virgl_hw_res(src)); } +static void virgl_fence_server_sync(struct virgl_winsys *vws, + struct virgl_cmd_buf *cbuf, + struct pipe_fence_handle *fence) +{ + struct virgl_hw_res *hw_res = virgl_hw_res(fence); + + /* if not an external fence, then nothing more to do without preemption: */ + if (hw_res->fence_fd == -1) + return; + + sync_accumulate("virgl", &cbuf->in_fence_fd, hw_res->fence_fd); +} + +static int virgl_fence_get_fd(struct virgl_winsys *vws, + struct pipe_fence_handle *fence) +{ + struct virgl_hw_res *hw_res = virgl_hw_res(fence); + + return dup(hw_res->fence_fd); +} + +static int virgl_drm_get_version(int fd) +{ + int ret; + drmVersionPtr version; + + version = drmGetVersion(fd); + + if (!version) + ret = -EFAULT; + else if (version->version_major != 0) + ret = -EINVAL; + else + ret = version->version_minor; + + drmFreeVersion(version); + + return ret; +} static struct virgl_winsys * virgl_drm_winsys_create(int drmFD) { struct virgl_drm_winsys *qdws; + int drm_version; int ret; int gl = 0; struct drm_virtgpu_getparam getparam = {0}; @@ -820,6 +892,10 @@ if (ret < 0 || !gl) return NULL; + drm_version = virgl_drm_get_version(drmFD); + if (drm_version < 0) + return NULL; + qdws = CALLOC_STRUCT(virgl_drm_winsys); if (!qdws) return NULL; @@ -851,6 +927,9 @@ qdws->base.cs_create_fence = virgl_cs_create_fence; qdws->base.fence_wait = virgl_fence_wait; qdws->base.fence_reference = virgl_fence_reference; + qdws->base.fence_server_sync = virgl_fence_server_sync; + qdws->base.fence_get_fd = virgl_fence_get_fd; + qdws->base.supports_fences = drm_version >= VIRGL_DRM_VERSION_FENCE_FD; qdws->base.get_caps = 
virgl_drm_get_caps; diff -Nru mesa-18.3.3/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h mesa-19.0.1/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h --- mesa-18.3.3/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h 2019-03-31 23:16:37.000000000 +0000 @@ -50,6 +50,7 @@ int64_t start, end; boolean flinked; uint32_t flink; + int fence_fd; }; struct virgl_drm_winsys diff -Nru mesa-18.3.3/src/gallium/winsys/virgl/drm/virtgpu_drm.h mesa-19.0.1/src/gallium/winsys/virgl/drm/virtgpu_drm.h --- mesa-18.3.3/src/gallium/winsys/virgl/drm/virtgpu_drm.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/virgl/drm/virtgpu_drm.h 2019-03-31 23:16:37.000000000 +0000 @@ -44,6 +44,16 @@ #define DRM_VIRTGPU_WAIT 0x08 #define DRM_VIRTGPU_GET_CAPS 0x09 +/* + * virtgpu execbuffer flags + */ +#define VIRTGPU_EXECBUF_FENCE_FD_IN 0x01 +#define VIRTGPU_EXECBUF_FENCE_FD_OUT 0x02 +#define VIRTGPU_EXECBUF_FLAGS (\ + VIRTGPU_EXECBUF_FENCE_FD_IN |\ + VIRTGPU_EXECBUF_FENCE_FD_OUT |\ + 0) + struct drm_virtgpu_map { uint64_t offset; /* use for mmap system call */ uint32_t handle; @@ -56,7 +66,7 @@ uint64_t command; /* void* */ uint64_t bo_handles; uint32_t num_bo_handles; - uint32_t pad; + int32_t fence_fd; }; #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */ @@ -130,7 +140,7 @@ DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_MAP, struct drm_virtgpu_map) #define DRM_IOCTL_VIRTGPU_EXECBUFFER \ - DRM_IOW(DRM_COMMAND_BASE + DRM_VIRTGPU_EXECBUFFER,\ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_EXECBUFFER,\ struct drm_virtgpu_execbuffer) #define DRM_IOCTL_VIRTGPU_GETPARAM \ diff -Nru mesa-18.3.3/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c mesa-19.0.1/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c --- mesa-18.3.3/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c 2019-03-31 23:16:37.000000000 +0000 @@ -30,8 +30,6 @@ #include #include -/* connect to remote socket */ -#define VTEST_SOCKET_NAME "/tmp/.virgl_test" #include "virgl_vtest_winsys.h" #include "virgl_vtest_public.h" @@ -163,7 +161,7 @@ memset(&un, 0, sizeof(un)); un.sun_family = AF_UNIX; - snprintf(un.sun_path, sizeof(un.sun_path), "%s", VTEST_SOCKET_NAME); + snprintf(un.sun_path, sizeof(un.sun_path), "%s", VTEST_DEFAULT_SOCKET_NAME); do { ret = 0; diff -Nru mesa-18.3.3/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c mesa-19.0.1/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c --- mesa-18.3.3/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c 2019-03-31 23:16:37.000000000 +0000 @@ -344,7 +344,7 @@ struct virgl_hw_res *res, *curr_res; struct list_head *curr, *next; int64_t now; - int ret; + int ret = -1; /* only store binds for vertex/index/const buffers */ if (bind != VIRGL_BIND_CONSTANT_BUFFER && bind != VIRGL_BIND_INDEX_BUFFER && @@ -427,6 +427,7 @@ } cbuf->ws = vws; cbuf->base.buf = cbuf->buf; + cbuf->base.in_fence_fd = -1; return &cbuf->base; } @@ -501,7 +502,8 @@ } static int virgl_vtest_winsys_submit_cmd(struct virgl_winsys *vws, - struct virgl_cmd_buf *_cbuf) + struct virgl_cmd_buf *_cbuf, + int in_fence_fd, int *out_fence_fd) { struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws); struct virgl_vtest_cmd_buf *cbuf = virgl_vtest_cmd_buf(_cbuf); @@ -510,6 +512,9 @@ if (cbuf->base.cdw == 0) return 0; + assert(in_fence_fd == -1); + assert(out_fence_fd == NULL); + ret = virgl_vtest_submit_cmd(vtws, cbuf); virgl_vtest_release_all_res(vtws, cbuf); @@ -552,7 +557,7 @@ } static struct pipe_fence_handle * -virgl_cs_create_fence(struct virgl_winsys *vws) +virgl_cs_create_fence(struct virgl_winsys *vws, int fd) { struct virgl_hw_res *res; @@ -694,6 +699,7 @@ vtws->base.cs_create_fence = 
virgl_cs_create_fence; vtws->base.fence_wait = virgl_fence_wait; vtws->base.fence_reference = virgl_fence_reference; + vtws->base.supports_fences = 0; vtws->base.flush_frontbuffer = virgl_vtest_flush_frontbuffer; diff -Nru mesa-18.3.3/src/gbm/backends/dri/gbm_dri.c mesa-19.0.1/src/gbm/backends/dri/gbm_dri.c --- mesa-18.3.3/src/gbm/backends/dri/gbm_dri.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gbm/backends/dri/gbm_dri.c 2019-03-31 23:16:37.000000000 +0000 @@ -304,28 +304,6 @@ static const __DRIextension ** dri_open_driver(struct gbm_dri_device *dri) { - const __DRIextension **extensions = NULL; - char path[PATH_MAX], *search_paths, *p, *next, *end; - char *get_extensions_name; - - search_paths = NULL; - /* don't allow setuid apps to use LIBGL_DRIVERS_PATH or GBM_DRIVERS_PATH */ - if (geteuid() == getuid()) { - /* Read GBM_DRIVERS_PATH first for compatibility, but LIBGL_DRIVERS_PATH - * is recommended over GBM_DRIVERS_PATH. - */ - search_paths = getenv("GBM_DRIVERS_PATH"); - - /* Read LIBGL_DRIVERS_PATH if GBM_DRIVERS_PATH was not set. - * LIBGL_DRIVERS_PATH is recommended over GBM_DRIVERS_PATH. - */ - if (search_paths == NULL) { - search_paths = getenv("LIBGL_DRIVERS_PATH"); - } - } - if (search_paths == NULL) - search_paths = DEFAULT_DRIVER_DIR; - /* Temporarily work around dri driver libs that need symbols in libglapi * but don't automatically link it in. 
*/ @@ -334,56 +312,18 @@ */ dlopen("libglapi.so.0", RTLD_LAZY | RTLD_GLOBAL); - dri->driver = NULL; - end = search_paths + strlen(search_paths); - for (p = search_paths; p < end && dri->driver == NULL; p = next + 1) { - int len; - next = strchr(p, ':'); - if (next == NULL) - next = end; - - len = next - p; -#if GLX_USE_TLS - snprintf(path, sizeof path, - "%.*s/tls/%s_dri.so", len, p, dri->driver_name); - dri->driver = dlopen(path, RTLD_NOW | RTLD_GLOBAL); -#endif - if (dri->driver == NULL) { - snprintf(path, sizeof path, - "%.*s/%s_dri.so", len, p, dri->driver_name); - dri->driver = dlopen(path, RTLD_NOW | RTLD_GLOBAL); - } - /* not need continue to loop all paths once the driver is found */ - if (dri->driver != NULL) - break; - } - - if (dri->driver == NULL) { - fprintf(stderr, "gbm: failed to open any driver (search paths %s)\n", - search_paths); - fprintf(stderr, "gbm: Last dlopen error: %s\n", dlerror()); - return NULL; - } - - get_extensions_name = loader_get_extensions_name(dri->driver_name); - if (get_extensions_name) { - const __DRIextension **(*get_extensions)(void); - - get_extensions = dlsym(dri->driver, get_extensions_name); - free(get_extensions_name); - - if (get_extensions) - extensions = get_extensions(); - } - - if (!extensions) - extensions = dlsym(dri->driver, __DRI_DRIVER_EXTENSIONS); - if (extensions == NULL) { - fprintf(stderr, "gbm: driver exports no extensions (%s)", dlerror()); - dlclose(dri->driver); - } - - return extensions; + static const char *search_path_vars[] = { + /* Read GBM_DRIVERS_PATH first for compatibility, but LIBGL_DRIVERS_PATH + * is recommended over GBM_DRIVERS_PATH. + */ + "GBM_DRIVERS_PATH", + /* Read LIBGL_DRIVERS_PATH if GBM_DRIVERS_PATH was not set. + * LIBGL_DRIVERS_PATH is recommended over GBM_DRIVERS_PATH. 
+ */ + "LIBGL_DRIVERS_PATH", + NULL + }; + return loader_open_driver(dri->driver_name, &dri->driver, search_path_vars); } static int @@ -594,22 +534,6 @@ }, }; -/* The two GBM_BO_FORMAT_[XA]RGB8888 formats alias the GBM_FORMAT_* - * formats of the same name. We want to accept them whenever someone - * has a GBM format, but never return them to the user. */ -static int -gbm_format_canonicalize(uint32_t gbm_format) -{ - switch (gbm_format) { - case GBM_BO_FORMAT_XRGB8888: - return GBM_FORMAT_XRGB8888; - case GBM_BO_FORMAT_ARGB8888: - return GBM_FORMAT_ARGB8888; - default: - return gbm_format; - } -} - static int gbm_format_to_dri_format(uint32_t gbm_format) { diff -Nru mesa-18.3.3/src/gbm/gbm-symbols-check mesa-19.0.1/src/gbm/gbm-symbols-check --- mesa-18.3.3/src/gbm/gbm-symbols-check 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/gbm/gbm-symbols-check 2019-03-31 23:16:37.000000000 +0000 @@ -38,6 +38,7 @@ gbm_bo_set_user_data gbm_bo_get_user_data gbm_bo_destroy +gbm_format_get_name gbm_surface_create gbm_surface_create_with_modifiers gbm_surface_lock_front_buffer diff -Nru mesa-18.3.3/src/gbm/main/gbm.c mesa-19.0.1/src/gbm/main/gbm.c --- mesa-18.3.3/src/gbm/main/gbm.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gbm/main/gbm.c 2019-03-31 23:16:37.000000000 +0000 @@ -448,14 +448,14 @@ * \param gbm The gbm device returned from gbm_create_device() * \param width The width for the buffer * \param height The height for the buffer - * \param format The format to use for the buffer + * \param format The format to use for the buffer, from GBM_FORMAT_* or + * GBM_BO_FORMAT_* tokens * \param usage The union of the usage flags for this buffer * * \return A newly allocated buffer that should be freed with gbm_bo_destroy() * when no longer needed. If an error occurs during allocation %NULL will be * returned and errno set. 
* - * \sa enum gbm_bo_format for the list of formats * \sa enum gbm_bo_flags for the list of usage flags */ GBM_EXPORT struct gbm_bo * @@ -695,3 +695,39 @@ { return surf->gbm->surface_has_free_buffers(surf); } + +/* The two GBM_BO_FORMAT_[XA]RGB8888 formats alias the GBM_FORMAT_* + * formats of the same name. We want to accept them whenever someone + * has a GBM format, but never return them to the user. */ +uint32_t +gbm_format_canonicalize(uint32_t gbm_format) +{ + switch (gbm_format) { + case GBM_BO_FORMAT_XRGB8888: + return GBM_FORMAT_XRGB8888; + case GBM_BO_FORMAT_ARGB8888: + return GBM_FORMAT_ARGB8888; + default: + return gbm_format; + } +} + +/** + * Returns a string representing the fourcc format name. + * + * \param desc Caller-provided storage for the format name string. + * \return String containing the fourcc of the format. + */ +GBM_EXPORT char * +gbm_format_get_name(uint32_t gbm_format, struct gbm_format_name_desc *desc) +{ + gbm_format = gbm_format_canonicalize(gbm_format); + + desc->name[0] = gbm_format; + desc->name[1] = gbm_format >> 8; + desc->name[2] = gbm_format >> 16; + desc->name[3] = gbm_format >> 24; + desc->name[4] = 0; + + return desc->name; +} diff -Nru mesa-18.3.3/src/gbm/main/gbm.h mesa-19.0.1/src/gbm/main/gbm.h --- mesa-18.3.3/src/gbm/main/gbm.h 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/gbm/main/gbm.h 2019-03-31 23:16:37.000000000 +0000 @@ -190,6 +190,9 @@ #define GBM_FORMAT_YUV444 __gbm_fourcc_code('Y', 'U', '2', '4') /* non-subsampled Cb (1) and Cr (2) planes */ #define GBM_FORMAT_YVU444 __gbm_fourcc_code('Y', 'V', '2', '4') /* non-subsampled Cr (1) and Cb (2) planes */ +struct gbm_format_name_desc { + char name[5]; +}; /** * Flags to indicate the intended use for the buffer - these are passed into @@ -399,6 +402,9 @@ void gbm_surface_destroy(struct gbm_surface *surface); +char * +gbm_format_get_name(uint32_t gbm_format, struct gbm_format_name_desc *desc); + #ifdef __cplusplus } #endif diff -Nru 
mesa-18.3.3/src/gbm/main/gbmint.h mesa-19.0.1/src/gbm/main/gbmint.h --- mesa-18.3.3/src/gbm/main/gbmint.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gbm/main/gbmint.h 2019-03-31 23:16:37.000000000 +0000 @@ -133,4 +133,7 @@ struct gbm_device *(*create_device)(int fd); }; +uint32_t +gbm_format_canonicalize(uint32_t gbm_format); + #endif diff -Nru mesa-18.3.3/src/gbm/Makefile.am mesa-19.0.1/src/gbm/Makefile.am --- mesa-18.3.3/src/gbm/Makefile.am 2018-01-24 16:24:53.000000000 +0000 +++ mesa-19.0.1/src/gbm/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -42,7 +42,6 @@ $(gbm_dri_FILES) AM_CFLAGS += \ - -DDEFAULT_DRIVER_DIR='"$(DRI_DRIVER_SEARCH_DIR)"' \ $(LIBDRM_CFLAGS) \ $(PTHREADSTUBS_CFLAGS) diff -Nru mesa-18.3.3/src/gbm/meson.build mesa-19.0.1/src/gbm/meson.build --- mesa-18.3.3/src/gbm/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gbm/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -37,7 +37,6 @@ if with_dri2 files_gbm += files('backends/dri/gbm_dri.c', 'backends/dri/gbm_driint.h') deps_gbm += dep_libdrm # TODO: pthread-stubs - args_gbm += '-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path) endif if with_platform_wayland deps_gbm += dep_wayland_server @@ -72,6 +71,7 @@ 'gbm-symbols-check', find_program('gbm-symbols-check'), env : env_test, - args : libgbm + args : libgbm, + suite : ['gbm'], ) endif diff -Nru mesa-18.3.3/src/glx/dri2_glx.c mesa-19.0.1/src/glx/dri2_glx.c --- mesa-18.3.3/src/glx/dri2_glx.c 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/src/glx/dri2_glx.c 2019-03-31 23:16:37.000000000 +0000 @@ -1252,13 +1252,7 @@ driverName = loader_driverName; } - psc->driver = driOpenDriver(driverName); - if (psc->driver == NULL) { - ErrorMessageF("driver pointer missing\n"); - goto handle_error; - } - - extensions = driGetDriverExtensions(psc->driver, driverName); + extensions = driOpenDriver(driverName, &psc->driver); if (extensions == NULL) goto handle_error; diff -Nru mesa-18.3.3/src/glx/dri3_glx.c 
mesa-19.0.1/src/glx/dri3_glx.c --- mesa-18.3.3/src/glx/dri3_glx.c 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/glx/dri3_glx.c 2019-03-31 23:16:37.000000000 +0000 @@ -861,13 +861,7 @@ goto handle_error; } - psc->driver = driOpenDriver(driverName); - if (psc->driver == NULL) { - ErrorMessageF("driver pointer missing\n"); - goto handle_error; - } - - extensions = driGetDriverExtensions(psc->driver, driverName); + extensions = driOpenDriver(driverName, &psc->driver); if (extensions == NULL) goto handle_error; diff -Nru mesa-18.3.3/src/glx/dri_common.c mesa-19.0.1/src/glx/dri_common.c --- mesa-18.3.3/src/glx/dri_common.c 2017-12-02 01:35:56.000000000 +0000 +++ mesa-19.0.1/src/glx/dri_common.c 2019-03-31 23:16:37.000000000 +0000 @@ -77,11 +77,6 @@ #define GL_LIB_NAME "libGL.so.1" #endif -#ifndef DEFAULT_DRIVER_DIR -/* this is normally defined in Mesa/configs/default with DRI_DRIVER_SEARCH_PATH */ -#define DEFAULT_DRIVER_DIR "/usr/local/lib/dri" -#endif - /** * Try to \c dlopen the named driver. * @@ -90,97 +85,32 @@ * order to find the driver. * * \param driverName - a name like "i965", "radeon", "nouveau", etc. + * \param out_driver_handle - Address to return the resulting dlopen() handle. * * \returns - * A handle from \c dlopen, or \c NULL if driver file not found. + * The __DRIextension entrypoint table for the driver, or \c NULL if driver + * file not found. 
*/ -_X_HIDDEN void * -driOpenDriver(const char *driverName) +_X_HIDDEN const __DRIextension ** +driOpenDriver(const char *driverName, void **out_driver_handle) { - void *glhandle, *handle; - const char *libPaths, *p, *next; - char realDriverName[200]; - int len; + void *glhandle; /* Attempt to make sure libGL symbols will be visible to the driver */ glhandle = dlopen(GL_LIB_NAME, RTLD_NOW | RTLD_GLOBAL); - libPaths = NULL; - if (geteuid() == getuid()) { - /* don't allow setuid apps to use LIBGL_DRIVERS_PATH */ - libPaths = getenv("LIBGL_DRIVERS_PATH"); - if (!libPaths) - libPaths = getenv("LIBGL_DRIVERS_DIR"); /* deprecated */ - } - if (libPaths == NULL) - libPaths = DEFAULT_DRIVER_DIR; - - handle = NULL; - for (p = libPaths; *p; p = next) { - next = strchr(p, ':'); - if (next == NULL) { - len = strlen(p); - next = p + len; - } - else { - len = next - p; - next++; - } - -#ifdef GLX_USE_TLS - snprintf(realDriverName, sizeof realDriverName, - "%.*s/tls/%s_dri.so", len, p, driverName); - InfoMessageF("OpenDriver: trying %s\n", realDriverName); - handle = dlopen(realDriverName, RTLD_NOW | RTLD_GLOBAL); -#endif - - if (handle == NULL) { - snprintf(realDriverName, sizeof realDriverName, - "%.*s/%s_dri.so", len, p, driverName); - InfoMessageF("OpenDriver: trying %s\n", realDriverName); - handle = dlopen(realDriverName, RTLD_NOW | RTLD_GLOBAL); - } - - if (handle != NULL) - break; - else - InfoMessageF("dlopen %s failed (%s)\n", realDriverName, dlerror()); - } + static const char *search_path_vars[] = { + "LIBGL_DRIVERS_PATH", + "LIBGL_DRIVERS_DIR", /* deprecated */ + NULL + }; - if (!handle) - ErrorMessageF("unable to load driver: %s_dri.so\n", driverName); + const __DRIextension **extensions = + loader_open_driver(driverName, out_driver_handle, search_path_vars); if (glhandle) dlclose(glhandle); - return handle; -} - -_X_HIDDEN const __DRIextension ** -driGetDriverExtensions(void *handle, const char *driver_name) -{ - const __DRIextension **extensions = NULL; - const 
__DRIextension **(*get_extensions)(void); - char *get_extensions_name = loader_get_extensions_name(driver_name); - - if (get_extensions_name) { - get_extensions = dlsym(handle, get_extensions_name); - if (get_extensions) { - free(get_extensions_name); - return get_extensions(); - } else { - InfoMessageF("driver does not expose %s(): %s\n", - get_extensions_name, dlerror()); - free(get_extensions_name); - } - } - - extensions = dlsym(handle, __DRI_DRIVER_EXTENSIONS); - if (extensions == NULL) { - ErrorMessageF("driver exports no extensions (%s)\n", dlerror()); - return NULL; - } - return extensions; } diff -Nru mesa-18.3.3/src/glx/dri_common.h mesa-19.0.1/src/glx/dri_common.h --- mesa-18.3.3/src/glx/dri_common.h 2017-11-14 18:46:21.000000000 +0000 +++ mesa-19.0.1/src/glx/dri_common.h 2019-03-31 23:16:37.000000000 +0000 @@ -69,10 +69,8 @@ #define ErrorMessageF(...) dri_message(_LOADER_WARNING, __VA_ARGS__) #define CriticalErrorMessageF(...) dri_message(_LOADER_FATAL, __VA_ARGS__) -extern void *driOpenDriver(const char *driverName); - -extern const __DRIextension ** -driGetDriverExtensions(void *handle, const char *driver_name); +extern const __DRIextension **driOpenDriver(const char *driverName, + void **out_driver_handle); extern bool dri2_convert_glx_attribs(unsigned num_attribs, const uint32_t *attribs, diff -Nru mesa-18.3.3/src/glx/dri_glx.c mesa-19.0.1/src/glx/dri_glx.c --- mesa-18.3.3/src/glx/dri_glx.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/glx/dri_glx.c 2019-03-31 23:16:37.000000000 +0000 @@ -199,15 +199,9 @@ static char * get_driver_config(const char *driverName) { - void *handle = driOpenDriver(driverName); - const __DRIextension **extensions; - - if (!handle) - return NULL; - + void *handle; char *config = NULL; - - extensions = driGetDriverExtensions(handle, driverName); + const __DRIextension **extensions = driOpenDriver(driverName, &handle); if (extensions) { for (int i = 0; extensions[i]; i++) { if (strcmp(extensions[i]->name, 
__DRI_CONFIG_OPTIONS) != 0) @@ -918,11 +912,7 @@ goto cleanup; } - psc->driver = driOpenDriver(driverName); - if (psc->driver == NULL) - goto cleanup; - - extensions = dlsym(psc->driver, __DRI_DRIVER_EXTENSIONS); + extensions = driOpenDriver(driverName, &psc->driver); if (extensions == NULL) { ErrorMessageF("driver exports no extensions (%s)\n", dlerror()); goto cleanup; diff -Nru mesa-18.3.3/src/glx/drisw_glx.c mesa-19.0.1/src/glx/drisw_glx.c --- mesa-18.3.3/src/glx/drisw_glx.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/glx/drisw_glx.c 2019-03-31 23:16:37.000000000 +0000 @@ -147,6 +147,9 @@ if (pdp->ximage) XDestroyImage(pdp->ximage); + if (pdp->shminfo.shmid > 0) + XShmDetach(dpy, &pdp->shminfo); + free(pdp->visinfo); XFreeGC(dpy, pdp->gc); @@ -764,17 +767,6 @@ #define SWRAST_DRIVER_NAME "swrast" -static void * -driOpenSwrast(void) -{ - void *driver = NULL; - - if (driver == NULL) - driver = driOpenDriver(SWRAST_DRIVER_NAME); - - return driver; -} - static const struct glx_screen_vtable drisw_screen_vtable = { .create_context = drisw_create_context, .create_context_attribs = drisw_create_context_attribs, @@ -853,11 +845,7 @@ return NULL; } - psc->driver = driOpenSwrast(); - if (psc->driver == NULL) - goto handle_error; - - extensions = driGetDriverExtensions(psc->driver, SWRAST_DRIVER_NAME); + extensions = driOpenDriver(SWRAST_DRIVER_NAME, &psc->driver); if (extensions == NULL) goto handle_error; diff -Nru mesa-18.3.3/src/glx/glxcmds.c mesa-19.0.1/src/glx/glxcmds.c --- mesa-18.3.3/src/glx/glxcmds.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/glx/glxcmds.c 2019-03-31 23:16:37.000000000 +0000 @@ -272,6 +272,44 @@ return True; } +/** + * Determine if a context uses direct rendering. + * + * \param dpy Display where the context was created. + * \param contextID ID of the context to be tested. + * \param error Out parameter, set to True on error if not NULL + * + * \returns \c True if the context is direct rendering or not. 
+ */ +static Bool +__glXIsDirect(Display * dpy, GLXContextID contextID, Bool *error) +{ + CARD8 opcode; + xcb_connection_t *c; + xcb_generic_error_t *err; + xcb_glx_is_direct_reply_t *reply; + Bool is_direct; + + opcode = __glXSetupForCommand(dpy); + if (!opcode) { + return False; + } + + c = XGetXCBConnection(dpy); + reply = xcb_glx_is_direct_reply(c, xcb_glx_is_direct(c, contextID), &err); + is_direct = (reply != NULL && reply->is_direct) ? True : False; + + if (err != NULL) { + if (error) + *error = True; + __glXSendErrorForXcb(dpy, err); + free(err); + } + + free(reply); + + return is_direct; +} /** * Create a new context. @@ -376,6 +414,21 @@ gc->share_xid = shareList ? shareList->xid : None; gc->imported = GL_FALSE; + /* Unlike most X resource creation requests, we're about to return a handle + * with client-side state, not just an XID. To simplify error handling + * elsewhere in libGL, force a round-trip here to ensure the CreateContext + * request above succeeded. + */ + { + Bool error = False; + int isDirect = __glXIsDirect(dpy, gc->xid, &error); + + if (error != False || isDirect != gc->isDirect) { + gc->vtable->destroy(gc); + gc = NULL; + } + } + return (GLXContext) gc; } @@ -613,42 +666,6 @@ /** - * Determine if a context uses direct rendering. - * - * \param dpy Display where the context was created. - * \param contextID ID of the context to be tested. - * - * \returns \c True if the context is direct rendering or not. - */ -static Bool -__glXIsDirect(Display * dpy, GLXContextID contextID) -{ - CARD8 opcode; - xcb_connection_t *c; - xcb_generic_error_t *err; - xcb_glx_is_direct_reply_t *reply; - Bool is_direct; - - opcode = __glXSetupForCommand(dpy); - if (!opcode) { - return False; - } - - c = XGetXCBConnection(dpy); - reply = xcb_glx_is_direct_reply(c, xcb_glx_is_direct(c, contextID), &err); - is_direct = (reply != NULL && reply->is_direct) ? 
True : False; - - if (err != NULL) { - __glXSendErrorForXcb(dpy, err); - free(err); - } - - free(reply); - - return is_direct; -} - -/** * \todo * Shouldn't this function \b always return \c False when * \c GLX_DIRECT_RENDERING is not defined? Do we really need to bother with @@ -668,7 +685,7 @@ #ifdef GLX_USE_APPLEGL /* TODO: indirect on darwin */ return False; #else - return __glXIsDirect(dpy, gc->xid); + return __glXIsDirect(dpy, gc->xid, NULL); #endif } @@ -1428,7 +1445,7 @@ return NULL; } - if (__glXIsDirect(dpy, contextID)) + if (__glXIsDirect(dpy, contextID, NULL)) return NULL; opcode = __glXSetupForCommand(dpy); diff -Nru mesa-18.3.3/src/glx/Makefile.am mesa-19.0.1/src/glx/Makefile.am --- mesa-18.3.3/src/glx/Makefile.am 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/glx/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -35,7 +35,6 @@ -I$(top_srcdir)/src/mapi/glapi \ $(VISIBILITY_CFLAGS) \ -D_REENTRANT \ - -DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" \ $(DEFINES) \ $(LIBDRM_CFLAGS) \ $(DRI2PROTO_CFLAGS) \ diff -Nru mesa-18.3.3/src/glx/meson.build mesa-19.0.1/src/glx/meson.build --- mesa-18.3.3/src/glx/meson.build 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/glx/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -134,7 +134,6 @@ gl_lib_cargs = [ '-D_REENTRANT', - '-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path), ] libglx = static_library( diff -Nru mesa-18.3.3/src/glx/SConscript mesa-19.0.1/src/glx/SConscript --- mesa-18.3.3/src/glx/SConscript 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/glx/SConscript 2019-03-31 23:16:37.000000000 +0000 @@ -24,7 +24,6 @@ env.Append(CPPDEFINES = [ '_REENTRANT', - #('DEFAULT_DRIVER_DIR', 'DRI_DRIVER_SEARCH_DIR') ]) env.Prepend(LIBS = [ diff -Nru mesa-18.3.3/src/glx/tests/meson.build mesa-19.0.1/src/glx/tests/meson.build --- mesa-18.3.3/src/glx/tests/meson.build 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/glx/tests/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -50,5 
+50,6 @@ ], dependencies : [dep_libdrm, dep_glproto, dep_thread, idep_gtest] ), + suite : ['glx'], ) endif diff -Nru mesa-18.3.3/src/intel/Android.isl.mk mesa-19.0.1/src/intel/Android.isl.mk --- mesa-18.3.3/src/intel/Android.isl.mk 2018-02-16 12:24:09.000000000 +0000 +++ mesa-19.0.1/src/intel/Android.isl.mk 2019-03-31 23:16:37.000000000 +0000 @@ -199,6 +199,47 @@ include $(BUILD_STATIC_LIBRARY) # --------------------------------------- +# Build libmesa_isl_tiled_memcpy +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_MODULE := libmesa_isl_tiled_memcpy + +LOCAL_C_INCLUDES := \ + $(MESA_TOP)/src/gallium/include \ + $(MESA_TOP)/src/mapi \ + $(MESA_TOP)/src/mesa + +LOCAL_SRC_FILES := $(ISL_TILED_MEMCPY_FILES) + +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) + +# --------------------------------------- +# Build libmesa_isl_tiled_memcpy_sse41 +# --------------------------------------- + +ifeq ($(ARCH_X86_HAVE_SSE4_1),true) +include $(CLEAR_VARS) + +LOCAL_MODULE := libmesa_isl_tiled_memcpy_sse41 + +LOCAL_C_INCLUDES := \ + $(MESA_TOP)/src/gallium/include \ + $(MESA_TOP)/src/mapi \ + $(MESA_TOP)/src/mesa + +LOCAL_SRC_FILES := $(ISL_TILED_MEMCPY_SSE41_FILES) + +LOCAL_CFLAGS += \ + -DUSE_SSE41 -msse4.1 -mstackrealign + +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) +endif + +# --------------------------------------- # Build libmesa_isl # --------------------------------------- @@ -227,7 +268,15 @@ libmesa_isl_gen9 \ libmesa_isl_gen10 \ libmesa_isl_gen11 \ - libmesa_genxml + libmesa_genxml \ + libmesa_isl_tiled_memcpy + +ifeq ($(ARCH_X86_HAVE_SSE4_1),true) +LOCAL_CFLAGS += \ + -DUSE_SSE41 +LOCAL_WHOLE_STATIC_LIBRARIES += \ + libmesa_isl_tiled_memcpy_sse41 +endif # Autogenerated sources diff -Nru mesa-18.3.3/src/intel/Android.vulkan.mk mesa-19.0.1/src/intel/Android.vulkan.mk --- mesa-18.3.3/src/intel/Android.vulkan.mk 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/Android.vulkan.mk 2019-03-31 
23:16:37.000000000 +0000 @@ -23,9 +23,10 @@ include $(CLEAR_VARS) include $(LOCAL_PATH)/Makefile.sources -VK_ENTRYPOINTS_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/vulkan/anv_entrypoints_gen.py - -VK_EXTENSIONS_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/vulkan/anv_extensions_gen.py +ANV_ENTRYPOINTS_GEN_SCRIPT := $(LOCAL_PATH)/vulkan/anv_entrypoints_gen.py +ANV_EXTENSIONS_GEN_SCRIPT := $(LOCAL_PATH)/vulkan/anv_extensions_gen.py +ANV_EXTENSIONS_SCRIPT := $(LOCAL_PATH)/vulkan/anv_extensions.py +VULKAN_API_XML := $(MESA_TOP)/src/vulkan/registry/vk.xml VULKAN_COMMON_INCLUDES := \ $(MESA_TOP)/include \ @@ -38,6 +39,7 @@ $(MESA_TOP)/src/intel \ $(MESA_TOP)/include/drm-uapi \ $(MESA_TOP)/src/intel/vulkan \ + $(MESA_TOP)/src/compiler \ frameworks/native/vulkan/include # libmesa_anv_entrypoints with header and dummy.c @@ -64,10 +66,13 @@ @echo "Gen Dummy: $(PRIVATE_MODULE) <= $(notdir $(@))" $(hide) touch $@ -$(intermediates)/vulkan/anv_entrypoints.h: $(intermediates)/vulkan/dummy.c - $(VK_ENTRYPOINTS_SCRIPT) \ +$(intermediates)/vulkan/anv_entrypoints.h: $(intermediates)/vulkan/dummy.c \ + $(ANV_ENTRYPOINTS_GEN_SCRIPT) \ + $(ANV_EXTENSIONS_SCRIPT) \ + $(VULKAN_API_XML) + $(MESA_PYTHON2) $(ANV_ENTRYPOINTS_GEN_SCRIPT) \ --outdir $(dir $@) \ - --xml $(MESA_TOP)/src/vulkan/registry/vk.xml + --xml $(VULKAN_API_XML) LOCAL_EXPORT_C_INCLUDE_DIRS := \ $(intermediates) @@ -241,22 +246,28 @@ LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/anv_extensions.c LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/anv_extensions.h -$(intermediates)/vulkan/anv_entrypoints.c: +$(intermediates)/vulkan/anv_entrypoints.c: $(ANV_ENTRYPOINTS_GEN_SCRIPT) \ + $(ANV_EXTENSIONS_SCRIPT) \ + $(VULKAN_API_XML) @mkdir -p $(dir $@) - $(VK_ENTRYPOINTS_SCRIPT) \ - --xml $(MESA_TOP)/src/vulkan/registry/vk.xml \ + $(MESA_PYTHON2) $(ANV_ENTRYPOINTS_GEN_SCRIPT) \ + --xml $(VULKAN_API_XML) \ --outdir $(dir $@) -$(intermediates)/vulkan/anv_extensions.c: +$(intermediates)/vulkan/anv_extensions.c: 
$(ANV_EXTENSIONS_GEN_SCRIPT) \ + $(ANV_EXTENSIONS_SCRIPT) \ + $(VULKAN_API_XML) @mkdir -p $(dir $@) - $(VK_EXTENSIONS_SCRIPT) \ - --xml $(MESA_TOP)/src/vulkan/registry/vk.xml \ + $(MESA_PYTHON2) $(ANV_EXTENSIONS_GEN_SCRIPT) \ + --xml $(VULKAN_API_XML) \ --out-c $@ -$(intermediates)/vulkan/anv_extensions.h: +$(intermediates)/vulkan/anv_extensions.h: $(ANV_EXTENSIONS_GEN_SCRIPT) \ + $(ANV_EXTENSIONS_SCRIPT) \ + $(VULKAN_API_XML) @mkdir -p $(dir $@) - $(VK_EXTENSIONS_SCRIPT) \ - --xml $(MESA_TOP)/src/vulkan/registry/vk.xml \ + $(MESA_PYTHON2) $(ANV_EXTENSIONS_GEN_SCRIPT) \ + --xml $(VULKAN_API_XML) \ --out-h $@ LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) @@ -308,7 +319,7 @@ libmesa_intel_compiler \ libmesa_anv_entrypoints -LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) libz libsync liblog +LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) libexpat libz libsync liblog include $(MESA_COMMON_MK) include $(BUILD_SHARED_LIBRARY) diff -Nru mesa-18.3.3/src/intel/blorp/blorp_blit.c mesa-19.0.1/src/intel/blorp/blorp_blit.c --- mesa-18.3.3/src/intel/blorp/blorp_blit.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/blorp/blorp_blit.c 2019-03-31 23:16:37.000000000 +0000 @@ -588,10 +588,11 @@ } static nir_ssa_def * -blorp_nir_manual_blend_average(nir_builder *b, struct brw_blorp_blit_vars *v, - nir_ssa_def *pos, unsigned tex_samples, - enum isl_aux_usage tex_aux_usage, - nir_alu_type dst_type) +blorp_nir_combine_samples(nir_builder *b, struct brw_blorp_blit_vars *v, + nir_ssa_def *pos, unsigned tex_samples, + enum isl_aux_usage tex_aux_usage, + nir_alu_type dst_type, + enum blorp_filter filter) { /* If non-null, this is the outer-most if statement */ nir_if *outer_if = NULL; @@ -603,6 +604,35 @@ if (tex_aux_usage == ISL_AUX_USAGE_MCS) mcs = blorp_blit_txf_ms_mcs(b, v, pos); + nir_op combine_op; + switch (filter) { + case BLORP_FILTER_AVERAGE: + assert(dst_type == nir_type_float); + combine_op = nir_op_fadd; + break; + + case BLORP_FILTER_MIN_SAMPLE: + 
switch (dst_type) { + case nir_type_int: combine_op = nir_op_imin; break; + case nir_type_uint: combine_op = nir_op_umin; break; + case nir_type_float: combine_op = nir_op_fmin; break; + default: unreachable("Invalid dst_type"); + } + break; + + case BLORP_FILTER_MAX_SAMPLE: + switch (dst_type) { + case nir_type_int: combine_op = nir_op_imax; break; + case nir_type_uint: combine_op = nir_op_umax; break; + case nir_type_float: combine_op = nir_op_fmax; break; + default: unreachable("Invalid dst_type"); + } + break; + + default: + unreachable("Invalid filter"); + } + /* We add together samples using a binary tree structure, e.g. for 4x MSAA: * * result = ((sample[0] + sample[1]) + (sample[2] + sample[3])) / 4 @@ -689,18 +719,22 @@ assert(stack_depth >= 2); --stack_depth; - assert(dst_type == nir_type_float); texture_data[stack_depth - 1] = - nir_fadd(b, texture_data[stack_depth - 1], - texture_data[stack_depth]); + nir_build_alu(b, combine_op, + texture_data[stack_depth - 1], + texture_data[stack_depth], + NULL, NULL); } } /* We should have just 1 sample on the stack now. */ assert(stack_depth == 1); - texture_data[0] = nir_fmul(b, texture_data[0], - nir_imm_float(b, 1.0 / tex_samples)); + if (filter == BLORP_FILTER_AVERAGE) { + assert(dst_type == nir_type_float); + texture_data[0] = nir_fmul(b, texture_data[0], + nir_imm_float(b, 1.0 / tex_samples)); + } nir_store_var(b, color, texture_data[0], 0xf); @@ -1351,6 +1385,8 @@ break; case BLORP_FILTER_AVERAGE: + case BLORP_FILTER_MIN_SAMPLE: + case BLORP_FILTER_MAX_SAMPLE: assert(!key->src_tiled_w); assert(key->tex_samples == key->src_samples); assert(key->tex_layout == key->src_layout); @@ -1369,15 +1405,17 @@ * to multiply our X and Y coordinates each by 2 and then add 1. 
*/ assert(key->src_coords_normalized); + assert(key->filter == BLORP_FILTER_AVERAGE); src_pos = nir_fadd(&b, nir_i2f32(&b, src_pos), nir_imm_float(&b, 0.5f)); color = blorp_nir_tex(&b, &v, key, src_pos); } else { /* Gen7+ hardware doesn't automaticaly blend. */ - color = blorp_nir_manual_blend_average(&b, &v, src_pos, key->src_samples, - key->tex_aux_usage, - key->texture_data_type); + color = blorp_nir_combine_samples(&b, &v, src_pos, key->src_samples, + key->tex_aux_usage, + key->texture_data_type, + key->filter); } break; @@ -1428,11 +1466,13 @@ } static bool -brw_blorp_get_blit_kernel(struct blorp_context *blorp, +brw_blorp_get_blit_kernel(struct blorp_batch *batch, struct blorp_params *params, const struct brw_blorp_blit_prog_key *prog_key) { - if (blorp->lookup_shader(blorp, prog_key, sizeof(*prog_key), + struct blorp_context *blorp = batch->blorp; + + if (blorp->lookup_shader(batch, prog_key, sizeof(*prog_key), ¶ms->wm_prog_kernel, ¶ms->wm_prog_data)) return true; @@ -1455,7 +1495,7 @@ &prog_data); bool result = - blorp->upload_shader(blorp, prog_key, sizeof(*prog_key), + blorp->upload_shader(batch, prog_key, sizeof(*prog_key), program, prog_data.base.program_size, &prog_data.base, sizeof(prog_data), ¶ms->wm_prog_kernel, ¶ms->wm_prog_data); @@ -1518,6 +1558,9 @@ { bool ok UNUSED; + /* It would be insane to try and do this on a compressed surface */ + assert(info->aux_usage == ISL_AUX_USAGE_NONE); + /* Just bail if we have nothing to do. */ if (info->surf.dim == ISL_SURF_DIM_2D && info->view.base_level == 0 && info->view.base_array_layer == 0 && @@ -2037,10 +2080,10 @@ /* For some texture types, we need to pass the layer through the sampler. 
*/ params->wm_inputs.src_z = params->src.z_offset; - if (!brw_blorp_get_blit_kernel(batch->blorp, params, wm_prog_key)) + if (!brw_blorp_get_blit_kernel(batch, params, wm_prog_key)) return 0; - if (!blorp_ensure_sf_program(batch->blorp, params)) + if (!blorp_ensure_sf_program(batch, params)) return 0; unsigned result = 0; diff -Nru mesa-18.3.3/src/intel/blorp/blorp.c mesa-19.0.1/src/intel/blorp/blorp.c --- mesa-18.3.3/src/intel/blorp/blorp.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/blorp/blorp.c 2019-03-31 23:16:37.000000000 +0000 @@ -247,9 +247,10 @@ }; bool -blorp_ensure_sf_program(struct blorp_context *blorp, +blorp_ensure_sf_program(struct blorp_batch *batch, struct blorp_params *params) { + struct blorp_context *blorp = batch->blorp; const struct brw_wm_prog_data *wm_prog_data = params->wm_prog_data; assert(params->wm_prog_data); @@ -276,7 +277,7 @@ memcpy(key.key.interp_mode, wm_prog_data->interp_mode, sizeof(key.key.interp_mode)); - if (blorp->lookup_shader(blorp, &key, sizeof(key), + if (blorp->lookup_shader(batch, &key, sizeof(key), ¶ms->sf_prog_kernel, ¶ms->sf_prog_data)) return true; @@ -293,7 +294,7 @@ &prog_data_tmp, &vue_map, &program_size); bool result = - blorp->upload_shader(blorp, &key, sizeof(key), program, program_size, + blorp->upload_shader(batch, &key, sizeof(key), program, program_size, (void *)&prog_data_tmp, sizeof(prog_data_tmp), ¶ms->sf_prog_kernel, ¶ms->sf_prog_data); diff -Nru mesa-18.3.3/src/intel/blorp/blorp_clear.c mesa-19.0.1/src/intel/blorp/blorp_clear.c --- mesa-18.3.3/src/intel/blorp/blorp_clear.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/blorp/blorp_clear.c 2019-03-31 23:16:37.000000000 +0000 @@ -43,18 +43,20 @@ }; static bool -blorp_params_get_clear_kernel(struct blorp_context *blorp, +blorp_params_get_clear_kernel(struct blorp_batch *batch, struct blorp_params *params, bool use_replicated_data, bool clear_rgb_as_red) { + struct blorp_context *blorp = batch->blorp; + const struct 
brw_blorp_const_color_prog_key blorp_key = { .shader_type = BLORP_SHADER_TYPE_CLEAR, .use_simd16_replicated_data = use_replicated_data, .clear_rgb_as_red = clear_rgb_as_red, }; - if (blorp->lookup_shader(blorp, &blorp_key, sizeof(blorp_key), + if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key), ¶ms->wm_prog_kernel, ¶ms->wm_prog_data)) return true; @@ -104,7 +106,7 @@ &prog_data); bool result = - blorp->upload_shader(blorp, &blorp_key, sizeof(blorp_key), + blorp->upload_shader(batch, &blorp_key, sizeof(blorp_key), program, prog_data.base.program_size, &prog_data.base, sizeof(prog_data), ¶ms->wm_prog_kernel, ¶ms->wm_prog_data); @@ -126,9 +128,10 @@ * vertex shader. */ static bool -blorp_params_get_layer_offset_vs(struct blorp_context *blorp, +blorp_params_get_layer_offset_vs(struct blorp_batch *batch, struct blorp_params *params) { + struct blorp_context *blorp = batch->blorp; struct layer_offset_vs_key blorp_key = { .shader_type = BLORP_SHADER_TYPE_LAYER_OFFSET_VS, }; @@ -136,7 +139,7 @@ if (params->wm_prog_data) blorp_key.num_inputs = params->wm_prog_data->num_varying_inputs; - if (blorp->lookup_shader(blorp, &blorp_key, sizeof(blorp_key), + if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key), ¶ms->vs_prog_kernel, ¶ms->vs_prog_data)) return true; @@ -194,7 +197,7 @@ blorp_compile_vs(blorp, mem_ctx, b.shader, &vs_prog_data); bool result = - blorp->upload_shader(blorp, &blorp_key, sizeof(blorp_key), + blorp->upload_shader(batch, &blorp_key, sizeof(blorp_key), program, vs_prog_data.base.base.program_size, &vs_prog_data.base.base, sizeof(vs_prog_data), ¶ms->vs_prog_kernel, ¶ms->vs_prog_data); @@ -351,7 +354,7 @@ get_fast_clear_rect(batch->blorp->isl_dev, surf->aux_surf, ¶ms.x0, ¶ms.y0, ¶ms.x1, ¶ms.y1); - if (!blorp_params_get_clear_kernel(batch->blorp, ¶ms, true, false)) + if (!blorp_params_get_clear_kernel(batch, ¶ms, true, false)) return; brw_blorp_surface_info_init(batch->blorp, ¶ms.dst, surf, level, @@ -453,12 +456,12 @@ } } - if 
(!blorp_params_get_clear_kernel(batch->blorp, ¶ms, + if (!blorp_params_get_clear_kernel(batch, ¶ms, use_simd16_replicated_data, clear_rgb_as_red)) return; - if (!blorp_ensure_sf_program(batch->blorp, ¶ms)) + if (!blorp_ensure_sf_program(batch, ¶ms)) return; while (num_layers > 0) { @@ -589,7 +592,7 @@ * we disable statistics in 3DSTATE_WM. Give it the usual clear shader * to work around the issue. */ - if (!blorp_params_get_clear_kernel(batch->blorp, ¶ms, false, false)) + if (!blorp_params_get_clear_kernel(batch, ¶ms, false, false)) return; } @@ -829,7 +832,7 @@ * is tiled or not, we have to assume it may be linear. This means no * SIMD16_REPDATA for us. :-( */ - if (!blorp_params_get_clear_kernel(batch->blorp, ¶ms, false, false)) + if (!blorp_params_get_clear_kernel(batch, ¶ms, false, false)) return; } @@ -847,7 +850,7 @@ params.stencil_ref = stencil_value; } - if (!blorp_params_get_layer_offset_vs(batch->blorp, ¶ms)) + if (!blorp_params_get_layer_offset_vs(batch, ¶ms)) return; params.vs_inputs.base_layer = start_layer; @@ -914,7 +917,7 @@ * color" message. 
*/ - if (!blorp_params_get_clear_kernel(batch->blorp, ¶ms, true, false)) + if (!blorp_params_get_clear_kernel(batch, ¶ms, true, false)) return; batch->blorp->exec(batch, ¶ms); @@ -936,9 +939,10 @@ }; static bool -blorp_params_get_mcs_partial_resolve_kernel(struct blorp_context *blorp, +blorp_params_get_mcs_partial_resolve_kernel(struct blorp_batch *batch, struct blorp_params *params) { + struct blorp_context *blorp = batch->blorp; const struct blorp_mcs_partial_resolve_key blorp_key = { .shader_type = BLORP_SHADER_TYPE_MCS_PARTIAL_RESOLVE, .indirect_clear_color = params->dst.clear_color_addr.buffer != NULL, @@ -946,7 +950,7 @@ .num_samples = params->num_samples, }; - if (blorp->lookup_shader(blorp, &blorp_key, sizeof(blorp_key), + if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key), ¶ms->wm_prog_kernel, ¶ms->wm_prog_data)) return true; @@ -1002,7 +1006,7 @@ &prog_data); bool result = - blorp->upload_shader(blorp, &blorp_key, sizeof(blorp_key), + blorp->upload_shader(batch, &blorp_key, sizeof(blorp_key), program, prog_data.base.program_size, &prog_data.base, sizeof(prog_data), ¶ms->wm_prog_kernel, ¶ms->wm_prog_data); @@ -1039,7 +1043,7 @@ memcpy(¶ms.wm_inputs.clear_color, surf->clear_color.f32, sizeof(float) * 4); - if (!blorp_params_get_mcs_partial_resolve_kernel(batch->blorp, ¶ms)) + if (!blorp_params_get_mcs_partial_resolve_kernel(batch, ¶ms)) return; batch->blorp->exec(batch, ¶ms); @@ -1192,7 +1196,7 @@ memset(¶ms.wm_inputs.clear_color, 0, sizeof(params.wm_inputs.clear_color)); - if (!blorp_params_get_clear_kernel(batch->blorp, ¶ms, true, false)) + if (!blorp_params_get_clear_kernel(batch, ¶ms, true, false)) return; batch->blorp->exec(batch, ¶ms); diff -Nru mesa-18.3.3/src/intel/blorp/blorp_genX_exec.h mesa-19.0.1/src/intel/blorp/blorp_genX_exec.h --- mesa-18.3.3/src/intel/blorp/blorp_genX_exec.h 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/blorp/blorp_genX_exec.h 2019-03-31 23:16:37.000000000 +0000 @@ -82,6 +82,10 @@ 
blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset, struct blorp_address address, uint32_t delta); +static uint64_t +blorp_get_surface_address(struct blorp_batch *batch, + struct blorp_address address); + #if GEN_GEN >= 7 && GEN_GEN < 10 static struct blorp_address blorp_get_surface_base_address(struct blorp_batch *batch); @@ -311,7 +315,7 @@ vb[idx].BufferPitch = stride; #if GEN_GEN >= 6 - vb[idx].VertexBufferMOCS = addr.mocs; + vb[idx].MOCS = addr.mocs; #endif #if GEN_GEN >= 7 @@ -347,13 +351,13 @@ blorp_emit_input_varying_data(batch, params, &addrs[1], &size); blorp_fill_vertex_buffer_state(batch, vb, 1, addrs[1], size, 0); + blorp_vf_invalidate_for_vb_48b_transitions(batch, addrs, num_vbs); + const unsigned num_dwords = 1 + num_vbs * GENX(VERTEX_BUFFER_STATE_length); uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_BUFFERS), num_dwords); if (!dw) return; - blorp_vf_invalidate_for_vb_48b_transitions(batch, addrs, num_vbs); - for (unsigned i = 0; i < num_vbs; i++) { GENX(VERTEX_BUFFER_STATE_pack)(batch, dw, &vb[i]); dw += GENX(VERTEX_BUFFER_STATE_length); @@ -1363,6 +1367,13 @@ isl_surf_fill_state(batch->blorp->isl_dev, state, .surf = &surf, .view = &surface->view, .aux_surf = &surface->aux_surf, .aux_usage = aux_usage, + .address = + blorp_get_surface_address(batch, surface->addr), + .aux_address = aux_usage == ISL_AUX_USAGE_NONE ? 0 : + blorp_get_surface_address(batch, surface->aux_addr), + .clear_address = !use_clear_address ? 
0 : + blorp_get_surface_address(batch, + surface->clear_color_addr), .mocs = surface->addr.mocs, .clear_color = surface->clear_color, .use_clear_address = use_clear_address, diff -Nru mesa-18.3.3/src/intel/blorp/blorp.h mesa-19.0.1/src/intel/blorp/blorp.h --- mesa-18.3.3/src/intel/blorp/blorp.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/blorp/blorp.h 2019-03-31 23:16:37.000000000 +0000 @@ -45,10 +45,10 @@ const struct brw_compiler *compiler; - bool (*lookup_shader)(struct blorp_context *blorp, + bool (*lookup_shader)(struct blorp_batch *batch, const void *key, uint32_t key_size, uint32_t *kernel_out, void *prog_data_out); - bool (*upload_shader)(struct blorp_context *blorp, + bool (*upload_shader)(struct blorp_batch *batch, const void *key, uint32_t key_size, const void *kernel, uint32_t kernel_size, const struct brw_stage_prog_data *prog_data, @@ -91,8 +91,8 @@ struct blorp_address { void *buffer; + uint64_t offset; unsigned reloc_flags; - uint32_t offset; uint32_t mocs; }; @@ -125,6 +125,8 @@ BLORP_FILTER_BILINEAR, BLORP_FILTER_SAMPLE_0, BLORP_FILTER_AVERAGE, + BLORP_FILTER_MIN_SAMPLE, + BLORP_FILTER_MAX_SAMPLE, }; void diff -Nru mesa-18.3.3/src/intel/blorp/blorp_priv.h mesa-19.0.1/src/intel/blorp/blorp_priv.h --- mesa-18.3.3/src/intel/blorp/blorp_priv.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/blorp/blorp_priv.h 2019-03-31 23:16:37.000000000 +0000 @@ -360,7 +360,7 @@ struct brw_vs_prog_data *vs_prog_data); bool -blorp_ensure_sf_program(struct blorp_context *blorp, +blorp_ensure_sf_program(struct blorp_batch *batch, struct blorp_params *params); /** \} */ diff -Nru mesa-18.3.3/src/intel/common/gen_batch_decoder.c mesa-19.0.1/src/intel/common/gen_batch_decoder.c --- mesa-18.3.3/src/intel/common/gen_batch_decoder.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/common/gen_batch_decoder.c 2019-03-31 23:16:37.000000000 +0000 @@ -24,6 +24,7 @@ #include "common/gen_decoder.h" #include "gen_disasm.h" #include 
"util/macros.h" +#include "main/macros.h" /* Needed for ROUND_DOWN_TO */ #include @@ -45,6 +46,7 @@ ctx->fp = fp; ctx->flags = flags; ctx->max_vbo_decoded_lines = -1; /* No limit! */ + ctx->engine = I915_ENGINE_CLASS_RENDER; if (xml_path == NULL) ctx->spec = gen_spec_load(devinfo); @@ -168,7 +170,8 @@ uint32_t pitch, int max_lines) { - const uint32_t *dw_end = bo.map + MIN2(bo.size, read_length); + const uint32_t *dw_end = + bo.map + ROUND_DOWN_TO(MIN2(bo.size, read_length), 4); int column_count = 0, line_count = -1; for (const uint32_t *dw = bo.map; dw < dw_end; dw++) { @@ -192,10 +195,16 @@ fprintf(ctx->fp, "\n"); } +static struct gen_group * +gen_ctx_find_instruction(struct gen_batch_decode_ctx *ctx, const uint32_t *p) +{ + return gen_spec_find_instruction(ctx->spec, ctx->engine, p); +} + static void handle_state_base_address(struct gen_batch_decode_ctx *ctx, const uint32_t *p) { - struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p); + struct gen_group *inst = gen_ctx_find_instruction(ctx, p); struct gen_field_iterator iter; gen_field_iterator_init(&iter, inst, p, 0, false); @@ -309,7 +318,7 @@ handle_media_interface_descriptor_load(struct gen_batch_decode_ctx *ctx, const uint32_t *p) { - struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p); + struct gen_group *inst = gen_ctx_find_instruction(ctx, p); struct gen_group *desc = gen_spec_find_struct(ctx->spec, "INTERFACE_DESCRIPTOR_DATA"); @@ -373,7 +382,7 @@ handle_3dstate_vertex_buffers(struct gen_batch_decode_ctx *ctx, const uint32_t *p) { - struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p); + struct gen_group *inst = gen_ctx_find_instruction(ctx, p); struct gen_group *vbs = gen_spec_find_struct(ctx->spec, "VERTEX_BUFFER_STATE"); struct gen_batch_decode_bo vb = {}; @@ -402,7 +411,7 @@ ready = true; } else if (strcmp(vbs_iter.name, "End Address") == 0) { if (vb.map && vbs_iter.raw_value >= vb.addr) - vb_size = vbs_iter.raw_value - vb.addr; + vb_size = (vbs_iter.raw_value + 
1) - vb.addr; else vb_size = 0; ready = true; @@ -436,7 +445,7 @@ handle_3dstate_index_buffer(struct gen_batch_decode_ctx *ctx, const uint32_t *p) { - struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p); + struct gen_group *inst = gen_ctx_find_instruction(ctx, p); struct gen_batch_decode_bo ib = {}; uint32_t ib_size = 0; @@ -486,7 +495,7 @@ static void decode_single_ksp(struct gen_batch_decode_ctx *ctx, const uint32_t *p) { - struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p); + struct gen_group *inst = gen_ctx_find_instruction(ctx, p); uint64_t ksp = 0; bool is_simd8 = false; /* vertex shaders on Gen8+ only */ @@ -528,7 +537,7 @@ static void decode_ps_kernels(struct gen_batch_decode_ctx *ctx, const uint32_t *p) { - struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p); + struct gen_group *inst = gen_ctx_find_instruction(ctx, p); uint64_t ksp[3] = {0, 0, 0}; bool enabled[3] = {false, false, false}; @@ -576,7 +585,7 @@ static void decode_3dstate_constant(struct gen_batch_decode_ctx *ctx, const uint32_t *p) { - struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p); + struct gen_group *inst = gen_ctx_find_instruction(ctx, p); struct gen_group *body = gen_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_BODY"); @@ -658,7 +667,7 @@ const char *struct_type, const uint32_t *p, int count) { - struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p); + struct gen_group *inst = gen_ctx_find_instruction(ctx, p); uint32_t state_offset = 0; @@ -802,7 +811,7 @@ struct gen_group *inst; for (p = batch; p < end; p += length) { - inst = gen_spec_find_instruction(ctx->spec, p); + inst = gen_ctx_find_instruction(ctx, p); length = gen_group_get_length(inst, p); assert(inst == NULL || length > 0); length = MAX2(1, length); diff -Nru mesa-18.3.3/src/intel/common/gen_decoder.c mesa-19.0.1/src/intel/common/gen_decoder.c --- mesa-18.3.3/src/intel/common/gen_decoder.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/intel/common/gen_decoder.c 2019-03-31 23:16:37.000000000 +0000 @@ -165,6 +165,9 @@ group->fixed_length = fixed_length; group->dword_length_field = NULL; group->dw_length = 0; + group->engine_mask = I915_ENGINE_CLASS_TO_MASK(I915_ENGINE_CLASS_RENDER) | + I915_ENGINE_CLASS_TO_MASK(I915_ENGINE_CLASS_VIDEO) | + I915_ENGINE_CLASS_TO_MASK(I915_ENGINE_CLASS_COPY); group->bias = 1; for (int i = 0; atts[i]; i += 2) { @@ -173,6 +176,28 @@ group->dw_length = strtoul(atts[i + 1], &p, 0); } else if (strcmp(atts[i], "bias") == 0) { group->bias = strtoul(atts[i + 1], &p, 0); + } else if (strcmp(atts[i], "engine") == 0) { + void *mem_ctx = ralloc_context(NULL); + char *tmp = ralloc_strdup(mem_ctx, atts[i + 1]); + char *save_ptr; + char *tok = strtok_r(tmp, "|", &save_ptr); + + group->engine_mask = 0; + while (tok != NULL) { + if (strcmp(tok, "render") == 0) { + group->engine_mask |= I915_ENGINE_CLASS_TO_MASK(I915_ENGINE_CLASS_RENDER); + } else if (strcmp(tok, "video") == 0) { + group->engine_mask |= I915_ENGINE_CLASS_TO_MASK(I915_ENGINE_CLASS_VIDEO); + } else if (strcmp(tok, "blitter") == 0) { + group->engine_mask |= I915_ENGINE_CLASS_TO_MASK(I915_ENGINE_CLASS_COPY); + } else { + fprintf(stderr, "unknown engine class defined for instruction \"%s\": %s\n", name, atts[i + 1]); + } + + tok = strtok_r(NULL, "|", &save_ptr); + } + + ralloc_free(mem_ctx); } } @@ -708,12 +733,15 @@ } struct gen_group * -gen_spec_find_instruction(struct gen_spec *spec, const uint32_t *p) +gen_spec_find_instruction(struct gen_spec *spec, + enum drm_i915_gem_engine_class engine, + const uint32_t *p) { hash_table_foreach(spec->commands, entry) { struct gen_group *command = entry->data; uint32_t opcode = *p & command->opcode_mask; - if (opcode == command->opcode) + if ((command->engine_mask & I915_ENGINE_CLASS_TO_MASK(engine)) && + opcode == command->opcode) return command; } diff -Nru mesa-18.3.3/src/intel/common/gen_decoder.h mesa-19.0.1/src/intel/common/gen_decoder.h --- 
mesa-18.3.3/src/intel/common/gen_decoder.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/common/gen_decoder.h 2019-03-31 23:16:37.000000000 +0000 @@ -30,6 +30,9 @@ #include "dev/gen_device_info.h" #include "util/hash_table.h" +#include "util/bitset.h" + +#include "drm-uapi/i915_drm.h" #ifdef __cplusplus extern "C" { @@ -40,6 +43,8 @@ struct gen_field; union gen_field_value; +#define I915_ENGINE_CLASS_TO_MASK(x) BITSET_BIT(x) + static inline uint32_t gen_make_gen(uint32_t major, uint32_t minor) { return (major << 8) | minor; @@ -51,7 +56,9 @@ const char *path); void gen_spec_destroy(struct gen_spec *spec); uint32_t gen_spec_get_gen(struct gen_spec *spec); -struct gen_group *gen_spec_find_instruction(struct gen_spec *spec, const uint32_t *p); +struct gen_group *gen_spec_find_instruction(struct gen_spec *spec, + enum drm_i915_gem_engine_class engine, + const uint32_t *p); struct gen_group *gen_spec_find_register(struct gen_spec *spec, uint32_t offset); struct gen_group *gen_spec_find_register_by_name(struct gen_spec *spec, const char *name); struct gen_enum *gen_spec_find_enum(struct gen_spec *spec, const char *name); @@ -102,6 +109,7 @@ struct gen_field *dword_length_field; /* specific */ uint32_t dw_length; + uint32_t engine_mask; /* specific */ uint32_t bias; /* specific */ uint32_t group_offset, group_count; uint32_t group_size; @@ -227,6 +235,8 @@ uint64_t instruction_base; int max_vbo_decoded_lines; + + enum drm_i915_gem_engine_class engine; }; void gen_batch_decode_ctx_init(struct gen_batch_decode_ctx *ctx, diff -Nru mesa-18.3.3/src/intel/common/gen_l3_config.c mesa-19.0.1/src/intel/common/gen_l3_config.c --- mesa-18.3.3/src/intel/common/gen_l3_config.c 2018-03-26 16:53:06.000000000 +0000 +++ mesa-19.0.1/src/intel/common/gen_l3_config.c 2019-03-31 23:16:37.000000000 +0000 @@ -134,15 +134,15 @@ /** * ICL validated L3 configurations. \sa icl_l3_configs. 
+ * Zeroth entry in below table has been commented out intentionally + * due to known issues with this configuration. Many other entries + * suggested by h/w specification aren't added here because they + * do under allocation of L3 cache with below partitioning. */ static const struct gen_l3_config icl_l3_configs[] = { /* SLM URB ALL DC RO IS C T */ - {{ 0, 64, 64, 0, 0, 0, 0, 0 }}, - {{ 0, 64, 0, 16, 48, 0, 0, 0 }}, - {{ 0, 48, 0, 16, 64, 0, 0, 0 }}, - {{ 0, 32, 0, 0, 96, 0, 0, 0 }}, - {{ 0, 32, 96, 0, 0, 0, 0, 0 }}, - {{ 0, 32, 0, 16, 80, 0, 0, 0 }}, + /*{{ 0, 16, 80, 0, 0, 0, 0, 0 }},*/ + {{ 0, 32, 64, 0, 0, 0, 0, 0 }}, {{ 0 }} }; @@ -309,7 +309,8 @@ get_l3_way_size(const struct gen_device_info *devinfo) { const unsigned way_size_per_bank = - devinfo->gen >= 9 && devinfo->l3_banks == 1 ? 4 : 2; + (devinfo->gen >= 9 && devinfo->l3_banks == 1) || devinfo->gen == 11 ? + 4 : 2; assert(devinfo->l3_banks); return way_size_per_bank * devinfo->l3_banks; diff -Nru mesa-18.3.3/src/intel/common/gen_urb_config.c mesa-19.0.1/src/intel/common/gen_urb_config.c --- mesa-18.3.3/src/intel/common/gen_urb_config.c 2018-04-16 21:31:06.000000000 +0000 +++ mesa-19.0.1/src/intel/common/gen_urb_config.c 2019-03-31 23:16:37.000000000 +0000 @@ -195,8 +195,14 @@ } /* Lay out the URB in pipeline order: push constants, VS, HS, DS, GS. */ - start[0] = push_constant_chunks; - for (int i = MESA_SHADER_TESS_CTRL; i <= MESA_SHADER_GEOMETRY; i++) { - start[i] = start[i - 1] + chunks[i - 1]; + int next = push_constant_chunks; + for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { + if (entries[i]) { + start[i] = next; + next += chunks[i]; + } else { + /* Just put disabled stages at the beginning. 
*/ + start[i] = 0; + } } } diff -Nru mesa-18.3.3/src/intel/compiler/brw_compiler.c mesa-19.0.1/src/intel/compiler/brw_compiler.c --- mesa-18.3.3/src/intel/compiler/brw_compiler.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_compiler.c 2019-03-31 23:16:37.000000000 +0000 @@ -42,6 +42,7 @@ .lower_fdiv = true, \ .lower_flrp64 = true, \ .lower_ldexp = true, \ + .lower_cs_local_id_from_index = true, \ .lower_device_index_to_zero = true, \ .native_integers = true, \ .use_interpolated_input_intrinsics = true, \ diff -Nru mesa-18.3.3/src/intel/compiler/brw_compiler.h mesa-19.0.1/src/intel/compiler/brw_compiler.h --- mesa-18.3.3/src/intel/compiler/brw_compiler.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_compiler.h 2019-03-31 23:16:37.000000000 +0000 @@ -195,6 +195,7 @@ uint32_t y_uv_image_mask; uint32_t yx_xuxv_image_mask; uint32_t xy_uxvx_image_mask; + uint32_t ayuv_image_mask; }; /** @@ -642,19 +643,6 @@ return prog_data->param + old_nr_params; } -static inline void -brw_mark_surface_used(struct brw_stage_prog_data *prog_data, - unsigned surf_index) -{ - /* A binding table index is 8 bits and the top 3 values are reserved for - * special things (stateless and SLM). 
- */ - assert(surf_index <= 252); - - prog_data->binding_table.size_bytes = - MAX2(prog_data->binding_table.size_bytes, (surf_index + 1) * 4); -} - enum brw_barycentric_mode { BRW_BARYCENTRIC_PERSPECTIVE_PIXEL = 0, BRW_BARYCENTRIC_PERSPECTIVE_CENTROID = 1, @@ -1238,7 +1226,7 @@ void *mem_ctx, const struct brw_vs_prog_key *key, struct brw_vs_prog_data *prog_data, - const struct nir_shader *shader, + struct nir_shader *shader, int shader_time_index, char **error_str); @@ -1253,7 +1241,7 @@ void *mem_ctx, const struct brw_tcs_prog_key *key, struct brw_tcs_prog_data *prog_data, - const struct nir_shader *nir, + struct nir_shader *nir, int shader_time_index, char **error_str); @@ -1268,7 +1256,7 @@ const struct brw_tes_prog_key *key, const struct brw_vue_map *input_vue_map, struct brw_tes_prog_data *prog_data, - const struct nir_shader *shader, + struct nir_shader *shader, struct gl_program *prog, int shader_time_index, char **error_str); @@ -1283,7 +1271,7 @@ void *mem_ctx, const struct brw_gs_prog_key *key, struct brw_gs_prog_data *prog_data, - const struct nir_shader *shader, + struct nir_shader *shader, struct gl_program *prog, int shader_time_index, char **error_str); @@ -1330,7 +1318,7 @@ void *mem_ctx, const struct brw_wm_prog_key *key, struct brw_wm_prog_data *prog_data, - const struct nir_shader *shader, + struct nir_shader *shader, struct gl_program *prog, int shader_time_index8, int shader_time_index16, diff -Nru mesa-18.3.3/src/intel/compiler/brw_disasm.c mesa-19.0.1/src/intel/compiler/brw_disasm.c --- mesa-18.3.3/src/intel/compiler/brw_disasm.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_disasm.c 2019-03-31 23:16:37.000000000 +0000 @@ -80,6 +80,22 @@ opcode == BRW_OPCODE_XOR; } +static bool +is_send(unsigned opcode) +{ + return opcode == BRW_OPCODE_SEND || + opcode == BRW_OPCODE_SENDC || + opcode == BRW_OPCODE_SENDS || + opcode == BRW_OPCODE_SENDSC; +} + +static bool +is_split_send(UNUSED const struct gen_device_info *devinfo, 
unsigned opcode) +{ + return opcode == BRW_OPCODE_SENDS || + opcode == BRW_OPCODE_SENDSC; +} + const char *const conditional_modifier[16] = { [BRW_CONDITIONAL_NONE] = "", [BRW_CONDITIONAL_Z] = ".z", @@ -289,7 +305,7 @@ [BRW_SFID_MESSAGE_GATEWAY] = "gateway", [BRW_SFID_URB] = "urb", [BRW_SFID_THREAD_SPAWNER] = "thread_spawner", - [GEN6_SFID_DATAPORT_SAMPLER_CACHE] = "sampler", + [GEN6_SFID_DATAPORT_SAMPLER_CACHE] = "dp_sampler", [GEN6_SFID_DATAPORT_RENDER_CACHE] = "render", [GEN6_SFID_DATAPORT_CONSTANT_CACHE] = "const", [GEN7_SFID_DATAPORT_DATA_CACHE] = "data", @@ -713,7 +729,28 @@ unsigned elem_size = brw_reg_type_to_size(type); int err = 0; - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + if (is_split_send(devinfo, brw_inst_opcode(devinfo, inst))) { + /* These are fixed for split sends */ + type = BRW_REGISTER_TYPE_UD; + elem_size = 4; + if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { + err |= reg(file, brw_inst_send_dst_reg_file(devinfo, inst), + brw_inst_dst_da_reg_nr(devinfo, inst)); + unsigned subreg_nr = brw_inst_dst_da16_subreg_nr(devinfo, inst); + if (subreg_nr) + format(file, ".%u", subreg_nr); + string(file, brw_reg_type_to_letters(type)); + } else { + string(file, "g[a0"); + if (brw_inst_dst_ia_subreg_nr(devinfo, inst)) + format(file, ".%"PRIu64, brw_inst_dst_ia_subreg_nr(devinfo, inst) / + elem_size); + if (brw_inst_send_dst_ia16_addr_imm(devinfo, inst)) + format(file, " %d", brw_inst_send_dst_ia16_addr_imm(devinfo, inst)); + string(file, "]<"); + string(file, brw_reg_type_to_letters(type)); + } + } else if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { err |= reg(file, brw_inst_dst_reg_file(devinfo, inst), brw_inst_dst_da_reg_nr(devinfo, inst)); @@ -1316,9 +1353,60 @@ } static int +src_sends_da(FILE *file, + const struct gen_device_info *devinfo, + enum brw_reg_type type, + unsigned _reg_nr, + unsigned _reg_subnr) +{ + int err = 0; + + err 
|= reg(file, BRW_GENERAL_REGISTER_FILE, _reg_nr); + if (err == -1) + return 0; + if (_reg_subnr) + format(file, ".1"); + string(file, brw_reg_type_to_letters(type)); + + return err; +} + +static int +src_sends_ia(FILE *file, + const struct gen_device_info *devinfo, + enum brw_reg_type type, + int _addr_imm, + unsigned _addr_subreg_nr) +{ + string(file, "g[a0"); + if (_addr_subreg_nr) + format(file, ".1"); + if (_addr_imm) + format(file, " %d", _addr_imm); + string(file, "]"); + string(file, brw_reg_type_to_letters(type)); + + return 0; +} + +static int src0(FILE *file, const struct gen_device_info *devinfo, const brw_inst *inst) { - if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) { + if (is_split_send(devinfo, brw_inst_opcode(devinfo, inst))) { + if (brw_inst_send_src0_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { + return src_sends_da(file, + devinfo, + BRW_REGISTER_TYPE_UD, + brw_inst_src0_da_reg_nr(devinfo, inst), + brw_inst_src0_da16_subreg_nr(devinfo, inst)); + } else { + return src_sends_ia(file, + devinfo, + BRW_REGISTER_TYPE_UD, + brw_inst_send_src0_ia16_addr_imm(devinfo, inst), + brw_inst_src0_ia_subreg_nr(devinfo, inst)); + } + } else if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) { return imm(file, devinfo, brw_inst_src0_type(devinfo, inst), inst); } else if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { if (brw_inst_src0_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { @@ -1373,7 +1461,13 @@ static int src1(FILE *file, const struct gen_device_info *devinfo, const brw_inst *inst) { - if (brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) { + if (is_split_send(devinfo, brw_inst_opcode(devinfo, inst))) { + return src_sends_da(file, + devinfo, + BRW_REGISTER_TYPE_UD, + brw_inst_send_src1_reg_nr(devinfo, inst), + 0 /* subreg_nr */); + } else if (brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) { return imm(file, devinfo, brw_inst_src1_type(devinfo, inst), inst); } else if 
(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { if (brw_inst_src1_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { @@ -1485,9 +1579,9 @@ string(file, "("); err |= control(file, "predicate inverse", pred_inv, brw_inst_pred_inv(devinfo, inst), NULL); - format(file, "f%"PRIu64, devinfo->gen >= 7 ? brw_inst_flag_reg_nr(devinfo, inst) : 0); - if (brw_inst_flag_subreg_nr(devinfo, inst)) - format(file, ".%"PRIu64, brw_inst_flag_subreg_nr(devinfo, inst)); + format(file, "f%"PRIu64".%"PRIu64, + devinfo->gen >= 7 ? brw_inst_flag_reg_nr(devinfo, inst) : 0, + brw_inst_flag_subreg_nr(devinfo, inst)); if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { err |= control(file, "predicate control align1", pred_ctrl_align1, brw_inst_pred_control(devinfo, inst), NULL); @@ -1509,7 +1603,7 @@ string(file, " "); err |= control(file, "function", math_function, brw_inst_math_function(devinfo, inst), NULL); - } else if (opcode != BRW_OPCODE_SEND && opcode != BRW_OPCODE_SENDC) { + } else if (!is_send(opcode)) { err |= control(file, "conditional modifier", conditional_modifier, brw_inst_cond_modifier(devinfo, inst), NULL); @@ -1522,10 +1616,9 @@ opcode != BRW_OPCODE_CSEL && opcode != BRW_OPCODE_IF && opcode != BRW_OPCODE_WHILE))) { - format(file, ".f%"PRIu64, - devinfo->gen >= 7 ? brw_inst_flag_reg_nr(devinfo, inst) : 0); - if (brw_inst_flag_subreg_nr(devinfo, inst)) - format(file, ".%"PRIu64, brw_inst_flag_subreg_nr(devinfo, inst)); + format(file, ".f%"PRIu64".%"PRIu64, + devinfo->gen >= 7 ? 
brw_inst_flag_reg_nr(devinfo, inst) : 0, + brw_inst_flag_subreg_nr(devinfo, inst)); } } @@ -1599,20 +1692,47 @@ } } - if (opcode == BRW_OPCODE_SEND || opcode == BRW_OPCODE_SENDC) { + if (is_send(opcode)) { enum brw_message_target sfid = brw_inst_sfid(devinfo, inst); - if (brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE) { - /* show the indirect descriptor source */ - pad(file, 48); - err |= src1(file, devinfo, inst); + bool has_imm_desc = false, has_imm_ex_desc = false; + uint32_t imm_desc = 0, imm_ex_desc = 0; + if (is_split_send(devinfo, opcode)) { pad(file, 64); + if (brw_inst_send_sel_reg32_desc(devinfo, inst)) { + /* show the indirect descriptor source */ + err |= src_sends_ia(file, devinfo, BRW_REGISTER_TYPE_UD, 0, 0); + } else { + has_imm_desc = true; + imm_desc = brw_inst_send_desc(devinfo, inst); + fprintf(file, "0x%08"PRIx32, imm_desc); + } + + pad(file, 80); + if (brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) { + /* show the indirect descriptor source */ + err |= src_sends_ia(file, devinfo, BRW_REGISTER_TYPE_UD, 0, + brw_inst_send_ex_desc_ia_subreg_nr(devinfo, inst)); + } else { + has_imm_ex_desc = true; + imm_ex_desc = brw_inst_send_ex_desc(devinfo, inst); + fprintf(file, "0x%08"PRIx32, imm_ex_desc); + } } else { - pad(file, 48); - } + if (brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE) { + /* show the indirect descriptor source */ + pad(file, 48); + err |= src1(file, devinfo, inst); + pad(file, 64); + } else { + has_imm_desc = true; + imm_desc = brw_inst_send_desc(devinfo, inst); + pad(file, 48); + } - /* Print message descriptor as immediate source */ - fprintf(file, "0x%08"PRIx64, inst->data[1] >> 32); + /* Print message descriptor as immediate source */ + fprintf(file, "0x%08"PRIx64, inst->data[1] >> 32); + } newline(file); pad(file, 16); @@ -1623,7 +1743,7 @@ sfid, &space); string(file, " MsgDesc:"); - if (brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE) { + if (!has_imm_desc) { format(file, " indirect"); } 
else { switch (sfid) { @@ -1642,21 +1762,24 @@ case BRW_SFID_SAMPLER: if (devinfo->gen >= 5) { err |= control(file, "sampler message", gen5_sampler_msg_type, - brw_inst_sampler_msg_type(devinfo, inst), &space); + brw_sampler_desc_msg_type(devinfo, imm_desc), + &space); err |= control(file, "sampler simd mode", gen5_sampler_simd_mode, - brw_inst_sampler_simd_mode(devinfo, inst), &space); - format(file, " Surface = %"PRIu64" Sampler = %"PRIu64, - brw_inst_binding_table_index(devinfo, inst), - brw_inst_sampler(devinfo, inst)); + brw_sampler_desc_simd_mode(devinfo, imm_desc), + &space); + format(file, " Surface = %u Sampler = %u", + brw_sampler_desc_binding_table_index(devinfo, imm_desc), + brw_sampler_desc_sampler(devinfo, imm_desc)); } else { - format(file, " (%"PRIu64", %"PRIu64", %"PRIu64", ", - brw_inst_binding_table_index(devinfo, inst), - brw_inst_sampler(devinfo, inst), - brw_inst_sampler_msg_type(devinfo, inst)); + format(file, " (%u, %u, %u, ", + brw_sampler_desc_binding_table_index(devinfo, imm_desc), + brw_sampler_desc_sampler(devinfo, imm_desc), + brw_sampler_desc_msg_type(devinfo, imm_desc)); if (!devinfo->is_g4x) { err |= control(file, "sampler target format", sampler_target_format, - brw_inst_sampler_return_format(devinfo, inst), NULL); + brw_sampler_desc_return_format(devinfo, imm_desc), + NULL); } string(file, ")"); } @@ -1665,29 +1788,31 @@ case GEN6_SFID_DATAPORT_CONSTANT_CACHE: /* aka BRW_SFID_DATAPORT_READ on Gen4-5 */ if (devinfo->gen >= 6) { - format(file, " (%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64")", - brw_inst_binding_table_index(devinfo, inst), - brw_inst_dp_msg_control(devinfo, inst), - brw_inst_dp_msg_type(devinfo, inst), - devinfo->gen >= 7 ? 0 : brw_inst_dp_write_commit(devinfo, inst)); + format(file, " (%u, %u, %u, %u)", + brw_dp_desc_binding_table_index(devinfo, imm_desc), + brw_dp_desc_msg_control(devinfo, imm_desc), + brw_dp_desc_msg_type(devinfo, imm_desc), + devinfo->gen >= 7 ? 
0u : + brw_dp_write_desc_write_commit(devinfo, imm_desc)); } else { bool is_965 = devinfo->gen == 4 && !devinfo->is_g4x; err |= control(file, "DP read message type", is_965 ? gen4_dp_read_port_msg_type : g45_dp_read_port_msg_type, - brw_inst_dp_read_msg_type(devinfo, inst), + brw_dp_read_desc_msg_type(devinfo, imm_desc), &space); - format(file, " MsgCtrl = 0x%"PRIx64, - brw_inst_dp_read_msg_control(devinfo, inst)); + format(file, " MsgCtrl = 0x%u", + brw_dp_read_desc_msg_control(devinfo, imm_desc)); - format(file, " Surface = %"PRIu64, brw_inst_binding_table_index(devinfo, inst)); + format(file, " Surface = %u", + brw_dp_desc_binding_table_index(devinfo, imm_desc)); } break; case GEN6_SFID_DATAPORT_RENDER_CACHE: { /* aka BRW_SFID_DATAPORT_WRITE on Gen4-5 */ - unsigned msg_type = brw_inst_dp_write_msg_type(devinfo, inst); + unsigned msg_type = brw_dp_write_desc_msg_type(devinfo, imm_desc); err |= control(file, "DP rc message type", dp_rc_msg_type(devinfo), msg_type, &space); @@ -1701,16 +1826,18 @@ brw_inst_rt_message_type(devinfo, inst), &space); if (devinfo->gen >= 6 && brw_inst_rt_slot_group(devinfo, inst)) string(file, " Hi"); - if (brw_inst_rt_last(devinfo, inst)) + if (brw_dp_write_desc_last_render_target(devinfo, imm_desc)) string(file, " LastRT"); - if (devinfo->gen < 7 && brw_inst_dp_write_commit(devinfo, inst)) + if (devinfo->gen < 7 && + brw_dp_write_desc_write_commit(devinfo, imm_desc)) string(file, " WriteCommit"); } else { - format(file, " MsgCtrl = 0x%"PRIx64, - brw_inst_dp_write_msg_control(devinfo, inst)); + format(file, " MsgCtrl = 0x%u", + brw_dp_write_desc_msg_control(devinfo, imm_desc)); } - format(file, " Surface = %"PRIu64, brw_inst_binding_table_index(devinfo, inst)); + format(file, " Surface = %u", + brw_dp_desc_binding_table_index(devinfo, imm_desc)); break; } @@ -1767,17 +1894,20 @@ err |= control(file, "DP DC0 message type", dp_dc0_msg_type_gen7, - brw_inst_dp_msg_type(devinfo, inst), &space); + brw_dp_desc_msg_type(devinfo, imm_desc), 
&space); - format(file, ", %"PRIu64", ", brw_inst_binding_table_index(devinfo, inst)); + format(file, ", %u, ", + brw_dp_desc_binding_table_index(devinfo, imm_desc)); switch (brw_inst_dp_msg_type(devinfo, inst)) { case GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP: control(file, "atomic op", aop, - brw_inst_imm_ud(devinfo, inst) >> 8 & 0xf, &space); + brw_dp_desc_msg_control(devinfo, imm_desc) & 0xf, + &space); break; default: - format(file, "%"PRIu64, brw_inst_dp_msg_control(devinfo, inst)); + format(file, "%u", + brw_dp_desc_msg_control(devinfo, imm_desc)); } format(file, ")"); break; @@ -1788,14 +1918,14 @@ if (devinfo->gen >= 7) { format(file, " ("); - unsigned msg_ctrl = brw_inst_dp_msg_control(devinfo, inst); + unsigned msg_ctrl = brw_dp_desc_msg_control(devinfo, imm_desc); err |= control(file, "DP DC1 message type", dp_dc1_msg_type_hsw, - brw_inst_dp_msg_type(devinfo, inst), &space); + brw_dp_desc_msg_type(devinfo, imm_desc), &space); - format(file, ", Surface = %"PRIu64", ", - brw_inst_binding_table_index(devinfo, inst)); + format(file, ", Surface = %u, ", + brw_dp_desc_binding_table_index(devinfo, imm_desc)); switch (brw_inst_dp_msg_type(devinfo, inst)) { case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP: @@ -1848,9 +1978,15 @@ if (space) string(file, " "); - format(file, "mlen %"PRIu64, brw_inst_mlen(devinfo, inst)); - format(file, " rlen %"PRIu64, brw_inst_rlen(devinfo, inst)); } + if (has_imm_desc) + format(file, "mlen %u", brw_message_desc_mlen(devinfo, imm_desc)); + if (has_imm_ex_desc) { + format(file, " ex_mlen %u", + brw_message_ex_desc_ex_mlen(devinfo, imm_ex_desc)); + } + if (has_imm_desc) + format(file, " rlen %u", brw_message_desc_rlen(devinfo, imm_desc)); } pad(file, 64); if (opcode != BRW_OPCODE_NOP && opcode != BRW_OPCODE_NENOP) { @@ -1893,7 +2029,7 @@ err |= control(file, "acc write control", accwr, brw_inst_acc_wr_control(devinfo, inst), &space); } - if (opcode == BRW_OPCODE_SEND || opcode == BRW_OPCODE_SENDC) + if (is_send(opcode)) err |= control(file, 
"end of thread", end_of_thread, brw_inst_eot(devinfo, inst), &space); if (space) diff -Nru mesa-18.3.3/src/intel/compiler/brw_eu_defines.h mesa-19.0.1/src/intel/compiler/brw_eu_defines.h --- mesa-18.3.3/src/intel/compiler/brw_eu_defines.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_eu_defines.h 2019-03-31 23:16:37.000000000 +0000 @@ -41,14 +41,14 @@ /* Using the GNU statement expression extension */ #define SET_FIELD(value, field) \ ({ \ - uint32_t fieldval = (value) << field ## _SHIFT; \ + uint32_t fieldval = (uint32_t)(value) << field ## _SHIFT; \ assert((fieldval & ~ field ## _MASK) == 0); \ fieldval & field ## _MASK; \ }) #define SET_BITS(value, high, low) \ ({ \ - const uint32_t fieldval = (value) << (low); \ + const uint32_t fieldval = (uint32_t)(value) << (low); \ assert((fieldval & ~INTEL_MASK(high, low)) == 0); \ fieldval & INTEL_MASK(high, low); \ }) @@ -316,6 +316,13 @@ SHADER_OPCODE_COS, /** + * A generic "send" opcode. The first two sources are the message + * descriptor and extended message descriptor respectively. The third + * and optional fourth sources are the message payload + */ + SHADER_OPCODE_SEND, + + /** * Texture sampling opcodes. * * LOGICAL opcodes are eventually translated to the matching non-LOGICAL @@ -355,6 +362,7 @@ SHADER_OPCODE_SAMPLEINFO_LOGICAL, SHADER_OPCODE_IMAGE_SIZE, + SHADER_OPCODE_IMAGE_SIZE_LOGICAL, /** * Combines multiple sources of size 1 into a larger virtual GRF. 
@@ -518,13 +526,10 @@ FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7, FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4, - FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7, FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL, FS_OPCODE_DISCARD_JUMP, FS_OPCODE_SET_SAMPLE_ID, FS_OPCODE_PACK_HALF_2x16_SPLIT, - FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, - FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, FS_OPCODE_PLACEHOLDER_HALT, FS_OPCODE_INTERPOLATE_AT_SAMPLE, FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, @@ -811,6 +816,8 @@ TEX_LOGICAL_SRC_LOD, /** dPdy if the operation takes explicit derivatives */ TEX_LOGICAL_SRC_LOD2, + /** Min LOD */ + TEX_LOGICAL_SRC_MIN_LOD, /** Sample index */ TEX_LOGICAL_SRC_SAMPLE_INDEX, /** MCS data */ diff -Nru mesa-18.3.3/src/intel/compiler/brw_eu_emit.c mesa-19.0.1/src/intel/compiler/brw_eu_emit.c --- mesa-18.3.3/src/intel/compiler/brw_eu_emit.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_eu_emit.c 2019-03-31 23:16:37.000000000 +0000 @@ -91,51 +91,65 @@ if (dest.file == BRW_MESSAGE_REGISTER_FILE) assert((dest.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen)); - else if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE) + else if (dest.file == BRW_GENERAL_REGISTER_FILE) assert(dest.nr < 128); gen7_convert_mrf_to_grf(p, &dest); - brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type); - brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode); - - if (dest.address_mode == BRW_ADDRESS_DIRECT) { + if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) { + assert(dest.file == BRW_GENERAL_REGISTER_FILE || + dest.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(dest.address_mode == BRW_ADDRESS_DIRECT); + assert(dest.subnr % 16 == 0); + assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 && + dest.vstride == dest.width + 1); + assert(!dest.negate && !dest.abs); brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr); - - if (brw_inst_access_mode(devinfo, inst) == 
BRW_ALIGN_1) { - brw_inst_set_dst_da1_subreg_nr(devinfo, inst, dest.subnr); - if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) - dest.hstride = BRW_HORIZONTAL_STRIDE_1; - brw_inst_set_dst_hstride(devinfo, inst, dest.hstride); - } else { - brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16); - brw_inst_set_da16_writemask(devinfo, inst, dest.writemask); - if (dest.file == BRW_GENERAL_REGISTER_FILE || - dest.file == BRW_MESSAGE_REGISTER_FILE) { - assert(dest.writemask != 0); - } - /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1: - * Although Dst.HorzStride is a don't care for Align16, HW needs - * this to be programmed as "01". - */ - brw_inst_set_dst_hstride(devinfo, inst, 1); - } + brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16); + brw_inst_set_send_dst_reg_file(devinfo, inst, dest.file); } else { - brw_inst_set_dst_ia_subreg_nr(devinfo, inst, dest.subnr); + brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type); + brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode); - /* These are different sizes in align1 vs align16: - */ - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_dst_ia1_addr_imm(devinfo, inst, - dest.indirect_offset); - if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) - dest.hstride = BRW_HORIZONTAL_STRIDE_1; - brw_inst_set_dst_hstride(devinfo, inst, dest.hstride); + if (dest.address_mode == BRW_ADDRESS_DIRECT) { + brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr); + + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_dst_da1_subreg_nr(devinfo, inst, dest.subnr); + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + brw_inst_set_dst_hstride(devinfo, inst, dest.hstride); + } else { + brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16); + brw_inst_set_da16_writemask(devinfo, inst, dest.writemask); + if (dest.file == BRW_GENERAL_REGISTER_FILE || + dest.file == BRW_MESSAGE_REGISTER_FILE) { + assert(dest.writemask 
!= 0); + } + /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1: + * Although Dst.HorzStride is a don't care for Align16, HW needs + * this to be programmed as "01". + */ + brw_inst_set_dst_hstride(devinfo, inst, 1); + } } else { - brw_inst_set_dst_ia16_addr_imm(devinfo, inst, - dest.indirect_offset); - /* even ignored in da16, still need to set as '01' */ - brw_inst_set_dst_hstride(devinfo, inst, 1); + brw_inst_set_dst_ia_subreg_nr(devinfo, inst, dest.subnr); + + /* These are different sizes in align1 vs align16: + */ + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_dst_ia1_addr_imm(devinfo, inst, + dest.indirect_offset); + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + brw_inst_set_dst_hstride(devinfo, inst, dest.hstride); + } else { + brw_inst_set_dst_ia16_addr_imm(devinfo, inst, + dest.indirect_offset); + /* even ignored in da16, still need to set as '01' */ + brw_inst_set_dst_hstride(devinfo, inst, 1); + } } } @@ -170,13 +184,16 @@ if (reg.file == BRW_MESSAGE_REGISTER_FILE) assert((reg.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen)); - else if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE) + else if (reg.file == BRW_GENERAL_REGISTER_FILE) assert(reg.nr < 128); gen7_convert_mrf_to_grf(p, ®); - if (devinfo->gen >= 6 && (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || - brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) { + if (devinfo->gen >= 6 && + (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC)) { /* Any source modifiers or regions will be ignored, since this just * identifies the MRF/GRF to start reading the message contents from. * Check for some likely failures. 
@@ -186,84 +203,96 @@ assert(reg.address_mode == BRW_ADDRESS_DIRECT); } - brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type); - brw_inst_set_src0_abs(devinfo, inst, reg.abs); - brw_inst_set_src0_negate(devinfo, inst, reg.negate); - brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode); - - if (reg.file == BRW_IMMEDIATE_VALUE) { - if (reg.type == BRW_REGISTER_TYPE_DF || - brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DIM) - brw_inst_set_imm_df(devinfo, inst, reg.df); - else if (reg.type == BRW_REGISTER_TYPE_UQ || - reg.type == BRW_REGISTER_TYPE_Q) - brw_inst_set_imm_uq(devinfo, inst, reg.u64); - else - brw_inst_set_imm_ud(devinfo, inst, reg.ud); + if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) { + assert(reg.file == BRW_GENERAL_REGISTER_FILE); + assert(reg.address_mode == BRW_ADDRESS_DIRECT); + assert(reg.subnr % 16 == 0); + assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 && + reg.vstride == reg.width + 1); + assert(!reg.negate && !reg.abs); + brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr); + brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16); + } else { + brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type); + brw_inst_set_src0_abs(devinfo, inst, reg.abs); + brw_inst_set_src0_negate(devinfo, inst, reg.negate); + brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode); + + if (reg.file == BRW_IMMEDIATE_VALUE) { + if (reg.type == BRW_REGISTER_TYPE_DF || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DIM) + brw_inst_set_imm_df(devinfo, inst, reg.df); + else if (reg.type == BRW_REGISTER_TYPE_UQ || + reg.type == BRW_REGISTER_TYPE_Q) + brw_inst_set_imm_uq(devinfo, inst, reg.u64); + else + brw_inst_set_imm_ud(devinfo, inst, reg.ud); - if (type_sz(reg.type) < 8) { - brw_inst_set_src1_reg_file(devinfo, inst, - BRW_ARCHITECTURE_REGISTER_FILE); - brw_inst_set_src1_reg_hw_type(devinfo, inst, - brw_inst_src0_reg_hw_type(devinfo, inst)); - } - } else { - if 
(reg.address_mode == BRW_ADDRESS_DIRECT) { - brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr); - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_src0_da1_subreg_nr(devinfo, inst, reg.subnr); - } else { - brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16); - } + if (type_sz(reg.type) < 8) { + brw_inst_set_src1_reg_file(devinfo, inst, + BRW_ARCHITECTURE_REGISTER_FILE); + brw_inst_set_src1_reg_hw_type(devinfo, inst, + brw_inst_src0_reg_hw_type(devinfo, inst)); + } } else { - brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr); + if (reg.address_mode == BRW_ADDRESS_DIRECT) { + brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr); + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_src0_da1_subreg_nr(devinfo, inst, reg.subnr); + } else { + brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16); + } + } else { + brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr); - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset); - } else { - brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset); - } - } + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset); + } else { + brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset); + } + } - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - if (reg.width == BRW_WIDTH_1 && - brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) { - brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0); - brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1); - brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0); - } else { - brw_inst_set_src0_hstride(devinfo, inst, reg.hstride); - brw_inst_set_src0_width(devinfo, inst, reg.width); - brw_inst_set_src0_vstride(devinfo, inst, reg.vstride); - } - } else { - brw_inst_set_src0_da16_swiz_x(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, 
BRW_CHANNEL_X)); - brw_inst_set_src0_da16_swiz_y(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y)); - brw_inst_set_src0_da16_swiz_z(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z)); - brw_inst_set_src0_da16_swiz_w(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); - - if (reg.vstride == BRW_VERTICAL_STRIDE_8) { - /* This is an oddity of the fact we're using the same - * descriptions for registers in align_16 as align_1: - */ - brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); - } else if (devinfo->gen == 7 && !devinfo->is_haswell && - reg.type == BRW_REGISTER_TYPE_DF && - reg.vstride == BRW_VERTICAL_STRIDE_2) { - /* From SNB PRM: - * - * "For Align16 access mode, only encodings of 0000 and 0011 - * are allowed. Other codes are reserved." - * - * Presumably the DevSNB behavior applies to IVB as well. - */ - brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) { + brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0); + brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1); + brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0); + } else { + brw_inst_set_src0_hstride(devinfo, inst, reg.hstride); + brw_inst_set_src0_width(devinfo, inst, reg.width); + brw_inst_set_src0_vstride(devinfo, inst, reg.vstride); + } } else { - brw_inst_set_src0_vstride(devinfo, inst, reg.vstride); + brw_inst_set_src0_da16_swiz_x(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X)); + brw_inst_set_src0_da16_swiz_y(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y)); + brw_inst_set_src0_da16_swiz_z(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z)); + brw_inst_set_src0_da16_swiz_w(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); + + if (reg.vstride == BRW_VERTICAL_STRIDE_8) { + /* This is an oddity of the fact we're using the same + * descriptions 
for registers in align_16 as align_1: + */ + brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + } else if (devinfo->gen == 7 && !devinfo->is_haswell && + reg.type == BRW_REGISTER_TYPE_DF && + reg.vstride == BRW_VERTICAL_STRIDE_2) { + /* From SNB PRM: + * + * "For Align16 access mode, only encodings of 0000 and 0011 + * are allowed. Other codes are reserved." + * + * Presumably the DevSNB behavior applies to IVB as well. + */ + brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + } else { + brw_inst_set_src0_vstride(devinfo, inst, reg.vstride); + } } } } @@ -275,85 +304,98 @@ { const struct gen_device_info *devinfo = p->devinfo; - if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE) + if (reg.file == BRW_GENERAL_REGISTER_FILE) assert(reg.nr < 128); - /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5: - * - * "Accumulator registers may be accessed explicitly as src0 - * operands only." - */ - assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE || - reg.nr != BRW_ARF_ACCUMULATOR); - - gen7_convert_mrf_to_grf(p, ®); - assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) { + assert(reg.file == BRW_GENERAL_REGISTER_FILE || + reg.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(reg.address_mode == BRW_ADDRESS_DIRECT); + assert(reg.subnr == 0); + assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 && + reg.vstride == reg.width + 1); + assert(!reg.negate && !reg.abs); + brw_inst_set_send_src1_reg_nr(devinfo, inst, reg.nr); + brw_inst_set_send_src1_reg_file(devinfo, inst, reg.file); + } else { + /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5: + * + * "Accumulator registers may be accessed explicitly as src0 + * operands only." 
+ */ + assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE || + reg.nr != BRW_ARF_ACCUMULATOR); - brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type); - brw_inst_set_src1_abs(devinfo, inst, reg.abs); - brw_inst_set_src1_negate(devinfo, inst, reg.negate); + gen7_convert_mrf_to_grf(p, ®); + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); - /* Only src1 can be immediate in two-argument instructions. - */ - assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE); + brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type); + brw_inst_set_src1_abs(devinfo, inst, reg.abs); + brw_inst_set_src1_negate(devinfo, inst, reg.negate); - if (reg.file == BRW_IMMEDIATE_VALUE) { - /* two-argument instructions can only use 32-bit immediates */ - assert(type_sz(reg.type) < 8); - brw_inst_set_imm_ud(devinfo, inst, reg.ud); - } else { - /* This is a hardware restriction, which may or may not be lifted - * in the future: + /* Only src1 can be immediate in two-argument instructions. */ - assert (reg.address_mode == BRW_ADDRESS_DIRECT); - /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ + assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE); - brw_inst_set_src1_da_reg_nr(devinfo, inst, reg.nr); - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_src1_da1_subreg_nr(devinfo, inst, reg.subnr); + if (reg.file == BRW_IMMEDIATE_VALUE) { + /* two-argument instructions can only use 32-bit immediates */ + assert(type_sz(reg.type) < 8); + brw_inst_set_imm_ud(devinfo, inst, reg.ud); } else { - brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16); - } + /* This is a hardware restriction, which may or may not be lifted + * in the future: + */ + assert (reg.address_mode == BRW_ADDRESS_DIRECT); + /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - if (reg.width == BRW_WIDTH_1 && - brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) { - 
brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0); - brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1); - brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0); - } else { - brw_inst_set_src1_hstride(devinfo, inst, reg.hstride); - brw_inst_set_src1_width(devinfo, inst, reg.width); - brw_inst_set_src1_vstride(devinfo, inst, reg.vstride); - } - } else { - brw_inst_set_src1_da16_swiz_x(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X)); - brw_inst_set_src1_da16_swiz_y(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y)); - brw_inst_set_src1_da16_swiz_z(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z)); - brw_inst_set_src1_da16_swiz_w(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); - - if (reg.vstride == BRW_VERTICAL_STRIDE_8) { - /* This is an oddity of the fact we're using the same - * descriptions for registers in align_16 as align_1: - */ - brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); - } else if (devinfo->gen == 7 && !devinfo->is_haswell && - reg.type == BRW_REGISTER_TYPE_DF && - reg.vstride == BRW_VERTICAL_STRIDE_2) { - /* From SNB PRM: - * - * "For Align16 access mode, only encodings of 0000 and 0011 - * are allowed. Other codes are reserved." - * - * Presumably the DevSNB behavior applies to IVB as well. 
- */ - brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + brw_inst_set_src1_da_reg_nr(devinfo, inst, reg.nr); + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_src1_da1_subreg_nr(devinfo, inst, reg.subnr); + } else { + brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16); + } + + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) { + brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0); + brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1); + brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0); + } else { + brw_inst_set_src1_hstride(devinfo, inst, reg.hstride); + brw_inst_set_src1_width(devinfo, inst, reg.width); + brw_inst_set_src1_vstride(devinfo, inst, reg.vstride); + } } else { - brw_inst_set_src1_vstride(devinfo, inst, reg.vstride); + brw_inst_set_src1_da16_swiz_x(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X)); + brw_inst_set_src1_da16_swiz_y(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y)); + brw_inst_set_src1_da16_swiz_z(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z)); + brw_inst_set_src1_da16_swiz_w(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); + + if (reg.vstride == BRW_VERTICAL_STRIDE_8) { + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + } else if (devinfo->gen == 7 && !devinfo->is_haswell && + reg.type == BRW_REGISTER_TYPE_DF && + reg.vstride == BRW_VERTICAL_STRIDE_2) { + /* From SNB PRM: + * + * "For Align16 access mode, only encodings of 0000 and 0011 + * are allowed. Other codes are reserved." + * + * Presumably the DevSNB behavior applies to IVB as well. 
+ */ + brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + } else { + brw_inst_set_src1_vstride(devinfo, inst, reg.vstride); + } } } } @@ -654,9 +696,9 @@ gen7_convert_mrf_to_grf(p, &dest); assert(dest.nr < 128); - assert(src0.nr < 128); - assert(src1.nr < 128); - assert(src2.nr < 128); + assert(src0.file != BRW_IMMEDIATE_VALUE || src0.nr < 128); + assert(src1.file != BRW_IMMEDIATE_VALUE || src1.nr < 128); + assert(src2.file != BRW_IMMEDIATE_VALUE || src2.nr < 128); assert(dest.address_mode == BRW_ADDRESS_DIRECT); assert(src0.address_mode == BRW_ADDRESS_DIRECT); assert(src1.address_mode == BRW_ADDRESS_DIRECT); @@ -833,7 +875,15 @@ struct brw_reg src0, \ struct brw_reg src1, \ struct brw_reg src2) \ -{ \ +{ \ + if (p->current->access_mode == BRW_ALIGN_16) { \ + if (src0.vstride == BRW_VERTICAL_STRIDE_0) \ + src0.swizzle = BRW_SWIZZLE_XXXX; \ + if (src1.vstride == BRW_VERTICAL_STRIDE_0) \ + src1.swizzle = BRW_SWIZZLE_XXXX; \ + if (src2.vstride == BRW_VERTICAL_STRIDE_0) \ + src2.swizzle = BRW_SWIZZLE_XXXX; \ + } \ return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \ } @@ -855,6 +905,15 @@ assert(src1.type == BRW_REGISTER_TYPE_DF); \ assert(src2.type == BRW_REGISTER_TYPE_DF); \ } \ + \ + if (p->current->access_mode == BRW_ALIGN_16) { \ + if (src0.vstride == BRW_VERTICAL_STRIDE_0) \ + src0.swizzle = BRW_SWIZZLE_XXXX; \ + if (src1.vstride == BRW_VERTICAL_STRIDE_0) \ + src1.swizzle = BRW_SWIZZLE_XXXX; \ + if (src2.vstride == BRW_VERTICAL_STRIDE_0) \ + src2.swizzle = BRW_SWIZZLE_XXXX; \ + } \ return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \ } @@ -2456,14 +2515,106 @@ brw_set_src1(p, send, addr); } - if (dst.width < BRW_EXECUTE_8) - brw_inst_set_exec_size(devinfo, send, dst.width); - brw_set_dest(p, send, dst); brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); brw_inst_set_sfid(devinfo, send, sfid); } +void +brw_send_indirect_split_message(struct brw_codegen *p, + unsigned sfid, + struct brw_reg dst, + struct brw_reg 
payload0, + struct brw_reg payload1, + struct brw_reg desc, + unsigned desc_imm, + struct brw_reg ex_desc, + unsigned ex_desc_imm) +{ + const struct gen_device_info *devinfo = p->devinfo; + struct brw_inst *send; + + dst = retype(dst, BRW_REGISTER_TYPE_UW); + + assert(desc.type == BRW_REGISTER_TYPE_UD); + + if (desc.file == BRW_IMMEDIATE_VALUE) { + desc.ud |= desc_imm; + } else { + struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD); + + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + + /* Load the indirect descriptor to an address register using OR so the + * caller can specify additional descriptor bits with the desc_imm + * immediate. + */ + brw_OR(p, addr, desc, brw_imm_ud(desc_imm)); + + brw_pop_insn_state(p); + desc = addr; + } + + if (ex_desc.file == BRW_IMMEDIATE_VALUE) { + ex_desc.ud |= ex_desc_imm; + } else { + struct brw_reg addr = retype(brw_address_reg(2), BRW_REGISTER_TYPE_UD); + + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + + /* Load the indirect extended descriptor to an address register using OR + * so the caller can specify additional descriptor bits with the + * desc_imm immediate. + * + * Even though the instruction dispatcher always pulls the SFID from the + * instruction itself, the extended descriptor sent to the actual unit + * gets the SFID from the extended descriptor which comes from the + * address register. If we don't OR it in, the external unit gets + * confused and hangs the GPU. 
+ */ + brw_OR(p, addr, ex_desc, brw_imm_ud(ex_desc_imm | sfid)); + + brw_pop_insn_state(p); + ex_desc = addr; + } + + send = next_insn(p, BRW_OPCODE_SENDS); + brw_set_dest(p, send, dst); + brw_set_src0(p, send, retype(payload0, BRW_REGISTER_TYPE_UD)); + brw_set_src1(p, send, retype(payload1, BRW_REGISTER_TYPE_UD)); + + if (desc.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_send_sel_reg32_desc(devinfo, send, 0); + brw_inst_set_send_desc(devinfo, send, desc.ud); + } else { + assert(desc.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(desc.nr == BRW_ARF_ADDRESS); + assert(desc.subnr == 0); + brw_inst_set_send_sel_reg32_desc(devinfo, send, 1); + } + + if (ex_desc.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0); + brw_inst_set_send_ex_desc(devinfo, send, ex_desc.ud); + } else { + assert(ex_desc.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(ex_desc.nr == BRW_ARF_ADDRESS); + assert((ex_desc.subnr & 0x3) == 0); + brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1); + brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, ex_desc.subnr >> 2); + } + + brw_inst_set_sfid(devinfo, send, sfid); +} + static void brw_send_indirect_surface_message(struct brw_codegen *p, unsigned sfid, @@ -2724,45 +2875,14 @@ static unsigned brw_surface_payload_size(struct brw_codegen *p, unsigned num_channels, - bool has_simd4x2, - bool has_simd16) + unsigned exec_size /**< 0 for SIMD4x2 */) { - if (has_simd4x2 && brw_get_default_access_mode(p) == BRW_ALIGN_16) - return 1; - else if (has_simd16 && brw_get_default_exec_size(p) == BRW_EXECUTE_16) - return 2 * num_channels; - else + if (exec_size == 0) + return 1; /* SIMD4x2 */ + else if (exec_size <= 8) return num_channels; -} - -static uint32_t -brw_dp_untyped_atomic_desc(struct brw_codegen *p, - unsigned atomic_op, - bool response_expected) -{ - const struct gen_device_info *devinfo = p->devinfo; - unsigned msg_control = - atomic_op | /* Atomic Operation Type: BRW_AOP_* */ - (response_expected ? 
1 << 5 : 0); /* Return data expected */ - unsigned msg_type; - - if (devinfo->gen >= 8 || devinfo->is_haswell) { - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if (brw_get_default_exec_size(p) != BRW_EXECUTE_16) - msg_control |= 1 << 4; /* SIMD8 mode */ - - msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP; - } else { - msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2; - } - } else { - if (brw_get_default_exec_size(p) != BRW_EXECUTE_16) - msg_control |= 1 << 4; /* SIMD8 mode */ - - msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP; - } - - return brw_dp_surface_desc(devinfo, msg_type, msg_control); + else + return 2 * num_channels; } void @@ -2779,12 +2899,17 @@ const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? HSW_SFID_DATAPORT_DATA_CACHE_1 : GEN7_SFID_DATAPORT_DATA_CACHE); - const unsigned response_length = brw_surface_payload_size( - p, response_expected, devinfo->gen >= 8 || devinfo->is_haswell, true); + const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + /* SIMD4x2 untyped atomic instructions only exist on HSW+ */ + const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell; + const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : + has_simd4x2 ? 
0 : 8; + const unsigned response_length = + brw_surface_payload_size(p, response_expected, exec_size); const unsigned desc = brw_message_desc(devinfo, msg_length, response_length, header_present) | - brw_dp_untyped_atomic_desc(p, atomic_op, response_expected); - const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + brw_dp_untyped_atomic_desc(devinfo, exec_size, atomic_op, + response_expected); /* Mask out unused components -- This is especially important in Align16 * mode on generations that don't have native support for SIMD4x2 atomics, * because unused but enabled components will cause the dataport to perform @@ -2797,74 +2922,6 @@ payload, surface, desc); } -static uint32_t -brw_dp_untyped_atomic_float_desc(struct brw_codegen *p, - unsigned atomic_op, - bool response_expected) -{ - const struct gen_device_info *devinfo = p->devinfo; - const unsigned msg_type = GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP; - unsigned msg_control = - atomic_op | /* Atomic Operation Type: BRW_AOP_F* */ - (response_expected ? 
1 << 5 : 0); /* Return data expected */ - - assert(devinfo->gen >= 9); - assert(brw_get_default_access_mode(p) == BRW_ALIGN_1); - - if (brw_get_default_exec_size(p) != BRW_EXECUTE_16) - msg_control |= 1 << 4; /* SIMD8 mode */ - - return brw_dp_surface_desc(devinfo, msg_type, msg_control); -} - -void -brw_untyped_atomic_float(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg payload, - struct brw_reg surface, - unsigned atomic_op, - unsigned msg_length, - bool response_expected, - bool header_present) -{ - const struct gen_device_info *devinfo = p->devinfo; - - assert(devinfo->gen >= 9); - assert(brw_get_default_access_mode(p) == BRW_ALIGN_1); - - const unsigned sfid = HSW_SFID_DATAPORT_DATA_CACHE_1; - const unsigned response_length = brw_surface_payload_size( - p, response_expected, true, true); - const unsigned desc = - brw_message_desc(devinfo, msg_length, response_length, header_present) | - brw_dp_untyped_atomic_float_desc(p, atomic_op, response_expected); - - brw_send_indirect_surface_message(p, sfid, - brw_writemask(dst, WRITEMASK_XYZW), - payload, surface, desc); -} - -static uint32_t -brw_dp_untyped_surface_read_desc(struct brw_codegen *p, - unsigned num_channels) -{ - const struct gen_device_info *devinfo = p->devinfo; - const unsigned msg_type = (devinfo->gen >= 8 || devinfo->is_haswell ? - HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ : - GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ); - /* Set mask of 32-bit channels to drop. */ - unsigned msg_control = 0xf & (0xf << num_channels); - - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if (brw_get_default_exec_size(p) == BRW_EXECUTE_16) - msg_control |= 1 << 4; /* SIMD16 mode */ - else - msg_control |= 2 << 4; /* SIMD8 mode */ - } - - return brw_dp_surface_desc(devinfo, msg_type, msg_control); -} - void brw_untyped_surface_read(struct brw_codegen *p, struct brw_reg dst, @@ -2877,41 +2934,17 @@ const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? 
HSW_SFID_DATAPORT_DATA_CACHE_1 : GEN7_SFID_DATAPORT_DATA_CACHE); + const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : 0; const unsigned response_length = - brw_surface_payload_size(p, num_channels, true, true); + brw_surface_payload_size(p, num_channels, exec_size); const unsigned desc = brw_message_desc(devinfo, msg_length, response_length, false) | - brw_dp_untyped_surface_read_desc(p, num_channels); + brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, false); brw_send_indirect_surface_message(p, sfid, dst, payload, surface, desc); } -static uint32_t -brw_dp_untyped_surface_write_desc(struct brw_codegen *p, - unsigned num_channels) -{ - const struct gen_device_info *devinfo = p->devinfo; - const unsigned msg_type = (devinfo->gen >= 8 || devinfo->is_haswell ? - HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE : - GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE); - /* Set mask of 32-bit channels to drop. */ - unsigned msg_control = 0xf & (0xf << num_channels); - - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if (brw_get_default_exec_size(p) == BRW_EXECUTE_16) - msg_control |= 1 << 4; /* SIMD16 mode */ - else - msg_control |= 2 << 4; /* SIMD8 mode */ - } else { - if (devinfo->gen >= 8 || devinfo->is_haswell) - msg_control |= 0 << 4; /* SIMD4x2 mode */ - else - msg_control |= 2 << 4; /* SIMD8 mode */ - } - - return brw_dp_surface_desc(devinfo, msg_type, msg_control); -} - void brw_untyped_surface_write(struct brw_codegen *p, struct brw_reg payload, @@ -2924,124 +2957,21 @@ const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? HSW_SFID_DATAPORT_DATA_CACHE_1 : GEN7_SFID_DATAPORT_DATA_CACHE); + const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + /* SIMD4x2 untyped surface write instructions only exist on HSW+ */ + const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell; + const unsigned exec_size = align1 ? 
1 << brw_get_default_exec_size(p) : + has_simd4x2 ? 0 : 8; const unsigned desc = brw_message_desc(devinfo, msg_length, 0, header_present) | - brw_dp_untyped_surface_write_desc(p, num_channels); - const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, true); /* Mask out unused components -- See comment in brw_untyped_atomic(). */ - const unsigned mask = devinfo->gen == 7 && !devinfo->is_haswell && !align1 ? - WRITEMASK_X : WRITEMASK_XYZW; + const unsigned mask = !has_simd4x2 && !align1 ? WRITEMASK_X : WRITEMASK_XYZW; brw_send_indirect_surface_message(p, sfid, brw_writemask(brw_null_reg(), mask), payload, surface, desc); } -static unsigned -brw_byte_scattered_data_element_from_bit_size(unsigned bit_size) -{ - switch (bit_size) { - case 8: - return GEN7_BYTE_SCATTERED_DATA_ELEMENT_BYTE; - case 16: - return GEN7_BYTE_SCATTERED_DATA_ELEMENT_WORD; - case 32: - return GEN7_BYTE_SCATTERED_DATA_ELEMENT_DWORD; - default: - unreachable("Unsupported bit_size for byte scattered messages"); - } -} - -static uint32_t -brw_dp_byte_scattered_desc(struct brw_codegen *p, unsigned bit_size, - unsigned msg_type) -{ - const struct gen_device_info *devinfo = p->devinfo; - unsigned msg_control = - brw_byte_scattered_data_element_from_bit_size(bit_size) << 2; - - if (brw_get_default_exec_size(p) == BRW_EXECUTE_16) - msg_control |= 1; /* SIMD16 mode */ - else - msg_control |= 0; /* SIMD8 mode */ - - return brw_dp_surface_desc(devinfo, msg_type, msg_control); -} - -void -brw_byte_scattered_read(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg payload, - struct brw_reg surface, - unsigned msg_length, - unsigned bit_size) -{ - const struct gen_device_info *devinfo = p->devinfo; - assert(devinfo->gen > 7 || devinfo->is_haswell); - assert(brw_get_default_access_mode(p) == BRW_ALIGN_1); - const unsigned response_length = - brw_surface_payload_size(p, 1, true, true); - const unsigned desc = - 
brw_message_desc(devinfo, msg_length, response_length, false) | - brw_dp_byte_scattered_desc(p, bit_size, - HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ); - - brw_send_indirect_surface_message(p, GEN7_SFID_DATAPORT_DATA_CACHE, - dst, payload, surface, desc); -} - -void -brw_byte_scattered_write(struct brw_codegen *p, - struct brw_reg payload, - struct brw_reg surface, - unsigned msg_length, - unsigned bit_size, - bool header_present) -{ - const struct gen_device_info *devinfo = p->devinfo; - assert(devinfo->gen > 7 || devinfo->is_haswell); - assert(brw_get_default_access_mode(p) == BRW_ALIGN_1); - const unsigned desc = - brw_message_desc(devinfo, msg_length, 0, header_present) | - brw_dp_byte_scattered_desc(p, bit_size, - HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE); - - brw_send_indirect_surface_message(p, GEN7_SFID_DATAPORT_DATA_CACHE, - brw_writemask(brw_null_reg(), - WRITEMASK_XYZW), - payload, surface, desc); -} - -static uint32_t -brw_dp_typed_atomic_desc(struct brw_codegen *p, - unsigned atomic_op, - bool response_expected) -{ - const struct gen_device_info *devinfo = p->devinfo; - unsigned msg_control = - atomic_op | /* Atomic Operation Type: BRW_AOP_* */ - (response_expected ? 
1 << 5 : 0); /* Return data expected */ - unsigned msg_type; - - if (devinfo->gen >= 8 || devinfo->is_haswell) { - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if ((brw_get_default_group(p) / 8) % 2 == 1) - msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */ - - msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP; - } else { - msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2; - } - - } else { - if ((brw_get_default_group(p) / 8) % 2 == 1) - msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */ - - msg_type = GEN7_DATAPORT_RC_TYPED_ATOMIC_OP; - } - - return brw_dp_surface_desc(devinfo, msg_type, msg_control); -} - void brw_typed_atomic(struct brw_codegen *p, struct brw_reg dst, @@ -3055,12 +2985,19 @@ const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? HSW_SFID_DATAPORT_DATA_CACHE_1 : GEN6_SFID_DATAPORT_RENDER_CACHE); - const unsigned response_length = brw_surface_payload_size( - p, response_expected, devinfo->gen >= 8 || devinfo->is_haswell, false); + const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + /* SIMD4x2 typed atomic instructions only exist on HSW+ */ + const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell; + const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : + has_simd4x2 ? 0 : 8; + /* Typed atomics don't support SIMD16 */ + assert(exec_size <= 8); + const unsigned response_length = + brw_surface_payload_size(p, response_expected, exec_size); const unsigned desc = brw_message_desc(devinfo, msg_length, response_length, header_present) | - brw_dp_typed_atomic_desc(p, atomic_op, response_expected); - const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + brw_dp_typed_atomic_desc(devinfo, exec_size, brw_get_default_group(p), + atomic_op, response_expected); /* Mask out unused components -- See comment in brw_untyped_atomic(). */ const unsigned mask = align1 ? 
WRITEMASK_XYZW : WRITEMASK_X; @@ -3068,36 +3005,6 @@ payload, surface, desc); } -static uint32_t -brw_dp_typed_surface_read_desc(struct brw_codegen *p, - unsigned num_channels) -{ - const struct gen_device_info *devinfo = p->devinfo; - /* Set mask of unused channels. */ - unsigned msg_control = 0xf & (0xf << num_channels); - unsigned msg_type; - - if (devinfo->gen >= 8 || devinfo->is_haswell) { - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if ((brw_get_default_group(p) / 8) % 2 == 1) - msg_control |= 2 << 4; /* Use high 8 slots of the sample mask */ - else - msg_control |= 1 << 4; /* Use low 8 slots of the sample mask */ - } - - msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ; - } else { - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if ((brw_get_default_group(p) / 8) % 2 == 1) - msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */ - } - - msg_type = GEN7_DATAPORT_RC_TYPED_SURFACE_READ; - } - - return brw_dp_surface_desc(devinfo, msg_type, msg_control); -} - void brw_typed_surface_read(struct brw_codegen *p, struct brw_reg dst, @@ -3111,46 +3018,21 @@ const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? HSW_SFID_DATAPORT_DATA_CACHE_1 : GEN6_SFID_DATAPORT_RENDER_CACHE); - const unsigned response_length = brw_surface_payload_size( - p, num_channels, devinfo->gen >= 8 || devinfo->is_haswell, false); + const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + /* SIMD4x2 typed read instructions only exist on HSW+ */ + const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell; + const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : + has_simd4x2 ? 
0 : 8; + const unsigned response_length = + brw_surface_payload_size(p, num_channels, exec_size); const unsigned desc = brw_message_desc(devinfo, msg_length, response_length, header_present) | - brw_dp_typed_surface_read_desc(p, num_channels); + brw_dp_typed_surface_rw_desc(devinfo, exec_size, brw_get_default_group(p), + num_channels, false); brw_send_indirect_surface_message(p, sfid, dst, payload, surface, desc); } -static uint32_t -brw_dp_typed_surface_write_desc(struct brw_codegen *p, - unsigned num_channels) -{ - const struct gen_device_info *devinfo = p->devinfo; - /* Set mask of unused channels. */ - unsigned msg_control = 0xf & (0xf << num_channels); - unsigned msg_type; - - if (devinfo->gen >= 8 || devinfo->is_haswell) { - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if ((brw_get_default_group(p) / 8) % 2 == 1) - msg_control |= 2 << 4; /* Use high 8 slots of the sample mask */ - else - msg_control |= 1 << 4; /* Use low 8 slots of the sample mask */ - } - - msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE; - - } else { - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if ((brw_get_default_group(p) / 8) % 2 == 1) - msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */ - } - - msg_type = GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE; - } - - return brw_dp_surface_desc(devinfo, msg_type, msg_control); -} - void brw_typed_surface_write(struct brw_codegen *p, struct brw_reg payload, @@ -3163,13 +3045,17 @@ const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? HSW_SFID_DATAPORT_DATA_CACHE_1 : GEN6_SFID_DATAPORT_RENDER_CACHE); + const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + /* SIMD4x2 typed read instructions only exist on HSW+ */ + const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell; + const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : + has_simd4x2 ? 
0 : 8; const unsigned desc = brw_message_desc(devinfo, msg_length, 0, header_present) | - brw_dp_typed_surface_write_desc(p, num_channels); - const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + brw_dp_typed_surface_rw_desc(devinfo, exec_size, brw_get_default_group(p), + num_channels, true); /* Mask out unused components -- See comment in brw_untyped_atomic(). */ - const unsigned mask = (devinfo->gen == 7 && !devinfo->is_haswell && !align1 ? - WRITEMASK_X : WRITEMASK_XYZW); + const unsigned mask = !has_simd4x2 && !align1 ? WRITEMASK_X : WRITEMASK_XYZW; brw_send_indirect_surface_message(p, sfid, brw_writemask(brw_null_reg(), mask), payload, surface, desc); @@ -3295,6 +3181,14 @@ brw_push_insn_state(p); + /* The flag register is only used on Gen7 in align1 mode, so avoid setting + * unnecessary bits in the instruction words, get the information we need + * and reset the default flag register. This allows more instructions to be + * compacted. + */ + const unsigned flag_subreg = p->current->flag_subreg; + brw_set_default_flag_reg(p, 0, 0); + if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { brw_set_default_mask_control(p, BRW_MASK_DISABLE); @@ -3328,8 +3222,7 @@ */ inst = brw_FBL(p, vec1(dst), exec_mask); } else { - const struct brw_reg flag = brw_flag_reg(p->current->flag_subreg / 2, - p->current->flag_subreg % 2); + const struct brw_reg flag = brw_flag_subreg(flag_subreg); brw_set_default_exec_size(p, BRW_EXECUTE_1); brw_MOV(p, retype(flag, BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); @@ -3349,6 +3242,8 @@ brw_inst_set_group(devinfo, inst, lower_size * i + 8 * qtr_control); brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_Z); brw_inst_set_exec_size(devinfo, inst, cvt(lower_size) - 1); + brw_inst_set_flag_reg_nr(devinfo, inst, flag_subreg / 2); + brw_inst_set_flag_subreg_nr(devinfo, inst, flag_subreg % 2); } /* Find the first bit set in the exec_size-wide portion of the flag @@ -3554,7 +3449,8 @@ brw_set_src0(p, send, brw_vec1_reg(payload.file, 
payload.nr, 0)); brw_set_desc(p, send, (brw_message_desc(devinfo, 2, 0, false) | - brw_dp_untyped_atomic_desc(p, BRW_AOP_ADD, false))); + brw_dp_untyped_atomic_desc(devinfo, 1, BRW_AOP_ADD, + false))); brw_inst_set_sfid(devinfo, send, sfid); brw_inst_set_binding_table_index(devinfo, send, surf_index); diff -Nru mesa-18.3.3/src/intel/compiler/brw_eu.h mesa-19.0.1/src/intel/compiler/brw_eu.h --- mesa-18.3.3/src/intel/compiler/brw_eu.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_eu.h 2019-03-31 23:16:37.000000000 +0000 @@ -266,6 +266,46 @@ } } +static inline unsigned +brw_message_desc_mlen(const struct gen_device_info *devinfo, uint32_t desc) +{ + if (devinfo->gen >= 5) + return GET_BITS(desc, 28, 25); + else + return GET_BITS(desc, 23, 20); +} + +static inline unsigned +brw_message_desc_rlen(const struct gen_device_info *devinfo, uint32_t desc) +{ + if (devinfo->gen >= 5) + return GET_BITS(desc, 24, 20); + else + return GET_BITS(desc, 19, 16); +} + +static inline bool +brw_message_desc_header_present(const struct gen_device_info *devinfo, + uint32_t desc) +{ + assert(devinfo->gen >= 5); + return GET_BITS(desc, 19, 19); +} + +static inline unsigned +brw_message_ex_desc(const struct gen_device_info *devinfo, + unsigned ex_msg_length) +{ + return SET_BITS(ex_msg_length, 9, 6); +} + +static inline unsigned +brw_message_ex_desc_ex_mlen(const struct gen_device_info *devinfo, + uint32_t ex_desc) +{ + return GET_BITS(ex_desc, 9, 6); +} + /** * Construct a message descriptor immediate with the specified sampler * function controls. 
@@ -293,6 +333,103 @@ SET_BITS(msg_type, 15, 14)); } +static inline unsigned +brw_sampler_desc_binding_table_index(const struct gen_device_info *devinfo, + uint32_t desc) +{ + return GET_BITS(desc, 7, 0); +} + +static inline unsigned +brw_sampler_desc_sampler(const struct gen_device_info *devinfo, uint32_t desc) +{ + return GET_BITS(desc, 11, 8); +} + +static inline unsigned +brw_sampler_desc_msg_type(const struct gen_device_info *devinfo, uint32_t desc) +{ + if (devinfo->gen >= 7) + return GET_BITS(desc, 16, 12); + else if (devinfo->gen >= 5 || devinfo->is_g4x) + return GET_BITS(desc, 15, 12); + else + return GET_BITS(desc, 15, 14); +} + +static inline unsigned +brw_sampler_desc_simd_mode(const struct gen_device_info *devinfo, uint32_t desc) +{ + assert(devinfo->gen >= 5); + if (devinfo->gen >= 7) + return GET_BITS(desc, 18, 17); + else + return GET_BITS(desc, 17, 16); +} + +static inline unsigned +brw_sampler_desc_return_format(const struct gen_device_info *devinfo, + uint32_t desc) +{ + assert(devinfo->gen == 4 && !devinfo->is_g4x); + return GET_BITS(desc, 13, 12); +} + +/** + * Construct a message descriptor for the dataport + */ +static inline uint32_t +brw_dp_desc(const struct gen_device_info *devinfo, + unsigned binding_table_index, + unsigned msg_type, + unsigned msg_control) +{ + /* Prior to gen6, things are too inconsistent; use the dp_read/write_desc + * helpers instead. 
+ */ + assert(devinfo->gen >= 6); + const unsigned desc = SET_BITS(binding_table_index, 7, 0); + if (devinfo->gen >= 8) { + return (desc | SET_BITS(msg_control, 13, 8) | + SET_BITS(msg_type, 18, 14)); + } else if (devinfo->gen >= 7) { + return (desc | SET_BITS(msg_control, 13, 8) | + SET_BITS(msg_type, 17, 14)); + } else { + return (desc | SET_BITS(msg_control, 12, 8) | + SET_BITS(msg_type, 16, 13)); + } +} + +static inline unsigned +brw_dp_desc_binding_table_index(const struct gen_device_info *devinfo, + uint32_t desc) +{ + return GET_BITS(desc, 7, 0); +} + +static inline unsigned +brw_dp_desc_msg_type(const struct gen_device_info *devinfo, uint32_t desc) +{ + assert(devinfo->gen >= 6); + if (devinfo->gen >= 8) + return GET_BITS(desc, 18, 14); + else if (devinfo->gen >= 7) + return GET_BITS(desc, 17, 14); + else + return GET_BITS(desc, 16, 13); +} + +static inline unsigned +brw_dp_desc_msg_control(const struct gen_device_info *devinfo, uint32_t desc) +{ + assert(devinfo->gen >= 6); + if (devinfo->gen >= 7) + return GET_BITS(desc, 13, 8); + else + return GET_BITS(desc, 12, 8); +} + /** * Construct a message descriptor immediate with the specified dataport read * function controls. 
@@ -304,23 +441,43 @@ unsigned msg_type, unsigned target_cache) { - const unsigned desc = SET_BITS(binding_table_index, 7, 0); - if (devinfo->gen >= 7) - return (desc | SET_BITS(msg_control, 13, 8) | - SET_BITS(msg_type, 17, 14)); - else if (devinfo->gen >= 6) - return (desc | SET_BITS(msg_control, 12, 8) | - SET_BITS(msg_type, 16, 13)); + if (devinfo->gen >= 6) + return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control); else if (devinfo->gen >= 5 || devinfo->is_g4x) - return (desc | SET_BITS(msg_control, 10, 8) | + return (SET_BITS(binding_table_index, 7, 0) | + SET_BITS(msg_control, 10, 8) | SET_BITS(msg_type, 13, 11) | SET_BITS(target_cache, 15, 14)); else - return (desc | SET_BITS(msg_control, 11, 8) | + return (SET_BITS(binding_table_index, 7, 0) | + SET_BITS(msg_control, 11, 8) | SET_BITS(msg_type, 13, 12) | SET_BITS(target_cache, 15, 14)); } +static inline unsigned +brw_dp_read_desc_msg_type(const struct gen_device_info *devinfo, uint32_t desc) +{ + if (devinfo->gen >= 6) + return brw_dp_desc_msg_type(devinfo, desc); + else if (devinfo->gen >= 5 || devinfo->is_g4x) + return GET_BITS(desc, 13, 11); + else + return GET_BITS(desc, 13, 12); +} + +static inline unsigned +brw_dp_read_desc_msg_control(const struct gen_device_info *devinfo, + uint32_t desc) +{ + if (devinfo->gen >= 6) + return brw_dp_desc_msg_control(devinfo, desc); + else if (devinfo->gen >= 5 || devinfo->is_g4x) + return GET_BITS(desc, 10, 8); + else + return GET_BITS(desc, 11, 8); +} + /** * Construct a message descriptor immediate with the specified dataport write * function controls. 
@@ -333,23 +490,60 @@ unsigned last_render_target, unsigned send_commit_msg) { - const unsigned desc = SET_BITS(binding_table_index, 7, 0); - if (devinfo->gen >= 7) - return (desc | SET_BITS(msg_control, 13, 8) | - SET_BITS(last_render_target, 12, 12) | - SET_BITS(msg_type, 17, 14)); - else if (devinfo->gen >= 6) - return (desc | SET_BITS(msg_control, 12, 8) | - SET_BITS(last_render_target, 12, 12) | - SET_BITS(msg_type, 16, 13) | - SET_BITS(send_commit_msg, 17, 17)); + assert(devinfo->gen <= 6 || !send_commit_msg); + if (devinfo->gen >= 6) + return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control) | + SET_BITS(last_render_target, 12, 12) | + SET_BITS(send_commit_msg, 17, 17); else - return (desc | SET_BITS(msg_control, 11, 8) | + return (SET_BITS(binding_table_index, 7, 0) | + SET_BITS(msg_control, 11, 8) | SET_BITS(last_render_target, 11, 11) | SET_BITS(msg_type, 14, 12) | SET_BITS(send_commit_msg, 15, 15)); } +static inline unsigned +brw_dp_write_desc_msg_type(const struct gen_device_info *devinfo, + uint32_t desc) +{ + if (devinfo->gen >= 6) + return brw_dp_desc_msg_type(devinfo, desc); + else + return GET_BITS(desc, 14, 12); +} + +static inline unsigned +brw_dp_write_desc_msg_control(const struct gen_device_info *devinfo, + uint32_t desc) +{ + if (devinfo->gen >= 6) + return brw_dp_desc_msg_control(devinfo, desc); + else + return GET_BITS(desc, 11, 8); +} + +static inline bool +brw_dp_write_desc_last_render_target(const struct gen_device_info *devinfo, + uint32_t desc) +{ + if (devinfo->gen >= 6) + return GET_BITS(desc, 12, 12); + else + return GET_BITS(desc, 11, 11); +} + +static inline bool +brw_dp_write_desc_write_commit(const struct gen_device_info *devinfo, + uint32_t desc) +{ + assert(devinfo->gen <= 6); + if (devinfo->gen >= 6) + return GET_BITS(desc, 17, 17); + else + return GET_BITS(desc, 15, 15); +} + /** * Construct a message descriptor immediate with the specified dataport * surface function controls. 
@@ -360,13 +554,221 @@ unsigned msg_control) { assert(devinfo->gen >= 7); - if (devinfo->gen >= 8) { - return (SET_BITS(msg_control, 13, 8) | - SET_BITS(msg_type, 18, 14)); + /* We'll OR in the binding table index later */ + return brw_dp_desc(devinfo, 0, msg_type, msg_control); +} + +static inline uint32_t +brw_dp_untyped_atomic_desc(const struct gen_device_info *devinfo, + unsigned exec_size, /**< 0 for SIMD4x2 */ + unsigned atomic_op, + bool response_expected) +{ + assert(exec_size <= 8 || exec_size == 16); + + unsigned msg_type; + if (devinfo->gen >= 8 || devinfo->is_haswell) { + if (exec_size > 0) { + msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP; + } else { + msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2; + } } else { - return (SET_BITS(msg_control, 13, 8) | - SET_BITS(msg_type, 17, 14)); + msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP; + } + + const unsigned msg_control = + SET_BITS(atomic_op, 3, 0) | + SET_BITS(0 < exec_size && exec_size <= 8, 4, 4) | + SET_BITS(response_expected, 5, 5); + + return brw_dp_surface_desc(devinfo, msg_type, msg_control); +} + +static inline uint32_t +brw_dp_untyped_atomic_float_desc(const struct gen_device_info *devinfo, + unsigned exec_size, + unsigned atomic_op, + bool response_expected) +{ + assert(exec_size <= 8 || exec_size == 16); + assert(devinfo->gen >= 9); + + assert(exec_size > 0); + const unsigned msg_type = GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP; + + const unsigned msg_control = + SET_BITS(atomic_op, 1, 0) | + SET_BITS(exec_size <= 8, 4, 4) | + SET_BITS(response_expected, 5, 5); + + return brw_dp_surface_desc(devinfo, msg_type, msg_control); +} + +static inline unsigned +brw_mdc_cmask(unsigned num_channels) +{ + /* See also MDC_CMASK in the SKL PRM Vol 2d. 
*/ + return 0xf & (0xf << num_channels); +} + +static inline uint32_t +brw_dp_untyped_surface_rw_desc(const struct gen_device_info *devinfo, + unsigned exec_size, /**< 0 for SIMD4x2 */ + unsigned num_channels, + bool write) +{ + assert(exec_size <= 8 || exec_size == 16); + + unsigned msg_type; + if (write) { + if (devinfo->gen >= 8 || devinfo->is_haswell) { + msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE; + } else { + msg_type = GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE; + } + } else { + /* Read */ + if (devinfo->gen >= 8 || devinfo->is_haswell) { + msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ; + } else { + msg_type = GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ; + } + } + + /* SIMD4x2 is only valid for read messages on IVB; use SIMD8 instead */ + if (write && devinfo->gen == 7 && !devinfo->is_haswell && exec_size == 0) + exec_size = 8; + + /* See also MDC_SM3 in the SKL PRM Vol 2d. */ + const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */ + exec_size <= 8 ? 2 : 1; + + const unsigned msg_control = + SET_BITS(brw_mdc_cmask(num_channels), 3, 0) | + SET_BITS(simd_mode, 5, 4); + + return brw_dp_surface_desc(devinfo, msg_type, msg_control); +} + +static inline unsigned +brw_mdc_ds(unsigned bit_size) +{ + switch (bit_size) { + case 8: + return GEN7_BYTE_SCATTERED_DATA_ELEMENT_BYTE; + case 16: + return GEN7_BYTE_SCATTERED_DATA_ELEMENT_WORD; + case 32: + return GEN7_BYTE_SCATTERED_DATA_ELEMENT_DWORD; + default: + unreachable("Unsupported bit_size for byte scattered messages"); + } +} + +static inline uint32_t +brw_dp_byte_scattered_rw_desc(const struct gen_device_info *devinfo, + unsigned exec_size, + unsigned bit_size, + bool write) +{ + assert(exec_size <= 8 || exec_size == 16); + + assert(devinfo->gen > 7 || devinfo->is_haswell); + const unsigned msg_type = + write ? 
HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE : + HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ; + + assert(exec_size > 0); + const unsigned msg_control = + SET_BITS(exec_size == 16, 0, 0) | + SET_BITS(brw_mdc_ds(bit_size), 3, 2); + + return brw_dp_surface_desc(devinfo, msg_type, msg_control); +} + +static inline uint32_t +brw_dp_typed_atomic_desc(const struct gen_device_info *devinfo, + unsigned exec_size, + unsigned exec_group, + unsigned atomic_op, + bool response_expected) +{ + assert(exec_size > 0 || exec_group == 0); + assert(exec_group % 8 == 0); + + unsigned msg_type; + if (devinfo->gen >= 8 || devinfo->is_haswell) { + if (exec_size == 0) { + msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2; + } else { + msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP; + } + } else { + /* SIMD4x2 typed surface R/W messages only exist on HSW+ */ + assert(exec_size > 0); + msg_type = GEN7_DATAPORT_RC_TYPED_ATOMIC_OP; } + + const bool high_sample_mask = (exec_group / 8) % 2 == 1; + + const unsigned msg_control = + SET_BITS(atomic_op, 3, 0) | + SET_BITS(high_sample_mask, 4, 4) | + SET_BITS(response_expected, 5, 5); + + return brw_dp_surface_desc(devinfo, msg_type, msg_control); +} + +static inline uint32_t +brw_dp_typed_surface_rw_desc(const struct gen_device_info *devinfo, + unsigned exec_size, + unsigned exec_group, + unsigned num_channels, + bool write) +{ + assert(exec_size > 0 || exec_group == 0); + assert(exec_group % 8 == 0); + + /* Typed surface reads and writes don't support SIMD16 */ + assert(exec_size <= 8); + + unsigned msg_type; + if (write) { + if (devinfo->gen >= 8 || devinfo->is_haswell) { + msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE; + } else { + msg_type = GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE; + } + } else { + if (devinfo->gen >= 8 || devinfo->is_haswell) { + msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ; + } else { + msg_type = GEN7_DATAPORT_RC_TYPED_SURFACE_READ; + } + } + + /* See also MDC_SG3 in the SKL PRM Vol 2d. 
*/ + unsigned msg_control; + if (devinfo->gen >= 8 || devinfo->is_haswell) { + /* See also MDC_SG3 in the SKL PRM Vol 2d. */ + const unsigned slot_group = exec_size == 0 ? 0 : /* SIMD4x2 */ + 1 + ((exec_group / 8) % 2); + + msg_control = + SET_BITS(brw_mdc_cmask(num_channels), 3, 0) | + SET_BITS(slot_group, 5, 4); + } else { + /* SIMD4x2 typed surface R/W messages only exist on HSW+ */ + assert(exec_size > 0); + const unsigned slot_group = ((exec_group / 8) % 2); + + msg_control = + SET_BITS(brw_mdc_cmask(num_channels), 3, 0) | + SET_BITS(slot_group, 5, 5); + } + + return brw_dp_surface_desc(devinfo, msg_type, msg_control); } /** @@ -409,6 +811,17 @@ struct brw_reg desc, unsigned desc_imm); +void +brw_send_indirect_split_message(struct brw_codegen *p, + unsigned sfid, + struct brw_reg dst, + struct brw_reg payload0, + struct brw_reg payload1, + struct brw_reg desc, + unsigned desc_imm, + struct brw_reg ex_desc, + unsigned ex_desc_imm); + void brw_ff_sync(struct brw_codegen *p, struct brw_reg dest, unsigned msg_reg_nr, @@ -578,17 +991,6 @@ bool header_present); void -brw_untyped_atomic_float(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg payload, - struct brw_reg surface, - unsigned atomic_op, - unsigned msg_length, - bool response_expected, - bool header_present); - - -void brw_untyped_surface_read(struct brw_codegen *p, struct brw_reg dst, struct brw_reg payload, @@ -632,22 +1034,6 @@ bool header_present); void -brw_byte_scattered_read(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg payload, - struct brw_reg surface, - unsigned msg_length, - unsigned bit_size); - -void -brw_byte_scattered_write(struct brw_codegen *p, - struct brw_reg payload, - struct brw_reg surface, - unsigned msg_length, - unsigned bit_size, - bool header_present); - -void brw_memory_fence(struct brw_codegen *p, struct brw_reg dst, enum opcode send_op); diff -Nru mesa-18.3.3/src/intel/compiler/brw_eu_validate.c mesa-19.0.1/src/intel/compiler/brw_eu_validate.c --- 
mesa-18.3.3/src/intel/compiler/brw_eu_validate.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_eu_validate.c 2019-03-31 23:16:37.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Copyright © 2015 Intel Corporation + * Copyright © 2015-2019 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -24,6 +24,18 @@ /** @file brw_eu_validate.c * * This file implements a pass that validates shader assembly. + * + * The restrictions implemented herein are intended to verify that instructions + * in shader assembly do not violate restrictions documented in the graphics + * programming reference manuals. + * + * The restrictions are difficult for humans to quickly verify due to their + * complexity and abundance. + * + * It is critical that this code is thoroughly unit tested because false + * results will lead developers astray, which is worse than having no validator + * at all. Functional changes to this file without corresponding unit tests (in + * test_eu_validate.cpp) will be rejected. */ #include "brw_eu.h" @@ -90,6 +102,18 @@ } } +static bool +inst_is_split_send(const struct gen_device_info *devinfo, const brw_inst *inst) +{ + switch (brw_inst_opcode(devinfo, inst)) { + case BRW_OPCODE_SENDS: + case BRW_OPCODE_SENDSC: + return true; + default: + return false; + } +} + static unsigned signed_type(unsigned type) { @@ -236,6 +260,12 @@ if (num_sources == 3) return (struct string){}; + /* Nothing to test. Split sends can only encode a file in sources that are + * allowed to be NULL. 
+ */ + if (inst_is_split_send(devinfo, inst)) + return (struct string){}; + if (num_sources >= 1) ERROR_IF(src0_is_null(devinfo, inst), "src0 is null"); @@ -251,7 +281,41 @@ { struct string error_msg = { .str = NULL, .len = 0 }; - if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND) { + if (inst_is_split_send(devinfo, inst)) { + ERROR_IF(brw_inst_send_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && + brw_inst_send_src1_reg_nr(devinfo, inst) != BRW_ARF_NULL, + "src1 of split send must be a GRF or NULL"); + + ERROR_IF(brw_inst_eot(devinfo, inst) && + brw_inst_src0_da_reg_nr(devinfo, inst) < 112, + "send with EOT must use g112-g127"); + ERROR_IF(brw_inst_eot(devinfo, inst) && + brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE && + brw_inst_send_src1_reg_nr(devinfo, inst) < 112, + "send with EOT must use g112-g127"); + + if (brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE) { + /* Assume minimums if we don't know */ + unsigned mlen = 1; + if (!brw_inst_send_sel_reg32_desc(devinfo, inst)) { + const uint32_t desc = brw_inst_send_desc(devinfo, inst); + mlen = brw_message_desc_mlen(devinfo, desc); + } + + unsigned ex_mlen = 1; + if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) { + const uint32_t ex_desc = brw_inst_send_ex_desc(devinfo, inst); + ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc); + } + const unsigned src0_reg_nr = brw_inst_src0_da_reg_nr(devinfo, inst); + const unsigned src1_reg_nr = brw_inst_send_src1_reg_nr(devinfo, inst); + ERROR_IF((src0_reg_nr <= src1_reg_nr && + src1_reg_nr < src0_reg_nr + mlen) || + (src1_reg_nr <= src0_reg_nr && + src0_reg_nr < src1_reg_nr + ex_mlen), + "split send payloads must not overlap"); + } + } else if (inst_is_send(devinfo, inst)) { ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT, "send must use direct addressing"); @@ -521,6 +585,12 @@ if (num_sources == 3) return (struct string){}; + /* Split sends don't have the bits in the 
instruction to encode regions so + * there's nothing to check. + */ + if (inst_is_split_send(devinfo, inst)) + return (struct string){}; + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) { if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1, @@ -1111,6 +1181,10 @@ if (num_sources == 3 || num_sources == 0) return (struct string){}; + /* Split sends don't have types so there's no doubles there. */ + if (inst_is_split_send(devinfo, inst)) + return (struct string){}; + enum brw_reg_type exec_type = execution_type(devinfo, inst); unsigned exec_type_size = brw_reg_type_to_size(exec_type); diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_builder.h mesa-19.0.1/src/intel/compiler/brw_fs_builder.h --- mesa-18.3.3/src/intel/compiler/brw_fs_builder.h 2018-10-21 19:21:32.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_builder.h 2019-03-31 23:16:37.000000000 +0000 @@ -114,11 +114,25 @@ fs_builder group(unsigned n, unsigned i) const { - assert(force_writemask_all || - (n <= dispatch_width() && i < dispatch_width() / n)); fs_builder bld = *this; + + if (n <= dispatch_width() && i < dispatch_width() / n) { + bld._group += i * n; + } else { + /* The requested channel group isn't a subset of the channel group + * of this builder, which means that the resulting instructions + * would use (potentially undefined) channel enable signals not + * specified by the parent builder. That's only valid if the + * instruction doesn't have per-channel semantics, in which case + * we should clear off the default group index in order to prevent + * emitting instructions with channel group not aligned to their + * own execution size. 
+ */ + assert(force_writemask_all); + bld._group = 0; + } + bld._dispatch_width = n; - bld._group += i * n; return bld; } @@ -412,6 +426,21 @@ return src_reg(component(dst, 0)); } + src_reg + move_to_vgrf(const src_reg &src, unsigned num_components) const + { + src_reg *const src_comps = new src_reg[num_components]; + for (unsigned i = 0; i < num_components; i++) + src_comps[i] = offset(src, dispatch_width(), i); + + const dst_reg dst = vgrf(src.type, num_components); + LOAD_PAYLOAD(dst, src_comps, num_components, 0); + + delete[] src_comps; + + return src_reg(dst); + } + void emit_scan(enum opcode opcode, const dst_reg &tmp, unsigned cluster_size, brw_conditional_mod mod) const @@ -437,43 +466,13 @@ if (cluster_size > 1) { const fs_builder ubld = exec_all().group(dispatch_width() / 2, 0); - dst_reg left = horiz_stride(tmp, 2); - dst_reg right = horiz_stride(horiz_offset(tmp, 1), 2); - - /* From the Cherryview PRM Vol. 7, "Register Region Restrictiosn": - * - * "When source or destination datatype is 64b or operation is - * integer DWord multiply, regioning in Align1 must follow - * these rules: - * - * [...] - * - * 3. Source and Destination offset must be the same, except - * the case of scalar source." - * - * In order to work around this, we create a temporary register - * and shift left over to match right. If we have a 64-bit type, - * we have to use two integer MOVs instead of a 64-bit MOV. 
- */ - if (need_matching_subreg_offset(opcode, tmp.type)) { - dst_reg tmp2 = vgrf(tmp.type); - dst_reg new_left = horiz_stride(horiz_offset(tmp2, 1), 2); - if (type_sz(tmp.type) > 4) { - ubld.MOV(subscript(new_left, BRW_REGISTER_TYPE_D, 0), - subscript(left, BRW_REGISTER_TYPE_D, 0)); - ubld.MOV(subscript(new_left, BRW_REGISTER_TYPE_D, 1), - subscript(left, BRW_REGISTER_TYPE_D, 1)); - } else { - ubld.MOV(new_left, left); - } - left = new_left; - } + const dst_reg left = horiz_stride(tmp, 2); + const dst_reg right = horiz_stride(horiz_offset(tmp, 1), 2); set_condmod(mod, ubld.emit(opcode, right, left, right)); } if (cluster_size > 2) { - if (type_sz(tmp.type) <= 4 && - !need_matching_subreg_offset(opcode, tmp.type)) { + if (type_sz(tmp.type) <= 4) { const fs_builder ubld = exec_all().group(dispatch_width() / 4, 0); src_reg left = horiz_stride(horiz_offset(tmp, 1), 4); @@ -773,38 +772,6 @@ } } - - /* From the Cherryview PRM Vol. 7, "Register Region Restrictiosn": - * - * "When source or destination datatype is 64b or operation is - * integer DWord multiply, regioning in Align1 must follow - * these rules: - * - * [...] - * - * 3. Source and Destination offset must be the same, except - * the case of scalar source." - * - * This helper just detects when we're in this case. 
- */ - bool - need_matching_subreg_offset(enum opcode opcode, - enum brw_reg_type type) const - { - if (!shader->devinfo->is_cherryview && - !gen_device_info_is_9lp(shader->devinfo)) - return false; - - if (type_sz(type) > 4) - return true; - - if (opcode == BRW_OPCODE_MUL && - !brw_reg_type_is_floating_point(type)) - return true; - - return false; - } - bblock_t *block; exec_node *cursor; diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_cmod_propagation.cpp mesa-19.0.1/src/intel/compiler/brw_fs_cmod_propagation.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs_cmod_propagation.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_cmod_propagation.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -255,6 +255,13 @@ if (inst->opcode == BRW_OPCODE_AND) break; + /* Not safe to use inequality operators if the types are different + */ + if (scan_inst->dst.type != inst->src[0].type && + inst->conditional_mod != BRW_CONDITIONAL_Z && + inst->conditional_mod != BRW_CONDITIONAL_NZ) + break; + /* Comparisons operate differently for ints and floats */ if (scan_inst->dst.type != inst->dst.type && (scan_inst->dst.type == BRW_REGISTER_TYPE_F || diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_combine_constants.cpp mesa-19.0.1/src/intel/compiler/brw_fs_combine_constants.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs_combine_constants.cpp 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_combine_constants.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -294,13 +294,14 @@ for (int i = 0; i < table.len; i++) { foreach_list_typed(reg_link, link, link, table.imm[i].uses) { fs_reg *reg = link->reg; + assert((isnan(reg->f) && isnan(table.imm[i].val)) || + fabsf(reg->f) == fabs(table.imm[i].val)); + reg->file = VGRF; - reg->nr = table.imm[i].nr; reg->offset = table.imm[i].subreg_offset; reg->stride = 0; reg->negate = signbit(reg->f) != signbit(table.imm[i].val); - assert((isnan(reg->f) && isnan(table.imm[i].val)) || - fabsf(reg->f) == 
fabs(table.imm[i].val)); + reg->nr = table.imm[i].nr; } } diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_copy_propagation.cpp mesa-19.0.1/src/intel/compiler/brw_fs_copy_propagation.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs_copy_propagation.cpp 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_copy_propagation.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -371,6 +371,20 @@ return true; } +static bool +instruction_requires_packed_data(fs_inst *inst) +{ + switch (inst->opcode) { + case FS_OPCODE_DDX_FINE: + case FS_OPCODE_DDX_COARSE: + case FS_OPCODE_DDY_FINE: + case FS_OPCODE_DDY_COARSE: + return true; + default: + return false; + } +} + bool fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) { @@ -417,6 +431,13 @@ inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_WRITE) return false; + /* Some instructions implemented in the generator backend, such as + * derivatives, assume that their operands are packed so we can't + * generally propagate strided regions to them. + */ + if (instruction_requires_packed_data(inst) && entry->src.stride > 1) + return false; + /* Bail if the result of composing both strides would exceed the * hardware limit. 
*/ diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs.cpp mesa-19.0.1/src/intel/compiler/brw_fs.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs.cpp 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -213,31 +213,10 @@ } bool -fs_inst::equals(fs_inst *inst) const -{ - return (opcode == inst->opcode && - dst.equals(inst->dst) && - src[0].equals(inst->src[0]) && - src[1].equals(inst->src[1]) && - src[2].equals(inst->src[2]) && - saturate == inst->saturate && - predicate == inst->predicate && - conditional_mod == inst->conditional_mod && - mlen == inst->mlen && - base_mrf == inst->base_mrf && - target == inst->target && - eot == inst->eot && - header_size == inst->header_size && - shadow_compare == inst->shadow_compare && - exec_size == inst->exec_size && - offset == inst->offset); -} - -bool fs_inst::is_send_from_grf() const { switch (opcode) { - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: + case SHADER_OPCODE_SEND: case SHADER_OPCODE_SHADER_TIME_ADD: case FS_OPCODE_INTERPOLATE_AT_SAMPLE: case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: @@ -251,6 +230,7 @@ case SHADER_OPCODE_TYPED_ATOMIC: case SHADER_OPCODE_TYPED_SURFACE_READ: case SHADER_OPCODE_TYPED_SURFACE_WRITE: + case SHADER_OPCODE_IMAGE_SIZE: case SHADER_OPCODE_URB_WRITE_SIMD8: case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: @@ -271,6 +251,62 @@ } } +bool +fs_inst::is_control_source(unsigned arg) const +{ + switch (opcode) { + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4: + return arg == 0; + + case SHADER_OPCODE_BROADCAST: + case SHADER_OPCODE_SHUFFLE: + case SHADER_OPCODE_QUAD_SWIZZLE: + case FS_OPCODE_INTERPOLATE_AT_SAMPLE: + case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: + case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: + case SHADER_OPCODE_IMAGE_SIZE: + case SHADER_OPCODE_GET_BUFFER_SIZE: + return arg == 1; + + 
case SHADER_OPCODE_MOV_INDIRECT: + case SHADER_OPCODE_CLUSTER_BROADCAST: + case SHADER_OPCODE_TEX: + case FS_OPCODE_TXB: + case SHADER_OPCODE_TXD: + case SHADER_OPCODE_TXF: + case SHADER_OPCODE_TXF_LZ: + case SHADER_OPCODE_TXF_CMS: + case SHADER_OPCODE_TXF_CMS_W: + case SHADER_OPCODE_TXF_UMS: + case SHADER_OPCODE_TXF_MCS: + case SHADER_OPCODE_TXL: + case SHADER_OPCODE_TXL_LZ: + case SHADER_OPCODE_TXS: + case SHADER_OPCODE_LOD: + case SHADER_OPCODE_TG4: + case SHADER_OPCODE_TG4_OFFSET: + case SHADER_OPCODE_SAMPLEINFO: + case SHADER_OPCODE_UNTYPED_ATOMIC: + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: + case SHADER_OPCODE_UNTYPED_SURFACE_READ: + case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: + case SHADER_OPCODE_BYTE_SCATTERED_READ: + case SHADER_OPCODE_BYTE_SCATTERED_WRITE: + case SHADER_OPCODE_TYPED_ATOMIC: + case SHADER_OPCODE_TYPED_SURFACE_READ: + case SHADER_OPCODE_TYPED_SURFACE_WRITE: + return arg == 1 || arg == 2; + + case SHADER_OPCODE_SEND: + return arg == 0 || arg == 1; + + default: + return false; + } +} + /** * Returns true if this instruction's sources and destinations cannot * safely be the same register. 
@@ -399,7 +435,7 @@ } bool -fs_inst::can_do_source_mods(const struct gen_device_info *devinfo) +fs_inst::can_do_source_mods(const struct gen_device_info *devinfo) const { if (devinfo->gen == 6 && is_math()) return false; @@ -756,6 +792,7 @@ case SHADER_OPCODE_TXF_LOGICAL: case SHADER_OPCODE_TXL_LOGICAL: case SHADER_OPCODE_TXS_LOGICAL: + case SHADER_OPCODE_IMAGE_SIZE_LOGICAL: case FS_OPCODE_TXB_LOGICAL: case SHADER_OPCODE_TXF_CMS_LOGICAL: case SHADER_OPCODE_TXF_CMS_W_LOGICAL: @@ -868,6 +905,14 @@ fs_inst::size_read(int arg) const { switch (opcode) { + case SHADER_OPCODE_SEND: + if (arg == 2) { + return mlen * REG_SIZE; + } else if (arg == 3) { + return ex_mlen * REG_SIZE; + } + break; + case FS_OPCODE_FB_WRITE: case FS_OPCODE_REP_FB_WRITE: if (arg == 0) { @@ -892,6 +937,7 @@ case SHADER_OPCODE_TYPED_ATOMIC: case SHADER_OPCODE_TYPED_SURFACE_READ: case SHADER_OPCODE_TYPED_SURFACE_WRITE: + case SHADER_OPCODE_IMAGE_SIZE: case FS_OPCODE_INTERPOLATE_AT_SAMPLE: case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: case SHADER_OPCODE_BYTE_SCATTERED_WRITE: @@ -2394,8 +2440,6 @@ inst->src[i].nr = dst.nr; inst->src[i].offset = (base & (block_sz - 1)) + inst->src[i].offset % 4; - - brw_mark_surface_used(prog_data, index); } if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && @@ -2409,8 +2453,6 @@ inst->src[1], pull_index * 4); inst->remove(block); - - brw_mark_surface_used(prog_data, index); } } invalidate_live_intervals(); @@ -2421,9 +2463,45 @@ { bool progress = false; - foreach_block_and_inst(block, fs_inst, inst, cfg) { + foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { switch (inst->opcode) { case BRW_OPCODE_MOV: + if (!devinfo->has_64bit_types && + (inst->dst.type == BRW_REGISTER_TYPE_DF || + inst->dst.type == BRW_REGISTER_TYPE_UQ || + inst->dst.type == BRW_REGISTER_TYPE_Q)) { + assert(inst->dst.type == inst->src[0].type); + assert(!inst->saturate); + assert(!inst->src[0].abs); + assert(!inst->src[0].negate); + const brw::fs_builder ibld(this, block, inst); + + if 
(inst->src[0].file == IMM) { + ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1), + brw_imm_ud(inst->src[0].u64 >> 32)); + ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0), + brw_imm_ud(inst->src[0].u64)); + } else { + ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1), + subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1)); + ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0), + subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0)); + } + + inst->remove(block); + progress = true; + } + + if ((inst->conditional_mod == BRW_CONDITIONAL_Z || + inst->conditional_mod == BRW_CONDITIONAL_NZ) && + inst->dst.is_null() && + (inst->src[0].abs || inst->src[0].negate)) { + inst->src[0].abs = false; + inst->src[0].negate = false; + progress = true; + break; + } + if (inst->src[0].file != IMM) break; @@ -2449,16 +2527,16 @@ break; case BRW_OPCODE_MUL: - if (inst->src[1].file != IMM) - continue; + if (inst->src[1].file != IMM) + continue; - /* a * 1.0 = a */ - if (inst->src[1].is_one()) { - inst->opcode = BRW_OPCODE_MOV; - inst->src[1] = reg_undef; - progress = true; - break; - } + /* a * 1.0 = a */ + if (inst->src[1].is_one()) { + inst->opcode = BRW_OPCODE_MOV; + inst->src[1] = reg_undef; + progress = true; + break; + } /* a * -1.0 = -a */ if (inst->src[1].is_negative_one()) { @@ -2486,7 +2564,7 @@ progress = true; break; } - break; + break; case BRW_OPCODE_ADD: if (inst->src[1].file != IMM) continue; @@ -2528,18 +2606,39 @@ } break; case BRW_OPCODE_CMP: - if (inst->conditional_mod == BRW_CONDITIONAL_GE && - inst->src[0].abs && - inst->src[0].negate && - inst->src[1].is_zero()) { + if ((inst->conditional_mod == BRW_CONDITIONAL_Z || + inst->conditional_mod == BRW_CONDITIONAL_NZ) && + inst->src[1].is_zero() && + (inst->src[0].abs || inst->src[0].negate)) { inst->src[0].abs = false; inst->src[0].negate = false; - inst->conditional_mod = BRW_CONDITIONAL_Z; progress = true; break; } break; case BRW_OPCODE_SEL: + if (!devinfo->has_64bit_types && + (inst->dst.type == 
BRW_REGISTER_TYPE_DF || + inst->dst.type == BRW_REGISTER_TYPE_UQ || + inst->dst.type == BRW_REGISTER_TYPE_Q)) { + assert(inst->dst.type == inst->src[0].type); + assert(!inst->saturate); + assert(!inst->src[0].abs && !inst->src[0].negate); + assert(!inst->src[1].abs && !inst->src[1].negate); + const brw::fs_builder ibld(this, block, inst); + + set_predicate(inst->predicate, + ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0), + subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0), + subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 0))); + set_predicate(inst->predicate, + ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1), + subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1), + subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 1))); + + inst->remove(block); + progress = true; + } if (inst->src[0].equals(inst->src[1])) { inst->opcode = BRW_OPCODE_MOV; inst->src[1] = reg_undef; @@ -2817,8 +2916,8 @@ bool progress = false; int depth = 0; - int remap[alloc.count]; - memset(remap, -1, sizeof(int) * alloc.count); + unsigned remap[alloc.count]; + memset(remap, ~0u, sizeof(unsigned) * alloc.count); foreach_block_and_inst(block, fs_inst, inst, cfg) { if (inst->opcode == BRW_OPCODE_IF || inst->opcode == BRW_OPCODE_DO) { @@ -2831,20 +2930,20 @@ /* Rewrite instruction sources. 
*/ for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF && - remap[inst->src[i].nr] != -1 && + remap[inst->src[i].nr] != ~0u && remap[inst->src[i].nr] != inst->src[i].nr) { inst->src[i].nr = remap[inst->src[i].nr]; progress = true; } } - const int dst = inst->dst.nr; + const unsigned dst = inst->dst.nr; if (depth == 0 && inst->dst.file == VGRF && alloc.sizes[inst->dst.nr] * REG_SIZE == inst->size_written && !inst->is_partial_write()) { - if (remap[dst] == -1) { + if (remap[dst] == ~0u) { remap[dst] = dst; } else { remap[dst] = alloc.allocate(regs_written(inst)); @@ -2852,7 +2951,7 @@ progress = true; } } else if (inst->dst.file == VGRF && - remap[dst] != -1 && + remap[dst] != ~0u && remap[dst] != dst) { inst->dst.nr = remap[dst]; progress = true; @@ -2863,7 +2962,7 @@ invalidate_live_intervals(); for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) { - if (delta_xy[i].file == VGRF && remap[delta_xy[i].nr] != -1) { + if (delta_xy[i].file == VGRF && remap[delta_xy[i].nr] != ~0u) { delta_xy[i].nr = remap[delta_xy[i].nr]; } } @@ -3018,6 +3117,7 @@ if (csel_inst != NULL) { progress = true; + csel_inst->saturate = inst->saturate; inst->remove(block); } @@ -3357,7 +3457,13 @@ if (inst->opcode == BRW_OPCODE_MOV && inst->dst.file == MRF) { fs_inst *prev_inst = last_mrf_move[inst->dst.nr]; - if (prev_inst && inst->equals(prev_inst)) { + if (prev_inst && prev_inst->opcode == BRW_OPCODE_MOV && + inst->dst.equals(prev_inst->dst) && + inst->src[0].equals(prev_inst->src[0]) && + inst->saturate == prev_inst->saturate && + inst->predicate == prev_inst->predicate && + inst->conditional_mod == prev_inst->conditional_mod && + inst->exec_size == prev_inst->exec_size) { inst->remove(block); progress = true; continue; @@ -3551,7 +3657,7 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_inst *inst) { int write_len = regs_written(inst); - int first_write_grf = inst->dst.nr; + unsigned first_write_grf = inst->dst.nr; bool 
needs_dep[BRW_MAX_MRF(devinfo->gen)]; assert(write_len < (int)sizeof(needs_dep) - 1); @@ -4501,11 +4607,72 @@ return sampler.file != IMM || sampler.ud >= 16; } +static unsigned +sampler_msg_type(const gen_device_info *devinfo, + opcode opcode, bool shadow_compare) +{ + assert(devinfo->gen >= 5); + switch (opcode) { + case SHADER_OPCODE_TEX: + return shadow_compare ? GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE : + GEN5_SAMPLER_MESSAGE_SAMPLE; + case FS_OPCODE_TXB: + return shadow_compare ? GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE : + GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS; + case SHADER_OPCODE_TXL: + return shadow_compare ? GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE : + GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; + case SHADER_OPCODE_TXL_LZ: + return shadow_compare ? GEN9_SAMPLER_MESSAGE_SAMPLE_C_LZ : + GEN9_SAMPLER_MESSAGE_SAMPLE_LZ; + case SHADER_OPCODE_TXS: + case SHADER_OPCODE_IMAGE_SIZE: + return GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO; + case SHADER_OPCODE_TXD: + assert(!shadow_compare || devinfo->gen >= 8 || devinfo->is_haswell); + return shadow_compare ? HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE : + GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; + case SHADER_OPCODE_TXF: + return GEN5_SAMPLER_MESSAGE_SAMPLE_LD; + case SHADER_OPCODE_TXF_LZ: + assert(devinfo->gen >= 9); + return GEN9_SAMPLER_MESSAGE_SAMPLE_LD_LZ; + case SHADER_OPCODE_TXF_CMS_W: + assert(devinfo->gen >= 9); + return GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W; + case SHADER_OPCODE_TXF_CMS: + return devinfo->gen >= 7 ? GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS : + GEN5_SAMPLER_MESSAGE_SAMPLE_LD; + case SHADER_OPCODE_TXF_UMS: + assert(devinfo->gen >= 7); + return GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS; + case SHADER_OPCODE_TXF_MCS: + assert(devinfo->gen >= 7); + return GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS; + case SHADER_OPCODE_LOD: + return GEN5_SAMPLER_MESSAGE_LOD; + case SHADER_OPCODE_TG4: + assert(devinfo->gen >= 7); + return shadow_compare ? 
GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C : + GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4; + break; + case SHADER_OPCODE_TG4_OFFSET: + assert(devinfo->gen >= 7); + return shadow_compare ? GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C : + GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO; + case SHADER_OPCODE_SAMPLEINFO: + return GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO; + default: + unreachable("not reached"); + } +} + static void lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, const fs_reg &coordinate, const fs_reg &shadow_c, fs_reg lod, const fs_reg &lod2, + const fs_reg &min_lod, const fs_reg &sample_index, const fs_reg &mcs, const fs_reg &surface, @@ -4515,6 +4682,7 @@ unsigned grad_components) { const gen_device_info *devinfo = bld.shader->devinfo; + const brw_stage_prog_data *prog_data = bld.shader->stage_prog_data; unsigned reg_width = bld.dispatch_width() / 8; unsigned header_size = 0, length = 0; fs_reg sources[MAX_SAMPLER_MESSAGE_SIZE]; @@ -4625,6 +4793,11 @@ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod); length++; break; + case SHADER_OPCODE_IMAGE_SIZE: + /* We need an LOD; just use 0 */ + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); + length++; + break; case SHADER_OPCODE_TXF: /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. * On Gen9 they are u, v, lod, r @@ -4716,7 +4889,16 @@ bld.MOV(sources[length++], offset(coordinate, bld, i)); } - int mlen; + if (min_lod.file != BAD_FILE) { + /* Account for all of the missing coordinate sources */ + length += 4 - coord_components; + if (op == SHADER_OPCODE_TXD) + length += (3 - grad_components) * 2; + + bld.MOV(sources[length++], min_lod); + } + + unsigned mlen; if (reg_width == 2) mlen = length * reg_width - header_size; else @@ -4727,14 +4909,81 @@ bld.LOAD_PAYLOAD(src_payload, sources, length, header_size); /* Generate the SEND. 
*/ - inst->opcode = op; - inst->src[0] = src_payload; - inst->src[1] = surface; - inst->src[2] = sampler; - inst->resize_sources(3); + inst->opcode = SHADER_OPCODE_SEND; inst->mlen = mlen; inst->header_size = header_size; + const unsigned msg_type = + sampler_msg_type(devinfo, op, inst->shadow_compare); + const unsigned simd_mode = + inst->exec_size <= 8 ? BRW_SAMPLER_SIMD_MODE_SIMD8 : + BRW_SAMPLER_SIMD_MODE_SIMD16; + + uint32_t base_binding_table_index; + switch (op) { + case SHADER_OPCODE_TG4: + case SHADER_OPCODE_TG4_OFFSET: + base_binding_table_index = prog_data->binding_table.gather_texture_start; + break; + case SHADER_OPCODE_IMAGE_SIZE: + base_binding_table_index = prog_data->binding_table.image_start; + break; + default: + base_binding_table_index = prog_data->binding_table.texture_start; + break; + } + + inst->sfid = BRW_SFID_SAMPLER; + if (surface.file == IMM && sampler.file == IMM) { + inst->desc = brw_sampler_desc(devinfo, + surface.ud + base_binding_table_index, + sampler.ud % 16, + msg_type, + simd_mode, + 0 /* return_format unused on gen7+ */); + inst->src[0] = brw_imm_ud(0); + } else { + /* Immediate portion of the descriptor */ + inst->desc = brw_sampler_desc(devinfo, + 0, /* surface */ + 0, /* sampler */ + msg_type, + simd_mode, + 0 /* return_format unused on gen7+ */); + const fs_builder ubld = bld.group(1, 0).exec_all(); + fs_reg desc = ubld.vgrf(BRW_REGISTER_TYPE_UD); + if (surface.equals(sampler)) { + /* This case is common in GL */ + ubld.MUL(desc, surface, brw_imm_ud(0x101)); + } else { + if (sampler.file == IMM) { + ubld.OR(desc, surface, brw_imm_ud(sampler.ud << 8)); + } else { + ubld.SHL(desc, sampler, brw_imm_ud(8)); + ubld.OR(desc, desc, surface); + } + } + if (base_binding_table_index) + ubld.ADD(desc, desc, brw_imm_ud(base_binding_table_index)); + ubld.AND(desc, desc, brw_imm_ud(0xfff)); + + inst->src[0] = component(desc, 0); + } + inst->src[1] = brw_imm_ud(0); /* ex_desc */ + + inst->src[2] = src_payload; + inst->resize_sources(3); 
+ + if (inst->eot) { + /* EOT sampler messages don't make sense to split because it would + * involve ending half of the thread early. + */ + assert(inst->group == 0); + /* We need to use SENDC for EOT sampler messages */ + inst->check_tdr = true; + inst->send_has_side_effects = true; + } + /* Message length > MAX_SAMPLER_MESSAGE_SIZE disallowed by hardware. */ assert(inst->mlen <= MAX_SAMPLER_MESSAGE_SIZE); } @@ -4747,6 +4996,7 @@ const fs_reg &shadow_c = inst->src[TEX_LOGICAL_SRC_SHADOW_C]; const fs_reg &lod = inst->src[TEX_LOGICAL_SRC_LOD]; const fs_reg &lod2 = inst->src[TEX_LOGICAL_SRC_LOD2]; + const fs_reg &min_lod = inst->src[TEX_LOGICAL_SRC_MIN_LOD]; const fs_reg &sample_index = inst->src[TEX_LOGICAL_SRC_SAMPLE_INDEX]; const fs_reg &mcs = inst->src[TEX_LOGICAL_SRC_MCS]; const fs_reg &surface = inst->src[TEX_LOGICAL_SRC_SURFACE]; @@ -4759,7 +5009,8 @@ if (devinfo->gen >= 7) { lower_sampler_logical_send_gen7(bld, inst, op, coordinate, - shadow_c, lod, lod2, sample_index, + shadow_c, lod, lod2, min_lod, + sample_index, mcs, surface, sampler, tg4_offset, coord_components, grad_components); } else if (devinfo->gen >= 5) { @@ -4790,8 +5041,7 @@ } static void -lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op, - const fs_reg &sample_mask) +lower_surface_logical_send(const fs_builder &bld, fs_inst *inst) { const gen_device_info *devinfo = bld.shader->devinfo; @@ -4801,10 +5051,17 @@ const fs_reg &surface = inst->src[2]; const UNUSED fs_reg &dims = inst->src[3]; const fs_reg &arg = inst->src[4]; + assert(arg.file == IMM); /* Calculate the total number of components of the payload. 
*/ const unsigned addr_sz = inst->components_read(0); const unsigned src_sz = inst->components_read(1); + + const bool is_typed_access = + inst->opcode == SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL || + inst->opcode == SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL || + inst->opcode == SHADER_OPCODE_TYPED_ATOMIC_LOGICAL; + /* From the BDW PRM Volume 7, page 147: * * "For the Data Cache Data Port*, the header must be present for the @@ -4815,28 +5072,43 @@ * messages prior to Gen9, since we have to provide a header anyway. On * Gen11+ the header has been removed so we can only use predication. */ - const unsigned header_sz = devinfo->gen < 9 && - (op == SHADER_OPCODE_TYPED_SURFACE_READ || - op == SHADER_OPCODE_TYPED_SURFACE_WRITE || - op == SHADER_OPCODE_TYPED_ATOMIC) ? 1 : 0; - const unsigned sz = header_sz + addr_sz + src_sz; - - /* Allocate space for the payload. */ - fs_reg *const components = new fs_reg[sz]; - const fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz); - unsigned n = 0; - - /* Construct the payload. */ - if (header_sz) - components[n++] = emit_surface_header(bld, sample_mask); + const unsigned header_sz = devinfo->gen < 9 && is_typed_access ? 1 : 0; - for (unsigned i = 0; i < addr_sz; i++) - components[n++] = offset(addr, bld, i); + const bool has_side_effects = inst->has_side_effects(); + fs_reg sample_mask = has_side_effects ? bld.sample_mask_reg() : + fs_reg(brw_imm_d(0xffff)); + + fs_reg payload, payload2; + unsigned mlen, ex_mlen = 0; + if (devinfo->gen >= 9) { + /* We have split sends on gen9 and above */ + assert(header_sz == 0); + payload = bld.move_to_vgrf(addr, addr_sz); + payload2 = bld.move_to_vgrf(src, src_sz); + mlen = addr_sz * (inst->exec_size / 8); + ex_mlen = src_sz * (inst->exec_size / 8); + } else { + /* Allocate space for the payload. */ + const unsigned sz = header_sz + addr_sz + src_sz; + payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz); + fs_reg *const components = new fs_reg[sz]; + unsigned n = 0; + + /* Construct the payload. 
*/ + if (header_sz) + components[n++] = emit_surface_header(bld, sample_mask); + + for (unsigned i = 0; i < addr_sz; i++) + components[n++] = offset(addr, bld, i); - for (unsigned i = 0; i < src_sz; i++) - components[n++] = offset(src, bld, i); + for (unsigned i = 0; i < src_sz; i++) + components[n++] = offset(src, bld, i); - bld.LOAD_PAYLOAD(payload, components, sz, header_sz); + bld.LOAD_PAYLOAD(payload, components, sz, header_sz); + mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8; + + delete[] components; + } /* Predicate the instruction on the sample mask if no header is * provided. @@ -4864,17 +5136,128 @@ } } + uint32_t sfid; + switch (inst->opcode) { + case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: + case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: + /* Byte scattered opcodes go through the normal data cache */ + sfid = GEN7_SFID_DATAPORT_DATA_CACHE; + break; + + case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: + case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: + case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: + /* Untyped Surface messages go through the data cache but the SFID value + * changed on Haswell. + */ + sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? + HSW_SFID_DATAPORT_DATA_CACHE_1 : + GEN7_SFID_DATAPORT_DATA_CACHE); + break; + + case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: + case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: + case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: + /* Typed surface messages go through the render cache on IVB and the + * data cache on HSW+. + */ + sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? 
+ HSW_SFID_DATAPORT_DATA_CACHE_1 : + GEN6_SFID_DATAPORT_RENDER_CACHE); + break; + + default: + unreachable("Unsupported surface opcode"); + } + + uint32_t desc; + switch (inst->opcode) { + case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: + desc = brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size, + arg.ud, /* num_channels */ + false /* write */); + break; + + case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: + desc = brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size, + arg.ud, /* num_channels */ + true /* write */); + break; + + case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: + desc = brw_dp_byte_scattered_rw_desc(devinfo, inst->exec_size, + arg.ud, /* bit_size */ + false /* write */); + break; + + case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: + desc = brw_dp_byte_scattered_rw_desc(devinfo, inst->exec_size, + arg.ud, /* bit_size */ + true /* write */); + break; + + case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: + desc = brw_dp_untyped_atomic_desc(devinfo, inst->exec_size, + arg.ud, /* atomic_op */ + !inst->dst.is_null()); + break; + + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: + desc = brw_dp_untyped_atomic_float_desc(devinfo, inst->exec_size, + arg.ud, /* atomic_op */ + !inst->dst.is_null()); + break; + + case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: + desc = brw_dp_typed_surface_rw_desc(devinfo, inst->exec_size, inst->group, + arg.ud, /* num_channels */ + false /* write */); + break; + + case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: + desc = brw_dp_typed_surface_rw_desc(devinfo, inst->exec_size, inst->group, + arg.ud, /* num_channels */ + true /* write */); + break; + + case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: + desc = brw_dp_typed_atomic_desc(devinfo, inst->exec_size, inst->group, + arg.ud, /* atomic_op */ + !inst->dst.is_null()); + break; + + default: + unreachable("Unknown surface logical instruction"); + } + /* Update the original instruction. 
*/ - inst->opcode = op; - inst->mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8; + inst->opcode = SHADER_OPCODE_SEND; + inst->mlen = mlen; + inst->ex_mlen = ex_mlen; inst->header_size = header_sz; + inst->send_has_side_effects = has_side_effects; + inst->send_is_volatile = !has_side_effects; - inst->src[0] = payload; - inst->src[1] = surface; - inst->src[2] = arg; - inst->resize_sources(3); + /* Set up SFID and descriptors */ + inst->sfid = sfid; + inst->desc = desc; + if (surface.file == IMM) { + inst->desc |= surface.ud & 0xff; + inst->src[0] = brw_imm_ud(0); + } else { + const fs_builder ubld = bld.exec_all().group(1, 0); + fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD); + ubld.AND(tmp, surface, brw_imm_ud(0xff)); + inst->src[0] = component(tmp, 0); + } + inst->src[1] = brw_imm_ud(0); /* ex_desc */ + + /* Finally, the payload */ + inst->src[2] = payload; + inst->src[3] = payload2; - delete[] components; + inst->resize_sources(4); } static void @@ -4883,16 +5266,37 @@ const gen_device_info *devinfo = bld.shader->devinfo; if (devinfo->gen >= 7) { + fs_reg index = inst->src[0]; /* We are switching the instruction from an ALU-like instruction to a * send-from-grf instruction. Since sends can't handle strides or * source modifiers, we have to make a copy of the offset source. */ - fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD); - bld.MOV(tmp, inst->src[1]); - inst->src[1] = tmp; + fs_reg offset = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.MOV(offset, inst->src[1]); + + const unsigned simd_mode = + inst->exec_size <= 8 ? 
BRW_SAMPLER_SIMD_MODE_SIMD8 : + BRW_SAMPLER_SIMD_MODE_SIMD16; - inst->opcode = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7; + inst->opcode = SHADER_OPCODE_SEND; inst->mlen = inst->exec_size / 8; + inst->resize_sources(3); + + inst->sfid = BRW_SFID_SAMPLER; + inst->desc = brw_sampler_desc(devinfo, 0, 0, + GEN5_SAMPLER_MESSAGE_SAMPLE_LD, + simd_mode, 0); + if (index.file == IMM) { + inst->desc |= index.ud & 0xff; + inst->src[0] = brw_imm_ud(0); + } else { + const fs_builder ubld = bld.exec_all().group(1, 0); + fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD); + ubld.AND(tmp, index, brw_imm_ud(0xff)); + inst->src[0] = component(tmp, 0); + } + inst->src[1] = brw_imm_ud(0); /* ex_desc */ + inst->src[2] = offset; /* payload */ } else { const fs_reg payload(MRF, FIRST_PULL_LOAD_MRF(devinfo->gen), BRW_REGISTER_TYPE_UD); @@ -4978,6 +5382,10 @@ lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXS); break; + case SHADER_OPCODE_IMAGE_SIZE_LOGICAL: + lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_IMAGE_SIZE); + break; + case FS_OPCODE_TXB_LOGICAL: lower_sampler_logical_send(ibld, inst, FS_OPCODE_TXB); break; @@ -5015,57 +5423,15 @@ break; case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_UNTYPED_SURFACE_READ, - fs_reg()); - break; - case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_UNTYPED_SURFACE_WRITE, - ibld.sample_mask_reg()); - break; - case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_BYTE_SCATTERED_READ, - fs_reg()); - break; - case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_BYTE_SCATTERED_WRITE, - ibld.sample_mask_reg()); - break; - case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_UNTYPED_ATOMIC, - ibld.sample_mask_reg()); - break; - case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: - 
lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT, - ibld.sample_mask_reg()); - break; - case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_TYPED_SURFACE_READ, - brw_imm_d(0xffff)); - break; - case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_TYPED_SURFACE_WRITE, - ibld.sample_mask_reg()); - break; - case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_TYPED_ATOMIC, - ibld.sample_mask_reg()); + lower_surface_logical_send(ibld, inst); break; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: @@ -5296,6 +5662,14 @@ get_sampler_lowered_simd_width(const struct gen_device_info *devinfo, const fs_inst *inst) { + /* If we have a min_lod parameter on anything other than a simple sample + * message, it will push it over 5 arguments and we have to fall back to + * SIMD8. + */ + if (inst->opcode != SHADER_OPCODE_TEX && + inst->components_read(TEX_LOGICAL_SRC_MIN_LOD)) + return 8; + /* Calculate the number of coordinate components that have to be present * assuming that additional arguments follow the texel coordinates in the * message payload. 
On IVB+ there is no need for padding, on ILK-SNB we @@ -5444,10 +5818,7 @@ case FS_OPCODE_DDX_FINE: case FS_OPCODE_DDY_COARSE: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: case FS_OPCODE_PACK_HALF_2x16_SPLIT: - case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X: - case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y: case FS_OPCODE_INTERPOLATE_AT_SAMPLE: case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: @@ -5967,6 +6338,10 @@ fprintf(file, "(mlen: %d) ", inst->mlen); } + if (inst->ex_mlen) { + fprintf(file, "(ex_mlen: %d) ", inst->ex_mlen); + } + if (inst->eot) { fprintf(file, "(EOT) "); } @@ -6078,6 +6453,11 @@ brw_vf_to_float((inst->src[i].ud >> 16) & 0xff), brw_vf_to_float((inst->src[i].ud >> 24) & 0xff)); break; + case BRW_REGISTER_TYPE_V: + case BRW_REGISTER_TYPE_UV: + fprintf(file, "%08x%s", inst->src[i].ud, + inst->src[i].type == BRW_REGISTER_TYPE_V ? "V" : "UV"); + break; default: fprintf(file, "???"); break; @@ -6444,18 +6824,68 @@ OPT(dead_code_eliminate); } - if (OPT(lower_conversions)) { + if (OPT(lower_regioning)) { OPT(opt_copy_propagation); OPT(dead_code_eliminate); OPT(lower_simd_width); } + OPT(fixup_sends_duplicate_payload); + lower_uniform_pull_constant_loads(); validate(); } /** + * From the Skylake PRM Vol. 2a docs for sends: + * + * "It is required that the second block of GRFs does not overlap with the + * first block." + * + * There are plenty of cases where we may accidentally violate this due to + * having, for instance, both sources be the constant 0. This little pass + * just adds a new vgrf for the second payload and copies it over. 
+ */ +bool +fs_visitor::fixup_sends_duplicate_payload() +{ + bool progress = false; + + foreach_block_and_inst_safe (block, fs_inst, inst, cfg) { + if (inst->opcode == SHADER_OPCODE_SEND && inst->ex_mlen > 0 && + regions_overlap(inst->src[2], inst->mlen * REG_SIZE, + inst->src[3], inst->ex_mlen * REG_SIZE)) { + fs_reg tmp = fs_reg(VGRF, alloc.allocate(inst->ex_mlen), + BRW_REGISTER_TYPE_UD); + /* Sadly, we've lost all notion of channels and bit sizes at this + * point. Just WE_all it. + */ + const fs_builder ibld = bld.at(block, inst).exec_all().group(16, 0); + fs_reg copy_src = retype(inst->src[3], BRW_REGISTER_TYPE_UD); + fs_reg copy_dst = tmp; + for (unsigned i = 0; i < inst->ex_mlen; i += 2) { + if (inst->ex_mlen == i + 1) { + /* Only one register left; do SIMD8 */ + ibld.group(8, 0).MOV(copy_dst, copy_src); + } else { + ibld.MOV(copy_dst, copy_src); + } + copy_src = offset(copy_src, ibld, 1); + copy_dst = offset(copy_dst, ibld, 1); + } + inst->src[3] = tmp; + progress = true; + } + } + + if (progress) + invalidate_live_intervals(); + + return progress; +} + +/** * Three source instruction must have a GRF/MRF destination register. * ARF NULL is not allowed. Fix that up by allocating a temporary GRF. 
*/ @@ -7161,7 +7591,7 @@ void *mem_ctx, const struct brw_wm_prog_key *key, struct brw_wm_prog_data *prog_data, - const nir_shader *src_shader, + nir_shader *shader, struct gl_program *prog, int shader_time_index8, int shader_time_index16, int shader_time_index32, bool allow_spilling, @@ -7170,7 +7600,6 @@ { const struct gen_device_info *devinfo = compiler->devinfo; - nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, true); brw_nir_lower_fs_inputs(shader, devinfo, key); brw_nir_lower_fs_outputs(shader); diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_cse.cpp mesa-19.0.1/src/intel/compiler/brw_fs_cse.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs_cse.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_cse.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -74,7 +74,6 @@ case FS_OPCODE_FB_READ_LOGICAL: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: case FS_OPCODE_LINTERP: case SHADER_OPCODE_FIND_LIVE_CHANNEL: case SHADER_OPCODE_BROADCAST: @@ -184,8 +183,13 @@ a->dst.type == b->dst.type && a->offset == b->offset && a->mlen == b->mlen && + a->ex_mlen == b->ex_mlen && + a->sfid == b->sfid && + a->desc == b->desc && a->size_written == b->size_written && a->base_mrf == b->base_mrf && + a->check_tdr == b->check_tdr && + a->send_has_side_effects == b->send_has_side_effects && a->eot == b->eot && a->header_size == b->header_size && a->shadow_compare == b->shadow_compare && diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_generator.cpp mesa-19.0.1/src/intel/compiler/brw_fs_generator.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs_generator.cpp 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_generator.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -90,9 +90,16 @@ * different execution size when the number of components * written to each destination GRF is not the 
same. */ - const unsigned width = MIN2(reg_width, phys_width); - brw_reg = brw_vecn_reg(width, brw_file_from_reg(reg), reg->nr, 0); - brw_reg = stride(brw_reg, width * reg->stride, width, reg->stride); + if (reg->stride > 4) { + assert(reg != &inst->dst); + assert(reg->stride * type_sz(reg->type) <= REG_SIZE); + brw_reg = brw_vecn_reg(1, brw_file_from_reg(reg), reg->nr, 0); + brw_reg = stride(brw_reg, reg->stride, 1, 0); + } else { + const unsigned width = MIN2(reg_width, phys_width); + brw_reg = brw_vecn_reg(width, brw_file_from_reg(reg), reg->nr, 0); + brw_reg = stride(brw_reg, width * reg->stride, width, reg->stride); + } if (devinfo->gen == 7 && !devinfo->is_haswell) { /* From the IvyBridge PRM (EU Changes by Processor Generation, page 13): @@ -251,6 +258,40 @@ } void +fs_generator::generate_send(fs_inst *inst, + struct brw_reg dst, + struct brw_reg desc, + struct brw_reg ex_desc, + struct brw_reg payload, + struct brw_reg payload2) +{ + const bool dst_is_null = dst.file == BRW_ARCHITECTURE_REGISTER_FILE && + dst.nr == BRW_ARF_NULL; + const unsigned rlen = dst_is_null ? 0 : inst->size_written / REG_SIZE; + + uint32_t desc_imm = inst->desc | + brw_message_desc(devinfo, inst->mlen, rlen, inst->header_size); + + uint32_t ex_desc_imm = brw_message_ex_desc(devinfo, inst->ex_mlen); + + if (ex_desc.file != BRW_IMMEDIATE_VALUE || ex_desc.ud || ex_desc_imm) { + /* If we have any sort of extended descriptor, then we need SENDS. This + * also covers the dual-payload case because ex_mlen goes in ex_desc. 
+ */ + brw_send_indirect_split_message(p, inst->sfid, dst, payload, payload2, + desc, desc_imm, ex_desc, ex_desc_imm); + if (inst->check_tdr) + brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDSC); + } else { + brw_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm); + if (inst->check_tdr) + brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC); + } + + brw_inst_set_eot(p->devinfo, brw_last_inst, inst->eot); +} + +void fs_generator::fire_fb_write(fs_inst *inst, struct brw_reg payload, struct brw_reg implied_header, @@ -315,8 +356,6 @@ if (devinfo->gen >= 6) brw_inst_set_rt_slot_group(devinfo, insn, inst->group / 16); - - brw_mark_surface_used(&prog_data->base, surf_index); } void @@ -373,8 +412,6 @@ gen9_fb_READ(p, dst, payload, surf_index, inst->header_size, inst->size_written / REG_SIZE, prog_data->persample_dispatch); - - brw_mark_surface_used(&prog_data->base, surf_index); } void @@ -440,7 +477,8 @@ if (type_sz(reg.type) > 4 && ((devinfo->gen == 7 && !devinfo->is_haswell) || - devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) { + devinfo->is_cherryview || gen_device_info_is_9lp(devinfo) || + !devinfo->has_64bit_types)) { /* IVB has an issue (which we found empirically) where it reads two * address register components per channel for indirectly addressed * 64-bit sources. 
@@ -938,15 +976,14 @@ inst->header_size > 0, simd_mode, BRW_SAMPLER_RETURN_FORMAT_SINT32); - - brw_mark_surface_used(prog_data, surf_index.ud); } void -fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src, +fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg surface_index, struct brw_reg sampler_index) { + assert(devinfo->gen < 7); assert(inst->size_written % REG_SIZE == 0); int msg_type = -1; uint32_t simd_mode; @@ -1015,71 +1052,26 @@ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; } break; - case SHADER_OPCODE_TXL_LZ: - assert(devinfo->gen >= 9); - if (inst->shadow_compare) { - msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_C_LZ; - } else { - msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LZ; - } - break; case SHADER_OPCODE_TXS: - case SHADER_OPCODE_IMAGE_SIZE: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO; break; case SHADER_OPCODE_TXD: - if (inst->shadow_compare) { - /* Gen7.5+. Otherwise, lowered in NIR */ - assert(devinfo->gen >= 8 || devinfo->is_haswell); - msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE; - } else { - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; - } + assert(!inst->shadow_compare); + msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; break; case SHADER_OPCODE_TXF: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; - case SHADER_OPCODE_TXF_LZ: - assert(devinfo->gen >= 9); - msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD_LZ; - break; - case SHADER_OPCODE_TXF_CMS_W: - assert(devinfo->gen >= 9); - msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W; - break; case SHADER_OPCODE_TXF_CMS: - if (devinfo->gen >= 7) - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS; - else - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; - break; - case SHADER_OPCODE_TXF_UMS: - assert(devinfo->gen >= 7); - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS; - break; - case SHADER_OPCODE_TXF_MCS: - assert(devinfo->gen >= 7); - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS; + msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; case SHADER_OPCODE_LOD: 
msg_type = GEN5_SAMPLER_MESSAGE_LOD; break; case SHADER_OPCODE_TG4: - if (inst->shadow_compare) { - assert(devinfo->gen >= 7); - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C; - } else { - assert(devinfo->gen >= 6); - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4; - } - break; - case SHADER_OPCODE_TG4_OFFSET: - assert(devinfo->gen >= 7); - if (inst->shadow_compare) { - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C; - } else { - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO; - } + assert(devinfo->gen == 6); + assert(!inst->shadow_compare); + msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4; break; case SHADER_OPCODE_SAMPLEINFO: msg_type = GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO; @@ -1158,16 +1150,14 @@ dst = vec16(dst); } - assert(devinfo->gen < 7 || inst->header_size == 0 || - src.file == BRW_GENERAL_REGISTER_FILE); - assert(sampler_index.type == BRW_REGISTER_TYPE_UD); /* Load the message header if present. If there's a texture offset, * we need to set it up explicitly and load the offset bitfield. * Otherwise, we can use an implied move from g0 to the first message reg. */ - if (inst->header_size != 0 && devinfo->gen < 7) { + struct brw_reg src = brw_null_reg(); + if (inst->header_size != 0) { if (devinfo->gen < 6 && !inst->offset) { /* Set up an implied move from g0 to the MRF. 
*/ src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); @@ -1196,85 +1186,28 @@ uint32_t base_binding_table_index; switch (inst->opcode) { case SHADER_OPCODE_TG4: - case SHADER_OPCODE_TG4_OFFSET: base_binding_table_index = prog_data->binding_table.gather_texture_start; break; - case SHADER_OPCODE_IMAGE_SIZE: - base_binding_table_index = prog_data->binding_table.image_start; - break; default: base_binding_table_index = prog_data->binding_table.texture_start; break; } - if (surface_index.file == BRW_IMMEDIATE_VALUE && - sampler_index.file == BRW_IMMEDIATE_VALUE) { - uint32_t surface = surface_index.ud; - uint32_t sampler = sampler_index.ud; - - brw_SAMPLE(p, - retype(dst, BRW_REGISTER_TYPE_UW), - inst->base_mrf, - src, - surface + base_binding_table_index, - sampler % 16, - msg_type, - inst->size_written / REG_SIZE, - inst->mlen, - inst->header_size != 0, - simd_mode, - return_format); + assert(surface_index.file == BRW_IMMEDIATE_VALUE); + assert(sampler_index.file == BRW_IMMEDIATE_VALUE); - brw_mark_surface_used(prog_data, surface + base_binding_table_index); - } else { - /* Non-const sampler index */ - - struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); - struct brw_reg surface_reg = vec1(retype(surface_index, BRW_REGISTER_TYPE_UD)); - struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); - - brw_push_insn_state(p); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_set_default_access_mode(p, BRW_ALIGN_1); - brw_set_default_exec_size(p, BRW_EXECUTE_1); - - if (brw_regs_equal(&surface_reg, &sampler_reg)) { - brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101)); - } else { - if (sampler_reg.file == BRW_IMMEDIATE_VALUE) { - brw_OR(p, addr, surface_reg, brw_imm_ud(sampler_reg.ud << 8)); - } else { - brw_SHL(p, addr, sampler_reg, brw_imm_ud(8)); - brw_OR(p, addr, addr, surface_reg); - } - } - if (base_binding_table_index) - brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); - brw_AND(p, addr, addr, 
brw_imm_ud(0xfff)); - - brw_pop_insn_state(p); - - /* dst = send(offset, a0.0 | ) */ - brw_send_indirect_message( - p, BRW_SFID_SAMPLER, dst, src, addr, - brw_message_desc(devinfo, inst->mlen, inst->size_written / REG_SIZE, - inst->header_size) | - brw_sampler_desc(devinfo, - 0 /* surface */, - 0 /* sampler */, - msg_type, - simd_mode, - return_format)); - - /* visitor knows more than we do about the surface limit required, - * so has already done marking. - */ - } - - if (is_combined_send) { - brw_inst_set_eot(p->devinfo, brw_last_inst, true); - brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC); - } + brw_SAMPLE(p, + retype(dst, BRW_REGISTER_TYPE_UW), + inst->base_mrf, + src, + surface_index.ud + base_binding_table_index, + sampler_index.ud % 16, + msg_type, + inst->size_written / REG_SIZE, + inst->mlen, + inst->header_size != 0, + simd_mode, + return_format); } @@ -1596,75 +1529,6 @@ } void -fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst, - struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset) -{ - assert(devinfo->gen >= 7); - /* Varying-offset pull constant loads are treated as a normal expression on - * gen7, so the fact that it's a send message is hidden at the IR level. 
- */ - assert(inst->header_size == 0); - assert(inst->mlen); - assert(index.type == BRW_REGISTER_TYPE_UD); - - uint32_t simd_mode, rlen; - if (inst->exec_size == 16) { - rlen = 8; - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; - } else { - assert(inst->exec_size == 8); - rlen = 4; - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; - } - - if (index.file == BRW_IMMEDIATE_VALUE) { - - uint32_t surf_index = index.ud; - - brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); - brw_inst_set_sfid(devinfo, send, BRW_SFID_SAMPLER); - brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UW)); - brw_set_src0(p, send, offset); - brw_set_desc(p, send, - brw_message_desc(devinfo, inst->mlen, rlen, false) | - brw_sampler_desc(devinfo, surf_index, - 0, /* LD message ignores sampler unit */ - GEN5_SAMPLER_MESSAGE_SAMPLE_LD, - simd_mode, 0)); - - } else { - - struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); - - brw_push_insn_state(p); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - - /* a0.0 = surf_index & 0xff */ - brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND); - brw_inst_set_exec_size(p->devinfo, insn_and, BRW_EXECUTE_1); - brw_set_dest(p, insn_and, addr); - brw_set_src0(p, insn_and, vec1(retype(index, BRW_REGISTER_TYPE_UD))); - brw_set_src1(p, insn_and, brw_imm_ud(0x0ff)); - - brw_pop_insn_state(p); - - /* dst = send(offset, a0.0 | ) */ - brw_send_indirect_message( - p, BRW_SFID_SAMPLER, retype(dst, BRW_REGISTER_TYPE_UW), - offset, addr, - brw_message_desc(devinfo, inst->mlen, rlen, false) | - brw_sampler_desc(devinfo, - 0 /* surface */, - 0 /* sampler */, - GEN5_SAMPLER_MESSAGE_SAMPLE_LD, - simd_mode, - 0)); - } -} - -void fs_generator::generate_pixel_interpolator_query(fs_inst *inst, struct brw_reg dst, struct brw_reg src, @@ -1756,35 +1620,6 @@ } void -fs_generator::generate_unpack_half_2x16_split(fs_inst *inst, - struct brw_reg dst, - struct brw_reg src) -{ - assert(devinfo->gen >= 7); - assert(dst.type == BRW_REGISTER_TYPE_F); - assert(src.type 
== BRW_REGISTER_TYPE_UD); - - /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32: - * - * Because this instruction does not have a 16-bit floating-point type, - * the source data type must be Word (W). The destination type must be - * F (Float). - */ - struct brw_reg src_w = spread(retype(src, BRW_REGISTER_TYPE_W), 2); - - /* Each channel of src has the form of unpackHalf2x16's input: 0xhhhhllll. - * For the Y case, we wish to access only the upper word; therefore - * a 16-bit subregister offset is needed. - */ - assert(inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X || - inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y); - if (inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y) - src_w.subnr += 2; - - brw_F16TO32(p, dst, src_w); -} - -void fs_generator::generate_shader_time_add(fs_inst *, struct brw_reg payload, struct brw_reg offset, @@ -1818,9 +1653,6 @@ brw_shader_time_add(p, payload, prog_data->binding_table.shader_time_start); brw_pop_insn_state(p); - - brw_mark_surface_used(prog_data, - prog_data->binding_table.shader_time_start); } void @@ -1846,7 +1678,7 @@ struct disasm_info *disasm_info = disasm_initialize(devinfo, cfg); foreach_block_and_inst (block, fs_inst, inst, cfg) { - struct brw_reg src[3], dst; + struct brw_reg src[4], dst; unsigned int last_insn_offset = p->next_insn_offset; bool multiple_instructions_emitted = false; @@ -2169,6 +2001,12 @@ src[0].subnr = 4 * type_sz(src[0].type); brw_MOV(p, dst, stride(src[0], 8, 4, 1)); break; + + case SHADER_OPCODE_SEND: + generate_send(inst, dst, src[0], src[1], src[2], + inst->ex_mlen > 0 ? 
src[3] : brw_null_reg()); + break; + case SHADER_OPCODE_GET_BUFFER_SIZE: generate_get_buffer_size(inst, dst, src[0], src[1]); break; @@ -2176,23 +2014,14 @@ case FS_OPCODE_TXB: case SHADER_OPCODE_TXD: case SHADER_OPCODE_TXF: - case SHADER_OPCODE_TXF_LZ: case SHADER_OPCODE_TXF_CMS: - case SHADER_OPCODE_TXF_CMS_W: - case SHADER_OPCODE_TXF_UMS: - case SHADER_OPCODE_TXF_MCS: case SHADER_OPCODE_TXL: - case SHADER_OPCODE_TXL_LZ: case SHADER_OPCODE_TXS: case SHADER_OPCODE_LOD: case SHADER_OPCODE_TG4: - case SHADER_OPCODE_TG4_OFFSET: case SHADER_OPCODE_SAMPLEINFO: - generate_tex(inst, dst, src[0], src[1], src[2]); - break; - - case SHADER_OPCODE_IMAGE_SIZE: - generate_tex(inst, dst, src[0], src[1], brw_imm_ud(0)); + assert(inst->src[0].file == BAD_FILE); + generate_tex(inst, dst, src[1], src[2]); break; case FS_OPCODE_DDX_COARSE: @@ -2249,10 +2078,6 @@ generate_varying_pull_constant_load_gen4(inst, dst, src[0]); break; - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: - generate_varying_pull_constant_load_gen7(inst, dst, src[0], src[1]); - break; - case FS_OPCODE_REP_FB_WRITE: case FS_OPCODE_FB_WRITE: generate_fb_write(inst, src[0]); @@ -2270,73 +2095,12 @@ generate_shader_time_add(inst, src[0], src[1], src[2]); break; - case SHADER_OPCODE_UNTYPED_ATOMIC: - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud, - inst->mlen, !inst->dst.is_null(), - inst->header_size); - break; - - case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_untyped_atomic_float(p, dst, src[0], src[1], src[2].ud, - inst->mlen, !inst->dst.is_null(), - inst->header_size); - break; - - case SHADER_OPCODE_UNTYPED_SURFACE_READ: - assert(!inst->header_size); - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_untyped_surface_read(p, dst, src[0], src[1], - inst->mlen, src[2].ud); - break; - - case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_untyped_surface_write(p, src[0], 
src[1], - inst->mlen, src[2].ud, - inst->header_size); - break; - - case SHADER_OPCODE_BYTE_SCATTERED_READ: - assert(!inst->header_size); - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_byte_scattered_read(p, dst, src[0], src[1], - inst->mlen, src[2].ud); - break; - - case SHADER_OPCODE_BYTE_SCATTERED_WRITE: - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_byte_scattered_write(p, src[0], src[1], - inst->mlen, src[2].ud, - inst->header_size); - break; - - case SHADER_OPCODE_TYPED_ATOMIC: - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_typed_atomic(p, dst, src[0], src[1], - src[2].ud, inst->mlen, !inst->dst.is_null(), - inst->header_size); - break; - - case SHADER_OPCODE_TYPED_SURFACE_READ: - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_typed_surface_read(p, dst, src[0], src[1], - inst->mlen, src[2].ud, - inst->header_size); - break; - - case SHADER_OPCODE_TYPED_SURFACE_WRITE: - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].ud, - inst->header_size); - break; - case SHADER_OPCODE_MEMORY_FENCE: brw_memory_fence(p, dst, BRW_OPCODE_SEND); break; case SHADER_OPCODE_INTERLOCK: + assert(devinfo->gen >= 9); /* The interlock is basically a memory fence issued via sendc */ brw_memory_fence(p, dst, BRW_OPCODE_SENDC); break; @@ -2421,11 +2185,6 @@ generate_pack_half_2x16_split(inst, dst, src[0], src[1]); break; - case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X: - case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y: - generate_unpack_half_2x16_split(inst, dst, src[0]); - break; - case FS_OPCODE_PLACEHOLDER_HALT: /* This is the place where the final HALT needs to be inserted if * we've emitted any discards. If not, this will emit no code. 
diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs.h mesa-19.0.1/src/intel/compiler/brw_fs.h --- mesa-18.3.3/src/intel/compiler/brw_fs.h 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs.h 2019-03-31 23:16:37.000000000 +0000 @@ -103,6 +103,7 @@ void setup_vs_payload(); void setup_gs_payload(); void setup_cs_payload(); + bool fixup_sends_duplicate_payload(); void fixup_3src_null_dest(); void assign_curb_setup(); void calculate_urb_setup(); @@ -119,7 +120,7 @@ void setup_payload_interference(struct ra_graph *g, int payload_reg_count, int first_payload_node); int choose_spill_reg(struct ra_graph *g); - void spill_reg(int spill_reg); + void spill_reg(unsigned spill_reg); void split_virtual_grfs(); bool compact_virtual_grfs(); void assign_constant_locations(); @@ -164,7 +165,7 @@ void lower_uniform_pull_constant_loads(); bool lower_load_payload(); bool lower_pack(); - bool lower_conversions(); + bool lower_regioning(); bool lower_logical_sends(); bool lower_integer_multiplication(); bool lower_minmax(); @@ -218,6 +219,8 @@ nir_intrinsic_instr *instr); fs_reg get_nir_image_intrinsic_image(const brw::fs_builder &bld, nir_intrinsic_instr *instr); + fs_reg get_nir_ssbo_intrinsic_index(const brw::fs_builder &bld, + nir_intrinsic_instr *instr); void nir_emit_intrinsic(const brw::fs_builder &bld, nir_intrinsic_instr *instr); void nir_emit_tes_intrinsic(const brw::fs_builder &bld, @@ -404,6 +407,12 @@ struct brw_reg payload, struct brw_reg implied_header, GLuint nr); + void generate_send(fs_inst *inst, + struct brw_reg dst, + struct brw_reg desc, + struct brw_reg ex_desc, + struct brw_reg payload, + struct brw_reg payload2); void generate_fb_write(fs_inst *inst, struct brw_reg payload); void generate_fb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload); @@ -413,7 +422,7 @@ void generate_barrier(fs_inst *inst, struct brw_reg src); bool generate_linterp(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); - void generate_tex(fs_inst 
*inst, struct brw_reg dst, struct brw_reg src, + void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg surface_index, struct brw_reg sampler_index); void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst, @@ -436,10 +445,6 @@ void generate_varying_pull_constant_load_gen4(fs_inst *inst, struct brw_reg dst, struct brw_reg index); - void generate_varying_pull_constant_load_gen7(fs_inst *inst, - struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset); void generate_mov_dispatch_to_flags(fs_inst *inst); void generate_pixel_interpolator_query(fs_inst *inst, @@ -459,9 +464,6 @@ struct brw_reg dst, struct brw_reg x, struct brw_reg y); - void generate_unpack_half_2x16_split(fs_inst *inst, - struct brw_reg dst, - struct brw_reg src); void generate_shader_time_add(fs_inst *inst, struct brw_reg payload, @@ -534,24 +536,8 @@ } } - /** - * Remove any modifiers from the \p i-th source region of the instruction, - * including negate, abs and any implicit type conversion to the execution - * type. Instead any source modifiers will be implemented as a separate - * MOV instruction prior to the original instruction. 
- */ - inline bool - lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i) - { - assert(inst->components_read(i) == 1); - const fs_builder ibld(v, block, inst); - const fs_reg tmp = ibld.vgrf(get_exec_type(inst)); - - ibld.MOV(tmp, inst->src[i]); - inst->src[i] = tmp; - - return true; - } + bool + lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i); } void shuffle_from_32bit_read(const brw::fs_builder &bld, diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_lower_conversions.cpp mesa-19.0.1/src/intel/compiler/brw_fs_lower_conversions.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs_lower_conversions.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_lower_conversions.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,132 +0,0 @@ -/* - * Copyright © 2015 Connor Abbott - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "brw_fs.h" -#include "brw_cfg.h" -#include "brw_fs_builder.h" - -using namespace brw; - -static bool -supports_type_conversion(const fs_inst *inst) { - switch (inst->opcode) { - case BRW_OPCODE_MOV: - case SHADER_OPCODE_MOV_INDIRECT: - return true; - case BRW_OPCODE_SEL: - return inst->dst.type == get_exec_type(inst); - default: - /* FIXME: We assume the opcodes don't explicitly mentioned - * before just work fine with arbitrary conversions. - */ - return true; - } -} - -/* From the SKL PRM Vol 2a, "Move": - * - * "A mov with the same source and destination type, no source modifier, - * and no saturation is a raw move. A packed byte destination region (B - * or UB type with HorzStride == 1 and ExecSize > 1) can only be written - * using raw move." - */ -static bool -is_byte_raw_mov (const fs_inst *inst) -{ - return type_sz(inst->dst.type) == 1 && - inst->opcode == BRW_OPCODE_MOV && - inst->src[0].type == inst->dst.type && - !inst->saturate && - !inst->src[0].negate && - !inst->src[0].abs; -} - -bool -fs_visitor::lower_conversions() -{ - bool progress = false; - - foreach_block_and_inst(block, fs_inst, inst, cfg) { - const fs_builder ibld(this, block, inst); - fs_reg dst = inst->dst; - bool saturate = inst->saturate; - - if (supports_type_conversion(inst)) { - if (type_sz(inst->dst.type) < get_exec_type_size(inst) && - !is_byte_raw_mov(inst)) { - /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to - * Single Precision Float": - * - * The upper Dword of every Qword will be written with undefined - * value when converting DF to F. - * - * So we need to allocate a temporary that's two registers, and then do - * a strided MOV to get the lower DWord of every Qword that has the - * result. - * - * This restriction applies, in general, whenever we convert to - * a type with a smaller bit-size. 
- */ - fs_reg temp = ibld.vgrf(get_exec_type(inst)); - fs_reg strided_temp = subscript(temp, dst.type, 0); - - assert(inst->size_written == inst->dst.component_size(inst->exec_size)); - inst->dst = strided_temp; - inst->saturate = false; - /* As it is an strided destination, we write n-times more being n the - * size ratio between source and destination types. Update - * size_written accordingly. - */ - inst->size_written = inst->dst.component_size(inst->exec_size); - - fs_inst *mov = ibld.at(block, inst->next).MOV(dst, strided_temp); - mov->saturate = saturate; - mov->predicate = inst->predicate; - - progress = true; - } - } else { - fs_reg temp0 = ibld.vgrf(get_exec_type(inst)); - - assert(inst->size_written == inst->dst.component_size(inst->exec_size)); - inst->dst = temp0; - /* As it is an strided destination, we write n-times more being n the - * size ratio between source and destination types. Update - * size_written accordingly. - */ - inst->size_written = inst->dst.component_size(inst->exec_size); - inst->saturate = false; - /* Now, do the conversion to original destination's type. In next iteration, - * we will lower it if it is a d2f conversion. 
- */ - ibld.at(block, inst->next).MOV(dst, temp0)->saturate = saturate; - - progress = true; - } - } - - if (progress) - invalidate_live_intervals(); - - return progress; -} diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_lower_regioning.cpp mesa-19.0.1/src/intel/compiler/brw_fs_lower_regioning.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs_lower_regioning.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_lower_regioning.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,421 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_fs.h" +#include "brw_cfg.h" +#include "brw_fs_builder.h" + +using namespace brw; + +namespace { + /* From the SKL PRM Vol 2a, "Move": + * + * "A mov with the same source and destination type, no source modifier, + * and no saturation is a raw move. 
A packed byte destination region (B + * or UB type with HorzStride == 1 and ExecSize > 1) can only be written + * using raw move." + */ + bool + is_byte_raw_mov(const fs_inst *inst) + { + return type_sz(inst->dst.type) == 1 && + inst->opcode == BRW_OPCODE_MOV && + inst->src[0].type == inst->dst.type && + !inst->saturate && + !inst->src[0].negate && + !inst->src[0].abs; + } + + /* + * Return an acceptable byte stride for the destination of an instruction + * that requires it to have some particular alignment. + */ + unsigned + required_dst_byte_stride(const fs_inst *inst) + { + if (inst->dst.is_accumulator()) { + /* If the destination is an accumulator, insist that we leave the + * stride alone. We cannot "fix" accumulator destinations by writing + * to a temporary and emitting a MOV into the original destination. + * For multiply instructions (our one use of the accumulator), the + * MUL writes the full 66 bits of the accumulator whereas the MOV we + * would emit only writes 33 bits and leaves the top 33 bits + * undefined. + * + * It's safe to just require the original stride here because the + * lowering pass will detect the mismatch in has_invalid_src_region + * and fix the sources of the multiply instead of the destination. + */ + return inst->dst.stride * type_sz(inst->dst.type); + } else if (type_sz(inst->dst.type) < get_exec_type_size(inst) && + !is_byte_raw_mov(inst)) { + return get_exec_type_size(inst); + } else { + unsigned stride = inst->dst.stride * type_sz(inst->dst.type); + + for (unsigned i = 0; i < inst->sources; i++) { + if (!is_uniform(inst->src[i]) && !inst->is_control_source(i)) + stride = MAX2(stride, inst->src[i].stride * + type_sz(inst->src[i].type)); + } + + return stride; + } + } + + /* + * Return an acceptable byte sub-register offset for the destination of an + * instruction that requires it to be aligned to the sub-register offset of + * the sources. 
+ */ + unsigned + required_dst_byte_offset(const fs_inst *inst) + { + for (unsigned i = 0; i < inst->sources; i++) { + if (!is_uniform(inst->src[i]) && !inst->is_control_source(i)) + if (reg_offset(inst->src[i]) % REG_SIZE != + reg_offset(inst->dst) % REG_SIZE) + return 0; + } + + return reg_offset(inst->dst) % REG_SIZE; + } + + /* + * Return whether the instruction has an unsupported channel bit layout + * specified for the i-th source region. + */ + bool + has_invalid_src_region(const gen_device_info *devinfo, const fs_inst *inst, + unsigned i) + { + if (is_unordered(inst) || inst->is_control_source(i)) { + return false; + } else { + const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type); + const unsigned src_byte_stride = inst->src[i].stride * + type_sz(inst->src[i].type); + const unsigned dst_byte_offset = reg_offset(inst->dst) % REG_SIZE; + const unsigned src_byte_offset = reg_offset(inst->src[i]) % REG_SIZE; + + return has_dst_aligned_region_restriction(devinfo, inst) && + !is_uniform(inst->src[i]) && + (src_byte_stride != dst_byte_stride || + src_byte_offset != dst_byte_offset); + } + } + + /* + * Return whether the instruction has an unsupported channel bit layout + * specified for the destination region. 
+ */ + bool + has_invalid_dst_region(const gen_device_info *devinfo, + const fs_inst *inst) + { + if (is_unordered(inst)) { + return false; + } else { + const brw_reg_type exec_type = get_exec_type(inst); + const unsigned dst_byte_offset = reg_offset(inst->dst) % REG_SIZE; + const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type); + const bool is_narrowing_conversion = !is_byte_raw_mov(inst) && + type_sz(inst->dst.type) < type_sz(exec_type); + + return (has_dst_aligned_region_restriction(devinfo, inst) && + (required_dst_byte_stride(inst) != dst_byte_stride || + required_dst_byte_offset(inst) != dst_byte_offset)) || + (is_narrowing_conversion && + required_dst_byte_stride(inst) != dst_byte_stride); + } + } + + /* + * Return whether the instruction has unsupported source modifiers + * specified for the i-th source region. + */ + bool + has_invalid_src_modifiers(const gen_device_info *devinfo, const fs_inst *inst, + unsigned i) + { + return !inst->can_do_source_mods(devinfo) && + (inst->src[i].negate || inst->src[i].abs); + } + + /* + * Return whether the instruction has an unsupported type conversion + * specified for the destination. + */ + bool + has_invalid_conversion(const gen_device_info *devinfo, const fs_inst *inst) + { + switch (inst->opcode) { + case BRW_OPCODE_MOV: + return false; + case BRW_OPCODE_SEL: + return inst->dst.type != get_exec_type(inst); + case SHADER_OPCODE_BROADCAST: + case SHADER_OPCODE_MOV_INDIRECT: + /* The source and destination types of these may be hard-coded to + * integer at codegen time due to hardware limitations of 64-bit + * types. + */ + return ((devinfo->gen == 7 && !devinfo->is_haswell) || + devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) && + type_sz(inst->src[0].type) > 4 && + inst->dst.type != inst->src[0].type; + default: + /* FIXME: We assume the opcodes don't explicitly mentioned before + * just work fine with arbitrary conversions. 
+ */ + return false; + } + } + + /** + * Return whether the instruction has non-standard semantics for the + * conditional mod which don't cause the flag register to be updated with + * the comparison result. + */ + bool + has_inconsistent_cmod(const fs_inst *inst) + { + return inst->opcode == BRW_OPCODE_SEL || + inst->opcode == BRW_OPCODE_CSEL || + inst->opcode == BRW_OPCODE_IF || + inst->opcode == BRW_OPCODE_WHILE; + } + + bool + lower_instruction(fs_visitor *v, bblock_t *block, fs_inst *inst); +} + +namespace brw { + /** + * Remove any modifiers from the \p i-th source region of the instruction, + * including negate, abs and any implicit type conversion to the execution + * type. Instead any source modifiers will be implemented as a separate + * MOV instruction prior to the original instruction. + */ + bool + lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i) + { + assert(inst->components_read(i) == 1); + const fs_builder ibld(v, block, inst); + const fs_reg tmp = ibld.vgrf(get_exec_type(inst)); + + lower_instruction(v, block, ibld.MOV(tmp, inst->src[i])); + inst->src[i] = tmp; + + return true; + } +} + +namespace { + /** + * Remove any modifiers from the destination region of the instruction, + * including saturate, conditional mod and any implicit type conversion + * from the execution type. Instead any destination modifiers will be + * implemented as a separate MOV instruction after the original + * instruction. + */ + bool + lower_dst_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst) + { + const fs_builder ibld(v, block, inst); + const brw_reg_type type = get_exec_type(inst); + /* Not strictly necessary, but if possible use a temporary with the same + * channel alignment as the current destination in order to avoid + * violating the restrictions enforced later on by lower_src_region() + * and lower_dst_region(), which would introduce additional copy + * instructions into the program unnecessarily. 
+ */ + const unsigned stride = + type_sz(inst->dst.type) * inst->dst.stride <= type_sz(type) ? 1 : + type_sz(inst->dst.type) * inst->dst.stride / type_sz(type); + const fs_reg tmp = horiz_stride(ibld.vgrf(type, stride), stride); + + /* Emit a MOV taking care of all the destination modifiers. */ + fs_inst *mov = ibld.at(block, inst->next).MOV(inst->dst, tmp); + mov->saturate = inst->saturate; + if (!has_inconsistent_cmod(inst)) + mov->conditional_mod = inst->conditional_mod; + if (inst->opcode != BRW_OPCODE_SEL) { + mov->predicate = inst->predicate; + mov->predicate_inverse = inst->predicate_inverse; + } + mov->flag_subreg = inst->flag_subreg; + lower_instruction(v, block, mov); + + /* Point the original instruction at the temporary, and clean up any + * destination modifiers. + */ + assert(inst->size_written == inst->dst.component_size(inst->exec_size)); + inst->dst = tmp; + inst->size_written = inst->dst.component_size(inst->exec_size); + inst->saturate = false; + if (!has_inconsistent_cmod(inst)) + inst->conditional_mod = BRW_CONDITIONAL_NONE; + + assert(!inst->flags_written() || !mov->predicate); + return true; + } + + /** + * Remove any non-trivial shuffling of data from the \p i-th source region + * of the instruction. Instead implement the region as a series of integer + * copies into a temporary with the same channel layout as the destination. + */ + bool + lower_src_region(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i) + { + assert(inst->components_read(i) == 1); + const fs_builder ibld(v, block, inst); + const unsigned stride = type_sz(inst->dst.type) * inst->dst.stride / + type_sz(inst->src[i].type); + assert(stride > 0); + const fs_reg tmp = horiz_stride(ibld.vgrf(inst->src[i].type, stride), + stride); + + /* Emit a series of 32-bit integer copies with any source modifiers + * cleaned up (because their semantics are dependent on the type). 
+ */ + const brw_reg_type raw_type = brw_int_type(MIN2(type_sz(tmp.type), 4), + false); + const unsigned n = type_sz(tmp.type) / type_sz(raw_type); + fs_reg raw_src = inst->src[i]; + raw_src.negate = false; + raw_src.abs = false; + + for (unsigned j = 0; j < n; j++) + ibld.MOV(subscript(tmp, raw_type, j), subscript(raw_src, raw_type, j)); + + /* Point the original instruction at the temporary, making sure to keep + * any source modifiers in the instruction. + */ + fs_reg lower_src = tmp; + lower_src.negate = inst->src[i].negate; + lower_src.abs = inst->src[i].abs; + inst->src[i] = lower_src; + + return true; + } + + /** + * Remove any non-trivial shuffling of data from the destination region of + * the instruction. Instead implement the region as a series of integer + * copies from a temporary with a channel layout compatible with the + * sources. + */ + bool + lower_dst_region(fs_visitor *v, bblock_t *block, fs_inst *inst) + { + /* We cannot replace the result of an integer multiply which writes the + * accumulator because MUL+MACH pairs act on the accumulator as a 66-bit + * value whereas the MOV will act on only 32 or 33 bits of the + * accumulator. + */ + assert(inst->opcode != BRW_OPCODE_MUL || !inst->dst.is_accumulator() || + brw_reg_type_is_floating_point(inst->dst.type)); + + const fs_builder ibld(v, block, inst); + const unsigned stride = required_dst_byte_stride(inst) / + type_sz(inst->dst.type); + assert(stride > 0); + const fs_reg tmp = horiz_stride(ibld.vgrf(inst->dst.type, stride), + stride); + + /* Emit a series of 32-bit integer copies from the temporary into the + * original destination. 
+ */ + const brw_reg_type raw_type = brw_int_type(MIN2(type_sz(tmp.type), 4), + false); + const unsigned n = type_sz(tmp.type) / type_sz(raw_type); + + if (inst->predicate && inst->opcode != BRW_OPCODE_SEL) { + /* Note that in general we cannot simply predicate the copies on the + * same flag register as the original instruction, since it may have + * been overwritten by the instruction itself. Instead initialize + * the temporary with the previous contents of the destination + * register. + */ + for (unsigned j = 0; j < n; j++) + ibld.MOV(subscript(tmp, raw_type, j), + subscript(inst->dst, raw_type, j)); + } + + for (unsigned j = 0; j < n; j++) + ibld.at(block, inst->next).MOV(subscript(inst->dst, raw_type, j), + subscript(tmp, raw_type, j)); + + /* Point the original instruction at the temporary, making sure to keep + * any destination modifiers in the instruction. + */ + assert(inst->size_written == inst->dst.component_size(inst->exec_size)); + inst->dst = tmp; + inst->size_written = inst->dst.component_size(inst->exec_size); + + return true; + } + + /** + * Legalize the source and destination regioning controls of the specified + * instruction. 
+ */ + bool + lower_instruction(fs_visitor *v, bblock_t *block, fs_inst *inst) + { + const gen_device_info *devinfo = v->devinfo; + bool progress = false; + + if (has_invalid_conversion(devinfo, inst)) + progress |= lower_dst_modifiers(v, block, inst); + + if (has_invalid_dst_region(devinfo, inst)) + progress |= lower_dst_region(v, block, inst); + + for (unsigned i = 0; i < inst->sources; i++) { + if (has_invalid_src_modifiers(devinfo, inst, i)) + progress |= lower_src_modifiers(v, block, inst, i); + + if (has_invalid_src_region(devinfo, inst, i)) + progress |= lower_src_region(v, block, inst, i); + } + + return progress; + } +} + +bool +fs_visitor::lower_regioning() +{ + bool progress = false; + + foreach_block_and_inst_safe(block, fs_inst, inst, cfg) + progress |= lower_instruction(this, block, inst); + + if (progress) + invalidate_live_intervals(); + + return progress; +} diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_nir.cpp mesa-19.0.1/src/intel/compiler/brw_fs_nir.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs_nir.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_nir.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -26,6 +26,7 @@ #include "brw_fs_surface_builder.h" #include "brw_nir.h" #include "util/u_math.h" +#include "util/bitscan.h" using namespace brw; using namespace brw::surface_access; @@ -511,8 +512,16 @@ src0->op != nir_op_extract_i8 && src0->op != nir_op_extract_i16) return false; - nir_const_value *element = nir_src_as_const_value(src0->src[1].src); - assert(element != NULL); + /* If either opcode has source modifiers, bail. + * + * TODO: We can potentially handle source modifiers if both of the opcodes + * we're combining are signed integers. 
+ */ + if (instr->src[0].abs || instr->src[0].negate || + src0->src[0].abs || src0->src[0].negate) + return false; + + unsigned element = nir_src_as_uint(src0->src[1].src); /* Element type to extract.*/ const brw_reg_type type = brw_int_type( @@ -526,7 +535,7 @@ op0 = offset(op0, bld, src0->src[0].swizzle[0]); set_saturate(instr->dest.saturate, - bld.MOV(result, subscript(op0, type, element->u32[0]))); + bld.MOV(result, subscript(op0, type, element))); return true; } @@ -544,14 +553,18 @@ if (src0->intrinsic != nir_intrinsic_load_front_face) return false; - nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src); - if (!value1 || fabsf(value1->f32[0]) != 1.0f) + if (!nir_src_is_const(instr->src[1].src) || + !nir_src_is_const(instr->src[2].src)) return false; - nir_const_value *value2 = nir_src_as_const_value(instr->src[2].src); - if (!value2 || fabsf(value2->f32[0]) != 1.0f) + const float value1 = nir_src_as_float(instr->src[1].src); + const float value2 = nir_src_as_float(instr->src[2].src); + if (fabsf(value1) != 1.0f || fabsf(value2) != 1.0f) return false; + /* nir_opt_algebraic should have gotten rid of bcsel(b, a, a) */ + assert(value1 == -value2); + fs_reg tmp = vgrf(glsl_type::int_type); if (devinfo->gen >= 6) { @@ -569,7 +582,7 @@ * surely be TRIANGLES */ - if (value1->f32[0] == -1.0f) { + if (value1 == -1.0f) { g0.negate = true; } @@ -590,7 +603,7 @@ * surely be TRIANGLES */ - if (value1->f32[0] == -1.0f) { + if (value1 == -1.0f) { g1_6.negate = true; } @@ -784,8 +797,13 @@ inst->saturate = instr->dest.saturate; break; - case nir_op_b2i: - case nir_op_b2f: + case nir_op_b2i8: + case nir_op_b2i16: + case nir_op_b2i32: + case nir_op_b2i64: + case nir_op_b2f16: + case nir_op_b2f32: + case nir_op_b2f64: op[0].type = BRW_REGISTER_TYPE_D; op[0].negate = !op[0].negate; /* fallthrough */ @@ -796,30 +814,6 @@ case nir_op_i2i64: case nir_op_u2f64: case nir_op_u2u64: - /* CHV PRM, vol07, 3D Media GPGPU Engine, Register Region Restrictions: - * - * "When 
source or destination is 64b (...), regioning in Align1 - * must follow these rules: - * - * 1. Source and destination horizontal stride must be aligned to - * the same qword. - * (...)" - * - * This means that conversions from bit-sizes smaller than 64-bit to - * 64-bit need to have the source data elements aligned to 64-bit. - * This restriction does not apply to BDW and later. - */ - if (nir_dest_bit_size(instr->dest.dest) == 64 && - nir_src_bit_size(instr->src[0].src) < 64 && - (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) { - fs_reg tmp = bld.vgrf(result.type, 1); - tmp = subscript(tmp, op[0].type, 0); - inst = bld.MOV(tmp, op[0]); - inst = bld.MOV(result, tmp); - inst->saturate = instr->dest.saturate; - break; - } - /* fallthrough */ case nir_op_f2f32: case nir_op_f2i32: case nir_op_f2u32: @@ -973,6 +967,11 @@ inst->saturate = instr->dest.saturate; break; + case nir_op_uadd_sat: + inst = bld.ADD(result, op[0], op[1]); + inst->saturate = true; + break; + case nir_op_fmul: inst = bld.MUL(result, op[0], op[1]); inst->saturate = instr->dest.saturate; @@ -1042,10 +1041,10 @@ break; } - case nir_op_flt: - case nir_op_fge: - case nir_op_feq: - case nir_op_fne: { + case nir_op_flt32: + case nir_op_fge32: + case nir_op_feq32: + case nir_op_fne32: { fs_reg dest = result; const uint32_t bit_size = nir_src_bit_size(instr->src[0].src); @@ -1054,16 +1053,16 @@ brw_conditional_mod cond; switch (instr->op) { - case nir_op_flt: + case nir_op_flt32: cond = BRW_CONDITIONAL_L; break; - case nir_op_fge: + case nir_op_fge32: cond = BRW_CONDITIONAL_GE; break; - case nir_op_feq: + case nir_op_feq32: cond = BRW_CONDITIONAL_Z; break; - case nir_op_fne: + case nir_op_fne32: cond = BRW_CONDITIONAL_NZ; break; default: @@ -1086,12 +1085,12 @@ break; } - case nir_op_ilt: - case nir_op_ult: - case nir_op_ige: - case nir_op_uge: - case nir_op_ieq: - case nir_op_ine: { + case nir_op_ilt32: + case nir_op_ult32: + case nir_op_ige32: + case nir_op_uge32: + case nir_op_ieq32: + 
case nir_op_ine32: { fs_reg dest = result; const uint32_t bit_size = nir_src_bit_size(instr->src[0].src); @@ -1100,18 +1099,18 @@ brw_conditional_mod cond; switch (instr->op) { - case nir_op_ilt: - case nir_op_ult: + case nir_op_ilt32: + case nir_op_ult32: cond = BRW_CONDITIONAL_L; break; - case nir_op_ige: - case nir_op_uge: + case nir_op_ige32: + case nir_op_uge32: cond = BRW_CONDITIONAL_GE; break; - case nir_op_ieq: + case nir_op_ieq32: cond = BRW_CONDITIONAL_Z; break; - case nir_op_ine: + case nir_op_ine32: cond = BRW_CONDITIONAL_NZ; break; default: @@ -1164,18 +1163,18 @@ case nir_op_fdot2: case nir_op_fdot3: case nir_op_fdot4: - case nir_op_ball_fequal2: - case nir_op_ball_iequal2: - case nir_op_ball_fequal3: - case nir_op_ball_iequal3: - case nir_op_ball_fequal4: - case nir_op_ball_iequal4: - case nir_op_bany_fnequal2: - case nir_op_bany_inequal2: - case nir_op_bany_fnequal3: - case nir_op_bany_inequal3: - case nir_op_bany_fnequal4: - case nir_op_bany_inequal4: + case nir_op_b32all_fequal2: + case nir_op_b32all_iequal2: + case nir_op_b32all_fequal3: + case nir_op_b32all_iequal3: + case nir_op_b32all_fequal4: + case nir_op_b32all_iequal4: + case nir_op_b32any_fnequal2: + case nir_op_b32any_inequal2: + case nir_op_b32any_fnequal3: + case nir_op_b32any_inequal3: + case nir_op_b32any_fnequal4: + case nir_op_b32any_inequal4: unreachable("Lowered by nir_lower_alu_reductions"); case nir_op_fnoise1_1: @@ -1209,15 +1208,15 @@ inst->saturate = instr->dest.saturate; break; - case nir_op_i2b: - case nir_op_f2b: { + case nir_op_i2b32: + case nir_op_f2b32: { uint32_t bit_size = nir_src_bit_size(instr->src[0].src); if (bit_size == 64) { /* two-argument instructions can't take 64-bit immediates */ fs_reg zero; fs_reg tmp; - if (instr->op == nir_op_f2b) { + if (instr->op == nir_op_f2b32) { zero = vgrf(glsl_type::double_type); tmp = vgrf(glsl_type::double_type); bld.MOV(zero, setup_imm_df(bld, 0.0)); @@ -1236,10 +1235,10 @@ } else { fs_reg zero; if (bit_size == 32) { - zero = 
instr->op == nir_op_f2b ? brw_imm_f(0.0f) : brw_imm_d(0); + zero = instr->op == nir_op_f2b32 ? brw_imm_f(0.0f) : brw_imm_d(0); } else { assert(bit_size == 16); - zero = instr->op == nir_op_f2b ? + zero = instr->op == nir_op_f2b32 ? retype(brw_imm_w(0), BRW_REGISTER_TYPE_HF) : brw_imm_w(0); } bld.CMP(result, op[0], zero, BRW_CONDITIONAL_NZ); @@ -1329,11 +1328,13 @@ unreachable("not reached: should be handled by lower_packing_builtins"); case nir_op_unpack_half_2x16_split_x: - inst = bld.emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, result, op[0]); + inst = bld.emit(BRW_OPCODE_F16TO32, result, + subscript(op[0], BRW_REGISTER_TYPE_UW, 0)); inst->saturate = instr->dest.saturate; break; case nir_op_unpack_half_2x16_split_y: - inst = bld.emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, result, op[0]); + inst = bld.emit(BRW_OPCODE_F16TO32, result, + subscript(op[0], BRW_REGISTER_TYPE_UW, 1)); inst->saturate = instr->dest.saturate; break; @@ -1449,36 +1450,14 @@ unreachable("not reached: should have been lowered"); case nir_op_ishl: + bld.SHL(result, op[0], op[1]); + break; case nir_op_ishr: - case nir_op_ushr: { - fs_reg shift_count = op[1]; - - if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) { - if (op[1].file == VGRF && - (result.type == BRW_REGISTER_TYPE_Q || - result.type == BRW_REGISTER_TYPE_UQ)) { - shift_count = fs_reg(VGRF, alloc.allocate(dispatch_width / 4), - BRW_REGISTER_TYPE_UD); - shift_count.stride = 2; - bld.MOV(shift_count, op[1]); - } - } - - switch (instr->op) { - case nir_op_ishl: - bld.SHL(result, op[0], shift_count); - break; - case nir_op_ishr: - bld.ASR(result, op[0], shift_count); - break; - case nir_op_ushr: - bld.SHR(result, op[0], shift_count); - break; - default: - unreachable("not reached"); - } + bld.ASR(result, op[0], op[1]); + break; + case nir_op_ushr: + bld.SHR(result, op[0], op[1]); break; - } case nir_op_pack_half_2x16_split: bld.emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]); @@ -1494,7 +1473,7 @@ inst->saturate = 
instr->dest.saturate; break; - case nir_op_bcsel: + case nir_op_b32csel: if (optimize_frontfacing_ternary(instr, result)) return; @@ -1505,8 +1484,7 @@ case nir_op_extract_u8: case nir_op_extract_i8: { - nir_const_value *byte = nir_src_as_const_value(instr->src[1].src); - assert(byte != NULL); + unsigned byte = nir_src_as_uint(instr->src[1].src); /* The PRMs say: * @@ -1515,20 +1493,29 @@ * Use two instructions and a word or DWord intermediate integer type. */ if (nir_dest_bit_size(instr->dest.dest) == 64) { - const brw_reg_type type = brw_int_type(2, instr->op == nir_op_extract_i8); + const brw_reg_type type = brw_int_type(1, instr->op == nir_op_extract_i8); if (instr->op == nir_op_extract_i8) { /* If we need to sign extend, extract to a word first */ fs_reg w_temp = bld.vgrf(BRW_REGISTER_TYPE_W); - bld.MOV(w_temp, subscript(op[0], type, byte->u32[0])); + bld.MOV(w_temp, subscript(op[0], type, byte)); bld.MOV(result, w_temp); + } else if (byte & 1) { + /* Extract the high byte from the word containing the desired byte + * offset. 
+ */ + bld.SHR(result, + subscript(op[0], BRW_REGISTER_TYPE_UW, byte / 2), + brw_imm_uw(8)); } else { /* Otherwise use an AND with 0xff and a word type */ - bld.AND(result, subscript(op[0], type, byte->u32[0] / 2), brw_imm_uw(0xff)); + bld.AND(result, + subscript(op[0], BRW_REGISTER_TYPE_UW, byte / 2), + brw_imm_uw(0xff)); } } else { const brw_reg_type type = brw_int_type(1, instr->op == nir_op_extract_i8); - bld.MOV(result, subscript(op[0], type, byte->u32[0])); + bld.MOV(result, subscript(op[0], type, byte)); } break; } @@ -1536,9 +1523,8 @@ case nir_op_extract_u16: case nir_op_extract_i16: { const brw_reg_type type = brw_int_type(2, instr->op == nir_op_extract_i16); - nir_const_value *word = nir_src_as_const_value(instr->src[1].src); - assert(word != NULL); - bld.MOV(result, subscript(op[0], type, word->u32[0])); + unsigned word = nir_src_as_uint(instr->src[1].src); + bld.MOV(result, subscript(op[0], type, word)); break; } @@ -1649,9 +1635,9 @@ fs_reg fs_visitor::get_nir_src_imm(const nir_src &src) { - nir_const_value *val = nir_src_as_const_value(src); assert(nir_src_bit_size(src) == 32); - return val ? fs_reg(brw_imm_d(val->i32[0])) : get_nir_src(src); + return nir_src_is_const(src) ? + fs_reg(brw_imm_d(nir_src_as_int(src))) : get_nir_src(src); } fs_reg @@ -1872,7 +1858,7 @@ } /* Store the control data bits in the message payload and send it. 
*/ - int mlen = 2; + unsigned mlen = 2; if (channel_mask.file != BAD_FILE) mlen += 4; /* channel masks, plus 3 extra copies of the data */ if (per_slot_offset.file != BAD_FILE) @@ -1880,7 +1866,7 @@ fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, mlen); fs_reg *sources = ralloc_array(mem_ctx, fs_reg, mlen); - int i = 0; + unsigned i = 0; sources[i++] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)); if (per_slot_offset.file != BAD_FILE) sources[i++] = per_slot_offset; @@ -2049,19 +2035,16 @@ unsigned first_component) { struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data); - - nir_const_value *vertex_const = nir_src_as_const_value(vertex_src); - nir_const_value *offset_const = nir_src_as_const_value(offset_src); const unsigned push_reg_count = gs_prog_data->base.urb_read_length * 8; /* TODO: figure out push input layout for invocations == 1 */ /* TODO: make this work with 64-bit inputs */ if (gs_prog_data->invocations == 1 && type_sz(dst.type) <= 4 && - offset_const != NULL && vertex_const != NULL && - 4 * (base_offset + offset_const->u32[0]) < push_reg_count) { - int imm_offset = (base_offset + offset_const->u32[0]) * 4 + - vertex_const->u32[0] * push_reg_count; + nir_src_is_const(offset_src) && nir_src_is_const(vertex_src) && + 4 * (base_offset + nir_src_as_uint(offset_src)) < push_reg_count) { + int imm_offset = (base_offset + nir_src_as_uint(offset_src)) * 4 + + nir_src_as_uint(vertex_src) * push_reg_count; for (unsigned i = 0; i < num_components; i++) { bld.MOV(offset(dst, bld, i), fs_reg(ATTR, imm_offset + i + first_component, dst.type)); @@ -2076,10 +2059,10 @@ fs_reg icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); if (gs_prog_data->invocations == 1) { - if (vertex_const) { + if (nir_src_is_const(vertex_src)) { /* The vertex index is constant; just select the proper URB handle. 
*/ icp_handle = - retype(brw_vec8_grf(first_icp_handle + vertex_const->i32[0], 0), + retype(brw_vec8_grf(first_icp_handle + nir_src_as_uint(vertex_src), 0), BRW_REGISTER_TYPE_UD); } else { /* The vertex index is non-constant. We need to use indirect @@ -2120,12 +2103,11 @@ } else { assert(gs_prog_data->invocations > 1); - if (vertex_const) { - assert(devinfo->gen >= 9 || vertex_const->i32[0] <= 5); + if (nir_src_is_const(vertex_src)) { + unsigned vertex = nir_src_as_uint(vertex_src); + assert(devinfo->gen >= 9 || vertex <= 5); bld.MOV(icp_handle, - retype(brw_vec1_grf(first_icp_handle + - vertex_const->i32[0] / 8, - vertex_const->i32[0] % 8), + retype(brw_vec1_grf(first_icp_handle + vertex / 8, vertex % 8), BRW_REGISTER_TYPE_UD)); } else { /* The vertex index is non-constant. We need to use indirect @@ -2169,7 +2151,7 @@ } for (unsigned iter = 0; iter < num_iterations; iter++) { - if (offset_const) { + if (nir_src_is_const(offset_src)) { /* Constant indexing - use global offset. */ if (first_component != 0) { unsigned read_components = num_components + first_component; @@ -2187,7 +2169,7 @@ inst->size_written = num_components * tmp_dst.component_size(inst->exec_size); } - inst->offset = base_offset + offset_const->u32[0]; + inst->offset = base_offset + nir_src_as_uint(offset_src); inst->mlen = 1; } else { /* Indirect indexing - use per-slot offsets as well. */ @@ -2225,7 +2207,7 @@ if (num_iterations > 1) { num_components = orig_num_components - 2; - if(offset_const) { + if(nir_src_is_const(offset_src)) { base_offset++; } else { fs_reg new_indirect = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); @@ -2240,121 +2222,19 @@ fs_visitor::get_indirect_offset(nir_intrinsic_instr *instr) { nir_src *offset_src = nir_get_io_offset_src(instr); - nir_const_value *const_value = nir_src_as_const_value(*offset_src); - if (const_value) { + if (nir_src_is_const(*offset_src)) { /* The only constant offset we should find is 0. 
brw_nir.c's * add_const_offset_to_base() will fold other constant offsets * into instr->const_index[0]. */ - assert(const_value->u32[0] == 0); + assert(nir_src_as_uint(*offset_src) == 0); return fs_reg(); } return get_nir_src(*offset_src); } -static void -do_untyped_vector_read(const fs_builder &bld, - const fs_reg dest, - const fs_reg surf_index, - const fs_reg offset_reg, - unsigned num_components) -{ - if (type_sz(dest.type) <= 2) { - assert(dest.stride == 1); - boolean is_const_offset = offset_reg.file == BRW_IMMEDIATE_VALUE; - - if (is_const_offset) { - uint32_t start = offset_reg.ud & ~3; - uint32_t end = offset_reg.ud + num_components * type_sz(dest.type); - end = ALIGN(end, 4); - assert (end - start <= 16); - - /* At this point we have 16-bit component/s that have constant - * offset aligned to 4-bytes that can be read with untyped_reads. - * untyped_read message requires 32-bit aligned offsets. - */ - unsigned first_component = (offset_reg.ud & 3) / type_sz(dest.type); - unsigned num_components_32bit = (end - start) / 4; - - fs_reg read_result = - emit_untyped_read(bld, surf_index, brw_imm_ud(start), - 1 /* dims */, - num_components_32bit, - BRW_PREDICATE_NONE); - shuffle_from_32bit_read(bld, dest, read_result, first_component, - num_components); - } else { - fs_reg read_offset = bld.vgrf(BRW_REGISTER_TYPE_UD); - for (unsigned i = 0; i < num_components; i++) { - if (i == 0) { - bld.MOV(read_offset, offset_reg); - } else { - bld.ADD(read_offset, offset_reg, - brw_imm_ud(i * type_sz(dest.type))); - } - /* Non constant offsets are not guaranteed to be aligned 32-bits - * so they are read using one byte_scattered_read message - * for each component. 
- */ - fs_reg read_result = - emit_byte_scattered_read(bld, surf_index, read_offset, - 1 /* dims */, 1, - type_sz(dest.type) * 8 /* bit_size */, - BRW_PREDICATE_NONE); - bld.MOV(offset(dest, bld, i), - subscript (read_result, dest.type, 0)); - } - } - } else if (type_sz(dest.type) == 4) { - fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg, - 1 /* dims */, - num_components, - BRW_PREDICATE_NONE); - read_result.type = dest.type; - for (unsigned i = 0; i < num_components; i++) - bld.MOV(offset(dest, bld, i), offset(read_result, bld, i)); - } else if (type_sz(dest.type) == 8) { - /* Reading a dvec, so we need to: - * - * 1. Multiply num_components by 2, to account for the fact that we - * need to read 64-bit components. - * 2. Shuffle the result of the load to form valid 64-bit elements - * 3. Emit a second load (for components z/w) if needed. - */ - fs_reg read_offset = bld.vgrf(BRW_REGISTER_TYPE_UD); - bld.MOV(read_offset, offset_reg); - - int iters = num_components <= 2 ? 1 : 2; - - /* Load the dvec, the first iteration loads components x/y, the second - * iteration, if needed, loads components z/w - */ - for (int it = 0; it < iters; it++) { - /* Compute number of components to read in this iteration */ - int iter_components = MIN2(2, num_components); - num_components -= iter_components; - - /* Read. Since this message reads 32-bit components, we need to - * read twice as many components. 
- */ - fs_reg read_result = emit_untyped_read(bld, surf_index, read_offset, - 1 /* dims */, - iter_components * 2, - BRW_PREDICATE_NONE); - - /* Shuffle the 32-bit load result into valid 64-bit data */ - shuffle_from_32bit_read(bld, offset(dest, bld, it * 2), - read_result, 0, iter_components); - - bld.ADD(read_offset, read_offset, brw_imm_ud(16)); - } - } else { - unreachable("Unsupported type"); - } -} - void fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) @@ -2375,9 +2255,7 @@ unsigned first_component = nir_intrinsic_component(instr); unsigned num_components = instr->num_components; - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - assert(const_offset && "Indirect input loads not allowed"); - src = offset(src, bld, const_offset->u32[0]); + src = offset(src, bld, nir_src_as_uint(instr->src[0])); if (type_sz(dest.type) == 8) first_component /= 2; @@ -2464,18 +2342,17 @@ unsigned imm_offset = instr->const_index[0]; const nir_src &vertex_src = instr->src[0]; - nir_const_value *vertex_const = nir_src_as_const_value(vertex_src); fs_inst *inst; fs_reg icp_handle; - if (vertex_const) { + if (nir_src_is_const(vertex_src)) { /* Emit a MOV to resolve <0,1,0> regioning. */ icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); + unsigned vertex = nir_src_as_uint(vertex_src); bld.MOV(icp_handle, - retype(brw_vec1_grf(1 + (vertex_const->i32[0] >> 3), - vertex_const->i32[0] & 7), + retype(brw_vec1_grf(1 + (vertex >> 3), vertex & 7), BRW_REGISTER_TYPE_UD)); } else if (tcs_prog_data->instances == 1 && vertex_src.is_ssa && @@ -3036,10 +2913,6 @@ wm_prog_data->binding_table.render_target_read_start - wm_prog_data->base.binding_table.texture_start; - brw_mark_surface_used( - bld.shader->stage_prog_data, - wm_prog_data->binding_table.render_target_read_start + target); - /* Calculate the fragment coordinates. 
*/ const fs_reg coords = bld.vgrf(BRW_REGISTER_TYPE_UD, 3); bld.MOV(offset(coords, bld, 0), pixel_x); @@ -3070,7 +2943,7 @@ /* Emit the instruction. */ const fs_reg srcs[] = { coords, fs_reg(), brw_imm_ud(0), fs_reg(), - sample, mcs, + fs_reg(), sample, mcs, brw_imm_ud(surface), brw_imm_ud(0), fs_reg(), brw_imm_ud(3), brw_imm_ud(0) }; STATIC_ASSERT(ARRAY_SIZE(srcs) == TEX_LOGICAL_NUM_SRCS); @@ -3189,10 +3062,9 @@ case nir_intrinsic_store_output: { const fs_reg src = get_nir_src(instr->src[0]); - const nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); - assert(const_offset && "Indirect output stores not allowed"); + const unsigned store_offset = nir_src_as_uint(instr->src[1]); const unsigned location = nir_intrinsic_base(instr) + - SET_FIELD(const_offset->u32[0], BRW_NIR_FRAG_OUTPUT_LOCATION); + SET_FIELD(store_offset, BRW_NIR_FRAG_OUTPUT_LOCATION); const fs_reg new_dest = retype(alloc_frag_output(this, location), src.type); @@ -3207,9 +3079,8 @@ const unsigned l = GET_FIELD(nir_intrinsic_base(instr), BRW_NIR_FRAG_OUTPUT_LOCATION); assert(l >= FRAG_RESULT_DATA0); - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - assert(const_offset && "Indirect output loads not allowed"); - const unsigned target = l - FRAG_RESULT_DATA0 + const_offset->u32[0]; + const unsigned load_offset = nir_src_as_uint(instr->src[0]); + const unsigned target = l - FRAG_RESULT_DATA0 + load_offset; const fs_reg tmp = bld.vgrf(dest.type, 4); if (reinterpret_cast(key)->coherent_fb_fetch) @@ -3298,10 +3169,8 @@ const glsl_interp_mode interpolation = (enum glsl_interp_mode) nir_intrinsic_interp_mode(instr); - nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]); - - if (const_sample) { - unsigned msg_data = const_sample->i32[0] << 4; + if (nir_src_is_const(instr->src[0])) { + unsigned msg_data = nir_src_as_uint(instr->src[0]) << 4; emit_pixel_interpolater_send(bld, FS_OPCODE_INTERPOLATE_AT_SAMPLE, @@ -3370,6 +3239,7 @@ nir_const_value 
*const_offset = nir_src_as_const_value(instr->src[0]); if (const_offset) { + assert(nir_src_bit_size(instr->src[0]) == 32); unsigned off_x = MIN2((int)(const_offset->f32[0] * 16), 7) & 0xf; unsigned off_y = MIN2((int)(const_offset->f32[1] * 16), 7) & 0xf; @@ -3474,12 +3344,11 @@ static int get_op_for_atomic_add(nir_intrinsic_instr *instr, unsigned src) { - const nir_const_value *const val = nir_src_as_const_value(instr->src[src]); - - if (val != NULL) { - if (val->i32[0] == 1) + if (nir_src_is_const(instr->src[src])) { + int64_t add_val = nir_src_as_int(instr->src[src]); + if (add_val == 1) return BRW_AOP_INC; - else if (val->i32[0] == -1) + else if (add_val == -1) return BRW_AOP_DEC; } @@ -3525,7 +3394,6 @@ cs_prog_data->uses_num_work_groups = true; fs_reg surf_index = brw_imm_ud(surface); - brw_mark_surface_used(prog_data, surface); /* Read the 3 GLuint components of gl_NumWorkGroups */ for (unsigned i = 0; i < 3; i++) { @@ -3583,93 +3451,64 @@ case nir_intrinsic_load_shared: { assert(devinfo->gen >= 7); + assert(stage == MESA_SHADER_COMPUTE); - fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM); + const unsigned bit_size = nir_dest_bit_size(instr->dest); + fs_reg offset_reg = retype(get_nir_src(instr->src[0]), + BRW_REGISTER_TYPE_UD); - /* Get the offset to read from */ - fs_reg offset_reg; - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - if (const_offset) { - offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]); - } else { - offset_reg = vgrf(glsl_type::uint_type); - bld.ADD(offset_reg, - retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD), - brw_imm_ud(instr->const_index[0])); - } + /* Make dest unsigned because that's what the temporary will be */ + dest.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD); /* Read the vector */ - do_untyped_vector_read(bld, dest, surf_index, offset_reg, - instr->num_components); + if (nir_intrinsic_align(instr) >= 4) { + assert(nir_dest_bit_size(instr->dest) == 32); + 
fs_reg read_result = emit_untyped_read(bld, brw_imm_ud(GEN7_BTI_SLM), + offset_reg, 1 /* dims */, + instr->num_components, + BRW_PREDICATE_NONE); + for (unsigned i = 0; i < instr->num_components; i++) + bld.MOV(offset(dest, bld, i), offset(read_result, bld, i)); + } else { + assert(nir_dest_bit_size(instr->dest) <= 32); + assert(nir_dest_num_components(instr->dest) == 1); + fs_reg read_result = + emit_byte_scattered_read(bld, brw_imm_ud(GEN7_BTI_SLM), offset_reg, + 1 /* dims */, 1, bit_size, + BRW_PREDICATE_NONE); + bld.MOV(dest, read_result); + } break; } case nir_intrinsic_store_shared: { assert(devinfo->gen >= 7); + assert(stage == MESA_SHADER_COMPUTE); - /* Block index */ - fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM); - - /* Value */ + const unsigned bit_size = nir_src_bit_size(instr->src[0]); fs_reg val_reg = get_nir_src(instr->src[0]); + fs_reg offset_reg = retype(get_nir_src(instr->src[1]), + BRW_REGISTER_TYPE_UD); - /* Writemask */ - unsigned writemask = instr->const_index[1]; - - /* get_nir_src() retypes to integer. Be wary of 64-bit types though - * since the untyped writes below operate in units of 32-bits, which - * means that we need to write twice as many components each time. - * Also, we have to suffle 64-bit data to be in the appropriate layout - * expected by our 32-bit write messages. - */ - unsigned type_size = 4; - if (nir_src_bit_size(instr->src[0]) == 64) { - type_size = 8; - val_reg = shuffle_for_32bit_write(bld, val_reg, 0, - instr->num_components); - } - - unsigned type_slots = type_size / 4; - - /* Combine groups of consecutive enabled channels in one write - * message. We use ffs to find the first enabled channel and then ffs on - * the bit-inverse, down-shifted writemask to determine the length of - * the block of enabled bits. - */ - while (writemask) { - unsigned first_component = ffs(writemask) - 1; - unsigned length = ffs(~(writemask >> first_component)) - 1; - - /* We can't write more than 2 64-bit components at once. 
Limit the - * length of the write to what we can do and let the next iteration - * handle the rest - */ - if (type_size > 4) - length = MIN2(2, length); - - fs_reg offset_reg; - nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); - if (const_offset) { - offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0] + - type_size * first_component); - } else { - offset_reg = vgrf(glsl_type::uint_type); - bld.ADD(offset_reg, - retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD), - brw_imm_ud(instr->const_index[0] + type_size * first_component)); - } + val_reg.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD); - emit_untyped_write(bld, surf_index, offset_reg, - offset(val_reg, bld, first_component * type_slots), - 1 /* dims */, length * type_slots, + assert(nir_intrinsic_write_mask(instr) == + (1u << instr->num_components) - 1); + if (nir_intrinsic_align(instr) >= 4) { + assert(nir_src_bit_size(instr->src[0]) == 32); + assert(nir_src_num_components(instr->src[0]) <= 4); + emit_untyped_write(bld, brw_imm_ud(GEN7_BTI_SLM), offset_reg, val_reg, + 1 /* dims */, instr->num_components, BRW_PREDICATE_NONE); - - /* Clear the bits in the writemask that we just wrote, then try - * again to see if more channels are left. - */ - writemask &= (15 << (first_component + length)); + } else { + assert(nir_src_bit_size(instr->src[0]) <= 32); + assert(nir_src_num_components(instr->src[0]) == 1); + fs_reg write_src = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.MOV(write_src, val_reg); + emit_byte_scattered_write(bld, brw_imm_ud(GEN7_BTI_SLM), offset_reg, + write_src, 1 /* dims */, bit_size, + BRW_PREDICATE_NONE); } - break; } @@ -3762,6 +3601,27 @@ return bld.emit_uniformize(image); } +fs_reg +fs_visitor::get_nir_ssbo_intrinsic_index(const brw::fs_builder &bld, + nir_intrinsic_instr *instr) +{ + /* SSBO stores are weird in that their index is in src[1] */ + const unsigned src = instr->intrinsic == nir_intrinsic_store_ssbo ? 
1 : 0; + + fs_reg surf_index; + if (nir_src_is_const(instr->src[src])) { + unsigned index = stage_prog_data->binding_table.ssbo_start + + nir_src_as_uint(instr->src[src]); + surf_index = brw_imm_ud(index); + } else { + surf_index = vgrf(glsl_type::uint_type); + bld.ADD(surf_index, get_nir_src(instr->src[src]), + brw_imm_ud(stage_prog_data->binding_table.ssbo_start)); + } + + return surf_index; +} + static unsigned image_intrinsic_coord_components(nir_intrinsic_instr *instr) { @@ -3889,18 +3749,20 @@ BRW_REGISTER_TYPE_UD); image = bld.emit_uniformize(image); + fs_reg srcs[TEX_LOGICAL_NUM_SRCS]; + srcs[TEX_LOGICAL_SRC_SURFACE] = image; + srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_d(0); + srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(0); + srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(0); + /* Since the image size is always uniform, we can just emit a SIMD8 * query instruction and splat the result out. */ const fs_builder ubld = bld.exec_all().group(8, 0); - /* The LOD also serves as the message payload */ - fs_reg lod = ubld.vgrf(BRW_REGISTER_TYPE_UD); - ubld.MOV(lod, brw_imm_ud(0)); - fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 4); - fs_inst *inst = ubld.emit(SHADER_OPCODE_IMAGE_SIZE, tmp, lod, image); - inst->mlen = 1; + fs_inst *inst = ubld.emit(SHADER_OPCODE_IMAGE_SIZE_LOGICAL, + tmp, srcs, ARRAY_SIZE(srcs)); inst->size_written = 4 * REG_SIZE; for (unsigned c = 0; c < instr->dest.ssa.num_components; ++c) { @@ -3981,13 +3843,13 @@ fs_reg src(UNIFORM, instr->const_index[0] / 4, dest.type); - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - if (const_offset) { - assert(const_offset->u32[0] % type_sz(dest.type) == 0); + if (nir_src_is_const(instr->src[0])) { + unsigned load_offset = nir_src_as_uint(instr->src[0]); + assert(load_offset % type_sz(dest.type) == 0); /* For 16-bit types we add the module of the const_index[0] * offset to access to not 32-bit aligned element */ - src.offset = const_offset->u32[0] + instr->const_index[0] % 4; 
+ src.offset = load_offset + instr->const_index[0] % 4; for (unsigned j = 0; j < instr->num_components; j++) { bld.MOV(offset(dest, bld, j), offset(src, bld, j)); @@ -4037,14 +3899,11 @@ } case nir_intrinsic_load_ubo: { - nir_const_value *const_index = nir_src_as_const_value(instr->src[0]); fs_reg surf_index; - - if (const_index) { + if (nir_src_is_const(instr->src[0])) { const unsigned index = stage_prog_data->binding_table.ubo_start + - const_index->u32[0]; + nir_src_as_uint(instr->src[0]); surf_index = brw_imm_ud(index); - brw_mark_surface_used(prog_data, index); } else { /* The block index is not a constant. Evaluate the index expression * per-channel and add the base UBO index; we have to select a value @@ -4054,17 +3913,9 @@ bld.ADD(surf_index, get_nir_src(instr->src[0]), brw_imm_ud(stage_prog_data->binding_table.ubo_start)); surf_index = bld.emit_uniformize(surf_index); - - /* Assume this may touch any UBO. It would be nice to provide - * a tighter bound, but the array information is already lowered away. - */ - brw_mark_surface_used(prog_data, - stage_prog_data->binding_table.ubo_start + - nir->info.num_ubos - 1); } - nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); - if (const_offset == NULL) { + if (!nir_src_is_const(instr->src[1])) { fs_reg base_offset = retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD); @@ -4081,11 +3932,12 @@ * and we have to split it if necessary. 
*/ const unsigned type_size = type_sz(dest.type); + const unsigned load_offset = nir_src_as_uint(instr->src[1]); /* See if we've selected this as a push constant candidate */ - if (const_index) { - const unsigned ubo_block = const_index->u32[0]; - const unsigned offset_256b = const_offset->u32[0] / 32; + if (nir_src_is_const(instr->src[0])) { + const unsigned ubo_block = nir_src_as_uint(instr->src[0]); + const unsigned offset_256b = load_offset / 32; fs_reg push_reg; for (int i = 0; i < 4; i++) { @@ -4095,7 +3947,7 @@ offset_256b < range->start + range->length) { push_reg = fs_reg(UNIFORM, UBO_START + i, dest.type); - push_reg.offset = const_offset->u32[0] - 32 * range->start; + push_reg.offset = load_offset - 32 * range->start; break; } } @@ -4114,7 +3966,7 @@ const fs_reg packed_consts = ubld.vgrf(BRW_REGISTER_TYPE_UD); for (unsigned c = 0; c < instr->num_components;) { - const unsigned base = const_offset->u32[0] + c * type_size; + const unsigned base = load_offset + c * type_size; /* Number of usable components in the next block-aligned load. 
*/ const unsigned count = MIN2(instr->num_components - c, (block_sz - base % block_sz) / type_size); @@ -4139,40 +3991,32 @@ case nir_intrinsic_load_ssbo: { assert(devinfo->gen >= 7); - nir_const_value *const_uniform_block = - nir_src_as_const_value(instr->src[0]); + const unsigned bit_size = nir_dest_bit_size(instr->dest); + fs_reg surf_index = get_nir_ssbo_intrinsic_index(bld, instr); + fs_reg offset_reg = retype(get_nir_src(instr->src[1]), + BRW_REGISTER_TYPE_UD); - fs_reg surf_index; - if (const_uniform_block) { - unsigned index = stage_prog_data->binding_table.ssbo_start + - const_uniform_block->u32[0]; - surf_index = brw_imm_ud(index); - brw_mark_surface_used(prog_data, index); - } else { - surf_index = vgrf(glsl_type::uint_type); - bld.ADD(surf_index, get_nir_src(instr->src[0]), - brw_imm_ud(stage_prog_data->binding_table.ssbo_start)); + /* Make dest unsigned because that's what the temporary will be */ + dest.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD); - /* Assume this may touch any UBO. It would be nice to provide - * a tighter bound, but the array information is already lowered away. 
- */ - brw_mark_surface_used(prog_data, - stage_prog_data->binding_table.ssbo_start + - nir->info.num_ssbos - 1); - } - - fs_reg offset_reg; - nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); - if (const_offset) { - offset_reg = brw_imm_ud(const_offset->u32[0]); + /* Read the vector */ + if (nir_intrinsic_align(instr) >= 4) { + assert(nir_dest_bit_size(instr->dest) == 32); + fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg, + 1 /* dims */, + instr->num_components, + BRW_PREDICATE_NONE); + for (unsigned i = 0; i < instr->num_components; i++) + bld.MOV(offset(dest, bld, i), offset(read_result, bld, i)); } else { - offset_reg = retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD); + assert(nir_dest_bit_size(instr->dest) <= 32); + assert(nir_dest_num_components(instr->dest) == 1); + fs_reg read_result = + emit_byte_scattered_read(bld, surf_index, offset_reg, + 1 /* dims */, 1, bit_size, + BRW_PREDICATE_NONE); + bld.MOV(dest, read_result); } - - /* Read the vector */ - do_untyped_vector_read(bld, dest, surf_index, offset_reg, - instr->num_components); - break; } @@ -4182,143 +4026,30 @@ if (stage == MESA_SHADER_FRAGMENT) brw_wm_prog_data(prog_data)->has_side_effects = true; - /* Block index */ - fs_reg surf_index; - nir_const_value *const_uniform_block = - nir_src_as_const_value(instr->src[1]); - if (const_uniform_block) { - unsigned index = stage_prog_data->binding_table.ssbo_start + - const_uniform_block->u32[0]; - surf_index = brw_imm_ud(index); - brw_mark_surface_used(prog_data, index); - } else { - surf_index = vgrf(glsl_type::uint_type); - bld.ADD(surf_index, get_nir_src(instr->src[1]), - brw_imm_ud(stage_prog_data->binding_table.ssbo_start)); - - brw_mark_surface_used(prog_data, - stage_prog_data->binding_table.ssbo_start + - nir->info.num_ssbos - 1); - } - - /* Value */ + const unsigned bit_size = nir_src_bit_size(instr->src[0]); fs_reg val_reg = get_nir_src(instr->src[0]); + fs_reg surf_index = 
get_nir_ssbo_intrinsic_index(bld, instr); + fs_reg offset_reg = retype(get_nir_src(instr->src[2]), + BRW_REGISTER_TYPE_UD); - /* Writemask */ - unsigned writemask = instr->const_index[0]; - - /* get_nir_src() retypes to integer. Be wary of 64-bit types though - * since the untyped writes below operate in units of 32-bits, which - * means that we need to write twice as many components each time. - * Also, we have to suffle 64-bit data to be in the appropriate layout - * expected by our 32-bit write messages. - */ - unsigned bit_size = nir_src_bit_size(instr->src[0]); - unsigned type_size = bit_size / 8; - - /* Combine groups of consecutive enabled channels in one write - * message. We use ffs to find the first enabled channel and then ffs on - * the bit-inverse, down-shifted writemask to determine the num_components - * of the block of enabled bits. - */ - while (writemask) { - unsigned first_component = ffs(writemask) - 1; - unsigned num_components = ffs(~(writemask >> first_component)) - 1; - fs_reg write_src = offset(val_reg, bld, first_component); - - nir_const_value *const_offset = nir_src_as_const_value(instr->src[2]); - - if (type_size > 4) { - /* We can't write more than 2 64-bit components at once. Limit - * the num_components of the write to what we can do and let the next - * iteration handle the rest. - */ - num_components = MIN2(2, num_components); - write_src = shuffle_for_32bit_write(bld, write_src, 0, - num_components); - } else if (type_size < 4) { - /* For 16-bit types we pack two consecutive values into a 32-bit - * word and use an untyped write message. For single values or not - * 32-bit-aligned we need to use byte-scattered writes because - * untyped writes works with 32-bit components with 32-bit - * alignment. byte_scattered_write messages only support one - * 16-bit component at a time. As VK_KHR_relaxed_block_layout - * could be enabled we can not guarantee that not constant offsets - * to be 32-bit aligned for 16-bit types. 
For example an array, of - * 16-bit vec3 with array element stride of 6. - * - * In the case of 32-bit aligned constant offsets if there is - * a 3-components vector we submit one untyped-write message - * of 32-bit (first two components), and one byte-scattered - * write message (the last component). - */ - - if ( !const_offset || ((const_offset->u32[0] + - type_size * first_component) % 4)) { - /* If we use a .yz writemask we also need to emit 2 - * byte-scattered write messages because of y-component not - * being aligned to 32-bit. - */ - num_components = 1; - } else if (num_components * type_size > 4 && - (num_components * type_size % 4)) { - /* If the pending components size is not a multiple of 4 bytes - * we left the not aligned components for following emits of - * length == 1 with byte_scattered_write. - */ - num_components -= (num_components * type_size % 4) / type_size; - } else if (num_components * type_size < 4) { - num_components = 1; - } - /* For num_components == 1 we are also shuffling the component - * because byte scattered writes of 16-bit need values to be dword - * aligned. Shuffling only one component would be the same as - * striding it. - */ - write_src = shuffle_for_32bit_write(bld, write_src, 0, - num_components); - } - - fs_reg offset_reg; - - if (const_offset) { - offset_reg = brw_imm_ud(const_offset->u32[0] + - type_size * first_component); - } else { - offset_reg = vgrf(glsl_type::uint_type); - bld.ADD(offset_reg, - retype(get_nir_src(instr->src[2]), BRW_REGISTER_TYPE_UD), - brw_imm_ud(type_size * first_component)); - } - - if (type_size < 4 && num_components == 1) { - /* Untyped Surface messages have a fixed 32-bit size, so we need - * to rely on byte scattered in order to write 16-bit elements. - * The byte_scattered_write message needs that every written 16-bit - * type to be aligned 32-bits (stride=2). 
- */ - emit_byte_scattered_write(bld, surf_index, offset_reg, - write_src, - 1 /* dims */, - bit_size, - BRW_PREDICATE_NONE); - } else { - assert(num_components * type_size <= 16); - assert((num_components * type_size) % 4 == 0); - assert(offset_reg.file != BRW_IMMEDIATE_VALUE || - offset_reg.ud % 4 == 0); - unsigned num_slots = (num_components * type_size) / 4; - - emit_untyped_write(bld, surf_index, offset_reg, - write_src, - 1 /* dims */, num_slots, - BRW_PREDICATE_NONE); - } + val_reg.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD); - /* Clear the bits in the writemask that we just wrote, then try - * again to see if more channels are left. - */ - writemask &= (15 << (first_component + num_components)); + assert(nir_intrinsic_write_mask(instr) == + (1u << instr->num_components) - 1); + if (nir_intrinsic_align(instr) >= 4) { + assert(nir_src_bit_size(instr->src[0]) == 32); + assert(nir_src_num_components(instr->src[0]) <= 4); + emit_untyped_write(bld, surf_index, offset_reg, val_reg, + 1 /* dims */, instr->num_components, + BRW_PREDICATE_NONE); + } else { + assert(nir_src_bit_size(instr->src[0]) <= 32); + assert(nir_src_num_components(instr->src[0]) == 1); + fs_reg write_src = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.MOV(write_src, val_reg); + emit_byte_scattered_write(bld, surf_index, offset_reg, + write_src, 1 /* dims */, bit_size, + BRW_PREDICATE_NONE); } break; } @@ -4326,9 +4057,7 @@ case nir_intrinsic_store_output: { fs_reg src = get_nir_src(instr->src[0]); - nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); - assert(const_offset && "Indirect output stores not allowed"); - + unsigned store_offset = nir_src_as_uint(instr->src[1]); unsigned num_components = instr->num_components; unsigned first_component = nir_intrinsic_component(instr); if (nir_src_bit_size(instr->src[0]) == 64) { @@ -4337,7 +4066,7 @@ } fs_reg new_dest = retype(offset(outputs[instr->const_index[0]], bld, - 4 * const_offset->u32[0]), src.type); + 4 * 
store_offset), src.type); for (unsigned j = 0; j < num_components; j++) { bld.MOV(offset(new_dest, bld, j + first_component), offset(src, bld, j)); @@ -4386,8 +4115,8 @@ break; case nir_intrinsic_get_buffer_size: { - nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]); - unsigned ssbo_index = const_uniform_block ? const_uniform_block->u32[0] : 0; + unsigned ssbo_index = nir_src_is_const(instr->src[0]) ? + nir_src_as_uint(instr->src[0]) : 0; /* A resinfo's sampler message is used to get the buffer size. The * SIMD8's writeback message consists of four registers and SIMD16's @@ -4440,8 +4169,6 @@ ubld.ADD(buffer_size, size_aligned4, negate(size_padding)); bld.MOV(retype(dest, ret_payload.type), component(buffer_size, 0)); - - brw_mark_surface_used(prog_data, index); break; } @@ -4623,11 +4350,10 @@ case nir_intrinsic_quad_broadcast: { const fs_reg value = get_nir_src(instr->src[0]); - nir_const_value *index = nir_src_as_const_value(instr->src[1]); - assert(nir_src_bit_size(instr->src[1]) == 32); + const unsigned index = nir_src_as_uint(instr->src[1]); bld.emit(SHADER_OPCODE_CLUSTER_BROADCAST, retype(dest, value.type), - value, brw_imm_ud(index->u32[0]), brw_imm_ud(4)); + value, brw_imm_ud(index), brw_imm_ud(4)); break; } @@ -4641,34 +4367,9 @@ const fs_reg tmp_left = horiz_stride(tmp, 2); const fs_reg tmp_right = horiz_stride(horiz_offset(tmp, 1), 2); - /* From the Cherryview PRM Vol. 7, "Register Region Restrictiosn": - * - * "When source or destination datatype is 64b or operation is - * integer DWord multiply, regioning in Align1 must follow - * these rules: - * - * [...] - * - * 3. Source and Destination offset must be the same, except - * the case of scalar source." - * - * In order to work around this, we have to emit two 32-bit MOVs instead - * of a single 64-bit MOV to do the shuffle. 
- */ - if (type_sz(value.type) > 4 && - (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) { - ubld.MOV(subscript(tmp_left, BRW_REGISTER_TYPE_D, 0), - subscript(src_right, BRW_REGISTER_TYPE_D, 0)); - ubld.MOV(subscript(tmp_left, BRW_REGISTER_TYPE_D, 1), - subscript(src_right, BRW_REGISTER_TYPE_D, 1)); - ubld.MOV(subscript(tmp_right, BRW_REGISTER_TYPE_D, 0), - subscript(src_left, BRW_REGISTER_TYPE_D, 0)); - ubld.MOV(subscript(tmp_right, BRW_REGISTER_TYPE_D, 1), - subscript(src_left, BRW_REGISTER_TYPE_D, 1)); - } else { - ubld.MOV(tmp_left, src_right); - ubld.MOV(tmp_right, src_left); - } + ubld.MOV(tmp_left, src_right); + ubld.MOV(tmp_right, src_left); + bld.MOV(retype(dest, value.type), tmp); break; } @@ -4835,26 +4536,7 @@ if (nir_intrinsic_infos[instr->intrinsic].has_dest) dest = get_nir_dest(instr->dest); - fs_reg surface; - nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]); - if (const_surface) { - unsigned surf_index = stage_prog_data->binding_table.ssbo_start + - const_surface->u32[0]; - surface = brw_imm_ud(surf_index); - brw_mark_surface_used(prog_data, surf_index); - } else { - surface = vgrf(glsl_type::uint_type); - bld.ADD(surface, get_nir_src(instr->src[0]), - brw_imm_ud(stage_prog_data->binding_table.ssbo_start)); - - /* Assume this may touch any SSBO. This is the same we do for other - * UBO/SSBO accesses with non-constant surface. 
- */ - brw_mark_surface_used(prog_data, - stage_prog_data->binding_table.ssbo_start + - nir->info.num_ssbos - 1); - } - + fs_reg surface = get_nir_ssbo_intrinsic_index(bld, instr); fs_reg offset = get_nir_src(instr->src[1]); fs_reg data1; if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC) @@ -4885,26 +4567,7 @@ if (nir_intrinsic_infos[instr->intrinsic].has_dest) dest = get_nir_dest(instr->dest); - fs_reg surface; - nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]); - if (const_surface) { - unsigned surf_index = stage_prog_data->binding_table.ssbo_start + - const_surface->u32[0]; - surface = brw_imm_ud(surf_index); - brw_mark_surface_used(prog_data, surf_index); - } else { - surface = vgrf(glsl_type::uint_type); - bld.ADD(surface, get_nir_src(instr->src[0]), - brw_imm_ud(stage_prog_data->binding_table.ssbo_start)); - - /* Assume this may touch any SSBO. This is the same we do for other - * UBO/SSBO accesses with non-constant surface. - */ - brw_mark_surface_used(prog_data, - stage_prog_data->binding_table.ssbo_start + - nir->info.num_ssbos - 1); - } - + fs_reg surface = get_nir_ssbo_intrinsic_index(bld, instr); fs_reg offset = get_nir_src(instr->src[1]); fs_reg data1 = get_nir_src(instr->src[2]); fs_reg data2; @@ -4940,9 +4603,9 @@ data2 = get_nir_src(instr->src[2]); /* Get the offset */ - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - if (const_offset) { - offset = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]); + if (nir_src_is_const(instr->src[0])) { + offset = brw_imm_ud(instr->const_index[0] + + nir_src_as_uint(instr->src[0])); } else { offset = vgrf(glsl_type::uint_type); bld.ADD(offset, @@ -4977,9 +4640,9 @@ data2 = get_nir_src(instr->src[2]); /* Get the offset */ - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - if (const_offset) { - offset = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]); + if (nir_src_is_const(instr->src[0])) { + offset = 
brw_imm_ud(instr->const_index[0] + + nir_src_as_uint(instr->src[0])); } else { offset = vgrf(glsl_type::uint_type); bld.ADD(offset, @@ -5062,6 +4725,10 @@ break; } break; + case nir_tex_src_min_lod: + srcs[TEX_LOGICAL_SRC_MIN_LOD] = + retype(get_nir_src_imm(instr->src[i].src), BRW_REGISTER_TYPE_F); + break; case nir_tex_src_ms_index: srcs[TEX_LOGICAL_SRC_SAMPLE_INDEX] = retype(src, BRW_REGISTER_TYPE_UD); break; @@ -5069,6 +4736,7 @@ case nir_tex_src_offset: { nir_const_value *const_offset = nir_src_as_const_value(instr->src[i].src); + assert(nir_src_bit_size(instr->src[i].src) == 32); unsigned offset_bits = 0; if (const_offset && brw_texture_offset(const_offset->i32, @@ -5086,15 +4754,6 @@ unreachable("should be lowered"); case nir_tex_src_texture_offset: { - /* Figure out the highest possible texture index and mark it as used */ - uint32_t max_used = texture + instr->texture_array_size - 1; - if (instr->op == nir_texop_tg4 && devinfo->gen < 8) { - max_used += stage_prog_data->binding_table.gather_texture_start; - } else { - max_used += stage_prog_data->binding_table.texture_start; - } - brw_mark_surface_used(prog_data, max_used); - /* Emit code to evaluate the actual indexing expression */ fs_reg tmp = vgrf(glsl_type::uint_type); bld.ADD(tmp, src, brw_imm_ud(texture)); @@ -5116,9 +4775,7 @@ break; case nir_tex_src_plane: { - nir_const_value *const_plane = - nir_src_as_const_value(instr->src[i].src); - const uint32_t plane = const_plane->u32[0]; + const uint32_t plane = nir_src_as_uint(instr->src[i].src); const uint32_t texture_index = instr->texture_index + stage_prog_data->binding_table.plane_start[plane] - diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_reg_allocate.cpp mesa-19.0.1/src/intel/compiler/brw_fs_reg_allocate.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs_reg_allocate.cpp 2018-07-14 15:13:03.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_reg_allocate.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -617,7 +617,9 @@ * highest register that works. 
*/ if (inst->eot) { - int size = alloc.sizes[inst->src[0].nr]; + const int vgrf = inst->opcode == SHADER_OPCODE_SEND ? + inst->src[2].nr : inst->src[0].nr; + int size = alloc.sizes[vgrf]; int reg = compiler->fs_reg_sets[rsi].class_to_ra_reg_range[size] - 1; /* If something happened to spill, we want to push the EOT send @@ -626,32 +628,30 @@ */ reg -= BRW_MAX_MRF(devinfo->gen) - first_used_mrf; - ra_set_node_reg(g, inst->src[0].nr, reg); + ra_set_node_reg(g, vgrf, reg); break; } } } - if (dispatch_width > 8) { - /* In 16-wide dispatch we have an issue where a compressed - * instruction is actually two instructions executed simultaneiously. - * It's actually ok to have the source and destination registers be - * the same. In this case, each instruction over-writes its own - * source and there's no problem. The real problem here is if the - * source and destination registers are off by one. Then you can end - * up in a scenario where the first instruction over-writes the - * source of the second instruction. Since the compiler doesn't know - * about this level of granularity, we simply make the source and - * destination interfere. - */ - foreach_block_and_inst(block, fs_inst, inst, cfg) { - if (inst->dst.file != VGRF) - continue; + /* In 16-wide instructions we have an issue where a compressed + * instruction is actually two instructions executed simultaneously. + * It's actually ok to have the source and destination registers be + * the same. In this case, each instruction over-writes its own + * source and there's no problem. The real problem here is if the + * source and destination registers are off by one. Then you can end + * up in a scenario where the first instruction over-writes the + * source of the second instruction. Since the compiler doesn't know + * about this level of granularity, we simply make the source and + * destination interfere. 
+ */ + foreach_block_and_inst(block, fs_inst, inst, cfg) { + if (inst->exec_size < 16 || inst->dst.file != VGRF) + continue; - for (int i = 0; i < inst->sources; ++i) { - if (inst->src[i].file == VGRF) { - ra_add_node_interference(g, inst->dst.nr, inst->src[i].nr); - } + for (int i = 0; i < inst->sources; ++i) { + if (inst->src[i].file == VGRF) { + ra_add_node_interference(g, inst->dst.nr, inst->src[i].nr); } } } @@ -667,15 +667,14 @@ * messages adding a node interference to the grf127_send_hack_node. * This node has a fixed asignment to grf127. * - * We don't apply it to SIMD16 because previous code avoids any register - * overlap between sources and destination. + * We don't apply it to SIMD16 instructions because previous code avoids + * any register overlap between sources and destination. */ ra_set_node_reg(g, grf127_send_hack_node, 127); - if (dispatch_width == 8) { - foreach_block_and_inst(block, fs_inst, inst, cfg) { - if (inst->is_send_from_grf() && inst->dst.file == VGRF) - ra_add_node_interference(g, inst->dst.nr, grf127_send_hack_node); - } + foreach_block_and_inst(block, fs_inst, inst, cfg) { + if (inst->exec_size < 16 && inst->is_send_from_grf() && + inst->dst.file == VGRF) + ra_add_node_interference(g, inst->dst.nr, grf127_send_hack_node); } if (spilled_any_registers) { @@ -695,6 +694,33 @@ } } + /* From the Skylake PRM Vol. 2a docs for sends: + * + * "It is required that the second block of GRFs does not overlap with + * the first block." + * + * Normally, this is taken care of by fixup_sends_duplicate_payload() but + * in the case where one of the registers is an undefined value, the + * register allocator may decide that they don't interfere even though + * they're used as sources in the same instruction. We also need to add + * interference here. 
+ */ + if (devinfo->gen >= 9) { + foreach_block_and_inst(block, fs_inst, inst, cfg) { + if (inst->opcode == SHADER_OPCODE_SEND && inst->ex_mlen > 0 && + inst->src[2].file == VGRF && + inst->src[3].file == VGRF && + inst->src[2].nr != inst->src[3].nr) { + for (unsigned i = 0; i < inst->mlen; i++) { + for (unsigned j = 0; j < inst->ex_mlen; j++) { + ra_add_node_interference(g, inst->src[2].nr + i, + inst->src[3].nr + j); + } + } + } + } + } + /* Debug of register spilling: Go spill everything. */ if (unlikely(spill_all)) { int reg = choose_spill_reg(g); @@ -914,7 +940,7 @@ } void -fs_visitor::spill_reg(int spill_reg) +fs_visitor::spill_reg(unsigned spill_reg) { int size = alloc.sizes[spill_reg]; unsigned int spill_offset = last_scratch; diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_register_coalesce.cpp mesa-19.0.1/src/intel/compiler/brw_fs_register_coalesce.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs_register_coalesce.cpp 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_register_coalesce.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -158,7 +158,7 @@ int src_size = 0; int channels_remaining = 0; - int src_reg = -1, dst_reg = -1; + unsigned src_reg = ~0u, dst_reg = ~0u; int dst_reg_offset[MAX_VGRF_SIZE]; fs_inst *mov[MAX_VGRF_SIZE]; int dst_var[MAX_VGRF_SIZE]; @@ -221,7 +221,7 @@ if (dst_reg_offset[i] != dst_reg_offset[0] + i) { /* Registers are out-of-order. 
*/ can_coalesce = false; - src_reg = -1; + src_reg = ~0u; break; } @@ -231,7 +231,7 @@ if (!can_coalesce_vars(live_intervals, cfg, inst, dst_var[i], src_var[i])) { can_coalesce = false; - src_reg = -1; + src_reg = ~0u; break; } } @@ -278,7 +278,7 @@ MAX2(live_intervals->end[dst_var[i]], live_intervals->end[src_var[i]]); } - src_reg = -1; + src_reg = ~0u; } if (progress) { diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_sel_peephole.cpp mesa-19.0.1/src/intel/compiler/brw_fs_sel_peephole.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs_sel_peephole.cpp 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_sel_peephole.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -198,8 +198,7 @@ */ fs_reg src0(then_mov[i]->src[0]); if (src0.file == IMM) { - src0 = vgrf(glsl_type::float_type); - src0.type = then_mov[i]->src[0].type; + src0 = ibld.vgrf(then_mov[i]->src[0].type); ibld.MOV(src0, then_mov[i]->src[0]); } diff -Nru mesa-18.3.3/src/intel/compiler/brw_inst.h mesa-19.0.1/src/intel/compiler/brw_inst.h --- mesa-18.3.3/src/intel/compiler/brw_inst.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_inst.h 2019-03-31 23:16:37.000000000 +0000 @@ -455,6 +455,19 @@ FC(gen4_pop_count, 115, 112, devinfo->gen < 6) /** @} */ +/** + * SEND instructions: + * @{ + */ +FC(send_ex_desc_ia_subreg_nr, 82, 80, devinfo->gen >= 9) +FC(send_src0_address_mode, 79, 79, devinfo->gen >= 9) +FC(send_sel_reg32_desc, 77, 77, devinfo->gen >= 9) +FC(send_sel_reg32_ex_desc, 61, 61, devinfo->gen >= 9) +FC(send_src1_reg_nr, 51, 44, devinfo->gen >= 9) +FC(send_src1_reg_file, 36, 36, devinfo->gen >= 9) +FC(send_dst_reg_file, 35, 35, devinfo->gen >= 9) +/** @} */ + /* Message descriptor bits */ #define MD(x) ((x) + 96) @@ -513,11 +526,21 @@ brw_inst *inst, uint32_t value) { assert(devinfo->gen >= 9); - brw_inst_set_bits(inst, 94, 91, (value >> 28) & ((1u << 4) - 1)); - brw_inst_set_bits(inst, 88, 85, (value >> 24) & ((1u << 4) - 1)); - brw_inst_set_bits(inst, 83, 80, 
(value >> 20) & ((1u << 4) - 1)); - brw_inst_set_bits(inst, 67, 64, (value >> 16) & ((1u << 4) - 1)); - assert((value & ((1u << 16) - 1)) == 0); + if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC) { + brw_inst_set_bits(inst, 94, 91, GET_BITS(value, 31, 28)); + brw_inst_set_bits(inst, 88, 85, GET_BITS(value, 27, 24)); + brw_inst_set_bits(inst, 83, 80, GET_BITS(value, 23, 20)); + brw_inst_set_bits(inst, 67, 64, GET_BITS(value, 19, 16)); + assert(GET_BITS(value, 15, 0) == 0); + } else { + assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC); + brw_inst_set_bits(inst, 95, 80, GET_BITS(value, 31, 16)); + assert(GET_BITS(value, 15, 10) == 0); + brw_inst_set_bits(inst, 67, 64, GET_BITS(value, 9, 6)); + assert(GET_BITS(value, 5, 0) == 0); + } } /** @@ -530,10 +553,18 @@ const brw_inst *inst) { assert(devinfo->gen >= 9); - return (brw_inst_bits(inst, 94, 91) << 28 | - brw_inst_bits(inst, 88, 85) << 24 | - brw_inst_bits(inst, 83, 80) << 20 | - brw_inst_bits(inst, 67, 64) << 16); + if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC) { + return (brw_inst_bits(inst, 94, 91) << 28 | + brw_inst_bits(inst, 88, 85) << 24 | + brw_inst_bits(inst, 83, 80) << 20 | + brw_inst_bits(inst, 67, 64) << 16); + } else { + assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC); + return (brw_inst_bits(inst, 95, 80) << 16 | + brw_inst_bits(inst, 67, 64) << 6); + } } /** @@ -933,10 +964,11 @@ { \ assert((value & ~0x3ff) == 0); \ if (devinfo->gen >= 8) { \ - brw_inst_set_bits(inst, g8_high, g8_low, value & 0x1ff); \ - brw_inst_set_bits(inst, g8_nine, g8_nine, value >> 9); \ + assert(GET_BITS(value, 3, 0) == 0); \ + brw_inst_set_bits(inst, g8_high, g8_low, GET_BITS(value, 8, 4)); \ + brw_inst_set_bits(inst, g8_nine, g8_nine, GET_BITS(value, 9, 9)); \ } else { 
\ - brw_inst_set_bits(inst, g4_high, g4_low, value >> 9); \ + brw_inst_set_bits(inst, g4_high, g4_low, value); \ } \ } \ static inline unsigned \ @@ -944,7 +976,7 @@ const brw_inst *inst) \ { \ if (devinfo->gen >= 8) { \ - return brw_inst_bits(inst, g8_high, g8_low) | \ + return (brw_inst_bits(inst, g8_high, g8_low) << 4) | \ (brw_inst_bits(inst, g8_nine, g8_nine) << 9); \ } else { \ return brw_inst_bits(inst, g4_high, g4_low); \ @@ -955,9 +987,11 @@ * Compared to Align1, these are missing the low 4 bits. * -Gen 4- ----Gen8---- */ -BRW_IA16_ADDR_IMM(src1, 105, 96, 121, 104, 100) -BRW_IA16_ADDR_IMM(src0, 73, 64, 95, 72, 68) -BRW_IA16_ADDR_IMM(dst, 57, 52, 47, 56, 52) +BRW_IA16_ADDR_IMM(src1, 105, 96, 121, 104, 100) +BRW_IA16_ADDR_IMM(src0, 73, 64, 95, 72, 68) +BRW_IA16_ADDR_IMM(dst, 57, 52, 47, 56, 52) +BRW_IA16_ADDR_IMM(send_src0, -1, -1, 78, 72, 68) +BRW_IA16_ADDR_IMM(send_dst, -1, -1, 62, 56, 52) /** * Fetch a set of contiguous bits from the instruction. diff -Nru mesa-18.3.3/src/intel/compiler/brw_ir_allocator.h mesa-19.0.1/src/intel/compiler/brw_ir_allocator.h --- mesa-18.3.3/src/intel/compiler/brw_ir_allocator.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_ir_allocator.h 2019-03-31 23:16:37.000000000 +0000 @@ -47,6 +47,7 @@ unsigned allocate(unsigned size) { + assert(size > 0); if (capacity <= count) { capacity = MAX2(16, capacity * 2); sizes = (unsigned *)realloc(sizes, capacity * sizeof(unsigned)); diff -Nru mesa-18.3.3/src/intel/compiler/brw_ir_fs.h mesa-19.0.1/src/intel/compiler/brw_ir_fs.h --- mesa-18.3.3/src/intel/compiler/brw_ir_fs.h 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_ir_fs.h 2019-03-31 23:16:37.000000000 +0000 @@ -347,18 +347,24 @@ void resize_sources(uint8_t num_sources); - bool equals(fs_inst *inst) const; bool is_send_from_grf() const; bool is_partial_write() const; bool is_copy_payload(const brw::simple_allocator &grf_alloc) const; unsigned components_read(unsigned i) const; 
unsigned size_read(int arg) const; - bool can_do_source_mods(const struct gen_device_info *devinfo); + bool can_do_source_mods(const struct gen_device_info *devinfo) const; bool can_do_cmod(); bool can_change_types() const; bool has_source_and_destination_hazard() const; /** + * Return whether \p arg is a control source of a virtual instruction which + * shouldn't contribute to the execution type and usual regioning + * restriction calculations of arithmetic instructions. + */ + bool is_control_source(unsigned arg) const; + + /** * Return the subset of flag registers read by the instruction as a bitset * with byte granularity. */ @@ -462,7 +468,8 @@ brw_reg_type exec_type = BRW_REGISTER_TYPE_B; for (int i = 0; i < inst->sources; i++) { - if (inst->src[i].file != BAD_FILE) { + if (inst->src[i].file != BAD_FILE && + !inst->is_control_source(i)) { const brw_reg_type t = get_exec_type(inst->src[i].type); if (type_sz(t) > type_sz(exec_type)) exec_type = t; @@ -477,6 +484,27 @@ assert(exec_type != BRW_REGISTER_TYPE_B); + /* Promotion of the execution type to 32-bit for conversions from or to + * half-float seems to be consistent with the following text from the + * Cherryview PRM Vol. 7, "Execution Data Type": + * + * "When single precision and half precision floats are mixed between + * source operands or between source and destination operand [..] single + * precision float is the execution datatype." + * + * and from "Register Region Restrictions": + * + * "Conversion between Integer and HF (Half Float) must be DWord aligned + * and strided by a DWord on the destination." + */ + if (type_sz(exec_type) == 2 && + inst->dst.type != exec_type) { + if (exec_type == BRW_REGISTER_TYPE_HF) + exec_type = BRW_REGISTER_TYPE_F; + else if (inst->dst.type == BRW_REGISTER_TYPE_HF) + exec_type = BRW_REGISTER_TYPE_D; + } + return exec_type; } @@ -487,6 +515,16 @@ } /** + * Return whether the instruction isn't an ALU instruction and cannot be + * assumed to complete in-order. 
+ */ +static inline bool +is_unordered(const fs_inst *inst) +{ + return inst->mlen || inst->is_send_from_grf() || inst->is_math(); +} + +/** * Return whether the following regioning restriction applies to the specified * instruction. From the Cherryview PRM Vol 7. "Register Region * Restrictions": diff -Nru mesa-18.3.3/src/intel/compiler/brw_nir_analyze_boolean_resolves.c mesa-19.0.1/src/intel/compiler/brw_nir_analyze_boolean_resolves.c --- mesa-18.3.3/src/intel/compiler/brw_nir_analyze_boolean_resolves.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_nir_analyze_boolean_resolves.c 2019-03-31 23:16:37.000000000 +0000 @@ -109,18 +109,18 @@ uint8_t resolve_status; nir_alu_instr *alu = nir_instr_as_alu(instr); switch (alu->op) { - case nir_op_ball_fequal2: - case nir_op_ball_iequal2: - case nir_op_ball_fequal3: - case nir_op_ball_iequal3: - case nir_op_ball_fequal4: - case nir_op_ball_iequal4: - case nir_op_bany_fnequal2: - case nir_op_bany_inequal2: - case nir_op_bany_fnequal3: - case nir_op_bany_inequal3: - case nir_op_bany_fnequal4: - case nir_op_bany_inequal4: + case nir_op_b32all_fequal2: + case nir_op_b32all_iequal2: + case nir_op_b32all_fequal3: + case nir_op_b32all_iequal3: + case nir_op_b32all_fequal4: + case nir_op_b32all_iequal4: + case nir_op_b32any_fnequal2: + case nir_op_b32any_inequal2: + case nir_op_b32any_fnequal3: + case nir_op_b32any_inequal3: + case nir_op_b32any_fnequal4: + case nir_op_b32any_inequal4: /* These are only implemented by the vec4 backend and its * implementation emits resolved booleans. 
At some point in the * future, this may change and we'll have to remove some of the diff -Nru mesa-18.3.3/src/intel/compiler/brw_nir_analyze_ubo_ranges.c mesa-19.0.1/src/intel/compiler/brw_nir_analyze_ubo_ranges.c --- mesa-18.3.3/src/intel/compiler/brw_nir_analyze_ubo_ranges.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_nir_analyze_ubo_ranges.c 2019-03-31 23:16:37.000000000 +0000 @@ -147,12 +147,11 @@ continue; /* Not a uniform or UBO intrinsic */ } - nir_const_value *block_const = nir_src_as_const_value(intrin->src[0]); - nir_const_value *offset_const = nir_src_as_const_value(intrin->src[1]); - - if (block_const && offset_const) { - const int block = block_const->u32[0]; - const int offset = offset_const->u32[0] / 32; + if (nir_src_is_const(intrin->src[0]) && + nir_src_is_const(intrin->src[1])) { + const int block = nir_src_as_uint(intrin->src[0]); + const unsigned byte_offset = nir_src_as_uint(intrin->src[1]); + const int offset = byte_offset / 32; /* Avoid shifting by larger than the width of our bitfield, as this * is undefined in C. Even if we require multiple bits to represent @@ -166,8 +165,8 @@ /* The value might span multiple 32-byte chunks. */ const int bytes = nir_intrinsic_dest_components(intrin) * (nir_dest_bit_size(intrin->dest) / 8); - const int start = ROUND_DOWN_TO(offset_const->u32[0], 32); - const int end = ALIGN(offset_const->u32[0] + bytes, 32); + const int start = ROUND_DOWN_TO(byte_offset, 32); + const int end = ALIGN(byte_offset + bytes, 32); const int chunks = (end - start) / 32; /* TODO: should we count uses in loops as higher benefit? 
*/ diff -Nru mesa-18.3.3/src/intel/compiler/brw_nir.c mesa-19.0.1/src/intel/compiler/brw_nir.c --- mesa-18.3.3/src/intel/compiler/brw_nir.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_nir.c 2019-03-31 23:16:37.000000000 +0000 @@ -527,7 +527,7 @@ if (compiler->glsl_compiler_options[stage].EmitNoIndirectOutput) indirect_mask |= nir_var_shader_out; if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp) - indirect_mask |= nir_var_local; + indirect_mask |= nir_var_function_temp; return indirect_mask; } @@ -542,8 +542,9 @@ bool progress; do { progress = false; - OPT(nir_split_array_vars, nir_var_local); - OPT(nir_shrink_vec_array_vars, nir_var_local); + OPT(nir_split_array_vars, nir_var_function_temp); + OPT(nir_shrink_vec_array_vars, nir_var_function_temp); + OPT(nir_opt_deref); OPT(nir_lower_vars_to_ssa); if (allow_copies) { /* Only run this pass in the first call to brw_nir_optimize. Later @@ -568,8 +569,20 @@ OPT(nir_copy_prop); OPT(nir_opt_dce); OPT(nir_opt_cse); - OPT(nir_opt_peephole_select, 0); + + /* For indirect loads of uniforms (push constants), we assume that array + * indices will nearly always be in bounds and the cost of the load is + * low. Therefore there shouldn't be a performance benefit to avoid it. + * However, in vec4 tessellation shaders, these loads operate by + * actually pulling from memory. 
+ */ + const bool is_vec4_tessellation = !is_scalar && + (nir->info.stage == MESA_SHADER_TESS_CTRL || + nir->info.stage == MESA_SHADER_TESS_EVAL); + OPT(nir_opt_peephole_select, 0, !is_vec4_tessellation); + OPT(nir_opt_intrinsics); + OPT(nir_opt_idiv_const, 32); OPT(nir_opt_algebraic); OPT(nir_opt_constant_folding); OPT(nir_opt_dead_cf); @@ -587,22 +600,13 @@ } OPT(nir_opt_remove_phis); OPT(nir_opt_undef); - OPT(nir_lower_doubles, nir_lower_drcp | - nir_lower_dsqrt | - nir_lower_drsq | - nir_lower_dtrunc | - nir_lower_dfloor | - nir_lower_dceil | - nir_lower_dfract | - nir_lower_dround_even | - nir_lower_dmod); OPT(nir_lower_pack); } while (progress); /* Workaround Gfxbench unused local sampler variable which will trigger an * assert in the opt_large_constants pass. */ - OPT(nir_remove_dead_variables, nir_var_local); + OPT(nir_remove_dead_variables, nir_var_function_temp); return nir; } @@ -643,6 +647,76 @@ const bool is_scalar = compiler->scalar_stage[nir->info.stage]; + if (is_scalar) { + OPT(nir_lower_alu_to_scalar); + } + + /* Run opt_algebraic before int64 lowering so we can hopefully get rid + * of some int64 instructions. + */ + OPT(nir_opt_algebraic); + + /* Lower 64-bit operations before nir_optimize so that loop unrolling sees + * their actual cost. 
+ */ + nir_lower_int64_options int64_options = + nir_lower_imul64 | + nir_lower_isign64 | + nir_lower_divmod64 | + nir_lower_imul_high64; + nir_lower_doubles_options fp64_options = + nir_lower_drcp | + nir_lower_dsqrt | + nir_lower_drsq | + nir_lower_dtrunc | + nir_lower_dfloor | + nir_lower_dceil | + nir_lower_dfract | + nir_lower_dround_even | + nir_lower_dmod; + + if (!devinfo->has_64bit_types) { + int64_options |= nir_lower_mov64 | + nir_lower_icmp64 | + nir_lower_iadd64 | + nir_lower_iabs64 | + nir_lower_ineg64 | + nir_lower_logic64 | + nir_lower_minmax64 | + nir_lower_shift64; + fp64_options |= nir_lower_fp64_full_software; + } + + bool lowered_64bit_ops = false; + do { + progress = false; + + OPT(nir_lower_int64, int64_options); + OPT(nir_lower_doubles, fp64_options); + + /* Necessary to lower add -> sub and div -> mul/rcp */ + OPT(nir_opt_algebraic); + + lowered_64bit_ops |= progress; + } while (progress); + + if (lowered_64bit_ops) { + OPT(nir_lower_constant_initializers, nir_var_function_temp); + OPT(nir_lower_returns); + OPT(nir_inline_functions); + OPT(nir_opt_deref); + } + + const nir_function *entry_point = nir_shader_get_entrypoint(nir)->function; + foreach_list_typed_safe(nir_function, func, node, &nir->functions) { + if (func != entry_point) { + exec_node_remove(&func->node); + } + } + assert(exec_list_length(&nir->functions) == 1); + + OPT(nir_lower_constant_initializers, ~nir_var_function_temp); + if (nir->info.stage == MESA_SHADER_GEOMETRY) OPT(nir_lower_gs_intrinsics); @@ -656,6 +730,9 @@ .lower_txf_offset = true, .lower_rect_offset = true, .lower_txd_cube_map = true, + .lower_txb_shadow_clamp = true, + .lower_txd_shadow_clamp = true, + .lower_txd_offset_clamp = true, }; OPT(nir_lower_tex, &tex_options); @@ -664,19 +741,7 @@ OPT(nir_lower_global_vars_to_local); OPT(nir_split_var_copies); - OPT(nir_split_struct_vars, nir_var_local); - - /* Run opt_algebraic before int64 lowering so we can hopefully get rid - * of some int64 instructions. 
- */ - OPT(nir_opt_algebraic); - - /* Lower int64 instructions before nir_optimize so that loop unrolling - * sees their actual cost. - */ - OPT(nir_lower_int64, nir_lower_imul64 | - nir_lower_isign64 | - nir_lower_divmod64); + OPT(nir_split_struct_vars, nir_var_function_temp); nir = brw_nir_optimize(nir, compiler, is_scalar, true); @@ -714,6 +779,19 @@ brw_nir_no_indirect_mask(compiler, nir->info.stage); OPT(nir_lower_indirect_derefs, indirect_mask); + OPT(brw_nir_lower_mem_access_bit_sizes); + + /* Lower array derefs of vectors for SSBO and UBO loads. For both UBOs and + * SSBOs, our back-end is capable of loading an entire vec4 at a time and + * we would like to take advantage of that whenever possible regardless of + * whether or not the app gives us full loads. This should allow the + * optimizer to combine UBO and SSBO load operations and save us some send + * messages. + */ + OPT(nir_lower_array_deref_of_vec, + nir_var_mem_ubo | nir_var_mem_ssbo, + nir_lower_direct_array_deref_of_vec_load); + /* Get rid of split copies */ nir = brw_nir_optimize(nir, compiler, is_scalar, false); @@ -740,6 +818,9 @@ *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar, false); } + if (nir_link_opt_varyings(*producer, *consumer)) + *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar, false); + NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out); NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in); @@ -759,6 +840,23 @@ *producer = brw_nir_optimize(*producer, compiler, p_is_scalar, false); *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar, false); } + + NIR_PASS_V(*producer, nir_lower_io_to_vector, nir_var_shader_out); + NIR_PASS_V(*consumer, nir_lower_io_to_vector, nir_var_shader_in); + + if ((*producer)->info.stage != MESA_SHADER_TESS_CTRL) { + /* Calling lower_io_to_vector creates output variable writes with + * write-masks. 
On non-TCS outputs, the back-end can't handle it and we + * need to call nir_lower_io_to_temporaries to get rid of them. This, + * in turn, creates temporary variables and extra copy_deref intrinsics + * that we need to clean up. + */ + NIR_PASS_V(*producer, nir_lower_io_to_temporaries, + nir_shader_get_entrypoint(*producer), true, false); + NIR_PASS_V(*producer, nir_lower_global_vars_to_local); + NIR_PASS_V(*producer, nir_split_var_copies); + NIR_PASS_V(*producer, nir_lower_var_copies); + } } /* Prepare the given shader for codegen @@ -793,11 +891,13 @@ OPT(nir_opt_algebraic_late); - OPT(nir_lower_to_source_mods); + OPT(nir_lower_to_source_mods, nir_lower_all_source_mods); OPT(nir_copy_prop); OPT(nir_opt_dce); OPT(nir_opt_move_comparisons); + OPT(nir_lower_bool_to_int32); + OPT(nir_lower_locals_to_regs); if (unlikely(debug_enabled)) { @@ -847,7 +947,9 @@ bool is_scalar) { const struct gen_device_info *devinfo = compiler->devinfo; - nir_lower_tex_options tex_options = { 0 }; + nir_lower_tex_options tex_options = { + .lower_txd_clamp_if_sampler_index_not_lt_16 = true, + }; /* Iron Lake and prior require lowering of all rectangle textures */ if (devinfo->gen < 6) @@ -877,6 +979,7 @@ tex_options.lower_y_u_v_external = key_tex->y_u_v_image_mask; tex_options.lower_yx_xuxv_external = key_tex->yx_xuxv_image_mask; tex_options.lower_xy_uxvx_external = key_tex->xy_uxvx_image_mask; + tex_options.lower_ayuv_external = key_tex->ayuv_image_mask; if (nir_lower_tex(nir, &tex_options)) { nir_validate_shader(nir, "after nir_lower_tex"); @@ -973,8 +1076,7 @@ nir_intrinsic_instr *load; nir_intrinsic_instr *store; nir_ssa_def *zero = nir_imm_int(&b, 0); - nir_ssa_def *invoc_id = - nir_load_system_value(&b, nir_intrinsic_load_invocation_id, 0); + nir_ssa_def *invoc_id = nir_load_invocation_id(&b); nir->info.inputs_read = key->outputs_written & ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); diff -Nru mesa-18.3.3/src/intel/compiler/brw_nir.h 
mesa-19.0.1/src/intel/compiler/brw_nir.h --- mesa-18.3.3/src/intel/compiler/brw_nir.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_nir.h 2019-03-31 23:16:37.000000000 +0000 @@ -119,6 +119,8 @@ void brw_nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *index); +bool brw_nir_lower_mem_access_bit_sizes(nir_shader *shader); + nir_shader *brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler, bool is_scalar); diff -Nru mesa-18.3.3/src/intel/compiler/brw_nir_lower_cs_intrinsics.c mesa-19.0.1/src/intel/compiler/brw_nir_lower_cs_intrinsics.c --- mesa-18.3.3/src/intel/compiler/brw_nir_lower_cs_intrinsics.c 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_nir_lower_cs_intrinsics.c 2019-03-31 23:16:37.000000000 +0000 @@ -70,39 +70,6 @@ break; } - case nir_intrinsic_load_local_invocation_id: { - /* We lower gl_LocalInvocationID from gl_LocalInvocationIndex based - * on this formula: - * - * gl_LocalInvocationID.x = - * gl_LocalInvocationIndex % gl_WorkGroupSize.x; - * gl_LocalInvocationID.y = - * (gl_LocalInvocationIndex / gl_WorkGroupSize.x) % - * gl_WorkGroupSize.y; - * gl_LocalInvocationID.z = - * (gl_LocalInvocationIndex / - * (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) % - * gl_WorkGroupSize.z; - */ - unsigned *size = nir->info.cs.local_size; - - nir_ssa_def *local_index = nir_load_local_invocation_index(b); - - nir_const_value uvec3; - memset(&uvec3, 0, sizeof(uvec3)); - uvec3.u32[0] = 1; - uvec3.u32[1] = size[0]; - uvec3.u32[2] = size[0] * size[1]; - nir_ssa_def *div_val = nir_build_imm(b, 3, 32, uvec3); - uvec3.u32[0] = size[0]; - uvec3.u32[1] = size[1]; - uvec3.u32[2] = size[2]; - nir_ssa_def *mod_val = nir_build_imm(b, 3, 32, uvec3); - - sysval = nir_umod(b, nir_udiv(b, local_index, div_val), mod_val); - break; - } - case nir_intrinsic_load_subgroup_id: if (state->local_workgroup_size > 8) continue; diff -Nru mesa-18.3.3/src/intel/compiler/brw_nir_lower_image_load_store.c 
mesa-19.0.1/src/intel/compiler/brw_nir_lower_image_load_store.c --- mesa-18.3.3/src/intel/compiler/brw_nir_lower_image_load_store.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_nir_lower_image_load_store.c 2019-03-31 23:16:37.000000000 +0000 @@ -544,38 +544,16 @@ break; case ISL_SFLOAT: - if (image.bits[0] == 16) { - nir_ssa_def *f16comps[4]; - for (unsigned i = 0; i < image.chans; i++) { - f16comps[i] = nir_pack_half_2x16_split(b, nir_channel(b, color, i), - nir_imm_float(b, 0)); - } - color = nir_vec(b, f16comps, image.chans); - } + if (image.bits[0] == 16) + color = nir_format_float_to_half(b, color); break; case ISL_UINT: - if (image.bits[0] < 32) { - nir_const_value max; - for (unsigned i = 0; i < image.chans; i++) { - assert(image.bits[i] < 32); - max.u32[i] = (1u << image.bits[i]) - 1; - } - color = nir_umin(b, color, nir_build_imm(b, image.chans, 32, max)); - } + color = nir_format_clamp_uint(b, color, image.bits); break; case ISL_SINT: - if (image.bits[0] < 32) { - nir_const_value min, max; - for (unsigned i = 0; i < image.chans; i++) { - assert(image.bits[i] < 32); - max.i32[i] = (1 << (image.bits[i] - 1)) - 1; - min.i32[i] = -(1 << (image.bits[i] - 1)); - } - color = nir_imin(b, color, nir_build_imm(b, image.chans, 32, max)); - color = nir_imax(b, color, nir_build_imm(b, image.chans, 32, min)); - } + color = nir_format_clamp_sint(b, color, image.bits); break; default: diff -Nru mesa-18.3.3/src/intel/compiler/brw_nir_lower_mem_access_bit_sizes.c mesa-19.0.1/src/intel/compiler/brw_nir_lower_mem_access_bit_sizes.c --- mesa-18.3.3/src/intel/compiler/brw_nir_lower_mem_access_bit_sizes.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_nir_lower_mem_access_bit_sizes.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,313 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files 
(the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_nir.h" +#include "compiler/nir/nir_builder.h" +#include "util/u_math.h" +#include "util/bitscan.h" + +static nir_ssa_def * +dup_mem_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, + nir_ssa_def *store_src, int offset, + unsigned num_components, unsigned bit_size, + unsigned align) +{ + const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic]; + + nir_intrinsic_instr *dup = + nir_intrinsic_instr_create(b->shader, intrin->intrinsic); + + nir_src *intrin_offset_src = nir_get_io_offset_src(intrin); + for (unsigned i = 0; i < info->num_srcs; i++) { + assert(intrin->src[i].is_ssa); + if (i == 0 && store_src) { + assert(!info->has_dest); + assert(&intrin->src[i] != intrin_offset_src); + dup->src[i] = nir_src_for_ssa(store_src); + } else if (&intrin->src[i] == intrin_offset_src) { + dup->src[i] = nir_src_for_ssa(nir_iadd_imm(b, intrin->src[i].ssa, + offset)); + } else { + dup->src[i] = nir_src_for_ssa(intrin->src[i].ssa); + } + } + + dup->num_components = 
num_components; + + for (unsigned i = 0; i < info->num_indices; i++) + dup->const_index[i] = intrin->const_index[i]; + + nir_intrinsic_set_align(dup, align, 0); + + if (info->has_dest) { + assert(intrin->dest.is_ssa); + nir_ssa_dest_init(&dup->instr, &dup->dest, + num_components, bit_size, + intrin->dest.ssa.name); + } else { + nir_intrinsic_set_write_mask(dup, (1 << num_components) - 1); + } + + nir_builder_instr_insert(b, &dup->instr); + + return info->has_dest ? &dup->dest.ssa : NULL; +} + +static bool +lower_mem_load_bit_size(nir_builder *b, nir_intrinsic_instr *intrin) +{ + assert(intrin->dest.is_ssa); + if (intrin->dest.ssa.bit_size == 32) + return false; + + const unsigned bit_size = intrin->dest.ssa.bit_size; + const unsigned num_components = intrin->dest.ssa.num_components; + const unsigned bytes_read = num_components * (bit_size / 8); + const unsigned align = nir_intrinsic_align(intrin); + + nir_ssa_def *result[4] = { NULL, }; + + nir_src *offset_src = nir_get_io_offset_src(intrin); + if (bit_size < 32 && nir_src_is_const(*offset_src)) { + /* The offset is constant so we can use a 32-bit load and just shift it + * around as needed. + */ + const int load_offset = nir_src_as_uint(*offset_src) % 4; + assert(load_offset % (bit_size / 8) == 0); + const unsigned load_comps32 = DIV_ROUND_UP(bytes_read + load_offset, 4); + /* A 16-bit vec4 is a 32-bit vec2. We add an extra component in case + * we offset into a component with load_offset. 
+ */ + assert(load_comps32 <= 3); + + nir_ssa_def *load = dup_mem_intrinsic(b, intrin, NULL, -load_offset, + load_comps32, 32, 4); + nir_ssa_def *unpacked[3]; + for (unsigned i = 0; i < load_comps32; i++) + unpacked[i] = nir_unpack_bits(b, nir_channel(b, load, i), bit_size); + + assert(load_offset % (bit_size / 8) == 0); + const unsigned divisor = 32 / bit_size; + + for (unsigned i = 0; i < num_components; i++) { + unsigned load_i = i + load_offset / (bit_size / 8); + result[i] = nir_channel(b, unpacked[load_i / divisor], + load_i % divisor); + } + } else { + /* Otherwise, we have to break it into smaller loads */ + unsigned res_idx = 0; + int load_offset = 0; + while (load_offset < bytes_read) { + const unsigned bytes_left = bytes_read - load_offset; + unsigned load_bit_size, load_comps; + if (align < 4) { + load_comps = 1; + /* Choose a byte, word, or dword */ + load_bit_size = util_next_power_of_two(MIN2(bytes_left, 4)) * 8; + } else { + assert(load_offset % 4 == 0); + load_bit_size = 32; + load_comps = DIV_ROUND_UP(MIN2(bytes_left, 16), 4); + } + + nir_ssa_def *load = dup_mem_intrinsic(b, intrin, NULL, load_offset, + load_comps, load_bit_size, + align); + + nir_ssa_def *unpacked = nir_bitcast_vector(b, load, bit_size); + for (unsigned i = 0; i < unpacked->num_components; i++) { + if (res_idx < num_components) + result[res_idx++] = nir_channel(b, unpacked, i); + } + + load_offset += load_comps * (load_bit_size / 8); + } + } + + nir_ssa_def *vec_result = nir_vec(b, result, num_components); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(vec_result)); + nir_instr_remove(&intrin->instr); + + return true; +} + +static bool +lower_mem_store_bit_size(nir_builder *b, nir_intrinsic_instr *intrin) +{ + assert(intrin->src[0].is_ssa); + nir_ssa_def *value = intrin->src[0].ssa; + + assert(intrin->num_components == value->num_components); + const unsigned bit_size = value->bit_size; + const unsigned num_components = intrin->num_components; + const unsigned 
bytes_written = num_components * (bit_size / 8); + const unsigned align_mul = nir_intrinsic_align_mul(intrin); + const unsigned align_offset = nir_intrinsic_align_offset(intrin); + const unsigned align = nir_intrinsic_align(intrin); + + nir_component_mask_t writemask = nir_intrinsic_write_mask(intrin); + assert(writemask < (1 << num_components)); + + if ((value->bit_size <= 32 && num_components == 1) || + (value->bit_size == 32 && writemask == (1 << num_components) - 1)) + return false; + + nir_src *offset_src = nir_get_io_offset_src(intrin); + const bool offset_is_const = nir_src_is_const(*offset_src); + const unsigned const_offset = + offset_is_const ? nir_src_as_uint(*offset_src) : 0; + + assert(num_components * (bit_size / 8) <= 32); + uint32_t byte_mask = 0; + for (unsigned i = 0; i < num_components; i++) { + if (writemask & (1 << i)) + byte_mask |= ((1 << (bit_size / 8)) - 1) << i * (bit_size / 8); + } + + while (byte_mask) { + const int start = ffs(byte_mask) - 1; + assert(start % (bit_size / 8) == 0); + + int end; + for (end = start + 1; end < bytes_written; end++) { + if (!(byte_mask & (1 << end))) + break; + } + /* The size of the current contiguous chunk in bytes */ + const unsigned chunk_bytes = end - start; + + const bool is_dword_aligned = + (align_mul >= 4 && (align_offset + start) % 4 == 0) || + (offset_is_const && (start + const_offset) % 4 == 0); + + unsigned store_comps, store_bit_size, store_align; + if (chunk_bytes >= 4 && is_dword_aligned) { + store_align = MAX2(align, 4); + store_bit_size = 32; + store_comps = MIN2(chunk_bytes, 16) / 4; + } else { + store_align = align; + store_comps = 1; + store_bit_size = MIN2(chunk_bytes, 4) * 8; + /* The bit size must be a power of two */ + if (store_bit_size == 24) + store_bit_size = 16; + } + + const unsigned store_bytes = store_comps * (store_bit_size / 8); + assert(store_bytes % (bit_size / 8) == 0); + const unsigned store_first_src_comp = start / (bit_size / 8); + const unsigned store_src_comps = 
store_bytes / (bit_size / 8); + assert(store_first_src_comp + store_src_comps <= num_components); + + unsigned src_swiz[4]; + for (unsigned i = 0; i < store_src_comps; i++) + src_swiz[i] = store_first_src_comp + i; + nir_ssa_def *store_value = + nir_swizzle(b, value, src_swiz, store_src_comps, false); + nir_ssa_def *packed = nir_bitcast_vector(b, store_value, store_bit_size); + + dup_mem_intrinsic(b, intrin, packed, start, + store_comps, store_bit_size, store_align); + + byte_mask &= ~(((1u << store_bytes) - 1) << start); + } + + nir_instr_remove(&intrin->instr); + + return true; +} + +static bool +lower_mem_access_bit_sizes_impl(nir_function_impl *impl) +{ + bool progress = false; + + nir_builder b; + nir_builder_init(&b, impl); + + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + b.cursor = nir_after_instr(instr); + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + switch (intrin->intrinsic) { + case nir_intrinsic_load_ssbo: + case nir_intrinsic_load_shared: + if (lower_mem_load_bit_size(&b, intrin)) + progress = true; + break; + + case nir_intrinsic_store_ssbo: + case nir_intrinsic_store_shared: + if (lower_mem_store_bit_size(&b, intrin)) + progress = true; + break; + + default: + break; + } + } + } + + if (progress) { + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + } + + return progress; +} + +/** + * This pass loads arbitrary SSBO and shared memory load/store operations to + * intrinsics which are natively handleable by GEN hardware. In particular, + * we have two general types of memory load/store messages: + * + * - Untyped surface read/write: These can load/store between one and four + * dword components to/from a dword-aligned offset. + * + * - Byte scattered read/write: These can load/store a single byte, word, or + * dword scalar to/from an unaligned byte offset. 
+ * + * Neither type of message can do a write-masked store. This pass converts + * all nir load/store intrinsics into a series of either 8 or 32-bit + * load/store intrinsics with a number of components that we can directly + * handle in hardware and with a trivial write-mask. + */ +bool +brw_nir_lower_mem_access_bit_sizes(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(func, shader) { + if (func->impl && lower_mem_access_bit_sizes_impl(func->impl)) + progress = true; + } + + return progress; +} diff -Nru mesa-18.3.3/src/intel/compiler/brw_reg.h mesa-19.0.1/src/intel/compiler/brw_reg.h --- mesa-18.3.3/src/intel/compiler/brw_reg.h 2018-10-21 19:21:32.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_reg.h 2019-03-31 23:16:37.000000000 +0000 @@ -221,15 +221,15 @@ unsigned negate:1; /* source only */ unsigned abs:1; /* source only */ unsigned address_mode:1; /* relative addressing, hopefully! */ - unsigned pad0:1; + unsigned pad0:17; unsigned subnr:5; /* :1 in align16 */ - unsigned nr:16; }; uint32_t bits; }; union { struct { + unsigned nr; unsigned swizzle:8; /* src only, align16 only */ unsigned writemask:4; /* dest only, align16 only */ int indirect_offset:10; /* relative addressing offset */ @@ -251,8 +251,7 @@ static inline bool brw_regs_equal(const struct brw_reg *a, const struct brw_reg *b) { - const bool df = a->type == BRW_REGISTER_TYPE_DF && a->file == IMM; - return a->bits == b->bits && (df ? 
a->u64 == b->u64 : a->ud == b->ud); + return a->bits == b->bits && a->u64 == b->u64; } static inline bool diff -Nru mesa-18.3.3/src/intel/compiler/brw_schedule_instructions.cpp mesa-19.0.1/src/intel/compiler/brw_schedule_instructions.cpp --- mesa-18.3.3/src/intel/compiler/brw_schedule_instructions.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_schedule_instructions.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -323,7 +323,6 @@ break; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4: - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: case VS_OPCODE_PULL_CONSTANT_LOAD: @@ -414,6 +413,102 @@ latency = is_haswell ? 300 : 600; break; + case SHADER_OPCODE_SEND: + switch (inst->sfid) { + case BRW_SFID_SAMPLER: { + unsigned msg_type = (inst->desc >> 12) & 0x1f; + switch (msg_type) { + case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO: + case GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO: + /* See also SHADER_OPCODE_TXS */ + latency = 100; + break; + + default: + /* See also SHADER_OPCODE_TEX */ + latency = 200; + break; + } + break; + } + + case GEN6_SFID_DATAPORT_RENDER_CACHE: + switch ((inst->desc >> 14) & 0x1f) { + case GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE: + case GEN7_DATAPORT_RC_TYPED_SURFACE_READ: + /* See also SHADER_OPCODE_TYPED_SURFACE_READ */ + assert(!is_haswell); + latency = 600; + break; + + case GEN7_DATAPORT_RC_TYPED_ATOMIC_OP: + /* See also SHADER_OPCODE_TYPED_ATOMIC */ + assert(!is_haswell); + latency = 14000; + break; + + default: + unreachable("Unknown render cache message"); + } + break; + + case GEN7_SFID_DATAPORT_DATA_CACHE: + switch ((inst->desc >> 14) & 0x1f) { + case HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ: + case HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE: + /* We have no data for this but assume it's roughly the same as + * untyped surface read/write. 
+ */ + latency = 300; + break; + + case GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ: + case GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE: + /* See also SHADER_OPCODE_UNTYPED_SURFACE_READ */ + assert(!is_haswell); + latency = 600; + break; + + case GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP: + /* See also SHADER_OPCODE_UNTYPED_ATOMIC */ + assert(!is_haswell); + latency = 14000; + break; + + default: + unreachable("Unknown data cache message"); + } + break; + + case HSW_SFID_DATAPORT_DATA_CACHE_1: + switch ((inst->desc >> 14) & 0x1f) { + case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ: + case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE: + case HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ: + case HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE: + /* See also SHADER_OPCODE_UNTYPED_SURFACE_READ */ + latency = 300; + break; + + case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP: + case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2: + case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2: + case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP: + case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP: + /* See also SHADER_OPCODE_UNTYPED_ATOMIC */ + latency = 14000; + break; + + default: + unreachable("Unknown data cache message"); + } + break; + + default: + unreachable("Unknown SFID"); + } + break; + default: /* 2 cycles: * mul(8) g4<1>F g2<0,1,0>F 0.5F { align1 WE_normal 1Q }; @@ -430,7 +525,7 @@ class instruction_scheduler { public: instruction_scheduler(backend_shader *s, int grf_count, - int hw_reg_count, int block_count, + unsigned hw_reg_count, int block_count, instruction_scheduler_mode mode) { this->bs = s; @@ -511,7 +606,7 @@ bool post_reg_alloc; int instructions_to_schedule; int grf_count; - int hw_reg_count; + unsigned hw_reg_count; int reg_pressure; int block_idx; exec_list instructions; @@ -665,7 +760,7 @@ int payload_last_use_ip[hw_reg_count]; v->calculate_payload_ranges(hw_reg_count, payload_last_use_ip); - for (int i = 0; i < hw_reg_count; i++) { + for (unsigned i = 0; i < hw_reg_count; i++) { if 
(payload_last_use_ip[i] == -1) continue; @@ -973,7 +1068,7 @@ * After register allocation, reg_offsets are gone and we track individual * GRF registers. */ - schedule_node *last_grf_write[grf_count * 16]; + schedule_node **last_grf_write; schedule_node *last_mrf_write[BRW_MAX_MRF(v->devinfo->gen)]; schedule_node *last_conditional_mod[8] = {}; schedule_node *last_accumulator_write = NULL; @@ -984,7 +1079,7 @@ */ schedule_node *last_fixed_grf_write = NULL; - memset(last_grf_write, 0, sizeof(last_grf_write)); + last_grf_write = (schedule_node **)calloc(sizeof(schedule_node *), grf_count * 16); memset(last_mrf_write, 0, sizeof(last_mrf_write)); /* top-to-bottom dependencies: RAW and WAW. */ @@ -1111,7 +1206,7 @@ } /* bottom-to-top dependencies: WAR */ - memset(last_grf_write, 0, sizeof(last_grf_write)); + memset(last_grf_write, 0, sizeof(schedule_node *) * grf_count * 16); memset(last_mrf_write, 0, sizeof(last_mrf_write)); memset(last_conditional_mod, 0, sizeof(last_conditional_mod)); last_accumulator_write = NULL; @@ -1227,6 +1322,8 @@ last_accumulator_write = n; } } + + free(last_grf_write); } void diff -Nru mesa-18.3.3/src/intel/compiler/brw_shader.cpp mesa-19.0.1/src/intel/compiler/brw_shader.cpp --- mesa-18.3.3/src/intel/compiler/brw_shader.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_shader.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -206,6 +206,9 @@ case SHADER_OPCODE_COS: return "cos"; + case SHADER_OPCODE_SEND: + return "send"; + case SHADER_OPCODE_TEX: return "tex"; case SHADER_OPCODE_TEX_LOGICAL: @@ -269,6 +272,8 @@ case SHADER_OPCODE_IMAGE_SIZE: return "image_size"; + case SHADER_OPCODE_IMAGE_SIZE_LOGICAL: + return "image_size_logical"; case SHADER_OPCODE_SHADER_TIME_ADD: return "shader_time_add"; @@ -402,8 +407,6 @@ return "uniform_pull_const_gen7"; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4: return "varying_pull_const_gen4"; - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: - return "varying_pull_const_gen7"; case 
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: return "varying_pull_const_logical"; @@ -415,10 +418,6 @@ case FS_OPCODE_PACK_HALF_2x16_SPLIT: return "pack_half_2x16_split"; - case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X: - return "unpack_half_2x16_split_x"; - case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y: - return "unpack_half_2x16_split_y"; case FS_OPCODE_PLACEHOLDER_HALT: return "placeholder_halt"; @@ -1001,6 +1000,9 @@ backend_instruction::has_side_effects() const { switch (opcode) { + case SHADER_OPCODE_SEND: + return send_has_side_effects; + case SHADER_OPCODE_UNTYPED_ATOMIC: case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: @@ -1037,6 +1039,9 @@ backend_instruction::is_volatile() const { switch (opcode) { + case SHADER_OPCODE_SEND: + return send_is_volatile; + case SHADER_OPCODE_UNTYPED_SURFACE_READ: case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: case SHADER_OPCODE_TYPED_SURFACE_READ: @@ -1189,7 +1194,7 @@ const struct brw_tes_prog_key *key, const struct brw_vue_map *input_vue_map, struct brw_tes_prog_data *prog_data, - const nir_shader *src_shader, + nir_shader *nir, struct gl_program *prog, int shader_time_index, char **error_str) @@ -1198,7 +1203,6 @@ const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL]; const unsigned *assembly; - nir_shader *nir = nir_shader_clone(mem_ctx, src_shader); nir->info.inputs_read = key->inputs_read; nir->info.patch_inputs_read = key->patch_inputs_read; diff -Nru mesa-18.3.3/src/intel/compiler/brw_shader.h mesa-19.0.1/src/intel/compiler/brw_shader.h --- mesa-18.3.3/src/intel/compiler/brw_shader.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_shader.h 2019-03-31 23:16:37.000000000 +0000 @@ -156,8 +156,11 @@ uint32_t offset; /**< spill/unspill offset or texture offset bitfield */ uint8_t mlen; /**< SEND message length */ + uint8_t ex_mlen; /**< SENDS extended message length */ int8_t base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. 
*/ uint8_t target; /**< MRT target. */ + uint8_t sfid; /**< SFID for SEND instructions */ + uint32_t desc; /**< SEND[S] message descriptor immediate */ unsigned size_written; /**< Data written to the destination register in bytes. */ enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ @@ -170,6 +173,9 @@ bool no_dd_check:1; bool saturate:1; bool shadow_compare:1; + bool check_tdr:1; /**< Only valid for SEND; turns it into a SENDC */ + bool send_has_side_effects:1; /**< Only valid for SHADER_OPCODE_SEND */ + bool send_is_volatile:1; /**< Only valid for SHADER_OPCODE_SEND */ bool eot:1; /* Chooses which flag subregister (f0.0 to f1.1) is used for conditional diff -Nru mesa-18.3.3/src/intel/compiler/brw_vec4_cmod_propagation.cpp mesa-19.0.1/src/intel/compiler/brw_vec4_cmod_propagation.cpp --- mesa-18.3.3/src/intel/compiler/brw_vec4_cmod_propagation.cpp 2018-07-14 15:13:03.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_vec4_cmod_propagation.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -47,7 +47,7 @@ } static bool -opt_cmod_propagation_local(bblock_t *block) +opt_cmod_propagation_local(bblock_t *block, vec4_visitor *v) { bool progress = false; int ip = block->end_ip + 1; @@ -146,12 +146,109 @@ scan_inst->dst, scan_inst->size_written)) { if ((scan_inst->predicate && scan_inst->opcode != BRW_OPCODE_SEL) || scan_inst->dst.offset != inst->src[0].offset || - writemasks_incompatible(scan_inst, inst) || scan_inst->exec_size != inst->exec_size || scan_inst->group != inst->group) { break; } + /* If scan_inst is a CMP that produces a single value and inst is + * a CMP.NZ that consumes only that value, remove inst. 
+ */ + if (inst->conditional_mod == BRW_CONDITIONAL_NZ && + (inst->src[0].type == BRW_REGISTER_TYPE_D || + inst->src[0].type == BRW_REGISTER_TYPE_UD) && + (inst->opcode == BRW_OPCODE_CMP || + inst->opcode == BRW_OPCODE_MOV) && + scan_inst->opcode == BRW_OPCODE_CMP && + ((inst->src[0].swizzle == BRW_SWIZZLE_XXXX && + scan_inst->dst.writemask == WRITEMASK_X) || + (inst->src[0].swizzle == BRW_SWIZZLE_YYYY && + scan_inst->dst.writemask == WRITEMASK_Y) || + (inst->src[0].swizzle == BRW_SWIZZLE_ZZZZ && + scan_inst->dst.writemask == WRITEMASK_Z) || + (inst->src[0].swizzle == BRW_SWIZZLE_WWWW && + scan_inst->dst.writemask == WRITEMASK_W))) { + if (inst->dst.writemask != scan_inst->dst.writemask) { + src_reg temp(v, glsl_type::vec4_type, 1); + + /* Given a sequence like: + * + * cmp.ge.f0(8) g21<1>.zF g20<4>.xF g18<4>.xF + * ... + * cmp.nz.f0(8) null<1>D g21<4>.zD 0D + * + * Replace it with something like: + * + * cmp.ge.f0(8) g22<1>.zF g20<4>.xF g18<4>.xF + * mov(8) g21<1>.xF g22<1>.zzzzF + * + * The added MOV will most likely be removed later. In the + * worst case, it should be cheaper to schedule. + */ + temp.swizzle = brw_swizzle_for_mask(inst->dst.writemask); + temp.type = scan_inst->src[0].type; + + vec4_instruction *mov = v->MOV(scan_inst->dst, temp); + + /* Modify the source swizzles on scan_inst. 
If scan_inst + * was + * + * cmp.ge.f0(8) g21<1>.zF g20<4>.wzyxF g18<4>.yxwzF + * + * replace it with + * + * cmp.ge.f0(8) g21<1>.zF g20<4>.yyyyF g18<4>.wwwwF + */ + unsigned src0_chan; + unsigned src1_chan; + switch (scan_inst->dst.writemask) { + case WRITEMASK_X: + src0_chan = BRW_GET_SWZ(scan_inst->src[0].swizzle, 0); + src1_chan = BRW_GET_SWZ(scan_inst->src[1].swizzle, 0); + break; + case WRITEMASK_Y: + src0_chan = BRW_GET_SWZ(scan_inst->src[0].swizzle, 1); + src1_chan = BRW_GET_SWZ(scan_inst->src[1].swizzle, 1); + break; + case WRITEMASK_Z: + src0_chan = BRW_GET_SWZ(scan_inst->src[0].swizzle, 2); + src1_chan = BRW_GET_SWZ(scan_inst->src[1].swizzle, 2); + break; + case WRITEMASK_W: + src0_chan = BRW_GET_SWZ(scan_inst->src[0].swizzle, 3); + src1_chan = BRW_GET_SWZ(scan_inst->src[1].swizzle, 3); + break; + default: + unreachable("Impossible writemask"); + } + + scan_inst->src[0].swizzle = BRW_SWIZZLE4(src0_chan, + src0_chan, + src0_chan, + src0_chan); + + /* There's no swizzle on immediate value sources. */ + if (scan_inst->src[1].file != IMM) { + scan_inst->src[1].swizzle = BRW_SWIZZLE4(src1_chan, + src1_chan, + src1_chan, + src1_chan); + } + + scan_inst->dst = dst_reg(temp); + scan_inst->dst.writemask = inst->dst.writemask; + + scan_inst->insert_after(block, mov); + } + + inst->remove(block); + progress = true; + break; + } + + if (writemasks_incompatible(scan_inst, inst)) + break; + /* CMP's result is the same regardless of dest type. 
*/ if (inst->conditional_mod == BRW_CONDITIONAL_NZ && scan_inst->opcode == BRW_OPCODE_CMP && @@ -256,7 +353,7 @@ bool progress = false; foreach_block_reverse(block, cfg) { - progress = opt_cmod_propagation_local(block) || progress; + progress = opt_cmod_propagation_local(block, this) || progress; } if (progress) diff -Nru mesa-18.3.3/src/intel/compiler/brw_vec4.cpp mesa-19.0.1/src/intel/compiler/brw_vec4.cpp --- mesa-18.3.3/src/intel/compiler/brw_vec4.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_vec4.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -409,7 +409,7 @@ bool progress = false; foreach_block(block, cfg) { - int last_reg = -1, last_offset = -1; + unsigned last_reg = ~0u, last_offset = ~0u; enum brw_reg_file last_reg_file = BAD_FILE; uint8_t imm[4] = { 0 }; @@ -442,7 +442,7 @@ need_type = BRW_REGISTER_TYPE_F; } } else { - last_reg = -1; + last_reg = ~0u; } /* If this wasn't a MOV, or the destination register doesn't match, @@ -470,7 +470,7 @@ } inst_count = 0; - last_reg = -1; + last_reg = ~0u;; writemask = 0; dest_type = BRW_REGISTER_TYPE_F; @@ -892,18 +892,6 @@ progress = true; } break; - case BRW_OPCODE_CMP: - if (inst->conditional_mod == BRW_CONDITIONAL_GE && - inst->src[0].abs && - inst->src[0].negate && - inst->src[1].is_zero()) { - inst->src[0].abs = false; - inst->src[0].negate = false; - inst->conditional_mod = BRW_CONDITIONAL_Z; - progress = true; - break; - } - break; case SHADER_OPCODE_BROADCAST: if (is_uniform(inst->src[0]) || inst->src[1].is_zero()) { @@ -1409,8 +1397,10 @@ * in the register instead. 
*/ if (to_mrf && scan_inst->mlen > 0) { - if (inst->dst.nr >= scan_inst->base_mrf && - inst->dst.nr < scan_inst->base_mrf + scan_inst->mlen) { + unsigned start = scan_inst->base_mrf; + unsigned end = scan_inst->base_mrf + scan_inst->mlen; + + if (inst->dst.nr >= start && inst->dst.nr < end) { break; } } else { @@ -2828,12 +2818,11 @@ void *mem_ctx, const struct brw_vs_prog_key *key, struct brw_vs_prog_data *prog_data, - const nir_shader *src_shader, + nir_shader *shader, int shader_time_index, char **error_str) { const bool is_scalar = compiler->scalar_stage[MESA_SHADER_VERTEX]; - nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, is_scalar); const unsigned *assembly = NULL; diff -Nru mesa-18.3.3/src/intel/compiler/brw_vec4_generator.cpp mesa-19.0.1/src/intel/compiler/brw_vec4_generator.cpp --- mesa-18.3.3/src/intel/compiler/brw_vec4_generator.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_vec4_generator.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -291,8 +291,6 @@ inst->header_size != 0, BRW_SAMPLER_SIMD_MODE_SIMD4X2, return_format); - - brw_mark_surface_used(&prog_data->base, sampler + base_binding_table_index); } else { /* Non-constant sampler index. 
*/ @@ -1351,8 +1349,6 @@ inst->header_size > 0, BRW_SAMPLER_SIMD_MODE_SIMD4X2, BRW_SAMPLER_RETURN_FORMAT_SINT32); - - brw_mark_surface_used(&prog_data->base, surf_index.ud); } static void @@ -1378,9 +1374,6 @@ 0, /* LD message ignores sampler unit */ GEN5_SAMPLER_MESSAGE_SAMPLE_LD, BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0)); - - brw_mark_surface_used(&prog_data->base, surf_index.ud); - } else { struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); @@ -1866,8 +1859,6 @@ case SHADER_OPCODE_SHADER_TIME_ADD: brw_shader_time_add(p, src[0], prog_data->base.binding_table.shader_time_start); - brw_mark_surface_used(&prog_data->base, - prog_data->base.binding_table.shader_time_start); break; case SHADER_OPCODE_UNTYPED_ATOMIC: diff -Nru mesa-18.3.3/src/intel/compiler/brw_vec4_gs_nir.cpp mesa-19.0.1/src/intel/compiler/brw_vec4_gs_nir.cpp --- mesa-18.3.3/src/intel/compiler/brw_vec4_gs_nir.cpp 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_vec4_gs_nir.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -41,14 +41,14 @@ /* The EmitNoIndirectInput flag guarantees our vertex index will * be constant. We should handle indirects someday. */ - nir_const_value *vertex = nir_src_as_const_value(instr->src[0]); - nir_const_value *offset_reg = nir_src_as_const_value(instr->src[1]); + const unsigned vertex = nir_src_as_uint(instr->src[0]); + const unsigned offset_reg = nir_src_as_uint(instr->src[1]); const unsigned input_array_stride = prog_data->urb_read_length * 2; if (nir_dest_bit_size(instr->dest) == 64) { - src = src_reg(ATTR, input_array_stride * vertex->u32[0] + - instr->const_index[0] + offset_reg->u32[0], + src = src_reg(ATTR, input_array_stride * vertex + + instr->const_index[0] + offset_reg, glsl_type::dvec4_type); dst_reg tmp = dst_reg(this, glsl_type::dvec4_type); @@ -65,8 +65,8 @@ /* Make up a type...we have no way of knowing... 
*/ const glsl_type *const type = glsl_type::ivec(instr->num_components); - src = src_reg(ATTR, input_array_stride * vertex->u32[0] + - instr->const_index[0] + offset_reg->u32[0], + src = src_reg(ATTR, input_array_stride * vertex + + instr->const_index[0] + offset_reg, type); src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr)); diff -Nru mesa-18.3.3/src/intel/compiler/brw_vec4_gs_visitor.cpp mesa-19.0.1/src/intel/compiler/brw_vec4_gs_visitor.cpp --- mesa-18.3.3/src/intel/compiler/brw_vec4_gs_visitor.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_vec4_gs_visitor.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -615,7 +615,7 @@ void *mem_ctx, const struct brw_gs_prog_key *key, struct brw_gs_prog_data *prog_data, - const nir_shader *src_shader, + nir_shader *shader, struct gl_program *prog, int shader_time_index, char **error_str) @@ -625,7 +625,6 @@ c.key = *key; const bool is_scalar = compiler->scalar_stage[MESA_SHADER_GEOMETRY]; - nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); /* The GLSL linker will have already matched up GS inputs and the outputs * of prior stages. 
The driver does extend VS outputs in some cases, but @@ -668,7 +667,7 @@ prog_data->control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID; /* We only have to emit control bits if we are using streams */ - if (prog && prog->info.gs.uses_streams) + if (shader->info.gs.uses_streams) c.control_data_bits_per_vertex = 2; else c.control_data_bits_per_vertex = 0; diff -Nru mesa-18.3.3/src/intel/compiler/brw_vec4.h mesa-19.0.1/src/intel/compiler/brw_vec4.h --- mesa-18.3.3/src/intel/compiler/brw_vec4.h 2018-03-26 16:53:06.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_vec4.h 2019-03-31 23:16:37.000000000 +0000 @@ -132,7 +132,7 @@ bool reg_allocate(); void evaluate_spill_costs(float *spill_costs, bool *no_spill); int choose_spill_reg(struct ra_graph *g); - void spill_reg(int spill_reg); + void spill_reg(unsigned spill_reg); void move_grf_array_access_to_scratch(); void move_uniform_array_access_to_pull_constants(); void move_push_constants_to_pull_constants(); @@ -338,6 +338,7 @@ virtual void nir_emit_block(nir_block *block); virtual void nir_emit_instr(nir_instr *instr); virtual void nir_emit_load_const(nir_load_const_instr *instr); + src_reg get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr); virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr); virtual void nir_emit_alu(nir_alu_instr *instr); virtual void nir_emit_jump(nir_jump_instr *instr); @@ -354,6 +355,7 @@ unsigned num_components = 4); src_reg get_nir_src(const nir_src &src, unsigned num_components = 4); + src_reg get_nir_src_imm(const nir_src &src); src_reg get_indirect_offset(nir_intrinsic_instr *instr); dst_reg *nir_locals; diff -Nru mesa-18.3.3/src/intel/compiler/brw_vec4_nir.cpp mesa-19.0.1/src/intel/compiler/brw_vec4_nir.cpp --- mesa-18.3.3/src/intel/compiler/brw_vec4_nir.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_vec4_nir.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -253,17 +253,25 @@ } src_reg +vec4_visitor::get_nir_src_imm(const nir_src &src) +{ + 
assert(nir_src_num_components(src) == 1); + assert(nir_src_bit_size(src) == 32); + return nir_src_is_const(src) ? src_reg(brw_imm_d(nir_src_as_int(src))) : + get_nir_src(src, 1); +} + +src_reg vec4_visitor::get_indirect_offset(nir_intrinsic_instr *instr) { nir_src *offset_src = nir_get_io_offset_src(instr); - nir_const_value *const_value = nir_src_as_const_value(*offset_src); - if (const_value) { + if (nir_src_is_const(*offset_src)) { /* The only constant offset we should find is 0. brw_nir.c's * add_const_offset_to_base() will fold other constant offsets * into instr->const_index[0]. */ - assert(const_value->u32[0] == 0); + assert(nir_src_as_uint(*offset_src) == 0); return src_reg(); } @@ -368,6 +376,27 @@ nir_ssa_values[instr->def.index] = reg; } +src_reg +vec4_visitor::get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr) +{ + /* SSBO stores are weird in that their index is in src[1] */ + const unsigned src = instr->intrinsic == nir_intrinsic_store_ssbo ? 1 : 0; + + src_reg surf_index; + if (nir_src_is_const(instr->src[src])) { + unsigned index = prog_data->base.binding_table.ssbo_start + + nir_src_as_uint(instr->src[src]); + surf_index = brw_imm_ud(index); + } else { + surf_index = src_reg(this, glsl_type::uint_type); + emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[src], 1), + brw_imm_ud(prog_data->base.binding_table.ssbo_start))); + surf_index = emit_uniformize(surf_index); + } + + return surf_index; +} + void vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) { @@ -377,15 +406,13 @@ switch (instr->intrinsic) { case nir_intrinsic_load_input: { - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - /* We set EmitNoIndirectInput for VS */ - assert(const_offset); + unsigned load_offset = nir_src_as_uint(instr->src[0]); dest = get_nir_dest(instr->dest); dest.writemask = brw_writemask_for_size(instr->num_components); - src = src_reg(ATTR, instr->const_index[0] + const_offset->u32[0], + src = src_reg(ATTR, 
instr->const_index[0] + load_offset, glsl_type::uvec4_type); src = retype(src, dest.type); @@ -404,10 +431,8 @@ } case nir_intrinsic_store_output: { - nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); - assert(const_offset); - - int varying = instr->const_index[0] + const_offset->u32[0]; + unsigned store_offset = nir_src_as_uint(instr->src[1]); + int varying = instr->const_index[0] + store_offset; bool is_64bit = nir_src_bit_size(instr->src[0]) == 64; if (is_64bit) { @@ -442,8 +467,8 @@ } case nir_intrinsic_get_buffer_size: { - nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]); - unsigned ssbo_index = const_uniform_block ? const_uniform_block->u32[0] : 0; + unsigned ssbo_index = nir_src_is_const(instr->src[0]) ? + nir_src_as_uint(instr->src[0]) : 0; const unsigned index = prog_data->base.binding_table.ssbo_start + ssbo_index; @@ -462,49 +487,24 @@ emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod)); emit(inst); - - brw_mark_surface_used(&prog_data->base, index); break; } case nir_intrinsic_store_ssbo: { assert(devinfo->gen >= 7); - /* Block index */ - src_reg surf_index; - nir_const_value *const_uniform_block = - nir_src_as_const_value(instr->src[1]); - if (const_uniform_block) { - unsigned index = prog_data->base.binding_table.ssbo_start + - const_uniform_block->u32[0]; - surf_index = brw_imm_ud(index); - brw_mark_surface_used(&prog_data->base, index); - } else { - surf_index = src_reg(this, glsl_type::uint_type); - emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[1], 1), - brw_imm_ud(prog_data->base.binding_table.ssbo_start))); - surf_index = emit_uniformize(surf_index); - - brw_mark_surface_used(&prog_data->base, - prog_data->base.binding_table.ssbo_start + - nir->info.num_ssbos - 1); - } - - /* Offset */ - src_reg offset_reg; - nir_const_value *const_offset = nir_src_as_const_value(instr->src[2]); - if (const_offset) { - offset_reg = brw_imm_ud(const_offset->u32[0]); - } else { - 
offset_reg = get_nir_src(instr->src[2], 1); - } + /* brw_nir_lower_mem_access_bit_sizes takes care of this */ + assert(nir_src_bit_size(instr->src[0]) == 32); + assert(nir_intrinsic_write_mask(instr) == + (1u << instr->num_components) - 1); + + src_reg surf_index = get_nir_ssbo_intrinsic_index(instr); + src_reg offset_reg = retype(get_nir_src_imm(instr->src[2]), + BRW_REGISTER_TYPE_UD); /* Value */ src_reg val_reg = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F, 4); - /* Writemask */ - unsigned write_mask = instr->const_index[0]; - /* IvyBridge does not have a native SIMD4x2 untyped write message so untyped * writes will use SIMD8 mode. In order to hide this and keep symmetry across * typed and untyped messages and across hardware platforms, the @@ -546,158 +546,30 @@ const vec4_builder bld = vec4_builder(this).at_end() .annotate(current_annotation, base_ir); - unsigned type_slots = nir_src_bit_size(instr->src[0]) / 32; - if (type_slots == 2) { - dst_reg tmp = dst_reg(this, glsl_type::dvec4_type); - shuffle_64bit_data(tmp, retype(val_reg, tmp.type), true); - val_reg = src_reg(retype(tmp, BRW_REGISTER_TYPE_F)); - } - - uint8_t swizzle[4] = { 0, 0, 0, 0}; - int num_channels = 0; - unsigned skipped_channels = 0; - int num_components = instr->num_components; - for (int i = 0; i < num_components; i++) { - /* Read components Z/W of a dvec from the appropriate place. We will - * also have to adjust the swizzle (we do that with the '% 4' below) - */ - if (i == 2 && type_slots == 2) - val_reg = byte_offset(val_reg, REG_SIZE); - - /* Check if this channel needs to be written. If so, record the - * channel we need to take the data from in the swizzle array - */ - int component_mask = 1 << i; - int write_test = write_mask & component_mask; - if (write_test) { - /* If we are writing doubles we have to write 2 channels worth of - * of data (64 bits) for each double component. 
- */ - swizzle[num_channels++] = (i * type_slots) % 4; - if (type_slots == 2) - swizzle[num_channels++] = (i * type_slots + 1) % 4; - } - - /* If we don't have to write this channel it means we have a gap in the - * vector, so write the channels we accumulated until now, if any. Do - * the same if this was the last component in the vector, if we have - * enough channels for a full vec4 write or if we have processed - * components XY of a dvec (since components ZW are not in the same - * SIMD register) - */ - if (!write_test || i == num_components - 1 || num_channels == 4 || - (i == 1 && type_slots == 2)) { - if (num_channels > 0) { - /* We have channels to write, so update the offset we need to - * write at to skip the channels we skipped, if any. - */ - if (skipped_channels > 0) { - if (offset_reg.file == IMM) { - offset_reg.ud += 4 * skipped_channels; - } else { - emit(ADD(dst_reg(offset_reg), offset_reg, - brw_imm_ud(4 * skipped_channels))); - } - } - - /* Swizzle the data register so we take the data from the channels - * we need to write and send the write message. This will write - * num_channels consecutive dwords starting at offset. - */ - val_reg.swizzle = - BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); - emit_untyped_write(bld, surf_index, offset_reg, val_reg, - 1 /* dims */, num_channels /* size */, - BRW_PREDICATE_NONE); - - /* If we have to do a second write we will have to update the - * offset so that we jump over the channels we have just written - * now. 
- */ - skipped_channels = num_channels; - - /* Restart the count for the next write message */ - num_channels = 0; - } - - /* If we didn't write the channel, increase skipped count */ - if (!write_test) - skipped_channels += type_slots; - } - } - + emit_untyped_write(bld, surf_index, offset_reg, val_reg, + 1 /* dims */, instr->num_components /* size */, + BRW_PREDICATE_NONE); break; } case nir_intrinsic_load_ssbo: { assert(devinfo->gen >= 7); - nir_const_value *const_uniform_block = - nir_src_as_const_value(instr->src[0]); + /* brw_nir_lower_mem_access_bit_sizes takes care of this */ + assert(nir_dest_bit_size(instr->dest) == 32); - src_reg surf_index; - if (const_uniform_block) { - unsigned index = prog_data->base.binding_table.ssbo_start + - const_uniform_block->u32[0]; - surf_index = brw_imm_ud(index); - - brw_mark_surface_used(&prog_data->base, index); - } else { - surf_index = src_reg(this, glsl_type::uint_type); - emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], 1), - brw_imm_ud(prog_data->base.binding_table.ssbo_start))); - surf_index = emit_uniformize(surf_index); - - /* Assume this may touch any UBO. It would be nice to provide - * a tighter bound, but the array information is already lowered away. 
- */ - brw_mark_surface_used(&prog_data->base, - prog_data->base.binding_table.ssbo_start + - nir->info.num_ssbos - 1); - } - - src_reg offset_reg; - nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); - if (const_offset) { - offset_reg = brw_imm_ud(const_offset->u32[0]); - } else { - offset_reg = get_nir_src(instr->src[1], 1); - } + src_reg surf_index = get_nir_ssbo_intrinsic_index(instr); + src_reg offset_reg = retype(get_nir_src_imm(instr->src[1]), + BRW_REGISTER_TYPE_UD); /* Read the vector */ const vec4_builder bld = vec4_builder(this).at_end() .annotate(current_annotation, base_ir); - src_reg read_result; + src_reg read_result = emit_untyped_read(bld, surf_index, offset_reg, + 1 /* dims */, 4 /* size*/, + BRW_PREDICATE_NONE); dst_reg dest = get_nir_dest(instr->dest); - if (type_sz(dest.type) < 8) { - read_result = emit_untyped_read(bld, surf_index, offset_reg, - 1 /* dims */, 4 /* size*/, - BRW_PREDICATE_NONE); - } else { - src_reg shuffled = src_reg(this, glsl_type::dvec4_type); - - src_reg temp; - temp = emit_untyped_read(bld, surf_index, offset_reg, - 1 /* dims */, 4 /* size*/, - BRW_PREDICATE_NONE); - emit(MOV(dst_reg(retype(shuffled, temp.type)), temp)); - - if (offset_reg.file == IMM) - offset_reg.ud += 16; - else - emit(ADD(dst_reg(offset_reg), offset_reg, brw_imm_ud(16))); - - temp = emit_untyped_read(bld, surf_index, offset_reg, - 1 /* dims */, 4 /* size*/, - BRW_PREDICATE_NONE); - emit(MOV(dst_reg(retype(byte_offset(shuffled, REG_SIZE), temp.type)), - temp)); - - read_result = src_reg(this, glsl_type::dvec4_type); - shuffle_64bit_data(dst_reg(read_result), shuffled, false); - } - read_result.type = dest.type; read_result.swizzle = brw_swizzle_for_size(instr->num_components); emit(MOV(dest, read_result)); @@ -706,12 +578,12 @@ case nir_intrinsic_ssbo_atomic_add: { int op = BRW_AOP_ADD; - const nir_const_value *const val = nir_src_as_const_value(instr->src[2]); - if (val != NULL) { - if (val->i32[0] == 1) + if 
(nir_src_is_const(instr->src[2])) { + int add_val = nir_src_as_int(instr->src[2]); + if (add_val == 1) op = BRW_AOP_INC; - else if (val->i32[0] == -1) + else if (add_val == -1) op = BRW_AOP_DEC; } @@ -778,14 +650,14 @@ unsigned shift = (nir_intrinsic_base(instr) % 16) / type_size; assert(shift + instr->num_components <= 4); - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - if (const_offset) { + if (nir_src_is_const(instr->src[0])) { + const unsigned load_offset = nir_src_as_uint(instr->src[0]); /* Offsets are in bytes but they should always be multiples of 4 */ - assert(const_offset->u32[0] % 4 == 0); + assert(load_offset % 4 == 0); src.swizzle = brw_swizzle_for_size(instr->num_components); dest.writemask = brw_writemask_for_size(instr->num_components); - unsigned offset = const_offset->u32[0] + shift * type_size; + unsigned offset = load_offset + shift * type_size; src.offset = ROUND_DOWN_TO(offset, 16); shift = (offset % 16) / type_size; assert(shift + instr->num_components <= 4); @@ -810,19 +682,17 @@ } case nir_intrinsic_load_ubo: { - nir_const_value *const_block_index = nir_src_as_const_value(instr->src[0]); src_reg surf_index; dest = get_nir_dest(instr->dest); - if (const_block_index) { + if (nir_src_is_const(instr->src[0])) { /* The block index is a constant, so just emit the binding table entry * as an immediate. */ const unsigned index = prog_data->base.binding_table.ubo_start + - const_block_index->u32[0]; + nir_src_as_uint(instr->src[0]); surf_index = brw_imm_ud(index); - brw_mark_surface_used(&prog_data->base, index); } else { /* The block index is not a constant. Evaluate the index expression * per-channel and add the base UBO index; we have to select a value @@ -833,19 +703,12 @@ instr->num_components), brw_imm_ud(prog_data->base.binding_table.ubo_start))); surf_index = emit_uniformize(surf_index); - - /* Assume this may touch any UBO. 
It would be nice to provide - * a tighter bound, but the array information is already lowered away. - */ - brw_mark_surface_used(&prog_data->base, - prog_data->base.binding_table.ubo_start + - nir->info.num_ubos - 1); } src_reg offset_reg; - nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); - if (const_offset) { - offset_reg = brw_imm_ud(const_offset->u32[0] & ~15); + if (nir_src_is_const(instr->src[1])) { + unsigned load_offset = nir_src_as_uint(instr->src[1]); + offset_reg = brw_imm_ud(load_offset & ~15); } else { offset_reg = src_reg(this, glsl_type::uint_type); emit(MOV(dst_reg(offset_reg), @@ -877,13 +740,14 @@ } packed_consts.swizzle = brw_swizzle_for_size(instr->num_components); - if (const_offset) { + if (nir_src_is_const(instr->src[1])) { + unsigned load_offset = nir_src_as_uint(instr->src[1]); unsigned type_size = type_sz(dest.type); packed_consts.swizzle += - BRW_SWIZZLE4(const_offset->u32[0] % 16 / type_size, - const_offset->u32[0] % 16 / type_size, - const_offset->u32[0] % 16 / type_size, - const_offset->u32[0] % 16 / type_size); + BRW_SWIZZLE4(load_offset % 16 / type_size, + load_offset % 16 / type_size, + load_offset % 16 / type_size, + load_offset % 16 / type_size); } emit(MOV(dest, retype(packed_consts, dest.type))); @@ -922,26 +786,7 @@ if (nir_intrinsic_infos[instr->intrinsic].has_dest) dest = get_nir_dest(instr->dest); - src_reg surface; - nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]); - if (const_surface) { - unsigned surf_index = prog_data->base.binding_table.ssbo_start + - const_surface->u32[0]; - surface = brw_imm_ud(surf_index); - brw_mark_surface_used(&prog_data->base, surf_index); - } else { - surface = src_reg(this, glsl_type::uint_type); - emit(ADD(dst_reg(surface), get_nir_src(instr->src[0]), - brw_imm_ud(prog_data->base.binding_table.ssbo_start))); - - /* Assume this may touch any UBO. This is the same we do for other - * UBO/SSBO accesses with non-constant surface. 
- */ - brw_mark_surface_used(&prog_data->base, - prog_data->base.binding_table.ssbo_start + - nir->info.num_ssbos - 1); - } - + src_reg surface = get_nir_ssbo_intrinsic_index(instr); src_reg offset = get_nir_src(instr->src[1], 1); src_reg data1; if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC) @@ -973,34 +818,34 @@ brw_conditional_for_nir_comparison(nir_op op) { switch (op) { - case nir_op_flt: - case nir_op_ilt: - case nir_op_ult: + case nir_op_flt32: + case nir_op_ilt32: + case nir_op_ult32: return BRW_CONDITIONAL_L; - case nir_op_fge: - case nir_op_ige: - case nir_op_uge: + case nir_op_fge32: + case nir_op_ige32: + case nir_op_uge32: return BRW_CONDITIONAL_GE; - case nir_op_feq: - case nir_op_ieq: - case nir_op_ball_fequal2: - case nir_op_ball_iequal2: - case nir_op_ball_fequal3: - case nir_op_ball_iequal3: - case nir_op_ball_fequal4: - case nir_op_ball_iequal4: + case nir_op_feq32: + case nir_op_ieq32: + case nir_op_b32all_fequal2: + case nir_op_b32all_iequal2: + case nir_op_b32all_fequal3: + case nir_op_b32all_iequal3: + case nir_op_b32all_fequal4: + case nir_op_b32all_iequal4: return BRW_CONDITIONAL_Z; - case nir_op_fne: - case nir_op_ine: - case nir_op_bany_fnequal2: - case nir_op_bany_inequal2: - case nir_op_bany_fnequal3: - case nir_op_bany_inequal3: - case nir_op_bany_fnequal4: - case nir_op_bany_inequal4: + case nir_op_fne32: + case nir_op_ine32: + case nir_op_b32any_fnequal2: + case nir_op_b32any_inequal2: + case nir_op_b32any_fnequal3: + case nir_op_b32any_inequal3: + case nir_op_b32any_fnequal4: + case nir_op_b32any_inequal4: return BRW_CONDITIONAL_NZ; default: @@ -1020,20 +865,20 @@ nir_instr_as_alu(instr->src[0].src.ssa->parent_instr); switch (cmp_instr->op) { - case nir_op_bany_fnequal2: - case nir_op_bany_inequal2: - case nir_op_bany_fnequal3: - case nir_op_bany_inequal3: - case nir_op_bany_fnequal4: - case nir_op_bany_inequal4: + case nir_op_b32any_fnequal2: + case nir_op_b32any_inequal2: + case nir_op_b32any_fnequal3: + case 
nir_op_b32any_inequal3: + case nir_op_b32any_fnequal4: + case nir_op_b32any_inequal4: *predicate = BRW_PREDICATE_ALIGN16_ANY4H; break; - case nir_op_ball_fequal2: - case nir_op_ball_iequal2: - case nir_op_ball_fequal3: - case nir_op_ball_iequal3: - case nir_op_ball_fequal4: - case nir_op_ball_iequal4: + case nir_op_b32all_fequal2: + case nir_op_b32all_iequal2: + case nir_op_b32all_fequal3: + case nir_op_b32all_iequal3: + case nir_op_b32all_fequal4: + case nir_op_b32all_iequal4: *predicate = BRW_PREDICATE_ALIGN16_ALL4H; break; default: @@ -1225,6 +1070,12 @@ inst->saturate = instr->dest.saturate; break; + case nir_op_uadd_sat: + assert(nir_dest_bit_size(instr->dest.dest) < 64); + inst = emit(ADD(dst, op[0], op[1])); + inst->saturate = true; + break; + case nir_op_fmul: inst = emit(MUL(dst, op[0], op[1])); inst->saturate = instr->dest.saturate; @@ -1233,21 +1084,22 @@ case nir_op_imul: { assert(nir_dest_bit_size(instr->dest.dest) < 64); if (devinfo->gen < 8) { - nir_const_value *value0 = nir_src_as_const_value(instr->src[0].src); - nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src); - /* For integer multiplication, the MUL uses the low 16 bits of one of * the operands (src0 through SNB, src1 on IVB and later). The MACH * accumulates in the contribution of the upper 16 bits of that * operand. If we can determine that one of the args is in the low * 16 bits, though, we can just emit a single MUL. 
*/ - if (value0 && value0->u32[0] < (1 << 16)) { + if (nir_src_is_const(instr->src[0].src) && + nir_alu_instr_src_read_mask(instr, 0) == 1 && + nir_src_comp_as_uint(instr->src[0].src, 0) < (1 << 16)) { if (devinfo->gen < 7) emit(MUL(dst, op[0], op[1])); else emit(MUL(dst, op[1], op[0])); - } else if (value1 && value1->u32[0] < (1 << 16)) { + } else if (nir_src_is_const(instr->src[1].src) && + nir_alu_instr_src_read_mask(instr, 1) == 1 && + nir_src_comp_as_uint(instr->src[1].src, 0) < (1 << 16)) { if (devinfo->gen < 7) emit(MUL(dst, op[1], op[0])); else @@ -1473,18 +1325,18 @@ case nir_op_fddy_fine: unreachable("derivatives are not valid in vertex shaders"); - case nir_op_ilt: - case nir_op_ult: - case nir_op_ige: - case nir_op_uge: - case nir_op_ieq: - case nir_op_ine: + case nir_op_ilt32: + case nir_op_ult32: + case nir_op_ige32: + case nir_op_uge32: + case nir_op_ieq32: + case nir_op_ine32: assert(nir_dest_bit_size(instr->dest.dest) < 64); /* Fallthrough */ - case nir_op_flt: - case nir_op_fge: - case nir_op_feq: - case nir_op_fne: { + case nir_op_flt32: + case nir_op_fge32: + case nir_op_feq32: + case nir_op_fne32: { enum brw_conditional_mod conditional_mod = brw_conditional_for_nir_comparison(instr->op); @@ -1505,14 +1357,14 @@ break; } - case nir_op_ball_iequal2: - case nir_op_ball_iequal3: - case nir_op_ball_iequal4: + case nir_op_b32all_iequal2: + case nir_op_b32all_iequal3: + case nir_op_b32all_iequal4: assert(nir_dest_bit_size(instr->dest.dest) < 64); /* Fallthrough */ - case nir_op_ball_fequal2: - case nir_op_ball_fequal3: - case nir_op_ball_fequal4: { + case nir_op_b32all_fequal2: + case nir_op_b32all_fequal3: + case nir_op_b32all_fequal4: { unsigned swiz = brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]); @@ -1524,14 +1376,14 @@ break; } - case nir_op_bany_inequal2: - case nir_op_bany_inequal3: - case nir_op_bany_inequal4: + case nir_op_b32any_inequal2: + case nir_op_b32any_inequal3: + case nir_op_b32any_inequal4: 
assert(nir_dest_bit_size(instr->dest.dest) < 64); /* Fallthrough */ - case nir_op_bany_fnequal2: - case nir_op_bany_fnequal3: - case nir_op_bany_fnequal4: { + case nir_op_b32any_fnequal2: + case nir_op_b32any_fnequal3: + case nir_op_b32any_fnequal4: { unsigned swiz = brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]); @@ -1579,8 +1431,9 @@ emit(AND(dst, op[0], op[1])); break; - case nir_op_b2i: - case nir_op_b2f: + case nir_op_b2i32: + case nir_op_b2f32: + case nir_op_b2f64: if (nir_dest_bit_size(instr->dest.dest) > 32) { assert(dst.type == BRW_REGISTER_TYPE_DF); emit_conversion_to_double(dst, negate(op[0]), false); @@ -1589,7 +1442,7 @@ } break; - case nir_op_f2b: + case nir_op_f2b32: if (nir_src_bit_size(instr->src[0].src) == 64) { /* We use a MOV with conditional_mod to check if the provided value is * 0.0. We want this to flush denormalized numbers to zero, so we set a @@ -1610,7 +1463,7 @@ } break; - case nir_op_i2b: + case nir_op_i2b32: emit(CMP(dst, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ)); break; @@ -1930,7 +1783,7 @@ inst->saturate = instr->dest.saturate; break; - case nir_op_bcsel: + case nir_op_b32csel: enum brw_predicate predicate; if (!optimize_predicate(instr, &predicate)) { emit(CMP(dst_null_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ)); @@ -2151,6 +2004,7 @@ case nir_tex_src_offset: { nir_const_value *const_offset = nir_src_as_const_value(instr->src[i].src); + assert(nir_src_bit_size(instr->src[i].src) == 32); if (!const_offset || !brw_texture_offset(const_offset->i32, nir_tex_instr_src_size(instr, i), @@ -2162,20 +2016,6 @@ } case nir_tex_src_texture_offset: { - /* The highest texture which may be used by this operation is - * the last element of the array. Mark it here, because the generator - * doesn't have enough information to determine the bound. 
- */ - uint32_t array_size = instr->texture_array_size; - uint32_t max_used = texture + array_size - 1; - if (instr->op == nir_texop_tg4) { - max_used += prog_data->base.binding_table.gather_texture_start; - } else { - max_used += prog_data->base.binding_table.texture_start; - } - - brw_mark_surface_used(&prog_data->base, max_used); - /* Emit code to evaluate the actual indexing expression */ src_reg src = get_nir_src(instr->src[i].src, 1); src_reg temp(this, glsl_type::uint_type); diff -Nru mesa-18.3.3/src/intel/compiler/brw_vec4_reg_allocate.cpp mesa-19.0.1/src/intel/compiler/brw_vec4_reg_allocate.cpp --- mesa-18.3.3/src/intel/compiler/brw_vec4_reg_allocate.cpp 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_vec4_reg_allocate.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -502,18 +502,18 @@ } void -vec4_visitor::spill_reg(int spill_reg_nr) +vec4_visitor::spill_reg(unsigned spill_reg_nr) { assert(alloc.sizes[spill_reg_nr] == 1 || alloc.sizes[spill_reg_nr] == 2); - unsigned int spill_offset = last_scratch; + unsigned spill_offset = last_scratch; last_scratch += alloc.sizes[spill_reg_nr]; /* Generate spill/unspill instructions for the objects being spilled. */ - int scratch_reg = -1; + unsigned scratch_reg = ~0u; foreach_block_and_inst(block, vec4_instruction, inst, cfg) { - for (unsigned int i = 0; i < 3; i++) { + for (unsigned i = 0; i < 3; i++) { if (inst->src[i].file == VGRF && inst->src[i].nr == spill_reg_nr) { - if (scratch_reg == -1 || + if (scratch_reg == ~0u || !can_use_scratch_for_source(inst, i, scratch_reg)) { /* We need to unspill anyway so make sure we read the full vec4 * in any case. 
This way, the cached register can be reused @@ -529,7 +529,7 @@ dst_reg(temp), inst->src[i], spill_offset); temp.offset = inst->src[i].offset; } - assert(scratch_reg != -1); + assert(scratch_reg != ~0u); inst->src[i].nr = scratch_reg; } } diff -Nru mesa-18.3.3/src/intel/compiler/brw_vec4_tcs.cpp mesa-19.0.1/src/intel/compiler/brw_vec4_tcs.cpp --- mesa-18.3.3/src/intel/compiler/brw_vec4_tcs.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_vec4_tcs.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -260,10 +260,8 @@ src_reg indirect_offset = get_indirect_offset(instr); unsigned imm_offset = instr->const_index[0]; - nir_const_value *vertex_const = nir_src_as_const_value(instr->src[0]); - src_reg vertex_index = - vertex_const ? src_reg(brw_imm_ud(vertex_const->u32[0])) - : get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1); + src_reg vertex_index = retype(get_nir_src_imm(instr->src[0]), + BRW_REGISTER_TYPE_UD); unsigned first_component = nir_intrinsic_component(instr); if (nir_dest_bit_size(instr->dest) == 64) { @@ -380,7 +378,7 @@ void *mem_ctx, const struct brw_tcs_prog_key *key, struct brw_tcs_prog_data *prog_data, - const nir_shader *src_shader, + nir_shader *nir, int shader_time_index, char **error_str) { @@ -389,7 +387,6 @@ const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_CTRL]; const unsigned *assembly; - nir_shader *nir = nir_shader_clone(mem_ctx, src_shader); nir->info.outputs_written = key->outputs_written; nir->info.patch_outputs_written = key->patch_outputs_written; diff -Nru mesa-18.3.3/src/intel/compiler/brw_vec4_visitor.cpp mesa-19.0.1/src/intel/compiler/brw_vec4_visitor.cpp --- mesa-18.3.3/src/intel/compiler/brw_vec4_visitor.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_vec4_visitor.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -1201,12 +1201,14 @@ if (output_reg[VARYING_SLOT_CLIP_DIST0][0].file != BAD_FILE) { current_annotation = "Clipping flags"; dst_reg flags0 = dst_reg(this, 
glsl_type::uint_type); - dst_reg flags1 = dst_reg(this, glsl_type::uint_type); emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0][0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L)); emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, brw_imm_d(0)); emit(OR(header1_w, src_reg(header1_w), src_reg(flags0))); + } + if (output_reg[VARYING_SLOT_CLIP_DIST1][0].file != BAD_FILE) { + dst_reg flags1 = dst_reg(this, glsl_type::uint_type); emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1][0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L)); emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, brw_imm_d(0)); emit(SHL(flags1, src_reg(flags1), brw_imm_d(4))); @@ -1335,8 +1337,8 @@ } } -static int -align_interleaved_urb_mlen(const struct gen_device_info *devinfo, int mlen) +static unsigned +align_interleaved_urb_mlen(const struct gen_device_info *devinfo, unsigned mlen) { if (devinfo->gen >= 6) { /* URB data written (does not include the message header reg) must @@ -1746,8 +1748,6 @@ src = byte_offset(src, 16); } - brw_mark_surface_used(&prog_data->base, index); - if (is_64bit) { temp = retype(temp, BRW_REGISTER_TYPE_DF); shuffle_64bit_data(orig_temp, src_reg(temp), false, block, inst); diff -Nru mesa-18.3.3/src/intel/compiler/gen6_gs_visitor.cpp mesa-19.0.1/src/intel/compiler/gen6_gs_visitor.cpp --- mesa-18.3.3/src/intel/compiler/gen6_gs_visitor.cpp 2018-07-14 15:13:00.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/gen6_gs_visitor.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -274,8 +274,8 @@ emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, mrf), flags_data); } -static int -align_interleaved_urb_mlen(int mlen) +static unsigned +align_interleaved_urb_mlen(unsigned mlen) { /* URB data written (does not include the message header reg) must * be a multiple of 256 bits, or 2 VS registers. 
See vol5c.5, diff -Nru mesa-18.3.3/src/intel/compiler/meson.build mesa-19.0.1/src/intel/compiler/meson.build --- mesa-18.3.3/src/intel/compiler/meson.build 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -55,8 +55,8 @@ 'brw_fs.h', 'brw_fs_live_variables.cpp', 'brw_fs_live_variables.h', - 'brw_fs_lower_conversions.cpp', 'brw_fs_lower_pack.cpp', + 'brw_fs_lower_regioning.cpp', 'brw_fs_nir.cpp', 'brw_fs_reg_allocate.cpp', 'brw_fs_register_coalesce.cpp', @@ -78,6 +78,7 @@ 'brw_nir_attribute_workarounds.c', 'brw_nir_lower_cs_intrinsics.c', 'brw_nir_lower_image_load_store.c', + 'brw_nir_lower_mem_access_bit_sizes.c', 'brw_nir_opt_peephole_ffma.c', 'brw_nir_tcs_workarounds.c', 'brw_packed_float.c', @@ -157,7 +158,8 @@ libintel_compiler, libintel_common, libintel_dev, libmesa_util, libisl, ], dependencies : [dep_thread, dep_dl, idep_gtest, idep_nir], - ) + ), + suite : ['intel'], ) endforeach endif diff -Nru mesa-18.3.3/src/intel/compiler/test_fs_cmod_propagation.cpp mesa-19.0.1/src/intel/compiler/test_fs_cmod_propagation.cpp --- mesa-18.3.3/src/intel/compiler/test_fs_cmod_propagation.cpp 2018-03-26 16:53:06.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/test_fs_cmod_propagation.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -889,3 +889,35 @@ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 1)->opcode); EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate); } + +TEST_F(cmod_propagation_test, signed_unsigned_comparison_mismatch) +{ + const fs_builder &bld = v->bld; + fs_reg dest0 = v->vgrf(glsl_type::int_type); + fs_reg src0 = v->vgrf(glsl_type::int_type); + src0.type = BRW_REGISTER_TYPE_W; + + bld.ASR(dest0, negate(src0), brw_imm_d(15)); + bld.CMP(bld.null_reg_ud(), retype(dest0, BRW_REGISTER_TYPE_UD), + brw_imm_ud(0u), BRW_CONDITIONAL_LE); + + /* = Before = + * 0: asr(8) dest:D -src0:W 15D + * 1: cmp.le.f0(8) null:UD dest:UD 0UD + * + * = After = + * (no changes) + */ + v->calculate_cfg(); 
+ bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ASR, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_LE, instruction(block0, 1)->conditional_mod); +} diff -Nru mesa-18.3.3/src/intel/compiler/test_vec4_cmod_propagation.cpp mesa-19.0.1/src/intel/compiler/test_vec4_cmod_propagation.cpp --- mesa-18.3.3/src/intel/compiler/test_vec4_cmod_propagation.cpp 2018-07-14 15:13:03.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/test_vec4_cmod_propagation.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -57,7 +57,7 @@ protected: /* Dummy implementation for pure virtual methods */ - virtual dst_reg *make_reg_for_system_value(int location) + virtual dst_reg *make_reg_for_system_value(int /* location */) { unreachable("Not reached"); } @@ -82,12 +82,12 @@ unreachable("Not reached"); } - virtual void emit_urb_write_header(int mrf) + virtual void emit_urb_write_header(int /* mrf */) { unreachable("Not reached"); } - virtual vec4_instruction *emit_urb_write_opcode(bool complete) + virtual vec4_instruction *emit_urb_write_opcode(bool /* complete */) { unreachable("Not reached"); } diff -Nru mesa-18.3.3/src/intel/compiler/test_vec4_copy_propagation.cpp mesa-19.0.1/src/intel/compiler/test_vec4_copy_propagation.cpp --- mesa-18.3.3/src/intel/compiler/test_vec4_copy_propagation.cpp 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/test_vec4_copy_propagation.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -54,7 +54,7 @@ } protected: - virtual dst_reg *make_reg_for_system_value(int location) + virtual dst_reg *make_reg_for_system_value(int /* location */) { unreachable("Not reached"); } @@ -74,12 +74,12 @@ unreachable("Not reached"); } - virtual void emit_urb_write_header(int mrf) + virtual void emit_urb_write_header(int /* 
mrf */) { unreachable("Not reached"); } - virtual vec4_instruction *emit_urb_write_opcode(bool complete) + virtual vec4_instruction *emit_urb_write_opcode(bool /* complete */) { unreachable("Not reached"); } diff -Nru mesa-18.3.3/src/intel/compiler/test_vec4_register_coalesce.cpp mesa-19.0.1/src/intel/compiler/test_vec4_register_coalesce.cpp --- mesa-18.3.3/src/intel/compiler/test_vec4_register_coalesce.cpp 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/test_vec4_register_coalesce.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -57,7 +57,7 @@ } protected: - virtual dst_reg *make_reg_for_system_value(int location) + virtual dst_reg *make_reg_for_system_value(int /* location */) { unreachable("Not reached"); } @@ -77,12 +77,12 @@ unreachable("Not reached"); } - virtual void emit_urb_write_header(int mrf) + virtual void emit_urb_write_header(int /* mrf */) { unreachable("Not reached"); } - virtual vec4_instruction *emit_urb_write_opcode(bool complete) + virtual vec4_instruction *emit_urb_write_opcode(bool /* complete */) { unreachable("Not reached"); } diff -Nru mesa-18.3.3/src/intel/dev/gen_device_info.c mesa-19.0.1/src/intel/dev/gen_device_info.c --- mesa-18.3.3/src/intel/dev/gen_device_info.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/dev/gen_device_info.c 2019-03-31 23:16:37.000000000 +0000 @@ -777,6 +777,7 @@ .num_subslices = { 2, }, .num_eu_per_subslice = 6, .l3_banks = 2, + .urb.size = 192, .simulator_id = 24, }; static const struct gen_device_info gen_device_info_cfl_gt2 = { diff -Nru mesa-18.3.3/src/intel/genxml/gen10.xml mesa-19.0.1/src/intel/genxml/gen10.xml --- mesa-18.3.3/src/intel/genxml/gen10.xml 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/genxml/gen10.xml 2019-03-31 23:16:37.000000000 +0000 @@ -219,14 +219,9 @@ - - - - - - + @@ -495,7 +490,6 @@ - @@ -813,7 +807,7 @@ - + @@ -839,7 +833,7 @@ - + @@ -855,7 +849,7 @@ - + @@ -872,7 +866,7 @@ - + @@ -889,7 +883,7 @@ - + @@ -906,7 +900,7 @@ - + @@ -923,7 
+917,7 @@ - + @@ -940,7 +934,7 @@ - + @@ -949,7 +943,7 @@ - + @@ -958,7 +952,7 @@ - + @@ -967,7 +961,7 @@ - + @@ -976,7 +970,7 @@ - + @@ -985,7 +979,7 @@ - + @@ -993,13 +987,13 @@ - + - + @@ -1009,7 +1003,7 @@ - + @@ -1019,7 +1013,7 @@ - + @@ -1030,7 +1024,7 @@ - + @@ -1040,7 +1034,7 @@ - + @@ -1080,58 +1074,58 @@ - + - + - + - + - + - + - + - + - + - + - + @@ -1157,8 +1151,7 @@ - - + @@ -1169,7 +1162,7 @@ - + @@ -1188,7 +1181,7 @@ - + @@ -1236,7 +1229,7 @@ - + @@ -1260,7 +1253,7 @@ - + @@ -1284,7 +1277,7 @@ - + @@ -1308,7 +1301,7 @@ - + @@ -1334,7 +1327,7 @@ - + @@ -1360,7 +1353,7 @@ - + @@ -1368,11 +1361,11 @@ - + - + @@ -1441,20 +1434,19 @@ - + - - + - + @@ -1500,7 +1492,7 @@ - + @@ -1511,13 +1503,12 @@ - - + - + @@ -1531,7 +1522,7 @@ - + @@ -1541,7 +1532,7 @@ - + @@ -1555,7 +1546,7 @@ - + @@ -1565,7 +1556,7 @@ - + @@ -1576,7 +1567,7 @@ - + @@ -1639,7 +1630,7 @@ - + @@ -1656,7 +1647,7 @@ - + @@ -1694,7 +1685,7 @@ - + @@ -1704,7 +1695,7 @@ - + @@ -1714,7 +1705,7 @@ - + @@ -1724,7 +1715,7 @@ - + @@ -1734,7 +1725,7 @@ - + @@ -1744,7 +1735,7 @@ - + @@ -1805,7 +1796,7 @@ - + @@ -1823,7 +1814,7 @@ - + @@ -1834,7 +1825,7 @@ - + @@ -1848,7 +1839,7 @@ - + @@ -1857,7 +1848,7 @@ - + @@ -1866,7 +1857,7 @@ - + @@ -1875,7 +1866,7 @@ - + @@ -1884,7 +1875,7 @@ - + @@ -1893,7 +1884,7 @@ - + @@ -1902,7 +1893,7 @@ - + @@ -1972,7 +1963,7 @@ - + @@ -2005,7 +1996,7 @@ - + @@ -2019,7 +2010,7 @@ - + @@ -2028,7 +2019,7 @@ - + @@ -2052,7 +2043,10 @@ - + + + + @@ -2060,7 +2054,7 @@ - + @@ -2068,8 +2062,7 @@ - - + @@ -2078,7 +2071,7 @@ - + @@ -2097,21 +2090,20 @@ - + - - + - + @@ -2144,7 +2136,7 @@ - + @@ -2174,7 +2166,7 @@ - + @@ -2184,7 +2176,7 @@ - + @@ -2195,7 +2187,7 @@ - + @@ -2206,7 +2198,7 @@ - + @@ -2217,7 +2209,7 @@ - + @@ -2228,7 +2220,7 @@ - + @@ -2239,7 +2231,7 @@ - + @@ -2250,7 +2242,7 @@ - + @@ -2263,7 +2255,7 @@ - + @@ -2303,7 +2295,7 @@ - + @@ -2314,7 +2306,7 @@ - + @@ -2338,7 +2330,7 @@ - + @@ -2378,7 +2370,7 @@ - + @@ -2386,7 +2378,7 @@ - + @@ -2395,7 +2387,7 @@ - + @@ 
-2404,7 +2396,7 @@ - + @@ -2413,7 +2405,7 @@ - + @@ -2457,7 +2449,7 @@ - + @@ -2514,7 +2506,7 @@ - + @@ -2523,7 +2515,7 @@ - + @@ -2551,7 +2543,7 @@ - + @@ -2573,7 +2565,7 @@ - + @@ -2602,7 +2594,7 @@ - + @@ -2612,7 +2604,7 @@ - + @@ -2622,7 +2614,7 @@ - + @@ -2662,7 +2654,7 @@ - + @@ -2686,7 +2678,7 @@ - + @@ -2704,7 +2696,7 @@ - + @@ -2758,7 +2750,7 @@ - + @@ -2769,7 +2761,7 @@ - + @@ -2892,7 +2884,7 @@ - + @@ -2928,7 +2920,7 @@ - + @@ -3000,7 +2992,7 @@ - + @@ -3013,7 +3005,7 @@ - + @@ -3058,6 +3050,8 @@ + + @@ -3068,7 +3062,7 @@ - + @@ -3078,7 +3072,7 @@ - + @@ -3087,7 +3081,7 @@ - + @@ -3096,7 +3090,7 @@ - + @@ -3208,7 +3202,7 @@ - + @@ -3229,7 +3223,7 @@ - + @@ -3252,7 +3246,7 @@ - + @@ -3267,7 +3261,7 @@ - + @@ -3311,27 +3305,27 @@ - + - + - + - + - + - + - + @@ -3342,16 +3336,16 @@ - + - + - + @@ -3553,6 +3547,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru mesa-18.3.3/src/intel/genxml/gen11.xml mesa-19.0.1/src/intel/genxml/gen11.xml --- mesa-18.3.3/src/intel/genxml/gen11.xml 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/genxml/gen11.xml 2019-03-31 23:16:37.000000000 +0000 @@ -220,14 +220,9 @@ - - - - - - + @@ -496,7 +491,6 @@ - @@ -823,7 +817,7 @@ - + @@ -849,7 +843,7 @@ - + @@ -858,7 +852,7 @@ - + @@ -874,7 +868,7 @@ - + @@ -891,7 +885,7 @@ - + @@ -908,7 +902,7 @@ - + @@ -925,7 +919,7 @@ - + @@ -942,7 +936,7 @@ - + @@ -959,7 +953,7 @@ - + @@ -968,7 +962,7 @@ - + @@ -977,7 +971,7 @@ - + @@ -986,7 +980,7 @@ - + @@ -995,7 +989,7 @@ - + @@ -1004,7 +998,7 @@ - + @@ -1012,13 +1006,13 @@ - + - + @@ -1028,7 +1022,7 @@ - + @@ -1038,7 +1032,7 @@ - + @@ -1049,7 +1043,7 @@ - + @@ -1059,7 +1053,7 @@ - + @@ -1099,58 +1093,58 @@ - + - + - + - + - + - + - + - + - + - + - + @@ -1176,8 +1170,7 @@ - - + @@ -1188,7 +1181,7 @@ - + @@ -1207,7 +1200,7 @@ - + @@ -1254,7 +1247,7 @@ - + @@ -1278,7 +1271,7 @@ - + @@ -1302,7 +1295,7 @@ - + @@ -1326,7 +1319,7 @@ - + @@ -1352,7 +1345,7 @@ - + @@ -1378,7 +1371,7 @@ - + @@ 
-1386,11 +1379,11 @@ - + - + @@ -1457,14 +1450,13 @@ - + - - + @@ -1475,7 +1467,7 @@ - + @@ -1520,7 +1512,7 @@ - + @@ -1531,13 +1523,12 @@ - - + - + @@ -1551,7 +1542,7 @@ - + @@ -1561,7 +1552,7 @@ - + @@ -1575,7 +1566,7 @@ - + @@ -1585,7 +1576,7 @@ - + @@ -1596,7 +1587,7 @@ - + @@ -1659,7 +1650,7 @@ - + @@ -1676,7 +1667,7 @@ - + @@ -1714,7 +1705,7 @@ - + @@ -1724,7 +1715,7 @@ - + @@ -1734,7 +1725,7 @@ - + @@ -1744,7 +1735,7 @@ - + @@ -1754,7 +1745,7 @@ - + @@ -1764,7 +1755,7 @@ - + @@ -1825,7 +1816,7 @@ - + @@ -1843,7 +1834,7 @@ - + @@ -1854,7 +1845,7 @@ - + @@ -1868,7 +1859,7 @@ - + @@ -1877,7 +1868,7 @@ - + @@ -1886,7 +1877,7 @@ - + @@ -1895,7 +1886,7 @@ - + @@ -1904,7 +1895,7 @@ - + @@ -1913,7 +1904,7 @@ - + @@ -1922,7 +1913,7 @@ - + @@ -1992,7 +1983,7 @@ - + @@ -2025,7 +2016,7 @@ - + @@ -2039,7 +2030,7 @@ - + @@ -2048,7 +2039,7 @@ - + @@ -2072,7 +2063,10 @@ - + + + + @@ -2080,7 +2074,7 @@ - + @@ -2088,8 +2082,7 @@ - - + @@ -2098,7 +2091,7 @@ - + @@ -2117,21 +2110,20 @@ - + - - + - + @@ -2164,7 +2156,7 @@ - + @@ -2194,7 +2186,7 @@ - + @@ -2204,7 +2196,7 @@ - + @@ -2215,7 +2207,7 @@ - + @@ -2226,7 +2218,7 @@ - + @@ -2237,7 +2229,7 @@ - + @@ -2248,7 +2240,7 @@ - + @@ -2259,7 +2251,7 @@ - + @@ -2270,7 +2262,7 @@ - + @@ -2283,7 +2275,7 @@ - + @@ -2323,7 +2315,7 @@ - + @@ -2334,7 +2326,7 @@ - + @@ -2358,7 +2350,7 @@ - + @@ -2398,7 +2390,7 @@ - + @@ -2406,7 +2398,7 @@ - + @@ -2415,7 +2407,7 @@ - + @@ -2424,7 +2416,7 @@ - + @@ -2433,7 +2425,7 @@ - + @@ -2476,7 +2468,7 @@ - + @@ -2533,7 +2525,7 @@ - + @@ -2542,7 +2534,7 @@ - + @@ -2570,7 +2562,7 @@ - + @@ -2592,7 +2584,7 @@ - + @@ -2621,7 +2613,7 @@ - + @@ -2631,7 +2623,7 @@ - + @@ -2641,7 +2633,7 @@ - + @@ -2676,7 +2668,7 @@ - + @@ -2695,7 +2687,7 @@ - + @@ -2713,7 +2705,7 @@ - + @@ -2762,7 +2754,7 @@ - + @@ -2772,7 +2764,7 @@ - + @@ -2857,7 +2849,7 @@ - + @@ -2893,7 +2885,7 @@ - + @@ -2993,7 +2985,7 @@ - + @@ -3007,7 +2999,7 @@ - + @@ -3053,6 +3045,8 @@ + + @@ -3063,7 +3057,7 @@ - + @@ -3073,7 +3067,7 @@ - + @@ 
-3082,7 +3076,7 @@ - + @@ -3091,7 +3085,7 @@ - + @@ -3149,7 +3143,7 @@ - + @@ -3213,7 +3207,7 @@ - + @@ -3224,7 +3218,7 @@ - + @@ -3247,7 +3241,7 @@ - + @@ -3255,7 +3249,7 @@ - + @@ -3270,7 +3264,7 @@ - + @@ -3314,27 +3308,27 @@ - + - + - + - + - + - + - + @@ -3345,16 +3339,16 @@ - + - + - + @@ -3546,11 +3540,53 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru mesa-18.3.3/src/intel/genxml/gen45.xml mesa-19.0.1/src/intel/genxml/gen45.xml --- mesa-18.3.3/src/intel/genxml/gen45.xml 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/genxml/gen45.xml 2019-03-31 23:16:37.000000000 +0000 @@ -619,7 +619,7 @@ - + @@ -638,7 +638,7 @@ - + @@ -650,7 +650,7 @@ - + @@ -663,7 +663,7 @@ - + @@ -675,7 +675,7 @@ - + @@ -725,7 +725,7 @@ - + @@ -739,7 +739,7 @@ - + @@ -748,7 +748,7 @@ - + @@ -764,7 +764,7 @@ - + @@ -778,7 +778,7 @@ - + @@ -794,7 +794,7 @@ - + @@ -804,7 +804,7 @@ - + @@ -815,7 +815,7 @@ - + @@ -826,7 +826,7 @@ - + @@ -837,7 +837,7 @@ - + @@ -845,7 +845,7 @@ - + @@ -855,7 +855,7 @@ - + @@ -866,7 +866,7 @@ - + @@ -918,7 +918,7 @@ - + @@ -944,7 +944,7 @@ - + @@ -955,7 +955,7 @@ - + @@ -973,7 +973,7 @@ - + @@ -982,7 +982,7 @@ - + @@ -1002,7 +1002,7 @@ - + @@ -1025,7 +1025,7 @@ - + @@ -1051,7 +1051,7 @@ - + @@ -1078,7 +1078,7 @@ - + diff -Nru mesa-18.3.3/src/intel/genxml/gen4.xml mesa-19.0.1/src/intel/genxml/gen4.xml --- mesa-18.3.3/src/intel/genxml/gen4.xml 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/genxml/gen4.xml 2019-03-31 23:16:37.000000000 +0000 @@ -602,7 +602,7 @@ - + @@ -622,7 +622,7 @@ - + @@ -635,7 +635,7 @@ - + @@ -647,7 +647,7 @@ - + @@ -695,7 +695,7 @@ - + @@ -709,7 +709,7 @@ - + @@ -718,7 +718,7 @@ - + @@ -734,7 +734,7 @@ - + @@ -748,7 +748,7 @@ - + @@ -764,7 +764,7 @@ - + @@ -774,7 +774,7 @@ - + @@ -785,7 +785,7 @@ - + @@ -796,7 +796,7 @@ - + @@ -807,7 +807,7 @@ - + @@ -815,7 +815,7 @@ - + @@ -825,7 +825,7 @@ - + @@ -836,7 +836,7 @@ - + @@ -888,7 +888,7 @@ - + @@ -911,7 +911,7 @@ - 
+ @@ -922,7 +922,7 @@ - + @@ -940,7 +940,7 @@ - + @@ -969,7 +969,7 @@ - + @@ -992,7 +992,7 @@ - + @@ -1018,7 +1018,7 @@ - + @@ -1045,7 +1045,7 @@ - + diff -Nru mesa-18.3.3/src/intel/genxml/gen5.xml mesa-19.0.1/src/intel/genxml/gen5.xml --- mesa-18.3.3/src/intel/genxml/gen5.xml 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/genxml/gen5.xml 2019-03-31 23:16:37.000000000 +0000 @@ -441,7 +441,7 @@ - + @@ -720,7 +720,7 @@ - + @@ -739,7 +739,7 @@ - + @@ -751,7 +751,7 @@ - + @@ -764,7 +764,7 @@ - + @@ -774,7 +774,7 @@ - + @@ -786,7 +786,7 @@ - + @@ -800,7 +800,7 @@ - + @@ -809,7 +809,7 @@ - + @@ -819,7 +819,7 @@ - + @@ -835,7 +835,7 @@ - + @@ -849,7 +849,7 @@ - + @@ -859,7 +859,7 @@ - + @@ -870,7 +870,7 @@ - + @@ -886,7 +886,7 @@ - + @@ -896,7 +896,7 @@ - + @@ -907,7 +907,7 @@ - + @@ -918,7 +918,7 @@ - + @@ -926,7 +926,7 @@ - + @@ -936,7 +936,7 @@ - + @@ -947,7 +947,7 @@ - + @@ -1000,7 +1000,7 @@ - + @@ -1031,7 +1031,7 @@ - + @@ -1043,7 +1043,7 @@ - + @@ -1065,7 +1065,7 @@ i - + @@ -1074,7 +1074,7 @@ - + @@ -1094,7 +1094,7 @@ - + @@ -1117,7 +1117,7 @@ - + @@ -1143,7 +1143,7 @@ - + @@ -1170,7 +1170,7 @@ - + diff -Nru mesa-18.3.3/src/intel/genxml/gen6.xml mesa-19.0.1/src/intel/genxml/gen6.xml --- mesa-18.3.3/src/intel/genxml/gen6.xml 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/genxml/gen6.xml 2019-03-31 23:16:37.000000000 +0000 @@ -126,8 +126,7 @@ - - + @@ -384,7 +383,6 @@ - @@ -494,7 +492,7 @@ - + @@ -516,7 +514,7 @@ - + @@ -528,7 +526,7 @@ - + @@ -542,7 +540,7 @@ - + @@ -556,7 +554,7 @@ - + @@ -567,7 +565,7 @@ - + @@ -577,7 +575,7 @@ - + @@ -633,7 +631,7 @@ - + @@ -642,12 +640,12 @@ - + - + @@ -656,12 +654,12 @@ - + - + @@ -670,12 +668,12 @@ - + - + @@ -720,11 +718,10 @@ - - + - + @@ -738,7 +735,7 @@ - + @@ -786,7 +783,7 @@ - + @@ -798,25 +795,23 @@ - + - - + - + - - + @@ -828,7 +823,7 @@ - + @@ -842,7 +837,7 @@ - + @@ -852,7 +847,7 @@ - + @@ -876,7 +871,7 @@ - + @@ -886,7 +881,7 @@ - + @@ -897,7 +892,7 @@ - + @@ -908,7 +903,7 @@ - + @@ 
-922,7 +917,7 @@ - + @@ -936,7 +931,7 @@ - + @@ -945,7 +940,7 @@ - + @@ -954,7 +949,7 @@ - + @@ -1062,19 +1057,18 @@ - + - - + - + @@ -1086,7 +1080,7 @@ - + @@ -1097,7 +1091,7 @@ - + @@ -1108,7 +1102,7 @@ - + @@ -1116,7 +1110,7 @@ - + @@ -1130,7 +1124,7 @@ - + @@ -1164,7 +1158,7 @@ - + @@ -1257,7 +1251,7 @@ - + @@ -1267,7 +1261,7 @@ - + @@ -1278,7 +1272,7 @@ - + @@ -1288,7 +1282,7 @@ - + @@ -1315,7 +1309,7 @@ - + @@ -1333,7 +1327,7 @@ - + @@ -1383,7 +1377,7 @@ - + @@ -1396,7 +1390,7 @@ - + @@ -1473,7 +1467,7 @@ - + @@ -1495,7 +1489,7 @@ - + @@ -1527,7 +1521,7 @@ - + @@ -1564,7 +1558,7 @@ - + @@ -1611,7 +1605,7 @@ - + @@ -1624,7 +1618,7 @@ - + @@ -1642,7 +1636,7 @@ - + @@ -1653,7 +1647,7 @@ - + @@ -1689,33 +1683,27 @@ - + - - - - - - @@ -1728,7 +1716,7 @@ - + @@ -1738,7 +1726,7 @@ - + diff -Nru mesa-18.3.3/src/intel/genxml/gen75.xml mesa-19.0.1/src/intel/genxml/gen75.xml --- mesa-18.3.3/src/intel/genxml/gen75.xml 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/genxml/gen75.xml 2019-03-31 23:16:37.000000000 +0000 @@ -165,7 +165,7 @@ - + @@ -188,8 +188,7 @@ - - + @@ -463,7 +462,6 @@ - @@ -654,7 +652,7 @@ - + @@ -676,7 +674,7 @@ - + @@ -688,7 +686,7 @@ - + @@ -705,7 +703,7 @@ - + @@ -722,7 +720,7 @@ - + @@ -739,7 +737,7 @@ - + @@ -756,7 +754,7 @@ - + @@ -773,7 +771,7 @@ - + @@ -782,7 +780,7 @@ - + @@ -791,7 +789,7 @@ - + @@ -800,7 +798,7 @@ - + @@ -809,7 +807,7 @@ - + @@ -818,7 +816,7 @@ - + @@ -826,11 +824,11 @@ - + - + @@ -840,7 +838,7 @@ - + @@ -850,7 +848,7 @@ - + @@ -861,7 +859,7 @@ - + @@ -871,7 +869,7 @@ - + @@ -924,7 +922,7 @@ - + @@ -933,7 +931,7 @@ - + @@ -942,7 +940,7 @@ - + @@ -951,7 +949,7 @@ - + @@ -960,7 +958,7 @@ - + @@ -969,7 +967,7 @@ - + @@ -999,14 +997,13 @@ - - + - + @@ -1016,7 +1013,7 @@ - + @@ -1035,7 +1032,7 @@ - + @@ -1074,7 +1071,7 @@ - + @@ -1089,7 +1086,7 @@ - + @@ -1104,7 +1101,7 @@ - + @@ -1119,7 +1116,7 @@ - + @@ -1135,7 +1132,7 @@ - + @@ -1151,7 +1148,7 @@ - + @@ -1160,11 +1157,11 @@ - + - + @@ -1227,19 +1224,18 @@ - + - - 
+ - + @@ -1279,13 +1275,12 @@ - + - - + @@ -1296,7 +1291,7 @@ - + @@ -1310,7 +1305,7 @@ - + @@ -1320,7 +1315,7 @@ - + @@ -1354,7 +1349,7 @@ - + @@ -1364,7 +1359,7 @@ - + @@ -1375,7 +1370,7 @@ - + @@ -1432,7 +1427,7 @@ - + @@ -1442,7 +1437,7 @@ - + @@ -1452,7 +1447,7 @@ - + @@ -1462,7 +1457,7 @@ - + @@ -1472,7 +1467,7 @@ - + @@ -1482,7 +1477,7 @@ - + @@ -1529,7 +1524,7 @@ - + @@ -1540,7 +1535,7 @@ - + @@ -1554,7 +1549,7 @@ - + @@ -1563,7 +1558,7 @@ - + @@ -1572,7 +1567,7 @@ - + @@ -1581,7 +1576,7 @@ - + @@ -1590,7 +1585,7 @@ - + @@ -1599,7 +1594,7 @@ - + @@ -1608,7 +1603,7 @@ - + @@ -1646,7 +1641,7 @@ - + @@ -1655,7 +1650,7 @@ - + @@ -1718,7 +1713,10 @@ - + + + + @@ -1729,21 +1727,20 @@ - + - - + - + @@ -1762,20 +1759,19 @@ - + - - + - + @@ -1803,7 +1799,7 @@ - + @@ -1834,7 +1830,7 @@ - + @@ -1845,7 +1841,7 @@ - + @@ -1856,7 +1852,7 @@ - + @@ -1867,7 +1863,7 @@ - + @@ -1878,7 +1874,7 @@ - + @@ -1889,7 +1885,7 @@ - + @@ -1900,7 +1896,7 @@ - + @@ -1910,7 +1906,7 @@ - + @@ -1918,7 +1914,7 @@ - + @@ -1927,7 +1923,7 @@ - + @@ -1936,7 +1932,7 @@ - + @@ -1975,7 +1971,7 @@ - + @@ -2045,7 +2041,7 @@ - + @@ -2054,7 +2050,7 @@ - + @@ -2082,7 +2078,7 @@ - + @@ -2109,7 +2105,7 @@ - + @@ -2119,7 +2115,7 @@ - + @@ -2129,7 +2125,7 @@ - + @@ -2166,7 +2162,7 @@ - + @@ -2184,7 +2180,7 @@ - + @@ -2233,7 +2229,7 @@ - + @@ -2245,7 +2241,7 @@ - + @@ -2331,7 +2327,7 @@ - + @@ -2354,7 +2350,7 @@ - + @@ -2404,7 +2400,7 @@ - + @@ -2417,7 +2413,7 @@ - + @@ -2438,7 +2434,7 @@ - + @@ -2469,6 +2465,8 @@ + + @@ -2479,7 +2477,7 @@ - + @@ -2489,7 +2487,7 @@ - + @@ -2498,7 +2496,7 @@ - + @@ -2507,7 +2505,7 @@ - + @@ -2530,7 +2528,7 @@ - + @@ -2578,7 +2576,7 @@ - + @@ -2598,14 +2596,14 @@ - + - + @@ -2618,7 +2616,7 @@ - + @@ -2641,7 +2639,7 @@ - + @@ -2653,7 +2651,7 @@ - + @@ -2695,27 +2693,27 @@ - + - - + + - + - + - + - + @@ -2727,7 +2725,7 @@ - + @@ -2737,7 +2735,7 @@ - + @@ -2746,14 +2744,14 @@ - + - + @@ -2972,6 +2970,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru 
mesa-18.3.3/src/intel/genxml/gen7.xml mesa-19.0.1/src/intel/genxml/gen7.xml --- mesa-18.3.3/src/intel/genxml/gen7.xml 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/genxml/gen7.xml 2019-03-31 23:16:37.000000000 +0000 @@ -157,7 +157,7 @@ - + @@ -169,8 +169,7 @@ - - + @@ -443,7 +442,6 @@ - @@ -568,7 +566,7 @@ - + @@ -589,7 +587,7 @@ - + @@ -601,7 +599,7 @@ - + @@ -610,7 +608,7 @@ - + @@ -619,7 +617,7 @@ - + @@ -628,7 +626,7 @@ - + @@ -637,7 +635,7 @@ - + @@ -646,7 +644,7 @@ - + @@ -656,7 +654,7 @@ - + @@ -666,7 +664,7 @@ - + @@ -677,7 +675,7 @@ - + @@ -687,7 +685,7 @@ - + @@ -740,7 +738,7 @@ - + @@ -749,7 +747,7 @@ - + @@ -758,7 +756,7 @@ - + @@ -767,7 +765,7 @@ - + @@ -776,7 +774,7 @@ - + @@ -785,7 +783,7 @@ - + @@ -815,14 +813,13 @@ - - + - + @@ -832,7 +829,7 @@ - + @@ -846,7 +843,7 @@ - + @@ -881,7 +878,7 @@ - + @@ -943,19 +940,18 @@ - + - - + - + @@ -991,13 +987,12 @@ - + - - + @@ -1009,7 +1004,7 @@ - + @@ -1023,7 +1018,7 @@ - + @@ -1033,7 +1028,7 @@ - + @@ -1066,7 +1061,7 @@ - + @@ -1076,7 +1071,7 @@ - + @@ -1087,7 +1082,7 @@ - + @@ -1139,7 +1134,7 @@ - + @@ -1153,7 +1148,7 @@ - + @@ -1167,7 +1162,7 @@ - + @@ -1181,7 +1176,7 @@ - + @@ -1195,7 +1190,7 @@ - + @@ -1209,7 +1204,7 @@ - + @@ -1220,7 +1215,7 @@ - + @@ -1234,7 +1229,7 @@ - + @@ -1243,7 +1238,7 @@ - + @@ -1252,7 +1247,7 @@ - + @@ -1261,7 +1256,7 @@ - + @@ -1270,7 +1265,7 @@ - + @@ -1279,7 +1274,7 @@ - + @@ -1288,7 +1283,7 @@ - + @@ -1329,7 +1324,7 @@ - + @@ -1338,7 +1333,7 @@ - + @@ -1404,7 +1399,10 @@ - + + + + @@ -1415,21 +1413,20 @@ - + - - + - + @@ -1448,19 +1445,18 @@ - + - - + - + @@ -1488,7 +1484,7 @@ - + @@ -1519,7 +1515,7 @@ - + @@ -1530,7 +1526,7 @@ - + @@ -1541,7 +1537,7 @@ - + @@ -1552,7 +1548,7 @@ - + @@ -1563,7 +1559,7 @@ - + @@ -1574,7 +1570,7 @@ - + @@ -1585,7 +1581,7 @@ - + @@ -1593,7 +1589,7 @@ - + @@ -1602,7 +1598,7 @@ - + @@ -1611,7 +1607,7 @@ - + @@ -1645,7 +1641,7 @@ - + @@ -1710,7 +1706,7 @@ - + @@ -1734,7 +1730,7 @@ - + @@ -1761,7 +1757,7 @@ - + @@ -1771,7 +1767,7 
@@ - + @@ -1781,7 +1777,7 @@ - + @@ -1813,7 +1809,7 @@ - + @@ -1831,7 +1827,7 @@ - + @@ -1881,7 +1877,7 @@ - + @@ -1891,7 +1887,7 @@ - + @@ -1971,7 +1967,7 @@ - + @@ -1994,7 +1990,7 @@ - + @@ -2058,6 +2054,8 @@ + + @@ -2068,7 +2066,7 @@ - + @@ -2141,7 +2139,7 @@ - + @@ -2154,7 +2152,7 @@ - + @@ -2177,7 +2175,7 @@ - + @@ -2189,7 +2187,7 @@ - + @@ -2231,28 +2229,28 @@ - + - - + + - + - + - + - + @@ -2264,7 +2262,7 @@ - + @@ -2274,7 +2272,7 @@ - + @@ -2283,14 +2281,14 @@ - + - + @@ -2489,6 +2487,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru mesa-18.3.3/src/intel/genxml/gen8.xml mesa-19.0.1/src/intel/genxml/gen8.xml --- mesa-18.3.3/src/intel/genxml/gen8.xml 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/genxml/gen8.xml 2019-03-31 23:16:37.000000000 +0000 @@ -216,8 +216,7 @@ - - + @@ -462,7 +461,6 @@ - @@ -718,7 +716,7 @@ - + @@ -740,7 +738,7 @@ - + @@ -756,7 +754,7 @@ - + @@ -773,7 +771,7 @@ - + @@ -790,7 +788,7 @@ - + @@ -807,7 +805,7 @@ - + @@ -824,7 +822,7 @@ - + @@ -841,7 +839,7 @@ - + @@ -850,7 +848,7 @@ - + @@ -859,7 +857,7 @@ - + @@ -868,7 +866,7 @@ - + @@ -877,7 +875,7 @@ - + @@ -886,7 +884,7 @@ - + @@ -894,13 +892,13 @@ - + - + @@ -910,7 +908,7 @@ - + @@ -920,7 +918,7 @@ - + @@ -931,7 +929,7 @@ - + @@ -941,7 +939,7 @@ - + @@ -981,57 +979,57 @@ - + - + - + - + - + - + - + - + - + - + - + @@ -1059,13 +1057,12 @@ - - + - + @@ -1084,7 +1081,7 @@ - + @@ -1131,7 +1128,7 @@ - + @@ -1147,7 +1144,7 @@ - + @@ -1163,7 +1160,7 @@ - + @@ -1179,7 +1176,7 @@ - + @@ -1196,7 +1193,7 @@ - + @@ -1213,7 +1210,7 @@ - + @@ -1221,11 +1218,11 @@ - + - + @@ -1293,20 +1290,19 @@ - + - - + - + @@ -1345,7 +1341,7 @@ - + @@ -1356,13 +1352,12 @@ - - + - + @@ -1376,7 +1371,7 @@ - + @@ -1386,7 +1381,7 @@ - + @@ -1400,7 +1395,7 @@ - + @@ -1410,7 +1405,7 @@ - + @@ -1421,7 +1416,7 @@ - + @@ -1479,7 +1474,7 @@ - + @@ -1496,7 +1491,7 @@ - + @@ -1522,7 +1517,7 @@ - + @@ -1532,7 +1527,7 @@ - + @@ -1542,7 +1537,7 @@ - + @@ -1552,7 +1547,7 @@ - + @@ 
-1562,7 +1557,7 @@ - + @@ -1572,7 +1567,7 @@ - + @@ -1631,7 +1626,7 @@ - + @@ -1642,7 +1637,7 @@ - + @@ -1656,7 +1651,7 @@ - + @@ -1665,7 +1660,7 @@ - + @@ -1674,7 +1669,7 @@ - + @@ -1683,7 +1678,7 @@ - + @@ -1692,7 +1687,7 @@ - + @@ -1701,7 +1696,7 @@ - + @@ -1710,7 +1705,7 @@ - + @@ -1748,7 +1743,7 @@ - + @@ -1773,7 +1768,7 @@ - + @@ -1787,7 +1782,7 @@ - + @@ -1796,7 +1791,7 @@ - + @@ -1821,7 +1816,10 @@ - + + + + @@ -1829,7 +1827,7 @@ - + @@ -1837,8 +1835,7 @@ - - + @@ -1847,7 +1844,7 @@ - + @@ -1866,21 +1863,20 @@ - + - - + - + @@ -1913,7 +1909,7 @@ - + @@ -1944,7 +1940,7 @@ - + @@ -1955,7 +1951,7 @@ - + @@ -1966,7 +1962,7 @@ - + @@ -1977,7 +1973,7 @@ - + @@ -1988,7 +1984,7 @@ - + @@ -1999,7 +1995,7 @@ - + @@ -2010,7 +2006,7 @@ - + @@ -2020,7 +2016,7 @@ - + @@ -2031,7 +2027,7 @@ - + @@ -2055,7 +2051,7 @@ - + @@ -2063,7 +2059,7 @@ - + @@ -2072,7 +2068,7 @@ - + @@ -2081,7 +2077,7 @@ - + @@ -2090,7 +2086,7 @@ - + @@ -2133,7 +2129,7 @@ - + @@ -2190,7 +2186,7 @@ - + @@ -2199,7 +2195,7 @@ - + @@ -2225,7 +2221,7 @@ - + @@ -2247,7 +2243,7 @@ - + @@ -2256,7 +2252,7 @@ - + @@ -2285,7 +2281,7 @@ - + @@ -2295,7 +2291,7 @@ - + @@ -2305,7 +2301,7 @@ - + @@ -2343,7 +2339,7 @@ - + @@ -2378,7 +2374,7 @@ - + @@ -2396,7 +2392,7 @@ - + @@ -2443,7 +2439,7 @@ - + @@ -2454,7 +2450,7 @@ - + @@ -2568,7 +2564,7 @@ - + @@ -2631,7 +2627,7 @@ - + @@ -2644,7 +2640,7 @@ - + @@ -2658,7 +2654,7 @@ - + @@ -2697,6 +2693,8 @@ + + @@ -2707,7 +2705,7 @@ - + @@ -2717,7 +2715,7 @@ - + @@ -2726,7 +2724,7 @@ - + @@ -2735,7 +2733,7 @@ - + @@ -2784,7 +2782,7 @@ - + @@ -2844,7 +2842,7 @@ - + @@ -2858,20 +2856,20 @@ - + - + - + @@ -2884,7 +2882,7 @@ - + @@ -2901,7 +2899,7 @@ - + @@ -2913,7 +2911,7 @@ - + @@ -2955,27 +2953,27 @@ - + - + - + - + - + - + - + @@ -2987,7 +2985,7 @@ - + @@ -2997,7 +2995,7 @@ - + @@ -3006,14 +3004,14 @@ - + - + @@ -3206,6 +3204,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru mesa-18.3.3/src/intel/genxml/gen9.xml 
mesa-19.0.1/src/intel/genxml/gen9.xml --- mesa-18.3.3/src/intel/genxml/gen9.xml 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/genxml/gen9.xml 2019-03-31 23:16:37.000000000 +0000 @@ -219,14 +219,9 @@ - - - - - - + @@ -494,7 +489,6 @@ - @@ -776,7 +770,7 @@ - + @@ -798,7 +792,7 @@ - + @@ -814,7 +808,7 @@ - + @@ -831,7 +825,7 @@ - + @@ -848,7 +842,7 @@ - + @@ -865,7 +859,7 @@ - + @@ -882,7 +876,7 @@ - + @@ -899,7 +893,7 @@ - + @@ -908,7 +902,7 @@ - + @@ -917,7 +911,7 @@ - + @@ -926,7 +920,7 @@ - + @@ -935,7 +929,7 @@ - + @@ -944,7 +938,7 @@ - + @@ -952,13 +946,13 @@ - + - + @@ -968,7 +962,7 @@ - + @@ -978,7 +972,7 @@ - + @@ -989,7 +983,7 @@ - + @@ -999,7 +993,7 @@ - + @@ -1039,57 +1033,57 @@ - + - + - + - + - + - + - + - + - + - + - + @@ -1115,8 +1109,7 @@ - - + @@ -1127,7 +1120,7 @@ - + @@ -1146,7 +1139,7 @@ - + @@ -1194,7 +1187,7 @@ - + @@ -1218,7 +1211,7 @@ - + @@ -1242,7 +1235,7 @@ - + @@ -1266,7 +1259,7 @@ - + @@ -1292,7 +1285,7 @@ - + @@ -1318,7 +1311,7 @@ - + @@ -1326,11 +1319,11 @@ - + - + @@ -1399,20 +1392,19 @@ - + - - + - + @@ -1458,7 +1450,7 @@ - + @@ -1469,13 +1461,12 @@ - - + - + @@ -1489,7 +1480,7 @@ - + @@ -1499,7 +1490,7 @@ - + @@ -1513,7 +1504,7 @@ - + @@ -1523,7 +1514,7 @@ - + @@ -1534,7 +1525,7 @@ - + @@ -1596,7 +1587,7 @@ - + @@ -1613,7 +1604,7 @@ - + @@ -1646,7 +1637,7 @@ - + @@ -1656,7 +1647,7 @@ - + @@ -1666,7 +1657,7 @@ - + @@ -1676,7 +1667,7 @@ - + @@ -1686,7 +1677,7 @@ - + @@ -1696,7 +1687,7 @@ - + @@ -1757,7 +1748,7 @@ - + @@ -1775,7 +1766,7 @@ - + @@ -1786,7 +1777,7 @@ - + @@ -1800,7 +1791,7 @@ - + @@ -1809,7 +1800,7 @@ - + @@ -1818,7 +1809,7 @@ - + @@ -1827,7 +1818,7 @@ - + @@ -1836,7 +1827,7 @@ - + @@ -1845,7 +1836,7 @@ - + @@ -1854,7 +1845,7 @@ - + @@ -1924,7 +1915,7 @@ - + @@ -1957,7 +1948,7 @@ - + @@ -1971,7 +1962,7 @@ - + @@ -1980,7 +1971,7 @@ - + @@ -2004,7 +1995,10 @@ - + + + + @@ -2012,7 +2006,7 @@ - + @@ -2020,8 +2014,7 @@ - - + @@ -2030,7 +2023,7 @@ - + @@ -2049,21 +2042,20 @@ - + - - + - + @@ -2096,7 +2088,7 @@ - 
+ @@ -2126,7 +2118,7 @@ - + @@ -2136,7 +2128,7 @@ - + @@ -2147,7 +2139,7 @@ - + @@ -2158,7 +2150,7 @@ - + @@ -2169,7 +2161,7 @@ - + @@ -2180,7 +2172,7 @@ - + @@ -2191,7 +2183,7 @@ - + @@ -2202,7 +2194,7 @@ - + @@ -2214,7 +2206,7 @@ - + @@ -2254,7 +2246,7 @@ - + @@ -2265,7 +2257,7 @@ - + @@ -2289,7 +2281,7 @@ - + @@ -2297,7 +2289,7 @@ - + @@ -2306,7 +2298,7 @@ - + @@ -2315,7 +2307,7 @@ - + @@ -2324,7 +2316,7 @@ - + @@ -2367,7 +2359,7 @@ - + @@ -2424,7 +2416,7 @@ - + @@ -2433,7 +2425,7 @@ - + @@ -2461,7 +2453,7 @@ - + @@ -2483,7 +2475,7 @@ - + @@ -2492,7 +2484,7 @@ - + @@ -2521,7 +2513,7 @@ - + @@ -2531,7 +2523,7 @@ - + @@ -2541,7 +2533,7 @@ - + @@ -2581,7 +2573,7 @@ - + @@ -2618,7 +2610,7 @@ - + @@ -2636,7 +2628,7 @@ - + @@ -2690,7 +2682,7 @@ - + @@ -2701,7 +2693,7 @@ - + @@ -2811,7 +2803,7 @@ - + @@ -2847,7 +2839,7 @@ - + @@ -2918,7 +2910,7 @@ - + @@ -2931,7 +2923,7 @@ - + @@ -2945,7 +2937,7 @@ - + @@ -2984,6 +2976,8 @@ + + @@ -2994,7 +2988,7 @@ - + @@ -3004,7 +2998,7 @@ - + @@ -3013,7 +3007,7 @@ - + @@ -3022,7 +3016,7 @@ - + @@ -3131,7 +3125,7 @@ - + @@ -3145,13 +3139,13 @@ - + - + @@ -3163,7 +3157,7 @@ - + @@ -3186,7 +3180,7 @@ - + @@ -3201,7 +3195,7 @@ - + @@ -3244,27 +3238,27 @@ - + - + - + - + - + - + - + @@ -3275,12 +3269,12 @@ - + - + @@ -3290,7 +3284,7 @@ - + @@ -3491,6 +3485,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru mesa-18.3.3/src/intel/isl/isl.c mesa-19.0.1/src/intel/isl/isl.c --- mesa-18.3.3/src/intel/isl/isl.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/isl/isl.c 2019-03-31 23:16:37.000000000 +0000 @@ -35,6 +35,52 @@ #include "isl_gen9.h" #include "isl_priv.h" +void +isl_memcpy_linear_to_tiled(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + uint32_t dst_pitch, int32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type) +{ +#ifdef USE_SSE41 + if (copy_type == ISL_MEMCPY_STREAMING_LOAD) { + 
_isl_memcpy_linear_to_tiled_sse41( + xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling, + tiling, copy_type); + return; + } +#endif + + _isl_memcpy_linear_to_tiled( + xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling, + tiling, copy_type); +} + +void +isl_memcpy_tiled_to_linear(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + int32_t dst_pitch, uint32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type) +{ +#ifdef USE_SSE41 + if (copy_type == ISL_MEMCPY_STREAMING_LOAD) { + _isl_memcpy_tiled_to_linear_sse41( + xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling, + tiling, copy_type); + return; + } +#endif + + _isl_memcpy_tiled_to_linear( + xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling, + tiling, copy_type); +} + void PRINTFLIKE(3, 4) UNUSED __isl_finishme(const char *file, int line, const char *fmt, ...) { diff -Nru mesa-18.3.3/src/intel/isl/isl_emit_depth_stencil.c mesa-19.0.1/src/intel/isl/isl_emit_depth_stencil.c --- mesa-18.3.3/src/intel/isl/isl_emit_depth_stencil.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/isl/isl_emit_depth_stencil.c 2019-03-31 23:16:37.000000000 +0000 @@ -94,7 +94,7 @@ #endif db.SurfaceBaseAddress = info->depth_address; #if GEN_GEN >= 6 - db.DepthBufferMOCS = info->mocs; + db.MOCS = info->mocs; #endif #if GEN_GEN <= 6 @@ -138,7 +138,7 @@ #endif sb.SurfaceBaseAddress = info->stencil_address; #if GEN_GEN >= 6 - sb.StencilBufferMOCS = info->mocs; + sb.MOCS = info->mocs; #endif sb.SurfacePitch = info->stencil_surf->row_pitch_B - 1; #if GEN_GEN >= 8 @@ -161,7 +161,7 @@ db.HierarchicalDepthBufferEnable = true; hiz.SurfaceBaseAddress = info->hiz_address; - hiz.HierarchicalDepthBufferMOCS = info->mocs; + hiz.MOCS = info->mocs; hiz.SurfacePitch = info->hiz_surf->row_pitch_B - 1; #if GEN_GEN >= 8 /* From the SKL PRM Vol2a: diff -Nru mesa-18.3.3/src/intel/isl/isl_format_layout.csv 
mesa-19.0.1/src/intel/isl/isl_format_layout.csv --- mesa-18.3.3/src/intel/isl/isl_format_layout.csv 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/isl/isl_format_layout.csv 2019-03-31 23:16:37.000000000 +0000 @@ -211,7 +211,7 @@ R8_SSCALED , 8, 1, 1, 1, ss8, , , , , , , r, linear, R8_USCALED , 8, 1, 1, 1, us8, , , , , , , r, linear, P8_UNORM_PALETTE0 , 8, 1, 1, 1, , , , , , , un8, p, linear, -L8_UNORM_SRGB , 8, 1, 1, 1, , , , , un8, , , l, linear, +L8_UNORM_SRGB , 8, 1, 1, 1, , , , , un8, , , l, srgb, P8_UNORM_PALETTE1 , 8, 1, 1, 1, , , , , , , un8, p, linear, P4A4_UNORM_PALETTE1 , 8, 1, 1, 1, , , , un4, , , un4, pa, linear, A4P4_UNORM_PALETTE1 , 8, 1, 1, 1, , , , un4, , , un4, ap, linear, diff -Nru mesa-18.3.3/src/intel/isl/isl.h mesa-19.0.1/src/intel/isl/isl.h --- mesa-18.3.3/src/intel/isl/isl.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/isl/isl.h 2019-03-31 23:16:37.000000000 +0000 @@ -949,6 +949,12 @@ ISL_MSAA_LAYOUT_ARRAY, }; +typedef enum { + ISL_MEMCPY = 0, + ISL_MEMCPY_BGRA8, + ISL_MEMCPY_STREAMING_LOAD, + ISL_MEMCPY_INVALID, +} isl_memcpy_type; struct isl_device { const struct gen_device_info *info; @@ -2065,6 +2071,32 @@ isl_surf_get_depth_format(const struct isl_device *dev, const struct isl_surf *surf); +/** + * @brief performs a copy from linear to tiled surface + * + */ +void +isl_memcpy_linear_to_tiled(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + uint32_t dst_pitch, int32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type); + +/** + * @brief performs a copy from tiled to linear surface + * + */ +void +isl_memcpy_tiled_to_linear(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + int32_t dst_pitch, uint32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type); + #ifdef __cplusplus } #endif diff -Nru mesa-18.3.3/src/intel/isl/isl_priv.h 
mesa-19.0.1/src/intel/isl/isl_priv.h --- mesa-18.3.3/src/intel/isl/isl_priv.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/src/intel/isl/isl_priv.h 2019-03-31 23:16:37.000000000 +0000 @@ -25,6 +25,7 @@ #define ISL_PRIV_H #include +#include #include #include "dev/gen_device_info.h" @@ -47,6 +48,8 @@ #define MIN(a, b) ((a) < (b) ? (a) : (b)) #define MAX(a, b) ((a) > (b) ? (a) : (b)) +typedef void *(*isl_mem_copy_fn)(void *dest, const void *src, size_t n); + static inline bool isl_is_pow2(uintmax_t n) { @@ -158,6 +161,42 @@ }; } +void +_isl_memcpy_linear_to_tiled(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + uint32_t dst_pitch, int32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type); + +void +_isl_memcpy_tiled_to_linear(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + int32_t dst_pitch, uint32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type); + +void +_isl_memcpy_linear_to_tiled_sse41(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + uint32_t dst_pitch, int32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type); + +void +_isl_memcpy_tiled_to_linear_sse41(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + int32_t dst_pitch, uint32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type); + /* This is useful for adding the isl_prefix to genX functions */ #define __PASTE2(x, y) x ## y #define __PASTE(x, y) __PASTE2(x, y) diff -Nru mesa-18.3.3/src/intel/isl/isl_tiled_memcpy.c mesa-19.0.1/src/intel/isl/isl_tiled_memcpy.c --- mesa-18.3.3/src/intel/isl/isl_tiled_memcpy.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/intel/isl/isl_tiled_memcpy.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,1005 @@ +/* + * Mesa 3-D graphics library + * + * Copyright 
2012 Intel Corporation + * Copyright 2013 Google + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chad Versace + * Frank Henigman + */ + +#include + +#include "util/macros.h" +#include "main/macros.h" + +#include "isl_priv.h" + +#if defined(__SSSE3__) +#include +#elif defined(__SSE2__) +#include +#endif + +#define FILE_DEBUG_FLAG DEBUG_TEXTURE + +#define ALIGN_DOWN(a, b) ROUND_DOWN_TO(a, b) +#define ALIGN_UP(a, b) ALIGN(a, b) + +/* Tile dimensions. Width and span are in bytes, height is in pixels (i.e. + * unitless). A "span" is the most number of bytes we can copy from linear + * to tiled without needing to calculate a new destination address. 
+ */ +static const uint32_t xtile_width = 512; +static const uint32_t xtile_height = 8; +static const uint32_t xtile_span = 64; +static const uint32_t ytile_width = 128; +static const uint32_t ytile_height = 32; +static const uint32_t ytile_span = 16; + +static inline uint32_t +ror(uint32_t n, uint32_t d) +{ + return (n >> d) | (n << (32 - d)); +} + +static inline uint32_t +bswap32(uint32_t n) +{ +#if defined(HAVE___BUILTIN_BSWAP32) + return __builtin_bswap32(n); +#else + return (n >> 24) | + ((n >> 8) & 0x0000ff00) | + ((n << 8) & 0x00ff0000) | + (n << 24); +#endif +} + +/** + * Copy RGBA to BGRA - swap R and B. + */ +static inline void * +rgba8_copy(void *dst, const void *src, size_t bytes) +{ + uint32_t *d = dst; + uint32_t const *s = src; + + assert(bytes % 4 == 0); + + while (bytes >= 4) { + *d = ror(bswap32(*s), 8); + d += 1; + s += 1; + bytes -= 4; + } + return dst; +} + +#ifdef __SSSE3__ +static const uint8_t rgba8_permutation[16] = + { 2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15 }; + +static inline void +rgba8_copy_16_aligned_dst(void *dst, const void *src) +{ + _mm_store_si128(dst, + _mm_shuffle_epi8(_mm_loadu_si128(src), + *(__m128i *)rgba8_permutation)); +} + +static inline void +rgba8_copy_16_aligned_src(void *dst, const void *src) +{ + _mm_storeu_si128(dst, + _mm_shuffle_epi8(_mm_load_si128(src), + *(__m128i *)rgba8_permutation)); +} + +#elif defined(__SSE2__) +static inline void +rgba8_copy_16_aligned_dst(void *dst, const void *src) +{ + __m128i srcreg, dstreg, agmask, ag, rb, br; + + agmask = _mm_set1_epi32(0xFF00FF00); + srcreg = _mm_loadu_si128((__m128i *)src); + + rb = _mm_andnot_si128(agmask, srcreg); + ag = _mm_and_si128(agmask, srcreg); + br = _mm_shufflehi_epi16(_mm_shufflelo_epi16(rb, _MM_SHUFFLE(2, 3, 0, 1)), + _MM_SHUFFLE(2, 3, 0, 1)); + dstreg = _mm_or_si128(ag, br); + + _mm_store_si128((__m128i *)dst, dstreg); +} + +static inline void +rgba8_copy_16_aligned_src(void *dst, const void *src) +{ + __m128i srcreg, dstreg, agmask, ag, rb, br; + + 
agmask = _mm_set1_epi32(0xFF00FF00); + srcreg = _mm_load_si128((__m128i *)src); + + rb = _mm_andnot_si128(agmask, srcreg); + ag = _mm_and_si128(agmask, srcreg); + br = _mm_shufflehi_epi16(_mm_shufflelo_epi16(rb, _MM_SHUFFLE(2, 3, 0, 1)), + _MM_SHUFFLE(2, 3, 0, 1)); + dstreg = _mm_or_si128(ag, br); + + _mm_storeu_si128((__m128i *)dst, dstreg); +} +#endif + +/** + * Copy RGBA to BGRA - swap R and B, with the destination 16-byte aligned. + */ +static inline void * +rgba8_copy_aligned_dst(void *dst, const void *src, size_t bytes) +{ + assert(bytes == 0 || !(((uintptr_t)dst) & 0xf)); + +#if defined(__SSSE3__) || defined(__SSE2__) + if (bytes == 64) { + rgba8_copy_16_aligned_dst(dst + 0, src + 0); + rgba8_copy_16_aligned_dst(dst + 16, src + 16); + rgba8_copy_16_aligned_dst(dst + 32, src + 32); + rgba8_copy_16_aligned_dst(dst + 48, src + 48); + return dst; + } + + while (bytes >= 16) { + rgba8_copy_16_aligned_dst(dst, src); + src += 16; + dst += 16; + bytes -= 16; + } +#endif + + rgba8_copy(dst, src, bytes); + + return dst; +} + +/** + * Copy RGBA to BGRA - swap R and B, with the source 16-byte aligned. + */ +static inline void * +rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes) +{ + assert(bytes == 0 || !(((uintptr_t)src) & 0xf)); + +#if defined(__SSSE3__) || defined(__SSE2__) + if (bytes == 64) { + rgba8_copy_16_aligned_src(dst + 0, src + 0); + rgba8_copy_16_aligned_src(dst + 16, src + 16); + rgba8_copy_16_aligned_src(dst + 32, src + 32); + rgba8_copy_16_aligned_src(dst + 48, src + 48); + return dst; + } + + while (bytes >= 16) { + rgba8_copy_16_aligned_src(dst, src); + src += 16; + dst += 16; + bytes -= 16; + } +#endif + + rgba8_copy(dst, src, bytes); + + return dst; +} + +/** + * Each row from y0 to y1 is copied in three parts: [x0,x1), [x1,x2), [x2,x3). + * These ranges are in bytes, i.e. pixels * bytes-per-pixel. 
+ * The first and last ranges must be shorter than a "span" (the longest linear + * stretch within a tile) and the middle must equal a whole number of spans. + * Ranges may be empty. The region copied must land entirely within one tile. + * 'dst' is the start of the tile and 'src' is the corresponding + * address to copy from, though copying begins at (x0, y0). + * To enable swizzling 'swizzle_bit' must be 1<<6, otherwise zero. + * Swizzling flips bit 6 in the copy destination offset, when certain other + * bits are set in it. + */ +typedef void (*tile_copy_fn)(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, + uint32_t y0, uint32_t y1, + char *dst, const char *src, + int32_t linear_pitch, + uint32_t swizzle_bit, + isl_memcpy_type copy_type); + +/** + * Copy texture data from linear to X tile layout. + * + * \copydoc tile_copy_fn + * + * The mem_copy parameters allow the user to specify an alternative mem_copy + * function that, for instance, may do RGBA -> BGRA swizzling. The first + * function must handle any memory alignment while the second function must + * only handle 16-byte alignment in whichever side (source or destination) is + * tiled. + */ +static inline void +linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, + uint32_t y0, uint32_t y1, + char *dst, const char *src, + int32_t src_pitch, + uint32_t swizzle_bit, + isl_mem_copy_fn mem_copy, + isl_mem_copy_fn mem_copy_align16) +{ + /* The copy destination offset for each range copied is the sum of + * an X offset 'x0' or 'xo' and a Y offset 'yo.' + */ + uint32_t xo, yo; + + src += (ptrdiff_t)y0 * src_pitch; + + for (yo = y0 * xtile_width; yo < y1 * xtile_width; yo += xtile_width) { + /* Bits 9 and 10 of the copy destination offset control swizzling. + * Only 'yo' contributes to those bits in the total offset, + * so calculate 'swizzle' just once per row. + * Move bits 9 and 10 three and four places respectively down + * to bit 6 and xor them. 
+ */ + uint32_t swizzle = ((yo >> 3) ^ (yo >> 4)) & swizzle_bit; + + mem_copy(dst + ((x0 + yo) ^ swizzle), src + x0, x1 - x0); + + for (xo = x1; xo < x2; xo += xtile_span) { + mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + xo, xtile_span); + } + + mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2); + + src += src_pitch; + } +} + +/** + * Copy texture data from linear to Y tile layout. + * + * \copydoc tile_copy_fn + */ +static inline void +linear_to_ytiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, + uint32_t y0, uint32_t y3, + char *dst, const char *src, + int32_t src_pitch, + uint32_t swizzle_bit, + isl_mem_copy_fn mem_copy, + isl_mem_copy_fn mem_copy_align16) +{ + /* Y tiles consist of columns that are 'ytile_span' wide (and the same height + * as the tile). Thus the destination offset for (x,y) is the sum of: + * (x % column_width) // position within column + * (x / column_width) * bytes_per_column // column number * bytes per column + * y * column_width + * + * The copy destination offset for each range copied is the sum of + * an X offset 'xo0' or 'xo' and a Y offset 'yo.' + */ + const uint32_t column_width = ytile_span; + const uint32_t bytes_per_column = column_width * ytile_height; + + uint32_t y1 = MIN2(y3, ALIGN_UP(y0, 4)); + uint32_t y2 = MAX2(y1, ALIGN_DOWN(y3, 4)); + + uint32_t xo0 = (x0 % ytile_span) + (x0 / ytile_span) * bytes_per_column; + uint32_t xo1 = (x1 % ytile_span) + (x1 / ytile_span) * bytes_per_column; + + /* Bit 9 of the destination offset control swizzling. + * Only the X offset contributes to bit 9 of the total offset, + * so swizzle can be calculated in advance for these X positions. + * Move bit 9 three places down to bit 6. 
+ */ + uint32_t swizzle0 = (xo0 >> 3) & swizzle_bit; + uint32_t swizzle1 = (xo1 >> 3) & swizzle_bit; + + uint32_t x, yo; + + src += (ptrdiff_t)y0 * src_pitch; + + if (y0 != y1) { + for (yo = y0 * column_width; yo < y1 * column_width; yo += column_width) { + uint32_t xo = xo1; + uint32_t swizzle = swizzle1; + + mem_copy(dst + ((xo0 + yo) ^ swizzle0), src + x0, x1 - x0); + + /* Step by spans/columns. As it happens, the swizzle bit flips + * at each step so we don't need to calculate it explicitly. + */ + for (x = x1; x < x2; x += ytile_span) { + mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x, ytile_span); + xo += bytes_per_column; + swizzle ^= swizzle_bit; + } + + mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2); + + src += src_pitch; + } + } + + for (yo = y1 * column_width; yo < y2 * column_width; yo += 4 * column_width) { + uint32_t xo = xo1; + uint32_t swizzle = swizzle1; + + if (x0 != x1) { + mem_copy(dst + ((xo0 + yo + 0 * column_width) ^ swizzle0), src + x0 + 0 * src_pitch, x1 - x0); + mem_copy(dst + ((xo0 + yo + 1 * column_width) ^ swizzle0), src + x0 + 1 * src_pitch, x1 - x0); + mem_copy(dst + ((xo0 + yo + 2 * column_width) ^ swizzle0), src + x0 + 2 * src_pitch, x1 - x0); + mem_copy(dst + ((xo0 + yo + 3 * column_width) ^ swizzle0), src + x0 + 3 * src_pitch, x1 - x0); + } + + /* Step by spans/columns. As it happens, the swizzle bit flips + * at each step so we don't need to calculate it explicitly. 
+ */ + for (x = x1; x < x2; x += ytile_span) { + mem_copy_align16(dst + ((xo + yo + 0 * column_width) ^ swizzle), src + x + 0 * src_pitch, ytile_span); + mem_copy_align16(dst + ((xo + yo + 1 * column_width) ^ swizzle), src + x + 1 * src_pitch, ytile_span); + mem_copy_align16(dst + ((xo + yo + 2 * column_width) ^ swizzle), src + x + 2 * src_pitch, ytile_span); + mem_copy_align16(dst + ((xo + yo + 3 * column_width) ^ swizzle), src + x + 3 * src_pitch, ytile_span); + xo += bytes_per_column; + swizzle ^= swizzle_bit; + } + + if (x2 != x3) { + mem_copy_align16(dst + ((xo + yo + 0 * column_width) ^ swizzle), src + x2 + 0 * src_pitch, x3 - x2); + mem_copy_align16(dst + ((xo + yo + 1 * column_width) ^ swizzle), src + x2 + 1 * src_pitch, x3 - x2); + mem_copy_align16(dst + ((xo + yo + 2 * column_width) ^ swizzle), src + x2 + 2 * src_pitch, x3 - x2); + mem_copy_align16(dst + ((xo + yo + 3 * column_width) ^ swizzle), src + x2 + 3 * src_pitch, x3 - x2); + } + + src += 4 * src_pitch; + } + + if (y2 != y3) { + for (yo = y2 * column_width; yo < y3 * column_width; yo += column_width) { + uint32_t xo = xo1; + uint32_t swizzle = swizzle1; + + mem_copy(dst + ((xo0 + yo) ^ swizzle0), src + x0, x1 - x0); + + /* Step by spans/columns. As it happens, the swizzle bit flips + * at each step so we don't need to calculate it explicitly. + */ + for (x = x1; x < x2; x += ytile_span) { + mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x, ytile_span); + xo += bytes_per_column; + swizzle ^= swizzle_bit; + } + + mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2); + + src += src_pitch; + } + } +} + +/** + * Copy texture data from X tile layout to linear. 
+ * + * \copydoc tile_copy_fn + */ +static inline void +xtiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, + uint32_t y0, uint32_t y1, + char *dst, const char *src, + int32_t dst_pitch, + uint32_t swizzle_bit, + isl_mem_copy_fn mem_copy, + isl_mem_copy_fn mem_copy_align16) +{ + /* The copy destination offset for each range copied is the sum of + * an X offset 'x0' or 'xo' and a Y offset 'yo.' + */ + uint32_t xo, yo; + + dst += (ptrdiff_t)y0 * dst_pitch; + + for (yo = y0 * xtile_width; yo < y1 * xtile_width; yo += xtile_width) { + /* Bits 9 and 10 of the copy destination offset control swizzling. + * Only 'yo' contributes to those bits in the total offset, + * so calculate 'swizzle' just once per row. + * Move bits 9 and 10 three and four places respectively down + * to bit 6 and xor them. + */ + uint32_t swizzle = ((yo >> 3) ^ (yo >> 4)) & swizzle_bit; + + mem_copy(dst + x0, src + ((x0 + yo) ^ swizzle), x1 - x0); + + for (xo = x1; xo < x2; xo += xtile_span) { + mem_copy_align16(dst + xo, src + ((xo + yo) ^ swizzle), xtile_span); + } + + mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2); + + dst += dst_pitch; + } +} + + /** + * Copy texture data from Y tile layout to linear. + * + * \copydoc tile_copy_fn + */ +static inline void +ytiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, + uint32_t y0, uint32_t y3, + char *dst, const char *src, + int32_t dst_pitch, + uint32_t swizzle_bit, + isl_mem_copy_fn mem_copy, + isl_mem_copy_fn mem_copy_align16) +{ + /* Y tiles consist of columns that are 'ytile_span' wide (and the same height + * as the tile). Thus the destination offset for (x,y) is the sum of: + * (x % column_width) // position within column + * (x / column_width) * bytes_per_column // column number * bytes per column + * y * column_width + * + * The copy destination offset for each range copied is the sum of + * an X offset 'xo0' or 'xo' and a Y offset 'yo.' 
+ */ + const uint32_t column_width = ytile_span; + const uint32_t bytes_per_column = column_width * ytile_height; + + uint32_t y1 = MIN2(y3, ALIGN_UP(y0, 4)); + uint32_t y2 = MAX2(y1, ALIGN_DOWN(y3, 4)); + + uint32_t xo0 = (x0 % ytile_span) + (x0 / ytile_span) * bytes_per_column; + uint32_t xo1 = (x1 % ytile_span) + (x1 / ytile_span) * bytes_per_column; + + /* Bit 9 of the destination offset control swizzling. + * Only the X offset contributes to bit 9 of the total offset, + * so swizzle can be calculated in advance for these X positions. + * Move bit 9 three places down to bit 6. + */ + uint32_t swizzle0 = (xo0 >> 3) & swizzle_bit; + uint32_t swizzle1 = (xo1 >> 3) & swizzle_bit; + + uint32_t x, yo; + + dst += (ptrdiff_t)y0 * dst_pitch; + + if (y0 != y1) { + for (yo = y0 * column_width; yo < y1 * column_width; yo += column_width) { + uint32_t xo = xo1; + uint32_t swizzle = swizzle1; + + mem_copy(dst + x0, src + ((xo0 + yo) ^ swizzle0), x1 - x0); + + /* Step by spans/columns. As it happens, the swizzle bit flips + * at each step so we don't need to calculate it explicitly. + */ + for (x = x1; x < x2; x += ytile_span) { + mem_copy_align16(dst + x, src + ((xo + yo) ^ swizzle), ytile_span); + xo += bytes_per_column; + swizzle ^= swizzle_bit; + } + + mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2); + + dst += dst_pitch; + } + } + + for (yo = y1 * column_width; yo < y2 * column_width; yo += 4 * column_width) { + uint32_t xo = xo1; + uint32_t swizzle = swizzle1; + + if (x0 != x1) { + mem_copy(dst + x0 + 0 * dst_pitch, src + ((xo0 + yo + 0 * column_width) ^ swizzle0), x1 - x0); + mem_copy(dst + x0 + 1 * dst_pitch, src + ((xo0 + yo + 1 * column_width) ^ swizzle0), x1 - x0); + mem_copy(dst + x0 + 2 * dst_pitch, src + ((xo0 + yo + 2 * column_width) ^ swizzle0), x1 - x0); + mem_copy(dst + x0 + 3 * dst_pitch, src + ((xo0 + yo + 3 * column_width) ^ swizzle0), x1 - x0); + } + + /* Step by spans/columns. 
As it happens, the swizzle bit flips + * at each step so we don't need to calculate it explicitly. + */ + for (x = x1; x < x2; x += ytile_span) { + mem_copy_align16(dst + x + 0 * dst_pitch, src + ((xo + yo + 0 * column_width) ^ swizzle), ytile_span); + mem_copy_align16(dst + x + 1 * dst_pitch, src + ((xo + yo + 1 * column_width) ^ swizzle), ytile_span); + mem_copy_align16(dst + x + 2 * dst_pitch, src + ((xo + yo + 2 * column_width) ^ swizzle), ytile_span); + mem_copy_align16(dst + x + 3 * dst_pitch, src + ((xo + yo + 3 * column_width) ^ swizzle), ytile_span); + xo += bytes_per_column; + swizzle ^= swizzle_bit; + } + + if (x2 != x3) { + mem_copy_align16(dst + x2 + 0 * dst_pitch, src + ((xo + yo + 0 * column_width) ^ swizzle), x3 - x2); + mem_copy_align16(dst + x2 + 1 * dst_pitch, src + ((xo + yo + 1 * column_width) ^ swizzle), x3 - x2); + mem_copy_align16(dst + x2 + 2 * dst_pitch, src + ((xo + yo + 2 * column_width) ^ swizzle), x3 - x2); + mem_copy_align16(dst + x2 + 3 * dst_pitch, src + ((xo + yo + 3 * column_width) ^ swizzle), x3 - x2); + } + + dst += 4 * dst_pitch; + } + + if (y2 != y3) { + for (yo = y2 * column_width; yo < y3 * column_width; yo += column_width) { + uint32_t xo = xo1; + uint32_t swizzle = swizzle1; + + mem_copy(dst + x0, src + ((xo0 + yo) ^ swizzle0), x1 - x0); + + /* Step by spans/columns. As it happens, the swizzle bit flips + * at each step so we don't need to calculate it explicitly. 
+ */ + for (x = x1; x < x2; x += ytile_span) { + mem_copy_align16(dst + x, src + ((xo + yo) ^ swizzle), ytile_span); + xo += bytes_per_column; + swizzle ^= swizzle_bit; + } + + mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2); + + dst += dst_pitch; + } + } +} + +#if defined(INLINE_SSE41) +static ALWAYS_INLINE void * +_memcpy_streaming_load(void *dest, const void *src, size_t count) +{ + if (count == 16) { + __m128i val = _mm_stream_load_si128((__m128i *)src); + _mm_storeu_si128((__m128i *)dest, val); + return dest; + } else if (count == 64) { + __m128i val0 = _mm_stream_load_si128(((__m128i *)src) + 0); + __m128i val1 = _mm_stream_load_si128(((__m128i *)src) + 1); + __m128i val2 = _mm_stream_load_si128(((__m128i *)src) + 2); + __m128i val3 = _mm_stream_load_si128(((__m128i *)src) + 3); + _mm_storeu_si128(((__m128i *)dest) + 0, val0); + _mm_storeu_si128(((__m128i *)dest) + 1, val1); + _mm_storeu_si128(((__m128i *)dest) + 2, val2); + _mm_storeu_si128(((__m128i *)dest) + 3, val3); + return dest; + } else { + assert(count < 64); /* and (count < 16) for ytiled */ + return memcpy(dest, src, count); + } +} +#endif + +static isl_mem_copy_fn +choose_copy_function(isl_memcpy_type copy_type) +{ + switch(copy_type) { + case ISL_MEMCPY: + return memcpy; + case ISL_MEMCPY_BGRA8: + return rgba8_copy; + case ISL_MEMCPY_STREAMING_LOAD: +#if defined(INLINE_SSE41) + return _memcpy_streaming_load; +#else + unreachable("ISL_MEMCOPY_STREAMING_LOAD requires sse4.1"); +#endif + case ISL_MEMCPY_INVALID: + unreachable("invalid copy_type"); + } + unreachable("unhandled copy_type"); + return NULL; +} + +/** + * Copy texture data from linear to X tile layout, faster. + * + * Same as \ref linear_to_xtiled but faster, because it passes constant + * parameters for common cases, allowing the compiler to inline code + * optimized for those cases. 
+ * + * \copydoc tile_copy_fn + */ +static FLATTEN void +linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, + uint32_t y0, uint32_t y1, + char *dst, const char *src, + int32_t src_pitch, + uint32_t swizzle_bit, + isl_memcpy_type copy_type) +{ + isl_mem_copy_fn mem_copy = choose_copy_function(copy_type); + + if (x0 == 0 && x3 == xtile_width && y0 == 0 && y1 == xtile_height) { + if (mem_copy == memcpy) + return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height, + dst, src, src_pitch, swizzle_bit, memcpy, memcpy); + else if (mem_copy == rgba8_copy) + return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height, + dst, src, src_pitch, swizzle_bit, + rgba8_copy, rgba8_copy_aligned_dst); + else + unreachable("not reached"); + } else { + if (mem_copy == memcpy) + return linear_to_xtiled(x0, x1, x2, x3, y0, y1, + dst, src, src_pitch, swizzle_bit, + memcpy, memcpy); + else if (mem_copy == rgba8_copy) + return linear_to_xtiled(x0, x1, x2, x3, y0, y1, + dst, src, src_pitch, swizzle_bit, + rgba8_copy, rgba8_copy_aligned_dst); + else + unreachable("not reached"); + } + linear_to_xtiled(x0, x1, x2, x3, y0, y1, + dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy); +} + +/** + * Copy texture data from linear to Y tile layout, faster. + * + * Same as \ref linear_to_ytiled but faster, because it passes constant + * parameters for common cases, allowing the compiler to inline code + * optimized for those cases. 
+ * + * \copydoc tile_copy_fn + */ +static FLATTEN void +linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, + uint32_t y0, uint32_t y1, + char *dst, const char *src, + int32_t src_pitch, + uint32_t swizzle_bit, + isl_memcpy_type copy_type) +{ + isl_mem_copy_fn mem_copy = choose_copy_function(copy_type); + + if (x0 == 0 && x3 == ytile_width && y0 == 0 && y1 == ytile_height) { + if (mem_copy == memcpy) + return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height, + dst, src, src_pitch, swizzle_bit, memcpy, memcpy); + else if (mem_copy == rgba8_copy) + return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height, + dst, src, src_pitch, swizzle_bit, + rgba8_copy, rgba8_copy_aligned_dst); + else + unreachable("not reached"); + } else { + if (mem_copy == memcpy) + return linear_to_ytiled(x0, x1, x2, x3, y0, y1, + dst, src, src_pitch, swizzle_bit, memcpy, memcpy); + else if (mem_copy == rgba8_copy) + return linear_to_ytiled(x0, x1, x2, x3, y0, y1, + dst, src, src_pitch, swizzle_bit, + rgba8_copy, rgba8_copy_aligned_dst); + else + unreachable("not reached"); + } + linear_to_ytiled(x0, x1, x2, x3, y0, y1, + dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy); +} + +/** + * Copy texture data from X tile layout to linear, faster. + * + * Same as \ref xtile_to_linear but faster, because it passes constant + * parameters for common cases, allowing the compiler to inline code + * optimized for those cases. 
+ * + * \copydoc tile_copy_fn + */ +static FLATTEN void +xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, + uint32_t y0, uint32_t y1, + char *dst, const char *src, + int32_t dst_pitch, + uint32_t swizzle_bit, + isl_memcpy_type copy_type) +{ + isl_mem_copy_fn mem_copy = choose_copy_function(copy_type); + + if (x0 == 0 && x3 == xtile_width && y0 == 0 && y1 == xtile_height) { + if (mem_copy == memcpy) + return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, + dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); + else if (mem_copy == rgba8_copy) + return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, + dst, src, dst_pitch, swizzle_bit, + rgba8_copy, rgba8_copy_aligned_src); +#if defined(INLINE_SSE41) + else if (mem_copy == _memcpy_streaming_load) + return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, + dst, src, dst_pitch, swizzle_bit, + memcpy, _memcpy_streaming_load); +#endif + else + unreachable("not reached"); + } else { + if (mem_copy == memcpy) + return xtiled_to_linear(x0, x1, x2, x3, y0, y1, + dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); + else if (mem_copy == rgba8_copy) + return xtiled_to_linear(x0, x1, x2, x3, y0, y1, + dst, src, dst_pitch, swizzle_bit, + rgba8_copy, rgba8_copy_aligned_src); +#if defined(INLINE_SSE41) + else if (mem_copy == _memcpy_streaming_load) + return xtiled_to_linear(x0, x1, x2, x3, y0, y1, + dst, src, dst_pitch, swizzle_bit, + memcpy, _memcpy_streaming_load); +#endif + else + unreachable("not reached"); + } + xtiled_to_linear(x0, x1, x2, x3, y0, y1, + dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy); +} + +/** + * Copy texture data from Y tile layout to linear, faster. + * + * Same as \ref ytile_to_linear but faster, because it passes constant + * parameters for common cases, allowing the compiler to inline code + * optimized for those cases. 
+ * + * \copydoc tile_copy_fn + */ +static FLATTEN void +ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, + uint32_t y0, uint32_t y1, + char *dst, const char *src, + int32_t dst_pitch, + uint32_t swizzle_bit, + isl_memcpy_type copy_type) +{ + isl_mem_copy_fn mem_copy = choose_copy_function(copy_type); + + if (x0 == 0 && x3 == ytile_width && y0 == 0 && y1 == ytile_height) { + if (mem_copy == memcpy) + return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, + dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); + else if (mem_copy == rgba8_copy) + return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, + dst, src, dst_pitch, swizzle_bit, + rgba8_copy, rgba8_copy_aligned_src); +#if defined(INLINE_SSE41) + else if (copy_type == ISL_MEMCPY_STREAMING_LOAD) + return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, + dst, src, dst_pitch, swizzle_bit, + memcpy, _memcpy_streaming_load); +#endif + else + unreachable("not reached"); + } else { + if (mem_copy == memcpy) + return ytiled_to_linear(x0, x1, x2, x3, y0, y1, + dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); + else if (mem_copy == rgba8_copy) + return ytiled_to_linear(x0, x1, x2, x3, y0, y1, + dst, src, dst_pitch, swizzle_bit, + rgba8_copy, rgba8_copy_aligned_src); +#if defined(INLINE_SSE41) + else if (copy_type == ISL_MEMCPY_STREAMING_LOAD) + return ytiled_to_linear(x0, x1, x2, x3, y0, y1, + dst, src, dst_pitch, swizzle_bit, + memcpy, _memcpy_streaming_load); +#endif + else + unreachable("not reached"); + } + ytiled_to_linear(x0, x1, x2, x3, y0, y1, + dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy); +} + +/** + * Copy from linear to tiled texture. + * + * Divide the region given by X range [xt1, xt2) and Y range [yt1, yt2) into + * pieces that do not cross tile boundaries and copy each piece with a tile + * copy function (\ref tile_copy_fn). + * The X range is in bytes, i.e. pixels * bytes-per-pixel. 
+ * The Y range is in pixels (i.e. unitless). + * 'dst' is the address of (0, 0) in the destination tiled texture. + * 'src' is the address of (xt1, yt1) in the source linear texture. + */ +static void +intel_linear_to_tiled(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + uint32_t dst_pitch, int32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type) +{ + tile_copy_fn tile_copy; + uint32_t xt0, xt3; + uint32_t yt0, yt3; + uint32_t xt, yt; + uint32_t tw, th, span; + uint32_t swizzle_bit = has_swizzling ? 1<<6 : 0; + + if (tiling == ISL_TILING_X) { + tw = xtile_width; + th = xtile_height; + span = xtile_span; + tile_copy = linear_to_xtiled_faster; + } else if (tiling == ISL_TILING_Y0) { + tw = ytile_width; + th = ytile_height; + span = ytile_span; + tile_copy = linear_to_ytiled_faster; + } else { + unreachable("unsupported tiling"); + } + + /* Round out to tile boundaries. */ + xt0 = ALIGN_DOWN(xt1, tw); + xt3 = ALIGN_UP (xt2, tw); + yt0 = ALIGN_DOWN(yt1, th); + yt3 = ALIGN_UP (yt2, th); + + /* Loop over all tiles to which we have something to copy. + * 'xt' and 'yt' are the origin of the destination tile, whether copying + * copying a full or partial tile. + * tile_copy() copies one tile or partial tile. + * Looping x inside y is the faster memory access pattern. + */ + for (yt = yt0; yt < yt3; yt += th) { + for (xt = xt0; xt < xt3; xt += tw) { + /* The area to update is [x0,x3) x [y0,y1). + * May not want the whole tile, hence the min and max. + */ + uint32_t x0 = MAX2(xt1, xt); + uint32_t y0 = MAX2(yt1, yt); + uint32_t x3 = MIN2(xt2, xt + tw); + uint32_t y1 = MIN2(yt2, yt + th); + + /* [x0,x3) is split into [x0,x1), [x1,x2), [x2,x3) such that + * the middle interval is the longest span-aligned part. + * The sub-ranges could be empty. 
+ */ + uint32_t x1, x2; + x1 = ALIGN_UP(x0, span); + if (x1 > x3) + x1 = x2 = x3; + else + x2 = ALIGN_DOWN(x3, span); + + assert(x0 <= x1 && x1 <= x2 && x2 <= x3); + assert(x1 - x0 < span && x3 - x2 < span); + assert(x3 - x0 <= tw); + assert((x2 - x1) % span == 0); + + /* Translate by (xt,yt) for single-tile copier. */ + tile_copy(x0-xt, x1-xt, x2-xt, x3-xt, + y0-yt, y1-yt, + dst + (ptrdiff_t)xt * th + (ptrdiff_t)yt * dst_pitch, + src + (ptrdiff_t)xt - xt1 + ((ptrdiff_t)yt - yt1) * src_pitch, + src_pitch, + swizzle_bit, + copy_type); + } + } +} + +/** + * Copy from tiled to linear texture. + * + * Divide the region given by X range [xt1, xt2) and Y range [yt1, yt2) into + * pieces that do not cross tile boundaries and copy each piece with a tile + * copy function (\ref tile_copy_fn). + * The X range is in bytes, i.e. pixels * bytes-per-pixel. + * The Y range is in pixels (i.e. unitless). + * 'dst' is the address of (xt1, yt1) in the destination linear texture. + * 'src' is the address of (0, 0) in the source tiled texture. + */ +static void +intel_tiled_to_linear(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + int32_t dst_pitch, uint32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type) +{ + tile_copy_fn tile_copy; + uint32_t xt0, xt3; + uint32_t yt0, yt3; + uint32_t xt, yt; + uint32_t tw, th, span; + uint32_t swizzle_bit = has_swizzling ? 1<<6 : 0; + + if (tiling == ISL_TILING_X) { + tw = xtile_width; + th = xtile_height; + span = xtile_span; + tile_copy = xtiled_to_linear_faster; + } else if (tiling == ISL_TILING_Y0) { + tw = ytile_width; + th = ytile_height; + span = ytile_span; + tile_copy = ytiled_to_linear_faster; + } else { + unreachable("unsupported tiling"); + } + +#if defined(INLINE_SSE41) + if (copy_type == ISL_MEMCPY_STREAMING_LOAD) { + /* The hidden cacheline sized register used by movntdqa can apparently + * give you stale data, so do an mfence to invalidate it. 
+ */ + _mm_mfence(); + } +#endif + + /* Round out to tile boundaries. */ + xt0 = ALIGN_DOWN(xt1, tw); + xt3 = ALIGN_UP (xt2, tw); + yt0 = ALIGN_DOWN(yt1, th); + yt3 = ALIGN_UP (yt2, th); + + /* Loop over all tiles to which we have something to copy. + * 'xt' and 'yt' are the origin of the destination tile, whether copying + * copying a full or partial tile. + * tile_copy() copies one tile or partial tile. + * Looping x inside y is the faster memory access pattern. + */ + for (yt = yt0; yt < yt3; yt += th) { + for (xt = xt0; xt < xt3; xt += tw) { + /* The area to update is [x0,x3) x [y0,y1). + * May not want the whole tile, hence the min and max. + */ + uint32_t x0 = MAX2(xt1, xt); + uint32_t y0 = MAX2(yt1, yt); + uint32_t x3 = MIN2(xt2, xt + tw); + uint32_t y1 = MIN2(yt2, yt + th); + + /* [x0,x3) is split into [x0,x1), [x1,x2), [x2,x3) such that + * the middle interval is the longest span-aligned part. + * The sub-ranges could be empty. + */ + uint32_t x1, x2; + x1 = ALIGN_UP(x0, span); + if (x1 > x3) + x1 = x2 = x3; + else + x2 = ALIGN_DOWN(x3, span); + + assert(x0 <= x1 && x1 <= x2 && x2 <= x3); + assert(x1 - x0 < span && x3 - x2 < span); + assert(x3 - x0 <= tw); + assert((x2 - x1) % span == 0); + + /* Translate by (xt,yt) for single-tile copier. 
*/ + tile_copy(x0-xt, x1-xt, x2-xt, x3-xt, + y0-yt, y1-yt, + dst + (ptrdiff_t)xt - xt1 + ((ptrdiff_t)yt - yt1) * dst_pitch, + src + (ptrdiff_t)xt * th + (ptrdiff_t)yt * src_pitch, + dst_pitch, + swizzle_bit, + copy_type); + } + } +} diff -Nru mesa-18.3.3/src/intel/isl/isl_tiled_memcpy_normal.c mesa-19.0.1/src/intel/isl/isl_tiled_memcpy_normal.c --- mesa-18.3.3/src/intel/isl/isl_tiled_memcpy_normal.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/intel/isl/isl_tiled_memcpy_normal.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,59 @@ +/* + * Mesa 3-D graphics library + * + * Copyright 2012 Intel Corporation + * Copyright 2013 Google + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chad Versace + * Frank Henigman + */ + + +#include "isl_tiled_memcpy.c" + +void +_isl_memcpy_linear_to_tiled(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + uint32_t dst_pitch, int32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type) +{ + intel_linear_to_tiled(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, + has_swizzling, tiling, copy_type); +} + +void +_isl_memcpy_tiled_to_linear(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + int32_t dst_pitch, uint32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type) +{ + intel_tiled_to_linear(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, + has_swizzling, tiling, copy_type); +} diff -Nru mesa-18.3.3/src/intel/isl/isl_tiled_memcpy_sse41.c mesa-19.0.1/src/intel/isl/isl_tiled_memcpy_sse41.c --- mesa-18.3.3/src/intel/isl/isl_tiled_memcpy_sse41.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/intel/isl/isl_tiled_memcpy_sse41.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,60 @@ +/* + * Mesa 3-D graphics library + * + * Copyright 2012 Intel Corporation + * Copyright 2013 Google + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chad Versace + * Frank Henigman + */ + +#define INLINE_SSE41 + +#include "isl_tiled_memcpy.c" + +void +_isl_memcpy_linear_to_tiled_sse41(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + uint32_t dst_pitch, int32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type) +{ + intel_linear_to_tiled(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, + has_swizzling, tiling, copy_type); +} + +void +_isl_memcpy_tiled_to_linear_sse41(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + int32_t dst_pitch, uint32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type) +{ + intel_tiled_to_linear(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, + has_swizzling, tiling, copy_type); +} diff -Nru mesa-18.3.3/src/intel/isl/meson.build mesa-19.0.1/src/intel/isl/meson.build --- mesa-18.3.3/src/intel/isl/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/isl/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -69,6 +69,39 @@ command : [prog_python, '@INPUT0@', '--csv', '@INPUT1@', '--out', '@OUTPUT@'], ) +files_isl_tiled_memcpy = files( + 'isl_tiled_memcpy_normal.c' +) + +files_isl_tiled_memcpy_sse41 = files( + 'isl_tiled_memcpy_sse41.c', +) + +isl_tiled_memcpy = static_library( + 'isl_tiled_memcpy', + [files_isl_tiled_memcpy], + include_directories : [ + inc_common, inc_intel, inc_drm_uapi, + ], + c_args : 
[c_vis_args, no_override_init_args, '-msse2'], + extra_files : ['isl_tiled_memcpy.c'] +) + +if with_sse41 + isl_tiled_memcpy_sse41 = static_library( + 'isl_tiled_memcpy_sse41', + [files_isl_tiled_memcpy_sse41], + include_directories : [ + inc_common, inc_intel, inc_drm_uapi, + ], + link_args : ['-Wl,--exclude-libs=ALL'], + c_args : [c_vis_args, no_override_init_args, '-msse2', sse41_args], + extra_files : ['isl_tiled_memcpy.c'] + ) +else + isl_tiled_memcpy_sse41 = [] +endif + libisl_files = files( 'isl.c', 'isl.h', @@ -83,7 +116,7 @@ 'isl', [libisl_files, isl_format_layout_c, genX_bits_h], include_directories : [inc_common, inc_intel, inc_drm_uapi], - link_with : isl_gen_libs, + link_with : [isl_gen_libs, isl_tiled_memcpy, isl_tiled_memcpy_sse41], c_args : [c_vis_args, no_override_init_args], ) @@ -96,6 +129,7 @@ dependencies : dep_m, include_directories : [inc_common, inc_intel], link_with : [libisl, libintel_dev, libmesa_util], - ) + ), + suite : ['intel'], ) endif diff -Nru mesa-18.3.3/src/intel/Makefile.isl.am mesa-19.0.1/src/intel/Makefile.isl.am --- mesa-18.3.3/src/intel/Makefile.isl.am 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/Makefile.isl.am 2019-03-31 23:16:37.000000000 +0000 @@ -31,11 +31,29 @@ isl/libisl-gen11.la \ $(NULL) -noinst_LTLIBRARIES += $(ISL_GEN_LIBS) isl/libisl.la +noinst_LTLIBRARIES += $(ISL_GEN_LIBS) \ + isl/libisl.la \ + libisl_tiled_memcpy.la + +isl_libisl_la_LIBADD = $(ISL_GEN_LIBS) \ + libisl_tiled_memcpy.la + +if SSE41_SUPPORTED +isl_libisl_la_LIBADD += libisl_tiled_memcpy_sse41.la +noinst_LTLIBRARIES += libisl_tiled_memcpy_sse41.la +endif -isl_libisl_la_LIBADD = $(ISL_GEN_LIBS) isl_libisl_la_SOURCES = $(ISL_FILES) $(ISL_GENERATED_FILES) +libisl_tiled_memcpy_la_SOURCES = $(ISL_TILED_MEMCPY_FILES) +libisl_tiled_memcpy_la_CFLAGS = $(AM_CFLAGS) + +libisl_tiled_memcpy_sse41_la_SOURCES = $(ISL_TILED_MEMCPY_SSE41_FILES) +libisl_tiled_memcpy_sse41_la_CFLAGS = $(AM_CFLAGS) $(SSE41_CFLAGS) + +isl_tiled_memcpy_normal.c: 
$(ISL_TILED_MEMCPY_DEP_FILES) +isl_tiled_memcpy_sse41.c: $(ISL_TILED_MEMCPY_DEP_FILES) + isl_libisl_gen4_la_SOURCES = $(ISL_GEN4_FILES) isl_libisl_gen4_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=40 @@ -90,4 +108,5 @@ EXTRA_DIST += \ isl/gen_format_layout.py \ isl/isl_format_layout.csv \ - isl/README + isl/README \ + $(ISL_TILED_MEMCPY_DEP_FILES) diff -Nru mesa-18.3.3/src/intel/Makefile.sources mesa-19.0.1/src/intel/Makefile.sources --- mesa-18.3.3/src/intel/Makefile.sources 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/Makefile.sources 2019-03-31 23:16:37.000000000 +0000 @@ -62,8 +62,8 @@ compiler/brw_fs.h \ compiler/brw_fs_live_variables.cpp \ compiler/brw_fs_live_variables.h \ - compiler/brw_fs_lower_conversions.cpp \ compiler/brw_fs_lower_pack.cpp \ + compiler/brw_fs_lower_regioning.cpp \ compiler/brw_fs_nir.cpp \ compiler/brw_fs_reg_allocate.cpp \ compiler/brw_fs_register_coalesce.cpp \ @@ -85,6 +85,7 @@ compiler/brw_nir_attribute_workarounds.c \ compiler/brw_nir_lower_cs_intrinsics.c \ compiler/brw_nir_lower_image_load_store.c \ + compiler/brw_nir_lower_mem_access_bit_sizes.c \ compiler/brw_nir_opt_peephole_ffma.c \ compiler/brw_nir_tcs_workarounds.c \ compiler/brw_packed_float.c \ @@ -218,8 +219,18 @@ ISL_GENERATED_FILES = \ isl/isl_format_layout.c +ISL_TILED_MEMCPY_FILES = \ + isl/isl_tiled_memcpy_normal.c + +ISL_TILED_MEMCPY_SSE41_FILES = \ + isl/isl_tiled_memcpy_sse41.c + +ISL_TILED_MEMCPY_DEP_FILES = \ + isl/isl_tiled_memcpy.c + VULKAN_FILES := \ vulkan/anv_allocator.c \ + vulkan/anv_android.h \ vulkan/anv_batch_chain.c \ vulkan/anv_blorp.c \ vulkan/anv_cmd_buffer.c \ @@ -246,6 +257,9 @@ vulkan/anv_wsi.c \ vulkan/vk_format_info.h +VULKAN_NON_ANDROID_FILES := \ + vulkan/anv_android_stubs.c + VULKAN_ANDROID_FILES := \ vulkan/anv_android.c diff -Nru mesa-18.3.3/src/intel/Makefile.vulkan.am mesa-19.0.1/src/intel/Makefile.vulkan.am --- mesa-18.3.3/src/intel/Makefile.vulkan.am 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/intel/Makefile.vulkan.am 2019-03-31 23:16:37.000000000 +0000 @@ -171,6 +171,8 @@ VULKAN_CFLAGS += $(ANDROID_CFLAGS) VULKAN_LIB_DEPS += $(ANDROID_LIBS) VULKAN_SOURCES += $(VULKAN_ANDROID_FILES) +else +VULKAN_SOURCES += $(VULKAN_NON_ANDROID_FILES) endif if HAVE_PLATFORM_X11 @@ -251,6 +253,7 @@ vulkan/tests/block_pool_no_free \ vulkan/tests/state_pool_no_free \ vulkan/tests/state_pool_free_list_only \ + vulkan/tests/state_pool_padding \ vulkan/tests/state_pool VULKAN_TEST_LDADD = \ @@ -260,15 +263,23 @@ check_PROGRAMS += $(VULKAN_TESTS) TESTS += $(VULKAN_TESTS) +vulkan_tests_block_pool_no_free_CFLAGS = $(VULKAN_CFLAGS) vulkan_tests_block_pool_no_free_CPPFLAGS = $(VULKAN_CPPFLAGS) vulkan_tests_block_pool_no_free_LDADD = $(VULKAN_TEST_LDADD) +vulkan_tests_state_pool_no_free_CFLAGS = $(VULKAN_CFLAGS) vulkan_tests_state_pool_no_free_CPPFLAGS = $(VULKAN_CPPFLAGS) vulkan_tests_state_pool_no_free_LDADD = $(VULKAN_TEST_LDADD) +vulkan_tests_state_pool_free_list_only_CFLAGS = $(VULKAN_CFLAGS) vulkan_tests_state_pool_free_list_only_CPPFLAGS = $(VULKAN_CPPFLAGS) vulkan_tests_state_pool_free_list_only_LDADD = $(VULKAN_TEST_LDADD) +vulkan_tests_state_pool_padding_CFLAGS = $(VULKAN_CFLAGS) +vulkan_tests_state_pool_padding_CPPFLAGS = $(VULKAN_CPPFLAGS) +vulkan_tests_state_pool_padding_LDADD = $(VULKAN_TEST_LDADD) + +vulkan_tests_state_pool_CFLAGS = $(VULKAN_CFLAGS) vulkan_tests_state_pool_CPPFLAGS = $(VULKAN_CPPFLAGS) vulkan_tests_state_pool_LDADD = $(VULKAN_TEST_LDADD) diff -Nru mesa-18.3.3/src/intel/tools/aubinator.c mesa-19.0.1/src/intel/tools/aubinator.c --- mesa-18.3.3/src/intel/tools/aubinator.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/aubinator.c 2019-03-31 23:16:37.000000000 +0000 @@ -40,7 +40,6 @@ #include "util/macros.h" -#include "common/gen_decoder.h" #include "aub_read.h" #include "aub_mem.h" @@ -131,7 +130,7 @@ } static void -handle_execlist_write(void *user_data, enum gen_engine engine, uint64_t context_descriptor) 
+handle_execlist_write(void *user_data, enum drm_i915_gem_engine_class engine, uint64_t context_descriptor) { const uint32_t pphwsp_size = 4096; uint32_t pphwsp_addr = context_descriptor & 0xfffff000; @@ -143,6 +142,7 @@ uint32_t ring_buffer_head = context[5]; uint32_t ring_buffer_tail = context[7]; uint32_t ring_buffer_start = context[9]; + uint32_t ring_buffer_length = (context[11] & 0x1ff000) + 4096; mem.pml4 = (uint64_t)context[49] << 32 | context[51]; batch_ctx.user_data = &mem; @@ -150,7 +150,7 @@ struct gen_batch_decode_bo ring_bo = aub_mem_get_ggtt_bo(&mem, ring_buffer_start); assert(ring_bo.size > 0); - void *commands = (uint8_t *)ring_bo.map + (ring_buffer_start - ring_bo.addr); + void *commands = (uint8_t *)ring_bo.map + (ring_buffer_start - ring_bo.addr) + ring_buffer_head; if (context_descriptor & 0x100 /* ppgtt */) { batch_ctx.get_bo = aub_mem_get_ppgtt_bo; @@ -158,19 +158,21 @@ batch_ctx.get_bo = aub_mem_get_ggtt_bo; } - (void)engine; /* TODO */ - gen_print_batch(&batch_ctx, commands, ring_buffer_tail - ring_buffer_head, - 0); + batch_ctx.engine = engine; + gen_print_batch(&batch_ctx, commands, + MIN2(ring_buffer_tail - ring_buffer_head, ring_buffer_length), + ring_bo.addr + ring_buffer_head); aub_mem_clear_bo_maps(&mem); } static void -handle_ring_write(void *user_data, enum gen_engine engine, +handle_ring_write(void *user_data, enum drm_i915_gem_engine_class engine, const void *data, uint32_t data_len) { batch_ctx.user_data = &mem; batch_ctx.get_bo = aub_mem_get_ggtt_bo; + batch_ctx.engine = engine; gen_print_batch(&batch_ctx, data, data_len, 0); aub_mem_clear_bo_maps(&mem); diff -Nru mesa-18.3.3/src/intel/tools/aubinator_error_decode.c mesa-19.0.1/src/intel/tools/aubinator_error_decode.c --- mesa-18.3.3/src/intel/tools/aubinator_error_decode.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/aubinator_error_decode.c 2019-03-31 23:16:37.000000000 +0000 @@ -76,49 +76,42 @@ } struct ring_register_mapping { - unsigned ring_class; + 
enum drm_i915_gem_engine_class ring_class; unsigned ring_instance; const char *register_name; }; -enum { - RCS, - BCS, - VCS, - VECS, -}; - static const struct ring_register_mapping acthd_registers[] = { - { BCS, 0, "BCS_ACTHD_UDW" }, - { VCS, 0, "VCS_ACTHD_UDW" }, - { VCS, 1, "VCS2_ACTHD_UDW" }, - { RCS, 0, "ACTHD_UDW" }, - { VECS, 0, "VECS_ACTHD_UDW" }, + { I915_ENGINE_CLASS_COPY, 0, "BCS_ACTHD_UDW" }, + { I915_ENGINE_CLASS_VIDEO, 0, "VCS_ACTHD_UDW" }, + { I915_ENGINE_CLASS_VIDEO, 1, "VCS2_ACTHD_UDW" }, + { I915_ENGINE_CLASS_RENDER, 0, "ACTHD_UDW" }, + { I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, "VECS_ACTHD_UDW" }, }; static const struct ring_register_mapping ctl_registers[] = { - { BCS, 0, "BCS_RING_BUFFER_CTL" }, - { VCS, 0, "VCS_RING_BUFFER_CTL" }, - { VCS, 1, "VCS2_RING_BUFFER_CTL" }, - { RCS, 0, "RCS_RING_BUFFER_CTL" }, - { VECS, 0, "VECS_RING_BUFFER_CTL" }, + { I915_ENGINE_CLASS_COPY, 0, "BCS_RING_BUFFER_CTL" }, + { I915_ENGINE_CLASS_VIDEO, 0, "VCS_RING_BUFFER_CTL" }, + { I915_ENGINE_CLASS_VIDEO, 1, "VCS2_RING_BUFFER_CTL" }, + { I915_ENGINE_CLASS_RENDER, 0, "RCS_RING_BUFFER_CTL" }, + { I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, "VECS_RING_BUFFER_CTL" }, }; static const struct ring_register_mapping fault_registers[] = { - { BCS, 0, "BCS_FAULT_REG" }, - { VCS, 0, "VCS_FAULT_REG" }, - { RCS, 0, "RCS_FAULT_REG" }, - { VECS, 0, "VECS_FAULT_REG" }, + { I915_ENGINE_CLASS_COPY, 0, "BCS_FAULT_REG" }, + { I915_ENGINE_CLASS_VIDEO, 0, "VCS_FAULT_REG" }, + { I915_ENGINE_CLASS_RENDER, 0, "RCS_FAULT_REG" }, + { I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, "VECS_FAULT_REG" }, }; static int ring_name_to_class(const char *ring_name, - unsigned int *class) + enum drm_i915_gem_engine_class *class) { static const char *class_names[] = { - [RCS] = "rcs", - [BCS] = "bcs", - [VCS] = "vcs", - [VECS] = "vecs", + [I915_ENGINE_CLASS_RENDER] = "rcs", + [I915_ENGINE_CLASS_COPY] = "bcs", + [I915_ENGINE_CLASS_VIDEO] = "vcs", + [I915_ENGINE_CLASS_VIDEO_ENHANCE] = "vecs", }; for (size_t i = 0; i < 
ARRAY_SIZE(class_names); i++) { if (strncmp(ring_name, class_names[i], strlen(class_names[i]))) @@ -133,11 +126,11 @@ unsigned int class; int instance; } legacy_names[] = { - { "render", RCS, 0 }, - { "blt", BCS, 0 }, - { "bsd", VCS, 0 }, - { "bsd2", VCS, 1 }, - { "vebox", VECS, 0 }, + { "render", I915_ENGINE_CLASS_RENDER, 0 }, + { "blt", I915_ENGINE_CLASS_COPY, 0 }, + { "bsd", I915_ENGINE_CLASS_VIDEO, 0 }, + { "bsd2", I915_ENGINE_CLASS_VIDEO, 1 }, + { "vebox", I915_ENGINE_CLASS_VIDEO_ENHANCE, 0 }, }; for (size_t i = 0; i < ARRAY_SIZE(legacy_names); i++) { if (strcmp(ring_name, legacy_names[i].name)) @@ -155,7 +148,7 @@ unsigned nb_mapping, const char *ring_name) { - unsigned int class; + enum drm_i915_gem_engine_class class; int instance; instance = ring_name_to_class(ring_name, &class); @@ -174,7 +167,7 @@ instdone_register_for_ring(const struct gen_device_info *devinfo, const char *ring_name) { - unsigned int class; + enum drm_i915_gem_engine_class class; int instance; instance = ring_name_to_class(ring_name, &class); @@ -182,16 +175,16 @@ return NULL; switch (class) { - case RCS: + case I915_ENGINE_CLASS_RENDER: if (devinfo->gen == 6) return "INSTDONE_2"; else return "INSTDONE_1"; - case BCS: + case I915_ENGINE_CLASS_COPY: return "BCS_INSTDONE"; - case VCS: + case I915_ENGINE_CLASS_VIDEO: switch (instance) { case 0: return "VCS_INSTDONE"; @@ -201,8 +194,11 @@ return NULL; } - case VECS: + case I915_ENGINE_CLASS_VIDEO_ENHANCE: return "VECS_INSTDONE"; + + default: + return NULL; } return NULL; @@ -601,6 +597,9 @@ for (int s = 0; s < num_sections; s++) { + enum drm_i915_gem_engine_class class; + ring_name_to_class(sections[s].ring_name, &class); + printf("--- %s (%s) at 0x%08x %08x\n", sections[s].buffer_name, sections[s].ring_name, (unsigned) (sections[s].gtt_offset >> 32), @@ -610,6 +609,7 @@ strcmp(sections[s].buffer_name, "batch buffer") == 0 || strcmp(sections[s].buffer_name, "ring buffer") == 0 || strcmp(sections[s].buffer_name, "HW Context") == 0) { + 
batch_ctx.engine = class; gen_print_batch(&batch_ctx, sections[s].data, sections[s].dword_count * 4, sections[s].gtt_offset); diff -Nru mesa-18.3.3/src/intel/tools/aubinator_viewer.cpp mesa-19.0.1/src/intel/tools/aubinator_viewer.cpp --- mesa-18.3.3/src/intel/tools/aubinator_viewer.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/aubinator_viewer.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -97,7 +97,7 @@ } static void -handle_ring_write(void *user_data, enum gen_engine engine, +handle_ring_write(void *user_data, enum drm_i915_gem_engine_class engine, const void *ring_data, uint32_t ring_data_len) { struct aub_file *file = (struct aub_file *) user_data; @@ -387,16 +387,14 @@ window->base.display = display_shader_window; window->base.destroy = destroy_shader_window; - struct gen_batch_decode_bo shader_bo; - if (mem->pml4) - shader_bo = aub_mem_get_ppgtt_bo(mem, address); - else - shader_bo = aub_mem_get_ggtt_bo(mem, address); - + struct gen_batch_decode_bo shader_bo = + aub_mem_get_ppgtt_bo(mem, address); if (shader_bo.map) { FILE *f = open_memstream(&window->shader, &window->shader_size); if (f) { - gen_disasm_disassemble(context.file->disasm, shader_bo.map, 0, f); + gen_disasm_disassemble(context.file->disasm, + (const uint8_t *) shader_bo.map + + (address - shader_bo.addr), 0, f); fclose(f); } } @@ -695,7 +693,7 @@ } static void -display_batch_ring_write(void *user_data, enum gen_engine engine, +display_batch_ring_write(void *user_data, enum drm_i915_gem_engine_class engine, const void *data, uint32_t data_len) { struct batch_window *window = (struct batch_window *) user_data; @@ -706,7 +704,8 @@ } static void -display_batch_execlist_write(void *user_data, enum gen_engine engine, +display_batch_execlist_write(void *user_data, + enum drm_i915_gem_engine_class engine, uint64_t context_descriptor) { struct batch_window *window = (struct batch_window *) user_data; @@ -722,19 +721,21 @@ uint32_t ring_buffer_head = context_img[5]; uint32_t 
ring_buffer_tail = context_img[7]; uint32_t ring_buffer_start = context_img[9]; + uint32_t ring_buffer_length = (context_img[11] & 0x1ff000) + 4096; window->mem.pml4 = (uint64_t)context_img[49] << 32 | context_img[51]; struct gen_batch_decode_bo ring_bo = aub_mem_get_ggtt_bo(&window->mem, ring_buffer_start); assert(ring_bo.size > 0); - void *commands = (uint8_t *)ring_bo.map + (ring_buffer_start - ring_bo.addr); + void *commands = (uint8_t *)ring_bo.map + (ring_buffer_start - ring_bo.addr) + ring_buffer_head; window->uses_ppgtt = true; + window->decode_ctx.engine = engine; aub_viewer_render_batch(&window->decode_ctx, commands, - ring_buffer_tail - ring_buffer_head, - ring_buffer_start); + MIN2(ring_buffer_tail - ring_buffer_head, ring_buffer_length), + ring_buffer_start + ring_buffer_head); } static void @@ -992,6 +993,7 @@ ImGui::ColorEdit3("error", (float *)&cfg->error_color, cflags); ImGui::SameLine(); ImGui::ColorEdit3("highlight", (float *)&cfg->highlight_color, cflags); ImGui::SameLine(); ImGui::ColorEdit3("dwords", (float *)&cfg->dwords_color, cflags); ImGui::SameLine(); + ImGui::ColorEdit3("booleans", (float *)&cfg->boolean_color, cflags); ImGui::SameLine(); if (ImGui::Button("Commands list") || has_ctrl_key('c')) { show_commands_window(); } ImGui::SameLine(); if (ImGui::Button("Registers list") || has_ctrl_key('r')) { show_register_window(); } ImGui::SameLine(); diff -Nru mesa-18.3.3/src/intel/tools/aubinator_viewer_decoder.cpp mesa-19.0.1/src/intel/tools/aubinator_viewer_decoder.cpp --- mesa-18.3.3/src/intel/tools/aubinator_viewer_decoder.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/aubinator_viewer_decoder.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -42,6 +42,7 @@ ctx->get_bo = get_bo; ctx->get_state_size = get_state_size; ctx->user_data = user_data; + ctx->engine = I915_ENGINE_CLASS_RENDER; ctx->cfg = cfg; ctx->decode_cfg = decode_cfg; @@ -73,7 +74,12 @@ } if (!gen_field_is_header(iter.field)) { if 
(ctx->decode_cfg->field_filter.PassFilter(iter.name)) { - ImGui::Text("%s: %s", iter.name, iter.value); + if (iter.field->type.kind == gen_type::GEN_TYPE_BOOL && iter.raw_value) { + ImGui::Text("%s: ", iter.name); ImGui::SameLine(); + ImGui::TextColored(ctx->cfg->boolean_color, "true"); + } else { + ImGui::Text("%s: %s", iter.name, iter.value); + } if (iter.struct_desc) { int struct_dword = iter.start_bit / 32; uint64_t struct_address = address + 4 * struct_dword; @@ -140,7 +146,8 @@ uint64_t addr = ctx->instruction_base + ksp; struct gen_batch_decode_bo bo = ctx_get_bo(ctx, addr); if (!bo.map) { - ImGui::TextColored(ctx->cfg->missing_color, "Shader unavailable"); + ImGui::TextColored(ctx->cfg->missing_color, + "Shader unavailable addr=0x%012" PRIx64, addr); return; } @@ -231,8 +238,12 @@ continue; } - ImGui::Text("pointer %u: %08x", i, pointers[i]); - aub_viewer_print_group(ctx, strct, addr, (const uint8_t *) bo.map + (addr - bo.addr)); + const uint8_t *state = (const uint8_t *) bo.map + (addr - bo.addr); + if (ImGui::TreeNodeEx(&pointers[i], ImGuiTreeNodeFlags_Framed, + "pointer %u: %08x", i, pointers[i])) { + aub_viewer_print_group(ctx, strct, addr, state); + ImGui::TreePop(); + } } } @@ -260,8 +271,11 @@ } for (int i = 0; i < count; i++) { - ImGui::Text("sampler state %d", i); - aub_viewer_print_group(ctx, strct, state_addr, state_map); + if (ImGui::TreeNodeEx(state_map, ImGuiTreeNodeFlags_Framed, + "sampler state %d", i)) { + aub_viewer_print_group(ctx, strct, state_addr, state_map); + ImGui::TreePop(); + } state_addr += 16; state_map += 16; } @@ -624,8 +638,6 @@ struct gen_group *inst, const uint32_t *p, const char *struct_type, int count) { - struct gen_group *state = gen_spec_find_struct(ctx->spec, struct_type); - uint32_t state_offset = 0; struct gen_field_iterator iter; @@ -648,12 +660,28 @@ return; } + struct gen_group *state = gen_spec_find_struct(ctx->spec, struct_type); + if (strcmp(struct_type, "BLEND_STATE") == 0) { + /* Blend states are different 
from the others because they have a header + * struct called BLEND_STATE which is followed by a variable number of + * BLEND_STATE_ENTRY structs. + */ + ImGui::Text("%s", struct_type); + aub_viewer_print_group(ctx, state, state_addr, state_map); + + state_addr += state->dw_length * 4; + state_map += state->dw_length * 4; + + struct_type = "BLEND_STATE_ENTRY"; + state = gen_spec_find_struct(ctx->spec, struct_type); + } + for (int i = 0; i < count; i++) { ImGui::Text("%s %d", struct_type, i); - aub_viewer_print_group(ctx, state, state_offset, state_map); + aub_viewer_print_group(ctx, state, state_addr, state_map); state_addr += state->dw_length * 4; - state_map += state->dw_length; + state_map += state->dw_length * 4; } } @@ -871,7 +899,7 @@ int length; for (p = batch; p < end; p += length) { - inst = gen_spec_find_instruction(ctx->spec, p); + inst = gen_spec_find_instruction(ctx->spec, ctx->engine, p); length = gen_group_get_length(inst, p); assert(inst == NULL || length > 0); length = MAX2(1, length); @@ -880,7 +908,7 @@ if (inst == NULL) { ImGui::TextColored(ctx->cfg->error_color, - "x%08" PRIx64 ": unknown instruction %08x", + "0x%08" PRIx64 ": unknown instruction %08x", offset, p[0]); continue; } diff -Nru mesa-18.3.3/src/intel/tools/aubinator_viewer.h mesa-19.0.1/src/intel/tools/aubinator_viewer.h --- mesa-18.3.3/src/intel/tools/aubinator_viewer.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/aubinator_viewer.h 2019-03-31 23:16:37.000000000 +0000 @@ -12,13 +12,15 @@ ImColor highlight_color; ImColor error_color; ImColor missing_color; + ImColor boolean_color; aub_viewer_cfg() : clear_color(114, 144, 154), dwords_color(29, 177, 194, 255), highlight_color(0, 230, 0, 255), error_color(236, 255, 0, 255), - missing_color(230, 0, 230, 255) {} + missing_color(230, 0, 230, 255), + boolean_color(228, 75, 255) {} }; struct aub_viewer_decode_cfg { @@ -68,6 +70,7 @@ struct gen_spec *spec; struct gen_disasm *disasm; + enum drm_i915_gem_engine_class 
engine; struct aub_viewer_cfg *cfg; struct aub_viewer_decode_cfg *decode_cfg; diff -Nru mesa-18.3.3/src/intel/tools/aub_mem.c mesa-19.0.1/src/intel/tools/aub_mem.c --- mesa-18.3.3/src/intel/tools/aub_mem.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/aub_mem.c 2019-03-31 23:16:37.000000000 +0000 @@ -289,8 +289,9 @@ continue; uint32_t map_offset = i->virt_addr - address; - void *res = mmap((uint8_t *)bo.map + map_offset, 4096, PROT_READ, - MAP_SHARED | MAP_FIXED, mem->mem_fd, phys_mem->fd_offset); + MAYBE_UNUSED void *res = + mmap((uint8_t *)bo.map + map_offset, 4096, PROT_READ, + MAP_SHARED | MAP_FIXED, mem->mem_fd, phys_mem->fd_offset); assert(res != MAP_FAILED); } @@ -354,8 +355,9 @@ for (uint64_t page = address; page < end; page += 4096) { struct phys_mem *phys_mem = ppgtt_walk(mem, mem->pml4, page); - void *res = mmap((uint8_t *)bo.map + (page - bo.addr), 4096, PROT_READ, - MAP_SHARED | MAP_FIXED, mem->mem_fd, phys_mem->fd_offset); + MAYBE_UNUSED void *res = + mmap((uint8_t *)bo.map + (page - bo.addr), 4096, PROT_READ, + MAP_SHARED | MAP_FIXED, mem->mem_fd, phys_mem->fd_offset); assert(res != MAP_FAILED); } diff -Nru mesa-18.3.3/src/intel/tools/aub_read.c mesa-19.0.1/src/intel/tools/aub_read.c --- mesa-18.3.3/src/intel/tools/aub_read.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/aub_read.c 2019-03-31 23:16:37.000000000 +0000 @@ -136,7 +136,7 @@ int type = p[1] & AUB_TRACE_TYPE_MASK; int address_space = p[1] & AUB_TRACE_ADDRESS_SPACE_MASK; int header_length = p[0] & 0xffff; - int engine = GEN_ENGINE_RENDER; + enum drm_i915_gem_engine_class engine = I915_ENGINE_CLASS_RENDER; const void *data = p + header_length + 2; uint64_t address = gen_48b_address((read->devinfo.gen >= 8 ? 
((uint64_t) p[5] << 32) : 0) | ((uint64_t) p[3])); @@ -151,13 +151,13 @@ case AUB_TRACE_OP_COMMAND_WRITE: switch (type) { case AUB_TRACE_TYPE_RING_PRB0: - engine = GEN_ENGINE_RENDER; + engine = I915_ENGINE_CLASS_RENDER; break; case AUB_TRACE_TYPE_RING_PRB1: - engine = GEN_ENGINE_VIDEO; + engine = I915_ENGINE_CLASS_VIDEO; break; case AUB_TRACE_TYPE_RING_PRB2: - engine = GEN_ENGINE_BLITTER; + engine = I915_ENGINE_CLASS_COPY; break; default: parse_error(read, p, "command write to unknown ring %d\n", type); @@ -182,7 +182,7 @@ if (read->reg_write) read->reg_write(read->user_data, offset, value); - int engine; + enum drm_i915_gem_engine_class engine; uint64_t context_descriptor; switch (offset) { @@ -192,7 +192,7 @@ return; read->render_elsp_index = 0; - engine = GEN_ENGINE_RENDER; + engine = I915_ENGINE_CLASS_RENDER; context_descriptor = (uint64_t)read->render_elsp[2] << 32 | read->render_elsp[3]; break; @@ -202,7 +202,7 @@ return; read->video_elsp_index = 0; - engine = GEN_ENGINE_VIDEO; + engine = I915_ENGINE_CLASS_VIDEO; context_descriptor = (uint64_t)read->video_elsp[2] << 32 | read->video_elsp[3]; break; @@ -212,46 +212,40 @@ return; read->blitter_elsp_index = 0; - engine = GEN_ENGINE_BLITTER; + engine = I915_ENGINE_CLASS_COPY; context_descriptor = (uint64_t)read->blitter_elsp[2] << 32 | read->blitter_elsp[3]; break; case 0x2510: /* render elsq0 lo */ read->render_elsp[3] = value; return; - break; case 0x2514: /* render elsq0 hi */ read->render_elsp[2] = value; return; - break; case 0x12510: /* video elsq0 lo */ read->video_elsp[3] = value; return; - break; case 0x12514: /* video elsq0 hi */ read->video_elsp[2] = value; return; - break; case 0x22510: /* blitter elsq0 lo */ read->blitter_elsp[3] = value; return; - break; case 0x22514: /* blitter elsq0 hi */ read->blitter_elsp[2] = value; return; - break; case 0x2550: /* render elsc */ - engine = GEN_ENGINE_RENDER; + engine = I915_ENGINE_CLASS_RENDER; context_descriptor = (uint64_t)read->render_elsp[2] << 32 | 
read->render_elsp[3]; break; case 0x12550: /* video_elsc */ - engine = GEN_ENGINE_VIDEO; + engine = I915_ENGINE_CLASS_VIDEO; context_descriptor = (uint64_t)read->video_elsp[2] << 32 | read->video_elsp[3]; break; case 0x22550: /* blitter elsc */ - engine = GEN_ENGINE_BLITTER; + engine = I915_ENGINE_CLASS_COPY; context_descriptor = (uint64_t)read->blitter_elsp[2] << 32 | read->blitter_elsp[3]; break; @@ -294,7 +288,8 @@ int aub_read_command(struct aub_read *read, const void *data, uint32_t data_len) { - const uint32_t *p = data, *end = data + data_len, *next; + const uint32_t *p = data, *next; + MAYBE_UNUSED const uint32_t *end = data + data_len; uint32_t h, header_length, bias; assert(data_len >= 4); diff -Nru mesa-18.3.3/src/intel/tools/aub_read.h mesa-19.0.1/src/intel/tools/aub_read.h --- mesa-18.3.3/src/intel/tools/aub_read.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/aub_read.h 2019-03-31 23:16:37.000000000 +0000 @@ -28,17 +28,12 @@ #include #include "dev/gen_device_info.h" +#include "drm-uapi/i915_drm.h" #ifdef __cplusplus extern "C" { #endif -enum gen_engine { - GEN_ENGINE_RENDER = 1, - GEN_ENGINE_VIDEO = 2, - GEN_ENGINE_BLITTER = 3, -}; - struct aub_read { /* Caller's data */ void *user_data; @@ -55,9 +50,9 @@ void (*reg_write)(void *user_data, uint32_t reg_offset, uint32_t reg_value); - void (*ring_write)(void *user_data, enum gen_engine engine, + void (*ring_write)(void *user_data, enum drm_i915_gem_engine_class engine, const void *data, uint32_t data_len); - void (*execlist_write)(void *user_data, enum gen_engine engine, + void (*execlist_write)(void *user_data, enum drm_i915_gem_engine_class engine, uint64_t context_descriptor); /* Reader's data */ diff -Nru mesa-18.3.3/src/intel/tools/i965_disasm.c mesa-19.0.1/src/intel/tools/i965_disasm.c --- mesa-18.3.3/src/intel/tools/i965_disasm.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/i965_disasm.c 2019-03-31 23:16:37.000000000 +0000 @@ -47,17 +47,23 @@ static 
void * i965_disasm_read_binary(FILE *fp, size_t *end) { + size_t size; void *assembly; *end = i965_disasm_get_file_size(fp); + if (!*end) + return NULL; assembly = malloc(*end + 1); if (assembly == NULL) return NULL; - fread(assembly, *end, 1, fp); + size = fread(assembly, *end, 1, fp); fclose(fp); - + if (!size) { + free(assembly); + return NULL; + } return assembly; } @@ -167,7 +173,11 @@ assembly = i965_disasm_read_binary(fp, &end); if (!assembly) { - fprintf(stderr, "Unable to allocate buffer to read binary file\n"); + if (end) + fprintf(stderr, "Unable to allocate buffer to read binary file\n"); + else + fprintf(stderr, "Input file is empty\n"); + exit(EXIT_FAILURE); } diff -Nru mesa-18.3.3/src/intel/tools/intel_dump_gpu.c mesa-19.0.1/src/intel/tools/intel_dump_gpu.c --- mesa-18.3.3/src/intel/tools/intel_dump_gpu.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/intel_dump_gpu.c 2019-03-31 23:16:37.000000000 +0000 @@ -358,10 +358,16 @@ verbose = 2; } } else if (!strcmp(key, "device")) { + fail_if(device != 0, "Device/Platform override specified multiple times."); fail_if(sscanf(value, "%i", &device) != 1, "failed to parse device id '%s'", value); device_override = true; + } else if (!strcmp(key, "platform")) { + fail_if(device != 0, "Device/Platform override specified multiple times."); + device = gen_device_name_to_pci_device_id(value); + fail_if(device == -1, "Unknown platform '%s'", value); + device_override = true; } else if (!strcmp(key, "file")) { output_filename = strdup(value); output_file = fopen(output_filename, "w+"); diff -Nru mesa-18.3.3/src/intel/tools/intel_dump_gpu.in mesa-19.0.1/src/intel/tools/intel_dump_gpu.in --- mesa-18.3.3/src/intel/tools/intel_dump_gpu.in 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/intel_dump_gpu.in 2019-03-31 23:16:37.000000000 +0000 @@ -8,15 +8,19 @@ Run COMMAND with ARGUMENTS and dump an AUB file that captures buffer contents and execution of the GEM application. 
- -o, --output=FILE Name of AUB file. Defaults to COMMAND.aub + -g, --gdb Launch GDB - --device=ID Override PCI ID of the reported device + -o, --output=FILE Name of AUB file. Defaults to COMMAND.aub - -v Enable verbose output + --device=ID Override PCI ID of the reported device - -vv Enable extra verbosity - dumps gtt mappings + -p, --platform=NAME Override PCI ID using a platform name - --help Display this help message and exit + -v Enable verbose output + + -vv Enable extra verbosity - dumps gtt mappings + + --help Display this help message and exit EOF @@ -35,11 +39,6 @@ while true; do case "$1" in - -o) - file=$2 - add_arg "file=${file:-$(basename ${file}).aub}" - shift 2 - ;; -v) add_arg "verbose=1" shift 1 @@ -48,6 +47,11 @@ add_arg "verbose=2" shift 1 ;; + -o) + file=$2 + add_arg "file=${file:-$(basename ${file}).aub}" + shift 2 + ;; -o*) file=${1##-o} add_arg "file=${file:-$(basename ${file}).aub}" @@ -62,6 +66,21 @@ add_arg "device=${1##--device=}" shift ;; + -p) + platform=$2 + add_arg "platform=${platform}" + shift 2 + ;; + -p*) + platform=${1##-p} + add_arg "platform=${platform}" + shift + ;; + --platform=*) + platform=${1##-p} + add_arg "platform=${platform}" + shift + ;; --gdb) gdb=1 shift diff -Nru mesa-18.3.3/src/intel/tools/intel_sanitize_gpu.c mesa-19.0.1/src/intel/tools/intel_sanitize_gpu.c --- mesa-18.3.3/src/intel/tools/intel_sanitize_gpu.c 2018-04-11 19:02:35.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/intel_sanitize_gpu.c 2019-03-31 23:16:37.000000000 +0000 @@ -39,6 +39,7 @@ #include #include "util/hash_table.h" +#include "util/u_math.h" #define INTEL_LOG_TAG "INTEL-SANITIZE-GPU" #include "common/intel_log.h" @@ -109,8 +110,7 @@ { struct refcnt_hash_table *r = malloc(sizeof(*r)); r->refcnt = 1; - r->t = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); + r->t = _mesa_pointer_hash_table_create(NULL); _mesa_hash_table_insert(fds_to_bo_sizes, (void*)(uintptr_t)fd, (void*)(uintptr_t)r); } @@ -165,7 +165,7 @@ { 
struct drm_i915_gem_mmap mmap_arg = { .handle = handle, - .offset = bo_size(fd, handle), + .offset = align64(bo_size(fd, handle), 4096), .size = PADDING_SIZE, .flags = 0, }; @@ -207,9 +207,11 @@ static int create_with_padding(int fd, struct drm_i915_gem_create *create) { - create->size += PADDING_SIZE; + uint64_t original_size = create->size; + + create->size = align64(original_size, 4096) + PADDING_SIZE; int ret = libc_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, create); - create->size -= PADDING_SIZE; + create->size = original_size; if (ret != 0) return ret; @@ -217,14 +219,16 @@ uint8_t *noise_values; struct drm_i915_gem_mmap mmap_arg = { .handle = create->handle, - .offset = create->size, + .offset = align64(create->size, 4096), .size = PADDING_SIZE, .flags = 0, }; ret = libc_ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg); - if (ret != 0) + if (ret != 0) { + intel_logd("Unable to map buffer %d for pad creation.\n", create->handle); return 0; + } noise_values = (uint8_t*) (uintptr_t) mmap_arg.addr_ptr; fill_noise_buffer(noise_values, create->handle & 0xFF, @@ -421,8 +425,7 @@ static void __attribute__ ((constructor)) init(void) { - fds_to_bo_sizes = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); + fds_to_bo_sizes = _mesa_pointer_hash_table_create(NULL); libc_open = dlsym(RTLD_NEXT, "open"); libc_close = dlsym(RTLD_NEXT, "close"); libc_fcntl = dlsym(RTLD_NEXT, "fcntl"); diff -Nru mesa-18.3.3/src/intel/tools/intel_sanitize_gpu.in mesa-19.0.1/src/intel/tools/intel_sanitize_gpu.in --- mesa-18.3.3/src/intel/tools/intel_sanitize_gpu.in 2018-04-11 19:02:35.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/intel_sanitize_gpu.in 2019-03-31 23:16:37.000000000 +0000 @@ -1,4 +1,57 @@ #!/bin/bash # -*- mode: sh -*- -LD_PRELOAD="@install_libexecdir@/libintel_sanitize_gpu.so${LD_PRELOAD:+:$LD_PRELOAD}" exec "$@" +function show_help() { + cat <u64; - while (current.offset != EMPTY) { - /* We have to add a memory barrier here so that the list head (and - * 
offset) gets read before we read the map pointer. This way we - * know that the map pointer is valid for the given offset at the - * point where we read it. - */ - __sync_synchronize(); + table->device = device; - int32_t *next_ptr = *map + current.offset; - new.offset = VG_NOACCESS_READ(next_ptr); - new.count = current.count + 1; - old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); - if (old.u64 == current.u64) { - *offset = current.offset; - return true; - } - current = old; + table->fd = memfd_create("state table", MFD_CLOEXEC); + if (table->fd == -1) + return vk_error(VK_ERROR_INITIALIZATION_FAILED); + + /* Just make it 2GB up-front. The Linux kernel won't actually back it + * with pages until we either map and fault on one of them or we use + * userptr and send a chunk of it off to the GPU. + */ + if (ftruncate(table->fd, BLOCK_POOL_MEMFD_SIZE) == -1) { + result = vk_error(VK_ERROR_INITIALIZATION_FAILED); + goto fail_fd; } - return false; + if (!u_vector_init(&table->mmap_cleanups, + round_to_power_of_two(sizeof(struct anv_state_table_cleanup)), + 128)) { + result = vk_error(VK_ERROR_INITIALIZATION_FAILED); + goto fail_fd; + } + + table->state.next = 0; + table->state.end = 0; + table->size = 0; + + uint32_t initial_size = initial_entries * ANV_STATE_ENTRY_SIZE; + result = anv_state_table_expand_range(table, initial_size); + if (result != VK_SUCCESS) + goto fail_mmap_cleanups; + + return VK_SUCCESS; + + fail_mmap_cleanups: + u_vector_finish(&table->mmap_cleanups); + fail_fd: + close(table->fd); + + return result; } -static void -anv_free_list_push(union anv_free_list *list, void *map, int32_t offset, - uint32_t size, uint32_t count) +static VkResult +anv_state_table_expand_range(struct anv_state_table *table, uint32_t size) { - union anv_free_list current, old, new; - int32_t *next_ptr = map + offset; + void *map; + struct anv_mmap_cleanup *cleanup; + + /* Assert that we only ever grow the pool */ + assert(size >= table->state.end); + + 
/* Make sure that we don't go outside the bounds of the memfd */ + if (size > BLOCK_POOL_MEMFD_SIZE) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + cleanup = u_vector_add(&table->mmap_cleanups); + if (!cleanup) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + *cleanup = ANV_MMAP_CLEANUP_INIT; + + /* Just leak the old map until we destroy the pool. We can't munmap it + * without races or imposing locking on the block allocate fast path. On + * the whole the leaked maps adds up to less than the size of the + * current map. MAP_POPULATE seems like the right thing to do, but we + * should try to get some numbers. + */ + map = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, table->fd, 0); + if (map == MAP_FAILED) { + return vk_errorf(table->device->instance, table->device, + VK_ERROR_OUT_OF_HOST_MEMORY, "mmap failed: %m"); + } + + cleanup->map = map; + cleanup->size = size; + + table->map = map; + table->size = size; + + return VK_SUCCESS; +} + +static VkResult +anv_state_table_grow(struct anv_state_table *table) +{ + VkResult result = VK_SUCCESS; - /* If we're returning more than one chunk, we need to build a chain to add - * to the list. Fortunately, we can do this without any atomics since we - * own everything in the chain right now. `offset` is left pointing to the - * head of our chain list while `next_ptr` points to the tail. + uint32_t used = align_u32(table->state.next * ANV_STATE_ENTRY_SIZE, + PAGE_SIZE); + uint32_t old_size = table->size; + + /* The block pool is always initialized to a nonzero size and this function + * is always called after initialization. 
*/ - for (uint32_t i = 1; i < count; i++) { - VG_NOACCESS_WRITE(next_ptr, offset + i * size); - next_ptr = map + offset + i * size; + assert(old_size > 0); + + uint32_t required = MAX2(used, old_size); + if (used * 2 <= required) { + /* If we're in this case then this isn't the firsta allocation and we + * already have enough space on both sides to hold double what we + * have allocated. There's nothing for us to do. + */ + goto done; + } + + uint32_t size = old_size * 2; + while (size < required) + size *= 2; + + assert(size > table->size); + + result = anv_state_table_expand_range(table, size); + + done: + return result; +} + +void +anv_state_table_finish(struct anv_state_table *table) +{ + struct anv_state_table_cleanup *cleanup; + + u_vector_foreach(cleanup, &table->mmap_cleanups) { + if (cleanup->map) + munmap(cleanup->map, cleanup->size); + } + + u_vector_finish(&table->mmap_cleanups); + + close(table->fd); +} + +VkResult +anv_state_table_add(struct anv_state_table *table, uint32_t *idx, + uint32_t count) +{ + struct anv_block_state state, old, new; + VkResult result; + + assert(idx); + + while(1) { + state.u64 = __sync_fetch_and_add(&table->state.u64, count); + if (state.next + count <= state.end) { + assert(table->map); + struct anv_free_entry *entry = &table->map[state.next]; + for (int i = 0; i < count; i++) { + entry[i].state.idx = state.next + i; + } + *idx = state.next; + return VK_SUCCESS; + } else if (state.next <= state.end) { + /* We allocated the first block outside the pool so we have to grow + * the pool. pool_state->next acts a mutex: threads who try to + * allocate now will get block indexes above the current limit and + * hit futex_wait below. 
+ */ + new.next = state.next + count; + do { + result = anv_state_table_grow(table); + if (result != VK_SUCCESS) + return result; + new.end = table->size / ANV_STATE_ENTRY_SIZE; + } while (new.end < new.next); + + old.u64 = __sync_lock_test_and_set(&table->state.u64, new.u64); + if (old.next != state.next) + futex_wake(&table->state.end, INT_MAX); + } else { + futex_wait(&table->state.end, state.end, NULL); + continue; + } } +} + +void +anv_free_list_push(union anv_free_list *list, + struct anv_state_table *table, + uint32_t first, uint32_t count) +{ + union anv_free_list current, old, new; + uint32_t last = first; + + for (uint32_t i = 1; i < count; i++, last++) + table->map[last].next = last + 1; old = *list; do { current = old; - VG_NOACCESS_WRITE(next_ptr, current.offset); - new.offset = offset; + table->map[last].next = current.offset; + new.offset = first; new.count = current.count + 1; old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); } while (old.u64 != current.u64); } +struct anv_state * +anv_free_list_pop(union anv_free_list *list, + struct anv_state_table *table) +{ + union anv_free_list current, new, old; + + current.u64 = list->u64; + while (current.offset != EMPTY) { + __sync_synchronize(); + new.offset = table->map[current.offset].next; + new.count = current.count + 1; + old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); + if (old.u64 == current.u64) { + struct anv_free_entry *entry = &table->map[current.offset]; + return &entry->state; + } + current = old; + } + + return NULL; +} + /* All pointers in the ptr_free_list are assumed to be page-aligned. This * means that the bottom 12 bits should all be zero. 
*/ @@ -251,21 +434,32 @@ pool->device = device; pool->bo_flags = bo_flags; + pool->nbos = 0; + pool->size = 0; + pool->center_bo_offset = 0; pool->start_address = gen_canonical_address(start_address); + pool->map = NULL; - anv_bo_init(&pool->bo, 0, 0); + /* This pointer will always point to the first BO in the list */ + pool->bo = &pool->bos[0]; - pool->fd = memfd_create("block pool", MFD_CLOEXEC); - if (pool->fd == -1) - return vk_error(VK_ERROR_INITIALIZATION_FAILED); + anv_bo_init(pool->bo, 0, 0); - /* Just make it 2GB up-front. The Linux kernel won't actually back it - * with pages until we either map and fault on one of them or we use - * userptr and send a chunk of it off to the GPU. - */ - if (ftruncate(pool->fd, BLOCK_POOL_MEMFD_SIZE) == -1) { - result = vk_error(VK_ERROR_INITIALIZATION_FAILED); - goto fail_fd; + if (!(pool->bo_flags & EXEC_OBJECT_PINNED)) { + pool->fd = memfd_create("block pool", MFD_CLOEXEC); + if (pool->fd == -1) + return vk_error(VK_ERROR_INITIALIZATION_FAILED); + + /* Just make it 2GB up-front. The Linux kernel won't actually back it + * with pages until we either map and fault on one of them or we use + * userptr and send a chunk of it off to the GPU. 
+ */ + if (ftruncate(pool->fd, BLOCK_POOL_MEMFD_SIZE) == -1) { + result = vk_error(VK_ERROR_INITIALIZATION_FAILED); + goto fail_fd; + } + } else { + pool->fd = -1; } if (!u_vector_init(&pool->mmap_cleanups, @@ -289,7 +483,8 @@ fail_mmap_cleanups: u_vector_finish(&pool->mmap_cleanups); fail_fd: - close(pool->fd); + if (!(pool->bo_flags & EXEC_OBJECT_PINNED)) + close(pool->fd); return result; } @@ -307,12 +502,10 @@ } u_vector_finish(&pool->mmap_cleanups); - - close(pool->fd); + if (!(pool->bo_flags & EXEC_OBJECT_PINNED)) + close(pool->fd); } -#define PAGE_SIZE 4096 - static VkResult anv_block_pool_expand_range(struct anv_block_pool *pool, uint32_t center_bo_offset, uint32_t size) @@ -320,6 +513,7 @@ void *map; uint32_t gem_handle; struct anv_mmap_cleanup *cleanup; + const bool use_softpin = !!(pool->bo_flags & EXEC_OBJECT_PINNED); /* Assert that we only ever grow the pool */ assert(center_bo_offset >= pool->back_state.end); @@ -327,7 +521,8 @@ /* Assert that we don't go outside the bounds of the memfd */ assert(center_bo_offset <= BLOCK_POOL_MEMFD_CENTER); - assert(size - center_bo_offset <= + assert(use_softpin || + size - center_bo_offset <= BLOCK_POOL_MEMFD_SIZE - BLOCK_POOL_MEMFD_CENTER); cleanup = u_vector_add(&pool->mmap_cleanups); @@ -336,48 +531,55 @@ *cleanup = ANV_MMAP_CLEANUP_INIT; - /* Just leak the old map until we destroy the pool. We can't munmap it - * without races or imposing locking on the block allocate fast path. On - * the whole the leaked maps adds up to less than the size of the - * current map. MAP_POPULATE seems like the right thing to do, but we - * should try to get some numbers. 
- */ - map = mmap(NULL, size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, pool->fd, - BLOCK_POOL_MEMFD_CENTER - center_bo_offset); - if (map == MAP_FAILED) - return vk_errorf(pool->device->instance, pool->device, - VK_ERROR_MEMORY_MAP_FAILED, "mmap failed: %m"); - - gem_handle = anv_gem_userptr(pool->device, map, size); - if (gem_handle == 0) { - munmap(map, size); - return vk_errorf(pool->device->instance, pool->device, - VK_ERROR_TOO_MANY_OBJECTS, "userptr failed: %m"); + uint32_t newbo_size = size - pool->size; + if (use_softpin) { + gem_handle = anv_gem_create(pool->device, newbo_size); + map = anv_gem_mmap(pool->device, gem_handle, 0, newbo_size, 0); + if (map == MAP_FAILED) + return vk_errorf(pool->device->instance, pool->device, + VK_ERROR_MEMORY_MAP_FAILED, "gem mmap failed: %m"); + assert(center_bo_offset == 0); + } else { + /* Just leak the old map until we destroy the pool. We can't munmap it + * without races or imposing locking on the block allocate fast path. On + * the whole the leaked maps adds up to less than the size of the + * current map. MAP_POPULATE seems like the right thing to do, but we + * should try to get some numbers. + */ + map = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, pool->fd, + BLOCK_POOL_MEMFD_CENTER - center_bo_offset); + if (map == MAP_FAILED) + return vk_errorf(pool->device->instance, pool->device, + VK_ERROR_MEMORY_MAP_FAILED, "mmap failed: %m"); + + /* Now that we mapped the new memory, we can write the new + * center_bo_offset back into pool and update pool->map. */ + pool->center_bo_offset = center_bo_offset; + pool->map = map + center_bo_offset; + gem_handle = anv_gem_userptr(pool->device, map, size); + if (gem_handle == 0) { + munmap(map, size); + return vk_errorf(pool->device->instance, pool->device, + VK_ERROR_TOO_MANY_OBJECTS, "userptr failed: %m"); + } } cleanup->map = map; - cleanup->size = size; + cleanup->size = use_softpin ? 
newbo_size : size; cleanup->gem_handle = gem_handle; -#if 0 /* Regular objects are created I915_CACHING_CACHED on LLC platforms and * I915_CACHING_NONE on non-LLC platforms. However, userptr objects are * always created as I915_CACHING_CACHED, which on non-LLC means - * snooped. That can be useful but comes with a bit of overheard. Since - * we're eplicitly clflushing and don't want the overhead we need to turn - * it off. */ - if (!pool->device->info.has_llc) { - anv_gem_set_caching(pool->device, gem_handle, I915_CACHING_NONE); - anv_gem_set_domain(pool->device, gem_handle, - I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); - } -#endif - - /* Now that we successfull allocated everything, we can write the new - * values back into pool. */ - pool->map = map + center_bo_offset; - pool->center_bo_offset = center_bo_offset; + * snooped. + * + * On platforms that support softpin, we are not going to use userptr + * anymore, but we still want to rely on the snooped states. So make sure + * everything is set to I915_CACHING_CACHED. + */ + if (!pool->device->info.has_llc) + anv_gem_set_caching(pool->device, gem_handle, I915_CACHING_CACHED); /* For block pool BOs we have to be a bit careful about where we place them * in the GTT. There are two documented workarounds for state base address @@ -404,17 +606,82 @@ * the EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag and the kernel does all of the * hard work for us. */ - anv_bo_init(&pool->bo, gem_handle, size); - if (pool->bo_flags & EXEC_OBJECT_PINNED) { - pool->bo.offset = pool->start_address + BLOCK_POOL_MEMFD_CENTER - - center_bo_offset; + struct anv_bo *bo; + uint32_t bo_size; + uint64_t bo_offset; + + assert(pool->nbos < ANV_MAX_BLOCK_POOL_BOS); + + if (use_softpin) { + /* With softpin, we add a new BO to the pool, and set its offset to right + * where the previous BO ends (the end of the pool). 
+ */ + bo = &pool->bos[pool->nbos++]; + bo_size = newbo_size; + bo_offset = pool->start_address + pool->size; + } else { + /* Without softpin, we just need one BO, and we already have a pointer to + * it. Simply "allocate" it from our array if we didn't do it before. + * The offset doesn't matter since we are not pinning the BO anyway. + */ + if (pool->nbos == 0) + pool->nbos++; + bo = pool->bo; + bo_size = size; + bo_offset = 0; } - pool->bo.flags = pool->bo_flags; - pool->bo.map = map; + + anv_bo_init(bo, gem_handle, bo_size); + bo->offset = bo_offset; + bo->flags = pool->bo_flags; + bo->map = map; + pool->size = size; return VK_SUCCESS; } +static struct anv_bo * +anv_block_pool_get_bo(struct anv_block_pool *pool, int32_t *offset) +{ + struct anv_bo *bo, *bo_found = NULL; + int32_t cur_offset = 0; + + assert(offset); + + if (!(pool->bo_flags & EXEC_OBJECT_PINNED)) + return pool->bo; + + anv_block_pool_foreach_bo(bo, pool) { + if (*offset < cur_offset + bo->size) { + bo_found = bo; + break; + } + cur_offset += bo->size; + } + + assert(bo_found != NULL); + *offset -= cur_offset; + + return bo_found; +} + +/** Returns current memory map of the block pool. + * + * The returned pointer points to the map for the memory at the specified + * offset. The offset parameter is relative to the "center" of the block pool + * rather than the start of the block pool BO map. + */ +void* +anv_block_pool_map(struct anv_block_pool *pool, int32_t offset) +{ + if (pool->bo_flags & EXEC_OBJECT_PINNED) { + struct anv_bo *bo = anv_block_pool_get_bo(pool, &offset); + return bo->map + offset; + } else { + return pool->map + offset; + } +} + /** Grows and re-centers the block pool. 
* * We grow the block pool in one or both directions in such a way that the @@ -464,7 +731,7 @@ assert(state == &pool->state || back_used > 0); - uint32_t old_size = pool->bo.size; + uint32_t old_size = pool->size; /* The block pool is always initialized to a nonzero size and this function * is always called after initialization. @@ -490,7 +757,7 @@ while (size < back_required + front_required) size *= 2; - assert(size > pool->bo.size); + assert(size > pool->size); /* We compute a new center_bo_offset such that, when we double the size * of the pool, we maintain the ratio of how much is used by each side. @@ -527,7 +794,7 @@ result = anv_block_pool_expand_range(pool, center_bo_offset, size); - pool->bo.flags = pool->bo_flags; + pool->bo->flags = pool->bo_flags; done: pthread_mutex_unlock(&pool->device->mutex); @@ -538,7 +805,7 @@ * needs to do so in order to maintain its concurrency model. */ if (state == &pool->state) { - return pool->bo.size - pool->center_bo_offset; + return pool->size - pool->center_bo_offset; } else { assert(pool->center_bo_offset > 0); return pool->center_bo_offset; @@ -551,16 +818,35 @@ static uint32_t anv_block_pool_alloc_new(struct anv_block_pool *pool, struct anv_block_state *pool_state, - uint32_t block_size) + uint32_t block_size, uint32_t *padding) { struct anv_block_state state, old, new; + /* Most allocations won't generate any padding */ + if (padding) + *padding = 0; + while (1) { state.u64 = __sync_fetch_and_add(&pool_state->u64, block_size); if (state.next + block_size <= state.end) { - assert(pool->map); return state.next; } else if (state.next <= state.end) { + if (pool->bo_flags & EXEC_OBJECT_PINNED && state.next < state.end) { + /* We need to grow the block pool, but still have some leftover + * space that can't be used by that particular allocation. So we + * add that as a "padding", and return it. 
+ */ + uint32_t leftover = state.end - state.next; + + /* If there is some leftover space in the pool, the caller must + * deal with it. + */ + assert(leftover == 0 || padding); + if (padding) + *padding = leftover; + state.next += leftover; + } + /* We allocated the first block outside the pool so we have to grow * the pool. pool_state->next acts a mutex: threads who try to * allocate now will get block indexes above the current limit and @@ -584,9 +870,13 @@ int32_t anv_block_pool_alloc(struct anv_block_pool *pool, - uint32_t block_size) + uint32_t block_size, uint32_t *padding) { - return anv_block_pool_alloc_new(pool, &pool->state, block_size); + uint32_t offset; + + offset = anv_block_pool_alloc_new(pool, &pool->state, block_size, padding); + + return offset; } /* Allocates a block out of the back of the block pool. @@ -603,7 +893,7 @@ uint32_t block_size) { int32_t offset = anv_block_pool_alloc_new(pool, &pool->back_state, - block_size); + block_size, NULL); /* The offset we get out of anv_block_pool_alloc_new() is actually the * number of bytes downwards from the middle to the end of the block. 
@@ -628,6 +918,12 @@ if (result != VK_SUCCESS) return result; + result = anv_state_table_init(&pool->table, device, 64); + if (result != VK_SUCCESS) { + anv_block_pool_finish(&pool->block_pool); + return result; + } + assert(util_is_power_of_two_or_zero(block_size)); pool->block_size = block_size; pool->back_alloc_free_list = ANV_FREE_LIST_EMPTY; @@ -645,6 +941,7 @@ anv_state_pool_finish(struct anv_state_pool *pool) { VG(VALGRIND_DESTROY_MEMPOOL(pool)); + anv_state_table_finish(&pool->table); anv_block_pool_finish(&pool->block_pool); } @@ -652,16 +949,24 @@ anv_fixed_size_state_pool_alloc_new(struct anv_fixed_size_state_pool *pool, struct anv_block_pool *block_pool, uint32_t state_size, - uint32_t block_size) + uint32_t block_size, + uint32_t *padding) { struct anv_block_state block, old, new; uint32_t offset; + /* We don't always use anv_block_pool_alloc(), which would set *padding to + * zero for us. So if we have a pointer to padding, we must zero it out + * ourselves here, to make sure we always return some sensible value. + */ + if (padding) + *padding = 0; + /* If our state is large, we don't need any sub-allocation from a block. * Instead, we just grab whole (potentially large) blocks. */ if (state_size >= block_size) - return anv_block_pool_alloc(block_pool, state_size); + return anv_block_pool_alloc(block_pool, state_size, padding); restart: block.u64 = __sync_fetch_and_add(&pool->block.u64, state_size); @@ -669,7 +974,7 @@ if (block.next < block.end) { return block.next; } else if (block.next == block.end) { - offset = anv_block_pool_alloc(block_pool, block_size); + offset = anv_block_pool_alloc(block_pool, block_size, padding); new.next = offset + state_size; new.end = offset + block_size; old.u64 = __sync_lock_test_and_set(&pool->block.u64, new.u64); @@ -699,30 +1004,124 @@ return 1 << size_log2; } +/** Helper to push a chunk into the state table. 
+ * + * It creates 'count' entries into the state table and updates their sizes, + * offsets and maps, also pushing them as "free" states. + */ +static void +anv_state_pool_return_blocks(struct anv_state_pool *pool, + uint32_t chunk_offset, uint32_t count, + uint32_t block_size) +{ + /* Disallow returning 0 chunks */ + assert(count != 0); + + /* Make sure we always return chunks aligned to the block_size */ + assert(chunk_offset % block_size == 0); + + uint32_t st_idx; + VkResult result = anv_state_table_add(&pool->table, &st_idx, count); + assert(result == VK_SUCCESS); + for (int i = 0; i < count; i++) { + /* update states that were added back to the state table */ + struct anv_state *state_i = anv_state_table_get(&pool->table, + st_idx + i); + state_i->alloc_size = block_size; + state_i->offset = chunk_offset + block_size * i; + state_i->map = anv_block_pool_map(&pool->block_pool, state_i->offset); + } + + uint32_t block_bucket = anv_state_pool_get_bucket(block_size); + anv_free_list_push(&pool->buckets[block_bucket].free_list, + &pool->table, st_idx, count); +} + +/** Returns a chunk of memory back to the state pool. + * + * Do a two-level split. If chunk_size is bigger than divisor + * (pool->block_size), we return as many divisor sized blocks as we can, from + * the end of the chunk. + * + * The remaining is then split into smaller blocks (starting at small_size if + * it is non-zero), with larger blocks always being taken from the end of the + * chunk. + */ +static void +anv_state_pool_return_chunk(struct anv_state_pool *pool, + uint32_t chunk_offset, uint32_t chunk_size, + uint32_t small_size) +{ + uint32_t divisor = pool->block_size; + uint32_t nblocks = chunk_size / divisor; + uint32_t rest = chunk_size - nblocks * divisor; + + if (nblocks > 0) { + /* First return divisor aligned and sized chunks. We start returning + * larger blocks from the end of the chunk, since they should already be
Also anv_state_pool_return_blocks() only accepts + * aligned chunks. + */ + uint32_t offset = chunk_offset + rest; + anv_state_pool_return_blocks(pool, offset, nblocks, divisor); + } + + chunk_size = rest; + divisor /= 2; + + if (small_size > 0 && small_size < divisor) + divisor = small_size; + + uint32_t min_size = 1 << ANV_MIN_STATE_SIZE_LOG2; + + /* Just as before, return larger divisor aligned blocks from the end of the + * chunk first. + */ + while (chunk_size > 0 && divisor >= min_size) { + nblocks = chunk_size / divisor; + rest = chunk_size - nblocks * divisor; + if (nblocks > 0) { + anv_state_pool_return_blocks(pool, chunk_offset + rest, + nblocks, divisor); + chunk_size = rest; + } + divisor /= 2; + } +} + static struct anv_state anv_state_pool_alloc_no_vg(struct anv_state_pool *pool, uint32_t size, uint32_t align) { uint32_t bucket = anv_state_pool_get_bucket(MAX2(size, align)); - struct anv_state state; - state.alloc_size = anv_state_pool_get_bucket_size(bucket); + struct anv_state *state; + uint32_t alloc_size = anv_state_pool_get_bucket_size(bucket); + int32_t offset; /* Try free list first. */ - if (anv_free_list_pop(&pool->buckets[bucket].free_list, - &pool->block_pool.map, &state.offset)) { - assert(state.offset >= 0); + state = anv_free_list_pop(&pool->buckets[bucket].free_list, + &pool->table); + if (state) { + assert(state->offset >= 0); goto done; } /* Try to grab a chunk from some larger bucket and split it up */ for (unsigned b = bucket + 1; b < ANV_STATE_BUCKETS; b++) { - int32_t chunk_offset; - if (anv_free_list_pop(&pool->buckets[b].free_list, - &pool->block_pool.map, &chunk_offset)) { + state = anv_free_list_pop(&pool->buckets[b].free_list, &pool->table); + if (state) { unsigned chunk_size = anv_state_pool_get_bucket_size(b); + int32_t chunk_offset = state->offset; + + /* First lets update the state we got to its new size. offset and map + * remain the same. 
+ */ + state->alloc_size = alloc_size; - /* We've found a chunk that's larger than the requested state size. + /* Now return the unused part of the chunk back to the pool as free + * blocks + * * There are a couple of options as to what we do with it: * * 1) We could fully split the chunk into state.alloc_size sized @@ -744,48 +1143,42 @@ * two-level split. If it's bigger than some fixed block_size, * we split it into block_size sized chunks and return all but * one of them. Then we split what remains into - * state.alloc_size sized chunks and return all but one. + * state.alloc_size sized chunks and return them. * - * We choose option (3). + * We choose something close to option (3), which is implemented with + * anv_state_pool_return_chunk(). That is done by returning the + * remaining of the chunk, with alloc_size as a hint of the size that + * we want the smaller chunk split into. */ - if (chunk_size > pool->block_size && - state.alloc_size < pool->block_size) { - assert(chunk_size % pool->block_size == 0); - /* We don't want to split giant chunks into tiny chunks. Instead, - * break anything bigger than a block into block-sized chunks and - * then break it down into bucket-sized chunks from there. Return - * all but the first block of the chunk to the block bucket. 
- */ - const uint32_t block_bucket = - anv_state_pool_get_bucket(pool->block_size); - anv_free_list_push(&pool->buckets[block_bucket].free_list, - pool->block_pool.map, - chunk_offset + pool->block_size, - pool->block_size, - (chunk_size / pool->block_size) - 1); - chunk_size = pool->block_size; - } - - assert(chunk_size % state.alloc_size == 0); - anv_free_list_push(&pool->buckets[bucket].free_list, - pool->block_pool.map, - chunk_offset + state.alloc_size, - state.alloc_size, - (chunk_size / state.alloc_size) - 1); - - state.offset = chunk_offset; + anv_state_pool_return_chunk(pool, chunk_offset + alloc_size, + chunk_size - alloc_size, alloc_size); goto done; } } - state.offset = anv_fixed_size_state_pool_alloc_new(&pool->buckets[bucket], - &pool->block_pool, - state.alloc_size, - pool->block_size); + uint32_t padding; + offset = anv_fixed_size_state_pool_alloc_new(&pool->buckets[bucket], + &pool->block_pool, + alloc_size, + pool->block_size, + &padding); + /* Everytime we allocate a new state, add it to the state pool */ + uint32_t idx; + VkResult result = anv_state_table_add(&pool->table, &idx, 1); + assert(result == VK_SUCCESS); + + state = anv_state_table_get(&pool->table, idx); + state->offset = offset; + state->alloc_size = alloc_size; + state->map = anv_block_pool_map(&pool->block_pool, offset); + + if (padding > 0) { + uint32_t return_offset = offset - padding; + anv_state_pool_return_chunk(pool, return_offset, padding, 0); + } done: - state.map = pool->block_pool.map + state.offset; - return state; + return *state; } struct anv_state @@ -802,22 +1195,30 @@ struct anv_state anv_state_pool_alloc_back(struct anv_state_pool *pool) { - struct anv_state state; - state.alloc_size = pool->block_size; + struct anv_state *state; + uint32_t alloc_size = pool->block_size; - if (anv_free_list_pop(&pool->back_alloc_free_list, - &pool->block_pool.map, &state.offset)) { - assert(state.offset < 0); + state = anv_free_list_pop(&pool->back_alloc_free_list, &pool->table); + 
if (state) { + assert(state->offset < 0); goto done; } - state.offset = anv_block_pool_alloc_back(&pool->block_pool, - pool->block_size); + int32_t offset; + offset = anv_block_pool_alloc_back(&pool->block_pool, + pool->block_size); + uint32_t idx; + VkResult result = anv_state_table_add(&pool->table, &idx, 1); + assert(result == VK_SUCCESS); + + state = anv_state_table_get(&pool->table, idx); + state->offset = offset; + state->alloc_size = alloc_size; + state->map = anv_block_pool_map(&pool->block_pool, state->offset); done: - state.map = pool->block_pool.map + state.offset; - VG(VALGRIND_MEMPOOL_ALLOC(pool, state.map, state.alloc_size)); - return state; + VG(VALGRIND_MEMPOOL_ALLOC(pool, state->map, state->alloc_size)); + return *state; } static void @@ -829,12 +1230,10 @@ if (state.offset < 0) { assert(state.alloc_size == pool->block_size); anv_free_list_push(&pool->back_alloc_free_list, - pool->block_pool.map, state.offset, - state.alloc_size, 1); + &pool->table, state.idx, 1); } else { anv_free_list_push(&pool->buckets[bucket].free_list, - pool->block_pool.map, state.offset, - state.alloc_size, 1); + &pool->table, state.idx, 1); } } @@ -1037,6 +1436,14 @@ return vk_error(VK_ERROR_MEMORY_MAP_FAILED); } + /* We are removing the state flushes, so lets make sure that these buffers + * are cached/snooped. 
+ */ + if (!pool->device->info.has_llc) { + anv_gem_set_caching(pool->device, new_bo.gem_handle, + I915_CACHING_CACHED); + } + *bo = new_bo; VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, size)); @@ -1201,8 +1608,7 @@ VkResult anv_bo_cache_init(struct anv_bo_cache *cache) { - cache->bo_map = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); + cache->bo_map = _mesa_pointer_hash_table_create(NULL); if (!cache->bo_map) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -1319,7 +1725,7 @@ uint32_t gem_handle = anv_gem_fd_to_handle(device, fd); if (!gem_handle) { pthread_mutex_unlock(&cache->mutex); - return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); + return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE); } struct anv_cached_bo *bo = anv_bo_cache_lookup_locked(cache, gem_handle); @@ -1372,7 +1778,7 @@ if (size == (off_t)-1) { anv_gem_close(device, gem_handle); pthread_mutex_unlock(&cache->mutex); - return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); + return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE); } bo = vk_alloc(&device->alloc, sizeof(struct anv_cached_bo), 8, diff -Nru mesa-18.3.3/src/intel/vulkan/anv_android.c mesa-19.0.1/src/intel/vulkan/anv_android.c --- mesa-18.3.3/src/intel/vulkan/anv_android.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_android.c 2019-03-31 23:16:37.000000000 +0000 @@ -29,6 +29,8 @@ #include #include "anv_private.h" +#include "vk_format_info.h" +#include "vk_util.h" static int anv_hal_open(const struct hw_module_t* mod, const char* id, struct hw_device_t** dev); static int anv_hal_close(struct hw_device_t *dev); @@ -96,6 +98,317 @@ return -1; } +static VkResult +get_ahw_buffer_format_properties( + VkDevice device_h, + const struct AHardwareBuffer *buffer, + VkAndroidHardwareBufferFormatPropertiesANDROID *pProperties) +{ + ANV_FROM_HANDLE(anv_device, device, device_h); + + /* Get a description of buffer contents . 
*/ + AHardwareBuffer_Desc desc; + AHardwareBuffer_describe(buffer, &desc); + + /* Verify description. */ + uint64_t gpu_usage = + AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE | + AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT | + AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; + + /* "Buffer must be a valid Android hardware buffer object with at least + * one of the AHARDWAREBUFFER_USAGE_GPU_* usage flags." + */ + if (!(desc.usage & (gpu_usage))) + return VK_ERROR_INVALID_EXTERNAL_HANDLE; + + /* Fill properties fields based on description. */ + VkAndroidHardwareBufferFormatPropertiesANDROID *p = pProperties; + + p->format = vk_format_from_android(desc.format); + + const struct anv_format *anv_format = anv_get_format(p->format); + p->externalFormat = (uint64_t) (uintptr_t) anv_format; + + /* Default to OPTIMAL tiling but set to linear in case + * of AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER usage. + */ + VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL; + + if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER) + tiling = VK_IMAGE_TILING_LINEAR; + + p->formatFeatures = + anv_get_image_format_features(&device->info, p->format, anv_format, + tiling); + + /* "Images can be created with an external format even if the Android hardware + * buffer has a format which has an equivalent Vulkan format to enable + * consistent handling of images from sources that might use either category + * of format. However, all images created with an external format are subject + * to the valid usage requirements associated with external formats, even if + * the Android hardware buffer’s format has a Vulkan equivalent." 
+ * + * "The formatFeatures member *must* include + * VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT and at least one of + * VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT or + * VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT" + */ + p->formatFeatures |= + VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT; + + /* "Implementations may not always be able to determine the color model, + * numerical range, or chroma offsets of the image contents, so the values + * in VkAndroidHardwareBufferFormatPropertiesANDROID are only suggestions. + * Applications should treat these values as sensible defaults to use in + * the absence of more reliable information obtained through some other + * means." + */ + p->samplerYcbcrConversionComponents.r = VK_COMPONENT_SWIZZLE_IDENTITY; + p->samplerYcbcrConversionComponents.g = VK_COMPONENT_SWIZZLE_IDENTITY; + p->samplerYcbcrConversionComponents.b = VK_COMPONENT_SWIZZLE_IDENTITY; + p->samplerYcbcrConversionComponents.a = VK_COMPONENT_SWIZZLE_IDENTITY; + + p->suggestedYcbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601; + p->suggestedYcbcrRange = VK_SAMPLER_YCBCR_RANGE_ITU_FULL; + + p->suggestedXChromaOffset = VK_CHROMA_LOCATION_MIDPOINT; + p->suggestedYChromaOffset = VK_CHROMA_LOCATION_MIDPOINT; + + return VK_SUCCESS; +} + +VkResult +anv_GetAndroidHardwareBufferPropertiesANDROID( + VkDevice device_h, + const struct AHardwareBuffer *buffer, + VkAndroidHardwareBufferPropertiesANDROID *pProperties) +{ + ANV_FROM_HANDLE(anv_device, dev, device_h); + struct anv_physical_device *pdevice = &dev->instance->physicalDevice; + + VkAndroidHardwareBufferFormatPropertiesANDROID *format_prop = + vk_find_struct(pProperties->pNext, + ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_ANDROID); + + /* Fill format properties of an Android hardware buffer. */ + if (format_prop) + get_ahw_buffer_format_properties(device_h, buffer, format_prop); + + /* NOTE - We support buffers with only one handle but do not error on + * multiple handle case. 
Reason is that we want to support YUV formats + * where we have many logical planes but they all point to the same + * buffer, like is the case with VK_FORMAT_G8_B8R8_2PLANE_420_UNORM. + */ + const native_handle_t *handle = + AHardwareBuffer_getNativeHandle(buffer); + int dma_buf = (handle && handle->numFds) ? handle->data[0] : -1; + if (dma_buf < 0) + return VK_ERROR_INVALID_EXTERNAL_HANDLE; + + /* All memory types. */ + uint32_t memory_types = (1ull << pdevice->memory.type_count) - 1; + + pProperties->allocationSize = lseek(dma_buf, 0, SEEK_END); + pProperties->memoryTypeBits = memory_types; + + return VK_SUCCESS; +} + +/* Construct ahw usage mask from image usage bits, see + * 'AHardwareBuffer Usage Equivalence' in Vulkan spec. + */ +uint64_t +anv_ahw_usage_from_vk_usage(const VkImageCreateFlags vk_create, + const VkImageUsageFlags vk_usage) +{ + uint64_t ahw_usage = 0; + + if (vk_usage & VK_IMAGE_USAGE_SAMPLED_BIT) + ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE; + + if (vk_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) + ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE; + + if (vk_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) + ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT; + + if (vk_create & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) + ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP; + + if (vk_create & VK_IMAGE_CREATE_PROTECTED_BIT) + ahw_usage |= AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT; + + /* No usage bits set - set at least one GPU usage. 
*/ + if (ahw_usage == 0) + ahw_usage = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE; + + return ahw_usage; +} + +VkResult +anv_GetMemoryAndroidHardwareBufferANDROID( + VkDevice device_h, + const VkMemoryGetAndroidHardwareBufferInfoANDROID *pInfo, + struct AHardwareBuffer **pBuffer) +{ + ANV_FROM_HANDLE(anv_device_memory, mem, pInfo->memory); + + /* Some quotes from Vulkan spec: + * + * "If the device memory was created by importing an Android hardware + * buffer, vkGetMemoryAndroidHardwareBufferANDROID must return that same + * Android hardware buffer object." + * + * "VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID must + * have been included in VkExportMemoryAllocateInfo::handleTypes when + * memory was created." + */ + if (mem->ahw) { + *pBuffer = mem->ahw; + /* Increase refcount. */ + AHardwareBuffer_acquire(mem->ahw); + return VK_SUCCESS; + } + + return VK_ERROR_OUT_OF_HOST_MEMORY; +} + +/* + * Called from anv_AllocateMemory when import AHardwareBuffer. + */ +VkResult +anv_import_ahw_memory(VkDevice device_h, + struct anv_device_memory *mem, + const VkImportAndroidHardwareBufferInfoANDROID *info) +{ + ANV_FROM_HANDLE(anv_device, device, device_h); + + /* Import from AHardwareBuffer to anv_device_memory. */ + const native_handle_t *handle = + AHardwareBuffer_getNativeHandle(info->buffer); + + /* NOTE - We support buffers with only one handle but do not error on + * multiple handle case. Reason is that we want to support YUV formats + * where we have many logical planes but they all point to the same + * buffer, like is the case with VK_FORMAT_G8_B8R8_2PLANE_420_UNORM. + */ + int dma_buf = (handle && handle->numFds) ? 
handle->data[0] : -1; + if (dma_buf < 0) + return VK_ERROR_INVALID_EXTERNAL_HANDLE; + + uint64_t bo_flags = ANV_BO_EXTERNAL; + if (device->instance->physicalDevice.supports_48bit_addresses) + bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; + if (device->instance->physicalDevice.use_softpin) + bo_flags |= EXEC_OBJECT_PINNED; + + VkResult result = anv_bo_cache_import(device, &device->bo_cache, + dma_buf, bo_flags, &mem->bo); + assert(result == VK_SUCCESS); + + /* "If the vkAllocateMemory command succeeds, the implementation must + * acquire a reference to the imported hardware buffer, which it must + * release when the device memory object is freed. If the command fails, + * the implementation must not retain a reference." + */ + AHardwareBuffer_acquire(info->buffer); + mem->ahw = info->buffer; + + return VK_SUCCESS; +} + +VkResult +anv_create_ahw_memory(VkDevice device_h, + struct anv_device_memory *mem, + const VkMemoryAllocateInfo *pAllocateInfo) +{ + ANV_FROM_HANDLE(anv_device, dev, device_h); + + const VkMemoryDedicatedAllocateInfo *dedicated_info = + vk_find_struct_const(pAllocateInfo->pNext, + MEMORY_DEDICATED_ALLOCATE_INFO); + + uint32_t w = 0; + uint32_t h = 1; + uint32_t layers = 1; + uint32_t format = 0; + uint64_t usage = 0; + + /* If caller passed dedicated information. 
*/ + if (dedicated_info && dedicated_info->image) { + ANV_FROM_HANDLE(anv_image, image, dedicated_info->image); + w = image->extent.width; + h = image->extent.height; + layers = image->array_size; + format = android_format_from_vk(image->vk_format); + usage = anv_ahw_usage_from_vk_usage(image->create_flags, image->usage); + } else if (dedicated_info && dedicated_info->buffer) { + ANV_FROM_HANDLE(anv_buffer, buffer, dedicated_info->buffer); + w = buffer->size; + format = AHARDWAREBUFFER_FORMAT_BLOB; + usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | + AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN; + } else { + w = pAllocateInfo->allocationSize; + format = AHARDWAREBUFFER_FORMAT_BLOB; + usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | + AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN; + } + + struct AHardwareBuffer *ahw = NULL; + struct AHardwareBuffer_Desc desc = { + .width = w, + .height = h, + .layers = layers, + .format = format, + .usage = usage, + }; + + if (AHardwareBuffer_allocate(&desc, &ahw) != 0) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + mem->ahw = ahw; + + return VK_SUCCESS; +} + +VkResult +anv_image_from_external( + VkDevice device_h, + const VkImageCreateInfo *base_info, + const struct VkExternalMemoryImageCreateInfo *create_info, + const VkAllocationCallbacks *alloc, + VkImage *out_image_h) +{ + ANV_FROM_HANDLE(anv_device, device, device_h); + + const struct VkExternalFormatANDROID *ext_info = + vk_find_struct_const(base_info->pNext, EXTERNAL_FORMAT_ANDROID); + + if (ext_info && ext_info->externalFormat != 0) { + assert(base_info->format == VK_FORMAT_UNDEFINED); + assert(base_info->imageType == VK_IMAGE_TYPE_2D); + assert(base_info->usage == VK_IMAGE_USAGE_SAMPLED_BIT); + assert(base_info->tiling == VK_IMAGE_TILING_OPTIMAL); + } + + struct anv_image_create_info anv_info = { + .vk_info = base_info, + .isl_extra_usage_flags = ISL_SURF_USAGE_DISABLE_AUX_BIT, + .external_format = true, + }; + + VkImage image_h; + VkResult result = anv_image_create(device_h, &anv_info, alloc, 
&image_h); + if (result != VK_SUCCESS) + return result; + + *out_image_h = image_h; + + return VK_SUCCESS; +} + VkResult anv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info, @@ -117,7 +430,7 @@ if (gralloc_info->handle->numFds != 1) { return vk_errorf(device->instance, device, - VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, + VK_ERROR_INVALID_EXTERNAL_HANDLE, "VkNativeBufferANDROID::handle::numFds is %d, " "expected 1", gralloc_info->handle->numFds); } @@ -153,13 +466,13 @@ break; case -1: result = vk_errorf(device->instance, device, - VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, + VK_ERROR_INVALID_EXTERNAL_HANDLE, "DRM_IOCTL_I915_GEM_GET_TILING failed for " "VkNativeBufferANDROID"); goto fail_tiling; default: result = vk_errorf(device->instance, device, - VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, + VK_ERROR_INVALID_EXTERNAL_HANDLE, "DRM_IOCTL_I915_GEM_GET_TILING returned unknown " "tiling %d for VkNativeBufferANDROID", i915_tiling); goto fail_tiling; @@ -181,7 +494,7 @@ if (bo->size < image->size) { result = vk_errorf(device->instance, device, - VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, + VK_ERROR_INVALID_EXTERNAL_HANDLE, "dma-buf from VkNativeBufferANDROID is too small for " "VkImage: %"PRIu64"B < %"PRIu64"B", bo->size, image->size); @@ -247,16 +560,16 @@ * dEQP-VK.wsi.android.swapchain.*.image_usage to fail. */ - const VkPhysicalDeviceImageFormatInfo2KHR image_format_info = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR, + const VkPhysicalDeviceImageFormatInfo2 image_format_info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, .format = format, .type = VK_IMAGE_TYPE_2D, .tiling = VK_IMAGE_TILING_OPTIMAL, .usage = imageUsage, }; - VkImageFormatProperties2KHR image_format_props = { - .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR, + VkImageFormatProperties2 image_format_props = { + .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, }; /* Check that requested format and usage are supported. 
*/ diff -Nru mesa-18.3.3/src/intel/vulkan/anv_android.h mesa-19.0.1/src/intel/vulkan/anv_android.h --- mesa-18.3.3/src/intel/vulkan/anv_android.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_android.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,57 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef ANV_ANDROID_H +#define ANV_ANDROID_H + +#include +#include +#include + +struct anv_device_memory; +struct anv_device; +struct anv_image; + +VkResult anv_image_from_gralloc(VkDevice device_h, + const VkImageCreateInfo *base_info, + const VkNativeBufferANDROID *gralloc_info, + const VkAllocationCallbacks *alloc, + VkImage *pImage); + +VkResult anv_image_from_external(VkDevice device_h, + const VkImageCreateInfo *base_info, + const struct VkExternalMemoryImageCreateInfo *create_info, + const VkAllocationCallbacks *alloc, + VkImage *out_image_h); + +uint64_t anv_ahw_usage_from_vk_usage(const VkImageCreateFlags vk_create, + const VkImageUsageFlags vk_usage); + +VkResult anv_import_ahw_memory(VkDevice device_h, + struct anv_device_memory *mem, + const VkImportAndroidHardwareBufferInfoANDROID *info); + +VkResult anv_create_ahw_memory(VkDevice device_h, + struct anv_device_memory *mem, + const VkMemoryAllocateInfo *pAllocateInfo); +#endif /* ANV_ANDROID_H */ diff -Nru mesa-18.3.3/src/intel/vulkan/anv_android_stubs.c mesa-19.0.1/src/intel/vulkan/anv_android_stubs.c --- mesa-18.3.3/src/intel/vulkan/anv_android_stubs.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_android_stubs.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,67 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_android.h" + +VkResult +anv_image_from_gralloc(VkDevice device_h, + const VkImageCreateInfo *base_info, + const VkNativeBufferANDROID *gralloc_info, + const VkAllocationCallbacks *alloc, + VkImage *pImage) +{ + return VK_ERROR_EXTENSION_NOT_PRESENT; +} + +uint64_t +anv_ahw_usage_from_vk_usage(const VkImageCreateFlags vk_create, + const VkImageUsageFlags vk_usage) +{ + return 0; +} + +VkResult +anv_import_ahw_memory(VkDevice device_h, + struct anv_device_memory *mem, + const VkImportAndroidHardwareBufferInfoANDROID *info) +{ + return VK_ERROR_EXTENSION_NOT_PRESENT; +} + +VkResult +anv_create_ahw_memory(VkDevice device_h, + struct anv_device_memory *mem, + const VkMemoryAllocateInfo *pAllocateInfo) +{ + return VK_ERROR_EXTENSION_NOT_PRESENT; +} + +VkResult +anv_image_from_external(VkDevice device_h, + const VkImageCreateInfo *base_info, + const struct VkExternalMemoryImageCreateInfo *create_info, + const VkAllocationCallbacks *alloc, + VkImage *out_image_h) +{ + return VK_ERROR_EXTENSION_NOT_PRESENT; +} diff -Nru mesa-18.3.3/src/intel/vulkan/anv_batch_chain.c mesa-19.0.1/src/intel/vulkan/anv_batch_chain.c --- mesa-18.3.3/src/intel/vulkan/anv_batch_chain.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_batch_chain.c 2019-03-31 23:16:37.000000000 +0000 @@ -75,8 +75,7 @@ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } - list->deps = _mesa_set_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); + list->deps = 
_mesa_pointer_set_create(NULL); if (!list->deps) { vk_free(alloc, list->relocs); @@ -501,7 +500,7 @@ { struct anv_state *bt_block = u_vector_head(&cmd_buffer->bt_block_states); return (struct anv_address) { - .bo = &anv_binding_table_pool(cmd_buffer->device)->block_pool.bo, + .bo = anv_binding_table_pool(cmd_buffer->device)->block_pool.bo, .offset = bt_block->offset, }; } @@ -679,8 +678,8 @@ return (struct anv_state) { 0 }; state.offset = cmd_buffer->bt_next; - state.map = anv_binding_table_pool(device)->block_pool.map + - bt_block->offset + state.offset; + state.map = anv_block_pool_map(&anv_binding_table_pool(device)->block_pool, + bt_block->offset + state.offset); cmd_buffer->bt_next += state.alloc_size; @@ -1037,6 +1036,12 @@ } static VkResult +anv_execbuf_add_bo_set(struct anv_execbuf *exec, + struct set *deps, + uint32_t extra_flags, + const VkAllocationCallbacks *alloc); + +static VkResult anv_execbuf_add_bo(struct anv_execbuf *exec, struct anv_bo *bo, struct anv_reloc_list *relocs, @@ -1125,36 +1130,46 @@ } } - if (relocs->deps && relocs->deps->entries > 0) { - const uint32_t entries = relocs->deps->entries; - struct anv_bo **bos = - vk_alloc(alloc, entries * sizeof(*bos), - 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); - if (bos == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + return anv_execbuf_add_bo_set(exec, relocs->deps, extra_flags, alloc); + } - struct anv_bo **bo = bos; - set_foreach(relocs->deps, entry) { - *bo++ = (void *)entry->key; - } + return VK_SUCCESS; +} - qsort(bos, entries, sizeof(struct anv_bo*), _compare_bo_handles); +/* Add BO dependencies to execbuf */ +static VkResult +anv_execbuf_add_bo_set(struct anv_execbuf *exec, + struct set *deps, + uint32_t extra_flags, + const VkAllocationCallbacks *alloc) +{ + if (!deps || deps->entries <= 0) + return VK_SUCCESS; - VkResult result = VK_SUCCESS; - for (bo = bos; bo < bos + entries; bo++) { - result = anv_execbuf_add_bo(exec, *bo, NULL, extra_flags, alloc); - if (result != VK_SUCCESS) - 
break; - } + const uint32_t entries = deps->entries; + struct anv_bo **bos = + vk_alloc(alloc, entries * sizeof(*bos), + 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (bos == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - vk_free(alloc, bos); + struct anv_bo **bo = bos; + set_foreach(deps, entry) { + *bo++ = (void *)entry->key; + } - if (result != VK_SUCCESS) - return result; - } + qsort(bos, entries, sizeof(struct anv_bo*), _compare_bo_handles); + + VkResult result = VK_SUCCESS; + for (bo = bos; bo < bos + entries; bo++) { + result = anv_execbuf_add_bo(exec, *bo, NULL, extra_flags, alloc); + if (result != VK_SUCCESS) + break; } - return VK_SUCCESS; + vk_free(alloc, bos); + + return result; } static VkResult @@ -1228,7 +1243,7 @@ * relocations that point to the pool bo with the correct offset. */ for (size_t i = 0; i < relocs->num_relocs; i++) { - if (relocs->reloc_bos[i] == &pool->block_pool.bo) { + if (relocs->reloc_bos[i] == pool->block_pool.bo) { /* Adjust the delta value in the relocation to correctly * correspond to the new delta. Initially, this value may have * been negative (if treated as unsigned), but we trust in @@ -1336,7 +1351,7 @@ * given time. The only option is to always relocate them. 
*/ anv_reloc_list_apply(cmd_buffer->device, &cmd_buffer->surface_relocs, - &cmd_buffer->device->surface_state_pool.block_pool.bo, + cmd_buffer->device->surface_state_pool.block_pool.bo, true /* always relocate surface states */); /* Since we own all of the batch buffers, we know what values are stored @@ -1365,11 +1380,55 @@ adjust_relocations_from_state_pool(ss_pool, &cmd_buffer->surface_relocs, cmd_buffer->last_ss_pool_center); - VkResult result = anv_execbuf_add_bo(execbuf, &ss_pool->block_pool.bo, - &cmd_buffer->surface_relocs, 0, - &cmd_buffer->device->alloc); - if (result != VK_SUCCESS) - return result; + VkResult result; + struct anv_bo *bo; + if (cmd_buffer->device->instance->physicalDevice.use_softpin) { + anv_block_pool_foreach_bo(bo, &ss_pool->block_pool) { + result = anv_execbuf_add_bo(execbuf, bo, NULL, 0, + &cmd_buffer->device->alloc); + if (result != VK_SUCCESS) + return result; + } + /* Add surface dependencies (BOs) to the execbuf */ + anv_execbuf_add_bo_set(execbuf, cmd_buffer->surface_relocs.deps, 0, + &cmd_buffer->device->alloc); + + struct anv_block_pool *pool; + pool = &cmd_buffer->device->dynamic_state_pool.block_pool; + anv_block_pool_foreach_bo(bo, pool) { + result = anv_execbuf_add_bo(execbuf, bo, NULL, 0, + &cmd_buffer->device->alloc); + if (result != VK_SUCCESS) + return result; + } + + pool = &cmd_buffer->device->instruction_state_pool.block_pool; + anv_block_pool_foreach_bo(bo, pool) { + result = anv_execbuf_add_bo(execbuf, bo, NULL, 0, + &cmd_buffer->device->alloc); + if (result != VK_SUCCESS) + return result; + } + + pool = &cmd_buffer->device->binding_table_pool.block_pool; + anv_block_pool_foreach_bo(bo, pool) { + result = anv_execbuf_add_bo(execbuf, bo, NULL, 0, + &cmd_buffer->device->alloc); + if (result != VK_SUCCESS) + return result; + } + } else { + /* Since we aren't in the softpin case, all of our STATE_BASE_ADDRESS BOs + * will get added automatically by processing relocations on the batch + * buffer. 
We have to add the surface state BO manually because it has + * relocations of its own that we need to be sure are processsed. + */ + result = anv_execbuf_add_bo(execbuf, ss_pool->block_pool.bo, + &cmd_buffer->surface_relocs, 0, + &cmd_buffer->device->alloc); + if (result != VK_SUCCESS) + return result; + } /* First, we walk over all of the bos we've seen and add them and their * relocations to the validate list. diff -Nru mesa-18.3.3/src/intel/vulkan/anv_blorp.c mesa-19.0.1/src/intel/vulkan/anv_blorp.c --- mesa-18.3.3/src/intel/vulkan/anv_blorp.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_blorp.c 2019-03-31 23:16:37.000000000 +0000 @@ -24,10 +24,11 @@ #include "anv_private.h" static bool -lookup_blorp_shader(struct blorp_context *blorp, +lookup_blorp_shader(struct blorp_batch *batch, const void *key, uint32_t key_size, uint32_t *kernel_out, void *prog_data_out) { + struct blorp_context *blorp = batch->blorp; struct anv_device *device = blorp->driver_ctx; /* The default cache must be a real cache */ @@ -50,13 +51,14 @@ } static bool -upload_blorp_shader(struct blorp_context *blorp, +upload_blorp_shader(struct blorp_batch *batch, const void *key, uint32_t key_size, const void *kernel, uint32_t kernel_size, const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, uint32_t *kernel_out, void *prog_data_out) { + struct blorp_context *blorp = batch->blorp; struct anv_device *device = blorp->driver_ctx; /* The blorp cache must be a real cache */ @@ -71,7 +73,8 @@ anv_pipeline_cache_upload_kernel(&device->default_pipeline_cache, key, key_size, kernel, kernel_size, NULL, 0, - prog_data, prog_data_size, &bind_map); + prog_data, prog_data_size, + NULL, &bind_map); if (!bin) return false; @@ -473,6 +476,8 @@ copy_buffer_to_image(cmd_buffer, dst_buffer, src_image, srcImageLayout, regionCount, pRegions, false); + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; } static bool @@ -680,6 +685,8 @@ } 
blorp_batch_finish(&batch); + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; } void anv_CmdUpdateBuffer( @@ -716,10 +723,8 @@ memcpy(tmp_data.map, pData, copy_size); - anv_state_flush(cmd_buffer->device, tmp_data); - struct blorp_address src = { - .buffer = &cmd_buffer->device->dynamic_state_pool.block_pool.bo, + .buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo, .offset = tmp_data.offset, .mocs = cmd_buffer->device->default_mocs, }; @@ -737,6 +742,8 @@ } blorp_batch_finish(&batch); + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; } void anv_CmdFillBuffer( @@ -824,6 +831,8 @@ } blorp_batch_finish(&batch); + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; } void anv_CmdClearColorImage( @@ -1144,8 +1153,12 @@ * trash our depth and stencil buffers. */ struct blorp_batch batch; - blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, - BLORP_BATCH_NO_EMIT_DEPTH_STENCIL); + enum blorp_batch_flags flags = BLORP_BATCH_NO_EMIT_DEPTH_STENCIL; + if (cmd_buffer->state.conditional_render_enabled) { + anv_cmd_emit_conditional_render_predicate(cmd_buffer); + flags |= BLORP_BATCH_PREDICATE_ENABLE; + } + blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, flags); for (uint32_t a = 0; a < attachmentCount; ++a) { if (pAttachments[a].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) { @@ -1169,63 +1182,52 @@ SUBPASS_STAGE_RESOLVE, }; -static void -resolve_surface(struct blorp_batch *batch, - struct blorp_surf *src_surf, - uint32_t src_level, uint32_t src_layer, - struct blorp_surf *dst_surf, - uint32_t dst_level, uint32_t dst_layer, - uint32_t src_x, uint32_t src_y, uint32_t dst_x, uint32_t dst_y, - uint32_t width, uint32_t height, - enum blorp_filter filter) -{ - blorp_blit(batch, - src_surf, src_level, src_layer, - ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY, - dst_surf, dst_level, dst_layer, - ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY, - src_x, 
src_y, src_x + width, src_y + height, - dst_x, dst_y, dst_x + width, dst_y + height, - filter, false, false); -} - -static void -resolve_image(struct anv_device *device, - struct blorp_batch *batch, - const struct anv_image *src_image, - VkImageLayout src_image_layout, - uint32_t src_level, uint32_t src_layer, - const struct anv_image *dst_image, - VkImageLayout dst_image_layout, - uint32_t dst_level, uint32_t dst_layer, - VkImageAspectFlags aspect_mask, - uint32_t src_x, uint32_t src_y, uint32_t dst_x, uint32_t dst_y, - uint32_t width, uint32_t height) +void +anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer, + const struct anv_image *src_image, + enum isl_aux_usage src_aux_usage, + uint32_t src_level, uint32_t src_base_layer, + const struct anv_image *dst_image, + enum isl_aux_usage dst_aux_usage, + uint32_t dst_level, uint32_t dst_base_layer, + VkImageAspectFlagBits aspect, + uint32_t src_x, uint32_t src_y, + uint32_t dst_x, uint32_t dst_y, + uint32_t width, uint32_t height, + uint32_t layer_count, + enum blorp_filter filter) { - struct anv_cmd_buffer *cmd_buffer = batch->driver_batch; + struct blorp_batch batch; + blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); assert(src_image->type == VK_IMAGE_TYPE_2D); assert(src_image->samples > 1); assert(dst_image->type == VK_IMAGE_TYPE_2D); assert(dst_image->samples == 1); assert(src_image->n_planes == dst_image->n_planes); + assert(!src_image->format->can_ycbcr); + assert(!dst_image->format->can_ycbcr); - uint32_t aspect_bit; - - anv_foreach_image_aspect_bit(aspect_bit, src_image, aspect_mask) { - struct blorp_surf src_surf, dst_surf; - get_blorp_surf_for_anv_image(device, src_image, 1UL << aspect_bit, - src_image_layout, ISL_AUX_USAGE_NONE, - &src_surf); - get_blorp_surf_for_anv_image(device, dst_image, 1UL << aspect_bit, - dst_image_layout, ISL_AUX_USAGE_NONE, - &dst_surf); - anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, - 1UL << aspect_bit, - dst_surf.aux_usage, - dst_level, 
dst_layer, 1); - - enum blorp_filter filter; + struct blorp_surf src_surf, dst_surf; + get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, aspect, + ANV_IMAGE_LAYOUT_EXPLICIT_AUX, + src_aux_usage, &src_surf); + if (src_aux_usage == ISL_AUX_USAGE_MCS) { + src_surf.clear_color_addr = anv_to_blorp_address( + anv_image_get_clear_color_addr(cmd_buffer->device, src_image, + VK_IMAGE_ASPECT_COLOR_BIT)); + } + get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, aspect, + ANV_IMAGE_LAYOUT_EXPLICIT_AUX, + dst_aux_usage, &dst_surf); + anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, + aspect, dst_aux_usage, + dst_level, dst_base_layer, layer_count); + + if (filter == BLORP_FILTER_NONE) { + /* If no explicit filter is provided, then it's implied by the type of + * the source image. + */ if ((src_surf.surf->usage & ISL_SURF_USAGE_DEPTH_BIT) || (src_surf.surf->usage & ISL_SURF_USAGE_STENCIL_BIT) || isl_format_has_int_channel(src_surf.surf->format)) { @@ -1233,15 +1235,20 @@ } else { filter = BLORP_FILTER_AVERAGE; } + } - assert(!src_image->format->can_ycbcr); - assert(!dst_image->format->can_ycbcr); - - resolve_surface(batch, - &src_surf, src_level, src_layer, - &dst_surf, dst_level, dst_layer, - src_x, src_y, dst_x, dst_y, width, height, filter); + for (uint32_t l = 0; l < layer_count; l++) { + blorp_blit(&batch, + &src_surf, src_level, src_base_layer + l, + ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY, + &dst_surf, dst_level, dst_base_layer + l, + ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY, + src_x, src_y, src_x + width, src_y + height, + dst_x, dst_y, dst_x + width, dst_y + height, + filter, false, false); } + + blorp_batch_finish(&batch); } void anv_CmdResolveImage( @@ -1257,8 +1264,7 @@ ANV_FROM_HANDLE(anv_image, src_image, srcImage); ANV_FROM_HANDLE(anv_image, dst_image, dstImage); - struct blorp_batch batch; - blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); + assert(!src_image->format->can_ycbcr); for (uint32_t r = 0; r < 
regionCount; r++) { assert(pRegions[r].srcSubresource.aspectMask == @@ -1269,27 +1275,38 @@ const uint32_t layer_count = anv_get_layerCount(dst_image, &pRegions[r].dstSubresource); - VkImageAspectFlags src_mask = pRegions[r].srcSubresource.aspectMask, - dst_mask = pRegions[r].dstSubresource.aspectMask; + VkImageAspectFlags src_mask = pRegions[r].srcSubresource.aspectMask; + VkImageAspectFlags dst_mask = pRegions[r].dstSubresource.aspectMask; assert(anv_image_aspects_compatible(src_mask, dst_mask)); - for (uint32_t layer = 0; layer < layer_count; layer++) { - resolve_image(cmd_buffer->device, &batch, - src_image, srcImageLayout, - pRegions[r].srcSubresource.mipLevel, - pRegions[r].srcSubresource.baseArrayLayer + layer, - dst_image, dstImageLayout, - pRegions[r].dstSubresource.mipLevel, - pRegions[r].dstSubresource.baseArrayLayer + layer, - pRegions[r].dstSubresource.aspectMask, - pRegions[r].srcOffset.x, pRegions[r].srcOffset.y, - pRegions[r].dstOffset.x, pRegions[r].dstOffset.y, - pRegions[r].extent.width, pRegions[r].extent.height); + uint32_t aspect_bit; + anv_foreach_image_aspect_bit(aspect_bit, src_image, + pRegions[r].srcSubresource.aspectMask) { + enum isl_aux_usage src_aux_usage = + anv_layout_to_aux_usage(&cmd_buffer->device->info, src_image, + (1 << aspect_bit), srcImageLayout); + enum isl_aux_usage dst_aux_usage = + anv_layout_to_aux_usage(&cmd_buffer->device->info, dst_image, + (1 << aspect_bit), dstImageLayout); + + anv_image_msaa_resolve(cmd_buffer, + src_image, src_aux_usage, + pRegions[r].srcSubresource.mipLevel, + pRegions[r].srcSubresource.baseArrayLayer, + dst_image, dst_aux_usage, + pRegions[r].dstSubresource.mipLevel, + pRegions[r].dstSubresource.baseArrayLayer, + (1 << aspect_bit), + pRegions[r].srcOffset.x, + pRegions[r].srcOffset.y, + pRegions[r].dstOffset.x, + pRegions[r].dstOffset.y, + pRegions[r].extent.width, + pRegions[r].extent.height, + layer_count, BLORP_FILTER_NONE); } } - - blorp_batch_finish(&batch); } static enum isl_aux_usage @@ 
-1304,115 +1321,6 @@ } void -anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - struct anv_subpass *subpass = cmd_buffer->state.subpass; - - if (subpass->has_resolve) { - struct blorp_batch batch; - blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); - - /* We are about to do some MSAA resolves. We need to flush so that the - * result of writes to the MSAA color attachments show up in the sampler - * when we blit to the single-sampled resolve target. - */ - cmd_buffer->state.pending_pipe_bits |= - ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | - ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; - - for (uint32_t i = 0; i < subpass->color_count; ++i) { - uint32_t src_att = subpass->color_attachments[i].attachment; - uint32_t dst_att = subpass->resolve_attachments[i].attachment; - - if (dst_att == VK_ATTACHMENT_UNUSED) - continue; - - assert(src_att < cmd_buffer->state.pass->attachment_count); - assert(dst_att < cmd_buffer->state.pass->attachment_count); - - if (cmd_buffer->state.attachments[dst_att].pending_clear_aspects) { - /* From the Vulkan 1.0 spec: - * - * If the first use of an attachment in a render pass is as a - * resolve attachment, then the loadOp is effectively ignored - * as the resolve is guaranteed to overwrite all pixels in the - * render area. 
- */ - cmd_buffer->state.attachments[dst_att].pending_clear_aspects = 0; - } - - struct anv_image_view *src_iview = fb->attachments[src_att]; - struct anv_image_view *dst_iview = fb->attachments[dst_att]; - - enum isl_aux_usage src_aux_usage = - cmd_buffer->state.attachments[src_att].aux_usage; - enum isl_aux_usage dst_aux_usage = - cmd_buffer->state.attachments[dst_att].aux_usage; - - const VkRect2D render_area = cmd_buffer->state.render_area; - - assert(src_iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && - dst_iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT); - - enum blorp_filter filter; - if (isl_format_has_int_channel(src_iview->planes[0].isl.format)) { - filter = BLORP_FILTER_SAMPLE_0; - } else { - filter = BLORP_FILTER_AVERAGE; - } - - struct blorp_surf src_surf, dst_surf; - get_blorp_surf_for_anv_image(cmd_buffer->device, src_iview->image, - VK_IMAGE_ASPECT_COLOR_BIT, - ANV_IMAGE_LAYOUT_EXPLICIT_AUX, - src_aux_usage, &src_surf); - if (src_aux_usage == ISL_AUX_USAGE_MCS) { - src_surf.clear_color_addr = anv_to_blorp_address( - anv_image_get_clear_color_addr(cmd_buffer->device, - src_iview->image, - VK_IMAGE_ASPECT_COLOR_BIT)); - } - get_blorp_surf_for_anv_image(cmd_buffer->device, dst_iview->image, - VK_IMAGE_ASPECT_COLOR_BIT, - ANV_IMAGE_LAYOUT_EXPLICIT_AUX, - dst_aux_usage, &dst_surf); - - uint32_t base_src_layer = src_iview->planes[0].isl.base_array_layer; - uint32_t base_dst_layer = dst_iview->planes[0].isl.base_array_layer; - - assert(src_iview->planes[0].isl.array_len >= fb->layers); - assert(dst_iview->planes[0].isl.array_len >= fb->layers); - - anv_cmd_buffer_mark_image_written(cmd_buffer, dst_iview->image, - VK_IMAGE_ASPECT_COLOR_BIT, - dst_surf.aux_usage, - dst_iview->planes[0].isl.base_level, - base_dst_layer, fb->layers); - - assert(!src_iview->image->format->can_ycbcr); - assert(!dst_iview->image->format->can_ycbcr); - - for (uint32_t i = 0; i < fb->layers; i++) { - resolve_surface(&batch, - &src_surf, - src_iview->planes[0].isl.base_level, - 
base_src_layer + i, - &dst_surf, - dst_iview->planes[0].isl.base_level, - base_dst_layer + i, - render_area.offset.x, render_area.offset.y, - render_area.offset.x, render_area.offset.y, - render_area.extent.width, render_area.extent.height, - filter); - } - } - - blorp_batch_finish(&batch); - } -} - -void anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, uint32_t base_level, uint32_t level_count, diff -Nru mesa-18.3.3/src/intel/vulkan/anv_cmd_buffer.c mesa-19.0.1/src/intel/vulkan/anv_cmd_buffer.c --- mesa-18.3.3/src/intel/vulkan/anv_cmd_buffer.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_cmd_buffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -128,8 +128,13 @@ anv_cmd_pipeline_state_finish(struct anv_cmd_buffer *cmd_buffer, struct anv_cmd_pipeline_state *pipe_state) { - for (uint32_t i = 0; i < ARRAY_SIZE(pipe_state->push_descriptors); i++) - vk_free(&cmd_buffer->pool->alloc, pipe_state->push_descriptors[i]); + for (uint32_t i = 0; i < ARRAY_SIZE(pipe_state->push_descriptors); i++) { + if (pipe_state->push_descriptors[i]) { + anv_descriptor_set_layout_unref(cmd_buffer->device, + pipe_state->push_descriptors[i]->set.layout); + vk_free(&cmd_buffer->pool->alloc, pipe_state->push_descriptors[i]); + } + } } static void @@ -377,6 +382,14 @@ level, base_layer, layer_count); } +void +anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer) +{ + anv_genX_call(&cmd_buffer->device->info, + cmd_emit_conditional_render_predicate, + cmd_buffer); +} + void anv_CmdBindPipeline( VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, @@ -645,6 +658,35 @@ } } +void anv_CmdBindTransformFeedbackBuffersEXT( + VkCommandBuffer commandBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets, + const VkDeviceSize* pSizes) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_xfb_binding *xfb = 
cmd_buffer->state.xfb_bindings; + + /* We have to defer setting up vertex buffer since we need the buffer + * stride from the pipeline. */ + + assert(firstBinding + bindingCount <= MAX_XFB_BUFFERS); + for (uint32_t i = 0; i < bindingCount; i++) { + if (pBuffers[i] == VK_NULL_HANDLE) { + xfb[firstBinding + i].buffer = NULL; + } else { + ANV_FROM_HANDLE(anv_buffer, buffer, pBuffers[i]); + xfb[firstBinding + i].buffer = buffer; + xfb[firstBinding + i].offset = pOffsets[i]; + xfb[firstBinding + i].size = + anv_buffer_get_range(buffer, pOffsets[i], + pSizes ? pSizes[i] : VK_WHOLE_SIZE); + } + } +} + enum isl_format anv_isl_format_for_descriptor_type(VkDescriptorType type) { @@ -671,8 +713,6 @@ state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment); memcpy(state.map, data, size); - anv_state_flush(cmd_buffer->device, state); - VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size)); return state; @@ -692,8 +732,6 @@ for (uint32_t i = 0; i < dwords; i++) p[i] = a[i] | b[i]; - anv_state_flush(cmd_buffer->device, state); - VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4)); return state; @@ -754,8 +792,6 @@ for (unsigned i = 0; i < prog_data->nr_params; i++) u32_map[i] = anv_push_constant_value(data, prog_data->param[i]); - anv_state_flush(cmd_buffer->device, state); - return state; } @@ -810,8 +846,6 @@ } } - anv_state_flush(cmd_buffer->device, state); - return state; } @@ -928,10 +962,11 @@ return iview; } -static struct anv_push_descriptor_set * -anv_cmd_buffer_get_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer, - VkPipelineBindPoint bind_point, - uint32_t set) +static struct anv_descriptor_set * +anv_cmd_buffer_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer, + VkPipelineBindPoint bind_point, + struct anv_descriptor_set_layout *layout, + uint32_t _set) { struct anv_cmd_pipeline_state *pipe_state; if (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) { @@ -942,19 +977,31 @@ } struct anv_push_descriptor_set **push_set = - 
&pipe_state->push_descriptors[set]; + &pipe_state->push_descriptors[_set]; if (*push_set == NULL) { - *push_set = vk_alloc(&cmd_buffer->pool->alloc, - sizeof(struct anv_push_descriptor_set), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + *push_set = vk_zalloc(&cmd_buffer->pool->alloc, + sizeof(struct anv_push_descriptor_set), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (*push_set == NULL) { anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY); return NULL; } } - return *push_set; + struct anv_descriptor_set *set = &(*push_set)->set; + + if (set->layout != layout) { + if (set->layout) + anv_descriptor_set_layout_unref(cmd_buffer->device, set->layout); + anv_descriptor_set_layout_ref(layout); + set->layout = layout; + } + set->size = anv_descriptor_set_layout_size(layout); + set->buffer_count = layout->buffer_count; + set->buffer_views = (*push_set)->buffer_views; + + return set; } void anv_CmdPushDescriptorSetKHR( @@ -972,19 +1019,12 @@ struct anv_descriptor_set_layout *set_layout = layout->set[_set].layout; - struct anv_push_descriptor_set *push_set = - anv_cmd_buffer_get_push_descriptor_set(cmd_buffer, - pipelineBindPoint, _set); - if (!push_set) + struct anv_descriptor_set *set = + anv_cmd_buffer_push_descriptor_set(cmd_buffer, pipelineBindPoint, + set_layout, _set); + if (!set) return; - struct anv_descriptor_set *set = &push_set->set; - - set->layout = set_layout; - set->size = anv_descriptor_set_layout_size(set_layout); - set->buffer_count = set_layout->buffer_count; - set->buffer_views = push_set->buffer_views; - /* Go through the user supplied descriptors. 
*/ for (uint32_t i = 0; i < descriptorWriteCount; i++) { const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; @@ -1064,19 +1104,12 @@ struct anv_descriptor_set_layout *set_layout = layout->set[_set].layout; - struct anv_push_descriptor_set *push_set = - anv_cmd_buffer_get_push_descriptor_set(cmd_buffer, - template->bind_point, _set); - if (!push_set) + struct anv_descriptor_set *set = + anv_cmd_buffer_push_descriptor_set(cmd_buffer, template->bind_point, + set_layout, _set); + if (!set) return; - struct anv_descriptor_set *set = &push_set->set; - - set->layout = set_layout; - set->size = anv_descriptor_set_layout_size(set_layout); - set->buffer_count = set_layout->buffer_count; - set->buffer_views = push_set->buffer_views; - anv_descriptor_set_write_template(set, cmd_buffer->device, &cmd_buffer->surface_state_stream, diff -Nru mesa-18.3.3/src/intel/vulkan/anv_descriptor_set.c mesa-19.0.1/src/intel/vulkan/anv_descriptor_set.c --- mesa-18.3.3/src/intel/vulkan/anv_descriptor_set.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_descriptor_set.c 2019-03-31 23:16:37.000000000 +0000 @@ -58,6 +58,9 @@ anv_foreach_stage(s, binding->stageFlags) surface_count[s] += sampler->n_planes; } + } else { + anv_foreach_stage(s, binding->stageFlags) + surface_count[s] += binding->descriptorCount; } break; @@ -458,6 +461,8 @@ &device->surface_state_pool, 4096); pool->surface_state_free_list = NULL; + list_inithead(&pool->desc_sets); + *pDescriptorPool = anv_descriptor_pool_to_handle(pool); return VK_SUCCESS; @@ -475,6 +480,12 @@ return; anv_state_stream_finish(&pool->surface_state_stream); + + list_for_each_entry_safe(struct anv_descriptor_set, set, + &pool->desc_sets, pool_link) { + anv_descriptor_set_destroy(device, pool, set); + } + vk_free2(&device->alloc, pAllocator, pool); } @@ -486,6 +497,11 @@ ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_descriptor_pool, pool, descriptorPool); + list_for_each_entry_safe(struct 
anv_descriptor_set, set, + &pool->desc_sets, pool_link) { + anv_descriptor_set_destroy(device, pool, set); + } + pool->next = 0; pool->free_list = EMPTY; anv_state_stream_finish(&pool->surface_state_stream); @@ -630,6 +646,8 @@ entry->size = set->size; pool->free_list = (char *) entry - pool->data; } + + list_del(&set->pool_link); } VkResult anv_AllocateDescriptorSets( @@ -652,6 +670,8 @@ if (result != VK_SUCCESS) break; + list_addtail(&set->pool_link, &pool->desc_sets); + pDescriptorSets[i] = anv_descriptor_set_to_handle(set); } @@ -992,7 +1012,7 @@ template->entry_count = pCreateInfo->descriptorUpdateEntryCount; for (uint32_t i = 0; i < template->entry_count; i++) { - const VkDescriptorUpdateTemplateEntryKHR *pEntry = + const VkDescriptorUpdateTemplateEntry *pEntry = &pCreateInfo->pDescriptorUpdateEntries[i]; template->entries[i] = (struct anv_descriptor_template_entry) { diff -Nru mesa-18.3.3/src/intel/vulkan/anv_device.c mesa-19.0.1/src/intel/vulkan/anv_device.c --- mesa-18.3.3/src/intel/vulkan/anv_device.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_device.c 2019-03-31 23:16:37.000000000 +0000 @@ -41,6 +41,7 @@ #include "git_sha1.h" #include "vk_util.h" #include "common/gen_defines.h" +#include "compiler/glsl_types.h" #include "genxml/gen7_pack.h" @@ -60,8 +61,8 @@ va_end(args); } -static VkResult -anv_compute_heap_size(int fd, uint64_t gtt_size, uint64_t *heap_size) +static uint64_t +anv_compute_heap_size(int fd, uint64_t gtt_size) { /* Query the total ram from the system */ struct sysinfo info; @@ -83,9 +84,7 @@ */ uint64_t available_gtt = gtt_size * 3 / 4; - *heap_size = MIN2(available_ram, available_gtt); - - return VK_SUCCESS; + return MIN2(available_ram, available_gtt); } static VkResult @@ -109,10 +108,7 @@ device->supports_48bit_addresses = (device->info.gen >= 8) && gtt_size > (4ULL << 30 /* GiB */); - uint64_t heap_size = 0; - VkResult result = anv_compute_heap_size(fd, gtt_size, &heap_size); - if (result != VK_SUCCESS) - 
return result; + uint64_t heap_size = anv_compute_heap_size(fd, gtt_size); if (heap_size > (2ull << 30) && !device->supports_48bit_addresses) { /* When running with an overridden PCI ID, we may get a GTT size from @@ -708,6 +704,7 @@ vk_debug_report_instance_destroy(&instance->debug_report_callbacks); + _mesa_glsl_release_types(); _mesa_locale_fini(); vk_free(&instance->alloc, instance); @@ -865,7 +862,7 @@ .shaderInt64 = pdevice->info.gen >= 8 && pdevice->info.has_64bit_types, .shaderInt16 = pdevice->info.gen >= 8, - .shaderResourceMinLod = false, + .shaderResourceMinLod = pdevice->info.gen >= 9, .variableMultisampleRate = true, .inheritedQueries = true, }; @@ -893,9 +890,38 @@ vk_foreach_struct(ext, pFeatures->pNext) { switch (ext->sType) { - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: { - VkPhysicalDeviceProtectedMemoryFeatures *features = (void *)ext; - features->protectedMemory = VK_FALSE; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: { + VkPhysicalDevice8BitStorageFeaturesKHR *features = + (VkPhysicalDevice8BitStorageFeaturesKHR *)ext; + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); + + features->storageBuffer8BitAccess = pdevice->info.gen >= 8; + features->uniformAndStorageBuffer8BitAccess = pdevice->info.gen >= 8; + features->storagePushConstant8 = pdevice->info.gen >= 8; + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: { + VkPhysicalDevice16BitStorageFeatures *features = + (VkPhysicalDevice16BitStorageFeatures *)ext; + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); + + features->storageBuffer16BitAccess = pdevice->info.gen >= 8; + features->uniformAndStorageBuffer16BitAccess = pdevice->info.gen >= 8; + features->storagePushConstant16 = pdevice->info.gen >= 8; + features->storageInputOutput16 = false; + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: { + VkPhysicalDeviceConditionalRenderingFeaturesEXT 
*features = + (VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext; + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); + + features->conditionalRendering = pdevice->info.gen >= 8 || + pdevice->info.is_haswell; + features->inheritedConditionalRendering = pdevice->info.gen >= 8 || + pdevice->info.is_haswell; break; } @@ -908,10 +934,9 @@ break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES: { - VkPhysicalDeviceVariablePointerFeatures *features = (void *)ext; - features->variablePointersStorageBuffer = true; - features->variablePointers = true; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: { + VkPhysicalDeviceProtectedMemoryFeatures *features = (void *)ext; + features->protectedMemory = VK_FALSE; break; } @@ -922,32 +947,31 @@ break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT: { + VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *features = + (VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *)ext; + features->scalarBlockLayout = true; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES: { VkPhysicalDeviceShaderDrawParameterFeatures *features = (void *)ext; features->shaderDrawParameters = true; break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR: { - VkPhysicalDevice16BitStorageFeaturesKHR *features = - (VkPhysicalDevice16BitStorageFeaturesKHR *)ext; - ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); - - features->storageBuffer16BitAccess = pdevice->info.gen >= 8; - features->uniformAndStorageBuffer16BitAccess = pdevice->info.gen >= 8; - features->storagePushConstant16 = pdevice->info.gen >= 8; - features->storageInputOutput16 = false; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES: { + VkPhysicalDeviceVariablePointerFeatures *features = (void *)ext; + features->variablePointersStorageBuffer = true; + features->variablePointers = true; break; } - case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: { - VkPhysicalDevice8BitStorageFeaturesKHR *features = - (VkPhysicalDevice8BitStorageFeaturesKHR *)ext; - ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); - - features->storageBuffer8BitAccess = pdevice->info.gen >= 8; - features->uniformAndStorageBuffer8BitAccess = pdevice->info.gen >= 8; - features->storagePushConstant8 = pdevice->info.gen >= 8; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: { + VkPhysicalDeviceTransformFeedbackFeaturesEXT *features = + (VkPhysicalDeviceTransformFeedbackFeaturesEXT *)ext; + features->transformFeedback = VK_TRUE; + features->geometryStreams = VK_TRUE; break; } @@ -1046,7 +1070,7 @@ 16 * devinfo->max_cs_threads, 16 * devinfo->max_cs_threads, }, - .subPixelPrecisionBits = 4 /* FIXME */, + .subPixelPrecisionBits = 8, .subTexelPrecisionBits = 4 /* FIXME */, .mipmapPrecisionBits = 4 /* FIXME */, .maxDrawIndexedIndexValue = UINT32_MAX, @@ -1126,11 +1150,31 @@ vk_foreach_struct(ext, pProperties->pNext) { switch (ext->sType) { - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: { - VkPhysicalDevicePushDescriptorPropertiesKHR *properties = - (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES_KHR: { + VkPhysicalDeviceDepthStencilResolvePropertiesKHR *props = + (VkPhysicalDeviceDepthStencilResolvePropertiesKHR *)ext; + + /* We support all of the depth resolve modes */ + props->supportedDepthResolveModes = + VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR | + VK_RESOLVE_MODE_AVERAGE_BIT_KHR | + VK_RESOLVE_MODE_MIN_BIT_KHR | + VK_RESOLVE_MODE_MAX_BIT_KHR; + + /* Average doesn't make sense for stencil so we don't support that */ + props->supportedStencilResolveModes = + VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR; + if (pdevice->info.gen >= 8) { + /* The advanced stencil resolve modes currently require stencil + * sampling be supported by the hardware. 
+ */ + props->supportedStencilResolveModes |= + VK_RESOLVE_MODE_MIN_BIT_KHR | + VK_RESOLVE_MODE_MAX_BIT_KHR; + } - properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS; + props->independentResolveNone = VK_TRUE; + props->independentResolve = VK_TRUE; break; } @@ -1201,6 +1245,21 @@ break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: { + VkPhysicalDeviceProtectedMemoryProperties *props = + (VkPhysicalDeviceProtectedMemoryProperties *)ext; + props->protectedNoFault = false; + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: { + VkPhysicalDevicePushDescriptorPropertiesKHR *properties = + (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext; + + properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT: { VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *properties = (VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *)ext; @@ -1233,6 +1292,23 @@ break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: { + VkPhysicalDeviceTransformFeedbackPropertiesEXT *props = + (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext; + + props->maxTransformFeedbackStreams = MAX_XFB_STREAMS; + props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS; + props->maxTransformFeedbackBufferSize = (1ull << 32); + props->maxTransformFeedbackStreamDataSize = 128 * 4; + props->maxTransformFeedbackBufferDataSize = 128 * 4; + props->maxTransformFeedbackBufferDataStride = 2048; + props->transformFeedbackQueries = VK_TRUE; + props->transformFeedbackStreamsLinesTriangles = VK_FALSE; + props->transformFeedbackRasterizationStreamSelect = VK_FALSE; + props->transformFeedbackDraw = VK_TRUE; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: { VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *props = (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext; @@ -1241,13 +1317,6 @@ 
break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: { - VkPhysicalDeviceProtectedMemoryProperties *props = - (VkPhysicalDeviceProtectedMemoryProperties *)ext; - props->protectedNoFault = false; - break; - } - default: anv_debug_ignored_stype(ext->sType); break; @@ -1477,8 +1546,6 @@ state = anv_state_pool_alloc(pool, size, align); memcpy(state.map, p, size); - anv_state_flush(pool->block_pool.device, state); - return state; } @@ -2264,6 +2331,7 @@ mem->type = &pdevice->memory.types[pAllocateInfo->memoryTypeIndex]; mem->map = NULL; mem->map_size = 0; + mem->ahw = NULL; uint64_t bo_flags = 0; @@ -2286,6 +2354,43 @@ if (pdevice->use_softpin) bo_flags |= EXEC_OBJECT_PINNED; + const VkExportMemoryAllocateInfo *export_info = + vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO); + + /* Check if we need to support Android HW buffer export. If so, + * create AHardwareBuffer and import memory from it. + */ + bool android_export = false; + if (export_info && export_info->handleTypes & + VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID) + android_export = true; + + /* Android memory import. 
*/ + const struct VkImportAndroidHardwareBufferInfoANDROID *ahw_import_info = + vk_find_struct_const(pAllocateInfo->pNext, + IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID); + + if (ahw_import_info) { + result = anv_import_ahw_memory(_device, mem, ahw_import_info); + if (result != VK_SUCCESS) + goto fail; + + goto success; + } else if (android_export) { + result = anv_create_ahw_memory(_device, mem, pAllocateInfo); + if (result != VK_SUCCESS) + goto fail; + + const struct VkImportAndroidHardwareBufferInfoANDROID import_info = { + .buffer = mem->ahw, + }; + result = anv_import_ahw_memory(_device, mem, &import_info); + if (result != VK_SUCCESS) + goto fail; + + goto success; + } + const VkImportMemoryFdInfoKHR *fd_info = vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR); @@ -2317,9 +2422,9 @@ */ if (mem->bo->size < aligned_alloc_size) { result = vk_errorf(device->instance, device, - VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, + VK_ERROR_INVALID_EXTERNAL_HANDLE, "aligned allocationSize too large for " - "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR: " + "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: " "%"PRIu64"B > %"PRIu64"B", aligned_alloc_size, mem->bo->size); anv_bo_cache_release(device, &device->bo_cache, mem->bo); @@ -2336,42 +2441,44 @@ * If the import fails, we leave the file descriptor open. 
*/ close(fd_info->fd); - } else { - const VkExportMemoryAllocateInfoKHR *fd_info = - vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR); - if (fd_info && fd_info->handleTypes) - bo_flags |= ANV_BO_EXTERNAL; - - result = anv_bo_cache_alloc(device, &device->bo_cache, - pAllocateInfo->allocationSize, bo_flags, - &mem->bo); - if (result != VK_SUCCESS) - goto fail; + goto success; + } - const VkMemoryDedicatedAllocateInfoKHR *dedicated_info = - vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR); - if (dedicated_info && dedicated_info->image != VK_NULL_HANDLE) { - ANV_FROM_HANDLE(anv_image, image, dedicated_info->image); + /* Regular allocate (not importing memory). */ - /* Some legacy (non-modifiers) consumers need the tiling to be set on - * the BO. In this case, we have a dedicated allocation. - */ - if (image->needs_set_tiling) { - const uint32_t i915_tiling = - isl_tiling_to_i915_tiling(image->planes[0].surface.isl.tiling); - int ret = anv_gem_set_tiling(device, mem->bo->gem_handle, - image->planes[0].surface.isl.row_pitch_B, - i915_tiling); - if (ret) { - anv_bo_cache_release(device, &device->bo_cache, mem->bo); - return vk_errorf(device->instance, NULL, - VK_ERROR_OUT_OF_DEVICE_MEMORY, - "failed to set BO tiling: %m"); - } + if (export_info && export_info->handleTypes) + bo_flags |= ANV_BO_EXTERNAL; + + result = anv_bo_cache_alloc(device, &device->bo_cache, + pAllocateInfo->allocationSize, bo_flags, + &mem->bo); + if (result != VK_SUCCESS) + goto fail; + + const VkMemoryDedicatedAllocateInfo *dedicated_info = + vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO); + if (dedicated_info && dedicated_info->image != VK_NULL_HANDLE) { + ANV_FROM_HANDLE(anv_image, image, dedicated_info->image); + + /* Some legacy (non-modifiers) consumers need the tiling to be set on + * the BO. In this case, we have a dedicated allocation. 
+ */ + if (image->needs_set_tiling) { + const uint32_t i915_tiling = + isl_tiling_to_i915_tiling(image->planes[0].surface.isl.tiling); + int ret = anv_gem_set_tiling(device, mem->bo->gem_handle, + image->planes[0].surface.isl.row_pitch_B, + i915_tiling); + if (ret) { + anv_bo_cache_release(device, &device->bo_cache, mem->bo); + return vk_errorf(device->instance, NULL, + VK_ERROR_OUT_OF_DEVICE_MEMORY, + "failed to set BO tiling: %m"); } } } + success: *pMem = anv_device_memory_to_handle(mem); return VK_SUCCESS; @@ -2400,7 +2507,7 @@ VkResult anv_GetMemoryFdPropertiesKHR( VkDevice _device, - VkExternalMemoryHandleTypeFlagBitsKHR handleType, + VkExternalMemoryHandleTypeFlagBits handleType, int fd, VkMemoryFdPropertiesKHR* pMemoryFdProperties) { @@ -2442,6 +2549,11 @@ anv_bo_cache_release(device, &device->bo_cache, mem->bo); +#ifdef ANDROID + if (mem->ahw) + AHardwareBuffer_release(mem->ahw); +#endif + vk_free2(&device->alloc, pAllocator, mem); } @@ -2663,6 +2775,12 @@ */ uint32_t memory_types = (1ull << pdevice->memory.type_count) - 1; + /* We must have image allocated or imported at this point. According to the + * specification, external images must have been bound to memory before + * calling GetImageMemoryRequirements. 
+ */ + assert(image->size > 0); + pMemoryRequirements->size = image->size; pMemoryRequirements->alignment = image->alignment; pMemoryRequirements->memoryTypeBits = memory_types; @@ -2683,8 +2801,8 @@ switch (ext->sType) { case VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO: { struct anv_physical_device *pdevice = &device->instance->physicalDevice; - const VkImagePlaneMemoryRequirementsInfoKHR *plane_reqs = - (const VkImagePlaneMemoryRequirementsInfoKHR *) ext; + const VkImagePlaneMemoryRequirementsInfo *plane_reqs = + (const VkImagePlaneMemoryRequirementsInfo *) ext; uint32_t plane = anv_image_aspect_to_plane(image->aspects, plane_reqs->planeAspect); @@ -2703,6 +2821,12 @@ pMemoryRequirements->memoryRequirements.memoryTypeBits = (1ull << pdevice->memory.type_count) - 1; + /* We must have image allocated or imported at this point. According to the + * specification, external images must have been bound to memory before + * calling GetImageMemoryRequirements. + */ + assert(image->planes[plane].size > 0); + pMemoryRequirements->memoryRequirements.size = image->planes[plane].size; pMemoryRequirements->memoryRequirements.alignment = image->planes[plane].alignment; @@ -2719,7 +2843,7 @@ switch (ext->sType) { case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { VkMemoryDedicatedRequirements *requirements = (void *)ext; - if (image->needs_set_tiling) { + if (image->needs_set_tiling || image->external_format) { /* If we need to set the tiling for external consumers, we need a * dedicated allocation. 
* @@ -2981,8 +3105,6 @@ .size_B = range, .format = format, .stride_B = stride); - - anv_state_flush(device, state); } void anv_DestroySampler( diff -Nru mesa-18.3.3/src/intel/vulkan/anv_dump.c mesa-19.0.1/src/intel/vulkan/anv_dump.c --- mesa-18.3.3/src/intel/vulkan/anv_dump.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_dump.c 2019-03-31 23:16:37.000000000 +0000 @@ -429,9 +429,9 @@ case VK_IMAGE_ASPECT_COLOR_BIT: suffix = "c"; break; case VK_IMAGE_ASPECT_DEPTH_BIT: suffix = "d"; break; case VK_IMAGE_ASPECT_STENCIL_BIT: suffix = "s"; break; - case VK_IMAGE_ASPECT_PLANE_0_BIT_KHR: suffix = "c0"; break; - case VK_IMAGE_ASPECT_PLANE_1_BIT_KHR: suffix = "c1"; break; - case VK_IMAGE_ASPECT_PLANE_2_BIT_KHR: suffix = "c2"; break; + case VK_IMAGE_ASPECT_PLANE_0_BIT: suffix = "c0"; break; + case VK_IMAGE_ASPECT_PLANE_1_BIT: suffix = "c1"; break; + case VK_IMAGE_ASPECT_PLANE_2_BIT: suffix = "c2"; break; default: unreachable("Invalid aspect"); } diff -Nru mesa-18.3.3/src/intel/vulkan/anv_extensions.py mesa-19.0.1/src/intel/vulkan/anv_extensions.py --- mesa-18.3.3/src/intel/vulkan/anv_extensions.py 2019-01-13 21:16:37.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_extensions.py 2019-03-31 23:16:37.000000000 +0000 @@ -47,7 +47,7 @@ self.version = version self.enable = _bool_to_c_expr(enable) -API_PATCH_VERSION = 90 +API_PATCH_VERSION = 96 # Supported API versions. Each one is the maximum patch version for the given # version. Version come in increasing order and each version is available if @@ -69,15 +69,19 @@ # the those extension strings, then tests dEQP-VK.api.info.instance.extensions # and dEQP-VK.api.info.device fail due to the duplicated strings. 
EXTENSIONS = [ + Extension('VK_ANDROID_external_memory_android_hardware_buffer', 3, 'ANDROID'), Extension('VK_ANDROID_native_buffer', 5, 'ANDROID'), - Extension('VK_KHR_16bit_storage', 1, 'device->info.gen >= 8'), Extension('VK_KHR_8bit_storage', 1, 'device->info.gen >= 8'), + Extension('VK_KHR_16bit_storage', 1, 'device->info.gen >= 8'), Extension('VK_KHR_bind_memory2', 1, True), Extension('VK_KHR_create_renderpass2', 1, True), Extension('VK_KHR_dedicated_allocation', 1, True), + Extension('VK_KHR_depth_stencil_resolve', 1, True), Extension('VK_KHR_descriptor_update_template', 1, True), Extension('VK_KHR_device_group', 1, True), Extension('VK_KHR_device_group_creation', 1, True), + Extension('VK_KHR_display', 23, 'VK_USE_PLATFORM_DISPLAY_KHR'), + Extension('VK_KHR_draw_indirect_count', 1, True), Extension('VK_KHR_driver_properties', 1, True), Extension('VK_KHR_external_fence', 1, 'device->has_syncobj_wait'), @@ -99,6 +103,7 @@ Extension('VK_KHR_maintenance1', 1, True), Extension('VK_KHR_maintenance2', 1, True), Extension('VK_KHR_maintenance3', 1, True), + Extension('VK_KHR_multiview', 1, True), Extension('VK_KHR_push_descriptor', 1, True), Extension('VK_KHR_relaxed_block_layout', 1, True), Extension('VK_KHR_sampler_mirror_clamp_to_edge', 1, True), @@ -111,9 +116,9 @@ Extension('VK_KHR_wayland_surface', 6, 'VK_USE_PLATFORM_WAYLAND_KHR'), Extension('VK_KHR_xcb_surface', 6, 'VK_USE_PLATFORM_XCB_KHR'), Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'), - Extension('VK_KHR_multiview', 1, True), - Extension('VK_KHR_display', 23, 'VK_USE_PLATFORM_DISPLAY_KHR'), Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'), + Extension('VK_EXT_calibrated_timestamps', 1, True), + Extension('VK_EXT_conditional_rendering', 1, 'device->info.gen >= 8 || device->info.is_haswell'), Extension('VK_EXT_debug_report', 8, True), Extension('VK_EXT_direct_mode_display', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'), Extension('VK_EXT_display_control', 1, 
'VK_USE_PLATFORM_DISPLAY_KHR'), @@ -121,13 +126,14 @@ Extension('VK_EXT_external_memory_dma_buf', 1, True), Extension('VK_EXT_global_priority', 1, 'device->has_context_priority'), - Extension('VK_EXT_pci_bus_info', 1, False), + Extension('VK_EXT_pci_bus_info', 2, True), + Extension('VK_EXT_post_depth_coverage', 1, 'device->info.gen >= 9'), + Extension('VK_EXT_sampler_filter_minmax', 1, 'device->info.gen >= 9'), + Extension('VK_EXT_scalar_block_layout', 1, True), Extension('VK_EXT_shader_viewport_index_layer', 1, True), Extension('VK_EXT_shader_stencil_export', 1, 'device->info.gen >= 9'), + Extension('VK_EXT_transform_feedback', 1, True), Extension('VK_EXT_vertex_attribute_divisor', 3, True), - Extension('VK_EXT_post_depth_coverage', 1, 'device->info.gen >= 9'), - Extension('VK_EXT_sampler_filter_minmax', 1, 'device->info.gen >= 9'), - Extension('VK_EXT_calibrated_timestamps', 1, True), Extension('VK_GOOGLE_decorate_string', 1, True), Extension('VK_GOOGLE_hlsl_functionality1', 1, True), ] diff -Nru mesa-18.3.3/src/intel/vulkan/anv_formats.c mesa-19.0.1/src/intel/vulkan/anv_formats.c --- mesa-18.3.3/src/intel/vulkan/anv_formats.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_formats.c 2019-03-31 23:16:37.000000000 +0000 @@ -51,26 +51,51 @@ .planes = { \ { .isl_format = __hw_fmt, .swizzle = __swizzle, \ .denominator_scales = { 1, 1, }, \ + .aspect = VK_IMAGE_ASPECT_COLOR_BIT, \ }, \ }, \ + .vk_format = __vk_fmt, \ .n_planes = 1, \ } #define fmt1(__vk_fmt, __hw_fmt) \ swiz_fmt1(__vk_fmt, __hw_fmt, RGBA) -#define fmt2(__vk_fmt, __fmt1, __fmt2) \ +#define d_fmt(__vk_fmt, __hw_fmt) \ [VK_ENUM_OFFSET(__vk_fmt)] = { \ .planes = { \ - { .isl_format = __fmt1, \ - .swizzle = RGBA, \ + { .isl_format = __hw_fmt, .swizzle = RGBA, \ .denominator_scales = { 1, 1, }, \ + .aspect = VK_IMAGE_ASPECT_DEPTH_BIT, \ }, \ - { .isl_format = __fmt2, \ - .swizzle = RGBA, \ + }, \ + .n_planes = 1, \ + } + +#define s_fmt(__vk_fmt, __hw_fmt) \ + 
[VK_ENUM_OFFSET(__vk_fmt)] = { \ + .planes = { \ + { .isl_format = __hw_fmt, .swizzle = RGBA, \ + .denominator_scales = { 1, 1, }, \ + .aspect = VK_IMAGE_ASPECT_STENCIL_BIT, \ + }, \ + }, \ + .n_planes = 1, \ + } + +#define ds_fmt2(__vk_fmt, __fmt1, __fmt2) \ + [VK_ENUM_OFFSET(__vk_fmt)] = { \ + .planes = { \ + { .isl_format = __fmt1, .swizzle = RGBA, \ + .denominator_scales = { 1, 1, }, \ + .aspect = VK_IMAGE_ASPECT_DEPTH_BIT, \ + }, \ + { .isl_format = __fmt2, .swizzle = RGBA, \ .denominator_scales = { 1, 1, }, \ + .aspect = VK_IMAGE_ASPECT_STENCIL_BIT, \ }, \ }, \ + .vk_format = __vk_fmt, \ .n_planes = 2, \ } @@ -79,22 +104,25 @@ .planes = { \ { .isl_format = ISL_FORMAT_UNSUPPORTED, }, \ }, \ + .vk_format = VK_FORMAT_UNDEFINED, \ } -#define y_plane(__hw_fmt, __swizzle, __ycbcr_swizzle, dhs, dvs) \ +#define y_plane(__plane, __hw_fmt, __swizzle, __ycbcr_swizzle, dhs, dvs) \ { .isl_format = __hw_fmt, \ .swizzle = __swizzle, \ .ycbcr_swizzle = __ycbcr_swizzle, \ .denominator_scales = { dhs, dvs, }, \ .has_chroma = false, \ + .aspect = VK_IMAGE_ASPECT_PLANE_0_BIT, /* Y plane is always plane 0 */ \ } -#define chroma_plane(__hw_fmt, __swizzle, __ycbcr_swizzle, dhs, dvs) \ +#define chroma_plane(__plane, __hw_fmt, __swizzle, __ycbcr_swizzle, dhs, dvs) \ { .isl_format = __hw_fmt, \ .swizzle = __swizzle, \ .ycbcr_swizzle = __ycbcr_swizzle, \ .denominator_scales = { dhs, dvs, }, \ .has_chroma = true, \ + .aspect = VK_IMAGE_ASPECT_PLANE_ ## __plane ## _BIT, \ } #define ycbcr_fmt(__vk_fmt, __n_planes, ...) 
\ @@ -102,6 +130,7 @@ .planes = { \ __VA_ARGS__, \ }, \ + .vk_format = __vk_fmt, \ .n_planes = __n_planes, \ .can_ycbcr = true, \ } @@ -224,13 +253,13 @@ fmt1(VK_FORMAT_B10G11R11_UFLOAT_PACK32, ISL_FORMAT_R11G11B10_FLOAT), fmt1(VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, ISL_FORMAT_R9G9B9E5_SHAREDEXP), - fmt1(VK_FORMAT_D16_UNORM, ISL_FORMAT_R16_UNORM), - fmt1(VK_FORMAT_X8_D24_UNORM_PACK32, ISL_FORMAT_R24_UNORM_X8_TYPELESS), - fmt1(VK_FORMAT_D32_SFLOAT, ISL_FORMAT_R32_FLOAT), - fmt1(VK_FORMAT_S8_UINT, ISL_FORMAT_R8_UINT), + d_fmt(VK_FORMAT_D16_UNORM, ISL_FORMAT_R16_UNORM), + d_fmt(VK_FORMAT_X8_D24_UNORM_PACK32, ISL_FORMAT_R24_UNORM_X8_TYPELESS), + d_fmt(VK_FORMAT_D32_SFLOAT, ISL_FORMAT_R32_FLOAT), + s_fmt(VK_FORMAT_S8_UINT, ISL_FORMAT_R8_UINT), fmt_unsupported(VK_FORMAT_D16_UNORM_S8_UINT), - fmt2(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS, ISL_FORMAT_R8_UINT), - fmt2(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT, ISL_FORMAT_R8_UINT), + ds_fmt2(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS, ISL_FORMAT_R8_UINT), + ds_fmt2(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT, ISL_FORMAT_R8_UINT), swiz_fmt1(VK_FORMAT_BC1_RGB_UNORM_BLOCK, ISL_FORMAT_BC1_UNORM, RGB1), swiz_fmt1(VK_FORMAT_BC1_RGB_SRGB_BLOCK, ISL_FORMAT_BC1_UNORM_SRGB, RGB1), @@ -304,27 +333,27 @@ static const struct anv_format ycbcr_formats[] = { ycbcr_fmt(VK_FORMAT_G8B8G8R8_422_UNORM, 1, - y_plane(ISL_FORMAT_YCRCB_SWAPUV, RGBA, _ISL_SWIZZLE(BLUE, GREEN, RED, ZERO), 1, 1)), + y_plane(0, ISL_FORMAT_YCRCB_SWAPUV, RGBA, _ISL_SWIZZLE(BLUE, GREEN, RED, ZERO), 1, 1)), ycbcr_fmt(VK_FORMAT_B8G8R8G8_422_UNORM, 1, - y_plane(ISL_FORMAT_YCRCB_SWAPUVY, RGBA, _ISL_SWIZZLE(BLUE, GREEN, RED, ZERO), 1, 1)), + y_plane(0, ISL_FORMAT_YCRCB_SWAPUVY, RGBA, _ISL_SWIZZLE(BLUE, GREEN, RED, ZERO), 1, 1)), ycbcr_fmt(VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, 3, - y_plane(ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, 
ZERO, ZERO), 2, 2), - chroma_plane(ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 2, 2)), + y_plane(0, ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(1, ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, ZERO, ZERO), 2, 2), + chroma_plane(2, ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 2, 2)), ycbcr_fmt(VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, 2, - y_plane(ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R8G8_UNORM, RGBA, _ISL_SWIZZLE(BLUE, RED, ZERO, ZERO), 2, 2)), + y_plane(0, ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(1, ISL_FORMAT_R8G8_UNORM, RGBA, _ISL_SWIZZLE(BLUE, RED, ZERO, ZERO), 2, 2)), ycbcr_fmt(VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, 3, - y_plane(ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, ZERO, ZERO), 2, 1), - chroma_plane(ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 2, 1)), + y_plane(0, ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(1, ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, ZERO, ZERO), 2, 1), + chroma_plane(2, ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 2, 1)), ycbcr_fmt(VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, 2, - y_plane(ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R8G8_UNORM, RGBA, _ISL_SWIZZLE(BLUE, RED, ZERO, ZERO), 2, 1)), + y_plane(0, ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(1, ISL_FORMAT_R8G8_UNORM, RGBA, _ISL_SWIZZLE(BLUE, RED, ZERO, ZERO), 2, 1)), ycbcr_fmt(VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, 3, - y_plane(ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R8_UNORM, 
RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 1, 1)), + y_plane(0, ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(1, ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(2, ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 1, 1)), fmt_unsupported(VK_FORMAT_R10X6_UNORM_PACK16), fmt_unsupported(VK_FORMAT_R10X6G10X6_UNORM_2PACK16), @@ -353,23 +382,23 @@ fmt_unsupported(VK_FORMAT_B16G16R16G16_422_UNORM), ycbcr_fmt(VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, 3, - y_plane(ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, ZERO, ZERO), 2, 2), - chroma_plane(ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 2, 2)), + y_plane(0, ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(1, ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, ZERO, ZERO), 2, 2), + chroma_plane(2, ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 2, 2)), ycbcr_fmt(VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, 2, - y_plane(ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R16G16_UNORM, RGBA, _ISL_SWIZZLE(BLUE, RED, ZERO, ZERO), 2, 2)), + y_plane(0, ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(1, ISL_FORMAT_R16G16_UNORM, RGBA, _ISL_SWIZZLE(BLUE, RED, ZERO, ZERO), 2, 2)), ycbcr_fmt(VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, 3, - y_plane(ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, ZERO, ZERO), 2, 1), - chroma_plane(ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 2, 1)), + y_plane(0, ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(1, ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, ZERO, ZERO), 2, 1), + chroma_plane(2, 
ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 2, 1)), ycbcr_fmt(VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, 2, - y_plane(ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R16G16_UNORM, RGBA, _ISL_SWIZZLE(BLUE, RED, ZERO, ZERO), 2, 1)), + y_plane(0, ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(1, ISL_FORMAT_R16G16_UNORM, RGBA, _ISL_SWIZZLE(BLUE, RED, ZERO, ZERO), 2, 1)), ycbcr_fmt(VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, 3, - y_plane(ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 1, 1)), + y_plane(0, ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(1, ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(2, ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 1, 1)), }; #undef _fmt @@ -467,11 +496,11 @@ // Format capabilities -static VkFormatFeatureFlags -get_image_format_features(const struct gen_device_info *devinfo, - VkFormat vk_format, - const struct anv_format *anv_format, - VkImageTiling vk_tiling) +VkFormatFeatureFlags +anv_get_image_format_features(const struct gen_device_info *devinfo, + VkFormat vk_format, + const struct anv_format *anv_format, + VkImageTiling vk_tiling) { VkFormatFeatureFlags flags = 0; @@ -494,8 +523,8 @@ flags |= VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT | - VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR | - VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR; + VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_TRANSFER_DST_BIT; return flags; } @@ -718,11 +747,11 @@ *pFormatProperties = (VkFormatProperties) { .linearTilingFeatures = - get_image_format_features(devinfo, vk_format, anv_format, - VK_IMAGE_TILING_LINEAR), + 
anv_get_image_format_features(devinfo, vk_format, anv_format, + VK_IMAGE_TILING_LINEAR), .optimalTilingFeatures = - get_image_format_features(devinfo, vk_format, anv_format, - VK_IMAGE_TILING_OPTIMAL), + anv_get_image_format_features(devinfo, vk_format, anv_format, + VK_IMAGE_TILING_OPTIMAL), .bufferFeatures = get_buffer_format_features(devinfo, vk_format, anv_format), }; @@ -756,7 +785,7 @@ struct anv_physical_device *physical_device, const VkPhysicalDeviceImageFormatInfo2 *info, VkImageFormatProperties *pImageFormatProperties, - VkSamplerYcbcrConversionImageFormatPropertiesKHR *pYcbcrImageFormatProperties) + VkSamplerYcbcrConversionImageFormatProperties *pYcbcrImageFormatProperties) { VkFormatFeatureFlags format_feature_flags; VkExtent3D maxExtent; @@ -769,8 +798,8 @@ if (format == NULL) goto unsupported; - format_feature_flags = get_image_format_features(devinfo, info->format, - format, info->tiling); + format_feature_flags = anv_get_image_format_features(devinfo, info->format, + format, info->tiling); switch (info->type) { default: @@ -948,6 +977,26 @@ VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, }; +static const VkExternalMemoryProperties android_buffer_props = { + .externalMemoryFeatures = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | + VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT, + .exportFromImportedHandleTypes = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID, + .compatibleHandleTypes = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID, +}; + + +static const VkExternalMemoryProperties android_image_props = { + .externalMemoryFeatures = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | + VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT | + VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT, + .exportFromImportedHandleTypes = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID, + .compatibleHandleTypes = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID, +}; + VkResult 
anv_GetPhysicalDeviceImageFormatProperties2( VkPhysicalDevice physicalDevice, const VkPhysicalDeviceImageFormatInfo2* base_info, @@ -955,8 +1004,9 @@ { ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL; - VkExternalImageFormatPropertiesKHR *external_props = NULL; + VkExternalImageFormatProperties *external_props = NULL; VkSamplerYcbcrConversionImageFormatProperties *ycbcr_props = NULL; + struct VkAndroidHardwareBufferUsageANDROID *android_usage = NULL; VkResult result; /* Extract input structs */ @@ -980,6 +1030,9 @@ case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES: ycbcr_props = (void *) s; break; + case VK_STRUCTURE_TYPE_ANDROID_HARDWARE_BUFFER_USAGE_ANDROID: + android_usage = (void *) s; + break; default: anv_debug_ignored_stype(s->sType); break; @@ -991,6 +1044,18 @@ if (result != VK_SUCCESS) goto fail; + bool ahw_supported = + physical_device->supported_extensions.ANDROID_external_memory_android_hardware_buffer; + + if (ahw_supported && android_usage) { + android_usage->androidHardwareBufferUsage = + anv_ahw_usage_from_vk_usage(base_info->flags, + base_info->usage); + + /* Limit maxArrayLayers to 1 for AHardwareBuffer based images for now. 
*/ + base_props->imageFormatProperties.maxArrayLayers = 1; + } + /* From the Vulkan 1.0.42 spec: * * If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2 will @@ -1004,6 +1069,12 @@ if (external_props) external_props->externalMemoryProperties = prime_fd_props; break; + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID: + if (ahw_supported && external_props) { + external_props->externalMemoryProperties = android_image_props; + break; + } + /* fallthrough if ahw not supported */ default: /* From the Vulkan 1.0.42 spec: * @@ -1081,11 +1152,19 @@ if (pExternalBufferInfo->flags) goto unsupported; + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + switch (pExternalBufferInfo->handleType) { case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: pExternalBufferProperties->externalMemoryProperties = prime_fd_props; return; + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID: + if (physical_device->supported_extensions.ANDROID_external_memory_android_hardware_buffer) { + pExternalBufferProperties->externalMemoryProperties = android_buffer_props; + return; + } + /* fallthrough if ahw not supported */ default: goto unsupported; } @@ -1104,6 +1183,17 @@ ANV_FROM_HANDLE(anv_device, device, _device); struct anv_ycbcr_conversion *conversion; + /* Search for VkExternalFormatANDROID and resolve the format. */ + struct anv_format *ext_format = NULL; + const struct VkExternalFormatANDROID *ext_info = + vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_FORMAT_ANDROID); + + uint64_t format = ext_info ? 
ext_info->externalFormat : 0; + if (format) { + assert(pCreateInfo->format == VK_FORMAT_UNDEFINED); + ext_format = (struct anv_format *) (uintptr_t) format; + } + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO); conversion = vk_alloc2(&device->alloc, pAllocator, sizeof(*conversion), 8, @@ -1116,14 +1206,25 @@ conversion->format = anv_get_format(pCreateInfo->format); conversion->ycbcr_model = pCreateInfo->ycbcrModel; conversion->ycbcr_range = pCreateInfo->ycbcrRange; - conversion->mapping[0] = pCreateInfo->components.r; - conversion->mapping[1] = pCreateInfo->components.g; - conversion->mapping[2] = pCreateInfo->components.b; - conversion->mapping[3] = pCreateInfo->components.a; + + /* The Vulkan 1.1.95 spec says "When creating an external format conversion, + * the value of components if ignored." + */ + if (!ext_format) { + conversion->mapping[0] = pCreateInfo->components.r; + conversion->mapping[1] = pCreateInfo->components.g; + conversion->mapping[2] = pCreateInfo->components.b; + conversion->mapping[3] = pCreateInfo->components.a; + } + conversion->chroma_offsets[0] = pCreateInfo->xChromaOffset; conversion->chroma_offsets[1] = pCreateInfo->yChromaOffset; conversion->chroma_filter = pCreateInfo->chromaFilter; + /* Setup external format. 
*/ + if (ext_format) + conversion->format = ext_format; + bool has_chroma_subsampled = false; for (uint32_t p = 0; p < conversion->format->n_planes; p++) { if (conversion->format->planes[p].has_chroma && diff -Nru mesa-18.3.3/src/intel/vulkan/anv_genX.h mesa-19.0.1/src/intel/vulkan/anv_genX.h --- mesa-18.3.3/src/intel/vulkan/anv_genX.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_genX.h 2019-03-31 23:16:37.000000000 +0000 @@ -66,6 +66,8 @@ uint32_t base_layer, uint32_t layer_count); +void genX(cmd_emit_conditional_render_predicate)(struct anv_cmd_buffer *cmd_buffer); + void genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch, const struct gen_l3_config *l3_config, diff -Nru mesa-18.3.3/src/intel/vulkan/anv_image.c mesa-19.0.1/src/intel/vulkan/anv_image.c --- mesa-18.3.3/src/intel/vulkan/anv_image.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_image.c 2019-03-31 23:16:37.000000000 +0000 @@ -159,28 +159,26 @@ static bool all_formats_ccs_e_compatible(const struct gen_device_info *devinfo, - const struct VkImageCreateInfo *vk_info) + const VkImageFormatListCreateInfoKHR *fmt_list, + struct anv_image *image) { enum isl_format format = - anv_get_isl_format(devinfo, vk_info->format, - VK_IMAGE_ASPECT_COLOR_BIT, vk_info->tiling); + anv_get_isl_format(devinfo, image->vk_format, + VK_IMAGE_ASPECT_COLOR_BIT, image->tiling); if (!isl_format_supports_ccs_e(devinfo, format)) return false; - if (!(vk_info->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT)) + if (!(image->create_flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT)) return true; - const VkImageFormatListCreateInfoKHR *fmt_list = - vk_find_struct_const(vk_info->pNext, IMAGE_FORMAT_LIST_CREATE_INFO_KHR); - if (!fmt_list || fmt_list->viewFormatCount == 0) return false; for (uint32_t i = 0; i < fmt_list->viewFormatCount; i++) { enum isl_format view_format = anv_get_isl_format(devinfo, fmt_list->pViewFormats[i], - VK_IMAGE_ASPECT_COLOR_BIT, vk_info->tiling); + 
VK_IMAGE_ASPECT_COLOR_BIT, image->tiling); if (!isl_formats_are_ccs_e_compatible(devinfo, format, view_format)) return false; @@ -245,7 +243,6 @@ */ static void add_aux_state_tracking_buffer(struct anv_image *image, - VkImageAspectFlagBits aspect, uint32_t plane, const struct anv_device *device) { @@ -300,11 +297,11 @@ static VkResult make_surface(const struct anv_device *dev, struct anv_image *image, - const struct anv_image_create_info *anv_info, + uint32_t stride, isl_tiling_flags_t tiling_flags, + isl_surf_usage_flags_t isl_extra_usage_flags, VkImageAspectFlagBits aspect) { - const VkImageCreateInfo *vk_info = anv_info->vk_info; bool ok; static const enum isl_surf_dim vk_to_isl_surf_dim[] = { @@ -313,8 +310,7 @@ [VK_IMAGE_TYPE_3D] = ISL_SURF_DIM_3D, }; - image->extent = anv_sanitize_image_extent(vk_info->imageType, - vk_info->extent); + image->extent = anv_sanitize_image_extent(image->type, image->extent); const unsigned plane = anv_image_aspect_to_plane(image->aspects, aspect); const struct anv_format_plane plane_format = @@ -322,8 +318,8 @@ struct anv_surface *anv_surf = &image->planes[plane].surface; const isl_surf_usage_flags_t usage = - choose_isl_surf_usage(vk_info->flags, image->usage, - anv_info->isl_extra_usage_flags, aspect); + choose_isl_surf_usage(image->create_flags, image->usage, + isl_extra_usage_flags, aspect); /* If an image is created as BLOCK_TEXEL_VIEW_COMPATIBLE, then we need to * fall back to linear on Broadwell and earlier because we aren't @@ -333,24 +329,24 @@ */ bool needs_shadow = false; if (dev->info.gen <= 8 && - (vk_info->flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT) && - vk_info->tiling == VK_IMAGE_TILING_OPTIMAL) { + (image->create_flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT) && + image->tiling == VK_IMAGE_TILING_OPTIMAL) { assert(isl_format_is_compressed(plane_format.isl_format)); tiling_flags = ISL_TILING_LINEAR_BIT; needs_shadow = true; } ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl, - .dim = 
vk_to_isl_surf_dim[vk_info->imageType], + .dim = vk_to_isl_surf_dim[image->type], .format = plane_format.isl_format, .width = image->extent.width / plane_format.denominator_scales[0], .height = image->extent.height / plane_format.denominator_scales[1], .depth = image->extent.depth, - .levels = vk_info->mipLevels, - .array_len = vk_info->arrayLayers, - .samples = vk_info->samples, + .levels = image->levels, + .array_len = image->array_size, + .samples = image->samples, .min_alignment_B = 0, - .row_pitch_B = anv_info->stride, + .row_pitch_B = stride, .usage = usage, .tiling_flags = tiling_flags); @@ -370,16 +366,16 @@ assert(tiling_flags == ISL_TILING_LINEAR_BIT); ok = isl_surf_init(&dev->isl_dev, &image->planes[plane].shadow_surface.isl, - .dim = vk_to_isl_surf_dim[vk_info->imageType], + .dim = vk_to_isl_surf_dim[image->type], .format = plane_format.isl_format, .width = image->extent.width, .height = image->extent.height, .depth = image->extent.depth, - .levels = vk_info->mipLevels, - .array_len = vk_info->arrayLayers, - .samples = vk_info->samples, + .levels = image->levels, + .array_len = image->array_size, + .samples = image->samples, .min_alignment_B = 0, - .row_pitch_B = anv_info->stride, + .row_pitch_B = stride, .usage = usage, .tiling_flags = ISL_TILING_ANY_MASK); @@ -406,12 +402,12 @@ /* It will never be used as an attachment, HiZ is pointless. 
*/ } else if (dev->info.gen == 7) { anv_perf_warn(dev->instance, image, "Implement gen7 HiZ"); - } else if (vk_info->mipLevels > 1) { + } else if (image->levels > 1) { anv_perf_warn(dev->instance, image, "Enable multi-LOD HiZ"); - } else if (vk_info->arrayLayers > 1) { + } else if (image->array_size > 1) { anv_perf_warn(dev->instance, image, "Implement multi-arrayLayer HiZ clears and resolves"); - } else if (dev->info.gen == 8 && vk_info->samples > 1) { + } else if (dev->info.gen == 8 && image->samples > 1) { anv_perf_warn(dev->instance, image, "Enable gen8 multisampled HiZ"); } else if (!unlikely(INTEL_DEBUG & DEBUG_NO_HIZ)) { assert(image->planes[plane].aux_surface.isl.size_B == 0); @@ -422,7 +418,7 @@ add_surface(image, &image->planes[plane].aux_surface, plane); image->planes[plane].aux_usage = ISL_AUX_USAGE_HIZ; } - } else if ((aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) && vk_info->samples == 1) { + } else if ((aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) && image->samples == 1) { /* TODO: Disallow compression with : * * 1) non multiplanar images (We appear to hit a sampler bug with @@ -436,7 +432,7 @@ */ const bool allow_compression = image->n_planes == 1 && - (vk_info->flags & VK_IMAGE_CREATE_ALIAS_BIT) == 0 && + (image->create_flags & VK_IMAGE_CREATE_ALIAS_BIT) == 0 && likely((INTEL_DEBUG & DEBUG_NO_RBC) == 0); if (allow_compression) { @@ -463,7 +459,7 @@ } add_surface(image, &image->planes[plane].aux_surface, plane); - add_aux_state_tracking_buffer(image, aspect, plane, dev); + add_aux_state_tracking_buffer(image, plane, dev); /* For images created without MUTABLE_FORMAT_BIT set, we know that * they will always be used with the original format. In @@ -473,21 +469,21 @@ * a render target. This means that it's safe to just leave * compression on at all times for these formats. 
*/ - if (!(vk_info->usage & VK_IMAGE_USAGE_STORAGE_BIT) && - all_formats_ccs_e_compatible(&dev->info, vk_info)) { + if (!(image->usage & VK_IMAGE_USAGE_STORAGE_BIT) && + image->ccs_e_compatible) { image->planes[plane].aux_usage = ISL_AUX_USAGE_CCS_E; } } } - } else if ((aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) && vk_info->samples > 1) { - assert(!(vk_info->usage & VK_IMAGE_USAGE_STORAGE_BIT)); + } else if ((aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) && image->samples > 1) { + assert(!(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)); assert(image->planes[plane].aux_surface.isl.size_B == 0); ok = isl_surf_get_mcs_surf(&dev->isl_dev, &image->planes[plane].surface.isl, &image->planes[plane].aux_surface.isl); if (ok) { add_surface(image, &image->planes[plane].aux_surface, plane); - add_aux_state_tracking_buffer(image, aspect, plane, dev); + add_aux_state_tracking_buffer(image, plane, dev); image->planes[plane].aux_usage = ISL_AUX_USAGE_MCS; } } @@ -591,12 +587,22 @@ image->array_size = pCreateInfo->arrayLayers; image->samples = pCreateInfo->samples; image->usage = pCreateInfo->usage; + image->create_flags = pCreateInfo->flags; image->tiling = pCreateInfo->tiling; image->disjoint = pCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT; image->needs_set_tiling = wsi_info && wsi_info->scanout; image->drm_format_mod = isl_mod_info ? isl_mod_info->modifier : DRM_FORMAT_MOD_INVALID; + /* In case of external format, We don't know format yet, + * so skip the rest for now. 
+ */ + if (create_info->external_format) { + image->external_format = true; + *pImage = anv_image_to_handle(image); + return VK_SUCCESS; + } + const struct anv_format *format = anv_get_format(image->vk_format); assert(format != NULL); @@ -606,10 +612,17 @@ image->n_planes = format->n_planes; + const VkImageFormatListCreateInfoKHR *fmt_list = + vk_find_struct_const(pCreateInfo->pNext, + IMAGE_FORMAT_LIST_CREATE_INFO_KHR); + + image->ccs_e_compatible = + all_formats_ccs_e_compatible(&device->info, fmt_list, image); + uint32_t b; for_each_bit(b, image->aspects) { - r = make_surface(device, image, create_info, isl_tiling_flags, - (1 << b)); + r = make_surface(device, image, create_info->stride, isl_tiling_flags, + create_info->isl_extra_usage_flags, (1 << b)); if (r != VK_SUCCESS) goto fail; } @@ -631,14 +644,19 @@ const VkAllocationCallbacks *pAllocator, VkImage *pImage) { -#ifdef ANDROID + const struct VkExternalMemoryImageCreateInfo *create_info = + vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO); + + if (create_info && (create_info->handleTypes & + VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)) + return anv_image_from_external(device, pCreateInfo, create_info, + pAllocator, pImage); + const VkNativeBufferANDROID *gralloc_info = vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID); - if (gralloc_info) return anv_image_from_gralloc(device, pCreateInfo, gralloc_info, pAllocator, pImage); -#endif return anv_image_create(device, &(struct anv_image_create_info) { @@ -688,6 +706,83 @@ }; } +/* We are binding AHardwareBuffer. Get a description, resolve the + * format and prepare anv_image properly. + */ +static void +resolve_ahw_image(struct anv_device *device, + struct anv_image *image, + struct anv_device_memory *mem) +{ +#ifdef ANDROID + assert(mem->ahw); + AHardwareBuffer_Desc desc; + AHardwareBuffer_describe(mem->ahw, &desc); + + /* Check tiling. 
*/ + int i915_tiling = anv_gem_get_tiling(device, mem->bo->gem_handle); + VkImageTiling vk_tiling; + isl_tiling_flags_t isl_tiling_flags = 0; + + switch (i915_tiling) { + case I915_TILING_NONE: + vk_tiling = VK_IMAGE_TILING_LINEAR; + isl_tiling_flags = ISL_TILING_LINEAR_BIT; + break; + case I915_TILING_X: + vk_tiling = VK_IMAGE_TILING_OPTIMAL; + isl_tiling_flags = ISL_TILING_X_BIT; + break; + case I915_TILING_Y: + vk_tiling = VK_IMAGE_TILING_OPTIMAL; + isl_tiling_flags = ISL_TILING_Y0_BIT; + break; + case -1: + default: + unreachable("Invalid tiling flags."); + } + + assert(vk_tiling == VK_IMAGE_TILING_LINEAR || + vk_tiling == VK_IMAGE_TILING_OPTIMAL); + + /* Check format. */ + VkFormat vk_format = vk_format_from_android(desc.format); + enum isl_format isl_fmt = anv_get_isl_format(&device->info, + vk_format, + VK_IMAGE_ASPECT_COLOR_BIT, + vk_tiling); + assert(format != ISL_FORMAT_UNSUPPORTED); + + /* Handle RGB(X)->RGBA fallback. */ + switch (desc.format) { + case AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM: + case AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM: + if (isl_format_is_rgb(isl_fmt)) + isl_fmt = isl_format_rgb_to_rgba(isl_fmt); + break; + } + + /* Now we are able to fill anv_image fields properly and create + * isl_surface for it. 
+ */ + image->vk_format = vk_format; + image->format = anv_get_format(vk_format); + image->aspects = vk_format_aspects(image->vk_format); + image->n_planes = image->format->n_planes; + image->ccs_e_compatible = false; + + uint32_t stride = desc.stride * + (isl_format_get_layout(isl_fmt)->bpb / 8); + + uint32_t b; + for_each_bit(b, image->aspects) { + VkResult r = make_surface(device, image, stride, isl_tiling_flags, + ISL_SURF_USAGE_DISABLE_AUX_BIT, (1 << b)); + assert(r == VK_SUCCESS); + } +#endif +} + VkResult anv_BindImageMemory( VkDevice _device, VkImage _image, @@ -698,6 +793,9 @@ ANV_FROM_HANDLE(anv_device_memory, mem, _memory); ANV_FROM_HANDLE(anv_image, image, _image); + if (mem->ahw) + resolve_ahw_image(device, image, mem); + uint32_t aspect_bit; anv_foreach_image_aspect_bit(aspect_bit, image, image->aspects) { uint32_t plane = @@ -719,8 +817,11 @@ const VkBindImageMemoryInfo *bind_info = &pBindInfos[i]; ANV_FROM_HANDLE(anv_device_memory, mem, bind_info->memory); ANV_FROM_HANDLE(anv_image, image, bind_info->image); - VkImageAspectFlags aspects = image->aspects; + if (mem->ahw) + resolve_ahw_image(device, image, mem); + + VkImageAspectFlags aspects = image->aspects; vk_foreach_struct_const(s, bind_info->pNext) { switch (s->sType) { case VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO: { @@ -757,7 +858,7 @@ ANV_FROM_HANDLE(anv_image, image, _image); const struct anv_surface *surface; - if (subresource->aspectMask == VK_IMAGE_ASPECT_PLANE_1_BIT_KHR && + if (subresource->aspectMask == VK_IMAGE_ASPECT_PLANE_1_BIT && image->drm_format_mod != DRM_FORMAT_MOD_INVALID && isl_drm_modifier_has_aux(image->drm_format_mod)) surface = &image->planes[0].aux_surface; @@ -921,6 +1022,9 @@ case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR: unreachable("VK_KHR_shared_presentable_image is unsupported"); + case VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT: + unreachable("VK_EXT_fragment_density_map is unsupported"); + case VK_IMAGE_LAYOUT_SHADING_RATE_OPTIMAL_NV: 
unreachable("VK_NV_shading_rate_image is unsupported"); } @@ -1223,8 +1327,6 @@ } } - anv_state_flush(device, state_inout->state); - if (image_param_out) { assert(view_usage == ISL_SURF_USAGE_STORAGE_BIT); isl_surf_fill_image_param(&device->isl_dev, image_param_out, @@ -1248,6 +1350,28 @@ return view_aspects; } +static uint32_t +anv_image_aspect_get_planes(VkImageAspectFlags aspect_mask) +{ + uint32_t planes = 0; + + if (aspect_mask & (VK_IMAGE_ASPECT_COLOR_BIT | + VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT | + VK_IMAGE_ASPECT_PLANE_0_BIT)) + planes++; + if (aspect_mask & VK_IMAGE_ASPECT_PLANE_1_BIT) + planes++; + if (aspect_mask & VK_IMAGE_ASPECT_PLANE_2_BIT) + planes++; + + if ((aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0 && + (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0) + planes++; + + return planes; +} + VkResult anv_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo, @@ -1268,6 +1392,22 @@ assert(range->layerCount > 0); assert(range->baseMipLevel < image->levels); + /* Check if a conversion info was passed. */ + const struct anv_format *conv_format = NULL; + const struct VkSamplerYcbcrConversionInfo *conv_info = + vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO); + + /* If image has an external format, the pNext chain must contain an instance of + * VKSamplerYcbcrConversionInfo with a conversion object created with the same + * external format as image." + */ + assert(!image->external_format || conv_info); + + if (conv_info) { + ANV_FROM_HANDLE(anv_ycbcr_conversion, conversion, conv_info->conversion); + conv_format = conversion->format; + } + const VkImageViewUsageCreateInfo *usage_info = vk_find_struct_const(pCreateInfo, IMAGE_VIEW_USAGE_CREATE_INFO); VkImageUsageFlags view_usage = usage_info ? 
usage_info->usage : image->usage; @@ -1296,7 +1436,7 @@ * VK_IMAGE_ASPECT_COLOR_BIT will be converted to * VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | * VK_IMAGE_ASPECT_PLANE_2_BIT for an image of format - * VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM_KHR. + * VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM. */ VkImageAspectFlags expanded_aspects = anv_image_expand_aspects(image, range->aspectMask); @@ -1312,6 +1452,15 @@ iview->n_planes = anv_image_aspect_get_planes(iview->aspect_mask); iview->vk_format = pCreateInfo->format; + /* "If image has an external format, format must be VK_FORMAT_UNDEFINED." */ + assert(!image->external_format || pCreateInfo->format == VK_FORMAT_UNDEFINED); + + /* Format is undefined, this can happen when using external formats. Set + * view format from the passed conversion info. + */ + if (iview->vk_format == VK_FORMAT_UNDEFINED && conv_format) + iview->vk_format = conv_format->vk_format; + iview->extent = (VkExtent3D) { .width = anv_minify(image->extent.width , range->baseMipLevel), .height = anv_minify(image->extent.height, range->baseMipLevel), @@ -1324,11 +1473,11 @@ uint32_t iaspect_bit, vplane = 0; anv_foreach_image_aspect_bit(iaspect_bit, image, expanded_aspects) { uint32_t iplane = - anv_image_aspect_to_plane(expanded_aspects, 1UL << iaspect_bit); + anv_image_aspect_to_plane(image->aspects, 1UL << iaspect_bit); VkImageAspectFlags vplane_aspect = anv_plane_to_aspect(iview->aspect_mask, vplane); struct anv_format_plane format = - anv_get_format_plane(&device->info, pCreateInfo->format, + anv_get_format_plane(&device->info, iview->vk_format, vplane_aspect, image->tiling); iview->planes[vplane].image_plane = iplane; diff -Nru mesa-18.3.3/src/intel/vulkan/anv_intel.c mesa-19.0.1/src/intel/vulkan/anv_intel.c --- mesa-18.3.3/src/intel/vulkan/anv_intel.c 2018-10-21 19:21:33.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_intel.c 2019-03-31 23:16:37.000000000 +0000 @@ -88,7 +88,7 @@ if (mem->bo->size < aligned_image_size) { result = 
vk_errorf(device->instance, device, - VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, + VK_ERROR_INVALID_EXTERNAL_HANDLE, "dma-buf too small for image in " "vkCreateDmaBufImageINTEL: %"PRIu64"B < "PRIu64"B", mem->bo->size, aligned_image_size); diff -Nru mesa-18.3.3/src/intel/vulkan/anv_nir_apply_pipeline_layout.c mesa-19.0.1/src/intel/vulkan/anv_nir_apply_pipeline_layout.c --- mesa-18.3.3/src/intel/vulkan/anv_nir_apply_pipeline_layout.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_nir_apply_pipeline_layout.c 2019-03-31 23:16:37.000000000 +0000 @@ -144,21 +144,11 @@ uint32_t array_size = state->layout->set[set].layout->binding[binding].array_size; - nir_const_value *const_array_index = nir_src_as_const_value(intrin->src[0]); + nir_ssa_def *array_index = nir_ssa_for_src(b, intrin->src[0], 1); + if (nir_src_is_const(intrin->src[0]) || state->add_bounds_checks) + array_index = nir_umin(b, array_index, nir_imm_int(b, array_size - 1)); - nir_ssa_def *block_index; - if (const_array_index) { - unsigned array_index = const_array_index->u32[0]; - array_index = MIN2(array_index, array_size - 1); - block_index = nir_imm_int(b, surface_index + array_index); - } else { - block_index = nir_ssa_for_src(b, intrin->src[0], 1); - - if (state->add_bounds_checks) - block_index = nir_umin(b, block_index, nir_imm_int(b, array_size - 1)); - - block_index = nir_iadd(b, nir_imm_int(b, surface_index), block_index); - } + nir_ssa_def *block_index = nir_iadd_imm(b, array_index, surface_index); assert(intrin->dest.is_ssa); nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index)); @@ -187,6 +177,23 @@ } static void +lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin, + struct apply_pipeline_layout_state *state) +{ + nir_builder *b = &state->builder; + + b->cursor = nir_before_instr(&intrin->instr); + + /* We follow the nir_address_format_vk_index_offset model */ + assert(intrin->src[0].is_ssa); + nir_ssa_def *vec2 = nir_vec2(b, intrin->src[0].ssa, 
nir_imm_int(b, 0)); + + assert(intrin->dest.is_ssa); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(vec2)); + nir_instr_remove(&intrin->instr); +} + +static void lower_image_intrinsic(nir_intrinsic_instr *intrin, struct apply_pipeline_layout_state *state) { @@ -301,9 +308,9 @@ if (deref->deref_type != nir_deref_type_var) { assert(deref->deref_type == nir_deref_type_array); - nir_const_value *const_index = nir_src_as_const_value(deref->arr.index); - if (const_index) { - *base_index += MIN2(const_index->u32[0], array_size - 1); + if (nir_src_is_const(deref->arr.index)) { + unsigned arr_index = nir_src_as_uint(deref->arr.index); + *base_index += MIN2(arr_index, array_size - 1); } else { nir_builder *b = &state->builder; @@ -339,8 +346,7 @@ if (plane_src_idx < 0) return 0; - unsigned plane = - nir_src_as_const_value(tex->src[plane_src_idx].src)->u32[0]; + unsigned plane = nir_src_as_uint(tex->src[plane_src_idx].src); nir_tex_instr_remove_src(tex, plane_src_idx); @@ -383,6 +389,9 @@ case nir_intrinsic_vulkan_resource_reindex: lower_res_reindex_intrinsic(intrin, state); break; + case nir_intrinsic_load_vulkan_descriptor: + lower_load_vulkan_descriptor(intrin, state); + break; case nir_intrinsic_image_deref_load: case nir_intrinsic_image_deref_store: case nir_intrinsic_image_deref_atomic_add: diff -Nru mesa-18.3.3/src/intel/vulkan/anv_nir_lower_input_attachments.c mesa-19.0.1/src/intel/vulkan/anv_nir_lower_input_attachments.c --- mesa-18.3.3/src/intel/vulkan/anv_nir_lower_input_attachments.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_nir_lower_input_attachments.c 2019-03-31 23:16:37.000000000 +0000 @@ -61,8 +61,7 @@ nir_ssa_def *offset = nir_ssa_for_src(&b, load->src[1], 2); nir_ssa_def *pos = nir_iadd(&b, frag_coord, offset); - nir_ssa_def *layer = - nir_load_system_value(&b, nir_intrinsic_load_layer_id, 0); + nir_ssa_def *layer = nir_load_layer_id(&b); nir_ssa_def *coord = nir_vec3(&b, nir_channel(&b, pos, 0), nir_channel(&b, 
pos, 1), layer); diff -Nru mesa-18.3.3/src/intel/vulkan/anv_nir_lower_multiview.c mesa-19.0.1/src/intel/vulkan/anv_nir_lower_multiview.c --- mesa-18.3.3/src/intel/vulkan/anv_nir_lower_multiview.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_nir_lower_multiview.c 2019-03-31 23:16:37.000000000 +0000 @@ -125,7 +125,7 @@ const struct glsl_type *type = glsl_int_type(); if (b->shader->info.stage == MESA_SHADER_TESS_CTRL || b->shader->info.stage == MESA_SHADER_GEOMETRY) - type = glsl_array_type(type, 1); + type = glsl_array_type(type, 1, 0); nir_variable *idx_var = nir_variable_create(b->shader, nir_var_shader_in, diff -Nru mesa-18.3.3/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c mesa-19.0.1/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c --- mesa-18.3.3/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c 2019-03-31 23:16:37.000000000 +0000 @@ -38,7 +38,7 @@ y_range(nir_builder *b, nir_ssa_def *y_channel, int bpc, - VkSamplerYcbcrRangeKHR range) + VkSamplerYcbcrRange range) { switch (range) { case VK_SAMPLER_YCBCR_RANGE_ITU_FULL: @@ -60,7 +60,7 @@ chroma_range(nir_builder *b, nir_ssa_def *chroma_channel, int bpc, - VkSamplerYcbcrRangeKHR range) + VkSamplerYcbcrRange range) { switch (range) { case VK_SAMPLER_YCBCR_RANGE_ITU_FULL: @@ -80,7 +80,7 @@ } static const nir_const_value * -ycbcr_model_to_rgb_matrix(VkSamplerYcbcrModelConversionKHR model) +ycbcr_model_to_rgb_matrix(VkSamplerYcbcrModelConversion model) { switch (model) { case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601: { @@ -269,6 +269,7 @@ tex->texture_index = old_tex->texture_index; tex->texture_array_size = old_tex->texture_array_size; tex->sampler_index = old_tex->sampler_index; + tex->is_array = old_tex->is_array; nir_ssa_dest_init(&tex->instr, &tex->dest, old_tex->dest.ssa.num_components, @@ -344,10 +345,10 @@ unsigned array_index = 0; if (deref->deref_type != nir_deref_type_var) { 
assert(deref->deref_type == nir_deref_type_array); - nir_const_value *const_index = nir_src_as_const_value(deref->arr.index); - if (!const_index) + if (!nir_src_is_const(deref->arr.index)) return false; - array_index = MIN2(const_index->u32[0], binding->array_size - 1); + array_index = nir_src_as_uint(deref->arr.index); + array_index = MIN2(array_index, binding->array_size - 1); } const struct anv_sampler *sampler = binding->immutable_samplers[array_index]; @@ -373,11 +374,11 @@ uint8_t y_bpc = y_isl_layout->channels_array[0].bits; /* |ycbcr_comp| holds components in the order : Cr-Y-Cb */ - nir_ssa_def *ycbcr_comp[5] = { NULL, NULL, NULL, - /* Use extra 2 channels for following swizzle */ - nir_imm_float(builder, 1.0f), - nir_imm_float(builder, 0.0f), - }; + nir_ssa_def *zero = nir_imm_float(builder, 0.0f); + nir_ssa_def *one = nir_imm_float(builder, 1.0f); + /* Use extra 2 channels for following swizzle */ + nir_ssa_def *ycbcr_comp[5] = { zero, zero, zero, one, zero }; + uint8_t ycbcr_bpcs[5]; memset(ycbcr_bpcs, y_bpc, sizeof(ycbcr_bpcs)); diff -Nru mesa-18.3.3/src/intel/vulkan/anv_pass.c mesa-19.0.1/src/intel/vulkan/anv_pass.c --- mesa-18.3.3/src/intel/vulkan/anv_pass.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_pass.c 2019-03-31 23:16:37.000000000 +0000 @@ -74,6 +74,10 @@ subpass->depth_stencil_attachment->attachment == VK_ATTACHMENT_UNUSED) subpass->depth_stencil_attachment = NULL; + if (subpass->ds_resolve_attachment && + subpass->ds_resolve_attachment->attachment == VK_ATTACHMENT_UNUSED) + subpass->ds_resolve_attachment = NULL; + for (uint32_t j = 0; j < subpass->attachment_count; j++) { struct anv_subpass_attachment *subpass_att = &subpass->attachments[j]; if (subpass_att->attachment == VK_ATTACHMENT_UNUSED) @@ -100,7 +104,7 @@ } /* We have to handle resolve attachments specially */ - subpass->has_resolve = false; + subpass->has_color_resolve = false; if (subpass->resolve_attachments) { for (uint32_t j = 0; j < 
subpass->color_count; j++) { struct anv_subpass_attachment *color_att = @@ -110,12 +114,22 @@ if (resolve_att->attachment == VK_ATTACHMENT_UNUSED) continue; - subpass->has_resolve = true; + subpass->has_color_resolve = true; assert(resolve_att->usage == VK_IMAGE_USAGE_TRANSFER_DST_BIT); color_att->usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; } } + + if (subpass->ds_resolve_attachment) { + struct anv_subpass_attachment *ds_att = + subpass->depth_stencil_attachment; + UNUSED struct anv_subpass_attachment *resolve_att = + subpass->ds_resolve_attachment; + + assert(resolve_att->usage == VK_IMAGE_USAGE_TRANSFER_DST_BIT); + ds_att->usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + } } /* From the Vulkan 1.0.39 spec: @@ -164,12 +178,28 @@ * subpasses and checking to see if any of them don't have an external * dependency. Or, we could just be lazy and add a couple extra flushes. * We choose to be lazy. + * + * From the documentation for vkCmdNextSubpass: + * + * "Moving to the next subpass automatically performs any multisample + * resolve operations in the subpass being ended. End-of-subpass + * multisample resolves are treated as color attachment writes for the + * purposes of synchronization. This applies to resolve operations for + * both color and depth/stencil attachments. That is, they are + * considered to execute in the + * VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT pipeline stage and + * their writes are synchronized with + * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT." + * + * Therefore, the above flags concerning color attachments also apply to + * color and depth/stencil resolve attachments. 
*/ if (all_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) { pass->subpass_flushes[0] |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT; } - if (all_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + if (all_usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT)) { pass->subpass_flushes[pass->subpass_count] |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; } @@ -318,8 +348,8 @@ vk_foreach_struct(ext, pCreateInfo->pNext) { switch (ext->sType) { - case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHR: { - VkRenderPassMultiviewCreateInfoKHR *mv = (void *)ext; + case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO: { + VkRenderPassMultiviewCreateInfo *mv = (void *)ext; for (uint32_t i = 0; i < mv->subpassCount; i++) { pass->subpasses[i].view_mask = mv->pViewMasks[i]; @@ -342,10 +372,15 @@ static unsigned num_subpass_attachments2(const VkSubpassDescription2KHR *desc) { + const VkSubpassDescriptionDepthStencilResolveKHR *ds_resolve = + vk_find_struct_const(desc->pNext, + SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR); + return desc->inputAttachmentCount + desc->colorAttachmentCount + (desc->pResolveAttachments ? 
desc->colorAttachmentCount : 0) + - (desc->pDepthStencilAttachment != NULL); + (desc->pDepthStencilAttachment != NULL) + + (ds_resolve && ds_resolve->pDepthStencilResolveAttachment); } VkResult anv_CreateRenderPass2KHR( @@ -460,6 +495,22 @@ .layout = desc->pDepthStencilAttachment->layout, }; } + + const VkSubpassDescriptionDepthStencilResolveKHR *ds_resolve = + vk_find_struct_const(desc->pNext, + SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR); + + if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment) { + subpass->ds_resolve_attachment = subpass_attachments++; + + *subpass->ds_resolve_attachment = (struct anv_subpass_attachment) { + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT, + .attachment = ds_resolve->pDepthStencilResolveAttachment->attachment, + .layout = ds_resolve->pDepthStencilResolveAttachment->layout, + }; + subpass->depth_resolve_mode = ds_resolve->depthResolveMode; + subpass->stencil_resolve_mode = ds_resolve->stencilResolveMode; + } } for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) diff -Nru mesa-18.3.3/src/intel/vulkan/anv_pipeline.c mesa-19.0.1/src/intel/vulkan/anv_pipeline.c --- mesa-18.3.3/src/intel/vulkan/anv_pipeline.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_pipeline.c 2019-03-31 23:16:37.000000000 +0000 @@ -32,6 +32,7 @@ #include "anv_private.h" #include "compiler/brw_nir.h" #include "anv_nir.h" +#include "nir/nir_xfb_info.h" #include "spirv/nir_spirv.h" #include "vk_util.h" @@ -97,17 +98,16 @@ * we can't do that yet because we don't have the ability to copy nir. 
*/ static nir_shader * -anv_shader_compile_to_nir(struct anv_pipeline *pipeline, +anv_shader_compile_to_nir(struct anv_device *device, void *mem_ctx, const struct anv_shader_module *module, const char *entrypoint_name, gl_shader_stage stage, const VkSpecializationInfo *spec_info) { - const struct anv_device *device = pipeline->device; - - const struct brw_compiler *compiler = - device->instance->physicalDevice.compiler; + const struct anv_physical_device *pdevice = + &device->instance->physicalDevice; + const struct brw_compiler *compiler = pdevice->compiler; const nir_shader_compiler_options *nir_options = compiler->glsl_compiler_options[stage].NirOptions; @@ -136,27 +136,34 @@ struct spirv_to_nir_options spirv_options = { .lower_workgroup_access_to_offsets = true, .caps = { - .float64 = device->instance->physicalDevice.info.gen >= 8, - .int64 = device->instance->physicalDevice.info.gen >= 8, - .tessellation = true, .device_group = true, .draw_parameters = true, + .float64 = pdevice->info.gen >= 8, + .geometry_streams = true, .image_write_without_format = true, + .int16 = pdevice->info.gen >= 8, + .int64 = pdevice->info.gen >= 8, + .min_lod = true, .multiview = true, - .variable_pointers = true, - .storage_16bit = device->instance->physicalDevice.info.gen >= 8, - .int16 = device->instance->physicalDevice.info.gen >= 8, + .post_depth_coverage = pdevice->info.gen >= 9, .shader_viewport_index_layer = true, + .stencil_export = pdevice->info.gen >= 9, + .storage_8bit = pdevice->info.gen >= 8, + .storage_16bit = pdevice->info.gen >= 8, .subgroup_arithmetic = true, .subgroup_basic = true, .subgroup_ballot = true, .subgroup_quad = true, .subgroup_shuffle = true, .subgroup_vote = true, - .stencil_export = device->instance->physicalDevice.info.gen >= 9, - .storage_8bit = device->instance->physicalDevice.info.gen >= 8, - .post_depth_coverage = device->instance->physicalDevice.info.gen >= 9, + .tessellation = true, + .transform_feedback = pdevice->info.gen >= 8, + 
.variable_pointers = true, }, + .ubo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2), + .ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2), + .push_const_ptr_type = glsl_uint_type(), + .shared_ptr_type = glsl_uint_type(), }; nir_function *entry_point = @@ -180,10 +187,10 @@ * inline functions. That way they get properly initialized at the top * of the function and not at the top of its caller. */ - NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local); + NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_function_temp); NIR_PASS_V(nir, nir_lower_returns); NIR_PASS_V(nir, nir_inline_functions); - NIR_PASS_V(nir, nir_copy_prop); + NIR_PASS_V(nir, nir_opt_deref); /* Pick off the single entrypoint that we want */ foreach_list_typed_safe(nir_function, func, node, &nir->functions) { @@ -208,8 +215,8 @@ NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_in | nir_var_shader_out | nir_var_system_value); - if (stage == MESA_SHADER_FRAGMENT) - NIR_PASS_V(nir, nir_lower_wpos_center, pipeline->sample_shading_enable); + NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo | nir_var_mem_ssbo, + nir_address_format_vk_index_offset); NIR_PASS_V(nir, nir_propagate_invariant); NIR_PASS_V(nir, nir_lower_io_to_temporaries, @@ -220,9 +227,6 @@ nir = brw_preprocess_nir(compiler, nir); - if (stage == MESA_SHADER_FRAGMENT) - NIR_PASS_V(nir, anv_nir_lower_input_attachments); - return nir; } @@ -398,6 +402,8 @@ const char *entrypoint; const VkSpecializationInfo *spec_info; + unsigned char shader_sha1[20]; + union brw_any_prog_key key; struct { @@ -415,20 +421,27 @@ }; static void -anv_pipeline_hash_shader(struct mesa_sha1 *ctx, - struct anv_pipeline_stage *stage) +anv_pipeline_hash_shader(const struct anv_shader_module *module, + const char *entrypoint, + gl_shader_stage stage, + const VkSpecializationInfo *spec_info, + unsigned char *sha1_out) { - _mesa_sha1_update(ctx, stage->module->sha1, sizeof(stage->module->sha1)); - _mesa_sha1_update(ctx, stage->entrypoint, 
strlen(stage->entrypoint)); - _mesa_sha1_update(ctx, &stage->stage, sizeof(stage->stage)); - if (stage->spec_info) { - _mesa_sha1_update(ctx, stage->spec_info->pMapEntries, - stage->spec_info->mapEntryCount * - sizeof(*stage->spec_info->pMapEntries)); - _mesa_sha1_update(ctx, stage->spec_info->pData, - stage->spec_info->dataSize); + struct mesa_sha1 ctx; + _mesa_sha1_init(&ctx); + + _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1)); + _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint)); + _mesa_sha1_update(&ctx, &stage, sizeof(stage)); + if (spec_info) { + _mesa_sha1_update(&ctx, spec_info->pMapEntries, + spec_info->mapEntryCount * + sizeof(*spec_info->pMapEntries)); + _mesa_sha1_update(&ctx, spec_info->pData, + spec_info->dataSize); } - _mesa_sha1_update(ctx, &stage->key, brw_prog_key_size(stage->stage)); + + _mesa_sha1_final(&ctx, sha1_out); } static void @@ -450,8 +463,11 @@ _mesa_sha1_update(&ctx, &rba, sizeof(rba)); for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) { - if (stages[s].entrypoint) - anv_pipeline_hash_shader(&ctx, &stages[s]); + if (stages[s].entrypoint) { + _mesa_sha1_update(&ctx, stages[s].shader_sha1, + sizeof(stages[s].shader_sha1)); + _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s)); + } } _mesa_sha1_final(&ctx, sha1_out); @@ -472,11 +488,48 @@ const bool rba = pipeline->device->robust_buffer_access; _mesa_sha1_update(&ctx, &rba, sizeof(rba)); - anv_pipeline_hash_shader(&ctx, stage); + _mesa_sha1_update(&ctx, stage->shader_sha1, + sizeof(stage->shader_sha1)); + _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs)); _mesa_sha1_final(&ctx, sha1_out); } +static nir_shader * +anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + void *mem_ctx, + struct anv_pipeline_stage *stage) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + const nir_shader_compiler_options *nir_options = + 
compiler->glsl_compiler_options[stage->stage].NirOptions; + nir_shader *nir; + + nir = anv_device_search_for_nir(pipeline->device, cache, + nir_options, + stage->shader_sha1, + mem_ctx); + if (nir) { + assert(nir->info.stage == stage->stage); + return nir; + } + + nir = anv_shader_compile_to_nir(pipeline->device, + mem_ctx, + stage->module, + stage->entrypoint, + stage->stage, + stage->spec_info); + if (nir) { + anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1); + return nir; + } + + return NULL; +} + static void anv_pipeline_lower_nir(struct anv_pipeline *pipeline, void *mem_ctx, @@ -489,6 +542,11 @@ struct brw_stage_prog_data *prog_data = &stage->prog_data.base; nir_shader *nir = stage->nir; + if (nir->info.stage == MESA_SHADER_FRAGMENT) { + NIR_PASS_V(nir, nir_lower_wpos_center, pipeline->sample_shading_enable); + NIR_PASS_V(nir, anv_nir_lower_input_attachments); + } + NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout); NIR_PASS_V(nir, anv_nir_lower_push_constants); @@ -536,6 +594,7 @@ pipeline->device->robust_buffer_access, layout, nir, prog_data, &stage->bind_map); + NIR_PASS_V(nir, nir_opt_constant_folding); } if (nir->info.stage != MESA_SHADER_COMPUTE) @@ -780,7 +839,7 @@ !(stage->key.wm.color_outputs_valid & (1 << rt))) { /* Unused or out-of-bounds, throw it away */ deleted_output = true; - var->data.mode = nir_var_local; + var->data.mode = nir_var_function_temp; exec_node_remove(&var->node); exec_list_push_tail(&impl->locals, &var->node); continue; @@ -875,6 +934,11 @@ stages[stage].module = anv_shader_module_from_handle(sinfo->module); stages[stage].entrypoint = sinfo->pName; stages[stage].spec_info = sinfo->pSpecializationInfo; + anv_pipeline_hash_shader(stages[stage].module, + stages[stage].entrypoint, + stage, + stages[stage].spec_info, + stages[stage].shader_sha1); const struct gen_device_info *devinfo = &pipeline->device->info; switch (stage) { @@ -976,11 +1040,9 @@ .sampler_to_descriptor = stages[s].sampler_to_descriptor }; 
- stages[s].nir = anv_shader_compile_to_nir(pipeline, pipeline_ctx, - stages[s].module, - stages[s].entrypoint, - stages[s].stage, - stages[s].spec_info); + stages[s].nir = anv_pipeline_stage_get_nir(pipeline, cache, + pipeline_ctx, + &stages[s]); if (stages[s].nir == NULL) { result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); goto fail; @@ -1023,6 +1085,12 @@ void *stage_ctx = ralloc_context(NULL); + nir_xfb_info *xfb_info = NULL; + if (s == MESA_SHADER_VERTEX || + s == MESA_SHADER_TESS_EVAL || + s == MESA_SHADER_GEOMETRY) + xfb_info = nir_gather_xfb_info(stages[s].nir, stage_ctx); + anv_pipeline_lower_nir(pipeline, stage_ctx, &stages[s], layout); const unsigned *code; @@ -1064,7 +1132,7 @@ stages[s].nir->constant_data_size, &stages[s].prog_data.base, brw_prog_data_size(s), - &stages[s].bind_map); + xfb_info, &stages[s].bind_map); if (!bin) { ralloc_free(stage_ctx); result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -1125,6 +1193,11 @@ .stage = MESA_SHADER_COMPUTE, } }; + anv_pipeline_hash_shader(stage.module, + stage.entrypoint, + MESA_SHADER_COMPUTE, + stage.spec_info, + stage.shader_sha1); struct anv_shader_bin *bin = NULL; @@ -1144,11 +1217,7 @@ void *mem_ctx = ralloc_context(NULL); - stage.nir = anv_shader_compile_to_nir(pipeline, mem_ctx, - stage.module, - stage.entrypoint, - stage.stage, - stage.spec_info); + stage.nir = anv_pipeline_stage_get_nir(pipeline, cache, mem_ctx, &stage); if (stage.nir == NULL) { ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -1177,7 +1246,7 @@ stage.nir->constant_data_size, &stage.prog_data.base, sizeof(stage.prog_data.cs), - &stage.bind_map); + NULL, &stage.bind_map); if (!bin) { ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); diff -Nru mesa-18.3.3/src/intel/vulkan/anv_pipeline_cache.c mesa-19.0.1/src/intel/vulkan/anv_pipeline_cache.c --- mesa-18.3.3/src/intel/vulkan/anv_pipeline_cache.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_pipeline_cache.c 2019-03-31 
23:16:37.000000000 +0000 @@ -26,7 +26,9 @@ #include "util/debug.h" #include "util/disk_cache.h" #include "util/mesa-sha1.h" +#include "nir/nir_serialize.h" #include "anv_private.h" +#include "nir/nir_xfb_info.h" struct anv_shader_bin * anv_shader_bin_create(struct anv_device *device, @@ -35,12 +37,14 @@ const void *constant_data, uint32_t constant_data_size, const struct brw_stage_prog_data *prog_data_in, uint32_t prog_data_size, const void *prog_data_param_in, + const nir_xfb_info *xfb_info_in, const struct anv_pipeline_bind_map *bind_map) { struct anv_shader_bin *shader; struct anv_shader_bin_key *key; struct brw_stage_prog_data *prog_data; uint32_t *prog_data_param; + nir_xfb_info *xfb_info; struct anv_pipeline_binding *surface_to_descriptor, *sampler_to_descriptor; ANV_MULTIALLOC(ma); @@ -48,6 +52,10 @@ anv_multialloc_add_size(&ma, &key, sizeof(*key) + key_size); anv_multialloc_add_size(&ma, &prog_data, prog_data_size); anv_multialloc_add(&ma, &prog_data_param, prog_data_in->nr_params); + if (xfb_info_in) { + uint32_t xfb_info_size = nir_xfb_info_size(xfb_info_in->output_count); + anv_multialloc_add_size(&ma, &xfb_info, xfb_info_size); + } anv_multialloc_add(&ma, &surface_to_descriptor, bind_map->surface_count); anv_multialloc_add(&ma, &sampler_to_descriptor, @@ -85,6 +93,15 @@ shader->prog_data = prog_data; shader->prog_data_size = prog_data_size; + if (xfb_info_in) { + *xfb_info = *xfb_info_in; + typed_memcpy(xfb_info->outputs, xfb_info_in->outputs, + xfb_info_in->output_count); + shader->xfb_info = xfb_info; + } else { + shader->xfb_info = NULL; + } + shader->bind_map = *bind_map; typed_memcpy(surface_to_descriptor, bind_map->surface_to_descriptor, bind_map->surface_count); @@ -128,6 +145,15 @@ shader->prog_data->nr_params * sizeof(*shader->prog_data->param)); + if (shader->xfb_info) { + uint32_t xfb_info_size = + nir_xfb_info_size(shader->xfb_info->output_count); + ok = blob_write_uint32(blob, xfb_info_size); + ok = blob_write_bytes(blob, shader->xfb_info, 
xfb_info_size); + } else { + ok = blob_write_uint32(blob, 0); + } + ok = blob_write_uint32(blob, shader->bind_map.surface_count); ok = blob_write_uint32(blob, shader->bind_map.sampler_count); ok = blob_write_uint32(blob, shader->bind_map.image_count); @@ -162,6 +188,11 @@ const void *prog_data_param = blob_read_bytes(blob, prog_data->nr_params * sizeof(*prog_data->param)); + const nir_xfb_info *xfb_info = NULL; + uint32_t xfb_size = blob_read_uint32(blob); + if (xfb_size) + xfb_info = blob_read_bytes(blob, xfb_size); + struct anv_pipeline_bind_map bind_map; bind_map.surface_count = blob_read_uint32(blob); bind_map.sampler_count = blob_read_uint32(blob); @@ -181,7 +212,7 @@ kernel_data, kernel_size, constant_data, constant_data_size, prog_data, prog_data_size, prog_data_param, - &bind_map); + xfb_info, &bind_map); } /* Remaining work: @@ -211,6 +242,18 @@ return memcmp(a->data, b->data, a->size) == 0; } +static uint32_t +sha1_hash_func(const void *sha1) +{ + return _mesa_hash_data(sha1, 20); +} + +static bool +sha1_compare_func(const void *sha1_a, const void *sha1_b) +{ + return memcmp(sha1_a, sha1_b, 20) == 0; +} + void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, struct anv_device *device, @@ -222,8 +265,11 @@ if (cache_enabled) { cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func, shader_bin_key_compare_func); + cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func, + sha1_compare_func); } else { cache->cache = NULL; + cache->nir_cache = NULL; } } @@ -242,6 +288,13 @@ _mesa_hash_table_destroy(cache->cache, NULL); } + + if (cache->nir_cache) { + hash_table_foreach(cache->nir_cache, entry) + ralloc_free(entry->data); + + _mesa_hash_table_destroy(cache->nir_cache, NULL); + } } static struct anv_shader_bin * @@ -310,6 +363,7 @@ const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, const void *prog_data_param, + const nir_xfb_info *xfb_info, const struct anv_pipeline_bind_map *bind_map) { struct anv_shader_bin 
*shader = @@ -322,7 +376,7 @@ kernel_data, kernel_size, constant_data, constant_data_size, prog_data, prog_data_size, prog_data_param, - bind_map); + xfb_info, bind_map); if (!bin) return NULL; @@ -339,6 +393,7 @@ uint32_t constant_data_size, const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, + const nir_xfb_info *xfb_info, const struct anv_pipeline_bind_map *bind_map) { if (cache->cache) { @@ -349,7 +404,8 @@ kernel_data, kernel_size, constant_data, constant_data_size, prog_data, prog_data_size, - prog_data->param, bind_map); + prog_data->param, + xfb_info, bind_map); pthread_mutex_unlock(&cache->mutex); @@ -364,7 +420,8 @@ kernel_data, kernel_size, constant_data, constant_data_size, prog_data, prog_data_size, - prog_data->param, bind_map); + prog_data->param, + xfb_info, bind_map); } } @@ -601,6 +658,7 @@ uint32_t constant_data_size, const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, + const nir_xfb_info *xfb_info, const struct anv_pipeline_bind_map *bind_map) { struct anv_shader_bin *bin; @@ -609,13 +667,14 @@ kernel_data, kernel_size, constant_data, constant_data_size, prog_data, prog_data_size, - bind_map); + xfb_info, bind_map); } else { bin = anv_shader_bin_create(device, key_data, key_size, kernel_data, kernel_size, constant_data, constant_data_size, prog_data, prog_data_size, - prog_data->param, bind_map); + prog_data->param, + xfb_info, bind_map); } if (bin == NULL) @@ -641,3 +700,88 @@ return bin; } + +struct serialized_nir { + unsigned char sha1_key[20]; + size_t size; + char data[0]; +}; + +struct nir_shader * +anv_device_search_for_nir(struct anv_device *device, + struct anv_pipeline_cache *cache, + const nir_shader_compiler_options *nir_options, + unsigned char sha1_key[20], + void *mem_ctx) +{ + if (cache && cache->nir_cache) { + const struct serialized_nir *snir = NULL; + + pthread_mutex_lock(&cache->mutex); + struct hash_entry *entry = + _mesa_hash_table_search(cache->nir_cache, sha1_key); + if (entry) + snir 
= entry->data; + pthread_mutex_unlock(&cache->mutex); + + if (snir) { + struct blob_reader blob; + blob_reader_init(&blob, snir->data, snir->size); + + nir_shader *nir = nir_deserialize(mem_ctx, nir_options, &blob); + if (blob.overrun) { + ralloc_free(nir); + } else { + return nir; + } + } + } + + return NULL; +} + +void +anv_device_upload_nir(struct anv_device *device, + struct anv_pipeline_cache *cache, + const struct nir_shader *nir, + unsigned char sha1_key[20]) +{ + if (cache && cache->nir_cache) { + pthread_mutex_lock(&cache->mutex); + struct hash_entry *entry = + _mesa_hash_table_search(cache->nir_cache, sha1_key); + pthread_mutex_unlock(&cache->mutex); + if (entry) + return; + + struct blob blob; + blob_init(&blob); + + nir_serialize(&blob, nir); + if (blob.out_of_memory) { + blob_finish(&blob); + return; + } + + pthread_mutex_lock(&cache->mutex); + /* Because ralloc isn't thread-safe, we have to do all this inside the + * lock. We could unlock for the big memcpy but it's probably not worth + * the hassle. 
+ */ + entry = _mesa_hash_table_search(cache->nir_cache, sha1_key); + if (entry) { + pthread_mutex_unlock(&cache->mutex); + return; + } + + struct serialized_nir *snir = + ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size); + memcpy(snir->sha1_key, sha1_key, 20); + snir->size = blob.size; + memcpy(snir->data, blob.data, blob.size); + + _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir); + + pthread_mutex_unlock(&cache->mutex); + } +} diff -Nru mesa-18.3.3/src/intel/vulkan/anv_private.h mesa-19.0.1/src/intel/vulkan/anv_private.h --- mesa-18.3.3/src/intel/vulkan/anv_private.h 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_private.h 2019-03-31 23:16:37.000000000 +0000 @@ -76,8 +76,8 @@ #include #include #include -#include +#include "anv_android.h" #include "anv_entrypoints.h" #include "anv_extensions.h" #include "isl/isl.h" @@ -151,6 +151,8 @@ #define ANV_HZ_FC_VAL 1.0f #define MAX_VBS 28 +#define MAX_XFB_BUFFERS 4 +#define MAX_XFB_STREAMS 4 #define MAX_SETS 8 #define MAX_RTS 8 #define MAX_VIEWPORTS 16 @@ -183,6 +185,11 @@ #define ANV_SVGS_VB_INDEX MAX_VBS #define ANV_DRAWID_VB_INDEX (MAX_VBS + 1) +/* We reserve this MI ALU register for the purpose of handling predication. + * Other code which uses the MI ALU should leave it alone. + */ +#define ANV_PREDICATE_RESULT_REG MI_ALU_REG15 + #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) static inline uint32_t @@ -608,7 +615,7 @@ */ union anv_free_list { struct { - int32_t offset; + uint32_t offset; /* A simple count that is incremented every time the head changes. 
*/ uint32_t count; @@ -616,7 +623,7 @@ uint64_t u64; }; -#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { 1, 0 } }) +#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } }) struct anv_block_state { union { @@ -628,12 +635,21 @@ }; }; +#define anv_block_pool_foreach_bo(bo, pool) \ + for (bo = (pool)->bos; bo != &(pool)->bos[(pool)->nbos]; bo++) + +#define ANV_MAX_BLOCK_POOL_BOS 20 + struct anv_block_pool { struct anv_device *device; uint64_t bo_flags; - struct anv_bo bo; + struct anv_bo bos[ANV_MAX_BLOCK_POOL_BOS]; + struct anv_bo *bo; + uint32_t nbos; + + uint64_t size; /* The address where the start of the pool is pinned. The various bos that * are created as the pool grows will have addresses in the range @@ -655,6 +671,9 @@ * will be valid relative to this pointer. * * In particular, map == bo.map + center_offset + * + * DO NOT access this pointer directly. Use anv_block_pool_map() instead, + * since it will handle the softpin case as well, where this points to NULL. 
*/ void *map; int fd; @@ -688,6 +707,7 @@ int32_t offset; uint32_t alloc_size; void *map; + uint32_t idx; }; #define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 }) @@ -702,9 +722,25 @@ #define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1) +struct anv_free_entry { + uint32_t next; + struct anv_state state; +}; + +struct anv_state_table { + struct anv_device *device; + int fd; + struct anv_free_entry *map; + uint32_t size; + struct anv_block_state state; + struct u_vector mmap_cleanups; +}; + struct anv_state_pool { struct anv_block_pool block_pool; + struct anv_state_table table; + /* The size of blocks which will be allocated from the block pool */ uint32_t block_size; @@ -742,9 +778,10 @@ uint64_t bo_flags); void anv_block_pool_finish(struct anv_block_pool *pool); int32_t anv_block_pool_alloc(struct anv_block_pool *pool, - uint32_t block_size); + uint32_t block_size, uint32_t *padding); int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool, uint32_t block_size); +void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset); VkResult anv_state_pool_init(struct anv_state_pool *pool, struct anv_device *device, @@ -763,6 +800,24 @@ struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream, uint32_t size, uint32_t alignment); +VkResult anv_state_table_init(struct anv_state_table *table, + struct anv_device *device, + uint32_t initial_entries); +void anv_state_table_finish(struct anv_state_table *table); +VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx, + uint32_t count); +void anv_free_list_push(union anv_free_list *list, + struct anv_state_table *table, + uint32_t idx, uint32_t count); +struct anv_state* anv_free_list_pop(union anv_free_list *list, + struct anv_state_table *table); + + +static inline struct anv_state * +anv_state_table_get(struct anv_state_table *table, uint32_t idx) +{ + return &table->map[idx].state; +} /** * Implements a pool of re-usable BOs. 
The interface is identical to that * of block_pool except that each block is its own BO. @@ -948,9 +1003,12 @@ struct anv_device * device; pthread_mutex_t mutex; + struct hash_table * nir_cache; + struct hash_table * cache; }; +struct nir_xfb_info; struct anv_pipeline_bind_map; void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, @@ -969,6 +1027,7 @@ uint32_t constant_data_size, const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, + const struct nir_xfb_info *xfb_info, const struct anv_pipeline_bind_map *bind_map); struct anv_shader_bin * @@ -985,8 +1044,25 @@ uint32_t constant_data_size, const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, + const struct nir_xfb_info *xfb_info, const struct anv_pipeline_bind_map *bind_map); +struct nir_shader; +struct nir_shader_compiler_options; + +struct nir_shader * +anv_device_search_for_nir(struct anv_device *device, + struct anv_pipeline_cache *cache, + const struct nir_shader_compiler_options *nir_options, + unsigned char sha1_key[20], + void *mem_ctx); + +void +anv_device_upload_nir(struct anv_device *device, + struct anv_pipeline_cache *cache, + const struct nir_shader *nir, + unsigned char sha1_key[20]); + struct anv_device { VK_LOADER_DATA _loader_data; @@ -1072,15 +1148,6 @@ return device->default_mocs; } -static void inline -anv_state_flush(struct anv_device *device, struct anv_state state) -{ - if (device->info.has_llc) - return; - - gen_flush_range(state.map, state.alloc_size); -} - void anv_device_init_blorp(struct anv_device *device); void anv_device_finish_blorp(struct anv_device *device); @@ -1350,70 +1417,61 @@ _dst = NULL; \ })) -#define GEN7_MOCS (struct GEN7_MEMORY_OBJECT_CONTROL_STATE) { \ - .GraphicsDataTypeGFDT = 0, \ - .LLCCacheabilityControlLLCCC = 0, \ - .L3CacheabilityControlL3CC = 1, \ -} +/* MEMORY_OBJECT_CONTROL_STATE: + * .GraphicsDataTypeGFDT = 0, + * .LLCCacheabilityControlLLCCC = 0, + * .L3CacheabilityControlL3CC = 1, + */ +#define GEN7_MOCS 1 
-#define GEN75_MOCS (struct GEN75_MEMORY_OBJECT_CONTROL_STATE) { \ - .LLCeLLCCacheabilityControlLLCCC = 0, \ - .L3CacheabilityControlL3CC = 1, \ -} +/* MEMORY_OBJECT_CONTROL_STATE: + * .LLCeLLCCacheabilityControlLLCCC = 0, + * .L3CacheabilityControlL3CC = 1, + */ +#define GEN75_MOCS 1 -#define GEN8_MOCS (struct GEN8_MEMORY_OBJECT_CONTROL_STATE) { \ - .MemoryTypeLLCeLLCCacheabilityControl = WB, \ - .TargetCache = L3DefertoPATforLLCeLLCselection, \ - .AgeforQUADLRU = 0 \ - } +/* MEMORY_OBJECT_CONTROL_STATE: + * .MemoryTypeLLCeLLCCacheabilityControl = WB, + * .TargetCache = L3DefertoPATforLLCeLLCselection, + * .AgeforQUADLRU = 0 + */ +#define GEN8_MOCS 0x78 -#define GEN8_EXTERNAL_MOCS (struct GEN8_MEMORY_OBJECT_CONTROL_STATE) { \ - .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle, \ - .TargetCache = L3DefertoPATforLLCeLLCselection, \ - .AgeforQUADLRU = 0 \ - } +/* MEMORY_OBJECT_CONTROL_STATE: + * .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle, + * .TargetCache = L3DefertoPATforLLCeLLCselection, + * .AgeforQUADLRU = 0 + */ +#define GEN8_EXTERNAL_MOCS 0x18 /* Skylake: MOCS is now an index into an array of 62 different caching * configurations programmed by the kernel. */ -#define GEN9_MOCS (struct GEN9_MEMORY_OBJECT_CONTROL_STATE) { \ - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ - .IndextoMOCSTables = 2 \ - } +/* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ +#define GEN9_MOCS (2 << 1) -#define GEN9_EXTERNAL_MOCS (struct GEN9_MEMORY_OBJECT_CONTROL_STATE) { \ - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ - .IndextoMOCSTables = 1 \ - } +/* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ +#define GEN9_EXTERNAL_MOCS (1 << 1) /* Cannonlake MOCS defines are duplicates of Skylake MOCS defines. 
*/ -#define GEN10_MOCS (struct GEN10_MEMORY_OBJECT_CONTROL_STATE) { \ - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ - .IndextoMOCSTables = 2 \ - } - -#define GEN10_EXTERNAL_MOCS (struct GEN10_MEMORY_OBJECT_CONTROL_STATE) { \ - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ - .IndextoMOCSTables = 1 \ - } +#define GEN10_MOCS GEN9_MOCS +#define GEN10_EXTERNAL_MOCS GEN9_EXTERNAL_MOCS /* Ice Lake MOCS defines are duplicates of Skylake MOCS defines. */ -#define GEN11_MOCS (struct GEN11_MEMORY_OBJECT_CONTROL_STATE) { \ - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ - .IndextoMOCSTables = 2 \ - } - -#define GEN11_EXTERNAL_MOCS (struct GEN11_MEMORY_OBJECT_CONTROL_STATE) { \ - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ - .IndextoMOCSTables = 1 \ - } +#define GEN11_MOCS GEN9_MOCS +#define GEN11_EXTERNAL_MOCS GEN9_EXTERNAL_MOCS struct anv_device_memory { struct anv_bo * bo; struct anv_memory_type * type; VkDeviceSize map_size; void * map; + + /* If set, we are holding reference to AHardwareBuffer + * which we must release when memory is freed. + */ + struct AHardwareBuffer * ahw; }; /** @@ -1523,6 +1581,10 @@ uint32_t size; uint32_t buffer_count; struct anv_buffer_view *buffer_views; + + /* Link to descriptor pool's desc_sets list . */ + struct list_head pool_link; + struct anv_descriptor descriptors[0]; }; @@ -1556,6 +1618,8 @@ struct anv_state_stream surface_state_stream; void *surface_state_free_list; + struct list_head desc_sets; + char data[0]; }; @@ -1590,7 +1654,7 @@ /* The descriptor set this template corresponds to. This value is only * valid if the template was created with the templateType - * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR. + * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET. 
*/ uint8_t set; @@ -1726,6 +1790,7 @@ ANV_CMD_DIRTY_PIPELINE = 1 << 9, ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 10, ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 11, + ANV_CMD_DIRTY_XFB_ENABLE = 1 << 12, }; typedef uint32_t anv_cmd_dirty_mask_t; @@ -1750,11 +1815,12 @@ ANV_PIPE_NEEDS_CS_STALL_BIT = (1 << 21), /* This bit does not exist directly in PIPE_CONTROL. It means that render - * target operations are ongoing. Some operations like copies on the - * command streamer might need to be aware of this to trigger the - * appropriate stall before they can proceed with the copy. + * target operations related to transfer commands with VkBuffer as + * destination are ongoing. Some operations like copies on the command + * streamer might need to be aware of this to trigger the appropriate stall + * before they can proceed with the copy. */ - ANV_PIPE_RENDER_TARGET_WRITES = (1 << 22), + ANV_PIPE_RENDER_TARGET_BUFFER_WRITES = (1 << 22), }; #define ANV_PIPE_FLUSH_BITS ( \ @@ -1784,19 +1850,47 @@ for_each_bit(b, flags) { switch ((VkAccessFlagBits)(1 << b)) { case VK_ACCESS_SHADER_WRITE_BIT: + /* We're transitioning a buffer that was previously used as write + * destination through the data port. To make its content available + * to future operations, flush the data cache. + */ pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT; break; case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: + /* We're transitioning a buffer that was previously used as render + * target. To make its content available to future operations, flush + * the render target cache. + */ pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; break; case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: + /* We're transitioning a buffer that was previously used as depth + * buffer. To make its content available to future operations, flush + * the depth cache. + */ pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT; break; case VK_ACCESS_TRANSFER_WRITE_BIT: + /* We're transitioning a buffer that was previously used as a + * transfer write destination. 
Generic write operations include color + * & depth operations as well as buffer operations like : + * - vkCmdClearColorImage() + * - vkCmdClearDepthStencilImage() + * - vkCmdBlitImage() + * - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*() + * + * Most of these operations are implemented using Blorp which writes + * through the render target, so flush that cache to make it visible + * to future operations. And for depth related operations we also + * need to flush the depth cache. + */ pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT; break; case VK_ACCESS_MEMORY_WRITE_BIT: + /* We're transitioning a buffer for generic write operations. Flush + * all the caches. + */ pipe_bits |= ANV_PIPE_FLUSH_BITS; break; default: @@ -1816,25 +1910,67 @@ for_each_bit(b, flags) { switch ((VkAccessFlagBits)(1 << b)) { case VK_ACCESS_INDIRECT_COMMAND_READ_BIT: + /* Indirect draw commands take a buffer as input that we're going to + * read from the command streamer to load some of the HW registers + * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a + * command streamer stall so that all the cache flushes have + * completed before the command streamer loads from memory. + */ + pipe_bits |= ANV_PIPE_CS_STALL_BIT; + /* Indirect draw commands also set gl_BaseVertex & gl_BaseIndex + * through a vertex buffer, so invalidate that cache. + */ + pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT; + /* For CmdDipatchIndirect, we also load gl_NumWorkGroups through a + * UBO from the buffer, so we need to invalidate constant cache. + */ + pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT; + break; case VK_ACCESS_INDEX_READ_BIT: case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT: + /* We transitioning a buffer to be used for as input for vkCmdDraw* + * commands, so we invalidate the VF cache to make sure there is no + * stale data when we start rendering. 
+ */ pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT; break; case VK_ACCESS_UNIFORM_READ_BIT: + /* We transitioning a buffer to be used as uniform data. Because + * uniform is accessed through the data port & sampler, we need to + * invalidate the texture cache (sampler) & constant cache (data + * port) to avoid stale data. + */ pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT; pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT; break; case VK_ACCESS_SHADER_READ_BIT: case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT: case VK_ACCESS_TRANSFER_READ_BIT: + /* Transitioning a buffer to be read through the sampler, so + * invalidate the texture cache, we don't want any stale data. + */ pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT; break; case VK_ACCESS_MEMORY_READ_BIT: + /* Transitioning a buffer for generic read, invalidate all the + * caches. + */ pipe_bits |= ANV_PIPE_INVALIDATE_BITS; break; case VK_ACCESS_MEMORY_WRITE_BIT: + /* Generic write, make sure all previously written things land in + * memory. + */ pipe_bits |= ANV_PIPE_FLUSH_BITS; break; + case VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT: + /* Transitioning a buffer for conditional rendering. We'll load the + * content of this buffer into HW registers using the command + * streamer, so we need to stall the command streamer to make sure + * any in-flight flush operations have completed. 
+ */ + pipe_bits |= ANV_PIPE_CS_STALL_BIT; + break; default: break; /* Nothing to do */ } @@ -1858,6 +1994,12 @@ VkDeviceSize offset; }; +struct anv_xfb_binding { + struct anv_buffer * buffer; + VkDeviceSize offset; + VkDeviceSize size; +}; + #define ANV_PARAM_PUSH(offset) ((1 << 16) | (uint32_t)(offset)) #define ANV_PARAM_PUSH_OFFSET(param) ((param) & 0xffff) @@ -2050,6 +2192,8 @@ VkRect2D render_area; uint32_t restart_index; struct anv_vertex_binding vertex_bindings[MAX_VBS]; + bool xfb_enabled; + struct anv_xfb_binding xfb_bindings[MAX_XFB_BUFFERS]; VkShaderStageFlags push_constant_stages; struct anv_push_constants * push_constants[MESA_SHADER_STAGES]; struct anv_state binding_tables[MESA_SHADER_STAGES]; @@ -2069,6 +2213,8 @@ */ bool hiz_enabled; + bool conditional_render_enabled; + /** * Array length is anv_cmd_state::pass::attachment_count. Array content is * valid only when recording a render pass instance. @@ -2215,8 +2361,6 @@ struct anv_state anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer); -void anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer); - const struct anv_image_view * anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer); @@ -2228,6 +2372,8 @@ void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer); + enum anv_fence_type { ANV_FENCE_TYPE_NONE = 0, ANV_FENCE_TYPE_BO, @@ -2404,6 +2550,8 @@ const struct brw_stage_prog_data *prog_data; uint32_t prog_data_size; + struct nir_xfb_info *xfb_info; + struct anv_pipeline_bind_map bind_map; }; @@ -2414,6 +2562,7 @@ const void *constant_data, uint32_t constant_data_size, const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, const void *prog_data_param, + const struct nir_xfb_info *xfb_info, const struct anv_pipeline_bind_map *bind_map); void @@ -2463,6 +2612,8 @@ uint32_t instance_divisor; } vb[MAX_VBS]; + uint8_t xfb_used; + bool 
primitive_restart; uint32_t topology; @@ -2557,11 +2708,15 @@ /* How to map sampled ycbcr planes to a single 4 component element. */ struct isl_swizzle ycbcr_swizzle; + + /* What aspect is associated to this plane */ + VkImageAspectFlags aspect; }; struct anv_format { struct anv_format_plane planes[3]; + VkFormat vk_format; uint8_t n_planes; bool can_ycbcr; }; @@ -2589,28 +2744,6 @@ } } -static inline uint32_t -anv_image_aspect_get_planes(VkImageAspectFlags aspect_mask) -{ - uint32_t planes = 0; - - if (aspect_mask & (VK_IMAGE_ASPECT_COLOR_BIT | - VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT | - VK_IMAGE_ASPECT_PLANE_0_BIT)) - planes++; - if (aspect_mask & VK_IMAGE_ASPECT_PLANE_1_BIT) - planes++; - if (aspect_mask & VK_IMAGE_ASPECT_PLANE_2_BIT) - planes++; - - if ((aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0 && - (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0) - planes++; - - return planes; -} - static inline VkImageAspectFlags anv_plane_to_aspect(VkImageAspectFlags image_aspects, uint32_t plane) @@ -2697,6 +2830,7 @@ uint32_t samples; /**< VkImageCreateInfo::samples */ uint32_t n_planes; VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */ + VkImageCreateFlags create_flags; /* Flags used when creating image. */ VkImageTiling tiling; /** VkImageCreateInfo::tiling */ /** True if this is needs to be bound to an appropriately tiled BO. @@ -2722,6 +2856,14 @@ */ bool disjoint; + /* All the formats that can be used when creating views of this image + * are CCS_E compatible. + */ + bool ccs_e_compatible; + + /* Image was created with external format. */ + bool external_format; + /** * Image subsurfaces * @@ -2872,8 +3014,7 @@ const unsigned clear_color_state_size = device->info.gen >= 10 ? 
device->isl_dev.ss.clear_color_state_size : device->isl_dev.ss.clear_value_size; - addr.offset += clear_color_state_size; - return addr; + return anv_address_add(addr, clear_color_state_size); } static inline struct anv_address @@ -2943,6 +3084,20 @@ VkRect2D area, float depth_value, uint8_t stencil_value); void +anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer, + const struct anv_image *src_image, + enum isl_aux_usage src_aux_usage, + uint32_t src_level, uint32_t src_base_layer, + const struct anv_image *dst_image, + enum isl_aux_usage dst_aux_usage, + uint32_t dst_level, uint32_t dst_base_layer, + VkImageAspectFlagBits aspect, + uint32_t src_x, uint32_t src_y, + uint32_t dst_x, uint32_t dst_y, + uint32_t width, uint32_t height, + uint32_t layer_count, + enum blorp_filter filter); +void anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, VkImageAspectFlagBits aspect, uint32_t level, @@ -3099,6 +3254,7 @@ isl_surf_usage_flags_t isl_extra_usage_flags; uint32_t stride; + bool external_format; }; VkResult anv_image_create(VkDevice _device, @@ -3106,14 +3262,6 @@ const VkAllocationCallbacks* alloc, VkImage *pImage); -#ifdef ANDROID -VkResult anv_image_from_gralloc(VkDevice device_h, - const VkImageCreateInfo *base_info, - const VkNativeBufferANDROID *gralloc_info, - const VkAllocationCallbacks *alloc, - VkImage *pImage); -#endif - const struct anv_surface * anv_image_get_surface_for_aspect_mask(const struct anv_image *image, VkImageAspectFlags aspect_mask); @@ -3153,6 +3301,11 @@ } } +VkFormatFeatureFlags +anv_get_image_format_features(const struct gen_device_info *devinfo, + VkFormat vk_format, + const struct anv_format *anv_format, + VkImageTiling vk_tiling); void anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state, @@ -3227,14 +3380,17 @@ struct anv_subpass_attachment * resolve_attachments; struct anv_subpass_attachment * depth_stencil_attachment; + struct anv_subpass_attachment * 
ds_resolve_attachment; + VkResolveModeFlagBitsKHR depth_resolve_mode; + VkResolveModeFlagBitsKHR stencil_resolve_mode; uint32_t view_mask; /** Subpass has a depth/stencil self-dependency */ bool has_ds_self_dep; - /** Subpass has at least one resolve attachment */ - bool has_resolve; + /** Subpass has at least one color resolve attachment */ + bool has_color_resolve; }; static inline unsigned @@ -3370,7 +3526,7 @@ ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, VkDescriptorPool) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_update_template, VkDescriptorUpdateTemplateKHR) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_update_template, VkDescriptorUpdateTemplate) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, VkDeviceMemory) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, VkFence) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_event, VkEvent) diff -Nru mesa-18.3.3/src/intel/vulkan/anv_queue.c mesa-19.0.1/src/intel/vulkan/anv_queue.c --- mesa-18.3.3/src/intel/vulkan/anv_queue.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_queue.c 2019-03-31 23:16:37.000000000 +0000 @@ -757,8 +757,8 @@ void anv_GetPhysicalDeviceExternalFenceProperties( VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceExternalFenceInfoKHR* pExternalFenceInfo, - VkExternalFencePropertiesKHR* pExternalFenceProperties) + const VkPhysicalDeviceExternalFenceInfo* pExternalFenceInfo, + VkExternalFenceProperties* pExternalFenceProperties) { ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); @@ -927,9 +927,9 @@ if (semaphore == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - const VkExportSemaphoreCreateInfoKHR *export = + const VkExportSemaphoreCreateInfo *export = vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO); - VkExternalSemaphoreHandleTypeFlagsKHR handleTypes = + 
VkExternalSemaphoreHandleTypeFlags handleTypes = export ? export->handleTypes : 0; if (handleTypes == 0) { @@ -1038,8 +1038,8 @@ void anv_GetPhysicalDeviceExternalSemaphoreProperties( VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo, - VkExternalSemaphorePropertiesKHR* pExternalSemaphoreProperties) + const VkPhysicalDeviceExternalSemaphoreInfo* pExternalSemaphoreInfo, + VkExternalSemaphoreProperties* pExternalSemaphoreProperties) { ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); @@ -1056,7 +1056,8 @@ case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: if (device->has_exec_fence) { - pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0; + pExternalSemaphoreProperties->exportFromImportedHandleTypes = + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; pExternalSemaphoreProperties->externalSemaphoreFeatures = @@ -1106,7 +1107,7 @@ if (new_impl.bo->size < 4096) { anv_bo_cache_release(device, &device->bo_cache, new_impl.bo); - return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); + return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE); } /* If we're going to use this as a fence, we need to *not* have the diff -Nru mesa-18.3.3/src/intel/vulkan/gen7_cmd_buffer.c mesa-19.0.1/src/intel/vulkan/gen7_cmd_buffer.c --- mesa-18.3.3/src/intel/vulkan/gen7_cmd_buffer.c 2018-10-21 19:21:33.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/gen7_cmd_buffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -70,12 +70,36 @@ }; const int max = 0xffff; + + uint32_t y_min = s->offset.y; + uint32_t x_min = s->offset.x; + uint32_t y_max = s->offset.y + s->extent.height - 1; + uint32_t x_max = s->offset.x + s->extent.width - 1; + + /* Do this math using int64_t so overflow gets clamped correctly. 
*/ + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { + y_min = clamp_int64((uint64_t) y_min, + cmd_buffer->state.render_area.offset.y, max); + x_min = clamp_int64((uint64_t) x_min, + cmd_buffer->state.render_area.offset.x, max); + y_max = clamp_int64((uint64_t) y_max, 0, + cmd_buffer->state.render_area.offset.y + + cmd_buffer->state.render_area.extent.height - 1); + x_max = clamp_int64((uint64_t) x_max, 0, + cmd_buffer->state.render_area.offset.x + + cmd_buffer->state.render_area.extent.width - 1); + } else if (fb) { + y_min = clamp_int64((uint64_t) y_min, 0, max); + x_min = clamp_int64((uint64_t) x_min, 0, max); + y_max = clamp_int64((uint64_t) y_max, 0, fb->height - 1); + x_max = clamp_int64((uint64_t) x_max, 0, fb->width - 1); + } + struct GEN7_SCISSOR_RECT scissor = { - /* Do this math using int64_t so overflow gets clamped correctly. */ - .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max), - .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max), - .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, fb->height - 1), - .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, fb->width - 1) + .ScissorRectangleYMin = y_min, + .ScissorRectangleXMin = x_min, + .ScissorRectangleYMax = y_max, + .ScissorRectangleXMax = x_max }; if (s->extent.width <= 0 || s->extent.height <= 0) { @@ -90,8 +114,6 @@ GEN7_3DSTATE_SCISSOR_STATE_POINTERS, ssp) { ssp.ScissorRectPointer = scissor_state.offset; } - - anv_state_flush(cmd_buffer->device, scissor_state); } #endif @@ -191,7 +213,6 @@ .BackfaceStencilReferenceValue = d->stencil_reference.back & 0xff, }; GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); - anv_state_flush(cmd_buffer->device, cc_state); anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) { ccp.ColorCalcStatePointer = cc_state.offset; @@ -246,7 +267,7 @@ ib.CutIndexEnable = pipeline->primitive_restart; #endif ib.IndexFormat = 
cmd_buffer->state.gfx.gen7.index_type; - ib.IndexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, + ib.MOCS = anv_mocs_for_bo(cmd_buffer->device, buffer->address.bo); ib.BufferStartingAddress = anv_address_add(buffer->address, diff -Nru mesa-18.3.3/src/intel/vulkan/gen8_cmd_buffer.c mesa-19.0.1/src/intel/vulkan/gen8_cmd_buffer.c --- mesa-18.3.3/src/intel/vulkan/gen8_cmd_buffer.c 2018-10-21 19:21:33.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/gen8_cmd_buffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -68,8 +68,6 @@ &sf_clip_viewport); } - anv_state_flush(cmd_buffer->device, sf_clip_state); - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) { clip.SFClipViewportPointer = sf_clip_state.offset; @@ -97,8 +95,6 @@ GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport); } - anv_state_flush(cmd_buffer->device, cc_state); - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) { cc.CCViewportPointer = cc_state.offset; @@ -441,8 +437,6 @@ }; GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); - anv_state_flush(cmd_buffer->device, cc_state); - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) { ccp.ColorCalcStatePointer = cc_state.offset; ccp.ColorCalcStatePointerValid = true; @@ -491,8 +485,6 @@ }; GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); - anv_state_flush(cmd_buffer->device, cc_state); - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) { ccp.ColorCalcStatePointer = cc_state.offset; ccp.ColorCalcStatePointerValid = true; @@ -565,7 +557,7 @@ anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) { ib.IndexFormat = vk_to_gen_index_type[indexType]; - ib.IndexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, + ib.MOCS = anv_mocs_for_bo(cmd_buffer->device, buffer->address.bo); ib.BufferStartingAddress = anv_address_add(buffer->address, offset); ib.BufferSize = buffer->size - offset; @@ -610,7 +602,7 @@ pc.DestinationAddressType = 
DAT_PPGTT, pc.PostSyncOperation = WriteImmediateData, pc.Address = (struct anv_address) { - &cmd_buffer->device->dynamic_state_pool.block_pool.bo, + cmd_buffer->device->dynamic_state_pool.block_pool.bo, event->state.offset }; pc.ImmediateData = VK_EVENT_SET; @@ -634,7 +626,7 @@ pc.DestinationAddressType = DAT_PPGTT; pc.PostSyncOperation = WriteImmediateData; pc.Address = (struct anv_address) { - &cmd_buffer->device->dynamic_state_pool.block_pool.bo, + cmd_buffer->device->dynamic_state_pool.block_pool.bo, event->state.offset }; pc.ImmediateData = VK_EVENT_RESET; @@ -663,7 +655,7 @@ sem.CompareOperation = COMPARE_SAD_EQUAL_SDD, sem.SemaphoreDataDword = VK_EVENT_SET, sem.SemaphoreAddress = (struct anv_address) { - &cmd_buffer->device->dynamic_state_pool.block_pool.bo, + cmd_buffer->device->dynamic_state_pool.block_pool.bo, event->state.offset }; } diff -Nru mesa-18.3.3/src/intel/vulkan/genX_blorp_exec.c mesa-19.0.1/src/intel/vulkan/genX_blorp_exec.c --- mesa-18.3.3/src/intel/vulkan/genX_blorp_exec.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/genX_blorp_exec.c 2019-03-31 23:16:37.000000000 +0000 @@ -63,20 +63,28 @@ if (result != VK_SUCCESS) anv_batch_set_error(&cmd_buffer->batch, result); - void *dest = cmd_buffer->device->surface_state_pool.block_pool.map + - ss_offset; + void *dest = anv_block_pool_map( + &cmd_buffer->device->surface_state_pool.block_pool, ss_offset); uint64_t val = ((struct anv_bo*)address.buffer)->offset + address.offset + delta; write_reloc(cmd_buffer->device, dest, val, false); } +static uint64_t +blorp_get_surface_address(struct blorp_batch *blorp_batch, + struct blorp_address address) +{ + /* We'll let blorp_surface_reloc write the address. 
*/ + return 0ull; +} + #if GEN_GEN >= 7 && GEN_GEN < 10 static struct blorp_address blorp_get_surface_base_address(struct blorp_batch *batch) { struct anv_cmd_buffer *cmd_buffer = batch->driver_batch; return (struct blorp_address) { - .buffer = &cmd_buffer->device->surface_state_pool.block_pool.bo, + .buffer = cmd_buffer->device->surface_state_pool.block_pool.bo, .offset = 0, }; } @@ -124,8 +132,6 @@ surface_offsets[i] = surface_state.offset; surface_maps[i] = surface_state.map; } - - anv_state_flush(cmd_buffer->device, bt_state); } static void * @@ -150,7 +156,7 @@ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 64); *addr = (struct blorp_address) { - .buffer = &cmd_buffer->device->dynamic_state_pool.block_pool.bo, + .buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo, .offset = vb_state.offset, .mocs = cmd_buffer->device->default_mocs, }; @@ -183,9 +189,8 @@ static void blorp_flush_range(struct blorp_batch *batch, void *start, size_t size) { - struct anv_device *device = batch->blorp->driver_ctx; - if (!device->info.has_llc) - gen_flush_range(start, size); + /* We don't need to flush states anymore, since everything will be snooped. 
+ */ } static void @@ -263,5 +268,4 @@ cmd_buffer->state.gfx.vb_dirty = ~0; cmd_buffer->state.gfx.dirty = ~0; cmd_buffer->state.push_constants_dirty = ~0; - cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; } diff -Nru mesa-18.3.3/src/intel/vulkan/genX_cmd_buffer.c mesa-19.0.1/src/intel/vulkan/genX_cmd_buffer.c --- mesa-18.3.3/src/intel/vulkan/genX_cmd_buffer.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/genX_cmd_buffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -27,6 +27,7 @@ #include "anv_private.h" #include "vk_format_info.h" #include "vk_util.h" +#include "util/fast_idiv_by_const.h" #include "common/gen_l3_config.h" #include "genxml/gen_macros.h" @@ -86,26 +87,26 @@ anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS), sba) { sba.GeneralStateBaseAddress = (struct anv_address) { NULL, 0 }; - sba.GeneralStateMemoryObjectControlState = GENX(MOCS); + sba.GeneralStateMOCS = GENX(MOCS); sba.GeneralStateBaseAddressModifyEnable = true; sba.SurfaceStateBaseAddress = anv_cmd_buffer_surface_base_address(cmd_buffer); - sba.SurfaceStateMemoryObjectControlState = GENX(MOCS); + sba.SurfaceStateMOCS = GENX(MOCS); sba.SurfaceStateBaseAddressModifyEnable = true; sba.DynamicStateBaseAddress = - (struct anv_address) { &device->dynamic_state_pool.block_pool.bo, 0 }; - sba.DynamicStateMemoryObjectControlState = GENX(MOCS); + (struct anv_address) { device->dynamic_state_pool.block_pool.bo, 0 }; + sba.DynamicStateMOCS = GENX(MOCS); sba.DynamicStateBaseAddressModifyEnable = true; sba.IndirectObjectBaseAddress = (struct anv_address) { NULL, 0 }; - sba.IndirectObjectMemoryObjectControlState = GENX(MOCS); + sba.IndirectObjectMOCS = GENX(MOCS); sba.IndirectObjectBaseAddressModifyEnable = true; sba.InstructionBaseAddress = - (struct anv_address) { &device->instruction_state_pool.block_pool.bo, 0 }; - sba.InstructionMemoryObjectControlState = GENX(MOCS); + (struct anv_address) { device->instruction_state_pool.block_pool.bo, 0 }; + 
sba.InstructionMOCS = GENX(MOCS); sba.InstructionBaseAddressModifyEnable = true; # if (GEN_GEN >= 8) @@ -124,13 +125,13 @@ # endif # if (GEN_GEN >= 9) sba.BindlessSurfaceStateBaseAddress = (struct anv_address) { NULL, 0 }; - sba.BindlessSurfaceStateMemoryObjectControlState = GENX(MOCS); + sba.BindlessSurfaceStateMOCS = GENX(MOCS); sba.BindlessSurfaceStateBaseAddressModifyEnable = true; sba.BindlessSurfaceStateSize = 0; # endif # if (GEN_GEN >= 10) sba.BindlessSamplerStateBaseAddress = (struct anv_address) { NULL, 0 }; - sba.BindlessSamplerStateMemoryObjectControlState = GENX(MOCS); + sba.BindlessSamplerStateMOCS = GENX(MOCS); sba.BindlessSamplerStateBaseAddressModifyEnable = true; sba.BindlessSamplerStateBufferSize = 0; # endif @@ -479,8 +480,9 @@ 0, 0, 1, hiz_op); } -#define MI_PREDICATE_SRC0 0x2400 -#define MI_PREDICATE_SRC1 0x2408 +#define MI_PREDICATE_SRC0 0x2400 +#define MI_PREDICATE_SRC1 0x2408 +#define MI_PREDICATE_RESULT 0x2418 static void set_image_compressed_bit(struct anv_cmd_buffer *cmd_buffer, @@ -886,7 +888,7 @@ assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV); struct anv_address ss_clear_addr = { - .bo = &cmd_buffer->device->surface_state_pool.block_pool.bo, + .bo = cmd_buffer->device->surface_state_pool.block_pool.bo, .offset = surface_state.offset + cmd_buffer->device->isl_dev.ss.clear_value_offset, }; @@ -1411,6 +1413,19 @@ cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS; } +#if GEN_GEN >= 8 || GEN_IS_HASWELL + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { + const VkCommandBufferInheritanceConditionalRenderingInfoEXT *conditional_rendering_info = + vk_find_struct_const(pBeginInfo->pInheritanceInfo->pNext, COMMAND_BUFFER_INHERITANCE_CONDITIONAL_RENDERING_INFO_EXT); + + /* If secondary buffer supports conditional rendering + * we should emit commands as if conditional rendering is enabled. 
+ */ + cmd_buffer->state.conditional_render_enabled = + conditional_rendering_info && conditional_rendering_info->conditionalRenderingEnable; + } +#endif + return result; } @@ -1515,6 +1530,19 @@ assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); assert(!anv_batch_has_error(&secondary->batch)); +#if GEN_GEN >= 8 || GEN_IS_HASWELL + if (secondary->state.conditional_render_enabled) { + if (!primary->state.conditional_render_enabled) { + /* Secondary buffer is constructed as if it will be executed + * with conditional rendering, we should satisfy this dependency + * regardless of conditional rendering being enabled in primary. + */ + emit_lri(&primary->batch, CS_GPR(ANV_PREDICATE_RESULT_REG), UINT32_MAX); + emit_lri(&primary->batch, CS_GPR(ANV_PREDICATE_RESULT_REG) + 4, UINT32_MAX); + } + } +#endif + if (secondary->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) { /* If we're continuing a render pass from the primary, we need to @@ -1522,7 +1550,7 @@ * we allocated for them in BeginCommandBuffer. */ struct anv_bo *ss_bo = - &primary->device->surface_state_pool.block_pool.bo; + primary->device->surface_state_pool.block_pool.bo; struct anv_state src_state = primary->state.render_pass_states; struct anv_state dst_state = secondary->state.render_pass_states; assert(src_state.alloc_size == dst_state.alloc_size); @@ -1631,6 +1659,14 @@ uint32_t l3cr; anv_pack_struct(&l3cr, GENX(L3CNTLREG), .SLMEnable = has_slm, +#if GEN_GEN == 11 + /* WA_1406697149: Bit 9 "Error Detection Behavior Control" must be set + * in L3CNTLREG register. The default setting of the bit is not the + * desirable behavior. + */ + .ErrorDetectionBehaviorControl = true, + .UseFullWays = true, +#endif .URBAllocation = cfg->n[GEN_L3P_URB], .ROAllocation = cfg->n[GEN_L3P_RO], .DCAllocation = cfg->n[GEN_L3P_DC], @@ -1776,7 +1812,7 @@ * saying that render target writes are ongoing. 
*/ if (bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT) - bits &= ~(ANV_PIPE_RENDER_TARGET_WRITES); + bits &= ~(ANV_PIPE_RENDER_TARGET_BUFFER_WRITES); bits &= ~(ANV_PIPE_FLUSH_BITS | ANV_PIPE_CS_STALL_BIT); } @@ -2102,7 +2138,7 @@ anv_cmd_buffer_alloc_surface_state(cmd_buffer); struct anv_address constant_data = { - .bo = &pipeline->device->dynamic_state_pool.block_pool.bo, + .bo = pipeline->device->dynamic_state_pool.block_pool.bo, .offset = pipeline->shaders[stage]->constant_data.offset, }; unsigned constant_data_size = @@ -2243,8 +2279,6 @@ assert(image == map->image_count); out: - anv_state_flush(cmd_buffer->device, *bt_state); - #if GEN_GEN >= 11 /* The PIPE_CONTROL command description says: * @@ -2316,8 +2350,6 @@ sampler->state[binding->plane], sizeof(sampler->state[0])); } - anv_state_flush(cmd_buffer->device, *state); - return VK_SUCCESS; } @@ -2479,7 +2511,7 @@ uint32_t read_len; if (binding->set == ANV_DESCRIPTOR_SET_SHADER_CONSTANTS) { struct anv_address constant_data = { - .bo = &pipeline->device->dynamic_state_pool.block_pool.bo, + .bo = pipeline->device->dynamic_state_pool.block_pool.bo, .offset = pipeline->shaders[stage]->constant_data.offset, }; unsigned constant_data_size = @@ -2527,7 +2559,7 @@ if (state.alloc_size > 0) { c.ConstantBody.Buffer[n] = (struct anv_address) { - .bo = &cmd_buffer->device->dynamic_state_pool.block_pool.bo, + .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo, .offset = state.offset, }; c.ConstantBody.ReadLength[n] = @@ -2587,8 +2619,7 @@ struct GENX(VERTEX_BUFFER_STATE) state = { .VertexBufferIndex = vb, - .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, - buffer->address.bo), + .MOCS = anv_mocs_for_bo(cmd_buffer->device, buffer->address.bo), #if GEN_GEN <= 7 .BufferAccessType = pipeline->vb[vb].instanced ? 
INSTANCEDATA : VERTEXDATA, .InstanceDataStepRate = pipeline->vb[vb].instance_divisor, @@ -2612,6 +2643,34 @@ cmd_buffer->state.gfx.vb_dirty &= ~vb_emit; +#if GEN_GEN >= 8 + if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_XFB_ENABLE) { + /* We don't need any per-buffer dirty tracking because you're not + * allowed to bind different XFB buffers while XFB is enabled. + */ + for (unsigned idx = 0; idx < MAX_XFB_BUFFERS; idx++) { + struct anv_xfb_binding *xfb = &cmd_buffer->state.xfb_bindings[idx]; + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SO_BUFFER), sob) { + sob.SOBufferIndex = idx; + + if (cmd_buffer->state.xfb_enabled && xfb->buffer && xfb->size != 0) { + sob.SOBufferEnable = true; + sob.MOCS = cmd_buffer->device->default_mocs, + sob.StreamOffsetWriteEnable = false; + sob.SurfaceBaseAddress = anv_address_add(xfb->buffer->address, + xfb->offset); + /* Size is in DWords - 1 */ + sob.SurfaceSize = xfb->size / 4 - 1; + } + } + } + + /* CNL and later require a CS stall after 3DSTATE_SO_BUFFER */ + if (GEN_GEN >= 10) + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT; + } +#endif + if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) { anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); @@ -2706,7 +2765,7 @@ .VertexBufferIndex = index, .AddressModifyEnable = true, .BufferPitch = 0, - .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, addr.bo), + .MOCS = anv_mocs_for_bo(cmd_buffer->device, addr.bo), #if (GEN_GEN >= 8) .BufferStartingAddress = addr, .BufferSize = size @@ -2734,10 +2793,8 @@ ((uint32_t *)id_state.map)[0] = base_vertex; ((uint32_t *)id_state.map)[1] = base_instance; - anv_state_flush(cmd_buffer->device, id_state); - struct anv_address addr = { - .bo = &cmd_buffer->device->dynamic_state_pool.block_pool.bo, + .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo, .offset = id_state.offset, }; @@ -2752,10 +2809,8 @@ ((uint32_t *)state.map)[0] = draw_index; - anv_state_flush(cmd_buffer->device, state); - struct 
anv_address addr = { - .bo = &cmd_buffer->device->dynamic_state_pool.block_pool.bo, + .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo, .offset = state.offset, }; @@ -2778,6 +2833,9 @@ genX(cmd_buffer_flush_state)(cmd_buffer); + if (cmd_buffer->state.conditional_render_enabled) + genX(cmd_emit_conditional_render_predicate)(cmd_buffer); + if (vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance) emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance); @@ -2790,6 +2848,7 @@ instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass); anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { + prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled; prim.VertexAccessType = SEQUENTIAL; prim.PrimitiveTopologyType = pipeline->topology; prim.VertexCountPerInstance = vertexCount; @@ -2798,8 +2857,6 @@ prim.StartInstanceLocation = firstInstance; prim.BaseVertexLocation = 0; } - - cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; } void genX(CmdDrawIndexed)( @@ -2819,6 +2876,9 @@ genX(cmd_buffer_flush_state)(cmd_buffer); + if (cmd_buffer->state.conditional_render_enabled) + genX(cmd_emit_conditional_render_predicate)(cmd_buffer); + if (vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance) emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance); @@ -2831,6 +2891,7 @@ instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass); anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { + prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled; prim.VertexAccessType = RANDOM; prim.PrimitiveTopologyType = pipeline->topology; prim.VertexCountPerInstance = indexCount; @@ -2839,8 +2900,6 @@ prim.StartInstanceLocation = firstInstance; prim.BaseVertexLocation = vertexOffset; } - - cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; } /* Auto-Draw / Indirect Registers */ @@ -2901,8 +2960,154 @@ build_alu_multiply_gpr0(dw + 1, &num_dwords, N); } 
+static void +emit_alu_add(struct anv_batch *batch, unsigned dst_reg, + unsigned reg_a, unsigned reg_b) +{ + uint32_t *dw = anv_batch_emitn(batch, 1 + 4, GENX(MI_MATH)); + dw[1] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, reg_a); + dw[2] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, reg_b); + dw[3] = mi_alu(MI_ALU_ADD, 0, 0); + dw[4] = mi_alu(MI_ALU_STORE, dst_reg, MI_ALU_ACCU); +} + +static void +emit_add32_gpr0(struct anv_batch *batch, uint32_t N) +{ + emit_lri(batch, CS_GPR(1), N); + emit_alu_add(batch, MI_ALU_REG0, MI_ALU_REG0, MI_ALU_REG1); +} + +static void +emit_alu_shl(struct anv_batch *batch, unsigned dst_reg, + unsigned src_reg, unsigned shift) +{ + assert(shift > 0); + + uint32_t *dw = anv_batch_emitn(batch, 1 + 4 * shift, GENX(MI_MATH)); + for (unsigned i = 0; i < shift; i++) { + unsigned add_src = (i == 0) ? src_reg : dst_reg; + dw[1 + (i * 4) + 0] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, add_src); + dw[1 + (i * 4) + 1] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, add_src); + dw[1 + (i * 4) + 2] = mi_alu(MI_ALU_ADD, 0, 0); + dw[1 + (i * 4) + 3] = mi_alu(MI_ALU_STORE, dst_reg, MI_ALU_ACCU); + } +} + +static void +emit_div32_gpr0(struct anv_batch *batch, uint32_t D) +{ + /* Zero out the top of GPR0 */ + emit_lri(batch, CS_GPR(0) + 4, 0); + + if (D == 0) { + /* This invalid, but we should do something so we set GPR0 to 0. */ + emit_lri(batch, CS_GPR(0), 0); + } else if (util_is_power_of_two_or_zero(D)) { + unsigned log2_D = util_logbase2(D); + assert(log2_D < 32); + /* We right-shift by log2(D) by left-shifting by 32 - log2(D) and taking + * the top 32 bits of the result. + */ + emit_alu_shl(batch, MI_ALU_REG0, MI_ALU_REG0, 32 - log2_D); + emit_lrr(batch, CS_GPR(0) + 0, CS_GPR(0) + 4); + emit_lri(batch, CS_GPR(0) + 4, 0); + } else { + struct util_fast_udiv_info m = util_compute_fast_udiv_info(D, 32, 32); + assert(m.multiplier <= UINT32_MAX); + + if (m.pre_shift) { + /* We right-shift by L by left-shifting by 32 - l and taking the top + * 32 bits of the result. 
+ */ + if (m.pre_shift < 32) + emit_alu_shl(batch, MI_ALU_REG0, MI_ALU_REG0, 32 - m.pre_shift); + emit_lrr(batch, CS_GPR(0) + 0, CS_GPR(0) + 4); + emit_lri(batch, CS_GPR(0) + 4, 0); + } + + /* Do the 32x32 multiply into gpr0 */ + emit_mul_gpr0(batch, m.multiplier); + + if (m.increment) { + /* If we need to increment, save off a copy of GPR0 */ + emit_lri(batch, CS_GPR(1) + 0, m.multiplier); + emit_lri(batch, CS_GPR(1) + 4, 0); + emit_alu_add(batch, MI_ALU_REG0, MI_ALU_REG0, MI_ALU_REG1); + } + + /* Shift by 32 */ + emit_lrr(batch, CS_GPR(0) + 0, CS_GPR(0) + 4); + emit_lri(batch, CS_GPR(0) + 4, 0); + + if (m.post_shift) { + /* We right-shift by L by left-shifting by 32 - l and taking the top + * 32 bits of the result. + */ + if (m.post_shift < 32) + emit_alu_shl(batch, MI_ALU_REG0, MI_ALU_REG0, 32 - m.post_shift); + emit_lrr(batch, CS_GPR(0) + 0, CS_GPR(0) + 4); + emit_lri(batch, CS_GPR(0) + 4, 0); + } + } +} + #endif /* GEN_IS_HASWELL || GEN_GEN >= 8 */ +void genX(CmdDrawIndirectByteCountEXT)( + VkCommandBuffer commandBuffer, + uint32_t instanceCount, + uint32_t firstInstance, + VkBuffer counterBuffer, + VkDeviceSize counterBufferOffset, + uint32_t counterOffset, + uint32_t vertexStride) +{ +#if GEN_IS_HASWELL || GEN_GEN >= 8 + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, counter_buffer, counterBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); + + /* firstVertex is always zero for this draw function */ + const uint32_t firstVertex = 0; + + if (anv_batch_has_error(&cmd_buffer->batch)) + return; + + genX(cmd_buffer_flush_state)(cmd_buffer); + + if (vs_prog_data->uses_firstvertex || + vs_prog_data->uses_baseinstance) + emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance); + if (vs_prog_data->uses_drawid) + emit_draw_index(cmd_buffer, 0); + + /* Our implementation of VK_KHR_multiview uses instancing to draw 
the + * different views. We need to multiply instanceCount by the view count. + */ + instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass); + + emit_lrm(&cmd_buffer->batch, CS_GPR(0), + anv_address_add(counter_buffer->address, counterBufferOffset)); + if (counterOffset) + emit_add32_gpr0(&cmd_buffer->batch, -counterOffset); + emit_div32_gpr0(&cmd_buffer->batch, vertexStride); + emit_lrr(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, CS_GPR(0)); + + emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, firstVertex); + emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, instanceCount); + emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, firstInstance); + emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { + prim.IndirectParameterEnable = true; + prim.VertexAccessType = SEQUENTIAL; + prim.PrimitiveTopologyType = pipeline->topology; + } +#endif /* GEN_IS_HASWELL || GEN_GEN >= 8 */ +} + static void load_indirect_parameters(struct anv_cmd_buffer *cmd_buffer, struct anv_address addr, @@ -2955,6 +3160,9 @@ genX(cmd_buffer_flush_state)(cmd_buffer); + if (cmd_buffer->state.conditional_render_enabled) + genX(cmd_emit_conditional_render_predicate)(cmd_buffer); + for (uint32_t i = 0; i < drawCount; i++) { struct anv_address draw = anv_address_add(buffer->address, offset); @@ -2968,14 +3176,13 @@ anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { prim.IndirectParameterEnable = true; + prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled; prim.VertexAccessType = SEQUENTIAL; prim.PrimitiveTopologyType = pipeline->topology; } offset += stride; } - - cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; } void genX(CmdDrawIndexedIndirect)( @@ -2995,6 +3202,9 @@ genX(cmd_buffer_flush_state)(cmd_buffer); + if (cmd_buffer->state.conditional_render_enabled) + genX(cmd_emit_conditional_render_predicate)(cmd_buffer); + for (uint32_t i = 0; i < 
drawCount; i++) { struct anv_address draw = anv_address_add(buffer->address, offset); @@ -3009,14 +3219,333 @@ anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { prim.IndirectParameterEnable = true; + prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled; prim.VertexAccessType = RANDOM; prim.PrimitiveTopologyType = pipeline->topology; } offset += stride; } +} + +#define TMP_DRAW_COUNT_REG MI_ALU_REG14 + +static void +prepare_for_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer, + struct anv_address count_address, + const bool conditional_render_enabled) +{ + if (conditional_render_enabled) { +#if GEN_GEN >= 8 || GEN_IS_HASWELL + emit_lrm(&cmd_buffer->batch, CS_GPR(TMP_DRAW_COUNT_REG), count_address); + emit_lri(&cmd_buffer->batch, CS_GPR(TMP_DRAW_COUNT_REG) + 4, 0); +#endif + } else { + /* Upload the current draw count from the draw parameters buffer to + * MI_PREDICATE_SRC0. + */ + emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0, count_address); + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0); + + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0); + } +} + +static void +emit_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer, + uint32_t draw_index) +{ + /* Upload the index of the current primitive to MI_PREDICATE_SRC1. 
*/ + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1, draw_index); + + if (draw_index == 0) { + anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) { + mip.LoadOperation = LOAD_LOADINV; + mip.CombineOperation = COMBINE_SET; + mip.CompareOperation = COMPARE_SRCS_EQUAL; + } + } else { + /* While draw_index < draw_count the predicate's result will be + * (draw_index == draw_count) ^ TRUE = TRUE + * When draw_index == draw_count the result is + * (TRUE) ^ TRUE = FALSE + * After this all results will be: + * (FALSE) ^ FALSE = FALSE + */ + anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) { + mip.LoadOperation = LOAD_LOAD; + mip.CombineOperation = COMBINE_XOR; + mip.CompareOperation = COMPARE_SRCS_EQUAL; + } + } +} + +#if GEN_GEN >= 8 || GEN_IS_HASWELL +static void +emit_draw_count_predicate_with_conditional_render( + struct anv_cmd_buffer *cmd_buffer, + uint32_t draw_index) +{ + const int draw_index_reg = MI_ALU_REG0; + const int tmp_result_reg = MI_ALU_REG1; + + emit_lri(&cmd_buffer->batch, CS_GPR(draw_index_reg), draw_index); + emit_lri(&cmd_buffer->batch, CS_GPR(draw_index_reg) + 4, 0); + + uint32_t *dw; + /* Compute (draw_index < draw_count). + * We do this by subtracting and storing the carry bit. + */ + dw = anv_batch_emitn(&cmd_buffer->batch, 9, GENX(MI_MATH)); + dw[1] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, draw_index_reg); + dw[2] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, TMP_DRAW_COUNT_REG); + dw[3] = mi_alu(MI_ALU_SUB, 0, 0); + dw[4] = mi_alu(MI_ALU_STORE, tmp_result_reg, MI_ALU_CF); + /* & condition */ + dw[5] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, tmp_result_reg); + dw[6] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, ANV_PREDICATE_RESULT_REG); + dw[7] = mi_alu(MI_ALU_AND, 0, 0); + dw[8] = mi_alu(MI_ALU_STORE, tmp_result_reg, MI_ALU_ACCU); + +#if GEN_GEN >= 8 + emit_lrr(&cmd_buffer->batch, MI_PREDICATE_RESULT, CS_GPR(tmp_result_reg)); +#else + /* MI_PREDICATE_RESULT is not whitelisted in i915 command parser + * so we emit MI_PREDICATE to set it. 
+ */ + + emit_lrr(&cmd_buffer->batch, MI_PREDICATE_SRC0, CS_GPR(tmp_result_reg)); + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0); + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1, 0); + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0); - cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; + anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) { + mip.LoadOperation = LOAD_LOADINV; + mip.CombineOperation = COMBINE_SET; + mip.CompareOperation = COMPARE_SRCS_EQUAL; + } +#endif +} +#endif + +void genX(CmdDrawIndirectCountKHR)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkBuffer _countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer); + struct anv_cmd_state *cmd_state = &cmd_buffer->state; + struct anv_pipeline *pipeline = cmd_state->gfx.base.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); + + if (anv_batch_has_error(&cmd_buffer->batch)) + return; + + genX(cmd_buffer_flush_state)(cmd_buffer); + + struct anv_address count_address = + anv_address_add(count_buffer->address, countBufferOffset); + + prepare_for_draw_count_predicate(cmd_buffer, count_address, + cmd_state->conditional_render_enabled); + + for (uint32_t i = 0; i < maxDrawCount; i++) { + struct anv_address draw = anv_address_add(buffer->address, offset); + +#if GEN_GEN >= 8 || GEN_IS_HASWELL + if (cmd_state->conditional_render_enabled) { + emit_draw_count_predicate_with_conditional_render(cmd_buffer, i); + } else { + emit_draw_count_predicate(cmd_buffer, i); + } +#else + emit_draw_count_predicate(cmd_buffer, i); +#endif + + if (vs_prog_data->uses_firstvertex || + vs_prog_data->uses_baseinstance) + emit_base_vertex_instance_bo(cmd_buffer, anv_address_add(draw, 8)); + if (vs_prog_data->uses_drawid) + 
emit_draw_index(cmd_buffer, i); + + load_indirect_parameters(cmd_buffer, draw, false); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { + prim.IndirectParameterEnable = true; + prim.PredicateEnable = true; + prim.VertexAccessType = SEQUENTIAL; + prim.PrimitiveTopologyType = pipeline->topology; + } + + offset += stride; + } +} + +void genX(CmdDrawIndexedIndirectCountKHR)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkBuffer _countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer); + struct anv_cmd_state *cmd_state = &cmd_buffer->state; + struct anv_pipeline *pipeline = cmd_state->gfx.base.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); + + if (anv_batch_has_error(&cmd_buffer->batch)) + return; + + genX(cmd_buffer_flush_state)(cmd_buffer); + + struct anv_address count_address = + anv_address_add(count_buffer->address, countBufferOffset); + + prepare_for_draw_count_predicate(cmd_buffer, count_address, + cmd_state->conditional_render_enabled); + + for (uint32_t i = 0; i < maxDrawCount; i++) { + struct anv_address draw = anv_address_add(buffer->address, offset); + +#if GEN_GEN >= 8 || GEN_IS_HASWELL + if (cmd_state->conditional_render_enabled) { + emit_draw_count_predicate_with_conditional_render(cmd_buffer, i); + } else { + emit_draw_count_predicate(cmd_buffer, i); + } +#else + emit_draw_count_predicate(cmd_buffer, i); +#endif + + /* TODO: We need to stomp base vertex to 0 somehow */ + if (vs_prog_data->uses_firstvertex || + vs_prog_data->uses_baseinstance) + emit_base_vertex_instance_bo(cmd_buffer, anv_address_add(draw, 12)); + if (vs_prog_data->uses_drawid) + emit_draw_index(cmd_buffer, i); + + load_indirect_parameters(cmd_buffer, draw, true); + + 
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { + prim.IndirectParameterEnable = true; + prim.PredicateEnable = true; + prim.VertexAccessType = RANDOM; + prim.PrimitiveTopologyType = pipeline->topology; + } + + offset += stride; + } +} + +void genX(CmdBeginTransformFeedbackEXT)( + VkCommandBuffer commandBuffer, + uint32_t firstCounterBuffer, + uint32_t counterBufferCount, + const VkBuffer* pCounterBuffers, + const VkDeviceSize* pCounterBufferOffsets) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + assert(firstCounterBuffer < MAX_XFB_BUFFERS); + assert(counterBufferCount <= MAX_XFB_BUFFERS); + assert(firstCounterBuffer + counterBufferCount <= MAX_XFB_BUFFERS); + + /* From the SKL PRM Vol. 2c, SO_WRITE_OFFSET: + * + * "Ssoftware must ensure that no HW stream output operations can be in + * process or otherwise pending at the point that the MI_LOAD/STORE + * commands are processed. This will likely require a pipeline flush." + */ + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT; + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); + + for (uint32_t idx = 0; idx < MAX_XFB_BUFFERS; idx++) { + /* If we have a counter buffer, this is a resume so we need to load the + * value into the streamout offset register. Otherwise, this is a begin + * and we need to reset it to zero. + */ + if (pCounterBuffers && + idx >= firstCounterBuffer && + idx - firstCounterBuffer < counterBufferCount && + pCounterBuffers[idx - firstCounterBuffer] != VK_NULL_HANDLE) { + uint32_t cb_idx = idx - firstCounterBuffer; + ANV_FROM_HANDLE(anv_buffer, counter_buffer, pCounterBuffers[cb_idx]); + uint64_t offset = pCounterBufferOffsets ? 
+ pCounterBufferOffsets[cb_idx] : 0; + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { + lrm.RegisterAddress = GENX(SO_WRITE_OFFSET0_num) + idx * 4; + lrm.MemoryAddress = anv_address_add(counter_buffer->address, + offset); + } + } else { + anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) { + lri.RegisterOffset = GENX(SO_WRITE_OFFSET0_num) + idx * 4; + lri.DataDWord = 0; + } + } + } + + cmd_buffer->state.xfb_enabled = true; + cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_XFB_ENABLE; +} + +void genX(CmdEndTransformFeedbackEXT)( + VkCommandBuffer commandBuffer, + uint32_t firstCounterBuffer, + uint32_t counterBufferCount, + const VkBuffer* pCounterBuffers, + const VkDeviceSize* pCounterBufferOffsets) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + assert(firstCounterBuffer < MAX_XFB_BUFFERS); + assert(counterBufferCount <= MAX_XFB_BUFFERS); + assert(firstCounterBuffer + counterBufferCount <= MAX_XFB_BUFFERS); + + /* From the SKL PRM Vol. 2c, SO_WRITE_OFFSET: + * + * "Ssoftware must ensure that no HW stream output operations can be in + * process or otherwise pending at the point that the MI_LOAD/STORE + * commands are processed. This will likely require a pipeline flush." + */ + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT; + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); + + for (uint32_t cb_idx = 0; cb_idx < counterBufferCount; cb_idx++) { + unsigned idx = firstCounterBuffer + cb_idx; + + /* If we have a counter buffer, this is a resume so we need to load the + * value into the streamout offset register. Otherwise, this is a begin + * and we need to reset it to zero. + */ + if (pCounterBuffers && + cb_idx < counterBufferCount && + pCounterBuffers[cb_idx] != VK_NULL_HANDLE) { + ANV_FROM_HANDLE(anv_buffer, counter_buffer, pCounterBuffers[cb_idx]); + uint64_t offset = pCounterBufferOffsets ? 
+ pCounterBufferOffsets[cb_idx] : 0; + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) { + srm.MemoryAddress = anv_address_add(counter_buffer->address, + offset); + srm.RegisterAddress = GENX(SO_WRITE_OFFSET0_num) + idx * 4; + } + } + } + + cmd_buffer->state.xfb_enabled = false; + cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_XFB_ENABLE; } static VkResult @@ -3215,16 +3744,19 @@ sizes[0] = groupCountX; sizes[1] = groupCountY; sizes[2] = groupCountZ; - anv_state_flush(cmd_buffer->device, state); cmd_buffer->state.compute.num_workgroups = (struct anv_address) { - .bo = &cmd_buffer->device->dynamic_state_pool.block_pool.bo, + .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo, .offset = state.offset, }; } genX(cmd_buffer_flush_compute_state)(cmd_buffer); + if (cmd_buffer->state.conditional_render_enabled) + genX(cmd_emit_conditional_render_predicate)(cmd_buffer); + anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), ggw) { + ggw.PredicateEnable = cmd_buffer->state.conditional_render_enabled; ggw.SIMDSize = prog_data->simd_size / 16; ggw.ThreadDepthCounterMaximum = 0; ggw.ThreadHeightCounterMaximum = 0; @@ -3312,17 +3844,33 @@ } /* predicate = !predicate; */ -#define COMPARE_FALSE 1 anv_batch_emit(batch, GENX(MI_PREDICATE), mip) { mip.LoadOperation = LOAD_LOADINV; mip.CombineOperation = COMBINE_OR; mip.CompareOperation = COMPARE_FALSE; } + +#if GEN_IS_HASWELL + if (cmd_buffer->state.conditional_render_enabled) { + emit_lrr(batch, MI_PREDICATE_SRC0, CS_GPR(ANV_PREDICATE_RESULT_REG)); + /* predicate &= !(conditional_rendering_predicate == 0); */ + anv_batch_emit(batch, GENX(MI_PREDICATE), mip) { + mip.LoadOperation = LOAD_LOADINV; + mip.CombineOperation = COMBINE_AND; + mip.CompareOperation = COMPARE_SRCS_EQUAL; + } + } +#endif + +#else /* GEN_GEN > 7 */ + if (cmd_buffer->state.conditional_render_enabled) + genX(cmd_emit_conditional_render_predicate)(cmd_buffer); #endif anv_batch_emit(batch, GENX(GPGPU_WALKER), ggw) { 
ggw.IndirectParameterEnable = true; - ggw.PredicateEnable = GEN_GEN <= 7; + ggw.PredicateEnable = GEN_GEN <= 7 || + cmd_buffer->state.conditional_render_enabled; ggw.SIMDSize = prog_data->simd_size / 16; ggw.ThreadDepthCounterMaximum = 0; ggw.ThreadHeightCounterMaximum = 0; @@ -3888,16 +4436,209 @@ cmd_buffer_emit_depth_stencil(cmd_buffer); } +static enum blorp_filter +vk_to_blorp_resolve_mode(VkResolveModeFlagBitsKHR vk_mode) +{ + switch (vk_mode) { + case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR: + return BLORP_FILTER_SAMPLE_0; + case VK_RESOLVE_MODE_AVERAGE_BIT_KHR: + return BLORP_FILTER_AVERAGE; + case VK_RESOLVE_MODE_MIN_BIT_KHR: + return BLORP_FILTER_MIN_SAMPLE; + case VK_RESOLVE_MODE_MAX_BIT_KHR: + return BLORP_FILTER_MAX_SAMPLE; + default: + return BLORP_FILTER_NONE; + } +} + static void cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer) { struct anv_cmd_state *cmd_state = &cmd_buffer->state; struct anv_subpass *subpass = cmd_state->subpass; uint32_t subpass_id = anv_get_subpass_id(&cmd_buffer->state); + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - anv_cmd_buffer_resolve_subpass(cmd_buffer); + if (subpass->has_color_resolve) { + /* We are about to do some MSAA resolves. We need to flush so that the + * result of writes to the MSAA color attachments show up in the sampler + * when we blit to the single-sampled resolve target. 
+ */ + cmd_buffer->state.pending_pipe_bits |= + ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; + + for (uint32_t i = 0; i < subpass->color_count; ++i) { + uint32_t src_att = subpass->color_attachments[i].attachment; + uint32_t dst_att = subpass->resolve_attachments[i].attachment; + + if (dst_att == VK_ATTACHMENT_UNUSED) + continue; + + assert(src_att < cmd_buffer->state.pass->attachment_count); + assert(dst_att < cmd_buffer->state.pass->attachment_count); + + if (cmd_buffer->state.attachments[dst_att].pending_clear_aspects) { + /* From the Vulkan 1.0 spec: + * + * If the first use of an attachment in a render pass is as a + * resolve attachment, then the loadOp is effectively ignored + * as the resolve is guaranteed to overwrite all pixels in the + * render area. + */ + cmd_buffer->state.attachments[dst_att].pending_clear_aspects = 0; + } + + struct anv_image_view *src_iview = fb->attachments[src_att]; + struct anv_image_view *dst_iview = fb->attachments[dst_att]; + + const VkRect2D render_area = cmd_buffer->state.render_area; + + enum isl_aux_usage src_aux_usage = + cmd_buffer->state.attachments[src_att].aux_usage; + enum isl_aux_usage dst_aux_usage = + cmd_buffer->state.attachments[dst_att].aux_usage; + + assert(src_iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && + dst_iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT); + + anv_image_msaa_resolve(cmd_buffer, + src_iview->image, src_aux_usage, + src_iview->planes[0].isl.base_level, + src_iview->planes[0].isl.base_array_layer, + dst_iview->image, dst_aux_usage, + dst_iview->planes[0].isl.base_level, + dst_iview->planes[0].isl.base_array_layer, + VK_IMAGE_ASPECT_COLOR_BIT, + render_area.offset.x, render_area.offset.y, + render_area.offset.x, render_area.offset.y, + render_area.extent.width, + render_area.extent.height, + fb->layers, BLORP_FILTER_NONE); + } + } + + if (subpass->ds_resolve_attachment) { + /* We are about to do some MSAA resolves. 
We need to flush so that the + * result of writes to the MSAA depth attachments show up in the sampler + * when we blit to the single-sampled resolve target. + */ + cmd_buffer->state.pending_pipe_bits |= + ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | + ANV_PIPE_DEPTH_CACHE_FLUSH_BIT; + + uint32_t src_att = subpass->depth_stencil_attachment->attachment; + uint32_t dst_att = subpass->ds_resolve_attachment->attachment; + + assert(src_att < cmd_buffer->state.pass->attachment_count); + assert(dst_att < cmd_buffer->state.pass->attachment_count); + + if (cmd_buffer->state.attachments[dst_att].pending_clear_aspects) { + /* From the Vulkan 1.0 spec: + * + * If the first use of an attachment in a render pass is as a + * resolve attachment, then the loadOp is effectively ignored + * as the resolve is guaranteed to overwrite all pixels in the + * render area. + */ + cmd_buffer->state.attachments[dst_att].pending_clear_aspects = 0; + } + + struct anv_image_view *src_iview = fb->attachments[src_att]; + struct anv_image_view *dst_iview = fb->attachments[dst_att]; + + const VkRect2D render_area = cmd_buffer->state.render_area; + + if ((src_iview->image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && + subpass->depth_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) { + + struct anv_attachment_state *src_state = + &cmd_state->attachments[src_att]; + struct anv_attachment_state *dst_state = + &cmd_state->attachments[dst_att]; + + /* MSAA resolves sample from the source attachment. Transition the + * depth attachment first to get rid of any HiZ that we may not be + * able to handle. 
+ */ + transition_depth_buffer(cmd_buffer, src_iview->image, + src_state->current_layout, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + src_state->aux_usage = + anv_layout_to_aux_usage(&cmd_buffer->device->info, src_iview->image, + VK_IMAGE_ASPECT_DEPTH_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + src_state->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + /* MSAA resolves write to the resolve attachment as if it were any + * other transfer op. Transition the resolve attachment accordingly. + */ + VkImageLayout dst_initial_layout = dst_state->current_layout; + + /* If our render area is the entire size of the image, we're going to + * blow it all away so we can claim the initial layout is UNDEFINED + * and we'll get a HiZ ambiguate instead of a resolve. + */ + if (dst_iview->image->type != VK_IMAGE_TYPE_3D && + render_area.offset.x == 0 && render_area.offset.y == 0 && + render_area.extent.width == dst_iview->extent.width && + render_area.extent.height == dst_iview->extent.height) + dst_initial_layout = VK_IMAGE_LAYOUT_UNDEFINED; + + transition_depth_buffer(cmd_buffer, dst_iview->image, + dst_initial_layout, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + dst_state->aux_usage = + anv_layout_to_aux_usage(&cmd_buffer->device->info, dst_iview->image, + VK_IMAGE_ASPECT_DEPTH_BIT, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + dst_state->current_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + enum blorp_filter filter = + vk_to_blorp_resolve_mode(subpass->depth_resolve_mode); + + anv_image_msaa_resolve(cmd_buffer, + src_iview->image, src_state->aux_usage, + src_iview->planes[0].isl.base_level, + src_iview->planes[0].isl.base_array_layer, + dst_iview->image, dst_state->aux_usage, + dst_iview->planes[0].isl.base_level, + dst_iview->planes[0].isl.base_array_layer, + VK_IMAGE_ASPECT_DEPTH_BIT, + render_area.offset.x, render_area.offset.y, + render_area.offset.x, render_area.offset.y, + render_area.extent.width, + render_area.extent.height, + fb->layers, 
filter); + } + + if ((src_iview->image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && + subpass->stencil_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) { + + enum isl_aux_usage src_aux_usage = ISL_AUX_USAGE_NONE; + enum isl_aux_usage dst_aux_usage = ISL_AUX_USAGE_NONE; + + enum blorp_filter filter = + vk_to_blorp_resolve_mode(subpass->stencil_resolve_mode); + + anv_image_msaa_resolve(cmd_buffer, + src_iview->image, src_aux_usage, + src_iview->planes[0].isl.base_level, + src_iview->planes[0].isl.base_array_layer, + dst_iview->image, dst_aux_usage, + dst_iview->planes[0].isl.base_level, + dst_iview->planes[0].isl.base_array_layer, + VK_IMAGE_ASPECT_STENCIL_BIT, + render_area.offset.x, render_area.offset.y, + render_area.offset.x, render_area.offset.y, + render_area.extent.width, + render_area.extent.height, + fb->layers, filter); + } + } - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; for (uint32_t i = 0; i < subpass->attachment_count; ++i) { const uint32_t a = subpass->attachments[i].attachment; if (a == VK_ATTACHMENT_UNUSED) @@ -4085,3 +4826,75 @@ { genX(CmdEndRenderPass)(commandBuffer); } + +void +genX(cmd_emit_conditional_render_predicate)(struct anv_cmd_buffer *cmd_buffer) +{ +#if GEN_GEN >= 8 || GEN_IS_HASWELL + emit_lrr(&cmd_buffer->batch, MI_PREDICATE_SRC0, CS_GPR(ANV_PREDICATE_RESULT_REG)); + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0); + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1, 0); + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0); + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) { + mip.LoadOperation = LOAD_LOADINV; + mip.CombineOperation = COMBINE_SET; + mip.CompareOperation = COMPARE_SRCS_EQUAL; + } +#endif +} + +#if GEN_GEN >= 8 || GEN_IS_HASWELL +void genX(CmdBeginConditionalRenderingEXT)( + VkCommandBuffer commandBuffer, + const VkConditionalRenderingBeginInfoEXT* pConditionalRenderingBegin) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, 
pConditionalRenderingBegin->buffer); + struct anv_cmd_state *cmd_state = &cmd_buffer->state; + struct anv_address value_address = + anv_address_add(buffer->address, pConditionalRenderingBegin->offset); + + const bool isInverted = pConditionalRenderingBegin->flags & + VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT; + + cmd_state->conditional_render_enabled = true; + + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); + + /* Section 19.4 of the Vulkan 1.1.85 spec says: + * + * If the value of the predicate in buffer memory changes + * while conditional rendering is active, the rendering commands + * may be discarded in an implementation-dependent way. + * Some implementations may latch the value of the predicate + * upon beginning conditional rendering while others + * may read it before every rendering command. + * + * So it's perfectly fine to read a value from the buffer once. + */ + emit_lrm(&cmd_buffer->batch, CS_GPR(MI_ALU_REG0), value_address); + /* Zero the top 32-bits of MI_PREDICATE_SRC0 */ + emit_lri(&cmd_buffer->batch, CS_GPR(MI_ALU_REG0) + 4, 0); + + /* Precompute predicate result, it is necessary to support secondary + * command buffers since it is unknown if conditional rendering is + * inverted when populating them. + */ + uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH)); + dw[1] = mi_alu(MI_ALU_LOAD0, MI_ALU_SRCA, 0); + dw[2] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, MI_ALU_REG0); + dw[3] = mi_alu(MI_ALU_SUB, 0, 0); + dw[4] = mi_alu(isInverted ? 
MI_ALU_STOREINV : MI_ALU_STORE, + ANV_PREDICATE_RESULT_REG, MI_ALU_CF); +} + +void genX(CmdEndConditionalRenderingEXT)( + VkCommandBuffer commandBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_cmd_state *cmd_state = &cmd_buffer->state; + + cmd_state->conditional_render_enabled = false; +} +#endif diff -Nru mesa-18.3.3/src/intel/vulkan/genX_gpu_memcpy.c mesa-19.0.1/src/intel/vulkan/genX_gpu_memcpy.c --- mesa-18.3.3/src/intel/vulkan/genX_gpu_memcpy.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/genX_gpu_memcpy.c 2019-03-31 23:16:37.000000000 +0000 @@ -133,9 +133,6 @@ if (size == 0) return; - assert(dst.offset + size <= dst.bo->size); - assert(src.offset + size <= src.bo->size); - /* The maximum copy block size is 4 32-bit components at a time. */ assert(size % 4 == 0); unsigned bs = gcd_pow2_u64(16, size); @@ -167,7 +164,7 @@ .AddressModifyEnable = true, .BufferStartingAddress = src, .BufferPitch = bs, - .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, src.bo), + .MOCS = anv_mocs_for_bo(cmd_buffer->device, src.bo), #if (GEN_GEN >= 8) .BufferSize = size, #else @@ -227,7 +224,7 @@ anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SO_BUFFER), sob) { sob.SOBufferIndex = 0; - sob.SOBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, dst.bo), + sob.MOCS = anv_mocs_for_bo(cmd_buffer->device, dst.bo), sob.SurfaceBaseAddress = dst; #if GEN_GEN >= 8 @@ -302,5 +299,4 @@ } cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE; - cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; } diff -Nru mesa-18.3.3/src/intel/vulkan/genX_pipeline.c mesa-19.0.1/src/intel/vulkan/genX_pipeline.c --- mesa-18.3.3/src/intel/vulkan/genX_pipeline.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/genX_pipeline.c 2019-03-31 23:16:37.000000000 +0000 @@ -28,6 +28,7 @@ #include "common/gen_l3_config.h" #include "common/gen_sample_positions.h" +#include "nir/nir_xfb_info.h" #include "vk_util.h" #include 
"vk_format_info.h" @@ -105,9 +106,7 @@ __builtin_popcount(elements_double) / 2; const uint32_t total_elems = - elem_count + needs_svgs_elem + vs_prog_data->uses_drawid; - if (total_elems == 0) - return; + MAX2(1, elem_count + needs_svgs_elem + vs_prog_data->uses_drawid); uint32_t *p; @@ -465,6 +464,7 @@ sf.TriangleStripListProvokingVertexSelect = 0; sf.LineStripListProvokingVertexSelect = 0; sf.TriangleFanProvokingVertexSelect = 1; + sf.VertexSubPixelPrecisionSelect = _8Bit; const struct brw_vue_prog_data *last_vue_prog_data = anv_pipeline_get_last_vue_prog_data(pipeline); @@ -1055,7 +1055,6 @@ #endif GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state); - anv_state_flush(device, pipeline->blend_state); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) { bsp.BlendStatePointer = pipeline->blend_state.offset; @@ -1079,6 +1078,10 @@ clip.APIMode = APIMODE_D3D, clip.ViewportXYClipTestEnable = true; +#if GEN_GEN >= 8 + clip.VertexSubPixelPrecisionSelect = _8Bit; +#endif + clip.ClipMode = CLIPMODE_NORMAL; clip.TriangleStripListProvokingVertexSelect = 0; @@ -1116,10 +1119,8 @@ clip.FrontWinding = vk_to_gen_front_face[rs_info->frontFace]; clip.CullMode = vk_to_gen_cullmode[rs_info->cullMode]; clip.ViewportZClipTestEnable = !pipeline->depth_clamp_enable; - if (last) { - clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask; - clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask; - } + clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask; + clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask; #else clip.NonPerspectiveBarycentricEnable = wm_prog_data ? 
(wm_prog_data->barycentric_interp_modes & @@ -1132,9 +1133,148 @@ emit_3dstate_streamout(struct anv_pipeline *pipeline, const VkPipelineRasterizationStateCreateInfo *rs_info) { +#if GEN_GEN >= 8 + const struct brw_vue_prog_data *prog_data = + anv_pipeline_get_last_vue_prog_data(pipeline); + const struct brw_vue_map *vue_map = &prog_data->vue_map; +#endif + + nir_xfb_info *xfb_info; + if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) + xfb_info = pipeline->shaders[MESA_SHADER_GEOMETRY]->xfb_info; + else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) + xfb_info = pipeline->shaders[MESA_SHADER_TESS_EVAL]->xfb_info; + else + xfb_info = pipeline->shaders[MESA_SHADER_VERTEX]->xfb_info; + + pipeline->xfb_used = xfb_info ? xfb_info->buffers_written : 0; + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_STREAMOUT), so) { so.RenderingDisable = rs_info->rasterizerDiscardEnable; + +#if GEN_GEN >= 8 + if (xfb_info) { + so.SOFunctionEnable = true; + so.SOStatisticsEnable = true; + + const VkPipelineRasterizationStateStreamCreateInfoEXT *stream_info = + vk_find_struct_const(rs_info, PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT); + so.RenderStreamSelect = stream_info ? + stream_info->rasterizationStream : 0; + + so.Buffer0SurfacePitch = xfb_info->strides[0]; + so.Buffer1SurfacePitch = xfb_info->strides[1]; + so.Buffer2SurfacePitch = xfb_info->strides[2]; + so.Buffer3SurfacePitch = xfb_info->strides[3]; + + int urb_entry_read_offset = 0; + int urb_entry_read_length = + (prog_data->vue_map.num_slots + 1) / 2 - urb_entry_read_offset; + + /* We always read the whole vertex. This could be reduced at some + * point by reading less and offsetting the register index in the + * SO_DECLs. 
+ */ + so.Stream0VertexReadOffset = urb_entry_read_offset; + so.Stream0VertexReadLength = urb_entry_read_length - 1; + so.Stream1VertexReadOffset = urb_entry_read_offset; + so.Stream1VertexReadLength = urb_entry_read_length - 1; + so.Stream2VertexReadOffset = urb_entry_read_offset; + so.Stream2VertexReadLength = urb_entry_read_length - 1; + so.Stream3VertexReadOffset = urb_entry_read_offset; + so.Stream3VertexReadLength = urb_entry_read_length - 1; + } +#endif /* GEN_GEN >= 8 */ + } + +#if GEN_GEN >= 8 + if (xfb_info) { + struct GENX(SO_DECL) so_decl[MAX_XFB_STREAMS][128]; + int next_offset[MAX_XFB_BUFFERS] = {0, 0, 0, 0}; + int decls[MAX_XFB_STREAMS] = {0, 0, 0, 0}; + + memset(so_decl, 0, sizeof(so_decl)); + + for (unsigned i = 0; i < xfb_info->output_count; i++) { + const nir_xfb_output_info *output = &xfb_info->outputs[i]; + unsigned buffer = output->buffer; + unsigned stream = xfb_info->buffer_to_stream[buffer]; + + /* Our hardware is unusual in that it requires us to program SO_DECLs + * for fake "hole" components, rather than simply taking the offset + * for each real varying. Each hole can have size 1, 2, 3, or 4; we + * program as many size = 4 holes as we can, then a final hole to + * accommodate the final 1, 2, or 3 remaining. 
+ */ + int hole_dwords = (output->offset - next_offset[buffer]) / 4; + while (hole_dwords > 0) { + so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) { + .HoleFlag = 1, + .OutputBufferSlot = buffer, + .ComponentMask = (1 << MIN2(hole_dwords, 4)) - 1, + }; + hole_dwords -= 4; + } + + int varying = output->location; + uint8_t component_mask = output->component_mask; + /* VARYING_SLOT_PSIZ contains three scalar fields packed together: + * - VARYING_SLOT_LAYER in VARYING_SLOT_PSIZ.y + * - VARYING_SLOT_VIEWPORT in VARYING_SLOT_PSIZ.z + * - VARYING_SLOT_PSIZ in VARYING_SLOT_PSIZ.w + */ + if (varying == VARYING_SLOT_LAYER) { + varying = VARYING_SLOT_PSIZ; + component_mask = 1 << 1; // SO_DECL_COMPMASK_Y + } else if (varying == VARYING_SLOT_VIEWPORT) { + varying = VARYING_SLOT_PSIZ; + component_mask = 1 << 2; // SO_DECL_COMPMASK_Z + } else if (varying == VARYING_SLOT_PSIZ) { + component_mask = 1 << 3; // SO_DECL_COMPMASK_W + } + + next_offset[buffer] = output->offset + + __builtin_popcount(component_mask) * 4; + + so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) { + .OutputBufferSlot = buffer, + .RegisterIndex = vue_map->varying_to_slot[varying], + .ComponentMask = component_mask, + }; + } + + int max_decls = 0; + for (unsigned s = 0; s < MAX_XFB_STREAMS; s++) + max_decls = MAX2(max_decls, decls[s]); + + uint8_t sbs[MAX_XFB_STREAMS] = { }; + for (unsigned b = 0; b < MAX_XFB_BUFFERS; b++) { + if (xfb_info->buffers_written & (1 << b)) + sbs[xfb_info->buffer_to_stream[b]] |= 1 << b; + } + + uint32_t *dw = anv_batch_emitn(&pipeline->batch, 3 + 2 * max_decls, + GENX(3DSTATE_SO_DECL_LIST), + .StreamtoBufferSelects0 = sbs[0], + .StreamtoBufferSelects1 = sbs[1], + .StreamtoBufferSelects2 = sbs[2], + .StreamtoBufferSelects3 = sbs[3], + .NumEntries0 = decls[0], + .NumEntries1 = decls[1], + .NumEntries2 = decls[2], + .NumEntries3 = decls[3]); + + for (int i = 0; i < max_decls; i++) { + GENX(SO_DECL_ENTRY_pack)(NULL, dw + 3 + i * 2, + &(struct GENX(SO_DECL_ENTRY)) { + 
.Stream0Decl = so_decl[0][i], + .Stream1Decl = so_decl[1][i], + .Stream2Decl = so_decl[2][i], + .Stream3Decl = so_decl[3][i], + }); + } } +#endif /* GEN_GEN >= 8 */ } static uint32_t @@ -1198,7 +1338,12 @@ vs.SingleVertexDispatch = false; #endif vs.VectorMaskEnable = false; - vs.SamplerCount = get_sampler_count(vs_bin); + /* WA_1606682166: + * Incorrect TDL's SSP address shift in SARB for 16:6 & 18:8 modes. + * Disable the Sampler state prefetch functionality in the SARB by + * programming 0xB000[30] to '1'. + */ + vs.SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(vs_bin); /* Gen 11 workarounds table #2056 WABTPPrefetchDisable suggests to * disable prefetching of binding tables on A0 and B0 steppings. * TODO: Revisit this WA on newer steppings. @@ -1273,8 +1418,8 @@ hs.Enable = true; hs.StatisticsEnable = true; hs.KernelStartPointer = tcs_bin->kernel.offset; - - hs.SamplerCount = get_sampler_count(tcs_bin); + /* WA_1606682166 */ + hs.SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(tcs_bin); /* Gen 11 workarounds table #2056 WABTPPrefetchDisable */ hs.BindingTableEntryCount = GEN_GEN == 11 ? 0 : get_binding_table_entry_count(tcs_bin); hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1; @@ -1291,17 +1436,17 @@ get_scratch_address(pipeline, MESA_SHADER_TESS_CTRL, tcs_bin); } - const VkPipelineTessellationDomainOriginStateCreateInfoKHR *domain_origin_state = - tess_info ? vk_find_struct_const(tess_info, PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO_KHR) : NULL; + const VkPipelineTessellationDomainOriginStateCreateInfo *domain_origin_state = + tess_info ? vk_find_struct_const(tess_info, PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO) : NULL; - VkTessellationDomainOriginKHR uv_origin = + VkTessellationDomainOrigin uv_origin = domain_origin_state ? 
domain_origin_state->domainOrigin : - VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT_KHR; + VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT; anv_batch_emit(&pipeline->batch, GENX(3DSTATE_TE), te) { te.Partitioning = tes_prog_data->partitioning; - if (uv_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT_KHR) { + if (uv_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT) { te.OutputTopology = tes_prog_data->output_topology; } else { /* When the origin is upper-left, we have to flip the winding order */ @@ -1324,8 +1469,8 @@ ds.Enable = true; ds.StatisticsEnable = true; ds.KernelStartPointer = tes_bin->kernel.offset; - - ds.SamplerCount = get_sampler_count(tes_bin); + /* WA_1606682166 */ + ds.SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(tes_bin); /* Gen 11 workarounds table #2056 WABTPPrefetchDisable */ ds.BindingTableEntryCount = GEN_GEN == 11 ? 0 : get_binding_table_entry_count(tes_bin); ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1; @@ -1383,7 +1528,8 @@ gs.SingleProgramFlow = false; gs.VectorMaskEnable = false; - gs.SamplerCount = get_sampler_count(gs_bin); + /* WA_1606682166 */ + gs.SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(gs_bin); /* Gen 11 workarounds table #2056 WABTPPrefetchDisable */ gs.BindingTableEntryCount = GEN_GEN == 11 ? 0 : get_binding_table_entry_count(gs_bin); gs.IncludeVertexHandles = gs_prog_data->base.include_vue_handles; @@ -1616,7 +1762,8 @@ ps.SingleProgramFlow = false; ps.VectorMaskEnable = true; - ps.SamplerCount = get_sampler_count(fs_bin); + /* WA_1606682166 */ + ps.SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(fs_bin); /* Gen 11 workarounds table #2056 WABTPPrefetchDisable */ ps.BindingTableEntryCount = GEN_GEN == 11 ? 
0 : get_binding_table_entry_count(fs_bin); ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 || @@ -1947,10 +2094,14 @@ struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = { .KernelStartPointer = cs_bin->kernel.offset, - - .SamplerCount = get_sampler_count(cs_bin), - /* Gen 11 workarounds table #2056 WABTPPrefetchDisable */ - .BindingTableEntryCount = GEN_GEN == 11 ? 0 : get_binding_table_entry_count(cs_bin), + /* WA_1606682166 */ + .SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(cs_bin), + /* Gen 11 workarounds table #2056 WABTPPrefetchDisable + * + * We add 1 because the CS indirect parameters buffer isn't accounted + * for in bind_map.surface_count. + */ + .BindingTableEntryCount = GEN_GEN == 11 ? 0 : 1 + MIN2(cs_bin->bind_map.surface_count, 30), .BarrierEnable = cs_prog_data->uses_barrier, .SharedLocalMemorySize = encode_slm_size(GEN_GEN, cs_prog_data->base.total_shared), diff -Nru mesa-18.3.3/src/intel/vulkan/genX_query.c mesa-19.0.1/src/intel/vulkan/genX_query.c --- mesa-18.3.3/src/intel/vulkan/genX_query.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/genX_query.c 2019-03-31 23:16:37.000000000 +0000 @@ -72,6 +72,12 @@ /* Statistics queries have a min and max for every statistic */ uint64s_per_slot += 2 * util_bitcount(pipeline_statistics); break; + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + /* Transform feedback queries are 4 values, begin/end for + * written/available. 
+ */ + uint64s_per_slot += 4; + break; default: assert(!"Invalid query type"); } @@ -220,7 +226,8 @@ assert(pool->type == VK_QUERY_TYPE_OCCLUSION || pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS || - pool->type == VK_QUERY_TYPE_TIMESTAMP); + pool->type == VK_QUERY_TYPE_TIMESTAMP || + pool->type == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT); if (anv_device_is_lost(device)) return VK_ERROR_DEVICE_LOST; @@ -284,6 +291,15 @@ break; } + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + if (write_results) + cpu_write_query_result(pData, flags, idx, slot[2] - slot[1]); + idx++; + if (write_results) + cpu_write_query_result(pData, flags, idx, slot[4] - slot[3]); + idx++; + break; + case VK_QUERY_TYPE_TIMESTAMP: if (write_results) cpu_write_query_result(pData, flags, idx, slot[1]); @@ -411,12 +427,47 @@ emit_srm64(&cmd_buffer->batch, addr, vk_pipeline_stat_to_reg[stat]); } +static void +emit_xfb_query(struct anv_cmd_buffer *cmd_buffer, uint32_t stream, + struct anv_address addr) +{ + assert(stream < MAX_XFB_STREAMS); + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) { + lrm.RegisterAddress = GENX(SO_NUM_PRIMS_WRITTEN0_num) + 0 + stream * 8; + lrm.MemoryAddress = anv_address_add(addr, 0); + } + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) { + lrm.RegisterAddress = GENX(SO_NUM_PRIMS_WRITTEN0_num) + 4 + stream * 8; + lrm.MemoryAddress = anv_address_add(addr, 4); + } + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) { + lrm.RegisterAddress = GENX(SO_PRIM_STORAGE_NEEDED0_num) + 0 + stream * 8; + lrm.MemoryAddress = anv_address_add(addr, 16); + } + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) { + lrm.RegisterAddress = GENX(SO_PRIM_STORAGE_NEEDED0_num) + 4 + stream * 8; + lrm.MemoryAddress = anv_address_add(addr, 20); + } +} + void genX(CmdBeginQuery)( VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, VkQueryControlFlags flags) { + 
genX(CmdBeginQueryIndexedEXT)(commandBuffer, queryPool, query, flags, 0); +} + +void genX(CmdBeginQueryIndexedEXT)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query, + VkQueryControlFlags flags, + uint32_t index) +{ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); struct anv_address query_addr = anv_query_address(pool, query); @@ -444,6 +495,14 @@ break; } + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { + pc.CommandStreamerStallEnable = true; + pc.StallAtPixelScoreboard = true; + } + emit_xfb_query(cmd_buffer, index, anv_address_add(query_addr, 8)); + break; + default: unreachable(""); } @@ -452,7 +511,16 @@ void genX(CmdEndQuery)( VkCommandBuffer commandBuffer, VkQueryPool queryPool, - uint32_t query) + VkQueryControlFlags flags) +{ + genX(CmdEndQueryIndexedEXT)(commandBuffer, queryPool, flags, 0); +} + +void genX(CmdEndQueryIndexedEXT)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query, + uint32_t index) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); @@ -484,6 +552,16 @@ break; } + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { + pc.CommandStreamerStallEnable = true; + pc.StallAtPixelScoreboard = true; + } + + emit_xfb_query(cmd_buffer, index, anv_address_add(query_addr, 16)); + emit_query_availability(cmd_buffer, query_addr); + break; + default: unreachable(""); } @@ -733,7 +811,7 @@ * to ensure proper ordering of the commands from the 3d pipe and the * command streamer. 
*/ - if (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_RENDER_TARGET_WRITES) { + if (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_RENDER_TARGET_BUFFER_WRITES) { cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; } @@ -778,6 +856,17 @@ break; } + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + compute_query_result(&cmd_buffer->batch, MI_ALU_REG2, + anv_address_add(query_addr, 8)); + gpu_write_query_result(&cmd_buffer->batch, dest_addr, + flags, idx++, CS_GPR(2)); + compute_query_result(&cmd_buffer->batch, MI_ALU_REG2, + anv_address_add(query_addr, 24)); + gpu_write_query_result(&cmd_buffer->batch, dest_addr, + flags, idx++, CS_GPR(2)); + break; + case VK_QUERY_TYPE_TIMESTAMP: emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(2), anv_address_add(query_addr, 8)); diff -Nru mesa-18.3.3/src/intel/vulkan/genX_state.c mesa-19.0.1/src/intel/vulkan/genX_state.c --- mesa-18.3.3/src/intel/vulkan/genX_state.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/genX_state.c 2019-03-31 23:16:37.000000000 +0000 @@ -91,11 +91,9 @@ VkResult genX(init_device_state)(struct anv_device *device) { - GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs, - &GENX(MOCS)); + device->default_mocs = GENX(MOCS); #if GEN_GEN >= 8 - GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->external_mocs, - &GENX(EXTERNAL_MOCS)); + device->external_mocs = GENX(EXTERNAL_MOCS); #else device->external_mocs = device->default_mocs; #endif @@ -334,7 +332,12 @@ ANV_FROM_HANDLE(anv_ycbcr_conversion, conversion, pSamplerConversion->conversion); - if (conversion == NULL) + /* Ignore conversion for non-YUV formats. This fulfills a requirement + * for clients that want to utilize same code path for images with + * external formats (VK_FORMAT_UNDEFINED) and "regular" RGBA images + * where format is known. 
+ */ + if (conversion == NULL || !conversion->format->can_ycbcr) break; sampler->n_planes = conversion->format->n_planes; diff -Nru mesa-18.3.3/src/intel/vulkan/meson.build mesa-19.0.1/src/intel/vulkan/meson.build --- mesa-18.3.3/src/intel/vulkan/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -1,4 +1,4 @@ -# Copyright © 2017-2018 Intel Corporation +# Copyright © 2017-2019 Intel Corporation # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -111,6 +111,8 @@ libanv_files = files( 'anv_allocator.c', + 'anv_android_stubs.c', + 'anv_android.h', 'anv_batch_chain.c', 'anv_blorp.c', 'anv_cmd_buffer.c', @@ -176,7 +178,10 @@ libanv_common = static_library( 'anv_common', - [libanv_files, anv_entrypoints, anv_extensions_c, anv_extensions_h, sha1_h], + [ + libanv_files, anv_entrypoints, anv_extensions_c, anv_extensions_h, sha1_h, + gen_xml_pack, + ], include_directories : [ inc_common, inc_intel, inc_compiler, inc_drm_uapi, inc_vulkan_util, inc_vulkan_wsi, @@ -225,18 +230,21 @@ ) foreach t : ['block_pool_no_free', 'state_pool_no_free', - 'state_pool_free_list_only', 'state_pool'] + 'state_pool_free_list_only', 'state_pool', + 'state_pool_padding'] test( 'anv_@0@'.format(t), executable( t, ['tests/@0@.c'.format(t), anv_entrypoints[0], anv_extensions_h], + c_args : [ c_sse2_args ], link_with : libvulkan_intel_test, dependencies : [dep_libdrm, dep_thread, dep_m, dep_valgrind], include_directories : [ inc_common, inc_intel, inc_compiler, inc_vulkan_util, inc_vulkan_wsi, ], - ) + ), + suite : ['intel'], ) endforeach endif diff -Nru mesa-18.3.3/src/intel/vulkan/tests/block_pool_no_free.c mesa-19.0.1/src/intel/vulkan/tests/block_pool_no_free.c --- mesa-18.3.3/src/intel/vulkan/tests/block_pool_no_free.c 2018-09-27 19:13:54.000000000 +0000 +++ 
mesa-19.0.1/src/intel/vulkan/tests/block_pool_no_free.c 2019-03-31 23:16:37.000000000 +0000 @@ -33,8 +33,8 @@ pthread_t thread; unsigned id; struct anv_block_pool *pool; - uint32_t blocks[BLOCKS_PER_THREAD]; - uint32_t back_blocks[BLOCKS_PER_THREAD]; + int32_t blocks[BLOCKS_PER_THREAD]; + int32_t back_blocks[BLOCKS_PER_THREAD]; } jobs[NUM_THREADS]; @@ -46,14 +46,14 @@ int32_t block, *data; for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) { - block = anv_block_pool_alloc(job->pool, block_size); - data = job->pool->map + block; + block = anv_block_pool_alloc(job->pool, block_size, NULL); + data = anv_block_pool_map(job->pool, block); *data = block; assert(block >= 0); job->blocks[i] = block; block = anv_block_pool_alloc_back(job->pool, block_size); - data = job->pool->map + block; + data = anv_block_pool_map(job->pool, block); *data = block; assert(block < 0); job->back_blocks[i] = -block; @@ -61,18 +61,18 @@ for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) { block = job->blocks[i]; - data = job->pool->map + block; + data = anv_block_pool_map(job->pool, block); assert(*data == block); block = -job->back_blocks[i]; - data = job->pool->map + block; + data = anv_block_pool_map(job->pool, block); assert(*data == block); } return NULL; } -static void validate_monotonic(uint32_t **blocks) +static void validate_monotonic(int32_t **blocks) { /* A list of indices, one per thread */ unsigned next[NUM_THREADS]; @@ -80,30 +80,30 @@ int highest = -1; while (true) { - /* First, we find which thread has the highest next element */ - int thread_max = -1; - int max_thread_idx = -1; + /* First, we find which thread has the lowest next element */ + int32_t thread_min = INT32_MAX; + int min_thread_idx = -1; for (unsigned i = 0; i < NUM_THREADS; i++) { if (next[i] >= BLOCKS_PER_THREAD) continue; - if (thread_max < blocks[i][next[i]]) { - thread_max = blocks[i][next[i]]; - max_thread_idx = i; + if (thread_min > blocks[i][next[i]]) { + thread_min = blocks[i][next[i]]; + min_thread_idx = i; 
} } /* The only way this can happen is if all of the next[] values are at * BLOCKS_PER_THREAD, in which case, we're done. */ - if (thread_max == -1) + if (thread_min == INT32_MAX) break; /* That next element had better be higher than the previous highest */ - assert(blocks[max_thread_idx][next[max_thread_idx]] > highest); + assert(blocks[min_thread_idx][next[min_thread_idx]] > highest); - highest = blocks[max_thread_idx][next[max_thread_idx]]; - next[max_thread_idx]++; + highest = blocks[min_thread_idx][next[min_thread_idx]]; + next[min_thread_idx]++; } } @@ -128,7 +128,7 @@ pthread_join(jobs[i].thread, NULL); /* Validate that the block allocations were monotonic */ - uint32_t *block_ptrs[NUM_THREADS]; + int32_t *block_ptrs[NUM_THREADS]; for (unsigned i = 0; i < NUM_THREADS; i++) block_ptrs[i] = jobs[i].blocks; validate_monotonic(block_ptrs); diff -Nru mesa-18.3.3/src/intel/vulkan/tests/state_pool_padding.c mesa-19.0.1/src/intel/vulkan/tests/state_pool_padding.c --- mesa-18.3.3/src/intel/vulkan/tests/state_pool_padding.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/tests/state_pool_padding.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,73 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" + +int main(int argc, char **argv) +{ + struct anv_instance instance; + struct anv_device device = { + .instance = &instance, + }; + struct anv_state_pool state_pool; + + anv_state_pool_init(&state_pool, &device, 4096, 4096, EXEC_OBJECT_PINNED); + + /* Get the size of the underlying block_pool */ + struct anv_block_pool *bp = &state_pool.block_pool; + uint64_t pool_size = bp->size; + + /* Grab one so the pool has some initial usage */ + anv_state_pool_alloc(&state_pool, 16, 16); + + /* Grab a state that is the size of the initial allocation */ + struct anv_state state = anv_state_pool_alloc(&state_pool, pool_size, 16); + + /* The pool must have grown */ + assert(bp->size > pool_size); + + /* And the state must have been allocated at the end of the original size */ + assert(state.offset == pool_size); + + /* A new allocation that fits into the returned empty space should have an + * offset within the original pool size + */ + state = anv_state_pool_alloc(&state_pool, 4096, 16); + assert(state.offset + state.alloc_size <= pool_size); + + /* We should be able to allocate pool->block_size'd chunks in the returned area + */ + int left_chunks = pool_size / 4096 - 2; + for (int i = 0; i < left_chunks; i++) { + state = anv_state_pool_alloc(&state_pool, 4096, 16); + assert(state.offset + state.alloc_size <= pool_size); + } + + /* Now the next chunk to be allocated should make the pool grow again */ + pool_size = bp->size; + state = 
anv_state_pool_alloc(&state_pool, 4096, 16); + assert(bp->size > pool_size); + assert(state.offset == pool_size); + + anv_state_pool_finish(&state_pool); +} diff -Nru mesa-18.3.3/src/intel/vulkan/vk_format_info.h mesa-19.0.1/src/intel/vulkan/vk_format_info.h --- mesa-18.3.3/src/intel/vulkan/vk_format_info.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/vk_format_info.h 2019-03-31 23:16:37.000000000 +0000 @@ -27,6 +27,56 @@ #include #include +#ifdef ANDROID +#include +/* See i915_private_android_types.h in minigbm. */ +#define HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL 0x100 + +static inline VkFormat +vk_format_from_android(unsigned android_format) +{ + switch (android_format) { + case AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM: + return VK_FORMAT_R8G8B8A8_UNORM; + case AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM: + case AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM: + return VK_FORMAT_R8G8B8_UNORM; + case AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM: + return VK_FORMAT_R5G6B5_UNORM_PACK16; + case AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT: + return VK_FORMAT_R16G16B16A16_SFLOAT; + case AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM: + return VK_FORMAT_A2B10G10R10_UNORM_PACK32; + case HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL: + return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM; + case AHARDWAREBUFFER_FORMAT_BLOB: + default: + return VK_FORMAT_UNDEFINED; + } +} + +static inline unsigned +android_format_from_vk(unsigned vk_format) +{ + switch (vk_format) { + case VK_FORMAT_R8G8B8A8_UNORM: + return AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM; + case VK_FORMAT_R8G8B8_UNORM: + return AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM; + case VK_FORMAT_R5G6B5_UNORM_PACK16: + return AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM; + case VK_FORMAT_R16G16B16A16_SFLOAT: + return AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT; + case VK_FORMAT_A2B10G10R10_UNORM_PACK32: + return AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM; + case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM: + return HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL; + default: + return 
AHARDWAREBUFFER_FORMAT_BLOB; + } +} +#endif + static inline VkImageAspectFlags vk_format_aspects(VkFormat format) { diff -Nru mesa-18.3.3/src/loader/loader.c mesa-19.0.1/src/loader/loader.c --- mesa-18.3.3/src/loader/loader.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/loader/loader.c 2019-03-31 23:16:37.000000000 +0000 @@ -26,6 +26,7 @@ * Rob Clark */ +#include #include #include #include @@ -35,12 +36,15 @@ #include #include #include +#include #ifdef MAJOR_IN_MKDEV #include #endif #ifdef MAJOR_IN_SYSMACROS #include #endif +#include +#include #include "loader.h" #ifdef HAVE_LIBDRM @@ -64,7 +68,7 @@ } } -static void (*log_)(int level, const char *fmt, ...) = default_logger; +static loader_logger *log_ = default_logger; int loader_open_device(const char *device_name) @@ -487,19 +491,11 @@ } void -loader_set_logger(void (*logger)(int level, const char *fmt, ...)) +loader_set_logger(loader_logger *logger) { log_ = logger; } -/* XXX: Local definition to avoid pulling the heavyweight GL/gl.h and - * GL/internal/dri_interface.h - */ - -#ifndef __DRI_DRIVER_GET_EXTENSIONS -#define __DRI_DRIVER_GET_EXTENSIONS "__driDriverGetExtensions" -#endif - char * loader_get_extensions_name(const char *driver_name) { @@ -516,3 +512,91 @@ return name; } + +/** + * Opens a DRI driver using its driver name, returning the __DRIextension + * entrypoints. + * + * \param driverName - a name like "i965", "radeon", "nouveau", etc. + * \param out_driver - Address where the dlopen() return value will be stored. + * \param search_path_vars - NULL-terminated list of env vars that can be used + * to override the DEFAULT_DRIVER_DIR search path. 
+ */ +const struct __DRIextensionRec ** +loader_open_driver(const char *driver_name, + void **out_driver_handle, + const char **search_path_vars) +{ + char path[PATH_MAX], *search_paths, *next, *end; + char *get_extensions_name; + const struct __DRIextensionRec **extensions = NULL; + const struct __DRIextensionRec **(*get_extensions)(void); + + search_paths = NULL; + if (geteuid() == getuid() && search_path_vars) { + for (int i = 0; search_path_vars[i] != NULL; i++) { + search_paths = getenv(search_path_vars[i]); + if (search_paths) + break; + } + } + if (search_paths == NULL) + search_paths = DEFAULT_DRIVER_DIR; + + void *driver = NULL; + end = search_paths + strlen(search_paths); + for (char *p = search_paths; p < end; p = next + 1) { + int len; + next = strchr(p, ':'); + if (next == NULL) + next = end; + + len = next - p; +#if GLX_USE_TLS + snprintf(path, sizeof(path), "%.*s/tls/%s_dri.so", len, p, driver_name); + driver = dlopen(path, RTLD_NOW | RTLD_GLOBAL); +#endif + if (driver == NULL) { + snprintf(path, sizeof(path), "%.*s/%s_dri.so", len, p, driver_name); + driver = dlopen(path, RTLD_NOW | RTLD_GLOBAL); + if (driver == NULL) + log_(_LOADER_DEBUG, "MESA-LOADER: failed to open %s: %s\n", + path, dlerror()); + } + /* not need continue to loop all paths once the driver is found */ + if (driver != NULL) + break; + } + + if (driver == NULL) { + log_(_LOADER_WARNING, "MESA-LOADER: failed to open %s (search paths %s)\n", + driver_name, search_paths); + *out_driver_handle = NULL; + return NULL; + } + + log_(_LOADER_DEBUG, "MESA-LOADER: dlopen(%s)\n", path); + + get_extensions_name = loader_get_extensions_name(driver_name); + if (get_extensions_name) { + get_extensions = dlsym(driver, get_extensions_name); + if (get_extensions) { + extensions = get_extensions(); + } else { + log_(_LOADER_DEBUG, "MESA-LOADER: driver does not expose %s(): %s\n", + get_extensions_name, dlerror()); + } + free(get_extensions_name); + } + + if (!extensions) + extensions = dlsym(driver, 
__DRI_DRIVER_EXTENSIONS); + if (extensions == NULL) { + log_(_LOADER_WARNING, + "MESA-LOADER: driver exports no extensions (%s)\n", dlerror()); + dlclose(driver); + } + + *out_driver_handle = driver; + return extensions; +} diff -Nru mesa-18.3.3/src/loader/loader_dri3_helper.c mesa-19.0.1/src/loader/loader_dri3_helper.c --- mesa-18.3.3/src/loader/loader_dri3_helper.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/loader/loader_dri3_helper.c 2019-03-31 23:16:37.000000000 +0000 @@ -101,6 +101,32 @@ return NULL; } +/* Sets the adaptive sync window property state. */ +static void +set_adaptive_sync_property(xcb_connection_t *conn, xcb_drawable_t drawable, + uint32_t state) +{ + static char const name[] = "_VARIABLE_REFRESH"; + xcb_intern_atom_cookie_t cookie; + xcb_intern_atom_reply_t* reply; + xcb_void_cookie_t check; + + cookie = xcb_intern_atom(conn, 0, strlen(name), name); + reply = xcb_intern_atom_reply(conn, cookie, NULL); + if (reply == NULL) + return; + + if (state) + check = xcb_change_property_checked(conn, XCB_PROP_MODE_REPLACE, + drawable, reply->atom, + XCB_ATOM_CARDINAL, 32, 1, &state); + else + check = xcb_delete_property_checked(conn, drawable, reply->atom); + + xcb_discard_reply(conn, check.sequence); + free(reply); +} + /* Get red channel mask for given drawable at given depth. 
*/ static unsigned int dri3_get_red_mask_for_depth(struct loader_dri3_drawable *draw, int depth) @@ -331,16 +357,30 @@ draw->have_back = 0; draw->have_fake_front = 0; draw->first_init = true; + draw->adaptive_sync = false; + draw->adaptive_sync_active = false; draw->cur_blit_source = -1; draw->back_format = __DRI_IMAGE_FORMAT_NONE; mtx_init(&draw->mtx, mtx_plain); cnd_init(&draw->event_cnd); - if (draw->ext->config) + if (draw->ext->config) { + unsigned char adaptive_sync = 0; + draw->ext->config->configQueryi(draw->dri_screen, "vblank_mode", &vblank_mode); + draw->ext->config->configQueryb(draw->dri_screen, + "adaptive_sync", + &adaptive_sync); + + draw->adaptive_sync = adaptive_sync; + } + + if (!draw->adaptive_sync) + set_adaptive_sync_property(conn, draw->drawable, false); + switch (vblank_mode) { case DRI_CONF_VBLANK_NEVER: case DRI_CONF_VBLANK_DEF_INTERVAL_0: @@ -879,6 +919,12 @@ back = dri3_find_back_alloc(draw); mtx_lock(&draw->mtx); + + if (draw->adaptive_sync && !draw->adaptive_sync_active) { + set_adaptive_sync_property(draw->conn, draw->drawable, true); + draw->adaptive_sync_active = true; + } + if (draw->is_different_gpu && back) { /* Update the linear buffer before presenting the pixmap */ (void) loader_dri3_blit_image(draw, diff -Nru mesa-18.3.3/src/loader/loader_dri3_helper.h mesa-19.0.1/src/loader/loader_dri3_helper.h --- mesa-18.3.3/src/loader/loader_dri3_helper.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/loader/loader_dri3_helper.h 2019-03-31 23:16:37.000000000 +0000 @@ -156,6 +156,8 @@ xcb_special_event_t *special_event; bool first_init; + bool adaptive_sync; + bool adaptive_sync_active; int swap_interval; struct loader_dri3_extensions *ext; diff -Nru mesa-18.3.3/src/loader/loader.h mesa-19.0.1/src/loader/loader.h --- mesa-18.3.3/src/loader/loader.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/loader/loader.h 2019-03-31 23:16:37.000000000 +0000 @@ -33,6 +33,8 @@ extern "C" { #endif +struct __DRIextensionRec; + /* Helpers 
to figure out driver and device name, eg. from pci-id, etc. */ int @@ -47,6 +49,11 @@ char * loader_get_driver_for_fd(int fd); +const struct __DRIextensionRec ** +loader_open_driver(const char *driver_name, + void **out_driver_handle, + const char **search_path_vars); + char * loader_get_device_name_for_fd(int fd); @@ -67,8 +74,9 @@ #define _LOADER_INFO 2 /* just useful info */ #define _LOADER_DEBUG 3 /* useful info for debugging */ +typedef void loader_logger(int level, const char *fmt, ...); void -loader_set_logger(void (*logger)(int level, const char *fmt, ...)); +loader_set_logger(loader_logger *logger); char * loader_get_extensions_name(const char *driver_name); diff -Nru mesa-18.3.3/src/loader/Makefile.am mesa-19.0.1/src/loader/Makefile.am --- mesa-18.3.3/src/loader/Makefile.am 2018-03-17 22:00:11.000000000 +0000 +++ mesa-19.0.1/src/loader/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -28,6 +28,7 @@ AM_CPPFLAGS = \ -I$(top_builddir)/src/util/ \ -DUSE_DRICONF \ + -DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" \ $(DEFINES) \ -I$(top_srcdir)/include \ -I$(top_srcdir)/include/drm-uapi \ diff -Nru mesa-18.3.3/src/loader/meson.build mesa-19.0.1/src/loader/meson.build --- mesa-18.3.3/src/loader/meson.build 2018-03-13 20:41:43.000000000 +0000 +++ mesa-19.0.1/src/loader/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -39,7 +39,9 @@ 'loader', ['loader.c', 'loader.h', 'pci_id_driver_map.c', 'pci_id_driver_map.h', xmlpool_options_h], - c_args : [c_vis_args, '-DUSE_DRICONF'], + c_args : [c_vis_args, '-DUSE_DRICONF', + '-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path), +], include_directories : [inc_include, inc_src, inc_util], dependencies : [dep_libdrm, dep_thread], build_by_default : false, diff -Nru mesa-18.3.3/src/loader/SConscript mesa-19.0.1/src/loader/SConscript --- mesa-18.3.3/src/loader/SConscript 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/loader/SConscript 2019-03-31 23:16:37.000000000 +0000 @@ -12,6 +12,8 @@ env.PkgUseModules('DRM') 
env.Append(CPPDEFINES = ['HAVE_LIBDRM']) +env.Append(CPPDEFINES = ['DEFAULT_DRIVER_DIR=\\"/usr/local/lib/dri\\"']) + # parse Makefile.sources sources = env.ParseSourceList('Makefile.sources', 'LOADER_C_FILES') diff -Nru mesa-18.3.3/src/Makefile.am mesa-19.0.1/src/Makefile.am --- mesa-18.3.3/src/Makefile.am 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -81,6 +81,10 @@ SUBDIRS += broadcom endif +if HAVE_FREEDRENO_DRIVERS +SUBDIRS += freedreno +endif + if NEED_OPENGL_COMMON SUBDIRS += mesa endif diff -Nru mesa-18.3.3/src/mapi/es1api/ABI-check mesa-19.0.1/src/mapi/es1api/ABI-check --- mesa-18.3.3/src/mapi/es1api/ABI-check 2018-02-27 16:44:19.000000000 +0000 +++ mesa-19.0.1/src/mapi/es1api/ABI-check 2019-03-31 23:16:37.000000000 +0000 @@ -1,11 +1,7 @@ #!/bin/sh set -eu -# Print defined gl.* functions not in GL ES 1.1 or in -# (FIXME, none of these should be part of the ABI) -# GL_EXT_multi_draw_arrays -# GL_OES_EGL_image - +# Print defined gl.* functions not in GL ES 1.1 # or in extensions that are part of the ES 1.1 extension pack. 
# (see http://www.khronos.org/registry/gles/specs/1.1/opengles_spec_1_1_extension_pack.pdf) @@ -65,8 +61,6 @@ glDisableClientState glDrawArrays glDrawElements -glEGLImageTargetRenderbufferStorageOES -glEGLImageTargetTexture2DOES glEnable glEnableClientState glFinish @@ -123,8 +117,6 @@ glMaterialx glMaterialxv glMatrixMode -glMultiDrawArraysEXT -glMultiDrawElementsEXT glMultiTexCoord4f glMultiTexCoord4x glMultMatrixf diff -Nru mesa-18.3.3/src/mapi/es1api/meson.build mesa-19.0.1/src/mapi/es1api/meson.build --- mesa-18.3.3/src/mapi/es1api/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mapi/es1api/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -20,10 +20,10 @@ es1_glapi_mapi_tmp_h = custom_target( 'es1_glapi_mapi_tmp.h', - input : [mapi_abi_py, gl_and_es_api_files], + input : [glapi_gen_mapi_py, glapi_gen_gl_xml], output : 'glapi_mapi_tmp.h', - command : [prog_python, '@INPUT0@', '--printer', 'es1api', '@INPUT1@'], - depend_files : api_xml_files, + command : [prog_python, '@INPUT0@', 'glesv1', '@INPUT1@'], + depend_files : glapi_gen_mapi_deps, capture : true, ) @@ -56,6 +56,7 @@ 'es1-ABI-check', find_program('ABI-check'), env : env_test, - args : libglesv1_cm + args : libglesv1_cm, + suite : ['mapi'], ) endif diff -Nru mesa-18.3.3/src/mapi/es2api/ABI-check mesa-19.0.1/src/mapi/es2api/ABI-check --- mesa-18.3.3/src/mapi/es2api/ABI-check 2018-02-27 16:44:19.000000000 +0000 +++ mesa-19.0.1/src/mapi/es2api/ABI-check 2019-03-31 23:16:37.000000000 +0000 @@ -1,10 +1,7 @@ #!/bin/sh set -eu -# Print defined gl.* functions not in GL ES 3.0 or in -# (FIXME, none of these should be part of the ABI) -# GL_EXT_multi_draw_arrays -# GL_OES_EGL_image +# Print defined gl.* functions not in GL ES 3.{0..2} case "$(uname)" in Darwin) @@ -34,7 +31,6 @@ glBindBuffer glBindBufferBase glBindBufferRange -glBindFragDataLocationEXT glBindFramebuffer glBindImageTexture glBindProgramPipeline @@ -118,8 +114,6 @@ glDrawElementsInstancedBaseVertex glDrawRangeElements 
glDrawRangeElementsBaseVertex -glEGLImageTargetRenderbufferStorageOES -glEGLImageTargetTexture2DOES glEnable glEnableVertexAttribArray glEnablei @@ -200,9 +194,7 @@ glGetTexLevelParameterfv glGetTexLevelParameteriv glGetTexParameterIiv -glGetTexParameterIivEXT glGetTexParameterIuiv -glGetTexParameterIuivEXT glGetTexParameterfv glGetTexParameteriv glGetTransformFeedbackVarying @@ -243,8 +235,6 @@ glMemoryBarrier glMemoryBarrierByRegion glMinSampleShading -glMultiDrawArraysEXT -glMultiDrawElementsEXT glObjectLabel glObjectPtrLabel glPatchParameteri @@ -318,9 +308,7 @@ glTexImage2D glTexImage3D glTexParameterIiv -glTexParameterIivEXT glTexParameterIuiv -glTexParameterIuivEXT glTexParameterf glTexParameterfv glTexParameteri diff -Nru mesa-18.3.3/src/mapi/es2api/meson.build mesa-19.0.1/src/mapi/es2api/meson.build --- mesa-18.3.3/src/mapi/es2api/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mapi/es2api/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -20,10 +20,10 @@ es2_glapi_mapi_tmp_h = custom_target( 'es2_glapi_mapi_tmp.h', - input : [mapi_abi_py, gl_and_es_api_files], + input : [glapi_gen_mapi_py, glapi_gen_gl_xml], output : 'glapi_mapi_tmp.h', - command : [prog_python, '@INPUT0@', '--printer', 'es2api', '@INPUT1@'], - depend_files : api_xml_files, + command : [prog_python, '@INPUT0@', 'glesv2', '@INPUT1@'], + depend_files : glapi_gen_mapi_deps, capture : true, ) @@ -56,6 +56,7 @@ 'es2-ABI-check', find_program('ABI-check'), env : env_test, - args : libgles2 + args : libgles2, + suite : ['mapi'], ) endif diff -Nru mesa-18.3.3/src/mapi/glapi/gen/ARB_framebuffer_object.xml mesa-19.0.1/src/mapi/glapi/gen/ARB_framebuffer_object.xml --- mesa-18.3.3/src/mapi/glapi/gen/ARB_framebuffer_object.xml 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mapi/glapi/gen/ARB_framebuffer_object.xml 2019-03-31 23:16:37.000000000 +0000 @@ -172,7 +172,15 @@ - + + diff -Nru mesa-18.3.3/src/mapi/glapi/gen/es_EXT.xml mesa-19.0.1/src/mapi/glapi/gen/es_EXT.xml --- 
mesa-18.3.3/src/mapi/glapi/gen/es_EXT.xml 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mapi/glapi/gen/es_EXT.xml 2019-03-31 23:16:37.000000000 +0000 @@ -810,6 +810,8 @@ + + @@ -1452,6 +1454,19 @@ + + + + + + + + + + + + + diff -Nru mesa-18.3.3/src/mapi/glapi/gen/EXT_multisampled_render_to_texture.xml mesa-19.0.1/src/mapi/glapi/gen/EXT_multisampled_render_to_texture.xml --- mesa-18.3.3/src/mapi/glapi/gen/EXT_multisampled_render_to_texture.xml 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/mapi/glapi/gen/EXT_multisampled_render_to_texture.xml 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru mesa-18.3.3/src/mapi/glapi/gen/gl_API.xml mesa-19.0.1/src/mapi/glapi/gen/gl_API.xml --- mesa-18.3.3/src/mapi/glapi/gen/gl_API.xml 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mapi/glapi/gen/gl_API.xml 2019-03-31 23:16:37.000000000 +0000 @@ -1148,7 +1148,7 @@ - + diff -Nru mesa-18.3.3/src/mapi/glapi/gen/gl_marshal.py mesa-19.0.1/src/mapi/glapi/gen/gl_marshal.py --- mesa-18.3.3/src/mapi/glapi/gen/gl_marshal.py 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mapi/glapi/gen/gl_marshal.py 2019-03-31 23:16:37.000000000 +0000 @@ -249,7 +249,7 @@ out('if ({0}) {{'.format(func.marshal_fail)) with indent(): out('_mesa_glthread_finish(ctx);') - out('_mesa_glthread_restore_dispatch(ctx);') + out('_mesa_glthread_restore_dispatch(ctx, __func__);') self.print_sync_dispatch(func) out('return;') out('}') diff -Nru mesa-18.3.3/src/mapi/glapi/gen/gl_XML.py mesa-19.0.1/src/mapi/glapi/gen/gl_XML.py --- mesa-18.3.3/src/mapi/glapi/gen/gl_XML.py 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mapi/glapi/gen/gl_XML.py 2019-03-31 23:16:37.000000000 +0000 @@ -611,21 +611,11 @@ self.deprecated = None self.has_no_error_variant = False - # self.entry_point_api_map[name][api] is a decimal value - # indicating the earliest version of the given API in which - # each entry point exists. 
Every entry point is included in - # the first level of the map; the second level of the map only - # lists APIs which contain the entry point in at least one - # version. For example, - # self.entry_point_api_map['ClipPlanex'] == { 'es1': - # Decimal('1.1') }. - self.entry_point_api_map = {} - # self.api_map[api] is a decimal value indicating the earliest # version of the given API in which ANY alias for the function # exists. The map only lists APIs which contain the function # in at least one version. For example, for the ClipPlanex - # function, self.entry_point_api_map == { 'es1': + # function, self.api_map == { 'es1': # Decimal('1.1') }. self.api_map = {} @@ -658,13 +648,11 @@ self.entry_points.append( name ) - self.entry_point_api_map[name] = {} for api in ('es1', 'es2'): version_str = element.get(api, 'none') assert version_str is not None if version_str != 'none': version_decimal = Decimal(version_str) - self.entry_point_api_map[name][api] = version_decimal if api not in self.api_map or \ version_decimal < self.api_map[api]: self.api_map[api] = version_decimal @@ -693,7 +681,7 @@ # Only try to set the offset when a non-alias entry-point # is being processed. - if name in static_data.offsets: + if name in static_data.offsets and static_data.offsets[name] <= static_data.MAX_OFFSETS: self.offset = static_data.offsets[name] else: self.offset = -1 @@ -826,23 +814,6 @@ else: return "_dispatch_stub_%u" % (self.offset) - def entry_points_for_api_version(self, api, version = None): - """Return a list of the entry point names for this function - which are supported in the given API (and optionally, version). - - Use the decimal.Decimal type to precisely express non-integer - versions. 
- """ - result = [] - for entry_point, api_to_ver in self.entry_point_api_map.items(): - if api not in api_to_ver: - continue - if version is not None and version < api_to_ver[api]: - continue - result.append(entry_point) - return result - - class gl_item_factory(object): """Factory to create objects derived from gl_item.""" @@ -878,31 +849,6 @@ typeexpr.create_initial_types() return - def filter_functions(self, entry_point_list): - """Filter out entry points not in entry_point_list.""" - functions_by_name = {} - for func in self.functions_by_name.values(): - entry_points = [ent for ent in func.entry_points if ent in entry_point_list] - if entry_points: - func.filter_entry_points(entry_points) - functions_by_name[func.name] = func - - self.functions_by_name = functions_by_name - - def filter_functions_by_api(self, api, version = None): - """Filter out entry points not in the given API (or - optionally, not in the given version of the given API). - """ - functions_by_name = {} - for func in self.functions_by_name.values(): - entry_points = func.entry_points_for_api_version(api, version) - if entry_points: - func.filter_entry_points(entry_points) - functions_by_name[func.name] = func - - self.functions_by_name = functions_by_name - - def parse_file(self, file_name): doc = ET.parse( file_name ) self.process_element(file_name, doc) diff -Nru mesa-18.3.3/src/mapi/glapi/gen/Makefile.am mesa-19.0.1/src/mapi/glapi/gen/Makefile.am --- mesa-18.3.3/src/mapi/glapi/gen/Makefile.am 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mapi/glapi/gen/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -200,6 +200,7 @@ EXT_external_objects_fd.xml \ EXT_framebuffer_object.xml \ EXT_gpu_shader4.xml \ + EXT_multisampled_render_to_texture.xml \ EXT_packed_depth_stencil.xml \ EXT_provoking_vertex.xml \ EXT_separate_shader_objects.xml \ diff -Nru mesa-18.3.3/src/mapi/glapi/gen/meson.build mesa-19.0.1/src/mapi/glapi/gen/meson.build --- mesa-18.3.3/src/mapi/glapi/gen/meson.build 
2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mapi/glapi/gen/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -18,6 +18,13 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +glapi_gen_gl_xml = files('../registry/gl.xml') +glapi_gen_mapi_deps = [ + glapi_gen_gl_xml, + genCommon_py, + glapi_gen_gl_xml, +] + gl_and_es_api_files = files('gl_and_es_API.xml') api_xml_files = files( @@ -107,6 +114,7 @@ 'EXT_external_objects_fd.xml', 'EXT_framebuffer_object.xml', 'EXT_gpu_shader4.xml', + 'EXT_multisampled_render_to_texture.xml', 'EXT_packed_depth_stencil.xml', 'EXT_provoking_vertex.xml', 'EXT_separate_shader_objects.xml', diff -Nru mesa-18.3.3/src/mapi/glapi/gen/static_data.py mesa-19.0.1/src/mapi/glapi/gen/static_data.py --- mesa-18.3.3/src/mapi/glapi/gen/static_data.py 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mapi/glapi/gen/static_data.py 2019-03-31 23:16:37.000000000 +0000 @@ -20,8 +20,17 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. + +"""The maximum entries of actual static data required by indirect GLX.""" + + +MAX_OFFSETS = 407 + """Table of functions that have ABI-mandated offsets in the dispatch table. +The first MAX_OFFSETS entries are required by indirect GLX. The rest are +required to preserve the glapi <> drivers ABI. This is to be addressed shortly. 
+ This list will never change.""" offsets = { "NewList": 0, @@ -331,33 +340,33 @@ "Translatef": 304, "Viewport": 305, "ArrayElement": 306, + "BindTexture": 307, "ColorPointer": 308, "DisableClientState": 309, "DrawArrays": 310, "DrawElements": 311, "EdgeFlagPointer": 312, "EnableClientState": 313, - "GetPointerv": 329, "IndexPointer": 314, + "Indexub": 315, + "Indexubv": 316, "InterleavedArrays": 317, "NormalPointer": 318, + "PolygonOffset": 319, "TexCoordPointer": 320, "VertexPointer": 321, - "PolygonOffset": 319, + "AreTexturesResident": 322, "CopyTexImage1D": 323, "CopyTexImage2D": 324, "CopyTexSubImage1D": 325, "CopyTexSubImage2D": 326, - "TexSubImage1D": 332, - "TexSubImage2D": 333, - "AreTexturesResident": 322, - "BindTexture": 307, "DeleteTextures": 327, "GenTextures": 328, + "GetPointerv": 329, "IsTexture": 330, "PrioritizeTextures": 331, - "Indexub": 315, - "Indexubv": 316, + "TexSubImage1D": 332, + "TexSubImage2D": 333, "PopClientAttrib": 334, "PushClientAttrib": 335, "BlendColor": 336, @@ -431,7 +440,1019 @@ "MultiTexCoord4i": 404, "MultiTexCoord4iv": 405, "MultiTexCoord4s": 406, - "MultiTexCoord4sv": 407 + "MultiTexCoord4sv": 407, + "CompressedTexImage1D": 408, + "CompressedTexImage2D": 409, + "CompressedTexImage3D": 410, + "CompressedTexSubImage1D": 411, + "CompressedTexSubImage2D": 412, + "CompressedTexSubImage3D": 413, + "GetCompressedTexImage": 414, + "LoadTransposeMatrixd": 415, + "LoadTransposeMatrixf": 416, + "MultTransposeMatrixd": 417, + "MultTransposeMatrixf": 418, + "SampleCoverage": 419, + "BlendFuncSeparate": 420, + "FogCoordPointer": 421, + "FogCoordd": 422, + "FogCoorddv": 423, + "MultiDrawArrays": 424, + "PointParameterf": 425, + "PointParameterfv": 426, + "PointParameteri": 427, + "PointParameteriv": 428, + "SecondaryColor3b": 429, + "SecondaryColor3bv": 430, + "SecondaryColor3d": 431, + "SecondaryColor3dv": 432, + "SecondaryColor3i": 433, + "SecondaryColor3iv": 434, + "SecondaryColor3s": 435, + "SecondaryColor3sv": 436, + 
"SecondaryColor3ub": 437, + "SecondaryColor3ubv": 438, + "SecondaryColor3ui": 439, + "SecondaryColor3uiv": 440, + "SecondaryColor3us": 441, + "SecondaryColor3usv": 442, + "SecondaryColorPointer": 443, + "WindowPos2d": 444, + "WindowPos2dv": 445, + "WindowPos2f": 446, + "WindowPos2fv": 447, + "WindowPos2i": 448, + "WindowPos2iv": 449, + "WindowPos2s": 450, + "WindowPos2sv": 451, + "WindowPos3d": 452, + "WindowPos3dv": 453, + "WindowPos3f": 454, + "WindowPos3fv": 455, + "WindowPos3i": 456, + "WindowPos3iv": 457, + "WindowPos3s": 458, + "WindowPos3sv": 459, + "BeginQuery": 460, + "BindBuffer": 461, + "BufferData": 462, + "BufferSubData": 463, + "DeleteBuffers": 464, + "DeleteQueries": 465, + "EndQuery": 466, + "GenBuffers": 467, + "GenQueries": 468, + "GetBufferParameteriv": 469, + "GetBufferPointerv": 470, + "GetBufferSubData": 471, + "GetQueryObjectiv": 472, + "GetQueryObjectuiv": 473, + "GetQueryiv": 474, + "IsBuffer": 475, + "IsQuery": 476, + "MapBuffer": 477, + "UnmapBuffer": 478, + "AttachShader": 479, + "BindAttribLocation": 480, + "BlendEquationSeparate": 481, + "CompileShader": 482, + "CreateProgram": 483, + "CreateShader": 484, + "DeleteProgram": 485, + "DeleteShader": 486, + "DetachShader": 487, + "DisableVertexAttribArray": 488, + "DrawBuffers": 489, + "EnableVertexAttribArray": 490, + "GetActiveAttrib": 491, + "GetActiveUniform": 492, + "GetAttachedShaders": 493, + "GetAttribLocation": 494, + "GetProgramInfoLog": 495, + "GetProgramiv": 496, + "GetShaderInfoLog": 497, + "GetShaderSource": 498, + "GetShaderiv": 499, + "GetUniformLocation": 500, + "GetUniformfv": 501, + "GetUniformiv": 502, + "GetVertexAttribPointerv": 503, + "GetVertexAttribdv": 504, + "GetVertexAttribfv": 505, + "GetVertexAttribiv": 506, + "IsProgram": 507, + "IsShader": 508, + "LinkProgram": 509, + "ShaderSource": 510, + "StencilFuncSeparate": 511, + "StencilMaskSeparate": 512, + "StencilOpSeparate": 513, + "Uniform1f": 514, + "Uniform1fv": 515, + "Uniform1i": 516, + "Uniform1iv": 517, + 
"Uniform2f": 518, + "Uniform2fv": 519, + "Uniform2i": 520, + "Uniform2iv": 521, + "Uniform3f": 522, + "Uniform3fv": 523, + "Uniform3i": 524, + "Uniform3iv": 525, + "Uniform4f": 526, + "Uniform4fv": 527, + "Uniform4i": 528, + "Uniform4iv": 529, + "UniformMatrix2fv": 530, + "UniformMatrix3fv": 531, + "UniformMatrix4fv": 532, + "UseProgram": 533, + "ValidateProgram": 534, + "VertexAttrib1d": 535, + "VertexAttrib1dv": 536, + "VertexAttrib1s": 537, + "VertexAttrib1sv": 538, + "VertexAttrib2d": 539, + "VertexAttrib2dv": 540, + "VertexAttrib2s": 541, + "VertexAttrib2sv": 542, + "VertexAttrib3d": 543, + "VertexAttrib3dv": 544, + "VertexAttrib3s": 545, + "VertexAttrib3sv": 546, + "VertexAttrib4Nbv": 547, + "VertexAttrib4Niv": 548, + "VertexAttrib4Nsv": 549, + "VertexAttrib4Nub": 550, + "VertexAttrib4Nubv": 551, + "VertexAttrib4Nuiv": 552, + "VertexAttrib4Nusv": 553, + "VertexAttrib4bv": 554, + "VertexAttrib4d": 555, + "VertexAttrib4dv": 556, + "VertexAttrib4iv": 557, + "VertexAttrib4s": 558, + "VertexAttrib4sv": 559, + "VertexAttrib4ubv": 560, + "VertexAttrib4uiv": 561, + "VertexAttrib4usv": 562, + "VertexAttribPointer": 563, + "UniformMatrix2x3fv": 564, + "UniformMatrix2x4fv": 565, + "UniformMatrix3x2fv": 566, + "UniformMatrix3x4fv": 567, + "UniformMatrix4x2fv": 568, + "UniformMatrix4x3fv": 569, + "BeginConditionalRender": 570, + "BeginTransformFeedback": 571, + "BindBufferBase": 572, + "BindBufferRange": 573, + "BindFragDataLocation": 574, + "ClampColor": 575, + "ClearBufferfi": 576, + "ClearBufferfv": 577, + "ClearBufferiv": 578, + "ClearBufferuiv": 579, + "ColorMaski": 580, + "Disablei": 581, + "Enablei": 582, + "EndConditionalRender": 583, + "EndTransformFeedback": 584, + "GetBooleani_v": 585, + "GetFragDataLocation": 586, + "GetIntegeri_v": 587, + "GetStringi": 588, + "GetTexParameterIiv": 589, + "GetTexParameterIuiv": 590, + "GetTransformFeedbackVarying": 591, + "GetUniformuiv": 592, + "GetVertexAttribIiv": 593, + "GetVertexAttribIuiv": 594, + "IsEnabledi": 595, + 
"TexParameterIiv": 596, + "TexParameterIuiv": 597, + "TransformFeedbackVaryings": 598, + "Uniform1ui": 599, + "Uniform1uiv": 600, + "Uniform2ui": 601, + "Uniform2uiv": 602, + "Uniform3ui": 603, + "Uniform3uiv": 604, + "Uniform4ui": 605, + "Uniform4uiv": 606, + "VertexAttribI1iv": 607, + "VertexAttribI1uiv": 608, + "VertexAttribI4bv": 609, + "VertexAttribI4sv": 610, + "VertexAttribI4ubv": 611, + "VertexAttribI4usv": 612, + "VertexAttribIPointer": 613, + "PrimitiveRestartIndex": 614, + "TexBuffer": 615, + "FramebufferTexture": 616, + "GetBufferParameteri64v": 617, + "GetInteger64i_v": 618, + "VertexAttribDivisor": 619, + "MinSampleShading": 620, + "MemoryBarrierByRegion": 621, + "BindProgramARB": 622, + "DeleteProgramsARB": 623, + "GenProgramsARB": 624, + "GetProgramEnvParameterdvARB": 625, + "GetProgramEnvParameterfvARB": 626, + "GetProgramLocalParameterdvARB": 627, + "GetProgramLocalParameterfvARB": 628, + "GetProgramStringARB": 629, + "GetProgramivARB": 630, + "IsProgramARB": 631, + "ProgramEnvParameter4dARB": 632, + "ProgramEnvParameter4dvARB": 633, + "ProgramEnvParameter4fARB": 634, + "ProgramEnvParameter4fvARB": 635, + "ProgramLocalParameter4dARB": 636, + "ProgramLocalParameter4dvARB": 637, + "ProgramLocalParameter4fARB": 638, + "ProgramLocalParameter4fvARB": 639, + "ProgramStringARB": 640, + "VertexAttrib1fARB": 641, + "VertexAttrib1fvARB": 642, + "VertexAttrib2fARB": 643, + "VertexAttrib2fvARB": 644, + "VertexAttrib3fARB": 645, + "VertexAttrib3fvARB": 646, + "VertexAttrib4fARB": 647, + "VertexAttrib4fvARB": 648, + "AttachObjectARB": 649, + "CreateProgramObjectARB": 650, + "CreateShaderObjectARB": 651, + "DeleteObjectARB": 652, + "DetachObjectARB": 653, + "GetAttachedObjectsARB": 654, + "GetHandleARB": 655, + "GetInfoLogARB": 656, + "GetObjectParameterfvARB": 657, + "GetObjectParameterivARB": 658, + "DrawArraysInstancedARB": 659, + "DrawElementsInstancedARB": 660, + "BindFramebuffer": 661, + "BindRenderbuffer": 662, + "BlitFramebuffer": 663, + 
"CheckFramebufferStatus": 664, + "DeleteFramebuffers": 665, + "DeleteRenderbuffers": 666, + "FramebufferRenderbuffer": 667, + "FramebufferTexture1D": 668, + "FramebufferTexture2D": 669, + "FramebufferTexture3D": 670, + "FramebufferTextureLayer": 671, + "GenFramebuffers": 672, + "GenRenderbuffers": 673, + "GenerateMipmap": 674, + "GetFramebufferAttachmentParameteriv": 675, + "GetRenderbufferParameteriv": 676, + "IsFramebuffer": 677, + "IsRenderbuffer": 678, + "RenderbufferStorage": 679, + "RenderbufferStorageMultisample": 680, + "FlushMappedBufferRange": 681, + "MapBufferRange": 682, + "BindVertexArray": 683, + "DeleteVertexArrays": 684, + "GenVertexArrays": 685, + "IsVertexArray": 686, + "GetActiveUniformBlockName": 687, + "GetActiveUniformBlockiv": 688, + "GetActiveUniformName": 689, + "GetActiveUniformsiv": 690, + "GetUniformBlockIndex": 691, + "GetUniformIndices": 692, + "UniformBlockBinding": 693, + "CopyBufferSubData": 694, + "ClientWaitSync": 695, + "DeleteSync": 696, + "FenceSync": 697, + "GetInteger64v": 698, + "GetSynciv": 699, + "IsSync": 700, + "WaitSync": 701, + "DrawElementsBaseVertex": 702, + "DrawElementsInstancedBaseVertex": 703, + "DrawRangeElementsBaseVertex": 704, + "MultiDrawElementsBaseVertex": 705, + "ProvokingVertex": 706, + "GetMultisamplefv": 707, + "SampleMaski": 708, + "TexImage2DMultisample": 709, + "TexImage3DMultisample": 710, + "BlendEquationSeparateiARB": 711, + "BlendEquationiARB": 712, + "BlendFuncSeparateiARB": 713, + "BlendFunciARB": 714, + "BindFragDataLocationIndexed": 715, + "GetFragDataIndex": 716, + "BindSampler": 717, + "DeleteSamplers": 718, + "GenSamplers": 719, + "GetSamplerParameterIiv": 720, + "GetSamplerParameterIuiv": 721, + "GetSamplerParameterfv": 722, + "GetSamplerParameteriv": 723, + "IsSampler": 724, + "SamplerParameterIiv": 725, + "SamplerParameterIuiv": 726, + "SamplerParameterf": 727, + "SamplerParameterfv": 728, + "SamplerParameteri": 729, + "SamplerParameteriv": 730, + "GetQueryObjecti64v": 731, + 
"GetQueryObjectui64v": 732, + "QueryCounter": 733, + "ColorP3ui": 734, + "ColorP3uiv": 735, + "ColorP4ui": 736, + "ColorP4uiv": 737, + "MultiTexCoordP1ui": 738, + "MultiTexCoordP1uiv": 739, + "MultiTexCoordP2ui": 740, + "MultiTexCoordP2uiv": 741, + "MultiTexCoordP3ui": 742, + "MultiTexCoordP3uiv": 743, + "MultiTexCoordP4ui": 744, + "MultiTexCoordP4uiv": 745, + "NormalP3ui": 746, + "NormalP3uiv": 747, + "SecondaryColorP3ui": 748, + "SecondaryColorP3uiv": 749, + "TexCoordP1ui": 750, + "TexCoordP1uiv": 751, + "TexCoordP2ui": 752, + "TexCoordP2uiv": 753, + "TexCoordP3ui": 754, + "TexCoordP3uiv": 755, + "TexCoordP4ui": 756, + "TexCoordP4uiv": 757, + "VertexAttribP1ui": 758, + "VertexAttribP1uiv": 759, + "VertexAttribP2ui": 760, + "VertexAttribP2uiv": 761, + "VertexAttribP3ui": 762, + "VertexAttribP3uiv": 763, + "VertexAttribP4ui": 764, + "VertexAttribP4uiv": 765, + "VertexP2ui": 766, + "VertexP2uiv": 767, + "VertexP3ui": 768, + "VertexP3uiv": 769, + "VertexP4ui": 770, + "VertexP4uiv": 771, + "DrawArraysIndirect": 772, + "DrawElementsIndirect": 773, + "GetUniformdv": 774, + "Uniform1d": 775, + "Uniform1dv": 776, + "Uniform2d": 777, + "Uniform2dv": 778, + "Uniform3d": 779, + "Uniform3dv": 780, + "Uniform4d": 781, + "Uniform4dv": 782, + "UniformMatrix2dv": 783, + "UniformMatrix2x3dv": 784, + "UniformMatrix2x4dv": 785, + "UniformMatrix3dv": 786, + "UniformMatrix3x2dv": 787, + "UniformMatrix3x4dv": 788, + "UniformMatrix4dv": 789, + "UniformMatrix4x2dv": 790, + "UniformMatrix4x3dv": 791, + "GetActiveSubroutineName": 792, + "GetActiveSubroutineUniformName": 793, + "GetActiveSubroutineUniformiv": 794, + "GetProgramStageiv": 795, + "GetSubroutineIndex": 796, + "GetSubroutineUniformLocation": 797, + "GetUniformSubroutineuiv": 798, + "UniformSubroutinesuiv": 799, + "PatchParameterfv": 800, + "PatchParameteri": 801, + "BindTransformFeedback": 802, + "DeleteTransformFeedbacks": 803, + "DrawTransformFeedback": 804, + "GenTransformFeedbacks": 805, + "IsTransformFeedback": 806, + 
"PauseTransformFeedback": 807, + "ResumeTransformFeedback": 808, + "BeginQueryIndexed": 809, + "DrawTransformFeedbackStream": 810, + "EndQueryIndexed": 811, + "GetQueryIndexediv": 812, + "ClearDepthf": 813, + "DepthRangef": 814, + "GetShaderPrecisionFormat": 815, + "ReleaseShaderCompiler": 816, + "ShaderBinary": 817, + "GetProgramBinary": 818, + "ProgramBinary": 819, + "ProgramParameteri": 820, + "GetVertexAttribLdv": 821, + "VertexAttribL1d": 822, + "VertexAttribL1dv": 823, + "VertexAttribL2d": 824, + "VertexAttribL2dv": 825, + "VertexAttribL3d": 826, + "VertexAttribL3dv": 827, + "VertexAttribL4d": 828, + "VertexAttribL4dv": 829, + "VertexAttribLPointer": 830, + "DepthRangeArrayv": 831, + "DepthRangeIndexed": 832, + "GetDoublei_v": 833, + "GetFloati_v": 834, + "ScissorArrayv": 835, + "ScissorIndexed": 836, + "ScissorIndexedv": 837, + "ViewportArrayv": 838, + "ViewportIndexedf": 839, + "ViewportIndexedfv": 840, + "GetGraphicsResetStatusARB": 841, + "GetnColorTableARB": 842, + "GetnCompressedTexImageARB": 843, + "GetnConvolutionFilterARB": 844, + "GetnHistogramARB": 845, + "GetnMapdvARB": 846, + "GetnMapfvARB": 847, + "GetnMapivARB": 848, + "GetnMinmaxARB": 849, + "GetnPixelMapfvARB": 850, + "GetnPixelMapuivARB": 851, + "GetnPixelMapusvARB": 852, + "GetnPolygonStippleARB": 853, + "GetnSeparableFilterARB": 854, + "GetnTexImageARB": 855, + "GetnUniformdvARB": 856, + "GetnUniformfvARB": 857, + "GetnUniformivARB": 858, + "GetnUniformuivARB": 859, + "ReadnPixelsARB": 860, + "DrawArraysInstancedBaseInstance": 861, + "DrawElementsInstancedBaseInstance": 862, + "DrawElementsInstancedBaseVertexBaseInstance": 863, + "DrawTransformFeedbackInstanced": 864, + "DrawTransformFeedbackStreamInstanced": 865, + "GetInternalformativ": 866, + "GetActiveAtomicCounterBufferiv": 867, + "BindImageTexture": 868, + "MemoryBarrier": 869, + "TexStorage1D": 870, + "TexStorage2D": 871, + "TexStorage3D": 872, + "TextureStorage1DEXT": 873, + "TextureStorage2DEXT": 874, + "TextureStorage3DEXT": 875, 
+ "ClearBufferData": 876, + "ClearBufferSubData": 877, + "DispatchCompute": 878, + "DispatchComputeIndirect": 879, + "CopyImageSubData": 880, + "TextureView": 881, + "BindVertexBuffer": 882, + "VertexAttribBinding": 883, + "VertexAttribFormat": 884, + "VertexAttribIFormat": 885, + "VertexAttribLFormat": 886, + "VertexBindingDivisor": 887, + "FramebufferParameteri": 888, + "GetFramebufferParameteriv": 889, + "GetInternalformati64v": 890, + "MultiDrawArraysIndirect": 891, + "MultiDrawElementsIndirect": 892, + "GetProgramInterfaceiv": 893, + "GetProgramResourceIndex": 894, + "GetProgramResourceLocation": 895, + "GetProgramResourceLocationIndex": 896, + "GetProgramResourceName": 897, + "GetProgramResourceiv": 898, + "ShaderStorageBlockBinding": 899, + "TexBufferRange": 900, + "TexStorage2DMultisample": 901, + "TexStorage3DMultisample": 902, + "BufferStorage": 903, + "ClearTexImage": 904, + "ClearTexSubImage": 905, + "BindBuffersBase": 906, + "BindBuffersRange": 907, + "BindImageTextures": 908, + "BindSamplers": 909, + "BindTextures": 910, + "BindVertexBuffers": 911, + "GetImageHandleARB": 912, + "GetTextureHandleARB": 913, + "GetTextureSamplerHandleARB": 914, + "GetVertexAttribLui64vARB": 915, + "IsImageHandleResidentARB": 916, + "IsTextureHandleResidentARB": 917, + "MakeImageHandleNonResidentARB": 918, + "MakeImageHandleResidentARB": 919, + "MakeTextureHandleNonResidentARB": 920, + "MakeTextureHandleResidentARB": 921, + "ProgramUniformHandleui64ARB": 922, + "ProgramUniformHandleui64vARB": 923, + "UniformHandleui64ARB": 924, + "UniformHandleui64vARB": 925, + "VertexAttribL1ui64ARB": 926, + "VertexAttribL1ui64vARB": 927, + "DispatchComputeGroupSizeARB": 928, + "MultiDrawArraysIndirectCountARB": 929, + "MultiDrawElementsIndirectCountARB": 930, + "ClipControl": 931, + "BindTextureUnit": 932, + "BlitNamedFramebuffer": 933, + "CheckNamedFramebufferStatus": 934, + "ClearNamedBufferData": 935, + "ClearNamedBufferSubData": 936, + "ClearNamedFramebufferfi": 937, + 
"ClearNamedFramebufferfv": 938, + "ClearNamedFramebufferiv": 939, + "ClearNamedFramebufferuiv": 940, + "CompressedTextureSubImage1D": 941, + "CompressedTextureSubImage2D": 942, + "CompressedTextureSubImage3D": 943, + "CopyNamedBufferSubData": 944, + "CopyTextureSubImage1D": 945, + "CopyTextureSubImage2D": 946, + "CopyTextureSubImage3D": 947, + "CreateBuffers": 948, + "CreateFramebuffers": 949, + "CreateProgramPipelines": 950, + "CreateQueries": 951, + "CreateRenderbuffers": 952, + "CreateSamplers": 953, + "CreateTextures": 954, + "CreateTransformFeedbacks": 955, + "CreateVertexArrays": 956, + "DisableVertexArrayAttrib": 957, + "EnableVertexArrayAttrib": 958, + "FlushMappedNamedBufferRange": 959, + "GenerateTextureMipmap": 960, + "GetCompressedTextureImage": 961, + "GetNamedBufferParameteri64v": 962, + "GetNamedBufferParameteriv": 963, + "GetNamedBufferPointerv": 964, + "GetNamedBufferSubData": 965, + "GetNamedFramebufferAttachmentParameteriv": 966, + "GetNamedFramebufferParameteriv": 967, + "GetNamedRenderbufferParameteriv": 968, + "GetQueryBufferObjecti64v": 969, + "GetQueryBufferObjectiv": 970, + "GetQueryBufferObjectui64v": 971, + "GetQueryBufferObjectuiv": 972, + "GetTextureImage": 973, + "GetTextureLevelParameterfv": 974, + "GetTextureLevelParameteriv": 975, + "GetTextureParameterIiv": 976, + "GetTextureParameterIuiv": 977, + "GetTextureParameterfv": 978, + "GetTextureParameteriv": 979, + "GetTransformFeedbacki64_v": 980, + "GetTransformFeedbacki_v": 981, + "GetTransformFeedbackiv": 982, + "GetVertexArrayIndexed64iv": 983, + "GetVertexArrayIndexediv": 984, + "GetVertexArrayiv": 985, + "InvalidateNamedFramebufferData": 986, + "InvalidateNamedFramebufferSubData": 987, + "MapNamedBuffer": 988, + "MapNamedBufferRange": 989, + "NamedBufferData": 990, + "NamedBufferStorage": 991, + "NamedBufferSubData": 992, + "NamedFramebufferDrawBuffer": 993, + "NamedFramebufferDrawBuffers": 994, + "NamedFramebufferParameteri": 995, + "NamedFramebufferReadBuffer": 996, + 
"NamedFramebufferRenderbuffer": 997, + "NamedFramebufferTexture": 998, + "NamedFramebufferTextureLayer": 999, + "NamedRenderbufferStorage": 1000, + "NamedRenderbufferStorageMultisample": 1001, + "TextureBuffer": 1002, + "TextureBufferRange": 1003, + "TextureParameterIiv": 1004, + "TextureParameterIuiv": 1005, + "TextureParameterf": 1006, + "TextureParameterfv": 1007, + "TextureParameteri": 1008, + "TextureParameteriv": 1009, + "TextureStorage1D": 1010, + "TextureStorage2D": 1011, + "TextureStorage2DMultisample": 1012, + "TextureStorage3D": 1013, + "TextureStorage3DMultisample": 1014, + "TextureSubImage1D": 1015, + "TextureSubImage2D": 1016, + "TextureSubImage3D": 1017, + "TransformFeedbackBufferBase": 1018, + "TransformFeedbackBufferRange": 1019, + "UnmapNamedBuffer": 1020, + "VertexArrayAttribBinding": 1021, + "VertexArrayAttribFormat": 1022, + "VertexArrayAttribIFormat": 1023, + "VertexArrayAttribLFormat": 1024, + "VertexArrayBindingDivisor": 1025, + "VertexArrayElementBuffer": 1026, + "VertexArrayVertexBuffer": 1027, + "VertexArrayVertexBuffers": 1028, + "GetCompressedTextureSubImage": 1029, + "GetTextureSubImage": 1030, + "BufferPageCommitmentARB": 1031, + "NamedBufferPageCommitmentARB": 1032, + "GetUniformi64vARB": 1033, + "GetUniformui64vARB": 1034, + "GetnUniformi64vARB": 1035, + "GetnUniformui64vARB": 1036, + "ProgramUniform1i64ARB": 1037, + "ProgramUniform1i64vARB": 1038, + "ProgramUniform1ui64ARB": 1039, + "ProgramUniform1ui64vARB": 1040, + "ProgramUniform2i64ARB": 1041, + "ProgramUniform2i64vARB": 1042, + "ProgramUniform2ui64ARB": 1043, + "ProgramUniform2ui64vARB": 1044, + "ProgramUniform3i64ARB": 1045, + "ProgramUniform3i64vARB": 1046, + "ProgramUniform3ui64ARB": 1047, + "ProgramUniform3ui64vARB": 1048, + "ProgramUniform4i64ARB": 1049, + "ProgramUniform4i64vARB": 1050, + "ProgramUniform4ui64ARB": 1051, + "ProgramUniform4ui64vARB": 1052, + "Uniform1i64ARB": 1053, + "Uniform1i64vARB": 1054, + "Uniform1ui64ARB": 1055, + "Uniform1ui64vARB": 1056, + 
"Uniform2i64ARB": 1057, + "Uniform2i64vARB": 1058, + "Uniform2ui64ARB": 1059, + "Uniform2ui64vARB": 1060, + "Uniform3i64ARB": 1061, + "Uniform3i64vARB": 1062, + "Uniform3ui64ARB": 1063, + "Uniform3ui64vARB": 1064, + "Uniform4i64ARB": 1065, + "Uniform4i64vARB": 1066, + "Uniform4ui64ARB": 1067, + "Uniform4ui64vARB": 1068, + "EvaluateDepthValuesARB": 1069, + "FramebufferSampleLocationsfvARB": 1070, + "NamedFramebufferSampleLocationsfvARB": 1071, + "SpecializeShaderARB": 1072, + "InvalidateBufferData": 1073, + "InvalidateBufferSubData": 1074, + "InvalidateFramebuffer": 1075, + "InvalidateSubFramebuffer": 1076, + "InvalidateTexImage": 1077, + "InvalidateTexSubImage": 1078, + "DrawTexfOES": 1079, + "DrawTexfvOES": 1080, + "DrawTexiOES": 1081, + "DrawTexivOES": 1082, + "DrawTexsOES": 1083, + "DrawTexsvOES": 1084, + "DrawTexxOES": 1085, + "DrawTexxvOES": 1086, + "PointSizePointerOES": 1087, + "QueryMatrixxOES": 1088, + "SampleMaskSGIS": 1089, + "SamplePatternSGIS": 1090, + "ColorPointerEXT": 1091, + "EdgeFlagPointerEXT": 1092, + "IndexPointerEXT": 1093, + "NormalPointerEXT": 1094, + "TexCoordPointerEXT": 1095, + "VertexPointerEXT": 1096, + "DiscardFramebufferEXT": 1097, + "ActiveShaderProgram": 1098, + "BindProgramPipeline": 1099, + "CreateShaderProgramv": 1100, + "DeleteProgramPipelines": 1101, + "GenProgramPipelines": 1102, + "GetProgramPipelineInfoLog": 1103, + "GetProgramPipelineiv": 1104, + "IsProgramPipeline": 1105, + "LockArraysEXT": 1106, + "ProgramUniform1d": 1107, + "ProgramUniform1dv": 1108, + "ProgramUniform1f": 1109, + "ProgramUniform1fv": 1110, + "ProgramUniform1i": 1111, + "ProgramUniform1iv": 1112, + "ProgramUniform1ui": 1113, + "ProgramUniform1uiv": 1114, + "ProgramUniform2d": 1115, + "ProgramUniform2dv": 1116, + "ProgramUniform2f": 1117, + "ProgramUniform2fv": 1118, + "ProgramUniform2i": 1119, + "ProgramUniform2iv": 1120, + "ProgramUniform2ui": 1121, + "ProgramUniform2uiv": 1122, + "ProgramUniform3d": 1123, + "ProgramUniform3dv": 1124, + 
"ProgramUniform3f": 1125, + "ProgramUniform3fv": 1126, + "ProgramUniform3i": 1127, + "ProgramUniform3iv": 1128, + "ProgramUniform3ui": 1129, + "ProgramUniform3uiv": 1130, + "ProgramUniform4d": 1131, + "ProgramUniform4dv": 1132, + "ProgramUniform4f": 1133, + "ProgramUniform4fv": 1134, + "ProgramUniform4i": 1135, + "ProgramUniform4iv": 1136, + "ProgramUniform4ui": 1137, + "ProgramUniform4uiv": 1138, + "ProgramUniformMatrix2dv": 1139, + "ProgramUniformMatrix2fv": 1140, + "ProgramUniformMatrix2x3dv": 1141, + "ProgramUniformMatrix2x3fv": 1142, + "ProgramUniformMatrix2x4dv": 1143, + "ProgramUniformMatrix2x4fv": 1144, + "ProgramUniformMatrix3dv": 1145, + "ProgramUniformMatrix3fv": 1146, + "ProgramUniformMatrix3x2dv": 1147, + "ProgramUniformMatrix3x2fv": 1148, + "ProgramUniformMatrix3x4dv": 1149, + "ProgramUniformMatrix3x4fv": 1150, + "ProgramUniformMatrix4dv": 1151, + "ProgramUniformMatrix4fv": 1152, + "ProgramUniformMatrix4x2dv": 1153, + "ProgramUniformMatrix4x2fv": 1154, + "ProgramUniformMatrix4x3dv": 1155, + "ProgramUniformMatrix4x3fv": 1156, + "UnlockArraysEXT": 1157, + "UseProgramStages": 1158, + "ValidateProgramPipeline": 1159, + "FramebufferTexture2DMultisampleEXT": 1160, + "DebugMessageCallback": 1161, + "DebugMessageControl": 1162, + "DebugMessageInsert": 1163, + "GetDebugMessageLog": 1164, + "GetObjectLabel": 1165, + "GetObjectPtrLabel": 1166, + "ObjectLabel": 1167, + "ObjectPtrLabel": 1168, + "PopDebugGroup": 1169, + "PushDebugGroup": 1170, + "SecondaryColor3fEXT": 1171, + "SecondaryColor3fvEXT": 1172, + "MultiDrawElementsEXT": 1173, + "FogCoordfEXT": 1174, + "FogCoordfvEXT": 1175, + "ResizeBuffersMESA": 1176, + "WindowPos4dMESA": 1177, + "WindowPos4dvMESA": 1178, + "WindowPos4fMESA": 1179, + "WindowPos4fvMESA": 1180, + "WindowPos4iMESA": 1181, + "WindowPos4ivMESA": 1182, + "WindowPos4sMESA": 1183, + "WindowPos4svMESA": 1184, + "MultiModeDrawArraysIBM": 1185, + "MultiModeDrawElementsIBM": 1186, + "AreProgramsResidentNV": 1187, + "ExecuteProgramNV": 1188, + 
"GetProgramParameterdvNV": 1189, + "GetProgramParameterfvNV": 1190, + "GetProgramStringNV": 1191, + "GetProgramivNV": 1192, + "GetTrackMatrixivNV": 1193, + "GetVertexAttribdvNV": 1194, + "GetVertexAttribfvNV": 1195, + "GetVertexAttribivNV": 1196, + "LoadProgramNV": 1197, + "ProgramParameters4dvNV": 1198, + "ProgramParameters4fvNV": 1199, + "RequestResidentProgramsNV": 1200, + "TrackMatrixNV": 1201, + "VertexAttrib1dNV": 1202, + "VertexAttrib1dvNV": 1203, + "VertexAttrib1fNV": 1204, + "VertexAttrib1fvNV": 1205, + "VertexAttrib1sNV": 1206, + "VertexAttrib1svNV": 1207, + "VertexAttrib2dNV": 1208, + "VertexAttrib2dvNV": 1209, + "VertexAttrib2fNV": 1210, + "VertexAttrib2fvNV": 1211, + "VertexAttrib2sNV": 1212, + "VertexAttrib2svNV": 1213, + "VertexAttrib3dNV": 1214, + "VertexAttrib3dvNV": 1215, + "VertexAttrib3fNV": 1216, + "VertexAttrib3fvNV": 1217, + "VertexAttrib3sNV": 1218, + "VertexAttrib3svNV": 1219, + "VertexAttrib4dNV": 1220, + "VertexAttrib4dvNV": 1221, + "VertexAttrib4fNV": 1222, + "VertexAttrib4fvNV": 1223, + "VertexAttrib4sNV": 1224, + "VertexAttrib4svNV": 1225, + "VertexAttrib4ubNV": 1226, + "VertexAttrib4ubvNV": 1227, + "VertexAttribPointerNV": 1228, + "VertexAttribs1dvNV": 1229, + "VertexAttribs1fvNV": 1230, + "VertexAttribs1svNV": 1231, + "VertexAttribs2dvNV": 1232, + "VertexAttribs2fvNV": 1233, + "VertexAttribs2svNV": 1234, + "VertexAttribs3dvNV": 1235, + "VertexAttribs3fvNV": 1236, + "VertexAttribs3svNV": 1237, + "VertexAttribs4dvNV": 1238, + "VertexAttribs4fvNV": 1239, + "VertexAttribs4svNV": 1240, + "VertexAttribs4ubvNV": 1241, + "GetTexBumpParameterfvATI": 1242, + "GetTexBumpParameterivATI": 1243, + "TexBumpParameterfvATI": 1244, + "TexBumpParameterivATI": 1245, + "AlphaFragmentOp1ATI": 1246, + "AlphaFragmentOp2ATI": 1247, + "AlphaFragmentOp3ATI": 1248, + "BeginFragmentShaderATI": 1249, + "BindFragmentShaderATI": 1250, + "ColorFragmentOp1ATI": 1251, + "ColorFragmentOp2ATI": 1252, + "ColorFragmentOp3ATI": 1253, + "DeleteFragmentShaderATI": 1254, + 
"EndFragmentShaderATI": 1255, + "GenFragmentShadersATI": 1256, + "PassTexCoordATI": 1257, + "SampleMapATI": 1258, + "SetFragmentShaderConstantATI": 1259, + "DepthRangeArrayfvOES": 1260, + "DepthRangeIndexedfOES": 1261, + "ActiveStencilFaceEXT": 1262, + "GetProgramNamedParameterdvNV": 1263, + "GetProgramNamedParameterfvNV": 1264, + "ProgramNamedParameter4dNV": 1265, + "ProgramNamedParameter4dvNV": 1266, + "ProgramNamedParameter4fNV": 1267, + "ProgramNamedParameter4fvNV": 1268, + "PrimitiveRestartNV": 1269, + "GetTexGenxvOES": 1270, + "TexGenxOES": 1271, + "TexGenxvOES": 1272, + "DepthBoundsEXT": 1273, + "BindFramebufferEXT": 1274, + "BindRenderbufferEXT": 1275, + "StringMarkerGREMEDY": 1276, + "BufferParameteriAPPLE": 1277, + "FlushMappedBufferRangeAPPLE": 1278, + "VertexAttribI1iEXT": 1279, + "VertexAttribI1uiEXT": 1280, + "VertexAttribI2iEXT": 1281, + "VertexAttribI2ivEXT": 1282, + "VertexAttribI2uiEXT": 1283, + "VertexAttribI2uivEXT": 1284, + "VertexAttribI3iEXT": 1285, + "VertexAttribI3ivEXT": 1286, + "VertexAttribI3uiEXT": 1287, + "VertexAttribI3uivEXT": 1288, + "VertexAttribI4iEXT": 1289, + "VertexAttribI4ivEXT": 1290, + "VertexAttribI4uiEXT": 1291, + "VertexAttribI4uivEXT": 1292, + "ClearColorIiEXT": 1293, + "ClearColorIuiEXT": 1294, + "BindBufferOffsetEXT": 1295, + "BeginPerfMonitorAMD": 1296, + "DeletePerfMonitorsAMD": 1297, + "EndPerfMonitorAMD": 1298, + "GenPerfMonitorsAMD": 1299, + "GetPerfMonitorCounterDataAMD": 1300, + "GetPerfMonitorCounterInfoAMD": 1301, + "GetPerfMonitorCounterStringAMD": 1302, + "GetPerfMonitorCountersAMD": 1303, + "GetPerfMonitorGroupStringAMD": 1304, + "GetPerfMonitorGroupsAMD": 1305, + "SelectPerfMonitorCountersAMD": 1306, + "GetObjectParameterivAPPLE": 1307, + "ObjectPurgeableAPPLE": 1308, + "ObjectUnpurgeableAPPLE": 1309, + "ActiveProgramEXT": 1310, + "CreateShaderProgramEXT": 1311, + "UseShaderProgramEXT": 1312, + "TextureBarrierNV": 1313, + "VDPAUFiniNV": 1314, + "VDPAUGetSurfaceivNV": 1315, + "VDPAUInitNV": 1316, + 
"VDPAUIsSurfaceNV": 1317, + "VDPAUMapSurfacesNV": 1318, + "VDPAURegisterOutputSurfaceNV": 1319, + "VDPAURegisterVideoSurfaceNV": 1320, + "VDPAUSurfaceAccessNV": 1321, + "VDPAUUnmapSurfacesNV": 1322, + "VDPAUUnregisterSurfaceNV": 1323, + "BeginPerfQueryINTEL": 1324, + "CreatePerfQueryINTEL": 1325, + "DeletePerfQueryINTEL": 1326, + "EndPerfQueryINTEL": 1327, + "GetFirstPerfQueryIdINTEL": 1328, + "GetNextPerfQueryIdINTEL": 1329, + "GetPerfCounterInfoINTEL": 1330, + "GetPerfQueryDataINTEL": 1331, + "GetPerfQueryIdByNameINTEL": 1332, + "GetPerfQueryInfoINTEL": 1333, + "PolygonOffsetClampEXT": 1334, + "SubpixelPrecisionBiasNV": 1335, + "ConservativeRasterParameterfNV": 1336, + "ConservativeRasterParameteriNV": 1337, + "WindowRectanglesEXT": 1338, + "BufferStorageMemEXT": 1339, + "CreateMemoryObjectsEXT": 1340, + "DeleteMemoryObjectsEXT": 1341, + "DeleteSemaphoresEXT": 1342, + "GenSemaphoresEXT": 1343, + "GetMemoryObjectParameterivEXT": 1344, + "GetSemaphoreParameterui64vEXT": 1345, + "GetUnsignedBytei_vEXT": 1346, + "GetUnsignedBytevEXT": 1347, + "IsMemoryObjectEXT": 1348, + "IsSemaphoreEXT": 1349, + "MemoryObjectParameterivEXT": 1350, + "NamedBufferStorageMemEXT": 1351, + "SemaphoreParameterui64vEXT": 1352, + "SignalSemaphoreEXT": 1353, + "TexStorageMem1DEXT": 1354, + "TexStorageMem2DEXT": 1355, + "TexStorageMem2DMultisampleEXT": 1356, + "TexStorageMem3DEXT": 1357, + "TexStorageMem3DMultisampleEXT": 1358, + "TextureStorageMem1DEXT": 1359, + "TextureStorageMem2DEXT": 1360, + "TextureStorageMem2DMultisampleEXT": 1361, + "TextureStorageMem3DEXT": 1362, + "TextureStorageMem3DMultisampleEXT": 1363, + "WaitSemaphoreEXT": 1364, + "ImportMemoryFdEXT": 1365, + "ImportSemaphoreFdEXT": 1366, + "FramebufferFetchBarrierEXT": 1367, + "NamedRenderbufferStorageMultisampleAdvancedAMD": 1368, + "RenderbufferStorageMultisampleAdvancedAMD": 1369, + "StencilFuncSeparateATI": 1370, + "ProgramEnvParameters4fvEXT": 1371, + "ProgramLocalParameters4fvEXT": 1372, + 
"EGLImageTargetRenderbufferStorageOES": 1373, + "EGLImageTargetTexture2DOES": 1374, + "AlphaFuncx": 1375, + "ClearColorx": 1376, + "ClearDepthx": 1377, + "Color4x": 1378, + "DepthRangex": 1379, + "Fogx": 1380, + "Fogxv": 1381, + "Frustumf": 1382, + "Frustumx": 1383, + "LightModelx": 1384, + "LightModelxv": 1385, + "Lightx": 1386, + "Lightxv": 1387, + "LineWidthx": 1388, + "LoadMatrixx": 1389, + "Materialx": 1390, + "Materialxv": 1391, + "MultMatrixx": 1392, + "MultiTexCoord4x": 1393, + "Normal3x": 1394, + "Orthof": 1395, + "Orthox": 1396, + "PointSizex": 1397, + "PolygonOffsetx": 1398, + "Rotatex": 1399, + "SampleCoveragex": 1400, + "Scalex": 1401, + "TexEnvx": 1402, + "TexEnvxv": 1403, + "TexParameterx": 1404, + "Translatex": 1405, + "ClipPlanef": 1406, + "ClipPlanex": 1407, + "GetClipPlanef": 1408, + "GetClipPlanex": 1409, + "GetFixedv": 1410, + "GetLightxv": 1411, + "GetMaterialxv": 1412, + "GetTexEnvxv": 1413, + "GetTexParameterxv": 1414, + "PointParameterx": 1415, + "PointParameterxv": 1416, + "TexParameterxv": 1417, + "BlendBarrier": 1418, + "PrimitiveBoundingBox": 1419, } functions = [ diff -Nru mesa-18.3.3/src/mapi/glapi/meson.build mesa-19.0.1/src/mapi/glapi/meson.build --- mesa-18.3.3/src/mapi/glapi/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mapi/glapi/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -87,6 +87,7 @@ include_directories : [inc_include, inc_src, inc_mesa, inc_mapi], link_with : [libglapi_static], dependencies : [idep_gtest, dep_thread], - ) + ), + suite : ['mapi'], ) endif diff -Nru mesa-18.3.3/src/mapi/Makefile.am mesa-19.0.1/src/mapi/Makefile.am --- mesa-18.3.3/src/mapi/Makefile.am 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mapi/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -31,6 +31,8 @@ pkgconfig_DATA = EXTRA_DIST = \ + new/genCommon.py \ + new/gen_gldispatch_mapi.py \ es1api/ABI-check \ es2api/ABI-check \ mapi_abi.py \ @@ -61,11 +63,20 @@ MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D) PYTHON_GEN = 
$(AM_V_GEN)$(PYTHON) $(PYTHON_FLAGS) -glapi_gen_mapi_deps := \ +shared_glapi_gen_mapi_deps := \ mapi_abi.py \ $(wildcard $(top_srcdir)/src/mapi/glapi/gen/*.xml) \ $(wildcard $(top_srcdir)/src/mapi/glapi/gen/*.py) +glapi_gen_gl_xml := \ + $(srcdir)/glapi/registry/gl.xml +glapi_gen_mapi_script = $(srcdir)/new/gen_gldispatch_mapi.py +glapi_gen_mapi_deps = \ + $(glapi_gen_mapi_script) \ + $(srcdir)/new/genCommon.py \ + $(glapi_gen_gl_xml) +glapi_gen_mapi = $(AM_V_GEN)$(PYTHON) $(PYTHON_FLAGS) $(glapi_gen_mapi_script) + if HAVE_SHARED_GLAPI BUILT_SOURCES += shared-glapi/glapi_mapi_tmp.h @@ -99,7 +110,7 @@ $(top_builddir)/src/gtest/libgtest.la endif -shared-glapi/glapi_mapi_tmp.h : glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps) +shared-glapi/glapi_mapi_tmp.h : glapi/gen/gl_and_es_API.xml $(shared_glapi_gen_mapi_deps) $(MKDIR_GEN) $(PYTHON_GEN) $(srcdir)/mapi_abi.py --printer shared-glapi \ $(srcdir)/glapi/gen/gl_and_es_API.xml > $@ @@ -198,10 +209,9 @@ es1api_libGLESv1_CM_la_LIBADD += shared-glapi/libglapi.la endif -es1api/glapi_mapi_tmp.h: glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps) +es1api/glapi_mapi_tmp.h: $(glapi_gen_mapi_deps) $(MKDIR_GEN) - $(PYTHON_GEN) $(srcdir)/mapi_abi.py --printer es1api \ - $(srcdir)/glapi/gen/gl_and_es_API.xml > $@ + $(glapi_gen_mapi) glesv1 $(glapi_gen_gl_xml) > $@ if HAVE_OPENGL_ES2 TESTS += es2api/ABI-check @@ -243,10 +253,9 @@ es2api_libGLESv2_la_LIBADD += shared-glapi/libglapi.la endif -es2api/glapi_mapi_tmp.h: glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps) +es2api/glapi_mapi_tmp.h: $(glapi_gen_mapi_deps) $(MKDIR_GEN) - $(PYTHON_GEN) $(srcdir)/mapi_abi.py --printer es2api \ - $(srcdir)/glapi/gen/gl_and_es_API.xml > $@ + $(glapi_gen_mapi) glesv2 $(glapi_gen_gl_xml) > $@ include $(top_srcdir)/install-lib-links.mk diff -Nru mesa-18.3.3/src/mapi/mapi_abi.py mesa-19.0.1/src/mapi/mapi_abi.py --- mesa-18.3.3/src/mapi/mapi_abi.py 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mapi/mapi_abi.py 2019-03-31 
23:16:37.000000000 +0000 @@ -29,8 +29,8 @@ import sys # make it possible to import glapi import os -GLAPI = os.path.join(".", os.path.dirname(sys.argv[0]), "glapi/gen") -sys.path.append(GLAPI) +GLAPI = os.path.join(".", os.path.dirname(__file__), "glapi", "gen") +sys.path.insert(0, GLAPI) from operator import attrgetter import re @@ -184,75 +184,6 @@ return entries -def abi_parse_line(line): - cols = [col.strip() for col in line.split(',')] - - attrs = { - 'slot': -1, - 'hidden': False, - 'alias': None, - 'handcode': None, - } - - # extract attributes from the first column - vals = cols[0].split(':') - while len(vals) > 1: - val = vals.pop(0) - if val.startswith('slot='): - attrs['slot'] = int(val[5:]) - elif val == 'hidden': - attrs['hidden'] = True - elif val.startswith('alias='): - attrs['alias'] = val[6:] - elif val.startswith('handcode='): - attrs['handcode'] = val[9:] - elif not val: - pass - else: - raise Exception('unknown attribute %s' % val) - cols[0] = vals[0] - - return (attrs, cols) - -def abi_parse(filename): - """Parse a CSV file for ABI entries.""" - fp = open(filename) if filename != '-' else sys.stdin - lines = [line.strip() for line in fp.readlines() - if not line.startswith('#') and line.strip()] - - entry_dict = {} - next_slot = 0 - for line in lines: - attrs, cols = abi_parse_line(line) - - # post-process attributes - if attrs['alias']: - try: - alias = entry_dict[attrs['alias']] - except KeyError: - raise Exception('failed to alias %s' % attrs['alias']) - if alias.alias: - raise Exception('recursive alias %s' % ent.name) - slot = alias.slot - attrs['alias'] = alias - else: - slot = next_slot - next_slot += 1 - - if attrs['slot'] < 0: - attrs['slot'] = slot - elif attrs['slot'] != slot: - raise Exception('invalid slot in %s' % (line)) - - ent = ABIEntry(cols, attrs) - if ent.name in entry_dict: - raise Exception('%s is duplicated' % (ent.name)) - entry_dict[ent.name] = ent - - entries = sorted(entry_dict.values()) - - return entries - def 
abi_sanity_check(entries): if not entries: return @@ -334,7 +265,8 @@ if not self.need_entry_point(ent): continue export = self.api_call if not ent.hidden else '' - decls.append(self._c_decl(ent, prefix, True, export) + ';') + if not ent.hidden or not self.lib_need_non_hidden_entries: + decls.append(self._c_decl(ent, prefix, True, export) + ';') return "\n".join(decls) @@ -684,62 +616,6 @@ return header -class ES1APIPrinter(GLAPIPrinter): - """OpenGL ES 1.x API Printer""" - - def __init__(self, entries): - super(ES1APIPrinter, self).__init__(entries) - self.prefix_lib = 'gl' - self.prefix_warn = 'gl' - - def _override_for_api(self, ent): - if ent.xml_data is None: - raise Exception('ES2 API printer requires XML input') - ent.hidden = (ent.name not in \ - ent.xml_data.entry_points_for_api_version('es1')) \ - or ent.hidden - ent.handcode = False - - def _get_c_header(self): - header = """#ifndef _GLAPI_TMP_H_ -#define _GLAPI_TMP_H_ -typedef int GLclampx; -#endif /* _GLAPI_TMP_H_ */""" - - return header - -class ES2APIPrinter(GLAPIPrinter): - """OpenGL ES 2.x API Printer""" - - def __init__(self, entries): - super(ES2APIPrinter, self).__init__(entries) - self.prefix_lib = 'gl' - self.prefix_warn = 'gl' - - def _override_for_api(self, ent): - if ent.xml_data is None: - raise Exception('ES2 API printer requires XML input') - ent.hidden = (ent.name not in \ - ent.xml_data.entry_points_for_api_version('es2')) \ - or ent.hidden - - # This is hella ugly. The same-named function in desktop OpenGL is - # hidden, but it needs to be exposed by libGLESv2 for OpenGL ES 3.0. - # There's no way to express in the XML that a function should be be - # hidden in one API but exposed in another. 
- if ent.name == 'GetInternalformativ': - ent.hidden = False - - ent.handcode = False - - def _get_c_header(self): - header = """#ifndef _GLAPI_TMP_H_ -#define _GLAPI_TMP_H_ -typedef int GLclampx; -#endif /* _GLAPI_TMP_H_ */""" - - return header - class SharedGLAPIPrinter(GLAPIPrinter): """Shared GLAPI API Printer""" @@ -770,7 +646,7 @@ def parse_args(): printers = ['glapi', 'es1api', 'es2api', 'shared-glapi'] - parser = OptionParser(usage='usage: %prog [options] ') + parser = OptionParser(usage='usage: %prog [options] ') parser.add_option('-p', '--printer', dest='printer', help='printer to use: %s' % (", ".join(printers))) @@ -779,22 +655,21 @@ parser.print_help() sys.exit(1) + if not args[0].endswith('.xml'): + parser.print_help() + sys.exit(1) + return (args[0], options) def main(): printers = { 'glapi': GLAPIPrinter, - 'es1api': ES1APIPrinter, - 'es2api': ES2APIPrinter, 'shared-glapi': SharedGLAPIPrinter, } filename, options = parse_args() - if filename.endswith('.xml'): - entries = abi_parse_xml(filename) - else: - entries = abi_parse(filename) + entries = abi_parse_xml(filename) abi_sanity_check(entries) printer = printers[options.printer](entries) diff -Nru mesa-18.3.3/src/mapi/meson.build mesa-19.0.1/src/mapi/meson.build --- mesa-18.3.3/src/mapi/meson.build 2017-11-27 17:45:57.000000000 +0000 +++ mesa-19.0.1/src/mapi/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -25,6 +25,8 @@ 'u_execmem.h', ) +genCommon_py = files('new/genCommon.py') +glapi_gen_mapi_py = files('new/gen_gldispatch_mapi.py') mapi_abi_py = files('mapi_abi.py') subdir('glapi') diff -Nru mesa-18.3.3/src/mapi/new/genCommon.py mesa-19.0.1/src/mapi/new/genCommon.py --- mesa-18.3.3/src/mapi/new/genCommon.py 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/mapi/new/genCommon.py 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,241 @@ +#!/usr/bin/env python + +# (C) Copyright 2015, NVIDIA CORPORATION. +# All Rights Reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# on the rights to use, copy, modify, merge, publish, distribute, sub +# license, and/or sell copies of the Software, and to permit persons to whom +# the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +# IBM AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# +# Authors: +# Kyle Brenneman + +import collections +import re +import sys +import xml.etree.cElementTree as etree + +import os +GLAPI = os.path.join(os.path.dirname(__file__), "..", "glapi", "gen") +sys.path.insert(0, GLAPI) +import static_data + +MAPI_TABLE_NUM_DYNAMIC = 4096 + +_LIBRARY_FEATURE_NAMES = { + # libGL and libGLdiapatch both include every function. 
+ "gl" : None, + "gldispatch" : None, + "opengl" : frozenset(( "GL_VERSION_1_0", "GL_VERSION_1_1", + "GL_VERSION_1_2", "GL_VERSION_1_3", "GL_VERSION_1_4", "GL_VERSION_1_5", + "GL_VERSION_2_0", "GL_VERSION_2_1", "GL_VERSION_3_0", "GL_VERSION_3_1", + "GL_VERSION_3_2", "GL_VERSION_3_3", "GL_VERSION_4_0", "GL_VERSION_4_1", + "GL_VERSION_4_2", "GL_VERSION_4_3", "GL_VERSION_4_4", "GL_VERSION_4_5", + )), + "glesv1" : frozenset(("GL_VERSION_ES_CM_1_0", "GL_OES_point_size_array")), + "glesv2" : frozenset(("GL_ES_VERSION_2_0", "GL_ES_VERSION_3_0", + "GL_ES_VERSION_3_1", "GL_ES_VERSION_3_2", + )), +} + +def getFunctions(xmlFiles): + """ + Reads an XML file and returns all of the functions defined in it. + + xmlFile should be the path to Khronos's gl.xml file. The return value is a + sequence of FunctionDesc objects, ordered by slot number. + """ + roots = [ etree.parse(xmlFile).getroot() for xmlFile in xmlFiles ] + return getFunctionsFromRoots(roots) + +def getFunctionsFromRoots(roots): + functions = {} + for root in roots: + for func in _getFunctionList(root): + functions[func.name] = func + functions = functions.values() + + # Sort the function list by name. + functions = sorted(functions, key=lambda f: f.name) + + # Lookup for fixed offset/slot functions and use it if available. + # Assign a slot number to each function. This isn't strictly necessary, + # since you can just look at the index in the list, but it makes it easier + # to include the slot when formatting output. 
+ + next_slot = 0 + for i in range(len(functions)): + name = functions[i].name[2:] + + if name in static_data.offsets: + functions[i] = functions[i]._replace(slot=static_data.offsets[name]) + elif not name.endswith("ARB") and name + "ARB" in static_data.offsets: + functions[i] = functions[i]._replace(slot=static_data.offsets[name + "ARB"]) + elif not name.endswith("EXT") and name + "EXT" in static_data.offsets: + functions[i] = functions[i]._replace(slot=static_data.offsets[name + "EXT"]) + else: + functions[i] = functions[i]._replace(slot=next_slot) + next_slot += 1 + + return functions + +def getExportNamesFromRoots(target, roots): + """ + Goes through the tags from gl.xml and returns a set of OpenGL + functions that a library should export. + + target should be one of "gl", "gldispatch", "opengl", "glesv1", or + "glesv2". + """ + featureNames = _LIBRARY_FEATURE_NAMES[target] + if featureNames is None: + return set(func.name for func in getFunctionsFromRoots(roots)) + + names = set() + for root in roots: + features = [] + for featElem in root.findall("feature"): + if featElem.get("name") in featureNames: + features.append(featElem) + for featElem in root.findall("extensions/extension"): + if featElem.get("name") in featureNames: + features.append(featElem) + for featElem in features: + for commandElem in featElem.findall("require/command"): + names.add(commandElem.get("name")) + return names + +class FunctionArg(collections.namedtuple("FunctionArg", "type name")): + @property + def dec(self): + """ + Returns a "TYPE NAME" string, suitable for a function prototype. + """ + rv = str(self.type) + if not rv.endswith("*"): + rv += " " + rv += self.name + return rv + +class FunctionDesc(collections.namedtuple("FunctionDesc", "name rt args slot")): + def hasReturn(self): + """ + Returns true if the function returns a value. 
+ """ + return (self.rt != "void") + + @property + def decArgs(self): + """ + Returns a string with the types and names of the arguments, as you + would use in a function declaration. + """ + if not self.args: + return "void" + else: + return ", ".join(arg.dec for arg in self.args) + + @property + def callArgs(self): + """ + Returns a string with the names of the arguments, as you would use in a + function call. + """ + return ", ".join(arg.name for arg in self.args) + + @property + def basename(self): + assert self.name.startswith("gl") + return self.name[2:] + +def _getFunctionList(root): + for elem in root.findall("commands/command"): + yield _parseCommandElem(elem) + +def _parseCommandElem(elem): + protoElem = elem.find("proto") + (rt, name) = _parseProtoElem(protoElem) + + args = [] + for ch in elem.findall("param"): + # tags have the same format as a tag. + args.append(FunctionArg(*_parseProtoElem(ch))) + func = FunctionDesc(name, rt, tuple(args), slot=None) + + return func + +def _parseProtoElem(elem): + # If I just remove the tags and string the text together, I'll get valid C code. + text = _flattenText(elem) + text = text.strip() + m = re.match(r"^(.+)\b(\w+)(?:\s*\[\s*(\d*)\s*\])?$", text, re.S) + if m: + typename = _fixupTypeName(m.group(1)) + name = m.group(2) + if m.group(3): + # HACK: glPathGlyphIndexRangeNV defines an argument like this: + # GLuint baseAndCount[2] + # Convert it to a pointer and hope for the best. + typename += "*" + return (typename, name) + else: + raise ValueError("Can't parse element %r -> %r" % (elem, text)) + +def _flattenText(elem): + """ + Returns the text in an element and all child elements, with the tags + removed. + """ + text = "" + if elem.text is not None: + text = elem.text + for ch in elem: + text += _flattenText(ch) + if ch.tail is not None: + text += ch.tail + return text + +def _fixupTypeName(typeName): + """ + Converts a typename into a more consistent format. 
+ """ + + rv = typeName.strip() + + # Replace "GLvoid" with just plain "void". + rv = re.sub(r"\bGLvoid\b", "void", rv) + + # Remove the vendor suffixes from types that have a suffix-less version. + rv = re.sub(r"\b(GLhalf|GLintptr|GLsizeiptr|GLint64|GLuint64)(?:ARB|EXT|NV|ATI)\b", r"\1", rv) + + rv = re.sub(r"\bGLDEBUGPROCKHR\b", "GLDEBUGPROC", rv) + + # Clear out any leading and trailing whitespace. + rv = rv.strip() + + # Remove any whitespace before a '*' + rv = re.sub(r"\s+\*", r"*", rv) + + # Change "foo*" to "foo *" + rv = re.sub(r"([^\*])\*", r"\1 *", rv) + + # Condense all whitespace into a single space. + rv = re.sub(r"\s+", " ", rv) + + return rv + diff -Nru mesa-18.3.3/src/mapi/new/gen_gldispatch_mapi.py mesa-19.0.1/src/mapi/new/gen_gldispatch_mapi.py --- mesa-18.3.3/src/mapi/new/gen_gldispatch_mapi.py 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/mapi/new/gen_gldispatch_mapi.py 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,193 @@ +#!/usr/bin/env python + +# Copyright (C) 2010 LunarG Inc. +# (C) Copyright 2015, NVIDIA CORPORATION. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +# +# Authors: +# Kyle Brenneman +# +# Based on code ogiginally by: +# Chia-I Wu + + +""" +Generates the glapi_mapi_tmp.h header file from Khronos's XML file. +""" + +import sys +import xml.etree.cElementTree as etree + +import genCommon + +def _main(): + target = sys.argv[1] + xmlFiles = sys.argv[2:] + + roots = [ etree.parse(filename).getroot() for filename in xmlFiles ] + allFunctions = genCommon.getFunctionsFromRoots(roots) + + names = genCommon.getExportNamesFromRoots(target, roots) + functions = [f for f in allFunctions if(f.name in names)] + + if (target in ("gl", "gldispatch")): + assert(len(functions) == len(allFunctions)) + assert(all(functions[i] == allFunctions[i] for i in range(len(functions)))) + assert(all(functions[i].slot == i for i in range(len(functions)))) + + print(r""" +/* This file is automatically generated by mapi_abi.py. Do not modify. 
*/ + +#ifndef _GLAPI_TMP_H_ +#define _GLAPI_TMP_H_ +typedef int GLclampx; +#endif /* _GLAPI_TMP_H_ */ +""".lstrip("\n")) + + print(generate_defines(functions)) + if target == "gldispatch": + print(generate_table(functions, allFunctions)) + print(generate_noop_array(functions)) + print(generate_public_stubs(functions)) + print(generate_public_entries(functions)) + if target == "gldispatch": + print(generate_public_entries_table(functions)) + print(generate_undef_public_entries()) + print(generate_stub_asm_gcc(functions)) + +def generate_defines(functions): + text = r""" +#ifdef MAPI_TMP_DEFINES +#define GL_GLEXT_PROTOTYPES +#include "GL/gl.h" +#include "GL/glext.h" + +""".lstrip("\n") + for func in functions: + text += "GLAPI {f.rt} APIENTRY {f.name}({f.decArgs});\n".format(f=func) + text += "#undef MAPI_TMP_DEFINES\n" + text += "#endif /* MAPI_TMP_DEFINES */\n" + return text + +def generate_table(functions, allFunctions): + text = "#ifdef MAPI_TMP_TABLE\n" + text += "#define MAPI_TABLE_NUM_STATIC %d\n" % (len(allFunctions)) + text += "#define MAPI_TABLE_NUM_DYNAMIC %d\n" % (genCommon.MAPI_TABLE_NUM_DYNAMIC,) + text += "#undef MAPI_TMP_TABLE\n" + text += "#endif /* MAPI_TMP_TABLE */\n" + return text + +def generate_noop_array(functions): + text = "#ifdef MAPI_TMP_NOOP_ARRAY\n" + text += "#ifdef DEBUG\n\n" + + for func in functions: + text += "static {f.rt} APIENTRY noop{f.basename}({f.decArgs})\n".format(f=func) + text += "{\n" + if (len(func.args) > 0): + text += " " + for arg in func.args: + text += " (void) {a.name};".format(a=arg) + text += "\n" + text += " noop_warn(\"{f.name}\");\n".format(f=func) + if (func.hasReturn()): + text += " return ({f.rt}) 0;\n".format(f=func) + text += "}\n\n" + + text += "const mapi_func table_noop_array[] = {\n" + for func in functions: + text += " (mapi_func) noop{f.basename},\n".format(f=func) + for i in range(genCommon.MAPI_TABLE_NUM_DYNAMIC - 1): + text += " (mapi_func) noop_generic,\n" + text += " (mapi_func) noop_generic\n" 
+ text += "};\n\n" + text += "#else /* DEBUG */\n\n" + text += "const mapi_func table_noop_array[] = {\n" + for i in range(len(functions) + genCommon.MAPI_TABLE_NUM_DYNAMIC - 1): + text += " (mapi_func) noop_generic,\n" + text += " (mapi_func) noop_generic\n" + + text += "};\n\n" + text += "#endif /* DEBUG */\n" + text += "#undef MAPI_TMP_NOOP_ARRAY\n" + text += "#endif /* MAPI_TMP_NOOP_ARRAY */\n" + return text + +def generate_public_stubs(functions): + text = "#ifdef MAPI_TMP_PUBLIC_STUBS\n" + + text += "static const struct mapi_stub public_stubs[] = {\n" + for func in functions: + text += " { \"%s\", %d, NULL },\n" % (func.name, func.slot) + text += "};\n" + text += "#undef MAPI_TMP_PUBLIC_STUBS\n" + text += "#endif /* MAPI_TMP_PUBLIC_STUBS */\n" + return text + +def generate_public_entries(functions): + text = "#ifdef MAPI_TMP_PUBLIC_ENTRIES_NO_HIDDEN\n" + + for func in functions: + retStr = ("return " if func.hasReturn() else "") + text += r""" +GLAPI {f.rt} APIENTRY {f.name}({f.decArgs}) +{{ + const struct _glapi_table *_tbl = entry_current_get(); + mapi_func _func = ((const mapi_func *) _tbl)[{f.slot}]; + {retStr}(({f.rt} (APIENTRY *)({f.decArgs})) _func)({f.callArgs}); +}} + +""".lstrip("\n").format(f=func, retStr=retStr) + + text += "\n" + text += "#endif /* MAPI_TMP_PUBLIC_ENTRIES_NO_HIDDEN */\n" + return text + +def generate_public_entries_table(functions): + text = "#ifdef MAPI_TMP_PUBLIC_ENTRIES_NO_HIDDEN\n" + text += "static const mapi_func public_entries[] = {\n" + for func in functions: + text += " (mapi_func) %s,\n" % (func.name,) + text += "};\n" + text += "#endif /* MAPI_TMP_PUBLIC_ENTRIES_NO_HIDDEN */\n" + return text + +def generate_undef_public_entries(): + text = "#ifdef MAPI_TMP_PUBLIC_ENTRIES_NO_HIDDEN\n" + text += "#undef MAPI_TMP_PUBLIC_ENTRIES_NO_HIDDEN\n" + text += "#endif /* MAPI_TMP_PUBLIC_ENTRIES_NO_HIDDEN */\n" + return text + +def generate_stub_asm_gcc(functions): + text = "#ifdef MAPI_TMP_STUB_ASM_GCC_NO_HIDDEN\n" + text += 
"__asm__(\n" + + for func in functions: + text += 'STUB_ASM_ENTRY("%s")"\\n"\n' % (func.name,) + text += '"\\t"STUB_ASM_CODE("%d")"\\n"\n\n' % (func.slot,) + + text += ");\n" + text += "#undef MAPI_TMP_STUB_ASM_GCC_NO_HIDDEN\n" + text += "#endif /* MAPI_TMP_STUB_ASM_GCC_NO_HIDDEN */\n" + return text + +if (__name__ == "__main__"): + _main() + diff -Nru mesa-18.3.3/src/mapi/shared-glapi/meson.build mesa-19.0.1/src/mapi/shared-glapi/meson.build --- mesa-18.3.3/src/mapi/shared-glapi/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mapi/shared-glapi/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -60,6 +60,7 @@ include_directories : [inc_src, inc_include, inc_mapi], link_with : [libglapi], dependencies : [dep_thread, idep_gtest], - ) + ), + suite : ['mapi'], ) endif diff -Nru mesa-18.3.3/src/mapi/shared-glapi/SConscript mesa-19.0.1/src/mapi/shared-glapi/SConscript --- mesa-18.3.3/src/mapi/shared-glapi/SConscript 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mapi/shared-glapi/SConscript 2019-03-31 23:16:37.000000000 +0000 @@ -27,13 +27,27 @@ # generate ABI header GLAPI = '../glapi/' - header = env.CodeGenerate( - target = header_name, - script = '../mapi_abi.py', - source = [GLAPI + 'gen/gl_and_es_API.xml'] + env.Glob(GLAPI + 'gen/*.xml'), - command = python_cmd + ' $SCRIPT ' + \ - '--printer %s $SOURCE > $TARGET' % (printer), - ) + if printer != 'glapi': + if printer == 'es1api': + abi_tag = 'glesv1' + else: + abi_tag = 'glesv2' + + header = env.CodeGenerate( + target = header_name, + script = '../new/gen_gldispatch_mapi.py', + source = GLAPI + 'registry/gl.xml' + command = python_cmd + ' $SCRIPT ' + \ + '%s $SOURCE > $TARGET' % (abi_tag), + ) + else: + header = env.CodeGenerate( + target = header_name, + script = '../mapi_abi.py', + source = [GLAPI + 'gen/gl_and_es_API.xml'] + env.Glob(GLAPI + 'gen/*.xml'), + command = python_cmd + ' $SCRIPT ' + \ + '--printer %s $SOURCE > $TARGET' % (printer), + ) cpppath = [ header[0].dir, diff -Nru 
mesa-18.3.3/src/mesa/drivers/common/meta.c mesa-19.0.1/src/mesa/drivers/common/meta.c --- mesa-18.3.3/src/mesa/drivers/common/meta.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/common/meta.c 2019-03-31 23:16:37.000000000 +0000 @@ -127,7 +127,7 @@ assert(att); _mesa_framebuffer_texture(ctx, fb, attachment, att, texObj, texTarget, - level, layer, false); + level, att->NumSamples, layer, false); } static struct gl_shader * diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i915/intel_context.c mesa-19.0.1/src/mesa/drivers/dri/i915/intel_context.c --- mesa-18.3.3/src/mesa/drivers/dri/i915/intel_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i915/intel_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -442,8 +442,6 @@ intel->is_945 = IS_945(devID); - intel->has_swizzling = intel->intelScreen->hw_has_swizzling; - memset(&ctx->TextureFormatSupported, 0, sizeof(ctx->TextureFormatSupported)); diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i915/intel_context.h mesa-19.0.1/src/mesa/drivers/dri/i915/intel_context.h --- mesa-18.3.3/src/mesa/drivers/dri/i915/intel_context.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i915/intel_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -159,7 +159,6 @@ */ int gen; bool is_945; - bool has_swizzling; struct intel_batchbuffer batch; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i915/intel_screen.c mesa-19.0.1/src/mesa/drivers/dri/i915/intel_screen.c --- mesa-18.3.3/src/mesa/drivers/dri/i915/intel_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i915/intel_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -1020,30 +1020,6 @@ return true; } -static bool -intel_detect_swizzling(struct intel_screen *screen) -{ - drm_intel_bo *buffer; - unsigned long flags = 0; - unsigned long aligned_pitch; - uint32_t tiling = I915_TILING_X; - uint32_t swizzle_mode = 0; - - buffer = drm_intel_bo_alloc_tiled(screen->bufmgr, "swizzle test", - 64, 64, 4, - 
&tiling, &aligned_pitch, flags); - if (buffer == NULL) - return false; - - drm_intel_bo_get_tiling(buffer, &tiling, &swizzle_mode); - drm_intel_bo_unreference(buffer); - - if (swizzle_mode == I915_BIT_6_SWIZZLE_NONE) - return false; - else - return true; -} - static __DRIconfig** intel_screen_make_configs(__DRIscreen *dri_screen) { @@ -1200,8 +1176,6 @@ intelScreen->gen = 2; } - intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen); - set_max_gl_versions(intelScreen); psp->extensions = intelScreenExtensions; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i915/intel_screen.h mesa-19.0.1/src/mesa/drivers/dri/i915/intel_screen.h --- mesa-18.3.3/src/mesa/drivers/dri/i915/intel_screen.h 2018-01-29 17:10:31.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i915/intel_screen.h 2019-03-31 23:16:37.000000000 +0000 @@ -44,8 +44,6 @@ bool no_hw; - bool hw_has_swizzling; - bool no_vbo; dri_bufmgr *bufmgr; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/Android.mk mesa-19.0.1/src/mesa/drivers/dri/i965/Android.mk --- mesa-18.3.3/src/mesa/drivers/dri/i965/Android.mk 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/Android.mk 2019-03-31 23:16:37.000000000 +0000 @@ -51,42 +51,6 @@ libmesa_i965_gen10 \ libmesa_i965_gen11 - -# --------------------------------------- -# Build libmesa_intel_tiled_memcpy -# --------------------------------------- - -include $(CLEAR_VARS) - -LOCAL_MODULE := libmesa_intel_tiled_memcpy - -LOCAL_C_INCLUDES := $(I965_PERGEN_COMMON_INCLUDES) - -LOCAL_SRC_FILES := $(intel_tiled_memcpy_FILES) - -include $(MESA_COMMON_MK) -include $(BUILD_STATIC_LIBRARY) - -# --------------------------------------- -# Build libmesa_intel_tiled_memcpy_sse41 -# --------------------------------------- - -ifeq ($(ARCH_X86_HAVE_SSE4_1),true) -include $(CLEAR_VARS) - -LOCAL_MODULE := libmesa_intel_tiled_memcpy_sse41 - -LOCAL_C_INCLUDES := $(I965_PERGEN_COMMON_INCLUDES) - -LOCAL_SRC_FILES := $(intel_tiled_memcpy_sse41_FILES) - -LOCAL_CFLAGS += 
\ - -DUSE_SSE41 -msse4.1 -mstackrealign - -include $(MESA_COMMON_MK) -include $(BUILD_STATIC_LIBRARY) -endif - # --------------------------------------- # Build libmesa_i965_gen4 # --------------------------------------- @@ -312,6 +276,7 @@ LOCAL_C_INCLUDES := \ $(MESA_DRI_C_INCLUDES) \ + $(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_glsl,,) \ $(MESA_TOP)/include/drm-uapi LOCAL_SRC_FILES := \ @@ -320,7 +285,6 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \ $(MESA_DRI_WHOLE_STATIC_LIBRARIES) \ $(I965_PERGEN_LIBS) \ - libmesa_intel_tiled_memcpy \ libmesa_intel_dev \ libmesa_intel_common \ libmesa_isl \ @@ -330,8 +294,6 @@ ifeq ($(ARCH_X86_HAVE_SSE4_1),true) LOCAL_CFLAGS += \ -DUSE_SSE41 -LOCAL_WHOLE_STATIC_LIBRARIES += \ - libmesa_intel_tiled_memcpy_sse41 endif LOCAL_SHARED_LIBRARIES := \ diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_blorp.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_blorp.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_blorp.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_blorp.c 2019-03-31 23:16:37.000000000 +0000 @@ -43,24 +43,24 @@ #define FILE_DEBUG_FLAG DEBUG_BLORP static bool -brw_blorp_lookup_shader(struct blorp_context *blorp, +brw_blorp_lookup_shader(struct blorp_batch *batch, const void *key, uint32_t key_size, uint32_t *kernel_out, void *prog_data_out) { - struct brw_context *brw = blorp->driver_ctx; + struct brw_context *brw = batch->driver_batch; return brw_search_cache(&brw->cache, BRW_CACHE_BLORP_PROG, key, key_size, kernel_out, prog_data_out, true); } static bool -brw_blorp_upload_shader(struct blorp_context *blorp, +brw_blorp_upload_shader(struct blorp_batch *batch, const void *key, uint32_t key_size, const void *kernel, uint32_t kernel_size, const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, uint32_t *kernel_out, void *prog_data_out) { - struct brw_context *brw = blorp->driver_ctx; + struct brw_context *brw = batch->driver_batch; brw_upload_cache(&brw->cache, 
BRW_CACHE_BLORP_PROG, key, key_size, kernel, kernel_size, prog_data, prog_data_size, kernel_out, prog_data_out); diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_compute.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_compute.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_compute.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_compute.c 2019-03-31 23:16:37.000000000 +0000 @@ -35,135 +35,6 @@ static void -prepare_indirect_gpgpu_walker(struct brw_context *brw) -{ - const struct gen_device_info *devinfo = &brw->screen->devinfo; - GLintptr indirect_offset = brw->compute.num_work_groups_offset; - struct brw_bo *bo = brw->compute.num_work_groups_bo; - - brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo, indirect_offset + 0); - brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo, indirect_offset + 4); - brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMZ, bo, indirect_offset + 8); - - if (devinfo->gen > 7) - return; - - /* Clear upper 32-bits of SRC0 and all 64-bits of SRC1 */ - BEGIN_BATCH(7); - OUT_BATCH(MI_LOAD_REGISTER_IMM | (7 - 2)); - OUT_BATCH(MI_PREDICATE_SRC0 + 4); - OUT_BATCH(0u); - OUT_BATCH(MI_PREDICATE_SRC1 + 0); - OUT_BATCH(0u); - OUT_BATCH(MI_PREDICATE_SRC1 + 4); - OUT_BATCH(0u); - ADVANCE_BATCH(); - - /* Load compute_dispatch_indirect_x_size into SRC0 */ - brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo, indirect_offset + 0); - - /* predicate = (compute_dispatch_indirect_x_size == 0); */ - BEGIN_BATCH(1); - OUT_BATCH(GEN7_MI_PREDICATE | - MI_PREDICATE_LOADOP_LOAD | - MI_PREDICATE_COMBINEOP_SET | - MI_PREDICATE_COMPAREOP_SRCS_EQUAL); - ADVANCE_BATCH(); - - /* Load compute_dispatch_indirect_y_size into SRC0 */ - brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo, indirect_offset + 4); - - /* predicate |= (compute_dispatch_indirect_y_size == 0); */ - BEGIN_BATCH(1); - OUT_BATCH(GEN7_MI_PREDICATE | - MI_PREDICATE_LOADOP_LOAD | - MI_PREDICATE_COMBINEOP_OR | - MI_PREDICATE_COMPAREOP_SRCS_EQUAL); - ADVANCE_BATCH(); - - /* 
Load compute_dispatch_indirect_z_size into SRC0 */ - brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo, indirect_offset + 8); - - /* predicate |= (compute_dispatch_indirect_z_size == 0); */ - BEGIN_BATCH(1); - OUT_BATCH(GEN7_MI_PREDICATE | - MI_PREDICATE_LOADOP_LOAD | - MI_PREDICATE_COMBINEOP_OR | - MI_PREDICATE_COMPAREOP_SRCS_EQUAL); - ADVANCE_BATCH(); - - /* predicate = !predicate; */ - BEGIN_BATCH(1); - OUT_BATCH(GEN7_MI_PREDICATE | - MI_PREDICATE_LOADOP_LOADINV | - MI_PREDICATE_COMBINEOP_OR | - MI_PREDICATE_COMPAREOP_FALSE); - ADVANCE_BATCH(); -} - -static void -brw_emit_gpgpu_walker(struct brw_context *brw) -{ - const struct gen_device_info *devinfo = &brw->screen->devinfo; - const struct brw_cs_prog_data *prog_data = - brw_cs_prog_data(brw->cs.base.prog_data); - - const GLuint *num_groups = brw->compute.num_work_groups; - uint32_t indirect_flag; - - if (brw->compute.num_work_groups_bo == NULL) { - indirect_flag = 0; - } else { - indirect_flag = - GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE | - (devinfo->gen == 7 ? GEN7_GPGPU_PREDICATE_ENABLE : 0); - prepare_indirect_gpgpu_walker(brw); - } - - const unsigned simd_size = prog_data->simd_size; - unsigned group_size = prog_data->local_size[0] * - prog_data->local_size[1] * prog_data->local_size[2]; - unsigned thread_width_max = - (group_size + simd_size - 1) / simd_size; - - uint32_t right_mask = 0xffffffffu >> (32 - simd_size); - const unsigned right_non_aligned = group_size & (simd_size - 1); - if (right_non_aligned != 0) - right_mask >>= (simd_size - right_non_aligned); - - uint32_t dwords = devinfo->gen < 8 ? 
11 : 15; - BEGIN_BATCH(dwords); - OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2) | indirect_flag); - OUT_BATCH(0); - if (devinfo->gen >= 8) { - OUT_BATCH(0); /* Indirect Data Length */ - OUT_BATCH(0); /* Indirect Data Start Address */ - } - assert(thread_width_max <= brw->screen->devinfo.max_cs_threads); - OUT_BATCH(SET_FIELD(simd_size / 16, GPGPU_WALKER_SIMD_SIZE) | - SET_FIELD(thread_width_max - 1, GPGPU_WALKER_THREAD_WIDTH_MAX)); - OUT_BATCH(0); /* Thread Group ID Starting X */ - if (devinfo->gen >= 8) - OUT_BATCH(0); /* MBZ */ - OUT_BATCH(num_groups[0]); /* Thread Group ID X Dimension */ - OUT_BATCH(0); /* Thread Group ID Starting Y */ - if (devinfo->gen >= 8) - OUT_BATCH(0); /* MBZ */ - OUT_BATCH(num_groups[1]); /* Thread Group ID Y Dimension */ - OUT_BATCH(0); /* Thread Group ID Starting/Resume Z */ - OUT_BATCH(num_groups[2]); /* Thread Group ID Z Dimension */ - OUT_BATCH(right_mask); /* Right Execution Mask */ - OUT_BATCH(0xffffffff); /* Bottom Execution Mask */ - ADVANCE_BATCH(); - - BEGIN_BATCH(2); - OUT_BATCH(MEDIA_STATE_FLUSH << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - -static void brw_dispatch_compute_common(struct gl_context *ctx) { struct brw_context *brw = brw_context(ctx); @@ -191,7 +62,7 @@ brw->batch.no_wrap = true; brw_upload_compute_state(brw); - brw_emit_gpgpu_walker(brw); + brw->vtbl.emit_compute_walker(brw); brw->batch.no_wrap = false; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_conditional_render.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_conditional_render.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_conditional_render.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_conditional_render.c 2019-03-31 23:16:37.000000000 +0000 @@ -66,7 +66,7 @@ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE); hsw_overflow_result_to_gpr0(brw, query, count); - brw_load_register_reg64(brw, HSW_CS_GPR(0), MI_PREDICATE_SRC0); + brw_load_register_reg64(brw, MI_PREDICATE_SRC0, HSW_CS_GPR(0)); 
brw_load_register_imm64(brw, MI_PREDICATE_SRC1, 0ull); } diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_context.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_context.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -390,15 +390,15 @@ */ assert(devinfo->gen >= 7); + ctx->Const.SpirVCapabilities.atomic_storage = devinfo->gen >= 7; + ctx->Const.SpirVCapabilities.draw_parameters = true; ctx->Const.SpirVCapabilities.float64 = devinfo->gen >= 8; + ctx->Const.SpirVCapabilities.geometry_streams = devinfo->gen >= 7; + ctx->Const.SpirVCapabilities.image_write_without_format = true; ctx->Const.SpirVCapabilities.int64 = devinfo->gen >= 8; ctx->Const.SpirVCapabilities.tessellation = true; - ctx->Const.SpirVCapabilities.draw_parameters = true; - ctx->Const.SpirVCapabilities.image_write_without_format = true; - ctx->Const.SpirVCapabilities.variable_pointers = true; - ctx->Const.SpirVCapabilities.atomic_storage = devinfo->gen >= 7; ctx->Const.SpirVCapabilities.transform_feedback = devinfo->gen >= 7; - ctx->Const.SpirVCapabilities.geometry_streams = devinfo->gen >= 7; + ctx->Const.SpirVCapabilities.variable_pointers = true; } static void diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_context.h mesa-19.0.1/src/mesa/drivers/dri/i965/brw_context.h --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_context.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -752,6 +752,8 @@ struct brw_bo *bo, uint32_t offset_in_bytes, uint32_t report_id); + + void (*emit_compute_walker)(struct brw_context *brw); } vtbl; struct brw_bufmgr *bufmgr; @@ -842,6 +844,8 @@ GLuint primitive; /**< Hardware primitive, such as _3DPRIM_TRILIST. */ + bool object_preemption; /**< Object level preemption enabled. 
*/ + GLenum reduced_primitive; /** @@ -1002,6 +1006,9 @@ /* High bits of the last seen index buffer address (for workarounds). */ uint16_t last_bo_high_bits; + + /* Used to understand is GPU state of primitive restart is up to date */ + bool enable_cut_index; } ib; /* Active vertex program: @@ -1377,13 +1384,6 @@ /*====================================================================== * brw_misc_state.c */ -void -brw_meta_resolve_color(struct brw_context *brw, - struct intel_mipmap_tree *mt); - -/*====================================================================== - * brw_misc_state.c - */ void brw_workaround_depthstencil_alignment(struct brw_context *brw, GLbitfield clear_mask); @@ -1435,10 +1435,10 @@ uint32_t reg, uint32_t imm); void brw_load_register_imm64(struct brw_context *brw, uint32_t reg, uint64_t imm); -void brw_load_register_reg(struct brw_context *brw, uint32_t src, - uint32_t dest); -void brw_load_register_reg64(struct brw_context *brw, uint32_t src, - uint32_t dest); +void brw_load_register_reg(struct brw_context *brw, uint32_t dst, + uint32_t src); +void brw_load_register_reg64(struct brw_context *brw, uint32_t dst, + uint32_t src); void brw_store_data_imm32(struct brw_context *brw, struct brw_bo *bo, uint32_t offset, uint32_t imm); void brw_store_data_imm64(struct brw_context *brw, struct brw_bo *bo, @@ -1493,7 +1493,7 @@ /* brw_draw_upload.c */ unsigned brw_get_vertex_surface_type(struct brw_context *brw, - const struct gl_array_attributes *glattr); + const struct gl_vertex_format *glformat); static inline unsigned brw_get_index_type(unsigned index_size) diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_cs.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_cs.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_cs.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_cs.c 2019-03-31 23:16:37.000000000 +0000 @@ -58,6 +58,7 @@ struct brw_cs_prog_data prog_data; bool start_busy = false; double start_time = 0; + nir_shader *nir 
= nir_shader_clone(mem_ctx, cp->program.nir); memset(&prog_data, 0, sizeof(prog_data)); @@ -76,7 +77,7 @@ assign_cs_binding_table_offsets(devinfo, &cp->program, &prog_data); - brw_nir_setup_glsl_uniforms(mem_ctx, cp->program.nir, + brw_nir_setup_glsl_uniforms(mem_ctx, nir, &cp->program, &prog_data.base, true); if (unlikely(brw->perf_debug)) { @@ -91,8 +92,7 @@ char *error_str; program = brw_compile_cs(brw->screen->compiler, brw, mem_ctx, key, - &prog_data, cp->program.nir, st_index, - &error_str); + &prog_data, nir, st_index, &error_str); if (program == NULL) { cp->program.sh.data->LinkStatus = LINKING_FAILURE; ralloc_strcat(&cp->program.sh.data->InfoLog, error_str); diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_defines.h mesa-19.0.1/src/mesa/drivers/dri/i965/brw_defines.h --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_defines.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_defines.h 2019-03-31 23:16:37.000000000 +0000 @@ -38,7 +38,7 @@ /* Using the GNU statement expression extension */ #define SET_FIELD(value, field) \ ({ \ - uint32_t fieldval = (value) << field ## _SHIFT; \ + uint32_t fieldval = (uint32_t)(value) << field ## _SHIFT; \ assert((fieldval & ~ field ## _MASK) == 0); \ fieldval & field ## _MASK; \ }) @@ -1646,6 +1646,8 @@ # define GEN8_L3CNTLREG_DC_ALLOC_MASK INTEL_MASK(24, 18) # define GEN8_L3CNTLREG_ALL_ALLOC_SHIFT 25 # define GEN8_L3CNTLREG_ALL_ALLOC_MASK INTEL_MASK(31, 25) +# define GEN8_L3CNTLREG_EDBC_NO_HANG (1 << 9) +# define GEN11_L3CNTLREG_USE_FULL_WAYS (1 << 10) #define GEN10_CACHE_MODE_SS 0x0e420 #define GEN10_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4) @@ -1681,4 +1683,9 @@ # define HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS (1 << 5) # define HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK REG_MASK(1 << 5) +#define CS_CHICKEN1 0x2580 /* Gen9+ */ +# define GEN9_REPLAY_MODE_MIDBUFFER (0 << 0) +# define GEN9_REPLAY_MODE_MIDOBJECT (1 << 0) +# define GEN9_REPLAY_MODE_MASK REG_MASK(1 << 0) + #endif diff 
-Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_draw.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_draw.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_draw.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -303,16 +303,16 @@ * 2_10_10_10_REV vertex formats. Set appropriate workaround flags. */ while (mask) { - const struct gl_array_attributes *glattrib; + const struct gl_vertex_format *glformat; uint8_t wa_flags = 0; i = u_bit_scan64(&mask); - glattrib = brw->vb.inputs[i].glattrib; + glformat = &brw->vb.inputs[i].glattrib->Format; - switch (glattrib->Type) { + switch (glformat->Type) { case GL_FIXED: - wa_flags = glattrib->Size; + wa_flags = glformat->Size; break; case GL_INT_2_10_10_10_REV: @@ -320,12 +320,12 @@ /* fallthough */ case GL_UNSIGNED_INT_2_10_10_10_REV: - if (glattrib->Format == GL_BGRA) + if (glformat->Format == GL_BGRA) wa_flags |= BRW_ATTRIB_WA_BGRA; - if (glattrib->Normalized) + if (glformat->Normalized) wa_flags |= BRW_ATTRIB_WA_NORMALIZE; - else if (!glattrib->Integer) + else if (!glformat->Integer) wa_flags |= BRW_ATTRIB_WA_SCALE; break; @@ -872,6 +872,66 @@ } } +/** + * Implement workarounds for preemption: + * - WaDisableMidObjectPreemptionForGSLineStripAdj + * - WaDisableMidObjectPreemptionForTrifanOrPolygon + * - WaDisableMidObjectPreemptionForLineLoop + * - WA#0798 + */ +static void +gen9_emit_preempt_wa(struct brw_context *brw, + const struct _mesa_prim *prim) +{ + bool object_preemption = true; + const struct gen_device_info *devinfo = &brw->screen->devinfo; + + /* Only apply these workarounds for gen9 */ + assert(devinfo->gen == 9); + + /* WaDisableMidObjectPreemptionForGSLineStripAdj + * + * WA: Disable mid-draw preemption when draw-call is a linestrip_adj and + * GS is enabled. 
+ */ + if (brw->primitive == _3DPRIM_LINESTRIP_ADJ && brw->gs.enabled) + object_preemption = false; + + /* WaDisableMidObjectPreemptionForTrifanOrPolygon + * + * TriFan miscompare in Execlist Preemption test. Cut index that is on a + * previous context. End the previous, the resume another context with a + * tri-fan or polygon, and the vertex count is corrupted. If we prempt + * again we will cause corruption. + * + * WA: Disable mid-draw preemption when draw-call has a tri-fan. + */ + if (brw->primitive == _3DPRIM_TRIFAN) + object_preemption = false; + + /* WaDisableMidObjectPreemptionForLineLoop + * + * VF Stats Counters Missing a vertex when preemption enabled. + * + * WA: Disable mid-draw preemption when the draw uses a lineloop + * topology. + */ + if (brw->primitive == _3DPRIM_LINELOOP) + object_preemption = false; + + /* WA#0798 + * + * VF is corrupting GAFS data when preempted on an instance boundary and + * replayed with instancing enabled. + * + * WA: Disable preemption when using instanceing. + */ + if (prim->num_instances > 1) + object_preemption = false; + + brw_enable_obj_preemption(brw, object_preemption); +} + /* May fail if out of video memory for texture or vbo upload, or on * fallback conditions. 
*/ @@ -987,6 +1047,9 @@ brw_upload_render_state(brw); } + if (devinfo->gen == 9) + gen9_emit_preempt_wa(brw, prim); + brw_emit_prim(brw, prim, brw->primitive, xfb_obj, stream); brw->batch.no_wrap = false; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_draw_upload.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_draw_upload.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_draw_upload.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_draw_upload.c 2019-03-31 23:16:37.000000000 +0000 @@ -249,21 +249,21 @@ */ unsigned brw_get_vertex_surface_type(struct brw_context *brw, - const struct gl_array_attributes *glattrib) + const struct gl_vertex_format *glformat) { - int size = glattrib->Size; + int size = glformat->Size; const struct gen_device_info *devinfo = &brw->screen->devinfo; const bool is_ivybridge_or_older = devinfo->gen <= 7 && !devinfo->is_baytrail && !devinfo->is_haswell; if (unlikely(INTEL_DEBUG & DEBUG_VERTS)) fprintf(stderr, "type %s size %d normalized %d\n", - _mesa_enum_to_string(glattrib->Type), - glattrib->Size, glattrib->Normalized); + _mesa_enum_to_string(glformat->Type), + glformat->Size, glformat->Normalized); - if (glattrib->Integer) { - assert(glattrib->Format == GL_RGBA); /* sanity check */ - switch (glattrib->Type) { + if (glformat->Integer) { + assert(glformat->Format == GL_RGBA); /* sanity check */ + switch (glformat->Type) { case GL_INT: return int_types_direct[size]; case GL_SHORT: if (is_ivybridge_or_older && size == 3) @@ -288,11 +288,11 @@ return ubyte_types_direct[size]; default: unreachable("not reached"); } - } else if (glattrib->Type == GL_UNSIGNED_INT_10F_11F_11F_REV) { + } else if (glformat->Type == GL_UNSIGNED_INT_10F_11F_11F_REV) { return ISL_FORMAT_R11G11B10_FLOAT; - } else if (glattrib->Normalized) { - switch (glattrib->Type) { - case GL_DOUBLE: return double_types(size, glattrib->Doubles); + } else if (glformat->Normalized) { + switch (glformat->Type) { + case GL_DOUBLE: return double_types(size, 
glformat->Doubles); case GL_FLOAT: return float_types[size]; case GL_HALF_FLOAT: case GL_HALF_FLOAT_OES: @@ -306,7 +306,7 @@ case GL_UNSIGNED_INT: return uint_types_norm[size]; case GL_UNSIGNED_SHORT: return ushort_types_norm[size]; case GL_UNSIGNED_BYTE: - if (glattrib->Format == GL_BGRA) { + if (glformat->Format == GL_BGRA) { /* See GL_EXT_vertex_array_bgra */ assert(size == 4); return ISL_FORMAT_B8G8R8A8_UNORM; @@ -330,7 +330,7 @@ case GL_INT_2_10_10_10_REV: assert(size == 4); if (devinfo->gen >= 8 || devinfo->is_haswell) { - return glattrib->Format == GL_BGRA + return glformat->Format == GL_BGRA ? ISL_FORMAT_B10G10R10A2_SNORM : ISL_FORMAT_R10G10B10A2_SNORM; } @@ -338,7 +338,7 @@ case GL_UNSIGNED_INT_2_10_10_10_REV: assert(size == 4); if (devinfo->gen >= 8 || devinfo->is_haswell) { - return glattrib->Format == GL_BGRA + return glformat->Format == GL_BGRA ? ISL_FORMAT_B10G10R10A2_UNORM : ISL_FORMAT_R10G10B10A2_UNORM; } @@ -352,26 +352,26 @@ * like to use here, so upload everything as UINT and fix * it in the shader */ - if (glattrib->Type == GL_INT_2_10_10_10_REV) { + if (glformat->Type == GL_INT_2_10_10_10_REV) { assert(size == 4); if (devinfo->gen >= 8 || devinfo->is_haswell) { - return glattrib->Format == GL_BGRA + return glformat->Format == GL_BGRA ? ISL_FORMAT_B10G10R10A2_SSCALED : ISL_FORMAT_R10G10B10A2_SSCALED; } return ISL_FORMAT_R10G10B10A2_UINT; - } else if (glattrib->Type == GL_UNSIGNED_INT_2_10_10_10_REV) { + } else if (glformat->Type == GL_UNSIGNED_INT_2_10_10_10_REV) { assert(size == 4); if (devinfo->gen >= 8 || devinfo->is_haswell) { - return glattrib->Format == GL_BGRA + return glformat->Format == GL_BGRA ? 
ISL_FORMAT_B10G10R10A2_USCALED : ISL_FORMAT_R10G10B10A2_USCALED; } return ISL_FORMAT_R10G10B10A2_UINT; } - assert(glattrib->Format == GL_RGBA); /* sanity check */ - switch (glattrib->Type) { - case GL_DOUBLE: return double_types(size, glattrib->Doubles); + assert(glformat->Format == GL_RGBA); /* sanity check */ + switch (glformat->Type) { + case GL_DOUBLE: return double_types(size, glformat->Doubles); case GL_FLOAT: return float_types[size]; case GL_HALF_FLOAT: case GL_HALF_FLOAT_OES: @@ -407,6 +407,7 @@ { const struct gl_vertex_buffer_binding *glbinding = element->glbinding; const struct gl_array_attributes *glattrib = element->glattrib; + const struct gl_vertex_format *glformat = &glattrib->Format; const int src_stride = glbinding->Stride; /* If the source stride is zero, we just want to upload the current @@ -414,11 +415,11 @@ * to replicate it out. */ if (src_stride == 0) { - brw_upload_data(&brw->upload, glattrib->Ptr, glattrib->_ElementSize, - glattrib->_ElementSize, &buffer->bo, &buffer->offset); + brw_upload_data(&brw->upload, glattrib->Ptr, glformat->_ElementSize, + glformat->_ElementSize, &buffer->bo, &buffer->offset); buffer->stride = 0; - buffer->size = glattrib->_ElementSize; + buffer->size = glformat->_ElementSize; return; } @@ -531,13 +532,13 @@ start = offset + glbinding->Stride * brw->baseinstance; range = (glbinding->Stride * ((brw->num_instances - 1) / glbinding->InstanceDivisor) + - glattrib->_ElementSize); + glattrib->Format._ElementSize); } } else { if (brw->vb.index_bounds_valid) { start = offset + min_index * glbinding->Stride; range = (glbinding->Stride * (max_index - min_index) + - glattrib->_ElementSize); + glattrib->Format._ElementSize); } } @@ -594,7 +595,8 @@ else if (interleaved != glbinding->Stride || glbinding->InstanceDivisor != 0 || glattrib->Ptr < ptr || - (uintptr_t)(glattrib->Ptr - ptr) + glattrib->_ElementSize > interleaved) + (uintptr_t)(glattrib->Ptr - ptr) + + glattrib->Format._ElementSize > interleaved) { /* If our stride 
is different from the first attribute's stride, * or if we are using an instance divisor or if the first @@ -677,7 +679,7 @@ const struct gl_array_attributes *glattrib = upload[i]->glattrib; if (glbinding->InstanceDivisor == 0) { copy_array_to_vbo_array(brw, upload[i], min_index, max_index, - buffer, glattrib->_ElementSize); + buffer, glattrib->Format._ElementSize); } else { /* This is an instanced attribute, since its InstanceDivisor * is not zero. Therefore, its data will be stepped after the @@ -686,7 +688,7 @@ uint32_t instanced_attr_max_index = (brw->num_instances - 1) / glbinding->InstanceDivisor; copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index, - buffer, glattrib->_ElementSize); + buffer, glattrib->Format._ElementSize); } buffer->offset -= delta * buffer->stride; buffer->size += delta * buffer->stride; @@ -774,6 +776,14 @@ brw->ib.index_size = index_buffer->index_size; brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER; } + + /* We need to re-emit an index buffer state each time + * when cut index flag is changed + */ + if (brw->prim_restart.enable_cut_index != brw->ib.enable_cut_index) { + brw->ib.enable_cut_index = brw->prim_restart.enable_cut_index; + brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER; + } } const struct brw_tracked_state brw_indices = { diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_gs.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_gs.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_gs.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_gs.c 2019-03-31 23:16:37.000000000 +0000 @@ -89,15 +89,17 @@ void *mem_ctx = ralloc_context(NULL); + nir_shader *nir = nir_shader_clone(mem_ctx, gp->program.nir); + assign_gs_binding_table_offsets(devinfo, &gp->program, &prog_data); - brw_nir_setup_glsl_uniforms(mem_ctx, gp->program.nir, &gp->program, + brw_nir_setup_glsl_uniforms(mem_ctx, nir, &gp->program, &prog_data.base.base, compiler->scalar_stage[MESA_SHADER_GEOMETRY]); - 
brw_nir_analyze_ubo_ranges(compiler, gp->program.nir, NULL, + brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data.base.base.ubo_ranges); - uint64_t outputs_written = gp->program.nir->info.outputs_written; + uint64_t outputs_written = nir->info.outputs_written; brw_compute_vue_map(devinfo, &prog_data.base.vue_map, outputs_written, @@ -115,8 +117,7 @@ char *error_str; const unsigned *program = brw_compile_gs(brw->screen->compiler, brw, mem_ctx, key, - &prog_data, gp->program.nir, &gp->program, - st_index, &error_str); + &prog_data, nir, &gp->program, st_index, &error_str); if (program == NULL) { ralloc_strcat(&gp->program.sh.data->InfoLog, error_str); _mesa_problem(NULL, "Failed to compile geometry shader: %s\n", error_str); diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp mesa-19.0.1/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -109,10 +109,6 @@ image_idx, offsetof(brw_image_param, swizzling), 2); param += BRW_IMAGE_PARAM_SIZE; - - brw_mark_surface_used( - stage_prog_data, - stage_prog_data->binding_table.image_start + image_idx); } } diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_pipe_control.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_pipe_control.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_pipe_control.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_pipe_control.c 2019-03-31 23:16:37.000000000 +0000 @@ -308,7 +308,7 @@ void gen7_emit_vs_workaround_flush(struct brw_context *brw) { - const struct gen_device_info *devinfo = &brw->screen->devinfo; + MAYBE_UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo; assert(devinfo->gen == 7); brw_emit_pipe_control_write(brw, diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_program_binary.c 
mesa-19.0.1/src/mesa/drivers/dri/i965/brw_program_binary.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_program_binary.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_program_binary.c 2019-03-31 23:16:37.000000000 +0000 @@ -206,14 +206,14 @@ break; switch ((enum driver_cache_blob_part)part_type) { case GEN_PART: { - uint32_t gen_size = blob_read_uint32(&reader); + MAYBE_UNUSED uint32_t gen_size = blob_read_uint32(&reader); assert(!reader.overrun && (uintptr_t)(reader.end - reader.current) > gen_size); deserialize_gen_program(&reader, ctx, prog, stage); break; } case NIR_PART: { - uint32_t nir_size = blob_read_uint32(&reader); + MAYBE_UNUSED uint32_t nir_size = blob_read_uint32(&reader); assert(!reader.overrun && (uintptr_t)(reader.end - reader.current) > nir_size); const struct nir_shader_compiler_options *options = diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_program.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_program.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_program.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_program.c 2019-03-31 23:16:37.000000000 +0000 @@ -40,7 +40,9 @@ #include "tnl/tnl.h" #include "util/ralloc.h" #include "compiler/glsl/ir.h" +#include "compiler/glsl/program.h" #include "compiler/glsl/glsl_to_nir.h" +#include "glsl/float64_glsl.h" #include "brw_program.h" #include "brw_context.h" @@ -53,6 +55,9 @@ #include "brw_vs.h" #include "brw_wm.h" +#include "main/shaderapi.h" +#include "main/shaderobj.h" + static bool brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar) { @@ -67,6 +72,54 @@ } } +static struct gl_program *brwNewProgram(struct gl_context *ctx, GLenum target, + GLuint id, bool is_arb_asm); + +static nir_shader * +compile_fp64_funcs(struct gl_context *ctx, + const nir_shader_compiler_options *options, + void *mem_ctx, + gl_shader_stage stage) +{ + const GLuint name = ~0; + struct gl_shader *sh; + + sh = _mesa_new_shader(name, stage); + + sh->Source 
= float64_source; + sh->CompileStatus = COMPILE_FAILURE; + _mesa_glsl_compile_shader(ctx, sh, false, false, true); + + if (!sh->CompileStatus) { + if (sh->InfoLog) { + _mesa_problem(ctx, + "fp64 software impl compile failed:\n%s\nsource:\n%s\n", + sh->InfoLog, float64_source); + } + } + + struct gl_shader_program *sh_prog; + sh_prog = _mesa_new_shader_program(name); + sh_prog->Label = NULL; + sh_prog->NumShaders = 1; + sh_prog->Shaders = malloc(sizeof(struct gl_shader *)); + sh_prog->Shaders[0] = sh; + + struct gl_linked_shader *linked = rzalloc(NULL, struct gl_linked_shader); + linked->Stage = stage; + linked->Program = + brwNewProgram(ctx, + _mesa_shader_stage_to_program(stage), + name, false); + + linked->ir = sh->ir; + sh_prog->_LinkedShaders[stage] = linked; + + nir_shader *nir = glsl_to_nir(sh_prog, stage, options); + + return nir_shader_clone(mem_ctx, nir); +} + nir_shader * brw_create_nir(struct brw_context *brw, const struct gl_shader_program *shader_prog, @@ -101,6 +154,15 @@ } nir_validate_shader(nir, "before brw_preprocess_nir"); + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + + if (!devinfo->has_64bit_types && nir->info.uses_64bit) { + nir_shader *fp64 = compile_fp64_funcs(ctx, options, ralloc_parent(nir), stage); + + nir_validate_shader(fp64, "fp64"); + exec_list_append(&nir->functions, &fp64->functions); + } + nir = brw_preprocess_nir(brw->screen->compiler, nir); NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo); @@ -407,7 +469,7 @@ * and we wish to view that there are 4 subslices per slice * instead of the actual number of subslices per slice. */ - if (devinfo->gen >= 9) + if (devinfo->gen >= 9 && devinfo->gen < 11) subslices = 4 * brw->screen->devinfo.num_slices; unsigned scratch_ids_per_subslice; @@ -835,7 +897,10 @@ stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset; next_binding_table_offset += num_textures; - /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. 
*/ + /* Set the binding table size. Some callers may append new entries + * and increase this accordingly. + */ + stage_prog_data->binding_table.size_bytes = next_binding_table_offset * 4; assert(next_binding_table_offset <= BRW_MAX_SURFACES); return next_binding_table_offset; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_state.h mesa-19.0.1/src/mesa/drivers/dri/i965/brw_state.h --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_state.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_state.h 2019-03-31 23:16:37.000000000 +0000 @@ -128,7 +128,7 @@ void brw_disk_cache_write_render_programs(struct brw_context *brw); /*********************************************************************** - * brw_state.c + * brw_state_upload.c */ void brw_upload_render_state(struct brw_context *brw); void brw_render_state_finished(struct brw_context *brw); @@ -138,6 +138,7 @@ void brw_destroy_state(struct brw_context *brw); void brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline); +void brw_enable_obj_preemption(struct brw_context *brw, bool enable); static inline void brw_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline) diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_state_upload.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_state_upload.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_state_upload.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_state_upload.c 2019-03-31 23:16:37.000000000 +0000 @@ -45,6 +45,28 @@ #include "brw_cs.h" #include "main/framebuffer.h" +void +brw_enable_obj_preemption(struct brw_context *brw, bool enable) +{ + const struct gen_device_info *devinfo = &brw->screen->devinfo; + assert(devinfo->gen >= 9); + + if (enable == brw->object_preemption) + return; + + /* A fixed function pipe flush is required before modifying this field */ + brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH); + + bool replay_mode = enable ? 
+ GEN9_REPLAY_MODE_MIDOBJECT : GEN9_REPLAY_MODE_MIDBUFFER; + + /* enable object level preemption */ + brw_load_register_imm32(brw, CS_CHICKEN1, + replay_mode | GEN9_REPLAY_MODE_MASK); + + brw->object_preemption = enable; +} + static void brw_upload_initial_gpu_state(struct brw_context *brw) { @@ -79,6 +101,13 @@ brw_load_register_imm32(brw, HALF_SLICE_CHICKEN7, TEXEL_OFFSET_FIX_MASK | TEXEL_OFFSET_FIX_ENABLE); + + /* WA_1406697149: Bit 9 "Error Detection Behavior Control" must be set + * in L3CNTLREG register. The default setting of the bit is not the + * desirable behavior. + */ + brw_load_register_imm32(brw, GEN8_L3CNTLREG, + GEN8_L3CNTLREG_EDBC_NO_HANG); } if (devinfo->gen == 10 || devinfo->gen == 11) { @@ -153,6 +182,11 @@ ADVANCE_BATCH(); } } + + brw->object_preemption = false; + + if (devinfo->gen >= 10) + brw_enable_obj_preemption(brw, true); } static inline const struct brw_tracked_state * diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_surface_formats.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_surface_formats.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_surface_formats.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_surface_formats.c 2019-03-31 23:16:37.000000000 +0000 @@ -67,6 +67,7 @@ [MESA_FORMAT_B8G8R8A8_SRGB] = ISL_FORMAT_B8G8R8A8_UNORM_SRGB, [MESA_FORMAT_R8G8B8A8_SRGB] = ISL_FORMAT_R8G8B8A8_UNORM_SRGB, [MESA_FORMAT_B8G8R8X8_SRGB] = ISL_FORMAT_B8G8R8X8_UNORM_SRGB, + [MESA_FORMAT_R_SRGB8] = ISL_FORMAT_L8_UNORM_SRGB, [MESA_FORMAT_L_SRGB8] = ISL_FORMAT_L8_UNORM_SRGB, [MESA_FORMAT_L8A8_SRGB] = ISL_FORMAT_L8A8_UNORM_SRGB, [MESA_FORMAT_SRGB_DXT1] = ISL_FORMAT_BC1_UNORM_SRGB, diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_tcs.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_tcs.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_tcs.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_tcs.c 2019-03-31 23:16:37.000000000 +0000 @@ -84,7 +84,7 @@ void *mem_ctx = ralloc_context(NULL); if 
(tcp) { - nir = tcp->program.nir; + nir = nir_shader_clone(mem_ctx, tcp->program.nir); } else { const nir_shader_compiler_options *options = ctx->Const.ShaderCompilerOptions[MESA_SHADER_TESS_CTRL].NirOptions; @@ -100,7 +100,7 @@ brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tcp->program, &prog_data.base.base, compiler->scalar_stage[MESA_SHADER_TESS_CTRL]); - brw_nir_analyze_ubo_ranges(compiler, tcp->program.nir, NULL, + brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data.base.base.ubo_ranges); } else { /* Upload the Patch URB Header as the first two uniforms. diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_tes.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_tes.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_tes.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_tes.c 2019-03-31 23:16:37.000000000 +0000 @@ -70,7 +70,6 @@ const struct brw_compiler *compiler = brw->screen->compiler; const struct gen_device_info *devinfo = &brw->screen->devinfo; struct brw_stage_state *stage_state = &brw->tes.base; - nir_shader *nir = tep->program.nir; struct brw_tes_prog_data prog_data; bool start_busy = false; double start_time = 0; @@ -79,13 +78,15 @@ void *mem_ctx = ralloc_context(NULL); + nir_shader *nir = nir_shader_clone(mem_ctx, tep->program.nir); + brw_assign_common_binding_table_offsets(devinfo, &tep->program, &prog_data.base.base, 0); brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tep->program, &prog_data.base.base, compiler->scalar_stage[MESA_SHADER_TESS_EVAL]); - brw_nir_analyze_ubo_ranges(compiler, tep->program.nir, NULL, + brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data.base.base.ubo_ranges); int st_index = -1; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_vs.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_vs.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_vs.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_vs.c 2019-03-31 23:16:37.000000000 +0000 @@ -174,26 +174,28 @@ mem_ctx = 
ralloc_context(NULL); + nir_shader *nir = nir_shader_clone(mem_ctx, vp->program.nir); + brw_assign_common_binding_table_offsets(devinfo, &vp->program, &prog_data.base.base, 0); if (!vp->program.is_arb_asm) { - brw_nir_setup_glsl_uniforms(mem_ctx, vp->program.nir, &vp->program, + brw_nir_setup_glsl_uniforms(mem_ctx, nir, &vp->program, &prog_data.base.base, compiler->scalar_stage[MESA_SHADER_VERTEX]); - brw_nir_analyze_ubo_ranges(compiler, vp->program.nir, key, + brw_nir_analyze_ubo_ranges(compiler, nir, key, prog_data.base.base.ubo_ranges); } else { - brw_nir_setup_arb_uniforms(mem_ctx, vp->program.nir, &vp->program, + brw_nir_setup_arb_uniforms(mem_ctx, nir, &vp->program, &prog_data.base.base); } uint64_t outputs_written = - brw_vs_outputs_written(brw, key, vp->program.nir->info.outputs_written); + brw_vs_outputs_written(brw, key, nir->info.outputs_written); brw_compute_vue_map(devinfo, &prog_data.base.vue_map, outputs_written, - vp->program.nir->info.separate_shader); + nir->info.separate_shader); if (0) { _mesa_fprint_program_opt(stderr, &vp->program, PROG_PRINT_DEBUG, true); @@ -220,8 +222,7 @@ */ char *error_str; program = brw_compile_vs(compiler, brw, mem_ctx, key, &prog_data, - vp->program.nir, - st_index, &error_str); + nir, st_index, &error_str); if (program == NULL) { if (!vp->program.is_arb_asm) { vp->program.sh.data->LinkStatus = LINKING_FAILURE; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_wm.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_wm.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_wm.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_wm.c 2019-03-31 23:16:37.000000000 +0000 @@ -63,6 +63,9 @@ next_binding_table_offset; next_binding_table_offset += key->nr_color_regions; } + + /* Update the binding table size */ + prog_data->base.binding_table.size_bytes = next_binding_table_offset * 4; } static void @@ -139,6 +142,8 @@ bool start_busy = false; double start_time = 0; + nir_shader *nir = 
nir_shader_clone(mem_ctx, fp->program.nir); + memset(&prog_data, 0, sizeof(prog_data)); /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */ @@ -148,13 +153,12 @@ assign_fs_binding_table_offsets(devinfo, &fp->program, key, &prog_data); if (!fp->program.is_arb_asm) { - brw_nir_setup_glsl_uniforms(mem_ctx, fp->program.nir, &fp->program, + brw_nir_setup_glsl_uniforms(mem_ctx, nir, &fp->program, &prog_data.base, true); - brw_nir_analyze_ubo_ranges(brw->screen->compiler, fp->program.nir, + brw_nir_analyze_ubo_ranges(brw->screen->compiler, nir, NULL, prog_data.base.ubo_ranges); } else { - brw_nir_setup_arb_uniforms(mem_ctx, fp->program.nir, &fp->program, - &prog_data.base); + brw_nir_setup_arb_uniforms(mem_ctx, nir, &fp->program, &prog_data.base); if (unlikely(INTEL_DEBUG & DEBUG_WM)) brw_dump_arb_asm("fragment", &fp->program); @@ -178,7 +182,7 @@ char *error_str = NULL; program = brw_compile_fs(brw->screen->compiler, brw, mem_ctx, - key, &prog_data, fp->program.nir, + key, &prog_data, nir, &fp->program, st_index8, st_index16, st_index32, true, false, vue_map, &error_str); @@ -263,6 +267,9 @@ found |= key_debug(brw, "xy_uxvx image bound", old_key->xy_uxvx_image_mask, key->xy_uxvx_image_mask); + found |= key_debug(brw, "ayuv image bound", + old_key->ayuv_image_mask, + key->ayuv_image_mask); for (unsigned int i = 0; i < MAX_SAMPLERS; i++) { @@ -412,6 +419,9 @@ case __DRI_IMAGE_COMPONENTS_Y_UXVX: key->xy_uxvx_image_mask |= 1 << s; break; + case __DRI_IMAGE_COMPONENTS_AYUV: + key->ayuv_image_mask |= 1 << s; + break; default: break; } diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_wm_surface_state.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_wm_surface_state.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 2019-03-31 23:16:37.000000000 +0000 @@ -156,7 +156,7 @@ struct isl_surf *aux_surf = NULL; uint64_t aux_offset = 0; struct brw_bo 
*clear_bo = NULL; - uint32_t clear_offset = 0; + uint64_t clear_offset = 0; if (aux_usage != ISL_AUX_USAGE_NONE) { aux_surf = &mt->aux_buf->surf; @@ -420,6 +420,14 @@ } break; case GL_RED: + if (img->TexFormat == MESA_FORMAT_R_SRGB8) { + swizzles[0] = SWIZZLE_X; + swizzles[1] = SWIZZLE_ZERO; + swizzles[2] = SWIZZLE_ZERO; + swizzles[3] = SWIZZLE_ONE; + break; + } + /* fallthrough */ case GL_RG: case GL_RGB: if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 || diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/gen7_l3_state.c mesa-19.0.1/src/mesa/drivers/dri/i965/gen7_l3_state.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/gen7_l3_state.c 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/gen7_l3_state.c 2019-03-31 23:16:37.000000000 +0000 @@ -119,6 +119,7 @@ assert(!cfg->n[GEN_L3P_IS] && !cfg->n[GEN_L3P_C] && !cfg->n[GEN_L3P_T]); const unsigned imm_data = ((has_slm ? GEN8_L3CNTLREG_SLM_ENABLE : 0) | + (devinfo->gen == 11 ? GEN11_L3CNTLREG_USE_FULL_WAYS : 0) | SET_FIELD(cfg->n[GEN_L3P_URB], GEN8_L3CNTLREG_URB_ALLOC) | SET_FIELD(cfg->n[GEN_L3P_RO], GEN8_L3CNTLREG_RO_ALLOC) | SET_FIELD(cfg->n[GEN_L3P_DC], GEN8_L3CNTLREG_DC_ALLOC) | diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/genX_blorp_exec.c mesa-19.0.1/src/mesa/drivers/dri/i965/genX_blorp_exec.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/genX_blorp_exec.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/genX_blorp_exec.c 2019-03-31 23:16:37.000000000 +0000 @@ -94,6 +94,14 @@ #endif } +static uint64_t +blorp_get_surface_address(struct blorp_batch *blorp_batch, + struct blorp_address address) +{ + /* We'll let blorp_surface_reloc write the address. 
*/ + return 0ull; +} + #if GEN_GEN >= 7 && GEN_GEN < 10 static struct blorp_address blorp_get_surface_base_address(struct blorp_batch *batch) @@ -197,7 +205,7 @@ const struct blorp_address *addrs, unsigned num_vbs) { -#if GEN_GEN >= 8 +#if GEN_GEN >= 8 && GEN_GEN < 11 struct brw_context *brw = batch->driver_batch; bool need_invalidate = false; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/genX_state_upload.c mesa-19.0.1/src/mesa/drivers/dri/i965/genX_state_upload.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/genX_state_upload.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/genX_state_upload.c 2019-03-31 23:16:37.000000000 +0000 @@ -197,6 +197,37 @@ _brw_cmd_pack(cmd)(brw, (void *)_dst, &name), \ _dst = NULL) +#if GEN_GEN >= 7 +MAYBE_UNUSED static void +emit_lrm(struct brw_context *brw, uint32_t reg, struct brw_address addr) +{ + brw_batch_emit(brw, GENX(MI_LOAD_REGISTER_MEM), lrm) { + lrm.RegisterAddress = reg; + lrm.MemoryAddress = addr; + } +} +#endif + +MAYBE_UNUSED static void +emit_lri(struct brw_context *brw, uint32_t reg, uint32_t imm) +{ + brw_batch_emit(brw, GENX(MI_LOAD_REGISTER_IMM), lri) { + lri.RegisterOffset = reg; + lri.DataDWord = imm; + } +} + +#if GEN_IS_HASWELL || GEN_GEN >= 8 +MAYBE_UNUSED static void +emit_lrr(struct brw_context *brw, uint32_t dst, uint32_t src) +{ + brw_batch_emit(brw, GENX(MI_LOAD_REGISTER_REG), lrr) { + lrr.SourceRegisterAddress = src; + lrr.DestinationRegisterAddress = dst; + } +} +#endif + /** * Polygon stipple packet */ @@ -363,15 +394,15 @@ #endif #if GEN_GEN == 11 - .VertexBufferMOCS = ICL_MOCS_WB, + .MOCS = ICL_MOCS_WB, #elif GEN_GEN == 10 - .VertexBufferMOCS = CNL_MOCS_WB, + .MOCS = CNL_MOCS_WB, #elif GEN_GEN == 9 - .VertexBufferMOCS = SKL_MOCS_WB, + .MOCS = SKL_MOCS_WB, #elif GEN_GEN == 8 - .VertexBufferMOCS = BDW_MOCS_WB, + .MOCS = BDW_MOCS_WB, #elif GEN_GEN == 7 - .VertexBufferMOCS = GEN7_MOCS_L3, + .MOCS = GEN7_MOCS_L3, #endif }; @@ -499,11 +530,13 @@ * In the relocation world, we 
have no idea what the addresses will be, so * we can't apply this workaround. Instead, we tell the kernel to move it * to the low 4GB regardless. + * + * This HW issue is gone on Gen11+. */ static void vf_invalidate_for_vb_48bit_transitions(struct brw_context *brw) { -#if GEN_GEN >= 8 +#if GEN_GEN >= 8 && GEN_GEN < 11 bool need_invalidate = false; for (unsigned i = 0; i < brw->vb.nr_buffers; i++) { @@ -633,7 +666,7 @@ for (unsigned i = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; const struct gl_array_attributes *glattrib = input->glattrib; - uint32_t format = brw_get_vertex_surface_type(brw, glattrib); + uint32_t format = brw_get_vertex_surface_type(brw, &glattrib->Format); if (uploads_needed(format, input->is_dual_slot) > 1) nr_elements++; @@ -726,7 +759,7 @@ for (i = 0; i < brw->vb.nr_enabled; i++) { const struct brw_vertex_element *input = brw->vb.enabled[i]; const struct gl_array_attributes *glattrib = input->glattrib; - uint32_t format = brw_get_vertex_surface_type(brw, glattrib); + uint32_t format = brw_get_vertex_surface_type(brw, &glattrib->Format); uint32_t comp0 = VFCOMP_STORE_SRC; uint32_t comp1 = VFCOMP_STORE_SRC; uint32_t comp2 = VFCOMP_STORE_SRC; @@ -769,16 +802,16 @@ const struct gl_array_attributes *glattrib = input->glattrib; const int size = (GEN_GEN < 8 && is_passthru_format(format)) ? - upload_format_size(upload_format) : glattrib->Size; + upload_format_size(upload_format) : glattrib->Format.Size; switch (size) { case 0: comp0 = VFCOMP_STORE_0; case 1: comp1 = VFCOMP_STORE_0; case 2: comp2 = VFCOMP_STORE_0; case 3: - if (GEN_GEN >= 8 && glattrib->Doubles) { + if (GEN_GEN >= 8 && glattrib->Format.Doubles) { comp3 = VFCOMP_STORE_0; - } else if (glattrib->Integer) { + } else if (glattrib->Format.Integer) { comp3 = VFCOMP_STORE_1_INT; } else { comp3 = VFCOMP_STORE_1_FP; @@ -803,7 +836,7 @@ * to be specified as VFCOMP_STORE_0 in order to output a 256-bit * vertex element." 
*/ - if (glattrib->Doubles && !input->is_dual_slot) { + if (glattrib->Format.Doubles && !input->is_dual_slot) { /* Store vertex elements which correspond to double and dvec2 vertex * shader inputs as 128-bit vertex elements, instead of 256-bits. */ @@ -890,7 +923,7 @@ #if GEN_GEN >= 6 if (gen6_edgeflag_input) { const struct gl_array_attributes *glattrib = gen6_edgeflag_input->glattrib; - const uint32_t format = brw_get_vertex_surface_type(brw, glattrib); + const uint32_t format = brw_get_vertex_surface_type(brw, &glattrib->Format); struct GENX(VERTEX_ELEMENT_STATE) elem_state = { .Valid = true, @@ -965,7 +998,8 @@ brw_batch_emit(brw, GENX(3DSTATE_INDEX_BUFFER), ib) { #if GEN_GEN < 8 && !GEN_IS_HASWELL - ib.CutIndexEnable = brw->prim_restart.enable_cut_index; + assert(brw->ib.enable_cut_index == brw->prim_restart.enable_cut_index); + ib.CutIndexEnable = brw->ib.enable_cut_index; #endif ib.IndexFormat = brw_get_index_type(index_buffer->index_size); @@ -978,7 +1012,7 @@ */ ib.BufferStartingAddress = ro_32_bo(brw->ib.bo, 0); #if GEN_GEN >= 8 - ib.IndexBufferMOCS = GEN_GEN >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB; + ib.MOCS = GEN_GEN >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB; ib.BufferSize = brw->ib.size; #else ib.BufferEndingAddress = ro_bo(brw->ib.bo, brw->ib.size - 1); @@ -2017,7 +2051,8 @@ if (wm_prog_data->base.use_alt_mode) wm.FloatingPointMode = FLOATING_POINT_MODE_Alternate; - wm.SamplerCount = GEN_GEN == 5 ? + /* WA_1606682166 */ + wm.SamplerCount = (GEN_GEN == 5 || GEN_GEN == 11) ? 0 : DIV_ROUND_UP(stage_state->sampler_count, 4); wm.BindingTableEntryCount = @@ -2179,7 +2214,10 @@ #define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \ pkt.KernelStartPointer = KSP(brw, stage_state->prog_offset); \ + /* WA_1606682166 */ \ pkt.SamplerCount = \ + GEN_GEN == 11 ? \ + 0 : \ DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \ /* Gen 11 workarounds table #2056 WABTPPrefetchDisable suggests to \ * disable prefetching of binding tables in A0 and B0 steppings. 
\ @@ -2408,7 +2446,7 @@ bbox[0] = MAX2(ctx->ViewportArray[i].X, 0); bbox[1] = MIN2(bbox[0] + ctx->ViewportArray[i].Width, fb_width); - bbox[2] = MAX2(ctx->ViewportArray[i].Y, 0); + bbox[2] = CLAMP(ctx->ViewportArray[i].Y, 0, fb_height); bbox[3] = MIN2(bbox[2] + ctx->ViewportArray[i].Height, fb_height); _mesa_intersect_scissor_bounding_box(ctx, i, bbox); @@ -3856,7 +3894,7 @@ sob.SOBufferEnable = true; sob.StreamOffsetWriteEnable = true; sob.StreamOutputBufferOffsetAddressEnable = true; - sob.SOBufferMOCS = mocs_wb; + sob.MOCS = mocs_wb; sob.SurfaceSize = MAX2(xfb_obj->Size[i] / 4, 1) - 1; sob.StreamOutputBufferOffsetAddress = @@ -4010,8 +4048,13 @@ */ ps.VectorMaskEnable = GEN_GEN >= 8; - ps.SamplerCount = - DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); + /* WA_1606682166: + * "Incorrect TDL's SSP address shift in SARB for 16:6 & 18:8 modes. + * Disable the Sampler state prefetch functionality in the SARB by + * programming 0xB000[30] to '1'." + */ + ps.SamplerCount = GEN_GEN == 11 ? 
+ 0 : DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); /* BRW_NEW_FS_PROG_DATA */ /* Gen 11 workarounds table #2056 WABTPPrefetchDisable suggests to disable @@ -4552,6 +4595,107 @@ .emit = genX(upload_cs_state) }; +#define GPGPU_DISPATCHDIMX 0x2500 +#define GPGPU_DISPATCHDIMY 0x2504 +#define GPGPU_DISPATCHDIMZ 0x2508 + +#define MI_PREDICATE_SRC0 0x2400 +#define MI_PREDICATE_SRC1 0x2408 + +static void +prepare_indirect_gpgpu_walker(struct brw_context *brw) +{ + GLintptr indirect_offset = brw->compute.num_work_groups_offset; + struct brw_bo *bo = brw->compute.num_work_groups_bo; + + emit_lrm(brw, GPGPU_DISPATCHDIMX, ro_bo(bo, indirect_offset + 0)); + emit_lrm(brw, GPGPU_DISPATCHDIMY, ro_bo(bo, indirect_offset + 4)); + emit_lrm(brw, GPGPU_DISPATCHDIMZ, ro_bo(bo, indirect_offset + 8)); + +#if GEN_GEN <= 7 + /* Clear upper 32-bits of SRC0 and all 64-bits of SRC1 */ + emit_lri(brw, MI_PREDICATE_SRC0 + 4, 0); + emit_lri(brw, MI_PREDICATE_SRC1 , 0); + emit_lri(brw, MI_PREDICATE_SRC1 + 4, 0); + + /* Load compute_dispatch_indirect_x_size into SRC0 */ + emit_lrm(brw, MI_PREDICATE_SRC0, ro_bo(bo, indirect_offset + 0)); + + /* predicate = (compute_dispatch_indirect_x_size == 0); */ + brw_batch_emit(brw, GENX(MI_PREDICATE), mip) { + mip.LoadOperation = LOAD_LOAD; + mip.CombineOperation = COMBINE_SET; + mip.CompareOperation = COMPARE_SRCS_EQUAL; + } + + /* Load compute_dispatch_indirect_y_size into SRC0 */ + emit_lrm(brw, MI_PREDICATE_SRC0, ro_bo(bo, indirect_offset + 4)); + + /* predicate |= (compute_dispatch_indirect_y_size == 0); */ + brw_batch_emit(brw, GENX(MI_PREDICATE), mip) { + mip.LoadOperation = LOAD_LOAD; + mip.CombineOperation = COMBINE_OR; + mip.CompareOperation = COMPARE_SRCS_EQUAL; + } + + /* Load compute_dispatch_indirect_z_size into SRC0 */ + emit_lrm(brw, MI_PREDICATE_SRC0, ro_bo(bo, indirect_offset + 8)); + + /* predicate |= (compute_dispatch_indirect_z_size == 0); */ + brw_batch_emit(brw, GENX(MI_PREDICATE), mip) { + mip.LoadOperation = LOAD_LOAD; + 
mip.CombineOperation = COMBINE_OR; + mip.CompareOperation = COMPARE_SRCS_EQUAL; + } + + /* predicate = !predicate; */ +#define COMPARE_FALSE 1 + brw_batch_emit(brw, GENX(MI_PREDICATE), mip) { + mip.LoadOperation = LOAD_LOADINV; + mip.CombineOperation = COMBINE_OR; + mip.CompareOperation = COMPARE_FALSE; + } +#endif +} + +static void +genX(emit_gpgpu_walker)(struct brw_context *brw) +{ + const struct brw_cs_prog_data *prog_data = + brw_cs_prog_data(brw->cs.base.prog_data); + + const GLuint *num_groups = brw->compute.num_work_groups; + + bool indirect = brw->compute.num_work_groups_bo != NULL; + if (indirect) + prepare_indirect_gpgpu_walker(brw); + + const unsigned simd_size = prog_data->simd_size; + unsigned group_size = prog_data->local_size[0] * + prog_data->local_size[1] * prog_data->local_size[2]; + + uint32_t right_mask = 0xffffffffu >> (32 - simd_size); + const unsigned right_non_aligned = group_size & (simd_size - 1); + if (right_non_aligned != 0) + right_mask >>= (simd_size - right_non_aligned); + + brw_batch_emit(brw, GENX(GPGPU_WALKER), ggw) { + ggw.IndirectParameterEnable = indirect; + ggw.PredicateEnable = GEN_GEN <= 7 && indirect; + ggw.SIMDSize = prog_data->simd_size / 16; + ggw.ThreadDepthCounterMaximum = 0; + ggw.ThreadHeightCounterMaximum = 0; + ggw.ThreadWidthCounterMaximum = prog_data->threads - 1; + ggw.ThreadGroupIDXDimension = num_groups[0]; + ggw.ThreadGroupIDYDimension = num_groups[1]; + ggw.ThreadGroupIDZDimension = num_groups[2]; + ggw.RightExecutionMask = right_mask; + ggw.BottomExecutionMask = 0xffffffff; + } + + brw_batch_emit(brw, GENX(MEDIA_STATE_FLUSH), msf); +} + #endif /* ---------------------------------------------------------------------- */ @@ -5945,5 +6089,6 @@ compute_atoms, ARRAY_SIZE(compute_atoms)); brw->vtbl.emit_mi_report_perf_count = genX(emit_mi_report_perf_count); + brw->vtbl.emit_compute_walker = genX(emit_gpgpu_walker); #endif } diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/hsw_queryobj.c 
mesa-19.0.1/src/mesa/drivers/dri/i965/hsw_queryobj.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/hsw_queryobj.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/hsw_queryobj.c 2019-03-31 23:16:37.000000000 +0000 @@ -154,7 +154,7 @@ shr_gpr0_by_2_bits(struct brw_context *brw) { shl_gpr0_by_30_bits(brw); - brw_load_register_reg(brw, HSW_CS_GPR(0) + 4, HSW_CS_GPR(0)); + brw_load_register_reg(brw, HSW_CS_GPR(0), HSW_CS_GPR(0) + 4); brw_load_register_imm32(brw, HSW_CS_GPR(0) + 4, 0); } diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/hsw_sol.c mesa-19.0.1/src/mesa/drivers/dri/i965/hsw_sol.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/hsw_sol.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/hsw_sol.c 2019-03-31 23:16:37.000000000 +0000 @@ -98,7 +98,8 @@ brw_load_register_mem64(brw, HSW_CS_GPR(1), obj->prim_count_bo, START_OFFSET + i * sizeof(uint64_t)); /* GPR2 = Ending Snapshot */ - brw_load_register_reg64(brw, GEN7_SO_NUM_PRIMS_WRITTEN(i), HSW_CS_GPR(2)); + brw_load_register_reg64(brw, HSW_CS_GPR(2), + GEN7_SO_NUM_PRIMS_WRITTEN(i)); BEGIN_BATCH(9); OUT_BATCH(HSW_MI_MATH | (9 - 2)); diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_batchbuffer.c mesa-19.0.1/src/mesa/drivers/dri/i965/intel_batchbuffer.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_batchbuffer.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_batchbuffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -732,10 +732,10 @@ /* Update brw_bo::gtt_offset */ if (batch->validation_list[i].offset != bo->gtt_offset) { - assert(!(bo->kflags & EXEC_OBJECT_PINNED)); DBG("BO %d migrated: 0x%" PRIx64 " -> 0x%llx\n", bo->gem_handle, bo->gtt_offset, batch->validation_list[i].offset); + assert(!(bo->kflags & EXEC_OBJECT_PINNED)); bo->gtt_offset = batch->validation_list[i].offset; } } @@ -1218,7 +1218,7 @@ * Copies a 32-bit register. 
*/ void -brw_load_register_reg(struct brw_context *brw, uint32_t src, uint32_t dest) +brw_load_register_reg(struct brw_context *brw, uint32_t dest, uint32_t src) { assert(brw->screen->devinfo.gen >= 8 || brw->screen->devinfo.is_haswell); @@ -1233,7 +1233,7 @@ * Copies a 64-bit register. */ void -brw_load_register_reg64(struct brw_context *brw, uint32_t src, uint32_t dest) +brw_load_register_reg64(struct brw_context *brw, uint32_t dest, uint32_t src) { assert(brw->screen->devinfo.gen >= 8 || brw->screen->devinfo.is_haswell); diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_buffer_objects.c mesa-19.0.1/src/mesa/drivers/dri/i965/intel_buffer_objects.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_buffer_objects.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_buffer_objects.c 2019-03-31 23:16:37.000000000 +0000 @@ -40,7 +40,6 @@ #include "brw_blorp.h" #include "intel_buffer_objects.h" #include "intel_batchbuffer.h" -#include "intel_tiled_memcpy.h" static void mark_buffer_gpu_usage(struct intel_buffer_object *intel_obj, @@ -320,6 +319,8 @@ mark_buffer_valid_data(intel_obj, offset, size); } +/* Typedef for memcpy function (used in brw_get_buffer_subdata below). */ +typedef void *(*mem_copy_fn)(void *dest, const void *src, size_t n); /** * The GetBufferSubData() driver hook. 
diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_extensions.c mesa-19.0.1/src/mesa/drivers/dri/i965/intel_extensions.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_extensions.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_extensions.c 2019-03-31 23:16:37.000000000 +0000 @@ -104,6 +104,7 @@ ctx->Extensions.EXT_point_parameters = true; ctx->Extensions.EXT_provoking_vertex = true; ctx->Extensions.EXT_render_snorm = true; + ctx->Extensions.EXT_sRGB = true; ctx->Extensions.EXT_stencil_two_side = true; ctx->Extensions.EXT_texture_array = true; ctx->Extensions.EXT_texture_env_dot3 = true; @@ -113,6 +114,7 @@ ctx->Extensions.EXT_texture_snorm = true; ctx->Extensions.EXT_texture_sRGB = true; ctx->Extensions.EXT_texture_sRGB_decode = true; + ctx->Extensions.EXT_texture_sRGB_R8 = true; ctx->Extensions.EXT_texture_swizzle = true; ctx->Extensions.EXT_texture_type_2_10_10_10_REV = true; ctx->Extensions.EXT_vertex_array_bgra = true; @@ -180,14 +182,16 @@ ctx->Extensions.ARB_conditional_render_inverted = true; ctx->Extensions.ARB_cull_distance = true; ctx->Extensions.ARB_draw_buffers_blend = true; - if (ctx->API != API_OPENGL_COMPAT) + if (ctx->API != API_OPENGL_COMPAT || + ctx->Const.AllowHigherCompatVersion) ctx->Extensions.ARB_enhanced_layouts = true; ctx->Extensions.ARB_ES3_compatibility = true; ctx->Extensions.ARB_fragment_layer_viewport = true; ctx->Extensions.ARB_pipeline_statistics_query = true; ctx->Extensions.ARB_sample_shading = true; ctx->Extensions.ARB_shading_language_420pack = true; - if (ctx->API != API_OPENGL_COMPAT) { + if (ctx->API != API_OPENGL_COMPAT || + ctx->Const.AllowHigherCompatVersion) { ctx->Extensions.ARB_texture_buffer_object = true; ctx->Extensions.ARB_texture_buffer_object_rgb32 = true; ctx->Extensions.ARB_texture_buffer_range = true; @@ -197,7 +201,8 @@ ctx->Extensions.ARB_texture_multisample = true; ctx->Extensions.ARB_uniform_buffer_object = true; - if (ctx->API != API_OPENGL_COMPAT) + if (ctx->API 
!= API_OPENGL_COMPAT || + ctx->Const.AllowHigherCompatVersion) ctx->Extensions.AMD_vertex_shader_layer = true; ctx->Extensions.EXT_framebuffer_multisample = true; ctx->Extensions.EXT_framebuffer_multisample_blit_scaled = true; @@ -226,9 +231,10 @@ ctx->Extensions.ARB_conservative_depth = true; ctx->Extensions.ARB_derivative_control = true; ctx->Extensions.ARB_framebuffer_no_attachments = true; - if (ctx->API != API_OPENGL_COMPAT) { + if (ctx->API != API_OPENGL_COMPAT || + ctx->Const.AllowHigherCompatVersion) { ctx->Extensions.ARB_gpu_shader5 = true; - ctx->Extensions.ARB_gpu_shader_fp64 = devinfo->has_64bit_types; + ctx->Extensions.ARB_gpu_shader_fp64 = true; } ctx->Extensions.ARB_shader_atomic_counters = true; ctx->Extensions.ARB_shader_atomic_counter_ops = true; @@ -237,16 +243,16 @@ ctx->Extensions.ARB_shader_image_size = true; ctx->Extensions.ARB_shader_precision = true; ctx->Extensions.ARB_shader_texture_image_samples = true; - if (ctx->API != API_OPENGL_COMPAT) + if (ctx->API != API_OPENGL_COMPAT || + ctx->Const.AllowHigherCompatVersion) ctx->Extensions.ARB_tessellation_shader = true; ctx->Extensions.ARB_texture_compression_bptc = true; ctx->Extensions.ARB_texture_view = true; ctx->Extensions.ARB_shader_storage_buffer_object = true; - ctx->Extensions.ARB_vertex_attrib_64bit = devinfo->has_64bit_types; + ctx->Extensions.ARB_vertex_attrib_64bit = true; ctx->Extensions.EXT_shader_samples_identical = true; ctx->Extensions.OES_primitive_bounding_box = true; ctx->Extensions.OES_texture_buffer = true; - ctx->Extensions.ARB_fragment_shader_interlock = true; if (can_do_pipelined_register_writes(brw->screen)) { ctx->Extensions.ARB_draw_indirect = true; @@ -294,9 +300,9 @@ } if (devinfo->gen >= 8) { - ctx->Extensions.ARB_gpu_shader_int64 = devinfo->has_64bit_types; + ctx->Extensions.ARB_gpu_shader_int64 = true; /* requires ARB_gpu_shader_int64 */ - ctx->Extensions.ARB_shader_ballot = devinfo->has_64bit_types; + ctx->Extensions.ARB_shader_ballot = true; 
ctx->Extensions.ARB_ES3_2_compatibility = true; } @@ -311,6 +317,30 @@ ctx->Extensions.KHR_blend_equation_advanced_coherent = true; ctx->Extensions.KHR_texture_compression_astc_ldr = true; ctx->Extensions.KHR_texture_compression_astc_sliced_3d = true; + + /* + * From the Skylake PRM Vol. 7 (Memory Fence Message, page 221): + * "A memory fence message issued by a thread causes further messages + * issued by the thread to be blocked until all previous data port + * messages have completed, or the results can be globally observed from + * the point of view of other threads in the system." + * + * From the Haswell PRM Vol. 7 (Memory Fence, page 256): + * "A memory fence message issued by a thread causes further messages + * issued by the thread to be blocked until all previous messages issued + * by the thread to that data port (data cache or render cache) have + * been globally observed from the point of view of other threads in the + * system." + * + * Summarized: For ARB_fragment_shader_interlock to work, we need to + * ensure memory access ordering for all messages to the dataport from + * all threads. Memory fence messages prior to SKL only provide memory + * access ordering for messages from the same thread, so we can only + * support the feature from Gen9 onwards. 
+ * + */ + + ctx->Extensions.ARB_fragment_shader_interlock = true; } if (gen_device_info_is_9lp(devinfo)) @@ -319,7 +349,8 @@ if (devinfo->gen >= 6) ctx->Extensions.INTEL_performance_query = true; - if (ctx->API != API_OPENGL_COMPAT) + if (ctx->API != API_OPENGL_COMPAT || + ctx->Const.AllowHigherCompatVersion) ctx->Extensions.ARB_base_instance = true; if (ctx->API != API_OPENGL_CORE) ctx->Extensions.ARB_color_buffer_float = true; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_fbo.c mesa-19.0.1/src/mesa/drivers/dri/i965/intel_fbo.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_fbo.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_fbo.c 2019-03-31 23:16:37.000000000 +0000 @@ -629,7 +629,7 @@ } -#define fbo_incomplete(fb, ...) do { \ +#define fbo_incomplete(fb, error_id, ...) do { \ static GLuint msg_id = 0; \ if (unlikely(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT)) { \ _mesa_gl_debug(ctx, &msg_id, \ @@ -639,7 +639,7 @@ __VA_ARGS__); \ } \ DBG(__VA_ARGS__); \ - fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED; \ + fb->_Status = error_id; \ } while (0) /** @@ -693,7 +693,7 @@ d_depth != s_depth || depthRb->mt_level != stencilRb->mt_level || depthRb->mt_layer != stencilRb->mt_layer) { - fbo_incomplete(fb, + fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, "FBO incomplete: depth and stencil must match in" "width, height, depth, LOD and layer\n"); } @@ -705,7 +705,7 @@ */ if (depthRb->mt_level != stencilRb->mt_level || depthRb->mt_layer != stencilRb->mt_layer) { - fbo_incomplete(fb, + fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, "FBO incomplete: depth image level/layer %d/%d != " "stencil image %d/%d\n", depthRb->mt_level, @@ -715,13 +715,14 @@ } } else { if (!brw->has_separate_stencil) { - fbo_incomplete(fb, "FBO incomplete: separate stencil " - "unsupported\n"); + fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, + "FBO incomplete: separate stencil unsupported\n"); } if (stencil_mt->format != MESA_FORMAT_S_UINT8) { - 
fbo_incomplete(fb, "FBO incomplete: separate stencil is %s " - "instead of S8\n", - _mesa_get_format_name(stencil_mt->format)); + fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, + "FBO incomplete: separate stencil is %s " + "instead of S8\n", + _mesa_get_format_name(stencil_mt->format)); } if (devinfo->gen < 7 && !intel_renderbuffer_has_hiz(depthRb)) { /* Before Gen7, separate depth and stencil buffers can be used @@ -730,8 +731,8 @@ * [DevSNB]: This field must be set to the same value (enabled * or disabled) as Hierarchical Depth Buffer Enable. */ - fbo_incomplete(fb, "FBO incomplete: separate stencil " - "without HiZ\n"); + fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, + "FBO incomplete: separate stencil without HiZ\n"); } } } @@ -749,29 +750,39 @@ */ rb = fb->Attachment[i].Renderbuffer; if (rb == NULL) { - fbo_incomplete(fb, "FBO incomplete: attachment without " - "renderbuffer\n"); + fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, + "FBO incomplete: attachment without " + "renderbuffer\n"); continue; } if (fb->Attachment[i].Type == GL_TEXTURE) { if (rb->TexImage->Border) { - fbo_incomplete(fb, "FBO incomplete: texture with border\n"); + fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, + "FBO incomplete: texture with border\n"); continue; } } irb = intel_renderbuffer(rb); if (irb == NULL) { - fbo_incomplete(fb, "FBO incomplete: software rendering " - "renderbuffer\n"); + fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, + "FBO incomplete: software rendering renderbuffer\n"); continue; } + if (rb->Format == MESA_FORMAT_R_SRGB8) { + fbo_incomplete(fb, GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT, + "FBO incomplete: Format not color renderable: %s\n", + _mesa_get_format_name(rb->Format)); + continue; + } + if (!brw_render_target_supported(brw, rb)) { - fbo_incomplete(fb, "FBO incomplete: Unsupported HW " - "texture/renderbuffer format attached: %s\n", - _mesa_get_format_name(intel_rb_format(irb))); + fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, + "FBO incomplete: Unsupported 
HW " + "texture/renderbuffer format attached: %s\n", + _mesa_get_format_name(intel_rb_format(irb))); } } } diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_image.h mesa-19.0.1/src/mesa/drivers/dri/i965/intel_image.h --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_image.h 2018-02-19 19:52:02.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_image.h 2019-03-31 23:16:37.000000000 +0000 @@ -89,9 +89,6 @@ GLuint tile_y; bool has_depthstencil; - /** The image was created with EGL_EXT_image_dma_buf_import. */ - bool dma_buf_imported; - /** Offset of the auxiliary compression surface in the bo. */ uint32_t aux_offset; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_mipmap_tree.c mesa-19.0.1/src/mesa/drivers/dri/i965/intel_mipmap_tree.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 2019-03-31 23:16:37.000000000 +0000 @@ -31,8 +31,6 @@ #include "intel_image.h" #include "intel_mipmap_tree.h" #include "intel_tex.h" -#include "intel_tiled_memcpy.h" -#include "intel_tiled_memcpy_sse41.h" #include "intel_blit.h" #include "intel_fbo.h" @@ -3126,9 +3124,9 @@ char *dst = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW); dst += mt->offset; - linear_to_tiled(x1, x2, y1, y2, dst, map->ptr, mt->surf.row_pitch_B, - map->stride, brw->has_swizzling, mt->surf.tiling, - INTEL_COPY_MEMCPY); + isl_memcpy_linear_to_tiled( + x1, x2, y1, y2, dst, map->ptr, mt->surf.row_pitch_B, map->stride, + brw->has_swizzling, mt->surf.tiling, ISL_MEMCPY); intel_miptree_unmap_raw(mt); } @@ -3136,6 +3134,66 @@ map->buffer = map->ptr = NULL; } +/** + * Determine which copy function to use for the given format combination + * + * The only two possible copy functions which are ever returned are a + * direct memcpy and a RGBA <-> BGRA copy function. 
Since RGBA -> BGRA and + * BGRA -> RGBA are exactly the same operation (and memcpy is obviously + * symmetric), it doesn't matter whether the copy is from the tiled image + * to the untiled or vice versa. The copy function required is the same in + * either case so this function can be used. + * + * \param[in] tiledFormat The format of the tiled image + * \param[in] format The GL format of the client data + * \param[in] type The GL type of the client data + * \param[out] mem_copy Will be set to one of either the standard + * library's memcpy or a different copy function + * that performs an RGBA to BGRA conversion + * \param[out] cpp Number of bytes per channel + * + * \return true if the format and type combination are valid + */ +MAYBE_UNUSED isl_memcpy_type +intel_miptree_get_memcpy_type(mesa_format tiledFormat, GLenum format, GLenum type, + uint32_t *cpp) +{ + if (type == GL_UNSIGNED_INT_8_8_8_8_REV && + !(format == GL_RGBA || format == GL_BGRA)) + return ISL_MEMCPY_INVALID; /* Invalid type/format combination */ + + if ((tiledFormat == MESA_FORMAT_L_UNORM8 && format == GL_LUMINANCE) || + (tiledFormat == MESA_FORMAT_A_UNORM8 && format == GL_ALPHA)) { + *cpp = 1; + return ISL_MEMCPY; + } else if ((tiledFormat == MESA_FORMAT_B8G8R8A8_UNORM) || + (tiledFormat == MESA_FORMAT_B8G8R8X8_UNORM) || + (tiledFormat == MESA_FORMAT_B8G8R8A8_SRGB) || + (tiledFormat == MESA_FORMAT_B8G8R8X8_SRGB)) { + *cpp = 4; + if (format == GL_BGRA) { + return ISL_MEMCPY; + } else if (format == GL_RGBA) { + return ISL_MEMCPY_BGRA8; + } + } else if ((tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM) || + (tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM) || + (tiledFormat == MESA_FORMAT_R8G8B8A8_SRGB) || + (tiledFormat == MESA_FORMAT_R8G8B8X8_SRGB)) { + *cpp = 4; + if (format == GL_BGRA) { + /* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can + * use the same function. 
+ */ + return ISL_MEMCPY_BGRA8; + } else if (format == GL_RGBA) { + return ISL_MEMCPY; + } + } + + return ISL_MEMCPY_INVALID; +} + static void intel_miptree_map_tiled_memcpy(struct brw_context *brw, struct intel_mipmap_tree *mt, @@ -3162,21 +3220,16 @@ char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW); src += mt->offset; - const tiled_to_linear_fn ttl_func = -#if defined(USE_SSE41) - cpu_has_sse4_1 ? tiled_to_linear_sse41 : -#endif - tiled_to_linear; - - const mem_copy_fn_type copy_type = + const isl_memcpy_type copy_type = #if defined(USE_SSE41) - cpu_has_sse4_1 ? INTEL_COPY_STREAMING_LOAD : + cpu_has_sse4_1 ? ISL_MEMCPY_STREAMING_LOAD : #endif - INTEL_COPY_MEMCPY; + ISL_MEMCPY; - ttl_func(x1, x2, y1, y2, map->ptr, src, map->stride, - mt->surf.row_pitch_B, brw->has_swizzling, mt->surf.tiling, - copy_type); + isl_memcpy_tiled_to_linear( + x1, x2, y1, y2, map->ptr, src, map->stride, + mt->surf.row_pitch_B, brw->has_swizzling, mt->surf.tiling, + copy_type); intel_miptree_unmap_raw(mt); } @@ -3865,7 +3918,7 @@ const struct intel_mipmap_tree *mt, enum isl_format view_format, bool sampling, struct brw_bo **clear_color_bo, - uint32_t *clear_color_offset) + uint64_t *clear_color_offset) { assert(mt->aux_buf); diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_mipmap_tree.h mesa-19.0.1/src/mesa/drivers/dri/i965/intel_mipmap_tree.h --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 2019-03-31 23:16:37.000000000 +0000 @@ -714,7 +714,7 @@ const struct intel_mipmap_tree *mt, enum isl_format view_format, bool sampling, struct brw_bo **clear_color_bo, - uint32_t *clear_color_offset); + uint64_t *clear_color_offset); static inline int @@ -726,6 +726,10 @@ return pitch; } +isl_memcpy_type +intel_miptree_get_memcpy_type(mesa_format tiledFormat, GLenum format, GLenum type, + uint32_t *cpp); + #ifdef __cplusplus } #endif diff -Nru 
mesa-18.3.3/src/mesa/drivers/dri/i965/intel_pixel_read.c mesa-19.0.1/src/mesa/drivers/dri/i965/intel_pixel_read.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_pixel_read.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_pixel_read.c 2019-03-31 23:16:37.000000000 +0000 @@ -44,7 +44,6 @@ #include "intel_mipmap_tree.h" #include "intel_pixel.h" #include "intel_buffer_objects.h" -#include "intel_tiled_memcpy.h" #define FILE_DEBUG_FLAG DEBUG_PIXEL @@ -87,7 +86,7 @@ struct brw_bo *bo; uint32_t cpp; - mem_copy_fn_type copy_type; + isl_memcpy_type copy_type; /* This fastpath is restricted to specific renderbuffer types: * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support @@ -125,7 +124,8 @@ if (rb->_BaseFormat == GL_RGB) return false; - if (!intel_get_memcpy_type(rb->Format, format, type, ©_type, &cpp)) + copy_type = intel_miptree_get_memcpy_type(rb->Format, format, type, &cpp); + if (copy_type == ISL_MEMCPY_INVALID) return false; if (!irb->mt || @@ -198,7 +198,7 @@ pack->Alignment, pack->RowLength, pack->SkipPixels, pack->SkipRows); - tiled_to_linear( + isl_memcpy_tiled_to_linear( xoffset * cpp, (xoffset + width) * cpp, yoffset, yoffset + height, pixels, diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_screen.c mesa-19.0.1/src/mesa/drivers/dri/i965/intel_screen.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -286,6 +286,9 @@ { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, { 1, 1, 0, __DRI_IMAGE_FORMAT_GR88, 2 } } }, + { __DRI_IMAGE_FOURCC_AYUV, __DRI_IMAGE_COMPONENTS_AYUV, 1, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_ABGR8888, 4 } } }, + /* For YUYV and UYVY buffers, we set up two overlapping DRI images * and treat them as planar buffers in the compositors. 
* Plane 0 is GR88 and samples YU or YV pairs and places Y into @@ -957,7 +960,6 @@ image->tile_y = orig_image->tile_y; image->has_depthstencil = orig_image->has_depthstencil; image->data = loaderPrivate; - image->dma_buf_imported = orig_image->dma_buf_imported; image->aux_offset = orig_image->aux_offset; image->aux_pitch = orig_image->aux_pitch; @@ -1237,7 +1239,6 @@ return NULL; } - image->dma_buf_imported = true; image->yuv_color_space = yuv_color_space; image->sample_range = sample_range; image->horizontal_siting = horizontal_siting; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tex_image.c mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tex_image.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tex_image.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tex_image.c 2019-03-31 23:16:37.000000000 +0000 @@ -23,7 +23,6 @@ #include "intel_tex.h" #include "intel_fbo.h" #include "intel_image.h" -#include "intel_tiled_memcpy.h" #include "brw_context.h" #include "brw_blorp.h" @@ -192,7 +191,7 @@ struct brw_bo *bo; uint32_t cpp; - mem_copy_fn_type copy_type; + isl_memcpy_type copy_type; /* This fastpath is restricted to specific texture types: * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support @@ -222,8 +221,9 @@ if (ctx->_ImageTransferState) return false; - if (!intel_get_memcpy_type(texImage->TexFormat, format, type, ©_type, - &cpp)) + copy_type = intel_miptree_get_memcpy_type(texImage->TexFormat, format, type, + &cpp); + if (copy_type == ISL_MEMCPY_INVALID) return false; /* If this is a nontrivial texture view, let another path handle it instead. */ @@ -290,7 +290,7 @@ xoffset += level_x; yoffset += level_y; - linear_to_tiled( + isl_memcpy_linear_to_tiled( xoffset * cpp, (xoffset + width) * cpp, yoffset, yoffset + height, map, @@ -614,16 +614,6 @@ if (image == NULL) return; - /* We support external textures only for EGLImages created with - * EGL_EXT_image_dma_buf_import. 
We may lift that restriction in the future. - */ - if (target == GL_TEXTURE_EXTERNAL_OES && !image->dma_buf_imported) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glEGLImageTargetTexture2DOES(external target is enabled only " - "for images created with EGL_EXT_image_dma_buf_import"); - return; - } - /* Disallow depth/stencil textures: we don't have a way to pass the * separate stencil miptree of a GL_DEPTH_STENCIL texture through. */ @@ -695,7 +685,7 @@ struct brw_bo *bo; uint32_t cpp; - mem_copy_fn_type copy_type; + isl_memcpy_type copy_type; /* This fastpath is restricted to specific texture types: * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support @@ -729,8 +719,9 @@ if (texImage->_BaseFormat == GL_RGB) return false; - if (!intel_get_memcpy_type(texImage->TexFormat, format, type, ©_type, - &cpp)) + copy_type = intel_miptree_get_memcpy_type(texImage->TexFormat, format, type, + &cpp); + if (copy_type == ISL_MEMCPY_INVALID) return false; /* If this is a nontrivial texture view, let another path handle it instead. 
*/ @@ -794,7 +785,7 @@ xoffset += level_x; yoffset += level_y; - tiled_to_linear( + isl_memcpy_tiled_to_linear( xoffset * cpp, (xoffset + width) * cpp, yoffset, yoffset + height, pixels, diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,1003 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright 2012 Intel Corporation - * Copyright 2013 Google - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Chad Versace - * Frank Henigman - */ - -#include - -#include "util/macros.h" - -#include "brw_context.h" -#include "intel_tiled_memcpy.h" - -#if defined(__SSSE3__) -#include -#elif defined(__SSE2__) -#include -#endif - -#define FILE_DEBUG_FLAG DEBUG_TEXTURE - -#define ALIGN_DOWN(a, b) ROUND_DOWN_TO(a, b) -#define ALIGN_UP(a, b) ALIGN(a, b) - -/* Tile dimensions. Width and span are in bytes, height is in pixels (i.e. - * unitless). A "span" is the most number of bytes we can copy from linear - * to tiled without needing to calculate a new destination address. - */ -static const uint32_t xtile_width = 512; -static const uint32_t xtile_height = 8; -static const uint32_t xtile_span = 64; -static const uint32_t ytile_width = 128; -static const uint32_t ytile_height = 32; -static const uint32_t ytile_span = 16; - -static inline uint32_t -ror(uint32_t n, uint32_t d) -{ - return (n >> d) | (n << (32 - d)); -} - -static inline uint32_t -bswap32(uint32_t n) -{ -#if defined(HAVE___BUILTIN_BSWAP32) - return __builtin_bswap32(n); -#else - return (n >> 24) | - ((n >> 8) & 0x0000ff00) | - ((n << 8) & 0x00ff0000) | - (n << 24); -#endif -} - -/** - * Copy RGBA to BGRA - swap R and B. 
- */ -static inline void * -rgba8_copy(void *dst, const void *src, size_t bytes) -{ - uint32_t *d = dst; - uint32_t const *s = src; - - assert(bytes % 4 == 0); - - while (bytes >= 4) { - *d = ror(bswap32(*s), 8); - d += 1; - s += 1; - bytes -= 4; - } - return dst; -} - -#ifdef __SSSE3__ -static const uint8_t rgba8_permutation[16] = - { 2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15 }; - -static inline void -rgba8_copy_16_aligned_dst(void *dst, const void *src) -{ - _mm_store_si128(dst, - _mm_shuffle_epi8(_mm_loadu_si128(src), - *(__m128i *)rgba8_permutation)); -} - -static inline void -rgba8_copy_16_aligned_src(void *dst, const void *src) -{ - _mm_storeu_si128(dst, - _mm_shuffle_epi8(_mm_load_si128(src), - *(__m128i *)rgba8_permutation)); -} - -#elif defined(__SSE2__) -static inline void -rgba8_copy_16_aligned_dst(void *dst, const void *src) -{ - __m128i srcreg, dstreg, agmask, ag, rb, br; - - agmask = _mm_set1_epi32(0xFF00FF00); - srcreg = _mm_loadu_si128((__m128i *)src); - - rb = _mm_andnot_si128(agmask, srcreg); - ag = _mm_and_si128(agmask, srcreg); - br = _mm_shufflehi_epi16(_mm_shufflelo_epi16(rb, _MM_SHUFFLE(2, 3, 0, 1)), - _MM_SHUFFLE(2, 3, 0, 1)); - dstreg = _mm_or_si128(ag, br); - - _mm_store_si128((__m128i *)dst, dstreg); -} - -static inline void -rgba8_copy_16_aligned_src(void *dst, const void *src) -{ - __m128i srcreg, dstreg, agmask, ag, rb, br; - - agmask = _mm_set1_epi32(0xFF00FF00); - srcreg = _mm_load_si128((__m128i *)src); - - rb = _mm_andnot_si128(agmask, srcreg); - ag = _mm_and_si128(agmask, srcreg); - br = _mm_shufflehi_epi16(_mm_shufflelo_epi16(rb, _MM_SHUFFLE(2, 3, 0, 1)), - _MM_SHUFFLE(2, 3, 0, 1)); - dstreg = _mm_or_si128(ag, br); - - _mm_storeu_si128((__m128i *)dst, dstreg); -} -#endif - -/** - * Copy RGBA to BGRA - swap R and B, with the destination 16-byte aligned. 
- */ -static inline void * -rgba8_copy_aligned_dst(void *dst, const void *src, size_t bytes) -{ - assert(bytes == 0 || !(((uintptr_t)dst) & 0xf)); - -#if defined(__SSSE3__) || defined(__SSE2__) - if (bytes == 64) { - rgba8_copy_16_aligned_dst(dst + 0, src + 0); - rgba8_copy_16_aligned_dst(dst + 16, src + 16); - rgba8_copy_16_aligned_dst(dst + 32, src + 32); - rgba8_copy_16_aligned_dst(dst + 48, src + 48); - return dst; - } - - while (bytes >= 16) { - rgba8_copy_16_aligned_dst(dst, src); - src += 16; - dst += 16; - bytes -= 16; - } -#endif - - rgba8_copy(dst, src, bytes); - - return dst; -} - -/** - * Copy RGBA to BGRA - swap R and B, with the source 16-byte aligned. - */ -static inline void * -rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes) -{ - assert(bytes == 0 || !(((uintptr_t)src) & 0xf)); - -#if defined(__SSSE3__) || defined(__SSE2__) - if (bytes == 64) { - rgba8_copy_16_aligned_src(dst + 0, src + 0); - rgba8_copy_16_aligned_src(dst + 16, src + 16); - rgba8_copy_16_aligned_src(dst + 32, src + 32); - rgba8_copy_16_aligned_src(dst + 48, src + 48); - return dst; - } - - while (bytes >= 16) { - rgba8_copy_16_aligned_src(dst, src); - src += 16; - dst += 16; - bytes -= 16; - } -#endif - - rgba8_copy(dst, src, bytes); - - return dst; -} - -/** - * Each row from y0 to y1 is copied in three parts: [x0,x1), [x1,x2), [x2,x3). - * These ranges are in bytes, i.e. pixels * bytes-per-pixel. - * The first and last ranges must be shorter than a "span" (the longest linear - * stretch within a tile) and the middle must equal a whole number of spans. - * Ranges may be empty. The region copied must land entirely within one tile. - * 'dst' is the start of the tile and 'src' is the corresponding - * address to copy from, though copying begins at (x0, y0). - * To enable swizzling 'swizzle_bit' must be 1<<6, otherwise zero. - * Swizzling flips bit 6 in the copy destination offset, when certain other - * bits are set in it. 
- */ -typedef void (*tile_copy_fn)(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, - uint32_t y0, uint32_t y1, - char *dst, const char *src, - int32_t linear_pitch, - uint32_t swizzle_bit, - mem_copy_fn_type copy_type); - -/** - * Copy texture data from linear to X tile layout. - * - * \copydoc tile_copy_fn - * - * The mem_copy parameters allow the user to specify an alternative mem_copy - * function that, for instance, may do RGBA -> BGRA swizzling. The first - * function must handle any memory alignment while the second function must - * only handle 16-byte alignment in whichever side (source or destination) is - * tiled. - */ -static inline void -linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, - uint32_t y0, uint32_t y1, - char *dst, const char *src, - int32_t src_pitch, - uint32_t swizzle_bit, - mem_copy_fn mem_copy, - mem_copy_fn mem_copy_align16) -{ - /* The copy destination offset for each range copied is the sum of - * an X offset 'x0' or 'xo' and a Y offset 'yo.' - */ - uint32_t xo, yo; - - src += (ptrdiff_t)y0 * src_pitch; - - for (yo = y0 * xtile_width; yo < y1 * xtile_width; yo += xtile_width) { - /* Bits 9 and 10 of the copy destination offset control swizzling. - * Only 'yo' contributes to those bits in the total offset, - * so calculate 'swizzle' just once per row. - * Move bits 9 and 10 three and four places respectively down - * to bit 6 and xor them. - */ - uint32_t swizzle = ((yo >> 3) ^ (yo >> 4)) & swizzle_bit; - - mem_copy(dst + ((x0 + yo) ^ swizzle), src + x0, x1 - x0); - - for (xo = x1; xo < x2; xo += xtile_span) { - mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + xo, xtile_span); - } - - mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2); - - src += src_pitch; - } -} - -/** - * Copy texture data from linear to Y tile layout. 
- * - * \copydoc tile_copy_fn - */ -static inline void -linear_to_ytiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, - uint32_t y0, uint32_t y3, - char *dst, const char *src, - int32_t src_pitch, - uint32_t swizzle_bit, - mem_copy_fn mem_copy, - mem_copy_fn mem_copy_align16) -{ - /* Y tiles consist of columns that are 'ytile_span' wide (and the same height - * as the tile). Thus the destination offset for (x,y) is the sum of: - * (x % column_width) // position within column - * (x / column_width) * bytes_per_column // column number * bytes per column - * y * column_width - * - * The copy destination offset for each range copied is the sum of - * an X offset 'xo0' or 'xo' and a Y offset 'yo.' - */ - const uint32_t column_width = ytile_span; - const uint32_t bytes_per_column = column_width * ytile_height; - - uint32_t y1 = MIN2(y3, ALIGN_UP(y0, 4)); - uint32_t y2 = MAX2(y1, ALIGN_DOWN(y3, 4)); - - uint32_t xo0 = (x0 % ytile_span) + (x0 / ytile_span) * bytes_per_column; - uint32_t xo1 = (x1 % ytile_span) + (x1 / ytile_span) * bytes_per_column; - - /* Bit 9 of the destination offset control swizzling. - * Only the X offset contributes to bit 9 of the total offset, - * so swizzle can be calculated in advance for these X positions. - * Move bit 9 three places down to bit 6. - */ - uint32_t swizzle0 = (xo0 >> 3) & swizzle_bit; - uint32_t swizzle1 = (xo1 >> 3) & swizzle_bit; - - uint32_t x, yo; - - src += (ptrdiff_t)y0 * src_pitch; - - if (y0 != y1) { - for (yo = y0 * column_width; yo < y1 * column_width; yo += column_width) { - uint32_t xo = xo1; - uint32_t swizzle = swizzle1; - - mem_copy(dst + ((xo0 + yo) ^ swizzle0), src + x0, x1 - x0); - - /* Step by spans/columns. As it happens, the swizzle bit flips - * at each step so we don't need to calculate it explicitly. 
- */ - for (x = x1; x < x2; x += ytile_span) { - mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x, ytile_span); - xo += bytes_per_column; - swizzle ^= swizzle_bit; - } - - mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2); - - src += src_pitch; - } - } - - for (yo = y1 * column_width; yo < y2 * column_width; yo += 4 * column_width) { - uint32_t xo = xo1; - uint32_t swizzle = swizzle1; - - if (x0 != x1) { - mem_copy(dst + ((xo0 + yo + 0 * column_width) ^ swizzle0), src + x0 + 0 * src_pitch, x1 - x0); - mem_copy(dst + ((xo0 + yo + 1 * column_width) ^ swizzle0), src + x0 + 1 * src_pitch, x1 - x0); - mem_copy(dst + ((xo0 + yo + 2 * column_width) ^ swizzle0), src + x0 + 2 * src_pitch, x1 - x0); - mem_copy(dst + ((xo0 + yo + 3 * column_width) ^ swizzle0), src + x0 + 3 * src_pitch, x1 - x0); - } - - /* Step by spans/columns. As it happens, the swizzle bit flips - * at each step so we don't need to calculate it explicitly. - */ - for (x = x1; x < x2; x += ytile_span) { - mem_copy_align16(dst + ((xo + yo + 0 * column_width) ^ swizzle), src + x + 0 * src_pitch, ytile_span); - mem_copy_align16(dst + ((xo + yo + 1 * column_width) ^ swizzle), src + x + 1 * src_pitch, ytile_span); - mem_copy_align16(dst + ((xo + yo + 2 * column_width) ^ swizzle), src + x + 2 * src_pitch, ytile_span); - mem_copy_align16(dst + ((xo + yo + 3 * column_width) ^ swizzle), src + x + 3 * src_pitch, ytile_span); - xo += bytes_per_column; - swizzle ^= swizzle_bit; - } - - if (x2 != x3) { - mem_copy_align16(dst + ((xo + yo + 0 * column_width) ^ swizzle), src + x2 + 0 * src_pitch, x3 - x2); - mem_copy_align16(dst + ((xo + yo + 1 * column_width) ^ swizzle), src + x2 + 1 * src_pitch, x3 - x2); - mem_copy_align16(dst + ((xo + yo + 2 * column_width) ^ swizzle), src + x2 + 2 * src_pitch, x3 - x2); - mem_copy_align16(dst + ((xo + yo + 3 * column_width) ^ swizzle), src + x2 + 3 * src_pitch, x3 - x2); - } - - src += 4 * src_pitch; - } - - if (y2 != y3) { - for (yo = y2 * column_width; yo < y3 * 
column_width; yo += column_width) { - uint32_t xo = xo1; - uint32_t swizzle = swizzle1; - - mem_copy(dst + ((xo0 + yo) ^ swizzle0), src + x0, x1 - x0); - - /* Step by spans/columns. As it happens, the swizzle bit flips - * at each step so we don't need to calculate it explicitly. - */ - for (x = x1; x < x2; x += ytile_span) { - mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x, ytile_span); - xo += bytes_per_column; - swizzle ^= swizzle_bit; - } - - mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2); - - src += src_pitch; - } - } -} - -/** - * Copy texture data from X tile layout to linear. - * - * \copydoc tile_copy_fn - */ -static inline void -xtiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, - uint32_t y0, uint32_t y1, - char *dst, const char *src, - int32_t dst_pitch, - uint32_t swizzle_bit, - mem_copy_fn mem_copy, - mem_copy_fn mem_copy_align16) -{ - /* The copy destination offset for each range copied is the sum of - * an X offset 'x0' or 'xo' and a Y offset 'yo.' - */ - uint32_t xo, yo; - - dst += (ptrdiff_t)y0 * dst_pitch; - - for (yo = y0 * xtile_width; yo < y1 * xtile_width; yo += xtile_width) { - /* Bits 9 and 10 of the copy destination offset control swizzling. - * Only 'yo' contributes to those bits in the total offset, - * so calculate 'swizzle' just once per row. - * Move bits 9 and 10 three and four places respectively down - * to bit 6 and xor them. - */ - uint32_t swizzle = ((yo >> 3) ^ (yo >> 4)) & swizzle_bit; - - mem_copy(dst + x0, src + ((x0 + yo) ^ swizzle), x1 - x0); - - for (xo = x1; xo < x2; xo += xtile_span) { - mem_copy_align16(dst + xo, src + ((xo + yo) ^ swizzle), xtile_span); - } - - mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2); - - dst += dst_pitch; - } -} - - /** - * Copy texture data from Y tile layout to linear. 
- * - * \copydoc tile_copy_fn - */ -static inline void -ytiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, - uint32_t y0, uint32_t y3, - char *dst, const char *src, - int32_t dst_pitch, - uint32_t swizzle_bit, - mem_copy_fn mem_copy, - mem_copy_fn mem_copy_align16) -{ - /* Y tiles consist of columns that are 'ytile_span' wide (and the same height - * as the tile). Thus the destination offset for (x,y) is the sum of: - * (x % column_width) // position within column - * (x / column_width) * bytes_per_column // column number * bytes per column - * y * column_width - * - * The copy destination offset for each range copied is the sum of - * an X offset 'xo0' or 'xo' and a Y offset 'yo.' - */ - const uint32_t column_width = ytile_span; - const uint32_t bytes_per_column = column_width * ytile_height; - - uint32_t y1 = MIN2(y3, ALIGN_UP(y0, 4)); - uint32_t y2 = MAX2(y1, ALIGN_DOWN(y3, 4)); - - uint32_t xo0 = (x0 % ytile_span) + (x0 / ytile_span) * bytes_per_column; - uint32_t xo1 = (x1 % ytile_span) + (x1 / ytile_span) * bytes_per_column; - - /* Bit 9 of the destination offset control swizzling. - * Only the X offset contributes to bit 9 of the total offset, - * so swizzle can be calculated in advance for these X positions. - * Move bit 9 three places down to bit 6. - */ - uint32_t swizzle0 = (xo0 >> 3) & swizzle_bit; - uint32_t swizzle1 = (xo1 >> 3) & swizzle_bit; - - uint32_t x, yo; - - dst += (ptrdiff_t)y0 * dst_pitch; - - if (y0 != y1) { - for (yo = y0 * column_width; yo < y1 * column_width; yo += column_width) { - uint32_t xo = xo1; - uint32_t swizzle = swizzle1; - - mem_copy(dst + x0, src + ((xo0 + yo) ^ swizzle0), x1 - x0); - - /* Step by spans/columns. As it happens, the swizzle bit flips - * at each step so we don't need to calculate it explicitly. 
- */ - for (x = x1; x < x2; x += ytile_span) { - mem_copy_align16(dst + x, src + ((xo + yo) ^ swizzle), ytile_span); - xo += bytes_per_column; - swizzle ^= swizzle_bit; - } - - mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2); - - dst += dst_pitch; - } - } - - for (yo = y1 * column_width; yo < y2 * column_width; yo += 4 * column_width) { - uint32_t xo = xo1; - uint32_t swizzle = swizzle1; - - if (x0 != x1) { - mem_copy(dst + x0 + 0 * dst_pitch, src + ((xo0 + yo + 0 * column_width) ^ swizzle0), x1 - x0); - mem_copy(dst + x0 + 1 * dst_pitch, src + ((xo0 + yo + 1 * column_width) ^ swizzle0), x1 - x0); - mem_copy(dst + x0 + 2 * dst_pitch, src + ((xo0 + yo + 2 * column_width) ^ swizzle0), x1 - x0); - mem_copy(dst + x0 + 3 * dst_pitch, src + ((xo0 + yo + 3 * column_width) ^ swizzle0), x1 - x0); - } - - /* Step by spans/columns. As it happens, the swizzle bit flips - * at each step so we don't need to calculate it explicitly. - */ - for (x = x1; x < x2; x += ytile_span) { - mem_copy_align16(dst + x + 0 * dst_pitch, src + ((xo + yo + 0 * column_width) ^ swizzle), ytile_span); - mem_copy_align16(dst + x + 1 * dst_pitch, src + ((xo + yo + 1 * column_width) ^ swizzle), ytile_span); - mem_copy_align16(dst + x + 2 * dst_pitch, src + ((xo + yo + 2 * column_width) ^ swizzle), ytile_span); - mem_copy_align16(dst + x + 3 * dst_pitch, src + ((xo + yo + 3 * column_width) ^ swizzle), ytile_span); - xo += bytes_per_column; - swizzle ^= swizzle_bit; - } - - if (x2 != x3) { - mem_copy_align16(dst + x2 + 0 * dst_pitch, src + ((xo + yo + 0 * column_width) ^ swizzle), x3 - x2); - mem_copy_align16(dst + x2 + 1 * dst_pitch, src + ((xo + yo + 1 * column_width) ^ swizzle), x3 - x2); - mem_copy_align16(dst + x2 + 2 * dst_pitch, src + ((xo + yo + 2 * column_width) ^ swizzle), x3 - x2); - mem_copy_align16(dst + x2 + 3 * dst_pitch, src + ((xo + yo + 3 * column_width) ^ swizzle), x3 - x2); - } - - dst += 4 * dst_pitch; - } - - if (y2 != y3) { - for (yo = y2 * column_width; yo < y3 * 
column_width; yo += column_width) { - uint32_t xo = xo1; - uint32_t swizzle = swizzle1; - - mem_copy(dst + x0, src + ((xo0 + yo) ^ swizzle0), x1 - x0); - - /* Step by spans/columns. As it happens, the swizzle bit flips - * at each step so we don't need to calculate it explicitly. - */ - for (x = x1; x < x2; x += ytile_span) { - mem_copy_align16(dst + x, src + ((xo + yo) ^ swizzle), ytile_span); - xo += bytes_per_column; - swizzle ^= swizzle_bit; - } - - mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2); - - dst += dst_pitch; - } - } -} - -#if defined(INLINE_SSE41) -static ALWAYS_INLINE void * -_memcpy_streaming_load(void *dest, const void *src, size_t count) -{ - if (count == 16) { - __m128i val = _mm_stream_load_si128((__m128i *)src); - _mm_storeu_si128((__m128i *)dest, val); - return dest; - } else if (count == 64) { - __m128i val0 = _mm_stream_load_si128(((__m128i *)src) + 0); - __m128i val1 = _mm_stream_load_si128(((__m128i *)src) + 1); - __m128i val2 = _mm_stream_load_si128(((__m128i *)src) + 2); - __m128i val3 = _mm_stream_load_si128(((__m128i *)src) + 3); - _mm_storeu_si128(((__m128i *)dest) + 0, val0); - _mm_storeu_si128(((__m128i *)dest) + 1, val1); - _mm_storeu_si128(((__m128i *)dest) + 2, val2); - _mm_storeu_si128(((__m128i *)dest) + 3, val3); - return dest; - } else { - assert(count < 64); /* and (count < 16) for ytiled */ - return memcpy(dest, src, count); - } -} -#endif - -static mem_copy_fn -choose_copy_function(mem_copy_fn_type copy_type) -{ - switch(copy_type) { - case INTEL_COPY_MEMCPY: - return memcpy; - case INTEL_COPY_RGBA8: - return rgba8_copy; -#if defined(INLINE_SSE41) - case INTEL_COPY_STREAMING_LOAD: - return _memcpy_streaming_load; -#endif - case INTEL_COPY_INVALID: - unreachable("invalid copy_type"); - } - unreachable("unhandled copy_type"); - return NULL; -} - -/** - * Copy texture data from linear to X tile layout, faster. 
- * - * Same as \ref linear_to_xtiled but faster, because it passes constant - * parameters for common cases, allowing the compiler to inline code - * optimized for those cases. - * - * \copydoc tile_copy_fn - */ -static FLATTEN void -linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, - uint32_t y0, uint32_t y1, - char *dst, const char *src, - int32_t src_pitch, - uint32_t swizzle_bit, - mem_copy_fn_type copy_type) -{ - mem_copy_fn mem_copy = choose_copy_function(copy_type); - - if (x0 == 0 && x3 == xtile_width && y0 == 0 && y1 == xtile_height) { - if (mem_copy == memcpy) - return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height, - dst, src, src_pitch, swizzle_bit, memcpy, memcpy); - else if (mem_copy == rgba8_copy) - return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height, - dst, src, src_pitch, swizzle_bit, - rgba8_copy, rgba8_copy_aligned_dst); - else - unreachable("not reached"); - } else { - if (mem_copy == memcpy) - return linear_to_xtiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, - memcpy, memcpy); - else if (mem_copy == rgba8_copy) - return linear_to_xtiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, - rgba8_copy, rgba8_copy_aligned_dst); - else - unreachable("not reached"); - } - linear_to_xtiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy); -} - -/** - * Copy texture data from linear to Y tile layout, faster. - * - * Same as \ref linear_to_ytiled but faster, because it passes constant - * parameters for common cases, allowing the compiler to inline code - * optimized for those cases. 
- * - * \copydoc tile_copy_fn - */ -static FLATTEN void -linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, - uint32_t y0, uint32_t y1, - char *dst, const char *src, - int32_t src_pitch, - uint32_t swizzle_bit, - mem_copy_fn_type copy_type) -{ - mem_copy_fn mem_copy = choose_copy_function(copy_type); - - if (x0 == 0 && x3 == ytile_width && y0 == 0 && y1 == ytile_height) { - if (mem_copy == memcpy) - return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height, - dst, src, src_pitch, swizzle_bit, memcpy, memcpy); - else if (mem_copy == rgba8_copy) - return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height, - dst, src, src_pitch, swizzle_bit, - rgba8_copy, rgba8_copy_aligned_dst); - else - unreachable("not reached"); - } else { - if (mem_copy == memcpy) - return linear_to_ytiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, memcpy, memcpy); - else if (mem_copy == rgba8_copy) - return linear_to_ytiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, - rgba8_copy, rgba8_copy_aligned_dst); - else - unreachable("not reached"); - } - linear_to_ytiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy); -} - -/** - * Copy texture data from X tile layout to linear, faster. - * - * Same as \ref xtile_to_linear but faster, because it passes constant - * parameters for common cases, allowing the compiler to inline code - * optimized for those cases. 
- * - * \copydoc tile_copy_fn - */ -static FLATTEN void -xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, - uint32_t y0, uint32_t y1, - char *dst, const char *src, - int32_t dst_pitch, - uint32_t swizzle_bit, - mem_copy_fn_type copy_type) -{ - mem_copy_fn mem_copy = choose_copy_function(copy_type); - - if (x0 == 0 && x3 == xtile_width && y0 == 0 && y1 == xtile_height) { - if (mem_copy == memcpy) - return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, - dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); - else if (mem_copy == rgba8_copy) - return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, - dst, src, dst_pitch, swizzle_bit, - rgba8_copy, rgba8_copy_aligned_src); -#if defined(INLINE_SSE41) - else if (mem_copy == _memcpy_streaming_load) - return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, - dst, src, dst_pitch, swizzle_bit, - memcpy, _memcpy_streaming_load); -#endif - else - unreachable("not reached"); - } else { - if (mem_copy == memcpy) - return xtiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); - else if (mem_copy == rgba8_copy) - return xtiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, - rgba8_copy, rgba8_copy_aligned_src); -#if defined(INLINE_SSE41) - else if (mem_copy == _memcpy_streaming_load) - return xtiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, - memcpy, _memcpy_streaming_load); -#endif - else - unreachable("not reached"); - } - xtiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy); -} - -/** - * Copy texture data from Y tile layout to linear, faster. - * - * Same as \ref ytile_to_linear but faster, because it passes constant - * parameters for common cases, allowing the compiler to inline code - * optimized for those cases. 
- * - * \copydoc tile_copy_fn - */ -static FLATTEN void -ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, - uint32_t y0, uint32_t y1, - char *dst, const char *src, - int32_t dst_pitch, - uint32_t swizzle_bit, - mem_copy_fn_type copy_type) -{ - mem_copy_fn mem_copy = choose_copy_function(copy_type); - - if (x0 == 0 && x3 == ytile_width && y0 == 0 && y1 == ytile_height) { - if (mem_copy == memcpy) - return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, - dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); - else if (mem_copy == rgba8_copy) - return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, - dst, src, dst_pitch, swizzle_bit, - rgba8_copy, rgba8_copy_aligned_src); -#if defined(INLINE_SSE41) - else if (copy_type == INTEL_COPY_STREAMING_LOAD) - return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, - dst, src, dst_pitch, swizzle_bit, - memcpy, _memcpy_streaming_load); -#endif - else - unreachable("not reached"); - } else { - if (mem_copy == memcpy) - return ytiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); - else if (mem_copy == rgba8_copy) - return ytiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, - rgba8_copy, rgba8_copy_aligned_src); -#if defined(INLINE_SSE41) - else if (copy_type == INTEL_COPY_STREAMING_LOAD) - return ytiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, - memcpy, _memcpy_streaming_load); -#endif - else - unreachable("not reached"); - } - ytiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy); -} - -/** - * Copy from linear to tiled texture. - * - * Divide the region given by X range [xt1, xt2) and Y range [yt1, yt2) into - * pieces that do not cross tile boundaries and copy each piece with a tile - * copy function (\ref tile_copy_fn). - * The X range is in bytes, i.e. pixels * bytes-per-pixel. - * The Y range is in pixels (i.e. 
unitless). - * 'dst' is the address of (0, 0) in the destination tiled texture. - * 'src' is the address of (xt1, yt1) in the source linear texture. - */ -static void -intel_linear_to_tiled(uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - uint32_t dst_pitch, int32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type) -{ - tile_copy_fn tile_copy; - uint32_t xt0, xt3; - uint32_t yt0, yt3; - uint32_t xt, yt; - uint32_t tw, th, span; - uint32_t swizzle_bit = has_swizzling ? 1<<6 : 0; - - if (tiling == ISL_TILING_X) { - tw = xtile_width; - th = xtile_height; - span = xtile_span; - tile_copy = linear_to_xtiled_faster; - } else if (tiling == ISL_TILING_Y0) { - tw = ytile_width; - th = ytile_height; - span = ytile_span; - tile_copy = linear_to_ytiled_faster; - } else { - unreachable("unsupported tiling"); - } - - /* Round out to tile boundaries. */ - xt0 = ALIGN_DOWN(xt1, tw); - xt3 = ALIGN_UP (xt2, tw); - yt0 = ALIGN_DOWN(yt1, th); - yt3 = ALIGN_UP (yt2, th); - - /* Loop over all tiles to which we have something to copy. - * 'xt' and 'yt' are the origin of the destination tile, whether copying - * copying a full or partial tile. - * tile_copy() copies one tile or partial tile. - * Looping x inside y is the faster memory access pattern. - */ - for (yt = yt0; yt < yt3; yt += th) { - for (xt = xt0; xt < xt3; xt += tw) { - /* The area to update is [x0,x3) x [y0,y1). - * May not want the whole tile, hence the min and max. - */ - uint32_t x0 = MAX2(xt1, xt); - uint32_t y0 = MAX2(yt1, yt); - uint32_t x3 = MIN2(xt2, xt + tw); - uint32_t y1 = MIN2(yt2, yt + th); - - /* [x0,x3) is split into [x0,x1), [x1,x2), [x2,x3) such that - * the middle interval is the longest span-aligned part. - * The sub-ranges could be empty. 
- */ - uint32_t x1, x2; - x1 = ALIGN_UP(x0, span); - if (x1 > x3) - x1 = x2 = x3; - else - x2 = ALIGN_DOWN(x3, span); - - assert(x0 <= x1 && x1 <= x2 && x2 <= x3); - assert(x1 - x0 < span && x3 - x2 < span); - assert(x3 - x0 <= tw); - assert((x2 - x1) % span == 0); - - /* Translate by (xt,yt) for single-tile copier. */ - tile_copy(x0-xt, x1-xt, x2-xt, x3-xt, - y0-yt, y1-yt, - dst + (ptrdiff_t)xt * th + (ptrdiff_t)yt * dst_pitch, - src + (ptrdiff_t)xt - xt1 + ((ptrdiff_t)yt - yt1) * src_pitch, - src_pitch, - swizzle_bit, - copy_type); - } - } -} - -/** - * Copy from tiled to linear texture. - * - * Divide the region given by X range [xt1, xt2) and Y range [yt1, yt2) into - * pieces that do not cross tile boundaries and copy each piece with a tile - * copy function (\ref tile_copy_fn). - * The X range is in bytes, i.e. pixels * bytes-per-pixel. - * The Y range is in pixels (i.e. unitless). - * 'dst' is the address of (xt1, yt1) in the destination linear texture. - * 'src' is the address of (0, 0) in the source tiled texture. - */ -static void -intel_tiled_to_linear(uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - int32_t dst_pitch, uint32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type) -{ - tile_copy_fn tile_copy; - uint32_t xt0, xt3; - uint32_t yt0, yt3; - uint32_t xt, yt; - uint32_t tw, th, span; - uint32_t swizzle_bit = has_swizzling ? 1<<6 : 0; - - if (tiling == ISL_TILING_X) { - tw = xtile_width; - th = xtile_height; - span = xtile_span; - tile_copy = xtiled_to_linear_faster; - } else if (tiling == ISL_TILING_Y0) { - tw = ytile_width; - th = ytile_height; - span = ytile_span; - tile_copy = ytiled_to_linear_faster; - } else { - unreachable("unsupported tiling"); - } - -#if defined(INLINE_SSE41) - if (copy_type == INTEL_COPY_STREAMING_LOAD) { - /* The hidden cacheline sized register used by movntdqa can apparently - * give you stale data, so do an mfence to invalidate it. 
- */ - _mm_mfence(); - } -#endif - - /* Round out to tile boundaries. */ - xt0 = ALIGN_DOWN(xt1, tw); - xt3 = ALIGN_UP (xt2, tw); - yt0 = ALIGN_DOWN(yt1, th); - yt3 = ALIGN_UP (yt2, th); - - /* Loop over all tiles to which we have something to copy. - * 'xt' and 'yt' are the origin of the destination tile, whether copying - * copying a full or partial tile. - * tile_copy() copies one tile or partial tile. - * Looping x inside y is the faster memory access pattern. - */ - for (yt = yt0; yt < yt3; yt += th) { - for (xt = xt0; xt < xt3; xt += tw) { - /* The area to update is [x0,x3) x [y0,y1). - * May not want the whole tile, hence the min and max. - */ - uint32_t x0 = MAX2(xt1, xt); - uint32_t y0 = MAX2(yt1, yt); - uint32_t x3 = MIN2(xt2, xt + tw); - uint32_t y1 = MIN2(yt2, yt + th); - - /* [x0,x3) is split into [x0,x1), [x1,x2), [x2,x3) such that - * the middle interval is the longest span-aligned part. - * The sub-ranges could be empty. - */ - uint32_t x1, x2; - x1 = ALIGN_UP(x0, span); - if (x1 > x3) - x1 = x2 = x3; - else - x2 = ALIGN_DOWN(x3, span); - - assert(x0 <= x1 && x1 <= x2 && x2 <= x3); - assert(x1 - x0 < span && x3 - x2 < span); - assert(x3 - x0 <= tw); - assert((x2 - x1) % span == 0); - - /* Translate by (xt,yt) for single-tile copier. 
*/ - tile_copy(x0-xt, x1-xt, x2-xt, x3-xt, - y0-yt, y1-yt, - dst + (ptrdiff_t)xt - xt1 + ((ptrdiff_t)yt - yt1) * dst_pitch, - src + (ptrdiff_t)xt * th + (ptrdiff_t)yt * src_pitch, - dst_pitch, - swizzle_bit, - copy_type); - } - } -} diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,139 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright 2012 Intel Corporation - * Copyright 2013 Google - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Chad Versace - * Frank Henigman - */ - -#ifndef INTEL_TILED_MEMCPY_H -#define INTEL_TILED_MEMCPY_H - -#include -#include "main/mtypes.h" - -typedef enum { - INTEL_COPY_MEMCPY = 0, - INTEL_COPY_RGBA8, - INTEL_COPY_STREAMING_LOAD, - INTEL_COPY_INVALID, -} mem_copy_fn_type; - -typedef void *(*mem_copy_fn)(void *dest, const void *src, size_t n); - -typedef void (*tiled_to_linear_fn) - (uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - int32_t dst_pitch, uint32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type); - -void -linear_to_tiled(uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - uint32_t dst_pitch, int32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type); - -void -tiled_to_linear(uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - int32_t dst_pitch, uint32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type); - -/** - * Determine which copy function to use for the given format combination - * - * The only two possible copy functions which are ever returned are a - * direct memcpy and a RGBA <-> BGRA copy function. Since RGBA -> BGRA and - * BGRA -> RGBA are exactly the same operation (and memcpy is obviously - * symmetric), it doesn't matter whether the copy is from the tiled image - * to the untiled or vice versa. The copy function required is the same in - * either case so this function can be used. 
- * - * \param[in] tiledFormat The format of the tiled image - * \param[in] format The GL format of the client data - * \param[in] type The GL type of the client data - * \param[out] mem_copy Will be set to one of either the standard - * library's memcpy or a different copy function - * that performs an RGBA to BGRA conversion - * \param[out] cpp Number of bytes per channel - * - * \return true if the format and type combination are valid - */ -static MAYBE_UNUSED bool -intel_get_memcpy_type(mesa_format tiledFormat, GLenum format, GLenum type, - mem_copy_fn_type *copy_type, uint32_t *cpp) -{ - *copy_type = INTEL_COPY_INVALID; - - if (type == GL_UNSIGNED_INT_8_8_8_8_REV && - !(format == GL_RGBA || format == GL_BGRA)) - return false; /* Invalid type/format combination */ - - if ((tiledFormat == MESA_FORMAT_L_UNORM8 && format == GL_LUMINANCE) || - (tiledFormat == MESA_FORMAT_A_UNORM8 && format == GL_ALPHA)) { - *cpp = 1; - *copy_type = INTEL_COPY_MEMCPY; - } else if ((tiledFormat == MESA_FORMAT_B8G8R8A8_UNORM) || - (tiledFormat == MESA_FORMAT_B8G8R8X8_UNORM) || - (tiledFormat == MESA_FORMAT_B8G8R8A8_SRGB) || - (tiledFormat == MESA_FORMAT_B8G8R8X8_SRGB)) { - *cpp = 4; - if (format == GL_BGRA) { - *copy_type = INTEL_COPY_MEMCPY; - } else if (format == GL_RGBA) { - *copy_type = INTEL_COPY_RGBA8; - } - } else if ((tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM) || - (tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM) || - (tiledFormat == MESA_FORMAT_R8G8B8A8_SRGB) || - (tiledFormat == MESA_FORMAT_R8G8B8X8_SRGB)) { - *cpp = 4; - if (format == GL_BGRA) { - /* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can - * use the same function. 
- */ - *copy_type = INTEL_COPY_RGBA8; - } else if (format == GL_RGBA) { - *copy_type = INTEL_COPY_MEMCPY; - } - } - - if (*copy_type == INTEL_COPY_INVALID) - return false; - - return true; -} - -#endif /* INTEL_TILED_MEMCPY */ diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tiled_memcpy_normal.c mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tiled_memcpy_normal.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tiled_memcpy_normal.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tiled_memcpy_normal.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,59 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright 2012 Intel Corporation - * Copyright 2013 Google - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Chad Versace - * Frank Henigman - */ - - -#include "intel_tiled_memcpy.c" - -void -linear_to_tiled(uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - uint32_t dst_pitch, int32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type) -{ - intel_linear_to_tiled(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, - has_swizzling, tiling, copy_type); -} - -void -tiled_to_linear(uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - int32_t dst_pitch, uint32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type) -{ - intel_tiled_to_linear(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, - has_swizzling, tiling, copy_type); -} diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.c mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,61 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright 2012 Intel Corporation - * Copyright 2013 Google - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Chad Versace - * Frank Henigman - */ - -#define INLINE_SSE41 - -#include "intel_tiled_memcpy_sse41.h" -#include "intel_tiled_memcpy.c" - -void -linear_to_tiled_sse41(uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - uint32_t dst_pitch, int32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type) -{ - intel_linear_to_tiled(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, - has_swizzling, tiling, copy_type); -} - -void -tiled_to_linear_sse41(uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - int32_t dst_pitch, uint32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type) -{ - intel_tiled_to_linear(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, - has_swizzling, tiling, copy_type); -} diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.h mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.h --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,59 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright 2012 Intel Corporation - * Copyright 2013 Google - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in 
the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Chad Versace - * Frank Henigman - */ - -#ifndef INTEL_TILED_MEMCPY_SSE41_H -#define INTEL_TILED_MEMCPY_SSE41_H - -#include -#include "main/mtypes.h" -#include "isl/isl.h" - -#include "intel_tiled_memcpy.h" - -void -linear_to_tiled_sse41(uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - uint32_t dst_pitch, int32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type); - -void -tiled_to_linear_sse41(uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - int32_t dst_pitch, uint32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type); - -#endif /* INTEL_TILED_MEMCPY_SSE41_H */ diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/Makefile.am mesa-19.0.1/src/mesa/drivers/dri/i965/Makefile.am --- mesa-18.3.3/src/mesa/drivers/dri/i965/Makefile.am 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -26,6 
+26,7 @@ AM_CFLAGS = \ -I$(top_srcdir)/include \ -I$(top_srcdir)/src/ \ + -I$(top_builddir)/src/ \ -I$(top_srcdir)/src/mapi \ -I$(top_srcdir)/src/mesa/ \ -I$(top_srcdir)/src/gallium/include \ @@ -33,6 +34,8 @@ -I$(top_builddir)/src/util \ -I$(top_srcdir)/src/mesa/drivers/dri/common \ -I$(top_srcdir)/src/gtest/include \ + -I$(top_builddir)/src/compiler \ + -I$(top_srcdir)/src/compiler \ -I$(top_builddir)/src/compiler/glsl \ -I$(top_builddir)/src/compiler/nir \ -I$(top_srcdir)/src/compiler/nir \ @@ -92,20 +95,8 @@ noinst_LTLIBRARIES = \ libi965_dri.la \ - libintel_tiled_memcpy.la \ - libintel_tiled_memcpy_sse41.la \ $(I965_PERGEN_LIBS) -libintel_tiled_memcpy_la_SOURCES = \ - $(intel_tiled_memcpy_FILES) -libintel_tiled_memcpy_la_CFLAGS = \ - $(AM_CFLAGS) - -libintel_tiled_memcpy_sse41_la_SOURCES = \ - $(intel_tiled_memcpy_sse41_FILES) -libintel_tiled_memcpy_sse41_la_CFLAGS = \ - $(AM_CFLAGS) $(SSE41_CFLAGS) - libi965_dri_la_SOURCES = \ $(i965_FILES) \ $(i965_oa_GENERATED_FILES) @@ -116,8 +107,6 @@ $(top_builddir)/src/intel/compiler/libintel_compiler.la \ $(top_builddir)/src/intel/blorp/libblorp.la \ $(I965_PERGEN_LIBS) \ - libintel_tiled_memcpy.la \ - libintel_tiled_memcpy_sse41.la \ $(LIBDRM_LIBS) BUILT_SOURCES = $(i965_oa_GENERATED_FILES) @@ -126,7 +115,6 @@ EXTRA_DIST = \ brw_oa.py \ $(i965_oa_xml_FILES) \ - $(intel_tiled_memcpy_dep_FILES) \ meson.build brw_oa_metrics.c: brw_oa.py $(i965_oa_xml_FILES) @@ -136,6 +124,3 @@ $(i965_oa_xml_FILES:%=$(srcdir)/%) brw_oa_metrics.h: brw_oa_metrics.c - -intel_tiled_memcpy_normal.c: $(intel_tiled_memcpy_dep_FILES) -intel_tiled_memcpy_sse41.c: $(intel_tiled_memcpy_dep_FILES) diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/Makefile.sources mesa-19.0.1/src/mesa/drivers/dri/i965/Makefile.sources --- mesa-18.3.3/src/mesa/drivers/dri/i965/Makefile.sources 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/Makefile.sources 2019-03-31 23:16:37.000000000 +0000 @@ -113,17 +113,6 @@ intel_upload.c \ 
libdrm_macros.h -intel_tiled_memcpy_FILES = \ - intel_tiled_memcpy_normal.c \ - intel_tiled_memcpy.h - -intel_tiled_memcpy_sse41_FILES = \ - intel_tiled_memcpy_sse41.c \ - intel_tiled_memcpy_sse41.h - -intel_tiled_memcpy_dep_FILES = \ - intel_tiled_memcpy.c - i965_gen4_FILES = \ genX_blorp_exec.c \ genX_state_upload.c diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/meson.build mesa-19.0.1/src/mesa/drivers/dri/i965/meson.build --- mesa-18.3.3/src/mesa/drivers/dri/i965/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -133,16 +133,6 @@ 'libdrm_macros.h', ) -files_intel_tiled_memcpy = files( - 'intel_tiled_memcpy_normal.c', - 'intel_tiled_memcpy.h', -) - -files_intel_tiled_memcpy_sse41 = files( - 'intel_tiled_memcpy_sse41.c', - 'intel_tiled_memcpy_sse41.h', -) - i965_gen_libs = [] foreach v : ['40', '45', '50', '60', '70', '75', '80', '90', '100', '110'] i965_gen_libs += static_library( @@ -184,36 +174,10 @@ ], ) -intel_tiled_memcpy = static_library( - 'intel_tiled_memcpy', - [files_intel_tiled_memcpy], - include_directories : [ - inc_common, inc_intel, inc_dri_common, inc_drm_uapi, - ], - c_args : [c_vis_args, no_override_init_args, '-msse2'], - extra_files : ['intel_tiled_memcpy.c'] -) - -if with_sse41 - intel_tiled_memcpy_sse41 = static_library( - 'intel_tiled_memcpy_sse41', - [files_intel_tiled_memcpy_sse41], - include_directories : [ - inc_common, inc_intel, inc_dri_common, inc_drm_uapi, - ], - link_args : ['-Wl,--exclude-libs=ALL'], - c_args : [c_vis_args, no_override_init_args, '-Wl,--exclude-libs=ALL', '-msse2', sse41_args], - extra_files : ['intel_tiled_memcpy.c'] - ) -else - intel_tiled_memcpy_sse41 = [] -endif - - libi965 = static_library( 'i965', [files_i965, i965_oa_sources, ir_expression_operation_h, - xmlpool_options_h], + xmlpool_options_h, float64_glsl_h], include_directories : [ inc_common, inc_intel, inc_dri_common, inc_util, inc_drm_uapi, ], @@ -221,7 +185,7 
@@ cpp_args : [cpp_vis_args, c_sse2_args], link_with : [ i965_gen_libs, libintel_common, libintel_dev, libisl, libintel_compiler, - libblorp, intel_tiled_memcpy, intel_tiled_memcpy_sse41 + libblorp ], dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers], ) diff -Nru mesa-18.3.3/src/mesa/drivers/dri/meson.build mesa-19.0.1/src/mesa/drivers/dri/meson.build --- mesa-18.3.3/src/mesa/drivers/dri/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -78,7 +78,7 @@ filebase : 'dri', description : 'Direct Rendering Infrastructure', version : meson.project_version(), - variables : ['dridriverdir=${prefix}/' + dri_drivers_path], + variables : ['dridriverdir=' + dri_drivers_path], requires_private : dri_req_private, ) endif diff -Nru mesa-18.3.3/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c mesa-19.0.1/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c --- mesa-18.3.3/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c 2019-03-31 23:16:37.000000000 +0000 @@ -49,7 +49,7 @@ if (render->mode == VBO && !_mesa_is_bufferobj(binding->BufferObj)) { const struct gl_array_attributes *attrib = a->VertexAttrib; /* Pack client buffers. */ - return align(_mesa_sizeof_type(attrib->Type) * attrib->Size, 4); + return align(attrib->Format._ElementSize, 4); } else { return binding->Stride; } @@ -86,7 +86,7 @@ nouveau_init_array(&render->attrs[attr], attr, get_array_stride(ctx, array), - attrib->Size, attrib->Type, + attrib->Format.Size, attrib->Format.Type, imm ? binding->BufferObj : NULL, p, imm, ctx); } @@ -154,8 +154,8 @@ return; /* Constant attribute. 
*/ - nouveau_init_array(a, attr, binding->Stride, attrib->Size, - attrib->Type, binding->BufferObj, p, + nouveau_init_array(a, attr, binding->Stride, attrib->Format.Size, + attrib->Format.Type, binding->BufferObj, p, GL_TRUE, ctx); EMIT_IMM(ctx, a, 0); nouveau_deinit_array(a); @@ -166,7 +166,7 @@ if (render->mode == VBO) { render->map[info->vbo_index] = attr; - render->vertex_size += attrib->_ElementSize; + render->vertex_size += attrib->Format._ElementSize; render->attr_count = MAX2(render->attr_count, info->vbo_index + 1); } else { diff -Nru mesa-18.3.3/src/mesa/drivers/dri/r200/radeon_dma.c mesa-19.0.1/src/mesa/drivers/dri/r200/radeon_dma.c --- mesa-18.3.3/src/mesa/drivers/dri/r200/radeon_dma.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/r200/radeon_dma.c 2019-03-31 23:16:37.000000000 +0000 @@ -217,7 +217,7 @@ if (size > rmesa->dma.minimum_size) rmesa->dma.minimum_size = (size + 15) & (~15); - radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %Zi\n", + radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %zi\n", __func__, size, rmesa->dma.minimum_size); if (is_empty_list(&rmesa->dma.free) diff -Nru mesa-18.3.3/src/mesa/drivers/dri/radeon/radeon_dma.c mesa-19.0.1/src/mesa/drivers/dri/radeon/radeon_dma.c --- mesa-18.3.3/src/mesa/drivers/dri/radeon/radeon_dma.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/radeon/radeon_dma.c 2019-03-31 23:16:37.000000000 +0000 @@ -217,7 +217,7 @@ if (size > rmesa->dma.minimum_size) rmesa->dma.minimum_size = (size + 15) & (~15); - radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %Zi\n", + radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %zi\n", __func__, size, rmesa->dma.minimum_size); if (is_empty_list(&rmesa->dma.free) diff -Nru mesa-18.3.3/src/mesa/main/api_arrayelt.c mesa-19.0.1/src/mesa/main/api_arrayelt.c --- mesa-18.3.3/src/mesa/main/api_arrayelt.c 2018-09-27 19:13:54.000000000 +0000 +++ 
mesa-19.0.1/src/mesa/main/api_arrayelt.c 2019-03-31 23:16:37.000000000 +0000 @@ -1572,15 +1572,15 @@ actx->nr_vbos = 0; /* conventional vertex arrays */ - if (vao->VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Enabled) { + if (vao->Enabled & VERT_BIT_COLOR_INDEX) { aa->array = &vao->VertexAttrib[VERT_ATTRIB_COLOR_INDEX]; aa->binding = &vao->BufferBinding[aa->array->BufferBindingIndex]; - aa->offset = IndexFuncs[TYPE_IDX(aa->array->Type)]; + aa->offset = IndexFuncs[TYPE_IDX(aa->array->Format.Type)]; check_vbo(actx, aa->binding->BufferObj); aa++; } - if (vao->VertexAttrib[VERT_ATTRIB_EDGEFLAG].Enabled) { + if (vao->Enabled & VERT_BIT_EDGEFLAG) { aa->array = &vao->VertexAttrib[VERT_ATTRIB_EDGEFLAG]; aa->binding = &vao->BufferBinding[aa->array->BufferBindingIndex]; aa->offset = _gloffset_EdgeFlagv; @@ -1588,51 +1588,51 @@ aa++; } - if (vao->VertexAttrib[VERT_ATTRIB_NORMAL].Enabled) { + if (vao->Enabled & VERT_BIT_NORMAL) { aa->array = &vao->VertexAttrib[VERT_ATTRIB_NORMAL]; aa->binding = &vao->BufferBinding[aa->array->BufferBindingIndex]; - aa->offset = NormalFuncs[TYPE_IDX(aa->array->Type)]; + aa->offset = NormalFuncs[TYPE_IDX(aa->array->Format.Type)]; check_vbo(actx, aa->binding->BufferObj); aa++; } - if (vao->VertexAttrib[VERT_ATTRIB_COLOR0].Enabled) { + if (vao->Enabled & VERT_BIT_COLOR0) { aa->array = &vao->VertexAttrib[VERT_ATTRIB_COLOR0]; aa->binding = &vao->BufferBinding[aa->array->BufferBindingIndex]; - aa->offset = ColorFuncs[aa->array->Size-3][TYPE_IDX(aa->array->Type)]; + aa->offset = ColorFuncs[aa->array->Format.Size-3][TYPE_IDX(aa->array->Format.Type)]; check_vbo(actx, aa->binding->BufferObj); aa++; } - if (vao->VertexAttrib[VERT_ATTRIB_COLOR1].Enabled) { + if (vao->Enabled & VERT_BIT_COLOR1) { aa->array = &vao->VertexAttrib[VERT_ATTRIB_COLOR1]; aa->binding = &vao->BufferBinding[aa->array->BufferBindingIndex]; - aa->offset = SecondaryColorFuncs[TYPE_IDX(aa->array->Type)]; + aa->offset = SecondaryColorFuncs[TYPE_IDX(aa->array->Format.Type)]; check_vbo(actx, 
aa->binding->BufferObj); aa++; } - if (vao->VertexAttrib[VERT_ATTRIB_FOG].Enabled) { + if (vao->Enabled & VERT_BIT_FOG) { aa->array = &vao->VertexAttrib[VERT_ATTRIB_FOG]; aa->binding = &vao->BufferBinding[aa->array->BufferBindingIndex]; - aa->offset = FogCoordFuncs[TYPE_IDX(aa->array->Type)]; + aa->offset = FogCoordFuncs[TYPE_IDX(aa->array->Format.Type)]; check_vbo(actx, aa->binding->BufferObj); aa++; } for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) { - struct gl_array_attributes *attribArray = - &vao->VertexAttrib[VERT_ATTRIB_TEX(i)]; - if (attribArray->Enabled) { + if (vao->Enabled & VERT_BIT_TEX(i)) { + struct gl_array_attributes *attribArray = + &vao->VertexAttrib[VERT_ATTRIB_TEX(i)]; /* NOTE: we use generic glVertexAttribNV functions here. * If we ever remove GL_NV_vertex_program this will have to change. */ at->array = attribArray; at->binding = &vao->BufferBinding[attribArray->BufferBindingIndex]; - assert(!at->array->Normalized); - at->func = AttribFuncsNV[at->array->Normalized] - [at->array->Size-1] - [TYPE_IDX(at->array->Type)]; + assert(!at->array->Format.Normalized); + at->func = AttribFuncsNV[at->array->Format.Normalized] + [at->array->Format.Size-1] + [TYPE_IDX(at->array->Format.Type)]; at->index = VERT_ATTRIB_TEX0 + i; check_vbo(actx, at->binding->BufferObj); at++; @@ -1641,9 +1641,9 @@ /* generic vertex attribute arrays */ for (i = 1; i < VERT_ATTRIB_GENERIC_MAX; i++) { /* skip zero! */ - struct gl_array_attributes *attribArray = - &vao->VertexAttrib[VERT_ATTRIB_GENERIC(i)]; - if (attribArray->Enabled) { + if (vao->Enabled & VERT_BIT_GENERIC(i)) { + struct gl_array_attributes *attribArray = + &vao->VertexAttrib[VERT_ATTRIB_GENERIC(i)]; GLint intOrNorm; at->array = attribArray; at->binding = &vao->BufferBinding[attribArray->BufferBindingIndex]; @@ -1652,18 +1652,18 @@ * change from one execution of _ae_ArrayElement() to * the next. Doing so caused UT to break. 
*/ - if (at->array->Doubles) + if (at->array->Format.Doubles) intOrNorm = 3; - else if (at->array->Integer) + else if (at->array->Format.Integer) intOrNorm = 2; - else if (at->array->Normalized) + else if (at->array->Format.Normalized) intOrNorm = 1; else intOrNorm = 0; at->func = AttribFuncsARB[intOrNorm] - [at->array->Size-1] - [TYPE_IDX(at->array->Type)]; + [at->array->Format.Size-1] + [TYPE_IDX(at->array->Format.Type)]; at->index = i; check_vbo(actx, at->binding->BufferObj); @@ -1672,21 +1672,21 @@ } /* finally, vertex position */ - if (vao->VertexAttrib[VERT_ATTRIB_GENERIC0].Enabled) { + if (vao->Enabled & VERT_BIT_GENERIC0) { /* Use glVertex(v) instead of glVertexAttrib(0, v) to be sure it's * issued as the last (provoking) attribute). */ aa->array = &vao->VertexAttrib[VERT_ATTRIB_GENERIC0]; aa->binding = &vao->BufferBinding[aa->array->BufferBindingIndex]; - assert(aa->array->Size >= 2); /* XXX fix someday? */ - aa->offset = VertexFuncs[aa->array->Size-2][TYPE_IDX(aa->array->Type)]; + assert(aa->array->Format.Size >= 2); /* XXX fix someday? 
*/ + aa->offset = VertexFuncs[aa->array->Format.Size-2][TYPE_IDX(aa->array->Format.Type)]; check_vbo(actx, aa->binding->BufferObj); aa++; } - else if (vao->VertexAttrib[VERT_ATTRIB_POS].Enabled) { + else if (vao->Enabled & VERT_BIT_POS) { aa->array = &vao->VertexAttrib[VERT_ATTRIB_POS]; aa->binding = &vao->BufferBinding[aa->array->BufferBindingIndex]; - aa->offset = VertexFuncs[aa->array->Size-2][TYPE_IDX(aa->array->Type)]; + aa->offset = VertexFuncs[aa->array->Format.Size-2][TYPE_IDX(aa->array->Format.Type)]; check_vbo(actx, aa->binding->BufferObj); aa++; } diff -Nru mesa-18.3.3/src/mesa/main/arrayobj.c mesa-19.0.1/src/mesa/main/arrayobj.c --- mesa-18.3.3/src/mesa/main/arrayobj.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/arrayobj.c 2019-03-31 23:16:37.000000000 +0000 @@ -385,23 +385,17 @@ assert(index < ARRAY_SIZE(vao->BufferBinding)); struct gl_vertex_buffer_binding *binding = &vao->BufferBinding[index]; - array->Size = size; - array->Type = type; - array->Format = GL_RGBA; /* only significant for GL_EXT_vertex_array_bgra */ + _mesa_set_vertex_format(&array->Format, size, type, GL_RGBA, + GL_FALSE, GL_FALSE, GL_FALSE); array->Stride = 0; array->Ptr = NULL; array->RelativeOffset = 0; - array->Enabled = GL_FALSE; - array->Normalized = GL_FALSE; - array->Integer = GL_FALSE; - array->Doubles = GL_FALSE; - array->_ElementSize = size * _mesa_sizeof_type(type); ASSERT_BITFIELD_SIZE(struct gl_array_attributes, BufferBindingIndex, VERT_ATTRIB_MAX - 1); array->BufferBindingIndex = index; binding->Offset = 0; - binding->Stride = array->_ElementSize; + binding->Stride = array->Format._ElementSize; binding->BufferObj = NULL; binding->_BoundArrays = BITFIELD_BIT(index); @@ -442,7 +436,7 @@ init_array(ctx, vao, VERT_ATTRIB_COLOR_INDEX, 1, GL_FLOAT); break; case VERT_ATTRIB_EDGEFLAG: - init_array(ctx, vao, VERT_ATTRIB_EDGEFLAG, 1, GL_BOOL); + init_array(ctx, vao, VERT_ATTRIB_EDGEFLAG, 1, GL_UNSIGNED_BYTE); break; case VERT_ATTRIB_POINT_SIZE: 
init_array(ctx, vao, VERT_ATTRIB_POINT_SIZE, 1, GL_FLOAT); @@ -478,7 +472,7 @@ GLuint max_offset = 0; /* We work on the unmapped originaly VAO array entries. */ - GLbitfield mask = vao->_Enabled & binding->_BoundArrays; + GLbitfield mask = vao->Enabled & binding->_BoundArrays; /* The binding should be active somehow, not to return inverted ranges */ assert(mask); while (mask) { @@ -597,7 +591,7 @@ */ const gl_attribute_map_mode mode = vao->_AttributeMapMode; /* Enabled array bits. */ - const GLbitfield enabled = vao->_Enabled; + const GLbitfield enabled = vao->Enabled; /* VBO array bits. */ const GLbitfield vbos = vao->VertexAttribBufferMask; @@ -642,9 +636,6 @@ attrib2->_EffBufferBindingIndex = bindex; attrib2->_EffRelativeOffset = attrib2->RelativeOffset; assert(attrib2->_EffRelativeOffset <= MaxRelativeOffset); - - /* Only enabled arrays shall appear in the unique bindings */ - assert(attrib2->Enabled); } /* Finally this is the set of effectively bound arrays with the * original binding offset. @@ -720,9 +711,6 @@ attrib2->_EffRelativeOffset = binding2->Offset + attrib2->RelativeOffset - min_offset; assert(attrib2->_EffRelativeOffset <= MaxRelativeOffset); - - /* Only enabled arrays shall appear in the unique bindings */ - assert(attrib2->Enabled); } /* Finally this is the set of effectively bound arrays */ binding->_EffOffset = min_offset; @@ -756,14 +744,14 @@ * grouping information in a seperate array beside * gl_array_attributes/gl_vertex_buffer_binding. 
*/ - assert(util_bitcount(binding->_BoundArrays & vao->_Enabled) == 1 - || (vao->_Enabled & ~binding->_BoundArrays) == 0); + assert(util_bitcount(binding->_BoundArrays & vao->Enabled) == 1 + || (vao->Enabled & ~binding->_BoundArrays) == 0); /* Start this current effective binding with the array */ GLbitfield eff_bound_arrays = bound; const GLubyte *ptr = attrib->Ptr; - unsigned vertex_end = attrib->_ElementSize; + unsigned vertex_end = attrib->Format._ElementSize; /* Walk other user space arrays and see which are interleaved * using the same binding parameters. @@ -776,8 +764,8 @@ &vao->BufferBinding[attrib2->BufferBindingIndex]; /* See the comment at the same assert above. */ - assert(util_bitcount(binding2->_BoundArrays & vao->_Enabled) == 1 - || (vao->_Enabled & ~binding->_BoundArrays) == 0); + assert(util_bitcount(binding2->_BoundArrays & vao->Enabled) == 1 + || (vao->Enabled & ~binding->_BoundArrays) == 0); /* Check if we have an identical binding */ if (binding->Stride != binding2->Stride) @@ -785,9 +773,10 @@ if (binding->InstanceDivisor != binding2->InstanceDivisor) continue; if (ptr <= attrib2->Ptr) { - if (ptr + binding->Stride < attrib2->Ptr + attrib2->_ElementSize) + if (ptr + binding->Stride < attrib2->Ptr + + attrib2->Format._ElementSize) continue; - unsigned end = attrib2->Ptr + attrib2->_ElementSize - ptr; + unsigned end = attrib2->Ptr + attrib2->Format._ElementSize - ptr; vertex_end = MAX2(vertex_end, end); } else { if (attrib2->Ptr + binding->Stride < ptr + vertex_end) @@ -812,9 +801,6 @@ attrib2->_EffBufferBindingIndex = bindex; attrib2->_EffRelativeOffset = attrib2->Ptr - ptr; assert(attrib2->_EffRelativeOffset <= binding->Stride); - - /* Only enabled arrays shall appear in the unique bindings */ - assert(attrib2->Enabled); } /* Finally this is the set of effectively bound arrays */ binding->_EffOffset = (GLintptr)ptr; @@ -832,8 +818,9 @@ for (gl_vert_attrib attr = 0; attr < VERT_ATTRIB_MAX; ++attr) { /* Query the original api defined 
attrib/binding information ... */ const unsigned char *const map =_mesa_vao_attribute_map[mode]; - const struct gl_array_attributes *attrib = &vao->VertexAttrib[map[attr]]; - if (attrib->Enabled) { + if (vao->Enabled & VERT_BIT(map[attr])) { + const struct gl_array_attributes *attrib = + &vao->VertexAttrib[map[attr]]; const struct gl_vertex_buffer_binding *binding = &vao->BufferBinding[attrib->BufferBindingIndex]; /* ... and compare that with the computed attrib/binding */ @@ -871,7 +858,7 @@ _mesa_all_varyings_in_vbos(const struct gl_vertex_array_object *vao) { /* Walk those enabled arrays that have the default vbo attached */ - GLbitfield mask = vao->_Enabled & ~vao->VertexAttribBufferMask; + GLbitfield mask = vao->Enabled & ~vao->VertexAttribBufferMask; while (mask) { /* Do not use u_bit_scan64 as we can walk multiple @@ -883,8 +870,6 @@ const struct gl_vertex_buffer_binding *buffer_binding = &vao->BufferBinding[attrib_array->BufferBindingIndex]; - /* Only enabled arrays shall appear in the _Enabled bitmask */ - assert(attrib_array->Enabled); /* We have already masked out vao->VertexAttribBufferMask */ assert(!_mesa_is_bufferobj(buffer_binding->BufferObj)); @@ -905,7 +890,7 @@ _mesa_all_buffers_are_unmapped(const struct gl_vertex_array_object *vao) { /* Walk the enabled arrays that have a vbo attached */ - GLbitfield mask = vao->_Enabled & vao->VertexAttribBufferMask; + GLbitfield mask = vao->Enabled & vao->VertexAttribBufferMask; while (mask) { const int i = ffs(mask) - 1; @@ -914,8 +899,6 @@ const struct gl_vertex_buffer_binding *buffer_binding = &vao->BufferBinding[attrib_array->BufferBindingIndex]; - /* Only enabled arrays shall appear in the _Enabled bitmask */ - assert(attrib_array->Enabled); /* We have already masked with vao->VertexAttribBufferMask */ assert(_mesa_is_bufferobj(buffer_binding->BufferObj)); diff -Nru mesa-18.3.3/src/mesa/main/arrayobj.h mesa-19.0.1/src/mesa/main/arrayobj.h --- mesa-18.3.3/src/mesa/main/arrayobj.h 2018-09-27 
19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/arrayobj.h 2019-03-31 23:16:37.000000000 +0000 @@ -111,7 +111,7 @@ /** * Apply the position/generic0 aliasing map to a bitfield from the vao. - * Use for example to convert gl_vertex_array_object::_Enabled + * Use for example to convert gl_vertex_array_object::Enabled * or gl_vertex_buffer_binding::_VertexBinding from the vao numbering to * the numbering used with vertex processing inputs. */ @@ -143,7 +143,7 @@ _mesa_get_vao_vp_inputs(const struct gl_vertex_array_object *vao) { const gl_attribute_map_mode mode = vao->_AttributeMapMode; - return _mesa_vao_enable_to_vp_inputs(mode, vao->_Enabled); + return _mesa_vao_enable_to_vp_inputs(mode, vao->Enabled); } diff -Nru mesa-18.3.3/src/mesa/main/attrib.c mesa-19.0.1/src/mesa/main/attrib.c --- mesa-18.3.3/src/mesa/main/attrib.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/attrib.c 2019-03-31 23:16:37.000000000 +0000 @@ -1576,8 +1576,8 @@ _mesa_copy_vertex_buffer_binding(ctx, &dest->BufferBinding[i], &src->BufferBinding[i]); } - /* _Enabled must be the same than on push */ - dest->_Enabled = src->_Enabled; + /* Enabled must be the same than on push */ + dest->Enabled = src->Enabled; dest->_EffEnabledVBO = src->_EffEnabledVBO; /* The bitmask of bound VBOs needs to match the VertexBinding array */ dest->VertexAttribBufferMask = src->VertexAttribBufferMask; diff -Nru mesa-18.3.3/src/mesa/main/context.h mesa-19.0.1/src/mesa/main/context.h --- mesa-18.3.3/src/mesa/main/context.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/context.h 2019-03-31 23:16:37.000000000 +0000 @@ -337,6 +337,64 @@ } +static inline bool +_mesa_has_integer_textures(const struct gl_context *ctx) +{ + return _mesa_has_EXT_texture_integer(ctx) || _mesa_is_gles3(ctx); +} + +static inline bool +_mesa_has_half_float_textures(const struct gl_context *ctx) +{ + return _mesa_has_ARB_texture_float(ctx) || + _mesa_has_OES_texture_half_float(ctx) || 
_mesa_is_gles3(ctx); +} + +static inline bool +_mesa_has_float_textures(const struct gl_context *ctx) +{ + return _mesa_has_ARB_texture_float(ctx) || + _mesa_has_OES_texture_float(ctx) || _mesa_is_gles3(ctx); + } + +static inline bool +_mesa_has_texture_rgb10_a2ui(const struct gl_context *ctx) +{ + return _mesa_has_ARB_texture_rgb10_a2ui(ctx) || _mesa_is_gles3(ctx); +} + +static inline bool +_mesa_has_float_depth_buffer(const struct gl_context *ctx) +{ + return _mesa_has_ARB_depth_buffer_float(ctx) || _mesa_is_gles3(ctx); +} + +static inline bool +_mesa_has_packed_float(const struct gl_context *ctx) +{ + return _mesa_has_EXT_packed_float(ctx) || _mesa_is_gles3(ctx); +} + +static inline bool +_mesa_has_rg_textures(const struct gl_context *ctx) +{ + return _mesa_has_ARB_texture_rg(ctx) || _mesa_has_EXT_texture_rg(ctx) || + _mesa_is_gles3(ctx); +} + +static inline bool +_mesa_has_texture_shared_exponent(const struct gl_context *ctx) +{ + return _mesa_has_EXT_texture_shared_exponent(ctx) || _mesa_is_gles3(ctx); +} + +static inline bool +_mesa_has_texture_type_2_10_10_10_REV(const struct gl_context *ctx) +{ + return _mesa_is_desktop_gl(ctx) || + _mesa_has_EXT_texture_type_2_10_10_10_REV(ctx); +} + /** * Checks if the context supports geometry shaders. */ @@ -361,7 +419,7 @@ /** * Checks if the context supports tessellation. 
*/ -static inline GLboolean +static inline bool _mesa_has_tessellation(const struct gl_context *ctx) { /* _mesa_has_EXT_tessellation_shader(ctx) is redundant with the OES diff -Nru mesa-18.3.3/src/mesa/main/dd.h mesa-19.0.1/src/mesa/main/dd.h --- mesa-18.3.3/src/mesa/main/dd.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/dd.h 2019-03-31 23:16:37.000000000 +0000 @@ -784,9 +784,8 @@ GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); - void (*DiscardFramebuffer)(struct gl_context *ctx, - GLenum target, GLsizei numAttachments, - const GLenum *attachments); + void (*DiscardFramebuffer)(struct gl_context *ctx, struct gl_framebuffer *fb, + struct gl_renderbuffer_attachment *att); /** * \name Functions for GL_ARB_sample_locations diff -Nru mesa-18.3.3/src/mesa/main/dlist.c mesa-19.0.1/src/mesa/main/dlist.c --- mesa-18.3.3/src/mesa/main/dlist.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/dlist.c 2019-03-31 23:16:37.000000000 +0000 @@ -962,6 +962,8 @@ dlist->Name = name; dlist->Head = malloc(sizeof(Node) * count); dlist->Head[0].opcode = OPCODE_END_OF_LIST; + /* All InstSize[] entries must be non-zero */ + InstSize[OPCODE_END_OF_LIST] = 1; return dlist; } diff -Nru mesa-18.3.3/src/mesa/main/draw.c mesa-19.0.1/src/mesa/main/draw.c --- mesa-18.3.3/src/mesa/main/draw.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -67,7 +67,7 @@ GLuint attrib, GLuint j) { const struct gl_array_attributes *array = &vao->VertexAttrib[attrib]; - if (array->Enabled) { + if (vao->Enabled & VERT_BIT(attrib)) { const struct gl_vertex_buffer_binding *binding = &vao->BufferBinding[array->BufferBindingIndex]; struct gl_buffer_object *bo = binding->BufferObj; @@ -82,18 +82,19 @@ data = ADD_POINTERS(_mesa_vertex_attrib_address(array, binding), bo->Mappings[MAP_INTERNAL].Pointer); } - switch (array->Type) { + switch 
(array->Format.Type) { case GL_FLOAT: { GLfloat *f = (GLfloat *) ((GLubyte *) data + binding->Stride * j); GLint k; - for (k = 0; k < array->Size; k++) { + for (k = 0; k < array->Format.Size; k++) { if (IS_INF_OR_NAN(f[k]) || f[k] >= 1.0e20F || f[k] <= -1.0e10F) { printf("Bad array data:\n"); printf(" Element[%u].%u = %f\n", j, k, f[k]); printf(" Array %u at %p\n", attrib, (void *) array); printf(" Type 0x%x, Size %d, Stride %d\n", - array->Type, array->Size, binding->Stride); + array->Format.Type, array->Format.Size, + binding->Stride); printf(" Address/offset %p in Buffer Object %u\n", array->Ptr, bo->Name); f[k] = 1.0F; /* XXX replace the bad value! */ @@ -117,7 +118,7 @@ GLuint attrib) { const struct gl_array_attributes *array = &vao->VertexAttrib[attrib]; - if (array->Enabled) { + if (vao->Enabled & VERT_BIT(attrib)) { const struct gl_vertex_buffer_binding *binding = &vao->BufferBinding[array->BufferBindingIndex]; struct gl_buffer_object *bo = binding->BufferObj; @@ -225,7 +226,7 @@ case API_OPENGLES: /* For OpenGL ES, only draw if we have vertex positions */ - if (!ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_POS].Enabled) + if (!(ctx->Array.VAO->Enabled & VERT_BIT_POS)) return true; break; @@ -252,8 +253,7 @@ /* Draw if we have vertex positions (GL_VERTEX_ARRAY or generic * array [0]). 
*/ - return (!ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_POS].Enabled && - !ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_GENERIC0].Enabled); + return !(ctx->Array.VAO->Enabled & (VERT_BIT_POS|VERT_BIT_GENERIC0)); } break; @@ -274,23 +274,22 @@ { const struct gl_vertex_array_object *vao = ctx->Array.VAO; - printf("_mesa_exec_DrawArrays(mode 0x%x, start %d, count %d):\n", + printf("_mesa_DrawArrays(mode 0x%x, start %d, count %d):\n", mode, start, count); - unsigned i; - for (i = 0; i < VERT_ATTRIB_MAX; ++i) { + GLbitfield mask = vao->Enabled; + while (mask) { + const gl_vert_attrib i = u_bit_scan(&mask); const struct gl_array_attributes *array = &vao->VertexAttrib[i]; - if (!array->Enabled) - continue; const struct gl_vertex_buffer_binding *binding = &vao->BufferBinding[array->BufferBindingIndex]; struct gl_buffer_object *bufObj = binding->BufferObj; - printf("attr %s: size %d stride %d enabled %d " + printf("attr %s: size %d stride %d " "ptr %p Bufobj %u\n", gl_vert_attrib_name((gl_vert_attrib) i), - array->Size, binding->Stride, array->Enabled, + array->Format.Size, binding->Stride, array->Ptr, bufObj->Name); if (_mesa_is_bufferobj(bufObj)) { @@ -301,7 +300,7 @@ _mesa_vertex_attrib_address(array, binding); unsigned multiplier; - switch (array->Type) { + switch (array->Format.Type) { case GL_DOUBLE: case GL_INT64_ARB: case GL_UNSIGNED_INT64_ARB: @@ -315,7 +314,7 @@ int *k = (int *) f; int i = 0; int n = (count - 1) * (binding->Stride / (4 * multiplier)) - + array->Size; + + array->Format.Size; if (n > 32) n = 32; printf(" Data at offset %d:\n", offset); @@ -538,8 +537,8 @@ /** * Called from glDrawArrays when in immediate mode (not display list mode). */ -static void GLAPIENTRY -_mesa_exec_DrawArrays(GLenum mode, GLint start, GLsizei count) +void GLAPIENTRY +_mesa_DrawArrays(GLenum mode, GLint start, GLsizei count) { GET_CURRENT_CONTEXT(ctx); @@ -573,9 +572,9 @@ * Called from glDrawArraysInstanced when in immediate mode (not * display list mode). 
*/ -static void GLAPIENTRY -_mesa_exec_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count, - GLsizei numInstances) +void GLAPIENTRY +_mesa_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count, + GLsizei numInstances) { GET_CURRENT_CONTEXT(ctx); @@ -859,10 +858,10 @@ /** * Called by glDrawRangeElementsBaseVertex() in immediate mode. */ -static void GLAPIENTRY -_mesa_exec_DrawRangeElementsBaseVertex(GLenum mode, GLuint start, GLuint end, - GLsizei count, GLenum type, - const GLvoid * indices, GLint basevertex) +void GLAPIENTRY +_mesa_DrawRangeElementsBaseVertex(GLenum mode, GLuint start, GLuint end, + GLsizei count, GLenum type, + const GLvoid * indices, GLint basevertex) { static GLuint warnCount = 0; GLboolean index_bounds_valid = GL_TRUE; @@ -958,9 +957,9 @@ /** * Called by glDrawRangeElements() in immediate mode. */ -static void GLAPIENTRY -_mesa_exec_DrawRangeElements(GLenum mode, GLuint start, GLuint end, - GLsizei count, GLenum type, const GLvoid * indices) +void GLAPIENTRY +_mesa_DrawRangeElements(GLenum mode, GLuint start, GLuint end, + GLsizei count, GLenum type, const GLvoid * indices) { if (MESA_VERBOSE & VERBOSE_DRAW) { GET_CURRENT_CONTEXT(ctx); @@ -970,17 +969,17 @@ _mesa_enum_to_string(type), indices); } - _mesa_exec_DrawRangeElementsBaseVertex(mode, start, end, count, type, - indices, 0); + _mesa_DrawRangeElementsBaseVertex(mode, start, end, count, type, + indices, 0); } /** * Called by glDrawElements() in immediate mode. */ -static void GLAPIENTRY -_mesa_exec_DrawElements(GLenum mode, GLsizei count, GLenum type, - const GLvoid * indices) +void GLAPIENTRY +_mesa_DrawElements(GLenum mode, GLsizei count, GLenum type, + const GLvoid * indices) { GET_CURRENT_CONTEXT(ctx); @@ -1009,9 +1008,9 @@ /** * Called by glDrawElementsBaseVertex() in immediate mode. 
*/ -static void GLAPIENTRY -_mesa_exec_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type, - const GLvoid * indices, GLint basevertex) +void GLAPIENTRY +_mesa_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type, + const GLvoid * indices, GLint basevertex) { GET_CURRENT_CONTEXT(ctx); @@ -1321,10 +1320,9 @@ } -static void GLAPIENTRY -_mesa_exec_MultiDrawElements(GLenum mode, - const GLsizei *count, GLenum type, - const GLvoid * const *indices, GLsizei primcount) +void GLAPIENTRY +_mesa_MultiDrawElements(GLenum mode, const GLsizei *count, GLenum type, + const GLvoid * const *indices, GLsizei primcount) { GET_CURRENT_CONTEXT(ctx); @@ -1344,12 +1342,12 @@ } -static void GLAPIENTRY -_mesa_exec_MultiDrawElementsBaseVertex(GLenum mode, - const GLsizei *count, GLenum type, - const GLvoid * const *indices, - GLsizei primcount, - const GLsizei *basevertex) +void GLAPIENTRY +_mesa_MultiDrawElementsBaseVertex(GLenum mode, + const GLsizei *count, GLenum type, + const GLvoid * const *indices, + GLsizei primcount, + const GLsizei *basevertex) { GET_CURRENT_CONTEXT(ctx); @@ -1444,8 +1442,8 @@ * glVertexPointer, glColorPointer, etc. * Part of GL_ARB_transform_feedback2. 
*/ -static void GLAPIENTRY -_mesa_exec_DrawTransformFeedback(GLenum mode, GLuint name) +void GLAPIENTRY +_mesa_DrawTransformFeedback(GLenum mode, GLuint name) { GET_CURRENT_CONTEXT(ctx); struct gl_transform_feedback_object *obj = @@ -1997,15 +1995,15 @@ _mesa_initialize_exec_dispatch(const struct gl_context *ctx, struct _glapi_table *exec) { - SET_DrawArrays(exec, _mesa_exec_DrawArrays); - SET_DrawElements(exec, _mesa_exec_DrawElements); + SET_DrawArrays(exec, _mesa_DrawArrays); + SET_DrawElements(exec, _mesa_DrawElements); if (_mesa_is_desktop_gl(ctx) || _mesa_is_gles3(ctx)) { - SET_DrawRangeElements(exec, _mesa_exec_DrawRangeElements); + SET_DrawRangeElements(exec, _mesa_DrawRangeElements); } SET_MultiDrawArrays(exec, _mesa_exec_MultiDrawArrays); - SET_MultiDrawElementsEXT(exec, _mesa_exec_MultiDrawElements); + SET_MultiDrawElementsEXT(exec, _mesa_MultiDrawElements); if (ctx->API == API_OPENGL_COMPAT) { SET_Rectf(exec, _mesa_exec_Rectf); @@ -2015,13 +2013,13 @@ if (ctx->API != API_OPENGLES && ctx->Extensions.ARB_draw_elements_base_vertex) { - SET_DrawElementsBaseVertex(exec, _mesa_exec_DrawElementsBaseVertex); + SET_DrawElementsBaseVertex(exec, _mesa_DrawElementsBaseVertex); SET_MultiDrawElementsBaseVertex(exec, - _mesa_exec_MultiDrawElementsBaseVertex); + _mesa_MultiDrawElementsBaseVertex); if (_mesa_is_desktop_gl(ctx) || _mesa_is_gles3(ctx)) { SET_DrawRangeElementsBaseVertex(exec, - _mesa_exec_DrawRangeElementsBaseVertex); + _mesa_DrawRangeElementsBaseVertex); SET_DrawElementsInstancedBaseVertex(exec, _mesa_exec_DrawElementsInstancedBaseVertex); } @@ -2042,12 +2040,12 @@ } if (_mesa_is_desktop_gl(ctx) || _mesa_is_gles3(ctx)) { - SET_DrawArraysInstancedARB(exec, _mesa_exec_DrawArraysInstanced); + SET_DrawArraysInstancedARB(exec, _mesa_DrawArraysInstanced); SET_DrawElementsInstancedARB(exec, _mesa_exec_DrawElementsInstanced); } if (_mesa_is_desktop_gl(ctx)) { - SET_DrawTransformFeedback(exec, _mesa_exec_DrawTransformFeedback); + SET_DrawTransformFeedback(exec, 
_mesa_DrawTransformFeedback); SET_DrawTransformFeedbackStream(exec, _mesa_exec_DrawTransformFeedbackStream); SET_DrawTransformFeedbackInstanced(exec, @@ -2065,87 +2063,6 @@ -/** - * The following functions are only used for OpenGL ES 1/2 support. - * And some aren't even supported (yet) in ES 1/2. - */ - - -void GLAPIENTRY -_mesa_DrawArrays(GLenum mode, GLint first, GLsizei count) -{ - _mesa_exec_DrawArrays(mode, first, count); -} - - -void GLAPIENTRY -_mesa_DrawArraysInstanced(GLenum mode, GLint first, GLsizei count, - GLsizei primcount) -{ - _mesa_exec_DrawArraysInstanced(mode, first, count, primcount); -} - - -void GLAPIENTRY -_mesa_DrawElements(GLenum mode, GLsizei count, GLenum type, - const GLvoid *indices) -{ - _mesa_exec_DrawElements(mode, count, type, indices); -} - - -void GLAPIENTRY -_mesa_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type, - const GLvoid *indices, GLint basevertex) -{ - _mesa_exec_DrawElementsBaseVertex(mode, count, type, indices, basevertex); -} - - -void GLAPIENTRY -_mesa_DrawRangeElements(GLenum mode, GLuint start, GLuint end, GLsizei count, - GLenum type, const GLvoid * indices) -{ - _mesa_exec_DrawRangeElements(mode, start, end, count, type, indices); -} - - -void GLAPIENTRY -_mesa_DrawRangeElementsBaseVertex(GLenum mode, GLuint start, GLuint end, - GLsizei count, GLenum type, - const GLvoid *indices, GLint basevertex) -{ - _mesa_exec_DrawRangeElementsBaseVertex(mode, start, end, count, type, - indices, basevertex); -} - - -void GLAPIENTRY -_mesa_MultiDrawElementsEXT(GLenum mode, const GLsizei *count, GLenum type, - const GLvoid ** indices, GLsizei primcount) -{ - _mesa_exec_MultiDrawElements(mode, count, type, indices, primcount); -} - - -void GLAPIENTRY -_mesa_MultiDrawElementsBaseVertex(GLenum mode, - const GLsizei *count, GLenum type, - const GLvoid **indices, GLsizei primcount, - const GLint *basevertex) -{ - _mesa_exec_MultiDrawElementsBaseVertex(mode, count, type, indices, - primcount, basevertex); -} - - -void 
GLAPIENTRY -_mesa_DrawTransformFeedback(GLenum mode, GLuint name) -{ - _mesa_exec_DrawTransformFeedback(mode, name); -} - - /* GL_IBM_multimode_draw_arrays */ void GLAPIENTRY _mesa_MultiModeDrawArraysIBM( const GLenum * mode, const GLint * first, diff -Nru mesa-18.3.3/src/mesa/main/draw.h mesa-19.0.1/src/mesa/main/draw.h --- mesa-18.3.3/src/mesa/main/draw.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/draw.h 2019-03-31 23:16:37.000000000 +0000 @@ -129,14 +129,14 @@ void GLAPIENTRY -_mesa_MultiDrawElementsEXT(GLenum mode, const GLsizei *count, GLenum type, - const GLvoid **indices, GLsizei primcount); +_mesa_MultiDrawElements(GLenum mode, const GLsizei *count, GLenum type, + const GLvoid *const *indices, GLsizei primcount); void GLAPIENTRY _mesa_MultiDrawElementsBaseVertex(GLenum mode, const GLsizei *count, GLenum type, - const GLvoid **indices, GLsizei primcount, + const GLvoid * const * indices, GLsizei primcount, const GLint *basevertex); diff -Nru mesa-18.3.3/src/mesa/main/draw_validate.c mesa-19.0.1/src/mesa/main/draw_validate.c --- mesa-18.3.3/src/mesa/main/draw_validate.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/draw_validate.c 2019-03-31 23:16:37.000000000 +0000 @@ -1100,7 +1100,7 @@ * buffer bound. 
*/ if (_mesa_is_gles31(ctx) && - ctx->Array.VAO->_Enabled & ~ctx->Array.VAO->VertexAttribBufferMask) { + ctx->Array.VAO->Enabled & ~ctx->Array.VAO->VertexAttribBufferMask) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(No VBO bound)", name); return GL_FALSE; } diff -Nru mesa-18.3.3/src/mesa/main/enable.c mesa-19.0.1/src/mesa/main/enable.c --- mesa-18.3.3/src/mesa/main/enable.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/enable.c 2019-03-31 23:16:37.000000000 +0000 @@ -1125,8 +1125,6 @@ /* GL3.0 - GL_framebuffer_sRGB */ case GL_FRAMEBUFFER_SRGB_EXT: - if (!_mesa_is_desktop_gl(ctx)) - goto invalid_enum_error; CHECK_EXTENSION(EXT_framebuffer_sRGB, cap); _mesa_set_framebuffer_srgb(ctx, state); return; @@ -1582,41 +1580,40 @@ case GL_VERTEX_ARRAY: if (ctx->API != API_OPENGL_COMPAT && ctx->API != API_OPENGLES) goto invalid_enum_error; - return ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_POS].Enabled; + return !!(ctx->Array.VAO->Enabled & VERT_BIT_POS); case GL_NORMAL_ARRAY: if (ctx->API != API_OPENGL_COMPAT && ctx->API != API_OPENGLES) goto invalid_enum_error; - return ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_NORMAL].Enabled; + return !!(ctx->Array.VAO->Enabled & VERT_BIT_NORMAL); case GL_COLOR_ARRAY: if (ctx->API != API_OPENGL_COMPAT && ctx->API != API_OPENGLES) goto invalid_enum_error; - return ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_COLOR0].Enabled; + return !!(ctx->Array.VAO->Enabled & VERT_BIT_COLOR0); case GL_INDEX_ARRAY: if (ctx->API != API_OPENGL_COMPAT) goto invalid_enum_error; - return ctx->Array.VAO-> - VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Enabled; + return !!(ctx->Array.VAO->Enabled & VERT_BIT_COLOR_INDEX); case GL_TEXTURE_COORD_ARRAY: if (ctx->API != API_OPENGL_COMPAT && ctx->API != API_OPENGLES) goto invalid_enum_error; - return ctx->Array.VAO-> - VertexAttrib[VERT_ATTRIB_TEX(ctx->Array.ActiveTexture)].Enabled; + return !!(ctx->Array.VAO->Enabled & + VERT_BIT_TEX(ctx->Array.ActiveTexture)); case GL_EDGE_FLAG_ARRAY: if (ctx->API != 
API_OPENGL_COMPAT) goto invalid_enum_error; - return ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_EDGEFLAG].Enabled; + return !!(ctx->Array.VAO->Enabled & VERT_BIT_EDGEFLAG); case GL_FOG_COORDINATE_ARRAY_EXT: if (ctx->API != API_OPENGL_COMPAT) goto invalid_enum_error; - return ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_FOG].Enabled; + return !!(ctx->Array.VAO->Enabled & VERT_BIT_FOG); case GL_SECONDARY_COLOR_ARRAY_EXT: if (ctx->API != API_OPENGL_COMPAT) goto invalid_enum_error; - return ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_COLOR1].Enabled; + return !!(ctx->Array.VAO->Enabled & VERT_BIT_COLOR1); case GL_POINT_SIZE_ARRAY_OES: if (ctx->API != API_OPENGLES) goto invalid_enum_error; - return ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled; + return !!(ctx->Array.VAO->Enabled & VERT_BIT_POINT_SIZE); /* GL_ARB_texture_cube_map */ case GL_TEXTURE_CUBE_MAP: @@ -1765,8 +1762,6 @@ /* GL3.0 - GL_framebuffer_sRGB */ case GL_FRAMEBUFFER_SRGB_EXT: - if (!_mesa_is_desktop_gl(ctx)) - goto invalid_enum_error; CHECK_EXTENSION(EXT_framebuffer_sRGB); return ctx->Color.sRGBEnabled; diff -Nru mesa-18.3.3/src/mesa/main/errors.c mesa-19.0.1/src/mesa/main/errors.c --- mesa-18.3.3/src/mesa/main/errors.c 2018-01-06 23:02:18.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/errors.c 2019-03-31 23:16:37.000000000 +0000 @@ -231,6 +231,9 @@ _mesa_debug_get_id(id); len = _mesa_vsnprintf(s, MAX_DEBUG_MESSAGE_LENGTH, fmtString, args); + if (len >= MAX_DEBUG_MESSAGE_LENGTH) + /* message was truncated */ + len = MAX_DEBUG_MESSAGE_LENGTH - 1; _mesa_log_msg(ctx, source, type, *id, severity, len, s); } diff -Nru mesa-18.3.3/src/mesa/main/extensions_table.h mesa-19.0.1/src/mesa/main/extensions_table.h --- mesa-18.3.3/src/mesa/main/extensions_table.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/extensions_table.h 2019-03-31 23:16:37.000000000 +0000 @@ -20,6 +20,7 @@ EXT(AMD_seamless_cubemap_per_texture , AMD_seamless_cubemap_per_texture , GLL, GLC, x , x , 2009) 
EXT(AMD_shader_stencil_export , ARB_shader_stencil_export , GLL, GLC, x , x , 2009) EXT(AMD_shader_trinary_minmax , dummy_true , GLL, GLC, x , x , 2012) +EXT(AMD_texture_texture4 , ARB_texture_gather , GLL, GLC, x , x , 2008) EXT(AMD_vertex_shader_layer , AMD_vertex_shader_layer , GLL, GLC, x , x , 2012) EXT(AMD_vertex_shader_viewport_index , AMD_vertex_shader_viewport_index , GLL, GLC, x , x , 2012) @@ -241,7 +242,9 @@ EXT(EXT_memory_object , EXT_memory_object , GLL, GLC, x , ES2, 2017) EXT(EXT_memory_object_fd , EXT_memory_object_fd , GLL, GLC, x , ES2, 2017) EXT(EXT_multi_draw_arrays , dummy_true , GLL, x , ES1, ES2, 1999) -EXT(EXT_occlusion_query_boolean , ARB_occlusion_query , x , x , x , ES2, 2001) +EXT(EXT_multisampled_render_to_texture , EXT_multisampled_render_to_texture , x , x , x , ES2, 2016) +EXT(EXT_multisampled_render_to_texture2 , EXT_multisampled_render_to_texture , x , x , x , ES2, 2016) +EXT(EXT_occlusion_query_boolean , ARB_occlusion_query2 , x , x , x , ES2, 2011) EXT(EXT_packed_depth_stencil , dummy_true , GLL, GLC, x , x , 2005) EXT(EXT_packed_float , EXT_packed_float , GLL, GLC, x , x , 2004) EXT(EXT_packed_pixels , dummy_true , GLL, x , x , x , 1997) @@ -254,6 +257,7 @@ EXT(EXT_render_snorm , EXT_render_snorm , x , x , x, 31, 2014) EXT(EXT_rescale_normal , dummy_true , GLL, x , x , x , 1997) EXT(EXT_robustness , KHR_robustness , x, x, x , ES2, 2011) +EXT(EXT_sRGB_write_control , EXT_framebuffer_sRGB , x, x , x , 30, 2013) EXT(EXT_secondary_color , dummy_true , GLL, x , x , x , 1999) EXT(EXT_semaphore , EXT_semaphore , GLL, GLC, x , ES2, 2017) EXT(EXT_semaphore_fd , EXT_semaphore_fd , GLL, GLC, x , ES2, 2017) @@ -261,6 +265,7 @@ EXT(EXT_separate_specular_color , dummy_true , GLL, x , x , x , 1997) EXT(EXT_shader_framebuffer_fetch , EXT_shader_framebuffer_fetch , GLL, GLC, x , ES2, 2013) EXT(EXT_shader_framebuffer_fetch_non_coherent, EXT_shader_framebuffer_fetch_non_coherent, GLL, GLC, x, ES2, 2018) +EXT(EXT_shader_implicit_conversions , 
dummy_true , x , x , x , 31, 2013) EXT(EXT_shader_integer_mix , EXT_shader_integer_mix , GLL, GLC, x , 30, 2013) EXT(EXT_shader_io_blocks , dummy_true , x , x , x , 31, 2014) EXT(EXT_shader_samples_identical , EXT_shader_samples_identical , GLL, GLC, x , 31, 2015) @@ -275,9 +280,10 @@ EXT(EXT_texture_array , EXT_texture_array , GLL, GLC, x , x , 2006) EXT(EXT_texture_border_clamp , ARB_texture_border_clamp , x , x , x , ES2, 2014) EXT(EXT_texture_buffer , OES_texture_buffer , x , x , x , 31, 2014) +EXT(EXT_texture_compression_bptc , ARB_texture_compression_bptc , x , x , x , 30, 2017) EXT(EXT_texture_compression_dxt1 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2004) EXT(EXT_texture_compression_latc , EXT_texture_compression_latc , GLL, x , x , x , 2006) -EXT(EXT_texture_compression_rgtc , ARB_texture_compression_rgtc , GLL, GLC, x , x , 2004) +EXT(EXT_texture_compression_rgtc , ARB_texture_compression_rgtc , GLL, GLC, x , 30, 2004) EXT(EXT_texture_compression_s3tc , EXT_texture_compression_s3tc , GLL, GLC, x , ES2, 2000) EXT(EXT_texture_cube_map , ARB_texture_cube_map , GLL, x , x , x , 2001) EXT(EXT_texture_cube_map_array , OES_texture_cube_map_array , x , x , x , 31, 2014) @@ -295,11 +301,13 @@ EXT(EXT_texture_rectangle , NV_texture_rectangle , GLL, x , x , x , 2004) EXT(EXT_texture_rg , ARB_texture_rg , x , x , x , ES2, 2011) EXT(EXT_texture_sRGB , EXT_texture_sRGB , GLL, GLC, x , x , 2004) +EXT(EXT_texture_sRGB_R8 , EXT_texture_sRGB_R8 , x , x , x , 30, 2015) EXT(EXT_texture_sRGB_decode , EXT_texture_sRGB_decode , GLL, GLC, x , 30, 2006) EXT(EXT_texture_shared_exponent , EXT_texture_shared_exponent , GLL, GLC, x , x , 2004) EXT(EXT_texture_snorm , EXT_texture_snorm , GLL, GLC, x , x , 2009) EXT(EXT_texture_swizzle , EXT_texture_swizzle , GLL, GLC, x , x , 2008) EXT(EXT_texture_type_2_10_10_10_REV , EXT_texture_type_2_10_10_10_REV , x , x , x , ES2, 2008) +EXT(EXT_texture_view , OES_texture_view , x , x , x , 31, 2014) EXT(EXT_timer_query , 
EXT_timer_query , GLL, GLC, x , x , 2006) EXT(EXT_transform_feedback , EXT_transform_feedback , GLL, GLC, x , x , 2011) EXT(EXT_unpack_subimage , dummy_true , x , x , x , ES2, 2011) diff -Nru mesa-18.3.3/src/mesa/main/fbobject.c mesa-19.0.1/src/mesa/main/fbobject.c --- mesa-18.3.3/src/mesa/main/fbobject.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/fbobject.c 2019-03-31 23:16:37.000000000 +0000 @@ -497,8 +497,8 @@ struct gl_framebuffer *fb, struct gl_renderbuffer_attachment *att, struct gl_texture_object *texObj, - GLenum texTarget, GLuint level, GLuint layer, - GLboolean layered) + GLenum texTarget, GLuint level, GLsizei samples, + GLuint layer, GLboolean layered) { struct gl_renderbuffer *rb = att->Renderbuffer; @@ -520,6 +520,7 @@ /* always update these fields */ att->TextureLevel = level; + att->NumSamples = samples; att->CubeMapFace = _mesa_tex_target_to_face(texTarget); att->Zoffset = layer; att->Layered = layered; @@ -750,6 +751,7 @@ case GL_SRGB8: case GL_RGB10: case GL_RGB9_E5: + case GL_SR8_EXT: return GL_FALSE; default: break; @@ -1001,6 +1003,7 @@ fb->_HasSNormOrFloatColorBuffer = GL_FALSE; fb->_HasAttachments = true; fb->_IntegerBuffers = 0; + fb->_RGBBuffers = 0; /* Start at -2 to more easily loop over all attachment points. 
* -2: depth buffer @@ -1084,8 +1087,11 @@ return; } - attNumSamples = texImg->NumSamples; - attNumStorageSamples = texImg->NumSamples; + if (att->NumSamples > 0) + attNumSamples = att->NumSamples; + else + attNumSamples = texImg->NumSamples; + attNumStorageSamples = attNumSamples; } else if (att->Type == GL_RENDERBUFFER_EXT) { minWidth = MIN2(minWidth, att->Renderbuffer->Width); @@ -1144,6 +1150,9 @@ if (_mesa_is_format_integer_color(attFormat)) fb->_IntegerBuffers |= (1 << i); + if (f == GL_RGB) + fb->_RGBBuffers |= (1 << i); + fb->_AllColorBuffersFixedPoint = fb->_AllColorBuffersFixedPoint && (type == GL_UNSIGNED_NORMALIZED || type == GL_SIGNED_NORMALIZED); @@ -3497,7 +3506,8 @@ GLenum attachment, struct gl_renderbuffer_attachment *att, struct gl_texture_object *texObj, GLenum textarget, - GLint level, GLuint layer, GLboolean layered) + GLint level, GLsizei samples, + GLuint layer, GLboolean layered) { FLUSH_VERTICES(ctx, _NEW_BUFFERS); @@ -3508,6 +3518,7 @@ level == fb->Attachment[BUFFER_STENCIL].TextureLevel && _mesa_tex_target_to_face(textarget) == fb->Attachment[BUFFER_STENCIL].CubeMapFace && + samples == fb->Attachment[BUFFER_STENCIL].NumSamples && layer == fb->Attachment[BUFFER_STENCIL].Zoffset) { /* The texture object is already attached to the stencil attachment * point. Don't create a new renderbuffer; just reuse the stencil @@ -3521,13 +3532,14 @@ level == fb->Attachment[BUFFER_DEPTH].TextureLevel && _mesa_tex_target_to_face(textarget) == fb->Attachment[BUFFER_DEPTH].CubeMapFace && + samples == fb->Attachment[BUFFER_DEPTH].NumSamples && layer == fb->Attachment[BUFFER_DEPTH].Zoffset) { /* As above, but with depth and stencil transposed. 
*/ reuse_framebuffer_texture_attachment(fb, BUFFER_STENCIL, BUFFER_DEPTH); } else { set_texture_attachment(ctx, fb, att, texObj, textarget, - level, layer, layered); + level, samples, layer, layered); if (attachment == GL_DEPTH_STENCIL_ATTACHMENT) { /* Above we created a new renderbuffer and attached it to the @@ -3582,15 +3594,15 @@ get_attachment(ctx, fb, attachment, NULL); _mesa_framebuffer_texture(ctx, fb, attachment, att, texObj, textarget, - level, layer, GL_FALSE); + level, 0, layer, GL_FALSE); } static void framebuffer_texture_with_dims(int dims, GLenum target, GLenum attachment, GLenum textarget, - GLuint texture, GLint level, GLint layer, - const char *caller) + GLuint texture, GLint level, GLsizei samples, + GLint layer, const char *caller) { GET_CURRENT_CONTEXT(ctx); struct gl_framebuffer *fb; @@ -3625,7 +3637,7 @@ return; _mesa_framebuffer_texture(ctx, fb, attachment, att, texObj, textarget, - level, layer, GL_FALSE); + level, samples, layer, GL_FALSE); } @@ -3644,7 +3656,7 @@ GLenum textarget, GLuint texture, GLint level) { framebuffer_texture_with_dims(1, target, attachment, textarget, texture, - level, 0, "glFramebufferTexture1D"); + level, 0, 0, "glFramebufferTexture1D"); } @@ -3663,7 +3675,17 @@ GLenum textarget, GLuint texture, GLint level) { framebuffer_texture_with_dims(2, target, attachment, textarget, texture, - level, 0, "glFramebufferTexture2D"); + level, 0, 0, "glFramebufferTexture2D"); +} + + +void GLAPIENTRY +_mesa_FramebufferTexture2DMultisampleEXT(GLenum target, GLenum attachment, + GLenum textarget, GLuint texture, + GLint level, GLsizei samples) +{ + framebuffer_texture_with_dims(2, target, attachment, textarget, texture, + level, samples, 0, "glFramebufferTexture2DMultisampleEXT"); } @@ -3683,7 +3705,7 @@ GLint level, GLint layer) { framebuffer_texture_with_dims(3, target, attachment, textarget, texture, - level, layer, "glFramebufferTexture3D"); + level, 0, layer, "glFramebufferTexture3D"); } @@ -3773,7 +3795,7 @@ } 
_mesa_framebuffer_texture(ctx, fb, attachment, att, texObj, textarget, - level, layer, layered); + level, 0, layer, layered); } void GLAPIENTRY @@ -4252,7 +4274,7 @@ } } else { - if (ctx->Extensions.EXT_framebuffer_sRGB) { + if (ctx->Extensions.EXT_sRGB) { *params = _mesa_get_format_color_encoding(att->Renderbuffer->Format); } @@ -4356,6 +4378,18 @@ goto invalid_pname_enum; } return; + case GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_SAMPLES_EXT: + if (!ctx->Extensions.EXT_multisampled_render_to_texture) { + goto invalid_pname_enum; + } else if (att->Type == GL_TEXTURE) { + *params = att->NumSamples; + } else if (att->Type == GL_NONE) { + _mesa_error(ctx, err, "%s(invalid pname %s)", caller, + _mesa_enum_to_string(pname)); + } else { + goto invalid_pname_enum; + } + return; default: goto invalid_pname_enum; } @@ -4607,6 +4641,86 @@ return; } +static struct gl_renderbuffer_attachment * +get_fb_attachment(struct gl_context *ctx, struct gl_framebuffer *fb, + const GLenum attachment) +{ + switch (attachment) { + case GL_COLOR: + return &fb->Attachment[BUFFER_BACK_LEFT]; + case GL_COLOR_ATTACHMENT0: + case GL_COLOR_ATTACHMENT1: + case GL_COLOR_ATTACHMENT2: + case GL_COLOR_ATTACHMENT3: + case GL_COLOR_ATTACHMENT4: + case GL_COLOR_ATTACHMENT5: + case GL_COLOR_ATTACHMENT6: + case GL_COLOR_ATTACHMENT7: + case GL_COLOR_ATTACHMENT8: + case GL_COLOR_ATTACHMENT9: + case GL_COLOR_ATTACHMENT10: + case GL_COLOR_ATTACHMENT11: + case GL_COLOR_ATTACHMENT12: + case GL_COLOR_ATTACHMENT13: + case GL_COLOR_ATTACHMENT14: + case GL_COLOR_ATTACHMENT15: { + const unsigned i = attachment - GL_COLOR_ATTACHMENT0; + if (i >= ctx->Const.MaxColorAttachments) + return NULL; + return &fb->Attachment[BUFFER_COLOR0 + i]; + } + case GL_DEPTH: + case GL_DEPTH_ATTACHMENT: + case GL_DEPTH_STENCIL_ATTACHMENT: + return &fb->Attachment[BUFFER_DEPTH]; + case GL_STENCIL: + case GL_STENCIL_ATTACHMENT: + return &fb->Attachment[BUFFER_STENCIL]; + default: + return NULL; + } +} + +static void +discard_framebuffer(struct 
gl_context *ctx, struct gl_framebuffer *fb, + GLsizei numAttachments, const GLenum *attachments) +{ + if (!ctx->Driver.DiscardFramebuffer) + return; + + for (int i = 0; i < numAttachments; i++) { + struct gl_renderbuffer_attachment *att = + get_fb_attachment(ctx, fb, attachments[i]); + + if (!att) + continue; + + /* If we're asked to invalidate just depth or just stencil, but the + * attachment is packed depth/stencil, then we can only use + * Driver.DiscardFramebuffer if the attachments list includes both depth + * and stencil and they both point at the same renderbuffer. + */ + if ((attachments[i] == GL_DEPTH_ATTACHMENT || + attachments[i] == GL_STENCIL_ATTACHMENT) && + (!att->Renderbuffer || + att->Renderbuffer->_BaseFormat == GL_DEPTH_STENCIL)) { + GLenum other_format = (attachments[i] == GL_DEPTH_ATTACHMENT ? + GL_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT); + bool has_both = false; + for (int j = 0; j < numAttachments; j++) { + if (attachments[j] == other_format) + has_both = true; + break; + } + + if (fb->Attachment[BUFFER_DEPTH].Renderbuffer != + fb->Attachment[BUFFER_STENCIL].Renderbuffer || !has_both) + continue; + } + + ctx->Driver.DiscardFramebuffer(ctx, fb, att); + } +} void GLAPIENTRY _mesa_InvalidateSubFramebuffer_no_error(GLenum target, GLsizei numAttachments, @@ -4667,12 +4781,18 @@ "glInvalidateNamedFramebufferSubData"); } - void GLAPIENTRY _mesa_InvalidateFramebuffer_no_error(GLenum target, GLsizei numAttachments, const GLenum *attachments) { - /* no-op */ + struct gl_framebuffer *fb; + GET_CURRENT_CONTEXT(ctx); + + fb = get_framebuffer_target(ctx, target); + if (!fb) + return; + + discard_framebuffer(ctx, fb, numAttachments, attachments); } @@ -4708,6 +4828,8 @@ ctx->Const.MaxViewportWidth, ctx->Const.MaxViewportHeight, "glInvalidateFramebuffer"); + + discard_framebuffer(ctx, fb, numAttachments, attachments); } @@ -4794,8 +4916,7 @@ } } - if (ctx->Driver.DiscardFramebuffer) - ctx->Driver.DiscardFramebuffer(ctx, target, numAttachments, 
attachments); + discard_framebuffer(ctx, fb, numAttachments, attachments); return; diff -Nru mesa-18.3.3/src/mesa/main/fbobject.h mesa-19.0.1/src/mesa/main/fbobject.h --- mesa-18.3.3/src/mesa/main/fbobject.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/fbobject.h 2019-03-31 23:16:37.000000000 +0000 @@ -129,7 +129,8 @@ GLenum attachment, struct gl_renderbuffer_attachment *att, struct gl_texture_object *texObj, GLenum textarget, - GLint level, GLuint layer, GLboolean layered); + GLint level, GLsizei samples, + GLuint layer, GLboolean layered); extern GLenum _mesa_check_framebuffer_status(struct gl_context *ctx, @@ -250,6 +251,11 @@ _mesa_FramebufferTexture2D(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +void GLAPIENTRY +_mesa_FramebufferTexture2DMultisampleEXT(GLenum target, GLenum attachment, + GLenum textarget, GLuint texture, + GLint level, GLsizei samples); + extern void GLAPIENTRY _mesa_FramebufferTexture3D_no_error(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, diff -Nru mesa-18.3.3/src/mesa/main/formatquery.c mesa-19.0.1/src/mesa/main/formatquery.c --- mesa-18.3.3/src/mesa/main/formatquery.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/formatquery.c 2019-03-31 23:16:37.000000000 +0000 @@ -1241,7 +1241,7 @@ break; case GL_SRGB_WRITE: - if (!_mesa_has_EXT_framebuffer_sRGB(ctx) || + if (!ctx->Extensions.EXT_sRGB || !_mesa_is_color_format(internalformat)) { goto end; } diff -Nru mesa-18.3.3/src/mesa/main/formats.c mesa-19.0.1/src/mesa/main/formats.c --- mesa-18.3.3/src/mesa/main/formats.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/formats.c 2019-03-31 23:16:37.000000000 +0000 @@ -1108,6 +1108,7 @@ *comps = 4; return; case MESA_FORMAT_L_SRGB8: + case MESA_FORMAT_R_SRGB8: *datatype = GL_UNSIGNED_BYTE; *comps = 1; return; @@ -1670,6 +1671,7 @@ (type == GL_UNSIGNED_SHORT_8_8_REV_MESA && littleEndian != swapBytes)); case MESA_FORMAT_R_UNORM8: + 
case MESA_FORMAT_R_SRGB8: return format == GL_RED && type == GL_UNSIGNED_BYTE; case MESA_FORMAT_R8G8_UNORM: return format == GL_RG && type == GL_UNSIGNED_BYTE && littleEndian; diff -Nru mesa-18.3.3/src/mesa/main/formats.csv mesa-19.0.1/src/mesa/main/formats.csv --- mesa-18.3.3/src/mesa/main/formats.csv 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/formats.csv 2019-03-31 23:16:37.000000000 +0000 @@ -158,6 +158,7 @@ MESA_FORMAT_A8L8_SRGB , packed, 1, 1, 1, un8 , un8 , , , yyyx, srgb # Array sRGB formats +MESA_FORMAT_R_SRGB8 , array , 1, 1, 1, un8 , , , , x001, srgb MESA_FORMAT_L_SRGB8 , array , 1, 1, 1, un8 , , , , xxx1, srgb MESA_FORMAT_BGR_SRGB8 , array , 1, 1, 1, un8 , un8 , un8 , , zyx1, srgb diff -Nru mesa-18.3.3/src/mesa/main/formats.h mesa-19.0.1/src/mesa/main/formats.h --- mesa-18.3.3/src/mesa/main/formats.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/formats.h 2019-03-31 23:16:37.000000000 +0000 @@ -440,6 +440,7 @@ MESA_FORMAT_X8B8G8R8_SRGB, /* RRRR RRRR GGGG GGGG BBBB BBBB xxxx xxxx */ MESA_FORMAT_L8A8_SRGB, /* AAAA AAAA LLLL LLLL */ MESA_FORMAT_A8L8_SRGB, /* LLLL LLLL AAAA AAAA */ + MESA_FORMAT_R_SRGB8, /* RRRR RRRR */ /* Array sRGB formats */ MESA_FORMAT_L_SRGB8, /* ubyte[i] = L */ diff -Nru mesa-18.3.3/src/mesa/main/framebuffer.c mesa-19.0.1/src/mesa/main/framebuffer.c --- mesa-18.3.3/src/mesa/main/framebuffer.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/framebuffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -459,7 +459,7 @@ fb->Visual.rgbBits = fb->Visual.redBits + fb->Visual.greenBits + fb->Visual.blueBits; if (_mesa_get_format_color_encoding(fmt) == GL_SRGB) - fb->Visual.sRGBCapable = ctx->Extensions.EXT_framebuffer_sRGB; + fb->Visual.sRGBCapable = ctx->Extensions.EXT_sRGB; break; } } diff -Nru mesa-18.3.3/src/mesa/main/get.c mesa-19.0.1/src/mesa/main/get.c --- mesa-18.3.3/src/mesa/main/get.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/get.c 2019-03-31 
23:16:37.000000000 +0000 @@ -727,14 +727,50 @@ v->value_matrix = ctx->TextureMatrixStack[unit].Top; break; + case GL_VERTEX_ARRAY: + v->value_bool = !!(ctx->Array.VAO->Enabled & VERT_BIT_POS); + break; + case GL_NORMAL_ARRAY: + v->value_bool = !!(ctx->Array.VAO->Enabled & VERT_BIT_NORMAL); + break; + case GL_COLOR_ARRAY: + v->value_bool = !!(ctx->Array.VAO->Enabled & VERT_BIT_COLOR0); + break; case GL_TEXTURE_COORD_ARRAY: - case GL_TEXTURE_COORD_ARRAY_SIZE: + v->value_bool = !!(ctx->Array.VAO->Enabled & VERT_BIT_TEX(ctx->Array.ActiveTexture)); + break; + case GL_INDEX_ARRAY: + v->value_bool = !!(ctx->Array.VAO->Enabled & VERT_BIT_COLOR_INDEX); + break; + case GL_EDGE_FLAG_ARRAY: + v->value_bool = !!(ctx->Array.VAO->Enabled & VERT_BIT_EDGEFLAG); + break; + case GL_SECONDARY_COLOR_ARRAY: + v->value_bool = !!(ctx->Array.VAO->Enabled & VERT_BIT_COLOR1); + break; + case GL_FOG_COORDINATE_ARRAY: + v->value_bool = !!(ctx->Array.VAO->Enabled & VERT_BIT_FOG); + break; + case GL_POINT_SIZE_ARRAY_OES: + v->value_bool = !!(ctx->Array.VAO->Enabled & VERT_BIT_POINT_SIZE); + break; + case GL_TEXTURE_COORD_ARRAY_TYPE: case GL_TEXTURE_COORD_ARRAY_STRIDE: array = &ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_TEX(ctx->Array.ActiveTexture)]; v->value_int = *(GLuint *) ((char *) array + d->offset); break; + case GL_TEXTURE_COORD_ARRAY_SIZE: + array = &ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_TEX(ctx->Array.ActiveTexture)]; + v->value_int = array->Format.Size; + break; + + case GL_VERTEX_ARRAY_SIZE: + array = &ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_POS]; + v->value_int = array->Format.Size; + break; + case GL_ACTIVE_TEXTURE_ARB: v->value_int = GL_TEXTURE0_ARB + ctx->Texture.CurrentUnit; break; @@ -870,6 +906,9 @@ break; /* GL_EXT_external_objects */ + case GL_NUM_DEVICE_UUIDS_EXT: + v->value_int = 1; + break; case GL_DRIVER_UUID_EXT: _mesa_get_driver_uuid(ctx, v->value_int_4); break; @@ -942,11 +981,11 @@ /* ARB_vertex_array_bgra */ case GL_COLOR_ARRAY_SIZE: array = 
&ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_COLOR0]; - v->value_int = array->Format == GL_BGRA ? GL_BGRA : array->Size; + v->value_int = array->Format.Format == GL_BGRA ? GL_BGRA : array->Format.Size; break; case GL_SECONDARY_COLOR_ARRAY_SIZE: array = &ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_COLOR1]; - v->value_int = array->Format == GL_BGRA ? GL_BGRA : array->Size; + v->value_int = array->Format.Format == GL_BGRA ? GL_BGRA : array->Format.Size; break; /* ARB_copy_buffer */ diff -Nru mesa-18.3.3/src/mesa/main/get_hash_generator.py mesa-19.0.1/src/mesa/main/get_hash_generator.py --- mesa-18.3.3/src/mesa/main/get_hash_generator.py 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/get_hash_generator.py 2019-03-31 23:16:37.000000000 +0000 @@ -30,15 +30,14 @@ from __future__ import print_function -import os, sys, imp, getopt +import os, sys, getopt from collections import defaultdict import get_hash_params -cur_dir = os.path.dirname(sys.argv[0]) -param_desc_file = "%s/get_hash_params.py" % cur_dir +param_desc_file = os.path.join(os.path.dirname(__file__), "get_hash_params.py") -GLAPI = "%s/../../mapi/glapi/gen" % cur_dir -sys.path.append(GLAPI) +GLAPI = os.path.join(os.path.dirname(__file__), "..", "..", "mapi", "glapi", "gen") +sys.path.insert(0, GLAPI) import gl_XML prime_factor = 89 diff -Nru mesa-18.3.3/src/mesa/main/get_hash_params.py mesa-19.0.1/src/mesa/main/get_hash_params.py --- mesa-18.3.3/src/mesa/main/get_hash_params.py 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/get_hash_params.py 2019-03-31 23:16:37.000000000 +0000 @@ -211,20 +211,20 @@ [ "TEXTURE_2D", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ], [ "TEXTURE_MATRIX", "LOC_CUSTOM, TYPE_MATRIX, 0, extra_valid_texture_unit" ], [ "TEXTURE_STACK_DEPTH", "LOC_CUSTOM, TYPE_INT, 0, extra_valid_texture_unit" ], - [ "VERTEX_ARRAY", "ARRAY_BOOL(VertexAttrib[VERT_ATTRIB_POS].Enabled), NO_EXTRA" ], - [ "VERTEX_ARRAY_SIZE", "ARRAY_UBYTE(VertexAttrib[VERT_ATTRIB_POS].Size), NO_EXTRA" ], 
- [ "VERTEX_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_POS].Type), NO_EXTRA" ], + [ "VERTEX_ARRAY", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ], + [ "VERTEX_ARRAY_SIZE", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ], + [ "VERTEX_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_POS].Format.Type), NO_EXTRA" ], [ "VERTEX_ARRAY_STRIDE", "ARRAY_SHORT(VertexAttrib[VERT_ATTRIB_POS].Stride), NO_EXTRA" ], - [ "NORMAL_ARRAY", "ARRAY_BOOL(VertexAttrib[VERT_ATTRIB_NORMAL].Enabled), NO_EXTRA" ], - [ "NORMAL_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_NORMAL].Type), NO_EXTRA" ], + [ "NORMAL_ARRAY", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ], + [ "NORMAL_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_NORMAL].Format.Type), NO_EXTRA" ], [ "NORMAL_ARRAY_STRIDE", "ARRAY_SHORT(VertexAttrib[VERT_ATTRIB_NORMAL].Stride), NO_EXTRA" ], - [ "COLOR_ARRAY", "ARRAY_BOOL(VertexAttrib[VERT_ATTRIB_COLOR0].Enabled), NO_EXTRA" ], + [ "COLOR_ARRAY", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ], [ "COLOR_ARRAY_SIZE", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ], - [ "COLOR_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_COLOR0].Type), NO_EXTRA" ], + [ "COLOR_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_COLOR0].Format.Type), NO_EXTRA" ], [ "COLOR_ARRAY_STRIDE", "ARRAY_SHORT(VertexAttrib[VERT_ATTRIB_COLOR0].Stride), NO_EXTRA" ], - [ "TEXTURE_COORD_ARRAY", "LOC_CUSTOM, TYPE_BOOLEAN, offsetof(struct gl_array_attributes, Enabled), NO_EXTRA" ], - [ "TEXTURE_COORD_ARRAY_SIZE", "LOC_CUSTOM, TYPE_UBYTE, offsetof(struct gl_array_attributes, Size), NO_EXTRA" ], - [ "TEXTURE_COORD_ARRAY_TYPE", "LOC_CUSTOM, TYPE_ENUM16, offsetof(struct gl_array_attributes, Type), NO_EXTRA" ], + [ "TEXTURE_COORD_ARRAY", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ], + [ "TEXTURE_COORD_ARRAY_SIZE", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ], + [ "TEXTURE_COORD_ARRAY_TYPE", "LOC_CUSTOM, TYPE_ENUM16, offsetof(struct gl_array_attributes, Format.Type), NO_EXTRA" ], [ "TEXTURE_COORD_ARRAY_STRIDE", "LOC_CUSTOM, TYPE_SHORT, 
offsetof(struct gl_array_attributes, Stride), NO_EXTRA" ], # GL_ARB_multitexture @@ -253,8 +253,8 @@ { "apis": ["GLES"], "params": [ # OES_point_size_array - [ "POINT_SIZE_ARRAY_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled, TYPE_BOOLEAN), NO_EXTRA" ], - [ "POINT_SIZE_ARRAY_TYPE_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Type, TYPE_ENUM16), NO_EXTRA" ], + [ "POINT_SIZE_ARRAY_OES", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ], + [ "POINT_SIZE_ARRAY_TYPE_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Format.Type, TYPE_ENUM16), NO_EXTRA" ], [ "POINT_SIZE_ARRAY_STRIDE_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Stride, TYPE_SHORT), NO_EXTRA" ], [ "POINT_SIZE_ARRAY_BUFFER_BINDING_OES", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ], ]}, @@ -463,6 +463,9 @@ [ "MIN_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MinFragmentInterpolationOffset), extra_ARB_gpu_shader5_or_OES_sample_variables" ], [ "MAX_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MaxFragmentInterpolationOffset), extra_ARB_gpu_shader5_or_OES_sample_variables" ], [ "FRAGMENT_INTERPOLATION_OFFSET_BITS", "CONST(FRAGMENT_INTERPOLATION_OFFSET_BITS), extra_ARB_gpu_shader5_or_OES_sample_variables" ], + +# GL_EXT_framebuffer_EXT / GLES 3.0 + EXT_sRGB_write_control + [ "FRAMEBUFFER_SRGB_EXT", "CONTEXT_BOOL(Color.sRGBEnabled), extra_EXT_framebuffer_sRGB" ], ]}, { "apis": ["GLES", "GLES2"], "params": [ @@ -793,12 +796,12 @@ [ "VERTEX_ARRAY_COUNT_EXT", "CONST(0), NO_EXTRA" ], [ "NORMAL_ARRAY_COUNT_EXT", "CONST(0), NO_EXTRA" ], [ "COLOR_ARRAY_COUNT_EXT", "CONST(0), NO_EXTRA" ], - [ "INDEX_ARRAY", "ARRAY_BOOL(VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Enabled), NO_EXTRA" ], - [ "INDEX_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Type), NO_EXTRA" ], + [ "INDEX_ARRAY", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ], + [ "INDEX_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Format.Type), NO_EXTRA" ], [ "INDEX_ARRAY_STRIDE", 
"ARRAY_SHORT(VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Stride), NO_EXTRA" ], [ "INDEX_ARRAY_COUNT_EXT", "CONST(0), NO_EXTRA" ], [ "TEXTURE_COORD_ARRAY_COUNT_EXT", "CONST(0), NO_EXTRA" ], - [ "EDGE_FLAG_ARRAY", "ARRAY_BOOL(VertexAttrib[VERT_ATTRIB_EDGEFLAG].Enabled), NO_EXTRA" ], + [ "EDGE_FLAG_ARRAY", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ], [ "EDGE_FLAG_ARRAY_STRIDE", "ARRAY_SHORT(VertexAttrib[VERT_ATTRIB_EDGEFLAG].Stride), NO_EXTRA" ], [ "EDGE_FLAG_ARRAY_COUNT_EXT", "CONST(0), NO_EXTRA" ], @@ -827,15 +830,15 @@ # GL_EXT_secondary_color [ "COLOR_SUM", "CONTEXT_BOOL(Fog.ColorSumEnabled), NO_EXTRA" ], [ "CURRENT_SECONDARY_COLOR", "CONTEXT_FIELD(Current.Attrib[VERT_ATTRIB_COLOR1][0], TYPE_FLOATN_4), extra_flush_current" ], - [ "SECONDARY_COLOR_ARRAY", "ARRAY_BOOL(VertexAttrib[VERT_ATTRIB_COLOR1].Enabled), NO_EXTRA" ], - [ "SECONDARY_COLOR_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_COLOR1].Type), NO_EXTRA" ], + [ "SECONDARY_COLOR_ARRAY", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ], + [ "SECONDARY_COLOR_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_COLOR1].Format.Type), NO_EXTRA" ], [ "SECONDARY_COLOR_ARRAY_STRIDE", "ARRAY_SHORT(VertexAttrib[VERT_ATTRIB_COLOR1].Stride), NO_EXTRA" ], [ "SECONDARY_COLOR_ARRAY_SIZE", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ], # GL_EXT_fog_coord [ "CURRENT_FOG_COORDINATE", "CONTEXT_FLOAT(Current.Attrib[VERT_ATTRIB_FOG][0]), extra_flush_current" ], - [ "FOG_COORDINATE_ARRAY", "ARRAY_BOOL(VertexAttrib[VERT_ATTRIB_FOG].Enabled), NO_EXTRA" ], - [ "FOG_COORDINATE_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_FOG].Type), NO_EXTRA" ], + [ "FOG_COORDINATE_ARRAY", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ], + [ "FOG_COORDINATE_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_FOG].Format.Type), NO_EXTRA" ], [ "FOG_COORDINATE_ARRAY_STRIDE", "ARRAY_SHORT(VertexAttrib[VERT_ATTRIB_FOG].Stride), NO_EXTRA" ], [ "FOG_COORDINATE_SOURCE", "CONTEXT_ENUM16(Fog.FogCoordinateSource), NO_EXTRA" ], @@ -934,7 +937,6 @@ [ "RGBA_FLOAT_MODE_ARB", 
"BUFFER_FIELD(Visual.floatMode, TYPE_BOOLEAN), extra_core_ARB_color_buffer_float_and_new_buffers" ], # GL3.0 / GL_EXT_framebuffer_sRGB - [ "FRAMEBUFFER_SRGB_EXT", "CONTEXT_BOOL(Color.sRGBEnabled), extra_EXT_framebuffer_sRGB" ], [ "FRAMEBUFFER_SRGB_CAPABLE_EXT", "BUFFER_INT(Visual.sRGBCapable), extra_EXT_framebuffer_sRGB_and_new_buffers" ], # GL 3.1 diff -Nru mesa-18.3.3/src/mesa/main/glformats.c mesa-19.0.1/src/mesa/main/glformats.c --- mesa-18.3.3/src/mesa/main/glformats.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/glformats.c 2019-03-31 23:16:37.000000000 +0000 @@ -1352,11 +1352,9 @@ case GL_RGB4_S3TC: case GL_RGBA_S3TC: case GL_RGBA4_S3TC: - return _mesa_is_desktop_gl(ctx) && - ctx->Extensions.ANGLE_texture_compression_dxt; + return _mesa_has_S3_s3tc(ctx); case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI: - return ctx->API == API_OPENGL_COMPAT - && ctx->Extensions.ATI_texture_compression_3dc; + return _mesa_has_ATI_texture_compression_3dc(ctx); case GL_PALETTE4_RGB8_OES: case GL_PALETTE4_RGBA8_OES: case GL_PALETTE4_R5_G6_B5_OES: @@ -1373,34 +1371,27 @@ switch (_mesa_get_format_layout(m_format)) { case MESA_FORMAT_LAYOUT_S3TC: if (_mesa_get_format_color_encoding(m_format) == GL_LINEAR) { - /* Assume that the ANGLE flag will always be set if the - * EXT flag is set. 
- */ - return ctx->Extensions.ANGLE_texture_compression_dxt; + return _mesa_has_EXT_texture_compression_s3tc(ctx); } else { - return _mesa_is_desktop_gl(ctx) - && ctx->Extensions.EXT_texture_sRGB - && ctx->Extensions.EXT_texture_compression_s3tc; + return _mesa_has_EXT_texture_sRGB(ctx) && + _mesa_has_EXT_texture_compression_s3tc(ctx); } case MESA_FORMAT_LAYOUT_FXT1: - return _mesa_is_desktop_gl(ctx) - && ctx->Extensions.TDFX_texture_compression_FXT1; + return _mesa_has_3DFX_texture_compression_FXT1(ctx); case MESA_FORMAT_LAYOUT_RGTC: - return _mesa_is_desktop_gl(ctx) - && ctx->Extensions.ARB_texture_compression_rgtc; + return _mesa_has_ARB_texture_compression_rgtc(ctx) || + _mesa_has_EXT_texture_compression_rgtc(ctx); case MESA_FORMAT_LAYOUT_LATC: - return ctx->API == API_OPENGL_COMPAT - && ctx->Extensions.EXT_texture_compression_latc; + return _mesa_has_EXT_texture_compression_latc(ctx); case MESA_FORMAT_LAYOUT_ETC1: - return _mesa_is_gles(ctx) - && ctx->Extensions.OES_compressed_ETC1_RGB8_texture; + return _mesa_has_OES_compressed_ETC1_RGB8_texture(ctx); case MESA_FORMAT_LAYOUT_ETC2: - return _mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility; + return _mesa_is_gles3(ctx) || _mesa_has_ARB_ES3_compatibility(ctx); case MESA_FORMAT_LAYOUT_BPTC: - return _mesa_is_desktop_gl(ctx) && - ctx->Extensions.ARB_texture_compression_bptc; + return _mesa_has_ARB_texture_compression_bptc(ctx) || + _mesa_has_EXT_texture_compression_bptc(ctx); case MESA_FORMAT_LAYOUT_ASTC: - return ctx->Extensions.KHR_texture_compression_astc_ldr; + return _mesa_has_KHR_texture_compression_astc_ldr(ctx); default: return GL_FALSE; } @@ -1811,7 +1802,7 @@ break; /* OK */ } if (format == GL_RGB_INTEGER_EXT && - ctx->Extensions.ARB_texture_rgb10_a2ui) { + _mesa_has_texture_rgb10_a2ui(ctx)) { break; /* OK */ } return GL_INVALID_OPERATION; @@ -1826,7 +1817,7 @@ break; /* OK */ } if ((format == GL_RGBA_INTEGER_EXT || format == GL_BGRA_INTEGER_EXT) && - ctx->Extensions.ARB_texture_rgb10_a2ui) { 
+ _mesa_has_texture_rgb10_a2ui(ctx)) { break; /* OK */ } return GL_INVALID_OPERATION; @@ -1840,7 +1831,7 @@ break; /* OK */ } if ((format == GL_RGBA_INTEGER_EXT || format == GL_BGRA_INTEGER_EXT) && - ctx->Extensions.ARB_texture_rgb10_a2ui) { + _mesa_has_texture_rgb10_a2ui(ctx)) { break; /* OK */ } if (type == GL_UNSIGNED_INT_2_10_10_10_REV && format == GL_RGB && @@ -1860,7 +1851,7 @@ return GL_NO_ERROR; case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: - if (!ctx->Extensions.ARB_depth_buffer_float) { + if (!_mesa_has_float_depth_buffer(ctx)) { return GL_INVALID_ENUM; } if (format != GL_DEPTH_STENCIL) { @@ -1869,7 +1860,7 @@ return GL_NO_ERROR; case GL_UNSIGNED_INT_10F_11F_11F_REV: - if (!ctx->Extensions.EXT_packed_float) { + if (!_mesa_has_packed_float(ctx)) { return GL_INVALID_ENUM; } if (format != GL_RGB) { @@ -1887,7 +1878,7 @@ return GL_NO_ERROR; case GL_RG: case GL_RED: - if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_texture_rg) + if (_mesa_has_rg_textures(ctx)) return GL_NO_ERROR; default: return GL_INVALID_OPERATION; @@ -1941,8 +1932,8 @@ } case GL_RG: - if (!ctx->Extensions.ARB_texture_rg) - return GL_INVALID_ENUM; + if (!_mesa_has_rg_textures(ctx)) + return GL_INVALID_ENUM; switch (type) { case GL_BYTE: case GL_UNSIGNED_BYTE: @@ -1977,10 +1968,10 @@ return (ctx->API == API_OPENGLES2) ? GL_NO_ERROR : GL_INVALID_ENUM; case GL_UNSIGNED_INT_5_9_9_9_REV: - return ctx->Extensions.EXT_texture_shared_exponent + return _mesa_has_texture_shared_exponent(ctx) ? GL_NO_ERROR : GL_INVALID_ENUM; case GL_UNSIGNED_INT_10F_11F_11F_REV: - return ctx->Extensions.EXT_packed_float + return _mesa_has_packed_float(ctx) ? 
GL_NO_ERROR : GL_INVALID_ENUM; default: return GL_INVALID_ENUM; @@ -2048,7 +2039,7 @@ } case GL_YCBCR_MESA: - if (!ctx->Extensions.MESA_ycbcr_texture) + if (!_mesa_has_MESA_ycbcr_texture(ctx)) return GL_INVALID_ENUM; if (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_SHORT_8_8_REV_MESA) @@ -2059,7 +2050,7 @@ case GL_DEPTH_STENCIL: if (type == GL_UNSIGNED_INT_24_8) return GL_NO_ERROR; - else if (ctx->Extensions.ARB_depth_buffer_float && + else if (_mesa_has_float_depth_buffer(ctx) && type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV) return GL_NO_ERROR; else @@ -2078,8 +2069,7 @@ case GL_UNSIGNED_SHORT: case GL_INT: case GL_UNSIGNED_INT: - return (ctx->Version >= 30 || - ctx->Extensions.EXT_texture_integer) + return _mesa_has_integer_textures(ctx) ? GL_NO_ERROR : GL_INVALID_ENUM; default: return GL_INVALID_ENUM; @@ -2093,14 +2083,13 @@ case GL_UNSIGNED_SHORT: case GL_INT: case GL_UNSIGNED_INT: - return (ctx->Version >= 30 || - ctx->Extensions.EXT_texture_integer) + return _mesa_has_integer_textures(ctx) ? GL_NO_ERROR : GL_INVALID_ENUM; case GL_UNSIGNED_BYTE_3_3_2: case GL_UNSIGNED_BYTE_2_3_3_REV: case GL_UNSIGNED_SHORT_5_6_5: case GL_UNSIGNED_SHORT_5_6_5_REV: - return ctx->Extensions.ARB_texture_rgb10_a2ui + return _mesa_has_texture_rgb10_a2ui(ctx) ? GL_NO_ERROR : GL_INVALID_ENUM; default: return GL_INVALID_ENUM; @@ -2115,8 +2104,7 @@ case GL_INT: case GL_UNSIGNED_INT: /* NOTE: no packed formats w/ BGR format */ - return (ctx->Version >= 30 || - ctx->Extensions.EXT_texture_integer) + return _mesa_has_integer_textures(ctx) ? GL_NO_ERROR : GL_INVALID_ENUM; default: return GL_INVALID_ENUM; @@ -2131,8 +2119,7 @@ case GL_UNSIGNED_SHORT: case GL_INT: case GL_UNSIGNED_INT: - return (ctx->Version >= 30 || - ctx->Extensions.EXT_texture_integer) + return _mesa_has_integer_textures(ctx) ? 
GL_NO_ERROR : GL_INVALID_ENUM; case GL_UNSIGNED_SHORT_4_4_4_4: case GL_UNSIGNED_SHORT_4_4_4_4_REV: @@ -2142,7 +2129,7 @@ case GL_UNSIGNED_INT_8_8_8_8_REV: case GL_UNSIGNED_INT_10_10_10_2: case GL_UNSIGNED_INT_2_10_10_10_REV: - return ctx->Extensions.ARB_texture_rgb10_a2ui + return _mesa_has_texture_rgb10_a2ui(ctx) ? GL_NO_ERROR : GL_INVALID_ENUM; default: return GL_INVALID_ENUM; @@ -2157,7 +2144,7 @@ case GL_UNSIGNED_SHORT: case GL_INT: case GL_UNSIGNED_INT: - return ctx->Extensions.EXT_texture_integer + return _mesa_has_integer_textures(ctx) ? GL_NO_ERROR : GL_INVALID_ENUM; default: return GL_INVALID_ENUM; @@ -2185,7 +2172,7 @@ switch (format) { case GL_RED: case GL_RG: - if (ctx->API == API_OPENGLES || !ctx->Extensions.ARB_texture_rg) + if (!_mesa_has_rg_textures(ctx)) return GL_INVALID_VALUE; /* fallthrough */ case GL_ALPHA: @@ -2209,7 +2196,7 @@ || type == GL_UNSIGNED_SHORT_5_5_5_1 || type == GL_FLOAT || type == GL_HALF_FLOAT_OES - || (ctx->Extensions.EXT_texture_type_2_10_10_10_REV && + || (_mesa_has_texture_type_2_10_10_10_REV(ctx) && type == GL_UNSIGNED_INT_2_10_10_10_REV)); break; @@ -2327,7 +2314,9 @@ } } - if (ctx->Extensions.ARB_ES2_compatibility) { + if (_mesa_has_ARB_ES2_compatibility(ctx) || + _mesa_has_OES_framebuffer_object(ctx) || + ctx->API == API_OPENGLES2) { switch (internalFormat) { case GL_RGB565: return GL_RGB; @@ -2336,7 +2325,8 @@ } } - if (ctx->Extensions.ARB_depth_texture) { + if (_mesa_has_ARB_depth_texture(ctx) || _mesa_has_OES_depth_texture(ctx) || + ctx->API == API_OPENGL_CORE) { switch (internalFormat) { case GL_DEPTH_COMPONENT: case GL_DEPTH_COMPONENT16: @@ -2351,7 +2341,8 @@ } } - if (ctx->Extensions.ARB_texture_stencil8) { + if (_mesa_has_ARB_texture_stencil8(ctx) || + _mesa_has_OES_texture_stencil8(ctx)) { switch (internalFormat) { case GL_STENCIL_INDEX: case GL_STENCIL_INDEX1: @@ -2388,43 +2379,52 @@ return base_compressed; } - if ((ctx->Extensions.KHR_texture_compression_astc_ldr && + if 
((_mesa_has_KHR_texture_compression_astc_ldr(ctx) && is_astc_2d_format(internalFormat)) || - (ctx->Extensions.OES_texture_compression_astc && + (_mesa_has_OES_texture_compression_astc(ctx) && is_astc_3d_format(internalFormat))) return GL_RGBA; - if (ctx->Extensions.MESA_ycbcr_texture) { + if (!_mesa_has_MESA_ycbcr_texture(ctx)) { if (internalFormat == GL_YCBCR_MESA) return GL_YCBCR_MESA; } - if (ctx->Extensions.ARB_texture_float) { + if (_mesa_has_half_float_textures(ctx)) { switch (internalFormat) { case GL_ALPHA16F_ARB: - case GL_ALPHA32F_ARB: return GL_ALPHA; case GL_RGBA16F_ARB: - case GL_RGBA32F_ARB: return GL_RGBA; case GL_RGB16F_ARB: - case GL_RGB32F_ARB: return GL_RGB; case GL_INTENSITY16F_ARB: - case GL_INTENSITY32F_ARB: return GL_INTENSITY; case GL_LUMINANCE16F_ARB: - case GL_LUMINANCE32F_ARB: return GL_LUMINANCE; case GL_LUMINANCE_ALPHA16F_ARB: + return GL_LUMINANCE_ALPHA; + } + } + + if (_mesa_has_float_textures(ctx)) { + switch (internalFormat) { + case GL_ALPHA32F_ARB: + return GL_ALPHA; + case GL_RGBA32F_ARB: + return GL_RGBA; + case GL_RGB32F_ARB: + return GL_RGB; + case GL_INTENSITY32F_ARB: + return GL_INTENSITY; + case GL_LUMINANCE32F_ARB: + return GL_LUMINANCE; case GL_LUMINANCE_ALPHA32F_ARB: return GL_LUMINANCE_ALPHA; - default: - ; /* fallthrough */ } } - if (ctx->Extensions.EXT_texture_snorm) { + if (_mesa_has_EXT_texture_snorm(ctx) || _mesa_is_gles3(ctx)) { switch (internalFormat) { case GL_RED_SNORM: case GL_R8_SNORM: @@ -2463,7 +2463,7 @@ } } - if (ctx->Extensions.EXT_texture_sRGB) { + if (_mesa_has_EXT_texture_sRGB(ctx) || _mesa_is_gles3(ctx)) { switch (internalFormat) { case GL_SRGB_EXT: case GL_SRGB8_EXT: @@ -2486,8 +2486,16 @@ } } - if (ctx->Version >= 30 || - ctx->Extensions.EXT_texture_integer) { + if (_mesa_has_EXT_texture_sRGB_R8(ctx)) { + switch (internalFormat) { + case GL_SR8_EXT: + return GL_RED; + default: + ; /* fallthrough */ + } + } + + if (_mesa_has_integer_textures(ctx)) { switch (internalFormat) { case GL_RGBA8UI_EXT: 
case GL_RGBA16UI_EXT: @@ -2506,14 +2514,14 @@ } } - if (ctx->Extensions.ARB_texture_rgb10_a2ui) { + if (_mesa_has_texture_rgb10_a2ui(ctx)) { switch (internalFormat) { case GL_RGB10_A2UI: return GL_RGBA; } } - if (ctx->Extensions.EXT_texture_integer) { + if (_mesa_has_integer_textures(ctx)) { switch (internalFormat) { case GL_ALPHA8UI_EXT: case GL_ALPHA16UI_EXT: @@ -2548,12 +2556,15 @@ } } - if (ctx->Extensions.ARB_texture_rg) { + if (_mesa_has_rg_textures(ctx)) { switch (internalFormat) { case GL_R16F: + if (!_mesa_has_half_float_textures(ctx)) + break; + return GL_RED; case GL_R32F: - if (!ctx->Extensions.ARB_texture_float) - break; + if (!_mesa_has_float_textures(ctx)) + break; return GL_RED; case GL_R8I: case GL_R8UI: @@ -2561,9 +2572,9 @@ case GL_R16UI: case GL_R32I: case GL_R32UI: - if (ctx->Version < 30 && !ctx->Extensions.EXT_texture_integer) - break; - /* FALLTHROUGH */ + if (!_mesa_has_integer_textures(ctx)) + break; + /* FALLTHROUGH */ case GL_R8: case GL_R16: case GL_RED: @@ -2571,9 +2582,12 @@ return GL_RED; case GL_RG16F: + if (!_mesa_has_half_float_textures(ctx)) + break; + return GL_RG; case GL_RG32F: - if (!ctx->Extensions.ARB_texture_float) - break; + if (!_mesa_has_float_textures(ctx)) + break; return GL_RG; case GL_RG8I: case GL_RG8UI: @@ -2581,9 +2595,9 @@ case GL_RG16UI: case GL_RG32I: case GL_RG32UI: - if (ctx->Version < 30 && !ctx->Extensions.EXT_texture_integer) - break; - /* FALLTHROUGH */ + if (!_mesa_has_integer_textures(ctx)) + break; + /* FALLTHROUGH */ case GL_RG: case GL_RG8: case GL_RG16: @@ -2594,7 +2608,7 @@ } } - if (ctx->Extensions.EXT_texture_shared_exponent) { + if (_mesa_has_texture_shared_exponent(ctx)) { switch (internalFormat) { case GL_RGB9_E5_EXT: return GL_RGB; @@ -2603,7 +2617,7 @@ } } - if (ctx->Extensions.EXT_packed_float) { + if (_mesa_has_packed_float(ctx)) { switch (internalFormat) { case GL_R11F_G11F_B10F_EXT: return GL_RGB; @@ -2612,7 +2626,7 @@ } } - if (ctx->Extensions.ARB_depth_buffer_float) { + if 
(_mesa_has_float_depth_buffer(ctx)) { switch (internalFormat) { case GL_DEPTH_COMPONENT32F: return GL_DEPTH_COMPONENT; @@ -2644,8 +2658,8 @@ * \param type the texture type */ static GLenum -_mesa_es3_effective_internal_format_for_format_and_type(GLenum format, - GLenum type) +gles_effective_internal_format_for_format_and_type(GLenum format, + GLenum type) { switch (type) { case GL_UNSIGNED_BYTE: @@ -2758,9 +2772,9 @@ * \return error code, or GL_NO_ERROR. */ GLenum -_mesa_es3_error_check_format_and_type(const struct gl_context *ctx, - GLenum format, GLenum type, - GLenum internalFormat) +_mesa_gles_error_check_format_and_type(const struct gl_context *ctx, + GLenum format, GLenum type, + GLenum internalFormat) { /* If internalFormat is an unsized format, then the effective internal * format derived from format and type should be used instead. Page 127, @@ -2778,7 +2792,7 @@ */ if (_mesa_is_enum_format_unsized(internalFormat)) { GLenum effectiveInternalFormat = - _mesa_es3_effective_internal_format_for_format_and_type(format, type); + gles_effective_internal_format_for_format_and_type(format, type); if (effectiveInternalFormat == GL_NONE) return GL_INVALID_OPERATION; @@ -2806,7 +2820,7 @@ /* The GLES variant of EXT_texture_compression_s3tc is very vague and * doesn't list valid types. Just do exactly what the spec says. 
*/ - if (ctx->Extensions.EXT_texture_compression_s3tc && + if (_mesa_has_EXT_texture_compression_s3tc(ctx) && (internalFormat == GL_COMPRESSED_RGB_S3TC_DXT1_EXT || internalFormat == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT || internalFormat == GL_COMPRESSED_RGBA_S3TC_DXT3_EXT || @@ -2833,6 +2847,11 @@ if (ctx->Version <= 20) return GL_INVALID_OPERATION; break; + case GL_COMPRESSED_RGBA_BPTC_UNORM: + case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM: + if (!_mesa_has_EXT_texture_compression_bptc(ctx)) + return GL_INVALID_OPERATION; + break; default: return GL_INVALID_OPERATION; } @@ -2879,7 +2898,7 @@ case GL_RGBA: case GL_RGB10_A2: case GL_RGB5_A1: - if (!ctx->Extensions.EXT_texture_type_2_10_10_10_REV) + if (!_mesa_has_texture_type_2_10_10_10_REV(ctx)) return GL_INVALID_OPERATION; break; default: @@ -2900,7 +2919,7 @@ return GL_INVALID_OPERATION; break; case GL_RGBA: - if (ctx->Extensions.OES_texture_float && internalFormat == format) + if (_mesa_has_OES_texture_float(ctx) && internalFormat == format) break; default: return GL_INVALID_OPERATION; @@ -2908,7 +2927,7 @@ break; case GL_HALF_FLOAT_OES: - if (ctx->Extensions.OES_texture_half_float && internalFormat == format) + if (_mesa_has_OES_texture_half_float(ctx) && internalFormat == format) break; default: return GL_INVALID_OPERATION; @@ -3035,15 +3054,20 @@ return GL_INVALID_OPERATION; break; case GL_RGB: - if (ctx->Extensions.OES_texture_float && internalFormat == format) + if (_mesa_has_OES_texture_float(ctx) && internalFormat == format) break; + case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT: + case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT: + if (!_mesa_has_EXT_texture_compression_bptc(ctx)) + return GL_INVALID_OPERATION; + break; default: return GL_INVALID_OPERATION; } break; case GL_HALF_FLOAT_OES: - if (!ctx->Extensions.OES_texture_half_float || internalFormat != format) + if (!_mesa_has_OES_texture_half_float(ctx) || internalFormat != format) return GL_INVALID_OPERATION; break; @@ -3057,7 +3081,7 @@ * GLES3 doesn't, and 
GL_OES_required_internalformat extends that * to allow the sized RGB internalformats as well. */ - if (!ctx->Extensions.EXT_texture_type_2_10_10_10_REV) + if (!_mesa_has_texture_type_2_10_10_10_REV(ctx)) return GL_INVALID_OPERATION; break; default: @@ -3110,16 +3134,20 @@ break; case GL_RG: - if (!ctx->Extensions.ARB_texture_rg) + if (!_mesa_has_rg_textures(ctx)) return GL_INVALID_OPERATION; switch (type) { case GL_UNSIGNED_BYTE: - if (internalFormat != GL_RG8) + if (internalFormat != GL_RG8 && + (!_mesa_has_EXT_texture_compression_rgtc(ctx) || + internalFormat != GL_COMPRESSED_RED_GREEN_RGTC2_EXT)) return GL_INVALID_OPERATION; break; case GL_BYTE: - if (internalFormat != GL_RG8_SNORM) + if (internalFormat != GL_RG8_SNORM && + (!_mesa_has_EXT_texture_compression_rgtc(ctx) || + internalFormat != GL_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT)) return GL_INVALID_OPERATION; break; @@ -3142,8 +3170,8 @@ return GL_INVALID_OPERATION; break; case GL_RG: - if (ctx->Extensions.ARB_texture_rg && - ctx->Extensions.OES_texture_half_float) + if (_mesa_has_rg_textures(ctx) && + _mesa_has_OES_texture_half_float(ctx)) break; /* fallthrough */ default: @@ -3157,8 +3185,8 @@ case GL_RG32F: break; case GL_RG: - if (ctx->Extensions.ARB_texture_rg && - ctx->Extensions.OES_texture_float) + if (_mesa_has_rg_textures(ctx) && + _mesa_has_OES_texture_float(ctx)) break; /* fallthrough */ default: @@ -3211,16 +3239,22 @@ break; case GL_RED: - if (!ctx->Extensions.ARB_texture_rg) + if (!_mesa_has_rg_textures(ctx)) return GL_INVALID_OPERATION; switch (type) { case GL_UNSIGNED_BYTE: - if (internalFormat != GL_R8) - return GL_INVALID_OPERATION; - break; + if (internalFormat == GL_R8 || + ((internalFormat == GL_SR8_EXT) && + _mesa_has_EXT_texture_sRGB_R8(ctx)) || + (internalFormat == GL_COMPRESSED_RED_RGTC1_EXT && + _mesa_has_EXT_texture_compression_rgtc(ctx))) + break; + return GL_INVALID_OPERATION; case GL_BYTE: - if (internalFormat != GL_R8_SNORM) + if (internalFormat != GL_R8_SNORM && + 
(!_mesa_has_EXT_texture_compression_rgtc(ctx) || + internalFormat != GL_COMPRESSED_SIGNED_RED_RGTC1_EXT)) return GL_INVALID_OPERATION; break; @@ -3244,8 +3278,8 @@ break; case GL_RG: case GL_RED: - if (ctx->Extensions.ARB_texture_rg && - ctx->Extensions.OES_texture_half_float) + if (_mesa_has_rg_textures(ctx) && + _mesa_has_OES_texture_half_float(ctx)) break; /* fallthrough */ default: @@ -3259,8 +3293,8 @@ case GL_R32F: break; case GL_RED: - if (ctx->Extensions.ARB_texture_rg && - ctx->Extensions.OES_texture_float) + if (_mesa_has_rg_textures(ctx) && + _mesa_has_OES_texture_float(ctx)) break; /* fallthrough */ default: @@ -3372,11 +3406,11 @@ case GL_LUMINANCE_ALPHA: switch (type) { case GL_FLOAT: - if (!ctx->Extensions.OES_texture_float || internalFormat != format) + if (!_mesa_has_OES_texture_float(ctx) || internalFormat != format) return GL_INVALID_OPERATION; break; case GL_HALF_FLOAT_OES: - if (!ctx->Extensions.OES_texture_half_float || internalFormat != format) + if (!_mesa_has_OES_texture_half_float(ctx) || internalFormat != format) return GL_INVALID_OPERATION; break; case GL_UNSIGNED_BYTE: @@ -3844,7 +3878,7 @@ * internal formats to base internal formats ... and use cases ...'') * for the R32F, RG32F, RGB32F, and RGBA32F formats." 
*/ - return ctx->Extensions.OES_texture_float_linear; + return _mesa_has_OES_texture_float_linear(ctx); default: return false; } diff -Nru mesa-18.3.3/src/mesa/main/glformats.h mesa-19.0.1/src/mesa/main/glformats.h --- mesa-18.3.3/src/mesa/main/glformats.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/glformats.h 2019-03-31 23:16:37.000000000 +0000 @@ -138,9 +138,9 @@ unsigned dimensions); extern GLenum -_mesa_es3_error_check_format_and_type(const struct gl_context *ctx, - GLenum format, GLenum type, - GLenum internalFormat); +_mesa_gles_error_check_format_and_type(const struct gl_context *ctx, + GLenum format, GLenum type, + GLenum internalFormat); extern GLint _mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat ); diff -Nru mesa-18.3.3/src/mesa/main/glheader.h mesa-19.0.1/src/mesa/main/glheader.h --- mesa-18.3.3/src/mesa/main/glheader.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/glheader.h 2019-03-31 23:16:37.000000000 +0000 @@ -138,6 +138,9 @@ #define GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI 0x8837 #endif +#ifndef GL_EXT_texture_sRGB_R8 +#define GL_SR8_EXT 0x8FBD +#endif /** * Internal token to represent a GLSL shader program (a collection of @@ -148,6 +151,9 @@ */ #define GL_SHADER_PROGRAM_MESA 0x9999 +#ifndef GL_EXT_multisampled_render_to_texture +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_SAMPLES_EXT 0x8D6C +#endif #ifdef __cplusplus } diff -Nru mesa-18.3.3/src/mesa/main/glspirv.c mesa-19.0.1/src/mesa/main/glspirv.c --- mesa-18.3.3/src/mesa/main/glspirv.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/glspirv.c 2019-03-31 23:16:37.000000000 +0000 @@ -212,6 +212,7 @@ const struct spirv_to_nir_options spirv_options = { .lower_workgroup_access_to_offsets = true, + .lower_ubo_ssbo_access_to_offsets = true, .caps = ctx->Const.SpirVCapabilities }; @@ -242,10 +243,10 @@ * inline functions. 
That way they get properly initialized at the top * of the function and not at the top of its caller. */ - NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local); + NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_function_temp); NIR_PASS_V(nir, nir_lower_returns); NIR_PASS_V(nir, nir_inline_functions); - NIR_PASS_V(nir, nir_copy_prop); + NIR_PASS_V(nir, nir_opt_deref); /* Pick off the single entrypoint that we want */ foreach_list_typed_safe(nir_function, func, node, &nir->functions) { diff -Nru mesa-18.3.3/src/mesa/main/glthread.c mesa-19.0.1/src/mesa/main/glthread.c --- mesa-18.3.3/src/mesa/main/glthread.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/glthread.c 2019-03-31 23:16:37.000000000 +0000 @@ -121,11 +121,11 @@ free(glthread); ctx->GLThread = NULL; - _mesa_glthread_restore_dispatch(ctx); + _mesa_glthread_restore_dispatch(ctx, "destroy"); } void -_mesa_glthread_restore_dispatch(struct gl_context *ctx) +_mesa_glthread_restore_dispatch(struct gl_context *ctx, const char *func) { /* Remove ourselves from the dispatch table except if another ctx/thread * already installed a new dispatch table. 
@@ -136,6 +136,9 @@ if (_glapi_get_dispatch() == ctx->MarshalExec) { ctx->CurrentClientDispatch = ctx->CurrentServerDispatch; _glapi_set_dispatch(ctx->CurrentClientDispatch); +#if 0 + printf("glthread disabled: %s\n", func); +#endif } } diff -Nru mesa-18.3.3/src/mesa/main/glthread.h mesa-19.0.1/src/mesa/main/glthread.h --- mesa-18.3.3/src/mesa/main/glthread.h 2018-04-16 21:31:06.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/glthread.h 2019-03-31 23:16:37.000000000 +0000 @@ -99,7 +99,7 @@ void _mesa_glthread_init(struct gl_context *ctx); void _mesa_glthread_destroy(struct gl_context *ctx); -void _mesa_glthread_restore_dispatch(struct gl_context *ctx); +void _mesa_glthread_restore_dispatch(struct gl_context *ctx, const char *func); void _mesa_glthread_flush_batch(struct gl_context *ctx); void _mesa_glthread_finish(struct gl_context *ctx); diff -Nru mesa-18.3.3/src/mesa/main/marshal.c mesa-19.0.1/src/mesa/main/marshal.c --- mesa-18.3.3/src/mesa/main/marshal.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/marshal.c 2019-03-31 23:16:37.000000000 +0000 @@ -89,7 +89,7 @@ if (cap == GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB) { _mesa_glthread_finish(ctx); - _mesa_glthread_restore_dispatch(ctx); + _mesa_glthread_restore_dispatch(ctx, "Enable(DEBUG_OUTPUT_SYNCHRONOUS)"); } else { cmd = _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_Enable, sizeof(*cmd)); diff -Nru mesa-18.3.3/src/mesa/main/mtypes.h mesa-19.0.1/src/mesa/main/mtypes.h --- mesa-18.3.3/src/mesa/main/mtypes.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/mtypes.h 2019-03-31 23:16:37.000000000 +0000 @@ -457,6 +457,21 @@ /** + * Vertex format to describe a vertex element. + */ +struct gl_vertex_format +{ + GLenum16 Type; /**< datatype: GL_FLOAT, GL_INT, etc */ + GLenum16 Format; /**< default: GL_RGBA, but may be GL_BGRA */ + GLubyte Size:5; /**< components per element (1,2,3,4) */ + GLubyte Normalized:1; /**< GL_ARB_vertex_program */ + GLubyte Integer:1; /**< Integer-valued? 
*/ + GLubyte Doubles:1; /**< double values are not converted to floats */ + GLubyte _ElementSize; /**< Size of each element in bytes */ +}; + + +/** * Current attribute group (GL_CURRENT_BIT). */ struct gl_current_attrib @@ -1419,17 +1434,12 @@ const GLubyte *Ptr; /** Offset of the first element relative to the binding offset */ GLuint RelativeOffset; - GLshort Stride; /**< Stride as specified with gl*Pointer() */ - GLenum16 Type; /**< Datatype: GL_FLOAT, GL_INT, etc */ - GLenum16 Format; /**< Default: GL_RGBA, but may be GL_BGRA */ - GLboolean Enabled; /**< Whether the array is enabled */ - GLubyte Size; /**< Components per element (1,2,3,4) */ - unsigned Normalized:1; /**< Fixed-point values are normalized when converted to floats */ - unsigned Integer:1; /**< Fixed-point values are not converted to floats */ - unsigned Doubles:1; /**< double precision values are not converted to floats */ - unsigned _ElementSize:8; /**< Size of each element in bytes */ + /** Vertex format */ + struct gl_vertex_format Format; + /** Stride as specified with gl*Pointer() */ + GLshort Stride; /** Index into gl_vertex_array_object::BufferBinding[] array */ - unsigned BufferBindingIndex:6; + GLubyte BufferBindingIndex; /** * Derived effective buffer binding index @@ -1444,7 +1454,7 @@ * Note that _mesa_update_vao_derived_arrays is called when binding * the VAO to Array._DrawVAO. */ - unsigned _EffBufferBindingIndex:6; + GLubyte _EffBufferBindingIndex; /** * Derived effective relative offset. * @@ -1538,7 +1548,7 @@ GLbitfield VertexAttribBufferMask; /** Mask of VERT_BIT_* values indicating which arrays are enabled */ - GLbitfield _Enabled; + GLbitfield Enabled; /** * Mask of VERT_BIT_* enabled arrays past position/generic0 mapping @@ -2566,8 +2576,7 @@ { COMPILE_FAILURE = 0, COMPILE_SUCCESS, - COMPILE_SKIPPED, - COMPILED_NO_OPTS + COMPILE_SKIPPED }; /** @@ -3404,6 +3413,7 @@ */ struct gl_texture_object *Texture; GLuint TextureLevel; /**< Attached mipmap level. 
*/ + GLsizei NumSamples; /**< from FramebufferTexture2DMultisampleEXT */ GLuint CubeMapFace; /**< 0 .. 5, for cube map textures. */ GLuint Zoffset; /**< Slice for 3D textures, or layer for both 1D * and 2D array textures */ @@ -3495,6 +3505,7 @@ bool _HasAttachments; GLbitfield _IntegerBuffers; /**< Which color buffers are integer valued */ + GLbitfield _RGBBuffers; /**< Which color buffers have baseformat == RGB */ /* ARB_color_buffer_float */ GLboolean _AllColorBuffersFixedPoint; /* no integer, no float */ @@ -4244,6 +4255,7 @@ GLboolean EXT_gpu_shader4; GLboolean EXT_memory_object; GLboolean EXT_memory_object_fd; + GLboolean EXT_multisampled_render_to_texture; GLboolean EXT_packed_float; GLboolean EXT_pixel_buffer_object; GLboolean EXT_point_parameters; @@ -4253,6 +4265,7 @@ GLboolean EXT_semaphore_fd; GLboolean EXT_shader_integer_mix; GLboolean EXT_shader_samples_identical; + GLboolean EXT_sRGB; GLboolean EXT_stencil_two_side; GLboolean EXT_texture_array; GLboolean EXT_texture_compression_latc; @@ -4264,6 +4277,7 @@ GLboolean EXT_texture_shared_exponent; GLboolean EXT_texture_snorm; GLboolean EXT_texture_sRGB; + GLboolean EXT_texture_sRGB_R8; GLboolean EXT_texture_sRGB_decode; GLboolean EXT_texture_swizzle; GLboolean EXT_texture_type_2_10_10_10_REV; diff -Nru mesa-18.3.3/src/mesa/main/queryobj.c mesa-19.0.1/src/mesa/main/queryobj.c --- mesa-18.3.3/src/mesa/main/queryobj.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/queryobj.c 2019-03-31 23:16:37.000000000 +0000 @@ -146,11 +146,10 @@ get_pipe_stats_binding_point(struct gl_context *ctx, GLenum target) { - const int which = target - GL_VERTICES_SUBMITTED_ARB; + const int which = target - GL_VERTICES_SUBMITTED; assert(which < MAX_PIPELINE_STATISTICS); - if (!_mesa_is_desktop_gl(ctx) || - !ctx->Extensions.ARB_pipeline_statistics_query) + if (!_mesa_has_ARB_pipeline_statistics_query(ctx)) return NULL; return &ctx->Query.pipeline_stats[which]; @@ -164,89 +163,80 @@ static struct gl_query_object 
** get_query_binding_point(struct gl_context *ctx, GLenum target, GLuint index) { - - /* From GL_EXT_occlusion_query_boolean spec: - * - * "Accepted by the parameter of BeginQueryEXT, EndQueryEXT, - * and GetQueryivEXT: - * - * ANY_SAMPLES_PASSED_EXT 0x8C2F - * ANY_SAMPLES_PASSED_CONSERVATIVE_EXT 0x8D6A" - */ - if ((_mesa_is_gles(ctx) && ctx->Version == 20) && - (target != GL_ANY_SAMPLES_PASSED && - target != GL_ANY_SAMPLES_PASSED_CONSERVATIVE)) - return NULL; - switch (target) { - case GL_SAMPLES_PASSED_ARB: - if (ctx->Extensions.ARB_occlusion_query) + case GL_SAMPLES_PASSED: + if (_mesa_has_ARB_occlusion_query(ctx) || + _mesa_has_ARB_occlusion_query2(ctx)) return &ctx->Query.CurrentOcclusionObject; else return NULL; case GL_ANY_SAMPLES_PASSED: - if (ctx->Extensions.ARB_occlusion_query2) + if (_mesa_has_ARB_occlusion_query2(ctx) || + _mesa_has_EXT_occlusion_query_boolean(ctx)) return &ctx->Query.CurrentOcclusionObject; else return NULL; case GL_ANY_SAMPLES_PASSED_CONSERVATIVE: - if (ctx->Extensions.ARB_ES3_compatibility - || (ctx->API == API_OPENGLES2 && ctx->Version >= 30)) + if (_mesa_has_ARB_ES3_compatibility(ctx) || + _mesa_has_EXT_occlusion_query_boolean(ctx)) return &ctx->Query.CurrentOcclusionObject; else return NULL; - case GL_TIME_ELAPSED_EXT: - if (ctx->Extensions.EXT_timer_query) + case GL_TIME_ELAPSED: + if (_mesa_has_EXT_timer_query(ctx) || + _mesa_has_EXT_disjoint_timer_query(ctx)) return &ctx->Query.CurrentTimerObject; else return NULL; case GL_PRIMITIVES_GENERATED: - if (ctx->Extensions.EXT_transform_feedback) + if (_mesa_has_EXT_transform_feedback(ctx) || + _mesa_has_EXT_tessellation_shader(ctx) || + _mesa_has_OES_geometry_shader(ctx)) return &ctx->Query.PrimitivesGenerated[index]; else return NULL; case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: - if (ctx->Extensions.EXT_transform_feedback) + if (_mesa_has_EXT_transform_feedback(ctx) || _mesa_is_gles3(ctx)) return &ctx->Query.PrimitivesWritten[index]; else return NULL; - case 
GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB: - if (ctx->Extensions.ARB_transform_feedback_overflow_query) + case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW: + if (_mesa_has_ARB_transform_feedback_overflow_query(ctx)) return &ctx->Query.TransformFeedbackOverflow[index]; else return NULL; - case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB: - if (ctx->Extensions.ARB_transform_feedback_overflow_query) + case GL_TRANSFORM_FEEDBACK_OVERFLOW: + if (_mesa_has_ARB_transform_feedback_overflow_query(ctx)) return &ctx->Query.TransformFeedbackOverflowAny; else return NULL; - case GL_VERTICES_SUBMITTED_ARB: - case GL_PRIMITIVES_SUBMITTED_ARB: - case GL_VERTEX_SHADER_INVOCATIONS_ARB: - case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: - case GL_CLIPPING_INPUT_PRIMITIVES_ARB: - case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: + case GL_VERTICES_SUBMITTED: + case GL_PRIMITIVES_SUBMITTED: + case GL_VERTEX_SHADER_INVOCATIONS: + case GL_FRAGMENT_SHADER_INVOCATIONS: + case GL_CLIPPING_INPUT_PRIMITIVES: + case GL_CLIPPING_OUTPUT_PRIMITIVES: return get_pipe_stats_binding_point(ctx, target); case GL_GEOMETRY_SHADER_INVOCATIONS: /* GL_GEOMETRY_SHADER_INVOCATIONS is defined in a non-sequential order */ - target = GL_VERTICES_SUBMITTED_ARB + MAX_PIPELINE_STATISTICS - 1; + target = GL_VERTICES_SUBMITTED + MAX_PIPELINE_STATISTICS - 1; /* fallthrough */ - case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: + case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED: if (_mesa_has_geometry_shaders(ctx)) return get_pipe_stats_binding_point(ctx, target); else return NULL; - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: + case GL_TESS_CONTROL_SHADER_PATCHES: + case GL_TESS_EVALUATION_SHADER_INVOCATIONS: if (_mesa_has_tessellation(ctx)) return get_pipe_stats_binding_point(ctx, target); else return NULL; - case GL_COMPUTE_SHADER_INVOCATIONS_ARB: + case GL_COMPUTE_SHADER_INVOCATIONS: if (_mesa_has_compute_shaders(ctx)) return get_pipe_stats_binding_point(ctx, target); else @@ -316,8 +306,8 @@ case 
GL_TIMESTAMP: case GL_PRIMITIVES_GENERATED: case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: - case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB: - case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB: + case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW: + case GL_TRANSFORM_FEEDBACK_OVERFLOW: break; default: _mesa_error(ctx, GL_INVALID_ENUM, "glCreateQueries(invalid target = %s)", @@ -393,7 +383,7 @@ switch (target) { case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: case GL_PRIMITIVES_GENERATED: - case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB: + case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW: if (index >= ctx->Const.MaxVertexStreams) { _mesa_error(ctx, GL_INVALID_VALUE, "glBeginQueryIndexed(index>=MaxVertexStreams)"); @@ -676,7 +666,8 @@ } if (target == GL_TIMESTAMP) { - if (!ctx->Extensions.ARB_timer_query) { + if (!_mesa_has_ARB_timer_query(ctx) && + !_mesa_has_EXT_disjoint_timer_query(ctx)) { _mesa_error(ctx, GL_INVALID_ENUM, "glGetQueryARB(target)"); return; } @@ -692,7 +683,7 @@ } switch (pname) { - case GL_QUERY_COUNTER_BITS_ARB: + case GL_QUERY_COUNTER_BITS: switch (target) { case GL_SAMPLES_PASSED: *params = ctx->Const.QueryCounterBits.SamplesPassed; @@ -717,45 +708,45 @@ case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: *params = ctx->Const.QueryCounterBits.PrimitivesWritten; break; - case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB: - case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB: + case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW: + case GL_TRANSFORM_FEEDBACK_OVERFLOW: /* The minimum value of this is 1 if it's nonzero, and the value * is only ever GL_TRUE or GL_FALSE, so no sense in reporting more * bits. 
*/ *params = 1; break; - case GL_VERTICES_SUBMITTED_ARB: + case GL_VERTICES_SUBMITTED: *params = ctx->Const.QueryCounterBits.VerticesSubmitted; break; - case GL_PRIMITIVES_SUBMITTED_ARB: + case GL_PRIMITIVES_SUBMITTED: *params = ctx->Const.QueryCounterBits.PrimitivesSubmitted; break; - case GL_VERTEX_SHADER_INVOCATIONS_ARB: + case GL_VERTEX_SHADER_INVOCATIONS: *params = ctx->Const.QueryCounterBits.VsInvocations; break; - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: + case GL_TESS_CONTROL_SHADER_PATCHES: *params = ctx->Const.QueryCounterBits.TessPatches; break; - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: + case GL_TESS_EVALUATION_SHADER_INVOCATIONS: *params = ctx->Const.QueryCounterBits.TessInvocations; break; case GL_GEOMETRY_SHADER_INVOCATIONS: *params = ctx->Const.QueryCounterBits.GsInvocations; break; - case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: + case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED: *params = ctx->Const.QueryCounterBits.GsPrimitives; break; - case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: + case GL_FRAGMENT_SHADER_INVOCATIONS: *params = ctx->Const.QueryCounterBits.FsInvocations; break; - case GL_COMPUTE_SHADER_INVOCATIONS_ARB: + case GL_COMPUTE_SHADER_INVOCATIONS: *params = ctx->Const.QueryCounterBits.ComputeInvocations; break; - case GL_CLIPPING_INPUT_PRIMITIVES_ARB: + case GL_CLIPPING_INPUT_PRIMITIVES: *params = ctx->Const.QueryCounterBits.ClInPrimitives; break; - case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: + case GL_CLIPPING_OUTPUT_PRIMITIVES: *params = ctx->Const.QueryCounterBits.ClOutPrimitives; break; default: @@ -766,7 +757,7 @@ break; } break; - case GL_CURRENT_QUERY_ARB: + case GL_CURRENT_QUERY: *params = (q && q->Target == target) ? 
q->Id : 0; break; default: @@ -822,7 +813,7 @@ if (buf && buf != ctx->Shared->NullBufferObj) { bool is_64bit = ptype == GL_INT64_ARB || ptype == GL_UNSIGNED_INT64_ARB; - if (!ctx->Extensions.ARB_query_buffer_object) { + if (!_mesa_has_ARB_query_buffer_object(ctx)) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(not supported)", func); return; } @@ -855,7 +846,7 @@ value = q->Result; break; case GL_QUERY_RESULT_NO_WAIT: - if (!ctx->Extensions.ARB_query_buffer_object) + if (!_mesa_has_ARB_query_buffer_object(ctx)) goto invalid_enum; ctx->Driver.CheckQuery(ctx, q); if (!q->Ready) diff -Nru mesa-18.3.3/src/mesa/main/shaderobj.h mesa-19.0.1/src/mesa/main/shaderobj.h --- mesa-18.3.3/src/mesa/main/shaderobj.h 2018-04-16 21:31:06.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/shaderobj.h 2019-03-31 23:16:37.000000000 +0000 @@ -225,6 +225,9 @@ return GL_TESS_EVALUATION_SUBROUTINE; case MESA_SHADER_NONE: break; + case MESA_SHADER_KERNEL: + unreachable("not reached"); + break; } unreachable("not reached"); } @@ -246,6 +249,7 @@ case MESA_SHADER_TESS_EVAL: return GL_TESS_EVALUATION_SUBROUTINE_UNIFORM; case MESA_SHADER_NONE: + case MESA_SHADER_KERNEL: break; } unreachable("not reached"); diff -Nru mesa-18.3.3/src/mesa/main/tests/dispatch_sanity.cpp mesa-19.0.1/src/mesa/main/tests/dispatch_sanity.cpp --- mesa-18.3.3/src/mesa/main/tests/dispatch_sanity.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/tests/dispatch_sanity.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -2236,6 +2236,10 @@ /* GL_NV_conservative_raster_pre_snap_triangles */ { "glConservativeRasterParameteriNV", 20, -1 }, + /* GL_EXT_multisampled_render_to_texture */ + { "glRenderbufferStorageMultisampleEXT", 20, -1 }, + { "glFramebufferTexture2DMultisampleEXT", 20, -1 }, + { NULL, 0, -1 } }; @@ -2330,7 +2334,7 @@ // glProgramParameteri aliases glProgramParameteriEXT in GLES 2 // We check for the aliased -NV version in GLES 2 // { "glReadBuffer", 30, -1 }, - { "glRenderbufferStorageMultisample", 30, -1 
}, + // glRenderbufferStorageMultisample aliases glRenderbufferStorageMultisampleEXT in GLES 2 { "glResumeTransformFeedback", 30, -1 }, { "glSamplerParameterf", 30, -1 }, { "glSamplerParameterfv", 30, -1 }, diff -Nru mesa-18.3.3/src/mesa/main/tests/meson.build mesa-19.0.1/src/mesa/main/tests/meson.build --- mesa-18.3.3/src/mesa/main/tests/meson.build 2018-02-08 14:40:56.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/tests/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -41,5 +41,6 @@ include_directories : [inc_include, inc_src, inc_mapi, inc_mesa], dependencies : [idep_gtest, dep_clock, dep_dl, dep_thread], link_with : [libmesa_classic, link_main_test], - ) + ), + suite : ['mesa'], ) diff -Nru mesa-18.3.3/src/mesa/main/texcompress.c mesa-19.0.1/src/mesa/main/texcompress.c --- mesa-18.3.3/src/mesa/main/texcompress.c 2018-02-08 14:40:56.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/texcompress.c 2019-03-31 23:16:37.000000000 +0000 @@ -327,6 +327,23 @@ formats[n++] = GL_ETC1_RGB8_OES; } + /* Required by EXT_texture_compression_bptc in GLES. */ + if (_mesa_has_EXT_texture_compression_bptc(ctx)) { + formats[n++] = GL_COMPRESSED_RGBA_BPTC_UNORM; + formats[n++] = GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM; + formats[n++] = GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT; + formats[n++] = GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT; + } + + /* Required by EXT_texture_compression_rgtc in GLES. 
*/ + if (_mesa_is_gles3(ctx) && + _mesa_has_EXT_texture_compression_rgtc(ctx)) { + formats[n++] = GL_COMPRESSED_RED_RGTC1_EXT; + formats[n++] = GL_COMPRESSED_SIGNED_RED_RGTC1_EXT; + formats[n++] = GL_COMPRESSED_RED_GREEN_RGTC2_EXT; + formats[n++] = GL_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT; + } + if (ctx->API == API_OPENGLES) { formats[n++] = GL_PALETTE4_RGB8_OES; formats[n++] = GL_PALETTE4_RGBA8_OES; diff -Nru mesa-18.3.3/src/mesa/main/texformat.c mesa-19.0.1/src/mesa/main/texformat.c --- mesa-18.3.3/src/mesa/main/texformat.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/texformat.c 2019-03-31 23:16:37.000000000 +0000 @@ -477,6 +477,9 @@ RETURN_IF_SUPPORTED(MESA_FORMAT_B8G8R8A8_SRGB); RETURN_IF_SUPPORTED(MESA_FORMAT_A8R8G8B8_SRGB); break; + case GL_SR8_EXT: + RETURN_IF_SUPPORTED(MESA_FORMAT_R_SRGB8); + break; case GL_SLUMINANCE_EXT: case GL_SLUMINANCE8_EXT: RETURN_IF_SUPPORTED(MESA_FORMAT_L_SRGB8); diff -Nru mesa-18.3.3/src/mesa/main/teximage.c mesa-19.0.1/src/mesa/main/teximage.c --- mesa-18.3.3/src/mesa/main/teximage.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/teximage.c 2019-03-31 23:16:37.000000000 +0000 @@ -1798,8 +1798,8 @@ texture_format_error_check_gles(struct gl_context *ctx, GLenum format, GLenum type, GLenum internalFormat, const char *callerName) { - GLenum err = _mesa_es3_error_check_format_and_type(ctx, format, type, - internalFormat); + GLenum err = _mesa_gles_error_check_format_and_type(ctx, format, type, + internalFormat); if (err != GL_NO_ERROR) { _mesa_error(ctx, err, "%s(format = %s, type = %s, internalformat = %s)", @@ -2438,7 +2438,7 @@ bool rb_is_srgb = false; bool dst_is_srgb = false; - if (ctx->Extensions.EXT_framebuffer_sRGB && + if (ctx->Extensions.EXT_sRGB && _mesa_get_format_color_encoding(rb->Format) == GL_SRGB) { rb_is_srgb = true; } diff -Nru mesa-18.3.3/src/mesa/main/varray.c mesa-19.0.1/src/mesa/main/varray.c --- mesa-18.3.3/src/mesa/main/varray.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/mesa/main/varray.c 2019-03-31 23:16:37.000000000 +0000 @@ -33,6 +33,7 @@ #include "context.h" #include "enable.h" #include "enums.h" +#include "glformats.h" #include "hash.h" #include "image.h" #include "macros.h" @@ -141,7 +142,7 @@ if (ctx->API != API_OPENGL_COMPAT) return; /* The generic0 attribute superseeds the position attribute */ - const GLbitfield enabled = vao->_Enabled; + const GLbitfield enabled = vao->Enabled; if (enabled & VERT_BIT_GENERIC0) vao->_AttributeMapMode = ATTRIBUTE_MAP_MODE_GENERIC0; else if (enabled & VERT_BIT_POS) @@ -177,7 +178,7 @@ array->BufferBindingIndex = bindingIndex; - vao->NewArrays |= vao->_Enabled & array_bit; + vao->NewArrays |= vao->Enabled & array_bit; if (vao == ctx->Array.VAO) ctx->NewState |= _NEW_ARRAY; } @@ -213,7 +214,7 @@ else vao->VertexAttribBufferMask |= binding->_BoundArrays; - vao->NewArrays |= vao->_Enabled & binding->_BoundArrays; + vao->NewArrays |= vao->Enabled & binding->_BoundArrays; if (vao == ctx->Array.VAO) ctx->NewState |= _NEW_ARRAY; } @@ -236,13 +237,31 @@ if (binding->InstanceDivisor != divisor) { binding->InstanceDivisor = divisor; - vao->NewArrays |= vao->_Enabled & binding->_BoundArrays; + vao->NewArrays |= vao->Enabled & binding->_BoundArrays; if (vao == ctx->Array.VAO) ctx->NewState |= _NEW_ARRAY; } } +void +_mesa_set_vertex_format(struct gl_vertex_format *vertex_format, + GLubyte size, GLenum16 type, GLenum16 format, + GLboolean normalized, GLboolean integer, + GLboolean doubles) +{ + assert(size <= 4); + vertex_format->Type = type; + vertex_format->Format = format; + vertex_format->Size = size; + vertex_format->Normalized = normalized; + vertex_format->Integer = integer; + vertex_format->Doubles = doubles; + vertex_format->_ElementSize = _mesa_bytes_per_vertex_attrib(size, type); + assert(vertex_format->_ElementSize <= 4*sizeof(double)); +} + + /** * Examine the API profile and extensions to determine which types are legal * for vertex arrays. 
This is called once from update_array_format(). @@ -330,24 +349,15 @@ GLuint relativeOffset) { struct gl_array_attributes *const array = &vao->VertexAttrib[attrib]; - GLint elementSize; assert(!vao->SharedAndImmutable); assert(size <= 4); - elementSize = _mesa_bytes_per_vertex_attrib(size, type); - assert(elementSize != -1); - - array->Size = size; - array->Type = type; - array->Format = format; - array->Normalized = normalized; - array->Integer = integer; - array->Doubles = doubles; array->RelativeOffset = relativeOffset; - array->_ElementSize = elementSize; + _mesa_set_vertex_format(&array->Format, size, type, format, + normalized, integer, doubles); - vao->NewArrays |= vao->_Enabled & VERT_BIT(attrib); + vao->NewArrays |= vao->Enabled & VERT_BIT(attrib); if (vao == ctx->Array.VAO) ctx->NewState |= _NEW_ARRAY; } @@ -605,11 +615,12 @@ * to the VAO. But but that is done already unconditionally in * _mesa_update_array_format called above. */ - assert((vao->NewArrays | ~vao->_Enabled) & VERT_BIT(attrib)); + assert((vao->NewArrays | ~vao->Enabled) & VERT_BIT(attrib)); array->Ptr = ptr; /* Update the vertex buffer binding */ - GLsizei effectiveStride = stride != 0 ? stride : array->_ElementSize; + GLsizei effectiveStride = stride != 0 ? 
+ stride : array->Format._ElementSize; _mesa_bind_vertex_buffer(ctx, vao, attrib, ctx->Array.ArrayBufferObj, (GLintptr) ptr, effectiveStride); @@ -1071,25 +1082,25 @@ void -_mesa_enable_vertex_array_attrib(struct gl_context *ctx, - struct gl_vertex_array_object *vao, - gl_vert_attrib attrib) +_mesa_enable_vertex_array_attribs(struct gl_context *ctx, + struct gl_vertex_array_object *vao, + GLbitfield attrib_bits) { - assert(attrib < ARRAY_SIZE(vao->VertexAttrib)); + assert((attrib_bits & ~VERT_BIT_ALL) == 0); assert(!vao->SharedAndImmutable); - if (!vao->VertexAttrib[attrib].Enabled) { + /* Only work on bits that are disabled */ + attrib_bits &= ~vao->Enabled; + if (attrib_bits) { /* was disabled, now being enabled */ - vao->VertexAttrib[attrib].Enabled = GL_TRUE; - const GLbitfield array_bit = VERT_BIT(attrib); - vao->_Enabled |= array_bit; - vao->NewArrays |= array_bit; + vao->Enabled |= attrib_bits; + vao->NewArrays |= attrib_bits; if (vao == ctx->Array.VAO) ctx->NewState |= _NEW_ARRAY; /* Update the map mode if needed */ - if (array_bit & (VERT_BIT_POS|VERT_BIT_GENERIC0)) + if (attrib_bits & (VERT_BIT_POS|VERT_BIT_GENERIC0)) update_attribute_map_mode(ctx, vao); } } @@ -1158,25 +1169,25 @@ void -_mesa_disable_vertex_array_attrib(struct gl_context *ctx, - struct gl_vertex_array_object *vao, - gl_vert_attrib attrib) +_mesa_disable_vertex_array_attribs(struct gl_context *ctx, + struct gl_vertex_array_object *vao, + GLbitfield attrib_bits) { - assert(attrib < ARRAY_SIZE(vao->VertexAttrib)); + assert((attrib_bits & ~VERT_BIT_ALL) == 0); assert(!vao->SharedAndImmutable); - if (vao->VertexAttrib[attrib].Enabled) { + /* Only work on bits that are enabled */ + attrib_bits &= vao->Enabled; + if (attrib_bits) { /* was enabled, now being disabled */ - vao->VertexAttrib[attrib].Enabled = GL_FALSE; - const GLbitfield array_bit = VERT_BIT(attrib); - vao->_Enabled &= ~array_bit; - vao->NewArrays |= array_bit; + vao->Enabled &= ~attrib_bits; + vao->NewArrays |= attrib_bits; if 
(vao == ctx->Array.VAO) ctx->NewState |= _NEW_ARRAY; /* Update the map mode if needed */ - if (array_bit & (VERT_BIT_POS|VERT_BIT_GENERIC0)) + if (attrib_bits & (VERT_BIT_POS|VERT_BIT_GENERIC0)) update_attribute_map_mode(ctx, vao); } } @@ -1267,27 +1278,27 @@ switch (pname) { case GL_VERTEX_ATTRIB_ARRAY_ENABLED_ARB: - return array->Enabled; + return !!(vao->Enabled & VERT_BIT_GENERIC(index)); case GL_VERTEX_ATTRIB_ARRAY_SIZE_ARB: - return (array->Format == GL_BGRA) ? GL_BGRA : array->Size; + return (array->Format.Format == GL_BGRA) ? GL_BGRA : array->Format.Size; case GL_VERTEX_ATTRIB_ARRAY_STRIDE_ARB: return array->Stride; case GL_VERTEX_ATTRIB_ARRAY_TYPE_ARB: - return array->Type; + return array->Format.Type; case GL_VERTEX_ATTRIB_ARRAY_NORMALIZED_ARB: - return array->Normalized; + return array->Format.Normalized; case GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING_ARB: return vao->BufferBinding[array->BufferBindingIndex].BufferObj->Name; case GL_VERTEX_ATTRIB_ARRAY_INTEGER: if ((_mesa_is_desktop_gl(ctx) && (ctx->Version >= 30 || ctx->Extensions.EXT_gpu_shader4)) || _mesa_is_gles3(ctx)) { - return array->Integer; + return array->Format.Integer; } goto error; case GL_VERTEX_ATTRIB_ARRAY_LONG: if (_mesa_is_desktop_gl(ctx)) { - return array->Doubles; + return array->Format.Doubles; } goto error; case GL_VERTEX_ATTRIB_ARRAY_DIVISOR_ARB: @@ -2771,18 +2782,11 @@ struct gl_array_attributes *dst, const struct gl_array_attributes *src) { - dst->Size = src->Size; - dst->Type = src->Type; - dst->Format = src->Format; - dst->BufferBindingIndex = src->BufferBindingIndex; + dst->Ptr = src->Ptr; dst->RelativeOffset = src->RelativeOffset; dst->Format = src->Format; - dst->Integer = src->Integer; - dst->Doubles = src->Doubles; - dst->Normalized = src->Normalized; - dst->Ptr = src->Ptr; - dst->Enabled = src->Enabled; - dst->_ElementSize = src->_ElementSize; + dst->Stride = src->Stride; + dst->BufferBindingIndex = src->BufferBindingIndex; dst->_EffBufferBindingIndex = 
src->_EffBufferBindingIndex; dst->_EffRelativeOffset = src->_EffRelativeOffset; } @@ -2812,11 +2816,10 @@ fprintf(stderr, "Array Object %u\n", vao->Name); - gl_vert_attrib i; - for (i = 0; i < VERT_ATTRIB_MAX; ++i) { + GLbitfield mask = vao->Enabled; + while (mask) { + const gl_vert_attrib i = u_bit_scan(&mask); const struct gl_array_attributes *array = &vao->VertexAttrib[i]; - if (!array->Enabled) - continue; const struct gl_vertex_buffer_binding *binding = &vao->BufferBinding[array->BufferBindingIndex]; @@ -2825,8 +2828,9 @@ fprintf(stderr, " %s: Ptr=%p, Type=%s, Size=%d, ElemSize=%u, " "Stride=%d, Buffer=%u(Size %lu)\n", gl_vert_attrib_name((gl_vert_attrib)i), - array->Ptr, _mesa_enum_to_string(array->Type), array->Size, - array->_ElementSize, binding->Stride, bo->Name, + array->Ptr, _mesa_enum_to_string(array->Format.Type), + array->Format.Size, + array->Format._ElementSize, binding->Stride, bo->Name, (unsigned long) bo->Size); } } diff -Nru mesa-18.3.3/src/mesa/main/varray.h mesa-19.0.1/src/mesa/main/varray.h --- mesa-18.3.3/src/mesa/main/varray.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/varray.h 2019-03-31 23:16:37.000000000 +0000 @@ -29,6 +29,12 @@ #include "bufferobj.h" +void +_mesa_set_vertex_format(struct gl_vertex_format *vertex_format, + GLubyte size, GLenum16 type, GLenum16 format, + GLboolean normalized, GLboolean integer, + GLboolean doubles); + /** * Returns a pointer to the vertex attribute data in a client array, @@ -62,15 +68,33 @@ GLuint relativeOffset); extern void +_mesa_enable_vertex_array_attribs(struct gl_context *ctx, + struct gl_vertex_array_object *vao, + GLbitfield attrib_bits); + +static inline void _mesa_enable_vertex_array_attrib(struct gl_context *ctx, struct gl_vertex_array_object *vao, - gl_vert_attrib attrib); + gl_vert_attrib attrib) +{ + assert(attrib < VERT_ATTRIB_MAX); + _mesa_enable_vertex_array_attribs(ctx, vao, VERT_BIT(attrib)); +} extern void +_mesa_disable_vertex_array_attribs(struct gl_context 
*ctx, + struct gl_vertex_array_object *vao, + GLbitfield attrib_bits); + +static inline void _mesa_disable_vertex_array_attrib(struct gl_context *ctx, struct gl_vertex_array_object *vao, - gl_vert_attrib attrib); + gl_vert_attrib attrib) +{ + assert(attrib < VERT_ATTRIB_MAX); + _mesa_disable_vertex_array_attribs(ctx, vao, VERT_BIT(attrib)); +} extern void diff -Nru mesa-18.3.3/src/mesa/main/version.c mesa-19.0.1/src/mesa/main/version.c --- mesa-18.3.3/src/mesa/main/version.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/version.c 2019-03-31 23:16:37.000000000 +0000 @@ -509,20 +509,24 @@ extensions->ARB_internalformat_query && extensions->ARB_map_buffer_range && extensions->ARB_shader_texture_lod && - extensions->ARB_texture_float && + extensions->OES_texture_float && + extensions->OES_texture_half_float && + extensions->OES_texture_half_float_linear && extensions->ARB_texture_rg && extensions->ARB_depth_buffer_float && - /* extensions->ARB_framebuffer_object && */ - extensions->EXT_framebuffer_sRGB && + extensions->ARB_framebuffer_object && + extensions->EXT_sRGB && extensions->EXT_packed_float && extensions->EXT_texture_array && extensions->EXT_texture_shared_exponent && + extensions->EXT_texture_sRGB && extensions->EXT_transform_feedback && extensions->ARB_draw_instanced && extensions->ARB_uniform_buffer_object && extensions->EXT_texture_snorm && extensions->NV_primitive_restart && - extensions->OES_depth_texture_cube_map); + extensions->OES_depth_texture_cube_map && + extensions->EXT_texture_type_2_10_10_10_REV); const bool es31_compute_shader = consts->MaxComputeWorkGroupInvocations >= 128; const bool ver_3_1 = (ver_3_0 && diff -Nru mesa-18.3.3/src/mesa/program/prog_parameter.c mesa-19.0.1/src/mesa/program/prog_parameter.c --- mesa-18.3.3/src/mesa/program/prog_parameter.c 2018-03-26 16:53:06.000000000 +0000 +++ mesa-19.0.1/src/mesa/program/prog_parameter.c 2019-03-31 23:16:37.000000000 +0000 @@ -271,6 +271,7 @@ p->Name = strdup(name ? 
name : ""); p->Type = type; p->Size = size; + p->Padded = pad_and_align; p->DataType = datatype; paramList->ParameterValueOffset[oldNum] = oldValNum; diff -Nru mesa-18.3.3/src/mesa/program/prog_parameter.h mesa-19.0.1/src/mesa/program/prog_parameter.h --- mesa-18.3.3/src/mesa/program/prog_parameter.h 2018-04-16 21:31:06.000000000 +0000 +++ mesa-19.0.1/src/mesa/program/prog_parameter.h 2019-03-31 23:16:37.000000000 +0000 @@ -104,6 +104,12 @@ * A sequence of STATE_* tokens and integers to identify GL state. */ gl_state_index16 StateIndexes[STATE_LENGTH]; + + /** + * We need to keep track of whether the param is padded for use in the + * shader cache. + */ + bool Padded; }; diff -Nru mesa-18.3.3/src/mesa/program/prog_to_nir.c mesa-19.0.1/src/mesa/program/prog_to_nir.c --- mesa-18.3.3/src/mesa/program/prog_to_nir.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/program/prog_to_nir.c 2019-03-31 23:16:37.000000000 +0000 @@ -393,7 +393,7 @@ ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) { if (b->shader->options->native_integers) { - ptn_move_dest(b, dest, nir_b2f(b, nir_flt(b, src[0], src[1]))); + ptn_move_dest(b, dest, nir_b2f32(b, nir_flt(b, src[0], src[1]))); } else { ptn_move_dest(b, dest, nir_slt(b, src[0], src[1])); } @@ -406,7 +406,7 @@ ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) { if (b->shader->options->native_integers) { - ptn_move_dest(b, dest, nir_b2f(b, nir_fge(b, src[0], src[1]))); + ptn_move_dest(b, dest, nir_b2f32(b, nir_fge(b, src[0], src[1]))); } else { ptn_move_dest(b, dest, nir_sge(b, src[0], src[1])); } @@ -983,7 +983,7 @@ if (prog->Parameters->NumParameters > 0) { c->parameters = rzalloc(s, nir_variable); c->parameters->type = - glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters); + glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters, 0); c->parameters->name = "parameters"; c->parameters->data.read_only = true; c->parameters->data.mode = nir_var_uniform; diff -Nru 
mesa-18.3.3/src/mesa/state_tracker/st_atom_array.c mesa-19.0.1/src/mesa/state_tracker/st_atom_array.c --- mesa-18.3.3/src/mesa/state_tracker/st_atom_array.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_atom_array.c 2019-03-31 23:16:37.000000000 +0000 @@ -237,19 +237,19 @@ /** * Return a PIPE_FORMAT_x for the given GL datatype and size. */ -enum pipe_format -st_pipe_vertex_format(const struct gl_array_attributes *attrib) +static enum pipe_format +st_pipe_vertex_format(const struct gl_vertex_format *vformat) { - const GLubyte size = attrib->Size; - const GLenum16 format = attrib->Format; - const bool normalized = attrib->Normalized; - const bool integer = attrib->Integer; - GLenum16 type = attrib->Type; + const GLubyte size = vformat->Size; + const GLenum16 format = vformat->Format; + const bool normalized = vformat->Normalized; + const bool integer = vformat->Integer; + GLenum16 type = vformat->Type; unsigned index; assert(size >= 1 && size <= 4); assert(format == GL_RGBA || format == GL_BGRA); - assert(attrib->_ElementSize == _mesa_bytes_per_vertex_attrib(size, type)); + assert(vformat->_ElementSize == _mesa_bytes_per_vertex_attrib(size, type)); switch (type) { case GL_HALF_FLOAT_OES: @@ -320,13 +320,13 @@ static void init_velement_lowered(const struct st_vertex_program *vp, struct pipe_vertex_element *velements, - const struct gl_array_attributes *attrib, + const struct gl_vertex_format *vformat, int src_offset, int instance_divisor, int vbo_index, int idx) { - const GLubyte nr_components = attrib->Size; + const GLubyte nr_components = vformat->Size; - if (attrib->Doubles) { + if (vformat->Doubles) { int lower_format; if (nr_components < 2) @@ -357,7 +357,7 @@ } } } else { - const unsigned format = st_pipe_vertex_format(attrib); + const unsigned format = st_pipe_vertex_format(vformat); init_velement(&velements[idx], src_offset, format, instance_divisor, vbo_index); @@ -384,25 +384,17 @@ } void -st_update_array(struct st_context 
*st) +st_setup_arrays(struct st_context *st, + const struct st_vertex_program *vp, + const struct st_vp_variant *vp_variant, + struct pipe_vertex_element *velements, + struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers) { struct gl_context *ctx = st->ctx; - /* vertex program validation must be done before this */ - const struct st_vertex_program *vp = st->vp; - /* _NEW_PROGRAM, ST_NEW_VS_STATE */ - const GLbitfield inputs_read = st->vp_variant->vert_attrib_mask; const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO; + const GLbitfield inputs_read = vp_variant->vert_attrib_mask; const ubyte *input_to_index = vp->input_to_index; - struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS]; - struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; - unsigned num_vbuffers = 0; - - st->vertex_array_out_of_memory = FALSE; - st->draw_needs_minmax_index = false; - - /* _NEW_PROGRAM */ - /* ST_NEW_VERTEX_ARRAYS alias ctx->DriverFlags.NewArray */ /* Process attribute array data. */ GLbitfield mask = inputs_read & _mesa_draw_array_bits(ctx); while (mask) { @@ -410,7 +402,7 @@ const gl_vert_attrib i = ffs(mask) - 1; const struct gl_vertex_buffer_binding *const binding = _mesa_draw_buffer_binding(vao, i); - const unsigned bufidx = num_vbuffers++; + const unsigned bufidx = (*num_vbuffers)++; if (_mesa_is_bufferobj(binding->BufferObj)) { struct st_buffer_object *stobj = st_buffer_object(binding->BufferObj); @@ -447,35 +439,47 @@ const struct gl_array_attributes *const attrib = _mesa_draw_array_attrib(vao, attr); const GLuint off = _mesa_draw_attributes_relative_offset(attrib); - init_velement_lowered(vp, velements, attrib, off, + init_velement_lowered(vp, velements, &attrib->Format, off, binding->InstanceDivisor, bufidx, input_to_index[attr]); } } +} + +void +st_setup_current(struct st_context *st, + const struct st_vertex_program *vp, + const struct st_vp_variant *vp_variant, + struct pipe_vertex_element *velements, + struct pipe_vertex_buffer *vbuffer, unsigned 
*num_vbuffers) +{ + struct gl_context *ctx = st->ctx; + const GLbitfield inputs_read = vp_variant->vert_attrib_mask; - const unsigned first_current_vbuffer = num_vbuffers; - /* _NEW_PROGRAM | _NEW_CURRENT_ATTRIB */ /* Process values that should have better been uniforms in the application */ GLbitfield curmask = inputs_read & _mesa_draw_current_bits(ctx); if (curmask) { + /* vertex program validation must be done before this */ + const struct st_vertex_program *vp = st->vp; + const ubyte *input_to_index = vp->input_to_index; /* For each attribute, upload the maximum possible size. */ GLubyte data[VERT_ATTRIB_MAX * sizeof(GLdouble) * 4]; GLubyte *cursor = data; - const unsigned bufidx = num_vbuffers++; + const unsigned bufidx = (*num_vbuffers)++; unsigned max_alignment = 1; while (curmask) { const gl_vert_attrib attr = u_bit_scan(&curmask); const struct gl_array_attributes *const attrib = _mesa_draw_current_attrib(ctx, attr); - const unsigned size = attrib->_ElementSize; + const unsigned size = attrib->Format._ElementSize; const unsigned alignment = util_next_power_of_two(size); max_alignment = MAX2(max_alignment, alignment); memcpy(cursor, attrib->Ptr, size); if (alignment != size) memset(cursor + size, 0, alignment - size); - init_velement_lowered(vp, velements, attrib, cursor - data, 0, + init_velement_lowered(vp, velements, &attrib->Format, cursor - data, 0, bufidx, input_to_index[attr]); cursor += alignment; @@ -498,17 +502,79 @@ 0, cursor - data, max_alignment, data, &vbuffer[bufidx].buffer_offset, &vbuffer[bufidx].buffer.resource); + + if (!ctx->Const.AllowMappedBuffersDuringExecution && + !st->can_bind_const_buffer_as_vertex) { + u_upload_unmap(st->pipe->stream_uploader); + } } +} + +void +st_setup_current_user(struct st_context *st, + const struct st_vertex_program *vp, + const struct st_vp_variant *vp_variant, + struct pipe_vertex_element *velements, + struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers) +{ + struct gl_context *ctx = st->ctx; + 
const GLbitfield inputs_read = vp_variant->vert_attrib_mask; + const ubyte *input_to_index = vp->input_to_index; - if (!ctx->Const.AllowMappedBuffersDuringExecution) { - u_upload_unmap(st->pipe->stream_uploader); + /* Process values that should have better been uniforms in the application */ + GLbitfield curmask = inputs_read & _mesa_draw_current_bits(ctx); + /* For each attribute, make an own user buffer binding. */ + while (curmask) { + const gl_vert_attrib attr = u_bit_scan(&curmask); + const struct gl_array_attributes *const attrib + = _mesa_draw_current_attrib(ctx, attr); + const unsigned bufidx = (*num_vbuffers)++; + + init_velement_lowered(vp, velements, &attrib->Format, 0, 0, + bufidx, input_to_index[attr]); + + vbuffer[bufidx].is_user_buffer = true; + vbuffer[bufidx].buffer.user = attrib->Ptr; + vbuffer[bufidx].buffer_offset = 0; + vbuffer[bufidx].stride = 0; } +} - const unsigned num_inputs = st->vp_variant->num_inputs; - set_vertex_attribs(st, vbuffer, num_vbuffers, velements, num_inputs); +void +st_update_array(struct st_context *st) +{ + /* vertex program validation must be done before this */ + /* _NEW_PROGRAM, ST_NEW_VS_STATE */ + const struct st_vertex_program *vp = st->vp; + const struct st_vp_variant *vp_variant = st->vp_variant; + + struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS]; + unsigned num_vbuffers = 0, first_upload_vbuffer; + struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; + unsigned num_velements; + + st->vertex_array_out_of_memory = FALSE; + st->draw_needs_minmax_index = false; + + /* ST_NEW_VERTEX_ARRAYS alias ctx->DriverFlags.NewArray */ + /* Setup arrays */ + st_setup_arrays(st, vp, vp_variant, velements, vbuffer, &num_vbuffers); + if (st->vertex_array_out_of_memory) + return; + + /* _NEW_CURRENT_ATTRIB */ + /* Setup current uploads */ + first_upload_vbuffer = num_vbuffers; + st_setup_current(st, vp, vp_variant, velements, vbuffer, &num_vbuffers); + if (st->vertex_array_out_of_memory) + return; + + /* Set the array into 
cso */ + num_velements = vp_variant->num_inputs; + set_vertex_attribs(st, vbuffer, num_vbuffers, velements, num_velements); - /* Unreference uploaded zero-stride vertex buffers. */ - for (unsigned i = first_current_vbuffer; i < num_vbuffers; ++i) { + /* Unreference uploaded buffer resources. */ + for (unsigned i = first_upload_vbuffer; i < num_vbuffers; ++i) { pipe_resource_reference(&vbuffer[i].buffer.resource, NULL); } } diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_atom_blend.c mesa-19.0.1/src/mesa/state_tracker/st_atom_blend.c --- mesa-18.3.3/src/mesa/state_tracker/st_atom_blend.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_atom_blend.c 2019-03-31 23:16:37.000000000 +0000 @@ -41,6 +41,7 @@ #include "framebuffer.h" #include "main/blend.h" +#include "main/glformats.h" #include "main/macros.h" /** @@ -126,8 +127,9 @@ * Figure out if blend enables/state are different per rt. */ static GLboolean -blend_per_rt(const struct gl_context *ctx, unsigned num_cb) +blend_per_rt(const struct st_context *st, unsigned num_cb) { + const struct gl_context *ctx = st->ctx; GLbitfield cb_mask = u_bit_consecutive(0, num_cb); GLbitfield blend_enabled = ctx->Color.BlendEnabled & cb_mask; @@ -145,9 +147,49 @@ * must be handled on a per buffer basis. */ return GL_TRUE; } + + if (st->needs_rgb_dst_alpha_override && ctx->DrawBuffer->_RGBBuffers) { + /* Overriding requires independent blend functions (not just enables), + * require drivers exposing PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND to + * also expose PIPE_CAP_INDEP_BLEND_FUNC. + */ + assert(st->has_indep_blend_func); + + /* If some of the buffers are RGB, we may need to override blend + * factors that reference destination-alpha to constants. We may + * need different blend factor overrides per buffer (say one uses + * a DST_ALPHA factor and another uses INV_DST_ALPHA), so we flip + * on independent blending. 
This may not be required in all cases, + * but burning the CPU to figure it out is probably not worthwhile. + */ + return GL_TRUE; + } + return GL_FALSE; } +/** + * Modify blend function to force destination alpha to 1.0 + * + * If \c function specifies a blend function that uses destination alpha, + * replace it with a function that hard-wires destination alpha to 1.0. + * This is useful when emulating a GL RGB format with an RGBA pipe_format. + */ +static enum pipe_blendfactor +fix_xrgb_alpha(enum pipe_blendfactor factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_DST_ALPHA: + return PIPE_BLENDFACTOR_ONE; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return PIPE_BLENDFACTOR_ZERO; + default: + return factor; + } +} + void st_update_blend( struct st_context *st ) { @@ -160,7 +202,7 @@ memset(blend, 0, sizeof(*blend)); if (num_cb > 1 && - (blend_per_rt(ctx, num_cb) || colormask_per_rt(ctx, num_cb))) { + (blend_per_rt(st, num_cb) || colormask_per_rt(ctx, num_cb))) { num_state = num_cb; blend->independent_blend_enable = 1; } @@ -216,6 +258,18 @@ blend->rt[i].alpha_dst_factor = translate_blend(ctx->Color.Blend[j].DstA); } + + const struct gl_renderbuffer *rb = + ctx->DrawBuffer->_ColorDrawBuffers[i]; + + if (st->needs_rgb_dst_alpha_override && rb && + (ctx->DrawBuffer->_RGBBuffers & (1 << i))) { + struct pipe_rt_blend_state *rt = &blend->rt[i]; + rt->rgb_src_factor = fix_xrgb_alpha(rt->rgb_src_factor); + rt->rgb_dst_factor = fix_xrgb_alpha(rt->rgb_dst_factor); + rt->alpha_src_factor = fix_xrgb_alpha(rt->alpha_src_factor); + rt->alpha_dst_factor = fix_xrgb_alpha(rt->alpha_dst_factor); + } } } else { diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_atom.h mesa-19.0.1/src/mesa/state_tracker/st_atom.h --- mesa-18.3.3/src/mesa/state_tracker/st_atom.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_atom.h 2019-03-31 23:16:37.000000000 +0000 @@ -37,6 +37,10 @@ #include "main/glheader.h" struct 
st_context; +struct st_vertex_program; +struct st_vp_variant; +struct pipe_vertex_buffer; +struct pipe_vertex_element; /** * Enumeration of state tracker pipelines. @@ -54,9 +58,26 @@ void st_validate_state( struct st_context *st, enum st_pipeline pipeline ); GLuint st_compare_func_to_pipe(GLenum func); -enum pipe_format -st_pipe_vertex_format(const struct gl_array_attributes *attrib); - +void +st_setup_arrays(struct st_context *st, + const struct st_vertex_program *vp, + const struct st_vp_variant *vp_variant, + struct pipe_vertex_element *velements, + struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers); + +void +st_setup_current(struct st_context *st, + const struct st_vertex_program *vp, + const struct st_vp_variant *vp_variant, + struct pipe_vertex_element *velements, + struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers); + +void +st_setup_current_user(struct st_context *st, + const struct st_vertex_program *vp, + const struct st_vp_variant *vp_variant, + struct pipe_vertex_element *velements, + struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers); /* Define ST_NEW_xxx_INDEX */ enum { diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_cb_bitmap.c mesa-19.0.1/src/mesa/state_tracker/st_cb_bitmap.c --- mesa-18.3.3/src/mesa/state_tracker/st_cb_bitmap.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_cb_bitmap.c 2019-03-31 23:16:37.000000000 +0000 @@ -46,6 +46,7 @@ #include "st_draw.h" #include "st_program.h" #include "st_cb_bitmap.h" +#include "st_cb_drawpixels.h" #include "st_sampler_view.h" #include "st_texture.h" @@ -53,7 +54,6 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "util/u_inlines.h" -#include "util/u_simple_shaders.h" #include "util/u_upload_mgr.h" #include "program/prog_instruction.h" #include "cso_cache/cso_context.h" @@ -214,7 +214,7 @@ cso_set_fragment_shader_handle(cso, fpv->driver_shader); /* vertex shader state: position + texcoord pass-through */ - 
cso_set_vertex_shader_handle(cso, st->bitmap.vs); + cso_set_vertex_shader_handle(cso, st->passthrough_vs); /* disable other shaders */ cso_set_tessctrl_shader_handle(cso, NULL); @@ -538,7 +538,7 @@ struct pipe_screen *screen = pipe->screen; /* This function should only be called once */ - assert(st->bitmap.vs == NULL); + assert(!st->bitmap.tex_format); assert(st->internal_target == PIPE_TEXTURE_2D || st->internal_target == PIPE_TEXTURE_RECT); @@ -585,17 +585,7 @@ } /* Create the vertex shader */ - { - const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_COLOR, - st->needs_texcoord_semantic ? TGSI_SEMANTIC_TEXCOORD : - TGSI_SEMANTIC_GENERIC }; - const uint semantic_indexes[] = { 0, 0, 0 }; - st->bitmap.vs = util_make_vertex_passthrough_shader(st->pipe, 3, - semantic_names, - semantic_indexes, - FALSE); - } + st_make_passthrough_vertex_shader(st); reset_cache(st); } @@ -617,7 +607,7 @@ st_invalidate_readpix_cache(st); - if (!st->bitmap.vs) { + if (!st->bitmap.tex_format) { init_bitmap_state(st); } @@ -677,7 +667,7 @@ struct pipe_vertex_buffer vb = {0}; unsigned i; - if (!st->bitmap.vs) { + if (!st->bitmap.tex_format) { init_bitmap_state(st); } @@ -807,11 +797,6 @@ struct pipe_context *pipe = st->pipe; struct st_bitmap_cache *cache = &st->bitmap.cache; - if (st->bitmap.vs) { - cso_delete_vertex_shader(st->cso_context, st->bitmap.vs); - st->bitmap.vs = NULL; - } - if (cache->trans && cache->buffer) { pipe_transfer_unmap(pipe, cache->trans); } diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_cb_clear.c mesa-19.0.1/src/mesa/state_tracker/st_cb_clear.c --- mesa-18.3.3/src/mesa/state_tracker/st_cb_clear.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_cb_clear.c 2019-03-31 23:16:37.000000000 +0000 @@ -392,12 +392,18 @@ if (!strb || !strb->surface) continue; - if (!GET_COLORMASK(ctx->Color.ColorMask, colormask_index)) + unsigned colormask = + GET_COLORMASK(ctx->Color.ColorMask, colormask_index); + + if (!colormask) continue; 
+ unsigned surf_colormask = + util_format_colormask(util_format_description(strb->surface->format)); + if (is_scissor_enabled(ctx, rb) || is_window_rectangle_enabled(ctx) || - GET_COLORMASK(ctx->Color.ColorMask, colormask_index) != 0xf) + ((colormask & surf_colormask) != surf_colormask)) quad_buffers |= PIPE_CLEAR_COLOR0 << i; else clear_buffers |= PIPE_CLEAR_COLOR0 << i; @@ -442,9 +448,6 @@ * use pipe->clear. We want to always use pipe->clear for the other * renderbuffers, because it's likely to be faster. */ - if (quad_buffers) { - clear_with_quad(ctx, quad_buffers); - } if (clear_buffers) { /* We can't translate the clear color to the colorbuffer format, * because different colorbuffers may have different formats. @@ -453,6 +456,9 @@ (union pipe_color_union*)&ctx->Color.ClearColor, ctx->Depth.Clear, ctx->Stencil.Clear); } + if (quad_buffers) { + clear_with_quad(ctx, quad_buffers); + } if (mask & BUFFER_BIT_ACCUM) _mesa_clear_accum_buffer(ctx); } diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_cb_drawpixels.c mesa-19.0.1/src/mesa/state_tracker/st_cb_drawpixels.c --- mesa-18.3.3/src/mesa/state_tracker/st_cb_drawpixels.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_cb_drawpixels.c 2019-03-31 23:16:37.000000000 +0000 @@ -72,6 +72,7 @@ #include "util/u_format.h" #include "util/u_inlines.h" #include "util/u_math.h" +#include "util/u_simple_shaders.h" #include "util/u_tile.h" #include "cso_cache/cso_context.h" @@ -191,45 +192,23 @@ /** * Create a simple vertex shader that just passes through the - * vertex position and texcoord (and optionally, color). + * vertex position, texcoord, and color. */ -static void * -make_passthrough_vertex_shader(struct st_context *st, - GLboolean passColor) +void +st_make_passthrough_vertex_shader(struct st_context *st) { - const enum tgsi_semantic texcoord_semantic = st->needs_texcoord_semantic ? 
- TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC; - - if (!st->drawpix.vert_shaders[passColor]) { - struct ureg_program *ureg = ureg_create( PIPE_SHADER_VERTEX ); - - if (ureg == NULL) - return NULL; - - /* MOV result.pos, vertex.pos; */ - ureg_MOV(ureg, - ureg_DECL_output( ureg, TGSI_SEMANTIC_POSITION, 0 ), - ureg_DECL_vs_input( ureg, 0 )); - - if (passColor) { - /* MOV result.color0, vertex.attr[1]; */ - ureg_MOV(ureg, - ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, 0 ), - ureg_DECL_vs_input( ureg, 1 )); - } - - /* MOV result.texcoord0, vertex.attr[2]; */ - ureg_MOV(ureg, - ureg_DECL_output( ureg, texcoord_semantic, 0 ), - ureg_DECL_vs_input( ureg, 2 )); - - ureg_END( ureg ); - - st->drawpix.vert_shaders[passColor] = - ureg_create_shader_and_destroy( ureg, st->pipe ); - } + if (st->passthrough_vs) + return; - return st->drawpix.vert_shaders[passColor]; + const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_COLOR, + st->needs_texcoord_semantic ? TGSI_SEMANTIC_TEXCOORD : + TGSI_SEMANTIC_GENERIC }; + const uint semantic_indexes[] = { 0, 0, 0 }; + + st->passthrough_vs = + util_make_vertex_passthrough_shader(st->pipe, 3, semantic_names, + semantic_indexes, false); } @@ -1135,7 +1114,7 @@ GLenum format, GLenum type, const struct gl_pixelstore_attrib *unpack, const void *pixels) { - void *driver_vp, *driver_fp; + void *driver_fp; struct st_context *st = st_context(ctx); struct pipe_context *pipe = st->pipe; GLboolean write_stencil = GL_FALSE, write_depth = GL_FALSE; @@ -1185,19 +1164,19 @@ return; } + st_make_passthrough_vertex_shader(st); + /* * Get vertex/fragment shaders */ if (write_depth || write_stencil) { driver_fp = get_drawpix_z_stencil_program(st, write_depth, write_stencil); - driver_vp = make_passthrough_vertex_shader(st, GL_TRUE); } else { fpv = get_color_fp_variant(st); driver_fp = fpv->driver_shader; - driver_vp = make_passthrough_vertex_shader(st, GL_FALSE); if (ctx->Pixel.MapColorFlag) { pipe_sampler_view_reference(&sv[1], @@ -1246,7 
+1225,7 @@ ctx->Pixel.ZoomX, ctx->Pixel.ZoomY, sv, num_sampler_view, - driver_vp, + st->passthrough_vs, driver_fp, fpv, ctx->Current.RasterColor, GL_FALSE, write_depth, write_stencil); @@ -1506,7 +1485,7 @@ struct pipe_context *pipe = st->pipe; struct pipe_screen *screen = pipe->screen; struct st_renderbuffer *rbRead; - void *driver_vp, *driver_fp; + void *driver_fp; struct pipe_resource *pt; struct pipe_sampler_view *sv[2] = { NULL }; struct st_fp_variant *fpv = NULL; @@ -1547,6 +1526,7 @@ * are handled. */ + st_make_passthrough_vertex_shader(st); /* * Get vertex/fragment shaders @@ -1557,7 +1537,6 @@ rbRead = st_get_color_read_renderbuffer(ctx); driver_fp = fpv->driver_shader; - driver_vp = make_passthrough_vertex_shader(st, GL_FALSE); if (ctx->Pixel.MapColorFlag) { pipe_sampler_view_reference(&sv[1], @@ -1576,7 +1555,6 @@ Attachment[BUFFER_DEPTH].Renderbuffer); driver_fp = get_drawpix_z_stencil_program(st, GL_TRUE, GL_FALSE); - driver_vp = make_passthrough_vertex_shader(st, GL_TRUE); } /* Choose the format for the temporary texture. 
*/ @@ -1703,7 +1681,7 @@ width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY, sv, num_sampler_view, - driver_vp, + st->passthrough_vs, driver_fp, fpv, ctx->Current.Attrib[VERT_ATTRIB_COLOR0], invertTex, GL_FALSE, GL_FALSE); @@ -1732,10 +1710,8 @@ st->drawpix.zs_shaders[i]); } - if (st->drawpix.vert_shaders[0]) - cso_delete_vertex_shader(st->cso_context, st->drawpix.vert_shaders[0]); - if (st->drawpix.vert_shaders[1]) - cso_delete_vertex_shader(st->cso_context, st->drawpix.vert_shaders[1]); + if (st->passthrough_vs) + cso_delete_vertex_shader(st->cso_context, st->passthrough_vs); /* Free cache data */ for (i = 0; i < ARRAY_SIZE(st->drawpix_cache.entries); i++) { diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_cb_drawpixels.h mesa-19.0.1/src/mesa/state_tracker/st_cb_drawpixels.h --- mesa-18.3.3/src/mesa/state_tracker/st_cb_drawpixels.h 2018-01-24 16:24:53.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_cb_drawpixels.h 2019-03-31 23:16:37.000000000 +0000 @@ -47,4 +47,7 @@ unsigned drawpix_sampler, unsigned pixelmap_sampler, unsigned texcoord_const, unsigned tex_target); +extern void +st_make_passthrough_vertex_shader(struct st_context *st); + #endif /* ST_CB_DRAWPIXELS_H */ diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_cb_eglimage.c mesa-19.0.1/src/mesa/state_tracker/st_cb_eglimage.c --- mesa-18.3.3/src/mesa/state_tracker/st_cb_eglimage.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_cb_eglimage.c 2019-03-31 23:16:37.000000000 +0000 @@ -159,20 +159,11 @@ if (!ps) return; - strb->Base.Width = ps->width; - strb->Base.Height = ps->height; strb->Base.Format = st_pipe_format_to_mesa_format(ps->format); strb->Base._BaseFormat = st_pipe_format_to_base_format(ps->format); strb->Base.InternalFormat = strb->Base._BaseFormat; - struct pipe_surface **psurf = - util_format_is_srgb(ps->format) ? 
&strb->surface_srgb : - &strb->surface_linear; - - pipe_surface_reference(psurf, ps); - strb->surface = *psurf; - pipe_resource_reference(&strb->texture, ps->texture); - + st_set_ws_renderbuffer_surface(strb, ps); pipe_surface_reference(&ps, NULL); } } diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_cb_fbo.c mesa-19.0.1/src/mesa/state_tracker/st_cb_fbo.c --- mesa-18.3.3/src/mesa/state_tracker/st_cb_fbo.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_cb_fbo.c 2019-03-31 23:16:37.000000000 +0000 @@ -139,7 +139,7 @@ /* If an sRGB framebuffer is unsupported, sRGB formats behave like linear * formats. */ - if (!ctx->Extensions.EXT_framebuffer_sRGB) { + if (!ctx->Extensions.EXT_sRGB) { internalFormat = _mesa_get_linear_internalformat(internalFormat); } @@ -519,6 +519,7 @@ surf->texture != resource || surf->width != rtt_width || surf->height != rtt_height || + surf->nr_samples != strb->rtt_nr_samples || surf->u.tex.level != level || surf->u.tex.first_layer != first_layer || surf->u.tex.last_layer != last_layer) { @@ -526,6 +527,7 @@ struct pipe_surface surf_tmpl; memset(&surf_tmpl, 0, sizeof(surf_tmpl)); surf_tmpl.format = format; + surf_tmpl.nr_samples = strb->rtt_nr_samples; surf_tmpl.u.tex.level = level; surf_tmpl.u.tex.first_layer = first_layer; surf_tmpl.u.tex.last_layer = last_layer; @@ -575,6 +577,7 @@ strb->rtt_face = att->CubeMapFace; strb->rtt_slice = att->Zoffset; strb->rtt_layered = att->Layered; + strb->rtt_nr_samples = att->NumSamples; pipe_resource_reference(&strb->texture, pt); st_update_renderbuffer_surface(st, strb); @@ -659,7 +662,7 @@ /* If the encoding is sRGB and sRGB rendering cannot be enabled, * check for linear format support instead. * Later when we create a surface, we change the format to a linear one. 
*/ - if (!ctx->Extensions.EXT_framebuffer_sRGB && + if (!ctx->Extensions.EXT_sRGB && _mesa_get_format_color_encoding(texFormat) == GL_SRGB) { const mesa_format linearFormat = _mesa_get_srgb_format_linear(texFormat); format = st_mesa_format_to_pipe_format(st_context(ctx), linearFormat); @@ -760,6 +763,30 @@ /** + * Called by ctx->Driver.DiscardFramebuffer + */ +static void +st_discard_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb, + struct gl_renderbuffer_attachment *att) +{ + struct st_context *st = st_context(ctx); + struct pipe_resource *prsc; + + if (!att->Renderbuffer) + return; + + prsc = st_renderbuffer(att->Renderbuffer)->surface->texture; + + /* using invalidate_resource will only work for simple 2D resources */ + if (prsc->depth0 != 1 || prsc->array_size != 1 || prsc->last_level != 0) + return; + + if (st->pipe->invalidate_resource) + st->pipe->invalidate_resource(st->pipe, prsc); +} + + +/** * Called via glDrawBuffer. We only provide this driver function so that we * can check if we need to allocate a new renderbuffer. 
Specifically, we * don't usually allocate a front color buffer when using a double-buffered @@ -936,6 +963,7 @@ functions->RenderTexture = st_render_texture; functions->FinishRenderTexture = st_finish_render_texture; functions->ValidateFramebuffer = st_validate_framebuffer; + functions->DiscardFramebuffer = st_discard_framebuffer; functions->DrawBufferAllocate = st_DrawBufferAllocate; functions->ReadBuffer = st_ReadBuffer; diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_cb_fbo.h mesa-19.0.1/src/mesa/state_tracker/st_cb_fbo.h --- mesa-18.3.3/src/mesa/state_tracker/st_cb_fbo.h 2018-01-24 16:24:53.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_cb_fbo.h 2019-03-31 23:16:37.000000000 +0000 @@ -69,6 +69,7 @@ boolean is_rtt; /**< whether Driver.RenderTexture was called */ unsigned rtt_face, rtt_slice; boolean rtt_layered; /**< whether glFramebufferTexture was called */ + unsigned rtt_nr_samples; /**< from FramebufferTexture2DMultisampleEXT */ }; diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_cb_queryobj.c mesa-19.0.1/src/mesa/state_tracker/st_cb_queryobj.c --- mesa-18.3.3/src/mesa/state_tracker/st_cb_queryobj.c 2018-01-24 16:24:53.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_cb_queryobj.c 2019-03-31 23:16:37.000000000 +0000 @@ -88,6 +88,45 @@ free(stq); } +static int +target_to_index(const struct st_context *st, const struct gl_query_object *q) +{ + if (q->Target == GL_PRIMITIVES_GENERATED || + q->Target == GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN || + q->Target == GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB) + return q->Stream; + + if (st->has_single_pipe_stat) { + switch (q->Target) { + case GL_VERTICES_SUBMITTED_ARB: + return PIPE_STAT_QUERY_IA_VERTICES; + case GL_PRIMITIVES_SUBMITTED_ARB: + return PIPE_STAT_QUERY_IA_PRIMITIVES; + case GL_VERTEX_SHADER_INVOCATIONS_ARB: + return PIPE_STAT_QUERY_VS_INVOCATIONS; + case GL_GEOMETRY_SHADER_INVOCATIONS: + return PIPE_STAT_QUERY_GS_INVOCATIONS; + case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: + return 
PIPE_STAT_QUERY_GS_PRIMITIVES; + case GL_CLIPPING_INPUT_PRIMITIVES_ARB: + return PIPE_STAT_QUERY_C_INVOCATIONS; + case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: + return PIPE_STAT_QUERY_C_PRIMITIVES; + case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: + return PIPE_STAT_QUERY_PS_INVOCATIONS; + case GL_TESS_CONTROL_SHADER_PATCHES_ARB: + return PIPE_STAT_QUERY_HS_INVOCATIONS; + case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: + return PIPE_STAT_QUERY_DS_INVOCATIONS; + case GL_COMPUTE_SHADER_INVOCATIONS_ARB: + return PIPE_STAT_QUERY_CS_INVOCATIONS; + default: + break; + } + } + + return 0; +} static void st_BeginQuery(struct gl_context *ctx, struct gl_query_object *q) @@ -140,7 +179,8 @@ case GL_COMPUTE_SHADER_INVOCATIONS_ARB: case GL_CLIPPING_INPUT_PRIMITIVES_ARB: case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: - type = PIPE_QUERY_PIPELINE_STATISTICS; + type = st->has_single_pipe_stat ? PIPE_QUERY_PIPELINE_STATISTICS_SINGLE + : PIPE_QUERY_PIPELINE_STATISTICS; break; default: assert(0 && "unexpected query target in st_BeginQuery()"); @@ -164,7 +204,7 @@ ret = pipe->end_query(pipe, stq->pq_begin); } else { if (!stq->pq) { - stq->pq = pipe->create_query(pipe, type, q->Stream); + stq->pq = pipe->create_query(pipe, type, target_to_index(st, q)); stq->type = type; } if (stq->pq) @@ -226,53 +266,55 @@ if (!pipe->get_query_result(pipe, stq->pq, wait, &data)) return FALSE; - switch (stq->base.Target) { - case GL_VERTICES_SUBMITTED_ARB: - stq->base.Result = data.pipeline_statistics.ia_vertices; - break; - case GL_PRIMITIVES_SUBMITTED_ARB: - stq->base.Result = data.pipeline_statistics.ia_primitives; - break; - case GL_VERTEX_SHADER_INVOCATIONS_ARB: - stq->base.Result = data.pipeline_statistics.vs_invocations; - break; - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - stq->base.Result = data.pipeline_statistics.hs_invocations; - break; - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: - stq->base.Result = data.pipeline_statistics.ds_invocations; - break; - case GL_GEOMETRY_SHADER_INVOCATIONS: - 
stq->base.Result = data.pipeline_statistics.gs_invocations; - break; - case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: - stq->base.Result = data.pipeline_statistics.gs_primitives; - break; - case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: - stq->base.Result = data.pipeline_statistics.ps_invocations; - break; - case GL_COMPUTE_SHADER_INVOCATIONS_ARB: - stq->base.Result = data.pipeline_statistics.cs_invocations; - break; - case GL_CLIPPING_INPUT_PRIMITIVES_ARB: - stq->base.Result = data.pipeline_statistics.c_invocations; - break; - case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: - stq->base.Result = data.pipeline_statistics.c_primitives; - break; - default: - switch (stq->type) { - case PIPE_QUERY_OCCLUSION_PREDICATE: - case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: - stq->base.Result = !!data.b; + switch (stq->type) { + case PIPE_QUERY_PIPELINE_STATISTICS: + switch (stq->base.Target) { + case GL_VERTICES_SUBMITTED_ARB: + stq->base.Result = data.pipeline_statistics.ia_vertices; break; - default: - stq->base.Result = data.u64; + case GL_PRIMITIVES_SUBMITTED_ARB: + stq->base.Result = data.pipeline_statistics.ia_primitives; + break; + case GL_VERTEX_SHADER_INVOCATIONS_ARB: + stq->base.Result = data.pipeline_statistics.vs_invocations; + break; + case GL_TESS_CONTROL_SHADER_PATCHES_ARB: + stq->base.Result = data.pipeline_statistics.hs_invocations; + break; + case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: + stq->base.Result = data.pipeline_statistics.ds_invocations; + break; + case GL_GEOMETRY_SHADER_INVOCATIONS: + stq->base.Result = data.pipeline_statistics.gs_invocations; + break; + case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: + stq->base.Result = data.pipeline_statistics.gs_primitives; + break; + case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: + stq->base.Result = data.pipeline_statistics.ps_invocations; + break; + case GL_COMPUTE_SHADER_INVOCATIONS_ARB: + stq->base.Result = 
data.pipeline_statistics.cs_invocations; break; + case GL_CLIPPING_INPUT_PRIMITIVES_ARB: + stq->base.Result = data.pipeline_statistics.c_invocations; + break; + case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: + stq->base.Result = data.pipeline_statistics.c_primitives; + break; + default: + unreachable("invalid pipeline statistics counter"); } break; + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: + stq->base.Result = !!data.b; + break; + default: + stq->base.Result = data.u64; + break; } if (stq->base.Target == GL_TIME_ELAPSED && @@ -386,37 +428,37 @@ } else if (stq->type == PIPE_QUERY_PIPELINE_STATISTICS) { switch (q->Target) { case GL_VERTICES_SUBMITTED_ARB: - index = 0; + index = PIPE_STAT_QUERY_IA_VERTICES; break; case GL_PRIMITIVES_SUBMITTED_ARB: - index = 1; + index = PIPE_STAT_QUERY_IA_PRIMITIVES; break; case GL_VERTEX_SHADER_INVOCATIONS_ARB: - index = 2; + index = PIPE_STAT_QUERY_VS_INVOCATIONS; break; case GL_GEOMETRY_SHADER_INVOCATIONS: - index = 3; + index = PIPE_STAT_QUERY_GS_INVOCATIONS; break; case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: - index = 4; + index = PIPE_STAT_QUERY_GS_PRIMITIVES; break; case GL_CLIPPING_INPUT_PRIMITIVES_ARB: - index = 5; + index = PIPE_STAT_QUERY_C_INVOCATIONS; break; case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: - index = 6; + index = PIPE_STAT_QUERY_C_PRIMITIVES; break; case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: - index = 7; + index = PIPE_STAT_QUERY_PS_INVOCATIONS; break; case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - index = 8; + index = PIPE_STAT_QUERY_HS_INVOCATIONS; break; case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: - index = 9; + index = PIPE_STAT_QUERY_DS_INVOCATIONS; break; case GL_COMPUTE_SHADER_INVOCATIONS_ARB: - index = 10; + index = PIPE_STAT_QUERY_CS_INVOCATIONS; break; default: unreachable("Unexpected target"); diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_cb_rasterpos.c 
mesa-19.0.1/src/mesa/state_tracker/st_cb_rasterpos.c --- mesa-18.3.3/src/mesa/state_tracker/st_cb_rasterpos.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_cb_rasterpos.c 2019-03-31 23:16:37.000000000 +0000 @@ -208,6 +208,10 @@ rs->prim.end = 1; rs->prim.start = 0; rs->prim.count = 1; + rs->prim.pad = 0; + rs->prim.num_instances = 1; + rs->prim.base_instance = 0; + rs->prim.is_indirect = 0; return rs; } diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_cb_texture.c mesa-19.0.1/src/mesa/state_tracker/st_cb_texture.c --- mesa-18.3.3/src/mesa/state_tracker/st_cb_texture.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_cb_texture.c 2019-03-31 23:16:37.000000000 +0000 @@ -1192,7 +1192,6 @@ return false; cso_save_state(cso, (CSO_BIT_FRAGMENT_SAMPLER_VIEWS | - CSO_BIT_FRAGMENT_SAMPLERS | CSO_BIT_VERTEX_ELEMENTS | CSO_BIT_AUX_VERTEX_BUFFER_SLOT | CSO_BIT_FRAMEBUFFER | @@ -1216,8 +1215,6 @@ { struct pipe_sampler_view templ; struct pipe_sampler_view *sampler_view; - struct pipe_sampler_state sampler = {0}; - const struct pipe_sampler_state *samplers[1] = {&sampler}; memset(&templ, 0, sizeof(templ)); templ.target = PIPE_BUFFER; @@ -1237,8 +1234,6 @@ cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, 1, &sampler_view); pipe_sampler_view_reference(&sampler_view, NULL); - - cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 1, samplers); } /* Framebuffer_state */ @@ -1248,11 +1243,9 @@ fb.width = surface->width; fb.height = surface->height; fb.nr_cbufs = 1; - pipe_surface_reference(&fb.cbufs[0], surface); + fb.cbufs[0] = surface; cso_set_framebuffer(cso, &fb); - - pipe_surface_reference(&fb.cbufs[0], NULL); } cso_set_viewport_dims(cso, surface->width, surface->height, FALSE); diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_context.c mesa-19.0.1/src/mesa/state_tracker/st_context.c --- mesa-18.3.3/src/mesa/state_tracker/st_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_context.c 2019-03-31 
23:16:37.000000000 +0000 @@ -462,6 +462,12 @@ screen->get_param(screen, PIPE_CAP_TGSI_PACK_HALF_FLOAT); st->has_multi_draw_indirect = screen->get_param(screen, PIPE_CAP_MULTI_DRAW_INDIRECT); + st->has_single_pipe_stat = + screen->get_param(screen, PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE); + st->has_indep_blend_func = + screen->get_param(screen, PIPE_CAP_INDEP_BLEND_FUNC); + st->needs_rgb_dst_alpha_override = + screen->get_param(screen, PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND); st->has_hw_atomics = screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_context.h mesa-19.0.1/src/mesa/state_tracker/st_context.h --- mesa-18.3.3/src/mesa/state_tracker/st_context.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -127,6 +127,9 @@ boolean has_shareable_shaders; boolean has_half_float_packing; boolean has_multi_draw_indirect; + boolean has_single_pipe_stat; + boolean has_indep_blend_func; + boolean needs_rgb_dst_alpha_override; boolean can_bind_const_buffer_as_vertex; /** @@ -193,6 +196,8 @@ /** This masks out unused shader resources. Only valid in draw calls. */ uint64_t active_states; + unsigned pin_thread_counter; /* for L3 thread pinning on AMD Zen */ + /* If true, further analysis of states is required to know if something * has changed. Used mainly for shaders. 
*/ @@ -222,14 +227,12 @@ struct pipe_sampler_state sampler; struct pipe_sampler_state atlas_sampler; enum pipe_format tex_format; - void *vs; struct st_bitmap_cache cache; } bitmap; /** for glDraw/CopyPixels */ struct { void *zs_shaders[4]; - void *vert_shaders[2]; /**< ureg shaders */ } drawpix; /** Cache of glDrawPixels images */ @@ -276,7 +279,8 @@ /** for drawing with st_util_vertex */ struct pipe_vertex_element util_velems[3]; - void *passthrough_fs; /**< simple pass-through frag shader */ + /** passthrough vertex shader matching the util_velem attributes */ + void *passthrough_vs; enum pipe_texture_target internal_target; diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_draw.c mesa-19.0.1/src/mesa/state_tracker/st_draw.c --- mesa-18.3.3/src/mesa/state_tracker/st_draw.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -58,6 +58,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "util/u_cpu_detect.h" #include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_prim.h" @@ -66,6 +67,13 @@ #include "draw/draw_context.h" #include "cso_cache/cso_context.h" +#if defined(PIPE_OS_LINUX) && !defined(ANDROID) +#include +#define HAVE_SCHED_GETCPU 1 +#else +#define sched_getcpu() 0 +#define HAVE_SCHED_GETCPU 0 +#endif /** * Set the restart index. @@ -122,12 +130,38 @@ st->gfx_shaders_may_be_dirty) { st_validate_state(st, ST_PIPELINE_RENDER); } + + struct pipe_context *pipe = st->pipe; + + /* Pin threads regularly to the same Zen CCX that the main thread is + * running on. The main thread can move between CCXs. 
+ */ + if (unlikely(HAVE_SCHED_GETCPU && /* Linux */ + /* AMD Zen */ + util_cpu_caps.nr_cpus != util_cpu_caps.cores_per_L3 && + /* no glthread */ + ctx->CurrentClientDispatch != ctx->MarshalExec && + /* driver support */ + pipe->set_context_param && + /* do it occasionally */ + ++st->pin_thread_counter % 512 == 0)) { + int cpu = sched_getcpu(); + if (cpu >= 0) { + unsigned L3_cache = cpu / util_cpu_caps.cores_per_L3; + + pipe->set_context_param(pipe, + PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE, + L3_cache); + } + } } /** * This function gets plugged into the VBO module and is called when * we have something to render. * Basically, translate the information into the format expected by gallium. + * + * Try to keep this logic in sync with st_feedback_draw_vbo. */ static void st_draw_vbo(struct gl_context *ctx, diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_draw_feedback.c mesa-19.0.1/src/mesa/state_tracker/st_draw_feedback.c --- mesa-18.3.3/src/mesa/state_tracker/st_draw_feedback.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_draw_feedback.c 2019-03-31 23:16:37.000000000 +0000 @@ -85,30 +85,9 @@ /** - * Helper for drawing current vertex arrays. - */ -static void -draw_arrays(struct draw_context *draw, unsigned mode, - unsigned start, unsigned count) -{ - struct pipe_draw_info info; - - util_draw_init_info(&info); - - info.mode = mode; - info.start = start; - info.count = count; - info.min_index = start; - info.max_index = start + count - 1; - - draw_vbo(draw, &info); -} - - -/** * Called by VBO to draw arrays when in selection or feedback mode and * to implement glRasterPos. - * This is very much like the normal draw_vbo() function above. + * This function mirrors the normal st_draw_vbo(). * Look at code refactoring some day. 
*/ void @@ -127,17 +106,27 @@ struct pipe_context *pipe = st->pipe; struct draw_context *draw = st_get_draw_context(st); const struct st_vertex_program *vp; + struct st_vp_variant *vp_variant; const struct pipe_shader_state *vs; struct pipe_vertex_buffer vbuffers[PIPE_MAX_SHADER_INPUTS]; + unsigned num_vbuffers = 0; struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {NULL}; struct pipe_transfer *ib_transfer = NULL; - GLuint attr, i; + GLuint i; const void *mapped_indices = NULL; + struct pipe_draw_info info; if (!draw) return; + /* Initialize pipe_draw_info. */ + info.primitive_restart = false; + info.vertices_per_patch = ctx->TessCtrlProgram.patch_vertices; + info.indirect = NULL; + info.count_from_stream_output = NULL; + info.restart_index = 0; + st_flush_bitmap_cache(st); st_invalidate_readpix_cache(st); @@ -148,10 +137,11 @@ /* must get these after state validation! */ vp = st->vp; - vs = &st->vp_variant->tgsi; + vp_variant = st->vp_variant; + vs = &vp_variant->tgsi; - if (!st->vp_variant->draw_shader) { - st->vp_variant->draw_shader = draw_create_vertex_shader(draw, vs); + if (!vp_variant->draw_shader) { + vp_variant->draw_shader = draw_create_vertex_shader(draw, vs); } /* @@ -164,64 +154,30 @@ draw_set_viewport_states(draw, 0, 1, &st->state.viewport[0]); draw_set_clip_state(draw, &st->state.clip); draw_set_rasterizer_state(draw, &st->state.rasterizer, NULL); - draw_bind_vertex_shader(draw, st->vp_variant->draw_shader); + draw_bind_vertex_shader(draw, vp_variant->draw_shader); set_feedback_vertex_format(ctx); - /* loop over TGSI shader inputs to determine vertex buffer - * and attribute info - */ - for (attr = 0; attr < vp->num_inputs; attr++) { - const GLuint mesaAttr = vp->index_to_input[attr]; - const struct gl_vertex_buffer_binding *binding; - const struct gl_array_attributes *attrib; - void *map; - - _mesa_draw_attrib_and_binding(ctx, mesaAttr, &attrib, &binding); - - if 
(_mesa_is_bufferobj(binding->BufferObj)) { - /* Attribute data is in a VBO. */ - struct st_buffer_object *stobj = st_buffer_object(binding->BufferObj); - assert(stobj->buffer); - - vbuffers[attr].buffer.resource = NULL; - vbuffers[attr].is_user_buffer = false; - pipe_resource_reference(&vbuffers[attr].buffer.resource, stobj->buffer); - vbuffers[attr].buffer_offset = _mesa_draw_binding_offset(binding); - velements[attr].src_offset = - _mesa_draw_attributes_relative_offset(attrib); - - /* map the attrib buffer */ - map = pipe_buffer_map(pipe, vbuffers[attr].buffer.resource, - PIPE_TRANSFER_READ, - &vb_transfer[attr]); - draw_set_mapped_vertex_buffer(draw, attr, map, - vbuffers[attr].buffer.resource->width0); - } - else { - /* Attribute data is in a user space array. */ - vbuffers[attr].buffer.user = attrib->Ptr; - vbuffers[attr].is_user_buffer = true; - vbuffers[attr].buffer_offset = 0; - velements[attr].src_offset = 0; - - draw_set_mapped_vertex_buffer(draw, attr, - vbuffers[attr].buffer.user, ~0); + /* Must setup these after state validation! 
*/ + /* Setup arrays */ + st_setup_arrays(st, vp, vp_variant, velements, vbuffers, &num_vbuffers); + /* Setup current values as userspace arrays */ + st_setup_current_user(st, vp, vp_variant, velements, vbuffers, &num_vbuffers); + + /* Map all buffers and tell draw about their mapping */ + for (unsigned buf = 0; buf < num_vbuffers; ++buf) { + struct pipe_vertex_buffer *vbuffer = &vbuffers[buf]; + + if (vbuffer->is_user_buffer) { + draw_set_mapped_vertex_buffer(draw, buf, vbuffer->buffer.user, ~0); + } else { + void *map = pipe_buffer_map(pipe, vbuffer->buffer.resource, + PIPE_TRANSFER_READ, &vb_transfer[buf]); + draw_set_mapped_vertex_buffer(draw, buf, map, + vbuffer->buffer.resource->width0); } - - /* common-case setup */ - vbuffers[attr].stride = binding->Stride; /* in bytes */ - velements[attr].instance_divisor = 0; - velements[attr].vertex_buffer_index = attr; - velements[attr].src_format = st_pipe_vertex_format(attrib); - assert(velements[attr].src_format); - - /* tell draw about this attribute */ -#if 0 - draw_set_vertex_buffer(draw, attr, &vbuffer[attr]); -#endif } - draw_set_vertex_buffers(draw, 0, vp->num_inputs, vbuffers); + draw_set_vertex_buffers(draw, 0, num_vbuffers, vbuffers); draw_set_vertex_elements(draw, vp->num_inputs, velements); unsigned start = 0; @@ -244,9 +200,23 @@ mapped_indices = ib->ptr; } + info.index_size = ib->index_size; + info.min_index = min_index; + info.max_index = max_index; + info.has_user_indices = true; + info.index.user = mapped_indices; + draw_set_indexes(draw, (ubyte *) mapped_indices, index_size, ~0); + + if (ctx->Array._PrimitiveRestart) { + info.primitive_restart = true; + info.restart_index = _mesa_primitive_restart_index(ctx, info.index_size); + } + } else { + info.index_size = 0; + info.has_user_indices = false; } /* set the constant buffer */ @@ -257,7 +227,23 @@ /* draw here */ for (i = 0; i < nr_prims; i++) { - draw_arrays(draw, prims[i].mode, start + prims[i].start, prims[i].count); + info.count = prims[i].count; 
+ + if (!info.count) + continue; + + info.mode = prims[i].mode; + info.start = start + prims[i].start; + info.start_instance = prims[i].base_instance; + info.instance_count = prims[i].num_instances; + info.index_bias = prims[i].basevertex; + info.drawid = prims[i].draw_id; + if (!ib) { + info.min_index = info.start; + info.max_index = info.start + info.count - 1; + } + + draw_vbo(draw, &info); } @@ -271,11 +257,10 @@ } out_unref_vertex: - for (attr = 0; attr < vp->num_inputs; attr++) { - if (vb_transfer[attr]) - pipe_buffer_unmap(pipe, vb_transfer[attr]); - draw_set_mapped_vertex_buffer(draw, attr, NULL, 0); - pipe_vertex_buffer_unreference(&vbuffers[attr]); + for (unsigned buf = 0; buf < num_vbuffers; ++buf) { + if (vb_transfer[buf]) + pipe_buffer_unmap(pipe, vb_transfer[buf]); + draw_set_mapped_vertex_buffer(draw, buf, NULL, 0); } - draw_set_vertex_buffers(draw, 0, vp->num_inputs, NULL); + draw_set_vertex_buffers(draw, 0, num_vbuffers, NULL); } diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_extensions.c mesa-19.0.1/src/mesa/state_tracker/st_extensions.c --- mesa-18.3.3/src/mesa/state_tracker/st_extensions.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_extensions.c 2019-03-31 23:16:37.000000000 +0000 @@ -183,7 +183,8 @@ continue; supported_irs = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_SUPPORTED_IRS); - if (!(supported_irs & (1 << PIPE_SHADER_IR_TGSI))) + if (!(supported_irs & ((1 << PIPE_SHADER_IR_TGSI) | + (1 << PIPE_SHADER_IR_NIR)))) continue; } @@ -222,8 +223,13 @@ pc->MaxUniformComponents = MIN2(pc->MaxUniformComponents, MAX_UNIFORMS * 4); + /* For ARB programs, prog_src_register::Index is a signed 13-bit number. + * This gives us a limit of 4096 values - but we may need to generate + * internal values in addition to what the source program uses. So, we + * drop the limit one step lower, to 2048, to be safe. 
+ */ pc->MaxParameters = - pc->MaxNativeParameters = pc->MaxUniformComponents / 4; + pc->MaxNativeParameters = MIN2(pc->MaxUniformComponents / 4, 2048); pc->MaxInputComponents = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_INPUTS) * 4; pc->MaxOutputComponents = @@ -333,6 +339,8 @@ c->GLSLOptimizeConservatively = screen->get_param(screen, PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY); + c->GLSLTessLevelsAsInputs = + screen->get_param(screen, PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS); c->LowerTessLevel = true; c->LowerCsDerivedVariables = true; c->PrimitiveRestartForPatches = @@ -359,10 +367,7 @@ c->Program[MESA_SHADER_VERTEX].MaxAttribs = MIN2(c->Program[MESA_SHADER_VERTEX].MaxAttribs, 16); - /* PIPE_SHADER_CAP_MAX_INPUTS for the FS specifies the maximum number - * of inputs. It's always 2 colors + N generic inputs. */ - c->MaxVarying = screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, - PIPE_SHADER_CAP_MAX_INPUTS); + c->MaxVarying = screen->get_param(screen, PIPE_CAP_MAX_VARYINGS); c->MaxVarying = MIN2(c->MaxVarying, MAX_VARYING); c->MaxGeometryOutputVertices = screen->get_param(screen, PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES); @@ -703,6 +708,7 @@ { o(ARB_occlusion_query), PIPE_CAP_OCCLUSION_QUERY }, { o(ARB_occlusion_query2), PIPE_CAP_OCCLUSION_QUERY }, { o(ARB_pipeline_statistics_query), PIPE_CAP_QUERY_PIPELINE_STATISTICS }, + { o(ARB_pipeline_statistics_query), PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE }, { o(ARB_point_sprite), PIPE_CAP_POINT_SPRITE }, { o(ARB_polygon_offset_clamp), PIPE_CAP_POLYGON_OFFSET_CLAMP }, { o(ARB_post_depth_coverage), PIPE_CAP_POST_DEPTH_COVERAGE }, @@ -740,6 +746,7 @@ { o(EXT_draw_buffers2), PIPE_CAP_INDEP_BLEND_ENABLE }, { o(EXT_memory_object), PIPE_CAP_MEMOBJ }, { o(EXT_memory_object_fd), PIPE_CAP_MEMOBJ }, + { o(EXT_multisampled_render_to_texture), PIPE_CAP_SURFACE_SAMPLE_COUNT }, { o(EXT_semaphore), PIPE_CAP_FENCE_SIGNAL }, { o(EXT_semaphore_fd), PIPE_CAP_FENCE_SIGNAL }, { o(EXT_texture_array), PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS 
}, @@ -759,6 +766,7 @@ { o(NV_conditional_render), PIPE_CAP_CONDITIONAL_RENDER }, { o(NV_fill_rectangle), PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE }, { o(NV_primitive_restart), PIPE_CAP_PRIMITIVE_RESTART }, + { o(NV_shader_atomic_float), PIPE_CAP_TGSI_ATOMFADD }, { o(NV_texture_barrier), PIPE_CAP_TEXTURE_BARRIER }, { o(NVX_gpu_memory_info), PIPE_CAP_QUERY_MEMORY_INFO }, /* GL_NV_point_sprite is not supported by gallium because we don't @@ -767,14 +775,11 @@ { o(OES_standard_derivatives), PIPE_CAP_SM3 }, { o(OES_texture_float_linear), PIPE_CAP_TEXTURE_FLOAT_LINEAR }, { o(OES_texture_half_float_linear), PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR }, + { o(OES_texture_view), PIPE_CAP_SAMPLER_VIEW_TARGET }, }; /* Required: render target and sampler support */ static const struct st_extension_format_mapping rendertarget_mapping[] = { - { { o(ARB_texture_float) }, - { PIPE_FORMAT_R32G32B32A32_FLOAT, - PIPE_FORMAT_R16G16B16A16_FLOAT } }, - { { o(OES_texture_float) }, { PIPE_FORMAT_R32G32B32A32_FLOAT } }, @@ -786,7 +791,7 @@ PIPE_FORMAT_B10G10R10A2_UINT }, GL_TRUE }, /* at least one format must be supported */ - { { o(EXT_framebuffer_sRGB) }, + { { o(EXT_sRGB) }, { PIPE_FORMAT_A8B8G8R8_SRGB, PIPE_FORMAT_B8G8R8A8_SRGB, PIPE_FORMAT_R8G8B8A8_SRGB }, @@ -802,6 +807,14 @@ { { o(ARB_texture_rg) }, { PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R8G8_UNORM } }, + + { { o(EXT_render_snorm) }, + { PIPE_FORMAT_R8_SNORM, + PIPE_FORMAT_R8G8_SNORM, + PIPE_FORMAT_R8G8B8A8_SNORM, + PIPE_FORMAT_R16_SNORM, + PIPE_FORMAT_R16G16_SNORM, + PIPE_FORMAT_R16G16B16A16_SNORM } }, }; /* Required: depth stencil and sampler support */ @@ -889,6 +902,10 @@ PIPE_FORMAT_R8G8B8A8_SRGB}, GL_TRUE }, /* at least one format must be supported */ + { { o(EXT_texture_sRGB_R8) }, + { PIPE_FORMAT_R8_SRGB }, + GL_TRUE }, + { { o(EXT_texture_type_2_10_10_10_REV) }, { PIPE_FORMAT_R10G10B10A2_UNORM, PIPE_FORMAT_B10G10R10A2_UNORM }, @@ -1312,6 +1329,10 @@ extensions->ARB_texture_buffer_object_rgb32 && extensions->ARB_shader_image_load_store; + 
extensions->EXT_framebuffer_sRGB = + screen->get_param(screen, PIPE_CAP_DEST_SURFACE_SRGB_CONTROL) && + extensions->EXT_sRGB; + /* Unpacking a varying in the fragment shader costs 1 texture indirection. * If the number of available texture indirections is very limited, then we * prefer to disable varying packing rather than run the risk of varying @@ -1406,18 +1427,22 @@ int compute_supported_irs = screen->get_shader_param(screen, PIPE_SHADER_COMPUTE, PIPE_SHADER_CAP_SUPPORTED_IRS); - if (compute_supported_irs & (1 << PIPE_SHADER_IR_TGSI)) { + if (compute_supported_irs & ((1 << PIPE_SHADER_IR_TGSI) | + (1 << PIPE_SHADER_IR_NIR))) { + enum pipe_shader_ir ir = + (compute_supported_irs & PIPE_SHADER_IR_NIR) ? + PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI; uint64_t grid_size[3], block_size[3]; uint64_t max_local_size, max_threads_per_block; - screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, + screen->get_compute_param(screen, ir, PIPE_COMPUTE_CAP_MAX_GRID_SIZE, grid_size); - screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, + screen->get_compute_param(screen, ir, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE, block_size); - screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, + screen->get_compute_param(screen, ir, PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK, &max_threads_per_block); - screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, + screen->get_compute_param(screen, ir, PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE, &max_local_size); @@ -1436,7 +1461,7 @@ if (extensions->ARB_compute_shader) { uint64_t max_variable_threads_per_block = 0; - screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, + screen->get_compute_param(screen, ir, PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK, &max_variable_threads_per_block); @@ -1457,6 +1482,10 @@ } } + extensions->ARB_texture_float = + extensions->OES_texture_half_float && + extensions->OES_texture_float; + if (extensions->EXT_texture_filter_anisotropic && screen->get_paramf(screen, PIPE_CAPF_MAX_TEXTURE_ANISOTROPY) >= 16.0) 
extensions->ARB_texture_filter_anisotropic = GL_TRUE; diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_format.c mesa-19.0.1/src/mesa/state_tracker/st_format.c --- mesa-18.3.3/src/mesa/state_tracker/st_format.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_format.c 2019-03-31 23:16:37.000000000 +0000 @@ -169,6 +169,8 @@ return PIPE_FORMAT_AL88_SRGB; case MESA_FORMAT_L_SRGB8: return PIPE_FORMAT_L8_SRGB; + case MESA_FORMAT_R_SRGB8: + return PIPE_FORMAT_R8_SRGB; case MESA_FORMAT_BGR_SRGB8: return PIPE_FORMAT_R8G8B8_SRGB; case MESA_FORMAT_A8B8G8R8_SRGB: @@ -719,6 +721,8 @@ return MESA_FORMAT_A8L8_SRGB; case PIPE_FORMAT_L8_SRGB: return MESA_FORMAT_L_SRGB8; + case PIPE_FORMAT_R8_SRGB: + return MESA_FORMAT_R_SRGB8; case PIPE_FORMAT_R8G8B8_SRGB: return MESA_FORMAT_BGR_SRGB8; case PIPE_FORMAT_ABGR8888_SRGB: @@ -1423,6 +1427,10 @@ 0 }, { PIPE_FORMAT_L8_SRGB, DEFAULT_SRGBA_FORMATS } }, + { + { GL_SR8_EXT, 0 }, + { PIPE_FORMAT_R8_SRGB, 0 } + }, /* 16-bit float formats */ { @@ -2348,6 +2356,8 @@ bindings |= PIPE_BIND_DEPTH_STENCIL; else if (is_renderbuffer || internalFormat == 3 || internalFormat == 4 || internalFormat == GL_RGB || internalFormat == GL_RGBA || + internalFormat == GL_RGBA2 || + internalFormat == GL_RGB4 || internalFormat == GL_RGBA4 || internalFormat == GL_RGB8 || internalFormat == GL_RGBA8 || internalFormat == GL_BGRA || internalFormat == GL_RGB16F || @@ -2449,7 +2459,7 @@ /* If an sRGB framebuffer is unsupported, sRGB formats behave like linear * formats. 
*/ - if (!ctx->Extensions.EXT_framebuffer_sRGB) { + if (!ctx->Extensions.EXT_sRGB) { internalFormat = _mesa_get_linear_internalformat(internalFormat); } diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_glsl_to_nir.cpp mesa-19.0.1/src/mesa/state_tracker/st_glsl_to_nir.cpp --- mesa-18.3.3/src/mesa/state_tracker/st_glsl_to_nir.cpp 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_glsl_to_nir.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -81,7 +81,7 @@ * on varying-slot w/ the VS outputs) */ static void -st_nir_assign_vs_in_locations(struct gl_program *prog, nir_shader *nir) +st_nir_assign_vs_in_locations(nir_shader *nir) { nir->num_inputs = 0; nir_foreach_variable_safe(var, &nir->inputs) { @@ -103,7 +103,7 @@ * set. */ exec_node_remove(&var->node); - var->data.mode = nir_var_global; + var->data.mode = nir_var_shader_temp; exec_list_push_tail(&nir->globals, &var->node); } } @@ -240,7 +240,6 @@ static void st_nir_assign_uniform_locations(struct gl_context *ctx, struct gl_program *prog, - struct gl_shader_program *shader_program, struct exec_list *uniform_list, unsigned *size) { int max = 0; @@ -254,8 +253,7 @@ * UBO's have their own address spaces, so don't count them towards the * number of global uniforms */ - if ((uniform->data.mode == nir_var_uniform || uniform->data.mode == nir_var_shader_storage) && - uniform->interface_type != NULL) + if (uniform->data.mode == nir_var_mem_ubo || uniform->data.mode == nir_var_mem_ssbo) continue; const struct glsl_type *type = glsl_without_array(uniform->type); @@ -329,7 +327,7 @@ NIR_PASS(progress, nir, nir_opt_if); NIR_PASS(progress, nir, nir_opt_dead_cf); NIR_PASS(progress, nir, nir_opt_cse); - NIR_PASS(progress, nir, nir_opt_peephole_select, 8); + NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true); NIR_PASS(progress, nir, nir_opt_algebraic); NIR_PASS(progress, nir, nir_opt_constant_folding); @@ -587,8 +585,19 @@ static void st_nir_link_shaders(nir_shader **producer, nir_shader **consumer, bool 
scalar) { + if (scalar) { + NIR_PASS_V(*producer, nir_lower_io_to_scalar_early, nir_var_shader_out); + NIR_PASS_V(*consumer, nir_lower_io_to_scalar_early, nir_var_shader_in); + } + nir_lower_io_arrays_to_elements(*producer, *consumer); + st_nir_opts(*producer, scalar); + st_nir_opts(*consumer, scalar); + + if (nir_link_opt_varyings(*producer, *consumer)) + st_nir_opts(*consumer, scalar); + NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out); NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in); @@ -607,7 +616,7 @@ * See the following thread for more details of the problem: * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html */ - nir_variable_mode indirect_mask = nir_var_local; + nir_variable_mode indirect_mask = nir_var_function_temp; NIR_PASS_V(*producer, nir_lower_indirect_derefs, indirect_mask); NIR_PASS_V(*consumer, nir_lower_indirect_derefs, indirect_mask); @@ -617,8 +626,63 @@ } } +static void +st_lower_patch_vertices_in(struct gl_shader_program *shader_prog) +{ + struct gl_linked_shader *linked_tcs = + shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]; + struct gl_linked_shader *linked_tes = + shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]; + + /* If we have a TCS and TES linked together, lower TES patch vertices. */ + if (linked_tcs && linked_tes) { + nir_shader *tcs_nir = linked_tcs->Program->nir; + nir_shader *tes_nir = linked_tes->Program->nir; + + /* The TES input vertex count is the TCS output vertex count, + * lower TES gl_PatchVerticesIn to a constant. 
+ */ + uint32_t tes_patch_verts = tcs_nir->info.tess.tcs_vertices_out; + NIR_PASS_V(tes_nir, nir_lower_patch_vertices, tes_patch_verts, NULL); + } +} + extern "C" { +void +st_nir_lower_wpos_ytransform(struct nir_shader *nir, + struct gl_program *prog, + struct pipe_screen *pscreen) +{ + if (nir->info.stage != MESA_SHADER_FRAGMENT) + return; + + static const gl_state_index16 wposTransformState[STATE_LENGTH] = { + STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM + }; + nir_lower_wpos_ytransform_options wpos_options = { { 0 } }; + + memcpy(wpos_options.state_tokens, wposTransformState, + sizeof(wpos_options.state_tokens)); + wpos_options.fs_coord_origin_upper_left = + pscreen->get_param(pscreen, + PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT); + wpos_options.fs_coord_origin_lower_left = + pscreen->get_param(pscreen, + PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT); + wpos_options.fs_coord_pixel_center_integer = + pscreen->get_param(pscreen, + PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER); + wpos_options.fs_coord_pixel_center_half_integer = + pscreen->get_param(pscreen, + PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER); + + if (nir_lower_wpos_ytransform(nir, &wpos_options)) { + nir_validate_shader(nir, "after nir_lower_wpos_ytransform"); + _mesa_add_state_reference(prog->Parameters, wposTransformState); + } +} + bool st_link_nir(struct gl_context *ctx, struct gl_shader_program *shader_program) @@ -627,49 +691,23 @@ struct pipe_screen *screen = st->pipe->screen; bool is_scalar[MESA_SHADER_STAGES]; - /* Determine scalar property of each shader stage */ + unsigned last_stage = 0; for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { struct gl_linked_shader *shader = shader_program->_LinkedShaders[i]; - enum pipe_shader_type type; - if (shader == NULL) continue; - type = pipe_shader_type_from_mesa(shader->Stage); - is_scalar[i] = screen->get_shader_param(screen, type, PIPE_SHADER_CAP_SCALAR_ISA); - } - - /* Determine first and last stage. 
*/ - unsigned first = MESA_SHADER_STAGES; - unsigned last = 0; - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - if (!shader_program->_LinkedShaders[i]) - continue; - if (first == MESA_SHADER_STAGES) - first = i; - last = i; - } - - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - struct gl_linked_shader *shader = shader_program->_LinkedShaders[i]; - if (shader == NULL) - continue; + /* Determine scalar property of each shader stage */ + enum pipe_shader_type type = pipe_shader_type_from_mesa(shader->Stage); + is_scalar[i] = screen->get_shader_param(screen, type, + PIPE_SHADER_CAP_SCALAR_ISA); st_nir_get_mesa_program(ctx, shader_program, shader); + last_stage = i; - nir_variable_mode mask = (nir_variable_mode) 0; - if (i != first) - mask = (nir_variable_mode)(mask | nir_var_shader_in); - - if (i != last) - mask = (nir_variable_mode)(mask | nir_var_shader_out); - - nir_shader *nir = shader->Program->nir; - - if (is_scalar[i]) - NIR_PASS_V(nir, nir_lower_io_to_scalar_early, mask); - - st_nir_opts(nir, is_scalar[i]); + if (is_scalar[i]) { + NIR_PASS_V(shader->Program->nir, nir_lower_load_const_to_scalar); + } } /* Linking the stages in the opposite order (from fragment to vertex) @@ -677,7 +715,7 @@ * are eliminated if they are (transitively) not used in a later * stage. 
*/ - int next = last; + int next = last_stage; for (int i = next - 1; i >= 0; i--) { struct gl_linked_shader *shader = shader_program->_LinkedShaders[i]; if (shader == NULL) @@ -697,35 +735,8 @@ nir_shader *nir = shader->Program->nir; - /* fragment shaders may need : */ - if (nir->info.stage == MESA_SHADER_FRAGMENT) { - static const gl_state_index16 wposTransformState[STATE_LENGTH] = { - STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM - }; - nir_lower_wpos_ytransform_options wpos_options = { { 0 } }; - struct pipe_screen *pscreen = st->pipe->screen; - - memcpy(wpos_options.state_tokens, wposTransformState, - sizeof(wpos_options.state_tokens)); - wpos_options.fs_coord_origin_upper_left = - pscreen->get_param(pscreen, - PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT); - wpos_options.fs_coord_origin_lower_left = - pscreen->get_param(pscreen, - PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT); - wpos_options.fs_coord_pixel_center_integer = - pscreen->get_param(pscreen, - PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER); - wpos_options.fs_coord_pixel_center_half_integer = - pscreen->get_param(pscreen, - PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER); - - if (nir_lower_wpos_ytransform(nir, &wpos_options)) { - nir_validate_shader(nir, "after nir_lower_wpos_ytransform"); - _mesa_add_state_reference(shader->Program->Parameters, - wposTransformState); - } - } + NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, shader->Program, + st->pipe->screen); NIR_PASS_V(nir, nir_lower_system_values); @@ -757,6 +768,8 @@ prev = i; } + st_lower_patch_vertices_in(shader_program); + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { struct gl_linked_shader *shader = shader_program->_LinkedShaders[i]; if (shader == NULL) @@ -801,7 +814,7 @@ if (nir->info.stage == MESA_SHADER_VERTEX) { /* Needs special handling so drvloc matches the vbo state: */ - st_nir_assign_vs_in_locations(prog, nir); + st_nir_assign_vs_in_locations(nir); /* Re-lower global vars, to deal with any dead VS inputs. 
*/ NIR_PASS_V(nir, nir_lower_global_vars_to_local); @@ -842,7 +855,7 @@ NIR_PASS_V(nir, nir_lower_atomics_to_ssbo, st->ctx->Const.Program[nir->info.stage].MaxAtomicBuffers); - st_nir_assign_uniform_locations(st->ctx, prog, shader_program, + st_nir_assign_uniform_locations(st->ctx, prog, &nir->uniforms, &nir->num_uniforms); if (st->ctx->Const.PackedDriverUniformStorage) { diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_glsl_to_tgsi.cpp mesa-19.0.1/src/mesa/state_tracker/st_glsl_to_tgsi.cpp --- mesa-18.3.3/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -781,6 +781,7 @@ case7(ISHR, LAST, ISHR, USHR, LAST, I64SHR, U64SHR); case7(ATOMIMAX,LAST, ATOMIMAX,ATOMUMAX,LAST, LAST, LAST); case7(ATOMIMIN,LAST, ATOMIMIN,ATOMUMIN,LAST, LAST, LAST); + case7(ATOMUADD,ATOMFADD,ATOMUADD,ATOMUADD,LAST, LAST, LAST); casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ, U64SEQ, U64SEQ); casecomp(SNE, FSNE, USNE, USNE, DSNE, U64SNE, U64SNE); @@ -6204,6 +6205,7 @@ case TGSI_OPCODE_ATOMUMAX: case TGSI_OPCODE_ATOMIMIN: case TGSI_OPCODE_ATOMIMAX: + case TGSI_OPCODE_ATOMFADD: case TGSI_OPCODE_IMG2HND: for (i = num_src - 1; i >= 0; i--) src[i + 1] = src[i]; @@ -7469,25 +7471,17 @@ } void -st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi, - const ubyte outputMapping[], - struct pipe_stream_output_info *so) -{ - if (!glsl_to_tgsi->shader_program->last_vert_prog) - return; - - struct gl_transform_feedback_info *info = - glsl_to_tgsi->shader_program->last_vert_prog->sh.LinkedTransformFeedback; - st_translate_stream_output_info2(info, outputMapping, so); -} - -void -st_translate_stream_output_info2(struct gl_transform_feedback_info *info, +st_translate_stream_output_info(struct gl_transform_feedback_info *info, const ubyte outputMapping[], struct pipe_stream_output_info *so) { unsigned i; + if (!info) { + so->num_outputs = 0; + return; + } + for (i = 0; i < 
info->NumOutputs; i++) { so->output[i].register_index = outputMapping[info->Outputs[i].OutputRegister]; diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_glsl_to_tgsi.h mesa-19.0.1/src/mesa/state_tracker/st_glsl_to_tgsi.h --- mesa-18.3.3/src/mesa/state_tracker/st_glsl_to_tgsi.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_glsl_to_tgsi.h 2019-03-31 23:16:37.000000000 +0000 @@ -61,12 +61,7 @@ GLboolean st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog); void -st_translate_stream_output_info(struct glsl_to_tgsi_visitor *glsl_to_tgsi, - const ubyte outputMapping[], - struct pipe_stream_output_info *so); - -void -st_translate_stream_output_info2(struct gl_transform_feedback_info *info, +st_translate_stream_output_info(struct gl_transform_feedback_info *info, const ubyte outputMapping[], struct pipe_stream_output_info *so); diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_glsl_to_tgsi_private.h mesa-19.0.1/src/mesa/state_tracker/st_glsl_to_tgsi_private.h --- mesa-18.3.3/src/mesa/state_tracker/st_glsl_to_tgsi_private.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_glsl_to_tgsi_private.h 2019-03-31 23:16:37.000000000 +0000 @@ -181,6 +181,7 @@ case TGSI_OPCODE_ATOMUMAX: case TGSI_OPCODE_ATOMIMIN: case TGSI_OPCODE_ATOMIMAX: + case TGSI_OPCODE_ATOMFADD: case TGSI_OPCODE_IMG2HND: return true; default: diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_manager.c mesa-19.0.1/src/mesa/state_tracker/st_manager.c --- mesa-18.3.3/src/mesa/state_tracker/st_manager.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_manager.c 2019-03-31 23:16:37.000000000 +0000 @@ -173,6 +173,26 @@ } +void +st_set_ws_renderbuffer_surface(struct st_renderbuffer *strb, + struct pipe_surface *surf) +{ + pipe_surface_reference(&strb->surface_srgb, NULL); + pipe_surface_reference(&strb->surface_linear, NULL); + + if (util_format_is_srgb(surf->format)) + pipe_surface_reference(&strb->surface_srgb, 
surf); + else + pipe_surface_reference(&strb->surface_linear, surf); + + strb->surface = surf; /* just assign, don't ref */ + pipe_resource_reference(&strb->texture, surf->texture); + + strb->Base.Width = surf->width; + strb->Base.Height = surf->height; +} + + /** * Validate a framebuffer to make sure up-to-date pipe_textures are used. * The context is only used for creating pipe surfaces and for calling @@ -234,21 +254,11 @@ u_surface_default_template(&surf_tmpl, textures[i]); ps = st->pipe->create_surface(st->pipe, textures[i], &surf_tmpl); if (ps) { - struct pipe_surface **psurf = - util_format_is_srgb(ps->format) ? &strb->surface_srgb : - &strb->surface_linear; - - pipe_surface_reference(psurf, ps); - strb->surface = *psurf; - pipe_resource_reference(&strb->texture, ps->texture); - /* ownership transfered */ + st_set_ws_renderbuffer_surface(strb, ps); pipe_surface_reference(&ps, NULL); changed = TRUE; - strb->Base.Width = strb->surface->width; - strb->Base.Height = strb->surface->height; - width = strb->Base.Width; height = strb->Base.Height; } @@ -295,7 +305,7 @@ */ static boolean st_framebuffer_add_renderbuffer(struct st_framebuffer *stfb, - gl_buffer_index idx) + gl_buffer_index idx, bool prefer_srgb) { struct gl_renderbuffer *rb; enum pipe_format format; @@ -318,7 +328,7 @@ break; default: format = stfb->iface->visual->color_format; - if (stfb->Base.Visual.sRGBCapable) + if (prefer_srgb) format = util_format_srgb(format); sw = FALSE; break; @@ -436,6 +446,7 @@ struct st_framebuffer *stfb; struct gl_config mode; gl_buffer_index idx; + bool prefer_srgb = false; if (!stfbi) return NULL; @@ -457,14 +468,15 @@ * format such that util_format_srgb(visual->color_format) can be supported * by the pipe driver. We still need to advertise the capability here. * - * For GLES, however, sRGB framebuffer write is controlled only by the - * capability of the framebuffer. 
There is GL_EXT_sRGB_write_control to - * give applications the control back, but sRGB write is still enabled by - * default. To avoid unexpected results, we should not advertise the - * capability. This could change when we add support for - * EGL_KHR_gl_colorspace. + * For GLES, however, sRGB framebuffer write is initially only controlled + * by the capability of the framebuffer, with GL_EXT_sRGB_write_control + * control is given back to the applications, but GL_FRAMEBUFFER_SRGB is + * still enabled by default since this is the behaviour when + * EXT_sRGB_write_control is not available. Since GL_EXT_sRGB_write_control + * brings GLES on par with desktop GLs EXT_framebuffer_sRGB, in mesa this + * is also expressed by using the same extension flag */ - if (_mesa_is_desktop_gl(st->ctx)) { + if (_mesa_has_EXT_framebuffer_sRGB(st->ctx)) { struct pipe_screen *screen = st->pipe->screen; const enum pipe_format srgb_format = util_format_srgb(stfbi->visual->color_format); @@ -475,8 +487,14 @@ PIPE_TEXTURE_2D, stfbi->visual->samples, stfbi->visual->samples, (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_RENDER_TARGET))) + PIPE_BIND_RENDER_TARGET))) { mode.sRGBCapable = GL_TRUE; + /* Since GL_FRAMEBUFFER_SRGB is enabled by default on GLES we must not + * create renderbuffers with an sRGB format derived from the + * visual->color_format, but we still want sRGB for desktop GL. 
+ */ + prefer_srgb = _mesa_is_desktop_gl(st->ctx); + } } _mesa_initialize_window_framebuffer(&stfb->Base, &mode); @@ -487,13 +505,13 @@ /* add the color buffer */ idx = stfb->Base._ColorDrawBufferIndexes[0]; - if (!st_framebuffer_add_renderbuffer(stfb, idx)) { + if (!st_framebuffer_add_renderbuffer(stfb, idx, prefer_srgb)) { free(stfb); return NULL; } - st_framebuffer_add_renderbuffer(stfb, BUFFER_DEPTH); - st_framebuffer_add_renderbuffer(stfb, BUFFER_ACCUM); + st_framebuffer_add_renderbuffer(stfb, BUFFER_DEPTH, false); + st_framebuffer_add_renderbuffer(stfb, BUFFER_ACCUM, false); stfb->stamp = 0; st_framebuffer_update_attachments(stfb); @@ -800,6 +818,17 @@ struct st_context *st = (struct st_context *) stctxi; _mesa_glthread_init(st->ctx); + + /* Pin all driver threads to one L3 cache for optimal performance + * on AMD Zen. This is only done if glthread is enabled. + * + * If glthread is disabled, st_draw.c re-pins driver threads regularly + * based on the location of the app thread. + */ + struct glthread_state *glthread = st->ctx->GLThread; + if (glthread && st->pipe->set_context_param) { + util_pin_driver_threads_to_random_L3(st->pipe, &glthread->queue.threads[0]); + } } @@ -888,6 +917,9 @@ else if (attribs->flags & ST_CONTEXT_FLAG_HIGH_PRIORITY) ctx_flags |= PIPE_CONTEXT_HIGH_PRIORITY; + if (attribs->flags & ST_CONTEXT_FLAG_RESET_NOTIFICATION_ENABLED) + ctx_flags |= PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET; + pipe = smapi->screen->context_create(smapi->screen, NULL, ctx_flags); if (!pipe) { *error = ST_CONTEXT_ERROR_NO_MEMORY; @@ -1185,7 +1217,8 @@ return FALSE; } - if (!st_framebuffer_add_renderbuffer(stfb, idx)) + if (!st_framebuffer_add_renderbuffer(stfb, idx, + stfb->Base.Visual.sRGBCapable)) return FALSE; st_framebuffer_update_attachments(stfb); diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_manager.h mesa-19.0.1/src/mesa/state_tracker/st_manager.h --- mesa-18.3.3/src/mesa/state_tracker/st_manager.h 2018-04-16 21:31:06.000000000 +0000 +++ 
mesa-19.0.1/src/mesa/state_tracker/st_manager.h 2019-03-31 23:16:37.000000000 +0000 @@ -35,6 +35,8 @@ struct st_context; struct st_framebuffer; struct st_framebuffer_interface; +struct st_renderbuffer; +struct pipe_surface; void st_manager_flush_frontbuffer(struct st_context *st); @@ -56,4 +58,8 @@ void st_manager_flush_swapbuffers(void); +void +st_set_ws_renderbuffer_surface(struct st_renderbuffer *strb, + struct pipe_surface *surf); + #endif /* ST_MANAGER_H */ diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_nir.h mesa-19.0.1/src/mesa/state_tracker/st_nir.h --- mesa-18.3.3/src/mesa/state_tracker/st_nir.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_nir.h 2019-03-31 23:16:37.000000000 +0000 @@ -38,6 +38,10 @@ unsigned lower_2plane, unsigned lower_3plane); bool st_nir_lower_uniforms_to_ubo(struct nir_shader *shader); +void st_nir_lower_wpos_ytransform(struct nir_shader *nir, + struct gl_program *prog, + struct pipe_screen *pscreen); + void st_finalize_nir(struct st_context *st, struct gl_program *prog, struct gl_shader_program *shader_program, struct nir_shader *nir); diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_nir_lower_builtin.c mesa-19.0.1/src/mesa/state_tracker/st_nir_lower_builtin.c --- mesa-18.3.3/src/mesa/state_tracker/st_nir_lower_builtin.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_nir_lower_builtin.c 2019-03-31 23:16:37.000000000 +0000 @@ -107,10 +107,6 @@ memcpy(tokens, element->tokens, sizeof(tokens)); if (path->path[idx]->deref_type == nir_deref_type_array) { - nir_const_value *c = nir_src_as_const_value(path->path[idx]->arr.index); - - assert(c); - /* we need to fixup the array index slot: */ switch (tokens[0]) { case STATE_MODELVIEW_MATRIX: @@ -123,7 +119,7 @@ case STATE_TEXGEN: case STATE_TEXENV_COLOR: case STATE_CLIPPLANE: - tokens[1] = c->u32[0]; + tokens[1] = nir_src_as_uint(path->path[idx]->arr.index); break; } } diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_program.c 
mesa-19.0.1/src/mesa/state_tracker/st_program.c --- mesa-18.3.3/src/mesa/state_tracker/st_program.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_program.c 2019-03-31 23:16:37.000000000 +0000 @@ -458,12 +458,9 @@ } if (stvp->shader_program) { - struct gl_program *prog = stvp->shader_program->last_vert_prog; - if (prog) { - st_translate_stream_output_info2(prog->sh.LinkedTransformFeedback, - stvp->result_to_output, - &stvp->tgsi.stream_output); - } + st_translate_stream_output_info(stvp->Base.sh.LinkedTransformFeedback, + stvp->result_to_output, + &stvp->tgsi.stream_output); st_store_ir_in_disk_cache(st, &stvp->Base, true); return true; @@ -505,7 +502,7 @@ output_semantic_name, output_semantic_index); - st_translate_stream_output_info(stvp->glsl_to_tgsi, + st_translate_stream_output_info(stvp->Base.sh.LinkedTransformFeedback, stvp->result_to_output, &stvp->tgsi.stream_output); @@ -1106,6 +1103,10 @@ key->external.lower_iyuv); } + /* Some of the lowering above may have introduced new varyings */ + nir_shader_gather_info(tgsi.ir.nir, + nir_shader_get_entrypoint(tgsi.ir.nir)); + variant->driver_shader = pipe->create_fs_state(pipe, &tgsi); variant->key = *key; @@ -1417,7 +1418,7 @@ } ureg_destroy(ureg); - st_translate_stream_output_info(glsl_to_tgsi, + st_translate_stream_output_info(prog->sh.LinkedTransformFeedback, outputMapping, &out_state->stream_output); @@ -1464,9 +1465,9 @@ } } - st_translate_stream_output_info2(prog->sh.LinkedTransformFeedback, - outputMapping, - stream_output); + st_translate_stream_output_info(prog->sh.LinkedTransformFeedback, + outputMapping, + stream_output); } /** diff -Nru mesa-18.3.3/src/mesa/state_tracker/tests/meson.build mesa-19.0.1/src/mesa/state_tracker/tests/meson.build --- mesa-18.3.3/src/mesa/state_tracker/tests/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/tests/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -35,8 +35,9 @@ libmesa_st_test_common, 
libmesa_gallium, libglapi, libgallium, libmesa_util, ], - dependencies : [idep_gtest, dep_thread] - ) + dependencies : [idep_gtest, dep_thread], + ), + suite : ['st_mesa'], ) test( @@ -50,5 +51,6 @@ libmesa_util, ], dependencies : [idep_gtest, dep_thread] - ) + ), + suite : ['st_mesa'], ) diff -Nru mesa-18.3.3/src/mesa/swrast/s_texfetch.c mesa-19.0.1/src/mesa/swrast/s_texfetch.c --- mesa-18.3.3/src/mesa/swrast/s_texfetch.c 2018-04-16 21:31:06.000000000 +0000 +++ mesa-19.0.1/src/mesa/swrast/s_texfetch.c 2019-03-31 23:16:37.000000000 +0000 @@ -257,6 +257,7 @@ FETCH_FUNCS(A8L8_SRGB), /* Array sRGB formats */ + FETCH_FUNCS(R_SRGB8), FETCH_FUNCS(L_SRGB8), FETCH_FUNCS(BGR_SRGB8), diff -Nru mesa-18.3.3/src/mesa/swrast/s_texfetch_tmp.h mesa-19.0.1/src/mesa/swrast/s_texfetch_tmp.h --- mesa-18.3.3/src/mesa/swrast/s_texfetch_tmp.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mesa/swrast/s_texfetch_tmp.h 2019-03-31 23:16:37.000000000 +0000 @@ -153,6 +153,7 @@ FETCH_RGBA(R8G8B8A8_SRGB, GLuint, 1) FETCH_RGBA(R8G8B8X8_SRGB, GLuint, 1) FETCH_RGBA(X8B8G8R8_SRGB, GLuint, 1) +FETCH_RGBA(R_SRGB8, GLubyte, 1) FETCH_RGBA(L_SRGB8, GLubyte, 1) FETCH_RGBA(L8A8_SRGB, GLushort, 1) FETCH_RGBA(A8L8_SRGB, GLushort, 2) diff -Nru mesa-18.3.3/src/mesa/tnl/t_draw.c mesa-19.0.1/src/mesa/tnl/t_draw.c --- mesa-18.3.3/src/mesa/tnl/t_draw.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/tnl/t_draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -70,7 +70,7 @@ */ #define CONVERT( TYPE, MACRO ) do { \ GLuint i, j; \ - if (attrib->Normalized) { \ + if (attrib->Format.Normalized) { \ for (i = 0; i < count; i++) { \ const TYPE *in = (TYPE *)ptr; \ for (j = 0; j < sz; j++) { \ @@ -104,8 +104,8 @@ GLuint count ) { GLuint i; - assert(attrib->Normalized); - assert(attrib->Size == 4); + assert(attrib->Format.Normalized); + assert(attrib->Format.Size == 4); for (i = 0; i < count; i++) { const GLubyte *in = (GLubyte *) ptr; /* in is in BGRA order */ *fptr++ = UBYTE_TO_FLOAT(in[2]); /* red */ @@ 
-152,9 +152,9 @@ { GLuint i; GLint j; - const GLint size = attrib->Size; + const GLint size = attrib->Format.Size; - if (attrib->Normalized) { + if (attrib->Format.Normalized) { for (i = 0; i < count; ++i) { const GLfixed *in = (GLfixed *) ptr; for (j = 0; j < size; ++j) { @@ -187,17 +187,17 @@ struct vertex_buffer *VB = &tnl->vb; GLuint stride = binding->Stride; - if (attrib->Type != GL_FLOAT) { - const GLuint sz = attrib->Size; + if (attrib->Format.Type != GL_FLOAT) { + const GLuint sz = attrib->Format.Size; GLubyte *buf = get_space(ctx, count * sz * sizeof(GLfloat)); GLfloat *fptr = (GLfloat *)buf; - switch (attrib->Type) { + switch (attrib->Format.Type) { case GL_BYTE: CONVERT(GLbyte, BYTE_TO_FLOAT); break; case GL_UNSIGNED_BYTE: - if (attrib->Format == GL_BGRA) { + if (attrib->Format.Format == GL_BGRA) { /* See GL_EXT_vertex_array_bgra */ convert_bgra_to_float(binding, attrib, ptr, fptr, count); } @@ -240,11 +240,11 @@ VB->AttribPtr[attr]->start = (GLfloat *)ptr; VB->AttribPtr[attr]->count = count; VB->AttribPtr[attr]->stride = stride; - VB->AttribPtr[attr]->size = attrib->Size; + VB->AttribPtr[attr]->size = attrib->Format.Size; /* This should die, but so should the whole GLvector4f concept: */ - VB->AttribPtr[attr]->flags = (((1<Size)-1) | + VB->AttribPtr[attr]->flags = (((1<Format.Size)-1) | VEC_NOT_WRITEABLE | (stride == 4*sizeof(GLfloat) ? 0 : VEC_BAD_STRIDE)); diff -Nru mesa-18.3.3/src/mesa/tnl/t_split_copy.c mesa-19.0.1/src/mesa/tnl/t_split_copy.c --- mesa-18.3.3/src/mesa/tnl/t_split_copy.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/tnl/t_split_copy.c 2019-03-31 23:16:37.000000000 +0000 @@ -105,13 +105,6 @@ }; -static GLuint -attr_size(const struct gl_array_attributes *attrib) -{ - return attrib->Size * _mesa_sizeof_type(attrib->Type); -} - - /** * Shallow copy one vertex array to another. 
*/ @@ -176,7 +169,7 @@ const GLubyte *ptr = _mesa_vertex_attrib_address(attrib, binding); printf(" array %d at %p:\n", j, (void*) &arrays[j]); printf(" ptr %p, size %d, type 0x%x, stride %d\n", - ptr, attrib->Size, attrib->Type, binding->Stride); + ptr, attrib->Format.Size, attrib->Format.Type, binding->Stride); if (0) { GLint k = prims[i].start + prims[i].count - 1; GLfloat *last = (GLfloat *) (ptr + binding->Stride * k); @@ -276,7 +269,7 @@ csr += copy->varying[i].size; #ifdef NAN_CHECK - if (srcarray->Type == GL_FLOAT) { + if (srcarray->Format.Type == GL_FLOAT) { GLuint k; GLfloat *f = (GLfloat *) srcptr; for (k = 0; k < srcarray->Size; k++) { @@ -458,8 +451,8 @@ copy->varying[j].attr = i; copy->varying[j].array = ©->array[i]; - copy->varying[j].size = attr_size(attrib); - copy->vertex_size += attr_size(attrib); + copy->varying[j].size = attrib->Format._ElementSize; + copy->vertex_size += attrib->Format._ElementSize; if (_mesa_is_bufferobj(vbo) && !_mesa_bufferobj_mapped(vbo, MAP_INTERNAL)) @@ -535,16 +528,10 @@ struct gl_vertex_buffer_binding *dstbind = ©->varying[i].dstbinding; struct gl_array_attributes *dstattr = ©->varying[i].dstattribs; - dstattr->Size = srcattr->Size; - dstattr->Type = srcattr->Type; - dstattr->Format = GL_RGBA; - dstbind->Stride = copy->vertex_size; + dstattr->Format = srcattr->Format; dstattr->Ptr = copy->dstbuf + offset; - dstattr->Normalized = srcattr->Normalized; - dstattr->Integer = srcattr->Integer; - dstattr->Doubles = srcattr->Doubles; + dstbind->Stride = copy->vertex_size; dstbind->BufferObj = ctx->Shared->NullBufferObj; - dstattr->_ElementSize = srcattr->_ElementSize; dst->BufferBinding = dstbind; dst->VertexAttrib = dstattr; diff -Nru mesa-18.3.3/src/mesa/vbo/vbo_context.c mesa-19.0.1/src/mesa/vbo/vbo_context.c --- mesa-18.3.3/src/mesa/vbo/vbo_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/vbo/vbo_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -58,11 +58,8 @@ { memset(attrib, 0, sizeof(*attrib)); - 
attrib->Size = size; - attrib->Type = GL_FLOAT; - attrib->Format = GL_RGBA; + vbo_set_vertex_format(&attrib->Format, size, GL_FLOAT); attrib->Stride = 0; - attrib->_ElementSize = size * sizeof(GLfloat); attrib->Ptr = pointer; } diff -Nru mesa-18.3.3/src/mesa/vbo/vbo_exec_api.c mesa-19.0.1/src/mesa/vbo/vbo_exec_api.c --- mesa-18.3.3/src/mesa/vbo/vbo_exec_api.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/vbo/vbo_exec_api.c 2019-03-31 23:16:37.000000000 +0000 @@ -195,7 +195,7 @@ exec->vtx.attrtype[i]); } - if (exec->vtx.attrtype[i] != vbo->current[i].Type || + if (exec->vtx.attrtype[i] != vbo->current[i].Format.Type || memcmp(current, tmp, 4 * sizeof(GLfloat) * dmul) != 0) { memcpy(current, tmp, 4 * sizeof(GLfloat) * dmul); @@ -205,14 +205,9 @@ * directly. */ /* Size here is in components - not bytes */ - vbo->current[i].Size = exec->vtx.attrsz[i] / dmul; - vbo->current[i]._ElementSize = - vbo->current[i].Size * sizeof(GLfloat) * dmul; - vbo->current[i].Type = exec->vtx.attrtype[i]; - vbo->current[i].Integer = - vbo_attrtype_to_integer_flag(exec->vtx.attrtype[i]); - vbo->current[i].Doubles = - vbo_attrtype_to_double_flag(exec->vtx.attrtype[i]); + vbo_set_vertex_format(&vbo->current[i].Format, + exec->vtx.attrsz[i] / dmul, + exec->vtx.attrtype[i]); /* This triggers rather too much recalculation of Mesa state * that doesn't get used (eg light positions). @@ -803,11 +798,14 @@ ctx->Driver.CurrentExecPrimitive = mode; ctx->Exec = ctx->BeginEnd; + /* We may have been called from a display list, in which case we should * leave dlist.c's dispatch table in place. 
*/ - if (ctx->CurrentClientDispatch == ctx->OutsideBeginEnd) { - ctx->CurrentClientDispatch = ctx->BeginEnd; + if (ctx->CurrentClientDispatch == ctx->MarshalExec) { + ctx->CurrentServerDispatch = ctx->Exec; + } else if (ctx->CurrentClientDispatch == ctx->OutsideBeginEnd) { + ctx->CurrentClientDispatch = ctx->Exec; _glapi_set_dispatch(ctx->CurrentClientDispatch); } else { assert(ctx->CurrentClientDispatch == ctx->Save); @@ -858,8 +856,11 @@ } ctx->Exec = ctx->OutsideBeginEnd; - if (ctx->CurrentClientDispatch == ctx->BeginEnd) { - ctx->CurrentClientDispatch = ctx->OutsideBeginEnd; + + if (ctx->CurrentClientDispatch == ctx->MarshalExec) { + ctx->CurrentServerDispatch = ctx->Exec; + } else if (ctx->CurrentClientDispatch == ctx->BeginEnd) { + ctx->CurrentClientDispatch = ctx->Exec; _glapi_set_dispatch(ctx->CurrentClientDispatch); } diff -Nru mesa-18.3.3/src/mesa/vbo/vbo_exec_draw.c mesa-19.0.1/src/mesa/vbo/vbo_exec_draw.c --- mesa-18.3.3/src/mesa/vbo/vbo_exec_draw.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/vbo/vbo_exec_draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -191,12 +191,8 @@ GLbitfield vao_enabled = _vbo_get_vao_enabled_from_vbo(mode, exec->vtx.enabled); /* At first disable arrays no longer needed */ - GLbitfield mask = vao->_Enabled & ~vao_enabled; - while (mask) { - const int vao_attr = u_bit_scan(&mask); - _mesa_disable_vertex_array_attrib(ctx, vao, vao_attr); - } - assert((~vao_enabled & vao->_Enabled) == 0); + _mesa_disable_vertex_array_attribs(ctx, vao, VERT_BIT_ALL & ~vao_enabled); + assert((~vao_enabled & vao->Enabled) == 0); /* Bind the buffer object */ const GLuint stride = exec->vtx.vertex_size*sizeof(GLfloat); @@ -208,7 +204,7 @@ */ const GLubyte *const vao_to_vbo_map = _vbo_attribute_alias_map[mode]; /* Now set the enabled arrays */ - mask = vao_enabled; + GLbitfield mask = vao_enabled; while (mask) { const int vao_attr = u_bit_scan(&mask); const GLubyte vbo_attr = vao_to_vbo_map[vao_attr]; @@ -222,13 +218,12 @@ /* Set and enable 
*/ _vbo_set_attrib_format(ctx, vao, vao_attr, buffer_offset, size, type, offset); - if ((vao->_Enabled & VERT_BIT(vao_attr)) == 0) - _mesa_enable_vertex_array_attrib(ctx, vao, vao_attr); /* The vao is initially created with all bindings set to 0. */ assert(vao->VertexAttrib[vao_attr].BufferBindingIndex == 0); } - assert(vao_enabled == vao->_Enabled); + _mesa_enable_vertex_array_attribs(ctx, vao, vao_enabled); + assert(vao_enabled == vao->Enabled); assert(!_mesa_is_bufferobj(exec->vtx.bufferobj) || (vao_enabled & ~vao->VertexAttribBufferMask) == 0); diff -Nru mesa-18.3.3/src/mesa/vbo/vbo_private.h mesa-19.0.1/src/mesa/vbo/vbo_private.h --- mesa-18.3.3/src/mesa/vbo/vbo_private.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/vbo/vbo_private.h 2019-03-31 23:16:37.000000000 +0000 @@ -115,6 +115,16 @@ } +static inline void +vbo_set_vertex_format(struct gl_vertex_format* vertex_format, + GLubyte size, GLenum16 type) +{ + _mesa_set_vertex_format(vertex_format, size, type, GL_RGBA, GL_FALSE, + vbo_attrtype_to_integer_flag(type), + vbo_attrtype_to_double_flag(type)); +} + + /** * Return default component values for the given format. * The return type is an array of fi_types, because that's how we declare @@ -224,7 +234,7 @@ * to the VAO. But but that is done already unconditionally in * _mesa_update_array_format called above. */ - assert((vao->NewArrays | ~vao->_Enabled) & VERT_BIT(attr)); + assert((vao->NewArrays | ~vao->Enabled) & VERT_BIT(attr)); vao->VertexAttrib[attr].Ptr = ADD_POINTERS(buffer_offset, offset); } diff -Nru mesa-18.3.3/src/mesa/vbo/vbo_save_api.c mesa-19.0.1/src/mesa/vbo/vbo_save_api.c --- mesa-18.3.3/src/mesa/vbo/vbo_save_api.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/vbo/vbo_save_api.c 2019-03-31 23:16:37.000000000 +0000 @@ -426,7 +426,7 @@ return false; /* If the enabled arrays are not the same we are not equal. 
*/ - if (vao_enabled != vao->_Enabled) + if (vao_enabled != vao->Enabled) return false; /* Check the buffer binding at 0 */ @@ -450,15 +450,14 @@ const struct gl_array_attributes *attrib = &vao->VertexAttrib[attr]; if (attrib->RelativeOffset + vao->BufferBinding[0].Offset != off) return false; - if (attrib->Type != tp) + if (attrib->Format.Type != tp) return false; - if (attrib->Size != size[vbo_attr]) + if (attrib->Format.Size != size[vbo_attr]) return false; - assert(attrib->Format == GL_RGBA); - assert(attrib->Enabled == GL_TRUE); - assert(attrib->Normalized == GL_FALSE); - assert(attrib->Integer == vbo_attrtype_to_integer_flag(tp)); - assert(attrib->Doubles == vbo_attrtype_to_double_flag(tp)); + assert(attrib->Format.Format == GL_RGBA); + assert(attrib->Format.Normalized == GL_FALSE); + assert(attrib->Format.Integer == vbo_attrtype_to_integer_flag(tp)); + assert(attrib->Format.Doubles == vbo_attrtype_to_double_flag(tp)); assert(attrib->BufferBindingIndex == 0); } @@ -515,9 +514,9 @@ _vbo_set_attrib_format(ctx, *vao, vao_attr, buffer_offset, size[vbo_attr], type[vbo_attr], offset[vbo_attr]); _mesa_vertex_attrib_binding(ctx, *vao, vao_attr, 0); - _mesa_enable_vertex_array_attrib(ctx, *vao, vao_attr); } - assert(vao_enabled == (*vao)->_Enabled); + _mesa_enable_vertex_array_attribs(ctx, *vao, vao_enabled); + assert(vao_enabled == (*vao)->Enabled); assert((vao_enabled & ~(*vao)->VertexAttribBufferMask) == 0); /* Finalize and freeze the VAO */ diff -Nru mesa-18.3.3/src/mesa/vbo/vbo_save_draw.c mesa-19.0.1/src/mesa/vbo/vbo_save_draw.c --- mesa-18.3.3/src/mesa/vbo/vbo_save_draw.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/vbo/vbo_save_draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -47,13 +47,13 @@ { struct vbo_context *vbo = vbo_context(ctx); - mask &= vao->_Enabled; + mask &= vao->Enabled; while (mask) { const int i = u_bit_scan(&mask); const struct gl_array_attributes *attrib = &vao->VertexAttrib[i]; struct gl_array_attributes *currval = 
&vbo->current[shift + i]; - const GLubyte size = attrib->Size; - const GLenum16 type = attrib->Type; + const GLubyte size = attrib->Format.Size; + const GLenum16 type = attrib->Format.Type; fi_type tmp[8]; int dmul = 1; @@ -66,17 +66,11 @@ else COPY_CLEAN_4V_TYPE_AS_UNION(tmp, size, *data, type); - if (type != currval->Type || + if (type != currval->Format.Type || memcmp(currval->Ptr, tmp, 4 * sizeof(GLfloat) * dmul) != 0) { memcpy((fi_type*)currval->Ptr, tmp, 4 * sizeof(GLfloat) * dmul); - currval->Size = size; - currval->_ElementSize = size * sizeof(GLfloat) * dmul; - currval->Type = type; - currval->Integer = vbo_attrtype_to_integer_flag(type); - currval->Doubles = vbo_attrtype_to_double_flag(type); - currval->Normalized = GL_FALSE; - currval->Format = GL_RGBA; + vbo_set_vertex_format(&currval->Format, size, type); ctx->NewState |= state; } diff -Nru mesa-18.3.3/src/mesa/vbo/vbo_save_loopback.c mesa-19.0.1/src/mesa/vbo/vbo_save_loopback.c --- mesa-18.3.3/src/mesa/vbo/vbo_save_loopback.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/vbo/vbo_save_loopback.c 2019-03-31 23:16:37.000000000 +0000 @@ -139,7 +139,7 @@ { la[*nr].index = shift + i; la[*nr].offset = vao->VertexAttrib[i].RelativeOffset; - la[*nr].func = vert_attrfunc[vao->VertexAttrib[i].Size - 1]; + la[*nr].func = vert_attrfunc[vao->VertexAttrib[i].Format.Size - 1]; (*nr)++; } @@ -155,23 +155,23 @@ * the NV attributes entrypoints: */ const struct gl_vertex_array_object *vao = node->VAO[VP_MODE_FF]; - GLbitfield mask = vao->_Enabled & VERT_BIT_MAT_ALL; + GLbitfield mask = vao->Enabled & VERT_BIT_MAT_ALL; while (mask) { const int i = u_bit_scan(&mask); append_attr(&nr, la, i, VBO_MATERIAL_SHIFT, vao); } vao = node->VAO[VP_MODE_SHADER]; - mask = vao->_Enabled & ~(VERT_BIT_POS | VERT_BIT_GENERIC0); + mask = vao->Enabled & ~(VERT_BIT_POS | VERT_BIT_GENERIC0); while (mask) { const int i = u_bit_scan(&mask); append_attr(&nr, la, i, 0, vao); } /* The last in the list should be the vertex provoking 
attribute */ - if (vao->_Enabled & VERT_BIT_GENERIC0) { + if (vao->Enabled & VERT_BIT_GENERIC0) { append_attr(&nr, la, VERT_ATTRIB_GENERIC0, 0, vao); - } else if (vao->_Enabled & VERT_BIT_POS) { + } else if (vao->Enabled & VERT_BIT_POS) { append_attr(&nr, la, VERT_ATTRIB_POS, 0, vao); } diff -Nru mesa-18.3.3/src/meson.build mesa-19.0.1/src/meson.build --- mesa-18.3.3/src/meson.build 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -63,6 +63,9 @@ if with_gallium_vc4 or with_gallium_v3d subdir('broadcom') endif +if with_gallium_freedreno + subdir('freedreno') +endif if with_dri_i965 or with_intel_vk subdir('intel') endif diff -Nru mesa-18.3.3/src/util/00-mesa-defaults.conf mesa-19.0.1/src/util/00-mesa-defaults.conf --- mesa-18.3.3/src/util/00-mesa-defaults.conf 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/util/00-mesa-defaults.conf 2019-03-31 23:16:37.000000000 +0000 @@ -21,6 +21,8 @@ built-ins (specifically gl_VertexID), which causes the vertex shaders to fail to compile. +* Applications that are not suitable for adapative sync are blacklisted here. + TODO: document the other workarounds. 
--> @@ -227,6 +229,9 @@ + + @@ -4230,6 +4364,10 @@ + + + + @@ -4458,6 +4596,21 @@ Placeholder for validation enums to be defined for VK_EXT_Validation_flags extension + + + + Placeholder for validation feature enable enums to be defined for VK_EXT_validation_features extension + + + + + + + + + + Placeholder for validation feature disable enums to be defined for VK_EXT_validation_features extension + @@ -4682,10 +4835,18 @@ + + + + + + + + @@ -4706,36 +4867,50 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - VkResult vkCreateInstance @@ -5059,7 +5234,7 @@ VkDeviceSize stride VkQueryResultFlags flags - + VkResult vkCreateBuffer VkDevice device const VkBufferCreateInfo* pCreateInfo @@ -5759,19 +5934,6 @@ const VkAllocationCallbacks* pAllocator VkSwapchainKHR* pSwapchains - - VkResult vkCreateMirSurfaceKHR - VkInstance instance - const VkMirSurfaceCreateInfoKHR* pCreateInfo - const VkAllocationCallbacks* pAllocator - VkSurfaceKHR* pSurface - - - VkBool32 vkGetPhysicalDeviceMirPresentationSupportKHR - VkPhysicalDevice physicalDevice - uint32_t queueFamilyIndex - MirConnection* connection - void vkDestroySurfaceKHR VkInstance instance @@ -6825,89 +6987,84 @@ uint32_t stride - VkResult vkCompileDeferredNVX + VkResult vkCompileDeferredNV VkDevice device VkPipeline pipeline uint32_t shader - VkResult vkCreateAccelerationStructureNVX + VkResult vkCreateAccelerationStructureNV VkDevice device - const VkAccelerationStructureCreateInfoNVX* pCreateInfo + const VkAccelerationStructureCreateInfoNV* pCreateInfo const VkAllocationCallbacks* pAllocator - VkAccelerationStructureNVX* pAccelerationStructure + VkAccelerationStructureNV* pAccelerationStructure - void vkDestroyAccelerationStructureNVX + void vkDestroyAccelerationStructureNV VkDevice device - VkAccelerationStructureNVX accelerationStructure + VkAccelerationStructureNV accelerationStructure const VkAllocationCallbacks* 
pAllocator - void vkGetAccelerationStructureMemoryRequirementsNVX - VkDevice device - const VkAccelerationStructureMemoryRequirementsInfoNVX* pInfo - VkMemoryRequirements2KHR* pMemoryRequirements - - - void vkGetAccelerationStructureScratchMemoryRequirementsNVX + void vkGetAccelerationStructureMemoryRequirementsNV VkDevice device - const VkAccelerationStructureMemoryRequirementsInfoNVX* pInfo + const VkAccelerationStructureMemoryRequirementsInfoNV* pInfo VkMemoryRequirements2KHR* pMemoryRequirements - VkResult vkBindAccelerationStructureMemoryNVX + VkResult vkBindAccelerationStructureMemoryNV VkDevice device uint32_t bindInfoCount - const VkBindAccelerationStructureMemoryInfoNVX* pBindInfos + const VkBindAccelerationStructureMemoryInfoNV* pBindInfos - - void vkCmdCopyAccelerationStructureNVX + + void vkCmdCopyAccelerationStructureNV VkCommandBuffer commandBuffer - VkAccelerationStructureNVX dst - VkAccelerationStructureNVX src - VkCopyAccelerationStructureModeNVX mode + VkAccelerationStructureNV dst + VkAccelerationStructureNV src + VkCopyAccelerationStructureModeNV mode - - void vkCmdWriteAccelerationStructurePropertiesNVX + + void vkCmdWriteAccelerationStructuresPropertiesNV VkCommandBuffer commandBuffer - VkAccelerationStructureNVX accelerationStructure + uint32_t accelerationStructureCount + const VkAccelerationStructureNV* pAccelerationStructures VkQueryType queryType VkQueryPool queryPool - uint32_t query + uint32_t firstQuery - - void vkCmdBuildAccelerationStructureNVX + + void vkCmdBuildAccelerationStructureNV VkCommandBuffer commandBuffer - VkAccelerationStructureTypeNVX type - uint32_t instanceCount + const VkAccelerationStructureInfoNV* pInfo VkBuffer instanceData VkDeviceSize instanceOffset - uint32_t geometryCount - const VkGeometryNVX* pGeometries - VkBuildAccelerationStructureFlagsNVX flags VkBool32 update - VkAccelerationStructureNVX dst - VkAccelerationStructureNVX src + VkAccelerationStructureNV dst + VkAccelerationStructureNV src VkBuffer scratch 
VkDeviceSize scratchOffset - - void vkCmdTraceRaysNVX + + void vkCmdTraceRaysNV VkCommandBuffer commandBuffer VkBuffer raygenShaderBindingTableBuffer VkDeviceSize raygenShaderBindingOffset - VkBuffer missShaderBindingTableBuffer + VkBuffer missShaderBindingTableBuffer VkDeviceSize missShaderBindingOffset VkDeviceSize missShaderBindingStride - VkBuffer hitShaderBindingTableBuffer + VkBuffer hitShaderBindingTableBuffer VkDeviceSize hitShaderBindingOffset VkDeviceSize hitShaderBindingStride + VkBuffer callableShaderBindingTableBuffer + VkDeviceSize callableShaderBindingOffset + VkDeviceSize callableShaderBindingStride uint32_t width uint32_t height + uint32_t depth - VkResult vkGetRaytracingShaderHandlesNVX + VkResult vkGetRayTracingShaderGroupHandlesNV VkDevice device VkPipeline pipeline uint32_t firstGroup @@ -6916,18 +7073,18 @@ void* pData - VkResult vkGetAccelerationStructureHandleNVX + VkResult vkGetAccelerationStructureHandleNV VkDevice device - VkAccelerationStructureNVX accelerationStructure + VkAccelerationStructureNV accelerationStructure size_t dataSize void* pData - VkResult vkCreateRaytracingPipelinesNVX + VkResult vkCreateRayTracingPipelinesNV VkDevice device VkPipelineCache pipelineCache uint32_t createInfoCount - const VkRaytracingPipelineCreateInfoNVX* pCreateInfos + const VkRayTracingPipelineCreateInfoNV* pCreateInfos const VkAllocationCallbacks* pAllocator VkPipeline* pPipelines @@ -6937,6 +7094,11 @@ VkImage image VkImageDrmFormatModifierPropertiesEXT* pProperties + + VkDeviceAddress vkGetBufferDeviceAddressEXT + VkDevice device + const VkBufferDeviceAddressInfoEXT* pInfo + @@ -7619,15 +7781,11 @@ - + + - - - - - @@ -7771,14 +7929,38 @@ - - + + + + + + + + + + + + + + - - + + + + + + + + + + + + + + @@ -8417,10 +8599,12 @@ - + - - + + + + @@ -9355,11 +9539,8 @@ - - - - - + + @@ -9470,70 +9651,76 @@ - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - + + + + + + + + + + + + + @@ -9712,10 +9899,13 @@ - + - - + + + + + @@ -9773,10 +9963,12 @@ - + - - + + + + @@ -9786,16 +9978,22 @@ - + - - - - - - - - + + + + + + + + + + + + + + @@ -9898,7 +10096,7 @@ - + @@ -9938,15 +10136,25 @@ - + - - - - - - - + + + + + + + + + + + + + + + + + @@ -9962,10 +10170,12 @@ - + - - + + + + @@ -9976,13 +10186,13 @@ - + - + @@ -10058,16 +10268,22 @@ - - - - - - - + - - + + + + + + + + + + + + + + @@ -10100,5 +10316,55 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru mesa-18.3.3/src/vulkan/util/gen_enum_to_str.py mesa-19.0.1/src/vulkan/util/gen_enum_to_str.py --- mesa-18.3.3/src/vulkan/util/gen_enum_to_str.py 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/vulkan/util/gen_enum_to_str.py 2019-03-31 23:16:37.000000000 +0000 @@ -101,6 +101,10 @@ #include #include + #ifdef __cplusplus + extern "C" { + #endif + % for ext in extensions: #define _${ext.name}_number (${ext.number}) % endfor @@ -109,6 +113,10 @@ const char * vk_${enum.name[2:]}_to_str(${enum.name} input); % endfor + #ifdef __cplusplus + } /* extern "C" */ + #endif + #endif"""), output_encoding='utf-8') diff -Nru mesa-18.3.3/src/vulkan/util/vk_util.c mesa-19.0.1/src/vulkan/util/vk_util.c --- mesa-18.3.3/src/vulkan/util/vk_util.c 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/src/vulkan/util/vk_util.c 2019-03-31 23:16:37.000000000 +0000 @@ -29,12 +29,12 @@ uint32_t vk_get_driver_version(void) { - const char *minor_string = strchr(VERSION, '.'); + const char *minor_string = strchr(PACKAGE_VERSION, '.'); const char *patch_string = minor_string ? strchr(minor_string + 1, '.') : NULL; - int major = atoi(VERSION); + int major = atoi(PACKAGE_VERSION); int minor = minor_string ? atoi(minor_string + 1) : 0; int patch = patch_string ? 
atoi(patch_string + 1) : 0; - if (strstr(VERSION, "devel")) { + if (strstr(PACKAGE_VERSION, "devel")) { if (patch == 0) { patch = 99; if (minor == 0) { diff -Nru mesa-18.3.3/src/vulkan/wsi/wsi_common.c mesa-19.0.1/src/vulkan/wsi/wsi_common.c --- mesa-18.3.3/src/vulkan/wsi/wsi_common.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/vulkan/wsi/wsi_common.c 2019-03-31 23:16:37.000000000 +0000 @@ -278,8 +278,8 @@ .sType = VK_STRUCTURE_TYPE_WSI_FORMAT_MODIFIER_PROPERTIES_LIST_MESA, .pNext = NULL, }; - VkFormatProperties2KHR format_props = { - .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2_KHR, + VkFormatProperties2 format_props = { + .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, .pNext = &modifier_props_list, }; wsi->GetPhysicalDeviceFormatProperties2KHR(wsi->pdevice, @@ -379,13 +379,13 @@ .pNext = NULL, .implicit_sync = true, }; - const VkExportMemoryAllocateInfoKHR memory_export_info = { - .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR, + const VkExportMemoryAllocateInfo memory_export_info = { + .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO, .pNext = &memory_wsi_info, .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, }; - const VkMemoryDedicatedAllocateInfoKHR memory_dedicated_info = { - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR, + const VkMemoryDedicatedAllocateInfo memory_dedicated_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, .pNext = &memory_export_info, .image = image->image, .buffer = VK_NULL_HANDLE, @@ -431,7 +431,7 @@ for (uint32_t p = 0; p < image->num_planes; p++) { const VkImageSubresource image_subresource = { - .aspectMask = VK_IMAGE_ASPECT_PLANE_0_BIT_KHR << p, + .aspectMask = VK_IMAGE_ASPECT_PLANE_0_BIT << p, .mipLevel = 0, .arrayLayer = 0, }; @@ -504,8 +504,8 @@ uint32_t linear_size = linear_stride * pCreateInfo->imageExtent.height; linear_size = align_u32(linear_size, 4096); - const VkExternalMemoryBufferCreateInfoKHR prime_buffer_external_info = { - .sType = 
VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR, + const VkExternalMemoryBufferCreateInfo prime_buffer_external_info = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, .pNext = NULL, .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, }; @@ -530,13 +530,13 @@ .pNext = NULL, .implicit_sync = true, }; - const VkExportMemoryAllocateInfoKHR prime_memory_export_info = { - .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR, + const VkExportMemoryAllocateInfo prime_memory_export_info = { + .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO, .pNext = &memory_wsi_info, .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, }; - const VkMemoryDedicatedAllocateInfoKHR prime_memory_dedicated_info = { - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR, + const VkMemoryDedicatedAllocateInfo prime_memory_dedicated_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, .pNext = &prime_memory_export_info, .image = VK_NULL_HANDLE, .buffer = image->prime.buffer, @@ -585,8 +585,8 @@ wsi->GetImageMemoryRequirements(chain->device, image->image, &reqs); - const VkMemoryDedicatedAllocateInfoKHR memory_dedicated_info = { - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR, + const VkMemoryDedicatedAllocateInfo memory_dedicated_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, .pNext = NULL, .image = image->image, .buffer = VK_NULL_HANDLE, diff -Nru mesa-18.3.3/src/vulkan/wsi/wsi_common_display.c mesa-19.0.1/src/vulkan/wsi/wsi_common_display.c --- mesa-18.3.3/src/vulkan/wsi/wsi_common_display.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/vulkan/wsi/wsi_common_display.c 2019-03-31 23:16:37.000000000 +0000 @@ -834,6 +834,7 @@ VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; return VK_SUCCESS; diff -Nru mesa-18.3.3/src/vulkan/wsi/wsi_common_wayland.c 
mesa-19.0.1/src/vulkan/wsi/wsi_common_wayland.c --- mesa-18.3.3/src/vulkan/wsi/wsi_common_wayland.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/vulkan/wsi/wsi_common_wayland.c 2019-03-31 23:16:37.000000000 +0000 @@ -508,6 +508,7 @@ VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; return VK_SUCCESS; diff -Nru mesa-18.3.3/src/vulkan/wsi/wsi_common_x11.c mesa-19.0.1/src/vulkan/wsi/wsi_common_x11.c --- mesa-18.3.3/src/vulkan/wsi/wsi_common_x11.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/vulkan/wsi/wsi_common_x11.c 2019-03-31 23:16:37.000000000 +0000 @@ -515,6 +515,7 @@ VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; return VK_SUCCESS; diff -Nru mesa-18.3.3/.travis.yml mesa-19.0.1/.travis.yml --- mesa-18.3.3/.travis.yml 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/.travis.yml 2019-03-31 23:16:37.000000000 +0000 @@ -1,7 +1,6 @@ language: c -sudo: false -dist: trusty +dist: xenial cache: apt: true @@ -16,7 +15,7 @@ - GLPROTO_VERSION=glproto-1.4.17 - DRI2PROTO_VERSION=dri2proto-2.8 - LIBPCIACCESS_VERSION=libpciaccess-0.13.4 - - LIBDRM_VERSION=libdrm-2.4.74 + - LIBDRM_VERSION=libdrm-2.4.97 - XCBPROTO_VERSION=xcb-proto-1.13 - RANDRPROTO_VERSION=randrproto-1.3.0 - LIBXRANDR_VERSION=libXrandr-1.3.0 @@ -35,20 +34,19 @@ - env: - LABEL="meson Vulkan" - BUILD=meson - - DRI_DRIVERS="" - - GALLIUM_DRIVERS="" + - UNWIND="false" + - DRI_LOADERS="-Dglx=disabled -Dgbm=false -Degl=false -Dplatforms=x11,wayland,drm -Dosmesa=none" + - GALLIUM_ST="-Ddri3=true -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=disabled" - VULKAN_DRIVERS="intel,amd" - - LLVM_VERSION=6.0 + - LLVM_VERSION=7 - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" addons: 
apt: sources: - - llvm-toolchain-trusty-6.0 - # llvm-6 requires libstdc++4.9 which is not in main repo - - ubuntu-toolchain-r-test + - sourceline: 'deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main' + key_url: https://apt.llvm.org/llvm-snapshot.gpg.key packages: - # From sources above - - llvm-6.0-dev + - llvm-7-dev # Common - xz-utils - libexpat1-dev @@ -56,23 +54,27 @@ - libelf-dev - python3.5 - python3-pip + - python3-setuptools - env: - LABEL="meson loaders/classic DRI" - BUILD=meson + - UNWIND="false" + - DRI_LOADERS="-Dglx=dri -Dgbm=true -Degl=true -Dplatforms=x11,wayland,drm,surfaceless -Dosmesa=classic" - DRI_DRIVERS="i915,i965,r100,r200,swrast,nouveau" - - GALLIUM_DRIVERS="" - - VULKAN_DRIVERS="" + - GALLIUM_ST="-Ddri3=true -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=disabled" addons: apt: packages: - xz-utils - x11proto-xf86vidmode-dev + - libxxf86vm-dev - libexpat1-dev - libx11-xcb-dev - libxdamage-dev - libxfixes-dev - python3.5 - python3-pip + - python3-setuptools - env: - LABEL="make loaders/classic DRI" - BUILD=make @@ -89,11 +91,200 @@ packages: - xz-utils - x11proto-xf86vidmode-dev + - libxxf86vm-dev - libexpat1-dev - libx11-xcb-dev - libxdamage-dev - libxfixes-dev - python3-pip + - python3-setuptools + - env: + # NOTE: Building SWR is 2x (yes two) times slower than all the other + # gallium drivers combined. + # Start this early so that it doesn't hunder the run time. 
+ - LABEL="meson Gallium Drivers SWR" + - BUILD=meson + - UNWIND="true" + - DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false" + - GALLIUM_ST="-Ddri3=false -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=disabled" + - GALLIUM_DRIVERS="swr" + - LLVM_VERSION=6.0 + - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" + addons: + apt: + packages: + - llvm-6.0-dev + # Common + - xz-utils + - libexpat1-dev + - libx11-xcb-dev + - libelf-dev + - libunwind8-dev + - python3.5 + - python3-pip + - python3-setuptools + - env: + - LABEL="meson Gallium Drivers RadeonSI" + - BUILD=meson + - UNWIND="true" + - DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false" + - GALLIUM_ST="-Ddri3=false -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=disabled" + - GALLIUM_DRIVERS="radeonsi" + - LLVM_VERSION=7 + - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" + addons: + apt: + sources: + - sourceline: 'deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main' + key_url: https://apt.llvm.org/llvm-snapshot.gpg.key + packages: + # From sources above + - llvm-7-dev + # Common + - xz-utils + - libexpat1-dev + - libx11-xcb-dev + - libelf-dev + - libunwind8-dev + - python3.5 + - python3-pip + - python3-setuptools + - env: + - LABEL="meson Gallium Drivers Other" + - BUILD=meson + - UNWIND="true" + - DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false" + - GALLIUM_ST="-Ddri3=false -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=disabled" + - GALLIUM_DRIVERS="i915,nouveau,kmsro,r300,r600,freedreno,svga,swrast,v3d,vc4,virgl,etnaviv" + - LLVM_VERSION=5.0 + - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" + addons: + apt: + packages: + # LLVM packaging is broken and misses these dependencies + - libedit-dev + - llvm-5.0-dev + # Common + - 
xz-utils + - libexpat1-dev + - libx11-xcb-dev + - libelf-dev + - libunwind8-dev + - python3.5 + - python3-pip + - python3-setuptools + - env: + - LABEL="meson Gallium ST Clover LLVM-5.0" + - BUILD=meson + - UNWIND="true" + - DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false" + - GALLIUM_ST="-Ddri3=false -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=icd" + - GALLIUM_DRIVERS="r600" + - LLVM_VERSION=5.0 + - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" + addons: + apt: + packages: + - libclc-dev + # LLVM packaging is broken and misses these dependencies + - libedit-dev + - llvm-5.0-dev + - clang-5.0 + - libclang-5.0-dev + # Common + - xz-utils + - libexpat1-dev + - libx11-xcb-dev + - libelf-dev + - libunwind8-dev + - python3-pip + - python3-setuptools + - env: + - LABEL="meson Gallium ST Clover LLVM-6.0" + - BUILD=meson + - UNWIND="true" + - DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false" + - GALLIUM_ST="-Ddri3=false -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=icd" + - GALLIUM_DRIVERS="r600" + - LLVM_VERSION=6.0 + - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" + addons: + apt: + packages: + - libclc-dev + - llvm-6.0-dev + - clang-6.0 + - libclang-6.0-dev + # Common + - xz-utils + - libexpat1-dev + - libx11-xcb-dev + - libelf-dev + - libunwind8-dev + - python3.5 + - python3-pip + - python3-setuptools + - env: + - LABEL="meson Gallium ST Clover LLVM-7" + - BUILD=meson + - UNWIND="true" + - DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false" + - GALLIUM_ST="-Ddri3=false -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=icd" + - GALLIUM_DRIVERS="r600,radeonsi" + - LLVM_VERSION=7 + - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" + addons: + apt: + sources: + - sourceline: 'deb 
http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main' + key_url: https://apt.llvm.org/llvm-snapshot.gpg.key + packages: + - libclc-dev + # From sources above + - llvm-7-dev + - clang-7 + - libclang-7-dev + # Common + - xz-utils + - libexpat1-dev + - libx11-xcb-dev + - libelf-dev + - libunwind8-dev + - python3.5 + - python3-pip + - python3-setuptools + - env: + - LABEL="meson Gallium ST Other" + - BUILD=meson + - UNWIND="true" + - DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false" + - GALLIUM_ST="-Ddri3=true -Dgallium-vdpau=true -Dgallium-xvmc=true -Dgallium-omx=bellagio -Dgallium-va=true -Dgallium-xa=true -Dgallium-nine=true -Dgallium-opencl=disabled -Dosmesa=gallium" + # We need swrast for osmesa and nine. + # Nouveau supports, or builds at least against all ST. + - GALLIUM_DRIVERS="nouveau,swrast" + - LLVM_VERSION=5.0 + - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" + addons: + apt: + packages: + - llvm-5.0-dev + # LLVM packaging is broken and misses these dependencies + - libedit-dev + # Nine requires gcc 4.6... which is the one we have right ? + - libxvmc-dev + # Build locally, for now. + #- libvdpau-dev + #- libva-dev + - libomxil-bellagio-dev + # Common + - xz-utils + - libexpat1-dev + - libx11-xcb-dev + - libelf-dev + - libunwind8-dev + - python3.5 + - python3-pip + - python3-setuptools - env: # NOTE: Building SWR is 2x (yes two) times slower than all the other # gallium drivers combined. 
@@ -112,12 +303,7 @@ - LIBUNWIND_FLAGS="--enable-libunwind" addons: apt: - sources: - - llvm-toolchain-trusty-6.0 - # llvm-6 requires libstdc++4.9 which is not in main repo - - ubuntu-toolchain-r-test packages: - # From sources above - llvm-6.0-dev # Common - xz-utils @@ -126,12 +312,13 @@ - libelf-dev - libunwind8-dev - python3-pip + - python3-setuptools - env: - LABEL="make Gallium Drivers RadeonSI" - BUILD=make - MAKEFLAGS="-j4" - MAKE_CHECK_COMMAND="true" - - LLVM_VERSION=6.0 + - LLVM_VERSION=7 - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl" - DRI_DRIVERS="" @@ -142,12 +329,11 @@ addons: apt: sources: - - llvm-toolchain-trusty-6.0 - # llvm-6 requires libstdc++4.9 which is not in main repo - - ubuntu-toolchain-r-test + - sourceline: 'deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main' + key_url: https://apt.llvm.org/llvm-snapshot.gpg.key packages: # From sources above - - llvm-6.0-dev + - llvm-7-dev # Common - xz-utils - libexpat1-dev @@ -155,6 +341,7 @@ - libelf-dev - libunwind8-dev - python3-pip + - python3-setuptools - env: - LABEL="make Gallium Drivers Other" - BUILD=make @@ -162,23 +349,17 @@ - MAKE_CHECK_COMMAND="true" - LLVM_VERSION=3.9 - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" - # New binutils linker is required for llvm-3.9 - - OVERRIDE_PATH=/usr/lib/binutils-2.26/bin - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl" - DRI_DRIVERS="" - GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa" - - GALLIUM_DRIVERS="i915,nouveau,pl111,r300,r600,freedreno,svga,swrast,v3d,vc4,virgl,etnaviv,imx" + - GALLIUM_DRIVERS="i915,nouveau,kmsro,r300,r600,freedreno,svga,swrast,v3d,vc4,virgl,etnaviv" - VULKAN_DRIVERS="" - LIBUNWIND_FLAGS="--enable-libunwind" addons: apt: - sources: - - llvm-toolchain-trusty-3.9 packages: - - binutils-2.26 # LLVM packaging is broken and misses these dependencies 
- libedit-dev - # From sources above - llvm-3.9-dev # Common - xz-utils @@ -187,6 +368,7 @@ - libelf-dev - libunwind8-dev - python3-pip + - python3-setuptools - env: - LABEL="make Gallium ST Clover LLVM-3.9" - BUILD=make @@ -194,10 +376,6 @@ - MAKE_CHECK_COMMAND="true" - LLVM_VERSION=3.9 - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" - - OVERRIDE_CC=gcc-4.7 - - OVERRIDE_CXX=g++-4.7 - # New binutils linker is required for llvm-3.9 - - OVERRIDE_PATH=/usr/lib/binutils-2.26/bin - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl" - DRI_DRIVERS="" - GALLIUM_ST="--disable-dri --enable-opencl --enable-opencl-icd --enable-llvm --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa" @@ -206,15 +384,10 @@ - LIBUNWIND_FLAGS="--enable-libunwind" addons: apt: - sources: - - llvm-toolchain-trusty-3.9 packages: - - binutils-2.26 - libclc-dev # LLVM packaging is broken and misses these dependencies - libedit-dev - - g++-4.7 - # From sources above - llvm-3.9-dev - clang-3.9 - libclang-3.9-dev @@ -225,6 +398,7 @@ - libelf-dev - libunwind8-dev - python3-pip + - python3-setuptools - env: - LABEL="make Gallium ST Clover LLVM-4.0" - BUILD=make @@ -232,8 +406,6 @@ - MAKE_CHECK_COMMAND="true" - LLVM_VERSION=4.0 - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" - - OVERRIDE_CC=gcc-4.8 - - OVERRIDE_CXX=g++-4.8 - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl" - DRI_DRIVERS="" - GALLIUM_ST="--disable-dri --enable-opencl --enable-opencl-icd --enable-llvm --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa" @@ -242,14 +414,10 @@ - LIBUNWIND_FLAGS="--enable-libunwind" addons: apt: - sources: - - llvm-toolchain-trusty-4.0 packages: - libclc-dev # LLVM packaging is broken and misses these dependencies - libedit-dev - - g++-4.8 - # From sources above - llvm-4.0-dev - clang-4.0 - libclang-4.0-dev @@ -260,6 +428,7 @@ - libelf-dev - libunwind8-dev - python3-pip + - 
python3-setuptools - env: - LABEL="make Gallium ST Clover LLVM-5.0" - BUILD=make @@ -267,8 +436,6 @@ - MAKE_CHECK_COMMAND="true" - LLVM_VERSION=5.0 - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" - - OVERRIDE_CC=gcc-4.8 - - OVERRIDE_CXX=g++-4.8 - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl" - DRI_DRIVERS="" - GALLIUM_ST="--disable-dri --enable-opencl --enable-opencl-icd --enable-llvm --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa" @@ -277,14 +444,10 @@ - LIBUNWIND_FLAGS="--enable-libunwind" addons: apt: - sources: - - llvm-toolchain-trusty-5.0 packages: - libclc-dev # LLVM packaging is broken and misses these dependencies - libedit-dev - - g++-4.8 - # From sources above - llvm-5.0-dev - clang-5.0 - libclang-5.0-dev @@ -295,6 +458,7 @@ - libelf-dev - libunwind8-dev - python3-pip + - python3-setuptools - env: - LABEL="make Gallium ST Clover LLVM-6.0" - BUILD=make @@ -305,18 +469,13 @@ - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl" - DRI_DRIVERS="" - GALLIUM_ST="--disable-dri --enable-opencl --enable-opencl-icd --enable-llvm --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa" - - GALLIUM_DRIVERS="r600,radeonsi" + - GALLIUM_DRIVERS="r600" - VULKAN_DRIVERS="" - LIBUNWIND_FLAGS="--enable-libunwind" addons: apt: - sources: - - llvm-toolchain-trusty-6.0 - # llvm-6 requires libstdc++4.9 which is not in main repo - - ubuntu-toolchain-r-test packages: - libclc-dev - # From sources above - llvm-6.0-dev - clang-6.0 - libclang-6.0-dev @@ -327,6 +486,7 @@ - libelf-dev - libunwind8-dev - python3-pip + - python3-setuptools - env: - LABEL="make Gallium ST Clover LLVM-7" - BUILD=make @@ -343,10 +503,8 @@ addons: apt: sources: - - sourceline: 'deb http://apt.llvm.org/trusty/ llvm-toolchain-trusty-7 main' + - sourceline: 'deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main' key_url: 
https://apt.llvm.org/llvm-snapshot.gpg.key - # llvm-7 requires libstdc++4.9 which is not in main repo - - ubuntu-toolchain-r-test packages: - libclc-dev # From sources above @@ -364,7 +522,7 @@ - BUILD=make - MAKEFLAGS="-j4" - MAKE_CHECK_COMMAND="true" - - LLVM_VERSION=3.3 + - LLVM_VERSION=3.5 - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl" - DRI_DRIVERS="" @@ -378,8 +536,8 @@ addons: apt: packages: - # We actually want to test against llvm-3.3 - - llvm-3.3-dev + # We actually want to test against llvm-3.3, yet 3.5 is available + - llvm-3.5-dev # Nine requires gcc 4.6... which is the one we have right ? - libxvmc-dev # Build locally, for now. @@ -395,12 +553,13 @@ - libelf-dev - libunwind8-dev - python3-pip + - python3-setuptools - env: - LABEL="make Vulkan" - BUILD=make - MAKEFLAGS="-j4" - MAKE_CHECK_COMMAND="make -C src/gtest check && make -C src/intel check" - - LLVM_VERSION=6.0 + - LLVM_VERSION=7 - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl --with-platforms=x11,wayland" - DRI_DRIVERS="" @@ -411,18 +570,18 @@ addons: apt: sources: - - llvm-toolchain-trusty-6.0 - # llvm-6 requires libstdc++4.9 which is not in main repo - - ubuntu-toolchain-r-test + - sourceline: 'deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main' + key_url: https://apt.llvm.org/llvm-snapshot.gpg.key packages: # From sources above - - llvm-6.0-dev + - llvm-7-dev # Common - xz-utils - libexpat1-dev - libx11-xcb-dev - libelf-dev - python3-pip + - python3-setuptools - env: - LABEL="scons" - BUILD=scons @@ -447,14 +606,15 @@ - SCONS_TARGET="llvm=1" # Keep it symmetrical to the make build. 
- SCONS_CHECK_COMMAND="scons llvm=1 check" - - LLVM_VERSION=3.3 + - LLVM_VERSION=3.5 - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" addons: apt: packages: # LLVM packaging is broken and misses these dependencies - libedit-dev - - llvm-3.3-dev + # We actually want to test against llvm-3.3, yet 3.5 is available + - llvm-3.5-dev # Common - xz-utils - x11proto-xf86vidmode-dev @@ -472,12 +632,7 @@ - SCONS_CHECK_COMMAND="true" addons: apt: - sources: - - llvm-toolchain-trusty-6.0 - # llvm-6 requires libstdc++4.9 which is not in main repo - - ubuntu-toolchain-r-test packages: - # From sources above - llvm-6.0-dev # Common - xz-utils @@ -495,6 +650,9 @@ - env: - LABEL="macOS meson" - BUILD=meson + - UNWIND="false" + - DRI_LOADERS="-Dglx=dri -Dgbm=false -Degl=false -Dplatforms=x11 -Dosmesa=none" + - GALLIUM_ST="-Ddri3=true -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=disabled" os: osx before_install: @@ -522,10 +680,8 @@ install: # Install a more modern meson from pip, since the version in the - # ubuntu repos is often quite old. This requires python>=3.5, so - # let's make it default + # ubuntu repos is often quite old. - if test "x$BUILD" = xmeson; then - sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.5 10; pip3 install --user meson; pip3 install --user mako; fi @@ -541,16 +697,6 @@ pip2 install --user mako; fi - # Since libdrm gets updated in configure.ac regularly, try to pick up the - # latest version from there. - - for line in `grep "^LIBDRM.*_REQUIRED=" configure.ac`; do - old_ver=`echo $LIBDRM_VERSION | sed 's/libdrm-//'`; - new_ver=`echo $line | sed 's/.*REQUIRED=//'`; - if `echo "$old_ver,$new_ver" | tr ',' '\n' | sort -Vc 2> /dev/null`; then - export LIBDRM_VERSION="libdrm-$new_ver"; - fi; - done - # Install dependencies where we require specific versions (or where # disallowed by Travis CI's package whitelisting). 
@@ -612,7 +758,7 @@ tar -axvf $WAYLAND_PROTOCOLS_VERSION.tar.xz (cd $WAYLAND_PROTOCOLS_VERSION && ./configure --prefix=$HOME/prefix && make install) - # Meson requires ninja >= 1.6, but trusty has 1.3.x + # Meson requires ninja >= 1.6, but xenial has 1.3.x wget https://github.com/ninja-build/ninja/releases/download/v1.6.0/ninja-linux.zip unzip ninja-linux.zip mv ninja $HOME/prefix/bin/ @@ -655,15 +801,13 @@ script: - if test "x$BUILD" = xmake; then - test -n "$OVERRIDE_CC" && export CC="$OVERRIDE_CC"; - test -n "$OVERRIDE_CXX" && export CXX="$OVERRIDE_CXX"; - test -n "$OVERRIDE_PATH" && export PATH="$OVERRIDE_PATH:$PATH"; - export CFLAGS="$CFLAGS -isystem`pwd`"; mkdir build && cd build && - ../autogen.sh --enable-debug + ../autogen.sh + --enable-autotools + --enable-debug $LIBUNWIND_FLAGS $DRI_LOADERS --with-dri-drivers=$DRI_DRIVERS @@ -676,41 +820,33 @@ fi - if test "x$BUILD" = xscons; then - test -n "$OVERRIDE_CC" && export CC="$OVERRIDE_CC"; - test -n "$OVERRIDE_CXX" && export CXX="$OVERRIDE_CXX"; scons $SCONS_TARGET && eval $SCONS_CHECK_COMMAND; fi - | if test "x$BUILD" = xmeson; then - - if test "x$TRAVIS_OS_NAME" == xosx; then - MESON_OPTIONS="-Degl=false" + if test -n "$LLVM_CONFIG"; then + # We need to control the version of llvm-config we're using, so we'll + # generate a native file to do so. This requires meson >=0.49 + # + echo -e "[binaries]\nllvm-config = '`which $LLVM_CONFIG`'" > native.file + + $LLVM_CONFIG --version + else + : > native.file fi - if test "x$TRAVIS_OS_NAME" == xlinux; then - MESON_OPTIONS="-Ddri-drivers=${DRI_DRIVERS:-[]} -Dgallium-drivers=${GALLIUM_DRIVERS:-[]} -Dvulkan-drivers=${VULKAN_DRIVERS:-[]}" - fi - - # Travis CI has moved to LLVM 5.0, and meson is detecting - # automatically the available version in /usr/local/bin based on - # the PATH env variable order preference. - # - # As for 0.44.x, Meson cannot receive the path to the - # llvm-config binary as a configuration parameter. 
See - # https://github.com/mesonbuild/meson/issues/2887 and - # https://github.com/dcbaker/meson/commit/7c8b6ee3fa42f43c9ac7dcacc61a77eca3f1bcef - # - # We want to use the custom (APT) installed version. Therefore, - # let's make Meson find our wanted version sooner than the one - # at /usr/local/bin - # - # Once this is corrected, we would still need a patch similar - # to: - # https://lists.freedesktop.org/archives/mesa-dev/2017-December/180217.html - test -f /usr/bin/$LLVM_CONFIG && ln -s /usr/bin/$LLVM_CONFIG $HOME/prefix/bin/llvm-config - export CFLAGS="$CFLAGS -isystem`pwd`" - meson _build $MESON_OPTIONS + meson _build \ + --native-file=native.file \ + -Dbuild-tests=true \ + -Dlibunwind=${UNWIND} \ + ${DRI_LOADERS} \ + -Ddri-drivers=${DRI_DRIVERS:-[]} \ + ${GALLIUM_ST} \ + -Dgallium-drivers=${GALLIUM_DRIVERS:-[]} \ + -Dvulkan-drivers=${VULKAN_DRIVERS:-[]} + meson configure _build ninja -C _build + ninja -C _build test fi diff -Nru mesa-18.3.3/VERSION mesa-19.0.1/VERSION --- mesa-18.3.3/VERSION 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/VERSION 2019-03-31 23:16:37.000000000 +0000 @@ -1 +1 @@ -18.3.3 +19.0.1